Skip to content
Snippets Groups Projects
Commit 5d1de868 authored by Erik Nygren
Browse files

Fixed a reward function bug where taking an illegal action at a split switch was rewarded!

parent 62dbb589
No related branches found
No related tags found
No related merge requests found
...@@ -231,7 +231,7 @@ class RailEnv(Environment): ...@@ -231,7 +231,7 @@ class RailEnv(Environment):
self.rewards_dict[iAgent] += stop_penalty self.rewards_dict[iAgent] += stop_penalty
if not agent.moving and not (action == RailEnvActions.DO_NOTHING or action == RailEnvActions.STOP_MOVING): if not agent.moving and not (action == RailEnvActions.DO_NOTHING or action == RailEnvActions.STOP_MOVING):
# Only allow agent to start moving by pressing forward. # Allow agent to start with any forward or direction action
agent.moving = True agent.moving = True
self.rewards_dict[iAgent] += start_penalty self.rewards_dict[iAgent] += start_penalty
...@@ -270,6 +270,7 @@ class RailEnv(Environment): ...@@ -270,6 +270,7 @@ class RailEnv(Environment):
else: else:
# TODO: an invalid action was chosen after entering the cell. The agent cannot move. # TODO: an invalid action was chosen after entering the cell. The agent cannot move.
self.rewards_dict[iAgent] += invalid_action_penalty self.rewards_dict[iAgent] += invalid_action_penalty
self.rewards_dict[iAgent] += step_penalty * agent.speed_data['speed']
agent.moving = False agent.moving = False
self.rewards_dict[iAgent] += stop_penalty self.rewards_dict[iAgent] += stop_penalty
...@@ -277,6 +278,7 @@ class RailEnv(Environment): ...@@ -277,6 +278,7 @@ class RailEnv(Environment):
else: else:
# TODO: an invalid action was chosen after entering the cell. The agent cannot move. # TODO: an invalid action was chosen after entering the cell. The agent cannot move.
self.rewards_dict[iAgent] += invalid_action_penalty self.rewards_dict[iAgent] += invalid_action_penalty
self.rewards_dict[iAgent] += step_penalty * agent.speed_data['speed']
agent.moving = False agent.moving = False
self.rewards_dict[iAgent] += stop_penalty self.rewards_dict[iAgent] += stop_penalty
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment