From 5d1de8684cc012a654a30c0fbd36abcb42b8c8b0 Mon Sep 17 00:00:00 2001 From: Erik Nygren <erik.nygren@sbb.ch> Date: Fri, 5 Jul 2019 15:27:35 -0400 Subject: [PATCH] fixed reward function bug where taking illegal actions at a split switch was rewarded! --- flatland/envs/rail_env.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py index b4a56a8..abc8a73 100644 --- a/flatland/envs/rail_env.py +++ b/flatland/envs/rail_env.py @@ -231,7 +231,7 @@ class RailEnv(Environment): self.rewards_dict[iAgent] += stop_penalty if not agent.moving and not (action == RailEnvActions.DO_NOTHING or action == RailEnvActions.STOP_MOVING): - # Only allow agent to start moving by pressing forward. + # Allow agent to start with any forward or direction action agent.moving = True self.rewards_dict[iAgent] += start_penalty @@ -270,6 +270,7 @@ class RailEnv(Environment): else: # TODO: an invalid action was chosen after entering the cell. The agent cannot move. self.rewards_dict[iAgent] += invalid_action_penalty + self.rewards_dict[iAgent] += step_penalty * agent.speed_data['speed'] agent.moving = False self.rewards_dict[iAgent] += stop_penalty @@ -277,6 +278,7 @@ class RailEnv(Environment): else: # TODO: an invalid action was chosen after entering the cell. The agent cannot move. self.rewards_dict[iAgent] += invalid_action_penalty + self.rewards_dict[iAgent] += step_penalty * agent.speed_data['speed'] agent.moving = False self.rewards_dict[iAgent] += stop_penalty -- GitLab