From 5d1de8684cc012a654a30c0fbd36abcb42b8c8b0 Mon Sep 17 00:00:00 2001
From: Erik Nygren <erik.nygren@sbb.ch>
Date: Fri, 5 Jul 2019 15:27:35 -0400
Subject: [PATCH] fixed reward function bug where taking an illegal action at
 a split switch was rewarded!

---
 flatland/envs/rail_env.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py
index b4a56a8..abc8a73 100644
--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -231,7 +231,7 @@ class RailEnv(Environment):
                 self.rewards_dict[iAgent] += stop_penalty
 
             if not agent.moving and not (action == RailEnvActions.DO_NOTHING or action == RailEnvActions.STOP_MOVING):
-                # Only allow agent to start moving by pressing forward.
+                # Allow agent to start with any forward or direction action
                 agent.moving = True
                 self.rewards_dict[iAgent] += start_penalty
 
@@ -270,6 +270,7 @@ class RailEnv(Environment):
                             else:
                                 # TODO: an invalid action was chosen after entering the cell. The agent cannot move.
                                 self.rewards_dict[iAgent] += invalid_action_penalty
+                                self.rewards_dict[iAgent] += step_penalty * agent.speed_data['speed']
                                 agent.moving = False
                                 self.rewards_dict[iAgent] += stop_penalty
 
@@ -277,6 +278,7 @@ class RailEnv(Environment):
                         else:
                             # TODO: an invalid action was chosen after entering the cell. The agent cannot move.
                             self.rewards_dict[iAgent] += invalid_action_penalty
+                            self.rewards_dict[iAgent] += step_penalty * agent.speed_data['speed']
                             agent.moving = False
                             self.rewards_dict[iAgent] += stop_penalty
 
-- 
GitLab