diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py
index 280fd345d8c1db206c42dc30ba2d7b5fa2e8a69e..8486fc7f5f024023b44a5c6139bdb0d628e12303 100644
--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -294,7 +294,8 @@ class RailEnv(Environment):
 
         alpha = 1.0
         beta = 1.0
-
+        # Epsilon to avoid rounding errors
+        epsilon = 0.01
         invalid_action_penalty = 0  # previously -2; GIACOMO: we decided that invalid actions will carry no penalty
         step_penalty = -1 * alpha
         global_reward = 1 * beta
@@ -310,7 +311,6 @@ class RailEnv(Environment):
             self.rewards_dict = {i: r + global_reward for i, r in self.rewards_dict.items()}
             return self._get_observations(), self.rewards_dict, self.dones, {}
 
-        # for i in range(len(self.agents_handles)):
         for i_agent in range(self.get_num_agents()):
             agent = self.agents[i_agent]
             agent.old_direction = agent.direction
@@ -331,7 +331,7 @@ class RailEnv(Environment):
                 agent.malfunction_data['malfunction'] -= 1
 
                 # Broken agents are stopped
-                self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed']
+                self.rewards_dict[i_agent] += step_penalty  # * agent.speed_data['speed']
                 self.agents[i_agent].moving = False
                 action_dict[i_agent] = RailEnvActions.DO_NOTHING
 
@@ -350,7 +350,8 @@ class RailEnv(Environment):
                 # Keep moving
                 action = RailEnvActions.MOVE_FORWARD
 
-            if action == RailEnvActions.STOP_MOVING and agent.moving and agent.speed_data['position_fraction'] == 0.:
+            if action == RailEnvActions.STOP_MOVING and agent.moving and agent.speed_data[
+                'position_fraction'] <= epsilon:
                 # Only allow halting an agent on entering new cells.
                 agent.moving = False
                 self.rewards_dict[i_agent] += stop_penalty
@@ -372,7 +373,7 @@ class RailEnv(Environment):
 
             # If the agent can make an action
             action_selected = False
-            if agent.speed_data['position_fraction'] == 0.:
+            if agent.speed_data['position_fraction'] <= epsilon:
                 if action != RailEnvActions.DO_NOTHING and action != RailEnvActions.STOP_MOVING:
                     cell_free, new_cell_valid, new_direction, new_position, transition_valid = \
                         self._check_action_on_agent(action, agent)
@@ -395,14 +396,14 @@ class RailEnv(Environment):
                             else:
                                 # TODO: an invalid action was chosen after entering the cell. The agent cannot move.
                                 self.rewards_dict[i_agent] += invalid_action_penalty
-                                self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed']
+                                self.rewards_dict[i_agent] += step_penalty  # * agent.speed_data['speed']
                                 self.rewards_dict[i_agent] += stop_penalty
                                 agent.moving = False
                                 continue
                         else:
                             # TODO: an invalid action was chosen after entering the cell. The agent cannot move.
                             self.rewards_dict[i_agent] += invalid_action_penalty
-                            self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed']
+                            self.rewards_dict[i_agent] += step_penalty  # * agent.speed_data['speed']
                             self.rewards_dict[i_agent] += stop_penalty
                             agent.moving = False
                             continue