diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py index 4926fb1fc153f25ade27f59db5d546ada8804c15..ea9c16e1a7f5fcf91fc424559640679668092e51 100644 --- a/flatland/envs/rail_env.py +++ b/flatland/envs/rail_env.py @@ -339,7 +339,7 @@ class RailEnv(Environment): agent.malfunction_data['malfunction'] -= 1 # Broken agents are stopped - self.rewards_dict[i_agent] += step_penalty # * agent.speed_data['speed'] + self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed'] self.agents[i_agent].moving = False action_dict[i_agent] = RailEnvActions.DO_NOTHING @@ -404,14 +404,14 @@ class RailEnv(Environment): else: # TODO: an invalid action was chosen after entering the cell. The agent cannot move. self.rewards_dict[i_agent] += invalid_action_penalty - self.rewards_dict[i_agent] += step_penalty #* agent.speed_data['speed'] + self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed'] self.rewards_dict[i_agent] += stop_penalty agent.moving = False continue else: # TODO: an invalid action was chosen after entering the cell. The agent cannot move. self.rewards_dict[i_agent] += invalid_action_penalty - self.rewards_dict[i_agent] += step_penalty #* agent.speed_data['speed'] + self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed'] self.rewards_dict[i_agent] += stop_penalty agent.moving = False continue