diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py index 280fd345d8c1db206c42dc30ba2d7b5fa2e8a69e..8486fc7f5f024023b44a5c6139bdb0d628e12303 100644 --- a/flatland/envs/rail_env.py +++ b/flatland/envs/rail_env.py @@ -294,7 +294,8 @@ class RailEnv(Environment): alpha = 1.0 beta = 1.0 - + # Epsilon to avoid rounding errors + epsilon = 0.01 invalid_action_penalty = 0 # previously -2; GIACOMO: we decided that invalid actions will carry no penalty step_penalty = -1 * alpha global_reward = 1 * beta @@ -310,7 +311,6 @@ class RailEnv(Environment): self.rewards_dict = {i: r + global_reward for i, r in self.rewards_dict.items()} return self._get_observations(), self.rewards_dict, self.dones, {} - # for i in range(len(self.agents_handles)): for i_agent in range(self.get_num_agents()): agent = self.agents[i_agent] agent.old_direction = agent.direction @@ -331,7 +331,7 @@ class RailEnv(Environment): agent.malfunction_data['malfunction'] -= 1 # Broken agents are stopped - self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed'] + self.rewards_dict[i_agent] += step_penalty # * agent.speed_data['speed'] self.agents[i_agent].moving = False action_dict[i_agent] = RailEnvActions.DO_NOTHING @@ -350,7 +350,8 @@ class RailEnv(Environment): # Keep moving action = RailEnvActions.MOVE_FORWARD - if action == RailEnvActions.STOP_MOVING and agent.moving and agent.speed_data['position_fraction'] == 0.: + if action == RailEnvActions.STOP_MOVING and agent.moving and agent.speed_data[ + 'position_fraction'] <= epsilon: # Only allow halting an agent on entering new cells. agent.moving = False self.rewards_dict[i_agent] += stop_penalty @@ -372,7 +373,7 @@ class RailEnv(Environment): # If the agent can make an action action_selected = False - if agent.speed_data['position_fraction'] == 0.: + if agent.speed_data['position_fraction'] <= epsilon: if action != RailEnvActions.DO_NOTHING and action != RailEnvActions.STOP_MOVING: cell_free, new_cell_valid, new_direction, new_position, transition_valid = \ self._check_action_on_agent(action, agent) @@ -395,14 +396,14 @@ class RailEnv(Environment): else: # TODO: an invalid action was chosen after entering the cell. The agent cannot move. self.rewards_dict[i_agent] += invalid_action_penalty - self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed'] + self.rewards_dict[i_agent] += step_penalty #* agent.speed_data['speed'] self.rewards_dict[i_agent] += stop_penalty agent.moving = False continue else: # TODO: an invalid action was chosen after entering the cell. The agent cannot move. self.rewards_dict[i_agent] += invalid_action_penalty - self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed'] + self.rewards_dict[i_agent] += step_penalty #* agent.speed_data['speed'] self.rewards_dict[i_agent] += stop_penalty agent.moving = False continue