Minor updates to how multi-speed steps are handled

0da9e3c3 · Erik Nygren · d83c686d · 0da9e3c3
Commit 0da9e3c3 authored 5 years ago by Erik Nygren
--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -294,7 +294,8 @@ class RailEnv(Environment):
        alpha = 1.0
        beta = 1.0
+        # Epsilon to avoid rounding errors
+        epsilon = 0.01
        invalid_action_penalty = 0  # previously -2; GIACOMO: we decided that invalid actions will carry no penalty
        step_penalty = -1 * alpha
        global_reward = 1 * beta
@@ -310,7 +311,6 @@ class RailEnv(Environment):
            self.rewards_dict = {i: r + global_reward for i, r in self.rewards_dict.items()}
            return self._get_observations(), self.rewards_dict, self.dones, {}
-        # for i in range(len(self.agents_handles)):
        for i_agent in range(self.get_num_agents()):
            agent = self.agents[i_agent]
            agent.old_direction = agent.direction
@@ -331,7 +331,7 @@ class RailEnv(Environment):
                agent.malfunction_data['malfunction'] -= 1
                # Broken agents are stopped
-                self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed']
+                self.rewards_dict[i_agent] += step_penalty  # * agent.speed_data['speed']
                self.agents[i_agent].moving = False
                action_dict[i_agent] = RailEnvActions.DO_NOTHING
@@ -350,7 +350,8 @@ class RailEnv(Environment):
                # Keep moving
                action = RailEnvActions.MOVE_FORWARD
-            if action == RailEnvActions.STOP_MOVING and agent.moving and agent.speed_data['position_fraction'] == 0.:
+            if action == RailEnvActions.STOP_MOVING and agent.moving and agent.speed_data[
+                'position_fraction'] <= epsilon:
                # Only allow halting an agent on entering new cells.
                agent.moving = False
                self.rewards_dict[i_agent] += stop_penalty
@@ -372,7 +373,7 @@ class RailEnv(Environment):
            # If the agent can make an action
            action_selected = False
-            if agent.speed_data['position_fraction'] == 0.:
+            if agent.speed_data['position_fraction'] <= epsilon:
                if action != RailEnvActions.DO_NOTHING and action != RailEnvActions.STOP_MOVING:
                    cell_free, new_cell_valid, new_direction, new_position, transition_valid = \
                        self._check_action_on_agent(action, agent)
@@ -395,14 +396,14 @@ class RailEnv(Environment):
                            else:
                                # TODO: an invalid action was chosen after entering the cell. The agent cannot move.
                                self.rewards_dict[i_agent] += invalid_action_penalty
-                                self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed']
+                                self.rewards_dict[i_agent] += step_penalty  #* agent.speed_data['speed']
                                self.rewards_dict[i_agent] += stop_penalty
                                agent.moving = False
                                continue
                        else:
                            # TODO: an invalid action was chosen after entering the cell. The agent cannot move.
                            self.rewards_dict[i_agent] += invalid_action_penalty
-                            self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed']
+                            self.rewards_dict[i_agent] += step_penalty  #* agent.speed_data['speed']
                            self.rewards_dict[i_agent] += stop_penalty
                            agent.moving = False
                            continue