From ec96fd973042377f72944bee8aa4d85041c8594a Mon Sep 17 00:00:00 2001
From: Giacomo Spigler <spiglerg@gmail.com>
Date: Wed, 5 Jun 2019 21:48:05 +0200
Subject: [PATCH] agents keep going forward if motion started, until stopped +
 RailEnvActions enum + agent.moving flag for the motion status of each agent +
 penalties for starting/stopping agents, but set to 0

---
 flatland/envs/rail_env.py | 55 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 6 deletions(-)

diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py
index ba4e9e46..a7678e06 100644
--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -183,12 +183,18 @@ class RailEnv(Environment):
         return self._get_observations()
 
     def step(self, action_dict):
+        # TODO: possible refactoring. This function could be rewritten to better exploit the new agent.moving flag:
+        # actions would set the motion and stopping of the agent, and movement would then be performed by checking
+        # the flag directly, rather than overwriting the agents' actions to 'forward'.
+
         alpha = 1.0
         beta = 1.0
 
-        invalid_action_penalty = 0 # -2 GIACOMO: we decided that invalid actions will carry no penalty
+        invalid_action_penalty = 0  # previously -2; GIACOMO: we decided that invalid actions will carry no penalty
         step_penalty = -1 * alpha
         global_reward = 1 * beta
+        stop_penalty = 0  # penalty for stopping a moving agent (0 = currently disabled)
+        start_penalty = 0  # penalty for starting a stopped agent (0 = currently disabled)
 
         # Reset the step rewards
         self.rewards_dict = dict()
@@ -234,11 +240,15 @@ class RailEnv(Environment):
                 action_dict[iAgent] = RailEnvActions.DO_NOTHING
                 action = RailEnvActions.DO_NOTHING
                 agent.moving = False
-                # TODO: possibly, penalty for stopping!
+                self.rewards_dict[iAgent] += stop_penalty  # stop_penalty is 0 for now, so this is a no-op hook
+
+            if not agent.moving and \
+               (action == RailEnvActions.MOVE_LEFT or
+               action == RailEnvActions.MOVE_FORWARD or
+               action == RailEnvActions.MOVE_RIGHT):
 
-            if not agent.moving and (action == RailEnvActions.MOVE_LEFT or action == RailEnvActions.MOVE_FORWARD or action == RailEnvActions.MOVE_RIGHT):
                 agent.moving = True
-                # TODO: possibly, may add a penalty for starting, but the best is only for stopping (GIACOMO's opinion)
+                self.rewards_dict[iAgent] += start_penalty  # start_penalty is 0 for now, so this is a no-op hook
 
             if action != RailEnvActions.DO_NOTHING and action != RailEnvActions.STOP_MOVING:
                 # pos = agent.position #  self.agents_position[i]
@@ -297,8 +307,41 @@ class RailEnv(Environment):
                     agent.position = new_position
                     agent.direction = new_direction
                 else:
-                    # the action was not valid, add penalty
-                    self.rewards_dict[iAgent] += invalid_action_penalty
+                    # TODO: de-duplicate this fallback with the validity checks above. Logic: if the chosen
+                    # action is invalid, was LEFT or RIGHT, and the agent was moving, then keep moving FORWARD.
+                    if action in (RailEnvActions.MOVE_LEFT, RailEnvActions.MOVE_RIGHT) and agent.moving:
+                        new_direction, transition_isValid = self.check_action(agent, RailEnvActions.MOVE_FORWARD)
+                        new_position = get_new_position(agent.position, new_direction)
+
+                        new_cell_isValid = (
+                            np.array_equal(  # Check the new position is still in the grid
+                                new_position,
+                                np.clip(new_position, [0, 0], [self.height - 1, self.width - 1]))
+                            and  # check the new position has some transitions (ie is not an empty cell)
+                            self.rail.get_transitions(new_position) > 0)
+
+                        # If transition validity hasn't been checked yet.
+                        if transition_isValid is None:
+                            transition_isValid = self.rail.get_transition(
+                                (*agent.position, agent.direction),
+                                new_direction)
+
+                        cell_isFree = not np.any(
+                            np.equal(new_position, [agent2.position for agent2 in self.agents]).all(1))
+
+                        if all([new_cell_isValid, transition_isValid, cell_isFree]):
+                            agent.old_direction = agent.direction
+                            agent.old_position = agent.position
+                            agent.position = new_position
+                            agent.direction = new_direction
+
+                        else:
+                            # the action was not valid, add penalty
+                            self.rewards_dict[iAgent] += invalid_action_penalty
+
+                    else:
+                        # the action was not valid, add penalty
+                        self.rewards_dict[iAgent] += invalid_action_penalty
 
             # if agent is not in target position, add step penalty
             # if self.agents_position[i][0] == self.agents_target[i][0] and \
-- 
GitLab