From 15801ce106228121f6f7a590460e0787d086513a Mon Sep 17 00:00:00 2001 From: Adrian Egli <adrian.egli@sbb.ch> Date: Thu, 8 Oct 2020 16:25:27 +0200 Subject: [PATCH] other testing --- aicrowd.json | 2 +- run.py | 11 +- src/extra.py | 808 +++++++++--------- src/simple/ClassifyProblemInstance.py | 94 ++ src/simple/DeadLock_Avoidance.py | 574 +++++++++++++ src/simple/ShortestPathPredictorForRailEnv.py | 107 +++ 6 files changed, 1190 insertions(+), 406 deletions(-) create mode 100644 src/simple/ClassifyProblemInstance.py create mode 100644 src/simple/DeadLock_Avoidance.py create mode 100644 src/simple/ShortestPathPredictorForRailEnv.py diff --git a/aicrowd.json b/aicrowd.json index de611d3..976e3fd 100644 --- a/aicrowd.json +++ b/aicrowd.json @@ -2,6 +2,6 @@ "challenge_id": "neurips-2020-flatland-challenge", "grader_id": "neurips-2020-flatland-challenge", "debug": false, - "tags": ["RL"] + "tags": ["other"] } diff --git a/run.py b/run.py index a2f071c..5cd92d3 100644 --- a/run.py +++ b/run.py @@ -4,10 +4,12 @@ import numpy as np from flatland.envs.agent_utils import RailAgentStatus from flatland.evaluators.client import FlatlandRemoteClient + ##################################################################### # Instantiate a Remote Client ##################################################################### from src.extra import Extra +from src.simple.DeadLock_Avoidance import calculate_one_step_heuristics, calculate_one_step_package_implementation,calculate_one_step,calculate_one_step_primitive_implementation remote_client = FlatlandRemoteClient() @@ -19,9 +21,16 @@ remote_client = FlatlandRemoteClient() # compute the necessary action for this step for all (or even some) # of the agents ##################################################################### -def my_controller(extra: Extra, observation, info): +def my_controller_RL(extra: Extra, observation, info): return extra.rl_agent_act(observation, info) +def my_controller(local_env, obs, number_of_agents): + _action, _ = calculate_one_step(extra.env) + # _action, _ = calculate_one_step_package_implementation(local_env) + # _action, _ = calculate_one_step_primitive_implementation(local_env) + # _action, _ = calculate_one_step_heuristics(local_env) + return _action + ##################################################################### # Instantiate your custom Observation Builder diff --git a/src/extra.py b/src/extra.py index 312cebb..b70830f 100644 --- a/src/extra.py +++ b/src/extra.py @@ -1,404 +1,404 @@ -# -# Author Adrian Egli -# -# This observation solves the FLATland challenge ROUND 1 - with agent's done 19.3% -# -# Training: -# For the training of the PPO RL agent I showed 10k episodes - The episodes used for the training -# consists of 1..20 agents on a 50x50 grid. Thus the RL agent has to learn to handle 1 upto 20 agents. -# -# - https://github.com/mitchellgoffpc/flatland-training -# ./adrian_egli_ppo_training_done.png -# -# The key idea behind this observation is that agent's can not freely choose where they want. 
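In other words, an agent's route is forced everywhere except at switch cells, so only switches (and the cells directly in front of them) require a decision. A minimal sketch of how such decision cells can be detected, assuming a built Flatland RailEnv `env` whose `rail.get_transitions(row, col, direction)` returns a 4-tuple of possible exits; this mirrors find_all_cell_where_agent_can_choose below:

    def find_switches(env):
        # a cell is a decision point for a given heading if more than one exit is open
        switches = {}
        for h in range(env.height):
            for w in range(env.width):
                for direction in range(4):
                    if sum(env.rail.get_transitions(h, w, direction)) > 1:
                        switches.setdefault((h, w), []).append(direction)
        return switches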
-# -# ./images/adrian_egli_decisions.png -# ./images/adrian_egli_info.png -# ./images/adrian_egli_start.png -# ./images/adrian_egli_target.png -# -# Private submission -# http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/8 - -import numpy as np -from flatland.core.env_observation_builder import ObservationBuilder -from flatland.core.grid.grid4_utils import get_new_position -from flatland.envs.agent_utils import RailAgentStatus -from flatland.envs.rail_env import RailEnvActions - -from src.ppo.agent import Agent - - -# ------------------------------------- USE FAST_METHOD from FLATland master ------------------------------------------ -# Adrian Egli performance fix (the fast methods brings more than 50%) - -def fast_isclose(a, b, rtol): - return (a < (b + rtol)) or (a < (b - rtol)) - - -def fast_clip(position: (int, int), min_value: (int, int), max_value: (int, int)) -> bool: - return ( - max(min_value[0], min(position[0], max_value[0])), - max(min_value[1], min(position[1], max_value[1])) - ) - - -def fast_argmax(possible_transitions: (int, int, int, int)) -> bool: - if possible_transitions[0] == 1: - return 0 - if possible_transitions[1] == 1: - return 1 - if possible_transitions[2] == 1: - return 2 - return 3 - - -def fast_position_equal(pos_1: (int, int), pos_2: (int, int)) -> bool: - return pos_1[0] == pos_2[0] and pos_1[1] == pos_2[1] - - -def fast_count_nonzero(possible_transitions: (int, int, int, int)): - return possible_transitions[0] + possible_transitions[1] + possible_transitions[2] + possible_transitions[3] - - -# ------------------------------- END - USE FAST_METHOD from FLATland master ------------------------------------------ - -class Extra(ObservationBuilder): - - def __init__(self, max_depth): - self.max_depth = max_depth - self.observation_dim = 26 - self.agent = None - self.random_agent_starter = [] - - def build_data(self): - if self.env is not None: - self.env.dev_obs_dict = {} - self.switches = {} - self.switches_neighbours = {} - self.debug_render_list = [] - self.debug_render_path_list = [] - if self.env is not None: - self.find_all_cell_where_agent_can_choose() - - def find_all_cell_where_agent_can_choose(self): - - switches = {} - for h in range(self.env.height): - for w in range(self.env.width): - pos = (h, w) - for dir in range(4): - possible_transitions = self.env.rail.get_transitions(*pos, dir) - num_transitions = fast_count_nonzero(possible_transitions) - if num_transitions > 1: - if pos not in switches.keys(): - switches.update({pos: [dir]}) - else: - switches[pos].append(dir) - - switches_neighbours = {} - for h in range(self.env.height): - for w in range(self.env.width): - # look one step forward - for dir in range(4): - pos = (h, w) - possible_transitions = self.env.rail.get_transitions(*pos, dir) - for d in range(4): - if possible_transitions[d] == 1: - new_cell = get_new_position(pos, d) - if new_cell in switches.keys() and pos not in switches.keys(): - if pos not in switches_neighbours.keys(): - switches_neighbours.update({pos: [dir]}) - else: - switches_neighbours[pos].append(dir) - - self.switches = switches - self.switches_neighbours = switches_neighbours - - def check_agent_descision(self, position, direction): - switches = self.switches - switches_neighbours = self.switches_neighbours - agents_on_switch = False - agents_near_to_switch = False - agents_near_to_switch_all = False - if position in switches.keys(): - agents_on_switch = direction in switches[position] - - if position in switches_neighbours.keys(): - 
new_cell = get_new_position(position, direction) - if new_cell in switches.keys(): - if not direction in switches[new_cell]: - agents_near_to_switch = direction in switches_neighbours[position] - else: - agents_near_to_switch = direction in switches_neighbours[position] - - agents_near_to_switch_all = direction in switches_neighbours[position] - - return agents_on_switch, agents_near_to_switch, agents_near_to_switch_all - - def required_agent_descision(self): - agents_can_choose = {} - agents_on_switch = {} - agents_near_to_switch = {} - agents_near_to_switch_all = {} - for a in range(self.env.get_num_agents()): - ret_agents_on_switch, ret_agents_near_to_switch, ret_agents_near_to_switch_all = \ - self.check_agent_descision( - self.env.agents[a].position, - self.env.agents[a].direction) - agents_on_switch.update({a: ret_agents_on_switch}) - ready_to_depart = self.env.agents[a].status == RailAgentStatus.READY_TO_DEPART - agents_near_to_switch.update({a: (ret_agents_near_to_switch and not ready_to_depart)}) - - agents_can_choose.update({a: agents_on_switch[a] or agents_near_to_switch[a]}) - - agents_near_to_switch_all.update({a: (ret_agents_near_to_switch_all and not ready_to_depart)}) - - return agents_can_choose, agents_on_switch, agents_near_to_switch, agents_near_to_switch_all - - def debug_render(self, env_renderer): - agents_can_choose, agents_on_switch, agents_near_to_switch, agents_near_to_switch_all = \ - self.required_agent_descision() - self.env.dev_obs_dict = {} - for a in range(max(3, self.env.get_num_agents())): - self.env.dev_obs_dict.update({a: []}) - - selected_agent = None - if agents_can_choose[0]: - if self.env.agents[0].position is not None: - self.debug_render_list.append(self.env.agents[0].position) - else: - self.debug_render_list.append(self.env.agents[0].initial_position) - - if self.env.agents[0].position is not None: - self.debug_render_path_list.append(self.env.agents[0].position) - else: - self.debug_render_path_list.append(self.env.agents[0].initial_position) - - env_renderer.gl.agent_colors[0] = env_renderer.gl.rgb_s2i("FF0000") - env_renderer.gl.agent_colors[1] = env_renderer.gl.rgb_s2i("666600") - env_renderer.gl.agent_colors[2] = env_renderer.gl.rgb_s2i("006666") - env_renderer.gl.agent_colors[3] = env_renderer.gl.rgb_s2i("550000") - - self.env.dev_obs_dict[0] = self.debug_render_list - self.env.dev_obs_dict[1] = self.switches.keys() - self.env.dev_obs_dict[2] = self.switches_neighbours.keys() - self.env.dev_obs_dict[3] = self.debug_render_path_list - - def normalize_observation(self, obsData): - return obsData - - def is_collision(self, obsData): - return False - - def reset(self): - self.build_data() - return - - def fast_argmax(self, array): - if array[0] == 1: - return 0 - if array[1] == 1: - return 1 - if array[2] == 1: - return 2 - return 3 - - def _explore(self, handle, new_position, new_direction, depth=0): - has_opp_agent = 0 - has_same_agent = 0 - visited = [] - - # stop exploring (max_depth reached) - if depth >= self.max_depth: - return has_opp_agent, has_same_agent, visited - - # max_explore_steps = 100 - cnt = 0 - while cnt < 100: - cnt += 1 - - visited.append(new_position) - opp_a = self.env.agent_positions[new_position] - if opp_a != -1 and opp_a != handle: - if self.env.agents[opp_a].direction != new_direction: - # opp agent found - has_opp_agent = 1 - return has_opp_agent, has_same_agent, visited - else: - has_same_agent = 1 - return has_opp_agent, has_same_agent, visited - - # convert one-hot encoding to 0,1,2,3 - possible_transitions = 
self.env.rail.get_transitions(*new_position, new_direction) - agents_on_switch, \ - agents_near_to_switch, \ - agents_near_to_switch_all = \ - self.check_agent_descision(new_position, new_direction) - if agents_near_to_switch: - return has_opp_agent, has_same_agent, visited - - if agents_on_switch: - for dir_loop in range(4): - if possible_transitions[dir_loop] == 1: - hoa, hsa, v = self._explore(handle, - get_new_position(new_position, dir_loop), - dir_loop, - depth + 1) - visited.append(v) - has_opp_agent = 0.5 * (has_opp_agent + hoa) - has_same_agent = 0.5 * (has_same_agent + hsa) - return has_opp_agent, has_same_agent, visited - else: - new_direction = fast_argmax(possible_transitions) - new_position = get_new_position(new_position, new_direction) - return has_opp_agent, has_same_agent, visited - - def get(self, handle): - # all values are [0,1] - # observation[0] : 1 path towards target (direction 0) / otherwise 0 -> path is longer or there is no path - # observation[1] : 1 path towards target (direction 1) / otherwise 0 -> path is longer or there is no path - # observation[2] : 1 path towards target (direction 2) / otherwise 0 -> path is longer or there is no path - # observation[3] : 1 path towards target (direction 3) / otherwise 0 -> path is longer or there is no path - # observation[4] : int(agent.status == RailAgentStatus.READY_TO_DEPART) - # observation[5] : int(agent.status == RailAgentStatus.ACTIVE) - # observation[6] : int(agent.status == RailAgentStatus.DONE or agent.status == RailAgentStatus.DONE_REMOVED) - # observation[7] : current agent is located at a switch, where it can take a routing decision - # observation[8] : current agent is located at a cell, where it has to take a stop-or-go decision - # observation[9] : current agent is located one step before/after a switch - # observation[10] : 1 if there is a path (track/branch) otherwise 0 (direction 0) - # observation[11] : 1 if there is a path (track/branch) otherwise 0 (direction 1) - # observation[12] : 1 if there is a path (track/branch) otherwise 0 (direction 2) - # observation[13] : 1 if there is a path (track/branch) otherwise 0 (direction 3) - # observation[14] : If there is a path with step (direction 0) and there is a agent with opposite direction -> 1 - # observation[15] : If there is a path with step (direction 1) and there is a agent with opposite direction -> 1 - # observation[16] : If there is a path with step (direction 2) and there is a agent with opposite direction -> 1 - # observation[17] : If there is a path with step (direction 3) and there is a agent with opposite direction -> 1 - # observation[18] : If there is a path with step (direction 0) and there is a agent with same direction -> 1 - # observation[19] : If there is a path with step (direction 1) and there is a agent with same direction -> 1 - # observation[20] : If there is a path with step (direction 2) and there is a agent with same direction -> 1 - # observation[21] : If there is a path with step (direction 3) and there is a agent with same direction -> 1 - - observation = np.zeros(self.observation_dim) - visited = [] - agent = self.env.agents[handle] - - agent_done = False - if agent.status == RailAgentStatus.READY_TO_DEPART: - agent_virtual_position = agent.initial_position - observation[4] = 1 - elif agent.status == RailAgentStatus.ACTIVE: - agent_virtual_position = agent.position - observation[5] = 1 - else: - observation[6] = 1 - agent_virtual_position = (-1, -1) - agent_done = True - - if not agent_done: - 
visited.append(agent_virtual_position) - distance_map = self.env.distance_map.get() - current_cell_dist = distance_map[handle, - agent_virtual_position[0], agent_virtual_position[1], - agent.direction] - possible_transitions = self.env.rail.get_transitions(*agent_virtual_position, agent.direction) - orientation = agent.direction - if fast_count_nonzero(possible_transitions) == 1: - orientation = np.argmax(possible_transitions) - - for dir_loop, branch_direction in enumerate([(orientation + i) % 4 for i in range(-1, 3)]): - if possible_transitions[branch_direction]: - new_position = get_new_position(agent_virtual_position, branch_direction) - - new_cell_dist = distance_map[handle, - new_position[0], new_position[1], - branch_direction] - if not (np.math.isinf(new_cell_dist) and np.math.isinf(current_cell_dist)): - observation[dir_loop] = int(new_cell_dist < current_cell_dist) - - has_opp_agent, has_same_agent, v = self._explore(handle, new_position, branch_direction) - visited.append(v) - - observation[10 + dir_loop] = 1 - observation[14 + dir_loop] = has_opp_agent - observation[18 + dir_loop] = has_same_agent - - opp_a = self.env.agent_positions[new_position] - if opp_a != -1 and opp_a != handle: - observation[22 + dir_loop] = 1 - - agents_on_switch, \ - agents_near_to_switch, \ - agents_near_to_switch_all = \ - self.check_agent_descision(agent_virtual_position, agent.direction) - observation[7] = int(agents_on_switch) - observation[8] = int(agents_near_to_switch) - observation[9] = int(agents_near_to_switch_all) - - self.env.dev_obs_dict.update({handle: visited}) - - return observation - - def rl_agent_act_ADRIAN(self, observation, info, eps=0.0): - self.loadAgent() - action_dict = {} - for a in range(self.env.get_num_agents()): - if info['action_required'][a]: - action_dict[a] = self.agent.act(observation[a], eps=eps) - # action_dict[a] = np.random.randint(5) - else: - action_dict[a] = RailEnvActions.DO_NOTHING - - return action_dict - - def rl_agent_act(self, observation, info, eps=0.0): - if len(self.random_agent_starter) != self.env.get_num_agents(): - self.random_agent_starter = np.random.random(self.env.get_num_agents()) * 1000.0 - self.loadAgent() - - action_dict = {} - for a in range(self.env.get_num_agents()): - if self.random_agent_starter[a] > self.env._elapsed_steps: - action_dict[a] = RailEnvActions.STOP_MOVING - elif info['action_required'][a]: - action_dict[a] = self.agent.act(observation[a], eps=eps) - # action_dict[a] = np.random.randint(5) - else: - action_dict[a] = RailEnvActions.DO_NOTHING - - return action_dict - - def rl_agent_act_ADRIAN_01(self, observation, info, eps=0.0): - self.loadAgent() - action_dict = {} - active_cnt = 0 - for a in range(self.env.get_num_agents()): - if active_cnt < 10 or self.env.agents[a].status == RailAgentStatus.ACTIVE: - if observation[a][6] == 1: - active_cnt += int(self.env.agents[a].status == RailAgentStatus.ACTIVE) - action_dict[a] = RailEnvActions.STOP_MOVING - else: - active_cnt += int(self.env.agents[a].status < RailAgentStatus.DONE) - if (observation[a][7] + observation[a][8] + observation[a][9] > 0) or \ - (self.env.agents[a].status < RailAgentStatus.ACTIVE): - if info['action_required'][a]: - action_dict[a] = self.agent.act(observation[a], eps=eps) - # action_dict[a] = np.random.randint(5) - else: - action_dict[a] = RailEnvActions.MOVE_FORWARD - else: - action_dict[a] = RailEnvActions.MOVE_FORWARD - else: - action_dict[a] = RailEnvActions.STOP_MOVING - - return action_dict - - def loadAgent(self): - if self.agent is not None: - 
return - self.state_size = self.env.obs_builder.observation_dim - self.action_size = 5 - print("action_size: ", self.action_size) - print("state_size: ", self.state_size) - self.agent = Agent(self.state_size, self.action_size, 0) - self.agent.load('./checkpoints/', 0, 1.0) +# +# Author Adrian Egli +# +# This observation solves the FLATland challenge ROUND 1 - with agent's done 19.3% +# +# Training: +# For the training of the PPO RL agent I showed 10k episodes - The episodes used for the training +# consists of 1..20 agents on a 50x50 grid. Thus the RL agent has to learn to handle 1 upto 20 agents. +# +# - https://github.com/mitchellgoffpc/flatland-training +# ./adrian_egli_ppo_training_done.png +# +# The key idea behind this observation is that agent's can not freely choose where they want. +# +# ./images/adrian_egli_decisions.png +# ./images/adrian_egli_info.png +# ./images/adrian_egli_start.png +# ./images/adrian_egli_target.png +# +# Private submission +# http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/8 + +import numpy as np +from flatland.core.env_observation_builder import ObservationBuilder +from flatland.core.grid.grid4_utils import get_new_position +from flatland.envs.agent_utils import RailAgentStatus +from flatland.envs.rail_env import RailEnvActions + +from src.ppo.agent import Agent + + +# ------------------------------------- USE FAST_METHOD from FLATland master ------------------------------------------ +# Adrian Egli performance fix (the fast methods brings more than 50%) + +def fast_isclose(a, b, rtol): + return (a < (b + rtol)) or (a < (b - rtol)) + + +def fast_clip(position: (int, int), min_value: (int, int), max_value: (int, int)) -> bool: + return ( + max(min_value[0], min(position[0], max_value[0])), + max(min_value[1], min(position[1], max_value[1])) + ) + + +def fast_argmax(possible_transitions: (int, int, int, int)) -> bool: + if possible_transitions[0] == 1: + return 0 + if possible_transitions[1] == 1: + return 1 + if possible_transitions[2] == 1: + return 2 + return 3 + + +def fast_position_equal(pos_1: (int, int), pos_2: (int, int)) -> bool: + return pos_1[0] == pos_2[0] and pos_1[1] == pos_2[1] + + +def fast_count_nonzero(possible_transitions: (int, int, int, int)): + return possible_transitions[0] + possible_transitions[1] + possible_transitions[2] + possible_transitions[3] + + +# ------------------------------- END - USE FAST_METHOD from FLATland master ------------------------------------------ + +class Extra(ObservationBuilder): + + def __init__(self, max_depth): + self.max_depth = max_depth + self.observation_dim = 26 + self.agent = None + self.random_agent_starter = [] + + def build_data(self): + if self.env is not None: + self.env.dev_obs_dict = {} + self.switches = {} + self.switches_neighbours = {} + self.debug_render_list = [] + self.debug_render_path_list = [] + if self.env is not None: + self.find_all_cell_where_agent_can_choose() + + def find_all_cell_where_agent_can_choose(self): + + switches = {} + for h in range(self.env.height): + for w in range(self.env.width): + pos = (h, w) + for dir in range(4): + possible_transitions = self.env.rail.get_transitions(*pos, dir) + num_transitions = fast_count_nonzero(possible_transitions) + if num_transitions > 1: + if pos not in switches.keys(): + switches.update({pos: [dir]}) + else: + switches[pos].append(dir) + + switches_neighbours = {} + for h in range(self.env.height): + for w in range(self.env.width): + # look one step forward + for dir in range(4): + pos = (h, w) + 
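+                # probe all four exits from (h, w) for this heading; a cell whose
+                # next cell is a switch (while not being a switch itself) is
+                # recorded below as a "switch neighbour"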
possible_transitions = self.env.rail.get_transitions(*pos, dir) + for d in range(4): + if possible_transitions[d] == 1: + new_cell = get_new_position(pos, d) + if new_cell in switches.keys() and pos not in switches.keys(): + if pos not in switches_neighbours.keys(): + switches_neighbours.update({pos: [dir]}) + else: + switches_neighbours[pos].append(dir) + + self.switches = switches + self.switches_neighbours = switches_neighbours + + def check_agent_descision(self, position, direction): + switches = self.switches + switches_neighbours = self.switches_neighbours + agents_on_switch = False + agents_near_to_switch = False + agents_near_to_switch_all = False + if position in switches.keys(): + agents_on_switch = direction in switches[position] + + if position in switches_neighbours.keys(): + new_cell = get_new_position(position, direction) + if new_cell in switches.keys(): + if not direction in switches[new_cell]: + agents_near_to_switch = direction in switches_neighbours[position] + else: + agents_near_to_switch = direction in switches_neighbours[position] + + agents_near_to_switch_all = direction in switches_neighbours[position] + + return agents_on_switch, agents_near_to_switch, agents_near_to_switch_all + + def required_agent_descision(self): + agents_can_choose = {} + agents_on_switch = {} + agents_near_to_switch = {} + agents_near_to_switch_all = {} + for a in range(self.env.get_num_agents()): + ret_agents_on_switch, ret_agents_near_to_switch, ret_agents_near_to_switch_all = \ + self.check_agent_descision( + self.env.agents[a].position, + self.env.agents[a].direction) + agents_on_switch.update({a: ret_agents_on_switch}) + ready_to_depart = self.env.agents[a].status == RailAgentStatus.READY_TO_DEPART + agents_near_to_switch.update({a: (ret_agents_near_to_switch and not ready_to_depart)}) + + agents_can_choose.update({a: agents_on_switch[a] or agents_near_to_switch[a]}) + + agents_near_to_switch_all.update({a: (ret_agents_near_to_switch_all and not ready_to_depart)}) + + return agents_can_choose, agents_on_switch, agents_near_to_switch, agents_near_to_switch_all + + def debug_render(self, env_renderer): + agents_can_choose, agents_on_switch, agents_near_to_switch, agents_near_to_switch_all = \ + self.required_agent_descision() + self.env.dev_obs_dict = {} + for a in range(max(3, self.env.get_num_agents())): + self.env.dev_obs_dict.update({a: []}) + + selected_agent = None + if agents_can_choose[0]: + if self.env.agents[0].position is not None: + self.debug_render_list.append(self.env.agents[0].position) + else: + self.debug_render_list.append(self.env.agents[0].initial_position) + + if self.env.agents[0].position is not None: + self.debug_render_path_list.append(self.env.agents[0].position) + else: + self.debug_render_path_list.append(self.env.agents[0].initial_position) + + env_renderer.gl.agent_colors[0] = env_renderer.gl.rgb_s2i("FF0000") + env_renderer.gl.agent_colors[1] = env_renderer.gl.rgb_s2i("666600") + env_renderer.gl.agent_colors[2] = env_renderer.gl.rgb_s2i("006666") + env_renderer.gl.agent_colors[3] = env_renderer.gl.rgb_s2i("550000") + + self.env.dev_obs_dict[0] = self.debug_render_list + self.env.dev_obs_dict[1] = self.switches.keys() + self.env.dev_obs_dict[2] = self.switches_neighbours.keys() + self.env.dev_obs_dict[3] = self.debug_render_path_list + + def normalize_observation(self, obsData): + return obsData + + def is_collision(self, obsData): + return False + + def reset(self): + self.build_data() + return + + def fast_argmax(self, array): + if array[0] == 1: + return 
0 + if array[1] == 1: + return 1 + if array[2] == 1: + return 2 + return 3 + + def _explore(self, handle, new_position, new_direction, depth=0): + has_opp_agent = 0 + has_same_agent = 0 + visited = [] + + # stop exploring (max_depth reached) + if depth >= self.max_depth: + return has_opp_agent, has_same_agent, visited + + # max_explore_steps = 100 + cnt = 0 + while cnt < 100: + cnt += 1 + + visited.append(new_position) + opp_a = self.env.agent_positions[new_position] + if opp_a != -1 and opp_a != handle: + if self.env.agents[opp_a].direction != new_direction: + # opp agent found + has_opp_agent = 1 + return has_opp_agent, has_same_agent, visited + else: + has_same_agent = 1 + return has_opp_agent, has_same_agent, visited + + # convert one-hot encoding to 0,1,2,3 + possible_transitions = self.env.rail.get_transitions(*new_position, new_direction) + agents_on_switch, \ + agents_near_to_switch, \ + agents_near_to_switch_all = \ + self.check_agent_descision(new_position, new_direction) + if agents_near_to_switch: + return has_opp_agent, has_same_agent, visited + + if agents_on_switch: + for dir_loop in range(4): + if possible_transitions[dir_loop] == 1: + hoa, hsa, v = self._explore(handle, + get_new_position(new_position, dir_loop), + dir_loop, + depth + 1) + visited.append(v) + has_opp_agent = 0.5 * (has_opp_agent + hoa) + has_same_agent = 0.5 * (has_same_agent + hsa) + return has_opp_agent, has_same_agent, visited + else: + new_direction = fast_argmax(possible_transitions) + new_position = get_new_position(new_position, new_direction) + return has_opp_agent, has_same_agent, visited + + def get(self, handle): + # all values are [0,1] + # observation[0] : 1 path towards target (direction 0) / otherwise 0 -> path is longer or there is no path + # observation[1] : 1 path towards target (direction 1) / otherwise 0 -> path is longer or there is no path + # observation[2] : 1 path towards target (direction 2) / otherwise 0 -> path is longer or there is no path + # observation[3] : 1 path towards target (direction 3) / otherwise 0 -> path is longer or there is no path + # observation[4] : int(agent.status == RailAgentStatus.READY_TO_DEPART) + # observation[5] : int(agent.status == RailAgentStatus.ACTIVE) + # observation[6] : int(agent.status == RailAgentStatus.DONE or agent.status == RailAgentStatus.DONE_REMOVED) + # observation[7] : current agent is located at a switch, where it can take a routing decision + # observation[8] : current agent is located at a cell, where it has to take a stop-or-go decision + # observation[9] : current agent is located one step before/after a switch + # observation[10] : 1 if there is a path (track/branch) otherwise 0 (direction 0) + # observation[11] : 1 if there is a path (track/branch) otherwise 0 (direction 1) + # observation[12] : 1 if there is a path (track/branch) otherwise 0 (direction 2) + # observation[13] : 1 if there is a path (track/branch) otherwise 0 (direction 3) + # observation[14] : If there is a path with step (direction 0) and there is a agent with opposite direction -> 1 + # observation[15] : If there is a path with step (direction 1) and there is a agent with opposite direction -> 1 + # observation[16] : If there is a path with step (direction 2) and there is a agent with opposite direction -> 1 + # observation[17] : If there is a path with step (direction 3) and there is a agent with opposite direction -> 1 + # observation[18] : If there is a path with step (direction 0) and there is a agent with same direction -> 1 + # observation[19] : If 
there is a path with step (direction 1) and there is a agent with same direction -> 1 + # observation[20] : If there is a path with step (direction 2) and there is a agent with same direction -> 1 + # observation[21] : If there is a path with step (direction 3) and there is a agent with same direction -> 1 + + observation = np.zeros(self.observation_dim) + visited = [] + agent = self.env.agents[handle] + + agent_done = False + if agent.status == RailAgentStatus.READY_TO_DEPART: + agent_virtual_position = agent.initial_position + observation[4] = 1 + elif agent.status == RailAgentStatus.ACTIVE: + agent_virtual_position = agent.position + observation[5] = 1 + else: + observation[6] = 1 + agent_virtual_position = (-1, -1) + agent_done = True + + if not agent_done: + visited.append(agent_virtual_position) + distance_map = self.env.distance_map.get() + current_cell_dist = distance_map[handle, + agent_virtual_position[0], agent_virtual_position[1], + agent.direction] + possible_transitions = self.env.rail.get_transitions(*agent_virtual_position, agent.direction) + orientation = agent.direction + if fast_count_nonzero(possible_transitions) == 1: + orientation = np.argmax(possible_transitions) + + for dir_loop, branch_direction in enumerate([(orientation + i) % 4 for i in range(-1, 3)]): + if possible_transitions[branch_direction]: + new_position = get_new_position(agent_virtual_position, branch_direction) + + new_cell_dist = distance_map[handle, + new_position[0], new_position[1], + branch_direction] + if not (np.math.isinf(new_cell_dist) and np.math.isinf(current_cell_dist)): + observation[dir_loop] = int(new_cell_dist < current_cell_dist) + + has_opp_agent, has_same_agent, v = self._explore(handle, new_position, branch_direction) + visited.append(v) + + observation[10 + dir_loop] = 1 + observation[14 + dir_loop] = has_opp_agent + observation[18 + dir_loop] = has_same_agent + + opp_a = self.env.agent_positions[new_position] + if opp_a != -1 and opp_a != handle: + observation[22 + dir_loop] = 1 + + agents_on_switch, \ + agents_near_to_switch, \ + agents_near_to_switch_all = \ + self.check_agent_descision(agent_virtual_position, agent.direction) + observation[7] = int(agents_on_switch) + observation[8] = int(agents_near_to_switch) + observation[9] = int(agents_near_to_switch_all) + + self.env.dev_obs_dict.update({handle: visited}) + + return observation + + def rl_agent_act_ADRIAN(self, observation, info, eps=0.0): + self.loadAgent() + action_dict = {} + for a in range(self.env.get_num_agents()): + if info['action_required'][a]: + action_dict[a] = self.agent.act(observation[a], eps=eps) + # action_dict[a] = np.random.randint(5) + else: + action_dict[a] = RailEnvActions.DO_NOTHING + + return action_dict + + def rl_agent_act(self, observation, info, eps=0.0): + if len(self.random_agent_starter) != self.env.get_num_agents(): + self.random_agent_starter = np.random.random(self.env.get_num_agents()) * 1000.0 + self.loadAgent() + + action_dict = {} + for a in range(self.env.get_num_agents()): + if self.random_agent_starter[a] > self.env._elapsed_steps: + action_dict[a] = RailEnvActions.STOP_MOVING + elif info['action_required'][a]: + action_dict[a] = self.agent.act(observation[a], eps=eps) + # action_dict[a] = np.random.randint(5) + else: + action_dict[a] = RailEnvActions.DO_NOTHING + + return action_dict + + def rl_agent_act_ADRIAN_01(self, observation, info, eps=0.0): + self.loadAgent() + action_dict = {} + active_cnt = 0 + for a in range(self.env.get_num_agents()): + if active_cnt < 10 or 
self.env.agents[a].status == RailAgentStatus.ACTIVE: + if observation[a][6] == 1: + active_cnt += int(self.env.agents[a].status == RailAgentStatus.ACTIVE) + action_dict[a] = RailEnvActions.STOP_MOVING + else: + active_cnt += int(self.env.agents[a].status < RailAgentStatus.DONE) + if (observation[a][7] + observation[a][8] + observation[a][9] > 0) or \ + (self.env.agents[a].status < RailAgentStatus.ACTIVE): + if info['action_required'][a]: + action_dict[a] = self.agent.act(observation[a], eps=eps) + # action_dict[a] = np.random.randint(5) + else: + action_dict[a] = RailEnvActions.MOVE_FORWARD + else: + action_dict[a] = RailEnvActions.MOVE_FORWARD + else: + action_dict[a] = RailEnvActions.STOP_MOVING + + return action_dict + + def loadAgent(self): + if self.agent is not None: + return + self.state_size = self.env.obs_builder.observation_dim + self.action_size = 5 + print("action_size: ", self.action_size) + print("state_size: ", self.state_size) + self.agent = Agent(self.state_size, self.action_size, 0) + self.agent.load('./checkpoints/', 0, 1.0) diff --git a/src/simple/ClassifyProblemInstance.py b/src/simple/ClassifyProblemInstance.py new file mode 100644 index 0000000..cabd67e --- /dev/null +++ b/src/simple/ClassifyProblemInstance.py @@ -0,0 +1,94 @@ +from enum import IntEnum + +import numpy as np + + +class ProblemInstanceClass(IntEnum): + SHORTEST_PATH_ONLY = 0 + SHORTEST_PATH_ORDERING_PROBLEM = 1 + REQUIRE_ALTERNATIVE_PATH = 2 + + +def check_is_only_shortest_path_problem(env, project_path_matrix): + x = project_path_matrix.copy() + x[x < 2] = 0 + return np.sum(x) == 0 + + +def check_is_shortest_path_and_ordering_problem(env, project_path_matrix): + x = project_path_matrix.copy() + for a in range(env.get_num_agents()): + # loop over all path and project start position and target into the project_path_matrix + agent = env.agents[a] + if x[agent.position[0]][agent.position[1]] > 1: + return False + if x[agent.target[0]][agent.target[1]] > 1: + return False + return True + + +def check_is_require_alternative_path(env, project_path_matrix): + paths = env.dev_pred_dict + for a in range(env.get_num_agents()): + agent = env.agents[a] + path = paths[a] + for path_loop in range(len(path)): + p = path[path_loop] + if p[0] == agent.target[0] and p[1] == agent.target[1]: + break + if project_path_matrix[p[0]][p[1]] > 1: + # potential overlapping path found + for opp_a in range(env.get_num_agents()): + opp_agent = env.agents[opp_a] + opp_path = paths[opp_a] + if p[0] == opp_agent.position[0] and p[1] == opp_agent.position[1]: + opp_path_loop = 0 + tmp_path_loop = path_loop + while True: + if tmp_path_loop > len(path) - 1: + break + opp_p = opp_path[opp_path_loop] + tmp_p = path[tmp_path_loop + 1] + if opp_p[0] == opp_agent.target[0] and opp_p[1] == opp_agent.target[1]: + return True + if not (opp_p[0] == tmp_p[0] and opp_p[1] == tmp_p[1]): + break + if tmp_p[0] == agent.target[0] and tmp_p[1] == agent.target[1]: + break + opp_path_loop += 1 + tmp_path_loop += 1 + + return False + + +def classify_problem_instance(env): + # shortest path from ShortesPathPredictorForRailEnv + paths = env.dev_pred_dict + + project_path_matrix = np.zeros(shape=(env.height, env.width)) + for a in range(env.get_num_agents()): + # loop over all path and project start position and target into the project_path_matrix + agent = env.agents[a] + project_path_matrix[agent.position[0]][agent.position[1]] += 1.0 + project_path_matrix[agent.target[0]][agent.target[1]] += 1.0 + + if not (agent.target[0] == agent.position[0] and 
agent.target[1] == agent.position[1]): + # project the whole path into + path = paths[a] + for path_loop in range(len(path)): + p = path[path_loop] + if p[0] == agent.target[0] and p[1] == agent.target[1]: + break + else: + project_path_matrix[p[0]][p[1]] += 1.0 + + return \ + { + # analyse : SHORTEST_PATH_ONLY -> if conflict_mat does not contain any number > 1 + "SHORTEST_PATH_ONLY": check_is_only_shortest_path_problem(env, project_path_matrix), + # analyse : SHORTEST_PATH_ORDERING_PROBLEM -> if agent_start and agent_target position does not contain any number > 1 + "SHORTEST_PATH_ORDERING_PROBLEM": check_is_shortest_path_and_ordering_problem(env, project_path_matrix), + # analyse : REQUIRE_ALTERNATIVE_PATH -> if agent_start and agent_target position does not contain any number > 1 + "REQUIRE_ALTERNATIVE_PATH": check_is_require_alternative_path(env, project_path_matrix) + + } diff --git a/src/simple/DeadLock_Avoidance.py b/src/simple/DeadLock_Avoidance.py new file mode 100644 index 0000000..7e80a46 --- /dev/null +++ b/src/simple/DeadLock_Avoidance.py @@ -0,0 +1,574 @@ +import math +from typing import Dict, List, Optional, Tuple, Set +from typing import NamedTuple + +import numpy as np +from flatland.core.grid.grid4 import Grid4TransitionsEnum +from flatland.core.grid.grid4_utils import get_new_position +from flatland.core.transition_map import GridTransitionMap +from flatland.envs.agent_utils import RailAgentStatus +from flatland.envs.distance_map import DistanceMap +from flatland.envs.rail_env import RailEnvNextAction, RailEnvActions +from flatland.envs.rail_env_shortest_paths import get_shortest_paths +from flatland.utils.ordered_set import OrderedSet + +WalkingElement = NamedTuple('WalkingElement', + [('position', Tuple[int, int]), ('direction', int), + ('next_action_element', RailEnvActions)]) + + +def get_valid_move_actions_(agent_direction: Grid4TransitionsEnum, + agent_position: Tuple[int, int], + rail: GridTransitionMap) -> Set[RailEnvNextAction]: + """ + Get the valid move actions (forward, left, right) for an agent. + + Parameters + ---------- + agent_direction : Grid4TransitionsEnum + agent_position: Tuple[int,int] + rail : GridTransitionMap + + + Returns + ------- + Set of `RailEnvNextAction` (tuples of (action,position,direction)) + Possible move actions (forward,left,right) and the next position/direction they lead to. + It is not checked that the next cell is free. + """ + valid_actions: Set[RailEnvNextAction] = OrderedSet() + possible_transitions = rail.get_transitions(*agent_position, agent_direction) + num_transitions = np.count_nonzero(possible_transitions) + # Start from the current orientation, and see which transitions are available; + # organize them as [left, forward, right], relative to the current orientation + # If only one transition is possible, the forward branch is aligned with it. 
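+    # Dead ends are special-cased first: the only legal action there is
+    # MOVE_FORWARD, which turns the agent around (exit direction is the entry
+    # direction rotated by 180 degrees, i.e. (agent_direction + 2) % 4).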
+ if rail.is_dead_end(agent_position): + action = RailEnvActions.MOVE_FORWARD + exit_direction = (agent_direction + 2) % 4 + if possible_transitions[exit_direction]: + new_position = get_new_position(agent_position, exit_direction) + valid_actions.add(RailEnvNextAction(action, new_position, exit_direction)) + elif num_transitions == 1: + action = RailEnvActions.MOVE_FORWARD + for new_direction in [(agent_direction + i) % 4 for i in range(-1, 2)]: + if possible_transitions[new_direction]: + new_position = get_new_position(agent_position, new_direction) + valid_actions.add(RailEnvNextAction(action, new_position, new_direction)) + else: + for new_direction in [(agent_direction + i) % 4 for i in range(-1, 2)]: + if possible_transitions[new_direction]: + if new_direction == agent_direction: + action = RailEnvActions.MOVE_FORWARD + elif new_direction == (agent_direction + 1) % 4: + action = RailEnvActions.MOVE_RIGHT + elif new_direction == (agent_direction - 1) % 4: + action = RailEnvActions.MOVE_LEFT + else: + raise Exception("Illegal state") + + new_position = get_new_position(agent_position, new_direction) + valid_actions.add(RailEnvNextAction(action, new_position, new_direction)) + return valid_actions + + +# N.B. get_shortest_paths is not part of distance_map since it refers to RailEnvActions (would lead to circularity!) +def get_paths(distance_map: DistanceMap, max_depth: Optional[int] = None, agent_handle: Optional[int] = None) \ + -> Dict[int, Optional[List[WalkingElement]]]: + """ + Computes the shortest path for each agent to its target and the action to be taken to do so. + The paths are derived from a `DistanceMap`. + + If there is no path (rail disconnected), the path is given as None. + The agent state (moving or not) and its speed are not taken into account + + example: + agent_fixed_travel_paths = get_shortest_paths(env.distance_map, None, agent.handle) + path = agent_fixed_travel_paths[agent.handle] + + Parameters + ---------- + distance_map : reference to the distance_map + max_depth : max path length, if the shortest path is longer, it will be cutted + agent_handle : if set, the shortest for agent.handle will be returned , otherwise for all agents + + Returns + ------- + Dict[int, Optional[List[WalkingElement]]] + + """ + shortest_paths = dict() + + def _shortest_path_for_agent(agent): + if agent.status == RailAgentStatus.READY_TO_DEPART: + position = agent.initial_position + elif agent.status == RailAgentStatus.ACTIVE: + position = agent.position + elif agent.status == RailAgentStatus.DONE: + position = agent.target + else: + shortest_paths[agent.handle] = None + return + direction = agent.direction + shortest_paths[agent.handle] = [] + distance = math.inf + depth = 0 + cnt = 0 + while (position != agent.target and (max_depth is None or depth < max_depth)) and cnt < 1000: + cnt = cnt + 1 + next_actions = get_valid_move_actions_(direction, position, distance_map.rail) + best_next_action = None + + for next_action in next_actions: + next_action_distance = distance_map.get()[ + agent.handle, next_action.next_position[0], next_action.next_position[ + 1], next_action.next_direction] + if next_action_distance < distance: + best_next_action = next_action + distance = next_action_distance + + for next_action in next_actions: + if next_action.action == RailEnvActions.MOVE_LEFT: + next_action_distance = distance_map.get()[ + agent.handle, next_action.next_position[0], next_action.next_position[ + 1], next_action.next_direction] + if abs(next_action_distance - distance) < 5: + 
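+                        # near-tie heuristic: a left turn within 5 distance units
+                        # of the best action found so far wins anyway, nudging
+                        # agents onto divergent paths instead of piling all
+                        # traffic onto the single shortest route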
best_next_action = next_action + distance = next_action_distance + + shortest_paths[agent.handle].append(WalkingElement(position, direction, best_next_action)) + depth += 1 + + # if there is no way to continue, the rail must be disconnected! + # (or distance map is incorrect) + if best_next_action is None: + shortest_paths[agent.handle] = None + return + + position = best_next_action.next_position + direction = best_next_action.next_direction + if max_depth is None or depth < max_depth: + shortest_paths[agent.handle].append( + WalkingElement(position, direction, + RailEnvNextAction(RailEnvActions.STOP_MOVING, position, direction))) + + if agent_handle is not None: + _shortest_path_for_agent(distance_map.agents[agent_handle]) + else: + for agent in distance_map.agents: + _shortest_path_for_agent(agent) + + return shortest_paths + + +def agent_fake_position(agent): + if agent.position is not None: + return (agent.position[0], agent.position[1], 0) + return (-agent.handle - 1, -1, None) + + +def compare_position_equal(a, b): + if a is None and b is None: + return True + if a is None or b is None: + return False + return (a[0] == b[0] and a[1] == b[1]) + + +def calc_conflict_matrix_next_step(env, paths, do_move, agent_position_matrix, agent_target_matrix, + agent_next_position_matrix): + # look step forward + conflict_mat = np.zeros(shape=(env.get_num_agents(), env.get_num_agents())) - 1 + + # calculate weighted (priority) + priority = np.arange(env.get_num_agents()).astype(float) + unique_ordered_priority = np.argsort(priority).astype(int) + + # build one-step away dead-lock matrix + for a in range(env.get_num_agents()): + agent = env.agents[a] + path = paths[a] + if path is None: + continue + + conflict_mat[a][a] = unique_ordered_priority[a] + for path_loop in range(len(path)): + p_el = path[path_loop] + p = p_el.position + if compare_position_equal(agent.target, p): + break + else: + a_loop = 0 + opp_a = (int)(agent_next_position_matrix[p[0]][p[1]][a_loop]) + + cnt = 0 + while (opp_a > -1) and (cnt < 1000): + cnt = cnt + 1 + opp_path = paths[opp_a] + if opp_path is not None: + opp_a_p1 = opp_path[0].next_action_element.next_position + if path_loop < len(path) - 1: + p1 = path[path_loop + 1].next_action_element.next_position + if not compare_position_equal(opp_a_p1, p1): + conflict_mat[a][opp_a] = unique_ordered_priority[opp_a] + conflict_mat[opp_a][a] = unique_ordered_priority[a] + a_loop += 1 + opp_a = (int)(agent_next_position_matrix[p[0]][p[1]][a_loop]) + + # update one-step away + for a in range(env.get_num_agents()): + if not do_move[a]: + conflict_mat[conflict_mat == unique_ordered_priority[a]] = -1 + + return conflict_mat + + +def avoid_dead_lock(env, a, paths, conflict_matrix, agent_position_matrix, agent_target_matrix, + agent_next_position_matrix): + # performance optimisation + if conflict_matrix is not None: + if np.argmax(conflict_matrix[a]) == a: + return True + + # dead lock algorithm + agent = env.agents[a] + agent_position = agent_fake_position(agent) + if compare_position_equal(agent_position, agent.target): + return True + + path = paths[a] + if path is None: + return True + + max_path_step_allowed = np.inf + # iterate over agent a's travel path (fixed path) + for path_loop in range(len(path)): + p_el = path[path_loop] + p = p_el.position + if compare_position_equal(p, agent.target): + break + + # iterate over all agents (opposite) + # for opp_a in range(env.get_num_agents()): + a_loop = 0 + opp_a = 0 + cnt = 0 + while (a_loop < env.get_num_agents() and opp_a > -1) and 
cnt < 1000: + cnt = cnt + 1 + if conflict_matrix is not None: + opp_a = (int)(agent_next_position_matrix[p[0]][p[1]][a_loop]) + a_loop += 1 + else: + opp_a = (int)(agent_position_matrix[p[0]][p[1]]) + a_loop = env.get_num_agents() + if opp_a > -1: + if opp_a != a: + opp_agent = env.agents[opp_a] + opp_path = paths[opp_a] + if opp_path is not None: + opp_path_0 = opp_path[0] + + # find all position in the opp.-path which are equal to current position. + # the method has to scan all path through + all_path_idx_offset_array = [0] + for opp_path_loop_itr in range(len(path)): + opp_p_el = opp_path[opp_path_loop_itr] + opp_p = opp_p_el.position + if compare_position_equal(opp_p, opp_agent.target): + break + opp_agent_position = agent_fake_position(opp_agent) + if compare_position_equal(opp_p, opp_agent_position): + all_path_idx_offset_array.extend([opp_path_loop_itr]) + opp_p_next = opp_p_el.next_action_element.next_position + if compare_position_equal(opp_p_next, opp_agent_position): + all_path_idx_offset_array.extend([opp_path_loop_itr]) + + for all_path_idx_offset_loop in range(len(all_path_idx_offset_array)): + all_path_idx_offset = all_path_idx_offset_array[all_path_idx_offset_loop] + opp_path_0_el = opp_path[all_path_idx_offset] + opp_path_0 = opp_path_0_el.position + # if check_in_details is set to -1: no dead-lock candidate found + # if check_in_details is set to 0: dead-lock candidate are not yet visible (agents need one step to become visible)(case A) + # if check_in_details is set to 1: dead-lock candidate are visible, thus we have to collect them (case B) + check_in_detail = -1 + + # check mode, if conflict_matrix is set, then we are looking .. + if conflict_matrix is not None: + # Case A + if np.argmax(conflict_matrix[a]) != a: + # avoid (parallel issue) + if compare_position_equal(opp_path_0, p): + check_in_detail = 0 + else: + # Case B + # collect all dead-lock candidates and check + opp_agent_position = agent_fake_position(opp_agent) + if compare_position_equal(opp_agent_position, p): + check_in_detail = 1 + + if check_in_detail > -1: + # print("Conflict risk found. 
My [", a, "] path is occupied by [", opp_a, "]") + opp_path_loop = all_path_idx_offset + back_path_loop = path_loop - check_in_detail + cnt = 0 + while (opp_path_loop < len(opp_path) and back_path_loop > -1) and cnt < 1000: + cnt = cnt + 1 + # retrieve position information + opp_p_el = opp_path[opp_path_loop] + opp_p = opp_p_el.position + me_p_el = path[back_path_loop] + me_p = me_p_el.next_action_element.next_position + + if not compare_position_equal(opp_p, me_p): + # Case 1: The opposite train travels in same direction as the current train (agent a) + # Case 2: The opposite train travels in opposite direction and the path divergent + break + + # make one step backwards (agent a) and one step forward for opposite train (agent opp_a) + # train a can no travel further than given position, because no divergent paths, this will cause a dead-lock + max_path_step_allowed = min(max_path_step_allowed, back_path_loop) + opp_path_loop += 1 + back_path_loop -= 1 + + # check whether at least one step is allowed + if max_path_step_allowed < 1: + return False + + if back_path_loop == -1: + # No divergent path found, it cause a deadlock + # print("conflict (stop): (", a, ",", opp_a, ")") + return False + + # check whether at least one step is allowed + return max_path_step_allowed > 0 + + +def calculate_one_step(env): + # can agent move array + do_move = np.zeros(env.get_num_agents()) + if True: + cnt = 0 + cnt_done = 0 + for a in range(env.get_num_agents()): + agent = env.agents[a] + if agent.status < RailAgentStatus.DONE: + cnt += 1 + if cnt < 30: + do_move[a] = True + else: + cnt_done += 1 + print("\r{}/{}\t".format(cnt_done, env.get_num_agents()), end="") + else: + agent_fixed_travel_paths = get_paths(env.distance_map, 1) + # can agent move array + do_move = np.zeros(env.get_num_agents()) + for a in range(env.get_num_agents()): + agent = env.agents[a] + if agent.position is not None and not compare_position_equal(agent.position, agent.target): + do_move[a] = True + break + + if np.sum(do_move) == 0: + for a in range(env.get_num_agents()): + agent = env.agents[a] + if agent_fixed_travel_paths[a] is not None: + if agent.position is None and compare_position_equal(agent.initial_position, agent.target): + do_move[a] = True + break + elif not compare_position_equal(agent.initial_position, agent.target): + do_move[a] = True + break + + initial_position = None + for a in range(env.get_num_agents()): + agent = env.agents[a] + if do_move[a]: + initial_position = agent.initial_position + + if initial_position is not None: + if compare_position_equal(agent.initial_position, initial_position): + do_move[a] = True + + # copy of agents fixed travel path (current path to follow) : only once : quite expensive + # agent_fixed_travel_paths = get_shortest_paths(env.distance_map) + agent_fixed_travel_paths = dict() + for a in range(env.get_num_agents()): + agent = env.agents[a] + if do_move[a]: + agent_fixed_travel_paths[agent.handle] = get_paths(env.distance_map, None, agent.handle)[agent.handle] + else: + agent_fixed_travel_paths[agent.handle] = None + + # copy position, target and next position into cache (matrices) + # (The cache idea increases the run-time performance) + agent_position_matrix = np.zeros(shape=(env.height, env.width)) - 1.0 + agent_target_matrix = np.zeros(shape=(env.height, env.width)) - 1.0 + agent_next_position_matrix = np.zeros(shape=(env.height, env.width, env.get_num_agents() + 1)) - 1.0 + for a in range(env.get_num_agents()): + if do_move[a] == False: + continue + agent = env.agents[a] + 
agent_position = agent_fake_position(agent) + if agent_position[2] is None: + agent_position = agent.initial_position + agent_position_matrix[agent_position[0]][agent_position[1]] = a + agent_target_matrix[agent.target[0]][agent.target[1]] = a + if not compare_position_equal(agent.target, agent_position): + path = agent_fixed_travel_paths[a] + if path is not None: + p_el = path[0] + p = p_el.position + a_loop = 0 + cnt = 0 + while (agent_next_position_matrix[p[0]][p[1]][a_loop] > -1) and cnt < 1000: + cnt = cnt + 1 + a_loop += 1 + agent_next_position_matrix[p[0]][p[1]][a_loop] = a + + # check which agents can move (see : avoid_dead_lock (case b)) + for a in range(env.get_num_agents()): + agent = env.agents[a] + if not compare_position_equal(agent.position, agent.target) and do_move[a]: + do_move[a] = avoid_dead_lock(env, a, agent_fixed_travel_paths, None, agent_position_matrix, + agent_target_matrix, + agent_next_position_matrix) + + # check which agents can move (see : avoid_dead_lock (case a)) + # calculate possible candidate for hidden one-step away dead-lock candidates + conflict_matrix = calc_conflict_matrix_next_step(env, agent_fixed_travel_paths, do_move, agent_position_matrix, + agent_target_matrix, + agent_next_position_matrix) + for a in range(env.get_num_agents()): + agent = env.agents[a] + if not compare_position_equal(agent.position, agent.target): + if do_move[a]: + do_move[a] = avoid_dead_lock(env, a, agent_fixed_travel_paths, conflict_matrix, agent_position_matrix, + agent_target_matrix, + agent_next_position_matrix) + + for a in range(env.get_num_agents()): + agent = env.agents[a] + if agent.position is not None and compare_position_equal(agent.position, agent.target): + do_move[a] = False + + # main loop (calculate actions for all agents) + action_dict = {} + is_moving_cnt = 0 + for a in range(env.get_num_agents()): + agent = env.agents[a] + action = RailEnvActions.MOVE_FORWARD + + if do_move[a] and is_moving_cnt < 10: + is_moving_cnt += 1 + # check for deadlock: + path = agent_fixed_travel_paths[a] + if path is not None: + action = path[0].next_action_element.action + else: + action = RailEnvActions.STOP_MOVING + action_dict[a] = action + + return action_dict, do_move + + +def calculate_one_step_heuristics(env): + # copy of agents fixed travel path (current path to follow) + agent_fixed_travel_paths = get_paths(env.distance_map, 1) + + # main loop (calculate actions for all agents) + action_dict = {} + for a in range(env.get_num_agents()): + agent = env.agents[a] + action = RailEnvActions.MOVE_FORWARD + + # check for deadlock: + path = agent_fixed_travel_paths[a] + if path is not None: + action = path[0].next_action_element.action + action_dict[a] = action + + return action_dict, None + + +def calculate_one_step_primitive_implementation(env): + # can agent move array + do_move = np.zeros(env.get_num_agents()) + for a in range(env.get_num_agents()): + agent = env.agents[a] + if agent.status > RailAgentStatus.ACTIVE: + continue + if (agent.status == RailAgentStatus.ACTIVE): + do_move[a] = True + break + do_move[a] = True + break + + # main loop (calculate actions for all agents) + action_dict = {} + for a in range(env.get_num_agents()): + agent = env.agents[a] + action = RailEnvActions.MOVE_FORWARD + if do_move[a]: + # check for deadlock: + # copy of agents fixed travel path (current path to follow) + agent_fixed_travel_paths = get_shortest_paths(env.distance_map, 1, agent.handle) + path = agent_fixed_travel_paths[agent.handle] + if path is not None: + 
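+                # progress display only; the action itself is the first step of
+                # the agent's shortest path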
print("\rAgent:{:4d}/{:<4d} ".format(a + 1, env.get_num_agents()), end=" ") + action = path[0].next_action_element.action + else: + action = RailEnvActions.STOP_MOVING + action_dict[a] = action + + return action_dict, do_move + + +def calculate_one_step_package_implementation(env): + # copy of agents fixed travel path (current path to follow) + # agent_fixed_travel_paths = get_shortest_paths(env.distance_map,1) + agent_fixed_travel_paths = get_paths(env.distance_map, 1) + + # can agent move array + do_move = np.zeros(env.get_num_agents()) + for a in range(env.get_num_agents()): + agent = env.agents[a] + if agent.position is not None and not compare_position_equal(agent.position, agent.target): + do_move[a] = True + break + + if np.sum(do_move) == 0: + for a in range(env.get_num_agents()): + agent = env.agents[a] + if agent_fixed_travel_paths[a] is not None: + if agent.position is None and compare_position_equal(agent.initial_position, agent.target): + do_move[a] = True + break + elif not compare_position_equal(agent.initial_position, agent.target): + do_move[a] = True + break + + initial_position = None + for a in range(env.get_num_agents()): + agent = env.agents[a] + if do_move[a]: + initial_position = agent.initial_position + + if initial_position is not None: + if compare_position_equal(agent.initial_position, initial_position): + do_move[a] = True + + # main loop (calculate actions for all agents) + action_dict = {} + for a in range(env.get_num_agents()): + agent = env.agents[a] + action = RailEnvActions.MOVE_FORWARD + + if do_move[a]: + # check for deadlock: + path = agent_fixed_travel_paths[a] + if path is not None: + action = path[0].next_action_element.action + else: + action = RailEnvActions.STOP_MOVING + action_dict[a] = action + + return action_dict, do_move diff --git a/src/simple/ShortestPathPredictorForRailEnv.py b/src/simple/ShortestPathPredictorForRailEnv.py new file mode 100644 index 0000000..f820253 --- /dev/null +++ b/src/simple/ShortestPathPredictorForRailEnv.py @@ -0,0 +1,107 @@ +import numpy as np + +from flatland.core.env_prediction_builder import PredictionBuilder +from flatland.core.grid.grid4_utils import get_new_position +from flatland.envs.rail_env import RailEnvActions + + +class AdrianShortestPathPredictorForRailEnv(PredictionBuilder): + """ + ShortestPathPredictorForRailEnv object. + + This object returns shortest-path predictions for agents in the RailEnv environment. + The prediction acts as if no other agent is in the environment and always takes the forward action. + """ + + def __init__(self, max_depth=20): + # Initialize with depth 20 + self.max_depth = max_depth + + def get(self, custom_args=None, handle=None): + """ + Called whenever get_many in the observation build is called. + Requires distance_map to extract the shortest path. + + Parameters + ------- + custom_args: dict + - distance_map : dict + handle : int (optional) + Handle of the agent for which to compute the observation vector. + + Returns + ------- + np.array + Returns a dictionary indexed by the agent handle and for each agent a vector of (max_depth + 1)x5 elements: + - time_offset + - position axis 0 + - position axis 1 + - direction + - action taken to come here + The prediction at 0 is the current position, direction etc. 
+ """ + + agents = self.env.agents + if handle: + agents = [self.env.agents[handle]] + assert custom_args is not None + distance_map = custom_args.get('distance_map') + assert distance_map is not None + + prediction_dict = {} + for agent in agents: + _agent_initial_position = agent.position + _agent_initial_direction = agent.direction + prediction = np.zeros(shape=(self.max_depth + 1, 5)) + prediction[0] = [0, *_agent_initial_position, _agent_initial_direction, 0] + visited = [] + for index in range(1, self.max_depth + 1): + # if we're at the target, stop moving... + if agent.position == agent.target: + prediction[index] = [index, *agent.target, agent.direction, RailEnvActions.STOP_MOVING] + visited.append((agent.position[0], agent.position[1], agent.direction)) + continue + # Take shortest possible path + cell_transitions = self.env.rail.get_transitions(*agent.position, agent.direction) + + new_position = None + new_direction = None + if np.sum(cell_transitions) == 1: + new_direction = np.argmax(cell_transitions) + new_position = get_new_position(agent.position, new_direction) + elif np.sum(cell_transitions) > 1: + min_dist = np.inf + no_dist_found = True + for direction in range(4): + if cell_transitions[direction] == 1: + neighbour_cell = get_new_position(agent.position, direction) + target_dist = distance_map[agent.handle, neighbour_cell[0], neighbour_cell[1], direction] + if target_dist < min_dist or no_dist_found: + min_dist = target_dist + new_direction = direction + no_dist_found = False + new_position = get_new_position(agent.position, new_direction) + else: + print("--------------------") + print(agent.position, agent.direction, "valid:", self.env.rail.cell_neighbours_valid( + agent.position), + self.env.rail.get_full_transitions(agent.position[0],agent.position[1]) + ) + print("--------------------") + raise Exception("No transition possible {}".format(cell_transitions)) + + # update the agent's position and direction + agent.position = new_position + agent.direction = new_direction + + # prediction is ready + prediction[index] = [index, *new_position, new_direction, 0] + visited.append((new_position[0], new_position[1], new_direction)) + self.env.dev_pred_dict[agent.handle] = visited + prediction_dict[agent.handle] = prediction + + # cleanup: reset initial position + agent.position = _agent_initial_position + agent.direction = _agent_initial_direction + + return prediction_dict -- GitLab