Compare revisions

Showing with 720 additions and 487 deletions
from time import time

import numpy as np
from flatland.envs.rail_env import fast_isclose


def print_timing(label, start_time, end_time):
    print("{:>10.4f}ms".format(1000 * (end_time - start_time)) + "\t" + label)


def check_isclose(nbr=100000):
    s = time()
    for x in range(nbr):
        fast_isclose(x, 0.0, rtol=1e-03)
    e = time()
    print_timing("fast_isclose", start_time=s, end_time=e)

    s = time()
    for x in range(nbr):
        np.isclose(x, 0.0, rtol=1e-03)
    e = time()
    print_timing("np.isclose", start_time=s, end_time=e)


if __name__ == "__main__":
    check_isclose()
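The script times the rail-env helper fast_isclose against np.isclose over the same scalar inputs; the nbr parameter of check_isclose controls how many comparisons are timed. A minimal alternative invocation (illustrative only, not part of the diff):

    check_isclose(nbr=10000)  # smaller run; still prints one timing line per variant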
runs_bench/Screenshots/full.png (139 KiB)

runs_bench/Screenshots/reduced.png (178 KiB)
from flatland.envs.rail_env import RailEnvActions

# global action size
global _agent_action_config_action_size
_agent_action_config_action_size = 5


def get_flatland_full_action_size():
    # The action space of flatland is 5 discrete actions
    return 5


def set_action_size_full():
    global _agent_action_config_action_size
    # The agents (DDDQN, PPO, ...) use this action space
    _agent_action_config_action_size = 5


def set_action_size_reduced():
    global _agent_action_config_action_size
    # The agents (DDDQN, PPO, ...) use this action space
    _agent_action_config_action_size = 4


def get_action_size():
    global _agent_action_config_action_size
    # The agents (DDDQN, PPO, ...) use this action space
    return _agent_action_config_action_size


def map_actions(actions):
    # Map the policy's actions (reduced action space) into Flatland's RailEnvActions
    if get_action_size() != get_flatland_full_action_size():
        for key in actions:
            value = actions.get(key, 0)
            actions.update({key: map_action(value)})
    return actions


def map_action_policy(action):
    if get_action_size() != get_flatland_full_action_size():
        return action - 1
    return action


def map_action(action):
    if get_action_size() == get_flatland_full_action_size():
        return action
    if action == 0:
        return RailEnvActions.MOVE_LEFT
    if action == 1:
        return RailEnvActions.MOVE_FORWARD
    if action == 2:
        return RailEnvActions.MOVE_RIGHT
    if action == 3:
        return RailEnvActions.STOP_MOVING


def map_rail_env_action(action):
    if get_action_size() == get_flatland_full_action_size():
        return action
    if action == RailEnvActions.MOVE_LEFT:
        return 0
    elif action == RailEnvActions.MOVE_FORWARD:
        return 1
    elif action == RailEnvActions.MOVE_RIGHT:
        return 2
    elif action == RailEnvActions.STOP_MOVING:
        return 3
    # action == RailEnvActions.DO_NOTHING:
    return 3
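A hypothetical usage sketch (not part of the diff) of the mapping helpers above, showing the round trip between the reduced 4-action space used by the policies and Flatland's RailEnvActions; variable names are illustrative:

    from utils.agent_action_config import set_action_size_reduced, map_action, map_rail_env_action

    set_action_size_reduced()                        # policy networks now output 4 actions
    policy_action = 1                                # index chosen by the policy (reduced space)
    rail_action = map_action(policy_action)          # -> RailEnvActions.MOVE_FORWARD
    reduced_again = map_rail_env_action(rail_action)  # back to the reduced index
    assert reduced_again == policy_action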
from flatland.core.grid.grid4_utils import get_new_position
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.rail_env import fast_count_nonzero


class AgentCanChooseHelper:
    def __init__(self):
        pass

    def build_data(self, env):
        self.env = env
        if self.env is not None:
            self.env.dev_obs_dict = {}
        self.switches = {}
        self.switches_neighbours = {}
        if self.env is not None:
            self.find_all_cell_where_agent_can_choose()

    def find_all_switches(self):
        # Search the environment (rail grid) for all switch cells. A switch is a cell where more than one
        # transition exists; collect all directions for which the cell acts as a switch.
        self.switches = {}
        for h in range(self.env.height):
            for w in range(self.env.width):
                pos = (h, w)
                for dir in range(4):
                    possible_transitions = self.env.rail.get_transitions(*pos, dir)
                    num_transitions = fast_count_nonzero(possible_transitions)
                    if num_transitions > 1:
                        if pos not in self.switches.keys():
                            self.switches.update({pos: [dir]})
                        else:
                            self.switches[pos].append(dir)

    def find_all_switch_neighbours(self):
        # Collect all cells that are neighbours of a switch cell. A cell is a neighbour if the agent can
        # reach a switch with a single step. A switch is a cell where the agent has more than one transition.
        self.switches_neighbours = {}
        for h in range(self.env.height):
            for w in range(self.env.width):
                # look one step forward
                for dir in range(4):
                    pos = (h, w)
                    possible_transitions = self.env.rail.get_transitions(*pos, dir)
                    for d in range(4):
                        if possible_transitions[d] == 1:
                            new_cell = get_new_position(pos, d)
                            if new_cell in self.switches.keys() and pos not in self.switches.keys():
                                if pos not in self.switches_neighbours.keys():
                                    self.switches_neighbours.update({pos: [dir]})
                                else:
                                    self.switches_neighbours[pos].append(dir)

    def find_all_cell_where_agent_can_choose(self):
        # prepare the memory - collect all cells where the agent can choose more than FORWARD/STOP.
        self.find_all_switches()
        self.find_all_switch_neighbours()

    def check_agent_decision(self, position, direction):
        # Decide whether the agent is
        # - on a switch
        # - at a switch neighbour (next to a switch), where the switch offers the agent more options than
        #   FORWARD/STOP
        # - on any switch: regardless of whether the agent has more options than FORWARD/STOP
        # - at any switch neighbour: regardless of whether the agent has more than one option (transition)
        #   when it reaches the switch
        agents_on_switch = False
        agents_on_switch_all = False
        agents_near_to_switch = False
        agents_near_to_switch_all = False
        if position in self.switches.keys():
            agents_on_switch = direction in self.switches[position]
            agents_on_switch_all = True

        if position in self.switches_neighbours.keys():
            new_cell = get_new_position(position, direction)
            if new_cell in self.switches.keys():
                if not direction in self.switches[new_cell]:
                    agents_near_to_switch = direction in self.switches_neighbours[position]
            else:
                agents_near_to_switch = direction in self.switches_neighbours[position]

            agents_near_to_switch_all = direction in self.switches_neighbours[position]

        return agents_on_switch, agents_near_to_switch, agents_near_to_switch_all, agents_on_switch_all

    def required_agent_decision(self):
        agents_can_choose = {}
        agents_on_switch = {}
        agents_on_switch_all = {}
        agents_near_to_switch = {}
        agents_near_to_switch_all = {}
        for a in range(self.env.get_num_agents()):
            ret_agents_on_switch, ret_agents_near_to_switch, ret_agents_near_to_switch_all, ret_agents_on_switch_all = \
                self.check_agent_decision(
                    self.env.agents[a].position,
                    self.env.agents[a].direction)
            agents_on_switch.update({a: ret_agents_on_switch})
            agents_on_switch_all.update({a: ret_agents_on_switch_all})
            ready_to_depart = self.env.agents[a].status == RailAgentStatus.READY_TO_DEPART
            agents_near_to_switch.update({a: (ret_agents_near_to_switch and not ready_to_depart)})
            agents_can_choose.update({a: agents_on_switch[a] or agents_near_to_switch[a]})
            agents_near_to_switch_all.update({a: (ret_agents_near_to_switch_all and not ready_to_depart)})
        return agents_can_choose, agents_on_switch, agents_near_to_switch, agents_near_to_switch_all, agents_on_switch_all
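A hypothetical usage sketch (not part of the diff): build the helper once per episode and query the per-agent decision flags each step. The module path and 'env' (an existing flatland RailEnv) are assumptions:

    from utils.agent_can_choose_helper import AgentCanChooseHelper  # assumed module path

    helper = AgentCanChooseHelper()
    helper.build_data(env)  # precompute switches and switch neighbours for this env

    agent = env.agents[0]
    on_switch, near_switch, near_switch_all, on_switch_all = \
        helper.check_agent_decision(agent.position, agent.direction)
    if on_switch or near_switch:
        pass  # the agent faces a real routing decision; defer to the learned policy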
from typing import Optional, List

import matplotlib.pyplot as plt
import numpy as np
from flatland.core.env_observation_builder import DummyObservationBuilder
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.rail_env import RailEnv, RailEnvActions, fast_count_nonzero

from reinforcement_learning.policy import Policy
from utils.shortest_distance_walker import ShortestDistanceWalker


class DeadlockAvoidanceObservation(DummyObservationBuilder):
    def __init__(self):
        self.counter = 0

    def get_many(self, handles: Optional[List[int]] = None) -> np.ndarray:
        self.counter += 1
        obs = np.ones((len(handles), 2))
        for handle in handles:
            obs[handle][0] = handle
            obs[handle][1] = self.counter
        return obs


class DeadlockAvoidanceShortestDistanceWalker(ShortestDistanceWalker):
    def __init__(self, env: RailEnv, agent_positions, switches):
        super().__init__(env)
        self.shortest_distance_agent_map = np.zeros((self.env.get_num_agents(),
                                                     self.env.height,
                                                     self.env.width),
                                                    dtype=int) - 1

        self.full_shortest_distance_agent_map = np.zeros((self.env.get_num_agents(),
                                                          self.env.height,
                                                          self.env.width),
                                                         dtype=int) - 1

        self.agent_positions = agent_positions

        self.opp_agent_map = {}
        self.same_agent_map = {}
        self.switches = switches

    def getData(self):
        return self.shortest_distance_agent_map, self.full_shortest_distance_agent_map

    def callback(self, handle, agent, position, direction, action, possible_transitions):
        opp_a = self.agent_positions[position]
        if opp_a != -1 and opp_a != handle:
            if self.env.agents[opp_a].direction != direction:
                d = self.opp_agent_map.get(handle, [])
                if opp_a not in d:
                    d.append(opp_a)
                self.opp_agent_map.update({handle: d})
            else:
                if len(self.opp_agent_map.get(handle, [])) == 0:
                    d = self.same_agent_map.get(handle, [])
                    if opp_a not in d:
                        d.append(opp_a)
                    self.same_agent_map.update({handle: d})

        if len(self.opp_agent_map.get(handle, [])) == 0:
            if self.switches.get(position, None) is None:
                self.shortest_distance_agent_map[(handle, position[0], position[1])] = 1
        self.full_shortest_distance_agent_map[(handle, position[0], position[1])] = 1


class DeadLockAvoidanceAgent(Policy):
    def __init__(self, env: RailEnv, show_debug_plot=False):
        self.env = env
        self.memory = None
        self.loss = 0
        self.agent_can_move = {}
        self.switches = {}
        self.show_debug_plot = show_debug_plot

    def step(self, state, action, reward, next_state, done):
        pass

    def act(self, state, eps=0.):
        # agent = self.env.agents[state[0]]
        check = self.agent_can_move.get(state[0], None)
        if check is None:
            return RailEnvActions.STOP_MOVING
        return check[3]

    def reset(self):
        self.agent_positions = None
        self.shortest_distance_walker = None
        self.switches = {}
        for h in range(self.env.height):
            for w in range(self.env.width):
                pos = (h, w)
                for dir in range(4):
                    possible_transitions = self.env.rail.get_transitions(*pos, dir)
                    num_transitions = fast_count_nonzero(possible_transitions)
                    if num_transitions > 1:
                        if pos not in self.switches.keys():
                            self.switches.update({pos: [dir]})
                        else:
                            self.switches[pos].append(dir)

    def start_step(self):
        self.build_agent_position_map()
        self.shortest_distance_mapper()
        self.extract_agent_can_move()

    def end_step(self):
        pass

    def get_actions(self):
        pass

    def build_agent_position_map(self):
        # build map with agent positions (only active agents)
        self.agent_positions = np.zeros((self.env.height, self.env.width), dtype=int) - 1
        for handle in range(self.env.get_num_agents()):
            agent = self.env.agents[handle]
            if agent.status == RailAgentStatus.ACTIVE:
                if agent.position is not None:
                    self.agent_positions[agent.position] = handle

    def shortest_distance_mapper(self):
        self.shortest_distance_walker = DeadlockAvoidanceShortestDistanceWalker(self.env,
                                                                                self.agent_positions,
                                                                                self.switches)
        for handle in range(self.env.get_num_agents()):
            agent = self.env.agents[handle]
            if agent.status <= RailAgentStatus.ACTIVE:
                self.shortest_distance_walker.walk_to_target(handle)

    def extract_agent_can_move(self):
        self.agent_can_move = {}
        shortest_distance_agent_map, full_shortest_distance_agent_map = self.shortest_distance_walker.getData()
        for handle in range(self.env.get_num_agents()):
            agent = self.env.agents[handle]
            if agent.status < RailAgentStatus.DONE:
                next_step_ok = self.check_agent_can_move(shortest_distance_agent_map[handle],
                                                         self.shortest_distance_walker.same_agent_map.get(handle, []),
                                                         self.shortest_distance_walker.opp_agent_map.get(handle, []),
                                                         full_shortest_distance_agent_map)
                if next_step_ok:
                    next_position, next_direction, action, _ = self.shortest_distance_walker.walk_one_step(handle)
                    self.agent_can_move.update({handle: [next_position[0], next_position[1], next_direction, action]})

        if self.show_debug_plot:
            a = np.floor(np.sqrt(self.env.get_num_agents()))
            b = np.ceil(self.env.get_num_agents() / a)
            for handle in range(self.env.get_num_agents()):
                plt.subplot(a, b, handle + 1)
                plt.imshow(full_shortest_distance_agent_map[handle] + shortest_distance_agent_map[handle])
            plt.show(block=False)
            plt.pause(0.01)

    def check_agent_can_move(self,
                             my_shortest_walking_path,
                             same_agents,
                             opp_agents,
                             full_shortest_distance_agent_map):
        agent_positions_map = (self.agent_positions > -1).astype(int)
        delta = my_shortest_walking_path
        next_step_ok = True
        for opp_a in opp_agents:
            opp = full_shortest_distance_agent_map[opp_a]
            delta = ((my_shortest_walking_path - opp - agent_positions_map) > 0).astype(int)
            if np.sum(delta) < (3 + len(opp_agents)):
                next_step_ok = False
        return next_step_ok

    def save(self, filename):
        pass

    def load(self, filename):
        pass
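A toy illustration (values assumed, not part of the diff) of the free-cell test in check_agent_can_move: path maps encode 1 for a cell on a path and -1 otherwise, and an agent may only keep moving while its remaining shortest walk retains at least 3 + len(opp_agents) cells that are neither on an opposing agent's full path nor currently occupied:

    import numpy as np

    my_path  = np.array([ 1,  1,  1,  1, -1])  # 1 = on my shortest walk, -1 = off it
    opp_path = np.array([-1, -1,  1,  1, -1])  # opposing agent's full shortest-path map
    occupied = np.array([ 0,  1,  0,  0,  0])  # 1 = cell currently holds an active agent

    delta = ((my_path - opp_path - occupied) > 0).astype(int)
    free_cells = np.sum(delta)          # 2 in this toy example
    next_step_ok = free_cells >= 3 + 1  # one opposing agent -> need 4 free cells -> False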
from typing import Optional, List

import matplotlib.pyplot as plt
import numpy as np
from flatland.core.env_observation_builder import DummyObservationBuilder
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.rail_env import RailEnv, RailEnvActions, fast_count_nonzero

from reinforcement_learning.policy import HeuristicPolicy, DummyMemory
from utils.agent_action_config import map_rail_env_action
from utils.shortest_distance_walker import ShortestDistanceWalker


class DeadlockAvoidanceObservation(DummyObservationBuilder):
    def __init__(self):
        self.counter = 0

    def get_many(self, handles: Optional[List[int]] = None) -> np.ndarray:
        self.counter += 1
        obs = np.ones((len(handles), 2))
        for handle in handles:
            obs[handle][0] = handle
            obs[handle][1] = self.counter
        return obs


class DeadlockAvoidanceShortestDistanceWalker(ShortestDistanceWalker):
    def __init__(self, env: RailEnv, agent_positions, switches):
        super().__init__(env)
        self.shortest_distance_agent_map = np.zeros((self.env.get_num_agents(),
                                                     self.env.height,
                                                     self.env.width),
                                                    dtype=int) - 1

        self.full_shortest_distance_agent_map = np.zeros((self.env.get_num_agents(),
                                                          self.env.height,
                                                          self.env.width),
                                                         dtype=int) - 1

        self.agent_positions = agent_positions

        self.opp_agent_map = {}
        self.same_agent_map = {}
        self.switches = switches

    def getData(self):
        return self.shortest_distance_agent_map, self.full_shortest_distance_agent_map

    def callback(self, handle, agent, position, direction, action, possible_transitions):
        opp_a = self.agent_positions[position]
        if opp_a != -1 and opp_a != handle:
            if self.env.agents[opp_a].direction != direction:
                d = self.opp_agent_map.get(handle, [])
                if opp_a not in d:
                    d.append(opp_a)
                self.opp_agent_map.update({handle: d})
            else:
                if len(self.opp_agent_map.get(handle, [])) == 0:
                    d = self.same_agent_map.get(handle, [])
                    if opp_a not in d:
                        d.append(opp_a)
                    self.same_agent_map.update({handle: d})

        if len(self.opp_agent_map.get(handle, [])) == 0:
            if self.switches.get(position, None) is None:
                self.shortest_distance_agent_map[(handle, position[0], position[1])] = 1
        self.full_shortest_distance_agent_map[(handle, position[0], position[1])] = 1


class DeadLockAvoidanceAgent(HeuristicPolicy):
    def __init__(self, env: RailEnv, action_size, enable_eps=False, show_debug_plot=False):
        print(">> DeadLockAvoidance")
        self.env = env
        self.memory = DummyMemory()
        self.loss = 0
        self.action_size = action_size
        self.agent_can_move = {}
        self.agent_can_move_value = {}
        self.switches = {}
        self.show_debug_plot = show_debug_plot
        self.enable_eps = enable_eps

    def step(self, handle, state, action, reward, next_state, done):
        pass

    def act(self, handle, state, eps=0.):
        # Epsilon-greedy action selection
        if self.enable_eps:
            if np.random.random() < eps:
                return np.random.choice(np.arange(self.action_size))

        # agent = self.env.agents[state[0]]
        check = self.agent_can_move.get(handle, None)
        act = RailEnvActions.STOP_MOVING
        if check is not None:
            act = check[3]
        return map_rail_env_action(act)

    def get_agent_can_move_value(self, handle):
        return self.agent_can_move_value.get(handle, np.inf)

    def reset(self, env):
        self.env = env
        self.agent_positions = None
        self.shortest_distance_walker = None
        self.switches = {}
        for h in range(self.env.height):
            for w in range(self.env.width):
                pos = (h, w)
                for dir in range(4):
                    possible_transitions = self.env.rail.get_transitions(*pos, dir)
                    num_transitions = fast_count_nonzero(possible_transitions)
                    if num_transitions > 1:
                        if pos not in self.switches.keys():
                            self.switches.update({pos: [dir]})
                        else:
                            self.switches[pos].append(dir)

    def start_step(self, train):
        self.build_agent_position_map()
        self.shortest_distance_mapper()
        self.extract_agent_can_move()

    def end_step(self, train):
        pass

    def get_actions(self):
        pass

    def build_agent_position_map(self):
        # build map with agent positions (only active agents)
        self.agent_positions = np.zeros((self.env.height, self.env.width), dtype=int) - 1
        for handle in range(self.env.get_num_agents()):
            agent = self.env.agents[handle]
            if agent.status == RailAgentStatus.ACTIVE:
                if agent.position is not None:
                    self.agent_positions[agent.position] = handle

    def shortest_distance_mapper(self):
        self.shortest_distance_walker = DeadlockAvoidanceShortestDistanceWalker(self.env,
                                                                                self.agent_positions,
                                                                                self.switches)
        for handle in range(self.env.get_num_agents()):
            agent = self.env.agents[handle]
            if agent.status <= RailAgentStatus.ACTIVE:
                self.shortest_distance_walker.walk_to_target(handle)

    def extract_agent_can_move(self):
        self.agent_can_move = {}
        shortest_distance_agent_map, full_shortest_distance_agent_map = self.shortest_distance_walker.getData()
        for handle in range(self.env.get_num_agents()):
            agent = self.env.agents[handle]
            if agent.status < RailAgentStatus.DONE:
                next_step_ok = self.check_agent_can_move(handle,
                                                         shortest_distance_agent_map[handle],
                                                         self.shortest_distance_walker.same_agent_map.get(handle, []),
                                                         self.shortest_distance_walker.opp_agent_map.get(handle, []),
                                                         full_shortest_distance_agent_map)
                if next_step_ok:
                    next_position, next_direction, action, _ = self.shortest_distance_walker.walk_one_step(handle)
                    self.agent_can_move.update({handle: [next_position[0], next_position[1], next_direction, action]})

        if self.show_debug_plot:
            a = np.floor(np.sqrt(self.env.get_num_agents()))
            b = np.ceil(self.env.get_num_agents() / a)
            for handle in range(self.env.get_num_agents()):
                plt.subplot(a, b, handle + 1)
                plt.imshow(full_shortest_distance_agent_map[handle] + shortest_distance_agent_map[handle])
            plt.show(block=False)
            plt.pause(0.01)

    def check_agent_can_move(self,
                             handle,
                             my_shortest_walking_path,
                             same_agents,
                             opp_agents,
                             full_shortest_distance_agent_map):
        agent_positions_map = (self.agent_positions > -1).astype(int)
        delta = my_shortest_walking_path
        next_step_ok = True
        for opp_a in opp_agents:
            opp = full_shortest_distance_agent_map[opp_a]
            delta = ((my_shortest_walking_path - opp - agent_positions_map) > 0).astype(int)
            if np.sum(delta) < (3 + len(opp_agents)):
                next_step_ok = False
            v = self.agent_can_move_value.get(handle, np.inf)
            v = min(v, np.sum(delta))
            self.agent_can_move_value.update({handle: v})
        return next_step_ok

    def save(self, filename):
        pass

    def load(self, filename):
        pass
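A hypothetical evaluation-loop sketch (not part of the diff) showing the heuristic used as a drop-in policy; get_action_size and map_actions come from utils.agent_action_config, and 'env' is assumed to be an existing RailEnv:

    from utils.agent_action_config import get_action_size, map_actions

    policy = DeadLockAvoidanceAgent(env, get_action_size(), enable_eps=False)
    obs, info = env.reset()
    policy.reset(env)
    done = {"__all__": False}
    while not done["__all__"]:
        policy.start_step(train=False)
        actions = {handle: policy.act(handle, obs[handle])
                   for handle in env.get_agent_handles()}
        obs, rewards, done, info = env.step(map_actions(actions))
        policy.end_step(train=False)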
import numpy as np
from flatland.core.grid.grid4_utils import get_new_position
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.rail_env import fast_count_nonzero


def get_agent_positions(env):
    agent_positions: np.ndarray = np.full((env.height, env.width), -1)
    for agent_handle in env.get_agent_handles():
        agent = env.agents[agent_handle]
        if agent.status == RailAgentStatus.ACTIVE:
            position = agent.position
            if position is None:
                position = agent.initial_position
            agent_positions[position] = agent_handle
    return agent_positions


def get_agent_targets(env):
    agent_targets = []
    for agent_handle in env.get_agent_handles():
        agent = env.agents[agent_handle]
        if agent.status == RailAgentStatus.ACTIVE:
            agent_targets.append(agent.target)
    return agent_targets


def check_for_deadlock(handle, env, agent_positions, check_position=None, check_direction=None):
    agent = env.agents[handle]
    if agent.status == RailAgentStatus.DONE or agent.status == RailAgentStatus.DONE_REMOVED:
        return False

    position = agent.position
    if position is None:
        position = agent.initial_position
    if check_position is not None:
        position = check_position
    direction = agent.direction
    if check_direction is not None:
        direction = check_direction

    possible_transitions = env.rail.get_transitions(*position, direction)
    num_transitions = fast_count_nonzero(possible_transitions)
    for dir_loop in range(4):
        if possible_transitions[dir_loop] == 1:
            new_position = get_new_position(position, dir_loop)
            opposite_agent = agent_positions[new_position]
            if opposite_agent != handle and opposite_agent != -1:
                num_transitions -= 1
            else:
                return False

    is_deadlock = num_transitions <= 0
    return is_deadlock
def check_if_all_blocked(env):
......
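A hypothetical usage sketch (not part of the diff): take one snapshot of the active agents' positions per step and collect the handles of agents that can no longer move; 'env' is assumed to be an existing RailEnv:

    positions = get_agent_positions(env)
    deadlocked = [handle for handle in env.get_agent_handles()
                  if check_for_deadlock(handle, env, positions)]
    print("deadlocked agents:", deadlocked)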