From 15801ce106228121f6f7a590460e0787d086513a Mon Sep 17 00:00:00 2001 From: Adrian Egli <adrian.egli@sbb.ch> Date: Thu, 8 Oct 2020 16:25:27 +0200 Subject: [PATCH] other testing --- aicrowd.json | 2 +- run.py | 11 +- src/extra.py | 808 +++++++++--------- src/simple/ClassifyProblemInstance.py | 94 ++ src/simple/DeadLock_Avoidance.py | 574 +++++++++++++ src/simple/ShortestPathPredictorForRailEnv.py | 107 +++ 6 files changed, 1190 insertions(+), 406 deletions(-) create mode 100644 src/simple/ClassifyProblemInstance.py create mode 100644 src/simple/DeadLock_Avoidance.py create mode 100644 src/simple/ShortestPathPredictorForRailEnv.py diff --git a/aicrowd.json b/aicrowd.json index de611d3..976e3fd 100644 --- a/aicrowd.json +++ b/aicrowd.json @@ -2,6 +2,6 @@ "challenge_id": "neurips-2020-flatland-challenge", "grader_id": "neurips-2020-flatland-challenge", "debug": false, - "tags": ["RL"] + "tags": ["other"] } diff --git a/run.py b/run.py index a2f071c..5cd92d3 100644 --- a/run.py +++ b/run.py @@ -4,10 +4,12 @@ import numpy as np from flatland.envs.agent_utils import RailAgentStatus from flatland.evaluators.client import FlatlandRemoteClient + ##################################################################### # Instantiate a Remote Client ##################################################################### from src.extra import Extra +from src.simple.DeadLock_Avoidance import calculate_one_step_heuristics, calculate_one_step_package_implementation,calculate_one_step,calculate_one_step_primitive_implementation remote_client = FlatlandRemoteClient() @@ -19,9 +21,16 @@ remote_client = FlatlandRemoteClient() # compute the necessary action for this step for all (or even some) # of the agents ##################################################################### -def my_controller(extra: Extra, observation, info): +def my_controller_RL(extra: Extra, observation, info): return extra.rl_agent_act(observation, info) +def my_controller(local_env, obs, number_of_agents): + _action, _ = calculate_one_step(extra.env) + # _action, _ = calculate_one_step_package_implementation(local_env) + # _action, _ = calculate_one_step_primitive_implementation(local_env) + # _action, _ = calculate_one_step_heuristics(local_env) + return _action + ##################################################################### # Instantiate your custom Observation Builder diff --git a/src/extra.py b/src/extra.py index 312cebb..b70830f 100644 --- a/src/extra.py +++ b/src/extra.py @@ -1,404 +1,404 @@ -# -# Author Adrian Egli -# -# This observation solves the FLATland challenge ROUND 1 - with agent's done 19.3% -# -# Training: -# For the training of the PPO RL agent I showed 10k episodes - The episodes used for the training -# consists of 1..20 agents on a 50x50 grid. Thus the RL agent has to learn to handle 1 upto 20 agents. -# -# - https://github.com/mitchellgoffpc/flatland-training -# ./adrian_egli_ppo_training_done.png -# -# The key idea behind this observation is that agent's can not freely choose where they want. 
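In other words, an agent's route is forced everywhere except at switch cells, so only switches (and the cells directly in front of them) require a decision. A minimal sketch of how such decision cells can be detected, assuming a built Flatland RailEnv `env` whose `rail.get_transitions(row, col, direction)` returns a 4-tuple of possible exits; this mirrors find_all_cell_where_agent_can_choose below:

    def find_switches(env):
        # a cell is a decision point for a given heading if more than one exit is open
        switches = {}
        for h in range(env.height):
            for w in range(env.width):
                for direction in range(4):
                    if sum(env.rail.get_transitions(h, w, direction)) > 1:
                        switches.setdefault((h, w), []).append(direction)
        return switches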
-# -# ./images/adrian_egli_decisions.png -# ./images/adrian_egli_info.png -# ./images/adrian_egli_start.png -# ./images/adrian_egli_target.png -# -# Private submission -# http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/8 - -import numpy as np -from flatland.core.env_observation_builder import ObservationBuilder -from flatland.core.grid.grid4_utils import get_new_position -from flatland.envs.agent_utils import RailAgentStatus -from flatland.envs.rail_env import RailEnvActions - -from src.ppo.agent import Agent - - -# ------------------------------------- USE FAST_METHOD from FLATland master ------------------------------------------ -# Adrian Egli performance fix (the fast methods brings more than 50%) - -def fast_isclose(a, b, rtol): - return (a < (b + rtol)) or (a < (b - rtol)) - - -def fast_clip(position: (int, int), min_value: (int, int), max_value: (int, int)) -> bool: - return ( - max(min_value[0], min(position[0], max_value[0])), - max(min_value[1], min(position[1], max_value[1])) - ) - - -def fast_argmax(possible_transitions: (int, int, int, int)) -> bool: - if possible_transitions[0] == 1: - return 0 - if possible_transitions[1] == 1: - return 1 - if possible_transitions[2] == 1: - return 2 - return 3 - - -def fast_position_equal(pos_1: (int, int), pos_2: (int, int)) -> bool: - return pos_1[0] == pos_2[0] and pos_1[1] == pos_2[1] - - -def fast_count_nonzero(possible_transitions: (int, int, int, int)): - return possible_transitions[0] + possible_transitions[1] + possible_transitions[2] + possible_transitions[3] - - -# ------------------------------- END - USE FAST_METHOD from FLATland master ------------------------------------------ - -class Extra(ObservationBuilder): - - def __init__(self, max_depth): - self.max_depth = max_depth - self.observation_dim = 26 - self.agent = None - self.random_agent_starter = [] - - def build_data(self): - if self.env is not None: - self.env.dev_obs_dict = {} - self.switches = {} - self.switches_neighbours = {} - self.debug_render_list = [] - self.debug_render_path_list = [] - if self.env is not None: - self.find_all_cell_where_agent_can_choose() - - def find_all_cell_where_agent_can_choose(self): - - switches = {} - for h in range(self.env.height): - for w in range(self.env.width): - pos = (h, w) - for dir in range(4): - possible_transitions = self.env.rail.get_transitions(*pos, dir) - num_transitions = fast_count_nonzero(possible_transitions) - if num_transitions > 1: - if pos not in switches.keys(): - switches.update({pos: [dir]}) - else: - switches[pos].append(dir) - - switches_neighbours = {} - for h in range(self.env.height): - for w in range(self.env.width): - # look one step forward - for dir in range(4): - pos = (h, w) - possible_transitions = self.env.rail.get_transitions(*pos, dir) - for d in range(4): - if possible_transitions[d] == 1: - new_cell = get_new_position(pos, d) - if new_cell in switches.keys() and pos not in switches.keys(): - if pos not in switches_neighbours.keys(): - switches_neighbours.update({pos: [dir]}) - else: - switches_neighbours[pos].append(dir) - - self.switches = switches - self.switches_neighbours = switches_neighbours - - def check_agent_descision(self, position, direction): - switches = self.switches - switches_neighbours = self.switches_neighbours - agents_on_switch = False - agents_near_to_switch = False - agents_near_to_switch_all = False - if position in switches.keys(): - agents_on_switch = direction in switches[position] - - if position in switches_neighbours.keys(): - 
new_cell = get_new_position(position, direction) - if new_cell in switches.keys(): - if not direction in switches[new_cell]: - agents_near_to_switch = direction in switches_neighbours[position] - else: - agents_near_to_switch = direction in switches_neighbours[position] - - agents_near_to_switch_all = direction in switches_neighbours[position] - - return agents_on_switch, agents_near_to_switch, agents_near_to_switch_all - - def required_agent_descision(self): - agents_can_choose = {} - agents_on_switch = {} - agents_near_to_switch = {} - agents_near_to_switch_all = {} - for a in range(self.env.get_num_agents()): - ret_agents_on_switch, ret_agents_near_to_switch, ret_agents_near_to_switch_all = \ - self.check_agent_descision( - self.env.agents[a].position, - self.env.agents[a].direction) - agents_on_switch.update({a: ret_agents_on_switch}) - ready_to_depart = self.env.agents[a].status == RailAgentStatus.READY_TO_DEPART - agents_near_to_switch.update({a: (ret_agents_near_to_switch and not ready_to_depart)}) - - agents_can_choose.update({a: agents_on_switch[a] or agents_near_to_switch[a]}) - - agents_near_to_switch_all.update({a: (ret_agents_near_to_switch_all and not ready_to_depart)}) - - return agents_can_choose, agents_on_switch, agents_near_to_switch, agents_near_to_switch_all - - def debug_render(self, env_renderer): - agents_can_choose, agents_on_switch, agents_near_to_switch, agents_near_to_switch_all = \ - self.required_agent_descision() - self.env.dev_obs_dict = {} - for a in range(max(3, self.env.get_num_agents())): - self.env.dev_obs_dict.update({a: []}) - - selected_agent = None - if agents_can_choose[0]: - if self.env.agents[0].position is not None: - self.debug_render_list.append(self.env.agents[0].position) - else: - self.debug_render_list.append(self.env.agents[0].initial_position) - - if self.env.agents[0].position is not None: - self.debug_render_path_list.append(self.env.agents[0].position) - else: - self.debug_render_path_list.append(self.env.agents[0].initial_position) - - env_renderer.gl.agent_colors[0] = env_renderer.gl.rgb_s2i("FF0000") - env_renderer.gl.agent_colors[1] = env_renderer.gl.rgb_s2i("666600") - env_renderer.gl.agent_colors[2] = env_renderer.gl.rgb_s2i("006666") - env_renderer.gl.agent_colors[3] = env_renderer.gl.rgb_s2i("550000") - - self.env.dev_obs_dict[0] = self.debug_render_list - self.env.dev_obs_dict[1] = self.switches.keys() - self.env.dev_obs_dict[2] = self.switches_neighbours.keys() - self.env.dev_obs_dict[3] = self.debug_render_path_list - - def normalize_observation(self, obsData): - return obsData - - def is_collision(self, obsData): - return False - - def reset(self): - self.build_data() - return - - def fast_argmax(self, array): - if array[0] == 1: - return 0 - if array[1] == 1: - return 1 - if array[2] == 1: - return 2 - return 3 - - def _explore(self, handle, new_position, new_direction, depth=0): - has_opp_agent = 0 - has_same_agent = 0 - visited = [] - - # stop exploring (max_depth reached) - if depth >= self.max_depth: - return has_opp_agent, has_same_agent, visited - - # max_explore_steps = 100 - cnt = 0 - while cnt < 100: - cnt += 1 - - visited.append(new_position) - opp_a = self.env.agent_positions[new_position] - if opp_a != -1 and opp_a != handle: - if self.env.agents[opp_a].direction != new_direction: - # opp agent found - has_opp_agent = 1 - return has_opp_agent, has_same_agent, visited - else: - has_same_agent = 1 - return has_opp_agent, has_same_agent, visited - - # convert one-hot encoding to 0,1,2,3 - possible_transitions = 
self.env.rail.get_transitions(*new_position, new_direction) - agents_on_switch, \ - agents_near_to_switch, \ - agents_near_to_switch_all = \ - self.check_agent_descision(new_position, new_direction) - if agents_near_to_switch: - return has_opp_agent, has_same_agent, visited - - if agents_on_switch: - for dir_loop in range(4): - if possible_transitions[dir_loop] == 1: - hoa, hsa, v = self._explore(handle, - get_new_position(new_position, dir_loop), - dir_loop, - depth + 1) - visited.append(v) - has_opp_agent = 0.5 * (has_opp_agent + hoa) - has_same_agent = 0.5 * (has_same_agent + hsa) - return has_opp_agent, has_same_agent, visited - else: - new_direction = fast_argmax(possible_transitions) - new_position = get_new_position(new_position, new_direction) - return has_opp_agent, has_same_agent, visited - - def get(self, handle): - # all values are [0,1] - # observation[0] : 1 path towards target (direction 0) / otherwise 0 -> path is longer or there is no path - # observation[1] : 1 path towards target (direction 1) / otherwise 0 -> path is longer or there is no path - # observation[2] : 1 path towards target (direction 2) / otherwise 0 -> path is longer or there is no path - # observation[3] : 1 path towards target (direction 3) / otherwise 0 -> path is longer or there is no path - # observation[4] : int(agent.status == RailAgentStatus.READY_TO_DEPART) - # observation[5] : int(agent.status == RailAgentStatus.ACTIVE) - # observation[6] : int(agent.status == RailAgentStatus.DONE or agent.status == RailAgentStatus.DONE_REMOVED) - # observation[7] : current agent is located at a switch, where it can take a routing decision - # observation[8] : current agent is located at a cell, where it has to take a stop-or-go decision - # observation[9] : current agent is located one step before/after a switch - # observation[10] : 1 if there is a path (track/branch) otherwise 0 (direction 0) - # observation[11] : 1 if there is a path (track/branch) otherwise 0 (direction 1) - # observation[12] : 1 if there is a path (track/branch) otherwise 0 (direction 2) - # observation[13] : 1 if there is a path (track/branch) otherwise 0 (direction 3) - # observation[14] : If there is a path with step (direction 0) and there is a agent with opposite direction -> 1 - # observation[15] : If there is a path with step (direction 1) and there is a agent with opposite direction -> 1 - # observation[16] : If there is a path with step (direction 2) and there is a agent with opposite direction -> 1 - # observation[17] : If there is a path with step (direction 3) and there is a agent with opposite direction -> 1 - # observation[18] : If there is a path with step (direction 0) and there is a agent with same direction -> 1 - # observation[19] : If there is a path with step (direction 1) and there is a agent with same direction -> 1 - # observation[20] : If there is a path with step (direction 2) and there is a agent with same direction -> 1 - # observation[21] : If there is a path with step (direction 3) and there is a agent with same direction -> 1 - - observation = np.zeros(self.observation_dim) - visited = [] - agent = self.env.agents[handle] - - agent_done = False - if agent.status == RailAgentStatus.READY_TO_DEPART: - agent_virtual_position = agent.initial_position - observation[4] = 1 - elif agent.status == RailAgentStatus.ACTIVE: - agent_virtual_position = agent.position - observation[5] = 1 - else: - observation[6] = 1 - agent_virtual_position = (-1, -1) - agent_done = True - - if not agent_done: - 
visited.append(agent_virtual_position) - distance_map = self.env.distance_map.get() - current_cell_dist = distance_map[handle, - agent_virtual_position[0], agent_virtual_position[1], - agent.direction] - possible_transitions = self.env.rail.get_transitions(*agent_virtual_position, agent.direction) - orientation = agent.direction - if fast_count_nonzero(possible_transitions) == 1: - orientation = np.argmax(possible_transitions) - - for dir_loop, branch_direction in enumerate([(orientation + i) % 4 for i in range(-1, 3)]): - if possible_transitions[branch_direction]: - new_position = get_new_position(agent_virtual_position, branch_direction) - - new_cell_dist = distance_map[handle, - new_position[0], new_position[1], - branch_direction] - if not (np.math.isinf(new_cell_dist) and np.math.isinf(current_cell_dist)): - observation[dir_loop] = int(new_cell_dist < current_cell_dist) - - has_opp_agent, has_same_agent, v = self._explore(handle, new_position, branch_direction) - visited.append(v) - - observation[10 + dir_loop] = 1 - observation[14 + dir_loop] = has_opp_agent - observation[18 + dir_loop] = has_same_agent - - opp_a = self.env.agent_positions[new_position] - if opp_a != -1 and opp_a != handle: - observation[22 + dir_loop] = 1 - - agents_on_switch, \ - agents_near_to_switch, \ - agents_near_to_switch_all = \ - self.check_agent_descision(agent_virtual_position, agent.direction) - observation[7] = int(agents_on_switch) - observation[8] = int(agents_near_to_switch) - observation[9] = int(agents_near_to_switch_all) - - self.env.dev_obs_dict.update({handle: visited}) - - return observation - - def rl_agent_act_ADRIAN(self, observation, info, eps=0.0): - self.loadAgent() - action_dict = {} - for a in range(self.env.get_num_agents()): - if info['action_required'][a]: - action_dict[a] = self.agent.act(observation[a], eps=eps) - # action_dict[a] = np.random.randint(5) - else: - action_dict[a] = RailEnvActions.DO_NOTHING - - return action_dict - - def rl_agent_act(self, observation, info, eps=0.0): - if len(self.random_agent_starter) != self.env.get_num_agents(): - self.random_agent_starter = np.random.random(self.env.get_num_agents()) * 1000.0 - self.loadAgent() - - action_dict = {} - for a in range(self.env.get_num_agents()): - if self.random_agent_starter[a] > self.env._elapsed_steps: - action_dict[a] = RailEnvActions.STOP_MOVING - elif info['action_required'][a]: - action_dict[a] = self.agent.act(observation[a], eps=eps) - # action_dict[a] = np.random.randint(5) - else: - action_dict[a] = RailEnvActions.DO_NOTHING - - return action_dict - - def rl_agent_act_ADRIAN_01(self, observation, info, eps=0.0): - self.loadAgent() - action_dict = {} - active_cnt = 0 - for a in range(self.env.get_num_agents()): - if active_cnt < 10 or self.env.agents[a].status == RailAgentStatus.ACTIVE: - if observation[a][6] == 1: - active_cnt += int(self.env.agents[a].status == RailAgentStatus.ACTIVE) - action_dict[a] = RailEnvActions.STOP_MOVING - else: - active_cnt += int(self.env.agents[a].status < RailAgentStatus.DONE) - if (observation[a][7] + observation[a][8] + observation[a][9] > 0) or \ - (self.env.agents[a].status < RailAgentStatus.ACTIVE): - if info['action_required'][a]: - action_dict[a] = self.agent.act(observation[a], eps=eps) - # action_dict[a] = np.random.randint(5) - else: - action_dict[a] = RailEnvActions.MOVE_FORWARD - else: - action_dict[a] = RailEnvActions.MOVE_FORWARD - else: - action_dict[a] = RailEnvActions.STOP_MOVING - - return action_dict - - def loadAgent(self): - if self.agent is not None: - 
return - self.state_size = self.env.obs_builder.observation_dim - self.action_size = 5 - print("action_size: ", self.action_size) - print("state_size: ", self.state_size) - self.agent = Agent(self.state_size, self.action_size, 0) - self.agent.load('./checkpoints/', 0, 1.0) +# +# Author Adrian Egli +# +# This observation solves the FLATland challenge ROUND 1 - with agent's done 19.3% +# +# Training: +# For the training of the PPO RL agent I showed 10k episodes - The episodes used for the training +# consists of 1..20 agents on a 50x50 grid. Thus the RL agent has to learn to handle 1 upto 20 agents. +# +# - https://github.com/mitchellgoffpc/flatland-training +# ./adrian_egli_ppo_training_done.png +# +# The key idea behind this observation is that agent's can not freely choose where they want. +# +# ./images/adrian_egli_decisions.png +# ./images/adrian_egli_info.png +# ./images/adrian_egli_start.png +# ./images/adrian_egli_target.png +# +# Private submission +# http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/8 + +import numpy as np +from flatland.core.env_observation_builder import ObservationBuilder +from flatland.core.grid.grid4_utils import get_new_position +from flatland.envs.agent_utils import RailAgentStatus +from flatland.envs.rail_env import RailEnvActions + +from src.ppo.agent import Agent + + +# ------------------------------------- USE FAST_METHOD from FLATland master ------------------------------------------ +# Adrian Egli performance fix (the fast methods brings more than 50%) + +def fast_isclose(a, b, rtol): + return (a < (b + rtol)) or (a < (b - rtol)) + + +def fast_clip(position: (int, int), min_value: (int, int), max_value: (int, int)) -> bool: + return ( + max(min_value[0], min(position[0], max_value[0])), + max(min_value[1], min(position[1], max_value[1])) + ) + + +def fast_argmax(possible_transitions: (int, int, int, int)) -> bool: + if possible_transitions[0] == 1: + return 0 + if possible_transitions[1] == 1: + return 1 + if possible_transitions[2] == 1: + return 2 + return 3 + + +def fast_position_equal(pos_1: (int, int), pos_2: (int, int)) -> bool: + return pos_1[0] == pos_2[0] and pos_1[1] == pos_2[1] + + +def fast_count_nonzero(possible_transitions: (int, int, int, int)): + return possible_transitions[0] + possible_transitions[1] + possible_transitions[2] + possible_transitions[3] + + +# ------------------------------- END - USE FAST_METHOD from FLATland master ------------------------------------------ + +class Extra(ObservationBuilder): + + def __init__(self, max_depth): + self.max_depth = max_depth + self.observation_dim = 26 + self.agent = None + self.random_agent_starter = [] + + def build_data(self): + if self.env is not None: + self.env.dev_obs_dict = {} + self.switches = {} + self.switches_neighbours = {} + self.debug_render_list = [] + self.debug_render_path_list = [] + if self.env is not None: + self.find_all_cell_where_agent_can_choose() + + def find_all_cell_where_agent_can_choose(self): + + switches = {} + for h in range(self.env.height): + for w in range(self.env.width): + pos = (h, w) + for dir in range(4): + possible_transitions = self.env.rail.get_transitions(*pos, dir) + num_transitions = fast_count_nonzero(possible_transitions) + if num_transitions > 1: + if pos not in switches.keys(): + switches.update({pos: [dir]}) + else: + switches[pos].append(dir) + + switches_neighbours = {} + for h in range(self.env.height): + for w in range(self.env.width): + # look one step forward + for dir in range(4): + pos = (h, w) + 
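+                # probe all four exits from (h, w) for this heading; a cell whose
+                # next cell is a switch (while not being a switch itself) is
+                # recorded below as a "switch neighbour"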
possible_transitions = self.env.rail.get_transitions(*pos, dir) + for d in range(4): + if possible_transitions[d] == 1: + new_cell = get_new_position(pos, d) + if new_cell in switches.keys() and pos not in switches.keys(): + if pos not in switches_neighbours.keys(): + switches_neighbours.update({pos: [dir]}) + else: + switches_neighbours[pos].append(dir) + + self.switches = switches + self.switches_neighbours = switches_neighbours + + def check_agent_descision(self, position, direction): + switches = self.switches + switches_neighbours = self.switches_neighbours + agents_on_switch = False + agents_near_to_switch = False + agents_near_to_switch_all = False + if position in switches.keys(): + agents_on_switch = direction in switches[position] + + if position in switches_neighbours.keys(): + new_cell = get_new_position(position, direction) + if new_cell in switches.keys(): + if not direction in switches[new_cell]: + agents_near_to_switch = direction in switches_neighbours[position] + else: + agents_near_to_switch = direction in switches_neighbours[position] + + agents_near_to_switch_all = direction in switches_neighbours[position] + + return agents_on_switch, agents_near_to_switch, agents_near_to_switch_all + + def required_agent_descision(self): + agents_can_choose = {} + agents_on_switch = {} + agents_near_to_switch = {} + agents_near_to_switch_all = {} + for a in range(self.env.get_num_agents()): + ret_agents_on_switch, ret_agents_near_to_switch, ret_agents_near_to_switch_all = \ + self.check_agent_descision( + self.env.agents[a].position, + self.env.agents[a].direction) + agents_on_switch.update({a: ret_agents_on_switch}) + ready_to_depart = self.env.agents[a].status == RailAgentStatus.READY_TO_DEPART + agents_near_to_switch.update({a: (ret_agents_near_to_switch and not ready_to_depart)}) + + agents_can_choose.update({a: agents_on_switch[a] or agents_near_to_switch[a]}) + + agents_near_to_switch_all.update({a: (ret_agents_near_to_switch_all and not ready_to_depart)}) + + return agents_can_choose, agents_on_switch, agents_near_to_switch, agents_near_to_switch_all + + def debug_render(self, env_renderer): + agents_can_choose, agents_on_switch, agents_near_to_switch, agents_near_to_switch_all = \ + self.required_agent_descision() + self.env.dev_obs_dict = {} + for a in range(max(3, self.env.get_num_agents())): + self.env.dev_obs_dict.update({a: []}) + + selected_agent = None + if agents_can_choose[0]: + if self.env.agents[0].position is not None: + self.debug_render_list.append(self.env.agents[0].position) + else: + self.debug_render_list.append(self.env.agents[0].initial_position) + + if self.env.agents[0].position is not None: + self.debug_render_path_list.append(self.env.agents[0].position) + else: + self.debug_render_path_list.append(self.env.agents[0].initial_position) + + env_renderer.gl.agent_colors[0] = env_renderer.gl.rgb_s2i("FF0000") + env_renderer.gl.agent_colors[1] = env_renderer.gl.rgb_s2i("666600") + env_renderer.gl.agent_colors[2] = env_renderer.gl.rgb_s2i("006666") + env_renderer.gl.agent_colors[3] = env_renderer.gl.rgb_s2i("550000") + + self.env.dev_obs_dict[0] = self.debug_render_list + self.env.dev_obs_dict[1] = self.switches.keys() + self.env.dev_obs_dict[2] = self.switches_neighbours.keys() + self.env.dev_obs_dict[3] = self.debug_render_path_list + + def normalize_observation(self, obsData): + return obsData + + def is_collision(self, obsData): + return False + + def reset(self): + self.build_data() + return + + def fast_argmax(self, array): + if array[0] == 1: + return 
0 + if array[1] == 1: + return 1 + if array[2] == 1: + return 2 + return 3 + + def _explore(self, handle, new_position, new_direction, depth=0): + has_opp_agent = 0 + has_same_agent = 0 + visited = [] + + # stop exploring (max_depth reached) + if depth >= self.max_depth: + return has_opp_agent, has_same_agent, visited + + # max_explore_steps = 100 + cnt = 0 + while cnt < 100: + cnt += 1 + + visited.append(new_position) + opp_a = self.env.agent_positions[new_position] + if opp_a != -1 and opp_a != handle: + if self.env.agents[opp_a].direction != new_direction: + # opp agent found + has_opp_agent = 1 + return has_opp_agent, has_same_agent, visited + else: + has_same_agent = 1 + return has_opp_agent, has_same_agent, visited + + # convert one-hot encoding to 0,1,2,3 + possible_transitions = self.env.rail.get_transitions(*new_position, new_direction) + agents_on_switch, \ + agents_near_to_switch, \ + agents_near_to_switch_all = \ + self.check_agent_descision(new_position, new_direction) + if agents_near_to_switch: + return has_opp_agent, has_same_agent, visited + + if agents_on_switch: + for dir_loop in range(4): + if possible_transitions[dir_loop] == 1: + hoa, hsa, v = self._explore(handle, + get_new_position(new_position, dir_loop), + dir_loop, + depth + 1) + visited.append(v) + has_opp_agent = 0.5 * (has_opp_agent + hoa) + has_same_agent = 0.5 * (has_same_agent + hsa) + return has_opp_agent, has_same_agent, visited + else: + new_direction = fast_argmax(possible_transitions) + new_position = get_new_position(new_position, new_direction) + return has_opp_agent, has_same_agent, visited + + def get(self, handle): + # all values are [0,1] + # observation[0] : 1 path towards target (direction 0) / otherwise 0 -> path is longer or there is no path + # observation[1] : 1 path towards target (direction 1) / otherwise 0 -> path is longer or there is no path + # observation[2] : 1 path towards target (direction 2) / otherwise 0 -> path is longer or there is no path + # observation[3] : 1 path towards target (direction 3) / otherwise 0 -> path is longer or there is no path + # observation[4] : int(agent.status == RailAgentStatus.READY_TO_DEPART) + # observation[5] : int(agent.status == RailAgentStatus.ACTIVE) + # observation[6] : int(agent.status == RailAgentStatus.DONE or agent.status == RailAgentStatus.DONE_REMOVED) + # observation[7] : current agent is located at a switch, where it can take a routing decision + # observation[8] : current agent is located at a cell, where it has to take a stop-or-go decision + # observation[9] : current agent is located one step before/after a switch + # observation[10] : 1 if there is a path (track/branch) otherwise 0 (direction 0) + # observation[11] : 1 if there is a path (track/branch) otherwise 0 (direction 1) + # observation[12] : 1 if there is a path (track/branch) otherwise 0 (direction 2) + # observation[13] : 1 if there is a path (track/branch) otherwise 0 (direction 3) + # observation[14] : If there is a path with step (direction 0) and there is a agent with opposite direction -> 1 + # observation[15] : If there is a path with step (direction 1) and there is a agent with opposite direction -> 1 + # observation[16] : If there is a path with step (direction 2) and there is a agent with opposite direction -> 1 + # observation[17] : If there is a path with step (direction 3) and there is a agent with opposite direction -> 1 + # observation[18] : If there is a path with step (direction 0) and there is a agent with same direction -> 1 + # observation[19] : If 
there is a path with step (direction 1) and there is a agent with same direction -> 1 + # observation[20] : If there is a path with step (direction 2) and there is a agent with same direction -> 1 + # observation[21] : If there is a path with step (direction 3) and there is a agent with same direction -> 1 + + observation = np.zeros(self.observation_dim) + visited = [] + agent = self.env.agents[handle] + + agent_done = False + if agent.status == RailAgentStatus.READY_TO_DEPART: + agent_virtual_position = agent.initial_position + observation[4] = 1 + elif agent.status == RailAgentStatus.ACTIVE: + agent_virtual_position = agent.position + observation[5] = 1 + else: + observation[6] = 1 + agent_virtual_position = (-1, -1) + agent_done = True + + if not agent_done: + visited.append(agent_virtual_position) + distance_map = self.env.distance_map.get() + current_cell_dist = distance_map[handle, + agent_virtual_position[0], agent_virtual_position[1], + agent.direction] + possible_transitions = self.env.rail.get_transitions(*agent_virtual_position, agent.direction) + orientation = agent.direction + if fast_count_nonzero(possible_transitions) == 1: + orientation = np.argmax(possible_transitions) + + for dir_loop, branch_direction in enumerate([(orientation + i) % 4 for i in range(-1, 3)]): + if possible_transitions[branch_direction]: + new_position = get_new_position(agent_virtual_position, branch_direction) + + new_cell_dist = distance_map[handle, + new_position[0], new_position[1], + branch_direction] + if not (np.math.isinf(new_cell_dist) and np.math.isinf(current_cell_dist)): + observation[dir_loop] = int(new_cell_dist < current_cell_dist) + + has_opp_agent, has_same_agent, v = self._explore(handle, new_position, branch_direction) + visited.append(v) + + observation[10 + dir_loop] = 1 + observation[14 + dir_loop] = has_opp_agent + observation[18 + dir_loop] = has_same_agent + + opp_a = self.env.agent_positions[new_position] + if opp_a != -1 and opp_a != handle: + observation[22 + dir_loop] = 1 + + agents_on_switch, \ + agents_near_to_switch, \ + agents_near_to_switch_all = \ + self.check_agent_descision(agent_virtual_position, agent.direction) + observation[7] = int(agents_on_switch) + observation[8] = int(agents_near_to_switch) + observation[9] = int(agents_near_to_switch_all) + + self.env.dev_obs_dict.update({handle: visited}) + + return observation + + def rl_agent_act_ADRIAN(self, observation, info, eps=0.0): + self.loadAgent() + action_dict = {} + for a in range(self.env.get_num_agents()): + if info['action_required'][a]: + action_dict[a] = self.agent.act(observation[a], eps=eps) + # action_dict[a] = np.random.randint(5) + else: + action_dict[a] = RailEnvActions.DO_NOTHING + + return action_dict + + def rl_agent_act(self, observation, info, eps=0.0): + if len(self.random_agent_starter) != self.env.get_num_agents(): + self.random_agent_starter = np.random.random(self.env.get_num_agents()) * 1000.0 + self.loadAgent() + + action_dict = {} + for a in range(self.env.get_num_agents()): + if self.random_agent_starter[a] > self.env._elapsed_steps: + action_dict[a] = RailEnvActions.STOP_MOVING + elif info['action_required'][a]: + action_dict[a] = self.agent.act(observation[a], eps=eps) + # action_dict[a] = np.random.randint(5) + else: + action_dict[a] = RailEnvActions.DO_NOTHING + + return action_dict + + def rl_agent_act_ADRIAN_01(self, observation, info, eps=0.0): + self.loadAgent() + action_dict = {} + active_cnt = 0 + for a in range(self.env.get_num_agents()): + if active_cnt < 10 or 
self.env.agents[a].status == RailAgentStatus.ACTIVE: + if observation[a][6] == 1: + active_cnt += int(self.env.agents[a].status == RailAgentStatus.ACTIVE) + action_dict[a] = RailEnvActions.STOP_MOVING + else: + active_cnt += int(self.env.agents[a].status < RailAgentStatus.DONE) + if (observation[a][7] + observation[a][8] + observation[a][9] > 0) or \ + (self.env.agents[a].status < RailAgentStatus.ACTIVE): + if info['action_required'][a]: + action_dict[a] = self.agent.act(observation[a], eps=eps) + # action_dict[a] = np.random.randint(5) + else: + action_dict[a] = RailEnvActions.MOVE_FORWARD + else: + action_dict[a] = RailEnvActions.MOVE_FORWARD + else: + action_dict[a] = RailEnvActions.STOP_MOVING + + return action_dict + + def loadAgent(self): + if self.agent is not None: + return + self.state_size = self.env.obs_builder.observation_dim + self.action_size = 5 + print("action_size: ", self.action_size) + print("state_size: ", self.state_size) + self.agent = Agent(self.state_size, self.action_size, 0) + self.agent.load('./checkpoints/', 0, 1.0) diff --git a/src/simple/ClassifyProblemInstance.py b/src/simple/ClassifyProblemInstance.py new file mode 100644 index 0000000..cabd67e --- /dev/null +++ b/src/simple/ClassifyProblemInstance.py @@ -0,0 +1,94 @@ +from enum import IntEnum + +import numpy as np + + +class ProblemInstanceClass(IntEnum): + SHORTEST_PATH_ONLY = 0 + SHORTEST_PATH_ORDERING_PROBLEM = 1 + REQUIRE_ALTERNATIVE_PATH = 2 + + +def check_is_only_shortest_path_problem(env, project_path_matrix): + x = project_path_matrix.copy() + x[x < 2] = 0 + return np.sum(x) == 0 + + +def check_is_shortest_path_and_ordering_problem(env, project_path_matrix): + x = project_path_matrix.copy() + for a in range(env.get_num_agents()): + # loop over all path and project start position and target into the project_path_matrix + agent = env.agents[a] + if x[agent.position[0]][agent.position[1]] > 1: + return False + if x[agent.target[0]][agent.target[1]] > 1: + return False + return True + + +def check_is_require_alternative_path(env, project_path_matrix): + paths = env.dev_pred_dict + for a in range(env.get_num_agents()): + agent = env.agents[a] + path = paths[a] + for path_loop in range(len(path)): + p = path[path_loop] + if p[0] == agent.target[0] and p[1] == agent.target[1]: + break + if project_path_matrix[p[0]][p[1]] > 1: + # potential overlapping path found + for opp_a in range(env.get_num_agents()): + opp_agent = env.agents[opp_a] + opp_path = paths[opp_a] + if p[0] == opp_agent.position[0] and p[1] == opp_agent.position[1]: + opp_path_loop = 0 + tmp_path_loop = path_loop + while True: + if tmp_path_loop > len(path) - 1: + break + opp_p = opp_path[opp_path_loop] + tmp_p = path[tmp_path_loop + 1] + if opp_p[0] == opp_agent.target[0] and opp_p[1] == opp_agent.target[1]: + return True + if not (opp_p[0] == tmp_p[0] and opp_p[1] == tmp_p[1]): + break + if tmp_p[0] == agent.target[0] and tmp_p[1] == agent.target[1]: + break + opp_path_loop += 1 + tmp_path_loop += 1 + + return False + + +def classify_problem_instance(env): + # shortest path from ShortesPathPredictorForRailEnv + paths = env.dev_pred_dict + + project_path_matrix = np.zeros(shape=(env.height, env.width)) + for a in range(env.get_num_agents()): + # loop over all path and project start position and target into the project_path_matrix + agent = env.agents[a] + project_path_matrix[agent.position[0]][agent.position[1]] += 1.0 + project_path_matrix[agent.target[0]][agent.target[1]] += 1.0 + + if not (agent.target[0] == agent.position[0] and 
agent.target[1] == agent.position[1]): + # project the whole path into + path = paths[a] + for path_loop in range(len(path)): + p = path[path_loop] + if p[0] == agent.target[0] and p[1] == agent.target[1]: + break + else: + project_path_matrix[p[0]][p[1]] += 1.0 + + return \ + { + # analyse : SHORTEST_PATH_ONLY -> if conflict_mat does not contain any number > 1 + "SHORTEST_PATH_ONLY": check_is_only_shortest_path_problem(env, project_path_matrix), + # analyse : SHORTEST_PATH_ORDERING_PROBLEM -> if agent_start and agent_target position does not contain any number > 1 + "SHORTEST_PATH_ORDERING_PROBLEM": check_is_shortest_path_and_ordering_problem(env, project_path_matrix), + # analyse : REQUIRE_ALTERNATIVE_PATH -> if agent_start and agent_target position does not contain any number > 1 + "REQUIRE_ALTERNATIVE_PATH": check_is_require_alternative_path(env, project_path_matrix) + + } diff --git a/src/simple/DeadLock_Avoidance.py b/src/simple/DeadLock_Avoidance.py new file mode 100644 index 0000000..7e80a46 --- /dev/null +++ b/src/simple/DeadLock_Avoidance.py @@ -0,0 +1,574 @@ +import math +from typing import Dict, List, Optional, Tuple, Set +from typing import NamedTuple + +import numpy as np +from flatland.core.grid.grid4 import Grid4TransitionsEnum +from flatland.core.grid.grid4_utils import get_new_position +from flatland.core.transition_map import GridTransitionMap +from flatland.envs.agent_utils import RailAgentStatus +from flatland.envs.distance_map import DistanceMap +from flatland.envs.rail_env import RailEnvNextAction, RailEnvActions +from flatland.envs.rail_env_shortest_paths import get_shortest_paths +from flatland.utils.ordered_set import OrderedSet + +WalkingElement = NamedTuple('WalkingElement', + [('position', Tuple[int, int]), ('direction', int), + ('next_action_element', RailEnvActions)]) + + +def get_valid_move_actions_(agent_direction: Grid4TransitionsEnum, + agent_position: Tuple[int, int], + rail: GridTransitionMap) -> Set[RailEnvNextAction]: + """ + Get the valid move actions (forward, left, right) for an agent. + + Parameters + ---------- + agent_direction : Grid4TransitionsEnum + agent_position: Tuple[int,int] + rail : GridTransitionMap + + + Returns + ------- + Set of `RailEnvNextAction` (tuples of (action,position,direction)) + Possible move actions (forward,left,right) and the next position/direction they lead to. + It is not checked that the next cell is free. + """ + valid_actions: Set[RailEnvNextAction] = OrderedSet() + possible_transitions = rail.get_transitions(*agent_position, agent_direction) + num_transitions = np.count_nonzero(possible_transitions) + # Start from the current orientation, and see which transitions are available; + # organize them as [left, forward, right], relative to the current orientation + # If only one transition is possible, the forward branch is aligned with it. 
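+    # Dead ends are special-cased first: the only legal action there is
+    # MOVE_FORWARD, which turns the agent around (exit direction is the entry
+    # direction rotated by 180 degrees, i.e. (agent_direction + 2) % 4).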
+ if rail.is_dead_end(agent_position): + action = RailEnvActions.MOVE_FORWARD + exit_direction = (agent_direction + 2) % 4 + if possible_transitions[exit_direction]: + new_position = get_new_position(agent_position, exit_direction) + valid_actions.add(RailEnvNextAction(action, new_position, exit_direction)) + elif num_transitions == 1: + action = RailEnvActions.MOVE_FORWARD + for new_direction in [(agent_direction + i) % 4 for i in range(-1, 2)]: + if possible_transitions[new_direction]: + new_position = get_new_position(agent_position, new_direction) + valid_actions.add(RailEnvNextAction(action, new_position, new_direction)) + else: + for new_direction in [(agent_direction + i) % 4 for i in range(-1, 2)]: + if possible_transitions[new_direction]: + if new_direction == agent_direction: + action = RailEnvActions.MOVE_FORWARD + elif new_direction == (agent_direction + 1) % 4: + action = RailEnvActions.MOVE_RIGHT + elif new_direction == (agent_direction - 1) % 4: + action = RailEnvActions.MOVE_LEFT + else: + raise Exception("Illegal state") + + new_position = get_new_position(agent_position, new_direction) + valid_actions.add(RailEnvNextAction(action, new_position, new_direction)) + return valid_actions + + +# N.B. get_shortest_paths is not part of distance_map since it refers to RailEnvActions (would lead to circularity!) +def get_paths(distance_map: DistanceMap, max_depth: Optional[int] = None, agent_handle: Optional[int] = None) \ + -> Dict[int, Optional[List[WalkingElement]]]: + """ + Computes the shortest path for each agent to its target and the action to be taken to do so. + The paths are derived from a `DistanceMap`. + + If there is no path (rail disconnected), the path is given as None. + The agent state (moving or not) and its speed are not taken into account + + example: + agent_fixed_travel_paths = get_shortest_paths(env.distance_map, None, agent.handle) + path = agent_fixed_travel_paths[agent.handle] + + Parameters + ---------- + distance_map : reference to the distance_map + max_depth : max path length, if the shortest path is longer, it will be cutted + agent_handle : if set, the shortest for agent.handle will be returned , otherwise for all agents + + Returns + ------- + Dict[int, Optional[List[WalkingElement]]] + + """ + shortest_paths = dict() + + def _shortest_path_for_agent(agent): + if agent.status == RailAgentStatus.READY_TO_DEPART: + position = agent.initial_position + elif agent.status == RailAgentStatus.ACTIVE: + position = agent.position + elif agent.status == RailAgentStatus.DONE: + position = agent.target + else: + shortest_paths[agent.handle] = None + return + direction = agent.direction + shortest_paths[agent.handle] = [] + distance = math.inf + depth = 0 + cnt = 0 + while (position != agent.target and (max_depth is None or depth < max_depth)) and cnt < 1000: + cnt = cnt + 1 + next_actions = get_valid_move_actions_(direction, position, distance_map.rail) + best_next_action = None + + for next_action in next_actions: + next_action_distance = distance_map.get()[ + agent.handle, next_action.next_position[0], next_action.next_position[ + 1], next_action.next_direction] + if next_action_distance < distance: + best_next_action = next_action + distance = next_action_distance + + for next_action in next_actions: + if next_action.action == RailEnvActions.MOVE_LEFT: + next_action_distance = distance_map.get()[ + agent.handle, next_action.next_position[0], next_action.next_position[ + 1], next_action.next_direction] + if abs(next_action_distance - distance) < 5: + 
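+                        # near-tie heuristic: a left turn within 5 distance units
+                        # of the best action found so far wins anyway, nudging
+                        # agents onto divergent paths instead of piling all
+                        # traffic onto the single shortest route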
best_next_action = next_action + distance = next_action_distance + + shortest_paths[agent.handle].append(WalkingElement(position, direction, best_next_action)) + depth += 1 + + # if there is no way to continue, the rail must be disconnected! + # (or distance map is incorrect) + if best_next_action is None: + shortest_paths[agent.handle] = None + return + + position = best_next_action.next_position + direction = best_next_action.next_direction + if max_depth is None or depth < max_depth: + shortest_paths[agent.handle].append( + WalkingElement(position, direction, + RailEnvNextAction(RailEnvActions.STOP_MOVING, position, direction))) + + if agent_handle is not None: + _shortest_path_for_agent(distance_map.agents[agent_handle]) + else: + for agent in distance_map.agents: + _shortest_path_for_agent(agent) + + return shortest_paths + + +def agent_fake_position(agent): + if agent.position is not None: + return (agent.position[0], agent.position[1], 0) + return (-agent.handle - 1, -1, None) + + +def compare_position_equal(a, b): + if a is None and b is None: + return True + if a is None or b is None: + return False + return (a[0] == b[0] and a[1] == b[1]) + + +def calc_conflict_matrix_next_step(env, paths, do_move, agent_position_matrix, agent_target_matrix, + agent_next_position_matrix): + # look step forward + conflict_mat = np.zeros(shape=(env.get_num_agents(), env.get_num_agents())) - 1 + + # calculate weighted (priority) + priority = np.arange(env.get_num_agents()).astype(float) + unique_ordered_priority = np.argsort(priority).astype(int) + + # build one-step away dead-lock matrix + for a in range(env.get_num_agents()): + agent = env.agents[a] + path = paths[a] + if path is None: + continue + + conflict_mat[a][a] = unique_ordered_priority[a] + for path_loop in range(len(path)): + p_el = path[path_loop] + p = p_el.position + if compare_position_equal(agent.target, p): + break + else: + a_loop = 0 + opp_a = (int)(agent_next_position_matrix[p[0]][p[1]][a_loop]) + + cnt = 0 + while (opp_a > -1) and (cnt < 1000): + cnt = cnt + 1 + opp_path = paths[opp_a] + if opp_path is not None: + opp_a_p1 = opp_path[0].next_action_element.next_position + if path_loop < len(path) - 1: + p1 = path[path_loop + 1].next_action_element.next_position + if not compare_position_equal(opp_a_p1, p1): + conflict_mat[a][opp_a] = unique_ordered_priority[opp_a] + conflict_mat[opp_a][a] = unique_ordered_priority[a] + a_loop += 1 + opp_a = (int)(agent_next_position_matrix[p[0]][p[1]][a_loop]) + + # update one-step away + for a in range(env.get_num_agents()): + if not do_move[a]: + conflict_mat[conflict_mat == unique_ordered_priority[a]] = -1 + + return conflict_mat + + +def avoid_dead_lock(env, a, paths, conflict_matrix, agent_position_matrix, agent_target_matrix, + agent_next_position_matrix): + # performance optimisation + if conflict_matrix is not None: + if np.argmax(conflict_matrix[a]) == a: + return True + + # dead lock algorithm + agent = env.agents[a] + agent_position = agent_fake_position(agent) + if compare_position_equal(agent_position, agent.target): + return True + + path = paths[a] + if path is None: + return True + + max_path_step_allowed = np.inf + # iterate over agent a's travel path (fixed path) + for path_loop in range(len(path)): + p_el = path[path_loop] + p = p_el.position + if compare_position_equal(p, agent.target): + break + + # iterate over all agents (opposite) + # for opp_a in range(env.get_num_agents()): + a_loop = 0 + opp_a = 0 + cnt = 0 + while (a_loop < env.get_num_agents() and opp_a > -1) and 
cnt < 1000: + cnt = cnt + 1 + if conflict_matrix is not None: + opp_a = (int)(agent_next_position_matrix[p[0]][p[1]][a_loop]) + a_loop += 1 + else: + opp_a = (int)(agent_position_matrix[p[0]][p[1]]) + a_loop = env.get_num_agents() + if opp_a > -1: + if opp_a != a: + opp_agent = env.agents[opp_a] + opp_path = paths[opp_a] + if opp_path is not None: + opp_path_0 = opp_path[0] + + # find all position in the opp.-path which are equal to current position. + # the method has to scan all path through + all_path_idx_offset_array = [0] + for opp_path_loop_itr in range(len(path)): + opp_p_el = opp_path[opp_path_loop_itr] + opp_p = opp_p_el.position + if compare_position_equal(opp_p, opp_agent.target): + break + opp_agent_position = agent_fake_position(opp_agent) + if compare_position_equal(opp_p, opp_agent_position): + all_path_idx_offset_array.extend([opp_path_loop_itr]) + opp_p_next = opp_p_el.next_action_element.next_position + if compare_position_equal(opp_p_next, opp_agent_position): + all_path_idx_offset_array.extend([opp_path_loop_itr]) + + for all_path_idx_offset_loop in range(len(all_path_idx_offset_array)): + all_path_idx_offset = all_path_idx_offset_array[all_path_idx_offset_loop] + opp_path_0_el = opp_path[all_path_idx_offset] + opp_path_0 = opp_path_0_el.position + # if check_in_details is set to -1: no dead-lock candidate found + # if check_in_details is set to 0: dead-lock candidate are not yet visible (agents need one step to become visible)(case A) + # if check_in_details is set to 1: dead-lock candidate are visible, thus we have to collect them (case B) + check_in_detail = -1 + + # check mode, if conflict_matrix is set, then we are looking .. + if conflict_matrix is not None: + # Case A + if np.argmax(conflict_matrix[a]) != a: + # avoid (parallel issue) + if compare_position_equal(opp_path_0, p): + check_in_detail = 0 + else: + # Case B + # collect all dead-lock candidates and check + opp_agent_position = agent_fake_position(opp_agent) + if compare_position_equal(opp_agent_position, p): + check_in_detail = 1 + + if check_in_detail > -1: + # print("Conflict risk found. 
My [", a, "] path is occupied by [", opp_a, "]") + opp_path_loop = all_path_idx_offset + back_path_loop = path_loop - check_in_detail + cnt = 0 + while (opp_path_loop < len(opp_path) and back_path_loop > -1) and cnt < 1000: + cnt = cnt + 1 + # retrieve position information + opp_p_el = opp_path[opp_path_loop] + opp_p = opp_p_el.position + me_p_el = path[back_path_loop] + me_p = me_p_el.next_action_element.next_position + + if not compare_position_equal(opp_p, me_p): + # Case 1: The opposite train travels in same direction as the current train (agent a) + # Case 2: The opposite train travels in opposite direction and the path divergent + break + + # make one step backwards (agent a) and one step forward for opposite train (agent opp_a) + # train a can no travel further than given position, because no divergent paths, this will cause a dead-lock + max_path_step_allowed = min(max_path_step_allowed, back_path_loop) + opp_path_loop += 1 + back_path_loop -= 1 + + # check whether at least one step is allowed + if max_path_step_allowed < 1: + return False + + if back_path_loop == -1: + # No divergent path found, it cause a deadlock + # print("conflict (stop): (", a, ",", opp_a, ")") + return False + + # check whether at least one step is allowed + return max_path_step_allowed > 0 + + +def calculate_one_step(env): + # can agent move array + do_move = np.zeros(env.get_num_agents()) + if True: + cnt = 0 + cnt_done = 0 + for a in range(env.get_num_agents()): + agent = env.agents[a] + if agent.status < RailAgentStatus.DONE: + cnt += 1 + if cnt < 30: + do_move[a] = True + else: + cnt_done += 1 + print("\r{}/{}\t".format(cnt_done, env.get_num_agents()), end="") + else: + agent_fixed_travel_paths = get_paths(env.distance_map, 1) + # can agent move array + do_move = np.zeros(env.get_num_agents()) + for a in range(env.get_num_agents()): + agent = env.agents[a] + if agent.position is not None and not compare_position_equal(agent.position, agent.target): + do_move[a] = True + break + + if np.sum(do_move) == 0: + for a in range(env.get_num_agents()): + agent = env.agents[a] + if agent_fixed_travel_paths[a] is not None: + if agent.position is None and compare_position_equal(agent.initial_position, agent.target): + do_move[a] = True + break + elif not compare_position_equal(agent.initial_position, agent.target): + do_move[a] = True + break + + initial_position = None + for a in range(env.get_num_agents()): + agent = env.agents[a] + if do_move[a]: + initial_position = agent.initial_position + + if initial_position is not None: + if compare_position_equal(agent.initial_position, initial_position): + do_move[a] = True + + # copy of agents fixed travel path (current path to follow) : only once : quite expensive + # agent_fixed_travel_paths = get_shortest_paths(env.distance_map) + agent_fixed_travel_paths = dict() + for a in range(env.get_num_agents()): + agent = env.agents[a] + if do_move[a]: + agent_fixed_travel_paths[agent.handle] = get_paths(env.distance_map, None, agent.handle)[agent.handle] + else: + agent_fixed_travel_paths[agent.handle] = None + + # copy position, target and next position into cache (matrices) + # (The cache idea increases the run-time performance) + agent_position_matrix = np.zeros(shape=(env.height, env.width)) - 1.0 + agent_target_matrix = np.zeros(shape=(env.height, env.width)) - 1.0 + agent_next_position_matrix = np.zeros(shape=(env.height, env.width, env.get_num_agents() + 1)) - 1.0 + for a in range(env.get_num_agents()): + if do_move[a] == False: + continue + agent = env.agents[a] + 
agent_position = agent_fake_position(agent) + if agent_position[2] is None: + agent_position = agent.initial_position + agent_position_matrix[agent_position[0]][agent_position[1]] = a + agent_target_matrix[agent.target[0]][agent.target[1]] = a + if not compare_position_equal(agent.target, agent_position): + path = agent_fixed_travel_paths[a] + if path is not None: + p_el = path[0] + p = p_el.position + a_loop = 0 + cnt = 0 + while (agent_next_position_matrix[p[0]][p[1]][a_loop] > -1) and cnt < 1000: + cnt = cnt + 1 + a_loop += 1 + agent_next_position_matrix[p[0]][p[1]][a_loop] = a + + # check which agents can move (see : avoid_dead_lock (case b)) + for a in range(env.get_num_agents()): + agent = env.agents[a] + if not compare_position_equal(agent.position, agent.target) and do_move[a]: + do_move[a] = avoid_dead_lock(env, a, agent_fixed_travel_paths, None, agent_position_matrix, + agent_target_matrix, + agent_next_position_matrix) + + # check which agents can move (see : avoid_dead_lock (case a)) + # calculate possible candidate for hidden one-step away dead-lock candidates + conflict_matrix = calc_conflict_matrix_next_step(env, agent_fixed_travel_paths, do_move, agent_position_matrix, + agent_target_matrix, + agent_next_position_matrix) + for a in range(env.get_num_agents()): + agent = env.agents[a] + if not compare_position_equal(agent.position, agent.target): + if do_move[a]: + do_move[a] = avoid_dead_lock(env, a, agent_fixed_travel_paths, conflict_matrix, agent_position_matrix, + agent_target_matrix, + agent_next_position_matrix) + + for a in range(env.get_num_agents()): + agent = env.agents[a] + if agent.position is not None and compare_position_equal(agent.position, agent.target): + do_move[a] = False + + # main loop (calculate actions for all agents) + action_dict = {} + is_moving_cnt = 0 + for a in range(env.get_num_agents()): + agent = env.agents[a] + action = RailEnvActions.MOVE_FORWARD + + if do_move[a] and is_moving_cnt < 10: + is_moving_cnt += 1 + # check for deadlock: + path = agent_fixed_travel_paths[a] + if path is not None: + action = path[0].next_action_element.action + else: + action = RailEnvActions.STOP_MOVING + action_dict[a] = action + + return action_dict, do_move + + +def calculate_one_step_heuristics(env): + # copy of agents fixed travel path (current path to follow) + agent_fixed_travel_paths = get_paths(env.distance_map, 1) + + # main loop (calculate actions for all agents) + action_dict = {} + for a in range(env.get_num_agents()): + agent = env.agents[a] + action = RailEnvActions.MOVE_FORWARD + + # check for deadlock: + path = agent_fixed_travel_paths[a] + if path is not None: + action = path[0].next_action_element.action + action_dict[a] = action + + return action_dict, None + + +def calculate_one_step_primitive_implementation(env): + # can agent move array + do_move = np.zeros(env.get_num_agents()) + for a in range(env.get_num_agents()): + agent = env.agents[a] + if agent.status > RailAgentStatus.ACTIVE: + continue + if (agent.status == RailAgentStatus.ACTIVE): + do_move[a] = True + break + do_move[a] = True + break + + # main loop (calculate actions for all agents) + action_dict = {} + for a in range(env.get_num_agents()): + agent = env.agents[a] + action = RailEnvActions.MOVE_FORWARD + if do_move[a]: + # check for deadlock: + # copy of agents fixed travel path (current path to follow) + agent_fixed_travel_paths = get_shortest_paths(env.distance_map, 1, agent.handle) + path = agent_fixed_travel_paths[agent.handle] + if path is not None: + 
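+                # progress display only; the action itself is the first step of
+                # the agent's shortest path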
print("\rAgent:{:4d}/{:<4d} ".format(a + 1, env.get_num_agents()), end=" ") + action = path[0].next_action_element.action + else: + action = RailEnvActions.STOP_MOVING + action_dict[a] = action + + return action_dict, do_move + + +def calculate_one_step_package_implementation(env): + # copy of agents fixed travel path (current path to follow) + # agent_fixed_travel_paths = get_shortest_paths(env.distance_map,1) + agent_fixed_travel_paths = get_paths(env.distance_map, 1) + + # can agent move array + do_move = np.zeros(env.get_num_agents()) + for a in range(env.get_num_agents()): + agent = env.agents[a] + if agent.position is not None and not compare_position_equal(agent.position, agent.target): + do_move[a] = True + break + + if np.sum(do_move) == 0: + for a in range(env.get_num_agents()): + agent = env.agents[a] + if agent_fixed_travel_paths[a] is not None: + if agent.position is None and compare_position_equal(agent.initial_position, agent.target): + do_move[a] = True + break + elif not compare_position_equal(agent.initial_position, agent.target): + do_move[a] = True + break + + initial_position = None + for a in range(env.get_num_agents()): + agent = env.agents[a] + if do_move[a]: + initial_position = agent.initial_position + + if initial_position is not None: + if compare_position_equal(agent.initial_position, initial_position): + do_move[a] = True + + # main loop (calculate actions for all agents) + action_dict = {} + for a in range(env.get_num_agents()): + agent = env.agents[a] + action = RailEnvActions.MOVE_FORWARD + + if do_move[a]: + # check for deadlock: + path = agent_fixed_travel_paths[a] + if path is not None: + action = path[0].next_action_element.action + else: + action = RailEnvActions.STOP_MOVING + action_dict[a] = action + + return action_dict, do_move diff --git a/src/simple/ShortestPathPredictorForRailEnv.py b/src/simple/ShortestPathPredictorForRailEnv.py new file mode 100644 index 0000000..f820253 --- /dev/null +++ b/src/simple/ShortestPathPredictorForRailEnv.py @@ -0,0 +1,107 @@ +import numpy as np + +from flatland.core.env_prediction_builder import PredictionBuilder +from flatland.core.grid.grid4_utils import get_new_position +from flatland.envs.rail_env import RailEnvActions + + +class AdrianShortestPathPredictorForRailEnv(PredictionBuilder): + """ + ShortestPathPredictorForRailEnv object. + + This object returns shortest-path predictions for agents in the RailEnv environment. + The prediction acts as if no other agent is in the environment and always takes the forward action. + """ + + def __init__(self, max_depth=20): + # Initialize with depth 20 + self.max_depth = max_depth + + def get(self, custom_args=None, handle=None): + """ + Called whenever get_many in the observation build is called. + Requires distance_map to extract the shortest path. + + Parameters + ------- + custom_args: dict + - distance_map : dict + handle : int (optional) + Handle of the agent for which to compute the observation vector. + + Returns + ------- + np.array + Returns a dictionary indexed by the agent handle and for each agent a vector of (max_depth + 1)x5 elements: + - time_offset + - position axis 0 + - position axis 1 + - direction + - action taken to come here + The prediction at 0 is the current position, direction etc. 
+ """ + + agents = self.env.agents + if handle: + agents = [self.env.agents[handle]] + assert custom_args is not None + distance_map = custom_args.get('distance_map') + assert distance_map is not None + + prediction_dict = {} + for agent in agents: + _agent_initial_position = agent.position + _agent_initial_direction = agent.direction + prediction = np.zeros(shape=(self.max_depth + 1, 5)) + prediction[0] = [0, *_agent_initial_position, _agent_initial_direction, 0] + visited = [] + for index in range(1, self.max_depth + 1): + # if we're at the target, stop moving... + if agent.position == agent.target: + prediction[index] = [index, *agent.target, agent.direction, RailEnvActions.STOP_MOVING] + visited.append((agent.position[0], agent.position[1], agent.direction)) + continue + # Take shortest possible path + cell_transitions = self.env.rail.get_transitions(*agent.position, agent.direction) + + new_position = None + new_direction = None + if np.sum(cell_transitions) == 1: + new_direction = np.argmax(cell_transitions) + new_position = get_new_position(agent.position, new_direction) + elif np.sum(cell_transitions) > 1: + min_dist = np.inf + no_dist_found = True + for direction in range(4): + if cell_transitions[direction] == 1: + neighbour_cell = get_new_position(agent.position, direction) + target_dist = distance_map[agent.handle, neighbour_cell[0], neighbour_cell[1], direction] + if target_dist < min_dist or no_dist_found: + min_dist = target_dist + new_direction = direction + no_dist_found = False + new_position = get_new_position(agent.position, new_direction) + else: + print("--------------------") + print(agent.position, agent.direction, "valid:", self.env.rail.cell_neighbours_valid( + agent.position), + self.env.rail.get_full_transitions(agent.position[0],agent.position[1]) + ) + print("--------------------") + raise Exception("No transition possible {}".format(cell_transitions)) + + # update the agent's position and direction + agent.position = new_position + agent.direction = new_direction + + # prediction is ready + prediction[index] = [index, *new_position, new_direction, 0] + visited.append((new_position[0], new_position[1], new_direction)) + self.env.dev_pred_dict[agent.handle] = visited + prediction_dict[agent.handle] = prediction + + # cleanup: reset initial position + agent.position = _agent_initial_position + agent.direction = _agent_initial_direction + + return prediction_dict -- GitLab