
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Showing 720 additions and 487 deletions
# Micro-benchmark: compare flatland's fast_isclose against numpy's np.isclose.
from time import time

import numpy as np
from flatland.envs.rail_env import fast_isclose


def print_timing(label, start_time, end_time):
    print("{:>10.4f}ms".format(1000 * (end_time - start_time)) + "\t" + label)


def check_isclose(nbr=100000):
    s = time()
    for x in range(nbr):
        fast_isclose(x, 0.0, rtol=1e-03)
    e = time()
    print_timing("fast_isclose", start_time=s, end_time=e)

    s = time()
    for x in range(nbr):
        np.isclose(x, 0.0, rtol=1e-03)
    e = time()
    print_timing("np.isclose", start_time=s, end_time=e)


if __name__ == "__main__":
    check_isclose()
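
The same timing pattern can be packaged as a small context manager; the sketch below is not part of the repository and only assumes the imports shown above:

from contextlib import contextmanager
from time import time


@contextmanager
def timing(label):
    # Times the enclosed block and prints it in the same "{ms}\t{label}"
    # format as print_timing above. Illustrative helper, not repository code.
    start = time()
    yield
    print("{:>10.4f}ms".format(1000 * (time() - start)) + "\t" + label)


# Usage, equivalent to one of the loops in check_isclose:
# with timing("fast_isclose"):
#     for x in range(100000):
#         fast_isclose(x, 0.0, rtol=1e-03)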
runs_bench/Screenshots/full.png (139 KiB)
runs_bench/Screenshots/reduced.png (178 KiB)

from flatland.envs.rail_env import RailEnvActions

# global action size
global _agent_action_config_action_size
_agent_action_config_action_size = 5


def get_flatland_full_action_size():
    # The action space of flatland is 5 discrete actions
    return 5


def set_action_size_full():
    global _agent_action_config_action_size
    # The agents (DDDQN, PPO, ...) use this action space
    _agent_action_config_action_size = 5


def set_action_size_reduced():
    global _agent_action_config_action_size
    # The agents (DDDQN, PPO, ...) use this action space
    _agent_action_config_action_size = 4


def get_action_size():
    global _agent_action_config_action_size
    # The agents (DDDQN, PPO, ...) use this action space
    return _agent_action_config_action_size


def map_actions(actions):
    # Map the policy's action indices to Flatland actions when the reduced action space is in use
    if get_action_size() != get_flatland_full_action_size():
        for key in actions:
            value = actions.get(key, 0)
            actions.update({key: map_action(value)})
    return actions


def map_action_policy(action):
    if get_action_size() != get_flatland_full_action_size():
        return action - 1
    return action


def map_action(action):
    if get_action_size() == get_flatland_full_action_size():
        return action
    if action == 0:
        return RailEnvActions.MOVE_LEFT
    if action == 1:
        return RailEnvActions.MOVE_FORWARD
    if action == 2:
        return RailEnvActions.MOVE_RIGHT
    if action == 3:
        return RailEnvActions.STOP_MOVING


def map_rail_env_action(action):
    if get_action_size() == get_flatland_full_action_size():
        return action
    if action == RailEnvActions.MOVE_LEFT:
        return 0
    elif action == RailEnvActions.MOVE_FORWARD:
        return 1
    elif action == RailEnvActions.MOVE_RIGHT:
        return 2
    elif action == RailEnvActions.STOP_MOVING:
        return 3
    # action == RailEnvActions.DO_NOTHING
    return 3
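
A minimal usage sketch of these mapping helpers (the handles and action values below are illustrative, not taken from the repository):

# Switch the agents to the reduced 4-action space ...
set_action_size_reduced()
assert get_action_size() == 4

# ... and translate a policy's chosen indices into Flatland actions.
policy_actions = {0: 1, 1: 3}            # agent handle -> policy action index (illustrative)
env_actions = map_actions(policy_actions)
# env_actions == {0: RailEnvActions.MOVE_FORWARD, 1: RailEnvActions.STOP_MOVING}

# The inverse direction, e.g. when feeding a Flatland action back to the policy:
assert map_rail_env_action(RailEnvActions.MOVE_FORWARD) == 1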
from flatland.core.grid.grid4_utils import get_new_position
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.rail_env import fast_count_nonzero


class AgentCanChooseHelper:
    def __init__(self):
        pass

    def build_data(self, env):
        self.env = env
        if self.env is not None:
            self.env.dev_obs_dict = {}
        self.switches = {}
        self.switches_neighbours = {}
        if self.env is not None:
            self.find_all_cell_where_agent_can_choose()

    def find_all_switches(self):
        # Search the environment (rail grid) for all switch cells. A switch is a cell where more than one
        # transition exists; collect every direction for which the cell acts as a switch.
        self.switches = {}
        for h in range(self.env.height):
            for w in range(self.env.width):
                pos = (h, w)
                for dir in range(4):
                    possible_transitions = self.env.rail.get_transitions(*pos, dir)
                    num_transitions = fast_count_nonzero(possible_transitions)
                    if num_transitions > 1:
                        if pos not in self.switches.keys():
                            self.switches.update({pos: [dir]})
                        else:
                            self.switches[pos].append(dir)

    def find_all_switch_neighbours(self):
        # Collect all cells that are neighbours of a switch cell. A cell is a neighbour if the agent can reach
        # a switch in a single step. A switch is a cell where the agent has more than one transition.
        self.switches_neighbours = {}
        for h in range(self.env.height):
            for w in range(self.env.width):
                # look one step forward
                for dir in range(4):
                    pos = (h, w)
                    possible_transitions = self.env.rail.get_transitions(*pos, dir)
                    for d in range(4):
                        if possible_transitions[d] == 1:
                            new_cell = get_new_position(pos, d)
                            if new_cell in self.switches.keys() and pos not in self.switches.keys():
                                if pos not in self.switches_neighbours.keys():
                                    self.switches_neighbours.update({pos: [dir]})
                                else:
                                    self.switches_neighbours[pos].append(dir)

    def find_all_cell_where_agent_can_choose(self):
        # prepare the memory - collect all cells where the agent can choose more than FORWARD/STOP.
        self.find_all_switches()
        self.find_all_switch_neighbours()

    def check_agent_decision(self, position, direction):
        # Decide whether the agent is
        # - on a switch
        # - at a switch neighbour (next to a switch), where the switch gives the agent more options than
        #   FORWARD/STOP
        # - on any switch: regardless of whether the agent has more options than FORWARD/STOP
        # - at any switch neighbour: regardless of whether the agent has more than one option (transition) when it
        #   reaches the switch
        agents_on_switch = False
        agents_on_switch_all = False
        agents_near_to_switch = False
        agents_near_to_switch_all = False
        if position in self.switches.keys():
            agents_on_switch = direction in self.switches[position]
            agents_on_switch_all = True

        if position in self.switches_neighbours.keys():
            new_cell = get_new_position(position, direction)
            if new_cell in self.switches.keys():
                if not direction in self.switches[new_cell]:
                    agents_near_to_switch = direction in self.switches_neighbours[position]
            else:
                agents_near_to_switch = direction in self.switches_neighbours[position]

            agents_near_to_switch_all = direction in self.switches_neighbours[position]

        return agents_on_switch, agents_near_to_switch, agents_near_to_switch_all, agents_on_switch_all

    def required_agent_decision(self):
        agents_can_choose = {}
        agents_on_switch = {}
        agents_on_switch_all = {}
        agents_near_to_switch = {}
        agents_near_to_switch_all = {}
        for a in range(self.env.get_num_agents()):
            ret_agents_on_switch, ret_agents_near_to_switch, ret_agents_near_to_switch_all, ret_agents_on_switch_all = \
                self.check_agent_decision(
                    self.env.agents[a].position,
                    self.env.agents[a].direction)
            agents_on_switch.update({a: ret_agents_on_switch})
            agents_on_switch_all.update({a: ret_agents_on_switch_all})
            ready_to_depart = self.env.agents[a].status == RailAgentStatus.READY_TO_DEPART
            agents_near_to_switch.update({a: (ret_agents_near_to_switch and not ready_to_depart)})

            agents_can_choose.update({a: agents_on_switch[a] or agents_near_to_switch[a]})

            agents_near_to_switch_all.update({a: (ret_agents_near_to_switch_all and not ready_to_depart)})

        return agents_can_choose, agents_on_switch, agents_near_to_switch, agents_near_to_switch_all, agents_on_switch_all
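
A minimal usage sketch for AgentCanChooseHelper (assumes an already built and reset flatland RailEnv named env; the handle 0 is illustrative):

helper = AgentCanChooseHelper()
helper.build_data(env)  # scans the rail grid for switches and their neighbours

agent = env.agents[0]
on_switch, near_switch, near_switch_all, on_switch_all = \
    helper.check_agent_decision(agent.position, agent.direction)

# Only agents on or next to a decision-relevant switch really need the policy;
# the rest can simply keep driving forward.
if on_switch or near_switch:
    pass  # query the learned policy here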
from typing import Optional, List

import matplotlib.pyplot as plt
import numpy as np
from flatland.core.env_observation_builder import DummyObservationBuilder
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.rail_env import RailEnv, RailEnvActions, fast_count_nonzero

from reinforcement_learning.policy import HeuristicPolicy, DummyMemory
from utils.agent_action_config import map_rail_env_action
from utils.shortest_distance_walker import ShortestDistanceWalker


class DeadlockAvoidanceObservation(DummyObservationBuilder):
    def __init__(self):
        self.counter = 0

    def get_many(self, handles: Optional[List[int]] = None) -> bool:
        self.counter += 1
        obs = np.ones(len(handles), 2)
        for handle in handles:
            obs[handle][0] = handle
            obs[handle][1] = self.counter
        return obs


class DeadlockAvoidanceShortestDistanceWalker(ShortestDistanceWalker):
    def __init__(self, env: RailEnv, agent_positions, switches):
        super().__init__(env)
        self.shortest_distance_agent_map = np.zeros((self.env.get_num_agents(),
                                                     self.env.height,
                                                     self.env.width),
                                                    dtype=int) - 1

        self.full_shortest_distance_agent_map = np.zeros((self.env.get_num_agents(),
                                                          self.env.height,
                                                          self.env.width),
                                                         dtype=int) - 1

        self.agent_positions = agent_positions

        self.opp_agent_map = {}
        self.same_agent_map = {}
        self.switches = switches

    def getData(self):
        return self.shortest_distance_agent_map, self.full_shortest_distance_agent_map

    def callback(self, handle, agent, position, direction, action, possible_transitions):
        opp_a = self.agent_positions[position]
        if opp_a != -1 and opp_a != handle:
            if self.env.agents[opp_a].direction != direction:
                d = self.opp_agent_map.get(handle, [])
                if opp_a not in d:
                    d.append(opp_a)
                self.opp_agent_map.update({handle: d})
            else:
                if len(self.opp_agent_map.get(handle, [])) == 0:
                    d = self.same_agent_map.get(handle, [])
                    if opp_a not in d:
                        d.append(opp_a)
                    self.same_agent_map.update({handle: d})

        if len(self.opp_agent_map.get(handle, [])) == 0:
            if self.switches.get(position, None) is None:
                self.shortest_distance_agent_map[(handle, position[0], position[1])] = 1
        self.full_shortest_distance_agent_map[(handle, position[0], position[1])] = 1


class DeadLockAvoidanceAgent(HeuristicPolicy):
    def __init__(self, env: RailEnv, action_size, enable_eps=False, show_debug_plot=False):
        print(">> DeadLockAvoidance")
        self.env = env
        self.memory = DummyMemory()
        self.loss = 0
        self.action_size = action_size
        self.agent_can_move = {}
        self.agent_can_move_value = {}
        self.switches = {}
        self.show_debug_plot = show_debug_plot
        self.enable_eps = enable_eps

    def step(self, handle, state, action, reward, next_state, done):
        pass

    def act(self, handle, state, eps=0.):
        # Epsilon-greedy action selection
        if self.enable_eps:
            if np.random.random() < eps:
                return np.random.choice(np.arange(self.action_size))

        # agent = self.env.agents[state[0]]
        check = self.agent_can_move.get(handle, None)
        act = RailEnvActions.STOP_MOVING
        if check is not None:
            act = check[3]
        return map_rail_env_action(act)

    def get_agent_can_move_value(self, handle):
        return self.agent_can_move_value.get(handle, np.inf)

    def reset(self, env):
        self.env = env
        self.agent_positions = None
        self.shortest_distance_walker = None
        self.switches = {}
        for h in range(self.env.height):
            for w in range(self.env.width):
                pos = (h, w)
                for dir in range(4):
                    possible_transitions = self.env.rail.get_transitions(*pos, dir)
                    num_transitions = fast_count_nonzero(possible_transitions)
                    if num_transitions > 1:
                        if pos not in self.switches.keys():
                            self.switches.update({pos: [dir]})
                        else:
                            self.switches[pos].append(dir)

    def start_step(self, train):
        self.build_agent_position_map()
        self.shortest_distance_mapper()
        self.extract_agent_can_move()

    def end_step(self, train):
        pass

    def get_actions(self):
        pass

    def build_agent_position_map(self):
        # build map with agent positions (only active agents)
        self.agent_positions = np.zeros((self.env.height, self.env.width), dtype=int) - 1
        for handle in range(self.env.get_num_agents()):
            agent = self.env.agents[handle]
            if agent.status == RailAgentStatus.ACTIVE:
                if agent.position is not None:
                    self.agent_positions[agent.position] = handle

    def shortest_distance_mapper(self):
        self.shortest_distance_walker = DeadlockAvoidanceShortestDistanceWalker(self.env,
                                                                                self.agent_positions,
                                                                                self.switches)
        for handle in range(self.env.get_num_agents()):
            agent = self.env.agents[handle]
            if agent.status <= RailAgentStatus.ACTIVE:
                self.shortest_distance_walker.walk_to_target(handle)

    def extract_agent_can_move(self):
        self.agent_can_move = {}
        shortest_distance_agent_map, full_shortest_distance_agent_map = self.shortest_distance_walker.getData()
        for handle in range(self.env.get_num_agents()):
            agent = self.env.agents[handle]
            if agent.status < RailAgentStatus.DONE:
                next_step_ok = self.check_agent_can_move(handle,
                                                         shortest_distance_agent_map[handle],
                                                         self.shortest_distance_walker.same_agent_map.get(handle, []),
                                                         self.shortest_distance_walker.opp_agent_map.get(handle, []),
                                                         full_shortest_distance_agent_map)
                if next_step_ok:
                    next_position, next_direction, action, _ = self.shortest_distance_walker.walk_one_step(handle)
                    self.agent_can_move.update({handle: [next_position[0], next_position[1], next_direction, action]})

        if self.show_debug_plot:
            a = np.floor(np.sqrt(self.env.get_num_agents()))
            b = np.ceil(self.env.get_num_agents() / a)
            for handle in range(self.env.get_num_agents()):
                plt.subplot(a, b, handle + 1)
                plt.imshow(full_shortest_distance_agent_map[handle] + shortest_distance_agent_map[handle])
            plt.show(block=False)
            plt.pause(0.01)

    def check_agent_can_move(self,
                             handle,
                             my_shortest_walking_path,
                             same_agents,
                             opp_agents,
                             full_shortest_distance_agent_map):
        agent_positions_map = (self.agent_positions > -1).astype(int)
        delta = my_shortest_walking_path
        next_step_ok = True
        for opp_a in opp_agents:
            opp = full_shortest_distance_agent_map[opp_a]
            delta = ((my_shortest_walking_path - opp - agent_positions_map) > 0).astype(int)
            if np.sum(delta) < (3 + len(opp_agents)):
                next_step_ok = False
            v = self.agent_can_move_value.get(handle, np.inf)
            v = min(v, np.sum(delta))
            self.agent_can_move_value.update({handle: v})
        return next_step_ok

    def save(self, filename):
        pass

    def load(self, filename):
        pass
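
A minimal sketch of how DeadLockAvoidanceAgent is typically driven inside an episode loop (env is assumed to be a constructed flatland RailEnv; get_action_size and map_actions come from utils.agent_action_config shown above; the step budget is illustrative, not repository code):

policy = DeadLockAvoidanceAgent(env, action_size=get_action_size(), enable_eps=False)

obs, info = env.reset()
policy.reset(env)
for _ in range(1000):                      # illustrative step budget
    policy.start_step(train=False)         # rebuild position map and shortest paths
    actions = {}
    for handle in range(env.get_num_agents()):
        actions[handle] = policy.act(handle, obs[handle], eps=0.0)
    obs, rewards, dones, info = env.step(map_actions(actions))
    policy.end_step(train=False)
    if dones['__all__']:
        break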
import numpy as np
from flatland.core.grid.grid4_utils import get_new_position
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.rail_env import fast_count_nonzero


def get_agent_positions(env):
    agent_positions: np.ndarray = np.full((env.height, env.width), -1)
    for agent_handle in env.get_agent_handles():
        agent = env.agents[agent_handle]
        if agent.status == RailAgentStatus.ACTIVE:
            position = agent.position
            if position is None:
                position = agent.initial_position
            agent_positions[position] = agent_handle
    return agent_positions


def get_agent_targets(env):
    agent_targets = []
    for agent_handle in env.get_agent_handles():
        agent = env.agents[agent_handle]
        if agent.status == RailAgentStatus.ACTIVE:
            agent_targets.append(agent.target)
    return agent_targets


def check_for_deadlock(handle, env, agent_positions, check_position=None, check_direction=None):
    agent = env.agents[handle]
    if agent.status == RailAgentStatus.DONE or agent.status == RailAgentStatus.DONE_REMOVED:
        return False

    position = agent.position
    if position is None:
        position = agent.initial_position
    if check_position is not None:
        position = check_position
    direction = agent.direction
    if check_direction is not None:
        direction = check_direction

    possible_transitions = env.rail.get_transitions(*position, direction)
    num_transitions = fast_count_nonzero(possible_transitions)
    for dir_loop in range(4):
        if possible_transitions[dir_loop] == 1:
            new_position = get_new_position(position, dir_loop)
            opposite_agent = agent_positions[new_position]
            if opposite_agent != handle and opposite_agent != -1:
                num_transitions -= 1
            else:
                return False

    is_deadlock = num_transitions <= 0
    return is_deadlock
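
A minimal sketch of how these deadlock helpers fit together after an environment step (env is assumed to be a running flatland RailEnv; nothing here is taken from the repository's runner):

agent_positions = get_agent_positions(env)
deadlocked = [handle for handle in env.get_agent_handles()
              if check_for_deadlock(handle, env, agent_positions)]
print("deadlocked agents:", deadlocked)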
def check_if_all_blocked(env):
......