Compare revisions

Showing with 720 additions and 487 deletions
from time import time

import numpy as np
from flatland.envs.rail_env import fast_isclose


def print_timing(label, start_time, end_time):
    print("{:>10.4f}ms".format(1000 * (end_time - start_time)) + "\t" + label)


def check_isclose(nbr=100000):
    s = time()
    for x in range(nbr):
        fast_isclose(x, 0.0, rtol=1e-03)
    e = time()
    print_timing("fast_isclose", start_time=s, end_time=e)

    s = time()
    for x in range(nbr):
        np.isclose(x, 0.0, rtol=1e-03)
    e = time()
    print_timing("np.isclose", start_time=s, end_time=e)


if __name__ == "__main__":
    check_isclose()
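The script times the rail-env helper fast_isclose against np.isclose over the same scalar inputs; the nbr parameter of check_isclose controls how many comparisons are timed. A minimal alternative invocation (illustrative only, not part of the diff):

    check_isclose(nbr=10000)  # smaller run; still prints one timing line per variant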
runs_bench/Screenshots/full.png (139 KiB)

runs_bench/Screenshots/reduced.png (178 KiB)
from flatland.envs.rail_env import RailEnvActions

# global action size
global _agent_action_config_action_size
_agent_action_config_action_size = 5


def get_flatland_full_action_size():
    # The action space of flatland is 5 discrete actions
    return 5


def set_action_size_full():
    global _agent_action_config_action_size
    # The agents (DDDQN, PPO, ...) use this action space
    _agent_action_config_action_size = 5


def set_action_size_reduced():
    global _agent_action_config_action_size
    # The agents (DDDQN, PPO, ...) use this action space
    _agent_action_config_action_size = 4


def get_action_size():
    global _agent_action_config_action_size
    # The agents (DDDQN, PPO, ...) use this action space
    return _agent_action_config_action_size


def map_actions(actions):
    # Map the policy's actions (reduced action space) into Flatland's RailEnvActions
    if get_action_size() != get_flatland_full_action_size():
        for key in actions:
            value = actions.get(key, 0)
            actions.update({key: map_action(value)})
    return actions


def map_action_policy(action):
    if get_action_size() != get_flatland_full_action_size():
        return action - 1
    return action


def map_action(action):
    if get_action_size() == get_flatland_full_action_size():
        return action
    if action == 0:
        return RailEnvActions.MOVE_LEFT
    if action == 1:
        return RailEnvActions.MOVE_FORWARD
    if action == 2:
        return RailEnvActions.MOVE_RIGHT
    if action == 3:
        return RailEnvActions.STOP_MOVING


def map_rail_env_action(action):
    if get_action_size() == get_flatland_full_action_size():
        return action
    if action == RailEnvActions.MOVE_LEFT:
        return 0
    elif action == RailEnvActions.MOVE_FORWARD:
        return 1
    elif action == RailEnvActions.MOVE_RIGHT:
        return 2
    elif action == RailEnvActions.STOP_MOVING:
        return 3
    # action == RailEnvActions.DO_NOTHING:
    return 3
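A hypothetical usage sketch (not part of the diff) of the mapping helpers above, showing the round trip between the reduced 4-action space used by the policies and Flatland's RailEnvActions; variable names are illustrative:

    from utils.agent_action_config import set_action_size_reduced, map_action, map_rail_env_action

    set_action_size_reduced()                        # policy networks now output 4 actions
    policy_action = 1                                # index chosen by the policy (reduced space)
    rail_action = map_action(policy_action)          # -> RailEnvActions.MOVE_FORWARD
    reduced_again = map_rail_env_action(rail_action)  # back to the reduced index
    assert reduced_again == policy_action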
from flatland.core.grid.grid4_utils import get_new_position
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.rail_env import fast_count_nonzero


class AgentCanChooseHelper:
    def __init__(self):
        pass

    def build_data(self, env):
        self.env = env
        if self.env is not None:
            self.env.dev_obs_dict = {}
        self.switches = {}
        self.switches_neighbours = {}
        if self.env is not None:
            self.find_all_cell_where_agent_can_choose()

    def find_all_switches(self):
        # Search the environment (rail grid) for all switch cells. A switch is a cell where more than one
        # transition exists; collect all directions for which the cell acts as a switch.
        self.switches = {}
        for h in range(self.env.height):
            for w in range(self.env.width):
                pos = (h, w)
                for dir in range(4):
                    possible_transitions = self.env.rail.get_transitions(*pos, dir)
                    num_transitions = fast_count_nonzero(possible_transitions)
                    if num_transitions > 1:
                        if pos not in self.switches.keys():
                            self.switches.update({pos: [dir]})
                        else:
                            self.switches[pos].append(dir)

    def find_all_switch_neighbours(self):
        # Collect all cells that are neighbours of a switch cell. A cell is a neighbour if the agent can
        # reach a switch with a single step. A switch is a cell where the agent has more than one transition.
        self.switches_neighbours = {}
        for h in range(self.env.height):
            for w in range(self.env.width):
                # look one step forward
                for dir in range(4):
                    pos = (h, w)
                    possible_transitions = self.env.rail.get_transitions(*pos, dir)
                    for d in range(4):
                        if possible_transitions[d] == 1:
                            new_cell = get_new_position(pos, d)
                            if new_cell in self.switches.keys() and pos not in self.switches.keys():
                                if pos not in self.switches_neighbours.keys():
                                    self.switches_neighbours.update({pos: [dir]})
                                else:
                                    self.switches_neighbours[pos].append(dir)

    def find_all_cell_where_agent_can_choose(self):
        # prepare the memory - collect all cells where the agent can choose more than FORWARD/STOP.
        self.find_all_switches()
        self.find_all_switch_neighbours()

    def check_agent_decision(self, position, direction):
        # Decide whether the agent is
        # - on a switch
        # - at a switch neighbour (next to a switch), where the switch offers the agent more options than
        #   FORWARD/STOP
        # - on any switch: regardless of whether the agent has more options than FORWARD/STOP
        # - at any switch neighbour: regardless of whether the agent has more than one option (transition)
        #   when it reaches the switch
        agents_on_switch = False
        agents_on_switch_all = False
        agents_near_to_switch = False
        agents_near_to_switch_all = False
        if position in self.switches.keys():
            agents_on_switch = direction in self.switches[position]
            agents_on_switch_all = True

        if position in self.switches_neighbours.keys():
            new_cell = get_new_position(position, direction)
            if new_cell in self.switches.keys():
                if not direction in self.switches[new_cell]:
                    agents_near_to_switch = direction in self.switches_neighbours[position]
            else:
                agents_near_to_switch = direction in self.switches_neighbours[position]

            agents_near_to_switch_all = direction in self.switches_neighbours[position]

        return agents_on_switch, agents_near_to_switch, agents_near_to_switch_all, agents_on_switch_all

    def required_agent_decision(self):
        agents_can_choose = {}
        agents_on_switch = {}
        agents_on_switch_all = {}
        agents_near_to_switch = {}
        agents_near_to_switch_all = {}
        for a in range(self.env.get_num_agents()):
            ret_agents_on_switch, ret_agents_near_to_switch, ret_agents_near_to_switch_all, ret_agents_on_switch_all = \
                self.check_agent_decision(
                    self.env.agents[a].position,
                    self.env.agents[a].direction)
            agents_on_switch.update({a: ret_agents_on_switch})
            agents_on_switch_all.update({a: ret_agents_on_switch_all})
            ready_to_depart = self.env.agents[a].status == RailAgentStatus.READY_TO_DEPART
            agents_near_to_switch.update({a: (ret_agents_near_to_switch and not ready_to_depart)})
            agents_can_choose.update({a: agents_on_switch[a] or agents_near_to_switch[a]})
            agents_near_to_switch_all.update({a: (ret_agents_near_to_switch_all and not ready_to_depart)})
        return agents_can_choose, agents_on_switch, agents_near_to_switch, agents_near_to_switch_all, agents_on_switch_all
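A hypothetical usage sketch (not part of the diff): build the helper once per episode and query the per-agent decision flags each step. The module path and 'env' (an existing flatland RailEnv) are assumptions:

    from utils.agent_can_choose_helper import AgentCanChooseHelper  # assumed module path

    helper = AgentCanChooseHelper()
    helper.build_data(env)  # precompute switches and switch neighbours for this env

    agent = env.agents[0]
    on_switch, near_switch, near_switch_all, on_switch_all = \
        helper.check_agent_decision(agent.position, agent.direction)
    if on_switch or near_switch:
        pass  # the agent faces a real routing decision; defer to the learned policy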
from typing import Optional, List

import matplotlib.pyplot as plt
import numpy as np
from flatland.core.env_observation_builder import DummyObservationBuilder
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.rail_env import RailEnv, RailEnvActions, fast_count_nonzero

from reinforcement_learning.policy import Policy
from utils.shortest_distance_walker import ShortestDistanceWalker


class DeadlockAvoidanceObservation(DummyObservationBuilder):
    def __init__(self):
        self.counter = 0

    def get_many(self, handles: Optional[List[int]] = None) -> np.ndarray:
        self.counter += 1
        obs = np.ones((len(handles), 2))
        for handle in handles:
            obs[handle][0] = handle
            obs[handle][1] = self.counter
        return obs


class DeadlockAvoidanceShortestDistanceWalker(ShortestDistanceWalker):
    def __init__(self, env: RailEnv, agent_positions, switches):
        super().__init__(env)
        self.shortest_distance_agent_map = np.zeros((self.env.get_num_agents(),
                                                     self.env.height,
                                                     self.env.width),
                                                    dtype=int) - 1

        self.full_shortest_distance_agent_map = np.zeros((self.env.get_num_agents(),
                                                          self.env.height,
                                                          self.env.width),
                                                         dtype=int) - 1

        self.agent_positions = agent_positions

        self.opp_agent_map = {}
        self.same_agent_map = {}
        self.switches = switches

    def getData(self):
        return self.shortest_distance_agent_map, self.full_shortest_distance_agent_map

    def callback(self, handle, agent, position, direction, action, possible_transitions):
        opp_a = self.agent_positions[position]
        if opp_a != -1 and opp_a != handle:
            if self.env.agents[opp_a].direction != direction:
                d = self.opp_agent_map.get(handle, [])
                if opp_a not in d:
                    d.append(opp_a)
                self.opp_agent_map.update({handle: d})
            else:
                if len(self.opp_agent_map.get(handle, [])) == 0:
                    d = self.same_agent_map.get(handle, [])
                    if opp_a not in d:
                        d.append(opp_a)
                    self.same_agent_map.update({handle: d})

        if len(self.opp_agent_map.get(handle, [])) == 0:
            if self.switches.get(position, None) is None:
                self.shortest_distance_agent_map[(handle, position[0], position[1])] = 1
        self.full_shortest_distance_agent_map[(handle, position[0], position[1])] = 1


class DeadLockAvoidanceAgent(Policy):
    def __init__(self, env: RailEnv, show_debug_plot=False):
        self.env = env
        self.memory = None
        self.loss = 0
        self.agent_can_move = {}
        self.switches = {}
        self.show_debug_plot = show_debug_plot

    def step(self, state, action, reward, next_state, done):
        pass

    def act(self, state, eps=0.):
        # agent = self.env.agents[state[0]]
        check = self.agent_can_move.get(state[0], None)
        if check is None:
            return RailEnvActions.STOP_MOVING
        return check[3]

    def reset(self):
        self.agent_positions = None
        self.shortest_distance_walker = None
        self.switches = {}
        for h in range(self.env.height):
            for w in range(self.env.width):
                pos = (h, w)
                for dir in range(4):
                    possible_transitions = self.env.rail.get_transitions(*pos, dir)
                    num_transitions = fast_count_nonzero(possible_transitions)
                    if num_transitions > 1:
                        if pos not in self.switches.keys():
                            self.switches.update({pos: [dir]})
                        else:
                            self.switches[pos].append(dir)

    def start_step(self):
        self.build_agent_position_map()
        self.shortest_distance_mapper()
        self.extract_agent_can_move()

    def end_step(self):
        pass

    def get_actions(self):
        pass

    def build_agent_position_map(self):
        # build map with agent positions (only active agents)
        self.agent_positions = np.zeros((self.env.height, self.env.width), dtype=int) - 1
        for handle in range(self.env.get_num_agents()):
            agent = self.env.agents[handle]
            if agent.status == RailAgentStatus.ACTIVE:
                if agent.position is not None:
                    self.agent_positions[agent.position] = handle

    def shortest_distance_mapper(self):
        self.shortest_distance_walker = DeadlockAvoidanceShortestDistanceWalker(self.env,
                                                                                self.agent_positions,
                                                                                self.switches)
        for handle in range(self.env.get_num_agents()):
            agent = self.env.agents[handle]
            if agent.status <= RailAgentStatus.ACTIVE:
                self.shortest_distance_walker.walk_to_target(handle)

    def extract_agent_can_move(self):
        self.agent_can_move = {}
        shortest_distance_agent_map, full_shortest_distance_agent_map = self.shortest_distance_walker.getData()
        for handle in range(self.env.get_num_agents()):
            agent = self.env.agents[handle]
            if agent.status < RailAgentStatus.DONE:
                next_step_ok = self.check_agent_can_move(shortest_distance_agent_map[handle],
                                                         self.shortest_distance_walker.same_agent_map.get(handle, []),
                                                         self.shortest_distance_walker.opp_agent_map.get(handle, []),
                                                         full_shortest_distance_agent_map)
                if next_step_ok:
                    next_position, next_direction, action, _ = self.shortest_distance_walker.walk_one_step(handle)
                    self.agent_can_move.update({handle: [next_position[0], next_position[1], next_direction, action]})

        if self.show_debug_plot:
            a = np.floor(np.sqrt(self.env.get_num_agents()))
            b = np.ceil(self.env.get_num_agents() / a)
            for handle in range(self.env.get_num_agents()):
                plt.subplot(a, b, handle + 1)
                plt.imshow(full_shortest_distance_agent_map[handle] + shortest_distance_agent_map[handle])
            plt.show(block=False)
            plt.pause(0.01)

    def check_agent_can_move(self,
                             my_shortest_walking_path,
                             same_agents,
                             opp_agents,
                             full_shortest_distance_agent_map):
        agent_positions_map = (self.agent_positions > -1).astype(int)
        delta = my_shortest_walking_path
        next_step_ok = True
        for opp_a in opp_agents:
            opp = full_shortest_distance_agent_map[opp_a]
            delta = ((my_shortest_walking_path - opp - agent_positions_map) > 0).astype(int)
            if np.sum(delta) < (3 + len(opp_agents)):
                next_step_ok = False
        return next_step_ok

    def save(self, filename):
        pass

    def load(self, filename):
        pass
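A toy illustration (values assumed, not part of the diff) of the free-cell test in check_agent_can_move: path maps encode 1 for a cell on a path and -1 otherwise, and an agent may only keep moving while its remaining shortest walk retains at least 3 + len(opp_agents) cells that are neither on an opposing agent's full path nor currently occupied:

    import numpy as np

    my_path  = np.array([ 1,  1,  1,  1, -1])  # 1 = on my shortest walk, -1 = off it
    opp_path = np.array([-1, -1,  1,  1, -1])  # opposing agent's full shortest-path map
    occupied = np.array([ 0,  1,  0,  0,  0])  # 1 = cell currently holds an active agent

    delta = ((my_path - opp_path - occupied) > 0).astype(int)
    free_cells = np.sum(delta)          # 2 in this toy example
    next_step_ok = free_cells >= 3 + 1  # one opposing agent -> need 4 free cells -> False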
from typing import Optional, List

import matplotlib.pyplot as plt
import numpy as np
from flatland.core.env_observation_builder import DummyObservationBuilder
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.rail_env import RailEnv, RailEnvActions, fast_count_nonzero

from reinforcement_learning.policy import HeuristicPolicy, DummyMemory
from utils.agent_action_config import map_rail_env_action
from utils.shortest_distance_walker import ShortestDistanceWalker


class DeadlockAvoidanceObservation(DummyObservationBuilder):
    def __init__(self):
        self.counter = 0

    def get_many(self, handles: Optional[List[int]] = None) -> np.ndarray:
        self.counter += 1
        obs = np.ones((len(handles), 2))
        for handle in handles:
            obs[handle][0] = handle
            obs[handle][1] = self.counter
        return obs


class DeadlockAvoidanceShortestDistanceWalker(ShortestDistanceWalker):
    def __init__(self, env: RailEnv, agent_positions, switches):
        super().__init__(env)
        self.shortest_distance_agent_map = np.zeros((self.env.get_num_agents(),
                                                     self.env.height,
                                                     self.env.width),
                                                    dtype=int) - 1

        self.full_shortest_distance_agent_map = np.zeros((self.env.get_num_agents(),
                                                          self.env.height,
                                                          self.env.width),
                                                         dtype=int) - 1

        self.agent_positions = agent_positions

        self.opp_agent_map = {}
        self.same_agent_map = {}
        self.switches = switches

    def getData(self):
        return self.shortest_distance_agent_map, self.full_shortest_distance_agent_map

    def callback(self, handle, agent, position, direction, action, possible_transitions):
        opp_a = self.agent_positions[position]
        if opp_a != -1 and opp_a != handle:
            if self.env.agents[opp_a].direction != direction:
                d = self.opp_agent_map.get(handle, [])
                if opp_a not in d:
                    d.append(opp_a)
                self.opp_agent_map.update({handle: d})
            else:
                if len(self.opp_agent_map.get(handle, [])) == 0:
                    d = self.same_agent_map.get(handle, [])
                    if opp_a not in d:
                        d.append(opp_a)
                    self.same_agent_map.update({handle: d})

        if len(self.opp_agent_map.get(handle, [])) == 0:
            if self.switches.get(position, None) is None:
                self.shortest_distance_agent_map[(handle, position[0], position[1])] = 1
        self.full_shortest_distance_agent_map[(handle, position[0], position[1])] = 1


class DeadLockAvoidanceAgent(HeuristicPolicy):
    def __init__(self, env: RailEnv, action_size, enable_eps=False, show_debug_plot=False):
        print(">> DeadLockAvoidance")
        self.env = env
        self.memory = DummyMemory()
        self.loss = 0
        self.action_size = action_size
        self.agent_can_move = {}
        self.agent_can_move_value = {}
        self.switches = {}
        self.show_debug_plot = show_debug_plot
        self.enable_eps = enable_eps

    def step(self, handle, state, action, reward, next_state, done):
        pass

    def act(self, handle, state, eps=0.):
        # Epsilon-greedy action selection
        if self.enable_eps:
            if np.random.random() < eps:
                return np.random.choice(np.arange(self.action_size))

        # agent = self.env.agents[state[0]]
        check = self.agent_can_move.get(handle, None)
        act = RailEnvActions.STOP_MOVING
        if check is not None:
            act = check[3]
        return map_rail_env_action(act)

    def get_agent_can_move_value(self, handle):
        return self.agent_can_move_value.get(handle, np.inf)

    def reset(self, env):
        self.env = env
        self.agent_positions = None
        self.shortest_distance_walker = None
        self.switches = {}
        for h in range(self.env.height):
            for w in range(self.env.width):
                pos = (h, w)
                for dir in range(4):
                    possible_transitions = self.env.rail.get_transitions(*pos, dir)
                    num_transitions = fast_count_nonzero(possible_transitions)
                    if num_transitions > 1:
                        if pos not in self.switches.keys():
                            self.switches.update({pos: [dir]})
                        else:
                            self.switches[pos].append(dir)

    def start_step(self, train):
        self.build_agent_position_map()
        self.shortest_distance_mapper()
        self.extract_agent_can_move()

    def end_step(self, train):
        pass

    def get_actions(self):
        pass

    def build_agent_position_map(self):
        # build map with agent positions (only active agents)
        self.agent_positions = np.zeros((self.env.height, self.env.width), dtype=int) - 1
        for handle in range(self.env.get_num_agents()):
            agent = self.env.agents[handle]
            if agent.status == RailAgentStatus.ACTIVE:
                if agent.position is not None:
                    self.agent_positions[agent.position] = handle

    def shortest_distance_mapper(self):
        self.shortest_distance_walker = DeadlockAvoidanceShortestDistanceWalker(self.env,
                                                                                self.agent_positions,
                                                                                self.switches)
        for handle in range(self.env.get_num_agents()):
            agent = self.env.agents[handle]
            if agent.status <= RailAgentStatus.ACTIVE:
                self.shortest_distance_walker.walk_to_target(handle)

    def extract_agent_can_move(self):
        self.agent_can_move = {}
        shortest_distance_agent_map, full_shortest_distance_agent_map = self.shortest_distance_walker.getData()
        for handle in range(self.env.get_num_agents()):
            agent = self.env.agents[handle]
            if agent.status < RailAgentStatus.DONE:
                next_step_ok = self.check_agent_can_move(handle,
                                                         shortest_distance_agent_map[handle],
                                                         self.shortest_distance_walker.same_agent_map.get(handle, []),
                                                         self.shortest_distance_walker.opp_agent_map.get(handle, []),
                                                         full_shortest_distance_agent_map)
                if next_step_ok:
                    next_position, next_direction, action, _ = self.shortest_distance_walker.walk_one_step(handle)
                    self.agent_can_move.update({handle: [next_position[0], next_position[1], next_direction, action]})

        if self.show_debug_plot:
            a = np.floor(np.sqrt(self.env.get_num_agents()))
            b = np.ceil(self.env.get_num_agents() / a)
            for handle in range(self.env.get_num_agents()):
                plt.subplot(a, b, handle + 1)
                plt.imshow(full_shortest_distance_agent_map[handle] + shortest_distance_agent_map[handle])
            plt.show(block=False)
            plt.pause(0.01)

    def check_agent_can_move(self,
                             handle,
                             my_shortest_walking_path,
                             same_agents,
                             opp_agents,
                             full_shortest_distance_agent_map):
        agent_positions_map = (self.agent_positions > -1).astype(int)
        delta = my_shortest_walking_path
        next_step_ok = True
        for opp_a in opp_agents:
            opp = full_shortest_distance_agent_map[opp_a]
            delta = ((my_shortest_walking_path - opp - agent_positions_map) > 0).astype(int)
            if np.sum(delta) < (3 + len(opp_agents)):
                next_step_ok = False
            v = self.agent_can_move_value.get(handle, np.inf)
            v = min(v, np.sum(delta))
            self.agent_can_move_value.update({handle: v})
        return next_step_ok

    def save(self, filename):
        pass

    def load(self, filename):
        pass
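A hypothetical evaluation-loop sketch (not part of the diff) showing the heuristic used as a drop-in policy; get_action_size and map_actions come from utils.agent_action_config, and 'env' is assumed to be an existing RailEnv:

    from utils.agent_action_config import get_action_size, map_actions

    policy = DeadLockAvoidanceAgent(env, get_action_size(), enable_eps=False)
    obs, info = env.reset()
    policy.reset(env)
    done = {"__all__": False}
    while not done["__all__"]:
        policy.start_step(train=False)
        actions = {handle: policy.act(handle, obs[handle])
                   for handle in env.get_agent_handles()}
        obs, rewards, done, info = env.step(map_actions(actions))
        policy.end_step(train=False)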
import numpy as np
from flatland.core.grid.grid4_utils import get_new_position
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.rail_env import fast_count_nonzero


def get_agent_positions(env):
    agent_positions: np.ndarray = np.full((env.height, env.width), -1)
    for agent_handle in env.get_agent_handles():
        agent = env.agents[agent_handle]
        if agent.status == RailAgentStatus.ACTIVE:
            position = agent.position
            if position is None:
                position = agent.initial_position
            agent_positions[position] = agent_handle
    return agent_positions


def get_agent_targets(env):
    agent_targets = []
    for agent_handle in env.get_agent_handles():
        agent = env.agents[agent_handle]
        if agent.status == RailAgentStatus.ACTIVE:
            agent_targets.append(agent.target)
    return agent_targets


def check_for_deadlock(handle, env, agent_positions, check_position=None, check_direction=None):
    agent = env.agents[handle]
    if agent.status == RailAgentStatus.DONE or agent.status == RailAgentStatus.DONE_REMOVED:
        return False

    position = agent.position
    if position is None:
        position = agent.initial_position
    if check_position is not None:
        position = check_position
    direction = agent.direction
    if check_direction is not None:
        direction = check_direction

    possible_transitions = env.rail.get_transitions(*position, direction)
    num_transitions = fast_count_nonzero(possible_transitions)
    for dir_loop in range(4):
        if possible_transitions[dir_loop] == 1:
            new_position = get_new_position(position, dir_loop)
            opposite_agent = agent_positions[new_position]
            if opposite_agent != handle and opposite_agent != -1:
                num_transitions -= 1
            else:
                return False

    is_deadlock = num_transitions <= 0
    return is_deadlock
def check_if_all_blocked(env):
......
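A hypothetical usage sketch (not part of the diff): take one snapshot of the active agents' positions per step and collect the handles of agents that can no longer move; 'env' is assumed to be an existing RailEnv:

    positions = get_agent_positions(env)
    deadlocked = [handle for handle in env.get_agent_handles()
                  if check_for_deadlock(handle, env, positions)]
    print("deadlocked agents:", deadlocked)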