Commit dece6c16 authored by u214892's avatar u214892
Browse files

#141 different agent classes

parent 7f351228
Pipeline #1829 failed with stages
in 7 minutes and 15 seconds
......@@ -3,6 +3,7 @@ import random
import numpy as np
from flatland.envs.agent_generators import complex_rail_generator_agents_placer
from flatland.envs.generators import complex_rail_generator
from flatland.envs.rail_env import RailEnv
......@@ -15,6 +16,7 @@ def run_benchmark():
# Example generate a random rail
env = RailEnv(width=15, height=15,
rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=20, min_dist=12),
agent_generator=complex_rail_generator_agents_placer(),
number_of_agents=5)
n_trials = 20
......
......@@ -5,6 +5,7 @@ import numpy as np
from flatland.core.env_observation_builder import ObservationBuilder
from flatland.core.grid.grid_utils import coordinate_to_position
from flatland.envs.agent_generators import complex_rail_generator_agents_placer
from flatland.envs.generators import random_rail_generator, complex_rail_generator
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
......@@ -20,6 +21,7 @@ class SimpleObs(ObservationBuilder):
Simplest observation builder. The object returns observation vectors with 5 identical components,
all equal to the ID of the respective agent.
"""
def __init__(self):
self.observation_space = [5]
......@@ -53,6 +55,7 @@ class SingleAgentNavigationObs(TreeObsForRailEnv):
E.g., if taking the Left branch (if available) is the shortest route to the agent's target, the observation vector
will be [1, 0, 0].
"""
def __init__(self):
super().__init__(max_depth=0)
self.observation_space = [3]
......@@ -90,6 +93,7 @@ class SingleAgentNavigationObs(TreeObsForRailEnv):
env = RailEnv(width=7,
height=7,
rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=5, max_dist=99999, seed=0),
agent_generator=complex_rail_generator_agents_placer(),
number_of_agents=1,
obs_builder_object=SingleAgentNavigationObs())
......@@ -97,8 +101,8 @@ obs = env.reset()
env_renderer = RenderTool(env, gl="PILSVG")
env_renderer.render_env(show=True, frames=True, show_observations=True)
for step in range(100):
action = np.argmax(obs[0])+1
obs, all_rewards, done, _ = env.step({0:action})
action = np.argmax(obs[0]) + 1
obs, all_rewards, done, _ = env.step({0: action})
print("Rewards: ", all_rewards, " [done=", done, "]")
env_renderer.render_env(show=True, frames=True, show_observations=True)
time.sleep(0.1)
......@@ -200,6 +204,7 @@ CustomObsBuilder = ObservePredictions(CustomPredictor)
env = RailEnv(width=10,
height=10,
rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=1, min_dist=8, max_dist=99999, seed=0),
agent_generator=complex_rail_generator_agents_placer(),
number_of_agents=3,
obs_builder_object=CustomObsBuilder)
......
......@@ -3,6 +3,7 @@ import time
import numpy as np
from flatland.envs.agent_generators import complex_rail_generator_agents_placer
from flatland.envs.generators import complex_rail_generator
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.rail_env import RailEnv
......@@ -11,6 +12,7 @@ from flatland.utils.rendertools import RenderTool
random.seed(1)
np.random.seed(1)
class SingleAgentNavigationObs(TreeObsForRailEnv):
"""
We derive our observation builder from TreeObsForRailEnv, to exploit the existing implementation to compute
......@@ -21,6 +23,7 @@ class SingleAgentNavigationObs(TreeObsForRailEnv):
E.g., if taking the Left branch (if available) is the shortest route to the agent's target, the observation vector
will be [1, 0, 0].
"""
def __init__(self):
super().__init__(max_depth=0)
self.observation_space = [3]
......@@ -58,6 +61,7 @@ class SingleAgentNavigationObs(TreeObsForRailEnv):
env = RailEnv(width=14,
height=14,
rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=5, max_dist=99999, seed=0),
agent_generator=complex_rail_generator_agents_placer(),
number_of_agents=2,
obs_builder_object=SingleAgentNavigationObs())
......@@ -67,11 +71,11 @@ env_renderer.render_env(show=True, frames=True, show_observations=False)
for step in range(100):
actions = {}
for i in range(len(obs)):
actions[i] = np.argmax(obs[i])+1
actions[i] = np.argmax(obs[i]) + 1
if step%5 == 0:
if step % 5 == 0:
print("Agent halts")
actions[0] = 4 # Halt
actions[0] = 4 # Halt
obs, all_rewards, done, _ = env.step(actions)
if env.agents[0].malfunction_data['malfunction'] > 0:
......@@ -82,4 +86,3 @@ for step in range(100):
if done["__all__"]:
break
env_renderer.close_window()
......@@ -2,6 +2,7 @@ import random
import numpy as np
from flatland.envs.agent_generators import complex_rail_generator_agents_placer
from flatland.envs.generators import complex_rail_generator
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.rail_env import RailEnv
......@@ -13,6 +14,7 @@ np.random.seed(1)
env = RailEnv(width=7,
height=7,
rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=8, max_dist=99999, seed=0),
agent_generator=complex_rail_generator_agents_placer(),
number_of_agents=2,
obs_builder_object=TreeObsForRailEnv(max_depth=2))
......
import numpy as np
from flatland.envs.agent_generators import complex_rail_generator_agents_placer
from flatland.envs.generators import complex_rail_generator
from flatland.envs.observations import TreeObsForRailEnv, LocalObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
......@@ -16,11 +17,13 @@ LocalGridObs = LocalObsForRailEnv(view_height=10, view_width=2, center=2)
env = RailEnv(width=20,
height=20,
rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=8, max_dist=99999, seed=0),
agent_generator=complex_rail_generator_agents_placer(),
obs_builder_object=TreeObservation,
number_of_agents=3)
env_renderer = RenderTool(env, gl="PILSVG", )
# Import your own Agent or use RLlib to train agents on Flatland
# As an example we use a random agent here
......
......@@ -2,29 +2,33 @@
"""Console script for flatland."""
import sys
import time
import click
import numpy as np
import time
import redis
from flatland.envs.agent_generators import complex_rail_generator_agents_placer
from flatland.envs.generators import complex_rail_generator
from flatland.envs.rail_env import RailEnv
from flatland.utils.rendertools import RenderTool
from flatland.evaluators.service import FlatlandRemoteEvaluationService
import redis
from flatland.utils.rendertools import RenderTool
@click.command()
def demo(args=None):
"""Demo script to check installation"""
env = RailEnv(
width=15,
height=15,
rail_generator=complex_rail_generator(
nr_start_goal=10,
nr_extra=1,
min_dist=8,
max_dist=99999),
number_of_agents=5)
width=15,
height=15,
rail_generator=complex_rail_generator(
nr_start_goal=10,
nr_extra=1,
min_dist=8,
max_dist=99999),
agent_generator=complex_rail_generator_agents_placer(),
number_of_agents=5)
env._max_episode_steps = int(15 * (env.width + env.height))
env_renderer = RenderTool(env)
......@@ -52,12 +56,12 @@ def demo(args=None):
@click.command()
@click.option('--tests',
@click.option('--tests',
type=click.Path(exists=True),
help="Path to folder containing Flatland tests",
required=True
)
@click.option('--service_id',
@click.option('--service_id',
default="FLATLAND_RL_SERVICE_ID",
help="Evaluation Service ID. This has to match the service id on the client.",
required=False
......@@ -70,14 +74,14 @@ def evaluator(tests, service_id):
raise Exception(
"\nRedis server does not seem to be running on your localhost.\n"
"Please ensure that you have a redis server running on your localhost"
)
)
grader = FlatlandRemoteEvaluationService(
test_env_folder=tests,
flatland_rl_service_id=service_id,
visualize=False,
verbose=False
)
test_env_folder=tests,
flatland_rl_service_id=service_id,
visualize=False,
verbose=False
)
grader.run()
......
"""Agent generators (railway undertaking, "EVU")."""
from typing import Tuple, List, Callable, Mapping, Optional, Any
import msgpack
import numpy as np
from flatland.core.grid.grid4_utils import get_new_position
from flatland.core.transition_map import GridTransitionMap
from flatland.envs.agent_utils import EnvAgentStatic
AgentPosition = Tuple[int, int]
AgentGeneratorProduct = Tuple[List[AgentPosition], List[AgentPosition], List[AgentPosition], List[float]]
AgentGenerator = Callable[[GridTransitionMap, int, Optional[Any]], AgentGeneratorProduct]
def speed_initialization_helper(nb_agents: int,
                                speed_ratio_map: Optional[Mapping[float, float]] = None) -> List[float]:
    """
    Draw one speed per agent according to the given speed/ratio classes.

    Parameters
    -------
    nb_agents : int
        The number of agents to generate a speed for
    speed_ratio_map : Mapping[float,float]
        A map of speeds mapping to their ratio of appearance. The ratios must sum up to 1.
        If it is None or empty, every agent gets the speed 1.0.

    Returns
    -------
    List[float]
        A list of size nb_agents of speeds with the corresponding probabilistic ratios.
    """
    # Truthiness (not `is None`) so that an empty mapping also falls back to the
    # default speed instead of reaching np.random.choice with zero classes.
    if not speed_ratio_map:
        return [1.0] * nb_agents

    # keys() and values() of a mapping iterate in matching order.
    speeds = list(speed_ratio_map.keys())
    speed_ratios = list(speed_ratio_map.values())

    # Sample class indices (not the speeds themselves) so the returned values
    # are the original mapping keys.
    class_indices = np.random.choice(len(speeds), nb_agents, p=speed_ratios)
    return [speeds[index] for index in class_indices]
def complex_rail_generator_agents_placer(speed_ratio_map: Mapping[float, float] = None) -> AgentGenerator:
    """
    Return an agent generator that places agents on the start/goal pairs passed in through `hints`.

    Parameters
    -------
    speed_ratio_map : Mapping[float,float]
        A map of speeds mapping to their ratio of appearance. The ratios must sum up to 1.
        If it is None or empty, every agent gets the speed 1.0.

    Returns
    -------
    AgentGenerator
        generator(rail, num_agents, hints) returning
        initial positions, directions, targets, speeds
    """

    def generator(rail: GridTransitionMap, num_agents: int, hints: Any = None):
        # `rail` is not used here; placement comes entirely from `hints`, which
        # must provide the keys 'start_goal' and 'start_dir'.
        start_goal = hints['start_goal']
        start_dir = hints['start_dir']

        # At most num_agents pairs are used; there may be fewer pairs than requested.
        agents_position = [sg[0] for sg in start_goal[:num_agents]]
        agents_target = [sg[1] for sg in start_goal[:num_agents]]
        agents_direction = start_dir[:num_agents]

        # Size the speeds by the agents actually placed, so that all returned
        # lists have the same length even when fewer than num_agents pairs exist.
        nb_placed = len(agents_position)
        if speed_ratio_map:
            speeds = speed_initialization_helper(nb_placed, speed_ratio_map)
        else:
            speeds = [1.0] * nb_placed

        return agents_position, agents_direction, agents_target, speeds

    return generator
def get_rnd_agents_pos_tgt_dir_on_rail(speed_ratio_map: Mapping[float, float] = None) -> AgentGenerator:
    """
    Return an agent generator which, given a `rail' GridTransitionMap, produces a random placement of agents
    (initial position, direction and target).

    Parameters
    -------
    speed_ratio_map : Mapping[float,float]
        A map of speeds mapping to their ratio of appearance. The ratios must sum up to 1.

    Returns
    -------
    AgentGenerator
        generator(rail, num_agents, hints=None) returning
        initial positions, directions, targets, speeds
    """

    def generator(rail: GridTransitionMap, num_agents: int, hints: Any = None):
        """
        Parameters
        -------
        rail : GridTransitionMap
            The railway to place agents on.
        num_agents : int
            The number of agents to place.
        hints : Any
            Not used by this generator.

        Returns
        -------
        Tuple of four lists
            initial positions, directions, targets, speeds
            (four empty lists if the rail has no cell with a transition)
        """

        def _path_exists(rail, start, direction, end):
            # Depth-first search (LIFO stack) over (position, heading) states:
            # check if a path exists between the 2 nodes.
            visited = set()
            stack = [(start, direction)]
            while stack:
                node = stack.pop()
                if node[0][0] == end[0] and node[0][1] == end[1]:
                    return 1
                if node not in visited:
                    visited.add(node)
                    moves = rail.get_transitions(node[0][0], node[0][1], node[1])
                    for move_index in range(4):
                        if moves[move_index]:
                            # After a move the heading is the direction of that move.
                            stack.append((get_new_position(node[0], move_index),
                                          move_index))

                    # If cell is a dead-end (exactly one transition bit set),
                    # append the same cell with reversed orientation!
                    nbits = bin(int(rail.get_full_transitions(node[0][0], node[0][1]))).count("1")
                    if nbits == 1:
                        stack.append((node[0], (node[1] + 2) % 4))

            return 0

        # Every cell with at least one transition can host a start or a target.
        valid_positions = [(r, c)
                           for r in range(rail.height)
                           for c in range(rail.width)
                           if rail.get_full_transitions(r, c) > 0]
        if not valid_positions:
            return [], [], [], []

        re_generate = True
        while re_generate:
            # Positions and targets are drawn independently, with replacement.
            agents_position = [
                valid_positions[i] for i in
                np.random.choice(len(valid_positions), num_agents)]
            agents_target = [
                valid_positions[i] for i in
                np.random.choice(len(valid_positions), num_agents)]

            # agents_direction must be a direction for which a solution is
            # guaranteed.
            agents_direction = [0] * num_agents
            re_generate = False
            for i in range(num_agents):
                position = agents_position[i]

                # All (heading, move) pairs allowed from the start cell.
                valid_movements = []
                for direction in range(4):
                    moves = rail.get_transitions(position[0], position[1], direction)
                    for move_index in range(4):
                        if moves[move_index]:
                            valid_movements.append((direction, move_index))

                valid_starting_directions = []
                for m in valid_movements:
                    new_position = get_new_position(position, m[1])
                    # The agent reaches new_position heading in the move
                    # direction m[1], not in its previous heading m[0]; the
                    # search must start from that heading to be consistent
                    # with how _path_exists itself advances.
                    if m[0] not in valid_starting_directions and _path_exists(rail, new_position, m[1],
                                                                              agents_target[i]):
                        valid_starting_directions.append(m[0])

                if len(valid_starting_directions) == 0:
                    # No heading leads to the target: redraw the whole placement.
                    re_generate = True
                else:
                    agents_direction[i] = valid_starting_directions[
                        np.random.choice(len(valid_starting_directions), 1)[0]]

        agents_speed = speed_initialization_helper(num_agents, speed_ratio_map)
        return agents_position, agents_direction, agents_target, agents_speed

    return generator
def agents_from_file(filename) -> AgentGenerator:
    """
    Utility to load the agents stored in a saved environment file.

    Parameters
    -------
    filename : File generated by env.save() or editor; it is read in binary mode and decoded with msgpack

    Returns
    -------
    AgentGenerator
        generator(rail, num_agents, hints=None) returning
        initial positions, directions, targets, speeds
    """

    def generator(rail: GridTransitionMap, num_agents: int, hints: Any = None):
        # rail, num_agents and hints are not used: everything comes from the file.
        with open(filename, "rb") as stream:
            raw_bytes = stream.read()
        payload = msgpack.unpackb(raw_bytes, use_list=False)

        # agents are always reset as not moving
        loaded_agents = [EnvAgentStatic(entry[0], entry[1], entry[2], moving=False)
                         for entry in payload[b"agents_static"]]

        # setup with loaded data
        positions = []
        directions = []
        targets = []
        for agent in loaded_agents:
            positions.append(agent.position)
            directions.append(agent.direction)
            targets.append(agent.target)

        # Every loaded agent gets the speed 1.0.
        speeds = [1.0] * len(positions)
        return positions, directions, targets, speeds

    return generator
from typing import Mapping, Tuple, List, Callable
"""Rail generators (infrastructure manager, "Infrastrukturbetreiber")."""
from typing import Callable, Tuple, Any, Optional
import msgpack
import numpy as np
......@@ -7,12 +8,12 @@ from flatland.core.grid.grid4_utils import get_direction, mirror
from flatland.core.grid.grid_utils import distance_on_rail
from flatland.core.grid.rail_env_grid import RailEnvTransitions
from flatland.core.transition_map import GridTransitionMap
from flatland.envs.agent_utils import EnvAgentStatic
from flatland.envs.grid4_generators_utils import connect_rail
from flatland.envs.grid4_generators_utils import get_rnd_agents_pos_tgt_dir_on_rail
RailGenerator = Callable[[int, int, int, int], Tuple[GridTransitionMap, Optional[Any]]]
def empty_rail_generator():
def empty_rail_generator() -> RailGenerator:
"""
Returns a generator which returns an empty rail map with no agents.
Primarily used by the editor
......@@ -24,7 +25,7 @@ def empty_rail_generator():
rail_array = grid_map.grid
rail_array.fill(0)
return grid_map, [], [], [], []
return [grid_map, None]
return generator
......@@ -33,8 +34,7 @@ def complex_rail_generator(nr_start_goal=1,
nr_extra=100,
min_dist=20,
max_dist=99999,
seed=0,
speed_initializer: Callable[[int], List[float]] = None):
seed=0) -> RailGenerator:
"""
Parameters
-------
......@@ -42,8 +42,6 @@ def complex_rail_generator(nr_start_goal=1,
The width (number of cells) of the grid to generate.
height : int
The height (number of cells) of the grid to generate.
speed_initializer : Callable[[int], List[float]]
Function that returns a list of speeds for the numer of agents given as argument.
Returns
-------
......@@ -56,8 +54,7 @@ def complex_rail_generator(nr_start_goal=1,
if num_agents > nr_start_goal:
num_agents = nr_start_goal
print("complex_rail_generator: num_agents > nr_start_goal, changing num_agents")
rail_trans = RailEnvTransitions()
grid_map = GridTransitionMap(width=width, height=height, transitions=rail_trans)
grid_map = GridTransitionMap(width=width, height=height, transitions=RailEnvTransitions())
rail_array = grid_map.grid
rail_array.fill(0)
......@@ -81,6 +78,7 @@ def complex_rail_generator(nr_start_goal=1,
# - return transition map + list of [start_pos, start_dir, goal_pos] points
#
rail_trans = grid_map.transitions
start_goal = []
start_dir = []
nr_created = 0
......@@ -150,15 +148,10 @@ def complex_rail_generator(nr_start_goal=1,
if len(new_path) >= 2:
nr_created += 1
agents_position = [sg[0] for sg in start_goal[:num_agents]]
agents_target = [sg[1] for sg in start_goal[:num_agents]]
agents_direction = start_dir[:num_agents]
if speed_initializer:
speeds = speed_initializer(num_agents)
else:
speeds = [1.0] * len(agents_position)
return grid_map, agents_position, agents_direction, agents_target, speeds
return grid_map, {'agents_hints': {
'start_goal': start_goal,
'start_dir': start_dir
}}
return generator
......@@ -202,22 +195,18 @@ def rail_from_manual_specifications_generator(rail_spec):
effective_transition_cell = rail_env_transitions.rotate_transition(basic_type_of_cell_, rotation_cell_)
rail.set_transitions((r, c), effective_transition_cell)
agents_position, agents_direction, agents_target = get_rnd_agents_pos_tgt_dir_on_rail(
rail,
num_agents)
return rail, agents_position, agents_direction, agents_target, [1.0] * len(agents_position)
return [rail, None]
return generator
def rail_from_file(filename):
def rail_from_file(filename) -> RailGenerator:
"""
Utility to load pickle file
Parameters
-------
input_file : Pickle file generated by env.save() or editor
filename : Pickle file generated by env.save() or editor
Returns
-------
......@@ -235,26 +224,16 @@ def rail_from_file(filename):
grid = np.array(data[b"grid"])
rail = GridTransitionMap(width=np.shape(grid)[1], height=np.shape(grid)[0], transitions=rail_env_transitions)
rail.grid = grid
# agents are always reset as not moving
agents_static = [EnvAgentStatic(d[0], d[1], d[2], moving=False) for d in data[b"agents_static"]]
# setup with loaded data
agents_position = [a.position for a in agents_static]
agents_direction = [a.direction for a in agents_static]
agents_target = [a.target for a in agents_static]
if b"distance_maps" in data.keys():
distance_maps = data[b"distance_maps"]
if len(distance_maps) > 0:
return rail, agents_position, agents_direction, agents_target, [1.0] * len(
agents_position), distance_maps
else:
return rail, agents_position, agents_direction, agents_target, [1.0] * len(agents_position)
else:
return rail, agents_position, agents_direction, agents_target, [1.0] * len(agents_position)
return rail, {'distance_maps': distance_maps}
return [rail, None]
return generator
def rail_from_grid_transition_map(rail_map):
def rail_from_grid_transition_map(rail_map) -> RailGenerator:
"""
Utility to convert a rail given by a GridTransitionMap map with the correct
16-bit transitions specifications.
......@@ -271,16 +250,12 @@ def rail_from_grid_transition_map(rail_map):
"""
def generator(width, height, num_agents, num_resets=0):
agents_position, agents_direction, agents_target = get_rnd_agents_pos_tgt_dir_on_rail(
rail_map,
num_agents)
return rail_map, agents_position, agents_direction, agents_target, [1.0] * len(agents_position)
return [rail_map, None]
return generator
def random_rail_generator(cell_type_relative_proportion=[1.0] * 11):
def random_rail_generator(cell_type_relative_proportion=[1.0] * 11) -> RailGenerator:
"""
Dummy random level generator:
- fill in cells at random in [width-2, height-2]
......@@ -544,31 +519,6 @@ def random_rail_generator(cell_type_relative_proportion=[1.0] * 11):
return_rail = GridTransitionMap(width=width, height=height, transitions=t_utils)
return_rail.grid = tmp_rail
agents_position, agents_direction, agents_target = get_rnd_agents_pos_tgt_dir_on_rail(
return_rail,
num_agents)
return return_rail, agents_position, agents_direction, agents_target, [1.0] * len(agents_position)
return [return_rail, None]
return generator
def speed_initialization_helper(nb_agents: int, speed_ratio_map: Mapping[float, float]) -> List[float]:
    """
    Draw one speed per agent according to the given speed/ratio classes.

    Parameters
    -------
    nb_agents : int
        The number of agents to generate a speed for
    speed_ratio_map : Mapping[float,float]
        A map of speeds mapping to their ratio of appearance. The ratios must sum up to 1.

    Returns
    -------
    List[float]
        A list of size nb_agents of speeds with the corresponding probabilistic ratios.
    """
    # Split the mapping into two parallel lists in a single pass.
    class_speeds: List[float] = []
    class_ratios: List[float] = []
    for speed, ratio in speed_ratio_map.items():
        class_speeds.append(speed)
        class_ratios.append(ratio)

    # Draw a class index per agent, then translate indices back to speeds.
    drawn_classes = np.random.choice(len(class_speeds), nb_agents, p=class_ratios)
    return [class_speeds[class_index] for class_index in drawn_classes]