Commit 63ee753f authored by manuschn

Merge branch 'master' into global-density-obs

parents bbebe763 79edf57b
......@@ -127,4 +127,8 @@ dmypy.json
.pyre/
# misc
.idea
\ No newline at end of file
.idea
# custom extras
small_tree_video/
test.yaml
import getopt
import os
import sys
import time
......@@ -30,6 +31,11 @@ from ray.rllib.offline.json_writer import JsonWriter
imitate = True
## Legacy code for obtaining the correct expert actions:
# change the line below in the method malfunction_from_file in flatland/envs/malfunction_generators.py
# mean_malfunction_rate = 1/oMPD.malfunction_rate
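The point of that one-line legacy patch, as far as this diff shows, is a units mismatch: one side of the flatland API treats malfunction_rate as a per-step Poisson rate, the other as a mean interval between malfunctions, and the two are reciprocals. A minimal illustration with made-up numbers (oMPD is the parameter object named in the comment above):

# Made-up example: a malfunction on average every 200 steps.
# If oMPD.malfunction_rate holds the per-step rate (1/200),
# inverting it recovers the mean interval the legacy code expects:
mean_malfunction_rate = 1 / oMPD.malfunction_rate  # -> 200.0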
def main(args):
try:
opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
......@@ -55,8 +61,8 @@ def main(args):
max_depth = 30
tree_depth = 2
trial_start = 0
n_trials = 97
trial_start = 100
n_trials = 999
start = 0
columns = ['Agents', 'X_DIM', 'Y_DIM', 'TRIAL_NO',
......@@ -66,14 +72,22 @@ def main(args):
for trials in range(trial_start, n_trials + 1):
env_file = f"envs-100-999/envs/Level_{trials}.pkl"
# env_file = f"../env_configs/test-envs-small/Test_0/Level_{trials}.mpk"
# file = f"../env_configs/actions-small/Test_0/Level_{trials}.mpk"
file = f"envs-100-999/actions/envs/Level_{trials}.json"
if not os.path.isfile(env_file) or not os.path.isfile(file):
print("Missing file!", env_file, file)
continue
step = 0
obs_builder_object = TreeObsForRailEnv(max_depth=tree_depth,
predictor=ShortestPathPredictorForRailEnv(
max_depth))
env_file = f"../env_configs/test-envs-small/Test_0/Level_{trials}.mpk"
env = RailEnv(width=1, height=1,
rail_generator=rail_from_file(env_file),
schedule_generator=schedule_from_file(env_file),
......@@ -88,8 +102,6 @@ def main(args):
random_seed=1001
)
file = f"../env_configs/actions-small/Test_0/Level_{trials}.mpk"
with open(file, "r") as files:
expert_actions = json.load(files)
......@@ -244,7 +256,7 @@ def main(args):
step,
np.mean(reward_window),
np.mean(scores_window),
100 * np.mean(done_window)), end=" ")
100 * np.mean(done_window)))
if visuals:
env_renderer.close_window()
......
......@@ -8,6 +8,6 @@ dependencies:
- pyhumps==1.3.1
- gputil==1.4.0
- pyhumps==1.3.1
- wandb==0.8.35
- wandb==0.8.36
- ray[rllib]==0.8.5
- tensorflow==2.1.0
\ No newline at end of file
......@@ -9,6 +9,6 @@ dependencies:
- pyhumps==1.3.1
- gputil==1.4.0
- pyhumps==1.3.1
- wandb==0.8
- wandb==0.8.36
- ray[rllib]==0.8.5
- tensorflow==2.1.0
\ No newline at end of file
......@@ -10,5 +10,5 @@ dependencies:
- pyhumps==1.3.1
- gputil==1.4.0
- pyhumps==1.3.1
- wandb==0.8
- wandb==0.8.36
- ray[rllib]==0.8.5
\ No newline at end of file
......@@ -10,5 +10,5 @@ dependencies:
- pyhumps==1.3.1
- gputil==1.4.0
- pyhumps==1.3.1
- wandb==0.8
- wandb==0.8.36
- ray[rllib]==0.8.5
\ No newline at end of file
# ADRIAN_V0
# config shared by Adrian, without malfunctions
width: 30
height: 30
number_of_agents: 5
max_num_cities: 40
max_rails_between_cities: 2
max_rails_in_city: 8
grid_mode: False
seed: 0
regenerate_rail_on_reset: True
regenerate_schedule_on_reset: True
\ No newline at end of file
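This commit does not show the loader for this config; below is a hypothetical way to turn these keys into a RailEnv, using the flatland generators that appear elsewhere in this diff (the file path is an assumption):

import yaml
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator

with open("env_configs/adrian_v0.yaml") as f:  # hypothetical path
    cfg = yaml.safe_load(f)

env = RailEnv(
    width=cfg["width"], height=cfg["height"],
    number_of_agents=cfg["number_of_agents"],
    rail_generator=sparse_rail_generator(
        max_num_cities=cfg["max_num_cities"],
        grid_mode=cfg["grid_mode"],
        max_rails_between_cities=cfg["max_rails_between_cities"],
        max_rails_in_city=cfg["max_rails_in_city"],
        seed=cfg["seed"],
    ),
    schedule_generator=sparse_schedule_generator(),
)
# regenerate_rail_on_reset / regenerate_schedule_on_reset are consumed later,
# at reset() time, by the gym wrapper shown further down in this diff.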
......@@ -3,9 +3,34 @@ import numpy as np
from flatland.core.env import Environment
from flatland.core.env_observation_builder import ObservationBuilder
from flatland.envs.observations import GlobalObsForRailEnv
from flatland.core.grid import grid4
from envs.flatland.observations import Observation, register_obs
'''
One-hot encode a 2-D array along a new trailing axis, similar to tf.one_hot:
https://stackoverflow.com/questions/36960320/convert-a-2d-matrix-to-a-3d-one-hot-matrix-numpy/36960495
'''
def one_hot2d(arr, depth):
return (np.arange(depth) == arr[..., None]).astype(int)
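A quick sanity check of one_hot2d (the sample values are illustrative):

import numpy as np

arr = np.array([[0, 2],
                [1, 3]])
out = one_hot2d(arr, depth=4)
assert out.shape == (2, 2, 4)                       # one channel per class
assert (out[0, 1] == np.array([0, 0, 1, 0])).all()  # arr[0, 1] == 2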
def preprocess_obs(obs):
transition_map, agents_state, targets = obs
new_agents_state = agents_state.transpose([2,0,1])
states = list(new_agents_state)  # one 2-D layer per agent-state feature
processed_agents_state_layers = []
for i, feature_layer in enumerate(states):
if i in {0, 1}: # agent direction (categorical)
# feature_layer = tf.one_hot(tf.cast(feature_layer, tf.int32), depth=len(grid4.Grid4TransitionsEnum) + 1,
# dtype=tf.float32).numpy()
# Numpy Version
feature_layer = one_hot2d(feature_layer, depth=len(grid4.Grid4TransitionsEnum) + 1)
elif i in {2, 4}: # counts
feature_layer = np.expand_dims(np.log(feature_layer + 1), axis=-1)
else: # well behaved scalars
feature_layer = np.expand_dims(feature_layer, axis=-1)
processed_agents_state_layers.append(feature_layer)
return np.concatenate([transition_map, targets] + processed_agents_state_layers, axis=-1)
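This preprocessing is what collapses the old Tuple space into the single 31-channel Box below: 16 transition-map channels + 2 target channels + two one-hot direction layers of depth len(Grid4TransitionsEnum) + 1 == 5 + two log-count channels + one plain scalar channel = 31. A minimal shape check with dummy inputs, assuming the functions above are importable:

import numpy as np

h, w = 4, 4
transition_map = np.zeros((h, w, 16))
agents_state = np.zeros((h, w, 5))  # layers 0/1: directions, 2/4: counts, 3: scalar
targets = np.zeros((h, w, 2))

out = preprocess_obs((transition_map, agents_state, targets))
assert out.shape == (h, w, 31)  # 16 + 2 + 5 + 5 + 1 + 1 + 1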
@register_obs("global")
class GlobalObservation(Observation):
......@@ -20,11 +45,7 @@ class GlobalObservation(Observation):
def observation_space(self) -> gym.Space:
grid_shape = (self._config['max_width'], self._config['max_height'])
return gym.spaces.Tuple([
gym.spaces.Box(low=0, high=np.inf, shape=grid_shape + (16,), dtype=np.float32),
gym.spaces.Box(low=0, high=np.inf, shape=grid_shape + (5,), dtype=np.float32),
gym.spaces.Box(low=0, high=np.inf, shape=grid_shape + (2,), dtype=np.float32),
])
return gym.spaces.Box(low=0, high=np.inf, shape=grid_shape + (31,), dtype=np.float32)
class PaddedGlobalObsForRailEnv(ObservationBuilder):
......@@ -47,7 +68,7 @@ class PaddedGlobalObsForRailEnv(ObservationBuilder):
pad_height, pad_width = self._max_height - height, self._max_width - width
obs[1] = obs[1] + 1 # get rid of -1
assert pad_height >= 0 and pad_width >= 0
return tuple([
return preprocess_obs(tuple([
np.pad(o, ((0, pad_height), (0, pad_width), (0, 0)), constant_values=0)
for o in obs
])
]))
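The padding grows the grid toward the bottom and right only, so the original cell coordinates are preserved. For reference, a minimal sketch of the np.pad call above:

import numpy as np

obs_layer = np.ones((3, 2, 16))  # (height, width, channels)
pad_height, pad_width = 5 - 3, 5 - 2
padded = np.pad(obs_layer, ((0, pad_height), (0, pad_width), (0, 0)),
                constant_values=0)
assert padded.shape == (5, 5, 16)
assert (padded[:3, :2] == 1).all()  # original cells untouched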
import gym
import numpy as np
from flatland.core.env_observation_builder import ObservationBuilder
from flatland.core.grid.grid4_utils import get_new_position
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.rail_env import RailEnv
from envs.flatland.observations import Observation, register_obs
@register_obs("shortest_path")
class ShortestPathObservation(Observation):
def __init__(self, config) -> None:
super().__init__(config)
self._config = config
self._builder = ShortestPathForRailEnv(encode_one_hot=True)
def builder(self) -> ObservationBuilder:
return self._builder
def observation_space(self) -> gym.Space:
return gym.spaces.Tuple([
gym.spaces.Box(low=0, high=1, shape=(4,)), # shortest path direction (one-hot)
gym.spaces.Box(low=0, high=1, shape=(1,)), # shortest path distance to target
gym.spaces.Box(low=0, high=1, shape=(1,)), # conflict when following shortest path (1=true, 0=false)
gym.spaces.Box(low=0, high=1, shape=(4,)), # other path direction (all zero if not available)
gym.spaces.Box(low=0, high=1, shape=(1,)), # other path direction (zero if not available)
gym.spaces.Box(low=0, high=1, shape=(1,)), # conflict when following other path (1=true, 0=false)
])
class ShortestPathForRailEnv(ObservationBuilder):
def __init__(self, encode_one_hot=True):
super().__init__()
self._encode_one_hot = encode_one_hot
def reset(self):
pass
def get(self, handle: int = 0):
self.env: RailEnv = self.env  # no-op self-assignment, only annotates self.env for type checkers
agent = self.env.agents[handle]
if agent.status == RailAgentStatus.READY_TO_DEPART:
agent_virtual_position = agent.initial_position
elif agent.status == RailAgentStatus.ACTIVE:
agent_virtual_position = agent.position
elif agent.status == RailAgentStatus.DONE:
agent_virtual_position = agent.target
else:
return None
directions = list(range(4))
possible_transitions = self.env.rail.get_transitions(*agent_virtual_position, agent.direction)
distance_map = self.env.distance_map.get()
nan_inf_mask = (distance_map != np.inf) & ~np.isnan(distance_map)  # mask out inf and NaN entries
max_distance = np.max(distance_map[nan_inf_mask])
assert not np.isnan(max_distance)
assert max_distance != np.inf
possible_steps = []
# look in all directions for possible moves
for movement in directions:
if possible_transitions[movement]:
next_move = movement
pos = get_new_position(agent_virtual_position, movement)
distance = distance_map[agent.handle][pos + (movement,)] # new distance to target
distance = max_distance if (distance == np.inf or np.isnan(distance)) else distance # TODO: why does this happen?
# look ahead if there is an agent between the agent and the next intersection
# Todo: currently any train between the agent and the next intersection is reported. This includes
# those that are moving away from the agent and therefore are not really conflicting. Will be improved.
conflict = self.env.agent_positions[pos] != -1
next_possible_moves = self.env.rail.get_transitions(*pos, movement)
while np.count_nonzero(next_possible_moves) == 1 and not conflict:
movement = np.argmax(next_possible_moves)
pos = get_new_position(pos, movement)
conflict = self.env.agent_positions[pos] != -1
next_possible_moves = self.env.rail.get_transitions(*pos, movement)
if self._encode_one_hot:
next_move_one_hot = np.zeros(len(directions))
next_move_one_hot[next_move] = 1
next_move = next_move_one_hot
possible_steps.append((next_move, [distance/max_distance], [int(conflict)]))
if len(possible_steps) == 1:
# print(possible_steps[0] + (np.zeros(len(directions)), [.0], [0]))
return possible_steps[0] + (np.zeros(len(directions)), [.0], [0])
elif len(possible_steps) == 2:
possible_steps = sorted(possible_steps, key=lambda step: step[1]) # sort by distance, ascending
# print(possible_steps[0] + possible_steps[1])
return possible_steps[0] + possible_steps[1]
else:
raise ValueError(f"More than two possibles steps at {agent_virtual_position}. Looks like a bug.")
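get() therefore always returns a 6-tuple matching the Tuple space above: (shortest-path direction one-hot, [normalized distance], [conflict flag]) followed by the same triple for the alternative path, 12 scalars in total. A sketch of consuming one observation (the sample values are illustrative, not taken from a real env):

import numpy as np

obs = (
    np.array([0, 1, 0, 0]), [0.42], [0],  # shortest path: east, no conflict
    np.array([0, 0, 1, 0]), [0.57], [1],  # alternative: south, conflicting train ahead
)
flat = np.concatenate([np.asarray(p, dtype=np.float32).ravel() for p in obs])
assert flat.shape == (12,)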
import logging
import random
from typing import NamedTuple
from flatland.envs.malfunction_generators import malfunction_from_params
from flatland.envs.rail_env import RailEnv
# from flatland.envs.rail_env import RailEnv
from envs.flatland.utils.gym_env_wrappers import FlatlandRenderWrapper as RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
MalfunctionParameters = NamedTuple('MalfunctionParameters', [('malfunction_rate', float), ('min_duration', int), ('max_duration', int)])
def random_sparse_env_small(random_seed, max_width, max_height, observation_builder):
random.seed(random_seed)
......@@ -24,6 +28,9 @@ def random_sparse_env_small(random_seed, max_width, max_height, observation_buil
max_rails_between_cities=max_rails_between_cities,
max_rails_in_city=max_rails_in_cities)
# new version:
# stochastic_data = MalfunctionParameters(malfunction_rate, malfunction_min_duration, malfunction_max_duration)
stochastic_data = {'malfunction_rate': malfunction_rate, 'min_duration': malfunction_min_duration,
'max_duration': malfunction_max_duration}
......@@ -31,10 +38,17 @@ def random_sparse_env_small(random_seed, max_width, max_height, observation_buil
while width <= max_width and height <= max_height:
try:
return RailEnv(width=width, height=height, rail_generator=rail_generator,
schedule_generator=schedule_generator, number_of_agents=nr_trains,
malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
obs_builder_object=observation_builder, remove_agents_at_target=False)
env = RailEnv(width=width, height=height, rail_generator=rail_generator,
schedule_generator=schedule_generator, number_of_agents=nr_trains,
malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
obs_builder_object=observation_builder, remove_agents_at_target=False)
print("[{}] {}x{} {} cities {} trains, max {} rails between cities, max {} rails in cities. Malfunction rate {}, {} to {} steps.".format(
random_seed, width, height, nr_cities, nr_trains, max_rails_between_cities,
max_rails_in_cities, malfunction_rate, malfunction_min_duration, malfunction_max_duration
))
return env
except ValueError as e:
logging.error(f"Error: {e}")
width += 5
......@@ -42,4 +56,3 @@ def random_sparse_env_small(random_seed, max_width, max_height, observation_buil
logging.info("Try again with larger env: (w,h):", width, height)
logging.error(f"Unable to generate env with seed={random_seed}, max_width={max_height}, max_height={max_height}")
return None
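A hypothetical call, for reference (the observation builder choice is an assumption; any flatland ObservationBuilder works):

from flatland.envs.observations import GlobalObsForRailEnv

env = random_sparse_env_small(random_seed=1001, max_width=45, max_height=45,
                              observation_builder=GlobalObsForRailEnv())
if env is not None:  # None means no size up to 45x45 worked for this seed
    obs, info = env.reset()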
......@@ -2,8 +2,8 @@ from collections import defaultdict
from typing import Dict, NamedTuple, Any, Optional
import gym
from flatland.envs.rail_env import RailEnv, RailEnvActions
from gym import wrappers
from flatland.envs.rail_env import RailEnv, RailEnvActions
class StepOutput(NamedTuple):
obs: Dict[int, Any] # depends on observation builder
......@@ -13,6 +13,13 @@ class StepOutput(NamedTuple):
class FlatlandGymEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 10,
'semantics.autoreset': True
}
def __init__(self,
rail_env: RailEnv,
observation_space: gym.spaces.Space,
......@@ -29,10 +36,7 @@ class FlatlandGymEnv(gym.Env):
self.action_space = gym.spaces.Discrete(5)
self.observation_space = observation_space
if render:
from flatland.utils.rendertools import RenderTool
self.renderer = RenderTool(self.rail_env, gl="PILSVG")
else:
self.renderer = None
self.rail_env.set_renderer(render)
def step(self, action_dict: Dict[int, RailEnvActions]) -> StepOutput:
d, r, o = None, None, None
......@@ -42,9 +46,6 @@ class FlatlandGymEnv(gym.Env):
# The observation is `None` if an agent is done or malfunctioning.
obs, rewards, dones, infos = self.rail_env.step(action_dict)
if self.renderer is not None:
self.renderer.render_env(show=True, show_predictions=True, show_observations=False)
d, r, o = dict(), dict(), dict()
for agent, done in dones.items():
if agent != '__all__' and agent not in obs:
......@@ -83,9 +84,10 @@ class FlatlandGymEnv(gym.Env):
obs, infos = self.rail_env.reset(regenerate_rail=self._regenerate_rail_on_reset,
regenerate_schedule=self._regenerate_schedule_on_reset,
random_seed=random_seed)
if self.renderer is not None:
self.renderer.reset()
return {k: o for k, o in obs.items() if not k == '__all__'}
def render(self, mode='human'):
raise NotImplementedError
return self.rail_env.render(mode)
def close(self):
self.rail_env.close()
from gym.wrappers import monitor
from ray.rllib import MultiAgentEnv
def _after_step(self, observation, reward, done, info):
if not self.enabled: return done
if isinstance(done, dict):
_done_check = done['__all__']
else:
_done_check = done
if _done_check and self.env_semantics_autoreset:
# For envs with BlockingReset wrapping VNCEnv, this observation will be the first one of the new episode
self.reset_video_recorder()
self.episode_id += 1
self._flush()
# Record stats - Disabled as it causes error in multi-agent set up
# self.stats_recorder.after_step(observation, reward, done, info)
# Record video
self.video_recorder.capture_frame()
return done
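This override replaces gym Monitor's stock _after_step, which assumes a scalar done and breaks on multi-agent dict dones; the 'semantics.autoreset': True metadata added to FlatlandGymEnv above is what Monitor reads into env_semantics_autoreset. The diff does not show where the patch is applied; a plausible sketch:

# Assumption: patched at import time, before any env is wrapped in a Monitor.
monitor.Monitor._after_step = _after_step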
class FlatlandBase(MultiAgentEnv):