Commit efcb9c75 authored by hagrid67

Merge branch 'master' of gitlab.aicrowd.com:flatland/neurips2020-flatland-baselines

parents 2678094e ae94980a
......@@ -127,4 +127,8 @@ dmypy.json
.pyre/
# misc
.idea
\ No newline at end of file
.idea
# custom extras
small_tree_video/
test.yaml
......@@ -3,9 +3,34 @@ import numpy as np
from flatland.core.env import Environment
from flatland.core.env_observation_builder import ObservationBuilder
from flatland.envs.observations import GlobalObsForRailEnv
from flatland.core.grid import grid4
from envs.flatland.observations import Observation, register_obs
'''
One-hot encode a 2-d array into a 3-d array, similar to tf.one_hot.
https://stackoverflow.com/questions/36960320/convert-a-2d-matrix-to-a-3d-one-hot-matrix-numpy/36960495
'''
def one_hot2d(arr, depth):
    return (np.arange(depth) == arr[..., None]).astype(int)
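For illustration (not part of the commit), a minimal sketch of what `one_hot2d` produces; the values are made up:

```python
import numpy as np

# A 2x2 array of direction indices becomes a 2x2x5 one-hot volume.
arr = np.array([[0, 2],
                [1, 4]])
encoded = (np.arange(5) == arr[..., None]).astype(int)  # same expression as one_hot2d(arr, 5)
assert encoded.shape == (2, 2, 5)
assert encoded[0, 1].tolist() == [0, 0, 1, 0, 0]  # index 2 -> third slot set
```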
def preprocess_obs(obs):
    transition_map, agents_state, targets = obs
    new_agents_state = agents_state.transpose([2, 0, 1])
    *states, = new_agents_state
    processed_agents_state_layers = []
    for i, feature_layer in enumerate(states):
        if i in {0, 1}:  # agent direction (categorical)
            # feature_layer = tf.one_hot(tf.cast(feature_layer, tf.int32), depth=len(grid4.Grid4TransitionsEnum) + 1,
            #                            dtype=tf.float32).numpy()
            # NumPy version
            feature_layer = one_hot2d(feature_layer, depth=len(grid4.Grid4TransitionsEnum) + 1)
        elif i in {2, 4}:  # counts
            feature_layer = np.expand_dims(np.log(feature_layer + 1), axis=-1)
        else:  # well-behaved scalars
            feature_layer = np.expand_dims(feature_layer, axis=-1)
        processed_agents_state_layers.append(feature_layer)
    return np.concatenate([transition_map, targets] + processed_agents_state_layers, axis=-1)
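For reference, my arithmetic on the concatenation above (not stated in the commit itself): 16 transition-map channels + 2 target channels + two one-hot direction layers of depth len(Grid4TransitionsEnum) + 1 = 5 each + three single-channel layers (two log-counts and one scalar) gives 16 + 2 + 5 + 5 + 1 + 1 + 1 = 31 channels, which is what the reworked observation_space below declares.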
@register_obs("global")
class GlobalObservation(Observation):
......@@ -20,11 +45,7 @@ class GlobalObservation(Observation):
def observation_space(self) -> gym.Space:
grid_shape = (self._config['max_width'], self._config['max_height'])
return gym.spaces.Tuple([
gym.spaces.Box(low=0, high=np.inf, shape=grid_shape + (16,), dtype=np.float32),
gym.spaces.Box(low=0, high=np.inf, shape=grid_shape + (5,), dtype=np.float32),
gym.spaces.Box(low=0, high=np.inf, shape=grid_shape + (2,), dtype=np.float32),
])
return gym.spaces.Box(low=0, high=np.inf, shape=grid_shape + (31,), dtype=np.float32)
class PaddedGlobalObsForRailEnv(ObservationBuilder):
......@@ -47,7 +68,7 @@ class PaddedGlobalObsForRailEnv(ObservationBuilder):
pad_height, pad_width = self._max_height - height, self._max_width - width
obs[1] = obs[1] + 1 # get rid of -1
assert pad_height >= 0 and pad_width >= 0
return tuple([
return preprocess_obs(tuple([
np.pad(o, ((0, pad_height), (0, pad_width), (0, 0)), constant_values=0)  # pad rows by pad_height, columns by pad_width
for o in obs
])
]))
import gym
import numpy as np
from flatland.core.env_observation_builder import ObservationBuilder
from flatland.core.grid.grid4_utils import get_new_position
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.rail_env import RailEnv
from envs.flatland.observations import Observation, register_obs
@register_obs("shortest_path")
class ShortestPathObservation(Observation):
def __init__(self, config) -> None:
super().__init__(config)
self._config = config
self._builder = ShortestPathForRailEnv(encode_one_hot=True)
def builder(self) -> ObservationBuilder:
return self._builder
def observation_space(self) -> gym.Space:
return gym.spaces.Tuple([
gym.spaces.Box(low=0, high=1, shape=(4,)), # shortest path direction (one-hot)
gym.spaces.Box(low=0, high=1, shape=(1,)), # shortest path distance to target
gym.spaces.Box(low=0, high=1, shape=(1,)), # conflict when following shortest path (1=true, 0=false)
gym.spaces.Box(low=0, high=1, shape=(4,)), # other path direction (all zero if not available)
gym.spaces.Box(low=0, high=1, shape=(1,)), # other path distance to target (zero if not available)
gym.spaces.Box(low=0, high=1, shape=(1,)), # conflict when following other path (1=true, 0=false)
])
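The six Box entries above appear to correspond one-to-one to the 6-tuple returned by ShortestPathForRailEnv.get() below: direction one-hot, normalised distance, and conflict flag for the shortest branch, followed by the same three entries for the alternative branch (all zeros when no second branch exists).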
class ShortestPathForRailEnv(ObservationBuilder):
    def __init__(self, encode_one_hot=True):
        super().__init__()
        self._encode_one_hot = encode_one_hot

    def reset(self):
        pass

    def get(self, handle: int = 0):
        self.env: RailEnv = self.env  # annotation only; the environment assigns self.env
        agent = self.env.agents[handle]

        if agent.status == RailAgentStatus.READY_TO_DEPART:
            agent_virtual_position = agent.initial_position
        elif agent.status == RailAgentStatus.ACTIVE:
            agent_virtual_position = agent.position
        elif agent.status == RailAgentStatus.DONE:
            agent_virtual_position = agent.target
        else:
            return None

        directions = list(range(4))
        possible_transitions = self.env.rail.get_transitions(*agent_virtual_position, agent.direction)
        distance_map = self.env.distance_map.get()
        nan_inf_mask = (distance_map != np.inf) & ~np.isnan(distance_map)  # mask out inf / NaN entries
        max_distance = np.max(distance_map[nan_inf_mask])
        assert not np.isnan(max_distance)
        assert max_distance != np.inf
        possible_steps = []

        # look in all directions for possible moves
        for movement in directions:
            if possible_transitions[movement]:
                next_move = movement
                pos = get_new_position(agent_virtual_position, movement)
                distance = distance_map[agent.handle][pos + (movement,)]  # new distance to target
                distance = max_distance if (distance == np.inf or np.isnan(distance)) else distance  # TODO: why does this happen?

                # look ahead if there is an agent between the agent and the next intersection
                # TODO: currently any train between the agent and the next intersection is reported. This includes
                # those that are moving away from the agent and therefore are not really conflicting. Will be improved.
                conflict = self.env.agent_positions[pos] != -1
                next_possible_moves = self.env.rail.get_transitions(*pos, movement)
                while np.count_nonzero(next_possible_moves) == 1 and not conflict:
                    movement = np.argmax(next_possible_moves)
                    pos = get_new_position(pos, movement)
                    conflict = self.env.agent_positions[pos] != -1
                    next_possible_moves = self.env.rail.get_transitions(*pos, movement)

                if self._encode_one_hot:
                    next_move_one_hot = np.zeros(len(directions))
                    next_move_one_hot[next_move] = 1
                    next_move = next_move_one_hot

                possible_steps.append((next_move, [distance / max_distance], [int(conflict)]))

        if len(possible_steps) == 1:
            # print(possible_steps[0] + (np.zeros(len(directions)), [.0], [0]))
            return possible_steps[0] + (np.zeros(len(directions)), [.0], [0])
        elif len(possible_steps) == 2:
            possible_steps = sorted(possible_steps, key=lambda step: step[1])  # sort by distance, ascending
            # print(possible_steps[0] + possible_steps[1])
            return possible_steps[0] + possible_steps[1]
        else:
            raise ValueError(f"More than two possible steps at {agent_virtual_position}. Looks like a bug.")
......@@ -3,7 +3,8 @@ import random
from typing import NamedTuple
from flatland.envs.malfunction_generators import malfunction_from_params
from flatland.envs.rail_env import RailEnv
# from flatland.envs.rail_env import RailEnv
from envs.flatland.utils.gym_env_wrappers import FlatlandRenderWrapper as RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
......
......@@ -2,8 +2,8 @@ from collections import defaultdict
from typing import Dict, NamedTuple, Any, Optional
import gym
from flatland.envs.rail_env import RailEnv, RailEnvActions
from gym import wrappers
from flatland.envs.rail_env import RailEnv, RailEnvActions
class StepOutput(NamedTuple):
obs: Dict[int, Any] # depends on observation builder
......@@ -13,6 +13,13 @@ class StepOutput(NamedTuple):
class FlatlandGymEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 10,
'semantics.autoreset': True
}
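Presumably these metadata entries exist for gym's Monitor wrapper (used in FlatlandBase below): 'render.modes' and 'video.frames_per_second' are what the video recorder consults, and 'semantics.autoreset': True is read by Monitor as env_semantics_autoreset, which the monkey-patched _after_step checks before rolling over the video recorder at the end of an episode.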
def __init__(self,
rail_env: RailEnv,
observation_space: gym.spaces.Space,
......@@ -29,10 +36,7 @@ class FlatlandGymEnv(gym.Env):
self.action_space = gym.spaces.Discrete(5)
self.observation_space = observation_space
if render:
from flatland.utils.rendertools import RenderTool
self.renderer = RenderTool(self.rail_env, gl="PILSVG")
else:
self.renderer = None
self.rail_env.set_renderer(render)
def step(self, action_dict: Dict[int, RailEnvActions]) -> StepOutput:
d, r, o = None, None, None
......@@ -42,9 +46,6 @@ class FlatlandGymEnv(gym.Env):
# The observation is `None` if an agent is done or malfunctioning.
obs, rewards, dones, infos = self.rail_env.step(action_dict)
if self.renderer is not None:
self.renderer.render_env(show=True, show_predictions=True, show_observations=False)
d, r, o = dict(), dict(), dict()
for agent, done in dones.items():
if agent != '__all__' and not agent in obs:
......@@ -83,9 +84,10 @@ class FlatlandGymEnv(gym.Env):
obs, infos = self.rail_env.reset(regenerate_rail=self._regenerate_rail_on_reset,
regenerate_schedule=self._regenerate_schedule_on_reset,
random_seed=random_seed)
if self.renderer is not None:
self.renderer.reset()
return {k: o for k, o in obs.items() if not k == '__all__'}
def render(self, mode='human'):
raise NotImplementedError
return self.rail_env.render(mode)
def close(self):
self.rail_env.close()
This diff is collapsed.
from gym.wrappers import monitor
from ray.rllib import MultiAgentEnv
def _after_step(self, observation, reward, done, info):
    if not self.enabled:
        return done

    if isinstance(done, dict):
        _done_check = done['__all__']
    else:
        _done_check = done
    if _done_check and self.env_semantics_autoreset:
        # For envs with BlockingReset wrapping VNCEnv, this observation will be the first one of the new episode
        self.reset_video_recorder()
        self.episode_id += 1
        self._flush()

    # Record stats - disabled as it causes an error in the multi-agent set-up
    # self.stats_recorder.after_step(observation, reward, done, info)

    # Record video
    self.video_recorder.capture_frame()

    return done
class FlatlandBase(MultiAgentEnv):
    reward_range = (-float('inf'), float('inf'))
    spec = None
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 10,
        'semantics.autoreset': True
    }

    def step(self, action_dict):
        obs, all_rewards, done, info = self._env.step(action_dict)
        if done['__all__']:
            self.close()
        return obs, all_rewards, done, info

    def reset(self, *args, **kwargs):
        if self._env_config.get('render', None):
            env_name = "flatland"
            monitor.FILE_PREFIX = env_name
            folder = self._env_config.get('video_dir', env_name)
            monitor.Monitor._after_step = _after_step
            self._env = monitor.Monitor(self._env, folder, resume=True)
        return self._env.reset(*args, **kwargs)

    def render(self, mode='human'):
        return self._env.render(self._env_config.get('render'))

    def close(self):
        self._env.close()
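A minimal sketch (hypothetical values, not from the commit) of the env_config keys that FlatlandBase.reset() looks at when enabling rendering and video capture:

```python
# Hypothetical env_config fragment; only 'render' and 'video_dir' are read here.
env_config = {
    "render": "human",        # any truthy value wraps the env in gym's Monitor; also forwarded to render()
    "video_dir": "./videos",  # folder handed to monitor.Monitor (falls back to "flatland" if absent)
}
```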
import random
import gym
from ray.rllib import MultiAgentEnv
from envs.flatland.utils.env_generators import random_sparse_env_small
from envs.flatland.observations import make_obs
from envs.flatland.utils.gym_env import FlatlandGymEnv
from envs.flatland.utils.gym_env_wrappers import SkipNoChoiceCellsWrapper, AvailableActionsWrapper
from envs.flatland.utils.gym_env_wrappers import SkipNoChoiceCellsWrapper, AvailableActionsWrapper, DeadlockWrapper, \
SparseRewardWrapper, ShortestPathActionWrapper, DeadlockResolutionWrapper
from envs.flatland_base import FlatlandBase
class FlatlandRandomSparseSmall(FlatlandBase):
class FlatlandRandomSparseSmall(MultiAgentEnv):
def __init__(self, env_config) -> None:
super().__init__()
self._env_config = env_config
......@@ -27,12 +28,22 @@ class FlatlandRandomSparseSmall(MultiAgentEnv):
self._env = FlatlandGymEnv(
rail_env=self._launch(),
observation_space=self._observation.observation_space(),
# render=env_config['render'], # TODO need to fix gl compatibility first
render=env_config.get('render'),
regenerate_rail_on_reset=env_config['regenerate_rail_on_reset'],
regenerate_schedule_on_reset=env_config['regenerate_schedule_on_reset']
)
if env_config['observation'] == 'shortest_path':
self._env = ShortestPathActionWrapper(self._env)
if env_config.get('sparse_reward', False):
self._env = SparseRewardWrapper(self._env, finished_reward=env_config.get('done_reward', 1),
not_finished_reward=env_config.get('not_finished_reward', -1))
if env_config.get('deadlock_reward', 0) != 0:
self._env = DeadlockWrapper(self._env, deadlock_reward=env_config['deadlock_reward'])
if env_config.get('resolve_deadlocks', False):
deadlock_reward = env_config.get('deadlock_reward', 0)
self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
if env_config.get('skip_no_choice_cells', False):
self._env = SkipNoChoiceCellsWrapper(self._env)
self._env = SkipNoChoiceCellsWrapper(self._env, env_config.get('accumulate_skipped_rewards', False))
if env_config.get('available_actions_obs', False):
self._env = AvailableActionsWrapper(self._env)
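For orientation, a hypothetical env_config fragment (dict form rather than the YAML used later) listing the optional wrapper switches read above and their effective defaults:

```python
env_config = {
    "observation": "shortest_path",       # 'shortest_path' additionally triggers ShortestPathActionWrapper
    "sparse_reward": False,               # True wraps with SparseRewardWrapper
    "done_reward": 1,                     # SparseRewardWrapper finished_reward (default 1)
    "not_finished_reward": -1,            # SparseRewardWrapper not_finished_reward (default -1)
    "deadlock_reward": 0,                 # any non-zero value enables DeadlockWrapper
    "resolve_deadlocks": False,           # True enables DeadlockResolutionWrapper (reuses deadlock_reward)
    "skip_no_choice_cells": False,        # True enables SkipNoChoiceCellsWrapper
    "accumulate_skipped_rewards": False,  # forwarded to SkipNoChoiceCellsWrapper
    "available_actions_obs": False,       # True enables AvailableActionsWrapper
}
```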
......@@ -67,9 +78,6 @@ class FlatlandRandomSparseSmall(MultiAgentEnv):
raise RuntimeError(f"Unable to launch env within {max_tries} tries.")
return env
def step(self, action_dict):
return self._env.step(action_dict)
def reset(self):
if self._test or (
self._env_config['reset_env_freq'] is not None
......@@ -78,4 +86,4 @@ class FlatlandRandomSparseSmall(MultiAgentEnv):
):
self._env.env = self._launch()
self._num_resets += 1
return self._env.reset(random_seed=self._next_test_seed if self._test else self._generate_random_seed())
return super().reset(random_seed=self._next_test_seed if self._test else self._generate_random_seed())
......@@ -11,7 +11,8 @@ from envs.flatland import get_generator_config
from envs.flatland.observations import make_obs
from envs.flatland.utils.gym_env import FlatlandGymEnv, StepOutput
from envs.flatland.utils.gym_env_wrappers import SkipNoChoiceCellsWrapper, AvailableActionsWrapper
from envs.flatland.utils.gym_env_wrappers import SkipNoChoiceCellsWrapper, AvailableActionsWrapper, \
ShortestPathActionWrapper, SparseRewardWrapper, DeadlockWrapper, DeadlockResolutionWrapper
class FlatlandSingle(gym.Env):
......@@ -27,8 +28,18 @@ class FlatlandSingle(gym.Env):
regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset']
)
if env_config['observation'] == 'shortest_path':
self._env = ShortestPathActionWrapper(self._env)
if env_config.get('sparse_reward', False):
self._env = SparseRewardWrapper(self._env, finished_reward=env_config.get('done_reward', 1),
not_finished_reward=env_config.get('not_finished_reward', -1))
if env_config.get('deadlock_reward', 0) != 0:
self._env = DeadlockWrapper(self._env, deadlock_reward=env_config['deadlock_reward'])
if env_config.get('resolve_deadlocks', False):
deadlock_reward = env_config.get('deadlock_reward', 0)
self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
if env_config.get('skip_no_choice_cells', False):
self._env = SkipNoChoiceCellsWrapper(self._env)
self._env = SkipNoChoiceCellsWrapper(self._env, env_config.get('accumulate_skipped_rewards', False))
if env_config.get('available_actions_obs', False):
self._env = AvailableActionsWrapper(self._env)
......
......@@ -3,23 +3,28 @@ from pprint import pprint
import gym
from flatland.envs.malfunction_generators import malfunction_from_params, no_malfunction_generator
from flatland.envs.rail_env import RailEnv
# from flatland.envs.rail_env import RailEnv
from envs.flatland.utils.gym_env_wrappers import FlatlandRenderWrapper as RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from ray.rllib import MultiAgentEnv
from envs.flatland import get_generator_config
from envs.flatland.observations import make_obs
from envs.flatland.utils.gym_env import FlatlandGymEnv
from envs.flatland.utils.gym_env_wrappers import AvailableActionsWrapper, SkipNoChoiceCellsWrapper
from envs.flatland.utils.gym_env_wrappers import AvailableActionsWrapper, SkipNoChoiceCellsWrapper, SparseRewardWrapper, \
DeadlockWrapper, ShortestPathActionWrapper, DeadlockResolutionWrapper
from envs.flatland_base import FlatlandBase
class FlatlandSparse(FlatlandBase):
class FlatlandSparse(MultiAgentEnv):
def __init__(self, env_config) -> None:
super().__init__()
# TODO implement other generators
assert env_config['generator'] == 'sparse_rail_generator'
self._env_config = env_config
self._observation = make_obs(env_config['observation'], env_config.get('observation_config'))
self._config = get_generator_config(env_config['generator_config'])
......@@ -32,18 +37,27 @@ class FlatlandSparse(MultiAgentEnv):
self._env = FlatlandGymEnv(
rail_env=self._launch(),
observation_space=self._observation.observation_space(),
# render=env_config['render'], # TODO need to fix gl compatibility first
render=env_config.get('render'),
regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset']
)
if env_config['observation'] == 'shortest_path':
self._env = ShortestPathActionWrapper(self._env)
if env_config.get('sparse_reward', False):
self._env = SparseRewardWrapper(self._env, finished_reward=env_config.get('done_reward', 1),
not_finished_reward=env_config.get('not_finished_reward', -1))
if env_config.get('deadlock_reward', 0) != 0:
self._env = DeadlockWrapper(self._env, deadlock_reward=env_config['deadlock_reward'])
if env_config.get('resolve_deadlocks', False):
deadlock_reward = env_config.get('deadlock_reward', 0)
self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
if env_config.get('skip_no_choice_cells', False):
self._env = SkipNoChoiceCellsWrapper(self._env)
self._env = SkipNoChoiceCellsWrapper(self._env, env_config.get('accumulate_skipped_rewards', False))
if env_config.get('available_actions_obs', False):
self._env = AvailableActionsWrapper(self._env)
@property
def observation_space(self) -> gym.spaces.Space:
print(self._env.observation_space)
return self._env.observation_space
@property
......@@ -86,7 +100,10 @@ class FlatlandSparse(MultiAgentEnv):
malfunction_generator_and_process_data=malfunction_generator,
obs_builder_object=self._observation.builder(),
remove_agents_at_target=False,
random_seed=self._config['seed']
random_seed=self._config['seed'],
# Should the line below be commented out? The env tries different configs here,
# so opening the renderer can be wasteful; moreover, the renderer has to be closed again.
use_renderer=self._env_config.get('render')
)
env.reset()
......@@ -96,9 +113,3 @@ class FlatlandSparse(MultiAgentEnv):
logging.error("=" * 50)
return env
def step(self, action_dict):
return self._env.step(action_dict)
def reset(self):
return self._env.reset()
flatland-random-sparse-small-global-marwil-fc-ppo:
run: MARWIL
env: flatland_sparse
stop:
timesteps_total: 1000000000 # 1e9
checkpoint_freq: 10
checkpoint_at_end: True
keep_checkpoints_num: 5
checkpoint_score_attr: episode_reward_mean
config:
beta:
grid_search: [0,0.25,0.5,0.75, 1] # compare IL (beta=0) vs MARWIL [0,0.25,0.5,0.75, 1]
input: /tmp/flatland
input_evaluation: [is, wis, simulation]
# effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
# see https://github.com/ray-project/ray/issues/4628
train_batch_size: 1000 # 5000
rollout_fragment_length: 50 # 100
num_workers: 1
num_envs_per_worker: 1
batch_mode: truncate_episodes
observation_filter: NoFilter
num_gpus: 0
env_config:
observation: global
observation_config:
max_width: 45
max_height: 45
generator: sparse_rail_generator
generator_config: small_v0
wandb:
project: neurips2020-flatland-baselines
entity: nilabha2007
tags: ["small_v0", "global_obs", "MARWIL"] # TODO should be set programmatically
model:
custom_model: global_obs_model
custom_options:
architecture: impala
architecture_options:
residual_layers: [[16,2], [32, 4]]
## Instructions to run
Global obs requires a lot of memory to run. An experiment was run with the config below:

```bash
python trainImitate.py -f MARWIL.yaml --ray-object-store-memory 55000000000 --ray-memory 55000000000 --ray-redis-max-memory 55000000000
```

Performance was poor, with a ~25-30% completion rate.
flatland-sparse-small-sortest_path-fc-apex:
run: APEX
env: flatland_sparse
stop:
timesteps_total: 100000000 # 1e8
checkpoint_freq: 10
checkpoint_at_end: True
# keep_checkpoints_num: 5
checkpoint_score_attr: episode_reward_mean
config:
num_workers: 15
num_envs_per_worker: 5
num_gpus: 0
env_config:
observation: shortest_path
generator: sparse_rail_generator
generator_config: small_v0
wandb:
project: flatland
entity: masterscrat
tags: ["small_v0", "tree_obs", "apex", "sparse_reward", "deadlock_reward"] # TODO should be set programmatically
model:
fcnet_activation: relu
fcnet_hiddens: [256, 256]
vf_share_layers: True # False
flatland-sparse-small-sortest_path-fc-ppo:
run: PPO
env: flatland_sparse
stop:
timesteps_total: 10000000 # 1e7
checkpoint_freq: 10
checkpoint_at_end: True
checkpoint_score_attr: episode_reward_mean
config:
clip_rewards: False
# clip_param: 0.1
# vf_clip_param: 500.0
vf_clip_param: 10.0
entropy_coeff: 0.01
# effective batch_size: train_batch_size * num_agents_in_each_environment
# see https://github.com/ray-project/ray/issues/4628
train_batch_size: 1000 # 5000
rollout_fragment_length: 50 # 100
sgd_minibatch_size: 100 # 500
num_sgd_iter: 10
num_workers: 15
num_envs_per_worker: 5
batch_mode: truncate_episodes
observation_filter: NoFilter
vf_share_layers: True
vf_loss_coeff: 0.5
num_gpus: 0
env_config:
observation: shortest_path
generator: sparse_rail_generator
generator_config: small_v0
wandb:
project: flatland
entity: masterscrat
tags: ["small_v0", "tree_obs", "ppo", "sparse_reward", "deadlock_reward"] # TODO should be set programmatically
model:
fcnet_activation: relu
fcnet_hiddens: [256, 256]
vf_share_layers: True # False
flatland-sparse-small-sparse-reward-tree-fc-apex:
run: APEX
env: flatland_sparse
stop:
timesteps_total: 100000000 # 1e8
checkpoint_freq: 10
checkpoint_at_end: True
# keep_checkpoints_num: 5
checkpoint_score_attr: episode_reward_mean
config:
num_workers: 15
num_envs_per_worker: 5
num_gpus: 0
env_config:
sparse_reward: True
done_reward: 1
not_finished_reward: -1
deadlock_reward: -1
skip_no_choice_cells: False
available_actions_obs: False
observation: tree
observation_config:
max_depth: 2
shortest_path_max_depth: 30
generator: sparse_rail_generator
generator_config: small_v0
wandb:
project: flatland
entity: masterscrat
tags: ["small_v0", "tree_obs", "apex", "sparse_reward", "deadlock_reward"] # TODO should be set programmatically
model:
fcnet_activation: relu
fcnet_hiddens: [256, 256]
vf_share_layers: True # False