Commit e5dcbcf6 authored by nilabha's avatar nilabha
Browse files

Merge branch 'global_obs_IL' into 'master'

Global obs IL changes to master

See merge request !6
parents d3fdcef7 7e5058c2
......@@ -3,9 +3,34 @@ import numpy as np
from flatland.core.env import Environment
from flatland.core.env_observation_builder import ObservationBuilder
from flatland.envs.observations import GlobalObsForRailEnv
from flatland.core.grid import grid4
from envs.flatland.observations import Observation, register_obs
'''
One-hot encode a 2-D array (matrix), similar to the tf.one_hot function
https://stackoverflow.com/questions/36960320/convert-a-2d-matrix-to-a-3d-one-hot-matrix-numpy/36960495
'''
def one_hot2d(arr, depth):
    """Return a one-hot encoding of ``arr`` along a new trailing axis.

    Each element of ``arr`` is compared against the class indices
    ``0 .. depth - 1``; the matching position is set to 1 and all others
    to 0. An element that matches no class index yields an all-zero vector.

    Args:
        arr: NumPy array of class indices.
        depth: Number of classes, i.e. the size of the new last axis.

    Returns:
        Integer array of shape ``arr.shape + (depth,)``.
    """
    class_ids = np.arange(depth)
    # Broadcasting the new trailing axis against the class indices compares
    # every element with every class in a single vectorized step.
    matches = arr[..., np.newaxis] == class_ids
    return matches.astype(int)
def preprocess_obs(obs):
    """Merge a global observation triple into one channel-last array.

    ``obs`` is unpacked as ``(transition_map, agents_state, targets)``. The
    last axis of ``agents_state`` is split into per-feature layers, each layer
    is encoded according to its index, and everything is concatenated along
    the last axis in the order: transition map, targets, agent-state layers.

    Layer encoding by index:
        * 0 and 1 -- agent direction (categorical): one-hot encoded with
          ``len(grid4.Grid4TransitionsEnum) + 1`` classes (NumPy stand-in for
          ``tf.one_hot``).
        * 2 and 4 -- counts: ``log(x + 1)``.
        * all others -- well-behaved scalars: passed through unchanged.

    Args:
        obs: Sequence of three arrays; ``agents_state`` must be 3-D because it
            is transposed with axes ``[2, 0, 1]``.

    Returns:
        A single array holding all layers stacked along the last axis.
    """
    transition_map, agents_state, targets = obs

    # Start with the two inputs that are used as-is, then append one encoded
    # layer per agent-state feature.
    layers = [transition_map, targets]

    # Moving the feature axis to the front lets us iterate over 2-D planes.
    for channel, plane in enumerate(agents_state.transpose([2, 0, 1])):
        if channel in (0, 1):  # agent direction (categorical)
            encoded = one_hot2d(plane, depth=len(grid4.Grid4TransitionsEnum) + 1)
        elif channel in (2, 4):  # counts
            encoded = np.expand_dims(np.log(plane + 1), axis=-1)
        else:  # well behaved scalars
            encoded = np.expand_dims(plane, axis=-1)
        layers.append(encoded)

    return np.concatenate(layers, axis=-1)
@register_obs("global")
class GlobalObservation(Observation):
......@@ -20,11 +45,7 @@ class GlobalObservation(Observation):
def observation_space(self) -> gym.Space:
grid_shape = (self._config['max_width'], self._config['max_height'])
return gym.spaces.Tuple([
gym.spaces.Box(low=0, high=np.inf, shape=grid_shape + (16,), dtype=np.float32),
gym.spaces.Box(low=0, high=np.inf, shape=grid_shape + (5,), dtype=np.float32),
gym.spaces.Box(low=0, high=np.inf, shape=grid_shape + (2,), dtype=np.float32),
])
return gym.spaces.Box(low=0, high=np.inf, shape=grid_shape + (31,), dtype=np.float32)
class PaddedGlobalObsForRailEnv(ObservationBuilder):
......@@ -47,7 +68,7 @@ class PaddedGlobalObsForRailEnv(ObservationBuilder):
pad_height, pad_width = self._max_height - height, self._max_width - width
obs[1] = obs[1] + 1 # get rid of -1
assert pad_height >= 0 and pad_width >= 0
return tuple([
return preprocess_obs(tuple([
np.pad(o, ((0, pad_height), (0, pad_height), (0, 0)), constant_values=0)
for o in obs
])
]))
flatland-random-sparse-small-global-marwil-fc-ppo:
run: MARWIL
env: flatland_sparse
stop:
timesteps_total: 1000000000 # 1e7
checkpoint_freq: 10
checkpoint_at_end: True
keep_checkpoints_num: 5
checkpoint_score_attr: episode_reward_mean
config:
beta:
grid_search: [0,0.25,0.5,0.75, 1] # compare IL (beta=0) vs MARWIL [0,0.25,0.5,0.75, 1]
input: /tmp/flatland
input_evaluation: [is, wis, simulation]
# effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
# see https://github.com/ray-project/ray/issues/4628
train_batch_size: 1000 # 5000
rollout_fragment_length: 50 # 100
num_workers: 1
num_envs_per_worker: 1
batch_mode: truncate_episodes
observation_filter: NoFilter
num_gpus: 0
env_config:
observation: global
observation_config:
max_width: 45
max_height: 45
generator: sparse_rail_generator
generator_config: small_v0
wandb:
project: neurips2020-flatland-baselines
entity: nilabha2007
tags: ["small_v0", "global_obs", "MARWIL"] # TODO should be set programmatically
model:
custom_model: global_obs_model
custom_options:
architecture: impala
architecture_options:
residual_layers: [[16,2], [32, 4]]
## Instructions to run
Global obs requires a lot of memory to run. An experiment was run with the configuration below:
```bash
python trainImitate.py -f MARWIL.yaml --ray-object-store-memory 55000000000 --ray-memory 55000000000 --ray-redis-max-memory 55000000000
```
Performance was poor, with a ~25-30% completion rate.
......@@ -17,12 +17,12 @@ class GlobalObsModel(TFModelV2):
self._mask_unavailable_actions = self._options.get("mask_unavailable_actions", False)
if self._mask_unavailable_actions:
obs_space = obs_space.original_space['obs']
obs_space = obs_space['obs']
else:
obs_space = obs_space.original_space
obs_space = obs_space
observations = [tf.keras.layers.Input(shape=o.shape) for o in obs_space]
processed_observations = preprocess_obs(tuple(observations))
observations = tf.keras.layers.Input(shape=obs_space.shape)
processed_observations = observations # preprocess_obs(tuple(observations))
if self._options['architecture'] == 'nature':
conv_out = NatureCNN(activation_out=True, **self._options['architecture_options'])(processed_observations)
......@@ -34,7 +34,7 @@ class GlobalObsModel(TFModelV2):
baseline = tf.keras.layers.Dense(units=1)(conv_out)
self._model = tf.keras.Model(inputs=observations, outputs=[logits, baseline])
self.register_variables(self._model.variables)
self._model.summary()
# self._model.summary()
def forward(self, input_dict, state, seq_lens):
# obs = preprocess_obs(input_dict['obs'])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment