From 088c5f168a2aedbb171196457bae652503da2ef7 Mon Sep 17 00:00:00 2001
From: Guillaume Mollard <>
Date: Wed, 29 May 2019 10:19:53 +0200
Subject: [PATCH] Convolutional model and preprocessor added

 RLLib_training/               | 101 ++++++++++++++++++ =>} |  10 ++
 .../conv_model_test/config.gin                |  21 ++++
 RLLib_training/            |  39 ++++---
 4 files changed, 159 insertions(+), 12 deletions(-)
 create mode 100644 RLLib_training/
 rename RLLib_training/{ =>} (81%)
 create mode 100644 RLLib_training/experiment_configs/conv_model_test/config.gin

diff --git a/RLLib_training/ b/RLLib_training/
new file mode 100644
index 0000000..81f5223
--- /dev/null
+++ b/RLLib_training/
@@ -0,0 +1,101 @@
+from ray.rllib.models import ModelCatalog, Model
+from ray.rllib.models.misc import normc_initializer
+import tensorflow as tf
+class ConvModelGlobalObs(Model):
+    """Custom RLlib model: one strided 3x3 convolution followed by dense layers.
+
+    The observation is fed directly to ``tf.layers.conv2d``, so
+    ``input_dict['obs']`` must be a single 4-D tensor (channels-last is the
+    ``tf.layers.conv2d`` default), not a tuple of sub-observations.
+    """
+    def _build_layers_v2(self, input_dict, num_outputs, options):
+        """Define the layers of a custom model.
+
+        Arguments:
+            input_dict (dict): Dictionary of input tensors, including "obs",
+                "prev_action", "prev_reward", "is_training". Only "obs" is
+                read here.
+            num_outputs (int): Output tensor must be of size
+                [BATCH_SIZE, num_outputs].
+            options (dict): Model options (unused).
+
+        Returns:
+            (outputs, feature_layer): Tensors of size [BATCH_SIZE, num_outputs]
+                and [BATCH_SIZE, 64].
+        """
+        Relu = tf.nn.relu
+        Dense = tf.contrib.layers.fully_connected
+        N_CHANNELS = 96
+        conv1 = Relu(self.conv2d(input_dict['obs'], N_CHANNELS, 'valid', strides=(2, 2)))
+        # Flatten from the convolution's own output shape instead of a
+        # hand-derived formula: the previous ((map_size - 2) / 2) ** 2
+        # expression was only right for square maps with an even side length.
+        conv1_flat = tf.layers.flatten(conv1)
+        # fully_connected applies ReLU by default; state it explicitly so the
+        # hidden layers and the output layer are visibly different.
+        s_fc1 = Dense(conv1_flat, 256, activation_fn=Relu)
+        layerN_minus_1 = Dense(s_fc1, 64, activation_fn=Relu)
+        # The output layer must be linear: with the default activation the
+        # returned values would be clamped to be non-negative.
+        layerN = Dense(layerN_minus_1, num_outputs, activation_fn=None)
+        return layerN, layerN_minus_1
+    def conv2d(self, x, out_channels, padding, strides=(1,1)):
+        """Apply a biased 3x3 ``tf.layers.conv2d`` with the given padding and strides."""
+        return tf.layers.conv2d(x, out_channels, kernel_size=[3, 3], padding=padding,
+                                use_bias=True, strides=strides)
+class LightModel(Model):
+    """Custom RLlib model: two 3x3 'valid' convolutions and one dense hidden layer.
+
+    The observation is fed directly to ``tf.layers.conv2d``, so
+    ``input_dict['obs']`` must be a single 4-D tensor (channels-last is the
+    ``tf.layers.conv2d`` default), not a tuple of sub-observations.
+    """
+    def _build_layers_v2(self, input_dict, num_outputs, options):
+        """Define the layers of a custom model.
+
+        Arguments:
+            input_dict (dict): Dictionary of input tensors, including "obs",
+                "prev_action", "prev_reward", "is_training". Only "obs" is
+                read here.
+            num_outputs (int): Output tensor must be of size
+                [BATCH_SIZE, num_outputs].
+            options (dict): Model options (unused).
+
+        Returns:
+            (outputs, feature_layer): Tensors of size [BATCH_SIZE, num_outputs]
+                and [BATCH_SIZE, 128].
+        """
+        # Kept from the original implementation; nothing in this class reads it.
+        self.sess = tf.get_default_session()
+        Relu = tf.nn.relu
+        Dense = tf.contrib.layers.fully_connected
+        conv1 = Relu(self.conv2d(input_dict['obs'], 32, 'valid'))
+        conv2 = Relu(self.conv2d(conv1, 16, 'valid'))
+        # Flatten from the convolution's own output shape; the previous
+        # hard-coded 16 * (17 - 2 * 2) ** 2 only matched a 17x17 input.
+        conv2_flat = tf.layers.flatten(conv2)
+        # fully_connected applies ReLU by default; state it explicitly.
+        s_fc1 = Dense(conv2_flat, 128, activation_fn=Relu,
+                      weights_initializer=normc_initializer(1.0))
+        # The output layer must be linear: with the default activation the
+        # returned values would be clamped to be non-negative.
+        layerN = Dense(s_fc1, num_outputs, activation_fn=None,
+                       weights_initializer=normc_initializer(0.01))
+        return layerN, s_fc1
+    def conv2d(self, x, out_channels, padding):
+        """Apply a biased 3x3 ``tf.layers.conv2d`` with unit strides."""
+        return tf.layers.conv2d(x, out_channels, kernel_size=[3, 3], padding=padding, use_bias=True)
diff --git a/RLLib_training/ b/RLLib_training/
similarity index 81%
rename from RLLib_training/
rename to RLLib_training/
index 7d23b8c..cc58a0d 100644
--- a/RLLib_training/
+++ b/RLLib_training/
@@ -59,6 +59,16 @@ class CustomPreprocessor(Preprocessor):
             return observation
+class ConvModelPreprocessor(Preprocessor):
+    """Merge a tuple observation into one array by concatenating along axis 2."""
+    def _init_shape(self, obs_space, options):
+        """Return (dim 0 of first space, dim 1 of first space, summed dim 2 of all spaces)."""
+        rows = obs_space[0].shape[0]
+        cols = obs_space[0].shape[1]
+        depth = 0
+        for sub_space in obs_space:
+            depth += sub_space.shape[2]
+        return (rows, cols, depth)
+    def transform(self, observation):
+        """Concatenate the first three observation components along axis 2."""
+        planes = [observation[idx] for idx in range(3)]
+        return np.concatenate(planes, axis=2)
 # class NoPreprocessor:
diff --git a/RLLib_training/experiment_configs/conv_model_test/config.gin b/RLLib_training/experiment_configs/conv_model_test/config.gin
new file mode 100644
index 0000000..a55633e
--- /dev/null
+++ b/RLLib_training/experiment_configs/conv_model_test/config.gin
@@ -0,0 +1,21 @@
+run_experiment.name = "observation_benchmark_results"
+run_experiment.num_iterations = 1002
+run_experiment.save_every = 100
+run_experiment.hidden_sizes = [32, 32]
+run_experiment.map_width = 20
+run_experiment.map_height = 20
+run_experiment.n_agents = 5
+run_experiment.policy_folder_name = "ppo_policy_{config[obs_builder].__class__.__name__}_{config[n_agents]}_agents_conv_model_{config[conv_model]}_"
+run_experiment.horizon = 50
+run_experiment.seed = 123
+run_experiment.conv_model = {"grid_search": [True, False]}
+# Both builders need the trailing "()" so gin passes instances: the training
+# script selects the observation space with isinstance() on this value, and a
+# bare "@Name" reference would hand over the configurable itself instead.
+run_experiment.obs_builder = {"grid_search": [@GlobalObsForRailEnv(), @GlobalObsForRailEnvDirectionDependent()]}# [@TreeObsForRailEnv(), @GlobalObsForRailEnv() ]}
+TreeObsForRailEnv.max_depth = 2
+LocalObsForRailEnv.view_radius = 5
+run_experiment.entropy_coeff = 0.01
diff --git a/RLLib_training/ b/RLLib_training/
index 330aced..223b905 100644
--- a/RLLib_training/
+++ b/RLLib_training/
@@ -1,4 +1,4 @@
-from baselines.RailEnvRLLibWrapper import RailEnvRLLibWrapper
+from baselines.RLLib_training.RailEnvRLLibWrapper import RailEnvRLLibWrapper
 import gym
@@ -14,7 +14,9 @@ from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph as PolicyGraph
 from ray.rllib.models import ModelCatalog
 from ray.tune.logger import pretty_print
-from baselines.CustomPreprocessor import CustomPreprocessor
+from baselines.RLLib_training.custom_preprocessors import CustomPreprocessor, ConvModelPreprocessor
+from baselines.RLLib_training.custom_models import ConvModelGlobalObs
 import ray
@@ -28,15 +30,20 @@ import gin
 from ray import tune
 from ray.rllib.utils.seed import seed as set_seed
-from flatland.envs.observations import TreeObsForRailEnv, GlobalObsForRailEnv, LocalObsForRailEnv
+from flatland.envs.observations import TreeObsForRailEnv, GlobalObsForRailEnv,\
+                                       LocalObsForRailEnv, GlobalObsForRailEnvDirectionDependent
 from ray.rllib.models.preprocessors import TupleFlatteningPreprocessor
 ModelCatalog.register_custom_preprocessor("tree_obs_prep", CustomPreprocessor)
 ModelCatalog.register_custom_preprocessor("global_obs_prep", TupleFlatteningPreprocessor)
+ModelCatalog.register_custom_preprocessor("conv_obs_prep", ConvModelPreprocessor)
+ModelCatalog.register_custom_model("conv_model", ConvModelGlobalObs)
 ray.init()#object_store_memory=150000000000, redis_max_memory=30000000000)
@@ -70,13 +77,16 @@ def train(config, reporter):
         obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(105,))
         preprocessor = "tree_obs_prep"
-    elif isinstance(config["obs_builder"], GlobalObsForRailEnv):
+    elif isinstance(config["obs_builder"], GlobalObsForRailEnv) or \
+         isinstance(config["obs_builder"], GlobalObsForRailEnvDirectionDependent):
         obs_space = gym.spaces.Tuple((
             gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 16)),
-            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 3)),
-            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 4)),
-            gym.spaces.Box(low=0, high=1, shape=(4,))))
-        preprocessor = "global_obs_prep"
+            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 8)),
+            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 2))))
+        if config['conv_model']:
+            preprocessor = "conv_obs_prep"
+        else:
+            preprocessor = "global_obs_prep"
     elif isinstance(config["obs_builder"], LocalObsForRailEnv):
         view_radius = config["obs_builder"].view_radius
@@ -104,7 +114,11 @@ def train(config, reporter):
     # Trainer configuration
     trainer_config = DEFAULT_CONFIG.copy()
-    trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor}
+    if config['conv_model']:
+        trainer_config['model'] = {"custom_model": "conv_model", "custom_preprocessor": preprocessor}
+    else:
+        trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor}
     trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
                                   "policy_mapping_fn": policy_mapping_fn,
                                   "policies_to_train": list(policy_graphs.keys())}
@@ -151,7 +165,7 @@ def train(config, reporter):
 def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                    map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder,
-                   entropy_coeff, seed):
+                   entropy_coeff, seed, conv_model):
@@ -167,7 +181,8 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                 'policy_folder_name': policy_folder_name,
                 "obs_builder": obs_builder,
                 "entropy_coeff": entropy_coeff,
-                "seed": seed
+                "seed": seed,
+                "conv_model": conv_model
             "cpu": 2,
@@ -179,6 +194,6 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
 if __name__ == '__main__':
-    dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/experiment_configs/observation_benchmark'  # To Modify
+    dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/conv_model_test'  # To Modify
     gin.parse_config_file(dir + '/config.gin')