diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..63972a8bcb2fba2edbb88a401b200e33374d4c23
--- /dev/null
+++ b/README.md
@@ -0,0 +1,8 @@
+This repository contains examples of scripts to train agents in the Flatland environment.
+
+It should be cloned inside the main flatland repository.
+
+The `torch_training` folder shows an example of how to train agents with a DQN implemented in PyTorch.
+
+The `RLLib_training` folder shows an example of how to train agents with algorithms implemented in the RLLib library, available at: <https://github.com/ray-project/ray/tree/master/python/ray/rllib>
+
diff --git a/RLLib_training/README.md b/RLLib_training/README.md
index 63afbd4c35e0cd03683920d784048914f5b30c04..21665a54eea1595b1679ebd587392cb1d5725eea 100644
--- a/RLLib_training/README.md
+++ b/RLLib_training/README.md
@@ -1,27 +1,25 @@
 This repository allows to run Rail Environment multi agent training with the RLLib Library.
-It should be clone inside the main flatland repository.
-
 ## Installation:
 ```sh
 pip install ray
 pip install gin-config
 ```
-To start a grid search on some parameters, you can create a folder containing a config.gin file (see example in `grid_search_configs/n_agents_grid_search/config.gin`.
+To start a training run with different parameters, you can create a folder containing a config.gin file (see the example in `experiment_configs/config_example/config.gin`).
-Then, you can modify the config.gin file path at the end of the `grid_search_train.py` file.
+Then, you can modify the config.gin file path at the end of the `train_experiment.py` file.
 The results will be stored inside the folder, and the learning curves can be visualized in tensorboard:
 ```
-tensorboard --logdir=/path/to/foler_containing_config_gin_file
+tensorboard --logdir=/path/to/folder_containing_config_gin_file
 ```
 ## Gin config files
-In each config.gin files, all the parameters, except `local_dir` of the `run_experiment` functions have to be specified.
+In each config.gin file, all the parameters of the `run_experiment` function have to be specified.
 For example, to indicate the number of agents that have to be initialized at the beginning of each simulation, the following line should be added:
 ```
@@ -54,4 +52,26 @@ Note that `@TreeObsForRailEnv` references the class, while `@TreeObsForRailEnv()
-More documentation on how to use gin-config can be found on the library github repository: https://github.com/google/gin-config
+More documentation on how to use gin-config can be found in its GitHub repository: https://github.com/google/gin-config
+
+## Run an example:
+To start a training on a 20x20 map, with different numbers of agents initialized at each episode, one can run the `train_experiment.py` script:
+```
+python baselines/RLLib_training/train_experiment.py
+```
+This will load the gin config file from the folder `experiment_configs/config_example`.
+
+To visualize the result of a training, one can load a training checkpoint and use the learned policy.
+This is done in the `render_training_result.py` script.
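+A rough sketch of that workflow (illustrative only; the wrapper construction, the rollout loop and the checkpoint handling below are assumptions in the spirit of the training scripts, not the actual contents of `render_training_result.py`):
+
+```
+import os
+
+import gin
+import ray
+import ray.rllib.agents.ppo.ppo as ppo
+from ray.rllib.agents.ppo.ppo import PPOTrainer
+from RailEnvRLLibWrapper import RailEnvRLLibWrapper
+from flatland.envs.generators import complex_rail_generator
+
+ray.init()
+gin.parse_config_file(os.path.join('experiment_configs', 'config_example', 'config.gin'))
+
+# Environment and trainer config mirroring the training setup. The multi-agent
+# policy setup used during training (policy graphs and mapping function) has to
+# be replicated as well; it is omitted here for brevity.
+env_config = {"width": 20,
+              "height": 20,
+              "rail_generator": complex_rail_generator(nr_start_goal=5, min_dist=5, max_dist=99999, seed=0),
+              "number_of_agents": 5}
+agent_config = ppo.DEFAULT_CONFIG.copy()
+agent_config["env_config"] = env_config
+
+# Rebuild a trainer and load the learned weights from a training checkpoint.
+CHECKPOINT_PATH = os.path.join('experiment_configs', 'config_example',
+                               'ppo_policy_two_obs_with_predictions_n_agents_4_map_size_20q58l5_f7',
+                               'checkpoint_101', 'checkpoint-101')
+trainer = PPOTrainer(env=RailEnvRLLibWrapper, config=agent_config)
+trainer.restore(CHECKPOINT_PATH)
+
+# Roll out the learned policy, assuming the wrapper follows RLlib's MultiAgentEnv
+# dict convention. Rendering calls (e.g. flatland's render utilities) would go
+# inside this loop.
+env = RailEnvRLLibWrapper(env_config)  # assumed to accept the env_config dict
+obs = env.reset()
+done = {"__all__": False}
+while not done["__all__"]:
+    actions = {i: trainer.compute_action(obs[i], policy_id="ppo_policy") for i in obs}
+    obs, rewards, done, _ = env.step(actions)
+```
+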
One has to modify the `CHECKPOINT_PATH` at the beginning of this script: + +``` +CHECKPOINT_PATH = os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'ppo_policy_two_obs_with_predictions_n_agents_4_map_size_20q58l5_f7', + 'checkpoint_101', 'checkpoint-101') +``` +and load the corresponding gin config file: + +``` +gin.parse_config_file(os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'config.gin')) +``` + + diff --git a/RLLib_training/custom_models.py b/RLLib_training/custom_models.py deleted file mode 100644 index 81f5223393833986082727feee58699aaa9c60d5..0000000000000000000000000000000000000000 --- a/RLLib_training/custom_models.py +++ /dev/null @@ -1,101 +0,0 @@ -from ray.rllib.models import ModelCatalog, Model -from ray.rllib.models.misc import normc_initializer - -import tensorflow as tf - - -class ConvModelGlobalObs(Model): - def _build_layers_v2(self, input_dict, num_outputs, options): - """Define the layers of a custom model. - Arguments: - input_dict (dict): Dictionary of input tensors, including "obs", - "prev_action", "prev_reward", "is_training". - num_outputs (int): Output tensor must be of size - [BATCH_SIZE, num_outputs]. - options (dict): Model options. - Returns: - (outputs, feature_layer): Tensors of size [BATCH_SIZE, num_outputs] - and [BATCH_SIZE, desired_feature_size]. - When using dict or tuple observation spaces, you can access - the nested sub-observation batches here as well: - Examples: - >>> print(input_dict) - {'prev_actions': <tf.Tensor shape=(?,) dtype=int64>, - 'prev_rewards': <tf.Tensor shape=(?,) dtype=float32>, - 'is_training': <tf.Tensor shape=(), dtype=bool>, - 'obs': (observation, features) - """ - # Convolutional Layer #1 - - Relu = tf.nn.relu - BatchNormalization = tf.layers.batch_normalization - Dropout = tf.layers.dropout - Dense = tf.contrib.layers.fully_connected - - map_size = int(input_dict['obs'][0].shape[0]) - - N_CHANNELS = 96 - - conv1 = Relu(self.conv2d(input_dict['obs'], N_CHANNELS, 'valid', strides=(2, 2))) - - # conv2 = Relu(self.conv2d(conv1, 64, 'valid')) - - # conv3 = Relu(self.conv2d(conv2, 64, 'valid')) - - conv2_flat = tf.reshape(conv1, [-1, int(N_CHANNELS * ((map_size-3 + 1)/2)**2)]) - # conv4_feature = tf.concat((conv2_flat, input_dict['obs'][1]), axis=1) - s_fc1 = Relu(Dense(conv2_flat, 256)) - layerN_minus_1 = Relu(Dense(s_fc1, 64)) - layerN = Dense(layerN_minus_1, num_outputs) - return layerN, layerN_minus_1 - - def conv2d(self, x, out_channels, padding, strides=(1,1)): - return tf.layers.conv2d(x, out_channels, kernel_size=[3, 3], padding=padding, - use_bias=True, strides=strides) - - -class LightModel(Model): - def _build_layers_v2(self, input_dict, num_outputs, options): - """Define the layers of a custom model. - Arguments: - input_dict (dict): Dictionary of input tensors, including "obs", - "prev_action", "prev_reward", "is_training". - num_outputs (int): Output tensor must be of size - [BATCH_SIZE, num_outputs]. - options (dict): Model options. - Returns: - (outputs, feature_layer): Tensors of size [BATCH_SIZE, num_outputs] - and [BATCH_SIZE, desired_feature_size]. 
- When using dict or tuple observation spaces, you can access - the nested sub-observation batches here as well: - Examples: - >>> print(input_dict) - {'prev_actions': <tf.Tensor shape=(?,) dtype=int64>, - 'prev_rewards': <tf.Tensor shape=(?,) dtype=float32>, - 'is_training': <tf.Tensor shape=(), dtype=bool>, - 'obs': (observation, features) - """ - # print(input_dict) - # Convolutional Layer #1 - self.sess = tf.get_default_session() - Relu = tf.nn.relu - BatchNormalization = tf.layers.batch_normalization - Dropout = tf.layers.dropout - Dense = tf.contrib.layers.fully_connected - - #conv1 = Relu(self.conv2d(input_dict['obs'][0], 32, 'valid')) - conv1 = Relu(self.conv2d(input_dict['obs'], 32, 'valid')) - conv2 = Relu(self.conv2d(conv1, 16, 'valid')) - - # conv3 = Relu(self.conv2d(conv2, 64, 'valid')) - - conv4_flat = tf.reshape(conv2, [-1, 16 * (17-2*2)**2]) - #conv4_feature = tf.concat((conv4_flat, input_dict['obs'][1]), axis=1) - s_fc1 = Relu(Dense(conv4_flat, 128, weights_initializer=normc_initializer(1.0))) - # layerN_minus_1 = Relu(Dense(s_fc1, 256, use_bias=False)) - layerN = Dense(s_fc1, num_outputs, weights_initializer=normc_initializer(0.01)) - return layerN, s_fc1 - - def conv2d(self, x, out_channels, padding): - return tf.layers.conv2d(x, out_channels, kernel_size=[3, 3], padding=padding, use_bias=True) - # weights_initializer=normc_initializer(1.0)) diff --git a/RLLib_training/experiment_configs/experiment_agent_memory/__init__.py b/RLLib_training/experiment_configs/experiment_agent_memory/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/RLLib_training/experiment_configs/observation_benchmark_loaded_env/__init__.py b/RLLib_training/experiment_configs/observation_benchmark_loaded_env/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/RLLib_training/train.py b/RLLib_training/train.py deleted file mode 100644 index ba5f4eab43f5173dd410bc6d9b306d90e0e21ffc..0000000000000000000000000000000000000000 --- a/RLLib_training/train.py +++ /dev/null @@ -1,83 +0,0 @@ -import random - -import gym -import numpy as np -import ray -import ray.rllib.agents.ppo.ppo as ppo -from RailEnvRLLibWrapper import RailEnvRLLibWrapper -from flatland.envs.generators import complex_rail_generator -from ray.rllib.agents.ppo.ppo import PPOTrainer -from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph -from ray.rllib.models import ModelCatalog -from ray.tune.logger import pretty_print - -from RLLib_training.custom_preprocessors import CustomPreprocessor - -ModelCatalog.register_custom_preprocessor("my_prep", CustomPreprocessor) -ray.init() - - -def train(config): - print('Init Env') - random.seed(1) - np.random.seed(1) - - transition_probability = [15, # empty cell - Case 0 - 5, # Case 1 - straight - 5, # Case 2 - simple switch - 1, # Case 3 - diamond crossing - 1, # Case 4 - single slip - 1, # Case 5 - double slip - 1, # Case 6 - symmetrical - 0, # Case 7 - dead end - 1, # Case 1b (8) - simple turn right - 1, # Case 1c (9) - simple turn left - 1] # Case 2b (10) - simple switch mirrored - - # Example generate a random rail - env_config = {"width": 20, - "height": 20, - "rail_generator": complex_rail_generator(nr_start_goal=5, min_dist=5, max_dist=99999, seed=0), - "number_of_agents": 5} - - obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(105,)) - act_space = gym.spaces.Discrete(4) - - # Dict with the 
different policies to train - policy_graphs = { - "ppo_policy": (PPOPolicyGraph, obs_space, act_space, {}) - } - - def policy_mapping_fn(agent_id): - return f"ppo_policy" - - agent_config = ppo.DEFAULT_CONFIG.copy() - agent_config['model'] = {"fcnet_hiddens": [32, 32], "custom_preprocessor": "my_prep"} - agent_config['multiagent'] = {"policy_graphs": policy_graphs, - "policy_mapping_fn": policy_mapping_fn, - "policies_to_train": list(policy_graphs.keys())} - agent_config["horizon"] = 50 - agent_config["num_workers"] = 0 - # agent_config["sample_batch_size"]: 1000 - # agent_config["num_cpus_per_worker"] = 40 - # agent_config["num_gpus"] = 2.0 - # agent_config["num_gpus_per_worker"] = 2.0 - # agent_config["num_cpus_for_driver"] = 5 - # agent_config["num_envs_per_worker"] = 15 - agent_config["env_config"] = env_config - # agent_config["batch_mode"] = "complete_episodes" - - ppo_trainer = PPOTrainer(env=RailEnvRLLibWrapper, config=agent_config) - - for i in range(100000 + 2): - print("== Iteration", i, "==") - - print("-- PPO --") - print(pretty_print(ppo_trainer.train())) - - # if i % config['save_every'] == 0: - # checkpoint = ppo_trainer.save() - # print("checkpoint saved at", checkpoint) - - -train({})