diff --git a/RLLib_training/custom_preprocessors.py b/RLLib_training/custom_preprocessors.py
index bb6b21e316e72177dfad54f2f02d3ac527810520..d4c81a83f1c05317315a3f71f99565006e9311e1 100644
--- a/RLLib_training/custom_preprocessors.py
+++ b/RLLib_training/custom_preprocessors.py
@@ -1,52 +1,7 @@
 import numpy as np
 from ray.rllib.models.preprocessors import Preprocessor
-
-def max_lt(seq, val):
-    """
-    Return greatest item in seq for which item < val applies.
-    None is returned if seq was empty or all items in seq were >= val.
-    """
-    max = 0
-    idx = len(seq) - 1
-    while idx >= 0:
-        if seq[idx] < val and seq[idx] >= 0 and seq[idx] > max:
-            max = seq[idx]
-        idx -= 1
-    return max
-
-
-def min_lt(seq, val):
-    """
-    Return smallest item in seq for which item > val applies.
-    None is returned if seq was empty or all items in seq were >= val.
-    """
-    min = np.inf
-    idx = len(seq) - 1
-    while idx >= 0:
-        if seq[idx] >= val and seq[idx] < min:
-            min = seq[idx]
-        idx -= 1
-    return min
-
-
-def norm_obs_clip(obs, clip_min=-1, clip_max=1):
-    """
-    This function returns the difference between min and max value of an observation
-    :param obs: Observation that should be normalized
-    :param clip_min: min value where observation will be clipped
-    :param clip_max: max value where observation will be clipped
-    :return: returnes normalized and clipped observatoin
-    """
-    max_obs = max(1, max_lt(obs, 1000))
-    min_obs = min(max_obs, min_lt(obs, 0))
-
-    if max_obs == min_obs:
-        return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
-    norm = np.abs(max_obs - min_obs)
-    if norm == 0:
-        norm = 1.
-    return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
-
+from utils.observation_utils import norm_obs_clip
+
 
 class TreeObsPreprocessor(Preprocessor):
     def _init_shape(self, obs_space, options):
diff --git a/RLLib_training/experiment_configs/config_example/config.gin b/RLLib_training/experiment_configs/config_example/config.gin
index 22f807893eaee57a52e860e1d227c48dedb724ad..59d2dfb508f13cccf4b9152f24ab06d44c290450 100644
--- a/RLLib_training/experiment_configs/config_example/config.gin
+++ b/RLLib_training/experiment_configs/config_example/config.gin
@@ -5,7 +5,8 @@ run_experiment.hidden_sizes = [32, 32]
 
 run_experiment.map_width = 20
 run_experiment.map_height = 20
-run_experiment.n_agents = 7 #{"grid_search": [3, 4, 5, 6, 7, 8]}
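+# grid_search makes Tune launch one trial per value in this list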
+run_experiment.n_agents = {"grid_search": [3, 4, 5, 6, 7, 8]}
 run_experiment.rail_generator = "complex_rail_generator" # Change this to "load_env" in order to load a predefined complex scene
 run_experiment.nr_extra = 5
 run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"
diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
index c4c3be578b50608b962f0fd807bf213ed9990c4b..7435a8fed728ec363321ba7a2bcf04b186513559 100644
--- a/RLLib_training/train_experiment.py
+++ b/RLLib_training/train_experiment.py
@@ -54,11 +54,13 @@ def on_episode_end(info):
     for k, v in episode._agent_reward_history.items():
         score += np.sum(v)
     score /= (len(episode._agent_reward_history) * episode.horizon)
 
     # Calculation of the proportion of solved episodes before the maximum time step
-    done = 1
-    if len(episode._agent_reward_history[0]) == episode.horizon:
-        done = 0
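+    # An episode counts as solved if it ends at least 5 steps before the horizon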
+    done = 0
+    if len(episode._agent_reward_history[0]) <= episode.horizon - 5:
+        done = 1
+
     episode.custom_metrics["score"] = score
     episode.custom_metrics["proportion_episode_solved"] = done
 
@@ -68,6 +69,15 @@ def train(config, reporter):
 
     set_seed(config['seed'], config['seed'], config['seed'])
 
+    # The size of the flattened tree observation follows from the tree depth and the number of features per node
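+    # e.g. tree_depth = 2 gives 1 + 4 + 16 = 21 nodes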
+    num_features_per_node = config['obs_builder'].observation_dim
+    tree_depth = 2
+    nr_nodes = 0
+    for i in range(tree_depth + 1):
+        nr_nodes += np.power(4, i)
+    obs_size = num_features_per_node * nr_nodes
+
     # Environment parameters
     env_config = {"width": config['map_width'],
                   "height": config['map_height'],
@@ -81,7 +91,7 @@ def train(config, reporter):
 
     # Observation space and action space definitions
     if isinstance(config["obs_builder"], TreeObsForRailEnv):
-        obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(168,)),) * 2)
+        obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(obs_size,)),) * 2)
         preprocessor = "tree_obs_prep"
     else:
         raise ValueError("Undefined observation space") # Only TreeObservation implemented for now.
@@ -100,7 +110,7 @@ def train(config, reporter):
     # Trainer configuration
     trainer_config = DEFAULT_CONFIG.copy()
     trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor,
-                               "custom_options": {"step_memory": config["step_memory"]}}
+                               "custom_options": {"step_memory": config["step_memory"], "obs_size": obs_size}}
 
     trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
                                     "policy_mapping_fn": policy_mapping_fn,
@@ -111,9 +121,9 @@ def train(config, reporter):
 
     # Parameters for calculation parallelization
     trainer_config["num_workers"] = 0
-    trainer_config["num_cpus_per_worker"] = 3
-    trainer_config["num_gpus"] = 0.0
-    trainer_config["num_gpus_per_worker"] = 0.0
+    trainer_config["num_cpus_per_worker"] = 8
+    trainer_config["num_gpus"] = 0.2
+    trainer_config["num_gpus_per_worker"] = 0.2
     trainer_config["num_cpus_for_driver"] = 1
     trainer_config["num_envs_per_worker"] = 1
 
@@ -185,8 +195,8 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                                            # the observation of last time step will be given as input to the model.
                 },
         resources_per_trial={
-            "cpu": 3,
-            "gpu": 0
+            "cpu": 8,
+            "gpu": 0.2
         },
         verbose=2,
         local_dir=local_dir
diff --git a/torch_training/Getting_Started_Training.md b/torch_training/Getting_Started_Training.md
index b69467ebb05024ce7892bd83b83e662a8b168f35..d575e1c673665013b55bccd3faecde526cec8ab4 100644
--- a/torch_training/Getting_Started_Training.md
+++ b/torch_training/Getting_Started_Training.md
@@ -105,7 +105,7 @@ We have now successfully set up the environment for training. To visualize it in
 env_renderer = RenderTool(env, gl="PILSVG", )
 ```
 
-###Setting up the agent
+### Setting up the agent
 
 To set up an appropriate agent we need the state and action space sizes. From the discussion above about the tree observation we end up with: