diff --git a/RLLib_training/custom_preprocessors.py b/RLLib_training/custom_preprocessors.py
index 6d93aea149131465447d36578865cc3ccebe9a8f..bb6b21e316e72177dfad54f2f02d3ac527810520 100644
--- a/RLLib_training/custom_preprocessors.py
+++ b/RLLib_training/custom_preprocessors.py
@@ -50,15 +50,23 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1):
 
 class TreeObsPreprocessor(Preprocessor):
     def _init_shape(self, obs_space, options):
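+        # step_memory == 2 means the observation also includes the previous time step.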
+        self.step_memory = options["custom_options"]["step_memory"]
         return sum([space.shape[0] for space in obs_space]),
 
     def transform(self, observation):
-        data = norm_obs_clip(observation[0][0])
-        distance = norm_obs_clip(observation[0][1])
-        agent_data = np.clip(observation[0][2], -1, 1)
-        data2 = norm_obs_clip(observation[1][0])
-        distance2 = norm_obs_clip(observation[1][1])
-        agent_data2 = np.clip(observation[1][2], -1, 1)
+
+        if self.step_memory == 2:
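+            # Two consecutive tree observations, each a (data, distance, agent_data) triple.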
+            data = norm_obs_clip(observation[0][0])
+            distance = norm_obs_clip(observation[0][1])
+            agent_data = np.clip(observation[0][2], -1, 1)
+            data2 = norm_obs_clip(observation[1][0])
+            distance2 = norm_obs_clip(observation[1][1])
+            agent_data2 = np.clip(observation[1][2], -1, 1)
+            return np.concatenate((data, distance, agent_data, data2, distance2, agent_data2))
+        else:
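+            # Single time step: one (data, distance, agent_data) triple.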
+            data = norm_obs_clip(observation[0])
+            distance = norm_obs_clip(observation[1])
+            agent_data = np.clip(observation[2], -1, 1)
+            return np.concatenate((data, distance, agent_data))
 
-        return np.concatenate((np.concatenate((np.concatenate((data, distance)), agent_data)), np.concatenate((np.concatenate((data2, distance2)), agent_data2))))
 
diff --git a/RLLib_training/render_training_result.py b/RLLib_training/render_training_result.py
index 668c763d162bbafd1bb5c62fb2317324998768f3..1ee7cc1ce394f3b40791706871aa180ec0510b52 100644
--- a/RLLib_training/render_training_result.py
+++ b/RLLib_training/render_training_result.py
@@ -105,8 +105,8 @@ def render_training_result(config):
 
     policy = trainer.get_policy("ppo_policy")
 
-    preprocessor = preprocessor(obs_space)
-    env_renderer = RenderTool(env, gl="PIL")
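+    # TreeObsPreprocessor reads options["custom_options"], so pass step_memory nested the same way.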
+    preprocessor = preprocessor(obs_space, {"custom_options": {"step_memory": config["step_memory"]}})
+    env_renderer = RenderTool(env, gl="PILSVG")
     for episode in range(N_EPISODES):
 
         observation = env.reset()
diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
index 808bbc34556ce541ca41fc560b464c31b1dd96a3..c4c3be578b50608b962f0fd807bf213ed9990c4b 100644
--- a/RLLib_training/train_experiment.py
+++ b/RLLib_training/train_experiment.py
@@ -99,7 +99,8 @@ def train(config, reporter):
 
     # Trainer configuration
     trainer_config = DEFAULT_CONFIG.copy()
-    trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor}
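+    # "custom_options" is handed to TreeObsPreprocessor together with the rest of the model config.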
+    trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor,
+                               "custom_options": {"step_memory": config["step_memory"]}}
 
     trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
                                     "policy_mapping_fn": policy_mapping_fn,
@@ -131,6 +132,7 @@ def train(config, reporter):
             "on_episode_end": tune.function(on_episode_end)
         }
 
+
     def logger_creator(conf):
         """Creates a Unified logger with a default logdir prefix."""
         logdir = config['policy_folder_name'].format(**locals())
@@ -179,7 +181,8 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                 "kl_coeff": kl_coeff,
                 "lambda_gae": lambda_gae,
                 "min_dist": min_dist,
-                    "step_memory": step_memory
+                "step_memory": step_memory  # If equal to 2, the observation of the previous
+                                            # time step is concatenated with the current one as model input.
                 },
         resources_per_trial={
             "cpu": 3,