diff --git a/RLLib_training/experiment_configs/config_example/config.gin b/RLLib_training/experiment_configs/config_example/config.gin
index 7a4c7c1c817b3a020f77b356e00b09f07fce0d1a..22f807893eaee57a52e860e1d227c48dedb724ad 100644
--- a/RLLib_training/experiment_configs/config_example/config.gin
+++ b/RLLib_training/experiment_configs/config_example/config.gin
@@ -5,10 +5,10 @@ run_experiment.hidden_sizes = [32, 32]
 
 run_experiment.map_width = 20
 run_experiment.map_height = 20
-run_experiment.n_agents = 7#{"grid_search": [3, 4, 5, 6, 7, 8]}
-run_experiment.rail_generator = "complex_rail_generator"
+run_experiment.n_agents = 7 #{"grid_search": [3, 4, 5, 6, 7, 8]}
+run_experiment.rail_generator = "complex_rail_generator" # Change this to "load_env" to load a predefined complex scene
 run_experiment.nr_extra = 5
-run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"#_map_size_{config[map_width]}"
+run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"
 
 run_experiment.seed = 123
 
diff --git a/RLLib_training/render_training_result.py b/RLLib_training/render_training_result.py
index 021b9c49130aac813def6dbf0a4d7da8f285ebec..90b38a3329b47163a4f22f61b39d81b0f1ab26ac 100644
--- a/RLLib_training/render_training_result.py
+++ b/RLLib_training/render_training_result.py
@@ -33,8 +33,6 @@ __file_dirname__ = os.path.dirname(os.path.realpath(__file__))
 
 CHECKPOINT_PATH = os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'ppo_policy_two_obs_with_predictions_n_agents_4_map_size_20q58l5_f7',
                                'checkpoint_101', 'checkpoint-101')
-CHECKPOINT_PATH = '/home/guillaume/Desktop/distMAgent/ppo_policy_two_obs_with_predictions_n_agents_7_8e5zko1_/checkpoint_1301/checkpoint-1301'
-
 N_EPISODES = 10
 N_STEPS_PER_EPISODE = 50
 
@@ -67,8 +65,7 @@ def render_training_result(config):
 
     # Dict with the different policies to train
     policy_graphs = {
-        config['policy_folder_name'].format(**locals()): (PolicyGraph, obs_space, act_space, {})
-        # "ppo_policy": (PolicyGraph, obs_space, act_space, {})
+        "ppo_policy": (PolicyGraph, obs_space, act_space, {})
     }
 
     def policy_mapping_fn(agent_id):
@@ -106,8 +103,7 @@ def render_training_result(config):
 
     trainer.restore(CHECKPOINT_PATH)
 
-    # policy = trainer.get_policy("ppo_policy")
-    policy = trainer.get_policy(config['policy_folder_name'].format(**locals()))
+    policy = trainer.get_policy("ppo_policy")
 
     preprocessor = preprocessor(obs_space)
     env_renderer = RenderTool(env, gl="PIL")
diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
index 1bac614fb29f903570df3327602bc73206f51fd7..e7e9709aa092ebd2b886aa005731e386f5f2e89c 100644
--- a/RLLib_training/train_experiment.py
+++ b/RLLib_training/train_experiment.py
@@ -47,12 +47,17 @@ def on_episode_start(info):
 
 def on_episode_end(info):
     episode = info['episode']
+
+    # Custom score metric: sum of the accumulated rewards of all agents, divided by the number of agents
+    # and the maximum number of time steps in the episode (the horizon).
     score = 0
     for k, v in episode._agent_reward_history.items():
         score += np.sum(v)
     score /= (len(episode._agent_reward_history) * episode.horizon)
+
+    # Flag whether the episode was solved before the maximum time step (reported as the proportion of solved episodes)
     done = 1
-    if len(episode._agent_reward_history) == episode.horizon:
+    if len(episode._agent_reward_history[0]) == episode.horizon:
         done = 0
     episode.custom_metrics["score"] = score
     episode.custom_metrics["proportion_episode_solved"] = done