From 14a359daee3c5c6bc81e6fa3166c73c09590635b Mon Sep 17 00:00:00 2001
From: Guillaume Mollard <guillaume.mollard2@gmail.com>
Date: Thu, 27 Jun 2019 15:50:54 +0200
Subject: [PATCH] Add episode-solved metric and scale horizon with map size

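Track whether each episode is actually solved:

- scale the per-episode horizon from (map_width + map_height) to
  3 * (map_width + map_height) and normalize the score by it;
- record a "proportion_episode_solved" custom metric that is 1 when the
  episode terminates before the horizon and 0 when it is cut off (RLlib
  averages custom metrics over episodes, so this reports the fraction of
  episodes solved);
- stop forcing square maps by no longer overwriting map_height with
  map_width, and drop the commented-out on_episode_step stub.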
---
 RLLib_training/train_experiment.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
index cd25ad0..654a100 100644
--- a/RLLib_training/train_experiment.py
+++ b/RLLib_training/train_experiment.py
@@ -50,13 +50,7 @@ def on_episode_start(info):
     episode = info['episode']
     map_width = info['env'].envs[0].width
     map_height = info['env'].envs[0].height
-    episode.horizon = map_width + map_height
-    
-
-# def on_episode_step(info):
-#     episode = info['episode']
-#     print('#########################', episode._agent_reward_history)
-#     # print(ds)
+    episode.horizon = 3 * (map_width + map_height)
 
 
 def on_episode_end(info):
@@ -64,17 +58,21 @@ def on_episode_end(info):
     score = 0
     for k, v in episode._agent_reward_history.items():
         score += np.sum(v)
-    score /= (len(episode._agent_reward_history) * 3 * episode.horizon)
+    score /= (len(episode._agent_reward_history) * episode.horizon)
+    # Episode counts as solved unless it was cut off at the horizon
+    done = 1
+    if episode.length >= episode.horizon:
+        done = 0
     episode.custom_metrics["score"] = score
+    episode.custom_metrics["proportion_episode_solved"] = done
 
 
 def train(config, reporter):
     print('Init Env')
 
     set_seed(config['seed'], config['seed'], config['seed'])
-    config['map_height'] = config['map_width']
 
-    # Example configuration to generate a random rail
+    # Environment parameters
     env_config = {"width": config['map_width'],
                   "height": config['map_height'],
                   "rail_generator": config["rail_generator"],
-- 
GitLab
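
For reviewers: a minimal sketch of how episode callbacks are registered so
the custom metrics above get collected, using the Ray ~0.7 tune.function
style of this era. That train_experiment.py wires them exactly like this is
an assumption; only the "callbacks" entries correspond to functions touched
by this patch.

    from ray import tune

    # Register the episode callbacks in the trainer config (Ray ~0.7 API);
    # all other trainer config keys are omitted here as placeholders.
    trainer_config = {
        "callbacks": {
            "on_episode_start": tune.function(on_episode_start),
            "on_episode_end": tune.function(on_episode_end),
        },
    }

With this in place, training results report the aggregated values as
custom_metrics/score_mean and custom_metrics/proportion_episode_solved_mean.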