From 14a359daee3c5c6bc81e6fa3166c73c09590635b Mon Sep 17 00:00:00 2001
From: Guillaume Mollard <guillaume.mollard2@gmail.com>
Date: Thu, 27 Jun 2019 15:50:54 +0200
Subject: [PATCH] done metric

---
 RLLib_training/train_experiment.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
index cd25ad0..654a100 100644
--- a/RLLib_training/train_experiment.py
+++ b/RLLib_training/train_experiment.py
@@ -50,13 +50,7 @@ def on_episode_start(info):
     episode = info['episode']
     map_width = info['env'].envs[0].width
     map_height = info['env'].envs[0].height
-    episode.horizon = map_width + map_height
-
-
-# def on_episode_step(info):
-#     episode = info['episode']
-#     print('#########################', episode._agent_reward_history)
-#     # print(ds)
+    episode.horizon = 3*(map_width + map_height)


 def on_episode_end(info):
@@ -64,17 +58,20 @@ def on_episode_end(info):
     score = 0
     for k, v in episode._agent_reward_history.items():
         score += np.sum(v)
-    score /= (len(episode._agent_reward_history) * 3 * episode.horizon)
+    score /= (len(episode._agent_reward_history) * episode.horizon)
+    done = 1
+    if len(episode._agent_reward_history) == episode.horizon:
+        done = 0
     episode.custom_metrics["score"] = score
+    episode.custom_metrics["proportion_episode_solved"] = done


 def train(config, reporter):
     print('Init Env')

     set_seed(config['seed'], config['seed'], config['seed'])
-    config['map_height'] = config['map_width']

-    # Example configuration to generate a random rail
+    # Environment parameters
     env_config = {"width": config['map_width'],
                   "height": config['map_height'],
                   "rail_generator": config["rail_generator"],
--
GitLab
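
Note (not part of the patch): the two callbacks modified above only take effect once they are registered in the trainer config. Below is a minimal sketch of that wiring under the dict-based callback API that RLlib exposed around Ray 0.7, the era this 2019 code targets. The "PPO" trainer, the CartPole stand-in environment, and the stop criterion are illustrative assumptions, not contents of the patch.

    import numpy as np
    import ray
    from ray import tune


    def on_episode_end(info):
        # Same mechanism as the patched metric: scalar values pushed into
        # episode.custom_metrics are aggregated by RLlib into
        # *_mean/*_min/*_max entries of the training results.
        episode = info['episode']
        total = sum(np.sum(v) for v in episode._agent_reward_history.values())
        episode.custom_metrics["score"] = total / max(episode.length, 1)


    ray.init()
    tune.run(
        "PPO",
        stop={"training_iteration": 1},
        config={
            "env": "CartPole-v0",  # stand-in; the patch targets a Flatland rail env
            "callbacks": {"on_episode_end": tune.function(on_episode_end)},
        },
    )

In this era of the API, each callback receives a single `info` dict (with "env" and "episode" keys), and callables placed in the config had to be wrapped with tune.function; later RLlib versions replaced this with a DefaultCallbacks class.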