From 63285ab023efdc67d69f034c4832c01489d74d10 Mon Sep 17 00:00:00 2001
From: Guillaume Mollard <guillaume.mollard2@gmail.com>
Date: Sat, 29 Jun 2019 14:27:52 +0200
Subject: [PATCH] small fixes

---
 .../experiment_configs/config_example/config.gin |  6 +++---
 RLLib_training/render_training_result.py         |  8 ++------
 RLLib_training/train_experiment.py               |  7 ++++++-
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/RLLib_training/experiment_configs/config_example/config.gin b/RLLib_training/experiment_configs/config_example/config.gin
index 7a4c7c1..22f8078 100644
--- a/RLLib_training/experiment_configs/config_example/config.gin
+++ b/RLLib_training/experiment_configs/config_example/config.gin
@@ -5,10 +5,10 @@ run_experiment.hidden_sizes = [32, 32]
 run_experiment.map_width = 20
 run_experiment.map_height = 20
-run_experiment.n_agents = 7#{"grid_search": [3, 4, 5, 6, 7, 8]}
-run_experiment.rail_generator = "complex_rail_generator"
+run_experiment.n_agents = 7 #{"grid_search": [3, 4, 5, 6, 7, 8]}
+run_experiment.rail_generator = "complex_rail_generator" # Change this to "load_env" in order to load a predefined complex scene
 run_experiment.nr_extra = 5
-run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"#_map_size_{config[map_width]}"
+run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"
 run_experiment.seed = 123

diff --git a/RLLib_training/render_training_result.py b/RLLib_training/render_training_result.py
index 021b9c4..90b38a3 100644
--- a/RLLib_training/render_training_result.py
+++ b/RLLib_training/render_training_result.py
@@ -33,8 +33,6 @@ __file_dirname__ = os.path.dirname(os.path.realpath(__file__))
 CHECKPOINT_PATH = os.path.join(__file_dirname__, 'experiment_configs', 'config_example',
                                'ppo_policy_two_obs_with_predictions_n_agents_4_map_size_20q58l5_f7',
                                'checkpoint_101', 'checkpoint-101')
-CHECKPOINT_PATH = '/home/guillaume/Desktop/distMAgent/ppo_policy_two_obs_with_predictions_n_agents_7_8e5zko1_/checkpoint_1301/checkpoint-1301'
-
 N_EPISODES = 10
 N_STEPS_PER_EPISODE = 50
@@ -67,8 +65,7 @@ def render_training_result(config):
     # Dict with the different policies to train
     policy_graphs = {
-        config['policy_folder_name'].format(**locals()): (PolicyGraph, obs_space, act_space, {})
-        # "ppo_policy": (PolicyGraph, obs_space, act_space, {})
+        "ppo_policy": (PolicyGraph, obs_space, act_space, {})
     }

     def policy_mapping_fn(agent_id):
@@ -106,8 +103,7 @@ def render_training_result(config):
     trainer.restore(CHECKPOINT_PATH)

-    # policy = trainer.get_policy("ppo_policy")
-    policy = trainer.get_policy(config['policy_folder_name'].format(**locals()))
+    policy = trainer.get_policy("ppo_policy")
     preprocessor = preprocessor(obs_space)
     env_renderer = RenderTool(env, gl="PIL")

diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
index 1bac614..e7e9709 100644
--- a/RLLib_training/train_experiment.py
+++ b/RLLib_training/train_experiment.py
@@ -47,12 +47,17 @@ def on_episode_start(info):

 def on_episode_end(info):
     episode = info['episode']
+
+    # Calculation of a custom score metric: sum of all accumulated rewards, divided by the number of agents
+    # and by the maximum number of time steps of the episode.
     score = 0
     for k, v in episode._agent_reward_history.items():
         score += np.sum(v)
     score /= (len(episode._agent_reward_history) * episode.horizon)
+
+    # Calculation of the proportion of solved episodes before the maximum time step
     done = 1
-    if len(episode._agent_reward_history) == episode.horizon:
+    if len(episode._agent_reward_history[0]) == episode.horizon:
         done = 0
     episode.custom_metrics["score"] = score
     episode.custom_metrics["proportion_episode_solved"] = done
--
GitLab
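
Note: for readability, the sketch below shows the full on_episode_end callback as it reads with this patch applied, assembled from the hunk above. The numpy import and the registration hint at the end are assumptions based on the rest of train_experiment.py and the RLlib API of that Ray generation; they are not part of the diff itself.

import numpy as np


def on_episode_end(info):
    # RLlib hands the callback an `info` dict; `info['episode']` is the episode
    # object whose `_agent_reward_history` maps each agent id to the list of
    # rewards that agent received at every step.
    episode = info['episode']

    # Custom score: sum of all rewards accumulated by all agents, normalised by
    # the number of agents and by the episode horizon (maximum number of steps).
    score = 0
    for k, v in episode._agent_reward_history.items():  # k: agent id, v: per-step rewards
        score += np.sum(v)
    score /= (len(episode._agent_reward_history) * episode.horizon)

    # The episode counts as solved only if it finished before the horizon: when
    # agent 0's reward history is exactly `horizon` entries long, time ran out.
    done = 1
    if len(episode._agent_reward_history[0]) == episode.horizon:
        done = 0

    episode.custom_metrics["score"] = score
    episode.custom_metrics["proportion_episode_solved"] = done

In the RLlib releases of this era such callbacks are typically wired into the trainer config, e.g. "callbacks": {"on_episode_end": tune.function(on_episode_end)}; that registration lives elsewhere in train_experiment.py and is only assumed here.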