Commit 63285ab0 authored by gmollard

small fixes

parent b905b1e2
@@ -5,10 +5,10 @@ run_experiment.hidden_sizes = [32, 32]
 run_experiment.map_width = 20
 run_experiment.map_height = 20
-run_experiment.n_agents = 7#{"grid_search": [3, 4, 5, 6, 7, 8]}
-run_experiment.rail_generator = "complex_rail_generator"
+run_experiment.n_agents = 7 #{"grid_search": [3, 4, 5, 6, 7, 8]}
+run_experiment.rail_generator = "complex_rail_generator" # Change this to "load_env" in order to load a predefined complex scene
 run_experiment.nr_extra = 5
-run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"#_map_size_{config[map_width]}"
+run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"
 run_experiment.seed = 123
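The commented-out {"grid_search": [3, 4, 5, 6, 7, 8]} value suggests that n_agents can also be swept with Ray Tune's grid search instead of being fixed at 7. A minimal sketch, assuming run_experiment forwards such values into a Tune experiment config (that forwarding is not shown in this commit):

from ray import tune

# Hypothetical equivalent of the commented-out grid-search value above;
# assumes the experiment runner passes these settings on to Ray Tune.
experiment_config = {
    "n_agents": tune.grid_search([3, 4, 5, 6, 7, 8]),
    "map_width": 20,
    "map_height": 20,
}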
@@ -33,8 +33,6 @@ __file_dirname__ = os.path.dirname(os.path.realpath(__file__))
 CHECKPOINT_PATH = os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'ppo_policy_two_obs_with_predictions_n_agents_4_map_size_20q58l5_f7',
                                'checkpoint_101', 'checkpoint-101')
-CHECKPOINT_PATH = '/home/guillaume/Desktop/distMAgent/ppo_policy_two_obs_with_predictions_n_agents_7_8e5zko1_/checkpoint_1301/checkpoint-1301'
 N_EPISODES = 10
 N_STEPS_PER_EPISODE = 50
@@ -67,8 +65,7 @@ def render_training_result(config):
     # Dict with the different policies to train
     policy_graphs = {
-        config['policy_folder_name'].format(**locals()): (PolicyGraph, obs_space, act_space, {})
-        # "ppo_policy": (PolicyGraph, obs_space, act_space, {})
+        "ppo_policy": (PolicyGraph, obs_space, act_space, {})
     }
     def policy_mapping_fn(agent_id):
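The policy_graphs dict now uses the fixed key "ppo_policy" rather than the formatted folder name, which matches the key used by trainer.get_policy in the next hunk. A minimal sketch of how policy_graphs and policy_mapping_fn are typically passed to an RLlib trainer under the old PolicyGraph API; the PPOAgent call and the "flatland_env" name are assumptions, not part of this commit:

from ray.rllib.agents.ppo import PPOAgent

# Assumed wiring (old RLlib multi-agent API). policy_mapping_fn is expected
# to return "ppo_policy" for every agent id so all agents share one policy.
trainer = PPOAgent(
    env="flatland_env",  # illustrative registered env name
    config={
        "multiagent": {
            "policy_graphs": policy_graphs,
            "policy_mapping_fn": policy_mapping_fn,
            "policies_to_train": ["ppo_policy"],
        },
    },
)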
@@ -106,8 +103,7 @@ def render_training_result(config):
     trainer.restore(CHECKPOINT_PATH)
-    # policy = trainer.get_policy("ppo_policy")
-    policy = trainer.get_policy(config['policy_folder_name'].format(**locals()))
+    policy = trainer.get_policy("ppo_policy")
     preprocessor = preprocessor(obs_space)
     env_renderer = RenderTool(env, gl="PIL")
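With the checkpoint restored and the policy retrieved under the "ppo_policy" key, the script can roll out N_EPISODES episodes of N_STEPS_PER_EPISODE steps and render them. A minimal sketch of such a loop, assuming the old PolicyGraph compute_actions API, an RLlib preprocessor exposing transform(), and a Flatland-style env returning per-agent dicts; none of these loop details are shown in this diff:

for _ in range(N_EPISODES):
    obs = env.reset()
    for _ in range(N_STEPS_PER_EPISODE):
        actions = {}
        for agent_id, agent_obs in obs.items():
            # compute_actions returns (actions, state_outs, extra_info);
            # take the single action of the one-element batch.
            flat_obs = preprocessor.transform(agent_obs)
            actions[agent_id] = policy.compute_actions([flat_obs])[0][0]
        obs, rewards, dones, _ = env.step(actions)
        env_renderer.renderEnv(show=True)  # old Flatland RenderTool call
        if dones.get("__all__"):
            break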
@@ -47,12 +47,17 @@ def on_episode_start(info):
 def on_episode_end(info):
     episode = info['episode']
+    # Calculation of a custom score metric: sum of all accumulated rewards, divided by the number of agents
+    # and the maximum number of time steps of the episode.
     score = 0
     for k, v in episode._agent_reward_history.items():
         score += np.sum(v)
     score /= (len(episode._agent_reward_history) * episode.horizon)
+    # Calculation of the proportion of episodes solved before the maximum time step
     done = 1
-    if len(episode._agent_reward_history) == episode.horizon:
+    if len(episode._agent_reward_history[0]) == episode.horizon:
         done = 0
     episode.custom_metrics["score"] = score
     episode.custom_metrics["proportion_episode_solved"] = done