diff --git a/RLLib_training/experiment_configs/config_example/config.gin b/RLLib_training/experiment_configs/config_example/config.gin
index 7a4c7c1c817b3a020f77b356e00b09f07fce0d1a..1fc64cbd3f8f0b9d9e0e1af32299553f342ca19f 100644
--- a/RLLib_training/experiment_configs/config_example/config.gin
+++ b/RLLib_training/experiment_configs/config_example/config.gin
@@ -5,10 +5,10 @@ run_experiment.hidden_sizes = [32, 32]
 
 run_experiment.map_width = 20
 run_experiment.map_height = 20
-run_experiment.n_agents = 7#{"grid_search": [3, 4, 5, 6, 7, 8]}
+run_experiment.n_agents = {"grid_search": [3, 4, 5, 6, 7, 8]}
 run_experiment.rail_generator = "complex_rail_generator"
 run_experiment.nr_extra = 5
-run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"#_map_size_{config[map_width]}"
+run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_map_size_{config[map_width]}_"
 
 run_experiment.seed = 123
 
diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
index 1bac614fb29f903570df3327602bc73206f51fd7..44fa26daf99b8f18610bc721d27245a51b8588fc 100644
--- a/RLLib_training/train_experiment.py
+++ b/RLLib_training/train_experiment.py
@@ -51,9 +51,9 @@ def on_episode_end(info):
     for k, v in episode._agent_reward_history.items():
         score += np.sum(v)
     score /= (len(episode._agent_reward_history) * episode.horizon)
-    done = 1
-    if len(episode._agent_reward_history) == episode.horizon:
-        done = 0
+    done = 0
+    if len(episode._agent_reward_history[0]) <= episode.horizon-5:
+        done = 1
     episode.custom_metrics["score"] = score
     episode.custom_metrics["proportion_episode_solved"] = done
 