From 66a677eeb5bdaf4d05d75d9c00f0fe2af40d0256 Mon Sep 17 00:00:00 2001
From: "Egli Adrian (IT-SCI-API-PFI)" <adrian.egli@sbb.ch>
Date: Sat, 14 Nov 2020 21:50:50 +0100
Subject: [PATCH] ====================================================================================================
 ## Server Performance Stats
 ====================================================================================================
 - message_queue_latency => min: 0.00038123130798339844 || mean: 0.0015168707755788499 || max: 1.0546770095825195
 - current_episode_controller_inference_time => min: 0.0006983280181884766 || mean: 0.03446188241071793 || max: 0.4753117561340332
 - controller_inference_time => min: 0.0006983280181884766 || mean: 0.03446188241071793 || max: 0.4753117561340332
 - internal_env_step_time => min: 0.00036644935607910156 || mean: 0.008951559738510565 || max: 0.10788345336914062
 ====================================================================================================
 ####################################################################################################
 EVALUATION COMPLETE !!
 ####################################################################################################
 # Mean Reward : -5167.23
 # Sum Normalized Reward : 20.058875800849986 (primary score)
 # Mean Percentage Complete : 0.753 (secondary score)
 # Mean Normalized Reward : 0.66863
 ####################################################################################################
 ####################################################################################################

---
 .../multi_agent_training.py | 24 ++++++++++++++-----
 run.py                      |  5 +++-
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/reinforcement_learning/multi_agent_training.py b/reinforcement_learning/multi_agent_training.py
index 487d44d..4bdbe4e 100755
--- a/reinforcement_learning/multi_agent_training.py
+++ b/reinforcement_learning/multi_agent_training.py
@@ -172,7 +172,8 @@ def train_agent(train_params, train_env_params, eval_env_params, obs_params):
     completion_window = deque(maxlen=checkpoint_interval)
 
     # Double Dueling DQN policy
-    USE_SINGLE_AGENT_TRAINING = False
+    USE_SINGLE_AGENT_TRAINING = True
+    UPDATE_POLICY2_N_EPISODE = 1000
     policy = DDDQNPolicy(state_size, action_size, train_params)
     # policy = PPOAgent(state_size, action_size, n_agents)
     # Load existing policy
@@ -227,7 +228,7 @@ def train_agent(train_params, train_env_params, eval_env_params, obs_params):
         obs, info = train_env.reset(regenerate_rail=True, regenerate_schedule=True)
         policy.reset()
 
-        if episode_idx % 100 == 0:
+        if episode_idx % UPDATE_POLICY2_N_EPISODE == 0:
             policy2 = policy.clone()
 
         reset_timer.end()
@@ -499,14 +500,14 @@ def eval_policy(env, tree_observation, policy, train_params, obs_params):
 
 if __name__ == "__main__":
     parser = ArgumentParser()
-    parser.add_argument("-n", "--n_episodes", help="number of episodes to run", default=5400, type=int)
+    parser.add_argument("-n", "--n_episodes", help="number of episodes to run", default=54000, type=int)
     parser.add_argument("-t", "--training_env_config", help="training config id (eg 0 for Test_0)", default=2,
                         type=int)
-    parser.add_argument("-e", "--evaluation_env_config", help="evaluation config id (eg 0 for Test_0)", default=2,
+    parser.add_argument("-e", "--evaluation_env_config", help="evaluation config id (eg 0 for Test_0)", default=1,
                         type=int)
-    parser.add_argument("--n_evaluation_episodes", help="number of evaluation episodes", default=25, type=int)
+    parser.add_argument("--n_evaluation_episodes", help="number of evaluation episodes", default=2, type=int)
     parser.add_argument("--checkpoint_interval", help="checkpoint interval", default=100, type=int)
-    parser.add_argument("--eps_start", help="max exploration", default=1.0, type=float)
+    parser.add_argument("--eps_start", help="max exploration", default=0.1, type=float)
     parser.add_argument("--eps_end", help="min exploration", default=0.01, type=float)
     parser.add_argument("--eps_decay", help="exploration decay", default=0.9975, type=float)
     parser.add_argument("--buffer_size", help="replay buffer size", default=int(1e7), type=int)
@@ -563,6 +564,17 @@ if __name__ == "__main__":
             "malfunction_rate": 1 / 200,
             "seed": 0
         },
+        {
+            # Test_3
+            "n_agents": 58,
+            "x_dim": 40,
+            "y_dim": 40,
+            "n_cities": 5,
+            "max_rails_between_cities": 2,
+            "max_rails_in_city": 3,
+            "malfunction_rate": 1 / 200,
+            "seed": 0
+        },
     ]
 
     obs_params = {
diff --git a/run.py b/run.py
index f4eb48e..08b8580 100644
--- a/run.py
+++ b/run.py
@@ -26,7 +26,10 @@ from reinforcement_learning.dddqn_policy import DDDQNPolicy
 VERBOSE = True
 
 # Checkpoint to use (remember to push it!)
-checkpoint = "./checkpoints/201112143850-4100.pth"  # 21.543589381053096 DEPTH=2
+checkpoint = "./checkpoints/201112143850-5400.pth"  # 21.220418678677177 DEPTH=2 AGENTS=10
+checkpoint = "./checkpoints/201113070245-5400.pth"  # 19.690047767961005 DEPTH=2 AGENTS=20
+checkpoint = "./checkpoints/201113211844-6100.pth"  # 19.690047767961005 DEPTH=2 AGENTS=20
+
 
 # Use last action cache
 USE_ACTION_CACHE = False
-- 
GitLab