Commit 66a677ee authored by Egli Adrian (IT-SCI-API-PFI)


====================================================================================================
## Server Performance Stats
====================================================================================================
         - message_queue_latency         => min: 0.00038123130798339844 || mean: 0.0015168707755788499 || max: 1.0546770095825195
         - current_episode_controller_inference_time     => min: 0.0006983280181884766 || mean: 0.03446188241071793 || max: 0.4753117561340332
         - controller_inference_time     => min: 0.0006983280181884766 || mean: 0.03446188241071793 || max: 0.4753117561340332
         - internal_env_step_time        => min: 0.00036644935607910156 || mean: 0.008951559738510565 || max: 0.10788345336914062
====================================================================================================
####################################################################################################
EVALUATION COMPLETE !!
####################################################################################################
# Mean Reward : -5167.23
# Sum Normalized Reward : 20.058875800849986 (primary score)
# Mean Percentage Complete : 0.753 (secondary score)
# Mean Normalized Reward : 0.66863
####################################################################################################
####################################################################################################
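A quick sanity check on the logged scores: the primary score is the per-episode normalized reward summed over the evaluation run, so dividing it by the mean normalized reward recovers the number of episodes.

    # Sanity check on the logged scores (values copied from the output above):
    sum_norm = 20.058875800849986   # Sum Normalized Reward (primary score)
    mean_norm = 0.66863             # Mean Normalized Reward
    print(sum_norm / mean_norm)     # ~30.0 -> about 30 evaluation episodes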
parent 625403b0
@@ -172,7 +172,8 @@ def train_agent(train_params, train_env_params, eval_env_params, obs_params):
     completion_window = deque(maxlen=checkpoint_interval)
     # Double Dueling DQN policy
-    USE_SINGLE_AGENT_TRAINING = False
+    USE_SINGLE_AGENT_TRAINING = True
+    UPDATE_POLICY2_N_EPISODE = 1000
     policy = DDDQNPolicy(state_size, action_size, train_params)
     # policy = PPOAgent(state_size, action_size, n_agents)
     # Load existing policy
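Taken together, the two new switches point to a single-learner setup in which the remaining agents are driven by a periodically refreshed snapshot (policy2, see the next hunk). A minimal sketch of what the flag plausibly gates; learner_id, get_agent_handles() and the act(obs, eps) signature are assumed from the common Flatland starter-kit layout, not taken from this diff:

    def select_actions(train_env, obs, policy, policy2, eps, learner_id=0):
        # Hedged sketch, not the repo's code: the learner explores with
        # epsilon-greedy while every other agent follows the frozen policy2.
        actions = {}
        for handle in train_env.get_agent_handles():
            if USE_SINGLE_AGENT_TRAINING and handle != learner_id:
                actions[handle] = policy2.act(obs[handle], eps=0.0)
            else:
                actions[handle] = policy.act(obs[handle], eps=eps)
        return actions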
@@ -227,7 +228,7 @@ def train_agent(train_params, train_env_params, eval_env_params, obs_params):
         obs, info = train_env.reset(regenerate_rail=True, regenerate_schedule=True)
         policy.reset()
-        if episode_idx % 100 == 0:
+        if episode_idx % UPDATE_POLICY2_N_EPISODE == 0:
             policy2 = policy.clone()
         reset_timer.end()
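The refresh interval for policy2 is now the named constant UPDATE_POLICY2_N_EPISODE instead of a hard-coded 100. A minimal sketch of what clone() plausibly does, assuming it returns a frozen deep copy of the learner:

    import copy

    def clone_policy(policy):
        # Assumption: clone() snapshots the learner's current networks and
        # weights; a deep copy gives policy2 an independent, frozen copy.
        return copy.deepcopy(policy)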
@@ -499,14 +500,14 @@ def eval_policy(env, tree_observation, policy, train_params, obs_params):
 if __name__ == "__main__":
     parser = ArgumentParser()
-    parser.add_argument("-n", "--n_episodes", help="number of episodes to run", default=5400, type=int)
+    parser.add_argument("-n", "--n_episodes", help="number of episodes to run", default=54000, type=int)
     parser.add_argument("-t", "--training_env_config", help="training config id (eg 0 for Test_0)", default=2,
                         type=int)
-    parser.add_argument("-e", "--evaluation_env_config", help="evaluation config id (eg 0 for Test_0)", default=2,
+    parser.add_argument("-e", "--evaluation_env_config", help="evaluation config id (eg 0 for Test_0)", default=1,
                         type=int)
-    parser.add_argument("--n_evaluation_episodes", help="number of evaluation episodes", default=25, type=int)
+    parser.add_argument("--n_evaluation_episodes", help="number of evaluation episodes", default=2, type=int)
     parser.add_argument("--checkpoint_interval", help="checkpoint interval", default=100, type=int)
-    parser.add_argument("--eps_start", help="max exploration", default=1.0, type=float)
+    parser.add_argument("--eps_start", help="max exploration", default=0.1, type=float)
     parser.add_argument("--eps_end", help="min exploration", default=0.01, type=float)
     parser.add_argument("--eps_decay", help="exploration decay", default=0.9975, type=float)
     parser.add_argument("--buffer_size", help="replay buffer size", default=int(1e7), type=int)
@@ -563,6 +564,17 @@ if __name__ == "__main__":
             "malfunction_rate": 1 / 200,
             "seed": 0
         },
+        {
+            # Test_3
+            "n_agents": 58,
+            "x_dim": 40,
+            "y_dim": 40,
+            "n_cities": 5,
+            "max_rails_between_cities": 2,
+            "max_rails_in_city": 3,
+            "malfunction_rate": 1 / 200,
+            "seed": 0
+        },
     ]
     obs_params = {
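The added Test_3 config puts 58 agents on a 40x40 grid with 5 cities. A hedged sketch of how such a dict typically maps onto a Flatland environment; generator keyword names differ between flatland-rl releases, so treat them as assumptions:

    from flatland.envs.rail_env import RailEnv
    from flatland.envs.rail_generators import sparse_rail_generator

    cfg = {"n_agents": 58, "x_dim": 40, "y_dim": 40, "n_cities": 5,
           "max_rails_between_cities": 2, "max_rails_in_city": 3,
           "malfunction_rate": 1 / 200, "seed": 0}

    env = RailEnv(
        width=cfg["x_dim"], height=cfg["y_dim"],
        number_of_agents=cfg["n_agents"],
        rail_generator=sparse_rail_generator(
            max_num_cities=cfg["n_cities"],
            max_rails_between_cities=cfg["max_rails_between_cities"],
            max_rails_in_city=cfg["max_rails_in_city"],
            seed=cfg["seed"],
        ),
    )  # malfunction_rate would feed a malfunction generator, omitted here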
@@ -26,7 +26,10 @@ from reinforcement_learning.dddqn_policy import DDDQNPolicy
 VERBOSE = True
 # Checkpoint to use (remember to push it!)
 checkpoint = "./checkpoints/201112143850-4100.pth"  # 21.543589381053096 DEPTH=2
+checkpoint = "./checkpoints/201112143850-5400.pth"  # 21.220418678677177 DEPTH=2 AGENTS=10
+checkpoint = "./checkpoints/201113070245-5400.pth"  # 19.690047767961005 DEPTH=2 AGENTS=20
+checkpoint = "./checkpoints/201113211844-6100.pth"  # 19.690047767961005 DEPTH=2 AGENTS=20
 # Use last action cache
 USE_ACTION_CACHE = False
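With several consecutive assignments, only the last one takes effect, so the submission loads 201113211844-6100.pth. Restoring it for inference might look like the sketch below; the qnetwork_local attribute follows the common starter-kit layout and is an assumption here:

    import torch

    def load_for_inference(policy, path=checkpoint):
        # Assumption: the online Q-network lives in `qnetwork_local`.
        policy.qnetwork_local.load_state_dict(torch.load(path, map_location="cpu"))
        policy.qnetwork_local.eval()  # inference only: no dropout/batch-norm updates
        return policy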