diff --git a/checkpoints/201106170544-5400.pth.local b/checkpoints/201106170544-5400.pth.local new file mode 100644 index 0000000000000000000000000000000000000000..ea068d906ebad8ed514f0c0b4fa70b54f17cedbf Binary files /dev/null and b/checkpoints/201106170544-5400.pth.local differ diff --git a/checkpoints/201106170544-5400.pth.target b/checkpoints/201106170544-5400.pth.target new file mode 100644 index 0000000000000000000000000000000000000000..f789a5afa01268708e0009ae5d03e98166db36e2 Binary files /dev/null and b/checkpoints/201106170544-5400.pth.target differ diff --git a/reinforcement_learning/multi_agent_training.py b/reinforcement_learning/multi_agent_training.py index b28eb694a78b6f2a2de515eeee7a061bee3f2d3d..98a62c180dc076ff99da7ab594f3e1c3c7978a70 100755 --- a/reinforcement_learning/multi_agent_training.py +++ b/reinforcement_learning/multi_agent_training.py @@ -495,13 +495,13 @@ if __name__ == "__main__": parser = ArgumentParser() parser.add_argument("-n", "--n_episodes", help="number of episodes to run", default=5400, type=int) parser.add_argument("-t", "--training_env_config", help="training config id (eg 0 for Test_0)", default=1, type=int) - parser.add_argument("-e", "--evaluation_env_config", help="evaluation config id (eg 0 for Test_0)", default=0, + parser.add_argument("-e", "--evaluation_env_config", help="evaluation config id (eg 0 for Test_0)", default=1, type=int) - parser.add_argument("--n_evaluation_episodes", help="number of evaluation episodes", default=5, type=int) + parser.add_argument("--n_evaluation_episodes", help="number of evaluation episodes", default=25, type=int) parser.add_argument("--checkpoint_interval", help="checkpoint interval", default=100, type=int) parser.add_argument("--eps_start", help="max exploration", default=1.0, type=float) parser.add_argument("--eps_end", help="min exploration", default=0.01, type=float) - parser.add_argument("--eps_decay", help="exploration decay", default=0.998, type=float) + parser.add_argument("--eps_decay", help="exploration decay", default=0.9998, type=float) parser.add_argument("--buffer_size", help="replay buffer size", default=int(1e7), type=int) parser.add_argument("--buffer_min_size", help="min buffer size to start training", default=0, type=int) parser.add_argument("--restore_replay_buffer", help="replay buffer to restore", default="", type=str) diff --git a/run.py b/run.py index e40e932cebedeb218bae6ef645a45b55a72b0e15..b780e21e2287c0f3b87472c8b207c9851c0cd218 100644 --- a/run.py +++ b/run.py @@ -29,7 +29,7 @@ VERBOSE = True # Checkpoint to use (remember to push it!) checkpoint = "./checkpoints/201105222046-5400.pth" # 17.66104361971127 Depth 1 checkpoint = "./checkpoints/201106073658-4400.pth" # 15.64082361736683 Depth 1 -checkpoint = "./checkpoints/201106090621-4500.pth" # 15.64082361736683 Depth 1 +checkpoint = "./checkpoints/201106170544-5400.pth" # 15.64082361736683 Depth 1 # Use last action cache USE_ACTION_CACHE = False