diff --git a/checkpoints/201106073658-4400.pth.optimizer b/checkpoints/201106073658-4400.pth.optimizer new file mode 100644 index 0000000000000000000000000000000000000000..a860868a8beb873d2ec52ee3cb14c7ed715ebdae Binary files /dev/null and b/checkpoints/201106073658-4400.pth.optimizer differ diff --git a/checkpoints/201106073658-4400.pth.policy b/checkpoints/201106073658-4400.pth.policy new file mode 100644 index 0000000000000000000000000000000000000000..fa6348dc3330fa4699b2ee429296bc97c67f044c Binary files /dev/null and b/checkpoints/201106073658-4400.pth.policy differ diff --git a/run.py b/run.py index 06405868c06c8463eae756a38f063571421a7b8b..40d808ab42e5856fefa7f51b1896ef69bb33fed2 100644 --- a/run.py +++ b/run.py @@ -27,8 +27,8 @@ VERBOSE = True # Checkpoint to use (remember to push it!) checkpoint = "./checkpoints/201105222046-5400.pth" # 17.66104361971127 Depth 1 -checkpoint = "./checkpoints/201106073658-4300.pth" # 15.64082361736683 Depth 1 -checkpoint = "./checkpoints/201106090621-3300.pth" # 15.64082361736683 Depth 1 +checkpoint = "./checkpoints/201106073658-4400.pth" # 15.64082361736683 Depth 1 +# checkpoint = "./checkpoints/201106090621-3300.pth" # 15.64082361736683 Depth 1 # Use last action cache USE_ACTION_CACHE = False @@ -52,8 +52,8 @@ state_size = tree_observation.observation_dim action_size = 5 # Creates the policy. No GPU on evaluation server. -policy = DDDQNPolicy(state_size, action_size, Namespace(**{'use_gpu': False}), evaluation_mode=True) -# policy = PPOAgent(state_size, action_size, 10) +# policy = DDDQNPolicy(state_size, action_size, Namespace(**{'use_gpu': False}), evaluation_mode=True) +policy = PPOAgent(state_size, action_size, 10) policy.load(checkpoint) #####################################################################