diff --git a/checkpoints/201106090621-4500.pth.local b/checkpoints/201106090621-4500.pth.local new file mode 100644 index 0000000000000000000000000000000000000000..5b608c3ba00b82a04d5150dc653ea3fd94a6de68 Binary files /dev/null and b/checkpoints/201106090621-4500.pth.local differ diff --git a/checkpoints/201106090621-4500.pth.target b/checkpoints/201106090621-4500.pth.target new file mode 100644 index 0000000000000000000000000000000000000000..8d1c3d4b211eacbb51704cefea4d7aeb083b50c5 Binary files /dev/null and b/checkpoints/201106090621-4500.pth.target differ diff --git a/reinforcement_learning/ppo/ppo_agent.py b/reinforcement_learning/ppo/ppo_agent.py index 350119a225dff9feef6f8ab0589e476126f4ac2b..be23960414fbb57628a400a300e9d90e00ae202e 100644 --- a/reinforcement_learning/ppo/ppo_agent.py +++ b/reinforcement_learning/ppo/ppo_agent.py @@ -39,10 +39,10 @@ class PPOAgent(Policy): # Decide on an action to take in the environment def act(self, state, eps=None): - if eps is not None: - # Epsilon-greedy action selection - if np.random.random() < eps: - return np.random.choice(np.arange(self.action_size)) + # if eps is not None: + # # Epsilon-greedy action selection + # if np.random.random() < eps: + # return np.random.choice(np.arange(self.action_size)) self.policy.eval() with torch.no_grad(): diff --git a/run.py b/run.py index 626b8e38fb0c18000388c64a3d19b8f5bb96ffe1..e40e932cebedeb218bae6ef645a45b55a72b0e15 100644 --- a/run.py +++ b/run.py @@ -29,7 +29,7 @@ VERBOSE = True # Checkpoint to use (remember to push it!) 
checkpoint = "./checkpoints/201105222046-5400.pth" # 17.66104361971127 Depth 1 checkpoint = "./checkpoints/201106073658-4400.pth" # 15.64082361736683 Depth 1 -# checkpoint = "./checkpoints/201106090621-3300.pth" # 15.64082361736683 Depth 1 +checkpoint = "./checkpoints/201106090621-4500.pth" # TODO: record actual score (previous value was copied from the 3300/4400 entries) Depth 1 # Use last action cache USE_ACTION_CACHE = False @@ -53,8 +53,8 @@ state_size = tree_observation.observation_dim action_size = 5 # Creates the policy. No GPU on evaluation server. -# policy = DDDQNPolicy(state_size, action_size, Namespace(**{'use_gpu': False}), evaluation_mode=True) -policy = PPOAgent(state_size, action_size, 10) +policy = DDDQNPolicy(state_size, action_size, Namespace(**{'use_gpu': False}), evaluation_mode=True) +# policy = PPOAgent(state_size, action_size, 10) policy.load(checkpoint) #####################################################################