diff --git a/checkpoints/201106090621-4500.pth.local b/checkpoints/201106090621-4500.pth.local new file mode 100644 index 0000000000000000000000000000000000000000..5b608c3ba00b82a04d5150dc653ea3fd94a6de68 Binary files /dev/null and b/checkpoints/201106090621-4500.pth.local differ diff --git a/checkpoints/201106090621-4500.pth.target b/checkpoints/201106090621-4500.pth.target new file mode 100644 index 0000000000000000000000000000000000000000..8d1c3d4b211eacbb51704cefea4d7aeb083b50c5 Binary files /dev/null and b/checkpoints/201106090621-4500.pth.target differ diff --git a/reinforcement_learning/ppo/ppo_agent.py b/reinforcement_learning/ppo/ppo_agent.py index 350119a225dff9feef6f8ab0589e476126f4ac2b..be23960414fbb57628a400a300e9d90e00ae202e 100644 --- a/reinforcement_learning/ppo/ppo_agent.py +++ b/reinforcement_learning/ppo/ppo_agent.py @@ -39,10 +39,10 @@ class PPOAgent(Policy): # Decide on an action to take in the environment def act(self, state, eps=None): - if eps is not None: - # Epsilon-greedy action selection - if np.random.random() < eps: - return np.random.choice(np.arange(self.action_size)) + # if eps is not None: + # # Epsilon-greedy action selection + # if np.random.random() < eps: + # return np.random.choice(np.arange(self.action_size)) self.policy.eval() with torch.no_grad(): diff --git a/run.py b/run.py index 626b8e38fb0c18000388c64a3d19b8f5bb96ffe1..e40e932cebedeb218bae6ef645a45b55a72b0e15 100644 --- a/run.py +++ b/run.py @@ -29,7 +29,7 @@ VERBOSE = True # Checkpoint to use (remember to push it!) 
checkpoint = "./checkpoints/201105222046-5400.pth" # 17.66104361971127 Depth 1 checkpoint = "./checkpoints/201106073658-4400.pth" # 15.64082361736683 Depth 1 -# checkpoint = "./checkpoints/201106090621-3300.pth" # 15.64082361736683 Depth 1 +checkpoint = "./checkpoints/201106090621-4500.pth" # TODO: record actual score (previous value was copied from the 3300/4400 entries) Depth 1 # Use last action cache USE_ACTION_CACHE = False @@ -53,8 +53,8 @@ state_size = tree_observation.observation_dim action_size = 5 # Creates the policy. No GPU on evaluation server. -# policy = DDDQNPolicy(state_size, action_size, Namespace(**{'use_gpu': False}), evaluation_mode=True) -policy = PPOAgent(state_size, action_size, 10) +policy = DDDQNPolicy(state_size, action_size, Namespace(**{'use_gpu': False}), evaluation_mode=True) +# policy = PPOAgent(state_size, action_size, 10) policy.load(checkpoint) #####################################################################