DQN & PPO

cabf5514 · Egli Adrian (IT-SCI-API-PFI) · 0af1cb38 · cabf5514
Commit cabf5514 authored 4 years ago by Egli Adrian (IT-SCI-API-PFI)
--- a/reinforcement_learning/ppo/ppo_agent.py
+++ b/reinforcement_learning/ppo/ppo_agent.py
@@ -39,6 +39,11 @@ class PPOAgent(Policy):
    # Decide on an action to take in the environment

    def act(self, state, eps=None):
+        if eps is not None:
+            # Epsilon-greedy exploration: with probability eps, return a uniformly random action
+            if np.random.random() < eps:
+                return np.random.choice(np.arange(self.action_size))
+
        self.policy.eval()
        with torch.no_grad():
            output = self.policy(torch.from_numpy(state).float().unsqueeze(0).to(device))