From cabf5514156ea888ac627e8b0ea04682521999c8 Mon Sep 17 00:00:00 2001
From: "Egli Adrian (IT-SCI-API-PFI)" <adrian.egli@sbb.ch>
Date: Fri, 6 Nov 2020 14:19:09 +0100
Subject: [PATCH] DQN & PPO

---
 reinforcement_learning/ppo/ppo_agent.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/reinforcement_learning/ppo/ppo_agent.py b/reinforcement_learning/ppo/ppo_agent.py
index a7431f8..350119a 100644
--- a/reinforcement_learning/ppo/ppo_agent.py
+++ b/reinforcement_learning/ppo/ppo_agent.py
@@ -39,6 +39,11 @@ class PPOAgent(Policy):
     # Decide on an action to take in the environment
 
     def act(self, state, eps=None):
+        if eps is not None:
+            # Epsilon-greedy action selection
+            if np.random.random() < eps:
+                return np.random.choice(np.arange(self.action_size))
+
         self.policy.eval()
         with torch.no_grad():
             output = self.policy(torch.from_numpy(state).float().unsqueeze(0).to(device))
-- 
GitLab