From cabf5514156ea888ac627e8b0ea04682521999c8 Mon Sep 17 00:00:00 2001
From: "Egli Adrian (IT-SCI-API-PFI)" <adrian.egli@sbb.ch>
Date: Fri, 6 Nov 2020 14:19:09 +0100
Subject: [PATCH] DQN & PPO

---
 reinforcement_learning/ppo/ppo_agent.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/reinforcement_learning/ppo/ppo_agent.py b/reinforcement_learning/ppo/ppo_agent.py
index a7431f8..350119a 100644
--- a/reinforcement_learning/ppo/ppo_agent.py
+++ b/reinforcement_learning/ppo/ppo_agent.py
@@ -39,6 +39,11 @@ class PPOAgent(Policy):
 
     # Decide on an action to take in the environment
     def act(self, state, eps=None):
+        if eps is not None:
+            # Epsilon-greedy action selection
+            if np.random.random() < eps:
+                return np.random.choice(np.arange(self.action_size))
+
         self.policy.eval()
         with torch.no_grad():
             output = self.policy(torch.from_numpy(state).float().unsqueeze(0).to(device))
-- 
GitLab