Skip to content
Snippets Groups Projects
Commit cabf5514 authored by Egli Adrian (IT-SCI-API-PFI)
Browse files

DQN & PPO

parent 0af1cb38
No related branches found
No related tags found
No related merge requests found
...@@ -39,6 +39,11 @@ class PPOAgent(Policy): ...@@ -39,6 +39,11 @@ class PPOAgent(Policy):
# Decide on an action to take in the environment # Decide on an action to take in the environment
def act(self, state, eps=None): def act(self, state, eps=None):
if eps is not None:
# Epsilon-greedy action selection
if np.random.random() < eps:
return np.random.choice(np.arange(self.action_size))
self.policy.eval() self.policy.eval()
with torch.no_grad(): with torch.no_grad():
output = self.policy(torch.from_numpy(state).float().unsqueeze(0).to(device)) output = self.policy(torch.from_numpy(state).float().unsqueeze(0).to(device))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment