From 716119c9f6fc519c90760f845a548c98a65ae320 Mon Sep 17 00:00:00 2001 From: "Egli Adrian (IT-SCI-API-PFI)" <adrian.egli@sbb.ch> Date: Mon, 7 Dec 2020 22:04:50 +0100 Subject: [PATCH] typo --- reinforcement_learning/ppo_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reinforcement_learning/ppo_agent.py b/reinforcement_learning/ppo_agent.py index 44c5701..e603e70 100644 --- a/reinforcement_learning/ppo_agent.py +++ b/reinforcement_learning/ppo_agent.py @@ -189,7 +189,7 @@ class PPOAgent(Policy): # The loss function is used to estimate the gardient and use the entropy function based # heuristic to penalize the gradient function when the policy becomes deterministic this would let - # the gardient to become very flat and so the gradient is no longer useful. + # the gradient become very flat and so the gradient is no longer useful. loss = \ -torch.min(surr1, surr2) \ + self.weight_loss * self.loss_function(state_values, rewards) \ -- GitLab