diff --git a/reinforcement_learning/ppo_agent.py b/reinforcement_learning/ppo_agent.py index 44c57010b80b5c073bc074e71b6af9eeff1024ca..e603e70255c4eca211fd2c22f0b410f9d900e709 100644 --- a/reinforcement_learning/ppo_agent.py +++ b/reinforcement_learning/ppo_agent.py @@ -189,7 +189,7 @@ class PPOAgent(Policy): # The loss function is used to estimate the gardient and use the entropy function based # heuristic to penalize the gradient function when the policy becomes deterministic this would let - # the gardient to become very flat and so the gradient is no longer useful. + # the gradient become very flat and so the gradient is no longer useful. loss = \ -torch.min(surr1, surr2) \ + self.weight_loss * self.loss_function(state_values, rewards) \