diff --git a/reinforcement_learning/ppo_agent.py b/reinforcement_learning/ppo_agent.py
index 2c4119f481e20369e94a44903df85e46747d5f9a..7b7d65bdeae385382dbe38e6cc791011a2fe486e 100644
--- a/reinforcement_learning/ppo_agent.py
+++ b/reinforcement_learning/ppo_agent.py
@@ -1,7 +1,6 @@
 import copy
 import os
 
-import numpy as np
 import torch
 import torch.nn as nn
 import torch.optim as optim
@@ -10,7 +9,7 @@ from torch.distributions import Categorical
 # Hyperparameters
 from reinforcement_learning.policy import Policy
 
-device = torch.device("cpu")#"cuda:0" if torch.cuda.is_available() else "cpu")
+device = torch.device("cpu") # "cuda:0" if torch.cuda.is_available() else "cpu")
 print("device:", device)
 
 
@@ -99,8 +98,8 @@ class PPOAgent(Policy):
         self.learning_rate = 0.1e-4
         self.gamma = 0.99
         self.surrogate_eps_clip = 0.2
-        self.K_epoch = 3
-        self.weight_loss = 0.5
+        self.K_epoch = 30
+        self.weight_loss = 1.0
         self.weight_entropy = 0.01
 
         # objects
@@ -108,7 +107,7 @@ class PPOAgent(Policy):
         self.loss = 0
         self.actor_critic_model = ActorCriticModel(state_size, action_size)
         self.optimizer = optim.Adam(self.actor_critic_model.parameters(), lr=self.learning_rate)
-        self.loss_function = nn.MSELoss()
+        self.loss_function = nn.SmoothL1Loss() # nn.MSELoss()
 
     def reset(self):
         pass