diff --git a/reinforcement_learning/multi_agent_training.py b/reinforcement_learning/multi_agent_training.py
index 730774bc4cdfc2ada170d1fadf5f0c5f9dc6b23e..68bcb64d7ad86e050b6bb15d01671b7458895208 100755
--- a/reinforcement_learning/multi_agent_training.py
+++ b/reinforcement_learning/multi_agent_training.py
@@ -560,7 +560,7 @@ def eval_policy(env, tree_observation, policy, train_params, obs_params):
 if __name__ == "__main__":
     parser = ArgumentParser()
     parser.add_argument("-n", "--n_episodes", help="number of episodes to run", default=25000, type=int)
-    parser.add_argument("-t", "--training_env_config", help="training config id (eg 0 for Test_0)", default=2,
+    parser.add_argument("-t", "--training_env_config", help="training config id (eg 0 for Test_0)", default=0,
                         type=int)
     parser.add_argument("-e", "--evaluation_env_config", help="evaluation config id (eg 0 for Test_0)", default=0,
                         type=int)
diff --git a/reinforcement_learning/ppo/ppo_agent.py b/reinforcement_learning/ppo/ppo_agent.py
index 77a0fb5c8c933b2966e45128191e34eeee187c3d..446012bbf8d8886d516ffe55982aa2d1afa9e158 100644
--- a/reinforcement_learning/ppo/ppo_agent.py
+++ b/reinforcement_learning/ppo/ppo_agent.py
@@ -10,14 +10,6 @@ from torch.distributions import Categorical
 # Hyperparameters
 from reinforcement_learning.policy import Policy
 
-LEARNING_RATE = 0.1e-4
-GAMMA = 0.98
-LAMBDA = 0.9
-SURROGATE_EPS_CLIP = 0.01
-K_EPOCH = 3
-WEIGHT_LOSS = 0.5
-WEIGHT_ENTROPY = 0.01
-
 device = torch.device("cpu")  # "cuda:0" if torch.cuda.is_available() else "cpu")
 print("device:", device)
 
@@ -101,10 +93,20 @@ class ActorCriticModel(nn.Module):
 class PPOAgent(Policy):
     def __init__(self, state_size, action_size):
         super(PPOAgent, self).__init__()
+
+        # parameters
+        self.learning_rate = 0.1e-3
+        self.gamma = 0.98
+        self.surrogate_eps_clip = 0.1
+        self.K_epoch = 3
+        self.weight_loss = 0.9
+        self.weight_entropy = 0.01
+
+        # objects
         self.memory = DataBuffers()
         self.loss = 0
         self.actor_critic_model = ActorCriticModel(state_size, action_size)
-        self.optimizer = optim.Adam(self.actor_critic_model.parameters(), lr=LEARNING_RATE)
+        self.optimizer = optim.Adam(self.actor_critic_model.parameters(), lr=self.learning_rate)
         self.lossFunction = nn.MSELoss()
 
     def reset(self):
@@ -136,7 +138,7 @@ class PPOAgent(Policy):
                 discounted_reward = 0
                 done_list.insert(0, 1)
             else:
-                discounted_reward = reward_i + GAMMA * discounted_reward
+                discounted_reward = reward_i + self.gamma * discounted_reward
                 done_list.insert(0, 0)
             reward_list.insert(0, discounted_reward)
             state_next_list.insert(0, state_next_i)
@@ -165,7 +167,7 @@ class PPOAgent(Policy):
                     self._convert_transitions_to_torch_tensors(agent_episode_history)
 
                 # Optimize policy for K epochs:
-                for _ in range(K_EPOCH):
+                for _ in range(self.K_epoch):
                     # evaluating actions (actor) and values (critic)
                     logprobs, state_values, dist_entropy = self.actor_critic_model.evaluate(states, actions)
 
@@ -175,11 +177,11 @@ class PPOAgent(Policy):
                     # finding Surrogate Loss:
                     advantages = rewards - state_values.detach()
                     surr1 = ratios * advantages
-                    surr2 = torch.clamp(ratios, 1 - SURROGATE_EPS_CLIP, 1 + SURROGATE_EPS_CLIP) * advantages
+                    surr2 = torch.clamp(ratios, 1 - self.surrogate_eps_clip, 1 + self.surrogate_eps_clip) * advantages
                     loss = \
                         -torch.min(surr1, surr2) \
-                        + WEIGHT_LOSS * self.lossFunction(state_values, rewards) \
-                        - WEIGHT_ENTROPY * dist_entropy
+                        + self.weight_loss * self.lossFunction(state_values, rewards) \
+                        - self.weight_entropy * dist_entropy
 
                     # make a gradient step
                     self.optimizer.zero_grad()
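
The ppo_agent.py hunks above move the PPO hyperparameters from module-level constants into instance attributes and keep the clipped-surrogate update otherwise unchanged. Below is a minimal, self-contained sketch (not the repository code) of that update pattern using the same instance-level values; `TinyActorCritic`, `ppo_update`, and the tensor inputs (`states`, `actions`, `old_logprobs`, `returns`) are assumed names for illustration only.

    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torch.distributions import Categorical


    class TinyActorCritic(nn.Module):
        """Stand-in actor-critic; the repository uses its own ActorCriticModel."""

        def __init__(self, state_size, action_size, hidden=64):
            super().__init__()
            self.actor = nn.Sequential(nn.Linear(state_size, hidden), nn.Tanh(),
                                       nn.Linear(hidden, action_size), nn.Softmax(dim=-1))
            self.critic = nn.Sequential(nn.Linear(state_size, hidden), nn.Tanh(),
                                        nn.Linear(hidden, 1))

        def evaluate(self, states, actions):
            dist = Categorical(self.actor(states))
            return dist.log_prob(actions), self.critic(states).squeeze(-1), dist.entropy()


    def ppo_update(model, optimizer, states, actions, old_logprobs, returns,
                   k_epoch=3, eps_clip=0.1, weight_loss=0.9, weight_entropy=0.01):
        # Clipped-surrogate PPO step, mirroring the hyperparameter values
        # introduced as instance attributes in the diff above.
        mse = nn.MSELoss()
        for _ in range(k_epoch):
            logprobs, state_values, dist_entropy = model.evaluate(states, actions)
            ratios = torch.exp(logprobs - old_logprobs.detach())
            advantages = returns - state_values.detach()
            surr1 = ratios * advantages
            surr2 = torch.clamp(ratios, 1 - eps_clip, 1 + eps_clip) * advantages
            loss = (-torch.min(surr1, surr2)
                    + weight_loss * mse(state_values, returns)
                    - weight_entropy * dist_entropy).mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

Usage would look like building the model once, e.g. `model = TinyActorCritic(state_size, action_size)` and `optimizer = optim.Adam(model.parameters(), lr=1e-4)`, then calling `ppo_update(...)` on each collected batch.
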
diff --git a/reinforcement_learning/single_agent_training.py b/reinforcement_learning/single_agent_training.py
index a5ee6c5132652757cdd8fd8dab6992e08fdfd14b..bfcc88656c8b37a8c09e72b51701d0750cf7f238 100644
--- a/reinforcement_learning/single_agent_training.py
+++ b/reinforcement_learning/single_agent_training.py
@@ -103,9 +103,9 @@ def train_agent(n_episodes):
         'buffer_size': int(1e5),
         'batch_size': 32,
         'update_every': 8,
-        'LEARNING_RATE': 0.5e-4,
+        'learning_rate': 0.5e-4,
         'tau': 1e-3,
-        'GAMMA': 0.99,
+        'gamma': 0.99,
         'buffer_min_size': 0,
         'hidden_size': 256,
         'use_gpu': False
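
The single_agent_training.py hunk only renames the dict keys to lowercase (`learning_rate`, `gamma`). A hedged illustration of why casing matters, assuming (not shown in this diff) that the dict is exposed attribute-style via a Namespace to whatever policy consumes it:

    from argparse import Namespace

    # Keys copied from the hunk above; the Namespace conversion is an assumption
    # about how the training code reads these values, used here for illustration.
    training_parameters = {
        'buffer_size': int(1e5),
        'batch_size': 32,
        'update_every': 8,
        'learning_rate': 0.5e-4,
        'tau': 1e-3,
        'gamma': 0.99,
        'buffer_min_size': 0,
        'hidden_size': 256,
        'use_gpu': False,
    }

    train_params = Namespace(**training_parameters)
    print(train_params.learning_rate, train_params.gamma)  # 5e-05 0.99

With the old upper-case keys, the same attribute lookups (`train_params.learning_rate`, `train_params.gamma`) would fail, which is the consistency the rename restores.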