Commit 41d4b483 authored by Egli Adrian (IT-SCI-API-PFI)

converges much faster :-)

parent 729722e3
 import copy
 import os
 import numpy as np
 import torch
 import torch.nn as nn
 import torch.optim as optim
@@ -10,7 +9,7 @@ from torch.distributions import Categorical
 # Hyperparameters
 from reinforcement_learning.policy import Policy
-device = torch.device("cpu")#"cuda:0" if torch.cuda.is_available() else "cpu")
+device = torch.device("cpu")  # "cuda:0" if torch.cuda.is_available() else "cpu")
 print("device:", device)
@@ -99,8 +98,8 @@ class PPOAgent(Policy):
         self.learning_rate = 0.1e-4
         self.gamma = 0.99
         self.surrogate_eps_clip = 0.2
-        self.K_epoch = 3
-        self.weight_loss = 0.5
+        self.K_epoch = 30
+        self.weight_loss = 1.0
         self.weight_entropy = 0.01
         # objects
@@ -108,7 +107,7 @@ class PPOAgent(Policy):
         self.loss = 0
         self.actor_critic_model = ActorCriticModel(state_size, action_size)
         self.optimizer = optim.Adam(self.actor_critic_model.parameters(), lr=self.learning_rate)
-        self.loss_function = nn.MSELoss()
+        self.loss_function = nn.SmoothL1Loss()  # nn.MSELoss()
     def reset(self):
         pass
...
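
Note on the K_epoch change (3 -> 30): in PPO, K_epoch is the number of optimizer passes made over a single collected rollout, and the clipped surrogate objective (surrogate_eps_clip = 0.2) is what keeps that reuse safe by capping how far one rollout can move the policy. A minimal sketch of how these hyperparameters typically interact in a PPO update; the function name, the assumption that the model returns a (Categorical, value) pair, and all tensor arguments are illustrative, not taken from this repository:

import torch

def ppo_update(model, optimizer, value_loss_fn, states, actions,
               old_log_probs, advantages, returns,
               k_epoch=30, eps_clip=0.2,
               weight_loss=1.0, weight_entropy=0.01):
    # Reuse one rollout for k_epoch optimizer passes.
    for _ in range(k_epoch):
        dist, values = model(states)                  # assumed: (Categorical, value estimates)
        log_probs = dist.log_prob(actions)
        ratio = torch.exp(log_probs - old_log_probs)  # pi_new(a|s) / pi_old(a|s)
        # Clipped surrogate: bounds the policy step per rollout,
        # which is what makes many epochs per rollout viable.
        surr1 = ratio * advantages
        surr2 = torch.clamp(ratio, 1.0 - eps_clip, 1.0 + eps_clip) * advantages
        policy_loss = -torch.min(surr1, surr2).mean()
        value_loss = value_loss_fn(values.squeeze(-1), returns)
        entropy = dist.entropy().mean()
        loss = policy_loss + weight_loss * value_loss - weight_entropy * entropy
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()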
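
Note on the loss swap (nn.MSELoss -> nn.SmoothL1Loss): SmoothL1 (Huber) is quadratic for errors below 1 and linear beyond, so its gradient is bounded for large value errors; with noisy return targets this often stabilizes critic training. A small standalone comparison using the standard PyTorch losses (the numbers are illustrative, not from this commit):

import torch
import torch.nn as nn

pred = torch.zeros(3)
target = torch.tensor([0.5, 2.0, 10.0])
print(nn.MSELoss()(pred, target).item())       # 34.75 -> dominated by the 10.0 outlier
print(nn.SmoothL1Loss()(pred, target).item())  # ~3.71 -> linear penalty beyond |error| = 1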