Skip to content
Snippets Groups Projects
Commit 84e8aeb1 authored by Erik Nygren
Browse files

fixed training issues

parent 734bd8a8
No related branches found
No related tags found
No related merge requests found
No preview for this file type
import random import random
from collections import deque from collections import deque
import matplotlib.pyplot as plt
import numpy as np import numpy as np
import torch import torch
from dueling_double_dqn import Agent from dueling_double_dqn import Agent
from flatland.envs.generators import complex_rail_generator from flatland.envs.generators import complex_rail_generator
from flatland.envs.observations import TreeObsForRailEnv from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import DummyPredictorForRailEnv from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv from flatland.envs.rail_env import RailEnv
from flatland.utils.rendertools import RenderTool from flatland.utils.rendertools import RenderTool
...@@ -46,10 +47,10 @@ env = RailEnv(width=10, ...@@ -46,10 +47,10 @@ env = RailEnv(width=10,
env.load("./railway/complex_scene.pkl") env.load("./railway/complex_scene.pkl")
""" """
env = RailEnv(width=8, env = RailEnv(width=12,
height=8, height=12,
rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=1, min_dist=4, max_dist=99999, seed=0), rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=5, min_dist=10, max_dist=99999, seed=0),
obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=DummyPredictorForRailEnv()), obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv()),
number_of_agents=3) number_of_agents=3)
env.reset(True, True) env.reset(True, True)
...@@ -59,8 +60,8 @@ handle = env.get_agent_handles() ...@@ -59,8 +60,8 @@ handle = env.get_agent_handles()
state_size = 168 * 2 state_size = 168 * 2
action_size = 5 action_size = 5
n_trials = 15000 n_trials = 20000
max_steps = int(1.5 * (env.height + env.width)) max_steps = int(3 * (env.height + env.width))
eps = 1. eps = 1.
eps_end = 0.005 eps_end = 0.005
eps_decay = 0.9995 eps_decay = 0.9995
...@@ -75,7 +76,7 @@ action_prob = [0] * action_size ...@@ -75,7 +76,7 @@ action_prob = [0] * action_size
agent_obs = [None] * env.get_num_agents() agent_obs = [None] * env.get_num_agents()
agent_next_obs = [None] * env.get_num_agents() agent_next_obs = [None] * env.get_num_agents()
agent = Agent(state_size, action_size, "FC", 0) agent = Agent(state_size, action_size, "FC", 0)
# agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint15000.pth')) agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint20000.pth'))
demo = False demo = False
...@@ -220,3 +221,5 @@ for trials in range(1, n_trials + 1): ...@@ -220,3 +221,5 @@ for trials in range(1, n_trials + 1):
torch.save(agent.qnetwork_local.state_dict(), torch.save(agent.qnetwork_local.state_dict(),
'./Nets/avoid_checkpoint' + str(trials) + '.pth') './Nets/avoid_checkpoint' + str(trials) + '.pth')
action_prob = [1] * action_size action_prob = [1] * action_size
plt.plot(scores)
plt.show()
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment