Commit 60b730d0 authored by u214892

#42 run baselines in ci

parent e57ff31c
 git+https://gitlab.aicrowd.com/flatland/flatland.git@master
+importlib-metadata>=0.17
+importlib_resources>=1.0.2
 torch>=1.1.0
\ No newline at end of file
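
Note: the two new pins back the import change in the training scripts below, where "from sys import path" is replaced by "from importlib_resources import path" so that checkpoint files bundled inside an installed package can be located. A minimal sketch of the pattern, using the package and file name that appear later in this diff:

    from importlib_resources import path

    import torch_training.Nets

    # path() is a context manager that yields a concrete pathlib.Path to a
    # resource shipped inside a package, even when the package is installed
    # as a wheel or zip archive.
    with path(torch_training.Nets, "avoid_checkpoint30000.pth") as file_in:
        print(file_in)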
 from collections import deque
-from sys import path
 import matplotlib.pyplot as plt
 import numpy as np
 import random
 import torch
 from dueling_double_dqn import Agent
+from importlib_resources import path
 import torch_training.Nets
 from flatland.envs.generators import complex_rail_generator
...
-from sys import path
-import random
 from collections import deque
 import matplotlib.pyplot as plt
 import numpy as np
+import random
 import torch
 from dueling_double_dqn import Agent
+from importlib_resources import path
 import torch_training.Nets
 from flatland.envs.generators import complex_rail_generator
@@ -14,7 +13,6 @@ from flatland.envs.observations import TreeObsForRailEnv
 from flatland.envs.predictions import ShortestPathPredictorForRailEnv
 from flatland.envs.rail_env import RailEnv
 from flatland.utils.rendertools import RenderTool
 from utils.observation_utils import norm_obs_clip, split_tree
 random.seed(1)
@@ -70,7 +68,7 @@ file_load = False
 """
 observation_helper = TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv())
-env_renderer = RenderTool(env, gl="PILSVG",)
+env_renderer = RenderTool(env, gl="PILSVG", )
 handle = env.get_agent_handles()
 features_per_node = 9
 state_size = features_per_node * 85 * 2
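
Note on the sizing arithmetic: with max_depth=3 and four branch directions per node (an assumption consistent with the standard Flatland TreeObsForRailEnv above), the observation tree has 1 + 4 + 16 + 64 = 85 nodes, and two consecutive time steps are stacked further down in the script, hence state_size = 9 * 85 * 2 = 1530:

    features_per_node = 9
    num_nodes = sum(4 ** depth for depth in range(4))  # 1 + 4 + 16 + 64 = 85
    state_size = features_per_node * num_nodes * 2     # x2: two stacked time steps -> 1530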
@@ -94,11 +92,9 @@ agent = Agent(state_size, action_size, "FC", 0)
 with path(torch_training.Nets, "avoid_checkpoint30000.pth") as file_in:
     agent.qnetwork_local.load_state_dict(torch.load(file_in))
 demo = True
 record_images = False
 for trials in range(1, n_trials + 1):
     if trials % 50 == 0 and not demo:
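
Note: torch.load(file_in) works here because the checkpoint stores a plain state_dict. A sketch of the matching save side, assuming standard PyTorch practice rather than anything shown in this diff:

    import torch

    # counterpart of the load above; qnetwork_local is the policy network
    torch.save(agent.qnetwork_local.state_dict(), "avoid_checkpoint30000.pth")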
@@ -136,7 +132,7 @@ for trials in range(1, n_trials + 1):
         agent_data = np.clip(agent_data, -1, 1)
         obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
         agent_data = env.agents[a]
-        speed = 1 #np.random.randint(1,5)
+        speed = 1 # np.random.randint(1,5)
         agent_data.speed_data['speed'] = 1. / speed
     for i in range(2):
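
Note: obs[a] above is assembled from three normalized blocks: the tree feature values and the distance channel (each squashed by norm_obs_clip) plus the per-agent data clipped to [-1, 1]. The speed assignment stores a fractional speed: with the commented-out np.random.randint(1,5) enabled, 1. / speed would yield 1.0, 0.5, 0.33... or 0.25, i.e. an agent that advances one cell every one to four steps; here it is pinned to 1.0.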
@@ -145,7 +141,6 @@ for trials in range(1, n_trials + 1):
     for a in range(env.get_num_agents()):
         agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
     score = 0
     env_done = 0
     # Run episode
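
Note: the per-agent network input stacks the two most recent observations. A toy sketch of that mechanism; the deque(maxlen=2) and the shapes are assumptions consistent with the imports and state_size in this diff:

    from collections import deque

    import numpy as np

    time_obs = deque(maxlen=2)               # keeps only the last two observation dicts
    time_obs.append({0: np.zeros(9 * 85)})   # one agent, 9 features x 85 tree nodes
    time_obs.append({0: np.ones(9 * 85)})

    agent_obs = {0: np.concatenate((time_obs[0][0], time_obs[1][0]))}
    assert agent_obs[0].shape == (9 * 85 * 2,)  # matches state_size above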
...@@ -206,10 +201,10 @@ for trials in range(1, n_trials + 1): ...@@ -206,10 +201,10 @@ for trials in range(1, n_trials + 1):
print( print(
'\rTraining {} Agents on ({},{}).\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format( '\rTraining {} Agents on ({},{}).\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
env.get_num_agents(), x_dim, y_dim, env.get_num_agents(), x_dim, y_dim,
trials, trials,
np.mean(scores_window), np.mean(scores_window),
100 * np.mean(done_window), 100 * np.mean(done_window),
eps, action_prob / np.sum(action_prob)), end=" ") eps, action_prob / np.sum(action_prob)), end=" ")
if trials % 100 == 0: if trials % 100 == 0:
print( print(
...