Commit e0a28d85 authored by u214892

#42 run baselines in ci

parent a0beb3d3
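The change wraps the module-level training loop in a main(argv) entry point and adds a -n/--n_trials option, so the CI pipeline can run the baseline for a handful of episodes instead of the default 30000. Below is a minimal sketch of how the refactored script could be exercised in such a smoke run; the import path is an assumption, not taken from the commit, and should be adjusted to wherever the script lives in the repository.

# Hypothetical CI smoke run; the module path is assumed, not part of the commit.
from training_navigation import main

# Keep the episode count tiny so the baseline finishes within a CI time budget.
main(["-n", "3"])

# Equivalent shell invocation: python training_navigation.py -n 3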
import getopt
import sys
from collections import deque
import matplotlib.pyplot as plt
...
@@ -15,184 +17,198 @@ from flatland.envs.rail_env import RailEnv
from flatland.utils.rendertools import RenderTool
from utils.observation_utils import norm_obs_clip, split_tree


def main(argv):
    try:
        opts, args = getopt.getopt(argv, "n:", ["n_trials="])
    except getopt.GetoptError:
        print('training_navigation.py -n <n_trials>')
        sys.exit(2)
    for opt, arg in opts:
        if opt in ('-n', '--n_trials'):
            n_trials = int(arg)
    random.seed(1)
    np.random.seed(1)

    """
    env = RailEnv(width=10,
                  height=20, obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()))
    env.load("./railway/complex_scene.pkl")
    file_load = True
    """

    x_dim = np.random.randint(8, 20)
    y_dim = np.random.randint(8, 20)
    n_agents = np.random.randint(3, 8)
    n_goals = n_agents + np.random.randint(0, 3)
    min_dist = int(0.75 * min(x_dim, y_dim))
    env = RailEnv(width=x_dim,
                  height=y_dim,
                  rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
                                                        max_dist=99999,
                                                        seed=0),
                  obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()),
                  number_of_agents=n_agents)
    env.reset(True, True)
    file_load = False
    """

    """
    observation_helper = TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv())
    env_renderer = RenderTool(env, gl="PILSVG", )
    handle = env.get_agent_handles()
    features_per_node = 9
    state_size = features_per_node * 85 * 2
    action_size = 5
    # We set the number of episodes we would like to train on
    if 'n_trials' not in locals():
        n_trials = 30000
    max_steps = int(3 * (env.height + env.width))
    eps = 1.
    eps_end = 0.005
    eps_decay = 0.9995
    action_dict = dict()
    final_action_dict = dict()
    scores_window = deque(maxlen=100)
    done_window = deque(maxlen=100)
    time_obs = deque(maxlen=2)
    scores = []
    dones_list = []
    action_prob = [0] * action_size
    agent_obs = [None] * env.get_num_agents()
    agent_next_obs = [None] * env.get_num_agents()
    agent = Agent(state_size, action_size, "FC", 0)
    with path(torch_training.Nets, "avoid_checkpoint30000.pth") as file_in:
        agent.qnetwork_local.load_state_dict(torch.load(file_in))

    demo = False
    record_images = False
    frame_step = 0
    for trials in range(1, n_trials + 1):

        if trials % 50 == 0 and not demo:
            x_dim = np.random.randint(8, 20)
            y_dim = np.random.randint(8, 20)
            n_agents = np.random.randint(3, 8)
            n_goals = n_agents + np.random.randint(0, 3)
            min_dist = int(0.75 * min(x_dim, y_dim))
            env = RailEnv(width=x_dim,
                          height=y_dim,
                          rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
                                                                max_dist=99999,
                                                                seed=0),
                          obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()),
                          number_of_agents=n_agents)
            env.reset(True, True)
            max_steps = int(3 * (env.height + env.width))
            agent_obs = [None] * env.get_num_agents()
            agent_next_obs = [None] * env.get_num_agents()

        # Reset environment
        if file_load:
            obs = env.reset(False, False)
        else:
            obs = env.reset(True, True)
        if demo:
            env_renderer.set_new_rail()
        obs_original = obs.copy()
        final_obs = obs.copy()
        final_obs_next = obs.copy()
        for a in range(env.get_num_agents()):
            data, distance, agent_data = split_tree(tree=np.array(obs[a]),
                                                    current_depth=0)
            data = norm_obs_clip(data)
            distance = norm_obs_clip(distance)
            agent_data = np.clip(agent_data, -1, 1)
            obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
            agent_data = env.agents[a]
            speed = 1  # np.random.randint(1,5)
            agent_data.speed_data['speed'] = 1. / speed
        for i in range(2):
            time_obs.append(obs)
        # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
        for a in range(env.get_num_agents()):
            agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))

        score = 0
        env_done = 0
        # Run episode
        for step in range(max_steps):
            if demo:
                env_renderer.renderEnv(show=True, show_observations=False)
                # observation_helper.util_print_obs_subtree(obs_original[0])
                if record_images:
                    env_renderer.gl.saveImage("./Images/flatland_frame_{:04d}.bmp".format(frame_step))
                    frame_step += 1
            # print(step)
            # Action
            for a in range(env.get_num_agents()):
                if demo:
                    eps = 0
                # action = agent.act(np.array(obs[a]), eps=eps)
                action = agent.act(agent_obs[a], eps=eps)
                action_prob[action] += 1
                action_dict.update({a: action})
            # Environment step
            next_obs, all_rewards, done, _ = env.step(action_dict)
            # print(all_rewards,action)
            obs_original = next_obs.copy()
            for a in range(env.get_num_agents()):
                data, distance, agent_data = split_tree(tree=np.array(next_obs[a]),
                                                        current_depth=0)
                data = norm_obs_clip(data)
                distance = norm_obs_clip(distance)
                agent_data = np.clip(agent_data, -1, 1)
                next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
            time_obs.append(next_obs)

            # Update replay buffer and train agent
            for a in range(env.get_num_agents()):
                agent_next_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
                if done[a]:
                    final_obs[a] = agent_obs[a].copy()
                    final_obs_next[a] = agent_next_obs[a].copy()
                    final_action_dict.update({a: action_dict[a]})
                if not demo and not done[a]:
                    agent.step(agent_obs[a], action_dict[a], all_rewards[a], agent_next_obs[a], done[a])
                score += all_rewards[a] / env.get_num_agents()

            agent_obs = agent_next_obs.copy()
            if done['__all__']:
                env_done = 1
                for a in range(env.get_num_agents()):
                    agent.step(final_obs[a], final_action_dict[a], all_rewards[a], final_obs_next[a], done[a])
                break
        # Epsilon decay
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon

        done_window.append(env_done)
        scores_window.append(score / max_steps)  # save most recent score
        scores.append(np.mean(scores_window))
        dones_list.append((np.mean(done_window)))

        print(
            '\rTraining {} Agents on ({},{}).\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
                env.get_num_agents(), x_dim, y_dim,
                trials,
                np.mean(scores_window),
                100 * np.mean(done_window),
                eps, action_prob / np.sum(action_prob)), end=" ")

        if trials % 100 == 0:
            print(
                '\rTraining {} Agents.\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
                    env.get_num_agents(),
                    trials,
                    np.mean(scores_window),
                    100 * np.mean(done_window),
                    eps,
                    action_prob / np.sum(action_prob)))
            torch.save(agent.qnetwork_local.state_dict(),
                       './Nets/avoid_checkpoint' + str(trials) + '.pth')
            action_prob = [1] * action_size
    plt.plot(scores)
    plt.show()


if __name__ == '__main__':
    main(sys.argv[1:])
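Because n_trials is only bound when -n/--n_trials is supplied, the script keeps its old default of 30000 episodes through the if 'n_trials' not in locals(): guard above. The same behaviour can be expressed without inspecting locals(); the sketch below is an illustrative alternative with a sentinel default, not what the commit itself does.

import getopt
import sys


def parse_trials(argv, default=30000):
    # Illustrative helper only: the commit keeps the parsing inline in main()
    # and falls back via a locals() check instead of a sentinel value.
    n_trials = None
    try:
        opts, _ = getopt.getopt(argv, "n:", ["n_trials="])
    except getopt.GetoptError:
        print('training_navigation.py -n <n_trials>')
        sys.exit(2)
    for opt, arg in opts:
        if opt in ('-n', '--n_trials'):
            n_trials = int(arg)
    return default if n_trials is None else n_trials


print(parse_trials(['-n', '10']))  # -> 10
print(parse_trials([]))            # -> 30000

A second training script is touched as well: its imports are regrouped and the parsed trial count is cast to int.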
import getopt
import random
import sys
from collections import deque
import matplotlib.pyplot as plt
import numpy as np
import torch
from dueling_double_dqn import Agent
...
@@ -16,15 +16,14 @@ from utils.observation_utils import norm_obs_clip, split_tree


def main(argv):
    try:
        opts, args = getopt.getopt(argv, "n:", ["n_trials="])
    except getopt.GetoptError:
        print('training_navigation.py -n <n_trials>')
        sys.exit(2)
    for opt, arg in opts:
        if opt in ('-n', '--n_trials'):
            n_trials = int(arg)
    random.seed(1)
    np.random.seed(1)
...
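The change from n_trials = arg to n_trials = int(arg) matters because getopt hands option values back as strings, while the training loop builds range(1, n_trials + 1). A small sketch of the failure mode the cast avoids:

# getopt returns option values as strings, so without the cast the episode
# loop would fail as soon as it tries to compute n_trials + 1.
n_trials = "10"
try:
    range(1, n_trials + 1)
except TypeError as err:
    print("without int():", err)

n_trials = int("10")  # the form used after this commit
print("with int():", list(range(1, n_trials + 1))[:3], "...")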