diff --git a/score_test.py b/score_test.py
index a8bbda7ef257025c4dd79c303394f32cdef095a6..5f61300637561e5a29acfdfe498cc564b42a3e54 100644
--- a/score_test.py
+++ b/score_test.py
@@ -1,27 +1,21 @@
-import random
 import time
-from collections import deque
 
-import matplotlib.pyplot as plt
 import numpy as np
 import torch
 
-from torch_training.dueling_double_dqn import Agent
-from flatland.envs.observations import TreeObsForRailEnv
-from flatland.envs.predictions import ShortestPathPredictorForRailEnv
-from flatland.envs.rail_env import RailEnv
-from flatland.utils.rendertools import RenderTool
-from flatland.envs.generators import complex_rail_generator
-from utils.observation_utils import norm_obs_clip, split_tree
-from flatland.utils.rendertools import RenderTool
-from utils.misc_utils import printProgressBar, RandomAgent
+from torch_training.dueling_double_dqn import Agent
+from utils.misc_utils import RandomAgent, run_test
 
 with open('parameters.txt','r') as inf:
     parameters = eval(inf.read())
 
 # Parameter initialization
 features_per_node = 9
-state_size = features_per_node*21 * 2
+tree_depth = 3
+nodes = 0
+for i in range(tree_depth + 1):
+    nodes += np.power(4, i)
+state_size = features_per_node * nodes * 2
 action_size = 5
 action_dict = dict()
 nr_trials_per_test = 100
@@ -29,96 +23,18 @@ test_results = []
 test_times = []
 test_dones = []
 # Load agent
-#agent = Agent(state_size, action_size, "FC", 0)
-#agent.qnetwork_local.load_state_dict(torch.load('./torch_training/Nets/avoid_checkpoint30000.pth'))
+agent = Agent(state_size, action_size, "FC", 0)
+agent.qnetwork_local.load_state_dict(torch.load('./torch_training/Nets/avoid_checkpoint1700.pth'))
 agent = RandomAgent(state_size, action_size)
 start_time_scoring = time.time()
-for test_nr in parameters:
-    current_parameters = parameters[test_nr]
-    print('\nRunning {} with (x_dim,ydim) = ({},{}) and {} Agents.'.format(test_nr,current_parameters[0],current_parameters[1],current_parameters[2]))
-    # Reset all measurements
-    time_obs = deque(maxlen=2)
-    test_scores = []
-
-    tot_dones = 0
-    tot_test_score = 0
-
-    # Reset environment
-    random.seed(current_parameters[3])
-    np.random.seed(current_parameters[3])
-    nr_paths = max(2,current_parameters[2] + int(0.5*current_parameters[2]))
-    min_dist = int(min([current_parameters[0], current_parameters[1]])*0.75)
-    env = RailEnv(width=current_parameters[0],
-                  height=current_parameters[1],
-                  rail_generator=complex_rail_generator(nr_start_goal=nr_paths, nr_extra=5, min_dist=min_dist, max_dist=99999,
-                                                        seed=current_parameters[3]),
-                  obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv()),
-                  number_of_agents=current_parameters[2])
-    max_steps = max_steps = int(3 * (env.height + env.width))
-    agent_obs = [None] * env.get_num_agents()
-    env_renderer = RenderTool(env, gl="PILSVG", )
-    printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
-    start = time.time()
-    for trial in range(nr_trials_per_test):
-        # Reset the env
-        printProgressBar(trial+1, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
-        obs = env.reset(True, True)
-        #env_renderer.set_new_rail()
-        for a in range(env.get_num_agents()):
-            data, distance, agent_data = split_tree(tree=np.array(obs[a]), num_features_per_node=9,
-                                                    current_depth=0)
-            data = norm_obs_clip(data)
-            distance = norm_obs_clip(distance)
-            agent_data = np.clip(agent_data, -1, 1)
-            obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
-
-        for i in range(2):
-            time_obs.append(obs)
-
-        for a in range(env.get_num_agents()):
-            agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
-
-        # Run episode
-        trial_score = 0
-        for step in range(max_steps):
-
-            for a in range(env.get_num_agents()):
-
-                action = agent.act(agent_obs[a], eps=0)
-                action_dict.update({a: action})
-
-            # Environment step
-            next_obs, all_rewards, done, _ = env.step(action_dict)
-
-            for a in range(env.get_num_agents()):
-                data, distance, agent_data = split_tree(tree=np.array(next_obs[a]), num_features_per_node=features_per_node,
-                                                        current_depth=0)
-                data = norm_obs_clip(data)
-                distance = norm_obs_clip(distance)
-                agent_data = np.clip(agent_data, -1, 1)
-                next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
-            time_obs.append(next_obs)
-            for a in range(env.get_num_agents()):
-                agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
-                trial_score += all_rewards[a] / env.get_num_agents()
-            if done['__all__']:
-                tot_dones += 1
-                break
-        test_scores.append(trial_score / max_steps)
-    end = time.time()
-    comp_time = end-start
-    tot_test_score = np.mean(test_scores)
-    test_results.append(tot_test_score)
-    test_times.append(comp_time)
-    test_dones.append(tot_dones/nr_trials_per_test*100)
-end_time_scoring = time.time()
-tot_test_time = end_time_scoring-start_time_scoring
 test_idx = 0
-print('-----------------------------------------------')
-print(' RESULTS')
-print('-----------------------------------------------')
 for test_nr in parameters:
-    print('{} score was = {:.3f} with {:.2f}% environments solved. Test took {} Seconds to complete.'.format(test_nr,
-        test_results[test_idx],test_dones[test_idx],test_times[test_idx]))
-    test_idx += 1
-print('Total scoring duration was', tot_test_time)
\ No newline at end of file
+    current_parameters = parameters[test_nr]
+    test_score, test_dones, test_time = run_test(current_parameters, agent, test_nr=test_idx)
+    print('---------')
+    print(' RESULTS')
+    print('---------')
+    print('{} score was = {:.3f} with {:.2f}% environments solved. Test took {} Seconds to complete.\n\n\n'.format(
+        test_nr,
+        np.mean(test_score), np.mean(test_dones) * 100, test_time))
+    test_idx += 1
\ No newline at end of file
diff --git a/torch_training/Nets/avoid_checkpoint15000.pth b/torch_training/Nets/avoid_checkpoint15000.pth
index 1ab84e032c64b9b07e138bfa1191a7cfe09f9b6c..ba488f1a074af3f62ad87d54f61a251e8292ae50 100644
Binary files a/torch_training/Nets/avoid_checkpoint15000.pth and b/torch_training/Nets/avoid_checkpoint15000.pth differ
diff --git a/torch_training/training_navigation.py b/torch_training/training_navigation.py
index 65f1e9d00d6893732e96d3edbcab9642a07aeb2d..0356b531a5c9518820754e04f8e16a450256f5d2 100644
--- a/torch_training/training_navigation.py
+++ b/torch_training/training_navigation.py
@@ -5,11 +5,12 @@ import matplotlib.pyplot as plt
 import numpy as np
 import torch
 from dueling_double_dqn import Agent
+from flatland.envs.generators import complex_rail_generator
 from flatland.envs.observations import TreeObsForRailEnv
 from flatland.envs.predictions import ShortestPathPredictorForRailEnv
 from flatland.envs.rail_env import RailEnv
 from flatland.utils.rendertools import RenderTool
-from flatland.envs.generators import complex_rail_generator
+
 from utils.observation_utils import norm_obs_clip, split_tree
 
 random.seed(1)
@@ -47,26 +48,34 @@ env = RailEnv(width=10,
 env.load("./railway/complex_scene.pkl")
 file_load = True
 """
-
-env = RailEnv(width=100,
-              height=100,
-              rail_generator=complex_rail_generator(nr_start_goal=100, nr_extra=5, min_dist=5, max_dist=99999, seed=0),
-              obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv()),
-              number_of_agents=1)
-file_load = False
+x_dim = np.random.randint(8, 20)
+y_dim = np.random.randint(8, 20)
+n_agents = np.random.randint(3, 8)
+n_goals = n_agents + np.random.randint(0, 3)
+min_dist = int(0.75 * min(x_dim, y_dim))
+env = RailEnv(width=x_dim,
+              height=y_dim,
+              rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
+                                                    max_dist=99999,
+                                                    seed=0),
+              obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()),
+              number_of_agents=n_agents)
 env.reset(True, True)
+file_load = False
+
 """
 """
+observation_helper = TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv())
 env_renderer = RenderTool(env, gl="PILSVG",)
 handle = env.get_agent_handles()
 features_per_node = 9
-state_size = features_per_node*21 * 2
+state_size = features_per_node * 85 * 2
 action_size = 5
 n_trials = 30000
 max_steps = int(3 * (env.height + env.width))
 eps = 1.
 eps_end = 0.005
-eps_decay = 0.9997
+eps_decay = 0.9995
 action_dict = dict()
 final_action_dict = dict()
 scores_window = deque(maxlen=100)
@@ -78,23 +87,39 @@ action_prob = [0] * action_size
 agent_obs = [None] * env.get_num_agents()
 agent_next_obs = [None] * env.get_num_agents()
 agent = Agent(state_size, action_size, "FC", 0)
-agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint30000.pth'))
+# agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint15000.pth'))
 
-demo = True
+demo = False
 record_images = False
-
-
 for trials in range(1, n_trials + 1):
 
+    if trials % 50 == 0 and not demo:
+        x_dim = np.random.randint(8, 20)
+        y_dim = np.random.randint(8, 20)
+        n_agents = np.random.randint(3, 8)
+        n_goals = n_agents + np.random.randint(0, 3)
+        min_dist = int(0.75 * min(x_dim, y_dim))
+        env = RailEnv(width=x_dim,
+                      height=y_dim,
+                      rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
+                                                            max_dist=99999,
+                                                            seed=0),
+                      obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()),
+                      number_of_agents=n_agents)
+        env.reset(True, True)
+        max_steps = int(3 * (env.height + env.width))
+        agent_obs = [None] * env.get_num_agents()
+        agent_next_obs = [None] * env.get_num_agents()
 
     # Reset environment
-    if file_load :
+    if file_load:
         obs = env.reset(False, False)
     else:
         obs = env.reset(True, True)
     if demo:
         env_renderer.set_new_rail()
+    obs_original = obs.copy()
     final_obs = obs.copy()
     final_obs_next = obs.copy()
     for a in range(env.get_num_agents()):
@@ -120,7 +145,8 @@ for trials in range(1, n_trials + 1):
     # Run episode
     for step in range(max_steps):
         if demo:
-            env_renderer.renderEnv(show=True, show_observations=False)
+            env_renderer.renderEnv(show=True, show_observations=True)
+            observation_helper.util_print_obs_subtree(obs_original[0])
         if record_images:
             env_renderer.gl.saveImage("./Images/flatland_frame_{:04d}.bmp".format(step))
         # print(step)
@@ -135,6 +161,8 @@ for trials in range(1, n_trials + 1):
 
         # Environment step
         next_obs, all_rewards, done, _ = env.step(action_dict)
+        # print(all_rewards,action)
+        obs_original = next_obs.copy()
         for a in range(env.get_num_agents()):
             data, distance, agent_data = split_tree(tree=np.array(next_obs[a]), num_features_per_node=features_per_node,
                                                     current_depth=0)
@@ -170,8 +198,8 @@ for trials in range(1, n_trials + 1):
     dones_list.append((np.mean(done_window)))
 
     print(
-        '\rTraining {} Agents.\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
-            env.get_num_agents(),
+        '\rTraining {} Agents on ({},{}).\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+            env.get_num_agents(), x_dim, y_dim,
             trials,
             np.mean(scores_window),
             100 * np.mean(done_window),
diff --git a/utils/misc_utils.py b/utils/misc_utils.py
index 62189c52fda1a4f862df44268f5c815c1462a434..097450b6dbf1c3eab92d2c110df7528f2850a62e 100644
--- a/utils/misc_utils.py
+++ b/utils/misc_utils.py
@@ -1,5 +1,16 @@
-# Print iterations progress
+import random
+import time
+from collections import deque
+
 import numpy as np
+from flatland.envs.generators import complex_rail_generator
+from flatland.envs.observations import TreeObsForRailEnv
+from flatland.envs.predictions import ShortestPathPredictorForRailEnv
+from flatland.envs.rail_env import RailEnv
+
+from utils.observation_utils import norm_obs_clip, split_tree
+
+
 def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '*'):
     """
     Call in a loop to create terminal progress bar
@@ -49,3 +60,84 @@ class RandomAgent:
     def load(self, filename):
         # Load a policy
         return
+
+
+def run_test(parameters, agent, test_nr=0, tree_depth=3):
+    # Parameter initialization
+    features_per_node = 9
+    start_time_scoring = time.time()
+    action_dict = dict()
+    nr_trials_per_test = 100
+    print('Running Test {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, parameters[0], parameters[1],
+                                                                                parameters[2]))
+
+    # Reset all measurements
+    time_obs = deque(maxlen=2)
+    test_scores = []
+    test_dones = []
+
+    # Reset environment
+    random.seed(parameters[3])
+    np.random.seed(parameters[3])
+    nr_paths = max(2, parameters[2] + int(0.5 * parameters[2]))
+    min_dist = int(min([parameters[0], parameters[1]]) * 0.75)
+    env = RailEnv(width=parameters[0],
+                  height=parameters[1],
+                  rail_generator=complex_rail_generator(nr_start_goal=nr_paths, nr_extra=5, min_dist=min_dist,
+                                                        max_dist=99999,
+                                                        seed=parameters[3]),
+                  obs_builder_object=TreeObsForRailEnv(max_depth=tree_depth,
+                                                       predictor=ShortestPathPredictorForRailEnv()),
+                  number_of_agents=parameters[2])
+    max_steps = int(3 * (env.height + env.width))
+    agent_obs = [None] * env.get_num_agents()
+    printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
+    for trial in range(nr_trials_per_test):
+        # Reset the env
+        obs = env.reset(True, True)
+        for a in range(env.get_num_agents()):
+            data, distance, agent_data = split_tree(tree=np.array(obs[a]), num_features_per_node=9,
+                                                    current_depth=0)
+            data = norm_obs_clip(data)
+            distance = norm_obs_clip(distance)
+            agent_data = np.clip(agent_data, -1, 1)
+            obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
+
+        for i in range(2):
+            time_obs.append(obs)
+
+        for a in range(env.get_num_agents()):
+            agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
+
+        # Run episode
+        trial_score = 0
+        for step in range(max_steps):
+
+            for a in range(env.get_num_agents()):
+                action = agent.act(agent_obs[a], eps=0)
+                action_dict.update({a: action})
+
+            # Environment step
+            next_obs, all_rewards, done, _ = env.step(action_dict)
+
+            for a in range(env.get_num_agents()):
+                data, distance, agent_data = split_tree(tree=np.array(next_obs[a]),
+                                                        num_features_per_node=features_per_node,
+                                                        current_depth=0)
+                data = norm_obs_clip(data)
+                distance = norm_obs_clip(distance)
+                agent_data = np.clip(agent_data, -1, 1)
+                next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
+            time_obs.append(next_obs)
+            for a in range(env.get_num_agents()):
+                agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
+                trial_score += all_rewards[a] / env.get_num_agents()
+
+            if done['__all__']:
+                break
+        test_scores.append(trial_score / max_steps)
+        test_dones.append(done['__all__'])
+        printProgressBar(trial + 1, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
+    end_time_scoring = time.time()
+    tot_test_time = end_time_scoring - start_time_scoring
+    return test_scores, test_dones, tot_test_time