diff --git a/scoring/README.md b/scoring/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/scoring/Tests/Test_0/.gitignore b/scoring/Tests/Test_0/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b073808516582983e670f519099583d95e28489f
--- /dev/null
+++ b/scoring/Tests/Test_0/.gitignore
@@ -0,0 +1,4 @@
+## Ignore everything in this directory
+*
+# Except this file
+!.gitignore
\ No newline at end of file
diff --git a/scoring/Tests/Test_1/.gitignore b/scoring/Tests/Test_1/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b073808516582983e670f519099583d95e28489f
--- /dev/null
+++ b/scoring/Tests/Test_1/.gitignore
@@ -0,0 +1,4 @@
+## Ignore everything in this directory
+*
+# Except this file
+!.gitignore
\ No newline at end of file
diff --git a/scoring/Tests/Test_2/.gitignore b/scoring/Tests/Test_2/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b073808516582983e670f519099583d95e28489f
--- /dev/null
+++ b/scoring/Tests/Test_2/.gitignore
@@ -0,0 +1,4 @@
+## Ignore everything in this directory
+*
+# Except this file
+!.gitignore
\ No newline at end of file
diff --git a/scoring/Tests/Test_3/.gitignore b/scoring/Tests/Test_3/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b073808516582983e670f519099583d95e28489f
--- /dev/null
+++ b/scoring/Tests/Test_3/.gitignore
@@ -0,0 +1,4 @@
+## Ignore everything in this directory
+*
+# Except this file
+!.gitignore
\ No newline at end of file
diff --git a/scoring/Tests/Test_4/.gitignore b/scoring/Tests/Test_4/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b073808516582983e670f519099583d95e28489f
--- /dev/null
+++ b/scoring/Tests/Test_4/.gitignore
@@ -0,0 +1,4 @@
+## Ignore everything in this directory
+*
+# Except this file
+!.gitignore
\ No newline at end of file
diff --git a/scoring/Tests/Test_5/.gitignore b/scoring/Tests/Test_5/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b073808516582983e670f519099583d95e28489f
--- /dev/null
+++ b/scoring/Tests/Test_5/.gitignore
@@ -0,0 +1,4 @@
+## Ignore everything in this directory
+*
+# Except this file
+!.gitignore
\ No newline at end of file
diff --git a/scoring/Tests/Test_6/.gitignore b/scoring/Tests/Test_6/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b073808516582983e670f519099583d95e28489f
--- /dev/null
+++ b/scoring/Tests/Test_6/.gitignore
@@ -0,0 +1,4 @@
+## Ignore everything in this directory
+*
+# Except this file
+!.gitignore
\ No newline at end of file
diff --git a/scoring/Tests/Test_7/.gitignore b/scoring/Tests/Test_7/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b073808516582983e670f519099583d95e28489f
--- /dev/null
+++ b/scoring/Tests/Test_7/.gitignore
@@ -0,0 +1,4 @@
+## Ignore everything in this directory
+*
+# Except this file
+!.gitignore
\ No newline at end of file
diff --git a/scoring/Tests/Test_8/.gitignore b/scoring/Tests/Test_8/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b073808516582983e670f519099583d95e28489f
--- /dev/null
+++ b/scoring/Tests/Test_8/.gitignore
@@ -0,0 +1,4 @@
+## Ignore everything in this directory
+*
+# Except this file
+!.gitignore
\ No newline at end of file
diff --git a/scoring/Tests/Test_9/.gitignore b/scoring/Tests/Test_9/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b073808516582983e670f519099583d95e28489f
--- /dev/null
+++ b/scoring/Tests/Test_9/.gitignore
@@ -0,0 +1,4 @@
+## Ignore everything in this directory
+*
+# Except this file
+!.gitignore
\ No newline at end of file
diff --git a/scoring/generate_tests.py b/scoring/generate_tests.py
new file mode 100644
index 0000000000000000000000000000000000000000..6839b039c42f08661196f05a3e1e8e899b5d8f26
--- /dev/null
+++ b/scoring/generate_tests.py
@@ -0,0 +1,25 @@
+import time
+
+import numpy as np
+
+from utils.misc_utils import create_testsfiles
+
+with open('parameters.txt', 'r') as inf:
+    parameters = eval(inf.read())
+
+# Parameter initialization
+features_per_node = 9
+tree_depth = 3
+nodes = 0
+for i in range(tree_depth + 1):
+    nodes += np.power(4, i)
+state_size = features_per_node * nodes * 2
+action_size = 5
+action_dict = dict()
+nr_trials_per_test = 100
+test_idx = 0
+
+for test_nr in parameters:
+    current_parameters = parameters[test_nr]
+    create_testsfiles(current_parameters, test_nr)
+
diff --git a/scoring/parameters.txt b/scoring/parameters.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dda391abfaee39c1ff3a8f844aa76993cdd1d270
--- /dev/null
+++ b/scoring/parameters.txt
@@ -0,0 +1,11 @@
+{'Test_0':[10,10,1,3],
+'Test_1':[10,10,3,3],
+'Test_2':[10,10,5,3],
+'Test_3':[50,10,10,3],
+'Test_4':[20,50,10,3],
+'Test_5':[20,20,15,3],
+'Test_6':[50,50,10,3],
+'Test_7':[50,50,40,3],
+'Test_8':[100,100,10,3],
+'Test_9':[100,100,50,3]
+}
\ No newline at end of file
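Each row of parameters.txt is read as [width, height, nr_agents, seed]; that is how create_testsfiles and run_test below index into it. Since the file holds a plain Python literal, ast.literal_eval would be a safer drop-in for the eval calls in these scripts — a minimal sketch, not part of the diff:

    import ast

    with open('parameters.txt', 'r') as inf:
        parameters = ast.literal_eval(inf.read())  # same dict as eval, but no arbitrary code execution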
diff --git a/scoring/score_test.py b/scoring/score_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e46f080d8208af896134b1e6cacfdd476bf6b3c
--- /dev/null
+++ b/scoring/score_test.py
@@ -0,0 +1,56 @@
+import time
+
+import numpy as np
+import torch
+from utils.misc_utils import run_test, run_test_sequential
+from dueling_double_dqn import Agent
+from sequential_agent.simple_order_agent import OrderedAgent
+with open('parameters.txt', 'r') as inf:
+    parameters = eval(inf.read())
+
+# Parameter initialization
+features_per_node = 9
+tree_depth = 3
+nodes = 0
+for i in range(tree_depth + 1):
+    nodes += np.power(4, i)
+state_size = features_per_node * nodes
+action_size = 5
+action_dict = dict()
+nr_trials_per_test = 100
+test_results = []
+test_times = []
+test_dones = []
+sequential_agent_test = False
+
+
+# Load agent
+if sequential_agent_test:
+    agent = OrderedAgent()
+else:
+    agent = Agent(state_size, action_size, "FC", 0)
+    agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint60000.pth'))
+
+start_time_scoring = time.time()
+
+score_board = []
+for test_nr in parameters:
+    current_parameters = parameters[test_nr]
+    if sequential_agent_test:
+        test_score, test_dones, test_time = run_test_sequential(current_parameters, agent, test_nr=test_nr, tree_depth=1)
+
+    else:
+        test_score, test_dones, test_time = run_test(current_parameters, agent, test_nr=test_nr, tree_depth=3)
+    print('{} score was {:.3f} with {:.2f}% environments solved. Test took {:.2f} seconds to complete.\n'.format(
+        test_nr,
+        np.mean(test_score), np.mean(test_dones) * 100, test_time))
+
+    score_board.append([np.mean(test_score), np.mean(test_dones) * 100, test_time])
+print('---------')
+print(' RESULTS')
+print('---------')
+test_idx = 0
+for test_nr in parameters:
+    print('{} score was {:.3f}\twith {:.2f}% environments solved.\tTest took {:.2f} seconds to complete.'.format(
+        test_nr, score_board[test_idx][0], score_board[test_idx][1], score_board[test_idx][2]))
+    test_idx += 1
diff --git a/scoring/show_tests.py b/scoring/show_tests.py
new file mode 100644
index 0000000000000000000000000000000000000000..11d7f29fffb2c723d68eaa9771c4c1749f7acd52
--- /dev/null
+++ b/scoring/show_tests.py
@@ -0,0 +1,25 @@
+import time
+
+import numpy as np
+
+from utils.misc_utils import render_test
+
+with open('parameters.txt', 'r') as inf:
+    parameters = eval(inf.read())
+
+# Parameter initialization
+features_per_node = 9
+tree_depth = 3
+nodes = 0
+for i in range(tree_depth + 1):
+    nodes += np.power(4, i)
+state_size = features_per_node * nodes * 2
+action_size = 5
+action_dict = dict()
+nr_trials_per_test = 100
+test_idx = 0
+
+for test_nr in parameters:
+    current_parameters = parameters[test_nr]
+    render_test(current_parameters, test_nr, nr_examples=2)
+
diff --git a/scoring/utils/__init__.py b/scoring/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/scoring/utils/misc_utils.py b/scoring/utils/misc_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4aab524164275b47601783bde9d6b0703098ca0
--- /dev/null
+++ b/scoring/utils/misc_utils.py
@@ -0,0 +1,252 @@
+import random
+import time
+from collections import deque
+
+import numpy as np
+from flatland.envs.generators import complex_rail_generator, rail_from_file
+from flatland.envs.observations import GlobalObsForRailEnv, TreeObsForRailEnv
+from flatland.envs.predictions import ShortestPathPredictorForRailEnv
+from flatland.utils.rendertools import RenderTool
+from flatland.envs.rail_env import RailEnv
+
+from utils.observation_utils import norm_obs_clip, split_tree, max_lt
+
+# Time factor to test the max time allowed for an env.
+max_time_factor = 1
+
+def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='*'):
+    """
+    Call in a loop to create a terminal progress bar
+    @params:
+        iteration   - Required  : current iteration (Int)
+        total       - Required  : total iterations (Int)
+        prefix      - Optional  : prefix string (Str)
+        suffix      - Optional  : suffix string (Str)
+        decimals    - Optional  : positive number of decimals in percent complete (Int)
+        length      - Optional  : character length of bar (Int)
+        fill        - Optional  : bar fill character (Str)
+    """
+    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
+    filledLength = int(length * iteration // total)
+    bar = fill * filledLength + '_' * (length - filledLength)
+    print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end=" ")
+    # Print New Line on Complete
+    if iteration == total:
+        print('')
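The test runners below call printProgressBar once before their trial loop and once per completed trial. A minimal standalone sketch of the same pattern (time.sleep stands in for one trial; nothing here goes beyond the function's own signature):

    import time
    from utils.misc_utils import printProgressBar

    printProgressBar(0, 100, prefix='Progress:', suffix='Complete', length=20)
    for trial in range(100):
        time.sleep(0.01)  # placeholder for running one trial
        printProgressBar(trial + 1, 100, prefix='Progress:', suffix='Complete', length=20)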
+
+
+class RandomAgent:
+
+    def __init__(self, state_size, action_size):
+        self.state_size = state_size
+        self.action_size = action_size
+
+    def act(self, state, eps=0):
+        """
+        :param state: input is the observation of the agent
+        :return: returns an action
+        """
+        return np.random.choice(np.arange(self.action_size))
+
+    def step(self, memories):
+        """
+        Step function to improve agent by adjusting policy given the observations
+
+        :param memories: SARS tuple to learn from
+        :return:
+        """
+        return
+
+    def save(self, filename):
+        # Store the current policy
+        return
+
+    def load(self, filename):
+        # Load a policy
+        return
+
+
+def run_test(parameters, agent, test_nr=0, tree_depth=3):
+    # Parameter initialization
+    features_per_node = 9
+    start_time_scoring = time.time()
+    action_dict = dict()
+    nr_trials_per_test = 100
+    print('Running {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, parameters[0], parameters[1],
+                                                                          parameters[2]))
+
+    # Reset all measurements
+    test_scores = []
+    test_dones = []
+
+    # Reset environment
+    random.seed(parameters[3])
+    np.random.seed(parameters[3])
+
+
+    printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
+    for trial in range(nr_trials_per_test):
+        # Reset the env
+        file_name = "./Envs/{}/Level_{}.pkl".format(test_nr, trial)
+
+        env = RailEnv(width=3,
+                      height=3,
+                      rail_generator=rail_from_file(file_name),
+                      obs_builder_object=TreeObsForRailEnv(max_depth=tree_depth, predictor=ShortestPathPredictorForRailEnv()),
+                      number_of_agents=1,
+                      )
+
+        obs = env.reset()
+        agent_obs = [None] * env.get_num_agents()
+        for a in range(env.get_num_agents()):
+            data, distance, agent_data = split_tree(tree=np.array(obs[a]), num_features_per_node=9,
+                                                    current_depth=0)
+            data = norm_obs_clip(data, fixed_radius=10)
+            distance = norm_obs_clip(distance)
+            agent_data = np.clip(agent_data, -1, 1)
+            agent_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
+
+
+        # Run episode
+        trial_score = 0
+        max_steps = int(max_time_factor * (env.height + env.width))
+        for step in range(max_steps):
+
+            for a in range(env.get_num_agents()):
+                action = agent.act(agent_obs[a], eps=0)
+                action_dict.update({a: action})
+
+            # Environment step
+            next_obs, all_rewards, done, _ = env.step(action_dict)
+
+            for a in range(env.get_num_agents()):
+                data, distance, agent_data = split_tree(tree=np.array(next_obs[a]),
+                                                        num_features_per_node=features_per_node,
+                                                        current_depth=0)
+                data = norm_obs_clip(data, fixed_radius=10)
+                distance = norm_obs_clip(distance)
+                agent_data = np.clip(agent_data, -1, 1)
+                agent_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
+                trial_score += np.mean(all_rewards[a])
+            if done['__all__']:
+                break
+        test_scores.append(trial_score / max_steps)
+        test_dones.append(done['__all__'])
+        printProgressBar(trial + 1, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
+    end_time_scoring = time.time()
+    tot_test_time = end_time_scoring - start_time_scoring
+    return test_scores, test_dones, tot_test_time
+
+
+def create_testsfiles(parameters, test_nr=0):
+    # Parameter initialization
+    nr_trials_per_test = 100
+    print('Creating {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, parameters[0], parameters[1],
+                                                                           parameters[2]))
+    # Reset environment
+    random.seed(parameters[3])
+    np.random.seed(parameters[3])
+    nr_paths = max(4, parameters[2] + int(0.5 * parameters[2]))
+    min_dist = int(min([parameters[0], parameters[1]]) * 0.75)
+    env = RailEnv(width=parameters[0],
+                  height=parameters[1],
+                  rail_generator=complex_rail_generator(nr_start_goal=nr_paths, nr_extra=5, min_dist=min_dist,
+                                                        max_dist=99999,
+                                                        seed=parameters[3]),
+                  obs_builder_object=TreeObsForRailEnv(max_depth=2),
+                  number_of_agents=parameters[2])
+    printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
+    for trial in range(nr_trials_per_test):
+        # Reset the env
+        env.reset(True, True)
+        env.save("./Envs/{}/Level_{}.pkl".format(test_nr, trial))
+        printProgressBar(trial + 1, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
+
+    return
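Replaying create_testsfiles' own two formulas on one row of parameters.txt makes the derived generator arguments concrete; for Test_7 = [50, 50, 40, 3]:

    width, height, nr_agents, seed = 50, 50, 40, 3
    nr_paths = max(4, nr_agents + int(0.5 * nr_agents))  # max(4, 60) = 60 start/goal pairs
    min_dist = int(min([width, height]) * 0.75)          # int(37.5) = 37 cells between start and goal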
+
+
+def render_test(parameters, test_nr=0, nr_examples=5):
+    for trial in range(nr_examples):
+        # Reset the env
+        print('Showing {} Level {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, trial, parameters[0],
+                                                                                       parameters[1],
+                                                                                       parameters[2]))
+        file_name = "./Envs/{}/Level_{}.pkl".format(test_nr, trial)
+
+        env = RailEnv(width=1,
+                      height=1,
+                      rail_generator=rail_from_file(file_name),
+                      obs_builder_object=TreeObsForRailEnv(max_depth=2),
+                      number_of_agents=1,
+                      )
+        env_renderer = RenderTool(env, gl="PILSVG")
+        env_renderer.set_new_rail()
+
+        env.reset(False, False)
+        env_renderer.render_env(show=True, show_observations=False)
+
+        time.sleep(0.1)
+        env_renderer.close_window()
+    return
+
+def run_test_sequential(parameters, agent, test_nr=0, tree_depth=3):
+    # Parameter initialization
+    features_per_node = 9
+    start_time_scoring = time.time()
+    action_dict = dict()
+    nr_trials_per_test = 100
+    print('Running {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, parameters[0], parameters[1],
+                                                                          parameters[2]))
+
+    # Reset all measurements
+    test_scores = []
+    test_dones = []
+
+    # Reset environment
+    random.seed(parameters[3])
+    np.random.seed(parameters[3])
+
+
+    printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
+    for trial in range(nr_trials_per_test):
+        # Reset the env
+        file_name = "./Envs/{}/Level_{}.pkl".format(test_nr, trial)
+
+        env = RailEnv(width=3,
+                      height=3,
+                      rail_generator=rail_from_file(file_name),
+                      obs_builder_object=TreeObsForRailEnv(max_depth=tree_depth, predictor=ShortestPathPredictorForRailEnv()),
+                      number_of_agents=1,
+                      )
+
+        obs = env.reset()
+        done = env.dones
+        # Run episode
+        trial_score = 0
+        max_steps = int(max_time_factor * (env.height + env.width))
+        for step in range(max_steps):
+
+            # Action
+            acting_agent = 0
+            for a in range(env.get_num_agents()):
+                if done[a]:
+                    acting_agent += 1
+                if acting_agent == a:
+                    action = agent.act(obs[acting_agent], eps=0)
+                else:
+                    action = 0
+                action_dict.update({a: action})
+
+            # Environment step
+
+            obs, all_rewards, done, _ = env.step(action_dict)
+            for a in range(env.get_num_agents()):
+                trial_score += np.mean(all_rewards[a])
+            if done['__all__']:
+                break
+        test_scores.append(trial_score / max_steps)
+        test_dones.append(done['__all__'])
+        printProgressBar(trial + 1, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
+    end_time_scoring = time.time()
+    tot_test_time = end_time_scoring - start_time_scoring
+    return test_scores, test_dones, tot_test_time
diff --git a/scoring/utils/observation_utils.py b/scoring/utils/observation_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..787dfcf97a9fea85775c609c1ab8be4f38654d37
--- /dev/null
+++ b/scoring/utils/observation_utils.py
@@ -0,0 +1,101 @@
+import numpy as np
+
+
+def max_lt(seq, val):
+    """
+    Return the greatest non-negative item in seq for which item < val applies.
+    0 is returned if seq was empty or no item qualified.
+    """
+    max = 0
+    idx = len(seq) - 1
+    while idx >= 0:
+        if seq[idx] < val and seq[idx] >= 0 and seq[idx] > max:
+            max = seq[idx]
+        idx -= 1
+    return max
+
+
+def min_lt(seq, val):
+    """
+    Return the smallest item in seq for which item >= val applies.
+    np.inf is returned if seq was empty or all items in seq were < val.
+    """
+    min = np.inf
+    idx = len(seq) - 1
+    while idx >= 0:
+        if seq[idx] >= val and seq[idx] < min:
+            min = seq[idx]
+        idx -= 1
+    return min
+
+
+def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0):
+    """
+    Normalizes an observation by its largest value (or by fixed_radius, if given) and clips it.
+    :param obs: Observation that should be normalized
+    :param clip_min: min value where observation will be clipped
+    :param clip_max: max value where observation will be clipped
+    :return: returns the normalized and clipped observation
+    """
+    if fixed_radius > 0:
+        max_obs = fixed_radius
+    else:
+        max_obs = max(1, max_lt(obs, 1000))
+
+    min_obs = 0  # min(max_obs, min_lt(obs, 0))
+
+    if max_obs == min_obs:
+        return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
+    norm = np.abs(max_obs - min_obs)
+    if norm == 0:
+        norm = 1.
+    return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
+
+
+def split_tree(tree, num_features_per_node=9, current_depth=0):
+    """
+    Splits the tree observation into different sub groups that need the same normalization.
+    This is necessary because the tree observation includes two different distances and binary data:
+    1. Distance from the agent --> This is measured in cells from current agent location
+    2. Distance to target --> This is measured as distance from cell to agent target
+    3. Binary data --> Contains information about presence of object --> No normalization necessary
+    Number 1. will depend on the depth and size of the tree search
+    Number 2. will depend on the size of the map and thus the max distance on the map
+    Number 3. Is independent of tree depth and map size and thus must be handled differently
+    Therefore we split the tree into these three classes for better normalization.
+    :param tree: Tree that needs to be split
+    :param num_features_per_node: Features per node ATTENTION! This parameter is vital to correct splitting of the tree.
+    :param current_depth: Keeping track of the current depth in the tree
+    :return: Returns the three different groups of distance and binary values.
+    """
+
+    if len(tree) < num_features_per_node:
+        return [], [], []
+
+    depth = 0
+    tmp = len(tree) / num_features_per_node - 1
+    pow4 = 4
+    while tmp > 0:
+        tmp -= pow4
+        depth += 1
+        pow4 *= 4
+    child_size = (len(tree) - num_features_per_node) // 4
+    """
+    Here we split the node features into the different classes of distances and binary values.
+    Pay close attention to this part if you modify any of the features in the tree observation.
+ """ + tree_data = tree[:6].tolist() + distance_data = [tree[6]] + agent_data = tree[7:num_features_per_node].tolist() + # Split each child of the current node and continue to next depth level + for children in range(4): + child_tree = tree[(num_features_per_node + children * child_size): + (num_features_per_node + (children + 1) * child_size)] + tmp_tree_data, tmp_distance_data, tmp_agent_data = split_tree(child_tree, + num_features_per_node, + current_depth=current_depth + 1) + if len(tmp_tree_data) > 0: + tree_data.extend(tmp_tree_data) + distance_data.extend(tmp_distance_data) + agent_data.extend(tmp_agent_data) + return tree_data, distance_data, agent_data