diff --git a/scoring/generate_tests.py b/scoring/generate_tests.py index 6839b039c42f08661196f05a3e1e8e899b5d8f26..edec302c305670134c1d594b1c89fd4c222e3a08 100644 --- a/scoring/generate_tests.py +++ b/scoring/generate_tests.py @@ -2,7 +2,7 @@ import time import numpy as np -from utils.misc_utils import create_testsfiles +from utils.misc_utils import create_testfiles with open('parameters.txt', 'r') as inf: parameters = eval(inf.read()) @@ -21,5 +21,4 @@ test_idx = 0 for test_nr in parameters: current_parameters = parameters[test_nr] - create_testsfiles(current_parameters,test_nr) - + create_testfiles(current_parameters, test_nr, nr_trials_per_test=100) diff --git a/scoring/score_test.py b/scoring/score_test.py index 1e46f080d8208af896134b1e6cacfdd476bf6b3c..79f0ee6a21590e021daa732afb7e2d5e8f68ee05 100644 --- a/scoring/score_test.py +++ b/scoring/score_test.py @@ -2,10 +2,13 @@ import time import numpy as np import torch -from utils.misc_utils import run_test,run_test_sequential -from dueling_double_dqn import Agent -from sequential_agent.simple_order_agent import OrderedAgent -with open('parameters.txt','r') as inf: +from flatland.envs.observations import TreeObsForRailEnv +from flatland.envs.predictions import ShortestPathPredictorForRailEnv + +from torch_training.dueling_double_dqn import Agent +from utils.misc_utils import run_test + +with open('parameters.txt', 'r') as inf: parameters = eval(inf.read()) # Parameter initialization @@ -23,29 +26,26 @@ test_times = [] test_dones = [] sequential_agent_test = False +# Load your agent +agent = Agent(state_size, action_size, "FC", 0) +agent.qnetwork_local.load_state_dict(torch.load('../torch_training/Nets/avoid_checkpoint60000.pth')) -# Load agent -if sequential_agent_test: - agent = OrderedAgent() -else: - agent = Agent(state_size, action_size, "FC", 0) - agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint60000.pth')) +# Load the necessary Observation Builder and Predictor +predictor = ShortestPathPredictorForRailEnv() +observation_builder = TreeObsForRailEnv(max_depth=tree_depth, predictor=predictor) start_time_scoring = time.time() score_board = [] for test_nr in parameters: current_parameters = parameters[test_nr] - if sequential_agent_test: - test_score, test_dones, test_time = run_test_sequential(current_parameters, agent, test_nr=test_nr, tree_depth=1) - - else: - test_score, test_dones, test_time = run_test(current_parameters, agent, test_nr=test_nr,tree_depth=3) + test_score, test_dones, test_time = run_test(current_parameters, agent, observation_builder=observation_builder, + test_nr=test_nr, nr_trials_per_test=10) print('{} score was {:.3f} with {:.2f}% environments solved. Test took {:.2f} Seconds to complete.\n'.format( test_nr, np.mean(test_score), np.mean(test_dones) * 100, test_time)) - score_board.append([np.mean(test_score), np.mean(test_dones) * 100, test_time]) + score_board.append([np.mean(test_score), np.mean(test_dones) * 100, test_time]) print('---------') print(' RESULTS') print('---------') diff --git a/scoring/utils/misc_utils.py b/scoring/utils/misc_utils.py index e4aab524164275b47601783bde9d6b0703098ca0..de30bad398c5c09493b8850f02f13326648b2278 100644 --- a/scoring/utils/misc_utils.py +++ b/scoring/utils/misc_utils.py @@ -35,46 +35,17 @@ def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length= print('') -class RandomAgent: - - def __init__(self, state_size, action_size): - self.state_size = state_size - self.action_size = action_size - - def act(self, state, eps=0): - """ - :param state: input is the observation of the agent - :return: returns an action - """ - return np.random.choice(np.arange(self.action_size)) - - def step(self, memories): - """ - Step function to improve agent by adjusting policy given the observations - - :param memories: SARS Tuple to be - :return: - """ - return - - def save(self, filename): - # Store the current policy - return - - def load(self, filename): - # Load a policy - return - - -def run_test(parameters, agent, test_nr=0, tree_depth=3): +def run_test(parameters, agent, observation_builder=None, observation_wrapper=None, test_nr=0, nr_trials_per_test=100): # Parameter initialization features_per_node = 9 start_time_scoring = time.time() action_dict = dict() - nr_trials_per_test = 100 + print('Running {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, parameters[0], parameters[1], parameters[2])) - + if observation_builder == None: + print("No observation defined!") + return # Reset all measurements test_scores = [] test_dones = [] @@ -87,47 +58,39 @@ def run_test(parameters, agent, test_nr=0, tree_depth=3): printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20) for trial in range(nr_trials_per_test): # Reset the env - file_name = "./Envs/{}/Level_{}.pkl".format(test_nr, trial) + file_name = "./Tests/{}/Level_{}.pkl".format(test_nr, trial) env = RailEnv(width=3, height=3, rail_generator=rail_from_file(file_name), - obs_builder_object=TreeObsForRailEnv(max_depth=tree_depth, predictor=ShortestPathPredictorForRailEnv()), + obs_builder_object=observation_builder(), number_of_agents=1, ) obs = env.reset() - agent_obs = [None] * env.get_num_agents() - for a in range(env.get_num_agents()): - data, distance, agent_data = split_tree(tree=np.array(obs[a]), num_features_per_node=9, - current_depth=0) - data = norm_obs_clip(data, fixed_radius=10) - distance = norm_obs_clip(distance) - agent_data = np.clip(agent_data, -1, 1) - agent_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data)) + + if observation_wrapper is not None: + for a in range(env.get_num_agents()): + obs[a] = observation_wrapper(obs[a]) # Run episode trial_score = 0 - max_steps = int(max_time_factor* (env.height + env.width)) + max_steps = int(max_time_factor * (env.height + env.width)) for step in range(max_steps): for a in range(env.get_num_agents()): - action = agent.act(agent_obs[a], eps=0) + action = agent.act(obs[a], eps=0) action_dict.update({a: action}) # Environment step - next_obs, all_rewards, done, _ = env.step(action_dict) + obs, all_rewards, done, _ = env.step(action_dict) for a in range(env.get_num_agents()): - data, distance, agent_data = split_tree(tree=np.array(next_obs[a]), - num_features_per_node=features_per_node, - current_depth=0) - data = norm_obs_clip(data, fixed_radius=10) - distance = norm_obs_clip(distance) - agent_data = np.clip(agent_data, -1, 1) - agent_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data)) + if observation_wrapper is not None: + obs[a] = observation_wrapper(obs[a]) trial_score += np.mean(all_rewards[a]) + if done['__all__']: break test_scores.append(trial_score / max_steps) @@ -138,9 +101,8 @@ def run_test(parameters, agent, test_nr=0, tree_depth=3): return test_scores, test_dones, tot_test_time -def create_testsfiles(parameters, test_nr=0): +def create_testfiles(parameters, test_nr=0, nr_trials_per_test=100): # Parameter initialization - nr_trials_per_test = 100 print('Creating {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, parameters[0], parameters[1], parameters[2])) # Reset environment @@ -159,7 +121,7 @@ def create_testsfiles(parameters, test_nr=0): for trial in range(nr_trials_per_test): # Reset the env env.reset(True, True) - env.save("./Envs/{}/Level_{}.pkl".format(test_nr, trial)) + env.save("./Tests/{}/Level_{}.pkl".format(test_nr, trial)) printProgressBar(trial + 1, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20) return @@ -171,7 +133,7 @@ def render_test(parameters, test_nr=0, nr_examples=5): print('Showing {} Level {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, trial, parameters[0], parameters[1], parameters[2])) - file_name = "./Envs/{}/Level_{}.pkl".format(test_nr, trial) + file_name = "./Tests/{}/Level_{}.pkl".format(test_nr, trial) env = RailEnv(width=1, height=1, @@ -210,7 +172,7 @@ def run_test_sequential(parameters, agent, test_nr=0, tree_depth=3): printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20) for trial in range(nr_trials_per_test): # Reset the env - file_name = "./Envs/{}/Level_{}.pkl".format(test_nr, trial) + file_name = "./Tests/{}/Level_{}.pkl".format(test_nr, trial) env = RailEnv(width=3, height=3,