Skip to content
Snippets Groups Projects
Commit 489f5a97 authored by Erik Nygren's avatar Erik Nygren
Browse files

Added the ability to generate tests and to test your own agent.

parent 3412e9f5
No related branches found
No related tags found
No related merge requests found
...@@ -2,7 +2,7 @@ import time ...@@ -2,7 +2,7 @@ import time
import numpy as np import numpy as np
from utils.misc_utils import create_testsfiles from utils.misc_utils import create_testfiles
with open('parameters.txt', 'r') as inf: with open('parameters.txt', 'r') as inf:
parameters = eval(inf.read()) parameters = eval(inf.read())
...@@ -21,5 +21,4 @@ test_idx = 0 ...@@ -21,5 +21,4 @@ test_idx = 0
for test_nr in parameters: for test_nr in parameters:
current_parameters = parameters[test_nr] current_parameters = parameters[test_nr]
create_testsfiles(current_parameters,test_nr) create_testfiles(current_parameters, test_nr, nr_trials_per_test=100)
...@@ -2,10 +2,13 @@ import time ...@@ -2,10 +2,13 @@ import time
import numpy as np import numpy as np
import torch import torch
from utils.misc_utils import run_test,run_test_sequential from flatland.envs.observations import TreeObsForRailEnv
from dueling_double_dqn import Agent from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from sequential_agent.simple_order_agent import OrderedAgent
with open('parameters.txt','r') as inf: from torch_training.dueling_double_dqn import Agent
from utils.misc_utils import run_test
with open('parameters.txt', 'r') as inf:
parameters = eval(inf.read()) parameters = eval(inf.read())
# Parameter initialization # Parameter initialization
...@@ -23,29 +26,26 @@ test_times = [] ...@@ -23,29 +26,26 @@ test_times = []
test_dones = [] test_dones = []
sequential_agent_test = False sequential_agent_test = False
# Load your agent
agent = Agent(state_size, action_size, "FC", 0)
agent.qnetwork_local.load_state_dict(torch.load('../torch_training/Nets/avoid_checkpoint60000.pth'))
# Load agent # Load the necessary Observation Builder and Predictor
if sequential_agent_test: predictor = ShortestPathPredictorForRailEnv()
agent = OrderedAgent() observation_builder = TreeObsForRailEnv(max_depth=tree_depth, predictor=predictor)
else:
agent = Agent(state_size, action_size, "FC", 0)
agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint60000.pth'))
start_time_scoring = time.time() start_time_scoring = time.time()
score_board = [] score_board = []
for test_nr in parameters: for test_nr in parameters:
current_parameters = parameters[test_nr] current_parameters = parameters[test_nr]
if sequential_agent_test: test_score, test_dones, test_time = run_test(current_parameters, agent, observation_builder=observation_builder,
test_score, test_dones, test_time = run_test_sequential(current_parameters, agent, test_nr=test_nr, tree_depth=1) test_nr=test_nr, nr_trials_per_test=10)
else:
test_score, test_dones, test_time = run_test(current_parameters, agent, test_nr=test_nr,tree_depth=3)
print('{} score was {:.3f} with {:.2f}% environments solved. Test took {:.2f} Seconds to complete.\n'.format( print('{} score was {:.3f} with {:.2f}% environments solved. Test took {:.2f} Seconds to complete.\n'.format(
test_nr, test_nr,
np.mean(test_score), np.mean(test_dones) * 100, test_time)) np.mean(test_score), np.mean(test_dones) * 100, test_time))
score_board.append([np.mean(test_score), np.mean(test_dones) * 100, test_time]) score_board.append([np.mean(test_score), np.mean(test_dones) * 100, test_time])
print('---------') print('---------')
print(' RESULTS') print(' RESULTS')
print('---------') print('---------')
......
...@@ -35,46 +35,17 @@ def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length= ...@@ -35,46 +35,17 @@ def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=
print('') print('')
class RandomAgent: def run_test(parameters, agent, observation_builder=None, observation_wrapper=None, test_nr=0, nr_trials_per_test=100):
def __init__(self, state_size, action_size):
self.state_size = state_size
self.action_size = action_size
def act(self, state, eps=0):
"""
:param state: input is the observation of the agent
:return: returns an action
"""
return np.random.choice(np.arange(self.action_size))
def step(self, memories):
"""
Step function to improve agent by adjusting policy given the observations
:param memories: SARS Tuple to be
:return:
"""
return
def save(self, filename):
# Store the current policy
return
def load(self, filename):
# Load a policy
return
def run_test(parameters, agent, test_nr=0, tree_depth=3):
# Parameter initialization # Parameter initialization
features_per_node = 9 features_per_node = 9
start_time_scoring = time.time() start_time_scoring = time.time()
action_dict = dict() action_dict = dict()
nr_trials_per_test = 100
print('Running {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, parameters[0], parameters[1], print('Running {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, parameters[0], parameters[1],
parameters[2])) parameters[2]))
if observation_builder == None:
print("No observation defined!")
return
# Reset all measurements # Reset all measurements
test_scores = [] test_scores = []
test_dones = [] test_dones = []
...@@ -87,47 +58,39 @@ def run_test(parameters, agent, test_nr=0, tree_depth=3): ...@@ -87,47 +58,39 @@ def run_test(parameters, agent, test_nr=0, tree_depth=3):
printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20) printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
for trial in range(nr_trials_per_test): for trial in range(nr_trials_per_test):
# Reset the env # Reset the env
file_name = "./Envs/{}/Level_{}.pkl".format(test_nr, trial) file_name = "./Tests/{}/Level_{}.pkl".format(test_nr, trial)
env = RailEnv(width=3, env = RailEnv(width=3,
height=3, height=3,
rail_generator=rail_from_file(file_name), rail_generator=rail_from_file(file_name),
obs_builder_object=TreeObsForRailEnv(max_depth=tree_depth, predictor=ShortestPathPredictorForRailEnv()), obs_builder_object=observation_builder(),
number_of_agents=1, number_of_agents=1,
) )
obs = env.reset() obs = env.reset()
agent_obs = [None] * env.get_num_agents()
for a in range(env.get_num_agents()): if observation_wrapper is not None:
data, distance, agent_data = split_tree(tree=np.array(obs[a]), num_features_per_node=9, for a in range(env.get_num_agents()):
current_depth=0) obs[a] = observation_wrapper(obs[a])
data = norm_obs_clip(data, fixed_radius=10)
distance = norm_obs_clip(distance)
agent_data = np.clip(agent_data, -1, 1)
agent_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
# Run episode # Run episode
trial_score = 0 trial_score = 0
max_steps = int(max_time_factor* (env.height + env.width)) max_steps = int(max_time_factor * (env.height + env.width))
for step in range(max_steps): for step in range(max_steps):
for a in range(env.get_num_agents()): for a in range(env.get_num_agents()):
action = agent.act(agent_obs[a], eps=0) action = agent.act(obs[a], eps=0)
action_dict.update({a: action}) action_dict.update({a: action})
# Environment step # Environment step
next_obs, all_rewards, done, _ = env.step(action_dict) obs, all_rewards, done, _ = env.step(action_dict)
for a in range(env.get_num_agents()): for a in range(env.get_num_agents()):
data, distance, agent_data = split_tree(tree=np.array(next_obs[a]), if observation_wrapper is not None:
num_features_per_node=features_per_node, obs[a] = observation_wrapper(obs[a])
current_depth=0)
data = norm_obs_clip(data, fixed_radius=10)
distance = norm_obs_clip(distance)
agent_data = np.clip(agent_data, -1, 1)
agent_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
trial_score += np.mean(all_rewards[a]) trial_score += np.mean(all_rewards[a])
if done['__all__']: if done['__all__']:
break break
test_scores.append(trial_score / max_steps) test_scores.append(trial_score / max_steps)
...@@ -138,9 +101,8 @@ def run_test(parameters, agent, test_nr=0, tree_depth=3): ...@@ -138,9 +101,8 @@ def run_test(parameters, agent, test_nr=0, tree_depth=3):
return test_scores, test_dones, tot_test_time return test_scores, test_dones, tot_test_time
def create_testsfiles(parameters, test_nr=0): def create_testfiles(parameters, test_nr=0, nr_trials_per_test=100):
# Parameter initialization # Parameter initialization
nr_trials_per_test = 100
print('Creating {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, parameters[0], parameters[1], print('Creating {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, parameters[0], parameters[1],
parameters[2])) parameters[2]))
# Reset environment # Reset environment
...@@ -159,7 +121,7 @@ def create_testsfiles(parameters, test_nr=0): ...@@ -159,7 +121,7 @@ def create_testsfiles(parameters, test_nr=0):
for trial in range(nr_trials_per_test): for trial in range(nr_trials_per_test):
# Reset the env # Reset the env
env.reset(True, True) env.reset(True, True)
env.save("./Envs/{}/Level_{}.pkl".format(test_nr, trial)) env.save("./Tests/{}/Level_{}.pkl".format(test_nr, trial))
printProgressBar(trial + 1, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20) printProgressBar(trial + 1, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
return return
...@@ -171,7 +133,7 @@ def render_test(parameters, test_nr=0, nr_examples=5): ...@@ -171,7 +133,7 @@ def render_test(parameters, test_nr=0, nr_examples=5):
print('Showing {} Level {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, trial, parameters[0], print('Showing {} Level {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, trial, parameters[0],
parameters[1], parameters[1],
parameters[2])) parameters[2]))
file_name = "./Envs/{}/Level_{}.pkl".format(test_nr, trial) file_name = "./Tests/{}/Level_{}.pkl".format(test_nr, trial)
env = RailEnv(width=1, env = RailEnv(width=1,
height=1, height=1,
...@@ -210,7 +172,7 @@ def run_test_sequential(parameters, agent, test_nr=0, tree_depth=3): ...@@ -210,7 +172,7 @@ def run_test_sequential(parameters, agent, test_nr=0, tree_depth=3):
printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20) printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
for trial in range(nr_trials_per_test): for trial in range(nr_trials_per_test):
# Reset the env # Reset the env
file_name = "./Envs/{}/Level_{}.pkl".format(test_nr, trial) file_name = "./Tests/{}/Level_{}.pkl".format(test_nr, trial)
env = RailEnv(width=3, env = RailEnv(width=3,
height=3, height=3,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment