Commit d83a358c authored by Erik Nygren

updated normalization of tree observation in observation utils

parent 1c649d43
import numpy as np
-from utils.observation_utils import split_tree, min_lt
+from utils.observation_utils import split_tree, min_gt
class OrderedAgent:
@@ -15,7 +15,7 @@ class OrderedAgent:
_, distance, _ = split_tree(tree=np.array(state), num_features_per_node=9,
current_depth=0)
distance = distance[1:]
-min_dist = min_lt(distance, 0)
+min_dist = min_gt(distance, 0)
min_direction = np.where(distance == min_dist)
if len(min_direction[0]) > 1:
return min_direction[0][-1] + 1
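For context on the rename: min_gt returns the smallest entry above the given threshold, which is what the ordered agent needs to pick the closest reachable branch. A minimal sketch of that selection step with made-up distances (the values, not the logic, are invented here):

    import numpy as np
    from utils.observation_utils import min_gt

    # Illustrative per-branch distances from split_tree; -inf stands in for a
    # branch that does not exist in the tree.
    distance = np.array([0., 7., -np.inf, 4.])
    distance = distance[1:]                         # drop the root entry, as above

    min_dist = min_gt(distance, 0)                  # closest reachable branch -> 4.0
    min_direction = np.where(distance == min_dist)  # (array([2]),)
    action = min_direction[0][-1] + 1               # branch index shifted into an action id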
...
No preview for this file type
@@ -17,7 +17,7 @@ from utils.observation_utils import normalize_observation
random.seed(3)
np.random.seed(2)
file_name = "./railway/simple_avoid.pkl"
file_name = "./railway/testing_stuff.pkl"
env = RailEnv(width=10,
height=20,
rail_generator=rail_from_file(file_name),
@@ -41,6 +41,7 @@ env = RailEnv(width=x_dim,
obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()),
number_of_agents=n_agents)
env.reset(True, True)
"""
tree_depth = 3
observation_helper = TreeObsForRailEnv(max_depth=tree_depth, predictor=ShortestPathPredictorForRailEnv())
@@ -70,7 +71,7 @@ action_prob = [0] * action_size
agent_obs = [None] * env.get_num_agents()
agent_next_obs = [None] * env.get_num_agents()
agent = Agent(state_size, action_size, "FC", 0)
-with path(torch_training.Nets, "avoid_checkpoint49000.pth") as file_in:
+with path(torch_training.Nets, "avoid_checkpoint59900.pth") as file_in:
agent.qnetwork_local.load_state_dict(torch.load(file_in))
record_images = False
@@ -93,7 +94,7 @@ for trials in range(1, n_trials + 1):
if record_images:
env_renderer.gl.save_image("./Images/Avoiding/flatland_frame_{:04d}.bmp".format(frame_step))
frame_step += 1
-# time.sleep(5)
+time.sleep(1.5)
# Action
for a in range(env.get_num_agents()):
action = agent.act(agent_obs[a], eps=0)
...
@@ -73,7 +73,7 @@ def main(argv):
n_episodes = 60000
# Set max number of steps per episode as well as other training relevant parameter
-max_steps = int(3 * (env.height + env.width))
+max_steps = int((env.height + env.width))
eps = 1.
eps_end = 0.005
eps_decay = 0.9995
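The tightened step budget is worth a quick sanity check; for a 14x14 level (the curriculum below samples sizes between 8 and 15) the cap drops substantially:

    # Illustrative arithmetic only: episode step cap before and after the change.
    height, width = 14, 14
    old_cap = int(3 * (height + width))  # 84 steps per episode
    new_cap = int(height + width)        # 28 steps per episode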
@@ -102,7 +102,7 @@ def main(argv):
Training Curriculum: In order to get good generalization we change the number of agents
and the size of the levels every 50 episodes.
"""
-if episodes % 50 == 0:
+if episodes % 50 == 1:
x_dim = np.random.randint(8, 15)
y_dim = np.random.randint(8, 15)
n_agents = np.random.randint(3, 8)
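Switching the curriculum condition from == 0 to == 1 also shifts when regeneration fires: the first new level now appears right after episode 1 rather than after episode 50. A quick check of the trigger episodes:

    # Which of the first 200 episodes regenerate the level under each condition.
    old_triggers = [e for e in range(1, 201) if e % 50 == 0]  # [50, 100, 150, 200]
    new_triggers = [e for e in range(1, 201) if e % 50 == 1]  # [1, 51, 101, 151]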
@@ -117,7 +117,7 @@
number_of_agents=n_agents)
# Adjust the parameters according to the new env.
-max_steps = int(3 * (env.height + env.width))
+max_steps = int((env.height + env.width))
agent_obs = [None] * env.get_num_agents()
agent_next_obs = [None] * env.get_num_agents()
@@ -174,7 +174,11 @@ def main(argv):
eps = max(eps_end, eps_decay * eps) # decrease epsilon
# Collection information about training
-done_window.append(env_done)
+tasks_finished = 0
+for _idx in range(env.get_num_agents()):
+    if done[_idx] == 1:
+        tasks_finished += 1
+done_window.append(tasks_finished / env.get_num_agents())
scores_window.append(score / max_steps) # save most recent score
scores.append(np.mean(scores_window))
dones_list.append((np.mean(done_window)))
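The success statistic tracked in done_window therefore changes from the binary env_done flag to the fraction of agents that reached their target. A compact equivalent of the added loop, assuming done is the per-agent dict returned by env.step():

    # Sketch: same statistic as the loop above, written as a comprehension.
    num_agents = env.get_num_agents()
    tasks_finished = sum(int(done[handle]) for handle in range(num_agents))
    done_window.append(tasks_finished / num_agents)  # fraction of agents done this episode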
...
@@ -15,7 +15,7 @@ def max_lt(seq, val):
return max
-def min_lt(seq, val):
+def min_gt(seq, val):
"""
Return smallest item in seq for which item > val applies.
None is returned if seq was empty or all items in seq were >= val.
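Only the signature and docstring of the renamed helper are visible in this hunk; as an illustration of the documented contract (not the committed body), a straightforward implementation could look like this:

    def min_gt_sketch(seq, val):
        # Hypothetical stand-in for min_gt: smallest item in seq strictly greater
        # than val, or None if no such item exists.
        best = None
        for item in seq:
            if item > val and (best is None or item < best):
                best = item
        return best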
@@ -29,7 +29,7 @@ def min_lt(seq, val):
return min
-def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0):
+def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0, normalize_to_range=False):
"""
This function returns the difference between min and max value of an observation
:param obs: Observation that should be normalized
@@ -42,13 +42,12 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0):
else:
max_obs = max(1, max_lt(obs, 1000)) + 1
-min_obs = 0 # min(max_obs, min_lt(obs, 0))
+min_obs = 0 # min(max_obs, min_gt(obs, 0))
+if normalize_to_range:
+    min_obs = min_gt(obs, 0)
if max_obs == min_obs:
return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
norm = np.abs(max_obs - min_obs)
if norm == 0:
norm = 1.
return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
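The effect of the new flag is easiest to see on a small example: without normalize_to_range the observation is only scaled by its upper bound, with the flag set it is additionally shifted by min_gt(obs, 0) so the smallest positive entry lands at the bottom of the range. A usage sketch with illustrative values:

    import numpy as np
    from utils.observation_utils import norm_obs_clip

    # Illustrative distance vector: two reachable targets and one unreachable branch.
    distance = np.array([4., 9., np.inf])

    scaled  = norm_obs_clip(distance)                           # scale by the upper bound only
    shifted = norm_obs_clip(distance, normalize_to_range=True)  # subtract min_gt(distance, 0) first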
@@ -103,7 +102,7 @@ def normalize_observation(observation, num_features_per_node=9, observation_radi
data, distance, agent_data = split_tree(tree=np.array(observation), num_features_per_node=num_features_per_node,
current_depth=0)
data = norm_obs_clip(data, fixed_radius=observation_radius)
-distance = norm_obs_clip(distance)
+distance = norm_obs_clip(distance, normalize_to_range=True)
agent_data = np.clip(agent_data, -1, 1)
normalized_obs = np.concatenate((np.concatenate((data, distance)), agent_data))
return normalized_obs
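As the training and rendering scripts in this commit use it, the helper is applied once per agent to the raw tree observation; a short usage sketch, assuming the RailEnv and tree observation builder constructed earlier in this diff (the observation_radius value is only an illustrative choice):

    from utils.observation_utils import normalize_observation

    # Sketch: turn every agent's raw tree observation into a normalized feature vector.
    obs = env.reset(True, True)
    agent_obs = [normalize_observation(obs[a], num_features_per_node=9, observation_radius=10)
                 for a in range(env.get_num_agents())]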