Commit 4bee50ba authored by Erik Nygren

fixed normalizing function

parent 3f36d20a
@@ -46,19 +46,21 @@ env = RailEnv(width=10,
 env.load("./railway/complex_scene.pkl")
 """
-env = RailEnv(width=20,
-              height=20,
-              rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=8, max_dist=99999, seed=0),
+env = RailEnv(width=8,
+              height=8,
+              rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=1, min_dist=4, max_dist=99999, seed=0),
               obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=DummyPredictorForRailEnv()),
-              number_of_agents=10)
+              number_of_agents=3)
 env.reset(True, True)
 env_renderer = RenderTool(env, gl="PILSVG")
 handle = env.get_agent_handles()
-state_size = 147 * 2
+state_size = 168 * 2
 action_size = 5
 n_trials = 15000
+max_steps = int(1.5 * (env.height + env.width))
 eps = 1.
 eps_end = 0.005
 eps_decay = 0.9995
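The two changed constants are linked: with max_depth=2 the 4-ary observation tree has 1 + 4 + 16 = 21 nodes, so going from 7 to 8 features per node (see the split_tree hunks below) grows the flat observation from 147 to 168 values, doubled because the script stacks two consecutive time steps. The new max_steps also bounds episodes far more tightly than the old env.height * env.width loop further down. A minimal sketch of the arithmetic:

```python
# Sketch: where state_size = 168 * 2 comes from, assuming a 4-ary
# observation tree of depth 2 with 8 features per node, stacked twice.
max_depth = 2
num_features_per_node = 8                             # was 7 before this commit
nr_nodes = sum(4 ** d for d in range(max_depth + 1))  # 1 + 4 + 16 = 21
state_size = nr_nodes * num_features_per_node * 2     # 168 * 2 = 336

# The new episode cap vs. the old loop bound on the new 8x8 grid.
height = width = 8
old_bound = height * width               # 64 steps
max_steps = int(1.5 * (height + width))  # 24 steps
```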
@@ -73,9 +75,9 @@ action_prob = [0] * action_size
 agent_obs = [None] * env.get_num_agents()
 agent_next_obs = [None] * env.get_num_agents()
 agent = Agent(state_size, action_size, "FC", 0)
-agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint15000.pth'))
-demo = True
+# agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint15000.pth'))
+demo = False
 
 def max_lt(seq, val):
     """
@@ -99,7 +101,7 @@ def min_lt(seq, val):
     min = np.inf
     idx = len(seq) - 1
     while idx >= 0:
-        if seq[idx] > val and seq[idx] < min:
+        if seq[idx] >= val and seq[idx] < min:
             min = seq[idx]
         idx -= 1
     return min
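The change from > to >= matters exactly when the smallest relevant value equals val: with the strict comparison, min_lt(obs, 0) skipped legitimate zeros and returned a larger element (or np.inf on an all-zero input), which then distorted the normalization below. A standalone illustration of the fixed behaviour:

```python
import numpy as np

def min_lt(seq, val):
    """Return the smallest element of seq that is >= val (np.inf if none)."""
    minimum = np.inf
    idx = len(seq) - 1
    while idx >= 0:
        if seq[idx] >= val and seq[idx] < minimum:
            minimum = seq[idx]
        idx -= 1
    return minimum

print(min_lt([0.0, 2.0, 5.0], 0))  # 0.0 -- the old '>' version returned 2.0
```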
@@ -114,7 +116,8 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1):
     :return: returnes normalized and clipped observatoin
     """
     max_obs = max(1, max_lt(obs, 1000))
-    min_obs = max(0, min_lt(obs, 0))
+    min_obs = min(max_obs, min_lt(obs, 0))
     if max_obs == min_obs:
         return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
     norm = np.abs(max_obs - min_obs)
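Together with the min_lt fix, this hunk is the "fixed normalizing function" of the commit title: min_obs is now the smallest non-negative observation value, capped at max_obs, rather than being clamped to 0, and the equal-min-max branch avoids dividing by a zero range. A sketch of the whole function under those assumptions; the final scaling line is inferred from the docstring and the norm computation, since the hunk is truncated there:

```python
import numpy as np

def norm_obs_clip(obs, clip_min=-1, clip_max=1):
    """Min-max scale obs into [clip_min, clip_max], treating values >= 1000
    as 'infinite' placeholders when picking the maximum (relies on the
    script's max_lt/min_lt helpers)."""
    max_obs = max(1, max_lt(obs, 1000))     # largest value below 1000
    min_obs = min(max_obs, min_lt(obs, 0))  # smallest value >= 0
    if max_obs == min_obs:                  # degenerate range: plain rescale
        return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
    norm = np.abs(max_obs - min_obs)
    # Assumed final step (not visible in the hunk): shift, scale, clip.
    return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
```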
@@ -131,13 +134,14 @@ for trials in range(1, n_trials + 1):
         env_renderer.set_new_rail()
     final_obs = obs.copy()
     final_obs_next = obs.copy()
     for a in range(env.get_num_agents()):
-        data, distance, agent_data = env.obs_builder.split_tree(tree=np.array(obs[a]), num_features_per_node=7, current_depth=0)
+        data, distance, agent_data = env.obs_builder.split_tree(tree=np.array(obs[a]), num_features_per_node=8,
+                                                                current_depth=0)
         data = norm_obs_clip(data)
         distance = norm_obs_clip(distance)
         agent_data = np.clip(agent_data, -1, 1)
         obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
     for i in range(2):
         time_obs.append(obs)
     # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
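The split_tree call is updated to num_features_per_node=8 both here and in the step loop below, matching the new state_size. A hypothetical helper that would keep the two call sites in sync (preprocess is an illustration, not part of the commit):

```python
import numpy as np

def preprocess(tree_obs, builder, num_features_per_node=8):
    # Split the flat tree observation, normalize the distance-like parts,
    # and clip the per-agent features into [-1, 1].
    data, distance, agent_data = builder.split_tree(tree=np.array(tree_obs),
                                                    num_features_per_node=num_features_per_node,
                                                    current_depth=0)
    data = norm_obs_clip(data)
    distance = norm_obs_clip(distance)
    agent_data = np.clip(agent_data, -1, 1)
    return np.concatenate((np.concatenate((data, distance)), agent_data))
```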
@@ -147,7 +151,7 @@ for trials in range(1, n_trials + 1):
     score = 0
     env_done = 0
     # Run episode
-    for step in range(env.height * env.width):
+    for step in range(max_steps):
         if demo:
             env_renderer.renderEnv(show=True, show_observations=False)
         # print(step)
@@ -163,13 +167,12 @@ for trials in range(1, n_trials + 1):
         next_obs, all_rewards, done, _ = env.step(action_dict)
         for a in range(env.get_num_agents()):
-            data, distance, agent_data = env.obs_builder.split_tree(tree=np.array(next_obs[a]), num_features_per_node=7,
+            data, distance, agent_data = env.obs_builder.split_tree(tree=np.array(next_obs[a]), num_features_per_node=8,
                                                                     current_depth=0)
             data = norm_obs_clip(data)
             distance = norm_obs_clip(distance)
             agent_data = np.clip(agent_data, -1, 1)
             next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
         time_obs.append(next_obs)
         # Update replay buffer and train agent
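time_obs is a two-element buffer: each agent's network input concatenates the previous and the current preprocessed observation, which is where the * 2 in state_size comes from. A sketch of that stacking, assuming time_obs = deque(maxlen=2) as set up earlier in the script:

```python
from collections import deque

import numpy as np

time_obs = deque(maxlen=2)   # keeps only the two most recent frames
obs_t0 = {0: np.zeros(168)}  # hypothetical preprocessed obs, one agent
obs_t1 = {0: np.ones(168)}
time_obs.append(obs_t0)
time_obs.append(obs_t1)
stacked = np.concatenate((time_obs[0][0], time_obs[1][0]))  # shape (336,)
```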
@@ -181,7 +184,7 @@ for trials in range(1, n_trials + 1):
                 final_action_dict.update({a: action_dict[a]})
             if not demo and not done[a]:
                 agent.step(agent_obs[a], action_dict[a], all_rewards[a], agent_next_obs[a], done[a])
-            score += all_rewards[a]
+            score += all_rewards[a] / env.get_num_agents()
         agent_obs = agent_next_obs.copy()
         if done['__all__']:
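Dividing each reward by env.get_num_agents() turns score into a per-agent average instead of a raw sum, so logged values stay comparable between the old 10-agent and the new 3-agent setup. A toy illustration:

```python
# Hypothetical per-agent rewards for one step.
all_rewards = {0: -1.0, 1: -1.0, 2: 0.0}
score = sum(all_rewards[a] / len(all_rewards) for a in all_rewards)
print(score)  # about -0.667 per agent, independent of the agent count
```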
@@ -193,11 +196,12 @@ for trials in range(1, n_trials + 1):
     eps = max(eps_end, eps_decay * eps)  # decrease epsilon
     done_window.append(env_done)
-    scores_window.append(score)  # save most recent score
+    scores_window.append(score / max_steps)  # save most recent score
     scores.append(np.mean(scores_window))
     dones_list.append((np.mean(done_window)))
-    print('\rTraining {} Agents.\t Episode {}\t Average Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+    print(
+        '\rTraining {} Agents.\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
         env.get_num_agents(),
         trials,
         np.mean(scores_window),
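With the score now averaged per agent and per step, the logged value lands in a narrow range around [-1, 0], where the old {:.0f} format would round everything to -0 or -1; hence the switch to {:.3f} in both print statements. For example:

```python
score, max_steps = -18.4, 24  # hypothetical episode total and step cap
print('Average Score: {:.0f}'.format(score / max_steps))  # Average Score: -1
print('Average Score: {:.3f}'.format(score / max_steps))  # Average Score: -0.767
```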
@@ -206,7 +210,7 @@ for trials in range(1, n_trials + 1):
         if trials % 100 == 0:
             print(
-                '\rTraining {} Agents.\t Episode {}\t Average Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+                '\rTraining {} Agents.\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
                 env.get_num_agents(),
                 trials,
                 np.mean(scores_window),