Commit 23e98b59 authored by Erik Nygren

only store observations in buffer when you are allowed to take an action!

parent 08d70296
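
The change the commit title describes is easiest to read as a sketch of the inner training loop: an experience tuple should only be pushed to the replay buffer on steps where the agent was actually allowed to pick an action, i.e. when it sits at a cell boundary. The sketch below uses the names that appear in the hunks that follow; the Agent.step(state, action, reward, next_state, done) replay call is an assumption about the baseline agent's API and is not part of this diff.

# Sketch of the gated replay-buffer update (one step of the training loop).
for a in range(env.get_num_agents()):
    # An agent can only choose a new action at a cell boundary, i.e. when its
    # fractional position inside the current cell is (numerically) zero.
    register_action_state[a] = env.agents[a].speed_data['position_fraction'] < 0.001
    action_dict[a] = agent.act(agent_obs[a], eps=eps)

next_obs, all_rewards, done, _ = env.step(action_dict)

for a in range(env.get_num_agents()):
    agent_next_obs[a] = normalize_observation(next_obs[a], observation_radius=10)
    if register_action_state[a]:
        # Only store the transition when the chosen action could actually be applied
        # (assumed Agent.step(...) adds the tuple to the buffer and trains periodically).
        agent.step(agent_obs[a], action_dict[a], all_rewards[a],
                   agent_next_obs[a], done[a])

agent_obs = agent_next_obs.copy()
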
@@ -346,7 +346,7 @@ class TreeObsForRailEnv(ObservationBuilder):
unusable_switch = np.inf
other_agent_same_direction = 0
other_agent_opposite_direction = 0
- malfunctioning_agent = 0
+ malfunctioning_agent = 0.
min_fractional_speed = 1.
num_steps = 1
while exploring:
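
For orientation: the counters initialized in this hunk end up as per-node features of the tree observation, and the change keeps the malfunction field float-typed like its neighbours. A hypothetical re-creation of those initial values and their packing into one feature slice (the real TreeObsForRailEnv node carries more fields and a fixed ordering):

import numpy as np

# Hypothetical: the initial values from the hunk above, packed into one float vector.
unusable_switch = np.inf
other_agent_same_direction = 0
other_agent_opposite_direction = 0
malfunctioning_agent = 0.
min_fractional_speed = 1.

node_features = np.array([unusable_switch, other_agent_same_direction,
                          other_agent_opposite_direction, malfunctioning_agent,
                          min_fractional_speed], dtype=float)
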
@@ -103,7 +103,7 @@ action_prob = [0] * action_size
agent_obs = [None] * env.get_num_agents()
agent_next_obs = [None] * env.get_num_agents()
agent = Agent(state_size, action_size, "FC", 0)
- with path(torch_training.Nets, "navigator_checkpoint1200.pth") as file_in:
+ with path(torch_training.Nets, "navigator_checkpoint500.pth") as file_in:
agent.qnetwork_local.load_state_dict(torch.load(file_in))
record_images = False
@@ -126,14 +126,12 @@ for trials in range(1, n_trials + 1):
# Action
for a in range(env.get_num_agents()):
- action = agent.act(agent_obs[a], eps=0)
+ action = agent.act(agent_obs[a], eps=0.)
action_prob[action] += 1
action_dict.update({a: action})
# Environment step
obs, all_rewards, done, _ = env.step(action_dict)
- env_renderer.render_env(show=True, show_predictions=False, show_observations=False)
+ env_renderer.render_env(show=True, show_predictions=True, show_observations=False)
# Build agent specific observations and normalize
for a in range(env.get_num_agents()):
agent_obs[a] = normalize_observation(obs[a], observation_radius=10)
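
In this evaluation script the agent acts greedily (eps=0., no exploration). For reference, a generic epsilon-greedy selection looks roughly like the following sketch; the repository's Agent.act may differ in its details, and the qnetwork/state shapes here are assumptions:

import random
import numpy as np
import torch

def epsilon_greedy_act(qnetwork, state, action_size, eps=0.):
    """Pick an action; with eps=0. this is fully greedy (pure evaluation)."""
    if random.random() >= eps:
        with torch.no_grad():
            q_values = qnetwork(torch.from_numpy(state).float().unsqueeze(0))
        return int(np.argmax(q_values.cpu().numpy()))
    return random.randrange(action_size)
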
@@ -71,11 +71,9 @@ def main(argv):
number_of_agents=n_agents,
stochastic_data=stochastic_data, # Malfunction data generator
obs_builder_object=TreeObservation)
env.reset(True, True)
# After training we want to render the results so we also load a renderer
env_renderer = RenderTool(env, gl="PILSVG", )
# Given the depth of the tree observation and the number of features per node we get the following state_size
num_features_per_node = env.obs_builder.observation_dim
tree_depth = 2
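
The comment in this hunk refers to the usual state-size computation for the tree observation: each node has four children (one per possible transition direction), so a tree of depth tree_depth has sum(4**i) nodes and the flattened observation holds num_features_per_node values per node. A sketch of that calculation (the exact line is not shown in this diff):

# Number of nodes in a 4-ary tree of the given depth: 1 + 4 + 16 = 21 for depth 2.
num_nodes = sum(4 ** level for level in range(tree_depth + 1))
state_size = num_features_per_node * num_nodes
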
@@ -104,7 +102,6 @@ def main(argv):
final_action_dict = dict()
scores_window = deque(maxlen=100)
done_window = deque(maxlen=100)
time_obs = deque(maxlen=2)
scores = []
dones_list = []
action_prob = [0] * action_size
@@ -114,8 +111,6 @@ def main(argv):
# Now we load a Double dueling DQN agent
agent = Agent(state_size, action_size, "FC", 0)
Training = True
for trials in range(1, n_trials + 1):
# Reset environment
@@ -126,19 +121,17 @@ def main(argv):
# Build agent specific observations
for a in range(env.get_num_agents()):
- agent_obs[a] = agent_obs[a] = normalize_observation(obs[a], observation_radius=10)
+ agent_obs[a] = normalize_observation(obs[a], observation_radius=10)
# Reset score and done
score = 0
env_done = 0
# Run episode
for step in range(max_steps):
# Action
for a in range(env.get_num_agents()):
- if env.agents[a].speed_data['position_fraction'] == 0.:
+ if env.agents[a].speed_data['position_fraction'] < 0.001:
register_action_state[a] = True
else:
register_action_state[a] = False
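
Replacing the exact == 0. test with < 0.001 presumably guards against floating-point round-off when position_fraction is accumulated from a fractional speed; an explicit tolerance expresses the same intent. A hypothetical helper, not part of the diff:

def can_register_action(position_fraction, tol=1e-3):
    """True when the agent sits at a cell boundary and may choose a new action.

    position_fraction is advanced by the agent's fractional speed each step, so it
    can carry round-off; an exact == 0. comparison may therefore miss valid steps.
    An equivalent test is math.isclose(position_fraction, 0.0, abs_tol=tol).
    """
    return position_fraction < tol
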
@@ -166,7 +159,6 @@ def main(argv):
# Copy observation
agent_obs = agent_next_obs.copy()
if done['__all__']:
env_done = 1
for a in range(env.get_num_agents()):
@@ -206,52 +198,6 @@ def main(argv):
'./Nets/navigator_checkpoint' + str(trials) + '.pth')
action_prob = [1] * action_size
- # Render the trained agent
- # Reset environment
- obs = env.reset(True, True)
- env_renderer.set_new_rail()
- # Split the observation tree into its parts and normalize the observation using the utility functions.
- # Build agent specific local observation
- for a in range(env.get_num_agents()):
- rail_data, distance_data, agent_data = split_tree(tree=np.array(obs[a]),
- num_features_per_node=num_features_per_node,
- current_depth=0)
- rail_data = norm_obs_clip(rail_data)
- distance_data = norm_obs_clip(distance_data)
- agent_data = np.clip(agent_data, -1, 1)
- agent_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
- # Reset score and done
- score = 0
- env_done = 0
- # Run episode
- for step in range(max_steps):
- env_renderer.render_env(show=True, show_observations=False)
- # Chose the actions
- for a in range(env.get_num_agents()):
- eps = 0
- action = agent.act(agent_obs[a], eps=eps)
- action_dict.update({a: action})
- # Environment step
- next_obs, all_rewards, done, _ = env.step(action_dict)
- for a in range(env.get_num_agents()):
- rail_data, distance_data, agent_data = split_tree(tree=np.array(next_obs[a]),
- num_features_per_node=num_features_per_node,
- current_depth=0)
- rail_data = norm_obs_clip(rail_data)
- distance_data = norm_obs_clip(distance_data)
- agent_data = np.clip(agent_data, -1, 1)
- agent_next_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
- agent_obs = agent_next_obs.copy()
- if done['__all__']:
- break
# Plot overall training progress at the end
plt.plot(scores)
plt.show()
@@ -95,6 +95,7 @@ def split_tree(tree, num_features_per_node, current_depth=0):
tree_data.extend(tmp_tree_data)
distance_data.extend(tmp_distance_data)
agent_data.extend(tmp_agent_data)
return tree_data, distance_data, agent_data
@@ -103,6 +104,6 @@ def normalize_observation(observation, num_features_per_node=11, observation_rad
current_depth=0)
data = norm_obs_clip(data, fixed_radius=observation_radius)
distance = norm_obs_clip(distance, normalize_to_range=True)
- agent_data = np.clip(agent_data, -1, 20)
+ agent_data = np.clip(agent_data, -1, 1)
normalized_obs = np.concatenate((np.concatenate((data, distance)), agent_data))
return normalized_obs
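
For context, normalize_observation splits the tree into rail, distance and agent parts, rescales the first two with norm_obs_clip, and now clips agent_data to [-1, 1] instead of [-1, 20], so all features the network sees end up on the same scale. The following is only a plausible sketch of what such a norm-and-clip step can look like; the actual norm_obs_clip in this repository may differ:

import numpy as np

def norm_and_clip(obs, clip_min=-1.0, clip_max=1.0, fixed_radius=0, normalize_to_range=False):
    """Sketch: rescale a 1-D observation slice into [clip_min, clip_max].

    Non-finite entries (e.g. np.inf for unreachable branches) saturate at clip_min;
    finite values are divided by a fixed radius if given, otherwise by their maximum.
    """
    obs = np.asarray(obs, dtype=float)
    finite = obs[np.isfinite(obs)]
    max_obs = fixed_radius if fixed_radius > 0 else (finite.max() if finite.size else 1.0)
    min_obs = finite.min() if (normalize_to_range and finite.size) else 0.0
    if max_obs <= min_obs:
        max_obs = min_obs + 1.0
    norm = (obs - min_obs) / (max_obs - min_obs)
    norm[~np.isfinite(obs)] = clip_min
    return np.clip(norm, clip_min, clip_max)

The tighter clip bound in the hunk above keeps agent_data on the same [-1, 1] scale as the other normalized features.
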