Commit cebde3d8 authored by Erik Nygren

updated training navigation and render_agent_behavior

parent 12e52a2e
@@ -13,7 +13,7 @@ from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from flatland.utils.rendertools import RenderTool
from torch_training.dueling_double_dqn import Agent
from utils.observation_utils import norm_obs_clip, split_tree
from utils.observation_utils import normalize_observation
random.seed(1)
np.random.seed(1)
@@ -77,9 +77,12 @@ env.reset(True, True)
observation_helper = TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv())
env_renderer = RenderTool(env, gl="PILSVG", )
num_features_per_node = env.obs_builder.observation_dim
handle = env.get_agent_handles()
features_per_node = 9
state_size = features_per_node * 85 * 2
tree_depth = 2
nr_nodes = 0
for i in range(tree_depth + 1):
nr_nodes += np.power(4, i)
state_size = num_features_per_node * nr_nodes
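# The tree observation is a 4-ary tree: a depth-2 tree has 4^0 + 4^1 + 4^2 = 21 nodes,
# so the flattened state vector has num_features_per_node * 21 entries.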
action_size = 5
# We set the number of episodes we would like to train on
@@ -100,7 +103,7 @@ action_prob = [0] * action_size
agent_obs = [None] * env.get_num_agents()
agent_next_obs = [None] * env.get_num_agents()
agent = Agent(state_size, action_size, "FC", 0)
with path(torch_training.Nets, "avoid_checkpoint49700.pth") as file_in:
with path(torch_training.Nets, "navigator_checkpoint100.pth") as file_in:
agent.qnetwork_local.load_state_dict(torch.load(file_in))
record_images = False
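# Set record_images = True to save every rendered frame to ./Images/ via
# env_renderer.gl.saveImage in the render loop below, e.g. to assemble a video of the run.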
@@ -110,57 +113,32 @@ for trials in range(1, n_trials + 1):
# Reset environment
obs = env.reset(True, True)
env_renderer.set_new_rail()
obs_original = obs.copy()
final_obs = obs.copy()
final_obs_next = obs.copy()
for a in range(env.get_num_agents()):
data, distance, agent_data = split_tree(tree=np.array(obs[a]), num_features_per_node=num_features_per_node,
current_depth=0)
data = norm_obs_clip(data)
distance = norm_obs_clip(distance)
agent_data = np.clip(agent_data, -1, 1)
obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
agent_data = env.agents[a]
speed = 1 # np.random.randint(1,5)
agent_data.speed_data['speed'] = 1. / speed
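# All agents are forced to full speed (1.0) here; re-enabling the commented-out
# randint above would sample mixed fractional speeds between 1 and 1/4.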
for i in range(2):
time_obs.append(obs)
# env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
env_renderer.reset()
# Build agent specific observations
for a in range(env.get_num_agents()):
agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
agent_obs[a] = normalize_observation(obs[a], observation_radius=10)
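# normalize_observation replaces the inline split_tree / norm_obs_clip handling removed
# above, returning a single flat, normalized feature vector per agent.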
# Reset score and done
score = 0
env_done = 0
# Run episode
for step in range(max_steps):
env_renderer.render_env(show=True, show_observations=False, show_predictions=True)
if record_images:
env_renderer.gl.saveImage("./Images/flatland_frame_{:04d}.bmp".format(frame_step))
frame_step += 1
# Action
for a in range(env.get_num_agents()):
# action = agent.act(np.array(obs[a]), eps=eps)
action = agent.act(agent_obs[a], eps=0)
action_prob[action] += 1
action_dict.update({a: action})
# Environment step
obs, all_rewards, done, _ = env.step(action_dict)
next_obs, all_rewards, done, _ = env.step(action_dict)
# print(all_rewards,action)
obs_original = next_obs.copy()
env_renderer.render_env(show=True, show_predictions=False, show_observations=False)
# Build agent specific observations and normalize
for a in range(env.get_num_agents()):
data, distance, agent_data = split_tree(tree=np.array(next_obs[a]),
num_features_per_node=num_features_per_node,
current_depth=0)
data = norm_obs_clip(data)
distance = norm_obs_clip(distance)
agent_data = np.clip(agent_data, -1, 1)
next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
time_obs.append(next_obs)
for a in range(env.get_num_agents()):
agent_next_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
agent_obs = agent_next_obs.copy()
agent_obs[a] = normalize_observation(obs[a], observation_radius=10)
if done['__all__']:
break
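For readers following the refactor: the new normalize_observation helper itself is not shown in this diff, but judging from the inline code it replaces, it presumably wraps the same split/clip/concatenate steps. Below is a minimal sketch under that assumption; the name normalize_observation_sketch and its num_features_per_node parameter are illustrative only, not the actual API of utils.observation_utils.

import numpy as np
from utils.observation_utils import norm_obs_clip, split_tree

def normalize_observation_sketch(tree_obs, num_features_per_node, observation_radius=10):
    # Split the raw tree observation into rail, distance and agent features,
    # mirroring the inline code removed in this commit.
    data, distance, agent_data = split_tree(tree=np.array(tree_obs),
                                            num_features_per_node=num_features_per_node,
                                            current_depth=0)
    # Clip/normalize each part; how observation_radius enters the normalization
    # is not visible in this diff, so it is only carried through here.
    data = norm_obs_clip(data)
    distance = norm_obs_clip(distance)
    agent_data = np.clip(agent_data, -1, 1)
    # Concatenate back into one flat vector with the shape the DQN expects (state_size).
    return np.concatenate((np.concatenate((data, distance)), agent_data))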
@@ -13,7 +13,7 @@ from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from flatland.utils.rendertools import RenderTool
from utils.observation_utils import norm_obs_clip, split_tree
from utils.observation_utils import normalize_observation
def main(argv):
@@ -120,74 +120,63 @@ def main(argv):
# Reset environment
obs = env.reset(True, True)
if not Training:
env_renderer.set_new_rail()
# Split the observation tree into its parts and normalize the observation using the utility functions.
# Build agent specific local observation
final_obs = agent_obs.copy()
final_obs_next = agent_next_obs.copy()
# Build agent specific observations
for a in range(env.get_num_agents()):
rail_data, distance_data, agent_data = split_tree(tree=np.array(obs[a]),
num_features_per_node=num_features_per_node,
current_depth=0)
rail_data = norm_obs_clip(rail_data)
distance_data = norm_obs_clip(distance_data)
agent_data = np.clip(agent_data, -1, 1)
agent_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
agent_obs[a] = normalize_observation(obs[a], observation_radius=10)
# Reset score and done
score = 0
env_done = 0
# Run episode
for step in range(max_steps):
# Only render when not training
if not Training:
env_renderer.render_env(show=True, show_observations=True)
# Choose the actions
# Action
for a in range(env.get_num_agents()):
if not Training:
eps = 0
action = agent.act(agent_obs[a], eps=eps)
action_dict.update({a: action})
# Count number of actions taken for statistics
action_prob[action] += 1
action_dict.update({a: action})
# Environment step
next_obs, all_rewards, done, _ = env.step(action_dict)
# Build agent specific observations and normalize
for a in range(env.get_num_agents()):
rail_data, distance_data, agent_data = split_tree(tree=np.array(next_obs[a]),
num_features_per_node=num_features_per_node,
current_depth=0)
rail_data = norm_obs_clip(rail_data)
distance_data = norm_obs_clip(distance_data)
agent_data = np.clip(agent_data, -1, 1)
agent_next_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
agent_next_obs[a] = normalize_observation(next_obs[a], observation_radius=10)
# Update replay buffer and train agent
for a in range(env.get_num_agents()):
# Remember and train agent
if Training:
if done[a]:
final_obs[a] = agent_obs[a].copy()
final_obs_next[a] = agent_next_obs[a].copy()
final_action_dict.update({a: action_dict[a]})
if not done[a]:
agent.step(agent_obs[a], action_dict[a], all_rewards[a], agent_next_obs[a], done[a])
# Update the current score
score += all_rewards[a] / env.get_num_agents()
# Copy observation
agent_obs = agent_next_obs.copy()
if done['__all__']:
env_done = 1
for a in range(env.get_num_agents()):
agent.step(final_obs[a], final_action_dict[a], all_rewards[a], final_obs_next[a], done[a])
break
# Epsilon decay
eps = max(eps_end, eps_decay * eps) # decrease epsilon
# Store the information about training progress
done_window.append(env_done)
# Collect information about training
tasks_finished = 0
for _idx in range(env.get_num_agents()):
if done[_idx] == 1:
tasks_finished += 1
done_window.append(tasks_finished / env.get_num_agents())
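# done_window now records the fraction of agents that reached their target each episode,
# instead of the old binary "all agents done" flag, giving a smoother completion-rate signal.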
scores_window.append(score / max_steps) # save most recent score
scores.append(np.mean(scores_window))
dones_list.append((np.mean(done_window)))
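The windowed containers used above (scores_window, done_window) are defined outside the lines shown in this diff. A typical setup is sketched below, assuming a 100-episode moving window; the maxlen value is an assumption, not taken from this commit.

from collections import deque

# Moving windows over the most recent episodes; 100 is an assumed window size.
scores_window = deque(maxlen=100)  # per-episode normalized score
done_window = deque(maxlen=100)    # per-episode fraction of agents that finished
scores = []                        # running mean of scores_window, for plotting
dones_list = []                    # running mean of done_window, for plotting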