Commit 2cf1b9d1 authored by u214892

#42 run baselines in ci

parent ccf03494
@@ -92,108 +92,108 @@ def main(argv):
     print("Going to run training for {} trials...".format(n_trials))
     for trials in range(1, n_trials + 1):
-        if trials % 50 == 0 and not demo:
-            x_dim = np.random.randint(8, 20)
-            y_dim = np.random.randint(8, 20)
-            n_agents = np.random.randint(3, 8)
-            n_goals = n_agents + np.random.randint(0, 3)
-            min_dist = int(0.75 * min(x_dim, y_dim))
-            env = RailEnv(width=x_dim,
-                          height=y_dim,
-                          rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
-                                                                max_dist=99999,
-                                                                seed=0),
-                          obs_builder_object=TreeObsForRailEnv(max_depth=3,
-                                                               predictor=ShortestPathPredictorForRailEnv()),
-                          number_of_agents=n_agents)
-            env.reset(True, True)
-            max_steps = int(3 * (env.height + env.width))
-            agent_obs = [None] * env.get_num_agents()
-            agent_next_obs = [None] * env.get_num_agents()
-        # Reset environment
-        if file_load:
-            obs = env.reset(False, False)
-        else:
-            obs = env.reset(True, True)
-        if demo:
-            env_renderer.set_new_rail()
-        obs_original = obs.copy()
-        final_obs = obs.copy()
-        final_obs_next = obs.copy()
-        for a in range(env.get_num_agents()):
-            data, distance, agent_data = split_tree(tree=np.array(obs[a]),
-                                                    current_depth=0)
-            data = norm_obs_clip(data)
-            distance = norm_obs_clip(distance)
-            agent_data = np.clip(agent_data, -1, 1)
-            obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
-            agent_data = env.agents[a]
-            speed = 1  # np.random.randint(1,5)
-            agent_data.speed_data['speed'] = 1. / speed
-
-        for i in range(2):
-            time_obs.append(obs)
-        # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
-        for a in range(env.get_num_agents()):
-            agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
-
-        score = 0
-        env_done = 0
-        # Run episode
-        for step in range(max_steps):
-            if demo:
-                env_renderer.renderEnv(show=True, show_observations=False)
-                # observation_helper.util_print_obs_subtree(obs_original[0])
-                if record_images:
-                    env_renderer.gl.saveImage("./Images/flatland_frame_{:04d}.bmp".format(frame_step))
-                    frame_step += 1
-            # print(step)
-            # Action
-            for a in range(env.get_num_agents()):
-                if demo:
-                    eps = 0
-                # action = agent.act(np.array(obs[a]), eps=eps)
-                action = agent.act(agent_obs[a], eps=eps)
-                action_prob[action] += 1
-                action_dict.update({a: action})
-            # Environment step
-
-            next_obs, all_rewards, done, _ = env.step(action_dict)
-            # print(all_rewards,action)
-            obs_original = next_obs.copy()
-            for a in range(env.get_num_agents()):
-                data, distance, agent_data = split_tree(tree=np.array(next_obs[a]),
-                                                        current_depth=0)
-                data = norm_obs_clip(data)
-                distance = norm_obs_clip(distance)
-                agent_data = np.clip(agent_data, -1, 1)
-                next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
-            time_obs.append(next_obs)
-
-            # Update replay buffer and train agent
-            for a in range(env.get_num_agents()):
-                agent_next_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
-                if done[a]:
-                    final_obs[a] = agent_obs[a].copy()
-                    final_obs_next[a] = agent_next_obs[a].copy()
-                    final_action_dict.update({a: action_dict[a]})
-                if not demo and not done[a]:
-                    agent.step(agent_obs[a], action_dict[a], all_rewards[a], agent_next_obs[a], done[a])
-                score += all_rewards[a] / env.get_num_agents()
-
-            agent_obs = agent_next_obs.copy()
-            if done['__all__']:
-                env_done = 1
-                for a in range(env.get_num_agents()):
-                    agent.step(final_obs[a], final_action_dict[a], all_rewards[a], final_obs_next[a], done[a])
-                break
-        # Epsilon decay
-        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
-
-        done_window.append(env_done)
-        scores_window.append(score / max_steps)  # save most recent score
-        scores.append(np.mean(scores_window))
-        dones_list.append((np.mean(done_window)))
+# if trials % 50 == 0 and not demo:
+# x_dim = np.random.randint(8, 20)
+# y_dim = np.random.randint(8, 20)
+# n_agents = np.random.randint(3, 8)
+# n_goals = n_agents + np.random.randint(0, 3)
+# min_dist = int(0.75 * min(x_dim, y_dim))
+# env = RailEnv(width=x_dim,
+# height=y_dim,
+# rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
+# max_dist=99999,
+# seed=0),
+# obs_builder_object=TreeObsForRailEnv(max_depth=3,
+# predictor=ShortestPathPredictorForRailEnv()),
+# number_of_agents=n_agents)
+# env.reset(True, True)
+# max_steps = int(3 * (env.height + env.width))
+# agent_obs = [None] * env.get_num_agents()
+# agent_next_obs = [None] * env.get_num_agents()
+# # Reset environment
+# if file_load:
+# obs = env.reset(False, False)
+# else:
+# obs = env.reset(True, True)
+# if demo:
+# env_renderer.set_new_rail()
+# obs_original = obs.copy()
+# final_obs = obs.copy()
+# final_obs_next = obs.copy()
+# for a in range(env.get_num_agents()):
+# data, distance, agent_data = split_tree(tree=np.array(obs[a]),
+# current_depth=0)
+# data = norm_obs_clip(data)
+# distance = norm_obs_clip(distance)
+# agent_data = np.clip(agent_data, -1, 1)
+# obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
+# agent_data = env.agents[a]
+# speed = 1 # np.random.randint(1,5)
+# agent_data.speed_data['speed'] = 1. / speed
+#
+# for i in range(2):
+# time_obs.append(obs)
+# # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
+# for a in range(env.get_num_agents()):
+# agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
+#
+# score = 0
+# env_done = 0
+# # Run episode
+# for step in range(max_steps):
+# if demo:
+# env_renderer.renderEnv(show=True, show_observations=False)
+# # observation_helper.util_print_obs_subtree(obs_original[0])
+# if record_images:
+# env_renderer.gl.saveImage("./Images/flatland_frame_{:04d}.bmp".format(frame_step))
+# frame_step += 1
+# # print(step)
+# # Action
+# for a in range(env.get_num_agents()):
+# if demo:
+# eps = 0
+# # action = agent.act(np.array(obs[a]), eps=eps)
+# action = agent.act(agent_obs[a], eps=eps)
+# action_prob[action] += 1
+# action_dict.update({a: action})
+# # Environment step
+#
+# next_obs, all_rewards, done, _ = env.step(action_dict)
+# # print(all_rewards,action)
+# obs_original = next_obs.copy()
+# for a in range(env.get_num_agents()):
+# data, distance, agent_data = split_tree(tree=np.array(next_obs[a]),
+# current_depth=0)
+# data = norm_obs_clip(data)
+# distance = norm_obs_clip(distance)
+# agent_data = np.clip(agent_data, -1, 1)
+# next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
+# time_obs.append(next_obs)
+#
+# # Update replay buffer and train agent
+# for a in range(env.get_num_agents()):
+# agent_next_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
+# if done[a]:
+# final_obs[a] = agent_obs[a].copy()
+# final_obs_next[a] = agent_next_obs[a].copy()
+# final_action_dict.update({a: action_dict[a]})
+# if not demo and not done[a]:
+# agent.step(agent_obs[a], action_dict[a], all_rewards[a], agent_next_obs[a], done[a])
+# score += all_rewards[a] / env.get_num_agents()
+#
+# agent_obs = agent_next_obs.copy()
+# if done['__all__']:
+# env_done = 1
+# for a in range(env.get_num_agents()):
+# agent.step(final_obs[a], final_action_dict[a], all_rewards[a], final_obs_next[a], done[a])
+# break
+# # Epsilon decay
+# eps = max(eps_end, eps_decay * eps) # decrease epsilon
+#
+# done_window.append(env_done)
+# scores_window.append(score / max_steps) # save most recent score
+# scores.append(np.mean(scores_window))
+# dones_list.append((np.mean(done_window)))
         print(
             '\rTraining {} Agents on ({},{}).\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
...
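For reviewers unfamiliar with the block being commented out above: it stacks the two most recent preprocessed tree observations per agent before calling agent.act. Below is a minimal, self-contained Python sketch of that pattern. It is not part of this commit; norm_clip, preprocess and fake_tree_obs are hypothetical stand-ins for the repo's norm_obs_clip/split_tree helpers and the Flatland environment, not its actual API.

from collections import deque

import numpy as np


def norm_clip(values, clip_min=-1.0, clip_max=1.0):
    # Rough stand-in for norm_obs_clip: scale by the largest finite entry and clip.
    finite = values[np.isfinite(values)]
    max_obs = max(1.0, float(np.max(finite))) if finite.size else 1.0
    return np.clip(values / max_obs, clip_min, clip_max)


def preprocess(raw_obs):
    # Mirrors the data / distance / agent_data split-and-concatenate step above.
    data, distance, agent_data = raw_obs
    return np.concatenate((norm_clip(data), norm_clip(distance), np.clip(agent_data, -1, 1)))


n_agents = 3
time_obs = deque(maxlen=2)  # keeps only the two most recent frames


def fake_tree_obs():
    # Hypothetical stand-in for env.reset()/env.step() observations:
    # one (data, distance, agent_data) tuple of float arrays per agent.
    return {a: (np.random.rand(50), np.random.rand(10), np.random.rand(5)) for a in range(n_agents)}


# At reset the same frame is appended twice, as in the "for i in range(2)" block.
first = {a: preprocess(o) for a, o in fake_tree_obs().items()}
for _ in range(2):
    time_obs.append(first)

# Each step: preprocess the new frame, append it, and feed the agent the
# concatenation of the previous and current frames.
new = {a: preprocess(o) for a, o in fake_tree_obs().items()}
time_obs.append(new)
agent_obs = {a: np.concatenate((time_obs[0][a], time_obs[1][a])) for a in range(n_agents)}
print(agent_obs[0].shape)  # (130,) with the toy sizes above: 2 * (50 + 10 + 5)

With the toy sizes assumed here each stacked observation has length 130; the real sizes depend on the tree depth used by TreeObsForRailEnv.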