Commit d40319cf authored by nilabha

Update single-agent and sequential-agent code to the new version

parent b845ca2c
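Note: the diff below migrates both scripts from the deprecated complex_rail_generator / complex_schedule_generator pair to sparse_rail_generator with a sparse_line_generator, and renames max_rails_in_city to max_rail_pairs_in_city. A minimal sketch of the new-style environment construction follows, assuming a recent flatland-rl release; the parameter values are illustrative and not taken from the commit:

import numpy as np
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.line_generators import sparse_line_generator
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv

# Sparse rail generators build city-based networks; line generators replace
# the old schedule generators for assigning start/target lines to agents.
env = RailEnv(
    width=30,
    height=30,
    rail_generator=sparse_rail_generator(
        max_num_cities=4,
        grid_mode=False,
        max_rails_between_cities=2,
        max_rail_pairs_in_city=3,  # renamed from max_rails_in_city
        seed=42,
    ),
    line_generator=sparse_line_generator(),
    number_of_agents=1,
    obs_builder_object=TreeObsForRailEnv(
        max_depth=2,
        predictor=ShortestPathPredictorForRailEnv(),
    ),
)
obs, info = env.reset(regenerate_rail=True, regenerate_schedule=True)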
import sys
import PIL
import numpy as np
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.line_generators import sparse_line_generator
from flatland.utils.rendertools import RenderTool
from pathlib import Path
@@ -23,36 +24,29 @@ multi_agent_training.py is a better starting point to train your own solution!
np.random.seed(2)
x_dim = np.random.randint(8, 20)
y_dim = np.random.randint(8, 20)
x_dim = np.random.randint(30, 35)
y_dim = np.random.randint(30, 35)
n_agents = np.random.randint(3, 8)
n_goals = n_agents + np.random.randint(0, 3)
min_dist = int(0.75 * min(x_dim, y_dim))
env = RailEnv(
width=x_dim,
height=y_dim,
rail_generator=complex_rail_generator(
nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
max_dist=99999,
seed=0
),
schedule_generator=complex_schedule_generator(),
rail_generator=sparse_rail_generator(),
line_generator=sparse_line_generator(),
obs_builder_object=TreeObsForRailEnv(max_depth=1, predictor=ShortestPathPredictorForRailEnv()),
number_of_agents=n_agents)
env.reset(True, True)
tree_depth = 1
observation_helper = TreeObsForRailEnv(max_depth=tree_depth, predictor=ShortestPathPredictorForRailEnv())
env_renderer = RenderTool(env, gl="PGL", )
handle = env.get_agent_handles()
n_episodes = 10
n_episodes = 1
max_steps = 100 * (env.height + env.width)
record_images = False
record_images = True
policy = OrderedPolicy()
action_dict = dict()
frame_list = []
for trials in range(1, n_episodes + 1):
# Reset environment
obs, info = env.reset(True, True)
done = env.dones
@@ -61,10 +55,10 @@ for trials in range(1, n_episodes + 1):
# Run episode
for step in range(max_steps):
env_renderer.render_env(show=True, show_observations=False, show_predictions=True)
env_renderer.render_env(show=False, show_observations=False, show_predictions=True)
if record_images:
env_renderer.gl.save_image("./Images/flatland_frame_{:04d}.bmp".format(frame_step))
frame_list.append(PIL.Image.fromarray(env_renderer.gl.get_image()))
frame_step += 1
# Action
@@ -82,4 +76,8 @@ for trials in range(1, n_episodes + 1):
obs, all_rewards, done, _ = env.step(action_dict)
if done['__all__']:
print(done)
if record_images:
frame_list[0].save(f"flatland_sequential_agent_{trials}.gif", save_all=True, append_images=frame_list[1:], duration=3, loop=0)
frame_list=[]
break
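The hunk above replaces the per-frame .bmp dump with an in-memory frame list that is written out as an animated GIF when the episode ends. A minimal sketch of that pattern, assuming an existing RenderTool instance; the helper names capture_frame and save_episode_gif are illustrative and not part of the commit:

import PIL.Image

frame_list = []

def capture_frame(env_renderer):
    # Render off-screen and store the current RGB buffer as a PIL image.
    env_renderer.render_env(show=False, show_observations=False, show_predictions=True)
    frame_list.append(PIL.Image.fromarray(env_renderer.gl.get_image()))

def save_episode_gif(path):
    # Assemble all captured frames into one animated GIF and clear the buffer.
    if frame_list:
        frame_list[0].save(path, save_all=True, append_images=frame_list[1:],
                           duration=3, loop=0)
        frame_list.clear()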
@@ -3,6 +3,9 @@ import sys
from argparse import ArgumentParser, Namespace
from collections import deque
from pathlib import Path
import PIL
from flatland.utils.rendertools import RenderTool
base_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(base_dir))
@@ -14,7 +17,7 @@ import torch
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from flatland.envs.line_generators import sparse_line_generator
from utils.observation_utils import normalize_observation
from flatland.envs.observations import TreeObsForRailEnv
@@ -30,8 +33,8 @@ multi_agent_training.py is a better starting point to train your own solution!
def train_agent(n_episodes):
# Environment parameters
n_agents = 1
x_dim = 25
y_dim = 25
x_dim = 30
y_dim = 30
n_cities = 4
max_rails_between_cities = 2
max_rails_in_city = 3
@@ -62,9 +65,9 @@ def train_agent(n_episodes):
seed=seed,
grid_mode=False,
max_rails_between_cities=max_rails_between_cities,
max_rails_in_city=max_rails_in_city
max_rail_pairs_in_city=max_rails_in_city
),
schedule_generator=sparse_schedule_generator(),
line_generator=sparse_line_generator(),
number_of_agents=n_agents,
obs_builder_object=tree_observation
)
@@ -83,7 +86,7 @@ def train_agent(n_episodes):
# Max number of steps per episode
# This is the official formula used during evaluations
max_steps = int(4 * 2 * (env.height + env.width + (n_agents / n_cities)))
max_steps = int(100 * (env.height + env.width + (n_agents / n_cities)))
action_dict = dict()
@@ -113,12 +116,19 @@ def train_agent(n_episodes):
# Double Dueling DQN policy
policy = DDDQNPolicy(state_size, action_size, Namespace(**training_parameters))
record_images = False
# env_renderer = RenderTool(env, gl="PGL", )
frame_list = []
for episode_idx in range(n_episodes):
score = 0
if episode_idx == n_episodes - 1:
record_images = True
# Reset environment
obs, info = env.reset(regenerate_rail=True, regenerate_schedule=True)
if record_images:
env_renderer = RenderTool(env, gl="PGL", )
env_renderer.reset()
# env_renderer.set_new_rail()
# Build agent specific observations
for agent in env.get_agent_handles():
@@ -127,7 +137,7 @@ def train_agent(n_episodes):
agent_prev_obs[agent] = agent_obs[agent].copy()
# Run episode
for step in range(max_steps - 1):
for step in range(max_steps*3 - 1):
for agent in env.get_agent_handles():
if info['action_required'][agent]:
# If an action is required, we want to store the obs at that step as well as the action
@@ -141,6 +151,9 @@ def train_agent(n_episodes):
# Environment step
next_obs, all_rewards, done, info = env.step(action_dict)
if record_images:
env_renderer.render_env(show=False, show_observations=False, show_predictions=True)
frame_list.append(PIL.Image.fromarray(env_renderer.gl.get_image()))
# Update replay buffer and train agent
for agent in range(env.get_num_agents()):
@@ -157,6 +170,14 @@ def train_agent(n_episodes):
score += all_rewards[agent]
if done['__all__']:
if record_images:
print(done)
tasks_done = np.sum([int(done[idx]) for idx in env.get_agent_handles()])
completed = tasks_done / max(1, env.get_num_agents())
print(completed)
frame_list[0].save(f"flatland_single_agent_{episode_idx}.gif", save_all=True, append_images=frame_list[1:], duration=3, loop=0)
frame_list=[]
# env_renderer.close_window()
break
# Epsilon decay
@@ -187,17 +208,54 @@ def train_agent(n_episodes):
action_probs
), end=end)
# Run episode with trained policy
obs, info = env.reset(regenerate_rail=True, regenerate_schedule=True)
env_renderer.reset()
frame_list = []
for step in range(max_steps - 1):
env_renderer.render_env(show=False, show_observations=False, show_predictions=True)
frame_list.append(PIL.Image.fromarray(env_renderer.gl.get_image()))
for agent in env.get_agent_handles():
if obs[agent]:
agent_obs[agent] = normalize_observation(obs[agent], observation_tree_depth, observation_radius=observation_radius)
action = 0
if info['action_required'][agent]:
action = policy.act(agent_obs[agent], eps=0.0)
action_dict.update({agent: action})
obs, all_rewards, done, info = env.step(action_dict)
for agent in env.get_agent_handles():
score += all_rewards[agent]
if done['__all__']:
frame_list[0].save(f"flatland_single_agent.gif", save_all=True, append_images=frame_list[1:], duration=3, loop=0)
frame_list = []
break
normalized_score = score / (max_steps * env.get_num_agents())
print(normalized_score)
tasks_finished = sum(done[idx] for idx in env.get_agent_handles())
completion = tasks_finished / max(1, env.get_num_agents())
print(completion)
# Plot overall training progress at the end
plt.plot(scores)
plt.show()
plt.savefig('scores.png')
# plt.show()
plt.plot(completion)
plt.show()
plt.savefig('completion.png')
# plt.show()
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("-n", "--n_episodes", dest="n_episodes", help="number of episodes to run", default=500, type=int)
parser.add_argument("-n", "--n_episodes", dest="n_episodes", help="number of episodes to run", default=200, type=int)
args = parser.parse_args()
train_agent(args.n_episodes)
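For completeness, the training entry point can also be driven programmatically; a minimal sketch, where the module name single_agent_training is an assumption since the commit page does not show the file name:

# Hypothetical module name; adjust to the actual file in the repository.
from single_agent_training import train_agent

# Run a short smoke-test training session; the command-line default is now 200 episodes.
train_agent(n_episodes=5)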