diff --git a/sequential_agent/__init__.py b/sequential_agent/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/sequential_agent/run_test.py b/sequential_agent/run_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..72b645fcdf909ff6a1a6742e6b41fe65e5903e4c
--- /dev/null
+++ b/sequential_agent/run_test.py
@@ -0,0 +1,77 @@
+from sequential_agent.simple_order_agent import OrderedAgent
+from flatland.envs.generators import rail_from_file, complex_rail_generator
+from flatland.envs.observations import TreeObsForRailEnv
+from flatland.envs.predictions import ShortestPathPredictorForRailEnv
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
+import numpy as np
+
+np.random.seed(2)
+"""
+file_name = "./railway/complex_scene.pkl"
+env = RailEnv(width=10,
+              height=20,
+              rail_generator=rail_from_file(file_name),
+              obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()))
+x_dim = env.width
+y_dim = env.height
+
+"""
+
+x_dim = 10 # np.random.randint(8, 20)
+y_dim = 10 # np.random.randint(8, 20)
+n_agents = 5 # np.random.randint(3, 8)
+n_goals = n_agents + np.random.randint(0, 3)
+min_dist = int(0.75 * min(x_dim, y_dim))
+
+env = RailEnv(width=x_dim,
+              height=y_dim,
+              rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
+                                                    max_dist=99999,
+                                                    seed=0),
+              obs_builder_object=TreeObsForRailEnv(max_depth=1, predictor=ShortestPathPredictorForRailEnv()),
+              number_of_agents=n_agents)
+env.reset(True, True)
+
+tree_depth = 1
+observation_helper = TreeObsForRailEnv(max_depth=tree_depth, predictor=ShortestPathPredictorForRailEnv())
+env_renderer = RenderTool(env, gl="PILSVG", )
+handle = env.get_agent_handles()
+n_trials = 10
+max_steps = 3 * (env.height + env.width)
+record_images = False
+agent = OrderedAgent()
+action_dict = dict()
+
+for trials in range(1, n_trials + 1):
+
+    # Reset environment
+    obs = env.reset(True, True)
+    done = env.dones
+    env_renderer.reset()
+    frame_step = 0
+    # Run episode
+    for step in range(max_steps):
+        env_renderer.render_env(show=True, show_observations=False, show_predictions=True)
+
+        if record_images:
+            env_renderer.gl.save_image("./Images/flatland_frame_{:04d}.bmp".format(frame_step))
+            frame_step += 1
+
+        # Action
+        acting_agent = 0
+        for a in range(env.get_num_agents()):
+            if done[a]:
+                acting_agent += 1
+            if acting_agent == a:
+                action = agent.act(obs[acting_agent], eps=0)
+            else:
+                action = 0
+            action_dict.update({a: action})
+
+        # Environment step
+
+        obs, all_rewards, done, _ = env.step(action_dict)
+
+        if done['__all__']:
+            break
diff --git a/sequential_agent/simple_order_agent.py b/sequential_agent/simple_order_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e888c51ab7210062ee6efb9862cd78e5a61ca5a
--- /dev/null
+++ b/sequential_agent/simple_order_agent.py
@@ -0,0 +1,39 @@
+import numpy as np
+from utils.observation_utils import split_tree, min_lt
+
+
+class OrderedAgent:
+
+    def __init__(self):
+        self.action_size = 5
+
+    def act(self, state, eps=0):
+        """
+        :param state: input is the observation of the agent
+        :return: returns an action
+        """
+        _, distance, _ = split_tree(tree=np.array(state), num_features_per_node=9,
+                                    current_depth=0)
+        distance = distance[1:]
+        min_dist = min_lt(distance, 0)
+        min_direction = np.where(distance == min_dist)
+        if len(min_direction[0]) > 1:
+            return min_direction[0][0] + 1
+        return min_direction[0] + 1
+
+    def step(self, memories):
+        """
+        Step function to improve agent by adjusting policy given the observations
+
+        :param memories: SARS Tuple to be
+        :return:
+        """
+        return
+
+    def save(self, filename):
+        # Store the current policy
+        return
+
+    def load(self, filename):
+        # Load a policy
+        return
diff --git a/torch_training/Nets/avoid_checkpoint15000.pth b/torch_training/Nets/avoid_checkpoint15000.pth
index 32b6f6ea0591f0e74f84fc86079484520040ca18..9d1936ab4a1d51530662b589423f78c0ccb57c44 100644
Binary files a/torch_training/Nets/avoid_checkpoint15000.pth and b/torch_training/Nets/avoid_checkpoint15000.pth differ
diff --git a/torch_training/Nets/avoid_checkpoint30000.pth b/torch_training/Nets/avoid_checkpoint30000.pth
index a44eb3ff895908b1146f8c3cbf1607689169bbbf..066b00180693a783ae134195e7cfdb1cd8975624 100644
Binary files a/torch_training/Nets/avoid_checkpoint30000.pth and b/torch_training/Nets/avoid_checkpoint30000.pth differ
diff --git a/torch_training/multi_agent_inference.py b/torch_training/multi_agent_inference.py
index 4ee79bbcbdeec111f75f5e543097526efafac805..003b18a566d28b73df3e91a0a4aabc7b600e9b7f 100644
--- a/torch_training/multi_agent_inference.py
+++ b/torch_training/multi_agent_inference.py
@@ -3,7 +3,7 @@ from collections import deque
 
 import numpy as np
 import torch
-from flatland.envs.generators import rail_from_file
+from flatland.envs.generators import rail_from_file, complex_rail_generator
 from flatland.envs.observations import TreeObsForRailEnv
 from flatland.envs.predictions import ShortestPathPredictorForRailEnv
 from flatland.envs.rail_env import RailEnv
@@ -17,7 +17,7 @@ from utils.observation_utils import norm_obs_clip, split_tree
 
 random.seed(3)
 np.random.seed(2)
-file_name = "./railway/flatland.pkl"
+file_name = "./railway/complex_scene.pkl"
 env = RailEnv(width=10,
               height=20,
               rail_generator=rail_from_file(file_name),
@@ -27,10 +27,10 @@ y_dim = env.height
 
 """
 
-x_dim = 20 #np.random.randint(8, 20)
-y_dim = 20 #np.random.randint(8, 20)
-n_agents = 1 # np.random.randint(3, 8)
-n_goals = 10 + n_agents + np.random.randint(0, 3)
+x_dim = 50 #np.random.randint(8, 20)
+y_dim = 50 #np.random.randint(8, 20)
+n_agents = 20 # np.random.randint(3, 8)
+n_goals = n_agents + np.random.randint(0, 3)
 min_dist = int(0.75 * min(x_dim, y_dim))
 
 env = RailEnv(width=x_dim,
@@ -53,7 +53,7 @@ for i in range(tree_depth + 1):
 state_size = num_features_per_node * nr_nodes
 action_size = 5
 
-n_trials = 10
+n_trials = 1
 observation_radius = 10
 max_steps = int(3 * (env.height + env.width))
 eps = 1.
@@ -70,10 +70,10 @@ action_prob = [0] * action_size
 agent_obs = [None] * env.get_num_agents()
 agent_next_obs = [None] * env.get_num_agents()
 agent = Agent(state_size, action_size, "FC", 0)
-with path(torch_training.Nets, "avoid_checkpoint36600.pth") as file_in:
+with path(torch_training.Nets, "avoid_checkpoint52800.pth") as file_in:
     agent.qnetwork_local.load_state_dict(torch.load(file_in))
 
-record_images = False
+record_images = True
 frame_step = 0
 
 for trials in range(1, n_trials + 1):
@@ -93,7 +93,7 @@ for trials in range(1, n_trials + 1):
 
     # Run episode
     for step in range(max_steps):
-        env_renderer.render_env(show=True, show_observations=False, show_predictions=True)
+        env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
 
         if record_images:
             env_renderer.gl.save_image("./Images/Avoiding/flatland_frame_{:04d}.bmp".format(frame_step))
diff --git a/torch_training/multi_agent_training.py b/torch_training/multi_agent_training.py
index 4d337d1d8efa3bb455eeb9ecbd06ca783bafa673..b5fe86a04e981c7bdae96976bfdfca85d533d789 100644
--- a/torch_training/multi_agent_training.py
+++ b/torch_training/multi_agent_training.py
@@ -92,8 +92,8 @@ def main(argv):
     agent = Agent(state_size, action_size, "FC", 0)
 
     # Here you can pre-load an agent
-    if False:
-        with path(torch_training.Nets, "avoid_checkpoint30000.pth") as file_in:
+    if True:
+        with path(torch_training.Nets, "avoid_checkpoint53700.pth") as file_in:
             agent.qnetwork_local.load_state_dict(torch.load(file_in))
 
     # Do training over n_episodes
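Note on the action-selection loop added in sequential_agent/run_test.py: each step, the loop hands a real action from OrderedAgent only to the first agent that is still running (assuming agents finish in index order, as they do in this sequential setup), while every other agent receives action 0 (do nothing). Below is a minimal, self-contained sketch of that dispatch logic; the helper name build_action_dict and the stubbed policy are illustrative only and are not part of this change.

# Standalone sketch of the sequential dispatch used in run_test.py:
# only the first not-yet-done agent acts, everyone else gets action 0.
def build_action_dict(obs, done, policy):
    action_dict = {}
    acting_agent = 0
    for a in range(len(obs)):
        if done[a]:
            acting_agent += 1
        if acting_agent == a:
            # This agent is the first one still running; ask the policy.
            action_dict[a] = policy(obs[acting_agent])
        else:
            action_dict[a] = 0
    return action_dict

if __name__ == "__main__":
    # Agent 0 already reached its target, so agent 1 is the one allowed to move.
    obs = {0: None, 1: "obs_1", 2: "obs_2"}
    done = {0: True, 1: False, 2: False}
    print(build_action_dict(obs, done, policy=lambda _: 2))  # {0: 0, 1: 2, 2: 0}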