diff --git a/sequential_agent/simple_order_agent.py b/sequential_agent/simple_order_agent.py
index 3feff350e94226f157559036abdaea8d5dc18bf9..e572434685554acc67a64a550b3d0213de5681d4 100644
--- a/sequential_agent/simple_order_agent.py
+++ b/sequential_agent/simple_order_agent.py
@@ -1,5 +1,5 @@
 import numpy as np
-from utils.observation_utils import split_tree, min_lt
+from utils.observation_utils import split_tree, min_gt
 
 
 class OrderedAgent:
@@ -15,7 +15,7 @@ class OrderedAgent:
         _, distance, _ = split_tree(tree=np.array(state), num_features_per_node=9,
                                     current_depth=0)
         distance = distance[1:]
-        min_dist = min_lt(distance, 0)
+        min_dist = min_gt(distance, 0)
         min_direction = np.where(distance == min_dist)
         if len(min_direction[0]) > 1:
             return min_direction[0][-1] + 1
diff --git a/torch_training/Nets/avoid_checkpoint60000.pth b/torch_training/Nets/avoid_checkpoint60000.pth
index b6f15348130b09ae8bee0adad454031fc013fabf..b4fef60542f50419353047721fae31f5382e7bd4 100644
Binary files a/torch_training/Nets/avoid_checkpoint60000.pth and b/torch_training/Nets/avoid_checkpoint60000.pth differ
diff --git a/torch_training/multi_agent_inference.py b/torch_training/multi_agent_inference.py
index 2b541219e688a8b55e20b412fd91f9e8cc22b9cb..6a9ed8e967a6389d828d7444e68fa9bc845dd0a1 100644
--- a/torch_training/multi_agent_inference.py
+++ b/torch_training/multi_agent_inference.py
@@ -17,7 +17,7 @@ from utils.observation_utils import normalize_observation
 random.seed(3)
 np.random.seed(2)
 
-file_name = "./railway/simple_avoid.pkl"
+file_name = "./railway/testing_stuff.pkl"
 env = RailEnv(width=10,
               height=20,
               rail_generator=rail_from_file(file_name),
@@ -41,6 +41,7 @@ env = RailEnv(width=x_dim,
               obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()),
               number_of_agents=n_agents)
 env.reset(True, True)
+
 """
 tree_depth = 3
 observation_helper = TreeObsForRailEnv(max_depth=tree_depth, predictor=ShortestPathPredictorForRailEnv())
@@ -70,7 +71,7 @@ action_prob = [0] * action_size
 agent_obs = [None] * env.get_num_agents()
 agent_next_obs = [None] * env.get_num_agents()
 agent = Agent(state_size, action_size, "FC", 0)
-with path(torch_training.Nets, "avoid_checkpoint49000.pth") as file_in:
+with path(torch_training.Nets, "avoid_checkpoint59900.pth") as file_in:
     agent.qnetwork_local.load_state_dict(torch.load(file_in))
 
 record_images = False
@@ -93,7 +94,7 @@ for trials in range(1, n_trials + 1):
         if record_images:
             env_renderer.gl.save_image("./Images/Avoiding/flatland_frame_{:04d}.bmp".format(frame_step))
             frame_step += 1
-        # time.sleep(5)
+        time.sleep(1.5)
         # Action
         for a in range(env.get_num_agents()):
             action = agent.act(agent_obs[a], eps=0)
diff --git a/torch_training/multi_agent_training.py b/torch_training/multi_agent_training.py
index 5e194f51b09bbe4e81e257caedf3d57d17e04bca..c3c8c2b6b49dfeee1eceafa52d2bc0b4f1ff366e 100644
--- a/torch_training/multi_agent_training.py
+++ b/torch_training/multi_agent_training.py
@@ -73,7 +73,7 @@ def main(argv):
     n_episodes = 60000
 
     # Set max number of steps per episode as well as other training relevant parameter
-    max_steps = int(3 * (env.height + env.width))
+    max_steps = int((env.height + env.width))
     eps = 1.
     eps_end = 0.005
     eps_decay = 0.9995
@@ -102,7 +102,7 @@ def main(argv):
        Training Curriculum: In order to get good generalization we change the number of agents and the size of the levels
        every 50 episodes.
        """
-        if episodes % 50 == 0:
+        if episodes % 50 == 1:
            x_dim = np.random.randint(8, 15)
            y_dim = np.random.randint(8, 15)
            n_agents = np.random.randint(3, 8)
@@ -117,7 +117,7 @@ def main(argv):
                          number_of_agents=n_agents)
 
            # Adjust the parameters according to the new env.
-           max_steps = int(3 * (env.height + env.width))
+           max_steps = int((env.height + env.width))
            agent_obs = [None] * env.get_num_agents()
            agent_next_obs = [None] * env.get_num_agents()
 
@@ -174,7 +174,11 @@ def main(argv):
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
 
        # Collection information about training
-       done_window.append(env_done)
+       tasks_finished = 0
+       for _idx in range(env.get_num_agents()):
+           if done[_idx] == 1:
+               tasks_finished += 1
+       done_window.append(tasks_finished / env.get_num_agents())
        scores_window.append(score / max_steps)  # save most recent score
        scores.append(np.mean(scores_window))
        dones_list.append((np.mean(done_window)))
diff --git a/utils/observation_utils.py b/utils/observation_utils.py
index b3dd5aefdc8e57e79fe816efadf2a05d50e02d8b..26108cca8da0156c07f01b8f036167ad449b8d8f 100644
--- a/utils/observation_utils.py
+++ b/utils/observation_utils.py
@@ -15,7 +15,7 @@ def max_lt(seq, val):
     return max
 
 
-def min_lt(seq, val):
+def min_gt(seq, val):
     """
     Return smallest item in seq for which item > val applies.
     None is returned if seq was empty or all items in seq were >= val.
@@ -29,7 +29,7 @@ def min_lt(seq, val):
     return min
 
 
-def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0):
+def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0, normalize_to_range=False):
     """
     This function returns the difference between min and max value of an observation
     :param obs: Observation that should be normalized
@@ -42,13 +42,12 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0):
     else:
         max_obs = max(1, max_lt(obs, 1000)) + 1
 
-    min_obs = 0  # min(max_obs, min_lt(obs, 0))
-
+    min_obs = 0  # min(max_obs, min_gt(obs, 0))
+    if normalize_to_range:
+        min_obs = min_gt(obs, 0)
     if max_obs == min_obs:
         return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
     norm = np.abs(max_obs - min_obs)
-    if norm == 0:
-        norm = 1.
     return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
 
 
@@ -103,7 +102,7 @@ def normalize_observation(observation, num_features_per_node=9, observation_radi
     data, distance, agent_data = split_tree(tree=np.array(observation), num_features_per_node=num_features_per_node,
                                             current_depth=0)
     data = norm_obs_clip(data, fixed_radius=observation_radius)
-    distance = norm_obs_clip(distance)
+    distance = norm_obs_clip(distance, normalize_to_range=True)
     agent_data = np.clip(agent_data, -1, 1)
     normalized_obs = np.concatenate((np.concatenate((data, distance)), agent_data))
     return normalized_obs