From 2f1e8af1a9027e127b0680d9910021c3775c563d Mon Sep 17 00:00:00 2001 From: Guillaume Mollard <guillaume.mollard2@gmail.com> Date: Wed, 12 Jun 2019 12:49:44 +0200 Subject: [PATCH] improved prediction observation, not tested git add RLLib_training/RailEnvRLLibWrapper.py RLLib_training/render_training_result.py --- RLLib_training/RailEnvRLLibWrapper.py | 164 +++++++++++------------ RLLib_training/render_training_result.py | 60 +++++---- 2 files changed, 112 insertions(+), 112 deletions(-) diff --git a/RLLib_training/RailEnvRLLibWrapper.py b/RLLib_training/RailEnvRLLibWrapper.py index a5184f2..68a76e6 100644 --- a/RLLib_training/RailEnvRLLibWrapper.py +++ b/RLLib_training/RailEnvRLLibWrapper.py @@ -43,8 +43,6 @@ class RailEnvRLLibWrapper(MultiAgentEnv): self.height = self.env.height self.step_memory = config["step_memory"] - - def reset(self): self.agents_done = [] if self.predefined_env: @@ -54,45 +52,25 @@ class RailEnvRLLibWrapper(MultiAgentEnv): predictions = self.env.predict() if predictions != {}: + # pred_pos is a 3 dimensions array (N_Agents, T_pred, 2) containing x and y coordinates of + # agents at each time step pred_pos = np.concatenate([[x[:, 1:3]] for x in list(predictions.values())], axis=0) + pred_dir = [x[:, 2] for x in list(predictions.values())] o = dict() for i_agent in range(len(self.env.agents)): if predictions != {}: - # prediction of collision that will be added to the observation - # Allows to the agent to know which other train is is about to meet (maybe will come - # up with a priority order of trains). - pred_obs = np.zeros((len(predictions[0]), len(self.env.agents))) - - for time_offset in range(len(predictions[0])): - - # We consider a time window of t-1; t+1 to find a collision - collision_window = list(range(max(time_offset - 1, 0), min(time_offset + 2, len(predictions[0])))) - - coord_agent = pred_pos[i_agent, time_offset, 0] + 1000*pred_pos[i_agent, time_offset, 1] - - # x coordinates of all other train in the time window - x_coord_other_agents = pred_pos[list(range(i_agent)) + list(range(i_agent+1, len(self.env.agents)))][ - :, collision_window, 0] - - # y coordinates of all other train in the time window - y_coord_other_agents = pred_pos[list(range(i_agent)) + list(range(i_agent + 1, len(self.env.agents)))][ - :, collision_window, 1] - - coord_other_agents = x_coord_other_agents + 1000*y_coord_other_agents - - # collision_info here contains the index of the agent colliding with the current agent - for collision_info in np.argwhere(coord_agent == coord_other_agents)[:, 0]: - pred_obs[time_offset, collision_info + 1*(collision_info >= i_agent)] = 1 + pred_obs = self.get_prediction_as_observation(pred_pos, pred_dir, i_agent) agent_id_one_hot = np.zeros(len(self.env.agents)) agent_id_one_hot[i_agent] = 1 o[i_agent] = [obs[i_agent], agent_id_one_hot, pred_obs] else: o[i_agent] = obs[i_agent] - + + # needed for the renderer self.rail = self.env.rail self.agents = self.env.agents self.agents_static = self.env.agents_static @@ -110,45 +88,23 @@ class RailEnvRLLibWrapper(MultiAgentEnv): def step(self, action_dict): obs, rewards, dones, infos = self.env.step(action_dict) - # print(obs) d = dict() r = dict() o = dict() - # print(self.agents_done) - # print(dones) + predictions = self.env.predict() - if predictions != {}: + if predictions != {}: + # pred_pos is a 3 dimensions array (N_Agents, T_pred, 2) containing x and y coordinates of + # agents at each time step pred_pos = np.concatenate([[x[:, 1:3]] for x in list(predictions.values())], axis=0) + pred_dir = [x[:, 2] for x in list(predictions.values())] for i_agent in range(len(self.env.agents)): if i_agent not in self.agents_done: - # prediction of collision that will be added to the observation - # Allows to the agent to know which other train is is about to meet (maybe will come - # up with a priority order of trains). - if predictions != {}: - pred_obs = np.zeros((len(predictions[0]), len(self.env.agents))) - for time_offset in range(len(predictions[0])): - - # We consider a time window of t-1; t+1 to find a collision - collision_window = list(range(max(time_offset - 1, 0), min(time_offset + 2, len(predictions[0])))) - - coord_agent = pred_pos[i_agent, time_offset, 0] + 1000*pred_pos[i_agent, time_offset, 1] - - # x coordinates of all other train in the time window - x_coord_other_agents = pred_pos[list(range(i_agent)) + list(range(i_agent+1, len(self.env.agents)))][ - :, collision_window, 0] - - # y coordinates of all other train in the time window - y_coord_other_agents = pred_pos[list(range(i_agent)) + list(range(i_agent + 1, len(self.env.agents)))][ - :, collision_window, 1] - - coord_other_agents = x_coord_other_agents + 1000*y_coord_other_agents - - # collision_info here contains the index of the agent colliding with the current agent - for collision_info in np.argwhere(coord_agent == coord_other_agents)[:, 0]: - pred_obs[time_offset, collision_info + 1*(collision_info >= i_agent)] = 1 + if predictions != {}: + pred_obs = self.get_prediction_as_observation(pred_pos, pred_dir, i_agent) agent_id_one_hot = np.zeros(len(self.env.agents)) agent_id_one_hot[i_agent] = 1 o[i_agent] = [obs[i_agent], agent_id_one_hot, pred_obs] @@ -159,22 +115,6 @@ class RailEnvRLLibWrapper(MultiAgentEnv): d['__all__'] = dones['__all__'] - # for agent, done in dones.items(): - # if agent not in self.agents_done: - # if agent != '__all__': - # # o[agent] = obs[agent] - # #one_hot_agent_encoding = np.zeros(len(self.env.agents)) - # #one_hot_agent_encoding[agent] += 1 - # o[agent] = obs[agent]#np.append(obs[agent], one_hot_agent_encoding) - # - # - # d[agent] = dones[agent] - - self.agents = self.env.agents - self.agents_static = self.env.agents_static - self.dev_obs_dict = self.env.dev_obs_dict - #print('Old OBS #####', self.old_obs) - if self.step_memory >= 2: oo = dict() @@ -188,16 +128,6 @@ class RailEnvRLLibWrapper(MultiAgentEnv): if done and agent != '__all__': self.agents_done.append(agent) - #print(obs) - #return obs, rewards, dones, infos - # oo = dict() - # oo['agents'] = o - # o['global'] = np.ones((17, 17)) * 17 - - # o[0] = [o[0], np.ones((17, 17)) * 17] - # o['global_obs'] = np.ones((17, 17)) * 17 - # r['global_obs'] = 0 - # d['global_obs'] = True if self.step_memory < 2: return o, r, d, infos else: @@ -208,3 +138,71 @@ class RailEnvRLLibWrapper(MultiAgentEnv): def get_num_agents(self): return self.env.get_num_agents() + + def get_prediction_as_observation(self, pred_pos, pred_dir, agent_handle): + ''' + :param pred_pos: pred_pos should be a 3 dimensions array (N_Agents, T_pred, 2) containing x and y + predicted coordinates of agents at each time step + :param pred_dir: pred_dir should be a 2 dimensions array (N_Agents, T_pred) predicted directions + of agents at each time step + :param agent_handle: agent index + :return: 2 dimensional array (T_pred, N_agents) with value 1 at coord. (t,i) if agent 'agent_handle' + and agent i are going to meet at time step t. + + Computes prediction of collision that will be added to the observation. + Allows to the agent to know which other train it is about to meet, and when. + The id of the other trains are shared, allowing eventually the agents to come + up with a priority order of trains. + ''' + + pred_obs = np.zeros((len(pred_pos[1]), len(self.env.agents))) + + for time_offset in range(len(pred_pos[1])): + + # We consider a time window of t-1:t+1 to find a collision + collision_window = list(range(max(time_offset - 1, 0), min(time_offset + 2, len(pred_pos[1])))) + + # coordinate of agent `agent_handle` at time t. + coord_agent = pred_pos[agent_handle, time_offset, 0] + 1000 * pred_pos[agent_handle, time_offset, 1] + + # x coordinates of all other agents in the time window + # array of dim (N_Agents, 3), the 3 elements corresponding to x coordinates of the agents + # at t-1, t, t + 1 + x_coord_other_agents = pred_pos[list(range(agent_handle)) + + list(range(agent_handle + 1, + len(self.env.agents)))][:, collision_window, 0] + + # y coordinates of all other agents in the time window + # array of dim (N_Agents, 3), the 3 elements corresponding to y coordinates of the agents + # at t-1, t, t + 1 + y_coord_other_agents = pred_pos[list(range(agent_handle)) + + list(range(agent_handle + 1, len(self.env.agents)))][ + :, collision_window, 1] + + coord_other_agents = x_coord_other_agents + 1000 * y_coord_other_agents + + # collision_info here contains the index of the agent colliding with the current agent and + # the delta_t at which they visit the same cell (0 for t-1, 1 for t or 2 for t+1) + for collision_info in np.argwhere(coord_agent == coord_other_agents): + # If they are on the same cell at the same time, there is a collison in all cases + if collision_info[1] == 1: + pred_obs[time_offset, collision_info[0] + 1 * (collision_info[0] >= agent_handle)] = 1 + elif collision_info[1] == 0: + # In this case, the other agent (agent 2) was on the same cell at t-1 + # There is a collision if agent 2 is at t, on the cell where was agent 1 at t-1 + coord_agent_1_t_minus_1 = pred_pos[agent_handle, time_offset-1, 0] + \ + 1000 * pred_pos[agent_handle, time_offset, 1] + coord_agent_2_t = coord_other_agents[collision_info[0], 1] + if coord_agent_1_t_minus_1 == coord_agent_2_t: + pred_obs[time_offset, collision_info[0] + 1 * (collision_info[0] >= agent_handle)] = 1 + + elif collision_info[1] == 2: + # In this case, the other agent (agent 2) will be on the same cell at t+1 + # There is a collision if agent 2 is at t, on the cell where will be agent 1 at t+1 + coord_agent_1_t_plus_1 = pred_pos[agent_handle, time_offset + 1, 0] + \ + 1000 * pred_pos[agent_handle, time_offset, 1] + coord_agent_2_t = coord_other_agents[collision_info[0], 1] + if coord_agent_1_t_plus_1 == coord_agent_2_t: + pred_obs[time_offset, collision_info[0] + 1 * (collision_info[0] >= agent_handle)] = 1 + + return pred_obs diff --git a/RLLib_training/render_training_result.py b/RLLib_training/render_training_result.py index 5b9a08c..5f0159f 100644 --- a/RLLib_training/render_training_result.py +++ b/RLLib_training/render_training_result.py @@ -50,11 +50,11 @@ ModelCatalog.register_custom_model("conv_model", ConvModelGlobalObs) ray.init()#object_store_memory=150000000000, redis_max_memory=30000000000) -CHECKPOINT_PATH = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/' \ - 'conv_model_test/ppo_policy_TreeObsForRailEnv_5_agents_conv_model_False_ial1g3w9/checkpoint_51/checkpoint-51' +CHECKPOINT_PATH = '/home/guillaume/Desktop/distMAgent/env_complexity_benchmark/' \ + 'ppo_policy_nr_extra_10_0qxx0qy_/checkpoint_1001/checkpoint-1001' -N_EPISODES = 3 -N_STEPS_PER_EPISODE = 50 +N_EPISODES = 10 +N_STEPS_PER_EPISODE = 80 def render_training_result(config): @@ -62,22 +62,11 @@ def render_training_result(config): set_seed(config['seed'], config['seed'], config['seed']) - transition_probability = [15, # empty cell - Case 0 - 5, # Case 1 - straight - 5, # Case 2 - simple switch - 1, # Case 3 - diamond crossing - 1, # Case 4 - single slip - 1, # Case 5 - double slip - 1, # Case 6 - symmetrical - 0, # Case 7 - dead end - 1, # Case 1b (8) - simple turn right - 1, # Case 1c (9) - simple turn left - 1] # Case 2b (10) - simple switch mirrored - # Example configuration to generate a random rail env_config = {"width": config['map_width'], "height": config['map_height'], - "rail_generator": complex_rail_generator, + "rail_generator": config["rail_generator"], + "nr_extra": config["nr_extra"], "number_of_agents": config['n_agents'], "seed": config['seed'], "obs_builder": config['obs_builder']} @@ -85,7 +74,7 @@ def render_training_result(config): # Observation space and action space definitions if isinstance(config["obs_builder"], TreeObsForRailEnv): - obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(105,)) + obs_space = gym.spaces.Box(low=-1, high=1, shape=(147,)) preprocessor = "tree_obs_prep" elif isinstance(config["obs_builder"], GlobalObsForRailEnv): @@ -154,6 +143,8 @@ def render_training_result(config): trainer_config['simple_optimizer'] = False trainer_config['postprocess_inputs'] = True trainer_config['log_level'] = 'WARN' + trainer_config['num_sgd_iter'] = 10 + trainer_config['clip_param'] = 0.2 env = RailEnvRLLibWrapper(env_config) @@ -163,21 +154,29 @@ def render_training_result(config): policy = trainer.get_policy(config['policy_folder_name'].format(**locals())) - env_renderer = RenderTool(env, gl="PIL", show=True) + + preprocessor = CustomPreprocessor(gym.spaces.Box(low=-1, high=1, shape=(147,))) + env_renderer = RenderTool(env, gl="PIL") for episode in range(N_EPISODES): observation = env.reset() for i in range(N_STEPS_PER_EPISODE): - - action, _, infos = policy.compute_actions(list(observation.values()), []) - env_renderer.renderEnv(show=True, frames=True, iEpisode=episode, iStep=i, - action_dict=action) + preprocessed_obs = [] + for obs in observation.values(): + preprocessed_obs.append(preprocessor.transform(obs)) + action, _, infos = policy.compute_actions(preprocessed_obs, []) logits = infos['behaviour_logits'] actions = dict() for j, logit in enumerate(logits): actions[j] = np.argmax(logit) - + # for j, act in enumerate(action): + # actions[j] = act time.sleep(1) - observation, _, _, _ = env.step(action) + print(actions, logits) + # print(action, print(infos['behaviour_logits'])) + env_renderer.renderEnv(show=True, frames=True, iEpisode=episode, iStep=i, + action_dict=list(actions.values())) + + observation, _, _, _ = env.step(actions) env_renderer.close_window() @@ -185,7 +184,7 @@ def render_training_result(config): @gin.configurable def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every, map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder, - entropy_coeff, seed, conv_model): + entropy_coeff, seed, conv_model, rail_generator, nr_extra): render_training_result( config={"n_agents": n_agents, @@ -199,12 +198,15 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every, "obs_builder": obs_builder, "entropy_coeff": entropy_coeff, "seed": seed, - "conv_model": conv_model - }) + "conv_model": conv_model, + "rail_generator": rail_generator, + "nr_extra": 10# nr_extra + } + ) if __name__ == '__main__': gin.external_configurable(tune.grid_search) - dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/conv_model_test' # To Modify + dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/env_complexity_benchmark' # To Modify gin.parse_config_file(dir + '/config.gin') run_experiment(local_dir=dir) -- GitLab