From 2f1e8af1a9027e127b0680d9910021c3775c563d Mon Sep 17 00:00:00 2001
From: Guillaume Mollard <guillaume.mollard2@gmail.com>
Date: Wed, 12 Jun 2019 12:49:44 +0200
Subject: [PATCH] improved prediction observation, not tested git add
 RLLib_training/RailEnvRLLibWrapper.py
 RLLib_training/render_training_result.py

---
 RLLib_training/RailEnvRLLibWrapper.py    | 164 +++++++++++------------
 RLLib_training/render_training_result.py |  60 +++++----
 2 files changed, 112 insertions(+), 112 deletions(-)

diff --git a/RLLib_training/RailEnvRLLibWrapper.py b/RLLib_training/RailEnvRLLibWrapper.py
index a5184f2..68a76e6 100644
--- a/RLLib_training/RailEnvRLLibWrapper.py
+++ b/RLLib_training/RailEnvRLLibWrapper.py
@@ -43,8 +43,6 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
         self.height = self.env.height
         self.step_memory = config["step_memory"]
 
-
-    
     def reset(self):
         self.agents_done = []
         if self.predefined_env:
@@ -54,45 +52,25 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
 
         predictions = self.env.predict()
         if predictions != {}:
+            # pred_pos is a 3 dimensions array (N_Agents, T_pred, 2) containing x and y coordinates of
+            # agents at each time step
             pred_pos = np.concatenate([[x[:, 1:3]] for x in list(predictions.values())], axis=0)
+            pred_dir = [x[:, 2] for x in list(predictions.values())]
 
         o = dict()
 
         for i_agent in range(len(self.env.agents)):
             
             if predictions != {}:
-                # prediction of collision that will be added to the observation
-                # Allows to the agent to know which other train is is about to meet (maybe will come
-                # up with a priority order of trains).
-                pred_obs = np.zeros((len(predictions[0]), len(self.env.agents)))
-
-                for time_offset in range(len(predictions[0])):
-
-                    # We consider a time window of t-1; t+1 to find a collision
-                    collision_window = list(range(max(time_offset - 1, 0), min(time_offset + 2, len(predictions[0]))))
-
-                    coord_agent = pred_pos[i_agent, time_offset, 0] + 1000*pred_pos[i_agent, time_offset, 1]
-
-                    # x coordinates of all other train in the time window
-                    x_coord_other_agents = pred_pos[list(range(i_agent)) + list(range(i_agent+1, len(self.env.agents)))][
-                                                    :, collision_window, 0]
-
-                    # y coordinates of all other train in the time window
-                    y_coord_other_agents = pred_pos[list(range(i_agent)) + list(range(i_agent + 1, len(self.env.agents)))][
-                                                    :, collision_window, 1]
-
-                    coord_other_agents = x_coord_other_agents + 1000*y_coord_other_agents
-
-                    # collision_info here contains the index of the agent colliding with the current agent
-                    for collision_info in np.argwhere(coord_agent == coord_other_agents)[:, 0]:
-                        pred_obs[time_offset, collision_info + 1*(collision_info >= i_agent)] = 1
+                pred_obs = self.get_prediction_as_observation(pred_pos, pred_dir, i_agent)
 
                 agent_id_one_hot = np.zeros(len(self.env.agents))
                 agent_id_one_hot[i_agent] = 1
                 o[i_agent] = [obs[i_agent], agent_id_one_hot, pred_obs]
             else:
                 o[i_agent] = obs[i_agent]
-        
+
+        # needed for the renderer
         self.rail = self.env.rail                      
         self.agents = self.env.agents                  
         self.agents_static = self.env.agents_static    
@@ -110,45 +88,23 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
 
     def step(self, action_dict):
         obs, rewards, dones, infos = self.env.step(action_dict)
-        # print(obs)
 
         d = dict()
         r = dict()
         o = dict()
-        # print(self.agents_done)
-        # print(dones)
+
         predictions = self.env.predict()
-        if predictions != {}:    
+        if predictions != {}:
+            # pred_pos is a 3 dimensions array (N_Agents, T_pred, 2) containing x and y coordinates of
+            # agents at each time step
             pred_pos = np.concatenate([[x[:, 1:3]] for x in list(predictions.values())], axis=0)
+            pred_dir = [x[:, 2] for x in list(predictions.values())]
 
         for i_agent in range(len(self.env.agents)):
             if i_agent not in self.agents_done:
-                # prediction of collision that will be added to the observation
-                # Allows to the agent to know which other train is is about to meet (maybe will come
-                # up with a priority order of trains).
-                if predictions != {}:
-                    pred_obs = np.zeros((len(predictions[0]), len(self.env.agents)))
-                    for time_offset in range(len(predictions[0])):
-
-                        # We consider a time window of t-1; t+1 to find a collision
-                        collision_window = list(range(max(time_offset - 1, 0), min(time_offset + 2, len(predictions[0]))))
-
-                        coord_agent = pred_pos[i_agent, time_offset, 0] + 1000*pred_pos[i_agent, time_offset, 1]
-
-                        # x coordinates of all other train in the time window
-                        x_coord_other_agents = pred_pos[list(range(i_agent)) + list(range(i_agent+1, len(self.env.agents)))][
-                                                        :, collision_window, 0]
-
-                        # y coordinates of all other train in the time window
-                        y_coord_other_agents = pred_pos[list(range(i_agent)) + list(range(i_agent + 1, len(self.env.agents)))][
-                                                        :, collision_window, 1]
-
-                        coord_other_agents = x_coord_other_agents + 1000*y_coord_other_agents
-
-                        # collision_info here contains the index of the agent colliding with the current agent
-                        for collision_info in np.argwhere(coord_agent == coord_other_agents)[:, 0]:
-                            pred_obs[time_offset, collision_info + 1*(collision_info >= i_agent)] = 1
 
+                if predictions != {}:
+                    pred_obs = self.get_prediction_as_observation(pred_pos, pred_dir, i_agent)
                     agent_id_one_hot = np.zeros(len(self.env.agents))
                     agent_id_one_hot[i_agent] = 1
                     o[i_agent] = [obs[i_agent], agent_id_one_hot, pred_obs]
@@ -159,22 +115,6 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
 
         d['__all__'] = dones['__all__']
 
-        # for agent, done in dones.items():
-        #     if agent not in self.agents_done:
-        #         if agent != '__all__':
-        # #            o[agent] = obs[agent]
-        #             #one_hot_agent_encoding = np.zeros(len(self.env.agents))
-        #             #one_hot_agent_encoding[agent] += 1
-        #             o[agent] = obs[agent]#np.append(obs[agent], one_hot_agent_encoding)
-        #
-        #
-        #         d[agent] = dones[agent]
-
-        self.agents = self.env.agents
-        self.agents_static = self.env.agents_static
-        self.dev_obs_dict = self.env.dev_obs_dict
-        #print('Old OBS #####', self.old_obs)
-        
         if self.step_memory >= 2:
             oo = dict()
 
@@ -188,16 +128,6 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
             if done and agent != '__all__':
                 self.agents_done.append(agent)
 
-        #print(obs)
-        #return obs, rewards, dones, infos
-        # oo = dict()
-        # oo['agents'] = o
-        # o['global'] = np.ones((17, 17)) * 17
-
-        # o[0] = [o[0], np.ones((17, 17)) * 17]
-        # o['global_obs'] = np.ones((17, 17)) * 17
-        # r['global_obs'] = 0
-        # d['global_obs'] = True
         if self.step_memory < 2:
             return o, r, d, infos
         else:
@@ -208,3 +138,71 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
 
     def get_num_agents(self):
         return self.env.get_num_agents()
+
+    def get_prediction_as_observation(self, pred_pos, pred_dir, agent_handle):
+        '''
+        :param pred_pos: pred_pos should be a 3 dimensions array (N_Agents, T_pred, 2) containing x and y
+                         predicted coordinates of agents at each time step
+        :param pred_dir: pred_dir should be a 2 dimensions array (N_Agents, T_pred) predicted directions
+                         of agents at each time step
+        :param agent_handle: agent index
+        :return: 2 dimensional array (T_pred, N_agents) with value 1 at coord. (t,i) if agent 'agent_handle'
+                and agent i are going to meet at time step t.
+
+        Computes prediction of collision that will be added to the observation.
+        Allows to the agent to know which other train it is about to meet, and when.
+        The id of the other trains are shared, allowing eventually the agents to come
+        up with a priority order of trains.
+        '''
+
+        pred_obs = np.zeros((len(pred_pos[1]), len(self.env.agents)))
+
+        for time_offset in range(len(pred_pos[1])):
+
+            # We consider a time window of t-1:t+1 to find a collision
+            collision_window = list(range(max(time_offset - 1, 0), min(time_offset + 2, len(pred_pos[1]))))
+
+            # coordinate of agent `agent_handle` at time t.
+            coord_agent = pred_pos[agent_handle, time_offset, 0] + 1000 * pred_pos[agent_handle, time_offset, 1]
+
+            # x coordinates of all other agents in the time window
+            # array of dim (N_Agents, 3), the 3 elements corresponding to x coordinates of the agents
+            # at t-1, t, t + 1
+            x_coord_other_agents = pred_pos[list(range(agent_handle)) +
+                                            list(range(agent_handle + 1,
+                                                       len(self.env.agents)))][:, collision_window, 0]
+
+            # y coordinates of all other agents in the time window
+            # array of dim (N_Agents, 3), the 3 elements corresponding to y coordinates of the agents
+            # at t-1, t, t + 1
+            y_coord_other_agents = pred_pos[list(range(agent_handle)) +
+                                            list(range(agent_handle + 1, len(self.env.agents)))][
+                                   :, collision_window, 1]
+
+            coord_other_agents = x_coord_other_agents + 1000 * y_coord_other_agents
+
+            # collision_info here contains the index of the agent colliding with the current agent and
+            # the delta_t at which they visit the same cell (0 for t-1, 1 for t or 2 for t+1)
+            for collision_info in np.argwhere(coord_agent == coord_other_agents):
+                # If they are on the same cell at the same time, there is a collison in all cases
+                if collision_info[1] == 1:
+                    pred_obs[time_offset, collision_info[0] + 1 * (collision_info[0] >= agent_handle)] = 1
+                elif collision_info[1] == 0:
+                    # In this case, the other agent (agent 2) was on the same cell at t-1
+                    # There is a collision if agent 2 is at t, on the cell where was agent 1 at t-1
+                    coord_agent_1_t_minus_1 = pred_pos[agent_handle, time_offset-1, 0] + \
+                                          1000 * pred_pos[agent_handle, time_offset, 1]
+                    coord_agent_2_t = coord_other_agents[collision_info[0], 1]
+                    if coord_agent_1_t_minus_1 == coord_agent_2_t:
+                        pred_obs[time_offset, collision_info[0] + 1 * (collision_info[0] >= agent_handle)] = 1
+
+                elif collision_info[1] == 2:
+                    # In this case, the other agent (agent 2) will be on the same cell at t+1
+                    # There is a collision if agent 2 is at t, on the cell where will be agent 1 at t+1
+                    coord_agent_1_t_plus_1 = pred_pos[agent_handle, time_offset + 1, 0] + \
+                                              1000 * pred_pos[agent_handle, time_offset, 1]
+                    coord_agent_2_t = coord_other_agents[collision_info[0], 1]
+                    if coord_agent_1_t_plus_1 == coord_agent_2_t:
+                        pred_obs[time_offset, collision_info[0] + 1 * (collision_info[0] >= agent_handle)] = 1
+
+        return pred_obs
diff --git a/RLLib_training/render_training_result.py b/RLLib_training/render_training_result.py
index 5b9a08c..5f0159f 100644
--- a/RLLib_training/render_training_result.py
+++ b/RLLib_training/render_training_result.py
@@ -50,11 +50,11 @@ ModelCatalog.register_custom_model("conv_model", ConvModelGlobalObs)
 ray.init()#object_store_memory=150000000000, redis_max_memory=30000000000)
 
 
-CHECKPOINT_PATH = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/' \
-                  'conv_model_test/ppo_policy_TreeObsForRailEnv_5_agents_conv_model_False_ial1g3w9/checkpoint_51/checkpoint-51'
+CHECKPOINT_PATH = '/home/guillaume/Desktop/distMAgent/env_complexity_benchmark/' \
+                  'ppo_policy_nr_extra_10_0qxx0qy_/checkpoint_1001/checkpoint-1001'
 
-N_EPISODES = 3
-N_STEPS_PER_EPISODE = 50
+N_EPISODES = 10
+N_STEPS_PER_EPISODE = 80
 
 
 def render_training_result(config):
@@ -62,22 +62,11 @@ def render_training_result(config):
 
     set_seed(config['seed'], config['seed'], config['seed'])
 
-    transition_probability = [15,  # empty cell - Case 0
-                              5,  # Case 1 - straight
-                              5,  # Case 2 - simple switch
-                              1,  # Case 3 - diamond crossing
-                              1,  # Case 4 - single slip
-                              1,  # Case 5 - double slip
-                              1,  # Case 6 - symmetrical
-                              0,  # Case 7 - dead end
-                              1,  # Case 1b (8)  - simple turn right
-                              1,  # Case 1c (9)  - simple turn left
-                              1]  # Case 2b (10) - simple switch mirrored
-
     # Example configuration to generate a random rail
     env_config = {"width": config['map_width'],
                   "height": config['map_height'],
-                  "rail_generator": complex_rail_generator,
+                  "rail_generator": config["rail_generator"],
+                  "nr_extra": config["nr_extra"],
                   "number_of_agents": config['n_agents'],
                   "seed": config['seed'],
                   "obs_builder": config['obs_builder']}
@@ -85,7 +74,7 @@ def render_training_result(config):
 
     # Observation space and action space definitions
     if isinstance(config["obs_builder"], TreeObsForRailEnv):
-        obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(105,))
+        obs_space = gym.spaces.Box(low=-1, high=1, shape=(147,))
         preprocessor = "tree_obs_prep"
 
     elif isinstance(config["obs_builder"], GlobalObsForRailEnv):
@@ -154,6 +143,8 @@ def render_training_result(config):
     trainer_config['simple_optimizer'] = False
     trainer_config['postprocess_inputs'] = True
     trainer_config['log_level'] = 'WARN'
+    trainer_config['num_sgd_iter'] = 10
+    trainer_config['clip_param'] = 0.2
 
     env = RailEnvRLLibWrapper(env_config)
 
@@ -163,21 +154,29 @@ def render_training_result(config):
 
     policy = trainer.get_policy(config['policy_folder_name'].format(**locals()))
 
-    env_renderer = RenderTool(env, gl="PIL", show=True)
+
+    preprocessor = CustomPreprocessor(gym.spaces.Box(low=-1, high=1, shape=(147,)))
+    env_renderer = RenderTool(env, gl="PIL")
     for episode in range(N_EPISODES):
         observation = env.reset()
         for i in range(N_STEPS_PER_EPISODE):
-
-            action, _, infos = policy.compute_actions(list(observation.values()), [])
-            env_renderer.renderEnv(show=True, frames=True, iEpisode=episode, iStep=i,
-                                   action_dict=action)
+            preprocessed_obs = []
+            for obs in observation.values():
+                preprocessed_obs.append(preprocessor.transform(obs))
+            action, _, infos = policy.compute_actions(preprocessed_obs, [])
             logits = infos['behaviour_logits']
             actions = dict()
             for j, logit in enumerate(logits):
                 actions[j] = np.argmax(logit)
-
+            # for j, act in enumerate(action):
+                # actions[j] = act
             time.sleep(1)
-            observation, _, _, _ = env.step(action)
+            print(actions, logits)
+            # print(action, print(infos['behaviour_logits']))
+            env_renderer.renderEnv(show=True, frames=True, iEpisode=episode, iStep=i,
+                                   action_dict=list(actions.values()))
+
+            observation, _, _, _ = env.step(actions)
 
     env_renderer.close_window()
 
@@ -185,7 +184,7 @@ def render_training_result(config):
 @gin.configurable
 def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                    map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder,
-                   entropy_coeff, seed, conv_model):
+                   entropy_coeff, seed, conv_model, rail_generator, nr_extra):
 
     render_training_result(
         config={"n_agents": n_agents,
@@ -199,12 +198,15 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                 "obs_builder": obs_builder,
                 "entropy_coeff": entropy_coeff,
                 "seed": seed,
-                "conv_model": conv_model
-                })
+                "conv_model": conv_model,
+                "rail_generator": rail_generator,
+                "nr_extra": 10# nr_extra
+                }
+    )
 
 
 if __name__ == '__main__':
     gin.external_configurable(tune.grid_search)
-    dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/conv_model_test'  # To Modify
+    dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/env_complexity_benchmark'  # To Modify
     gin.parse_config_file(dir + '/config.gin')
     run_experiment(local_dir=dir)
-- 
GitLab