From 540dce249f0f0c647b663e975975076bc0e12bd9 Mon Sep 17 00:00:00 2001
From: Guillaume Mollard <guillaume.mollard2@gmail.com>
Date: Fri, 7 Jun 2019 17:59:30 +0200
Subject: [PATCH] Add env complexity benchmark

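Make the rail generator a benchmark parameter instead of a hardcoded
choice: RailEnvRLLibWrapper now instantiates complex_rail_generator
(with a configurable nr_extra) when the experiment config names it,
and falls back to random_rail_generator otherwise. The hardcoded
10x20 map and the complex_scene.pkl loading are dropped in favour of
the configured width/height. A new env_complexity_benchmark config
grid-searches nr_extra over [10, 20, 30, 40], and CustomPreprocessor
now normalizes flat observations of any size (the tree observation
space becomes a 147-dim Box clipped to [-1, 1]).

For context (not shown in this diff), the wrapper is consumed by
RLLib as a registered multi-agent env, along the lines of:

    from ray.tune.registry import register_env
    register_env("railenv", lambda config: RailEnvRLLibWrapper(config))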
---
 RLLib_training/RailEnvRLLibWrapper.py         | 19 +++++++++++++------
 RLLib_training/custom_preprocessors.py        | 15 ++++++---------
 .../env_complexity_benchmark/config.gin       | 26 ++++++++++++++++++++++++++
 .../config.gin                                |  4 +--
 RLLib_training/train_experiment.py            | 18 ++++++++++--------
 5 files changed, 57 insertions(+), 25 deletions(-)
 create mode 100644 RLLib_training/experiment_configs/env_complexity_benchmark/config.gin

diff --git a/RLLib_training/RailEnvRLLibWrapper.py b/RLLib_training/RailEnvRLLibWrapper.py
index 5ab92a4..5642520 100644
--- a/RLLib_training/RailEnvRLLibWrapper.py
+++ b/RLLib_training/RailEnvRLLibWrapper.py
@@ -3,6 +3,7 @@ from ray.rllib.env.multi_agent_env import MultiAgentEnv
 from flatland.envs.observations import TreeObsForRailEnv
 from flatland.envs.generators import random_rail_generator
 from ray.rllib.utils.seed import seed as set_seed
+from flatland.envs.generators import complex_rail_generator, random_rail_generator
 import numpy as np
 
 
@@ -19,14 +20,20 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
             vector_index = config.vector_index
         else:
             vector_index = 1
-        #self.rail_generator = config["rail_generator"](nr_start_goal=config['number_of_agents'], min_dist=5,
-         #                                              nr_extra=30, seed=config['seed'] * (1+vector_index))
+
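+        # Pick the rail generator named in the experiment config; the seed is
+        # offset by vector_index so vectorized env copies get different layouts.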
+        if config['rail_generator'] == "complex_rail_generator":
+            self.rail_generator = complex_rail_generator(nr_start_goal=config['number_of_agents'], min_dist=5,
+                                                         nr_extra=config['nr_extra'], seed=config['seed'] * (1+vector_index))
+        else:
+            self.rail_generator = random_rail_generator()
+
         set_seed(config['seed'] * (1+vector_index))
-        #self.env = RailEnv(width=config["width"], height=config["height"],
-        self.env = RailEnv(width=10, height=20,
+        self.env = RailEnv(width=config["width"], height=config["height"],
                 number_of_agents=config["number_of_agents"], obs_builder_object=config['obs_builder'])
 
-        self.env.load('/mount/SDC/flatland/baselines/torch_training/railway/complex_scene.pkl')
+        # self.env.load('/home/guillaume/EPFL/Master_Thesis/flatland/baselines/torch_training/railway/complex_scene.pkl')
 
         self.width = self.env.width
         self.height = self.env.height
@@ -35,7 +40,7 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
     
     def reset(self):
         self.agents_done = []
-        obs = self.env.reset(False, False)
+        obs = self.env.reset()
         o = dict()
         # o['agents'] = obs
         # obs[0] = [obs[0], np.ones((17, 17)) * 17]
diff --git a/RLLib_training/custom_preprocessors.py b/RLLib_training/custom_preprocessors.py
index 1c3fa08..9b7243f 100644
--- a/RLLib_training/custom_preprocessors.py
+++ b/RLLib_training/custom_preprocessors.py
@@ -1,7 +1,6 @@
 import numpy as np
 from ray.rllib.models.preprocessors import Preprocessor
 
-
 def max_lt(seq, val):
     """
     Return greatest item in seq for which item < val applies.
@@ -36,27 +35,25 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1):
     :param obs: Observation that should be normalized
     :param clip_min: min value where observation will be clipped
     :param clip_max: max value where observation will be clipped
-    :return: returnes normalized and clipped observatoin
+    :return: returns normalized and clipped observation
     """
     max_obs = max(1, max_lt(obs, 1000))
     min_obs = max(0, min_lt(obs, 0))
     if max_obs == min_obs:
-        return np.clip(np.array(obs)/ max_obs, clip_min, clip_max)
+        return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
     norm = np.abs(max_obs - min_obs)
     if norm == 0:
         norm = 1.
-    return np.clip((np.array(obs)-min_obs)/ norm, clip_min, clip_max)
+    return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
 
 
 class CustomPreprocessor(Preprocessor):
     def _init_shape(self, obs_space, options):
-        return (111,)
+        return obs_space.shape
 
     def transform(self, observation):
-        if len(observation) == 111:
-            return norm_obs_clip(observation)
-        else:
-            return observation
+        # Normalize any flat observation; the old 111-feature size check is gone.
+        return norm_obs_clip(observation)
 
 
 class ConvModelPreprocessor(Preprocessor):
diff --git a/RLLib_training/experiment_configs/env_complexity_benchmark/config.gin b/RLLib_training/experiment_configs/env_complexity_benchmark/config.gin
new file mode 100644
index 0000000..82305a6
--- /dev/null
+++ b/RLLib_training/experiment_configs/env_complexity_benchmark/config.gin
@@ -0,0 +1,26 @@
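+# Env complexity benchmark: tune expands the nr_extra grid_search dict into one trial per value.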
+run_experiment.name = "env_complexity_benchmark_results"
+run_experiment.num_iterations = 1002
+run_experiment.save_every = 50
+run_experiment.hidden_sizes = [32, 32]
+
+run_experiment.map_width = 20
+run_experiment.map_height = 10
+run_experiment.n_agents = 8
+run_experiment.rail_generator = "complex_rail_generator"
+run_experiment.nr_extra = {"grid_search": [10, 20, 30, 40]}
+run_experiment.policy_folder_name = "ppo_policy_nr_extra_{config[nr_extra]}_"
+
+run_experiment.horizon = 50
+run_experiment.seed = 123
+
+#run_experiment.conv_model = {"grid_search": [True, False]}
+run_experiment.conv_model = False
+
+#run_experiment.obs_builder = {"grid_search": [@GlobalObsForRailEnv(), @GlobalObsForRailEnvDirectionDependent]}# [@TreeObsForRailEnv(), @GlobalObsForRailEnv() ]}
+run_experiment.obs_builder = @TreeObsForRailEnv()
+TreeObsForRailEnv.max_depth = 2
+LocalObsForRailEnv.view_radius = 5
+
+run_experiment.entropy_coeff = 0.01
+
diff --git a/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin b/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin
index 64ff1c9..1369bb4 100644
--- a/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin
+++ b/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin
@@ -4,8 +4,8 @@ run_experiment.save_every = 50
 run_experiment.hidden_sizes = [32, 32]
 
 run_experiment.map_width = 20
-run_experiment.map_height = 20
-run_experiment.n_agents = 5
+run_experiment.map_height = 10
+run_experiment.n_agents = 8
 run_experiment.policy_folder_name = "ppo_policy_{config[obs_builder].__class__.__name__}"#_entropy_coeff_{config[entropy_coeff]}_{config[hidden_sizes][0]}_hidden_sizes_"
 
 run_experiment.horizon = 50
diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
index d58e9bf..57fb0ce 100644
--- a/RLLib_training/train_experiment.py
+++ b/RLLib_training/train_experiment.py
@@ -1,10 +1,10 @@
 from baselines.RLLib_training.RailEnvRLLibWrapper import RailEnvRLLibWrapper
 import gym
 
+import gin
 
 from flatland.envs.generators import complex_rail_generator
 
-
 # Import PPO trainer: we can replace these imports by any other trainer from RLLib.
 from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG
 from ray.rllib.agents.ppo.ppo import PPOTrainer as Trainer
@@ -25,8 +25,6 @@ import numpy as np
 from ray.tune.logger import UnifiedLogger
 import tempfile
 
-import gin
-
 from ray import tune
 
 from ray.rllib.utils.seed import seed as set_seed
@@ -59,14 +57,16 @@ def train(config, reporter):
     # Example configuration to generate a random rail
     env_config = {"width": config['map_width'],
                   "height": config['map_height'],
-                  "rail_generator": complex_rail_generator,
+                  "rail_generator": config["rail_generator"],
+                  "nr_extra": config["nr_extra"],
                   "number_of_agents": config['n_agents'],
                   "seed": config['seed'],
                   "obs_builder": config['obs_builder']}
 
     # Observation space and action space definitions
     if isinstance(config["obs_builder"], TreeObsForRailEnv):
-        obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(111,))
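+        # norm_obs_clip bounds tree observations to [-1, 1]; 147 is the flattened size for max_depth=2.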
+        obs_space = gym.spaces.Box(low=-1, high=1, shape=(147,))
         preprocessor = "tree_obs_prep"
 
     elif isinstance(config["obs_builder"], GlobalObsForRailEnv):
@@ -166,7 +165,7 @@ def train(config, reporter):
 @gin.configurable
 def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                    map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder,
-                   entropy_coeff, seed, conv_model):
+                   entropy_coeff, seed, conv_model, rail_generator, nr_extra):
 
     tune.run(
         train,
@@ -183,7 +182,9 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                 "obs_builder": obs_builder,
                 "entropy_coeff": entropy_coeff,
                 "seed": seed,
-                "conv_model": conv_model
+                "conv_model": conv_model,
+                "rail_generator": rail_generator,
+                "nr_extra": nr_extra
                 },
         resources_per_trial={
             "cpu": 2,
@@ -195,6 +196,6 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
 
 if __name__ == '__main__':
     gin.external_configurable(tune.grid_search)
-    dir = '/mount/SDC/flatland/baselines/RLLib_training/experiment_configs/observation_benchmark_loaded_env'  # To Modify
+    dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/env_complexity_benchmark'  # To Modify
     gin.parse_config_file(dir + '/config.gin')
     run_experiment(local_dir=dir)
-- 
GitLab