From 38d81491081b329fa14544aaa9603b5e87be0076 Mon Sep 17 00:00:00 2001 From: Guillaume Mollard <guillaume.mollard2@gmail.com> Date: Fri, 7 Jun 2019 16:22:04 +0200 Subject: [PATCH] small changes to test modified tree obs, but not working --- RLLib_training/RailEnvRLLibWrapper.py | 10 +++++----- RLLib_training/custom_preprocessors.py | 4 ++-- RLLib_training/experiment_configs/CustomModels.py | 0 .../n_agents_experiment/config.gin | 15 ++++++++++----- .../observation_benchmark_loaded_env/config.gin | 5 +++-- RLLib_training/train_experiment.py | 8 ++++++-- 6 files changed, 26 insertions(+), 16 deletions(-) delete mode 100644 RLLib_training/experiment_configs/CustomModels.py diff --git a/RLLib_training/RailEnvRLLibWrapper.py b/RLLib_training/RailEnvRLLibWrapper.py index ad504e2..5ab92a4 100644 --- a/RLLib_training/RailEnvRLLibWrapper.py +++ b/RLLib_training/RailEnvRLLibWrapper.py @@ -19,13 +19,14 @@ class RailEnvRLLibWrapper(MultiAgentEnv): vector_index = config.vector_index else: vector_index = 1 - self.rail_generator = config["rail_generator"](nr_start_goal=config['number_of_agents'], min_dist=5, - nr_extra=30, seed=config['seed'] * (1+vector_index)) + #self.rail_generator = config["rail_generator"](nr_start_goal=config['number_of_agents'], min_dist=5, + # nr_extra=30, seed=config['seed'] * (1+vector_index)) set_seed(config['seed'] * (1+vector_index)) - self.env = RailEnv(width=config["width"], height=config["height"], rail_generator=self.rail_generator, + #self.env = RailEnv(width=config["width"], height=config["height"], + self.env = RailEnv(width=10, height=20, number_of_agents=config["number_of_agents"], obs_builder_object=config['obs_builder']) - self.env.load('./baselines/torch_training/railway/complex_scene.pkl') + self.env.load('/mount/SDC/flatland/baselines/torch_training/railway/complex_scene.pkl') self.width = self.env.width self.height = self.env.height @@ -45,7 +46,6 @@ class RailEnvRLLibWrapper(MultiAgentEnv): self.agents = self.env.agents self.agents_static = self.env.agents_static self.dev_obs_dict = self.env.dev_obs_dict - return obs def step(self, action_dict): diff --git a/RLLib_training/custom_preprocessors.py b/RLLib_training/custom_preprocessors.py index cc58a0d..1c3fa08 100644 --- a/RLLib_training/custom_preprocessors.py +++ b/RLLib_training/custom_preprocessors.py @@ -50,10 +50,10 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1): class CustomPreprocessor(Preprocessor): def _init_shape(self, obs_space, options): - return (105,) + return (111,) def transform(self, observation): - if len(observation) == 105: + if len(observation) == 111: return norm_obs_clip(observation) else: return observation diff --git a/RLLib_training/experiment_configs/CustomModels.py b/RLLib_training/experiment_configs/CustomModels.py deleted file mode 100644 index e69de29..0000000 diff --git a/RLLib_training/experiment_configs/n_agents_experiment/config.gin b/RLLib_training/experiment_configs/n_agents_experiment/config.gin index 31eedbe..025eab9 100644 --- a/RLLib_training/experiment_configs/n_agents_experiment/config.gin +++ b/RLLib_training/experiment_configs/n_agents_experiment/config.gin @@ -1,14 +1,19 @@ -run_experiment.name = "n_agents_results" +run_experiment.name = "observation_benchmark_results" run_experiment.num_iterations = 1002 -run_experiment.save_every = 200 -run_experiment.hidden_sizes = [32, 32] +run_experiment.save_every = 100 +run_experiment.hidden_sizes = [32,32] run_experiment.map_width = 20 run_experiment.map_height = 20 -run_experiment.n_agents = {"grid_search": [1]}#, 2, 5, 10]} -run_experiment.policy_folder_name = "ppo_policy_{config[n_agents]}_agents" +run_experiment.n_agents = {"grid_search": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]} +run_experiment.policy_folder_name = "ppo_policy_{config[obs_builder].__class__.__name__}_entropy_coeff_{config[entropy_coeff]}_{config[n_agents]}_agents_" run_experiment.horizon = 50 run_experiment.seed = 123 +run_experiment.entropy_coeff = {"grid_search": [1e-3, 1e-2, 0]} + +run_experiment.obs_builder = {"grid_search": [@TreeObsForRailEnv()]}# [@TreeObsForRailEnv(), @GlobalObsForRailEnv() ]} +TreeObsForRailEnv.max_depth = 2 +LocalObsForRailEnv.view_radius = 5 diff --git a/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin b/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin index 03aae99..64ff1c9 100644 --- a/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin +++ b/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin @@ -1,7 +1,7 @@ run_experiment.name = "observation_benchmark_loaded_env_results" run_experiment.num_iterations = 1002 run_experiment.save_every = 50 -run_experiment.hidden_sizes = 32 +run_experiment.hidden_sizes = [32, 32] run_experiment.map_width = 20 run_experiment.map_height = 20 @@ -10,9 +10,10 @@ run_experiment.policy_folder_name = "ppo_policy_{config[obs_builder].__class__._ run_experiment.horizon = 50 run_experiment.seed = 123 +run_experiment.conv_model = False run_experiment.entropy_coeff = 1e-2 -run_experiment.obs_builder = {"grid_search": [@LocalObsForRailEnv(), @TreeObsForRailEnv(), @GlobalObsForRailEnv(), @GlobalObsForRailEnvDirectionDependent]} +run_experiment.obs_builder = @TreeObsForRailEnv()#{"grid_search": [@LocalObsForRailEnv(), @TreeObsForRailEnv(), @GlobalObsForRailEnv(), @GlobalObsForRailEnvDirectionDependent()]} TreeObsForRailEnv.max_depth = 2 LocalObsForRailEnv.view_radius = 5 diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py index e7085b4..d58e9bf 100644 --- a/RLLib_training/train_experiment.py +++ b/RLLib_training/train_experiment.py @@ -52,6 +52,10 @@ def train(config, reporter): set_seed(config['seed'], config['seed'], config['seed']) + config['map_width']= 20 + config['map_height']= 10 + config['n_agents'] = 8 + # Example configuration to generate a random rail env_config = {"width": config['map_width'], "height": config['map_height'], @@ -62,7 +66,7 @@ def train(config, reporter): # Observation space and action space definitions if isinstance(config["obs_builder"], TreeObsForRailEnv): - obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(105,)) + obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(111,)) preprocessor = "tree_obs_prep" elif isinstance(config["obs_builder"], GlobalObsForRailEnv): @@ -191,6 +195,6 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every, if __name__ == '__main__': gin.external_configurable(tune.grid_search) - dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/conv_model_test' # To Modify + dir = '/mount/SDC/flatland/baselines/RLLib_training/experiment_configs/observation_benchmark_loaded_env' # To Modify gin.parse_config_file(dir + '/config.gin') run_experiment(local_dir=dir) -- GitLab