diff --git a/RLLib_training/RailEnvRLLibWrapper.py b/RLLib_training/RailEnvRLLibWrapper.py index ad504e271f8758f6f445917c1963b9da316b9ba3..5ab92a48cf815c1cca17eac993c8ff528aabd32c 100644 --- a/RLLib_training/RailEnvRLLibWrapper.py +++ b/RLLib_training/RailEnvRLLibWrapper.py @@ -19,13 +19,14 @@ class RailEnvRLLibWrapper(MultiAgentEnv): vector_index = config.vector_index else: vector_index = 1 - self.rail_generator = config["rail_generator"](nr_start_goal=config['number_of_agents'], min_dist=5, - nr_extra=30, seed=config['seed'] * (1+vector_index)) + #self.rail_generator = config["rail_generator"](nr_start_goal=config['number_of_agents'], min_dist=5, + # nr_extra=30, seed=config['seed'] * (1+vector_index)) set_seed(config['seed'] * (1+vector_index)) - self.env = RailEnv(width=config["width"], height=config["height"], rail_generator=self.rail_generator, + #self.env = RailEnv(width=config["width"], height=config["height"], + self.env = RailEnv(width=10, height=20, number_of_agents=config["number_of_agents"], obs_builder_object=config['obs_builder']) - self.env.load('./baselines/torch_training/railway/complex_scene.pkl') + self.env.load('/mount/SDC/flatland/baselines/torch_training/railway/complex_scene.pkl') self.width = self.env.width self.height = self.env.height @@ -45,7 +46,6 @@ class RailEnvRLLibWrapper(MultiAgentEnv): self.agents = self.env.agents self.agents_static = self.env.agents_static self.dev_obs_dict = self.env.dev_obs_dict - return obs def step(self, action_dict): diff --git a/RLLib_training/custom_preprocessors.py b/RLLib_training/custom_preprocessors.py index cc58a0d51188c756fb7c37e0f10ada403ecf804e..1c3fa0898582a6f9d093dbcac787d70805b2e0b6 100644 --- a/RLLib_training/custom_preprocessors.py +++ b/RLLib_training/custom_preprocessors.py @@ -50,10 +50,10 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1): class CustomPreprocessor(Preprocessor): def _init_shape(self, obs_space, options): - return (105,) + return (111,) def transform(self, observation): - if len(observation) == 105: + if len(observation) == 111: return norm_obs_clip(observation) else: return observation diff --git a/RLLib_training/experiment_configs/CustomModels.py b/RLLib_training/experiment_configs/CustomModels.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/RLLib_training/experiment_configs/n_agents_experiment/config.gin b/RLLib_training/experiment_configs/n_agents_experiment/config.gin index 31eedbe868622e21bda76b80c85f6596db028820..025eab9130086188892e406edbfb477215f8a8cd 100644 --- a/RLLib_training/experiment_configs/n_agents_experiment/config.gin +++ b/RLLib_training/experiment_configs/n_agents_experiment/config.gin @@ -1,14 +1,19 @@ -run_experiment.name = "n_agents_results" +run_experiment.name = "observation_benchmark_results" run_experiment.num_iterations = 1002 -run_experiment.save_every = 200 -run_experiment.hidden_sizes = [32, 32] +run_experiment.save_every = 100 +run_experiment.hidden_sizes = [32,32] run_experiment.map_width = 20 run_experiment.map_height = 20 -run_experiment.n_agents = {"grid_search": [1]}#, 2, 5, 10]} -run_experiment.policy_folder_name = "ppo_policy_{config[n_agents]}_agents" +run_experiment.n_agents = {"grid_search": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]} +run_experiment.policy_folder_name = "ppo_policy_{config[obs_builder].__class__.__name__}_entropy_coeff_{config[entropy_coeff]}_{config[n_agents]}_agents_" run_experiment.horizon = 50 run_experiment.seed = 123 +run_experiment.entropy_coeff = {"grid_search": [1e-3, 1e-2, 0]} + +run_experiment.obs_builder = {"grid_search": [@TreeObsForRailEnv()]}# [@TreeObsForRailEnv(), @GlobalObsForRailEnv() ]} +TreeObsForRailEnv.max_depth = 2 +LocalObsForRailEnv.view_radius = 5 diff --git a/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin b/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin index 03aae996a12bf69717599cf734a39b6780bbfc72..64ff1c981dc9d068dee3a089bc8cb77c834d9e63 100644 --- a/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin +++ b/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin @@ -1,7 +1,7 @@ run_experiment.name = "observation_benchmark_loaded_env_results" run_experiment.num_iterations = 1002 run_experiment.save_every = 50 -run_experiment.hidden_sizes = 32 +run_experiment.hidden_sizes = [32, 32] run_experiment.map_width = 20 run_experiment.map_height = 20 @@ -10,9 +10,10 @@ run_experiment.policy_folder_name = "ppo_policy_{config[obs_builder].__class__._ run_experiment.horizon = 50 run_experiment.seed = 123 +run_experiment.conv_model = False run_experiment.entropy_coeff = 1e-2 -run_experiment.obs_builder = {"grid_search": [@LocalObsForRailEnv(), @TreeObsForRailEnv(), @GlobalObsForRailEnv(), @GlobalObsForRailEnvDirectionDependent]} +run_experiment.obs_builder = @TreeObsForRailEnv()#{"grid_search": [@LocalObsForRailEnv(), @TreeObsForRailEnv(), @GlobalObsForRailEnv(), @GlobalObsForRailEnvDirectionDependent()]} TreeObsForRailEnv.max_depth = 2 LocalObsForRailEnv.view_radius = 5 diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py index e7085b48819028f1ee3e9eee2f9cf8945ff0c870..d58e9bf0e02ee270e97672a42dd26384c68d7b4e 100644 --- a/RLLib_training/train_experiment.py +++ b/RLLib_training/train_experiment.py @@ -52,6 +52,10 @@ def train(config, reporter): set_seed(config['seed'], config['seed'], config['seed']) + config['map_width']= 20 + config['map_height']= 10 + config['n_agents'] = 8 + # Example configuration to generate a random rail env_config = {"width": config['map_width'], "height": config['map_height'], @@ -62,7 +66,7 @@ def train(config, reporter): # Observation space and action space definitions if isinstance(config["obs_builder"], TreeObsForRailEnv): - obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(105,)) + obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(111,)) preprocessor = "tree_obs_prep" elif isinstance(config["obs_builder"], GlobalObsForRailEnv): @@ -191,6 +195,6 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every, if __name__ == '__main__': gin.external_configurable(tune.grid_search) - dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/conv_model_test' # To Modify + dir = '/mount/SDC/flatland/baselines/RLLib_training/experiment_configs/observation_benchmark_loaded_env' # To Modify gin.parse_config_file(dir + '/config.gin') run_experiment(local_dir=dir)