diff --git a/RLLib_training/RailEnvRLLibWrapper.py b/RLLib_training/RailEnvRLLibWrapper.py
index 5ab92a48cf815c1cca17eac993c8ff528aabd32c..5642520a0e3649878789c093b07b3f0f06fb3f32 100644
--- a/RLLib_training/RailEnvRLLibWrapper.py
+++ b/RLLib_training/RailEnvRLLibWrapper.py
@@ -3,6 +3,7 @@
 from ray.rllib.env.multi_agent_env import MultiAgentEnv
 from flatland.envs.observations import TreeObsForRailEnv
 from flatland.envs.generators import random_rail_generator
 from ray.rllib.utils.seed import seed as set_seed
+from flatland.envs.generators import complex_rail_generator, random_rail_generator
 
 import numpy as np
@@ -19,14 +20,18 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
             vector_index = config.vector_index
         else:
             vector_index = 1
-        #self.rail_generator = config["rail_generator"](nr_start_goal=config['number_of_agents'], min_dist=5,
-        #                                               nr_extra=30, seed=config['seed'] * (1+vector_index))
+
+        if config['rail_generator'] == "complex_rail_generator":
+            self.rail_generator = complex_rail_generator(nr_start_goal=config['number_of_agents'], min_dist=5,
+                                                         nr_extra=config['nr_extra'], seed=config['seed'] * (1+vector_index))
+        else:
+            self.rail_generator = random_rail_generator()
+
         set_seed(config['seed'] * (1+vector_index))
-        #self.env = RailEnv(width=config["width"], height=config["height"],
-        self.env = RailEnv(width=10, height=20,
+        self.env = RailEnv(width=config["width"], height=config["height"],
                            number_of_agents=config["number_of_agents"],
                            obs_builder_object=config['obs_builder'])
-        self.env.load('/mount/SDC/flatland/baselines/torch_training/railway/complex_scene.pkl')
+        # self.env.load('/home/guillaume/EPFL/Master_Thesis/flatland/baselines/torch_training/railway/complex_scene.pkl')
 
         self.width = self.env.width
         self.height = self.env.height
@@ -35,7 +40,7 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
 
     def reset(self):
        self.agents_done = []
-        obs = self.env.reset(False, False)
+        obs = self.env.reset()
         o = dict()
         # o['agents'] = obs
         # obs[0] = [obs[0], np.ones((17, 17)) * 17]
diff --git a/RLLib_training/custom_preprocessors.py b/RLLib_training/custom_preprocessors.py
index 1c3fa0898582a6f9d093dbcac787d70805b2e0b6..9b7243f38a8b4cd06d7360db4a95c6099e80f7bc 100644
--- a/RLLib_training/custom_preprocessors.py
+++ b/RLLib_training/custom_preprocessors.py
@@ -1,7 +1,6 @@
 import numpy as np
 from ray.rllib.models.preprocessors import Preprocessor
 
-
 def max_lt(seq, val):
     """
     Return greatest item in seq for which item < val applies.
@@ -36,27 +35,27 @@
     :param obs: Observation that should be normalized
     :param clip_min: min value where observation will be clipped
     :param clip_max: max value where observation will be clipped
     :return: returns normalized and clipped observation
     """
     max_obs = max(1, max_lt(obs, 1000))
     min_obs = max(0, min_lt(obs, 0))
     if max_obs == min_obs:
-        return np.clip(np.array(obs)/ max_obs, clip_min, clip_max)
+        return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
     norm = np.abs(max_obs - min_obs)
     if norm == 0:
         norm = 1.
-    return np.clip((np.array(obs)-min_obs)/ norm, clip_min, clip_max)
+    return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
 
 
 class CustomPreprocessor(Preprocessor):
     def _init_shape(self, obs_space, options):
-        return (111,)
+        return obs_space.shape
 
     def transform(self, observation):
-        if len(observation) == 111:
-            return norm_obs_clip(observation)
-        else:
-            return observation
+        # if len(observation) == 111:
+        return norm_obs_clip(observation)
+        # else:
+        #     return observation
 
 
 class ConvModelPreprocessor(Preprocessor):
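For reference, the normalization that CustomPreprocessor.transform now applies unconditionally can be restated in plain NumPy. The sketch below is illustrative only, not the module itself; it assumes, as max_lt(obs, 1000) suggests, that entries of 1000 and above act as the fill value for unreachable tree-observation branches, and that the smallest non-negative entry is the offset:

    import numpy as np

    def norm_obs_clip_demo(obs, clip_min=-1, clip_max=1):
        # Illustrative sketch of norm_obs_clip: scale by the largest entry
        # below the fill value (1000), shift by the smallest non-negative
        # entry, then clip into [clip_min, clip_max].
        arr = np.asarray(obs, dtype=float)
        below_fill = arr[arr < 1000]
        max_obs = max(1, below_fill.max()) if below_fill.size else 1
        non_neg = arr[arr >= 0]
        min_obs = max(0, non_neg.min()) if non_neg.size else 0
        if max_obs == min_obs:
            return np.clip(arr / max_obs, clip_min, clip_max)
        norm = np.abs(max_obs - min_obs)
        return np.clip((arr - min_obs) / norm, clip_min, clip_max)

    print(norm_obs_clip_demo([0, 5, 10, 1000]))  # [0.  0.5 1.  1. ]

With _init_shape returning obs_space.shape instead of the hard-coded (111,), the preprocessor no longer silently passes through observations of unexpected length: whatever shape the wrapped observation space declares is what RLLib sees.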
diff --git a/RLLib_training/experiment_configs/env_complexity_benchmark/config.gin b/RLLib_training/experiment_configs/env_complexity_benchmark/config.gin
new file mode 100644
index 0000000000000000000000000000000000000000..82305a640693dbfa7efd946e3eb671727e0f72a5
--- /dev/null
+++ b/RLLib_training/experiment_configs/env_complexity_benchmark/config.gin
@@ -0,0 +1,25 @@
+run_experiment.name = "observation_benchmark_results"
+run_experiment.num_iterations = 1002
+run_experiment.save_every = 50
+run_experiment.hidden_sizes = [32, 32]
+
+run_experiment.map_width = 20
+run_experiment.map_height = 10
+run_experiment.n_agents = 8
+run_experiment.rail_generator = "complex_rail_generator"
+run_experiment.nr_extra = {"grid_search": [10, 20, 30, 40]}
+run_experiment.policy_folder_name = "ppo_policy_nr_extra_{config[nr_extra]}_"
+
+run_experiment.horizon = 50
+run_experiment.seed = 123
+
+#run_experiment.conv_model = {"grid_search": [True, False]}
+run_experiment.conv_model = False
+
+#run_experiment.obs_builder = {"grid_search": [@GlobalObsForRailEnv(), @GlobalObsForRailEnvDirectionDependent]}# [@TreeObsForRailEnv(), @GlobalObsForRailEnv() ]}
+run_experiment.obs_builder = @TreeObsForRailEnv()
+TreeObsForRailEnv.max_depth = 2
+LocalObsForRailEnv.view_radius = 5
+
+run_experiment.entropy_coeff = 0.01
+
diff --git a/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin b/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin
index 64ff1c981dc9d068dee3a089bc8cb77c834d9e63..1369bb44d9e6f6d761a2d7f4a37af11a735c1fab 100644
--- a/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin
+++ b/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin
@@ -4,8 +4,8 @@ run_experiment.save_every = 50
 run_experiment.hidden_sizes = [32, 32]
 
 run_experiment.map_width = 20
-run_experiment.map_height = 20
-run_experiment.n_agents = 5
+run_experiment.map_height = 10
+run_experiment.n_agents = 8
 run_experiment.policy_folder_name = "ppo_policy_{config[obs_builder].__class__.__name__}"#_entropy_coeff_{config[entropy_coeff]}_{config[hidden_sizes][0]}_hidden_sizes_"
 
 run_experiment.horizon = 50
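A reading aid for the .gin files above: every run_experiment.<param> = <value> line becomes a keyword argument of the @gin.configurable run_experiment in train_experiment.py, and dict values such as {"grid_search": [10, 20, 30, 40]} arrive as plain dicts that ray.tune later expands into one trial per value. A minimal self-contained sketch of that binding mechanism (the function and bindings here are illustrative, not the real experiment):

    import gin

    @gin.configurable
    def run_experiment(map_width=20, nr_extra=10):
        # gin injects whatever the parsed bindings specify.
        print("map_width:", map_width, "nr_extra:", nr_extra)

    gin.parse_config([
        'run_experiment.map_width = 20',
        'run_experiment.nr_extra = {"grid_search": [10, 20, 30, 40]}',
    ])
    run_experiment()  # nr_extra arrives as the raw dict, for tune to expand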
diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
index d58e9bf0e02ee270e97672a42dd26384c68d7b4e..57fb0ceb642cabd562075c3aae8e7cd8e6240460 100644
--- a/RLLib_training/train_experiment.py
+++ b/RLLib_training/train_experiment.py
@@ -1,10 +1,10 @@
 from baselines.RLLib_training.RailEnvRLLibWrapper import RailEnvRLLibWrapper
 import gym
+import gin
 
 from flatland.envs.generators import complex_rail_generator
 
-
 # Import PPO trainer: we can replace these imports by any other trainer from RLLib.
 from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG
 from ray.rllib.agents.ppo.ppo import PPOTrainer as Trainer
 
@@ -25,8 +25,6 @@
 import numpy as np
 
 from ray.tune.logger import UnifiedLogger
 import tempfile
-import gin
-
 from ray import tune
 from ray.rllib.utils.seed import seed as set_seed
@@ -59,14 +57,15 @@ def train(config, reporter):
     # Example configuration to generate a random rail
     env_config = {"width": config['map_width'],
                   "height": config['map_height'],
-                  "rail_generator": complex_rail_generator,
+                  "rail_generator": config["rail_generator"],
+                  "nr_extra": config["nr_extra"],
                   "number_of_agents": config['n_agents'],
                   "seed": config['seed'],
                   "obs_builder": config['obs_builder']}
 
     # Observation space and action space definitions
     if isinstance(config["obs_builder"], TreeObsForRailEnv):
-        obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(111,))
+        obs_space = gym.spaces.Box(low=-1, high=1, shape=(147,))
         preprocessor = "tree_obs_prep"
 
     elif isinstance(config["obs_builder"], GlobalObsForRailEnv):
@@ -166,7 +165,7 @@ def train(config, reporter):
 @gin.configurable
 def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                    map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder,
-                   entropy_coeff, seed, conv_model):
+                   entropy_coeff, seed, conv_model, rail_generator, nr_extra):
 
     tune.run(
         train,
@@ -183,7 +182,9 @@
                 "obs_builder": obs_builder,
                 "entropy_coeff": entropy_coeff,
                 "seed": seed,
-                "conv_model": conv_model
+                "conv_model": conv_model,
+                "rail_generator": rail_generator,
+                "nr_extra": nr_extra
                 },
         resources_per_trial={
             "cpu": 2,
@@ -195,6 +196,6 @@
 
 if __name__ == '__main__':
     gin.external_configurable(tune.grid_search)
-    dir = '/mount/SDC/flatland/baselines/RLLib_training/experiment_configs/observation_benchmark_loaded_env' # To Modify
+    dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/env_complexity_benchmark' # To Modify
     gin.parse_config_file(dir + '/config.gin')
     run_experiment(local_dir=dir)
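A note on the tightened observation space: with TreeObsForRailEnv.max_depth = 2 and four branches per node, the tree has 1 + 4 + 16 = 21 nodes, so shape=(147,) works out to 7 features per node (a figure inferred from 147 / 21, not stated in the diff), and the new [-1, 1] bounds match the clipping range that norm_obs_clip applies in the preprocessor. A quick arithmetic check:

    # The 7-features-per-node figure is inferred from 147 / 21, not stated
    # in the diff.
    max_depth = 2
    features_per_node = 7
    num_nodes = sum(4 ** d for d in range(max_depth + 1))  # 1 + 4 + 16 = 21
    print(num_nodes, num_nodes * features_per_node)  # 21 147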