From 088c5f168a2aedbb171196457bae652503da2ef7 Mon Sep 17 00:00:00 2001
From: Guillaume Mollard <guillaume.mollard2@gmail.com>
Date: Wed, 29 May 2019 10:19:53 +0200
Subject: [PATCH] Convolutional model and preprocessor added

---
 RLLib_training/custom_models.py               | 101 ++++++++++++++++++
 ...reprocessor.py => custom_preprocessors.py} |  10 ++
 .../conv_model_test/config.gin                |  21 ++++
 RLLib_training/train_experiment.py            |  39 ++++---
 4 files changed, 159 insertions(+), 12 deletions(-)
 create mode 100644 RLLib_training/custom_models.py
 rename RLLib_training/{CustomPreprocessor.py => custom_preprocessors.py} (81%)
 create mode 100644 RLLib_training/experiment_configs/conv_model_test/config.gin

diff --git a/RLLib_training/custom_models.py b/RLLib_training/custom_models.py
new file mode 100644
index 0000000..81f5223
--- /dev/null
+++ b/RLLib_training/custom_models.py
@@ -0,0 +1,101 @@
+from ray.rllib.models import ModelCatalog, Model
+from ray.rllib.models.misc import normc_initializer
+
+import tensorflow as tf
+
+
+class ConvModelGlobalObs(Model):
+    def _build_layers_v2(self, input_dict, num_outputs, options):
+        """Define the layers of a custom model.
+        Arguments:
+            input_dict (dict): Dictionary of input tensors, including "obs",
+                "prev_action", "prev_reward", "is_training".
+            num_outputs (int): Output tensor must be of size
+                [BATCH_SIZE, num_outputs].
+            options (dict): Model options.
+        Returns:
+            (outputs, feature_layer): Tensors of size [BATCH_SIZE, num_outputs]
+                and [BATCH_SIZE, desired_feature_size].
+        When using dict or tuple observation spaces, you can access
+        the nested sub-observation batches here as well:
+        Examples:
+            >>> print(input_dict)
+            {'prev_actions': <tf.Tensor shape=(?,) dtype=int64>,
+             'prev_rewards': <tf.Tensor shape=(?,) dtype=float32>,
+             'is_training': <tf.Tensor shape=(), dtype=bool>,
+             'obs': (observation, features)}
+        """
+        # Layer aliases
+
+        Relu = tf.nn.relu
+        BatchNormalization = tf.layers.batch_normalization
+        Dropout = tf.layers.dropout
+        Dense = tf.contrib.layers.fully_connected
+
+        map_size = int(input_dict['obs'][0].shape[0])
+
+        N_CHANNELS = 96
+
+        conv1 = Relu(self.conv2d(input_dict['obs'], N_CHANNELS, 'valid', strides=(2, 2)))  # 3x3 kernel, stride 2
+
+        # conv2 = Relu(self.conv2d(conv1, 64, 'valid'))
+
+        # conv3 = Relu(self.conv2d(conv2, 64, 'valid'))
+
+        conv2_flat = tf.reshape(conv1, [-1, int(N_CHANNELS * ((map_size - 3 + 1) / 2) ** 2)])  # conv1's spatial extent; exact for even map_size
+        # conv4_feature = tf.concat((conv2_flat, input_dict['obs'][1]), axis=1)
+        s_fc1 = Relu(Dense(conv2_flat, 256))
+        layerN_minus_1 = Relu(Dense(s_fc1, 64))
+        layerN = Dense(layerN_minus_1, num_outputs)
+        return layerN, layerN_minus_1
+
+    def conv2d(self, x, out_channels, padding, strides=(1, 1)):
+        return tf.layers.conv2d(x, out_channels, kernel_size=[3, 3], padding=padding,
+                                use_bias=True, strides=strides)
+
+
+class LightModel(Model):
+    def _build_layers_v2(self, input_dict, num_outputs, options):
+        """Define the layers of a custom model.
+        Arguments:
+            input_dict (dict): Dictionary of input tensors, including "obs",
+                "prev_action", "prev_reward", "is_training".
+            num_outputs (int): Output tensor must be of size
+                [BATCH_SIZE, num_outputs].
+            options (dict): Model options.
+        Returns:
+            (outputs, feature_layer): Tensors of size [BATCH_SIZE, num_outputs]
+                and [BATCH_SIZE, desired_feature_size].
+        When using dict or tuple observation spaces, you can access
+        the nested sub-observation batches here as well:
+        Examples:
+            >>> print(input_dict)
+            {'prev_actions': <tf.Tensor shape=(?,) dtype=int64>,
+             'prev_rewards': <tf.Tensor shape=(?,) dtype=float32>,
+             'is_training': <tf.Tensor shape=(), dtype=bool>,
+             'obs': (observation, features)}
+        """
+        # print(input_dict)
+        # Layer aliases
+        self.sess = tf.get_default_session()
+        Relu = tf.nn.relu
+        BatchNormalization = tf.layers.batch_normalization
+        Dropout = tf.layers.dropout
+        Dense = tf.contrib.layers.fully_connected
+
+        # conv1 = Relu(self.conv2d(input_dict['obs'][0], 32, 'valid'))
+        conv1 = Relu(self.conv2d(input_dict['obs'], 32, 'valid'))
+        conv2 = Relu(self.conv2d(conv1, 16, 'valid'))
+
+        # conv3 = Relu(self.conv2d(conv2, 64, 'valid'))
+
+        conv4_flat = tf.reshape(conv2, [-1, 16 * (17 - 2 * 2) ** 2])  # two 3x3 'valid' convs trim a 17x17 input to 13x13
+        # conv4_feature = tf.concat((conv4_flat, input_dict['obs'][1]), axis=1)
+        s_fc1 = Relu(Dense(conv4_flat, 128, weights_initializer=normc_initializer(1.0)))
+        # layerN_minus_1 = Relu(Dense(s_fc1, 256, use_bias=False))
+        layerN = Dense(s_fc1, num_outputs, weights_initializer=normc_initializer(0.01))
+        return layerN, s_fc1
+
+    def conv2d(self, x, out_channels, padding):
+        return tf.layers.conv2d(x, out_channels, kernel_size=[3, 3], padding=padding, use_bias=True)
+        # weights_initializer=normc_initializer(1.0))
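[Review note] The flattened size in ConvModelGlobalObs hard-codes (map_size - 3 + 1) / 2 as the spatial extent after the stride-2 'valid' convolution, whereas TensorFlow computes floor((size - kernel) / stride) + 1. The two agree only for even map sizes, which the 20x20 config below satisfies; for an odd map size the reshape would fail. A standalone sanity check in plain Python, no TF needed (the helper names are illustrative, not from the patch):

    def tf_valid_out(size, kernel=3, stride=2):
        # Output extent TensorFlow produces for padding='valid'.
        return (size - kernel) // stride + 1

    def patched_flat_size(size, n_channels=96):
        # Closed form used in ConvModelGlobalObs.
        return int(n_channels * ((size - 3 + 1) / 2) ** 2)

    for map_size in (20, 21, 30):
        true_flat = 96 * tf_valid_out(map_size) ** 2
        print(map_size, true_flat, patched_flat_size(map_size))
    # 20 -> 7776 == 7776; 30 -> 18816 == 18816; but 21 -> 9600 vs 8664 (mismatch)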
diff --git a/RLLib_training/CustomPreprocessor.py b/RLLib_training/custom_preprocessors.py
similarity index 81%
rename from RLLib_training/CustomPreprocessor.py
rename to RLLib_training/custom_preprocessors.py
index 7d23b8c..cc58a0d 100644
--- a/RLLib_training/CustomPreprocessor.py
+++ b/RLLib_training/custom_preprocessors.py
@@ -59,6 +59,16 @@ class CustomPreprocessor(Preprocessor):
         return observation
 
 
+class ConvModelPreprocessor(Preprocessor):
+    def _init_shape(self, obs_space, options):
+        out_shape = (obs_space[0].shape[0], obs_space[0].shape[1], sum([space.shape[2] for space in obs_space]))  # channels summed across sub-spaces
+        return out_shape
+
+    def transform(self, observation):
+        return np.concatenate([observation[0],
+                               observation[1],
+                               observation[2]], axis=2)
+
 
 # class NoPreprocessor:

diff --git a/RLLib_training/experiment_configs/conv_model_test/config.gin b/RLLib_training/experiment_configs/conv_model_test/config.gin
new file mode 100644
index 0000000..a55633e
--- /dev/null
+++ b/RLLib_training/experiment_configs/conv_model_test/config.gin
@@ -0,0 +1,21 @@
+run_experiment.name = "observation_benchmark_results"
+run_experiment.num_iterations = 1002
+run_experiment.save_every = 100
+run_experiment.hidden_sizes = [32, 32]
+
+run_experiment.map_width = 20
+run_experiment.map_height = 20
+run_experiment.n_agents = 5
+run_experiment.policy_folder_name = "ppo_policy_{config[obs_builder].__class__.__name__}_{config[n_agents]}_agents_conv_model_{config[conv_model]}_"
+
+run_experiment.horizon = 50
+run_experiment.seed = 123
+
+run_experiment.conv_model = {"grid_search": [True, False]}
+
+run_experiment.obs_builder = {"grid_search": [@GlobalObsForRailEnv(), @GlobalObsForRailEnvDirectionDependent()]}
+TreeObsForRailEnv.max_depth = 2
+LocalObsForRailEnv.view_radius = 5
+
+run_experiment.entropy_coeff = 0.01
+
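[Review note] For reference, what ConvModelPreprocessor produces: the tuple observation's per-cell tensors are stacked along the channel axis so a single 2-D convolution can consume them. A minimal NumPy sketch, assuming the shapes from the patched obs_space in train_experiment.py (16 + 8 + 2 = 26 channels); nothing here is part of the patch itself:

    import numpy as np

    h, w = 20, 20  # map_height, map_width from config.gin
    obs = (np.zeros((h, w, 16)),
           np.zeros((h, w, 8)),
           np.zeros((h, w, 2)))

    stacked = np.concatenate(obs, axis=2)
    assert stacked.shape == (h, w, 26)  # matches ConvModelPreprocessor._init_shape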
diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
index 330aced..223b905 100644
--- a/RLLib_training/train_experiment.py
+++ b/RLLib_training/train_experiment.py
@@ -1,4 +1,4 @@
-from baselines.RailEnvRLLibWrapper import RailEnvRLLibWrapper
+from baselines.RLLib_training.RailEnvRLLibWrapper import RailEnvRLLibWrapper
 
 import gym
 
@@ -14,7 +14,9 @@ from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph as PolicyGraph
 from ray.rllib.models import ModelCatalog
 from ray.tune.logger import pretty_print
 
-from baselines.CustomPreprocessor import CustomPreprocessor
+from baselines.RLLib_training.custom_preprocessors import CustomPreprocessor, ConvModelPreprocessor
+
+from baselines.RLLib_training.custom_models import ConvModelGlobalObs
 
 import ray
@@ -28,15 +30,20 @@ import gin
 from ray import tune
 from ray.rllib.utils.seed import seed as set_seed
-from flatland.envs.observations import TreeObsForRailEnv, GlobalObsForRailEnv, LocalObsForRailEnv
+from flatland.envs.observations import TreeObsForRailEnv, GlobalObsForRailEnv,\
+    LocalObsForRailEnv, GlobalObsForRailEnvDirectionDependent
+
 gin.external_configurable(TreeObsForRailEnv)
 gin.external_configurable(GlobalObsForRailEnv)
 gin.external_configurable(LocalObsForRailEnv)
+gin.external_configurable(GlobalObsForRailEnvDirectionDependent)
 
 from ray.rllib.models.preprocessors import TupleFlatteningPreprocessor
 ModelCatalog.register_custom_preprocessor("tree_obs_prep", CustomPreprocessor)
 ModelCatalog.register_custom_preprocessor("global_obs_prep", TupleFlatteningPreprocessor)
+ModelCatalog.register_custom_preprocessor("conv_obs_prep", ConvModelPreprocessor)
+ModelCatalog.register_custom_model("conv_model", ConvModelGlobalObs)
 
 ray.init()#object_store_memory=150000000000, redis_max_memory=30000000000)
@@ -70,13 +77,16 @@ def train(config, reporter):
         obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(105,))
         preprocessor = "tree_obs_prep"
 
-    elif isinstance(config["obs_builder"], GlobalObsForRailEnv):
+    elif isinstance(config["obs_builder"], GlobalObsForRailEnv) or \
+            isinstance(config["obs_builder"], GlobalObsForRailEnvDirectionDependent):
         obs_space = gym.spaces.Tuple((
             gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 16)),
-            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 3)),
-            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 4)),
-            gym.spaces.Box(low=0, high=1, shape=(4,))))
-        preprocessor = "global_obs_prep"
+            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 8)),
+            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 2))))
+        if config['conv_model']:
+            preprocessor = "conv_obs_prep"
+        else:
+            preprocessor = "global_obs_prep"
 
     elif isinstance(config["obs_builder"], LocalObsForRailEnv):
         view_radius = config["obs_builder"].view_radius
@@ -104,7 +114,11 @@ def train(config, reporter):
 
     # Trainer configuration
     trainer_config = DEFAULT_CONFIG.copy()
-    trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor}
+    if config['conv_model']:
+        trainer_config['model'] = {"custom_model": "conv_model", "custom_preprocessor": preprocessor}
+    else:
+        trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor}
+
     trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
                                     "policy_mapping_fn": policy_mapping_fn,
                                     "policies_to_train": list(policy_graphs.keys())}
@@ -151,7 +165,7 @@ def train(config, reporter):
 @gin.configurable
 def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                    map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder,
-                   entropy_coeff, seed):
+                   entropy_coeff, seed, conv_model):
     tune.run(
@@ -167,7 +181,8 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
             'policy_folder_name': policy_folder_name,
             "obs_builder": obs_builder,
             "entropy_coeff": entropy_coeff,
-            "seed": seed
+            "seed": seed,
+            "conv_model": conv_model
         },
         resources_per_trial={
             "cpu": 2,
@@ -179,6 +194,6 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
 
 if __name__ == '__main__':
     gin.external_configurable(tune.grid_search)
-    dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/experiment_configs/observation_benchmark'  # To Modify
+    dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/conv_model_test'  # To Modify
     gin.parse_config_file(dir + '/config.gin')
     run_experiment(local_dir=dir)
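[Review note] The new conv_model flag switches two things at once: the preprocessor and the model section of the trainer config. Distilled into a pure function for clarity; this is an illustrative restatement of the branches above, not code from the patch:

    def model_section(conv_model, hidden_sizes, preprocessor):
        if conv_model:
            # Conv net consumes the channel-stacked observation directly.
            return {"custom_model": "conv_model", "custom_preprocessor": "conv_obs_prep"}
        # Default: flatten the tuple observation into fully connected layers.
        return {"fcnet_hiddens": hidden_sizes, "custom_preprocessor": preprocessor}

    assert model_section(True, [32, 32], "global_obs_prep")["custom_model"] == "conv_model"
    assert model_section(False, [32, 32], "global_obs_prep")["fcnet_hiddens"] == [32, 32]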
-- 
GitLab
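[Review note] The gin values of the form {"grid_search": [...]} are the raw dict encoding of ray.tune.grid_search, so this config expands into 2 x 2 = 4 trials (conv_model x obs_builder), each reserving 2 CPUs per resources_per_trial. A quick enumeration of the resulting grid (labels illustrative):

    from itertools import product

    conv_models = [True, False]
    obs_builders = ["GlobalObsForRailEnv", "GlobalObsForRailEnvDirectionDependent"]

    for cm, ob in product(conv_models, obs_builders):
        print(f"trial: conv_model={cm}, obs_builder={ob}")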