Skip to content
Snippets Groups Projects
Commit 088c5f16 authored by gmollard
Browse files

Convolutional model and preprocessor added

parent 55e6bd63
No related branches found
No related tags found
No related merge requests found
from ray.rllib.models import ModelCatalog, Model
from ray.rllib.models.misc import normc_initializer
import tensorflow as tf
class ConvModelGlobalObs(Model):
    """Custom RLlib model: one strided 3x3 convolution followed by dense layers.

    The convolution is applied directly to ``input_dict['obs']``, so the
    observation is presumably a single image-like tensor of shape
    [BATCH_SIZE, height, width, channels] -- TODO confirm against the
    preprocessor registered for this model.
    """

    def _build_layers_v2(self, input_dict, num_outputs, options):
        """Define the layers of a custom model.

        Arguments:
            input_dict (dict): Dictionary of input tensors, including "obs",
                "prev_action", "prev_reward", "is_training".
            num_outputs (int): Output tensor must be of size
                [BATCH_SIZE, num_outputs].
            options (dict): Model options (not read by this model).

        Returns:
            (outputs, feature_layer): Tensors of size [BATCH_SIZE, num_outputs]
                and [BATCH_SIZE, 64].
        """
        Relu = tf.nn.relu
        Dense = tf.contrib.layers.fully_connected
        N_CHANNELS = 96

        # Convolutional Layer #1
        conv1 = Relu(self.conv2d(input_dict['obs'], N_CHANNELS, 'valid', strides=(2, 2)))

        # Derive the flattened size from the static shape of the convolution
        # output instead of a hand-written formula: the previous expression
        # ((map_size - 3 + 1) / 2) ** 2 was only correct for square maps with
        # an even side length. The spatial dimensions must be statically known.
        flat_size = 1
        for dim in conv1.get_shape().as_list()[1:]:
            flat_size *= dim
        conv1_flat = tf.reshape(conv1, [-1, flat_size])

        s_fc1 = Relu(Dense(conv1_flat, 256))
        layerN_minus_1 = Relu(Dense(s_fc1, 64))

        # fully_connected applies ReLU by default; the output layer must stay
        # linear so that the returned values are not clamped to be >= 0.
        layerN = Dense(layerN_minus_1, num_outputs, activation_fn=None)
        return layerN, layerN_minus_1

    def conv2d(self, x, out_channels, padding, strides=(1, 1)):
        """Apply a biased 3x3 convolution to ``x``.

        Arguments:
            x: Input tensor passed to ``tf.layers.conv2d``.
            out_channels (int): Number of output filters.
            padding (str): Padding mode forwarded to ``tf.layers.conv2d``.
            strides (tuple): Convolution strides, (1, 1) by default.

        Returns:
            The convolution output tensor (no activation applied).
        """
        return tf.layers.conv2d(x, out_channels, kernel_size=[3, 3], padding=padding,
                                use_bias=True, strides=strides)
class LightModel(Model):
    """Custom RLlib model: two 3x3 'valid' convolutions and one hidden dense layer.

    The convolutions are applied directly to ``input_dict['obs']``, so the
    observation is presumably a single image-like tensor of shape
    [BATCH_SIZE, height, width, channels] -- TODO confirm against the
    preprocessor registered for this model.
    """

    def _build_layers_v2(self, input_dict, num_outputs, options):
        """Define the layers of a custom model.

        Arguments:
            input_dict (dict): Dictionary of input tensors, including "obs",
                "prev_action", "prev_reward", "is_training".
            num_outputs (int): Output tensor must be of size
                [BATCH_SIZE, num_outputs].
            options (dict): Model options (not read by this model).

        Returns:
            (outputs, feature_layer): Tensors of size [BATCH_SIZE, num_outputs]
                and [BATCH_SIZE, 128].
        """
        # Kept from the original implementation: the default session is stored
        # on the instance, although nothing in this class reads it.
        self.sess = tf.get_default_session()

        Relu = tf.nn.relu
        Dense = tf.contrib.layers.fully_connected

        conv1 = Relu(self.conv2d(input_dict['obs'], 32, 'valid'))
        conv2 = Relu(self.conv2d(conv1, 16, 'valid'))

        # Derive the flattened size from the static shape of the last
        # convolution instead of hard-coding a 17x17 input
        # (16 * (17 - 2 * 2) ** 2). For a 17x17 input the result is identical.
        # The spatial dimensions must be statically known.
        flat_size = 1
        for dim in conv2.get_shape().as_list()[1:]:
            flat_size *= dim
        conv2_flat = tf.reshape(conv2, [-1, flat_size])

        s_fc1 = Relu(Dense(conv2_flat, 128, weights_initializer=normc_initializer(1.0)))

        # fully_connected applies ReLU by default; the output layer must stay
        # linear so that the returned values are not clamped to be >= 0.
        layerN = Dense(s_fc1, num_outputs, activation_fn=None,
                       weights_initializer=normc_initializer(0.01))
        return layerN, s_fc1

    def conv2d(self, x, out_channels, padding):
        """Apply a biased 3x3 convolution with unit strides to ``x``.

        Arguments:
            x: Input tensor passed to ``tf.layers.conv2d``.
            out_channels (int): Number of output filters.
            padding (str): Padding mode forwarded to ``tf.layers.conv2d``.

        Returns:
            The convolution output tensor (no activation applied).
        """
        return tf.layers.conv2d(x, out_channels, kernel_size=[3, 3], padding=padding, use_bias=True)
......@@ -59,6 +59,16 @@ class CustomPreprocessor(Preprocessor):
return observation
class ConvModelPreprocessor(Preprocessor):
    """Stack the sub-observations of a tuple observation along axis 2.

    Every sub-observation is indexed on three axes, so each is presumably a
    (height, width, channels) array sharing the same height and width --
    TODO confirm against the environment's observation space.
    """

    def _init_shape(self, obs_space, options):
        """Return the shape of the stacked observation.

        Arguments:
            obs_space: Indexable/iterable collection of sub-spaces, each
                exposing a three-element ``shape``.
            options: Preprocessor options (not read here).

        Returns:
            tuple: (height, width, total_channels), where height and width come
                from the first sub-space and total_channels is the sum of the
                third dimension over all sub-spaces.
        """
        first_shape = obs_space[0].shape
        total_channels = sum(space.shape[2] for space in obs_space)
        return (first_shape[0], first_shape[1], total_channels)

    def transform(self, observation):
        """Concatenate all sub-observations along axis 2.

        Every element of ``observation`` is used, matching ``_init_shape``,
        which sums the channels of every sub-space (the previous version
        hard-coded exactly three elements).
        """
        return np.concatenate(list(observation), axis=2)
# class NoPreprocessor:
......
# Gin bindings for `run_experiment`: compares the convolutional and the
# fully connected model on the global observation builders.
run_experiment.name = "observation_benchmark_results"
run_experiment.num_iterations = 1002
run_experiment.save_every = 100
run_experiment.hidden_sizes = [32, 32]

run_experiment.map_width = 20
run_experiment.map_height = 20
run_experiment.n_agents = 5
run_experiment.policy_folder_name = "ppo_policy_{config[obs_builder].__class__.__name__}_{config[n_agents]}_agents_conv_model_{config[conv_model]}_"

run_experiment.horizon = 50
run_experiment.seed = 123

# Grid search over using the convolutional model or not.
run_experiment.conv_model = {"grid_search": [True, False]}

# Both builders must be *evaluated* references (`@Name()`), so that instances
# are passed: the training script selects the observation space with
# isinstance() checks and formats the folder name with `.__class__.__name__`.
# A bare `@Name` would pass the class object itself.
# Previous search space: [@TreeObsForRailEnv(), @GlobalObsForRailEnv()]
run_experiment.obs_builder = {"grid_search": [@GlobalObsForRailEnv(), @GlobalObsForRailEnvDirectionDependent()]}
TreeObsForRailEnv.max_depth = 2
LocalObsForRailEnv.view_radius = 5

run_experiment.entropy_coeff = 0.01
from baselines.RailEnvRLLibWrapper import RailEnvRLLibWrapper
from baselines.RLLib_training.RailEnvRLLibWrapper import RailEnvRLLibWrapper
import gym
......@@ -14,7 +14,9 @@ from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph as PolicyGraph
from ray.rllib.models import ModelCatalog
from ray.tune.logger import pretty_print
from baselines.CustomPreprocessor import CustomPreprocessor
from baselines.RLLib_training.custom_preprocessors import CustomPreprocessor, ConvModelPreprocessor
from baselines.RLLib_training.custom_models import ConvModelGlobalObs
import ray
......@@ -28,15 +30,20 @@ import gin
from ray import tune
from ray.rllib.utils.seed import seed as set_seed
from flatland.envs.observations import TreeObsForRailEnv, GlobalObsForRailEnv, LocalObsForRailEnv
from flatland.envs.observations import TreeObsForRailEnv, GlobalObsForRailEnv,\
LocalObsForRailEnv, GlobalObsForRailEnvDirectionDependent
# Expose the observation builders to gin so that they can be referenced from
# the experiment configuration file.
gin.external_configurable(TreeObsForRailEnv)
gin.external_configurable(GlobalObsForRailEnv)
gin.external_configurable(LocalObsForRailEnv)
gin.external_configurable(GlobalObsForRailEnvDirectionDependent)
from ray.rllib.models.preprocessors import TupleFlatteningPreprocessor
# Register the preprocessors and the custom model under the string names that
# are later placed in the trainer's 'model' configuration.
ModelCatalog.register_custom_preprocessor("tree_obs_prep", CustomPreprocessor)
ModelCatalog.register_custom_preprocessor("global_obs_prep", TupleFlatteningPreprocessor)
ModelCatalog.register_custom_preprocessor("conv_obs_prep", ConvModelPreprocessor)
ModelCatalog.register_custom_model("conv_model", ConvModelGlobalObs)
# Start Ray with default settings at import time (explicit memory limits are
# left commented out).
ray.init()#object_store_memory=150000000000, redis_max_memory=30000000000)
......@@ -70,13 +77,16 @@ def train(config, reporter):
obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(105,))
preprocessor = "tree_obs_prep"
elif isinstance(config["obs_builder"], GlobalObsForRailEnv):
elif isinstance(config["obs_builder"], GlobalObsForRailEnv) or \
isinstance(config["obs_builder"], GlobalObsForRailEnvDirectionDependent):
obs_space = gym.spaces.Tuple((
gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 16)),
gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 3)),
gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 4)),
gym.spaces.Box(low=0, high=1, shape=(4,))))
preprocessor = "global_obs_prep"
gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 8)),
gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 2))))
if config['conv_model']:
preprocessor = "conv_obs_prep"
else:
preprocessor = "global_obs_prep"
elif isinstance(config["obs_builder"], LocalObsForRailEnv):
view_radius = config["obs_builder"].view_radius
......@@ -104,7 +114,11 @@ def train(config, reporter):
# Trainer configuration
trainer_config = DEFAULT_CONFIG.copy()
trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor}
if config['conv_model']:
trainer_config['model'] = {"custom_model": "conv_model", "custom_preprocessor": preprocessor}
else:
trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor}
trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
"policy_mapping_fn": policy_mapping_fn,
"policies_to_train": list(policy_graphs.keys())}
......@@ -151,7 +165,7 @@ def train(config, reporter):
@gin.configurable
def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder,
entropy_coeff, seed):
entropy_coeff, seed, conv_model):
tune.run(
train,
......@@ -167,7 +181,8 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
'policy_folder_name': policy_folder_name,
"obs_builder": obs_builder,
"entropy_coeff": entropy_coeff,
"seed": seed
"seed": seed,
"conv_model": conv_model
},
resources_per_trial={
"cpu": 2,
......@@ -179,6 +194,6 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
# Script entry point: parse the gin configuration found in the experiment
# directory and launch the experiment, storing results in that same directory.
if __name__ == '__main__':
gin.external_configurable(tune.grid_search)
# NOTE(review): `dir` is assigned twice; only the second value is used. The
# first line looks like the pre-change line of this diff view -- confirm.
# NOTE(review): `dir` shadows the builtin and is a machine-specific absolute
# path that must be adapted before running elsewhere.
dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/experiment_configs/observation_benchmark' # To Modify
dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/conv_model_test' # To Modify
gin.parse_config_file(dir + '/config.gin')
run_experiment(local_dir=dir)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment