diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..63972a8bcb2fba2edbb88a401b200e33374d4c23
--- /dev/null
+++ b/README.md
@@ -0,0 +1,8 @@
+This repository contains examples of scripts to train agents in the Flatland environment.
+
+It should be cloned inside the main flatland repository.
+
+The `torch_training` folder shows an example of how to train agents with a DQN implemented in PyTorch.
+
+The `RLLib_training` folder shows an example of how to train agents with algorithms implemented in the RLLib library, available at: <https://github.com/ray-project/ray/tree/master/python/ray/rllib>
+
diff --git a/RLLib_training/README.md b/RLLib_training/README.md
index 63afbd4c35e0cd03683920d784048914f5b30c04..21665a54eea1595b1679ebd587392cb1d5725eea 100644
--- a/RLLib_training/README.md
+++ b/RLLib_training/README.md
@@ -1,27 +1,25 @@
 This repository allows to run Rail Environment multi agent training with the RLLib Library.
-It should be clone inside the main flatland repository.
-
 ## Installation:
 ```sh
 pip install ray
 pip install gin-config
 ```
-To start a grid search on some parameters, you can create a folder containing a config.gin file (see example in `grid_search_configs/n_agents_grid_search/config.gin`.
+To start a training run with different parameters, you can create a folder containing a config.gin file (see the example in `experiment_configs/config_example/config.gin`).
-Then, you can modify the config.gin file path at the end of the `grid_search_train.py` file.
+Then, you can modify the config.gin file path at the end of the `train_experiment.py` file.
 The results will be stored inside the folder, and the learning curves can be visualized in tensorboard:
 ```
-tensorboard --logdir=/path/to/foler_containing_config_gin_file
+tensorboard --logdir=/path/to/folder_containing_config_gin_file
 ```
 ## Gin config files
-In each config.gin files, all the parameters, except `local_dir` of the `run_experiment` functions have to be specified.
+In each config.gin file, all the parameters of the `run_experiment` function have to be specified.
 For example, to indicate the number of agents that have to be initialized at the beginning of each simulation, the following line should be added:
 ```
@@ -54,4 +52,26 @@ Note that `@TreeObsForRailEnv` references the class, while `@TreeObsForRailEnv()
-More documentation on how to use gin-config can be found on the library github repository: https://github.com/google/gin-config
+More documentation on how to use gin-config can be found in its GitHub repository: https://github.com/google/gin-config
+
+## Run an example:
+To start a training on a 20x20 map, with different numbers of agents initialized at each episode, one can run the `train_experiment.py` script:
+```
+python baselines/RLLib_training/train_experiment.py
+```
+This will load the gin config file from the folder `experiment_configs/config_example`.
+
+To visualize the result of a training, one can load a training checkpoint and use the learned policy.
+This is done in the `render_training_result.py` script.
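+A rough sketch of that workflow (illustrative only; the wrapper construction, the rollout loop and the checkpoint handling below are assumptions in the spirit of the training scripts, not the actual contents of `render_training_result.py`):
+
+```
+import os
+
+import gin
+import ray
+import ray.rllib.agents.ppo.ppo as ppo
+from ray.rllib.agents.ppo.ppo import PPOTrainer
+from RailEnvRLLibWrapper import RailEnvRLLibWrapper
+from flatland.envs.generators import complex_rail_generator
+
+ray.init()
+gin.parse_config_file(os.path.join('experiment_configs', 'config_example', 'config.gin'))
+
+# Environment and trainer config mirroring the training setup. The multi-agent
+# policy setup used during training (policy graphs and mapping function) has to
+# be replicated as well; it is omitted here for brevity.
+env_config = {"width": 20,
+              "height": 20,
+              "rail_generator": complex_rail_generator(nr_start_goal=5, min_dist=5, max_dist=99999, seed=0),
+              "number_of_agents": 5}
+agent_config = ppo.DEFAULT_CONFIG.copy()
+agent_config["env_config"] = env_config
+
+# Rebuild a trainer and load the learned weights from a training checkpoint.
+CHECKPOINT_PATH = os.path.join('experiment_configs', 'config_example',
+                               'ppo_policy_two_obs_with_predictions_n_agents_4_map_size_20q58l5_f7',
+                               'checkpoint_101', 'checkpoint-101')
+trainer = PPOTrainer(env=RailEnvRLLibWrapper, config=agent_config)
+trainer.restore(CHECKPOINT_PATH)
+
+# Roll out the learned policy, assuming the wrapper follows RLlib's MultiAgentEnv
+# dict convention. Rendering calls (e.g. flatland's render utilities) would go
+# inside this loop.
+env = RailEnvRLLibWrapper(env_config)  # assumed to accept the env_config dict
+obs = env.reset()
+done = {"__all__": False}
+while not done["__all__"]:
+    actions = {i: trainer.compute_action(obs[i], policy_id="ppo_policy") for i in obs}
+    obs, rewards, done, _ = env.step(actions)
+```
+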
One has to modify the `CHECKPOINT_PATH` at the beginning of this script: + +``` +CHECKPOINT_PATH = os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'ppo_policy_two_obs_with_predictions_n_agents_4_map_size_20q58l5_f7', + 'checkpoint_101', 'checkpoint-101') +``` +and load the corresponding gin config file: + +``` +gin.parse_config_file(os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'config.gin')) +``` + + diff --git a/RLLib_training/custom_models.py b/RLLib_training/custom_models.py deleted file mode 100644 index 81f5223393833986082727feee58699aaa9c60d5..0000000000000000000000000000000000000000 --- a/RLLib_training/custom_models.py +++ /dev/null @@ -1,101 +0,0 @@ -from ray.rllib.models import ModelCatalog, Model -from ray.rllib.models.misc import normc_initializer - -import tensorflow as tf - - -class ConvModelGlobalObs(Model): - def _build_layers_v2(self, input_dict, num_outputs, options): - """Define the layers of a custom model. - Arguments: - input_dict (dict): Dictionary of input tensors, including "obs", - "prev_action", "prev_reward", "is_training". - num_outputs (int): Output tensor must be of size - [BATCH_SIZE, num_outputs]. - options (dict): Model options. - Returns: - (outputs, feature_layer): Tensors of size [BATCH_SIZE, num_outputs] - and [BATCH_SIZE, desired_feature_size]. - When using dict or tuple observation spaces, you can access - the nested sub-observation batches here as well: - Examples: - >>> print(input_dict) - {'prev_actions': <tf.Tensor shape=(?,) dtype=int64>, - 'prev_rewards': <tf.Tensor shape=(?,) dtype=float32>, - 'is_training': <tf.Tensor shape=(), dtype=bool>, - 'obs': (observation, features) - """ - # Convolutional Layer #1 - - Relu = tf.nn.relu - BatchNormalization = tf.layers.batch_normalization - Dropout = tf.layers.dropout - Dense = tf.contrib.layers.fully_connected - - map_size = int(input_dict['obs'][0].shape[0]) - - N_CHANNELS = 96 - - conv1 = Relu(self.conv2d(input_dict['obs'], N_CHANNELS, 'valid', strides=(2, 2))) - - # conv2 = Relu(self.conv2d(conv1, 64, 'valid')) - - # conv3 = Relu(self.conv2d(conv2, 64, 'valid')) - - conv2_flat = tf.reshape(conv1, [-1, int(N_CHANNELS * ((map_size-3 + 1)/2)**2)]) - # conv4_feature = tf.concat((conv2_flat, input_dict['obs'][1]), axis=1) - s_fc1 = Relu(Dense(conv2_flat, 256)) - layerN_minus_1 = Relu(Dense(s_fc1, 64)) - layerN = Dense(layerN_minus_1, num_outputs) - return layerN, layerN_minus_1 - - def conv2d(self, x, out_channels, padding, strides=(1,1)): - return tf.layers.conv2d(x, out_channels, kernel_size=[3, 3], padding=padding, - use_bias=True, strides=strides) - - -class LightModel(Model): - def _build_layers_v2(self, input_dict, num_outputs, options): - """Define the layers of a custom model. - Arguments: - input_dict (dict): Dictionary of input tensors, including "obs", - "prev_action", "prev_reward", "is_training". - num_outputs (int): Output tensor must be of size - [BATCH_SIZE, num_outputs]. - options (dict): Model options. - Returns: - (outputs, feature_layer): Tensors of size [BATCH_SIZE, num_outputs] - and [BATCH_SIZE, desired_feature_size]. 
- When using dict or tuple observation spaces, you can access - the nested sub-observation batches here as well: - Examples: - >>> print(input_dict) - {'prev_actions': <tf.Tensor shape=(?,) dtype=int64>, - 'prev_rewards': <tf.Tensor shape=(?,) dtype=float32>, - 'is_training': <tf.Tensor shape=(), dtype=bool>, - 'obs': (observation, features) - """ - # print(input_dict) - # Convolutional Layer #1 - self.sess = tf.get_default_session() - Relu = tf.nn.relu - BatchNormalization = tf.layers.batch_normalization - Dropout = tf.layers.dropout - Dense = tf.contrib.layers.fully_connected - - #conv1 = Relu(self.conv2d(input_dict['obs'][0], 32, 'valid')) - conv1 = Relu(self.conv2d(input_dict['obs'], 32, 'valid')) - conv2 = Relu(self.conv2d(conv1, 16, 'valid')) - - # conv3 = Relu(self.conv2d(conv2, 64, 'valid')) - - conv4_flat = tf.reshape(conv2, [-1, 16 * (17-2*2)**2]) - #conv4_feature = tf.concat((conv4_flat, input_dict['obs'][1]), axis=1) - s_fc1 = Relu(Dense(conv4_flat, 128, weights_initializer=normc_initializer(1.0))) - # layerN_minus_1 = Relu(Dense(s_fc1, 256, use_bias=False)) - layerN = Dense(s_fc1, num_outputs, weights_initializer=normc_initializer(0.01)) - return layerN, s_fc1 - - def conv2d(self, x, out_channels, padding): - return tf.layers.conv2d(x, out_channels, kernel_size=[3, 3], padding=padding, use_bias=True) - # weights_initializer=normc_initializer(1.0)) diff --git a/RLLib_training/experiment_configs/experiment_agent_memory/__init__.py b/RLLib_training/experiment_configs/experiment_agent_memory/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/RLLib_training/experiment_configs/observation_benchmark_loaded_env/__init__.py b/RLLib_training/experiment_configs/observation_benchmark_loaded_env/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/RLLib_training/train.py b/RLLib_training/train.py deleted file mode 100644 index ba5f4eab43f5173dd410bc6d9b306d90e0e21ffc..0000000000000000000000000000000000000000 --- a/RLLib_training/train.py +++ /dev/null @@ -1,83 +0,0 @@ -import random - -import gym -import numpy as np -import ray -import ray.rllib.agents.ppo.ppo as ppo -from RailEnvRLLibWrapper import RailEnvRLLibWrapper -from flatland.envs.generators import complex_rail_generator -from ray.rllib.agents.ppo.ppo import PPOTrainer -from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph -from ray.rllib.models import ModelCatalog -from ray.tune.logger import pretty_print - -from RLLib_training.custom_preprocessors import CustomPreprocessor - -ModelCatalog.register_custom_preprocessor("my_prep", CustomPreprocessor) -ray.init() - - -def train(config): - print('Init Env') - random.seed(1) - np.random.seed(1) - - transition_probability = [15, # empty cell - Case 0 - 5, # Case 1 - straight - 5, # Case 2 - simple switch - 1, # Case 3 - diamond crossing - 1, # Case 4 - single slip - 1, # Case 5 - double slip - 1, # Case 6 - symmetrical - 0, # Case 7 - dead end - 1, # Case 1b (8) - simple turn right - 1, # Case 1c (9) - simple turn left - 1] # Case 2b (10) - simple switch mirrored - - # Example generate a random rail - env_config = {"width": 20, - "height": 20, - "rail_generator": complex_rail_generator(nr_start_goal=5, min_dist=5, max_dist=99999, seed=0), - "number_of_agents": 5} - - obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(105,)) - act_space = gym.spaces.Discrete(4) - - # Dict with the 
different policies to train - policy_graphs = { - "ppo_policy": (PPOPolicyGraph, obs_space, act_space, {}) - } - - def policy_mapping_fn(agent_id): - return f"ppo_policy" - - agent_config = ppo.DEFAULT_CONFIG.copy() - agent_config['model'] = {"fcnet_hiddens": [32, 32], "custom_preprocessor": "my_prep"} - agent_config['multiagent'] = {"policy_graphs": policy_graphs, - "policy_mapping_fn": policy_mapping_fn, - "policies_to_train": list(policy_graphs.keys())} - agent_config["horizon"] = 50 - agent_config["num_workers"] = 0 - # agent_config["sample_batch_size"]: 1000 - # agent_config["num_cpus_per_worker"] = 40 - # agent_config["num_gpus"] = 2.0 - # agent_config["num_gpus_per_worker"] = 2.0 - # agent_config["num_cpus_for_driver"] = 5 - # agent_config["num_envs_per_worker"] = 15 - agent_config["env_config"] = env_config - # agent_config["batch_mode"] = "complete_episodes" - - ppo_trainer = PPOTrainer(env=RailEnvRLLibWrapper, config=agent_config) - - for i in range(100000 + 2): - print("== Iteration", i, "==") - - print("-- PPO --") - print(pretty_print(ppo_trainer.train())) - - # if i % config['save_every'] == 0: - # checkpoint = ppo_trainer.save() - # print("checkpoint saved at", checkpoint) - - -train({})