Commit b34e7e57 authored by gmollard

Merge branch 'master' of gitlab.aicrowd.com:flatland/baselines

parents 546342c1 757f4faa
Examples of scripts to train agents in the Flatland environment.
It should be cloned inside the main flatland repository.
The `torch_training` folder shows an example of how to train agents with a DQN implemented in pytorch.
The `RLLib_training` folder shows an example of how to train agents with algorithms implemented in the RLlib library, available at: <https://github.com/ray-project/ray/tree/master/python/ray/rllib>
This repository allows running Rail Environment multi-agent training with the RLlib library.
It should be cloned inside the main flatland repository.
## Installation:
```sh
pip install ray
pip install gin-config
```
To start a training with different parameters, you can create a folder containing a config.gin file (see example in `experiment_configs/config_example/config.gin`).
Then, you can modify the config.gin file path at the end of the `train_experiment.py` file.
The results will be stored inside the folder, and the learning curves can be visualized in TensorBoard:
```
tensorboard --logdir=/path/to/folder_containing_config_gin_file
```
## Gin config files
In each config.gin file, all the parameters of the `run_experiment` function have to be specified.
For example, to indicate the number of agents that have to be initialized at the beginning of each simulation, a line binding the corresponding `run_experiment` argument should be added to the config.gin file.
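A sketch of such a binding, assuming the argument is named `n_agents` (check the signature of `run_experiment` for the actual parameter name):

```
run_experiment.n_agents = 5
```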
Note that `@TreeObsForRailEnv` references the class, while `@TreeObsForRailEnv()` references an instance of the class.
More documentation on how to use gin-config can be found in the library's GitHub repository: https://github.com/google/gin-config
## Run an example:
To start a training on a 20x20 map, with different numbers of agents initialized at each episode, one can run the `train_experiment.py` script:
```
python baselines/RLLib_training/train_experiment.py
```
This will load the gin config file in the folder `experiment_configs/config_example`.
To visualize the result of a training run, one can load a training checkpoint and use the learned policy.
This is done in the `render_training_result.py` script. One has to modify the `CHECKPOINT_PATH` at the beginning of this script:
```
CHECKPOINT_PATH = os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'ppo_policy_two_obs_with_predictions_n_agents_4_map_size_20q58l5_f7',
'checkpoint_101', 'checkpoint-101')
```
and load the corresponding gin config file:
```
gin.parse_config_file(os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'config.gin'))
```
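As a rough sketch of what the rendering script then does (assuming the same RLlib version as in the training script below; `restore()` and `compute_action()` are standard RLlib trainer calls, while `agent_config` and the observation handling here are only placeholders for what the actual script sets up):

```python
from ray.rllib.agents.ppo.ppo import PPOTrainer

# Rebuild a trainer with the same env wrapper and config used for training,
# then load the weights from the checkpoint selected above.
trainer = PPOTrainer(env=RailEnvRLLibWrapper, config=agent_config)
trainer.restore(CHECKPOINT_PATH)

# Query the learned policy for one agent's action; `obs` comes from the
# Flatland environment (env.reset() / env.step()).
action = trainer.compute_action(obs, policy_id="ppo_policy")
```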
from ray.rllib.models import ModelCatalog, Model
from ray.rllib.models.misc import normc_initializer
import tensorflow as tf


class ConvModelGlobalObs(Model):
    def _build_layers_v2(self, input_dict, num_outputs, options):
        """Define the layers of a custom model.

        Arguments:
            input_dict (dict): Dictionary of input tensors, including "obs",
                "prev_action", "prev_reward", "is_training".
            num_outputs (int): Output tensor must be of size
                [BATCH_SIZE, num_outputs].
            options (dict): Model options.

        Returns:
            (outputs, feature_layer): Tensors of size [BATCH_SIZE, num_outputs]
                and [BATCH_SIZE, desired_feature_size].

        When using dict or tuple observation spaces, you can access
        the nested sub-observation batches here as well:

        Examples:
            >>> print(input_dict)
            {'prev_actions': <tf.Tensor shape=(?,) dtype=int64>,
             'prev_rewards': <tf.Tensor shape=(?,) dtype=float32>,
             'is_training': <tf.Tensor shape=(), dtype=bool>,
             'obs': (observation, features)}
        """
        # Convolutional Layer #1
        Relu = tf.nn.relu
        BatchNormalization = tf.layers.batch_normalization
        Dropout = tf.layers.dropout
        Dense = tf.contrib.layers.fully_connected

        map_size = int(input_dict['obs'][0].shape[0])
        N_CHANNELS = 96

        conv1 = Relu(self.conv2d(input_dict['obs'], N_CHANNELS, 'valid', strides=(2, 2)))
        # conv2 = Relu(self.conv2d(conv1, 64, 'valid'))
        # conv3 = Relu(self.conv2d(conv2, 64, 'valid'))

        conv2_flat = tf.reshape(conv1, [-1, int(N_CHANNELS * ((map_size - 3 + 1) / 2) ** 2)])
        # conv4_feature = tf.concat((conv2_flat, input_dict['obs'][1]), axis=1)
        s_fc1 = Relu(Dense(conv2_flat, 256))
        layerN_minus_1 = Relu(Dense(s_fc1, 64))
        layerN = Dense(layerN_minus_1, num_outputs)
        return layerN, layerN_minus_1

    def conv2d(self, x, out_channels, padding, strides=(1, 1)):
        return tf.layers.conv2d(x, out_channels, kernel_size=[3, 3], padding=padding,
                                use_bias=True, strides=strides)
class LightModel(Model):
    def _build_layers_v2(self, input_dict, num_outputs, options):
        """Define the layers of a custom model.

        Arguments:
            input_dict (dict): Dictionary of input tensors, including "obs",
                "prev_action", "prev_reward", "is_training".
            num_outputs (int): Output tensor must be of size
                [BATCH_SIZE, num_outputs].
            options (dict): Model options.

        Returns:
            (outputs, feature_layer): Tensors of size [BATCH_SIZE, num_outputs]
                and [BATCH_SIZE, desired_feature_size].

        When using dict or tuple observation spaces, you can access
        the nested sub-observation batches here as well:

        Examples:
            >>> print(input_dict)
            {'prev_actions': <tf.Tensor shape=(?,) dtype=int64>,
             'prev_rewards': <tf.Tensor shape=(?,) dtype=float32>,
             'is_training': <tf.Tensor shape=(), dtype=bool>,
             'obs': (observation, features)}
        """
        # print(input_dict)
        # Convolutional Layer #1
        self.sess = tf.get_default_session()
        Relu = tf.nn.relu
        BatchNormalization = tf.layers.batch_normalization
        Dropout = tf.layers.dropout
        Dense = tf.contrib.layers.fully_connected

        # conv1 = Relu(self.conv2d(input_dict['obs'][0], 32, 'valid'))
        conv1 = Relu(self.conv2d(input_dict['obs'], 32, 'valid'))
        conv2 = Relu(self.conv2d(conv1, 16, 'valid'))
        # conv3 = Relu(self.conv2d(conv2, 64, 'valid'))

        conv4_flat = tf.reshape(conv2, [-1, 16 * (17 - 2 * 2) ** 2])
        # conv4_feature = tf.concat((conv4_flat, input_dict['obs'][1]), axis=1)
        s_fc1 = Relu(Dense(conv4_flat, 128, weights_initializer=normc_initializer(1.0)))
        # layerN_minus_1 = Relu(Dense(s_fc1, 256, use_bias=False))
        layerN = Dense(s_fc1, num_outputs, weights_initializer=normc_initializer(0.01))
        return layerN, s_fc1

    def conv2d(self, x, out_channels, padding):
        return tf.layers.conv2d(x, out_channels, kernel_size=[3, 3], padding=padding, use_bias=True)
        # weights_initializer=normc_initializer(1.0))
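The two models above are defined but not registered anywhere in this file. A minimal sketch of how one of them could be plugged into a trainer, assuming RLlib's `ModelCatalog.register_custom_model` API (the key `"conv_model"` is illustrative):

```python
from ray.rllib.models import ModelCatalog

# Register the custom model under an arbitrary key...
ModelCatalog.register_custom_model("conv_model", ConvModelGlobalObs)

# ...and reference that key in the trainer's model config.
agent_config["model"] = {"custom_model": "conv_model"}
```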
import random

import gym
import numpy as np
import ray
import ray.rllib.agents.ppo.ppo as ppo
from RailEnvRLLibWrapper import RailEnvRLLibWrapper
from flatland.envs.generators import complex_rail_generator
from ray.rllib.agents.ppo.ppo import PPOTrainer
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph
from ray.rllib.models import ModelCatalog
from ray.tune.logger import pretty_print

from RLLib_training.custom_preprocessors import CustomPreprocessor

ModelCatalog.register_custom_preprocessor("my_prep", CustomPreprocessor)
ray.init()


def train(config):
    print('Init Env')
    random.seed(1)
    np.random.seed(1)

    # Transition probabilities per cell type (not used with complex_rail_generator below)
    transition_probability = [15,  # empty cell - Case 0
                              5,   # Case 1 - straight
                              5,   # Case 2 - simple switch
                              1,   # Case 3 - diamond crossing
                              1,   # Case 4 - single slip
                              1,   # Case 5 - double slip
                              1,   # Case 6 - symmetrical
                              0,   # Case 7 - dead end
                              1,   # Case 1b (8)  - simple turn right
                              1,   # Case 1c (9)  - simple turn left
                              1]   # Case 2b (10) - simple switch mirrored

    # Example: generate a random rail
    env_config = {"width": 20,
                  "height": 20,
                  "rail_generator": complex_rail_generator(nr_start_goal=5, min_dist=5, max_dist=99999, seed=0),
                  "number_of_agents": 5}

    obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(105,))
    act_space = gym.spaces.Discrete(4)

    # Dict with the different policies to train
    policy_graphs = {
        "ppo_policy": (PPOPolicyGraph, obs_space, act_space, {})
    }

    def policy_mapping_fn(agent_id):
        return "ppo_policy"

    agent_config = ppo.DEFAULT_CONFIG.copy()
    agent_config['model'] = {"fcnet_hiddens": [32, 32], "custom_preprocessor": "my_prep"}
    agent_config['multiagent'] = {"policy_graphs": policy_graphs,
                                  "policy_mapping_fn": policy_mapping_fn,
                                  "policies_to_train": list(policy_graphs.keys())}
    agent_config["horizon"] = 50
    agent_config["num_workers"] = 0
    # agent_config["sample_batch_size"]: 1000
    # agent_config["num_cpus_per_worker"] = 40
    # agent_config["num_gpus"] = 2.0
    # agent_config["num_gpus_per_worker"] = 2.0
    # agent_config["num_cpus_for_driver"] = 5
    # agent_config["num_envs_per_worker"] = 15
    agent_config["env_config"] = env_config
    # agent_config["batch_mode"] = "complete_episodes"

    ppo_trainer = PPOTrainer(env=RailEnvRLLibWrapper, config=agent_config)

    for i in range(100000 + 2):
        print("== Iteration", i, "==")
        print("-- PPO --")
        print(pretty_print(ppo_trainer.train()))
        # if i % config['save_every'] == 0:
        #     checkpoint = ppo_trainer.save()
        #     print("checkpoint saved at", checkpoint)


train({})