Commit 43686bfe authored by u229589

remove RLLib

parent 81513d7b
1 merge request: !7 remove unused observation and prediction files
MANIFEST.in
@@ -4,7 +4,6 @@ include HISTORY.rst
 include LICENSE
 include README.rst
 include requirements_torch_training.txt
-include requirements_RLLib_training.txt
...
README
@@ -15,9 +15,6 @@ With the above introductions you will solve tasks like these and even more...
 ![Conflict_Avoidance](https://i.imgur.com/AvBHKaD.gif)
-# RLLib Training
-The `RLLib_training` folder shows an example of how to train agents with algorithms implemented in the RLLib library, available at <https://github.com/ray-project/ray/tree/master/python/ray/rllib>
 # Sequential Agent
 This is a very simple baseline to show how the `complex_level_generator` generates feasible network configurations.
 If you run the `run_test.py` file you will see a simple agent that solves the level by sequentially running each agent along its shortest path.
...
RLLib_training README (deleted)

This repository allows you to run multi-agent training on the Rail Environment with the RLLib library.

## Installation:
To run the scripts in this repository, the deep learning library TensorFlow has to be installed, along with the following packages:
```sh
pip install gym ray==0.7.0 gin-config opencv-python lz4 psutil
```
To start a training with different parameters, you can create a folder containing a config.gin file (see the example in `experiment_configs/config_example/config.gin`).
Then, you can modify the config.gin file path at the end of the `train_experiment.py` file.
The results will be stored inside that folder, and the learning curves can be visualized in TensorBoard:
```
tensorboard --logdir=/path/to/folder_containing_config_gin_file
```
## Gin config files
In each config.gin file, all the parameters of the `run_experiment` function have to be specified.
For example, to indicate the number of agents that have to be initialized at the beginning of each simulation, the following line should be added:
```
run_experiment.n_agents = 2
```
If several numbers of agents have to be explored during the experiment, you can pass the following value to the `n_agents` parameter:
```
run_experiment.n_agents = {"grid_search": [2,5]}
```
which tells the Tune library to try several values of this parameter during the experiment.
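For reference, this dict form is exactly what Ray Tune's `grid_search` helper produces, and several `grid_search` entries in one config are expanded into the Cartesian product of trials. A small sketch (the second `grid_search` entry is only for illustration; the example config in this folder only grid-searches `n_agents`):

```python
from ray import tune

# The gin value {"grid_search": [2, 5]} is the dict form of tune.grid_search([2, 5]).
assert tune.grid_search([2, 5]) == {"grid_search": [2, 5]}

# With several grid_search entries, Tune runs one trial per combination,
# e.g. 2 values of n_agents x 2 map widths -> 4 trials.
config = {
    "n_agents": tune.grid_search([2, 5]),
    "map_width": tune.grid_search([20, 40]),  # illustrative second parameter
}
```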
To reference a class or an object within gin, you should first register it from the `train_experiment.py` script by adding the following line:
```
gin.external_configurable(TreeObsForRailEnv)
```
and then a `TreeObsForRailEnv` object can be referenced in the `config.gin` file:
```
run_experiment.obs_builder = {"grid_search": [@TreeObsForRailEnv(), @GlobalObsForRailEnv()]}
TreeObsForRailEnv.max_depth = 2
```
Note that `@TreeObsForRailEnv` references the class, while `@TreeObsForRailEnv()` instantiates an object of this class.
More documentation on how to use gin-config can be found in the GitHub repository: https://github.com/google/gin-config
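Putting these pieces together, the gin workflow used by `train_experiment.py` boils down to the following condensed sketch (the bindings are parsed from a string here instead of a config.gin file, just to keep the example self-contained):

```python
import gin
from flatland.envs.observations import TreeObsForRailEnv

# 1. Register external classes so they can be referenced from gin with the @ syntax.
gin.external_configurable(TreeObsForRailEnv)


# 2. Make the entry point configurable; gin fills in its parameters from the bindings.
@gin.configurable
def run_experiment(n_agents, obs_builder):
    print(n_agents, obs_builder)


# 3. Parse the bindings (train_experiment.py uses gin.parse_config_file on config.gin)
#    and call the entry point; the configured values are injected automatically.
gin.parse_config("""
run_experiment.n_agents = 2
run_experiment.obs_builder = @TreeObsForRailEnv()
TreeObsForRailEnv.max_depth = 2
""")
run_experiment()
```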
## Run an example:
To start a training on a 20x20 map, with different numbers of agents initialized at each episode, one can run the `train_experiment.py` script:
```
python RLLib_training/train_experiment.py
```
This will load the gin config file in the folder `experiment_configs/config_example`.
To visualize the result of a training, one can load a training checkpoint and use the learned policy.
This is done in the `render_training_result.py` script. One has to modify the `CHECKPOINT_PATH` at the beginning of this script:
```
CHECKPOINT_PATH = os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'ppo_policy_two_obs_with_predictions_n_agents_4_map_size_20q58l5_f7',
'checkpoint_101', 'checkpoint-101')
```
and load the corresponding gin config file:
```
gin.parse_config_file(os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'config.gin'))
```
RailEnvRLLibWrapper.py (deleted)

import numpy as np
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.rllib.utils.seed import seed as set_seed

from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator, random_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator, random_schedule_generator


class RailEnvRLLibWrapper(MultiAgentEnv):

    def __init__(self, config):
        super(MultiAgentEnv, self).__init__()

        # Environment ID if num_envs_per_worker > 1
        if hasattr(config, "vector_index"):
            vector_index = config.vector_index
        else:
            vector_index = 1

        self.predefined_env = False

        if config['rail_generator'] == "complex_rail_generator":
            self.rail_generator = complex_rail_generator(nr_start_goal=config['number_of_agents'],
                                                         min_dist=config['min_dist'],
                                                         nr_extra=config['nr_extra'],
                                                         seed=config['seed'] * (1 + vector_index))
            self.schedule_generator = complex_schedule_generator()

        elif config['rail_generator'] == "random_rail_generator":
            self.rail_generator = random_rail_generator()
            self.schedule_generator = random_schedule_generator()

        elif config['rail_generator'] == "load_env":
            self.predefined_env = True
            self.rail_generator = random_rail_generator()
            self.schedule_generator = random_schedule_generator()

        else:
            raise ValueError(f'Unknown rail generator: {config["rail_generator"]}')

        set_seed(config['seed'] * (1 + vector_index))
        self.env = RailEnv(width=config["width"], height=config["height"],
                           number_of_agents=config["number_of_agents"],
                           obs_builder_object=config['obs_builder'],
                           rail_generator=self.rail_generator,
                           schedule_generator=self.schedule_generator)

        if self.predefined_env:
            self.env.load_resource('torch_training.railway', 'complex_scene.pkl')

        self.width = self.env.width
        self.height = self.env.height
        self.step_memory = config["step_memory"]

        # needed for the renderer
        self.rail = self.env.rail
        self.agents = self.env.agents
        self.agents_static = self.env.agents_static
        self.dev_obs_dict = self.env.dev_obs_dict

    def reset(self):
        self.agents_done = []
        if self.predefined_env:
            obs = self.env.reset(False, False)
        else:
            obs = self.env.reset()

        # RLLib only receives observations of agents that are not done.
        o = dict()
        for i_agent in range(len(self.env.agents)):
            data, distance, agent_data = self.env.obs_builder.split_tree(tree=np.array(obs[i_agent]),
                                                                         current_depth=0)
            o[i_agent] = [data, distance, agent_data]

        # needed for the renderer
        self.rail = self.env.rail
        self.agents = self.env.agents
        self.agents_static = self.env.agents_static
        self.dev_obs_dict = self.env.dev_obs_dict

        # If step_memory > 1, we need to concatenate the observations kept in memory; only works for
        # step_memory = 1 or 2 for the moment.
        if self.step_memory < 2:
            return o
        else:
            self.old_obs = o
            oo = dict()
            for i_agent in range(len(self.env.agents)):
                oo[i_agent] = [o[i_agent], o[i_agent]]
            return oo

    def step(self, action_dict):
        obs, rewards, dones, infos = self.env.step(action_dict)

        d = dict()
        r = dict()
        o = dict()

        for i_agent in range(len(self.env.agents)):
            if i_agent not in self.agents_done:
                data, distance, agent_data = self.env.obs_builder.split_tree(tree=np.array(obs[i_agent]),
                                                                             current_depth=0)
                o[i_agent] = [data, distance, agent_data]
                r[i_agent] = rewards[i_agent]
                d[i_agent] = dones[i_agent]
        d['__all__'] = dones['__all__']

        if self.step_memory >= 2:
            oo = dict()
            for i_agent in range(len(self.env.agents)):
                if i_agent not in self.agents_done:
                    oo[i_agent] = [o[i_agent], self.old_obs[i_agent]]
            self.old_obs = o

        for agent, done in dones.items():
            if done and agent != '__all__':
                self.agents_done.append(agent)

        if self.step_memory < 2:
            return o, r, d, infos
        else:
            return oo, r, d, infos

    def get_agent_handles(self):
        return self.env.get_agent_handles()

    def get_num_agents(self):
        return self.env.get_num_agents()
custom_preprocessors.py (deleted)

import numpy as np
from ray.rllib.models.preprocessors import Preprocessor

from utils.observation_utils import norm_obs_clip


class TreeObsPreprocessor(Preprocessor):
    def _init_shape(self, obs_space, options):
        print(options)
        self.step_memory = options["custom_options"]["step_memory"]
        return sum([space.shape[0] for space in obs_space]),

    def transform(self, observation):
        if self.step_memory == 2:
            data = norm_obs_clip(observation[0][0])
            distance = norm_obs_clip(observation[0][1])
            agent_data = np.clip(observation[0][2], -1, 1)
            data2 = norm_obs_clip(observation[1][0])
            distance2 = norm_obs_clip(observation[1][1])
            agent_data2 = np.clip(observation[1][2], -1, 1)
            return np.concatenate((data, distance, agent_data, data2, distance2, agent_data2))
        else:
            data = norm_obs_clip(observation[0])
            distance = norm_obs_clip(observation[1])
            agent_data = np.clip(observation[2], -1, 1)
            return np.concatenate((data, distance, agent_data))
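# Note on the expected input (comment added for illustration, based on RailEnvRLLibWrapper):
# for step_memory == 2 the wrapper passes observation = [current_obs, previous_obs], where each
# element is the [data, distance, agent_data] triple produced by split_tree(); for step_memory == 1
# it passes a single [data, distance, agent_data] triple.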
experiment_configs/config_example/config.gin (deleted)

run_experiment.name = "experiment_example"
run_experiment.num_iterations = 1002
run_experiment.save_every = 100
run_experiment.hidden_sizes = [32, 32]
run_experiment.map_width = 20
run_experiment.map_height = 20
run_experiment.n_agents = {"grid_search": [3, 4, 5, 6, 7, 8]}
run_experiment.rail_generator = "complex_rail_generator" # Change this to "load_env" in order to load a predefined complex scene
run_experiment.nr_extra = 5
run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"
run_experiment.seed = 123
run_experiment.conv_model = False
run_experiment.obs_builder = @TreeObsForRailEnv()
TreeObsForRailEnv.predictor = @ShortestPathPredictorForRailEnv()
TreeObsForRailEnv.max_depth = 2
run_experiment.entropy_coeff = 0.001
run_experiment.kl_coeff = 0.2
run_experiment.lambda_gae = 0.9
run_experiment.step_memory = 2
run_experiment.min_dist = 10
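# Note (comment added for illustration): the grid_search on n_agents above makes Tune launch
# one trial per value, i.e. six trials with this example config.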
render_training_result.py (deleted)

import os
import time

import gin
import gym
import numpy as np
import ray
from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG
from ray.rllib.agents.ppo.ppo import PPOTrainer as Trainer
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph as PolicyGraph
from ray.rllib.models import ModelCatalog
from ray.rllib.utils.seed import seed as set_seed

from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import DummyPredictorForRailEnv, ShortestPathPredictorForRailEnv
from flatland.utils.rendertools import RenderTool

from custom_preprocessors import TreeObsPreprocessor
from RailEnvRLLibWrapper import RailEnvRLLibWrapper

gin.external_configurable(DummyPredictorForRailEnv)
gin.external_configurable(ShortestPathPredictorForRailEnv)
gin.external_configurable(TreeObsForRailEnv)

ModelCatalog.register_custom_preprocessor("tree_obs_prep", TreeObsPreprocessor)
ray.init()  # object_store_memory=150000000000, redis_max_memory=30000000000

__file_dirname__ = os.path.dirname(os.path.realpath(__file__))

CHECKPOINT_PATH = os.path.join(__file_dirname__, 'experiment_configs', 'config_example',
                               'ppo_policy_two_obs_with_predictions_n_agents_4_map_size_20q58l5_f7',
                               'checkpoint_101', 'checkpoint-101')  # To Modify

N_EPISODES = 10
N_STEPS_PER_EPISODE = 50


def render_training_result(config):
    print('Init Env')

    set_seed(config['seed'], config['seed'], config['seed'])

    # Example configuration to generate a random rail
    env_config = {"width": config['map_width'],
                  "height": config['map_height'],
                  "rail_generator": config["rail_generator"],
                  "nr_extra": config["nr_extra"],
                  "number_of_agents": config['n_agents'],
                  "seed": config['seed'],
                  "obs_builder": config['obs_builder'],
                  "min_dist": config['min_dist'],
                  "step_memory": config["step_memory"]}

    # Observation space and action space definitions
    if isinstance(config["obs_builder"], TreeObsForRailEnv):
        obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(168,)),) * 2)
        preprocessor = TreeObsPreprocessor
    else:
        raise ValueError("Undefined observation space")

    act_space = gym.spaces.Discrete(5)

    # Dict with the different policies to train
    policy_graphs = {
        "ppo_policy": (PolicyGraph, obs_space, act_space, {})
    }

    def policy_mapping_fn(agent_id):
        return "ppo_policy"

    # Trainer configuration
    trainer_config = DEFAULT_CONFIG.copy()
    trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes']}
    trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
                                    "policy_mapping_fn": policy_mapping_fn,
                                    "policies_to_train": list(policy_graphs.keys())}
    trainer_config["num_workers"] = 0
    trainer_config["num_cpus_per_worker"] = 4
    trainer_config["num_gpus"] = 0.2
    trainer_config["num_gpus_per_worker"] = 0.2
    trainer_config["num_cpus_for_driver"] = 1
    trainer_config["num_envs_per_worker"] = 1
    trainer_config['entropy_coeff'] = config['entropy_coeff']
    trainer_config["env_config"] = env_config
    trainer_config["batch_mode"] = "complete_episodes"
    trainer_config['simple_optimizer'] = False
    trainer_config['postprocess_inputs'] = True
    trainer_config['log_level'] = 'WARN'
    trainer_config['num_sgd_iter'] = 10
    trainer_config['clip_param'] = 0.2
    trainer_config['kl_coeff'] = config['kl_coeff']
    trainer_config['lambda'] = config['lambda_gae']

    env = RailEnvRLLibWrapper(env_config)

    trainer = Trainer(env=RailEnvRLLibWrapper, config=trainer_config)
    trainer.restore(CHECKPOINT_PATH)
    policy = trainer.get_policy("ppo_policy")

    preprocessor = preprocessor(obs_space, {"step_memory": config["step_memory"]})
    env_renderer = RenderTool(env, gl="PILSVG")
    for episode in range(N_EPISODES):
        observation = env.reset()
        for i in range(N_STEPS_PER_EPISODE):
            preprocessed_obs = []
            for obs in observation.values():
                preprocessed_obs.append(preprocessor.transform(obs))
            action, _, infos = policy.compute_actions(preprocessed_obs, [])
            logits = infos['behaviour_logits']
            actions = dict()

            # We select the greedy action.
            for j, logit in enumerate(logits):
                actions[j] = np.argmax(logit)

            # In case we prefer to sample an action stochastically according to the policy graph:
            # for j, act in enumerate(action):
            #     actions[j] = act

            # Time to see the rendering at one step
            time.sleep(1)

            env_renderer.renderEnv(show=True, frames=True, iEpisode=episode, iStep=i,
                                   action_dict=list(actions.values()))
            observation, _, _, _ = env.step(actions)

    env_renderer.close_window()


@gin.configurable
def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                   map_width, map_height, policy_folder_name, obs_builder,
                   entropy_coeff, seed, conv_model, rail_generator, nr_extra, kl_coeff, lambda_gae,
                   step_memory, min_dist):
    render_training_result(
        config={"n_agents": n_agents,
                "hidden_sizes": hidden_sizes,  # Array containing the sizes of the network layers
                "save_every": save_every,
                "map_width": map_width,
                "map_height": map_height,
                'policy_folder_name': policy_folder_name,
                "obs_builder": obs_builder,
                "entropy_coeff": entropy_coeff,
                "seed": seed,
                "conv_model": conv_model,
                "rail_generator": rail_generator,
                "nr_extra": nr_extra,
                "kl_coeff": kl_coeff,
                "lambda_gae": lambda_gae,
                "min_dist": min_dist,
                "step_memory": step_memory
                }
    )


if __name__ == '__main__':
    gin.parse_config_file(os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'config.gin'))  # To Modify
    run_experiment()
train_experiment.py (deleted)

import os
import tempfile

import gin
import gym
import numpy as np

import ray
from ray import tune
from ray.tune.logger import UnifiedLogger
from ray.tune.logger import pretty_print

# Import PPO trainer: we can replace these imports by any other trainer from RLLib.
from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG
from ray.rllib.agents.ppo.ppo import PPOTrainer as Trainer
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph as PolicyGraph
from ray.rllib.models import ModelCatalog
from ray.rllib.utils.seed import seed as set_seed

from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import DummyPredictorForRailEnv, ShortestPathPredictorForRailEnv

from custom_preprocessors import TreeObsPreprocessor
from RailEnvRLLibWrapper import RailEnvRLLibWrapper

gin.external_configurable(DummyPredictorForRailEnv)
gin.external_configurable(ShortestPathPredictorForRailEnv)
gin.external_configurable(TreeObsForRailEnv)

ModelCatalog.register_custom_preprocessor("tree_obs_prep", TreeObsPreprocessor)
ray.init()  # object_store_memory=150000000000, redis_max_memory=30000000000

__file_dirname__ = os.path.dirname(os.path.realpath(__file__))


def on_episode_start(info):
    episode = info['episode']
    map_width = info['env'].envs[0].width
    map_height = info['env'].envs[0].height
    episode.horizon = 3 * (map_width + map_height)


def on_episode_end(info):
    episode = info['episode']

    # Calculation of a custom score metric: sum of all accumulated rewards, divided by the number of agents
    # and the maximum number of time steps of the episode.
    score = 0
    for k, v in episode._agent_reward_history.items():
        score += np.sum(v)
    score /= (len(episode._agent_reward_history) * episode.horizon)

    # Calculation of the proportion of episodes solved before the maximum time step
    done = 0
    if len(episode._agent_reward_history[0]) <= episode.horizon - 5:
        done = 1

    episode.custom_metrics["score"] = score
    episode.custom_metrics["proportion_episode_solved"] = done


def train(config, reporter):
    print('Init Env')

    set_seed(config['seed'], config['seed'], config['seed'])

    # Given the depth of the tree observation and the number of features per node we get the following state_size
    num_features_per_node = config['obs_builder'].observation_dim
    tree_depth = 2
    nr_nodes = 0
    for i in range(tree_depth + 1):
        nr_nodes += np.power(4, i)
    obs_size = num_features_per_node * nr_nodes
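    # Worked example (comment added for illustration): with tree_depth = 2 the tree has
    # 1 + 4 + 16 = 21 nodes, so obs_size = 21 * observation_dim (e.g. 168 for 8 features
    # per node, which matches the hard-coded shape used in render_training_result.py).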
    # Environment parameters
    env_config = {"width": config['map_width'],
                  "height": config['map_height'],
                  "rail_generator": config["rail_generator"],
                  "nr_extra": config["nr_extra"],
                  "number_of_agents": config['n_agents'],
                  "seed": config['seed'],
                  "obs_builder": config['obs_builder'],
                  "min_dist": config['min_dist'],
                  "step_memory": config["step_memory"]}

    # Observation space and action space definitions
    if isinstance(config["obs_builder"], TreeObsForRailEnv):
        obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(obs_size,)),) * 2)
        preprocessor = "tree_obs_prep"
    else:
        raise ValueError("Undefined observation space")  # Only TreeObservation implemented for now.

    act_space = gym.spaces.Discrete(5)

    # Dict with the different policies to train. In this case, all trains follow the same policy.
    policy_graphs = {
        "ppo_policy": (PolicyGraph, obs_space, act_space, {})
    }

    # Function that maps an agent id to the name of its respective policy.
    def policy_mapping_fn(agent_id):
        return "ppo_policy"

    # Trainer configuration
    trainer_config = DEFAULT_CONFIG.copy()
    trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor,
                               "custom_options": {"step_memory": config["step_memory"], "obs_size": obs_size}}

    trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
                                    "policy_mapping_fn": policy_mapping_fn,
                                    "policies_to_train": list(policy_graphs.keys())}

    # Maximum number of time steps for an episode is set to 3 * (map_width + map_height)
    trainer_config["horizon"] = 3 * (config['map_width'] + config['map_height'])

    # Parameters for calculation parallelization
    trainer_config["num_workers"] = 0
    trainer_config["num_cpus_per_worker"] = 8
    trainer_config["num_gpus"] = 0.2
    trainer_config["num_gpus_per_worker"] = 0.2
    trainer_config["num_cpus_for_driver"] = 1
    trainer_config["num_envs_per_worker"] = 1

    # Parameters for PPO training
    trainer_config['entropy_coeff'] = config['entropy_coeff']
    trainer_config["env_config"] = env_config
    trainer_config["batch_mode"] = "complete_episodes"
    trainer_config['simple_optimizer'] = False
    trainer_config['log_level'] = 'WARN'
    trainer_config['num_sgd_iter'] = 10
    trainer_config['clip_param'] = 0.2
    trainer_config['kl_coeff'] = config['kl_coeff']
    trainer_config['lambda'] = config['lambda_gae']
    trainer_config['callbacks'] = {
        "on_episode_start": tune.function(on_episode_start),
        "on_episode_end": tune.function(on_episode_end)
    }

    def logger_creator(conf):
        """Creates a Unified logger with a default logdir prefix."""
        logdir = config['policy_folder_name'].format(**locals())
        logdir = tempfile.mkdtemp(prefix=logdir, dir=config['local_dir'])
        return UnifiedLogger(conf, logdir, None)

    logger = logger_creator

    trainer = Trainer(env=RailEnvRLLibWrapper, config=trainer_config, logger_creator=logger)

    for i in range(100000 + 2):
        print("== Iteration", i, "==")
        print(pretty_print(trainer.train()))

        if i % config['save_every'] == 0:
            checkpoint = trainer.save()
            print("checkpoint saved at", checkpoint)

        reporter(num_iterations_trained=trainer._iteration)


@gin.configurable
def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                   map_width, map_height, policy_folder_name, local_dir, obs_builder,
                   entropy_coeff, seed, conv_model, rail_generator, nr_extra, kl_coeff, lambda_gae,
                   step_memory, min_dist):
    tune.run(
        train,
        name=name,
        stop={"num_iterations_trained": num_iterations},
        config={"n_agents": n_agents,
                "hidden_sizes": hidden_sizes,  # Array containing the sizes of the network layers
                "save_every": save_every,
                "map_width": map_width,
                "map_height": map_height,
                "local_dir": local_dir,
                'policy_folder_name': policy_folder_name,
                "obs_builder": obs_builder,
                "entropy_coeff": entropy_coeff,
                "seed": seed,
                "conv_model": conv_model,
                "rail_generator": rail_generator,
                "nr_extra": nr_extra,
                "kl_coeff": kl_coeff,
                "lambda_gae": lambda_gae,
                "min_dist": min_dist,
                "step_memory": step_memory  # If equal to two, the current observation plus
                                            # the observation of the last time step will be given as input to the model.
                },
        resources_per_trial={
            "cpu": 8,
            "gpu": 0.2
        },
        verbose=2,
        local_dir=local_dir
    )


if __name__ == '__main__':
    folder_name = 'config_example'  # To Modify
    gin.parse_config_file(os.path.join(__file_dirname__, 'experiment_configs', folder_name, 'config.gin'))
    dir = os.path.join(__file_dirname__, 'experiment_configs', folder_name)

    run_experiment(local_dir=dir)
setup.py
@@ -2,8 +2,7 @@ from setuptools import setup, find_packages
 install_reqs = []
 dependency_links = []
-# TODO: include requirements_RLLib_training.txt
-requirements_paths = ['requirements_torch_training.txt']  # , 'requirements_RLLib_training.txt']
+requirements_paths = ['requirements_torch_training.txt']
 for requirements_path in requirements_paths:
     with open(requirements_path, 'r') as f:
         install_reqs += [
...