Showing 306 additions and 332 deletions
import random

import gym
import numpy as np
import ray
import ray.rllib.agents.ppo.ppo as ppo
from RailEnvRLLibWrapper import RailEnvRLLibWrapper
from flatland.envs.generators import complex_rail_generator
from ray.rllib.agents.ppo.ppo import PPOTrainer
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph
from ray.rllib.models import ModelCatalog
from ray.tune.logger import pretty_print

from RLLib_training.custom_preprocessors import CustomPreprocessor

ModelCatalog.register_custom_preprocessor("my_prep", CustomPreprocessor)
ray.init()


def train(config):
    print('Init Env')
    random.seed(1)
    np.random.seed(1)

    # Transition probabilities per cell type (kept for reference; not used by complex_rail_generator below)
    transition_probability = [15,  # empty cell - Case 0
                              5,   # Case 1 - straight
                              5,   # Case 2 - simple switch
                              1,   # Case 3 - diamond crossing
                              1,   # Case 4 - single slip
                              1,   # Case 5 - double slip
                              1,   # Case 6 - symmetrical
                              0,   # Case 7 - dead end
                              1,   # Case 1b (8)  - simple turn right
                              1,   # Case 1c (9)  - simple turn left
                              1]   # Case 2b (10) - simple switch mirrored

    # Example: generate a random rail
    env_config = {"width": 20,
                  "height": 20,
                  "rail_generator": complex_rail_generator(nr_start_goal=5, min_dist=5, max_dist=99999, seed=0),
                  "number_of_agents": 5}

    obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(105,))
    act_space = gym.spaces.Discrete(4)

    # Dict with the different policies to train
    policy_graphs = {
        "ppo_policy": (PPOPolicyGraph, obs_space, act_space, {})
    }

    def policy_mapping_fn(agent_id):
        return "ppo_policy"

    agent_config = ppo.DEFAULT_CONFIG.copy()
    agent_config['model'] = {"fcnet_hiddens": [32, 32], "custom_preprocessor": "my_prep"}
    agent_config['multiagent'] = {"policy_graphs": policy_graphs,
                                  "policy_mapping_fn": policy_mapping_fn,
                                  "policies_to_train": list(policy_graphs.keys())}
    agent_config["horizon"] = 50
    agent_config["num_workers"] = 0
    # agent_config["sample_batch_size"] = 1000
    # agent_config["num_cpus_per_worker"] = 40
    # agent_config["num_gpus"] = 2.0
    # agent_config["num_gpus_per_worker"] = 2.0
    # agent_config["num_cpus_for_driver"] = 5
    # agent_config["num_envs_per_worker"] = 15
    agent_config["env_config"] = env_config
    # agent_config["batch_mode"] = "complete_episodes"

    ppo_trainer = PPOTrainer(env=RailEnvRLLibWrapper, config=agent_config)

    for i in range(100000 + 2):
        print("== Iteration", i, "==")
        print("-- PPO --")
        print(pretty_print(ppo_trainer.train()))
        # if i % config['save_every'] == 0:
        #     checkpoint = ppo_trainer.save()
        #     print("checkpoint saved at", checkpoint)


train({})
import os
import gin
import gym
from flatland.envs.predictions import DummyPredictorForRailEnv
from importlib_resources import path
# Import PPO trainer: we can replace these imports by any other trainer from RLLib.
from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG
from ray.rllib.agents.ppo.ppo import PPOTrainer as Trainer
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph as PolicyGraph
from ray.rllib.models import ModelCatalog
gin.external_configurable(DummyPredictorForRailEnv)
import ray
from ray.tune.logger import UnifiedLogger
from ray.tune.logger import pretty_print
from RailEnvRLLibWrapper import RailEnvRLLibWrapper
from custom_models import ConvModelGlobalObs
from custom_preprocessors import CustomPreprocessor, ConvModelPreprocessor
import tempfile
from ray import tune
from ray.rllib.utils.seed import seed as set_seed
from flatland.envs.observations import TreeObsForRailEnv, GlobalObsForRailEnv, \
LocalObsForRailEnv, GlobalObsForRailEnvDirectionDependent
gin.external_configurable(TreeObsForRailEnv)
gin.external_configurable(GlobalObsForRailEnv)
gin.external_configurable(LocalObsForRailEnv)
gin.external_configurable(GlobalObsForRailEnvDirectionDependent)
from ray.rllib.models.preprocessors import TupleFlatteningPreprocessor
import numpy as np
ModelCatalog.register_custom_preprocessor("tree_obs_prep", CustomPreprocessor)
ModelCatalog.register_custom_preprocessor("global_obs_prep", TupleFlatteningPreprocessor)
ModelCatalog.register_custom_preprocessor("conv_obs_prep", ConvModelPreprocessor)
ModelCatalog.register_custom_model("conv_model", ConvModelGlobalObs)
ray.init() # object_store_memory=150000000000, redis_max_memory=30000000000)
__file_dirname__ = os.path.dirname(os.path.realpath(__file__))
def on_episode_start(info):
    episode = info['episode']
    map_width = info['env'].envs[0].width
    map_height = info['env'].envs[0].height
    episode.horizon = map_width + map_height


# def on_episode_step(info):
#     episode = info['episode']
#     print('#########################', episode._agent_reward_history)


def on_episode_end(info):
    # Custom metric: total reward summed over all agents, normalized by
    # (number of agents * 3 * episode horizon).
    episode = info['episode']
    score = 0
    for k, v in episode._agent_reward_history.items():
        score += np.sum(v)
    score /= (len(episode._agent_reward_history) * 3 * episode.horizon)
    episode.custom_metrics["score"] = score
def train(config, reporter):
    print('Init Env')

    set_seed(config['seed'], config['seed'], config['seed'])
    config['map_height'] = config['map_width']

    # Example configuration to generate a random rail
    env_config = {"width": config['map_width'],
                  "height": config['map_height'],
                  "rail_generator": config["rail_generator"],
                  "nr_extra": config["nr_extra"],
                  "number_of_agents": config['n_agents'],
                  "seed": config['seed'],
                  "obs_builder": config['obs_builder'],
                  "min_dist": config['min_dist'],
                  # "predictor": config["predictor"],
                  "step_memory": config["step_memory"]}

    # Observation space and action space definitions
    if isinstance(config["obs_builder"], TreeObsForRailEnv):
        obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(168,)),))
        # gym.spaces.Box(low=0, high=1, shape=(config['n_agents'],)),
        # gym.spaces.Box(low=0, high=1, shape=(20, config['n_agents'])),) * config['step_memory'])
        # if config['predictor'] is None:
        #     obs_space = gym.spaces.Tuple(
        #         (gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(147,)),) * config['step_memory'])
        # else:
        #     obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(147,)),
        #                                   gym.spaces.Box(low=0, high=1, shape=(config['n_agents'],)),
        #                                   gym.spaces.Box(low=0, high=1, shape=(20, config['n_agents'])),) * config['step_memory'])
        preprocessor = "tree_obs_prep"

    elif isinstance(config["obs_builder"], GlobalObsForRailEnv):
        obs_space = gym.spaces.Tuple((
            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 16)),
            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 8)),
            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 2))))
        if config['conv_model']:
            preprocessor = "conv_obs_prep"
        else:
            preprocessor = "global_obs_prep"

    elif isinstance(config["obs_builder"], GlobalObsForRailEnvDirectionDependent):
        obs_space = gym.spaces.Tuple((
            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 16)),
            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 5)),
            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 2))))
        if config['conv_model']:
            preprocessor = "conv_obs_prep"
        else:
            preprocessor = "global_obs_prep"

    elif isinstance(config["obs_builder"], LocalObsForRailEnv):
        view_radius = config["obs_builder"].view_radius
        obs_space = gym.spaces.Tuple((
            gym.spaces.Box(low=0, high=1, shape=(2 * view_radius + 1, 2 * view_radius + 1, 16)),
            gym.spaces.Box(low=0, high=1, shape=(2 * view_radius + 1, 2 * view_radius + 1, 2)),
            gym.spaces.Box(low=0, high=1, shape=(2 * view_radius + 1, 2 * view_radius + 1, 4)),
            gym.spaces.Box(low=0, high=1, shape=(4,))))
        preprocessor = "global_obs_prep"

    else:
        raise ValueError("Undefined observation space")

    act_space = gym.spaces.Discrete(5)

    # Dict with the different policies to train
    policy_graphs = {
        config['policy_folder_name'].format(**locals()): (PolicyGraph, obs_space, act_space, {})
    }

    def policy_mapping_fn(agent_id):
        return config['policy_folder_name'].format(**locals())

    # Trainer configuration
    trainer_config = DEFAULT_CONFIG.copy()
    if config['conv_model']:
        trainer_config['model'] = {"custom_model": "conv_model", "custom_preprocessor": preprocessor}
    else:
        trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor}

    trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
                                    "policy_mapping_fn": policy_mapping_fn,
                                    "policies_to_train": list(policy_graphs.keys())}
    trainer_config["horizon"] = 1.5 * (config['map_width'] + config['map_height'])  # config['horizon']
    trainer_config["num_workers"] = 0
    trainer_config["num_cpus_per_worker"] = 7
    trainer_config["num_gpus"] = 0.0
    trainer_config["num_gpus_per_worker"] = 0.0
    trainer_config["num_cpus_for_driver"] = 1
    trainer_config["num_envs_per_worker"] = 1
    trainer_config['entropy_coeff'] = config['entropy_coeff']
    trainer_config["env_config"] = env_config
    trainer_config["batch_mode"] = "complete_episodes"
    trainer_config['simple_optimizer'] = False
    trainer_config['postprocess_inputs'] = True
    trainer_config['log_level'] = 'WARN'
    trainer_config['num_sgd_iter'] = 10
    trainer_config['clip_param'] = 0.2
    trainer_config['kl_coeff'] = config['kl_coeff']
    trainer_config['lambda'] = config['lambda_gae']
    trainer_config['callbacks'] = {
        "on_episode_start": tune.function(on_episode_start),
        "on_episode_end": tune.function(on_episode_end)
    }

    def logger_creator(conf):
        """Creates a Unified logger with a default logdir prefix
        containing the agent name and the env id.
        """
        logdir = config['policy_folder_name'].format(**locals())
        logdir = tempfile.mkdtemp(prefix=logdir, dir=config['local_dir'])
        return UnifiedLogger(conf, logdir, None)

    logger = logger_creator

    trainer = Trainer(env=RailEnvRLLibWrapper, config=trainer_config, logger_creator=logger)

    for i in range(100000 + 2):
        print("== Iteration", i, "==")
        print(pretty_print(trainer.train()))
        if i % config['save_every'] == 0:
            checkpoint = trainer.save()
            print("checkpoint saved at", checkpoint)
        reporter(num_iterations_trained=trainer._iteration)
@gin.configurable
def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                   map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder,
                   entropy_coeff, seed, conv_model, rail_generator, nr_extra, kl_coeff, lambda_gae,
                   step_memory, min_dist):
    tune.run(
        train,
        name=name,
        stop={"num_iterations_trained": num_iterations},
        config={"n_agents": n_agents,
                "hidden_sizes": hidden_sizes,  # Array containing the sizes of the network layers
                "save_every": save_every,
                "map_width": map_width,
                "map_height": map_height,
                "local_dir": local_dir,
                "horizon": horizon,  # Max number of time steps
                'policy_folder_name': policy_folder_name,
                "obs_builder": obs_builder,
                "entropy_coeff": entropy_coeff,
                "seed": seed,
                "conv_model": conv_model,
                "rail_generator": rail_generator,
                "nr_extra": nr_extra,
                "kl_coeff": kl_coeff,
                "lambda_gae": lambda_gae,
                "min_dist": min_dist,
                # "predictor": predictor,
                "step_memory": step_memory
                },
        resources_per_trial={
            "cpu": 8,
            "gpu": 0
        },
        verbose=2,
        local_dir=local_dir
    )


if __name__ == '__main__':
    gin.external_configurable(tune.grid_search)
    # with path('RLLib_training.experiment_configs.n_agents_experiment', 'config.gin') as f:
    #     gin.parse_config_file(f)
    gin.parse_config_file('/home/guillaume/flatland/baselines/RLLib_training/experiment_configs/score_metric_test/config.gin')
    dir = '/home/guillaume/flatland/baselines/RLLib_training/experiment_configs/score_metric_test'
    # dir = os.path.join(__file_dirname__, 'experiment_configs', 'experiment_agent_memory')
    run_experiment(local_dir=dir)
{'Test_0':[20,20,20,3],
'Test_1':[10,10,3,4321],
'Test_2':[10,10,5,123],
'Test_3':[50,50,5,21],
'Test_4':[50,50,20,85],
'Test_5':[100,100,5,436],
'Test_6':[100,100,20,6487],
'Test_7':[100,100,50,567],
'Test_8':[100,10,20,3245],
'Test_9':[10,100,20,632]
}
torch==1.1.0
git+https://gitlab.aicrowd.com/flatland/flatland.git
importlib-metadata>=0.17
importlib_resources>=1.0.2
torch>=1.1.0
import time

import numpy as np

from utils.misc_utils import RandomAgent, run_test

with open('parameters.txt', 'r') as inf:
    parameters = eval(inf.read())

# Parameter initialization
features_per_node = 9
tree_depth = 3
nodes = 0
for i in range(tree_depth + 1):
    nodes += np.power(4, i)
state_size = features_per_node * nodes * 2
action_size = 5
action_dict = dict()
nr_trials_per_test = 100
test_results = []
test_times = []
test_dones = []

agent = RandomAgent(state_size, action_size)

start_time_scoring = time.time()
test_idx = 0
score_board = []
for test_nr in parameters:
    current_parameters = parameters[test_nr]
    test_score, test_dones, test_time = run_test(current_parameters, agent, test_nr=test_idx)
    print('---------')
    print(' RESULTS')
    print('---------')
    print('{} score was {:.3f} with {:.2f}% environments solved. Test took {} Seconds to complete.\n\n\n'.format(
        test_nr,
        np.mean(test_score), np.mean(test_dones) * 100, test_time))
    test_idx += 1
    score_board.append([test_score, test_dones, test_times])
# Local Submission Scoring
The files in this repo are supposed to help you score your agent's behavior locally.
**WARNING**: This is not the actual submission scoring --> results will differ from the scores you achieve here. But the scoring setup is very similar to this one.
**Beta Stage**: The scoring function here is still under development, use it with caution.
## Introduction
This repo contains a very basic setup for testing your own agent/algorithm against the Flatland scoring setup.
The repo contains 4 important files:

- `generate_tests.py` Pre-generates the test files for faster testing
- `score_tests.py` Scores your agent on the generated test files
- `show_test.py` Shows samples of the generated test files
- `parameters.txt` Parameters for generating the test files --> these differ from the ones used in the challenge submission scoring

To score your agent locally, follow the steps below.
## Parameters used for Level generation
| Test Nr. | X-Dim | Y-Dim | Nr. Agents | Random Seed |
|:---------:|:------:|:------:|:-----------:|:------------:|
| Test 0 | 10 | 10 | 1 | 3 |
| Test 1 | 10 | 10 | 3 | 3 |
| Test 2 | 10 | 10 | 5 | 3 |
| Test 3 | 50 | 10 | 10 | 3 |
| Test 4 | 20 | 50 | 10 | 3 |
| Test 5 | 20 | 20 | 15 | 3 |
| Test 6 | 50 | 50 | 10 | 3 |
| Test 7 | 50 | 50 | 40 | 3 |
| Test 8 | 100 | 100 | 10 | 3 |
| Test 9 | 100 | 100 | 50 | 3 |
These can be changed if you would like to test your agent's behavior on different settings; the sketch below shows how each row maps to an entry in `parameters.txt`.
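
Each table row corresponds to one entry in `parameters.txt`, which the scripts in this repo read as a plain Python dict literal of the form `[x_dim, y_dim, n_agents, seed]`. A minimal sketch of how the file is loaded (mirroring what `generate_tests.py` and `score_tests.py` do):

```
# parameters.txt holds a Python dict literal: test name -> [x_dim, y_dim, n_agents, seed]
with open('parameters.txt', 'r') as inf:
    parameters = eval(inf.read())

x_dim, y_dim, n_agents, seed = parameters['Test_0']  # e.g. 10, 10, 1, 3
```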
## Generate the test files
To generate the set of test files, just run `python generate_tests.py`.
This generates pickle files of the levels to test on and places them in the corresponding folders.
## Run Test
To run the tests you have to modify the `score_tests.py` file to load your agent and the necessary predictor and observation builder.
The following lines have to be replaced by your code:
```
# Load your agent
agent = YourAgent
agent.load(Your_Checkpoint)
# Load the necessary Observation Builder and Predictor
predictor = ShortestPathPredictorForRailEnv()
observation_builder = TreeObsForRailEnv(max_depth=tree_depth, predictor=predictor)
```
The agent and the observation builder, as well as an observation wrapper, can be passed to the test function like this:
```
test_score, test_dones, test_time = run_test(current_parameters, agent, observation_builder=your_observation_builder,
observation_wrapper=your_observation_wrapper,
test_nr=test_nr, nr_trials_per_test=10)
```
In order to speed up the test time you can limit the number of trials per test (`nr_trials_per_test=10`). After you have made these changes to the file you can run `python score_tests.py`, which will produce an output similar to this:
```
Running Test_0 with (x_dim,y_dim) = (10,10) and 1 Agents.
Progress: |********************| 100.0% Complete
Test_0 score was -0.380 with 100.00% environments solved. Test took 0.62 Seconds to complete.
Running Test_1 with (x_dim,y_dim) = (10,10) and 3 Agents.
Progress: |********************| 100.0% Complete
Test_1 score was -1.540 with 80.00% environments solved. Test took 2.67 Seconds to complete.
Running Test_2 with (x_dim,y_dim) = (10,10) and 5 Agents.
Progress: |********************| 100.0% Complete
Test_2 score was -2.460 with 80.00% environments solved. Test took 4.48 Seconds to complete.
Running Test_3 with (x_dim,y_dim) = (50,10) and 10 Agents.
Progress: |**__________________| 10.0% Complete
```
The score is computed by
```
score = sum(mean(all_rewards))/max_steps
```
i.e., the mean reward over all agents is taken at each time step, these means are summed over all time steps, and the result is normalized by the maximum number of allowed steps for the level size. The maximum number of allowed steps is
```
max_steps = mult_factor * (env.height+env.width)
```
where `mult_factor` is a multiplication factor that allows more time when the difficulty is too high.
The number of solved envs is the percentage of episodes that terminated with all agents done.
How these two numbers are combined into your final score will be posted on the [flatland page](https://www.aicrowd.com/organizers/sbb/challenges/flatland-challenge).
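
For illustration, the sketch below reproduces this scoring rule in plain Python. The function name, the argument layout, and the `mult_factor=1.5` default are assumptions made for the example (the RLLib training script above uses `1.5 * (width + height)` as its horizon), not the exact implementation in `utils/misc_utils.py`:

```
import numpy as np

def episode_score(all_rewards, env_height, env_width, mult_factor=1.5):
    """all_rewards: one list of per-agent rewards for every time step of the episode."""
    max_steps = int(mult_factor * (env_height + env_width))   # time budget for this level size
    step_means = [np.mean(step) for step in all_rewards]      # mean reward over agents, per step
    return np.sum(step_means) / max_steps                     # normalize by the allowed steps

# Example: a 10x10 level, two agents, three time steps -> (-1 - 0.5 + 0) / 30 = -0.05
print(episode_score([[-1, -1], [-1, 0], [0, 0]], 10, 10))
```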
## Ignore everything in this directory
*
# Except this file
!.gitignore
import time

import numpy as np

from utils.misc_utils import create_testfiles

with open('parameters.txt', 'r') as inf:
    parameters = eval(inf.read())

# Parameter initialization
features_per_node = 9
tree_depth = 3
nodes = 0
for i in range(tree_depth + 1):
    nodes += np.power(4, i)
state_size = features_per_node * nodes * 2
action_size = 5
action_dict = dict()
nr_trials_per_test = 100

test_idx = 0
for test_nr in parameters:
    current_parameters = parameters[test_nr]
    create_testfiles(current_parameters, test_nr, nr_trials_per_test=100)
{'Test_0':[10,10,1,3],
'Test_1':[10,10,3,3],
'Test_2':[10,10,5,3],
'Test_3':[50,10,10,3],
'Test_4':[20,50,10,3],
'Test_5':[20,20,15,3],
'Test_6':[50,50,10,3],
'Test_7':[50,50,40,3],
'Test_8':[100,100,10,3],
'Test_9':[100,100,50,3]
}
import time

import numpy as np
import torch
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv

from torch_training.dueling_double_dqn import Agent
from scoring.utils.misc_utils import run_test
from utils.observation_utils import normalize_observation

with open('parameters.txt', 'r') as inf:
    parameters = eval(inf.read())

# Parameter initialization
features_per_node = 9
tree_depth = 3
nodes = 0
for i in range(tree_depth + 1):
    nodes += np.power(4, i)
state_size = features_per_node * nodes
action_size = 5
action_dict = dict()
nr_trials_per_test = 100
test_results = []
test_times = []
test_dones = []
sequential_agent_test = False

# Load your agent
agent = Agent(state_size, action_size)
agent.qnetwork_local.load_state_dict(torch.load('../torch_training/Nets/avoid_checkpoint500.pth'))

# Load the necessary Observation Builder and Predictor
predictor = ShortestPathPredictorForRailEnv()
observation_builder = TreeObsForRailEnv(max_depth=tree_depth, predictor=predictor)

start_time_scoring = time.time()
score_board = []
for test_nr in parameters:
    current_parameters = parameters[test_nr]
    test_score, test_dones, test_time = run_test(current_parameters, agent, observation_builder=observation_builder,
                                                 observation_wrapper=normalize_observation,
                                                 test_nr=test_nr, nr_trials_per_test=10)
    print('{} score was {:.3f} with {:.2f}% environments solved. Test took {:.2f} Seconds to complete.\n'.format(
        test_nr,
        np.mean(test_score), np.mean(test_dones) * 100, test_time))
    score_board.append([np.mean(test_score), np.mean(test_dones) * 100, test_time])

print('---------')
print(' RESULTS')
print('---------')
test_idx = 0
for test_nr in parameters:
    print('{} score was {:.3f}\twith {:.2f}% environments solved.\tTest took {:.2f} Seconds to complete.'.format(
        test_nr, score_board[test_idx][0], score_board[test_idx][1], score_board[test_idx][2]))
    test_idx += 1
import time

import numpy as np

from utils.misc_utils import render_test

with open('parameters.txt', 'r') as inf:
    parameters = eval(inf.read())

# Parameter initialization
features_per_node = 9
tree_depth = 3
nodes = 0
for i in range(tree_depth + 1):
    nodes += np.power(4, i)
state_size = features_per_node * nodes * 2
action_size = 5
action_dict = dict()
nr_trials_per_test = 100

test_idx = 0
for test_nr in parameters:
    current_parameters = parameters[test_nr]
    render_test(current_parameters, test_nr, nr_examples=2)