Commit 540dce24 authored by gmollard

env complexity benchmark

parent 85f9e04b
@@ -3,6 +3,7 @@ from ray.rllib.env.multi_agent_env import MultiAgentEnv
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.generators import random_rail_generator
from ray.rllib.utils.seed import seed as set_seed
from flatland.envs.generators import complex_rail_generator, random_rail_generator
import numpy as np
@@ -19,14 +20,18 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
vector_index = config.vector_index
else:
vector_index = 1
#self.rail_generator = config["rail_generator"](nr_start_goal=config['number_of_agents'], min_dist=5,
# nr_extra=30, seed=config['seed'] * (1+vector_index))
if config['rail_generator'] == "complex_rail_generator":
self.rail_generator = complex_rail_generator(nr_start_goal=config['number_of_agents'], min_dist=5,
nr_extra=config['nr_extra'], seed=config['seed'] * (1+vector_index))
else:
self.rail_generator = random_rail_generator()
set_seed(config['seed'] * (1+vector_index))
#self.env = RailEnv(width=config["width"], height=config["height"],
self.env = RailEnv(width=10, height=20,
self.env = RailEnv(width=config["width"], height=config["height"],
number_of_agents=config["number_of_agents"], obs_builder_object=config['obs_builder'])
self.env.load('/mount/SDC/flatland/baselines/torch_training/railway/complex_scene.pkl')
# self.env.load('/home/guillaume/EPFL/Master_Thesis/flatland/baselines/torch_training/railway/complex_scene.pkl')
self.width = self.env.width
self.height = self.env.height
@@ -35,7 +40,7 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
def reset(self):
self.agents_done = []
obs = self.env.reset(False, False)
obs = self.env.reset()
o = dict()
# o['agents'] = obs
# obs[0] = [obs[0], np.ones((17, 17)) * 17]
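Since the wrapper subclasses RLLib's MultiAgentEnv, reset and step have to hand back per-agent dicts keyed by agent handle. A shape-only illustration of that contract (the values are arbitrary and this is not the actual file contents):

import numpy as np

obs     = {0: np.zeros(147), 1: np.zeros(147)}    # one entry per still-active agent
rewards = {0: -1.0, 1: -1.0}
dones   = {0: False, 1: True, "__all__": False}   # "__all__" ends the episode for every agent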
......
import numpy as np
from ray.rllib.models.preprocessors import Preprocessor
def max_lt(seq, val):
"""
Return greatest item in seq for which item < val applies.
@@ -36,27 +35,27 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1):
:param obs: Observation that should be normalized
:param clip_min: min value where observation will be clipped
:param clip_max: max value where observation will be clipped
:return: returns normalized and clipped observation
:return: returnes normalized and clipped observatoin
"""
max_obs = max(1, max_lt(obs, 1000))
min_obs = max(0, min_lt(obs, 0))
if max_obs == min_obs:
return np.clip(np.array(obs)/ max_obs, clip_min, clip_max)
return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
norm = np.abs(max_obs - min_obs)
if norm == 0:
norm = 1.
return np.clip((np.array(obs)-min_obs)/ norm, clip_min, clip_max)
return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
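A quick usage sketch for the helper above, assuming norm_obs_clip from this file is in scope: tree-observation distances can be large or infinite, and the function rescales them by the greatest finite entry below 1000 before clipping into [clip_min, clip_max]. Example values are arbitrary.

import numpy as np

raw = np.array([3., 12., np.inf, 7.])   # distances; inf marks an unreachable branch
scaled = norm_obs_clip(raw)             # rescaled, then clipped to the default [-1, 1]
assert scaled.min() >= -1 and scaled.max() <= 1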
class CustomPreprocessor(Preprocessor):
def _init_shape(self, obs_space, options):
return (111,)
return obs_space.shape
def transform(self, observation):
if len(observation) == 111:
return norm_obs_clip(observation)
else:
return observation
# if len(observation) == 111:
return norm_obs_clip(observation)
# else:
# return observation
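For context, RLLib looks custom preprocessors up by name; the training script below refers to "tree_obs_prep". A minimal sketch of the registration (the exact place where this repo performs it is not shown in this diff):

from ray.rllib.models import ModelCatalog

# Make the class available under the name used in the trainer config.
ModelCatalog.register_custom_preprocessor("tree_obs_prep", CustomPreprocessor)

# Selected per policy through the model config, e.g.:
# "model": {"custom_preprocessor": "tree_obs_prep"}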
class ConvModelPreprocessor(Preprocessor):
......
run_experiment.name = "observation_benchmark_results"
run_experiment.num_iterations = 1002
run_experiment.save_every = 50
run_experiment.hidden_sizes = [32, 32]
run_experiment.map_width = 20
run_experiment.map_height = 10
run_experiment.n_agents = 8
run_experiment.rail_generator = "complex_rail_generator"
run_experiment.nr_extra = {"grid_search": [10, 20, 30, 40]}
run_experiment.policy_folder_name = "ppo_policy_nr_extra_{config[nr_extra]}_"
run_experiment.horizon = 50
run_experiment.seed = 123
#run_experiment.conv_model = {"grid_search": [True, False]}
run_experiment.conv_model = False
#run_experiment.obs_builder = {"grid_search": [@GlobalObsForRailEnv(), @GlobalObsForRailEnvDirectionDependent]}# [@TreeObsForRailEnv(), @GlobalObsForRailEnv() ]}
run_experiment.obs_builder = @TreeObsForRailEnv()
TreeObsForRailEnv.max_depth = 2
LocalObsForRailEnv.view_radius = 5
run_experiment.entropy_coeff = 0.01
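The grid_search over nr_extra makes tune launch one trial per listed value, and each trial presumably fills in its own policy folder name from the template above via plain str.format. Shown here outside of tune, with the values from this config:

template = "ppo_policy_nr_extra_{config[nr_extra]}_"
for nr_extra in [10, 20, 30, 40]:
    print(template.format(config={"nr_extra": nr_extra}))
# -> ppo_policy_nr_extra_10_ ... ppo_policy_nr_extra_40_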
@@ -4,8 +4,8 @@ run_experiment.save_every = 50
run_experiment.hidden_sizes = [32, 32]
run_experiment.map_width = 20
run_experiment.map_height = 20
run_experiment.n_agents = 5
run_experiment.map_height = 10
run_experiment.n_agents = 8
run_experiment.policy_folder_name = "ppo_policy_{config[obs_builder].__class__.__name__}"#_entropy_coeff_{config[entropy_coeff]}_{config[hidden_sizes][0]}_hidden_sizes_"
run_experiment.horizon = 50
......
from baselines.RLLib_training.RailEnvRLLibWrapper import RailEnvRLLibWrapper
import gym
import gin
from flatland.envs.generators import complex_rail_generator
# Import PPO trainer: we can replace these imports by any other trainer from RLLib.
from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG
from ray.rllib.agents.ppo.ppo import PPOTrainer as Trainer
@@ -25,8 +25,6 @@ import numpy as np
from ray.tune.logger import UnifiedLogger
import tempfile
import gin
from ray import tune
from ray.rllib.utils.seed import seed as set_seed
@@ -59,14 +57,15 @@ def train(config, reporter):
# Example configuration to generate a random rail
env_config = {"width": config['map_width'],
"height": config['map_height'],
"rail_generator": complex_rail_generator,
"rail_generator": config["rail_generator"],
"nr_extra": config["nr_extra"],
"number_of_agents": config['n_agents'],
"seed": config['seed'],
"obs_builder": config['obs_builder']}
# Observation space and action space definitions
if isinstance(config["obs_builder"], TreeObsForRailEnv):
obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(111,))
obs_space = gym.spaces.Box(low=-1, high=1, shape=(147,))
preprocessor = "tree_obs_prep"
elif isinstance(config["obs_builder"], GlobalObsForRailEnv):
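Side note on the shape change from (111,) to (147,): the tighter [-1, 1] bounds match the clipping done by norm_obs_clip, and the new size plausibly corresponds to a depth-2 observation tree, assuming 7 features per node (that feature count is an assumption, not something stated in this diff):

max_depth = 2
num_nodes = sum(4 ** d for d in range(max_depth + 1))   # 1 + 4 + 16 = 21 nodes
print(num_nodes * 7)                                     # 147, assuming 7 features per node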
@@ -166,7 +165,7 @@ def train(config, reporter):
@gin.configurable
def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder,
entropy_coeff, seed, conv_model):
entropy_coeff, seed, conv_model, rail_generator, nr_extra):
tune.run(
train,
@@ -183,7 +182,9 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
"obs_builder": obs_builder,
"entropy_coeff": entropy_coeff,
"seed": seed,
"conv_model": conv_model
"conv_model": conv_model,
"rail_generator": rail_generator,
"nr_extra": nr_extra
},
resources_per_trial={
"cpu": 2,
@@ -195,6 +196,6 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
if __name__ == '__main__':
gin.external_configurable(tune.grid_search)
dir = '/mount/SDC/flatland/baselines/RLLib_training/experiment_configs/observation_benchmark_loaded_env' # To Modify
dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/env_complexity_benchmark' # To Modify
gin.parse_config_file(dir + '/config.gin')
run_experiment(local_dir=dir)
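run_experiment is called with only local_dir because gin supplies the remaining keyword arguments: parse_config_file reads the bindings from config.gin (the files shown earlier) and injects them into the @gin.configurable function. A minimal self-contained illustration of that mechanism, with a hypothetical function and bindings:

import gin

@gin.configurable
def build_trial(map_width, map_height, n_agents):
    return {"width": map_width, "height": map_height, "agents": n_agents}

gin.parse_config("""
build_trial.map_width = 20
build_trial.map_height = 10
build_trial.n_agents = 8
""")

print(build_trial())   # {'width': 20, 'height': 10, 'agents': 8}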