Commit 1c649d43 authored by Erik Nygren

Merge branch 'master' of gitlab.aicrowd.com:flatland/baselines

parents 1ae9854f c527e1b6
import numpy as np
from ray.rllib.models.preprocessors import Preprocessor


def max_lt(seq, val):
    """
    Return the greatest non-negative item in seq that is strictly smaller than val.
    Returns 0 if seq is empty or no item satisfies the condition.
    """
    max_val = 0
    idx = len(seq) - 1
    while idx >= 0:
        if seq[idx] < val and seq[idx] >= 0 and seq[idx] > max_val:
            max_val = seq[idx]
        idx -= 1
    return max_val


def min_lt(seq, val):
    """
    Return the smallest item in seq that is greater than or equal to val.
    Returns np.inf if seq is empty or no item satisfies the condition.
    """
    min_val = np.inf
    idx = len(seq) - 1
    while idx >= 0:
        if seq[idx] >= val and seq[idx] < min_val:
            min_val = seq[idx]
        idx -= 1
    return min_val


def norm_obs_clip(obs, clip_min=-1, clip_max=1):
    """
    Normalize an observation by the span between its relevant minimum and maximum
    values, then clip the result to [clip_min, clip_max].

    :param obs: observation to be normalized
    :param clip_min: lower bound to which the normalized observation is clipped
    :param clip_max: upper bound to which the normalized observation is clipped
    :return: normalized and clipped observation
    """
    max_obs = max(1, max_lt(obs, 1000))
    min_obs = min(max_obs, min_lt(obs, 0))
    if max_obs == min_obs:
        return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
    norm = np.abs(max_obs - min_obs)
    if norm == 0:
        norm = 1.
    return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
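A quick usage sketch of these helpers (the example values are made up, not part of the commit): entries of 1000 or more are ignored when determining the scale, the smallest non-negative entry becomes the offset, and everything is mapped into [clip_min, clip_max].

```python
raw_obs = [5, 12, 0, 1000, 7]        # 1000 stands in for an "unreachable" entry
print(norm_obs_clip(raw_obs))
# -> approximately [0.417, 1.0, 0.0, 1.0, 0.583]; the 1000 entry is clipped to clip_max
```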
from utils.observation_utils import norm_obs_clip


class TreeObsPreprocessor(Preprocessor):
    def _init_shape(self, obs_space, options):
......
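The preprocessor body is collapsed in this diff. As a rough, hypothetical sketch of how a custom RLlib preprocessor of this era is typically wired up (class name, shape logic, and normalization below are illustrative assumptions, not the repository's actual implementation):

```python
import numpy as np
from ray.rllib.models import ModelCatalog
from ray.rllib.models.preprocessors import Preprocessor
from utils.observation_utils import norm_obs_clip


class ExampleTreeObsPreprocessor(Preprocessor):
    """Illustrative only: flattens and normalizes a stacked tree observation."""

    def _init_shape(self, obs_space, options):
        # "obs_size" and "step_memory" arrive via "custom_options" in the
        # trainer's model config (see the trainer_config change further down).
        self._obs_size = options["custom_options"]["obs_size"]
        self._step_memory = options["custom_options"]["step_memory"]
        return (self._obs_size * self._step_memory,)

    def transform(self, observation):
        # Normalize each stacked observation and concatenate them into the
        # single flat vector declared by _init_shape.
        return np.concatenate([norm_obs_clip(np.array(o).flatten()) for o in observation])


# Registered under a name so a trainer config can reference it via
# "custom_preprocessor"; the repository presumably registers its own class
# as "tree_obs_prep".
ModelCatalog.register_custom_preprocessor("example_tree_obs_prep", ExampleTreeObsPreprocessor)
```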
......@@ -5,7 +5,7 @@ run_experiment.hidden_sizes = [32, 32]
run_experiment.map_width = 20
run_experiment.map_height = 20
-run_experiment.n_agents = 7 #{"grid_search": [3, 4, 5, 6, 7, 8]}
+run_experiment.n_agents = {"grid_search": [3, 4, 5, 6, 7, 8]}
run_experiment.rail_generator = "complex_rail_generator" # Change this to "load_env" in order to load a predefined complex scene
run_experiment.nr_extra = 5
run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"
......
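The replacement value is Ray Tune's grid-search specification: assuming the config dict is forwarded to tune.run, every listed agent count becomes its own trial. The literal dict is exactly what tune.grid_search builds:

```python
from ray import tune

# The config file's literal dict is the same structure tune.grid_search produces.
assert tune.grid_search([3, 4, 5, 6, 7, 8]) == {"grid_search": [3, 4, 5, 6, 7, 8]}
```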
......@@ -54,11 +54,12 @@ def on_episode_end(info):
for k, v in episode._agent_reward_history.items():
score += np.sum(v)
score /= (len(episode._agent_reward_history) * episode.horizon)
# Calculation of the proportion of solved episodes before the maximum time step
-done = 1
-if len(episode._agent_reward_history[0]) == episode.horizon:
-    done = 0
+done = 0
+if len(episode._agent_reward_history[0]) <= episode.horizon - 5:
+    done = 1
episode.custom_metrics["score"] = score
episode.custom_metrics["proportion_episode_solved"] = done
......@@ -68,6 +69,15 @@ def train(config, reporter):
set_seed(config['seed'], config['seed'], config['seed'])
# Given the depth of the tree observation and the number of features per node, we get the following state_size
num_features_per_node = config['obs_builder'].observation_dim
tree_depth = 2
nr_nodes = 0
for i in range(tree_depth + 1):
nr_nodes += np.power(4, i)
obs_size = num_features_per_node * nr_nodes
# Environment parameters
env_config = {"width": config['map_width'],
"height": config['map_height'],
......@@ -81,7 +91,7 @@ def train(config, reporter):
# Observation space and action space definitions
if isinstance(config["obs_builder"], TreeObsForRailEnv):
-obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(168,)),) * 2)
+obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(obs_size,)),) * 2)
preprocessor = "tree_obs_prep"
else:
raise ValueError("Undefined observation space") # Only TreeObservation implemented for now.
......@@ -100,7 +110,7 @@ def train(config, reporter):
# Trainer configuration
trainer_config = DEFAULT_CONFIG.copy()
trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor,
"custom_options": {"step_memory": config["step_memory"]}}
"custom_options": {"step_memory": config["step_memory"], "obs_size": obs_size}}
trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
"policy_mapping_fn": policy_mapping_fn,
......@@ -111,9 +121,9 @@ def train(config, reporter):
# Parameters for calculation parallelization
trainer_config["num_workers"] = 0
trainer_config["num_cpus_per_worker"] = 3
trainer_config["num_gpus"] = 0.0
trainer_config["num_gpus_per_worker"] = 0.0
trainer_config["num_cpus_per_worker"] = 8
trainer_config["num_gpus"] = 0.2
trainer_config["num_gpus_per_worker"] = 0.2
trainer_config["num_cpus_for_driver"] = 1
trainer_config["num_envs_per_worker"] = 1
......@@ -185,8 +195,8 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
# the observation of the last time step will be given as input to the model.
},
resources_per_trial={
"cpu": 3,
"gpu": 0
"cpu": 8,
"gpu": 0.2
},
verbose=2,
local_dir=local_dir
......
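Both resource changes give each worker and trial a 0.2 GPU share. In Ray, fractional GPU requests only reserve a bookkeeping share so several trials can be packed onto one physical GPU; memory is not partitioned. A minimal, version-dependent sketch (assumes a GPU is actually available and uses the old function-trainable API with a reporter argument):

```python
import ray
from ray import tune


def dummy_trainable(config, reporter):
    # Stand-in for the train() function in this diff; reports a single metric.
    reporter(episode_reward_mean=0.0)


ray.init()
# With "gpu": 0.2 per trial, up to five trials can run concurrently on one GPU.
tune.run(dummy_trainable, resources_per_trial={"cpu": 8, "gpu": 0.2})
```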
......@@ -105,7 +105,7 @@ We have now successfully set up the environment for training. To visualize it in
env_renderer = RenderTool(env, gl="PILSVG", )
```
-###Setting up the agent
+### Setting up the agent
To set up an appropriate agent we need the state and action space sizes. From the discussion above about the tree observation we end up with:
......