Commit 1c649d43 authored by Erik Nygren

Merge branch 'master' of gitlab.aicrowd.com:flatland/baselines

parents 1ae9854f c527e1b6
import numpy as np
from ray.rllib.models.preprocessors import Preprocessor


def max_lt(seq, val):
    """
    Return the greatest non-negative item in seq that is strictly smaller than val.
    Returns 0 if seq is empty or no item satisfies the condition.
    """
    max_val = 0
    idx = len(seq) - 1
    while idx >= 0:
        if seq[idx] < val and seq[idx] >= 0 and seq[idx] > max_val:
            max_val = seq[idx]
        idx -= 1
    return max_val


def min_lt(seq, val):
    """
    Return the smallest item in seq that is greater than or equal to val.
    Returns np.inf if seq is empty or no item satisfies the condition.
    """
    min_val = np.inf
    idx = len(seq) - 1
    while idx >= 0:
        if seq[idx] >= val and seq[idx] < min_val:
            min_val = seq[idx]
        idx -= 1
    return min_val


def norm_obs_clip(obs, clip_min=-1, clip_max=1):
    """
    Normalize an observation by the span between its relevant minimum and maximum
    values, then clip the result to [clip_min, clip_max].

    :param obs: observation to be normalized
    :param clip_min: lower bound to which the normalized observation is clipped
    :param clip_max: upper bound to which the normalized observation is clipped
    :return: normalized and clipped observation
    """
    max_obs = max(1, max_lt(obs, 1000))
    min_obs = min(max_obs, min_lt(obs, 0))
    if max_obs == min_obs:
        return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
    norm = np.abs(max_obs - min_obs)
    if norm == 0:
        norm = 1.
    return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
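A quick usage sketch of these helpers (the example values are made up, not part of the commit): entries of 1000 or more are ignored when determining the scale, the smallest non-negative entry becomes the offset, and everything is mapped into [clip_min, clip_max].

```python
raw_obs = [5, 12, 0, 1000, 7]        # 1000 stands in for an "unreachable" entry
print(norm_obs_clip(raw_obs))
# -> approximately [0.417, 1.0, 0.0, 1.0, 0.583]; the 1000 entry is clipped to clip_max
```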
from utils.observation_utils import norm_obs_clip


class TreeObsPreprocessor(Preprocessor):
    def _init_shape(self, obs_space, options):
......
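The preprocessor body is collapsed in this diff. As a rough, hypothetical sketch of how a custom RLlib preprocessor of this era is typically wired up (class name, shape logic, and normalization below are illustrative assumptions, not the repository's actual implementation):

```python
import numpy as np
from ray.rllib.models import ModelCatalog
from ray.rllib.models.preprocessors import Preprocessor
from utils.observation_utils import norm_obs_clip


class ExampleTreeObsPreprocessor(Preprocessor):
    """Illustrative only: flattens and normalizes a stacked tree observation."""

    def _init_shape(self, obs_space, options):
        # "obs_size" and "step_memory" arrive via "custom_options" in the
        # trainer's model config (see the trainer_config change further down).
        self._obs_size = options["custom_options"]["obs_size"]
        self._step_memory = options["custom_options"]["step_memory"]
        return (self._obs_size * self._step_memory,)

    def transform(self, observation):
        # Normalize each stacked observation and concatenate them into the
        # single flat vector declared by _init_shape.
        return np.concatenate([norm_obs_clip(np.array(o).flatten()) for o in observation])


# Registered under a name so a trainer config can reference it via
# "custom_preprocessor"; the repository presumably registers its own class
# as "tree_obs_prep".
ModelCatalog.register_custom_preprocessor("example_tree_obs_prep", ExampleTreeObsPreprocessor)
```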
......@@ -5,7 +5,7 @@ run_experiment.hidden_sizes = [32, 32]
run_experiment.map_width = 20
run_experiment.map_height = 20
-run_experiment.n_agents = 7 #{"grid_search": [3, 4, 5, 6, 7, 8]}
+run_experiment.n_agents = {"grid_search": [3, 4, 5, 6, 7, 8]}
run_experiment.rail_generator = "complex_rail_generator" # Change this to "load_env" in order to load a predefined complex scene
run_experiment.nr_extra = 5
run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"
......
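The replacement value is Ray Tune's grid-search specification: assuming the config dict is forwarded to tune.run, every listed agent count becomes its own trial. The literal dict is exactly what tune.grid_search builds:

```python
from ray import tune

# The config file's literal dict is the same structure tune.grid_search produces.
assert tune.grid_search([3, 4, 5, 6, 7, 8]) == {"grid_search": [3, 4, 5, 6, 7, 8]}
```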
......@@ -54,11 +54,12 @@ def on_episode_end(info):
for k, v in episode._agent_reward_history.items():
score += np.sum(v)
score /= (len(episode._agent_reward_history) * episode.horizon)
# Calculation of the proportion of solved episodes before the maximum time step
-done = 1
-if len(episode._agent_reward_history[0]) == episode.horizon:
-    done = 0
+done = 0
+if len(episode._agent_reward_history[0]) <= episode.horizon - 5:
+    done = 1
episode.custom_metrics["score"] = score
episode.custom_metrics["proportion_episode_solved"] = done
......@@ -68,6 +69,15 @@ def train(config, reporter):
set_seed(config['seed'], config['seed'], config['seed'])
# Given the depth of the tree observation and the number of features per node, we get the following state_size
num_features_per_node = config['obs_builder'].observation_dim
tree_depth = 2
nr_nodes = 0
for i in range(tree_depth + 1):
nr_nodes += np.power(4, i)
obs_size = num_features_per_node * nr_nodes
# Environment parameters
env_config = {"width": config['map_width'],
"height": config['map_height'],
......@@ -81,7 +91,7 @@ def train(config, reporter):
# Observation space and action space definitions
if isinstance(config["obs_builder"], TreeObsForRailEnv):
-obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(168,)),) * 2)
+obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(obs_size,)),) * 2)
preprocessor = "tree_obs_prep"
else:
raise ValueError("Undefined observation space") # Only TreeObservation implemented for now.
......@@ -100,7 +110,7 @@ def train(config, reporter):
# Trainer configuration
trainer_config = DEFAULT_CONFIG.copy()
trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor,
"custom_options": {"step_memory": config["step_memory"]}}
"custom_options": {"step_memory": config["step_memory"], "obs_size": obs_size}}
trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
"policy_mapping_fn": policy_mapping_fn,
......@@ -111,9 +121,9 @@ def train(config, reporter):
# Parameters for calculation parallelization
trainer_config["num_workers"] = 0
trainer_config["num_cpus_per_worker"] = 3
trainer_config["num_gpus"] = 0.0
trainer_config["num_gpus_per_worker"] = 0.0
trainer_config["num_cpus_per_worker"] = 8
trainer_config["num_gpus"] = 0.2
trainer_config["num_gpus_per_worker"] = 0.2
trainer_config["num_cpus_for_driver"] = 1
trainer_config["num_envs_per_worker"] = 1
......@@ -185,8 +195,8 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
# the observation of the last time step will be given as input to the model.
},
resources_per_trial={
"cpu": 3,
"gpu": 0
"cpu": 8,
"gpu": 0.2
},
verbose=2,
local_dir=local_dir
......
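Both resource changes give each worker and trial a 0.2 GPU share. In Ray, fractional GPU requests only reserve a bookkeeping share so several trials can be packed onto one physical GPU; memory is not partitioned. A minimal, version-dependent sketch (assumes a GPU is actually available and uses the old function-trainable API with a reporter argument):

```python
import ray
from ray import tune


def dummy_trainable(config, reporter):
    # Stand-in for the train() function in this diff; reports a single metric.
    reporter(episode_reward_mean=0.0)


ray.init()
# With "gpu": 0.2 per trial, up to five trials can run concurrently on one GPU.
tune.run(dummy_trainable, resources_per_trial={"cpu": 8, "gpu": 0.2})
```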
......@@ -105,7 +105,7 @@ We have now successfully set up the environment for training. To visualize it in
env_renderer = RenderTool(env, gl="PILSVG", )
```
-###Setting up the agent
+### Setting up the agent
To set up an appropriate agent we need the state and action space sizes. From the discussion above about the tree observation we end up with:
......