diff --git a/RLLib_training/custom_preprocessors.py b/RLLib_training/custom_preprocessors.py
index bb6b21e316e72177dfad54f2f02d3ac527810520..d4c81a83f1c05317315a3f71f99565006e9311e1 100644
--- a/RLLib_training/custom_preprocessors.py
+++ b/RLLib_training/custom_preprocessors.py
@@ -1,52 +1,6 @@
 import numpy as np
 from ray.rllib.models.preprocessors import Preprocessor
-
-def max_lt(seq, val):
-    """
-    Return greatest item in seq for which item < val applies.
-    None is returned if seq was empty or all items in seq were >= val.
-    """
-    max = 0
-    idx = len(seq) - 1
-    while idx >= 0:
-        if seq[idx] < val and seq[idx] >= 0 and seq[idx] > max:
-            max = seq[idx]
-        idx -= 1
-    return max
-
-
-def min_lt(seq, val):
-    """
-    Return smallest item in seq for which item > val applies.
-    None is returned if seq was empty or all items in seq were >= val.
-    """
-    min = np.inf
-    idx = len(seq) - 1
-    while idx >= 0:
-        if seq[idx] >= val and seq[idx] < min:
-            min = seq[idx]
-        idx -= 1
-    return min
-
-
-def norm_obs_clip(obs, clip_min=-1, clip_max=1):
-    """
-    This function returns the difference between min and max value of an observation
-    :param obs: Observation that should be normalized
-    :param clip_min: min value where observation will be clipped
-    :param clip_max: max value where observation will be clipped
-    :return: returnes normalized and clipped observatoin
-    """
-    max_obs = max(1, max_lt(obs, 1000))
-    min_obs = min(max_obs, min_lt(obs, 0))
-
-    if max_obs == min_obs:
-        return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
-    norm = np.abs(max_obs - min_obs)
-    if norm == 0:
-        norm = 1.
-    return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
-
+from utils.observation_utils import norm_obs_clip
 
 class TreeObsPreprocessor(Preprocessor):
     def _init_shape(self, obs_space, options):
diff --git a/RLLib_training/experiment_configs/config_example/config.gin b/RLLib_training/experiment_configs/config_example/config.gin
index 22f807893eaee57a52e860e1d227c48dedb724ad..59d2dfb508f13cccf4b9152f24ab06d44c290450 100644
--- a/RLLib_training/experiment_configs/config_example/config.gin
+++ b/RLLib_training/experiment_configs/config_example/config.gin
@@ -5,7 +5,7 @@ run_experiment.hidden_sizes = [32, 32]
 
 run_experiment.map_width = 20
 run_experiment.map_height = 20
-run_experiment.n_agents = 7 #{"grid_search": [3, 4, 5, 6, 7, 8]}
+run_experiment.n_agents = {"grid_search": [3, 4, 5, 6, 7, 8]}
 run_experiment.rail_generator = "complex_rail_generator" # Change this to "load_env" in order to load a predefined complex scene
 run_experiment.nr_extra = 5
 run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"
diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
index c4c3be578b50608b962f0fd807bf213ed9990c4b..7435a8fed728ec363321ba7a2bcf04b186513559 100644
--- a/RLLib_training/train_experiment.py
+++ b/RLLib_training/train_experiment.py
@@ -54,11 +54,12 @@ def on_episode_end(info):
     for k, v in episode._agent_reward_history.items():
         score += np.sum(v)
     score /= (len(episode._agent_reward_history) * episode.horizon)
-
+
     # Calculation of the proportion of solved episodes before the maximum time step
-    done = 1
-    if len(episode._agent_reward_history[0]) == episode.horizon:
-        done = 0
+    done = 0
+    if len(episode._agent_reward_history[0]) <= episode.horizon-5:
+        done = 1
+
     episode.custom_metrics["score"] = score
     episode.custom_metrics["proportion_episode_solved"] = done
 
@@ -68,6 +69,15 @@ def train(config, reporter):
     set_seed(config['seed'], config['seed'], config['seed'])
 
+    # Given the depth of the tree observation and the number of features per node we get the following state_size
+    num_features_per_node = config['obs_builder'].observation_dim
+    tree_depth = 2
+    nr_nodes = 0
+    for i in range(tree_depth + 1):
+        nr_nodes += np.power(4, i)
+    obs_size = num_features_per_node * nr_nodes
+
+
     # Environment parameters
     env_config = {"width": config['map_width'],
                   "height": config['map_height'],
@@ -81,7 +91,7 @@ def train(config, reporter):
 
     # Observation space and action space definitions
     if isinstance(config["obs_builder"], TreeObsForRailEnv):
-        obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(168,)),) * 2)
+        obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(obs_size,)),) * 2)
         preprocessor = "tree_obs_prep"
     else:
         raise ValueError("Undefined observation space") # Only TreeObservation implemented for now.
@@ -100,7 +110,7 @@ def train(config, reporter):
     # Trainer configuration
     trainer_config = DEFAULT_CONFIG.copy()
     trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor,
-                               "custom_options": {"step_memory": config["step_memory"]}}
+                               "custom_options": {"step_memory": config["step_memory"], "obs_size": obs_size}}
 
     trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
                                     "policy_mapping_fn": policy_mapping_fn,
@@ -111,9 +121,9 @@ def train(config, reporter):
 
     # Parameters for calculation parallelization
    trainer_config["num_workers"] = 0
-    trainer_config["num_cpus_per_worker"] = 3
-    trainer_config["num_gpus"] = 0.0
-    trainer_config["num_gpus_per_worker"] = 0.0
+    trainer_config["num_cpus_per_worker"] = 8
+    trainer_config["num_gpus"] = 0.2
+    trainer_config["num_gpus_per_worker"] = 0.2
     trainer_config["num_cpus_for_driver"] = 1
     trainer_config["num_envs_per_worker"] = 1
 
@@ -185,8 +195,8 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                # the observation of last time step will be given as input the the model.
                },
                resources_per_trial={
-                   "cpu": 3,
-                   "gpu": 0
+                   "cpu": 8,
+                   "gpu": 0.2
                },
                verbose=2,
                local_dir=local_dir
diff --git a/torch_training/Getting_Started_Training.md b/torch_training/Getting_Started_Training.md
index b69467ebb05024ce7892bd83b83e662a8b168f35..d575e1c673665013b55bccd3faecde526cec8ab4 100644
--- a/torch_training/Getting_Started_Training.md
+++ b/torch_training/Getting_Started_Training.md
@@ -105,7 +105,7 @@ We have no successfully set up the environment for training. To visualize it in
 env_renderer = RenderTool(env, gl="PILSVG", )
 ```
 
-###Setting up the agent
+### Setting up the agent
 
 To set up a appropriate agent we need the state and action space sizes. From the discussion above about the tree observation we end up with:
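
Note on the shape change in train_experiment.py: the hard-coded shape=(168,) that this patch removes is consistent with the new obs_size computation for the default depth-2 tree observation, since a branching factor of 4 gives 1 + 4 + 16 = 21 nodes, and 21 nodes times 8 features per node equals 168. The sketch below reproduces that arithmetic outside the trainer; num_features_per_node = 8 is only an illustrative assumption here, as the patch itself reads the value from config['obs_builder'].observation_dim at runtime.

```python
# Standalone sketch of the obs_size computation introduced in train_experiment.py.
# ASSUMPTION: num_features_per_node = 8 is an illustrative value chosen to match the
# previously hard-coded shape of 168; the patch reads the real value from
# config['obs_builder'].observation_dim.
import numpy as np

num_features_per_node = 8
tree_depth = 2

nr_nodes = 0
for i in range(tree_depth + 1):
    nr_nodes += np.power(4, i)  # 4**0 + 4**1 + 4**2 = 21 nodes in a depth-2 tree

obs_size = num_features_per_node * nr_nodes
print(obs_size)  # 168, the value that was previously hard-coded in the Box shape
```

Passing obs_size through custom_options lets the preprocessor use the same size as the observation space, so the two stay consistent if the tree depth or the observation builder changes.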