diff --git a/RLLib_training/custom_preprocessors.py b/RLLib_training/custom_preprocessors.py
index 6d93aea149131465447d36578865cc3ccebe9a8f..bb6b21e316e72177dfad54f2f02d3ac527810520 100644
--- a/RLLib_training/custom_preprocessors.py
+++ b/RLLib_training/custom_preprocessors.py
@@ -50,15 +50,23 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1):
 
 class TreeObsPreprocessor(Preprocessor):
     def _init_shape(self, obs_space, options):
+        print(options)
+        self.step_memory = options["custom_options"]["step_memory"]
         return sum([space.shape[0] for space in obs_space]),
 
     def transform(self, observation):
-        data = norm_obs_clip(observation[0][0])
-        distance = norm_obs_clip(observation[0][1])
-        agent_data = np.clip(observation[0][2], -1, 1)
-        data2 = norm_obs_clip(observation[1][0])
-        distance2 = norm_obs_clip(observation[1][1])
-        agent_data2 = np.clip(observation[1][2], -1, 1)
+
+        if self.step_memory == 2:
+            data = norm_obs_clip(observation[0][0])
+            distance = norm_obs_clip(observation[0][1])
+            agent_data = np.clip(observation[0][2], -1, 1)
+            data2 = norm_obs_clip(observation[1][0])
+            distance2 = norm_obs_clip(observation[1][1])
+            agent_data2 = np.clip(observation[1][2], -1, 1)
+        else:
+            data = norm_obs_clip(observation[0])
+            distance = norm_obs_clip(observation[1])
+            agent_data = np.clip(observation[2], -1, 1)
 
         return np.concatenate((np.concatenate((np.concatenate((data, distance)), agent_data)),
                                np.concatenate((np.concatenate((data2, distance2)), agent_data2))))
diff --git a/RLLib_training/render_training_result.py b/RLLib_training/render_training_result.py
index 668c763d162bbafd1bb5c62fb2317324998768f3..1ee7cc1ce394f3b40791706871aa180ec0510b52 100644
--- a/RLLib_training/render_training_result.py
+++ b/RLLib_training/render_training_result.py
@@ -105,8 +105,8 @@ def render_training_result(config):
 
     policy = trainer.get_policy("ppo_policy")
 
-    preprocessor = preprocessor(obs_space)
-    env_renderer = RenderTool(env, gl="PIL")
+    preprocessor = preprocessor(obs_space, {"step_memory": config["step_memory"]})
+    env_renderer = RenderTool(env, gl="PILSVG")
 
     for episode in range(N_EPISODES):
         observation = env.reset()
diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
index 808bbc34556ce541ca41fc560b464c31b1dd96a3..c4c3be578b50608b962f0fd807bf213ed9990c4b 100644
--- a/RLLib_training/train_experiment.py
+++ b/RLLib_training/train_experiment.py
@@ -99,7 +99,8 @@ def train(config, reporter):
 
     # Trainer configuration
     trainer_config = DEFAULT_CONFIG.copy()
-    trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor}
+    trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor,
+                               "custom_options": {"step_memory": config["step_memory"]}}
 
     trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
                                     "policy_mapping_fn": policy_mapping_fn,
@@ -131,6 +132,7 @@ def train(config, reporter):
         "on_episode_end": tune.function(on_episode_end)
     }
 
+
    def logger_creator(conf):
        """Creates a Unified logger with a default logdir prefix."""
        logdir = config['policy_folder_name'].format(**locals())
@@ -179,7 +181,8 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
            "kl_coeff": kl_coeff,
            "lambda_gae": lambda_gae,
            "min_dist": min_dist,
-            "step_memory": step_memory
+            "step_memory": step_memory  # If equal to two, the current observation plus
+            # the observation of last time step will be given as input to the model.
        },
        resources_per_trial={
            "cpu": 3,
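
A note on how the pieces above fit together: during training, `step_memory` reaches the preprocessor through RLlib's model config, since the `custom_options` dict added in `train_experiment.py` is what `_init_shape` receives as `options`; `render_training_result.py` builds the preprocessor by hand and passes `{"step_memory": ...}` without the `custom_options` nesting, which looks inconsistent with the new lookup unless the dict is re-wrapped elsewhere. Also visible in the first hunk: the `return` statement is unchanged context and still references `data2`, `distance2` and `agent_data2` unconditionally, so a run with `step_memory != 2` would hit a `NameError`. Below is a minimal, self-contained sketch of the intended branching with a guarded single-step return; the simplified `norm_obs_clip` and the toy array sizes are assumptions for illustration, not the repository's implementation.

import numpy as np


def norm_obs_clip(obs, clip_min=-1, clip_max=1):
    # Simplified stand-in for the repo's helper: scale by the largest finite
    # value, then clip into [clip_min, clip_max].
    obs = np.asarray(obs, dtype=float)
    max_obs = np.max(obs[obs < np.inf], initial=1.0)
    return np.clip(obs / max_obs, clip_min, clip_max)


def transform(observation, step_memory):
    # Mirrors the branching added by the patch: with step_memory == 2 the
    # observation is a pair of (data, distance, agent_data) tuples for the
    # current and the previous step; otherwise it is a single such tuple.
    if step_memory == 2:
        data = norm_obs_clip(observation[0][0])
        distance = norm_obs_clip(observation[0][1])
        agent_data = np.clip(observation[0][2], -1, 1)
        data2 = norm_obs_clip(observation[1][0])
        distance2 = norm_obs_clip(observation[1][1])
        agent_data2 = np.clip(observation[1][2], -1, 1)
        return np.concatenate((data, distance, agent_data,
                               data2, distance2, agent_data2))
    # Guarded single-step return; the patch itself leaves the two-step return
    # as unconditional context, which is where the NameError would come from.
    data = norm_obs_clip(observation[0])
    distance = norm_obs_clip(observation[1])
    agent_data = np.clip(observation[2], -1, 1)
    return np.concatenate((data, distance, agent_data))


# Toy frame: 3-element data and distance vectors plus a 2-element agent vector.
frame = (np.array([1.0, 4.0, 2.0]), np.array([10.0, 5.0, 0.0]), np.array([0.5, -2.0]))
print(transform((frame, frame), step_memory=2).shape)  # (16,)
print(transform(frame, step_memory=1).shape)           # (8,)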