Skip to content
Snippets Groups Projects
Commit 4ed90e1c authored by gmollard's avatar gmollard
Browse files

added entropy coeff parameter

parent d5a04cae
No related branches found
No related tags found
No related merge requests found
run_experiment.name = "observation_benchmark_results"
run_experiment.num_iterations = 1002
run_experiment.save_every = 100
run_experiment.hidden_sizes = {"grid_search": [[32, 32], [64, 64], [128, 128], [256, 256]]}
run_experiment.map_width = 20
run_experiment.map_height = 20
run_experiment.n_agents = 5
run_experiment.policy_folder_name = "ppo_policy_{config[obs_builder].__class__.__name__}_entropy_coeff_{config[entropy_coeff]}_{config[hidden_sizes][0]}_hidden_sizes_"
run_experiment.horizon = 50
run_experiment.seed = 123
run_experiment.entropy_coeff = {"grid_search": [1e-3, 1e-2, 0]}
run_experiment.obs_builder = {"grid_search": [@LocalObsForRailEnv()]}# [@TreeObsForRailEnv(), @GlobalObsForRailEnv() ]}
TreeObsForRailEnv.max_depth = 2
LocalObsForRailEnv.view_radius = 5
...@@ -116,6 +116,7 @@ def train(config, reporter): ...@@ -116,6 +116,7 @@ def train(config, reporter):
trainer_config["num_gpus_per_worker"] = 0 trainer_config["num_gpus_per_worker"] = 0
trainer_config["num_cpus_for_driver"] = 1 trainer_config["num_cpus_for_driver"] = 1
trainer_config["num_envs_per_worker"] = 1 trainer_config["num_envs_per_worker"] = 1
trainer_config['entropy_coeff'] = config['entropy_coeff']
trainer_config["env_config"] = env_config trainer_config["env_config"] = env_config
trainer_config["batch_mode"] = "complete_episodes" trainer_config["batch_mode"] = "complete_episodes"
trainer_config['simple_optimizer'] = False trainer_config['simple_optimizer'] = False
...@@ -149,7 +150,8 @@ def train(config, reporter): ...@@ -149,7 +150,8 @@ def train(config, reporter):
@gin.configurable @gin.configurable
def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every, def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder, seed): map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder,
entropy_coeff, seed):
tune.run( tune.run(
train, train,
...@@ -164,6 +166,7 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every, ...@@ -164,6 +166,7 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
"horizon": horizon, # Max number of time steps "horizon": horizon, # Max number of time steps
'policy_folder_name': policy_folder_name, 'policy_folder_name': policy_folder_name,
"obs_builder": obs_builder, "obs_builder": obs_builder,
"entropy_coeff": entropy_coeff,
"seed": seed "seed": seed
}, },
resources_per_trial={ resources_per_trial={
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment