Commit 63285ab0 authored by gmollard

small fixes

parent b905b1e2
@@ -5,10 +5,10 @@ run_experiment.hidden_sizes = [32, 32]
 run_experiment.map_width = 20
 run_experiment.map_height = 20
-run_experiment.n_agents = 7#{"grid_search": [3, 4, 5, 6, 7, 8]}
-run_experiment.rail_generator = "complex_rail_generator"
+run_experiment.n_agents = 7 #{"grid_search": [3, 4, 5, 6, 7, 8]}
+run_experiment.rail_generator = "complex_rail_generator" # Change this to "load_env" in order to load a predefined complex scene
 run_experiment.nr_extra = 5
-run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"#_map_size_{config[map_width]}"
+run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"
 run_experiment.seed = 123
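The commented-out {"grid_search": [3, 4, 5, 6, 7, 8]} value suggests that n_agents can also be swept with Ray Tune's grid search instead of being fixed at 7. A minimal sketch, assuming run_experiment forwards such values into a Tune experiment config (that forwarding is not shown in this commit):

from ray import tune

# Hypothetical equivalent of the commented-out grid-search value above;
# assumes the experiment runner passes these settings on to Ray Tune.
experiment_config = {
    "n_agents": tune.grid_search([3, 4, 5, 6, 7, 8]),
    "map_width": 20,
    "map_height": 20,
}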
@@ -33,8 +33,6 @@ __file_dirname__ = os.path.dirname(os.path.realpath(__file__))
 CHECKPOINT_PATH = os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'ppo_policy_two_obs_with_predictions_n_agents_4_map_size_20q58l5_f7',
                                'checkpoint_101', 'checkpoint-101')
-CHECKPOINT_PATH = '/home/guillaume/Desktop/distMAgent/ppo_policy_two_obs_with_predictions_n_agents_7_8e5zko1_/checkpoint_1301/checkpoint-1301'
 N_EPISODES = 10
 N_STEPS_PER_EPISODE = 50
@@ -67,8 +65,7 @@ def render_training_result(config):
     # Dict with the different policies to train
     policy_graphs = {
-        config['policy_folder_name'].format(**locals()): (PolicyGraph, obs_space, act_space, {})
-        # "ppo_policy": (PolicyGraph, obs_space, act_space, {})
+        "ppo_policy": (PolicyGraph, obs_space, act_space, {})
     }
     def policy_mapping_fn(agent_id):
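The policy_graphs dict now uses the fixed key "ppo_policy" rather than the formatted folder name, which matches the key used by trainer.get_policy in the next hunk. A minimal sketch of how policy_graphs and policy_mapping_fn are typically passed to an RLlib trainer under the old PolicyGraph API; the PPOAgent call and the "flatland_env" name are assumptions, not part of this commit:

from ray.rllib.agents.ppo import PPOAgent

# Assumed wiring (old RLlib multi-agent API). policy_mapping_fn is expected
# to return "ppo_policy" for every agent id so all agents share one policy.
trainer = PPOAgent(
    env="flatland_env",  # illustrative registered env name
    config={
        "multiagent": {
            "policy_graphs": policy_graphs,
            "policy_mapping_fn": policy_mapping_fn,
            "policies_to_train": ["ppo_policy"],
        },
    },
)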
@@ -106,8 +103,7 @@ def render_training_result(config):
     trainer.restore(CHECKPOINT_PATH)
-    # policy = trainer.get_policy("ppo_policy")
-    policy = trainer.get_policy(config['policy_folder_name'].format(**locals()))
+    policy = trainer.get_policy("ppo_policy")
     preprocessor = preprocessor(obs_space)
     env_renderer = RenderTool(env, gl="PIL")
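With the checkpoint restored and the policy retrieved under the "ppo_policy" key, the script can roll out N_EPISODES episodes of N_STEPS_PER_EPISODE steps and render them. A minimal sketch of such a loop, assuming the old PolicyGraph compute_actions API, an RLlib preprocessor exposing transform(), and a Flatland-style env returning per-agent dicts; none of these loop details are shown in this diff:

for _ in range(N_EPISODES):
    obs = env.reset()
    for _ in range(N_STEPS_PER_EPISODE):
        actions = {}
        for agent_id, agent_obs in obs.items():
            # compute_actions returns (actions, state_outs, extra_info);
            # take the single action of the one-element batch.
            flat_obs = preprocessor.transform(agent_obs)
            actions[agent_id] = policy.compute_actions([flat_obs])[0][0]
        obs, rewards, dones, _ = env.step(actions)
        env_renderer.renderEnv(show=True)  # old Flatland RenderTool call
        if dones.get("__all__"):
            break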
@@ -47,12 +47,17 @@ def on_episode_start(info):
 def on_episode_end(info):
     episode = info['episode']
+    # Calculation of a custom score metric: sum of all accumulated rewards, divided by the number of agents
+    # and the maximum number of time steps of the episode.
     score = 0
     for k, v in episode._agent_reward_history.items():
         score += np.sum(v)
     score /= (len(episode._agent_reward_history) * episode.horizon)
+    # Calculation of the proportion of episodes solved before the maximum time step
     done = 1
-    if len(episode._agent_reward_history) == episode.horizon:
+    if len(episode._agent_reward_history[0]) == episode.horizon:
         done = 0
     episode.custom_metrics["score"] = score
     episode.custom_metrics["proportion_episode_solved"] = done