Commit 5243dd6a authored by nilabha

Updated reward norm metric

parent 734be92c
......@@ -50,7 +50,7 @@ def on_episode_end(info):
# Not a valid check when considering a single policy for multiple agents
#assert len(episode._agent_to_last_info) == episode_num_agents
-norm_factor = 1.0 / (episode_max_steps + episode_num_agents)
+norm_factor = 1.0 / (episode_max_steps * episode_num_agents)
percentage_complete = float(episode_done_agents) / episode_num_agents
episode.custom_metrics["episode_steps"] = episode_steps
......
......@@ -48,7 +48,7 @@ def on_episode_end(info):
# Not a valid check when considering a single policy for multiple agents
#assert len(episode._agent_to_last_info) == episode_num_agents
-norm_factor = 1.0 / (episode_max_steps + episode_num_agents)
+norm_factor = 1.0 / (episode_max_steps * episode_num_agents)
percentage_complete = float(episode_done_agents) / episode_num_agents
episode.custom_metrics["episode_steps"] = episode_steps
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment