Skip to content
Snippets Groups Projects
Commit 0476e140 authored by spmohanty's avatar spmohanty
Browse files

Addresses #321 - Groups the rewards across test_ids in the evaluation service

parent b470aa0b
No related branches found
No related tags found
No related merge requests found
...@@ -61,7 +61,13 @@ def demo(args=None): ...@@ -61,7 +61,13 @@ def demo(args=None):
help="Evaluation Service ID. This has to match the service id on the client.", help="Evaluation Service ID. This has to match the service id on the client.",
required=False required=False
) )
def evaluator(tests, service_id): @click.option('--results_path',
type=click.Path(exists=False),
default=False,
help="Path where the evaluator should write the results metadata.",
required=False
)
def evaluator(tests, service_id, results_path):
try: try:
redis_connection = redis.Redis() redis_connection = redis.Redis()
redis_connection.ping() redis_connection.ping()
...@@ -75,6 +81,7 @@ def evaluator(tests, service_id): ...@@ -75,6 +81,7 @@ def evaluator(tests, service_id):
test_env_folder=tests, test_env_folder=tests,
flatland_rl_service_id=service_id, flatland_rl_service_id=service_id,
visualize=False, visualize=False,
result_output_path=results_path,
verbose=False verbose=False
) )
grader.run() grader.run()
......
...@@ -687,10 +687,27 @@ class FlatlandRemoteEvaluationService: ...@@ -687,10 +687,27 @@ class FlatlandRemoteEvaluationService:
to operate on all the test environments. to operate on all the test environments.
""" """
) )
#################################################################################
mean_reward = round(np.mean(self.simulation_rewards), 2) #################################################################################
mean_normalized_reward = round(np.mean(self.simulation_rewards_normalized), 2) # Compute the mean rewards, mean normalized_reward and mean_percentage_complete
mean_percentage_complete = round(np.mean(self.simulation_percentage_complete), 3) # we group all the results by the test_ids
# so we first compute the mean in each of the test_id groups,
# and then we compute the mean across each of the test_id groups
#
# NOTE : this df should not have NaN rows for any of the above
# metrics if all the evaluations are successfully completed
#
#################################################################################
#################################################################################
grouped_df = self.evaluation_metadata_df.groupby(['test_id']).mean()
mean_reward = grouped_df["reward"].mean()
mean_normalized_reward = grouped_df["normalized_reward"].mean()
mean_percentage_complete = grouped_df["percentage_complete"].mean()
#
mean_reward = round(mean_reward, 2)
mean_normalized_reward = round(mean_normalized_reward, 2)
mean_percentage_complete = round(mean_percentage_complete, 3)
if self.visualize and len(os.listdir(self.vizualization_folder_name)) > 0: if self.visualize and len(os.listdir(self.vizualization_folder_name)) > 0:
# Generate the video # Generate the video
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment