From 0476e140c87cc50aa59f0e1226b8f190f66ccee3 Mon Sep 17 00:00:00 2001
From: "S.P. Mohanty" <spmohanty91@gmail.com>
Date: Fri, 5 Jun 2020 15:53:32 +0200
Subject: [PATCH] Addresses #321 - Groups the rewards across test_ids in the
 evaluation service

---
 flatland/cli.py                |  9 ++++++++-
 flatland/evaluators/service.py | 25 +++++++++++++++++++++----
 2 files changed, 29 insertions(+), 5 deletions(-)

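Usage sketch (illustrative; not part of the change): with the new flag, cli.py
ends up constructing the evaluation service roughly as below. The keyword
arguments mirror the call shown in the diff; only the concrete paths and the
service id are assumptions.

    from flatland.evaluators.service import FlatlandRemoteEvaluationService

    # A redis server must already be running; cli.py pings it before this point.
    grader = FlatlandRemoteEvaluationService(
        test_env_folder="./scratch/test-envs",          # made-up path
        flatland_rl_service_id="FLATLAND_RL_SERVICE_ID",  # made-up id
        visualize=False,
        result_output_path="./scratch/results.csv",     # made-up path
        verbose=False,
    )
    grader.run()

On the command line the same path is supplied via the new --results_path
option.
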
diff --git a/flatland/cli.py b/flatland/cli.py
index cc7576d1..7f3b95fd 100644
--- a/flatland/cli.py
+++ b/flatland/cli.py
@@ -61,7 +61,13 @@ def demo(args=None):
               help="Evaluation Service ID. This has to match the service id on the client.",
               required=False
               )
-def evaluator(tests, service_id):
+@click.option('--results_path',
+              type=click.Path(exists=False),
+              default=None,
+              help="Path where the evaluator should write the results metadata.",
+              required=False
+              )
+def evaluator(tests, service_id, results_path):
     try:
         redis_connection = redis.Redis()
         redis_connection.ping()
@@ -75,6 +81,7 @@ def evaluator(tests, service_id):
         test_env_folder=tests,
         flatland_rl_service_id=service_id,
         visualize=False,
+        result_output_path=results_path,
         verbose=False
     )
     grader.run()
diff --git a/flatland/evaluators/service.py b/flatland/evaluators/service.py
index 06447153..782dc185 100644
--- a/flatland/evaluators/service.py
+++ b/flatland/evaluators/service.py
@@ -687,10 +687,27 @@ class FlatlandRemoteEvaluationService:
                 to operate on all the test environments.
                 """
             )
-
-        mean_reward = round(np.mean(self.simulation_rewards), 2)
-        mean_normalized_reward = round(np.mean(self.simulation_rewards_normalized), 2)
-        mean_percentage_complete = round(np.mean(self.simulation_percentage_complete), 3)
+        #################################################################################
+        #################################################################################
+        # Compute the mean reward, mean normalized reward and mean percentage
+        # complete. We group all the results by test_id:
+        # first compute the mean within each test_id group,
+        # then compute the mean across the per-test_id means, so that every
+        # test_id contributes equally to the final score.
+        # NOTE: this df should not have NaN rows for any of the above
+        #       metrics if all the evaluations completed successfully.
+        #
+        #################################################################################
+        #################################################################################
+
+        grouped_df = self.evaluation_metadata_df.groupby(['test_id']).mean()
+        mean_reward = grouped_df["reward"].mean()
+        mean_normalized_reward = grouped_df["normalized_reward"].mean()
+        mean_percentage_complete = grouped_df["percentage_complete"].mean()
+        # Round the aggregated metrics before reporting them.
+        mean_reward = round(mean_reward, 2)
+        mean_normalized_reward = round(mean_normalized_reward, 2)
+        mean_percentage_complete = round(mean_percentage_complete, 3)
 
         if self.visualize and len(os.listdir(self.vizualization_folder_name)) > 0:
             # Generate the video
-- 
GitLab
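
For reference, a minimal standalone sketch of the mean-of-means aggregation
introduced above. The numbers are made up; the column names and the groupby
logic are taken from the diff.

    import pandas as pd

    # Hypothetical per-episode results: two episodes for each of two test_ids.
    evaluation_metadata_df = pd.DataFrame({
        "test_id": ["Test_0", "Test_0", "Test_1", "Test_1"],
        "reward": [10.0, 20.0, 100.0, 200.0],
        "normalized_reward": [0.1, 0.2, 0.5, 0.6],
        "percentage_complete": [0.5, 1.0, 0.25, 0.75],
    })

    # Mean within each test_id group first...
    grouped_df = evaluation_metadata_df.groupby(["test_id"]).mean()

    # ...then the mean across the per-test_id means, so a test_id with many
    # episodes does not outweigh one with few.
    mean_reward = round(grouped_df["reward"].mean(), 2)
    mean_normalized_reward = round(grouped_df["normalized_reward"].mean(), 2)
    mean_percentage_complete = round(grouped_df["percentage_complete"].mean(), 3)

    print(mean_reward, mean_normalized_reward, mean_percentage_complete)
    # -> 82.5 0.35 0.625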