From 2fbdd5525d3c25ba17d6393c692d7949e51052a6 Mon Sep 17 00:00:00 2001
From: "S.P. Mohanty" <spmohanty91@gmail.com>
Date: Fri, 5 Jun 2020 17:28:40 +0200
Subject: [PATCH] Refactor reward computations
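
The in-progress status update and the final evaluation summary
previously computed the mean scores with separate (and slightly
different) logic. This change extracts that logic into a single
compute_mean_scores() helper which:

* drops NaN rows (incomplete evaluations) before aggregating,
* groups the results by test_id, computing the mean within each
  test_id group and then the mean across the groups,
* rounds the resulting mean reward, mean normalized reward and
  mean percentage complete.

The mean normalized reward becomes the primary score and the mean
percentage complete the secondary score; the raw mean reward and the
mean percentage complete are additionally exposed in the "meta" block.

To illustrate the two-level aggregation (a minimal sketch with made-up
values, assuming a dataframe with the same columns as
evaluation_metadata_df):

    import pandas as pd

    df = pd.DataFrame({
        "test_id": ["Test_0", "Test_0", "Test_1"],
        "reward": [10.0, 20.0, 30.0],
        "normalized_reward": [0.10, 0.20, 0.30],
        "percentage_complete": [0.5, 1.0, 1.0],
    })
    grouped = df.dropna().groupby(["test_id"]).mean()
    # Test_0 averages to (15.0, 0.15, 0.75) and Test_1 to (30.0, 0.30, 1.0),
    # so the overall means are (22.5, 0.225, 0.875): every test_id is
    # weighted equally, regardless of how many episodes it contains.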

---
 flatland/evaluators/service.py | 57 +++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 25 deletions(-)

diff --git a/flatland/evaluators/service.py b/flatland/evaluators/service.py
index e0a352f6..25fdeee5 100644
--- a/flatland/evaluators/service.py
+++ b/flatland/evaluators/service.py
@@ -565,9 +565,9 @@ class FlatlandRemoteEvaluationService:
         progress = np.clip(
             self.simulation_count * 1.0 / len(self.env_file_paths),
             0, 1)
-        mean_reward = round(np.mean(self.simulation_rewards), 2)
-        mean_normalized_reward = round(np.mean(self.simulation_rewards_normalized), 2)
-        mean_percentage_complete = round(np.mean(self.simulation_percentage_complete), 3)
+
+        mean_reward, mean_normalized_reward, mean_percentage_complete = self.compute_mean_scores()
+
         self.evaluation_state["state"] = "IN_PROGRESS"
         self.evaluation_state["progress"] = progress
         self.evaluation_state["simulation_count"] = self.simulation_count
@@ -687,27 +687,8 @@ class FlatlandRemoteEvaluationService:
                 to operate on all the test environments.
                 """
             )
-        #################################################################################
-        #################################################################################
-        # Compute the mean rewards, mean normalized_reward and mean_percentage_complete
-        # we group all the results by the test_ids
-        # so we first compute the mean in each of the test_id groups, 
-        # and then we compute the mean across each of the test_id groups
-        #
-        # NOTE : this df should not have NaN rows for any of the above 
-        #        metrics if all the evaluations are successfully completed
-        #
-        #################################################################################
-        #################################################################################
 
-        grouped_df = self.evaluation_metadata_df.groupby(['test_id']).mean()
-        mean_reward = grouped_df["reward"].mean()
-        mean_normalized_reward = grouped_df["normalized_reward"].mean()
-        mean_percentage_complete = grouped_df["percentage_complete"].mean()
-        # 
-        mean_reward = round(mean_reward, 2)
-        mean_normalized_reward = round(mean_normalized_reward, 2)
-        mean_percentage_complete = round(mean_percentage_complete, 3)
+        mean_reward, mean_normalized_reward, mean_percentage_complete = self.compute_mean_scores()
 
         if self.visualize and len(os.listdir(self.vizualization_folder_name)) > 0:
             # Generate the video
@@ -771,9 +752,11 @@ class FlatlandRemoteEvaluationService:
         self.evaluation_state["state"] = "FINISHED"
         self.evaluation_state["progress"] = 1.0
         self.evaluation_state["simulation_count"] = self.simulation_count
-        self.evaluation_state["score"]["score"] = mean_percentage_complete
-        self.evaluation_state["score"]["score_secondary"] = mean_reward
+        self.evaluation_state["score"]["score"] = mean_normalized_reward
+        self.evaluation_state["score"]["score_secondary"] = mean_percentage_complete
         self.evaluation_state["meta"]["normalized_reward"] = mean_normalized_reward
+        self.evaluation_state["meta"]["reward"] = mean_reward
+        self.evaluation_state["meta"]["percentage_complete"] = mean_percentage_complete
         self.handle_aicrowd_success_event(self.evaluation_state)
         print("#" * 100)
         print("EVALUATION COMPLETE !!")
@@ -784,6 +767,30 @@ class FlatlandRemoteEvaluationService:
         print("#" * 100)
         print("#" * 100)
 
+    def compute_mean_scores(self):
+        #################################################################################
+        #################################################################################
+        # Compute the mean reward, mean normalized_reward and mean percentage_complete.
+        # All results are grouped by test_id: we first compute the mean within each
+        # test_id group, and then compute the mean across the test_id groups.
+        #
+        # NOTE : rows with NaN values for any of these metrics (i.e. evaluations
+        #        that did not complete) are dropped before aggregation.
+        #################################################################################
+        #################################################################################
+        source_df = self.evaluation_metadata_df.dropna()
+        grouped_df = source_df.groupby(['test_id']).mean()
+
+        mean_reward = grouped_df["reward"].mean()
+        mean_normalized_reward = grouped_df["normalized_reward"].mean()
+        mean_percentage_complete = grouped_df["percentage_complete"].mean()
+        # Round off the scores before reporting them
+        mean_reward = round(mean_reward, 2)
+        mean_normalized_reward = round(mean_normalized_reward, 5)
+        mean_percentage_complete = round(mean_percentage_complete, 3)
+
+        return mean_reward, mean_normalized_reward, mean_percentage_complete
+
     def report_error(self, error_message, command_response_channel):
         """
         A helper function used to report error back to the client
-- 
GitLab