Skip to content
Snippets Groups Projects
Commit 0476e140 authored by spmohanty's avatar spmohanty
Browse files

Addresses #321 - Groups the rewards across test_ids in the evaluation service

parent b470aa0b
No related branches found
No related tags found
No related merge requests found
...@@ -61,7 +61,13 @@ def demo(args=None): ...@@ -61,7 +61,13 @@ def demo(args=None):
help="Evaluation Service ID. This has to match the service id on the client.", help="Evaluation Service ID. This has to match the service id on the client.",
required=False required=False
) )
def evaluator(tests, service_id): @click.option('--results_path',
type=click.Path(exists=False),
default=False,
help="Path where the evaluator should write the results metadata.",
required=False
)
def evaluator(tests, service_id, results_path):
try: try:
redis_connection = redis.Redis() redis_connection = redis.Redis()
redis_connection.ping() redis_connection.ping()
...@@ -75,6 +81,7 @@ def evaluator(tests, service_id): ...@@ -75,6 +81,7 @@ def evaluator(tests, service_id):
test_env_folder=tests, test_env_folder=tests,
flatland_rl_service_id=service_id, flatland_rl_service_id=service_id,
visualize=False, visualize=False,
result_output_path=results_path,
verbose=False verbose=False
) )
grader.run() grader.run()
......
...@@ -687,10 +687,27 @@ class FlatlandRemoteEvaluationService: ...@@ -687,10 +687,27 @@ class FlatlandRemoteEvaluationService:
to operate on all the test environments. to operate on all the test environments.
""" """
) )
#################################################################################
mean_reward = round(np.mean(self.simulation_rewards), 2) #################################################################################
mean_normalized_reward = round(np.mean(self.simulation_rewards_normalized), 2) # Compute the mean rewards, mean normalized_reward and mean_percentage_complete
mean_percentage_complete = round(np.mean(self.simulation_percentage_complete), 3) # we group all the results by the test_ids
# so we first compute the mean in each of the test_id groups,
# and then we compute the mean across each of the test_id groups
#
# NOTE : this df should not have NaN rows for any of the above
# metrics if all the evaluations are successfully completed
#
#################################################################################
#################################################################################
grouped_df = self.evaluation_metadata_df.groupby(['test_id']).mean()
mean_reward = grouped_df["reward"].mean()
mean_normalized_reward = grouped_df["normalized_reward"].mean()
mean_percentage_complete = grouped_df["percentage_complete"].mean()
#
mean_reward = round(mean_reward, 2)
mean_normalized_reward = round(mean_normalized_reward, 2)
mean_percentage_complete = round(mean_percentage_complete, 3)
if self.visualize and len(os.listdir(self.vizualization_folder_name)) > 0: if self.visualize and len(os.listdir(self.vizualization_folder_name)) > 0:
# Generate the video # Generate the video
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment