From 2fbdd5525d3c25ba17d6393c692d7949e51052a6 Mon Sep 17 00:00:00 2001
From: "S.P. Mohanty" <spmohanty91@gmail.com>
Date: Fri, 5 Jun 2020 17:28:40 +0200
Subject: [PATCH] Refactor reward computations

---
 flatland/evaluators/service.py | 57 +++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 25 deletions(-)

diff --git a/flatland/evaluators/service.py b/flatland/evaluators/service.py
index e0a352f6..25fdeee5 100644
--- a/flatland/evaluators/service.py
+++ b/flatland/evaluators/service.py
@@ -565,9 +565,9 @@ class FlatlandRemoteEvaluationService:
         progress = np.clip(
             self.simulation_count * 1.0 / len(self.env_file_paths),
             0, 1)
-        mean_reward = round(np.mean(self.simulation_rewards), 2)
-        mean_normalized_reward = round(np.mean(self.simulation_rewards_normalized), 2)
-        mean_percentage_complete = round(np.mean(self.simulation_percentage_complete), 3)
+
+        mean_reward, mean_normalized_reward, mean_percentage_complete = self.compute_mean_scores()
+
         self.evaluation_state["state"] = "IN_PROGRESS"
         self.evaluation_state["progress"] = progress
         self.evaluation_state["simulation_count"] = self.simulation_count
@@ -687,27 +687,8 @@ class FlatlandRemoteEvaluationService:
                 to operate on all the test environments.
                 """
             )
-        #################################################################################
-        #################################################################################
-        # Compute the mean rewards, mean normalized_reward and mean_percentage_complete
-        # we group all the results by the test_ids
-        # so we first compute the mean in each of the test_id groups,
-        # and then we compute the mean across each of the test_id groups
-        #
-        # NOTE : this df should not have NaN rows for any of the above
-        #        metrics if all the evaluations are successfully completed
-        #
-        #################################################################################
-        #################################################################################
-        grouped_df = self.evaluation_metadata_df.groupby(['test_id']).mean()
-        mean_reward = grouped_df["reward"].mean()
-        mean_normalized_reward = grouped_df["normalized_reward"].mean()
-        mean_percentage_complete = grouped_df["percentage_complete"].mean()
-        #
-        mean_reward = round(mean_reward, 2)
-        mean_normalized_reward = round(mean_normalized_reward, 2)
-        mean_percentage_complete = round(mean_percentage_complete, 3)
+        mean_reward, mean_normalized_reward, mean_percentage_complete = self.compute_mean_scores()

         if self.visualize and len(os.listdir(self.vizualization_folder_name)) > 0:
             # Generate the video
@@ -771,9 +752,11 @@ class FlatlandRemoteEvaluationService:
         self.evaluation_state["state"] = "FINISHED"
         self.evaluation_state["progress"] = 1.0
         self.evaluation_state["simulation_count"] = self.simulation_count
-        self.evaluation_state["score"]["score"] = mean_percentage_complete
-        self.evaluation_state["score"]["score_secondary"] = mean_reward
+        self.evaluation_state["score"]["score"] = mean_normalized_reward
+        self.evaluation_state["score"]["score_secondary"] = mean_percentage_complete
         self.evaluation_state["meta"]["normalized_reward"] = mean_normalized_reward
+        self.evaluation_state["meta"]["reward"] = mean_reward
+        self.evaluation_state["meta"]["percentage_complete"] = mean_percentage_complete
         self.handle_aicrowd_success_event(self.evaluation_state)
         print("#" * 100)
         print("EVALUATION COMPLETE !!")
@@ -784,6 +767,30 @@ class FlatlandRemoteEvaluationService:
         print("#" * 100)
         print("#" * 100)

+    def compute_mean_scores(self):
+        #################################################################################
+        #################################################################################
+        # Compute the mean rewards, mean normalized_reward and mean_percentage_complete
+        # we group all the results by the test_ids
+        # so we first compute the mean in each of the test_id groups,
+        # and then we compute the mean across each of the test_id groups
+        #
+        #
+        #################################################################################
+        #################################################################################
+        source_df = self.evaluation_metadata_df.dropna()
+        grouped_df = source_df.groupby(['test_id']).mean()
+
+        mean_reward = grouped_df["reward"].mean()
+        mean_normalized_reward = grouped_df["normalized_reward"].mean()
+        mean_percentage_complete = grouped_df["percentage_complete"].mean()
+        # Round off the reward values
+        mean_reward = round(mean_reward, 2)
+        mean_normalized_reward = round(mean_normalized_reward, 5)
+        mean_percentage_complete = round(mean_percentage_complete, 3)
+
+        return mean_reward, mean_normalized_reward, mean_percentage_complete
+
     def report_error(self, error_message, command_response_channel):
         """
         A helper function used to report error back to the client
--
GitLab
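Note on the extracted helper: compute_mean_scores() computes a mean of per-test means rather than a flat mean over all episodes, so each test_id carries equal weight regardless of how many episodes it contains, and dropna() now excludes episodes whose metrics were never recorded. Below is a minimal standalone sketch of that pandas pattern; the column names follow the patch, while the DataFrame contents and printed output are invented purely for illustration.

    import pandas as pd

    # Toy stand-in for self.evaluation_metadata_df (values are made up).
    # The None row mimics an episode whose metrics were never filled in.
    evaluation_metadata_df = pd.DataFrame({
        "test_id":             ["Test_0", "Test_0", "Test_1", "Test_1"],
        "reward":              [-100.0, -120.0, -300.0, None],
        "normalized_reward":   [0.91, 0.89, 0.75, None],
        "percentage_complete": [1.0, 0.8, 0.5, None],
    })

    # Drop incomplete episodes, average within each test_id group, then
    # average across the per-test means (a mean of means, not a flat mean),
    # using the same rounding precision as the patch.
    grouped_df = evaluation_metadata_df.dropna().groupby(["test_id"]).mean()
    mean_reward = round(grouped_df["reward"].mean(), 2)
    mean_normalized_reward = round(grouped_df["normalized_reward"].mean(), 5)
    mean_percentage_complete = round(grouped_df["percentage_complete"].mean(), 3)

    print(mean_reward, mean_normalized_reward, mean_percentage_complete)
    # -205.0 0.825 0.7

In this toy run, Test_1's lone surviving episode weighs as much as Test_0's two, which is exactly the equal-per-test weighting the grouped mean is meant to provide.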