From f73a6411bd31754e900cbef3658b4bba30b77308 Mon Sep 17 00:00:00 2001
From: flaurent <florian.laurent@gmail.com>
Date: Mon, 7 Sep 2020 02:08:37 +0200
Subject: [PATCH] Correctly handling case when mean percentage done during a
 test gets too low

---
 flatland/evaluators/service.py | 56 +++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 25 deletions(-)

diff --git a/flatland/evaluators/service.py b/flatland/evaluators/service.py
index 0277652b..8b15b58c 100644
--- a/flatland/evaluators/service.py
+++ b/flatland/evaluators/service.py
@@ -57,11 +57,16 @@ TEST_MIN_PERCENTAGE_COMPLETE_MEAN = 0.25
 # this probably means the submission has crashed
 MAX_SUCCESSIVE_TIMEOUTS = 10
 
-# 8 hours
+debug_mode = (os.getenv("AICROWD_DEBUG_SUBMISSION", 0) == 1)
+if debug_mode:
+    print("=" * 20)
+    print("Submission in DEBUG MODE! will get limited time")
+    print("=" * 20)
+
+# 8 hours (will get debug timeout from env variable if applicable)
 OVERALL_TIMEOUT = int(os.getenv(
     "FLATLAND_OVERALL_TIMEOUT",
-    # 8 * 60 * 60))
-    15))
+    8 * 60 * 60))
 
 # 10 mins
 INTIAL_PLANNING_TIMEOUT = int(os.getenv(
@@ -235,7 +240,7 @@ class FlatlandRemoteEvaluationService:
         self.env_step_times = []
         self.nb_malfunctioning_trains = []
         self.overall_start_time = 0
-        self.overall_timeout_reached = False
+        self.evaluation_done = False
         self.begin_simulation = False
         self.current_step = 0
         self.current_test = -1
@@ -619,7 +624,7 @@ class FlatlandRemoteEvaluationService:
         """
 
         # Check if the previous episode was finished
-        if not self.simulation_done and not self.overall_timeout_reached:
+        if not self.simulation_done and not self.evaluation_done:
             _command_response = self._error_template("CAN'T CREATE NEW ENV BEFORE PREVIOUS IS DONE")
             self.send_response(_command_response, command)
             raise Exception(_command_response['payload'])
@@ -635,34 +640,35 @@
         # reset the timeout flag / state.
         self.state_env_timed_out = False
 
-        if self.simulation_count < len(self.env_file_paths) and not self.overall_timeout_reached:
+        test_env_file_path = self.env_file_paths[self.simulation_count]
+        env_test, env_level = self.get_env_test_and_level(test_env_file_path)
+
+        # Did we just finish a test, and if yes did it reach high enough mean percentage done?
+        if self.current_test != env_test and env_test != 0:
+            if self.current_test not in self.simulation_percentage_complete_per_test:
+                raise Exception("Missing percentages for previous test: test {}".format(self.current_test))
+
+            mean_test_complete_percentage = np.mean(self.simulation_percentage_complete_per_test[self.current_test])
+            if mean_test_complete_percentage < TEST_MIN_PERCENTAGE_COMPLETE_MEAN:
+                print("=" * 15)
+                print("Mean percentage done too low: {} < {}. Evaluation will stop here.".format(
+                    mean_test_complete_percentage,
+                    TEST_MIN_PERCENTAGE_COMPLETE_MEAN
+                ))
+                self.evaluation_done = True
+
+        if self.simulation_count < len(self.env_file_paths) and not self.evaluation_done:
             """
             There are still test envs left that are yet to be evaluated
             """
 
-            test_env_file_path = self.env_file_paths[self.simulation_count]
             print("Evaluating {} ({}/{})".format(test_env_file_path, self.simulation_count,
                                                  len(self.env_file_paths)))
-            env_test, env_level = self.get_env_test_and_level(test_env_file_path)
             test_env_file_path = os.path.join(
                 self.test_env_folder,
                 test_env_file_path
             )
 
-            if self.current_test != env_test and env_test != 0:
-                if self.current_test not in self.simulation_percentage_complete_per_test:
-                    raise Exception("Missing percentages for previous test: test {}".format(self.current_test))
-
-                # Check if episodes from the previous test had good enough results
-                mean_test_complete_percentage = np.mean(self.simulation_percentage_complete_per_test[self.current_test])
-                if mean_test_complete_percentage >= TEST_MIN_PERCENTAGE_COMPLETE_MEAN:
-                    print("Starting new test: test {} to test {}".format(self.current_test, env_test))
-                else:
-                    _command_response = self._error_template(
-                        "COMPLETE PERCENTAGE TOO LOW: {} < {}".format(mean_test_complete_percentage, TEST_MIN_PERCENTAGE_COMPLETE_MEAN))
-                    self.send_response(_command_response, command)
-                    raise Exception(_command_response['payload'])
-
             self.current_test = env_test
             self.current_level = env_level
 
@@ -774,7 +780,7 @@ class FlatlandRemoteEvaluationService:
             # _command_response = self._error_template(msg)
             # self.send_response(_command_response, command)
             # raise Exception(_command_response['payload'])
-            self.overall_timeout_reached = True
+            self.evaluation_done = True
 
             print("=" * 15)
             print(msg)
@@ -965,7 +971,7 @@ class FlatlandRemoteEvaluationService:
         # Compute the evaluation metadata for the last episode
        self.update_evaluation_metadata()
 
-        if len(self.simulation_rewards) != len(self.env_file_paths) and not self.overall_timeout_reached:
+        if len(self.simulation_rewards) != len(self.env_file_paths) and not self.evaluation_done:
             raise Exception(
                 """env.submit called before the agent had the chance
                 to operate on all the test environments.
@@ -1063,7 +1069,7 @@
         #################################################################################
         #################################################################################
         source_df = self.evaluation_metadata_df.dropna()
-        #grouped_df = source_df.groupby(['test_id']).mean()
+        # grouped_df = source_df.groupby(['test_id']).mean()
 
         mean_reward = source_df["reward"].mean()
         mean_normalized_reward = source_df["normalized_reward"].mean()
--
GitLab
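
For context, a minimal standalone sketch of the early-stop rule this patch introduces: once a test finishes, the mean "percentage done" of its episodes is compared against TEST_MIN_PERCENTAGE_COMPLETE_MEAN and, if it is too low, evaluation_done is set instead of raising an error. The helper name should_stop_evaluation and the sample data below are illustrative only; the actual check lives in the env-creation handler of flatland/evaluators/service.py patched above.

import numpy as np

# Same threshold as in service.py: a test whose episodes average below this
# fraction of agents done ends the evaluation early.
TEST_MIN_PERCENTAGE_COMPLETE_MEAN = 0.25

def should_stop_evaluation(percentage_complete_per_test, finished_test_id):
    """Return True if the just-finished test scored too low on average."""
    if finished_test_id not in percentage_complete_per_test:
        raise Exception("Missing percentages for previous test: test {}".format(finished_test_id))
    mean_done = np.mean(percentage_complete_per_test[finished_test_id])
    return mean_done < TEST_MIN_PERCENTAGE_COMPLETE_MEAN

# Test 0 averaged 15% done (< 0.25), so the evaluator would mark the
# evaluation as done instead of loading the next environment.
print(should_stop_evaluation({0: [0.1, 0.2]}, 0))  # True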