From badc19ebffc6c90ab815d9fbe2cac1c07252efb9 Mon Sep 17 00:00:00 2001
From: flaurent <florian.laurent@gmail.com>
Date: Wed, 9 Sep 2020 03:39:18 +0200
Subject: [PATCH] Handle timeouts correctly for Round 2

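Make the evaluator robust to timeouts for Round 2:

- If no environment finished at all during a test, treat that test's mean
  completion percentage as 0.0 instead of raising an exception.
- Pre-fill simulation_times when an episode starts and update the last
  entry when it finishes, so timed-out episodes keep their slot.
- Ignore step commands once the evaluation is done, not only after a
  timeout.
- Give a timed-out episode a normalized reward of 0.0 instead of -1.0.
- After MAX_SUCCESSIVE_TIMEOUTS consecutive timeouts, record the
  termination cause and stop the evaluation instead of raising.
- Return _command_response after the evaluation-complete summary has been
  printed.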
---
 flatland/evaluators/service.py | 40 +++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/flatland/evaluators/service.py b/flatland/evaluators/service.py
index a18e4363..06e2a75e 100644
--- a/flatland/evaluators/service.py
+++ b/flatland/evaluators/service.py
@@ -648,16 +648,18 @@ class FlatlandRemoteEvaluationService:
         # Did we just finish a test, and if yes did it reach high enough mean percentage done?
         if self.current_test != env_test and env_test != 0:
             if self.current_test not in self.simulation_percentage_complete_per_test:
-                raise Exception("Missing percentages for previous test: test {}".format(self.current_test))
+                print("No environment was finished at all during test {}!".format(self.current_test))
+                mean_test_complete_percentage = 0.0
+            else:
+                mean_test_complete_percentage = np.mean(self.simulation_percentage_complete_per_test[self.current_test])
 
-            mean_test_complete_percentage = np.mean(self.simulation_percentage_complete_per_test[self.current_test])
             if mean_test_complete_percentage < TEST_MIN_PERCENTAGE_COMPLETE_MEAN:
                 print("=" * 15)
                 msg = "The mean percentage of done agents during the last 10 environments was too low: {:.3f} < {}".format(
                     mean_test_complete_percentage,
                     TEST_MIN_PERCENTAGE_COMPLETE_MEAN
                 )
-                print(msg)
+                print(msg, "Evaluation will stop.")
                 self.termination_cause = msg
                 self.evaluation_done = True
 
@@ -699,6 +701,7 @@ class FlatlandRemoteEvaluationService:
             self.simulation_rewards.append(0)
             self.simulation_rewards_normalized.append(0)
             self.simulation_percentage_complete.append(0)
+            self.simulation_times.append(0)
             self.simulation_steps.append(0)
             self.nb_malfunctioning_trains.append(0)
 
@@ -765,8 +768,8 @@ class FlatlandRemoteEvaluationService:
         TODO: Add a high level summary of everything thats happening here.
         """
 
-        if self.state_env_timed_out:
-            print("Ignoring step command after timeout")
+        if self.state_env_timed_out or self.evaluation_done:
+            print("Ignoring step command after timeout or evaluation end.")
             return
 
         _payload = command['payload']
@@ -787,9 +790,7 @@ class FlatlandRemoteEvaluationService:
             self.evaluation_done = True
 
             print("=" * 15)
-            print(msg)
-            print("Skipping these rewards...")
-            print("=" * 15)
+            print(msg, "Evaluation will stop.")
             return
         # else:
         #     print("="*15)
@@ -845,7 +846,7 @@ class FlatlandRemoteEvaluationService:
             if self.begin_simulation:
                 # If begin simulation has already been initialized at least once
                 # This adds the simulation time for the previous episode
-                self.simulation_times.append(time.time() - self.begin_simulation)
+                self.simulation_times[-1] = time.time() - self.begin_simulation
 
             # Compute percentage complete
             complete = 0
@@ -1056,6 +1057,7 @@ class FlatlandRemoteEvaluationService:
         self.evaluation_state["meta"]["percentage_complete"] = mean_percentage_complete
         self.evaluation_state["meta"]["termination_cause"] = self.termination_cause
         self.handle_aicrowd_success_event(self.evaluation_state)
+
         print("#" * 100)
         print("EVALUATION COMPLETE !!")
         print("#" * 100)
@@ -1066,6 +1068,8 @@ class FlatlandRemoteEvaluationService:
         print("#" * 100)
         print("#" * 100)
 
+        return _command_response
+
     def compute_mean_scores(self):
         #################################################################################
         #################################################################################
@@ -1134,8 +1138,8 @@ class FlatlandRemoteEvaluationService:
         """
         print("Listening at : ", self.command_channel)
         MESSAGE_QUEUE_LATENCY = []
-        while True:
 
+        while True:
             try:
                 command = self.get_next_command()
             except timeout_decorator.timeout_decorator.TimeoutError:
@@ -1150,11 +1154,11 @@ class FlatlandRemoteEvaluationService:
 
                 self.simulation_steps[-1] += 1
                 self.simulation_rewards[-1] = self.env._max_episode_steps * self.env.get_num_agents()
-                self.simulation_rewards_normalized[-1] = -1.0
+                self.simulation_rewards_normalized[-1] = 0.0
 
                 print(
-                    "Evaluation TIMED OUT after {} timesteps (exceeded {}), using max penalty. {} consecutive timeouts."
-                    "Percentage agents done: {:.3f}. Normalized reward: {:.3f}. Number of malfunctions: {}".format(
+                    "Evaluation of this episode TIMED OUT after {} timesteps (exceeded {}), won't get any reward. {} consecutive timeouts. "
+                    "Percentage agents done: {:.3f}. Normalized reward: {:.3f}. Number of malfunctions: {}.".format(
                         self.simulation_steps[-1],
                         timeout_details,
                         self.timeout_counter,
@@ -1167,10 +1171,12 @@ class FlatlandRemoteEvaluationService:
                 self.state_env_timed_out = True
                 self.simulation_done = True
 
-                print("Consecutive timeouts: {}".format(self.timeout_counter))
-                if self.timeout_counter > MAX_SUCCESSIVE_TIMEOUTS:
-                    raise Exception("{} consecutive timeouts, aborting.".format(self.timeout_counter))
-
+                if self.timeout_counter >= MAX_SUCCESSIVE_TIMEOUTS:
+                    print("=" * 15)
+                    msg = "The submission had {} consecutive timeouts.".format(self.timeout_counter)
+                    print(msg, "Evaluation will stop.")
+                    self.termination_cause = msg
+                    self.evaluation_done = True
                 continue
 
             self.timeout_counter = 0
-- 
GitLab