From 7ac0b0356c5ac8825284eb967adc5b72cb5f7d9a Mon Sep 17 00:00:00 2001
From: flaurent <florian.laurent@gmail.com>
Date: Fri, 21 Aug 2020 08:19:07 +0200
Subject: [PATCH] Logging total number of malfunctions per episode

---
 flatland/cli.py                | 17 ++++++++--
 flatland/evaluators/service.py | 59 ++++++++++++++++++++++++++++------
 2 files changed, 64 insertions(+), 12 deletions(-)

diff --git a/flatland/cli.py b/flatland/cli.py
index 9bb71074..51db57b3 100644
--- a/flatland/cli.py
+++ b/flatland/cli.py
@@ -61,13 +61,24 @@ def demo(args=None):
               help="Evaluation Service ID. This has to match the service id on the client.",
               required=False
               )
+@click.option('--shuffle',
+              type=bool,
+              default=True,
+              help="Shuffle the environments before starting evaluation.",
+              required=False
+              )
+@click.option('--disable_timeouts',
+              default=False,
+              help="Disable all evaluation timeouts.",
+              required=False
+              )
 @click.option('--results_path',
               type=click.Path(exists=False),
               default=None,
               help="Path where the evaluator should write the results metadata.",
               required=False
               )
-def evaluator(tests, service_id, results_path):
+def evaluator(tests, service_id, shuffle, disable_timeouts, results_path):
     try:
         redis_connection = redis.Redis()
         redis_connection.ping()
@@ -82,7 +93,9 @@ def evaluator(tests, service_id, results_path):
         flatland_rl_service_id=service_id,
         visualize=False,
         result_output_path=results_path,
-        verbose=False
+        verbose=False,
+        shuffle=shuffle,
+        disable_timeouts=disable_timeouts
     )
     grader.run()
 
diff --git a/flatland/evaluators/service.py b/flatland/evaluators/service.py
index 2de35fd5..82d89ac3 100644
--- a/flatland/evaluators/service.py
+++ b/flatland/evaluators/service.py
@@ -9,6 +9,7 @@ import time
 import traceback
 import json
 import itertools
+import re
 
 import crowdai_api
 import msgpack
@@ -107,6 +108,7 @@ class FlatlandRemoteEvaluationService:
                  shuffle=True,
                  missing_only=False,
                  result_output_path=None,
+                 disable_timeouts=False
                  ):
 
         # Episode recording properties
@@ -121,6 +123,17 @@ class FlatlandRemoteEvaluationService:
             os.makedirs(self.mergeDir)
         self.use_pickle = use_pickle
         self.missing_only = missing_only
+        self.disable_timeouts = disable_timeouts
+
+        if self.disable_timeouts:
+            print("=" * 20)
+            print("Timeout are DISABLED!")
+            print("=" * 20)
+
+        if not shuffle:
+            print("=" * 20)
+            print("Env shuffling is DISABLED!")
+            print("=" * 20)
 
         # Test Env folder Paths
         self.test_env_folder = test_env_folder
@@ -202,6 +215,7 @@ class FlatlandRemoteEvaluationService:
         self.simulation_steps = []
         self.simulation_times = []
         self.env_step_times = []
+        self.nb_malfunctioning_trains = []
         self.begin_simulation = False
         self.current_step = 0
         self.visualize = visualize
@@ -279,12 +293,15 @@ class FlatlandRemoteEvaluationService:
                 ├── .......
                 └── Level_99.pkl
         """
-        env_paths = sorted(glob.glob(
-            os.path.join(
-                self.test_env_folder,
-                "*/*.pkl"
+        env_paths = sorted(
+            glob.glob(
+                os.path.join(
+                    self.test_env_folder,
+                    "*/*.pkl"
+                )
             )
-        ))
+        )
+
         # Remove the root folder name from the individual
         # lists, so that we only have the path relative
         # to the test root folder
@@ -292,13 +309,21 @@ class FlatlandRemoteEvaluationService:
             x, self.test_env_folder
         ) for x in env_paths])
 
+        # Sort in proper order
+        def get_file_order(f):
+            numbers = re.findall(r'\d+', os.path.relpath(f))
+            value = int(numbers[0]) * 1000 + int(numbers[1])
+            return value
+
+        env_paths.sort(key=get_file_order)
+
         # if requested, only generate actions for those envs which don't already have them
         if self.mergeDir and self.missing_only:
             existing_paths = (itertools.chain.from_iterable(
                 [glob.glob(os.path.join(self.mergeDir, f"envs/*.{ext}"))
                  for ext in ["pkl", "mpk"]]))
             existing_paths = [os.path.relpath(sPath, self.mergeDir) for sPath in existing_paths]
-            env_paths = sorted(set(env_paths) - set(existing_paths))
+            env_paths = set(env_paths) - set(existing_paths)
 
         return env_paths
 
@@ -329,6 +354,7 @@ class FlatlandRemoteEvaluationService:
             self.evaluation_metadata_df["percentage_complete"] = np.nan
             self.evaluation_metadata_df["steps"] = np.nan
             self.evaluation_metadata_df["simulation_time"] = np.nan
+            self.evaluation_metadata_df["nb_malfunctioning_trains"] = np.nan
 
             # Add client specific columns
             # TODO: This needs refactoring
@@ -357,6 +383,7 @@ class FlatlandRemoteEvaluationService:
             _row.percentage_complete = self.simulation_percentage_complete[-1]
             _row.steps = self.simulation_steps[-1]
             _row.simulation_time = self.simulation_times[-1]
+            _row.nb_malfunctioning_trains = self.nb_malfunctioning_trains[-1]
 
             # TODO: This needs refactoring
             # Add controller_inference_time_metrics
@@ -462,6 +489,9 @@ class FlatlandRemoteEvaluationService:
             """
             COMMAND_TIMEOUT = 10 ** 6
 
+        if self.disable_timeouts:
+            COMMAND_TIMEOUT = None
+
         @timeout_decorator.timeout(COMMAND_TIMEOUT, use_signals=use_signals_in_timeout)  # timeout for each command
         def _get_next_command(command_channel, _redis):
             """
@@ -605,6 +635,7 @@ class FlatlandRemoteEvaluationService:
             self.simulation_rewards_normalized.append(0)
             self.simulation_percentage_complete.append(0)
             self.simulation_steps.append(0)
+            self.nb_malfunctioning_trains.append(0)
 
             self.current_step = 0
 
@@ -710,6 +741,12 @@ class FlatlandRemoteEvaluationService:
                 self.env.get_num_agents()
             )
 
+        num_malfunctioning = sum(agent.malfunction_data['malfunction'] > 0 for agent in self.env.agents)
+        if (num_malfunctioning > 0):
+            print(num_malfunctioning, "agent malfunctioning at step", self.current_step)
+
+        self.nb_malfunctioning_trains[-1] += num_malfunctioning
+
         # record the actions before checking for done
         if self.actionDir is not None:
             self.lActions.append(action)
@@ -732,11 +769,12 @@ class FlatlandRemoteEvaluationService:
             percentage_complete = complete * 1.0 / self.env.get_num_agents()
             self.simulation_percentage_complete[-1] = percentage_complete
 
-            print("Evaluation finished in {} timesteps, {:.3f} seconds. Percentage agents done: {:.3f}. Normalized reward: {:.3f}.".format(
+            print("Evaluation finished in {} timesteps, {:.3f} seconds. Percentage agents done: {:.3f}. Normalized reward: {:.3f}. Number of malfunctions: {}.".format(
                 self.simulation_steps[-1],
                 self.simulation_times[-1],
                 self.simulation_percentage_complete[-1],
-                self.simulation_rewards_normalized[-1]
+                self.simulation_rewards_normalized[-1],
+                self.nb_malfunctioning_trains[-1]
             ))
 
             # Write intermediate results
@@ -1010,10 +1048,11 @@ class FlatlandRemoteEvaluationService:
                 self.simulation_rewards[-1] = self.env._max_episode_steps * self.env.get_num_agents()
                 self.simulation_rewards_normalized[-1] = -1.0
 
-                print("Evaluation TIMED OUT after {} timesteps, using max penalty. Percentage agents done: {:.3f}. Normalized reward: {:.3f}.".format(
+                print("Evaluation TIMED OUT after {} timesteps, using max penalty. Percentage agents done: {:.3f}. Normalized reward: {:.3f}. Number of malfunctions: {}".format(
                     self.simulation_steps[-1],
                     self.simulation_percentage_complete[-1],
-                    self.simulation_rewards_normalized[-1]
+                    self.simulation_rewards_normalized[-1],
+                    self.nb_malfunctioning_trains[-1],
                 ))
 
                 self.timeout_counter += 1
-- 
GitLab