diff --git a/flatland/cli.py b/flatland/cli.py
index 9bb7107476a0981fda13b05502949549ce8d4d31..51db57b3bc5c7e2889ac57df84c95a15b59685f7 100644
--- a/flatland/cli.py
+++ b/flatland/cli.py
@@ -61,13 +61,24 @@ def demo(args=None):
               help="Evaluation Service ID. This has to match the service id on the client.",
               required=False
               )
+@click.option('--shuffle',
+              type=bool,
+              default=True,
+              help="Shuffle the environments before starting evaluation.",
+              required=False
+              )
+@click.option('--disable_timeouts',
+              default=False,
+              help="Disable all evaluation timeouts.",
+              required=False
+              )
 @click.option('--results_path',
               type=click.Path(exists=False),
               default=None,
               help="Path where the evaluator should write the results metadata.",
               required=False
               )
-def evaluator(tests, service_id, results_path):
+def evaluator(tests, service_id, shuffle, disable_timeouts, results_path):
     try:
         redis_connection = redis.Redis()
         redis_connection.ping()
@@ -82,7 +93,9 @@ def evaluator(tests, service_id, results_path):
         flatland_rl_service_id=service_id,
         visualize=False,
         result_output_path=results_path,
-        verbose=False
+        verbose=False,
+        shuffle=shuffle,
+        disable_timeouts=disable_timeouts
     )
     grader.run()
 
diff --git a/flatland/envs/agent_chains.py b/flatland/envs/agent_chains.py
index 02734f463fd6566767226391e48772a30543bac4..a2792479c0877aef2b41a8b62af7d55f5ab46bb0 100644
--- a/flatland/envs/agent_chains.py
+++ b/flatland/envs/agent_chains.py
@@ -32,12 +32,12 @@ class MotionCheck(object):
         if xlabel:
             self.G.nodes[rc1]["xlabel"] = xlabel
         self.G.add_edge(rc1, rc2)
-    
+
     def find_stops(self):
         """ find all the stopped agents as a set of rc position nodes
             A stopped agent is a self-loop on a cell node.
         """
-        
+
         # get the (sparse) adjacency matrix
         spAdj = nx.linalg.adjacency_matrix(self.G)
 
@@ -45,7 +45,7 @@ class MotionCheck(object):
         # the where turns this into a list of indices of the 1s
         giStops = np.where(spAdj.diagonal())[0]
 
-        # convert the cell/node indices into the node rc values 
+        # convert the cell/node indices into the node rc values
         lvAll = list(self.G.nodes())
         # pick out the stops by their indices
         lvStops = [ lvAll[i] for i in giStops ]
@@ -78,7 +78,7 @@ class MotionCheck(object):
             # Find all the stops in this chain
             svCompStops = svStops.intersection(Gwcc)
             #print(svCompStops)
-            
+
             if len(svCompStops) > 0:
 
                 # We need to traverse it in reverse - back up the movement edges
@@ -90,7 +90,7 @@ class MotionCheck(object):
                     iter_stops = nx.algorithms.traversal.dfs_postorder_nodes(Gwcc_rev, vStop)
                     lStops = list(iter_stops)
                     svBlocked.update(lStops)
-        
+
         return svBlocked
 
     def find_swaps(self):
@@ -102,7 +102,7 @@ class MotionCheck(object):
         llvSwaps = [lvLoop for lvLoop in llvLoops if len(lvLoop) == 2 ]
         svSwaps = { v for lvSwap in llvSwaps for v in lvSwap }
         return svSwaps
-    
+
     def find_same_dest(self):
         """ find groups of agents which are trying to land on the same cell.
             ie there is a gap of one cell between them and they are both landing on it.
@@ -123,22 +123,22 @@ class MotionCheck(object):
             elif v in svBlocked:
                 self.G.nodes[v]["color"] = "red"
             elif len(dPred)>1:
-                
+
                 if self.G.nodes[v].get("color") == "red":
                     continue
-                
+
                 if self.G.nodes[v].get("agent") is None:
-                    self.G.nodes[v]["color"] = "blue"    
+                    self.G.nodes[v]["color"] = "blue"
                 else:
                     self.G.nodes[v]["color"] = "magenta"
-                
+
                 # predecessors of a contended cell
                 diAgCell = {self.G.nodes[vPred].get("agent"): vPred  for vPred in dPred}
-                
+
                 # remove the agent with the lowest index, who wins
                 iAgWinner = min(diAgCell)
                 diAgCell.pop(iAgWinner)
-                
+
                 # Block all the remaining predessors, and their tree of preds
                 for iAg, v in diAgCell.items():
                     self.G.nodes[v]["color"] = "red"
@@ -163,7 +163,7 @@ class MotionCheck(object):
             sColor = dAttr["color"]
             if sColor in [ "red", "purple" ]:
                 return (False, rcPos)
-        
+
         dSucc = self.G.succ[rcPos]
 
         # This should never happen - only the next cell of an agent has no successor
@@ -179,7 +179,7 @@ class MotionCheck(object):
         return (True, rcNext)
 
 
-            
+
 
 def render(omc:MotionCheck, horizontal=True):
     try:
@@ -210,10 +210,10 @@ class ChainTestEnv(object):
 
     def addAgentToRow(self, c1, c2, xlabel=None):
         self.addAgent((self.iRowNext, c1), (self.iRowNext, c2), xlabel=xlabel)
-        
 
-    def create_test_chain(self, 
-            nAgents:int, 
+
+    def create_test_chain(self,
+            nAgents:int,
             rcVel:Tuple[int] = (0,1),
             liStopped:List[int]=[],
             xlabel=None):
@@ -227,7 +227,7 @@ class ChainTestEnv(object):
             else:
                 rcVel1 = rcVel
             self.omc.addAgent(iAg+self.iAgNext, rcPos, (rcPos[0] + rcVel1[0], rcPos[1] + rcVel1[1]) )
-        
+
         if xlabel:
             self.omc.G.nodes[lrcAgPos[0]]["xlabel"] = xlabel
 
@@ -237,7 +237,7 @@ class ChainTestEnv(object):
     def nextRow(self):
         self.iRowNext+=1
 
-    
+
 
 def create_test_agents(omc:MotionCheck):
 
@@ -326,7 +326,7 @@ def create_test_agents2(omc:MotionCheck):
     cte.addAgentToRow(3, 2)
     cte.addAgent((cte.iRowNext+1, 2), (cte.iRowNext, 2))
     cte.nextRow()
-    
+
     if False:
         cte.nextRow()
         cte.nextRow()
@@ -342,7 +342,7 @@ def create_test_agents2(omc:MotionCheck):
     cte.addAgentToRow(3, 4)
     cte.addAgent((cte.iRowNext+1, 3), (cte.iRowNext, 3))
     cte.nextRow()
-    
+
 
     cte.nextRow()
     cte.addAgentToRow(1, 2, "Tree")
@@ -370,8 +370,8 @@ def test_agent_following():
 
     lvCells = omc.G.nodes()
 
-    lColours = [ "magenta" if v in svStops 
-            else "red" if v in svBlocked 
+    lColours = [ "magenta" if v in svStops
+            else "red" if v in svBlocked
             else "purple" if v in svSwaps
             else "lightblue"
             for v in lvCells ]
@@ -388,4 +388,3 @@ def main():
 
 if __name__=="__main__":
     main()
-    
\ No newline at end of file
diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py
index b258e1d2f5bf72d33ad7470ca5a28f58260ec79d..d9ff7120789d5ad58ba4acc15d16586f0e34b827 100644
--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -154,11 +154,11 @@ class RailEnv(Environment):
     def __init__(self,
                  width,
                  height,
-                 rail_generator = None, 
-                 schedule_generator = None, # : sched_gen.ScheduleGenerator = sched_gen.random_schedule_generator(),
+                 rail_generator=None,
+                 schedule_generator=None,  # : sched_gen.ScheduleGenerator = sched_gen.random_schedule_generator(),
                  number_of_agents=1,
                  obs_builder_object: ObservationBuilder = GlobalObsForRailEnv(),
-                 malfunction_generator_and_process_data=None, #mal_gen.no_malfunction_generator(),
+                 malfunction_generator_and_process_data=None,  # mal_gen.no_malfunction_generator(),
                  malfunction_generator=None,
                  remove_agents_at_target=True,
                  random_seed=1,
@@ -208,18 +208,18 @@ class RailEnv(Environment):
         elif malfunction_generator is not None:
             self.malfunction_generator = malfunction_generator
             # malfunction_process_data is not used
-            #self.malfunction_generator, self.malfunction_process_data = malfunction_generator_and_process_data
+            # self.malfunction_generator, self.malfunction_process_data = malfunction_generator_and_process_data
             self.malfunction_process_data = self.malfunction_generator.get_process_data()
         # replace default values here because we can't use default args values because of cyclic imports
         else:
             self.malfunction_generator = mal_gen.NoMalfunctionGen()
             self.malfunction_process_data = self.malfunction_generator.get_process_data()
 
-        #self.rail_generator: RailGenerator = rail_generator
+        # self.rail_generator: RailGenerator = rail_generator
         if rail_generator is None:
             rail_generator = rail_gen.random_rail_generator()
         self.rail_generator = rail_generator
-        #self.schedule_generator: ScheduleGenerator = schedule_generator
+        # self.schedule_generator: ScheduleGenerator = schedule_generator
         if schedule_generator is None:
             schedule_generator = sched_gen.random_schedule_generator()
         self.schedule_generator = schedule_generator
@@ -266,13 +266,12 @@ class RailEnv(Environment):
         # save episode timesteps ie agent positions, orientations.  (not yet actions / observations)
         self.record_steps = record_steps  # whether to save timesteps
         # save timesteps in here: [[[row, col, dir, malfunction],...nAgents], ...nSteps]
-        self.cur_episode = []  
-        self.list_actions = [] # save actions in here
+        self.cur_episode = []
+        self.list_actions = []  # save actions in here
 
         self.close_following = close_following  # use close following logic
         self.motionCheck = ac.MotionCheck()
 
-
     def _seed(self, seed=None):
         self.np_random, seed = seeding.np_random(seed)
         random.seed(seed)
@@ -304,8 +303,6 @@ class RailEnv(Environment):
             agent.reset()
         self.active_agents = [i for i in range(len(self.agents))]
 
-
-
     def action_required(self, agent):
         """
         Check if an agent needs to provide an action
@@ -365,7 +362,6 @@ class RailEnv(Environment):
             else:
                 raise ValueError("Could not invoke __call__ or generate on rail_generator")
 
-
             self.rail = rail
             self.height, self.width = self.rail.grid.shape
 
@@ -377,8 +373,6 @@ class RailEnv(Environment):
         if optionals and 'distance_map' in optionals:
             self.distance_map.set(optionals['distance_map'])
 
-
-
         if regenerate_schedule or regenerate_rail or self.get_num_agents() == 0:
             agents_hints = None
             if optionals and 'agents_hints' in optionals:
@@ -475,7 +469,6 @@ class RailEnv(Environment):
 
         """
 
-        #malfunction: Malfunction = self.malfunction_generator(agent, self.np_random)
         if "generate" in dir(self.malfunction_generator):
             malfunction: mal_gen.Malfunction = self.malfunction_generator.generate(agent, self.np_random)
         else:
@@ -564,15 +557,13 @@ class RailEnv(Environment):
                 # Perform step on the agent
                 self._step_agent_cf(i_agent, action_dict_.get(i_agent))
 
-        
             # second loop: check for collisions / conflicts
             self.motionCheck.find_conflicts()
 
-
             # third loop: update positions
             for i_agent, agent in enumerate(self.agents):
                 self._step_agent2_cf(i_agent)
-                
+
                 # manage the boolean flag to check if all agents are indeed done (or done_removed)
                 have_all_agents_ended &= (agent.status in [RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED])
 
@@ -584,9 +575,6 @@ class RailEnv(Environment):
 
                 # Fix agents that finished their malfunction such that they can perform an action in the next step
                 self._fix_agent_after_malfunction(agent)
-        
-        
-
 
         # Check for end of episode + set global reward to all rewards!
         if have_all_agents_ended:
@@ -601,8 +589,6 @@ class RailEnv(Environment):
 
         return self._get_observations(), self.rewards_dict, self.dones, info_dict
 
-
-
     def _step_agent(self, i_agent, action: Optional[RailEnvActions] = None):
         """
         Performs a step and step, start and stop penalty on a single agent in the following sub steps:
@@ -714,7 +700,7 @@ class RailEnv(Environment):
                 # Perform stored action to transition to the next cell as soon as cell is free
                 # Notice that we've already checked new_cell_valid and transition valid when we stored the action,
                 # so we only have to check cell_free now!
-                
+
                 # Traditional check that next cell is free
                 # cell and transition validity was checked when we stored transition_action_on_cellexit!
                 cell_free, new_cell_valid, new_direction, new_position, transition_valid = self._check_action_on_agent(
@@ -727,7 +713,6 @@ class RailEnv(Environment):
                     self._move_agent_to_new_position(agent, new_position)
                     agent.direction = new_direction
                     agent.speed_data['position_fraction'] = 0.0
-                
 
             # has the agent reached its target?
             if np.equal(agent.position, agent.target).all():
@@ -758,7 +743,6 @@ class RailEnv(Environment):
                 self.motionCheck.addAgent(i_agent, None, None)
             return
 
-
         agent.old_direction = agent.direction
         agent.old_position = agent.position
 
@@ -767,7 +751,7 @@ class RailEnv(Environment):
         if agent.malfunction_data['malfunction'] > 0:
             self.motionCheck.addAgent(i_agent, agent.position, agent.position)
             # agent will get penalty in step_agent2_cf
-            #self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
+            # self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
             return
 
         # Is the agent at the beginning of the cell? Then, it can take an action.
@@ -827,14 +811,14 @@ class RailEnv(Environment):
                     self.rewards_dict[i_agent] += self.invalid_action_penalty
                     self.rewards_dict[i_agent] += self.stop_penalty
                     agent.moving = False
-                    self.motionCheck.addAgent(i_agent, agent.position, agent.position)    
+                    self.motionCheck.addAgent(i_agent, agent.position, agent.position)
                     return
 
             if new_position is None:
                 self.motionCheck.addAgent(i_agent, agent.position, agent.position)
                 if agent.moving:
                     print("Agent", i_agent, "new_pos none, but moving")
-            
+
         # Check the pos_frac position fraction
         if agent.moving:
             agent.speed_data['position_fraction'] += agent.speed_data['speed']
@@ -853,27 +837,25 @@ class RailEnv(Environment):
             else:  # This agent hasn't yet crossed the cell
                 self.motionCheck.addAgent(i_agent, agent.position, agent.position)
 
-
-
     def _step_agent2_cf(self, i_agent):
         agent = self.agents[i_agent]
 
-        if agent.status in [ RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED ]:
+        if agent.status in [RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED]:
             return
 
         (move, rc_next) = self.motionCheck.check_motion(i_agent, agent.position)
 
         if agent.position is not None:
             sbTrans = format(self.rail.grid[agent.position], "016b")
-            trans_block = sbTrans[agent.direction*4 : agent.direction * 4 + 4]
+            trans_block = sbTrans[agent.direction * 4: agent.direction * 4 + 4]
             if (trans_block == "0000"):
                 print (i_agent, agent.position, agent.direction, sbTrans, trans_block)
 
         # if agent cannot enter env, then we should have move=False
-        
+
         if move:
             if agent.position is None:  # agent is entering the env
-                #print(i_agent, "writing new pos ", rc_next, " into agent position (None)")
+                # print(i_agent, "writing new pos ", rc_next, " into agent position (None)")
                 agent.position = rc_next
                 agent.status = RailAgentStatus.ACTIVE
                 agent.speed_data['position_fraction'] = 0.0
@@ -881,7 +863,7 @@ class RailEnv(Environment):
             else:  # normal agent move
                 cell_free, new_cell_valid, new_direction, new_position, transition_valid = self._check_action_on_agent(
                     agent.speed_data['transition_action_on_cellexit'], agent)
-        
+
                 if not all([transition_valid, new_cell_valid]):
                     print(f"ERRROR: step_agent2 invalid transition ag {i_agent} dir {new_direction} pos {agent.position} next {rc_next}")
 
@@ -891,14 +873,14 @@ class RailEnv(Environment):
                           f"stored action: {agent.speed_data['transition_action_on_cellexit']}")
 
                 sbTrans = format(self.rail.grid[agent.position], "016b")
-                trans_block = sbTrans[agent.direction*4 : agent.direction * 4 + 4]
+                trans_block = sbTrans[agent.direction * 4: agent.direction * 4 + 4]
                 if (trans_block == "0000"):
                     print ("ERROR: ", i_agent, agent.position, agent.direction, sbTrans, trans_block)
 
                 agent.position = rc_next
                 agent.direction = new_direction
-                agent.speed_data['position_fraction'] = 0.0                        
-          
+                agent.speed_data['position_fraction'] = 0.0
+
             # has the agent reached its target?
             if np.equal(agent.position, agent.target).all():
                 agent.status = RailAgentStatus.DONE
@@ -912,10 +894,6 @@ class RailEnv(Environment):
             # step penalty if not moving (stopped now or before)
             self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
 
-
-
-
-
     def _set_agent_to_initial_position(self, agent: EnvAgent, new_position: IntVector2D):
         """
         Sets the agent to its initial position. Updates the agent object and the position
@@ -1091,7 +1069,7 @@ class RailEnv(Environment):
         ------
         Dict object
         """
-        #print(f"_get_obs - num agents: {self.get_num_agents()} {list(range(self.get_num_agents()))}")
+        # print(f"_get_obs - num agents: {self.get_num_agents()} {list(range(self.get_num_agents()))}")
         self.obs_dict = self.obs_builder.get_many(list(range(self.get_num_agents())))
         return self.obs_dict
 
@@ -1110,8 +1088,6 @@ class RailEnv(Environment):
         """
         return Grid4Transitions.get_entry_directions(self.rail.get_full_transitions(row, col))
 
-
-
     def _exp_distirbution_synced(self, rate: float) -> float:
         """
         Generates sample from exponential distribution
@@ -1140,5 +1116,3 @@ class RailEnv(Environment):
     def save(self, filename):
         print("deprecated call to env.save() - pls call RailEnvPersister.save()")
         persistence.RailEnvPersister.save(self, filename)
-
-
diff --git a/flatland/evaluators/service.py b/flatland/evaluators/service.py
index dff9fe6448e3799c79739405f57325163a8e271f..7eef7f0667335cddd233a17327ea06960e8cfc97 100644
--- a/flatland/evaluators/service.py
+++ b/flatland/evaluators/service.py
@@ -9,6 +9,7 @@ import time
 import traceback
 import json
 import itertools
+import re
 
 import crowdai_api
 import msgpack
@@ -107,6 +108,7 @@ class FlatlandRemoteEvaluationService:
                  shuffle=True,
                  missing_only=False,
                  result_output_path=None,
+                 disable_timeouts=False
                  ):
 
         # Episode recording properties
@@ -121,6 +123,17 @@ class FlatlandRemoteEvaluationService:
             os.makedirs(self.mergeDir)
         self.use_pickle = use_pickle
         self.missing_only = missing_only
+        self.disable_timeouts = disable_timeouts
+
+        if self.disable_timeouts:
+            print("=" * 20)
+            print("Timeout are DISABLED!")
+            print("=" * 20)
+
+        if not shuffle:
+            print("=" * 20)
+            print("Env shuffling is DISABLED!")
+            print("=" * 20)
 
         # Test Env folder Paths
         self.test_env_folder = test_env_folder
@@ -202,6 +215,7 @@ class FlatlandRemoteEvaluationService:
         self.simulation_steps = []
         self.simulation_times = []
         self.env_step_times = []
+        self.nb_malfunctioning_trains = []
         self.begin_simulation = False
         self.current_step = 0
         self.visualize = visualize
@@ -279,12 +293,15 @@ class FlatlandRemoteEvaluationService:
                 ├── .......
                 └── Level_99.pkl
         """
-        env_paths = sorted(glob.glob(
-            os.path.join(
-                self.test_env_folder,
-                "*/*.pkl"
+        env_paths = sorted(
+            glob.glob(
+                os.path.join(
+                    self.test_env_folder,
+                    "*/*.pkl"
+                )
             )
-        ))
+        )
+
         # Remove the root folder name from the individual
         # lists, so that we only have the path relative
         # to the test root folder
@@ -292,13 +309,21 @@ class FlatlandRemoteEvaluationService:
             x, self.test_env_folder
         ) for x in env_paths])
 
+        # Sort numerically by test id, then level id
+        def get_file_order(f):
+            numbers = re.findall(r'\d+', os.path.relpath(f))
+            value = int(numbers[0]) * 1000 + int(numbers[1])
+            return value
+
+        env_paths.sort(key=get_file_order)
+
         # if requested, only generate actions for those envs which don't already have them
         if self.mergeDir and self.missing_only:
             existing_paths = (itertools.chain.from_iterable(
                 [glob.glob(os.path.join(self.mergeDir, f"envs/*.{ext}"))
                  for ext in ["pkl", "mpk"]]))
             existing_paths = [os.path.relpath(sPath, self.mergeDir) for sPath in existing_paths]
-            env_paths = sorted(set(env_paths) - set(existing_paths))
+            env_paths = set(env_paths) - set(existing_paths)
 
         return env_paths
 
@@ -308,7 +333,7 @@ class FlatlandRemoteEvaluationService:
             information specific to each of the individual env evaluations.
 
             This loads the template CSV with pre-filled information from the
-            provided metadata.csv file, and fills it up with 
+            provided metadata.csv file, and fills it up with
             evaluation runtime information.
         """
         self.evaluation_metadata_df = None
@@ -329,6 +354,7 @@ class FlatlandRemoteEvaluationService:
             self.evaluation_metadata_df["percentage_complete"] = np.nan
             self.evaluation_metadata_df["steps"] = np.nan
             self.evaluation_metadata_df["simulation_time"] = np.nan
+            self.evaluation_metadata_df["nb_malfunctioning_trains"] = np.nan
 
             # Add client specific columns
             # TODO: This needs refactoring
@@ -341,7 +367,7 @@ class FlatlandRemoteEvaluationService:
     def update_evaluation_metadata(self):
         """
         This function is called when we move from one simulation to another
-        and it simply tries to update the simulation specific information 
+        and it simply tries to update the simulation specific information
         for the **previous** episode in the metadata_df if it exists.
         """
 
@@ -357,6 +383,7 @@ class FlatlandRemoteEvaluationService:
             _row.percentage_complete = self.simulation_percentage_complete[-1]
             _row.steps = self.simulation_steps[-1]
             _row.simulation_time = self.simulation_times[-1]
+            _row.nb_malfunctioning_trains = self.nb_malfunctioning_trains[-1]
 
             # TODO: This needs refactoring
             # Add controller_inference_time_metrics
@@ -380,7 +407,7 @@ class FlatlandRemoteEvaluationService:
                 last_simulation_env_file_path
             ] = _row
 
-            # Delete this key from the stats to ensure that it 
+            # Delete this key from the stats to ensure that it
             # gets computed again from scratch in the next episode
             self.delete_key_in_running_stats(
                 "current_episode_controller_inference_time")
@@ -462,6 +489,9 @@ class FlatlandRemoteEvaluationService:
             """
             COMMAND_TIMEOUT = 10 ** 6
 
+        if self.disable_timeouts:
+            COMMAND_TIMEOUT = None
+
         @timeout_decorator.timeout(COMMAND_TIMEOUT, use_signals=use_signals_in_timeout)  # timeout for each command
         def _get_next_command(command_channel, _redis):
             """
@@ -576,7 +606,7 @@ class FlatlandRemoteEvaluationService:
             There are still test envs left that are yet to be evaluated 
             """
             test_env_file_path = self.env_file_paths[self.simulation_count]
-            print("Evaluating : {}".format(test_env_file_path))
+            print("Evaluating {} ({}/{})".format(test_env_file_path, self.simulation_count, len(self.env_file_paths)))
             test_env_file_path = os.path.join(
                 self.test_env_folder,
                 test_env_file_path
@@ -589,11 +619,6 @@ class FlatlandRemoteEvaluationService:
                                obs_builder_object=DummyObservationBuilder(),
                                record_steps=True)
 
-            if self.begin_simulation:
-                # If begin simulation has already been initialized
-                # atleast once
-                # This adds the simulation time for the previous episode
-                self.simulation_times.append(time.time() - self.begin_simulation)
             self.begin_simulation = time.time()
 
             # Update evaluation metadata for the previous episode
@@ -610,6 +635,7 @@ class FlatlandRemoteEvaluationService:
             self.simulation_rewards_normalized.append(0)
             self.simulation_percentage_complete.append(0)
             self.simulation_steps.append(0)
+            self.nb_malfunctioning_trains.append(0)
 
             self.current_step = 0
 
@@ -715,6 +741,12 @@ class FlatlandRemoteEvaluationService:
                 self.env.get_num_agents()
             )
 
+        num_malfunctioning = sum(agent.malfunction_data['malfunction'] > 0 for agent in self.env.agents)
+        if (num_malfunctioning > 0):
+            print(num_malfunctioning, "agent(s) malfunctioning at step", self.current_step)
+
+        self.nb_malfunctioning_trains[-1] += num_malfunctioning
+
         # record the actions before checking for done
         if self.actionDir is not None:
             self.lActions.append(action)
@@ -723,6 +755,11 @@ class FlatlandRemoteEvaluationService:
         if done["__all__"]:
             self.simulation_done = True
 
+            if self.begin_simulation:
+                # begin_simulation was set when this episode started, so this
+                # records the wall-clock simulation time of the episode that just finished
+                self.simulation_times.append(time.time() - self.begin_simulation)
+
             # Compute percentage complete
             complete = 0
             for i_agent in range(self.env.get_num_agents()):
@@ -732,12 +769,19 @@ class FlatlandRemoteEvaluationService:
             percentage_complete = complete * 1.0 / self.env.get_num_agents()
             self.simulation_percentage_complete[-1] = percentage_complete
 
-            print("Evaluation finished in {} timesteps. Percentage agents done: {:.3f}. Normalized reward: {:.3f}.".format(
+            print("Evaluation finished in {} timesteps, {:.3f} seconds. Percentage agents done: {:.3f}. Normalized reward: {:.3f}. Number of malfunctions: {}.".format(
                 self.simulation_steps[-1],
+                self.simulation_times[-1],
                 self.simulation_percentage_complete[-1],
-                self.simulation_rewards_normalized[-1]
+                self.simulation_rewards_normalized[-1],
+                self.nb_malfunctioning_trains[-1]
             ))
 
+            # Write intermediate results
+            if self.result_output_path:
+                self.evaluation_metadata_df.to_csv(self.result_output_path)
+                print("Wrote intermediate output results to : {}".format(self.result_output_path))
+
             if self.actionDir is not None:
                 self.save_actions()
 
@@ -883,7 +927,7 @@ class FlatlandRemoteEvaluationService:
             self.evaluation_metadata_df.to_csv(self.result_output_path)
             print("Wrote output results to : {}".format(self.result_output_path))
 
-            # Upload the metadata file to S3 
+            # Upload the metadata file to S3
             if aicrowd_helpers.is_grading() and aicrowd_helpers.is_aws_configured():
                 metadata_s3_key = aicrowd_helpers.upload_to_s3(
                     self.result_output_path
@@ -925,7 +969,7 @@ class FlatlandRemoteEvaluationService:
         #################################################################################
         # Compute the mean rewards, mean normalized_reward and mean_percentage_complete
         # we group all the results by the test_ids
-        # so we first compute the mean in each of the test_id groups, 
+        # so we first compute the mean in each of the test_id groups,
         # and then we compute the mean across each of the test_id groups
         #
         #
@@ -1004,10 +1048,11 @@ class FlatlandRemoteEvaluationService:
                 self.simulation_rewards[-1] = self.env._max_episode_steps * self.env.get_num_agents()
                 self.simulation_rewards_normalized[-1] = -1.0
 
-                print("Evaluation TIMED OUT after {} timesteps, using max penalty. Percentage agents done: {:.3f}. Normalized reward: {:.3f}.".format(
+                print("Evaluation TIMED OUT after {} timesteps, using max penalty. Percentage agents done: {:.3f}. Normalized reward: {:.3f}. Number of malfunctions: {}".format(
                     self.simulation_steps[-1],
                     self.simulation_percentage_complete[-1],
-                    self.simulation_rewards_normalized[-1]
+                    self.simulation_rewards_normalized[-1],
+                    self.nb_malfunctioning_trains[-1],
                 ))
 
                 self.timeout_counter += 1
@@ -1079,12 +1124,12 @@ class FlatlandRemoteEvaluationService:
                     return _error
                 ###########################################
                 # We keep a record of the previous command
-                # to be able to have different behaviors 
+                # to be able to have different behaviors
                 # between different "command transitions"
-                # 
-                # An example use case, is when we want to 
-                # have a different timeout for the 
-                # first step in every environment 
+                #
+                # An example use case, is when we want to
+                # have a different timeout for the
+                # first step in every environment
                 # to account for some initial planning time
                 self.previous_command = command
             except Exception as e:
@@ -1141,12 +1186,24 @@ if __name__ == "__main__":
                         help="don't shuffle the envs.  Default is to shuffle.",
                         required=False)
 
+    parser.add_argument('--disableTimeouts',
+                        default=False,
+                        action="store_true",
+                        help="Disable all timeouts.",
+                        required=False)
+
     parser.add_argument('--missingOnly',
                         default=False,
                         action="store_true",
                         help="only request the envs/actions which are missing",
                         required=False)
 
+    parser.add_argument('--resultsDir',
+                        default="/tmp/output.csv",
+                        help="Results CSV path",
+                        required=False)
+
+
     parser.add_argument('--verbose',
                         default=False,
                         action="store_true",
@@ -1162,13 +1219,14 @@ if __name__ == "__main__":
         verbose=args.verbose,
         visualize=True,
         video_generation_envs=["Test_0/Level_100.pkl"],
-        result_output_path="/tmp/output.csv",
+        result_output_path=args.resultsDir,
         actionDir=args.actionDir,
         episodeDir=args.episodeDir,
         mergeDir=args.mergeDir,
         use_pickle=args.pickle,
         shuffle=not args.noShuffle,
         missing_only=args.missingOnly,
+        disable_timeouts=args.disableTimeouts
     )
     result = grader.run()
     if result['type'] == messages.FLATLAND_RL.ENV_SUBMIT_RESPONSE:
diff --git a/requirements_dev.txt b/requirements_dev.txt
index aff1d72c46cc877d72683059f0e428c6e9f054c3..93414562b79e3c0d5e1a77e42b967dc0ea4028fe 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -21,6 +21,5 @@ attrs
 gym==0.14.0
 networkx
 ipycanvas
-pygraphviz
 graphviz
 imageio