diff --git a/tests/manual_test_eval_timeout.sh b/tests/manual_test_eval_timeout.sh
index 901fc16fa0f8f32e017037d79775d8e2b6ee2bec..e4a1fa7a2841bc8999828a2ae9ee1da205462a4b 100755
--- a/tests/manual_test_eval_timeout.sh
+++ b/tests/manual_test_eval_timeout.sh
@@ -11,9 +11,12 @@ redis-cli KEYS "*" | grep -i flatland | xargs redis-cli DEL
 
 # you need to create the envs in the folder
 # best to delete all but 10 small ones
-gnome-terminal --title Service --window -- python -m flatland.evaluators.service --test_folder  ../submission-scoring/Envs/neurips2020_round1_v0/   
+#gnome-terminal --title Service --window -- python -m flatland.evaluators.service --test_folder  ../submission-scoring/Envs/neurips2020_round1_v0/   
+xterm -title Service -hold -e bash -c "python -m flatland.evaluators.service --test_folder ../submission-scoring/Envs/neurips2020_round1_v0/" &
 
-gnome-terminal --title test_eval_timeout  --window -- python tests/test_eval_timeout.py
+#gnome-terminal --title test_eval_timeout  --window -- python tests/test_eval_timeout.py
+xterm -title test_eval_timeout -hold -e bash -c "python tests/test_eval_timeout.py" &
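+# (-hold keeps each xterm window open after its command exits, so the service and test output stay visible)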
 
 
 
diff --git a/tests/test_eval_timeout.py b/tests/test_eval_timeout.py
index 564e8301975278676aecfc9ab156a71b902a30e5..422f04bd8b3bd6bd8e43daea34f9a36488b7958c 100644
--- a/tests/test_eval_timeout.py
+++ b/tests/test_eval_timeout.py
@@ -50,7 +50,7 @@ class CustomObservationBuilder(ObservationBuilder):
 
         return self.rail_obs, (status, position, direction, initial_position, target)
 
-remote_client = FlatlandRemoteClient(verbose=False)
+
 
 def my_controller(obs, number_of_agents):
     _action = {}
@@ -58,102 +58,116 @@ def my_controller(obs, number_of_agents):
         _action[_idx] = np.random.randint(0, 5)
     return _action
 
-my_observation_builder = CustomObservationBuilder()
 
-evaluation_number = 0
+def test_random_timeouts():
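+    """Run a bounded number of remote evaluation episodes, sleeping at random
+    during resets and steps to exercise the evaluator's timeout handling.
+    Requires a running evaluator service (see tests/manual_test_eval_timeout.sh)."""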
+    remote_client = FlatlandRemoteClient(verbose=False)
 
-step_delay_rate = 0.001
-step_delay = 6
+    my_observation_builder = CustomObservationBuilder()
 
-reset_delay_rate = 0.2
-reset_delay = 10
+    evaluation_number = 0
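+    # cap the number of episodes so the test always terminates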
+    n_evaluations = 10
 
+    step_delay_rate = 0.001
+    step_delay = 6
 
-while True:
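+    # with probability reset_delay_rate per episode, sleep reset_delay seconds right after env_create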
+    reset_delay_rate = 0.2
+    reset_delay = 10
 
-    evaluation_number += 1
-    # Switch to a new evaluation environemnt
-    # 
-    # a remote_client.env_create is similar to instantiating a 
-    # RailEnv and then doing a env.reset()
-    # hence it returns the first observation from the 
-    # env.reset()
-    # 
-    # You can also pass your custom observation_builder object
-    # to allow you to have as much control as you wish 
-    # over the observation of your choice.
-    time_start = time.time()
-    observation, info = remote_client.env_create(
-                    obs_builder_object=my_observation_builder
-                )
-    env_creation_time = time.time() - time_start
-    if not observation:
-        #
-        # If the remote_client returns False on a `env_create` call,
-        # then it basically means that your agent has already been 
-        # evaluated on all the required evaluation environments,
-        # and hence its safe to break out of the main evaluation loop
-        break
-    
-    print("Evaluation Number : {}".format(evaluation_number))
-
-    if np.random.uniform() < reset_delay_rate:
-        print(f"eval {evaluation_number} sleeping for {reset_delay} seconds")
-        time.sleep(reset_delay)
-
-    local_env = remote_client.env
-    number_of_agents = len(local_env.agents)
-
-    time_taken_by_controller = []
-    time_taken_per_step = []
-    steps = 0
-
-
-
-    while True:
-        time_start = time.time()
-        action = my_controller(observation, number_of_agents)
-        time_taken = time.time() - time_start
-        time_taken_by_controller.append(time_taken)
+    while evaluation_number < n_evaluations:
 
+        evaluation_number += 1
+        # Switch to a new evaluation environment.
+        #
+        # A remote_client.env_create is similar to instantiating a
+        # RailEnv and then doing an env.reset(),
+        # hence it returns the first observation from the
+        # env.reset().
+        #
+        # You can also pass your custom observation_builder object
+        # to allow you to have as much control as you wish
+        # over the observation of your choice.
         time_start = time.time()
-
-        try:
-            observation, all_rewards, done, info = remote_client.env_step(action)
-        except StopAsyncIteration as err:
-            print("timeout error ", err)
+        observation, info = remote_client.env_create(
+                        obs_builder_object=my_observation_builder
+                    )
+        env_creation_time = time.time() - time_start
+        if not observation:
+            #
+            # If the remote_client returns False on an `env_create` call,
+            # then it basically means that your agent has already been
+            # evaluated on all the required evaluation environments,
+            # and hence it's safe to break out of the main evaluation loop
             break
-
-        steps += 1
-        time_taken = time.time() - time_start
-        time_taken_per_step.append(time_taken)
-
-        if np.random.uniform() < step_delay_rate:
-            print(f"step {steps} sleeping for {step_delay} seconds")
-            time.sleep(step_delay)
-
-        if done['__all__']:
-            print("Reward : ", sum(list(all_rewards.values())))
-            break
-    
-    np_time_taken_by_controller = np.array(time_taken_by_controller)
-    np_time_taken_per_step = np.array(time_taken_per_step)
-    print("="*100)
-    print("="*100)
-    print("Evaluation Number : ", evaluation_number)
-    print("Current Env Path : ", remote_client.current_env_path)
-    print("Env Creation Time : ", env_creation_time)
-    print("Number of Steps : ", steps)
-    print("Mean/Std of Time taken by Controller : ", np_time_taken_by_controller.mean(), np_time_taken_by_controller.std())
-    print("Mean/Std of Time per Step : ", np_time_taken_per_step.mean(), np_time_taken_per_step.std())
-    print("="*100)
-
-print("Evaluation of all environments complete...")
-########################################################################
-# Submit your Results
-# 
-# Please do not forget to include this call, as this triggers the 
-# final computation of the score statistics, video generation, etc
-# and is necesaary to have your submission marked as successfully evaluated
-########################################################################
-print(remote_client.submit())
+        
+        print("Evaluation Number : {}".format(evaluation_number))
+
+        if np.random.uniform() < reset_delay_rate:
+            print(f"eval {evaluation_number} sleeping for {reset_delay} seconds")
+            time.sleep(reset_delay)
+
+        local_env = remote_client.env
+        number_of_agents = len(local_env.agents)
+
+        time_taken_by_controller = []
+        time_taken_per_step = []
+        steps = 0
+
+
+
+        while True:
+            time_start = time.time()
+            action = my_controller(observation, number_of_agents)
+            time_taken = time.time() - time_start
+            time_taken_by_controller.append(time_taken)
+
+            time_start = time.time()
+
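+            # the evaluator signals a timed-out step by raising StopAsyncIteration
+            # from env_step; end this episode and move on to the next one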
+            try:
+                observation, all_rewards, done, info = remote_client.env_step(action)
+            except StopAsyncIteration as err:
+                print("timeout error ", err)
+                break
+
+            steps += 1
+            time_taken = time.time() - time_start
+            time_taken_per_step.append(time_taken)
+
+            if np.random.uniform() < step_delay_rate:
+                print(f"step {steps} sleeping for {step_delay} seconds")
+                time.sleep(step_delay)
+
+            if done['__all__']:
+                print("Reward : ", sum(list(all_rewards.values())))
+                break
+        
+        np_time_taken_by_controller = np.array(time_taken_by_controller)
+        np_time_taken_per_step = np.array(time_taken_per_step)
+        print("="*100)
+        print("="*100)
+        print("Evaluation Number : ", evaluation_number)
+        print("Current Env Path : ", remote_client.current_env_path)
+        print("Env Creation Time : ", env_creation_time)
+        print("Number of Steps : ", steps)
+        print("Mean/Std of Time taken by Controller : ", np_time_taken_by_controller.mean(), np_time_taken_by_controller.std())
+        print("Mean/Std of Time per Step : ", np_time_taken_per_step.mean(), np_time_taken_per_step.std())
+        print("="*100)
+
+    print("Evaluation of all environments complete...")
+    ########################################################################
+    # Submit your Results
+    #
+    # Please do not forget to include this call, as it triggers the
+    # final computation of the score statistics, video generation, etc.,
+    # and is necessary to have your submission marked as successfully evaluated.
+    ########################################################################
+    print(remote_client.submit())
+
+if __name__ == "__main__":
+    test_random_timeouts()