diff --git a/sequential_agent/simple_order_agent.py b/sequential_agent/simple_order_agent.py
index 3feff350e94226f157559036abdaea8d5dc18bf9..e572434685554acc67a64a550b3d0213de5681d4 100644
--- a/sequential_agent/simple_order_agent.py
+++ b/sequential_agent/simple_order_agent.py
@@ -1,5 +1,5 @@
 import numpy as np
-from utils.observation_utils import split_tree, min_lt
+from utils.observation_utils import split_tree, min_gt
 
 
 class OrderedAgent:
@@ -15,7 +15,7 @@ class OrderedAgent:
         _, distance, _ = split_tree(tree=np.array(state), num_features_per_node=9,
                                     current_depth=0)
         distance = distance[1:]
-        min_dist = min_lt(distance, 0)
+        min_dist = min_gt(distance, 0)
         min_direction = np.where(distance == min_dist)
         if len(min_direction[0]) > 1:
             return min_direction[0][-1] + 1
diff --git a/torch_training/Nets/avoid_checkpoint60000.pth b/torch_training/Nets/avoid_checkpoint60000.pth
index b6f15348130b09ae8bee0adad454031fc013fabf..b4fef60542f50419353047721fae31f5382e7bd4 100644
Binary files a/torch_training/Nets/avoid_checkpoint60000.pth and b/torch_training/Nets/avoid_checkpoint60000.pth differ
diff --git a/torch_training/multi_agent_inference.py b/torch_training/multi_agent_inference.py
index 2b541219e688a8b55e20b412fd91f9e8cc22b9cb..6a9ed8e967a6389d828d7444e68fa9bc845dd0a1 100644
--- a/torch_training/multi_agent_inference.py
+++ b/torch_training/multi_agent_inference.py
@@ -17,7 +17,7 @@ from utils.observation_utils import normalize_observation
 random.seed(3)
 np.random.seed(2)
 
-file_name = "./railway/simple_avoid.pkl"
+file_name = "./railway/testing_stuff.pkl"
 env = RailEnv(width=10,
               height=20,
               rail_generator=rail_from_file(file_name),
@@ -41,6 +41,7 @@ env = RailEnv(width=x_dim,
               obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()),
               number_of_agents=n_agents)
 env.reset(True, True)
+
 """
 tree_depth = 3
 observation_helper = TreeObsForRailEnv(max_depth=tree_depth, predictor=ShortestPathPredictorForRailEnv())
@@ -70,7 +71,7 @@ action_prob = [0] * action_size
 agent_obs = [None] * env.get_num_agents()
 agent_next_obs = [None] * env.get_num_agents()
 agent = Agent(state_size, action_size, "FC", 0)
-with path(torch_training.Nets, "avoid_checkpoint49000.pth") as file_in:
+with path(torch_training.Nets, "avoid_checkpoint59900.pth") as file_in:
     agent.qnetwork_local.load_state_dict(torch.load(file_in))
 
 record_images = False
@@ -93,7 +94,7 @@ for trials in range(1, n_trials + 1):
         if record_images:
             env_renderer.gl.save_image("./Images/Avoiding/flatland_frame_{:04d}.bmp".format(frame_step))
             frame_step += 1
-        # time.sleep(5)
+        time.sleep(1.5)
         # Action
         for a in range(env.get_num_agents()):
             action = agent.act(agent_obs[a], eps=0)
diff --git a/torch_training/multi_agent_training.py b/torch_training/multi_agent_training.py
index 5e194f51b09bbe4e81e257caedf3d57d17e04bca..c3c8c2b6b49dfeee1eceafa52d2bc0b4f1ff366e 100644
--- a/torch_training/multi_agent_training.py
+++ b/torch_training/multi_agent_training.py
@@ -73,7 +73,7 @@ def main(argv):
     n_episodes = 60000
 
     # Set max number of steps per episode as well as other training relevant parameter
-    max_steps = int(3 * (env.height + env.width))
+    max_steps = int((env.height + env.width))
     eps = 1.
     eps_end = 0.005
     eps_decay = 0.9995
@@ -102,7 +102,7 @@ def main(argv):
        Training Curriculum: In order to get good generalization we change the number of agents and the size of the levels
        every 50 episodes.
        """
-        if episodes % 50 == 0:
+        if episodes % 50 == 1:
            x_dim = np.random.randint(8, 15)
            y_dim = np.random.randint(8, 15)
            n_agents = np.random.randint(3, 8)
@@ -117,7 +117,7 @@ def main(argv):
                          number_of_agents=n_agents)
 
            # Adjust the parameters according to the new env.
-           max_steps = int(3 * (env.height + env.width))
+           max_steps = int((env.height + env.width))
            agent_obs = [None] * env.get_num_agents()
            agent_next_obs = [None] * env.get_num_agents()
 
@@ -174,7 +174,11 @@ def main(argv):
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
 
        # Collection information about training
-       done_window.append(env_done)
+       tasks_finished = 0
+       for _idx in range(env.get_num_agents()):
+           if done[_idx] == 1:
+               tasks_finished += 1
+       done_window.append(tasks_finished / env.get_num_agents())
        scores_window.append(score / max_steps)  # save most recent score
        scores.append(np.mean(scores_window))
        dones_list.append((np.mean(done_window)))
diff --git a/utils/observation_utils.py b/utils/observation_utils.py
index b3dd5aefdc8e57e79fe816efadf2a05d50e02d8b..26108cca8da0156c07f01b8f036167ad449b8d8f 100644
--- a/utils/observation_utils.py
+++ b/utils/observation_utils.py
@@ -15,7 +15,7 @@ def max_lt(seq, val):
     return max
 
 
-def min_lt(seq, val):
+def min_gt(seq, val):
     """
     Return smallest item in seq for which item > val applies.
     None is returned if seq was empty or all items in seq were >= val.
@@ -29,7 +29,7 @@ def min_lt(seq, val):
     return min
 
 
-def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0):
+def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0, normalize_to_range=False):
     """
     This function returns the difference between min and max value of an observation
     :param obs: Observation that should be normalized
@@ -42,13 +42,12 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0):
     else:
         max_obs = max(1, max_lt(obs, 1000)) + 1
 
-    min_obs = 0  # min(max_obs, min_lt(obs, 0))
-
+    min_obs = 0  # min(max_obs, min_gt(obs, 0))
+    if normalize_to_range:
+        min_obs = min_gt(obs, 0)
     if max_obs == min_obs:
         return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
     norm = np.abs(max_obs - min_obs)
-    if norm == 0:
-        norm = 1.
     return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
 
 
@@ -103,7 +102,7 @@ def normalize_observation(observation, num_features_per_node=9, observation_radi
     data, distance, agent_data = split_tree(tree=np.array(observation), num_features_per_node=num_features_per_node,
                                             current_depth=0)
     data = norm_obs_clip(data, fixed_radius=observation_radius)
-    distance = norm_obs_clip(distance)
+    distance = norm_obs_clip(distance, normalize_to_range=True)
     agent_data = np.clip(agent_data, -1, 1)
     normalized_obs = np.concatenate((np.concatenate((data, distance)), agent_data))
     return normalized_obs