diff --git a/score_test.py b/score_test.py
index a8bbda7ef257025c4dd79c303394f32cdef095a6..5f61300637561e5a29acfdfe498cc564b42a3e54 100644
--- a/score_test.py
+++ b/score_test.py
@@ -1,27 +1,21 @@
-import random
 import time
-from collections import deque
 
-import matplotlib.pyplot as plt
 import numpy as np
 import torch
-from torch_training.dueling_double_dqn import Agent
-from flatland.envs.observations import TreeObsForRailEnv
-from flatland.envs.predictions import ShortestPathPredictorForRailEnv
-from flatland.envs.rail_env import RailEnv
-from flatland.utils.rendertools import RenderTool
-from flatland.envs.generators import complex_rail_generator
-from utils.observation_utils import norm_obs_clip, split_tree
-from flatland.utils.rendertools import RenderTool
-from utils.misc_utils import printProgressBar, RandomAgent
 
+from torch_training.dueling_double_dqn import Agent
+from utils.misc_utils import RandomAgent, run_test
 
 with open('parameters.txt','r') as inf:
     parameters = eval(inf.read())
 
 # Parameter initialization
 features_per_node = 9
-state_size = features_per_node*21 * 2
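+# The tree observation branches into at most 4 children per node, so a tree of
+# depth d has sum(4**i for i in range(d + 1)) nodes. The factor 2 accounts for
+# stacking the observations of the two most recent time steps.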
+tree_depth = 3
+nodes = 0
+for i in range(tree_depth + 1):
+    nodes += np.power(4, i)
+state_size = features_per_node * nodes * 2
 action_size = 5
 action_dict = dict()
 nr_trials_per_test = 100
@@ -29,96 +23,18 @@ test_results = []
 test_times = []
 test_dones = []
 # Load agent
-#agent = Agent(state_size, action_size, "FC", 0)
-#agent.qnetwork_local.load_state_dict(torch.load('./torch_training/Nets/avoid_checkpoint30000.pth'))
+agent = Agent(state_size, action_size, "FC", 0)
+agent.qnetwork_local.load_state_dict(torch.load('./torch_training/Nets/avoid_checkpoint1700.pth'))
-agent = RandomAgent(state_size, action_size)
+# agent = RandomAgent(state_size, action_size)  # uncomment to score a random baseline instead of the trained agent
 start_time_scoring = time.time()
-for test_nr in parameters:
-    current_parameters = parameters[test_nr]
-    print('\nRunning {} with (x_dim,ydim) = ({},{}) and {} Agents.'.format(test_nr,current_parameters[0],current_parameters[1],current_parameters[2]))
-    # Reset all measurements
-    time_obs = deque(maxlen=2)
-    test_scores = []
-
-    tot_dones = 0
-    tot_test_score = 0
-
-    # Reset environment
-    random.seed(current_parameters[3])
-    np.random.seed(current_parameters[3])
-    nr_paths = max(2,current_parameters[2] + int(0.5*current_parameters[2]))
-    min_dist = int(min([current_parameters[0], current_parameters[1]])*0.75)
-    env = RailEnv(width=current_parameters[0],
-                  height=current_parameters[1],
-                  rail_generator=complex_rail_generator(nr_start_goal=nr_paths, nr_extra=5, min_dist=min_dist, max_dist=99999,
-                                                        seed=current_parameters[3]),
-                  obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv()),
-                  number_of_agents=current_parameters[2])
-    max_steps = max_steps = int(3 * (env.height + env.width))
-    agent_obs = [None] * env.get_num_agents()
-    env_renderer = RenderTool(env, gl="PILSVG", )
-    printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
-    start = time.time()
-    for trial in range(nr_trials_per_test):
-        # Reset the env
-        printProgressBar(trial+1, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
-        obs = env.reset(True, True)
-        #env_renderer.set_new_rail()
-        for a in range(env.get_num_agents()):
-            data, distance, agent_data = split_tree(tree=np.array(obs[a]), num_features_per_node=9,
-                                                    current_depth=0)
-            data = norm_obs_clip(data)
-            distance = norm_obs_clip(distance)
-            agent_data = np.clip(agent_data, -1, 1)
-            obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
-
-        for i in range(2):
-            time_obs.append(obs)
-
-        for a in range(env.get_num_agents()):
-            agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
-
-        # Run episode
-        trial_score = 0
-        for step in range(max_steps):
-
-            for a in range(env.get_num_agents()):
-
-                action = agent.act(agent_obs[a], eps=0)
-                action_dict.update({a: action})
-
-            # Environment step
-            next_obs, all_rewards, done, _ = env.step(action_dict)
-
-            for a in range(env.get_num_agents()):
-                data, distance, agent_data = split_tree(tree=np.array(next_obs[a]), num_features_per_node=features_per_node,
-                                                        current_depth=0)
-                data = norm_obs_clip(data)
-                distance = norm_obs_clip(distance)
-                agent_data = np.clip(agent_data, -1, 1)
-                next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
-            time_obs.append(next_obs)
-            for a in range(env.get_num_agents()):
-                agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
-                trial_score += all_rewards[a] / env.get_num_agents()
-            if done['__all__']:
-                tot_dones += 1
-                break
-        test_scores.append(trial_score / max_steps)
-    end = time.time()
-    comp_time = end-start
-    tot_test_score = np.mean(test_scores)
-    test_results.append(tot_test_score)
-    test_times.append(comp_time)
-    test_dones.append(tot_dones/nr_trials_per_test*100)
-end_time_scoring = time.time()
-tot_test_time = end_time_scoring-start_time_scoring
 test_idx = 0
-print('-----------------------------------------------')
-print('                     RESULTS')
-print('-----------------------------------------------')
 for test_nr in parameters:
-    print('{} score was = {:.3f} with {:.2f}% environments solved. Test took {} Seconds to complete.'.format(test_nr,
-                                                                                                             test_results[test_idx],test_dones[test_idx],test_times[test_idx]))
-    test_idx += 1
-print('Total scoring duration was', tot_test_time)
\ No newline at end of file
+    current_parameters = parameters[test_nr]
+    test_score, test_dones, test_time = run_test(current_parameters, agent, test_nr=test_idx, tree_depth=tree_depth)
+    print('---------')
+    print(' RESULTS')
+    print('---------')
+    print('{} score was {:.3f}, with {:.2f}% of environments solved. Test took {:.2f} seconds to complete.\n\n\n'.format(
+        test_nr,
+        np.mean(test_score), np.mean(test_dones) * 100, test_time))
+    test_idx += 1
\ No newline at end of file
diff --git a/torch_training/Nets/avoid_checkpoint15000.pth b/torch_training/Nets/avoid_checkpoint15000.pth
index 1ab84e032c64b9b07e138bfa1191a7cfe09f9b6c..ba488f1a074af3f62ad87d54f61a251e8292ae50 100644
Binary files a/torch_training/Nets/avoid_checkpoint15000.pth and b/torch_training/Nets/avoid_checkpoint15000.pth differ
diff --git a/torch_training/training_navigation.py b/torch_training/training_navigation.py
index 65f1e9d00d6893732e96d3edbcab9642a07aeb2d..0356b531a5c9518820754e04f8e16a450256f5d2 100644
--- a/torch_training/training_navigation.py
+++ b/torch_training/training_navigation.py
@@ -5,11 +5,12 @@ import matplotlib.pyplot as plt
 import numpy as np
 import torch
 from dueling_double_dqn import Agent
+from flatland.envs.generators import complex_rail_generator
 from flatland.envs.observations import TreeObsForRailEnv
 from flatland.envs.predictions import ShortestPathPredictorForRailEnv
 from flatland.envs.rail_env import RailEnv
 from flatland.utils.rendertools import RenderTool
-from flatland.envs.generators import complex_rail_generator
+
 from utils.observation_utils import norm_obs_clip, split_tree
 
 random.seed(1)
@@ -47,26 +48,34 @@ env = RailEnv(width=10,
 env.load("./railway/complex_scene.pkl")
 file_load = True
 """
-
-env = RailEnv(width=100,
-              height=100,
-              rail_generator=complex_rail_generator(nr_start_goal=100, nr_extra=5, min_dist=5, max_dist=99999, seed=0),
-              obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv()),
-              number_of_agents=1)
-file_load = False
+x_dim = np.random.randint(8, 20)
+y_dim = np.random.randint(8, 20)
+n_agents = np.random.randint(3, 8)
+n_goals = n_agents + np.random.randint(0, 3)
+min_dist = int(0.75 * min(x_dim, y_dim))
+env = RailEnv(width=x_dim,
+              height=y_dim,
+              rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
+                                                    max_dist=99999,
+                                                    seed=0),
+              obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()),
+              number_of_agents=n_agents)
 env.reset(True, True)
+file_load = False
+
 """
 """
+observation_helper = TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv())
 env_renderer = RenderTool(env, gl="PILSVG",)
 handle = env.get_agent_handles()
 features_per_node = 9
-state_size = features_per_node*21 * 2
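+# 85 = 1 + 4 + 16 + 64 nodes in a depth-3 tree observation; the factor 2 accounts
+# for stacking the observations of the two most recent time steps.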
+state_size = features_per_node * 85 * 2
 action_size = 5
 n_trials = 30000
 max_steps = int(3 * (env.height + env.width))
 eps = 1.
 eps_end = 0.005
-eps_decay = 0.9997
+eps_decay = 0.9995
 action_dict = dict()
 final_action_dict = dict()
 scores_window = deque(maxlen=100)
@@ -78,23 +87,39 @@ action_prob = [0] * action_size
 agent_obs = [None] * env.get_num_agents()
 agent_next_obs = [None] * env.get_num_agents()
 agent = Agent(state_size, action_size, "FC", 0)
-agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint30000.pth'))
+# agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint15000.pth'))
 
-demo = True
+demo = False
 record_images = False
 
 
-
-
 for trials in range(1, n_trials + 1):
 
+    if trials % 50 == 0 and not demo:
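+        # Regenerate a random environment (new size, agent count and goal count)
+        # so training is not tied to a single railway layout.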
+        x_dim = np.random.randint(8, 20)
+        y_dim = np.random.randint(8, 20)
+        n_agents = np.random.randint(3, 8)
+        n_goals = n_agents + np.random.randint(0, 3)
+        min_dist = int(0.75 * min(x_dim, y_dim))
+        env = RailEnv(width=x_dim,
+                      height=y_dim,
+                      rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
+                                                            max_dist=99999,
+                                                            seed=0),
+                      obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()),
+                      number_of_agents=n_agents)
+        env.reset(True, True)
+        max_steps = int(3 * (env.height + env.width))
+        agent_obs = [None] * env.get_num_agents()
+        agent_next_obs = [None] * env.get_num_agents()
     # Reset environment
-    if file_load :
+    if file_load:
         obs = env.reset(False, False)
     else:
         obs = env.reset(True, True)
     if demo:
         env_renderer.set_new_rail()
+    obs_original = obs.copy()
     final_obs = obs.copy()
     final_obs_next = obs.copy()
     for a in range(env.get_num_agents()):
@@ -120,7 +145,8 @@ for trials in range(1, n_trials + 1):
     # Run episode
     for step in range(max_steps):
         if demo:
-            env_renderer.renderEnv(show=True, show_observations=False)
+            env_renderer.renderEnv(show=True, show_observations=True)
+            observation_helper.util_print_obs_subtree(obs_original[0])
             if record_images:
                 env_renderer.gl.saveImage("./Images/flatland_frame_{:04d}.bmp".format(step))
         # print(step)
@@ -135,6 +161,8 @@ for trials in range(1, n_trials + 1):
         # Environment step
 
         next_obs, all_rewards, done, _ = env.step(action_dict)
+        # print(all_rewards,action)
+        obs_original = next_obs.copy()
         for a in range(env.get_num_agents()):
             data, distance, agent_data = split_tree(tree=np.array(next_obs[a]), num_features_per_node=features_per_node,
                                                     current_depth=0)
@@ -170,8 +198,8 @@ for trials in range(1, n_trials + 1):
     dones_list.append((np.mean(done_window)))
 
     print(
-        '\rTraining {} Agents.\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
-              env.get_num_agents(),
+        '\rTraining {} Agents on ({},{}).\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+            env.get_num_agents(), x_dim, y_dim,
               trials,
               np.mean(scores_window),
               100 * np.mean(done_window),
diff --git a/utils/misc_utils.py b/utils/misc_utils.py
index 62189c52fda1a4f862df44268f5c815c1462a434..097450b6dbf1c3eab92d2c110df7528f2850a62e 100644
--- a/utils/misc_utils.py
+++ b/utils/misc_utils.py
@@ -1,5 +1,16 @@
-# Print iterations progress
+import random
+import time
+from collections import deque
+
 import numpy as np
+from flatland.envs.generators import complex_rail_generator
+from flatland.envs.observations import TreeObsForRailEnv
+from flatland.envs.predictions import ShortestPathPredictorForRailEnv
+from flatland.envs.rail_env import RailEnv
+
+from utils.observation_utils import norm_obs_clip, split_tree
+
+
 def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '*'):
     """
     Call in a loop to create terminal progress bar
@@ -49,3 +60,84 @@ class RandomAgent:
     def load(self, filename):
         # Load a policy
         return
+
+
+def run_test(parameters, agent, test_nr=0, tree_depth=3):
+    # Parameter initialization
+    features_per_node = 9
+    start_time_scoring = time.time()
+    action_dict = dict()
+    nr_trials_per_test = 100
+    print('Running Test {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, parameters[0], parameters[1],
+                                                                               parameters[2]))
+
+    # Reset all measurements
+    time_obs = deque(maxlen=2)
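+    # The network input is built from the two most recent observations, so keep a
+    # rolling window of length 2.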
+    test_scores = []
+    test_dones = []
+
+    # Reset environment
+    random.seed(parameters[3])
+    np.random.seed(parameters[3])
+    nr_paths = max(2, parameters[2] + int(0.5 * parameters[2]))
+    min_dist = int(min([parameters[0], parameters[1]]) * 0.75)
+    env = RailEnv(width=parameters[0],
+                  height=parameters[1],
+                  rail_generator=complex_rail_generator(nr_start_goal=nr_paths, nr_extra=5, min_dist=min_dist,
+                                                        max_dist=99999,
+                                                        seed=parameters[3]),
+                  obs_builder_object=TreeObsForRailEnv(max_depth=tree_depth,
+                                                       predictor=ShortestPathPredictorForRailEnv()),
+                  number_of_agents=parameters[2])
+    max_steps = int(3 * (env.height + env.width))
+    agent_obs = [None] * env.get_num_agents()
+    printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
+    for trial in range(nr_trials_per_test):
+        # Reset the env
+        obs = env.reset(True, True)
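+        # Normalize each agent's tree observation: split it into data, distance and
+        # agent-info components, clip/normalize them and flatten into one state vector.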
+        for a in range(env.get_num_agents()):
+            data, distance, agent_data = split_tree(tree=np.array(obs[a]), num_features_per_node=features_per_node,
+                                                    current_depth=0)
+            data = norm_obs_clip(data)
+            distance = norm_obs_clip(distance)
+            agent_data = np.clip(agent_data, -1, 1)
+            obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
+
+        for i in range(2):
+            time_obs.append(obs)
+
+        for a in range(env.get_num_agents()):
+            agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
+
+        # Run episode
+        trial_score = 0
+        for step in range(max_steps):
+
+            for a in range(env.get_num_agents()):
+                action = agent.act(agent_obs[a], eps=0)
+                action_dict.update({a: action})
+
+            # Environment step
+            next_obs, all_rewards, done, _ = env.step(action_dict)
+
+            for a in range(env.get_num_agents()):
+                data, distance, agent_data = split_tree(tree=np.array(next_obs[a]),
+                                                        num_features_per_node=features_per_node,
+                                                        current_depth=0)
+                data = norm_obs_clip(data)
+                distance = norm_obs_clip(distance)
+                agent_data = np.clip(agent_data, -1, 1)
+                next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
+            time_obs.append(next_obs)
+            for a in range(env.get_num_agents()):
+                agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
+                trial_score += all_rewards[a] / env.get_num_agents()
+
+            if done['__all__']:
+                break
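+        # Score the trial by the accumulated mean per-agent reward, normalized by the step budget.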
+        test_scores.append(trial_score / max_steps)
+        test_dones.append(done['__all__'])
+        printProgressBar(trial + 1, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
+    end_time_scoring = time.time()
+    tot_test_time = end_time_scoring - start_time_scoring
+    return test_scores, test_dones, tot_test_time