diff --git a/checkpoints/201106234900-100.pth.local b/checkpoints/201106234900-100.pth.local
deleted file mode 100644
index 1b4b1cb2c430282098d599f76b71123fc84c9ba4..0000000000000000000000000000000000000000
Binary files a/checkpoints/201106234900-100.pth.local and /dev/null differ
diff --git a/checkpoints/201106234900-100.pth.target b/checkpoints/201106234900-100.pth.target
deleted file mode 100644
index 391a36c37d22af1fdc97de554f96d7ecdc0d4874..0000000000000000000000000000000000000000
Binary files a/checkpoints/201106234900-100.pth.target and /dev/null differ
diff --git a/checkpoints/201106234900-300.pth.local b/checkpoints/201106234900-300.pth.local
deleted file mode 100644
index 0da81f8a4d34395de91b3d9e516c2b7535685f46..0000000000000000000000000000000000000000
Binary files a/checkpoints/201106234900-300.pth.local and /dev/null differ
diff --git a/checkpoints/201106234900-300.pth.target b/checkpoints/201106234900-300.pth.target
deleted file mode 100644
index 8517ad36ccbddc6f4ef34e493120af6262e9f86a..0000000000000000000000000000000000000000
Binary files a/checkpoints/201106234900-300.pth.target and /dev/null differ
diff --git a/reinforcement_learning/multi_agent_training.py b/reinforcement_learning/multi_agent_training.py
index 74288e73bc182195e6b3f0778c4739ffbc487217..0f26a206d72de7bbc5393df477b500760a28eadf 100755
--- a/reinforcement_learning/multi_agent_training.py
+++ b/reinforcement_learning/multi_agent_training.py
@@ -264,7 +264,7 @@ def train_agent(train_params, train_env_params, eval_env_params, obs_params):
                 if info['action_required'][agent]:
                     update_values[agent] = True
 
-                    if agent == agent_to_learn:
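+                    # with 'or True' this condition is always true, so the learning policy acts for every agent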
+                    if agent == agent_to_learn or True:
                         action = policy.act(agent_obs[agent], eps=eps_start)
                     else:
                         action = policy2.act([agent], eps=eps_start)
@@ -284,20 +284,21 @@ def train_agent(train_params, train_env_params, eval_env_params, obs_params):
             step_timer.start()
             next_obs, all_rewards, done, info = train_env.step(action_dict)
 
-            for agent in train_env.get_agent_handles():
-                act = action_dict.get(agent, RailEnvActions.DO_NOTHING)
-                if agent_obs[agent][26] == 1:
-                    if act == RailEnvActions.STOP_MOVING:
-                        all_rewards[agent] *= 0.01
-                else:
-                    if act == RailEnvActions.MOVE_LEFT:
-                        all_rewards[agent] *= 0.9
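+            # the reward-shaping heuristic below is disabled by the 'if False:' guard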
+            if False:
+                for agent in train_env.get_agent_handles():
+                    act = action_dict.get(agent, RailEnvActions.DO_NOTHING)
+                    if agent_obs[agent][26] == 1:
+                        if act == RailEnvActions.STOP_MOVING:
+                            all_rewards[agent] *= 0.01
                     else:
-                        if agent_obs[agent][7] == 0 and agent_obs[agent][8] == 0:
-                            if act == RailEnvActions.MOVE_FORWARD:
-                                all_rewards[agent] *= 0.01
-                if done[agent]:
-                    all_rewards[agent] += 100.0
+                        if act == RailEnvActions.MOVE_LEFT:
+                            all_rewards[agent] *= 0.9
+                        else:
+                            if agent_obs[agent][7] == 0 and agent_obs[agent][8] == 0:
+                                if act == RailEnvActions.MOVE_FORWARD:
+                                    all_rewards[agent] *= 0.01
+                    if done[agent]:
+                        all_rewards[agent] += 100.0
 
             step_timer.end()
 
@@ -531,7 +532,7 @@ if __name__ == "__main__":
     parser.add_argument("--load_policy", help="policy filename (reference) to load", default="", type=str)
     parser.add_argument("--use_fast_tree_observation", help="use FastTreeObs instead of stock TreeObs",
                         action='store_true')
-    parser.add_argument("--max_depth", help="max depth", default=1, type=int)
+    parser.add_argument("--max_depth", help="max depth", default=2, type=int)
 
     training_params = parser.parse_args()
     env_params = [
diff --git a/run.py b/run.py
index a7e7e7bdda1ac152ad06a570b80e74a87dd34082..0b7108fe2fe930fae71f07b1784b96b721e97fd9 100644
--- a/run.py
+++ b/run.py
@@ -26,8 +26,7 @@ from reinforcement_learning.dddqn_policy import DDDQNPolicy
 VERBOSE = True
 
 # Checkpoint to use (remember to push it!)
-checkpoint = "./checkpoints/201106234244-400.pth"  # 15.64082361736683 Depth 1
-checkpoint = "./checkpoints/201106234900-300.pth"  # 15.64082361736683 Depth 1
+checkpoint = "./checkpoints/201106234900-5400.pth"  # 15.64082361736683 Depth 1
 
 # Use last action cache
 USE_ACTION_CACHE = False
diff --git a/utils/fast_tree_obs.py b/utils/fast_tree_obs.py
index 3b14b0f7be2379bc472a23122e982336a4421106..cd7f9c54f79e055215efad6dbecd8679e123eb52 100755
--- a/utils/fast_tree_obs.py
+++ b/utils/fast_tree_obs.py
@@ -168,7 +168,7 @@ class FastTreeObs(ObservationBuilder):
         if depth >= self.max_depth:
             return has_opp_agent, has_same_agent, has_switch, visited
 
-        # max_explore_steps = 100
+        # max_explore_steps = 100 -> hard upper bound to make sure the exploration always terminates
         cnt = 0
         while cnt < 100:
             cnt += 1
@@ -177,26 +177,41 @@ class FastTreeObs(ObservationBuilder):
             opp_a = self.env.agent_positions[new_position]
             if opp_a != -1 and opp_a != handle:
                 if self.env.agents[opp_a].direction != new_direction:
-                    # opp agent found
+                    # opposite-direction agent found -> stop exploring; this is a strong signal.
                     has_opp_agent = 1
                     return has_opp_agent, has_same_agent, has_switch, visited
                 else:
+                    # same-direction agent found
+                    # The agent can follow it, because that agent is still moving ahead, so there should be
+                    # no deadlock or other issue -> it is simply driving along. If the other agent were in a
+                    # deadlock, the remaining agents should avoid it. One edge case is when the other agent
+                    # has its target on this branch -> therefore the exploration must continue, to check
+                    # whether an opposite-direction agent is travelling on the same track.
                     has_same_agent = 1
-                    return has_opp_agent, has_same_agent, has_switch, visited
+                    # do NOT stop exploring here (was: return has_opp_agent, has_same_agent, has_switch, visited)
 
-            # convert one-hot encoding to 0,1,2,3
-            agents_on_switch, \
-            agents_near_to_switch, \
-            agents_near_to_switch_all, \
-            agents_on_switch_all = \
+            # agents_on_switch == True -> the current cell is a switch where the agent can decide (branch) during exploration
+            # agents_near_to_switch == True -> the agent is one cell before a switch where it can decide
+            #
+            agents_on_switch, agents_near_to_switch, _, _ = \
                 self.check_agent_decision(new_position, new_direction)
+
             if agents_near_to_switch:
+                # The exploration has been walking along a path where the agent cannot decide.
+                # The best option would be MOVE_FORWARD -> skip exploring, just keep walking.
                 return has_opp_agent, has_same_agent, has_switch, visited
 
             possible_transitions = self.env.rail.get_transitions(*new_position, new_direction)
             if agents_on_switch:
                 f = 0
-                for dir_loop in range(4):
+                orientation = new_direction
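+                # if only one transition is possible from this direction, use it as the reference orientation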
+                if fast_count_nonzero(possible_transitions) == 1:
+                    orientation = fast_argmax(possible_transitions)
+                for dir_loop, branch_direction in enumerate(
+                        [(orientation + dir_loop) % 4 for dir_loop in range(-1, 3)]):
+                    # branch the exploration path and aggregate the information found on each branch
+                    # --- OPEN RESEARCH QUESTION ---> is this aggregation good enough, or should we use the full
+                    # detailed information as in the TreeObservation (FLATLAND)?
                     if possible_transitions[dir_loop] == 1:
                         f += 1
                         hoa, hsa, hs, v = self._explore(handle,