diff --git a/checkpoints/ppo/model_checkpoint.meta b/checkpoints/ppo/model_checkpoint.meta
index 31959a4680ed59613594bfb9418b1e3497ad6ce1..56a27a0763598ba9748c4b337fcb59e95ccdf612 100644
Binary files a/checkpoints/ppo/model_checkpoint.meta and b/checkpoints/ppo/model_checkpoint.meta differ
diff --git a/checkpoints/ppo/model_checkpoint.optimizer b/checkpoints/ppo/model_checkpoint.optimizer
index 89630787199743db98bebfd8f3132c681a60a099..ff9dec932ff15047548262d1c0a0e0f52a79cbc6 100644
Binary files a/checkpoints/ppo/model_checkpoint.optimizer and b/checkpoints/ppo/model_checkpoint.optimizer differ
diff --git a/checkpoints/ppo/model_checkpoint.policy b/checkpoints/ppo/model_checkpoint.policy
index 6049b699289690113a6eed1bca3545cd12bf4c4e..f1258b047ae66c183f0c9d25b0b1b0cef274d9c9 100644
Binary files a/checkpoints/ppo/model_checkpoint.policy and b/checkpoints/ppo/model_checkpoint.policy differ
diff --git a/run.py b/run.py
index 08f32e4b1c10916b44ed0a5363ea3d8470ffa5f0..4eab22942b0914d57606a7226d6bd250c81401dd 100644
--- a/run.py
+++ b/run.py
@@ -30,7 +30,7 @@ def my_controller(extra: Extra, observation, info):
 #     the example here :
 #     https://gitlab.aicrowd.com/flatland/flatland/blob/master/flatland/envs/observations.py#L14
 #####################################################################
-my_observation_builder = Extra(max_depth=20)
+my_observation_builder = Extra(max_depth=3)
 
 # Or if you want to use your own approach to build the observation from the env_step,
 # please feel free to pass a DummyObservationBuilder() object as mentioned below,
diff --git a/src/extra.py b/src/extra.py
index 0d948bd4796888fdbcbe361d14379f5db31b3b41..5b6af80916d513619b7b9b3c1edbd18c0e24ee04 100644
--- a/src/extra.py
+++ b/src/extra.py
@@ -19,7 +19,7 @@
 #
 # Private submission
 # http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/8
-#
+
 import numpy as np
 from flatland.core.env_observation_builder import ObservationBuilder
 from flatland.core.grid.grid4_utils import get_new_position
@@ -67,8 +67,9 @@ class Extra(ObservationBuilder):
 
     def __init__(self, max_depth):
         self.max_depth = max_depth
-        self.observation_dim = 22
+        self.observation_dim = 26
         self.agent = None
+        self.random_agent_starter = []
 
     def build_data(self):
         if self.env is not None:
@@ -190,13 +191,6 @@ class Extra(ObservationBuilder):
     def is_collision(self, obsData):
         return False
 
-    def intern_is_collision(self, obsData):
-        if np.sum(obsData[10:14]) == 0:
-            return False
-        if np.sum(obsData[10:14]) == np.sum(obsData[14:18]):
-            return True
-        return False
-
     def reset(self):
         self.build_data()
         return
@@ -245,20 +239,15 @@ class Extra(ObservationBuilder):
             return has_opp_agent, has_same_agent, visited
 
         if agents_on_switch:
-            pt = 0
             for dir_loop in range(4):
                 if possible_transitions[dir_loop] == 1:
-                    pt += 1
                     hoa, hsa, v = self._explore(handle,
                                                 get_new_position(new_position, dir_loop),
                                                 dir_loop,
                                                 depth + 1)
                     visited.append(v)
-                    has_opp_agent += hoa
-                    has_same_agent + hsa
-            if pt > 0:
-                has_opp_agent /= pt
-                has_same_agent /= pt
+                    has_opp_agent = 0.5 * (has_opp_agent + hoa)
+                    has_same_agent = 0.5 * (has_same_agent + hsa)
             return has_opp_agent, has_same_agent, visited
         else:
             new_direction = fast_argmax(possible_transitions)
@@ -273,7 +262,7 @@ class Extra(ObservationBuilder):
        # observation[3]  : 1 path towards target (direction 3) / otherwise 0 -> path is longer or there is no path
        # observation[4]  : int(agent.status == RailAgentStatus.READY_TO_DEPART)
        # observation[5]  : int(agent.status == RailAgentStatus.ACTIVE)
-       # observation[6]  : deadlock estimated (collision) 1 otherwise 0
+       # observation[6]  : int(agent.status == RailAgentStatus.DONE or agent.status == RailAgentStatus.DONE_REMOVED)
        # observation[7]  : current agent is located at a switch, where it can take a routing decision
        # observation[8]  : current agent is located at a cell, where it has to take a stop-or-go decision
        # observation[9]  : current agent is located one step before/after a switch
@@ -290,7 +279,7 @@ class Extra(ObservationBuilder):
        # observation[20] : If there is a path with step (direction 2) and there is a agent with same direction -> 1
        # observation[21] : If there is a path with step (direction 3) and there is a agent with same direction -> 1
 
-        observation = np.zeros(self.observation_dim) - 1
+        observation = np.zeros(self.observation_dim)
 
         visited = []
         agent = self.env.agents[handle]
@@ -302,6 +291,7 @@ class Extra(ObservationBuilder):
             agent_virtual_position = agent.position
             observation[5] = 1
         else:
+            observation[6] = 1
             agent_virtual_position = (-1, -1)
             agent_done = True
 
@@ -319,6 +309,7 @@ class Extra(ObservationBuilder):
         for dir_loop, branch_direction in enumerate([(orientation + i) % 4 for i in range(-1, 3)]):
             if possible_transitions[branch_direction]:
                 new_position = get_new_position(agent_virtual_position, branch_direction)
+
                 new_cell_dist = distance_map[handle,
                                              new_position[0], new_position[1],
                                              branch_direction]
@@ -332,6 +323,10 @@ class Extra(ObservationBuilder):
                 observation[14 + dir_loop] = has_opp_agent
                 observation[18 + dir_loop] = has_same_agent
 
+                opp_a = self.env.agent_positions[new_position]
+                if opp_a != -1 and opp_a != handle:
+                    observation[22 + dir_loop] = 1
+
         agents_on_switch, \
         agents_near_to_switch, \
         agents_near_to_switch_all = \
@@ -340,13 +335,11 @@ class Extra(ObservationBuilder):
         observation[8] = int(agents_near_to_switch)
         observation[9] = int(agents_near_to_switch_all)
 
-        observation[6] = int(self.intern_is_collision(observation))
-
         self.env.dev_obs_dict.update({handle: visited})
 
         return observation
 
-    def rl_agent_act(self, observation, info, eps=0.0):
+    def rl_agent_act_ADRIAN(self, observation, info, eps=0.0):
         self.loadAgent()
         action_dict = {}
         for a in range(self.env.get_num_agents()):
@@ -358,21 +351,43 @@ class Extra(ObservationBuilder):
 
         return action_dict
 
-    def rl_agent_act_X(self, observation, info, eps=0.0):
+    def rl_agent_act(self, observation, info, eps=0.0):
+        if len(self.random_agent_starter) != self.env.get_num_agents():
+            self.random_agent_starter = np.random.random(self.env.get_num_agents()) * 1000.0
+        self.loadAgent()
+
+        action_dict = {}
+        for a in range(self.env.get_num_agents()):
+            if self.random_agent_starter[a] > self.env._elapsed_steps:
+                action_dict[a] = RailEnvActions.STOP_MOVING
+            elif info['action_required'][a]:
+                action_dict[a] = self.agent.act(observation[a], eps=eps)
+                # action_dict[a] = np.random.randint(5)
+            else:
+                action_dict[a] = RailEnvActions.DO_NOTHING
+
+        return action_dict
+
+    def rl_agent_act_ADRIAN_01(self, observation, info, eps=0.0):
         self.loadAgent()
         action_dict = {}
         active_cnt = 0
         for a in range(self.env.get_num_agents()):
-            if active_cnt < 1 or self.env.agents[a].status == RailAgentStatus.ACTIVE:
-                if observation[a][6] == 0:
+            if active_cnt < 10 or self.env.agents[a].status == RailAgentStatus.ACTIVE:
+                if observation[a][6] == 1:
+                    active_cnt += int(self.env.agents[a].status == RailAgentStatus.ACTIVE)
+                    action_dict[a] = RailEnvActions.STOP_MOVING
+                else:
                     active_cnt += int(self.env.agents[a].status < RailAgentStatus.DONE)
-                    if info['action_required'][a]:
-                        action_dict[a] = self.agent.act(observation[a], eps=eps)
-                        # action_dict[a] = np.random.randint(5)
+                    if (observation[a][7] + observation[a][8] + observation[a][9] > 0) or \
+                            (self.env.agents[a].status < RailAgentStatus.ACTIVE):
+                        if info['action_required'][a]:
+                            action_dict[a] = self.agent.act(observation[a], eps=eps)
+                            # action_dict[a] = np.random.randint(5)
+                        else:
+                            action_dict[a] = RailEnvActions.MOVE_FORWARD
                     else:
                         action_dict[a] = RailEnvActions.MOVE_FORWARD
-                else:
-                    action_dict[a] = RailEnvActions.STOP_MOVING
             else:
                 action_dict[a] = RailEnvActions.STOP_MOVING
 
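Note on the reworked rl_agent_act: each agent now receives a random start offset in [0, 1000) and is held with STOP_MOVING until that many environment steps have elapsed, which staggers departures instead of releasing every train at step 0 and so reduces early contention at switches. The snippet below is a minimal, self-contained sketch of that dispatch rule outside of Flatland; NUM_AGENTS, the numeric action codes, and the policy placeholder are illustrative assumptions, not part of the submission code.

import numpy as np

# Illustrative stand-ins (assumptions, not taken from the submission).
NUM_AGENTS = 5
DO_NOTHING, MOVE_FORWARD, STOP_MOVING = 0, 2, 4  # Flatland RailEnvActions values

# One random start offset per agent in [0, 1000), mirroring self.random_agent_starter.
random_agent_starter = np.random.random(NUM_AGENTS) * 1000.0


def staggered_actions(elapsed_steps, action_required, policy_act):
    # Hold agents whose offset has not elapsed; otherwise defer to the policy.
    action_dict = {}
    for a in range(NUM_AGENTS):
        if random_agent_starter[a] > elapsed_steps:
            action_dict[a] = STOP_MOVING       # not yet released
        elif action_required[a]:
            action_dict[a] = policy_act(a)     # trained policy decides
        else:
            action_dict[a] = DO_NOTHING        # agent is between decision points
    return action_dict


# Example: at step 250 with a policy that always drives forward.
print(staggered_actions(250, [True] * NUM_AGENTS, lambda a: MOVE_FORWARD))

This staggered release works together with the other changes in the commit: observation[6] now flags finished agents instead of the removed collision estimate, and observation[22..25] mark whether the neighbouring cell in each direction is already occupied by another agent.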