diff --git a/run.py b/run.py
index d920c06630b2ca9ad32bba6ac7a0228b7e45ea7f..674ae41557941f1c6d26db1ea055bb0aa5c9cd6a 100644
--- a/run.py
+++ b/run.py
@@ -3,13 +3,14 @@ import time
 import numpy as np
 from flatland.core.env_observation_builder import DummyObservationBuilder
 from flatland.envs.agent_utils import RailAgentStatus
+from flatland.envs.rail_env import RailEnvActions
 from flatland.evaluators.client import FlatlandRemoteClient
 
+from utils.dead_lock_avoidance_agent import DeadLockAvoidanceAgent
+
 #####################################################################
 # Instantiate a Remote Client
 #####################################################################
-from src.dead_lock_avoidance_agent import DeadLockAvoidanceAgent
-from src.extra import Extra
 
 remote_client = FlatlandRemoteClient()
 
@@ -24,11 +25,16 @@ remote_client = FlatlandRemoteClient()
 # def my_controller_RL(extra: Extra, observation, info):
 #     return extra.rl_agent_act(observation, info)
 
-def my_controller(policy):
+def my_controller(policy, info):
     policy.start_step()
     actions = {}
+    # print("-------- act ------------")
     for handle in range(policy.env.get_num_agents()):
-        a = policy.act(handle, None, 0)
+        if info['action_required'][handle] and handle < policy.env._elapsed_steps:
+            a = policy.act(handle, None, 0)
+        else:
+            a = RailEnvActions.DO_NOTHING
+            agent = policy.env.agents[handle]
         actions.update({handle: a})
     policy.end_step()
     return actions
@@ -136,7 +142,7 @@ while True:
         # Compute the action for this step by using the previously
         # defined controller
         time_start = time.time()
-        action = my_controller(policy)
+        action = my_controller(policy, info)
         time_taken = time.time() - time_start
         time_taken_by_controller.append(time_taken)
 
diff --git a/src/agent/__pycache__/dueling_double_dqn.cpython-36.pyc b/src/agent/__pycache__/dueling_double_dqn.cpython-36.pyc
deleted file mode 100644
index 41afc8cad7a82889d0118d5fd836cf1f15af34de..0000000000000000000000000000000000000000
Binary files a/src/agent/__pycache__/dueling_double_dqn.cpython-36.pyc and /dev/null differ
diff --git a/src/agent/__pycache__/model.cpython-36.pyc b/src/agent/__pycache__/model.cpython-36.pyc
deleted file mode 100644
index a13378b2cbe5716b1bf04b774b5fce12ccef39e7..0000000000000000000000000000000000000000
Binary files a/src/agent/__pycache__/model.cpython-36.pyc and /dev/null differ
diff --git a/src/agent/dueling_double_dqn.py b/src/agent/dueling_double_dqn.py
deleted file mode 100644
index f08e17602db84265079fa5f6f7d8422d5a5d4c89..0000000000000000000000000000000000000000
--- a/src/agent/dueling_double_dqn.py
+++ /dev/null
@@ -1,512 +0,0 @@
-import torch
-import torch.optim as optim
-
-BUFFER_SIZE = int(1e5)  # replay buffer size
-BATCH_SIZE = 512  # minibatch size
-GAMMA = 0.99  # discount factor 0.99
-TAU = 0.5e-3  # for soft update of target parameters
-LR = 0.5e-4  # learning rate 0.5e-4 works
-
-# how often to update the network
-UPDATE_EVERY = 20
-UPDATE_EVERY_FINAL = 10
-UPDATE_EVERY_AGENT_CANT_CHOOSE = 200
-
-
-double_dqn = True  # If using double dqn algorithm
-input_channels = 5  # Number of Input channels
-
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-device = torch.device("cpu")
-print(device)
-
-USE_OPTIMIZER = optim.Adam
-# USE_OPTIMIZER = optim.RMSprop
-print(USE_OPTIMIZER)
-
-
-class Agent:
-    """Interacts with and learns from the environment."""
-
-    def __init__(self, state_size, action_size, net_type, seed, double_dqn=True, input_channels=5):
-        """Initialize an Agent object.
- - Params - ====== - state_size (int): dimension of each state - action_size (int): dimension of each action - seed (int): random seed - """ - self.state_size = state_size - self.action_size = action_size - self.seed = random.seed(seed) - self.version = net_type - self.double_dqn = double_dqn - # Q-Network - if self.version == "Conv": - self.qnetwork_local = QNetwork2(state_size, action_size, seed, input_channels).to(device) - self.qnetwork_target = copy.deepcopy(self.qnetwork_local) - else: - self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device) - self.qnetwork_target = copy.deepcopy(self.qnetwork_local) - - self.optimizer = USE_OPTIMIZER(self.qnetwork_local.parameters(), lr=LR) - - # Replay memory - self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed) - self.memory_final = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed) - self.memory_agent_can_not_choose = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed) - - self.final_step = {} - - # Initialize time step (for updating every UPDATE_EVERY steps) - self.t_step = 0 - self.t_step_final = 0 - self.t_step_agent_can_not_choose = 0 - - def save(self, filename): - torch.save(self.qnetwork_local.state_dict(), filename + ".local") - torch.save(self.qnetwork_target.state_dict(), filename + ".target") - - def load(self, filename): - if os.path.exists(filename + ".local"): - self.qnetwork_local.load_state_dict(torch.load(filename + ".local")) - print(filename + ".local -> ok") - if os.path.exists(filename + ".target"): - self.qnetwork_target.load_state_dict(torch.load(filename + ".target")) - print(filename + ".target -> ok") - self.optimizer = USE_OPTIMIZER(self.qnetwork_local.parameters(), lr=LR) - - def _update_model(self, switch=0): - # Learn every UPDATE_EVERY time steps. 
- # If enough samples are available in memory, get random subset and learn - if switch == 0: - self.t_step = (self.t_step + 1) % UPDATE_EVERY - if self.t_step == 0: - if len(self.memory) > BATCH_SIZE: - experiences = self.memory.sample() - self.learn(experiences, GAMMA) - elif switch == 1: - self.t_step_final = (self.t_step_final + 1) % UPDATE_EVERY_FINAL - if self.t_step_final == 0: - if len(self.memory_final) > BATCH_SIZE: - experiences = self.memory_final.sample() - self.learn(experiences, GAMMA) - else: - # If enough samples are available in memory_agent_can_not_choose, get random subset and learn - self.t_step_agent_can_not_choose = (self.t_step_agent_can_not_choose + 1) % UPDATE_EVERY_AGENT_CANT_CHOOSE - if self.t_step_agent_can_not_choose == 0: - if len(self.memory_agent_can_not_choose) > BATCH_SIZE: - experiences = self.memory_agent_can_not_choose.sample() - self.learn(experiences, GAMMA) - - def step(self, state, action, reward, next_state, done): - # Save experience in replay memory - self.memory.add(state, action, reward, next_state, done) - self._update_model(0) - - def step_agent_can_not_choose(self, state, action, reward, next_state, done): - # Save experience in replay memory_agent_can_not_choose - self.memory_agent_can_not_choose.add(state, action, reward, next_state, done) - self._update_model(2) - - def add_final_step(self, agent_handle, state, action, reward, next_state, done): - if self.final_step.get(agent_handle) is None: - self.final_step.update({agent_handle: [state, action, reward, next_state, done]}) - - def make_final_step(self, additional_reward=0): - for _, item in self.final_step.items(): - state = item[0] - action = item[1] - reward = item[2] + additional_reward - next_state = item[3] - done = item[4] - self.memory_final.add(state, action, reward, next_state, done) - self._update_model(1) - self._reset_final_step() - - def _reset_final_step(self): - self.final_step = {} - - def act(self, state, eps=0.): - """Returns actions for given state as per current policy. - - Params - ====== - state (array_like): current state - eps (float): epsilon, for epsilon-greedy action selection - """ - state = torch.from_numpy(state).float().unsqueeze(0).to(device) - self.qnetwork_local.eval() - with torch.no_grad(): - action_values = self.qnetwork_local(state) - self.qnetwork_local.train() - - # Epsilon-greedy action selection - if random.random() > eps: - return np.argmax(action_values.cpu().data.numpy()) - else: - return random.choice(np.arange(self.action_size)) - - def learn(self, experiences, gamma): - - """Update value parameters using given batch of experience tuples. 
- - Params - ====== - experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples - gamma (float): discount factor - """ - states, actions, rewards, next_states, dones = experiences - - # Get expected Q values from local model - Q_expected = self.qnetwork_local(states).gather(1, actions) - - if self.double_dqn: - # Double DQN - q_best_action = self.qnetwork_local(next_states).max(1)[1] - Q_targets_next = self.qnetwork_target(next_states).gather(1, q_best_action.unsqueeze(-1)) - else: - # DQN - Q_targets_next = self.qnetwork_target(next_states).detach().max(1)[0].unsqueeze(-1) - - # Compute Q targets for current states - - Q_targets = rewards + (gamma * Q_targets_next * (1 - dones)) - - # Compute loss - loss = F.mse_loss(Q_expected, Q_targets) - # Minimize the loss - self.optimizer.zero_grad() - loss.backward() - self.optimizer.step() - - # ------------------- update target network ------------------- # - self.soft_update(self.qnetwork_local, self.qnetwork_target, TAU) - - def soft_update(self, local_model, target_model, tau): - """Soft update model parameters. - θ_target = τ*θ_local + (1 - τ)*θ_target - - Params - ====== - local_model (PyTorch model): weights will be copied from - target_model (PyTorch model): weights will be copied to - tau (float): interpolation parameter - """ - for target_param, local_param in zip(target_model.parameters(), local_model.parameters()): - target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data) - - -class ReplayBuffer: - """Fixed-size buffer to store experience tuples.""" - - def __init__(self, action_size, buffer_size, batch_size, seed): - """Initialize a ReplayBuffer object. - - Params - ====== - action_size (int): dimension of each action - buffer_size (int): maximum size of buffer - batch_size (int): size of each training batch - seed (int): random seed - """ - self.action_size = action_size - self.memory = deque(maxlen=buffer_size) - self.batch_size = batch_size - self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"]) - self.seed = random.seed(seed) - - def add(self, state, action, reward, next_state, done): - """Add a new experience to memory.""" - e = self.experience(np.expand_dims(state, 0), action, reward, np.expand_dims(next_state, 0), done) - self.memory.append(e) - - def sample(self): - """Randomly sample a batch of experiences from memory.""" - experiences = random.sample(self.memory, k=self.batch_size) - - states = torch.from_numpy(self.__v_stack_impr([e.state for e in experiences if e is not None])) \ - .float().to(device) - actions = torch.from_numpy(self.__v_stack_impr([e.action for e in experiences if e is not None])) \ - .long().to(device) - rewards = torch.from_numpy(self.__v_stack_impr([e.reward for e in experiences if e is not None])) \ - .float().to(device) - next_states = torch.from_numpy(self.__v_stack_impr([e.next_state for e in experiences if e is not None])) \ - .float().to(device) - dones = torch.from_numpy(self.__v_stack_impr([e.done for e in experiences if e is not None]).astype(np.uint8)) \ - .float().to(device) - - return (states, actions, rewards, next_states, dones) - - def __len__(self): - """Return the current size of internal memory.""" - return len(self.memory) - - def __v_stack_impr(self, states): - sub_dim = len(states[0][0]) if isinstance(states[0], Iterable) else 1 - np_states = np.reshape(np.array(states), (len(states), sub_dim)) - return np_states - - -import copy -import os -import random -from collections import 
namedtuple, deque, Iterable - -import numpy as np -import torch -import torch.nn.functional as F -import torch.optim as optim - -from src.agent.model import QNetwork2, QNetwork - -BUFFER_SIZE = int(1e5) # replay buffer size -BATCH_SIZE = 512 # minibatch size -GAMMA = 0.95 # discount factor 0.99 -TAU = 0.5e-4 # for soft update of target parameters -LR = 0.5e-3 # learning rate 0.5e-4 works - -# how often to update the network -UPDATE_EVERY = 40 -UPDATE_EVERY_FINAL = 1000 -UPDATE_EVERY_AGENT_CANT_CHOOSE = 200 - -double_dqn = True # If using double dqn algorithm -input_channels = 5 # Number of Input channels - -device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -device = torch.device("cpu") -print(device) - -USE_OPTIMIZER = optim.Adam -# USE_OPTIMIZER = optim.RMSprop -print(USE_OPTIMIZER) - - -class Agent: - """Interacts with and learns from the environment.""" - - def __init__(self, state_size, action_size, net_type, seed, double_dqn=True, input_channels=5): - """Initialize an Agent object. - - Params - ====== - state_size (int): dimension of each state - action_size (int): dimension of each action - seed (int): random seed - """ - self.state_size = state_size - self.action_size = action_size - self.seed = random.seed(seed) - self.version = net_type - self.double_dqn = double_dqn - # Q-Network - if self.version == "Conv": - self.qnetwork_local = QNetwork2(state_size, action_size, seed, input_channels).to(device) - self.qnetwork_target = copy.deepcopy(self.qnetwork_local) - else: - self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device) - self.qnetwork_target = copy.deepcopy(self.qnetwork_local) - - self.optimizer = USE_OPTIMIZER(self.qnetwork_local.parameters(), lr=LR) - - # Replay memory - self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed) - self.memory_final = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed) - self.memory_agent_can_not_choose = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed) - - self.final_step = {} - - # Initialize time step (for updating every UPDATE_EVERY steps) - self.t_step = 0 - self.t_step_final = 0 - self.t_step_agent_can_not_choose = 0 - - def save(self, filename): - torch.save(self.qnetwork_local.state_dict(), filename + ".local") - torch.save(self.qnetwork_target.state_dict(), filename + ".target") - - def load(self, filename): - print("try to load: " + filename) - if os.path.exists(filename + ".local"): - self.qnetwork_local.load_state_dict(torch.load(filename + ".local")) - print(filename + ".local -> ok") - if os.path.exists(filename + ".target"): - self.qnetwork_target.load_state_dict(torch.load(filename + ".target")) - print(filename + ".target -> ok") - self.optimizer = USE_OPTIMIZER(self.qnetwork_local.parameters(), lr=LR) - - def _update_model(self, switch=0): - # Learn every UPDATE_EVERY time steps. 
- # If enough samples are available in memory, get random subset and learn - if switch == 0: - self.t_step = (self.t_step + 1) % UPDATE_EVERY - if self.t_step == 0: - if len(self.memory) > BATCH_SIZE: - experiences = self.memory.sample() - self.learn(experiences, GAMMA) - elif switch == 1: - self.t_step_final = (self.t_step_final + 1) % UPDATE_EVERY_FINAL - if self.t_step_final == 0: - if len(self.memory_final) > BATCH_SIZE: - experiences = self.memory_final.sample() - self.learn(experiences, GAMMA) - else: - # If enough samples are available in memory_agent_can_not_choose, get random subset and learn - self.t_step_agent_can_not_choose = (self.t_step_agent_can_not_choose + 1) % UPDATE_EVERY_AGENT_CANT_CHOOSE - if self.t_step_agent_can_not_choose == 0: - if len(self.memory_agent_can_not_choose) > BATCH_SIZE: - experiences = self.memory_agent_can_not_choose.sample() - self.learn(experiences, GAMMA) - - def step(self, state, action, reward, next_state, done): - # Save experience in replay memory - self.memory.add(state, action, reward, next_state, done) - self._update_model(0) - - def step_agent_can_not_choose(self, state, action, reward, next_state, done): - # Save experience in replay memory_agent_can_not_choose - self.memory_agent_can_not_choose.add(state, action, reward, next_state, done) - self._update_model(2) - - def add_final_step(self, agent_handle, state, action, reward, next_state, done): - if self.final_step.get(agent_handle) is None: - self.final_step.update({agent_handle: [state, action, reward, next_state, done]}) - return True - else: - return False - - def make_final_step(self, additional_reward=0): - for _, item in self.final_step.items(): - state = item[0] - action = item[1] - reward = item[2] + additional_reward - next_state = item[3] - done = item[4] - self.memory_final.add(state, action, reward, next_state, done) - self._update_model(1) - self._reset_final_step() - - def _reset_final_step(self): - self.final_step = {} - - def act(self, state, eps=0.): - """Returns actions for given state as per current policy. - - Params - ====== - state (array_like): current state - eps (float): epsilon, for epsilon-greedy action selection - """ - state = torch.from_numpy(state).float().unsqueeze(0).to(device) - self.qnetwork_local.eval() - with torch.no_grad(): - action_values = self.qnetwork_local(state) - self.qnetwork_local.train() - - # Epsilon-greedy action selection - if random.random() > eps: - return np.argmax(action_values.cpu().data.numpy()), False - else: - return random.choice(np.arange(self.action_size)), True - - def learn(self, experiences, gamma): - - """Update value parameters using given batch of experience tuples. 
- - Params - ====== - experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples - gamma (float): discount factor - """ - states, actions, rewards, next_states, dones = experiences - - # Get expected Q values from local model - Q_expected = self.qnetwork_local(states).gather(1, actions) - - if self.double_dqn: - # Double DQN - q_best_action = self.qnetwork_local(next_states).max(1)[1] - Q_targets_next = self.qnetwork_target(next_states).gather(1, q_best_action.unsqueeze(-1)) - else: - # DQN - Q_targets_next = self.qnetwork_target(next_states).detach().max(1)[0].unsqueeze(-1) - - # Compute Q targets for current states - - Q_targets = rewards + (gamma * Q_targets_next * (1 - dones)) - - # Compute loss - loss = F.mse_loss(Q_expected, Q_targets) - # Minimize the loss - self.optimizer.zero_grad() - loss.backward() - self.optimizer.step() - - # ------------------- update target network ------------------- # - self.soft_update(self.qnetwork_local, self.qnetwork_target, TAU) - - def soft_update(self, local_model, target_model, tau): - """Soft update model parameters. - θ_target = τ*θ_local + (1 - τ)*θ_target - - Params - ====== - local_model (PyTorch model): weights will be copied from - target_model (PyTorch model): weights will be copied to - tau (float): interpolation parameter - """ - for target_param, local_param in zip(target_model.parameters(), local_model.parameters()): - target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data) - - -class ReplayBuffer: - """Fixed-size buffer to store experience tuples.""" - - def __init__(self, action_size, buffer_size, batch_size, seed): - """Initialize a ReplayBuffer object. - - Params - ====== - action_size (int): dimension of each action - buffer_size (int): maximum size of buffer - batch_size (int): size of each training batch - seed (int): random seed - """ - self.action_size = action_size - self.memory = deque(maxlen=buffer_size) - self.batch_size = batch_size - self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"]) - self.seed = random.seed(seed) - - def add(self, state, action, reward, next_state, done): - """Add a new experience to memory.""" - e = self.experience(np.expand_dims(state, 0), action, reward, np.expand_dims(next_state, 0), done) - self.memory.append(e) - - def sample(self): - """Randomly sample a batch of experiences from memory.""" - experiences = random.sample(self.memory, k=self.batch_size) - - states = torch.from_numpy(self.__v_stack_impr([e.state for e in experiences if e is not None])) \ - .float().to(device) - actions = torch.from_numpy(self.__v_stack_impr([e.action for e in experiences if e is not None])) \ - .long().to(device) - rewards = torch.from_numpy(self.__v_stack_impr([e.reward for e in experiences if e is not None])) \ - .float().to(device) - next_states = torch.from_numpy(self.__v_stack_impr([e.next_state for e in experiences if e is not None])) \ - .float().to(device) - dones = torch.from_numpy(self.__v_stack_impr([e.done for e in experiences if e is not None]).astype(np.uint8)) \ - .float().to(device) - - return (states, actions, rewards, next_states, dones) - - def __len__(self): - """Return the current size of internal memory.""" - return len(self.memory) - - def __v_stack_impr(self, states): - sub_dim = len(states[0][0]) if isinstance(states[0], Iterable) else 1 - np_states = np.reshape(np.array(states), (len(states), sub_dim)) - return np_states diff --git a/src/agent/model.py b/src/agent/model.py deleted file mode 100644 
index 70952e0e1e708620ecae294f327becd167321c62..0000000000000000000000000000000000000000
--- a/src/agent/model.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-class QNetwork(nn.Module):
-    def __init__(self, state_size, action_size, seed, hidsize1=64, hidsize2=128):
-        super(QNetwork, self).__init__()
-
-        self.fc1_val = nn.Linear(state_size, hidsize1)
-        self.fc2_val = nn.Linear(hidsize1, hidsize2)
-        self.fc3_val = nn.Linear(hidsize2, 1)
-
-        self.fc1_adv = nn.Linear(state_size, hidsize1)
-        self.fc2_adv = nn.Linear(hidsize1, hidsize2)
-        self.fc3_adv = nn.Linear(hidsize2, action_size)
-
-    def forward(self, x):
-        val = F.relu(self.fc1_val(x))
-        val = F.relu(self.fc2_val(val))
-        val = self.fc3_val(val)
-
-        # advantage calculation
-        adv = F.relu(self.fc1_adv(x))
-        adv = F.relu(self.fc2_adv(adv))
-        adv = self.fc3_adv(adv)
-        return val + adv - adv.mean()
-
-
-class QNetwork2(nn.Module):
-    def __init__(self, state_size, action_size, seed, input_channels, hidsize1=128, hidsize2=64):
-        super(QNetwork2, self).__init__()
-        self.conv1 = nn.Conv2d(input_channels, 16, kernel_size=3, stride=1)
-        self.bn1 = nn.BatchNorm2d(16)
-        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=3)
-        self.bn2 = nn.BatchNorm2d(32)
-        self.conv3 = nn.Conv2d(32, 64, kernel_size=5, stride=3)
-        self.bn3 = nn.BatchNorm2d(64)
-
-        self.fc1_val = nn.Linear(6400, hidsize1)
-        self.fc2_val = nn.Linear(hidsize1, hidsize2)
-        self.fc3_val = nn.Linear(hidsize2, 1)
-
-        self.fc1_adv = nn.Linear(6400, hidsize1)
-        self.fc2_adv = nn.Linear(hidsize1, hidsize2)
-        self.fc3_adv = nn.Linear(hidsize2, action_size)
-
-    def forward(self, x):
-        x = F.relu(self.conv1(x))
-        x = F.relu(self.conv2(x))
-        x = F.relu(self.conv3(x))
-
-        # value function approximation
-        val = F.relu(self.fc1_val(x.view(x.size(0), -1)))
-        val = F.relu(self.fc2_val(val))
-        val = self.fc3_val(val)
-
-        # advantage calculation
-        adv = F.relu(self.fc1_adv(x.view(x.size(0), -1)))
-        adv = F.relu(self.fc2_adv(adv))
-        adv = self.fc3_adv(adv)
-        return val + adv - adv.mean()
diff --git a/src/dead_lock_avoidance_agent.py b/src/dead_lock_avoidance_agent.py
deleted file mode 100644
index 43f1b4ae15aebae1348e0eadc5f9cab243511a28..0000000000000000000000000000000000000000
--- a/src/dead_lock_avoidance_agent.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import matplotlib.pyplot as plt
-import numpy as np
-from flatland.envs.agent_utils import RailAgentStatus
-from flatland.envs.rail_env import RailEnv, RailEnvActions
-
-from reinforcement_learning.policy import Policy
-from utils.shortest_Distance_walker import ShortestDistanceWalker
-
-
-class MyWalker(ShortestDistanceWalker):
-    def __init__(self, env: RailEnv, agent_positions):
-        super().__init__(env)
-        self.shortest_distance_agent_map = np.zeros((self.env.get_num_agents(),
-                                                     self.env.height,
-                                                     self.env.width),
-                                                    dtype=int) - 1
-
-        self.agent_positions = agent_positions
-
-        self.agent_map = {}
-
-    def getData(self):
-        return self.shortest_distance_agent_map
-
-    def callback(self, handle, agent, position, direction, action):
-        opp_a = self.agent_positions[position]
-        if opp_a != -1 and opp_a != handle:
-            d = self.agent_map.get(handle, [])
-            d.append(opp_a)
-            if self.env.agents[opp_a].direction != direction:
-                self.agent_map.update({handle: d})
-        self.shortest_distance_agent_map[(handle, position[0], position[1])] = direction
-
-
-class DeadLockAvoidanceAgent(Policy):
-    def __init__(self, env: RailEnv, state_size, action_size):
-        self.env = env
-        self.action_size = action_size
-        self.state_size = state_size
-
self.memory = [] - self.loss = 0 - self.agent_can_move = {} - - def step(self, handle, state, action, reward, next_state, done): - pass - - def act(self, handle, state, eps=0.): - agent = self.env.agents[handle] - #if handle > self.env._elapsed_steps: - # return RailEnvActions.STOP_MOVING - if agent.status == RailAgentStatus.ACTIVE: - self.active_agent_cnt += 1 - #if agent.status > 20: - # return RailEnvActions.STOP_MOVING - check = self.agent_can_move.get(handle, None) - if check is None: - # print(handle, RailEnvActions.STOP_MOVING) - return RailEnvActions.STOP_MOVING - - return check[3] - - def reset(self): - pass - - def start_step(self): - self.active_agent_cnt = 0 - self.shortest_distance_mapper() - - def end_step(self): - # print("#A:", self.active_agent_cnt, "/", self.env.get_num_agents(),self.env._elapsed_steps) - pass - - def get_actions(self): - pass - - def shortest_distance_mapper(self): - - # build map with agent positions (only active agents) - agent_positions = np.zeros((self.env.height, self.env.width), dtype=int) - 1 - for handle in range(self.env.get_num_agents()): - agent = self.env.agents[handle] - if agent.status <= RailAgentStatus.ACTIVE: - if agent.position is not None: - agent_positions[agent.position] = handle - - my_walker = MyWalker(self.env, agent_positions) - for handle in range(self.env.get_num_agents()): - agent = self.env.agents[handle] - if agent.status <= RailAgentStatus.ACTIVE: - my_walker.walk_to_target(handle) - self.shortest_distance_agent_map = my_walker.getData() - - self.agent_can_move = {} - agent_positions_map = np.clip(agent_positions + 1, 0, 1) - for handle in range(self.env.get_num_agents()): - opp_agents = my_walker.agent_map.get(handle, []) - me = np.clip(self.shortest_distance_agent_map[handle] + 1, 0, 1) - next_step_ok = True - next_position, next_direction, action = my_walker.walk_one_step(handle) - for opp_a in opp_agents: - opp = np.clip(self.shortest_distance_agent_map[opp_a] + 1, 0, 1) - delta = np.clip(me - opp - agent_positions_map, 0, 1) - if (np.sum(delta) > 1): - next_step_ok = False - if next_step_ok: - self.agent_can_move.update({handle: [next_position[0], next_position[1], next_direction, action]}) - - if False: - a = np.floor(np.sqrt(self.env.get_num_agents())) - b = np.ceil(self.env.get_num_agents() / a) - for handle in range(self.env.get_num_agents()): - plt.subplot(a, b, handle + 1) - plt.imshow(self.shortest_distance_agent_map[handle]) - # plt.colorbar() - plt.show(block=False) - plt.pause(0.001) diff --git a/src/extra.py b/src/extra.py deleted file mode 100644 index b70830f91714c3a8a3efdabe34bd1a427ea1f73e..0000000000000000000000000000000000000000 --- a/src/extra.py +++ /dev/null @@ -1,404 +0,0 @@ -# -# Author Adrian Egli -# -# This observation solves the FLATland challenge ROUND 1 - with agent's done 19.3% -# -# Training: -# For the training of the PPO RL agent I showed 10k episodes - The episodes used for the training -# consists of 1..20 agents on a 50x50 grid. Thus the RL agent has to learn to handle 1 upto 20 agents. -# -# - https://github.com/mitchellgoffpc/flatland-training -# ./adrian_egli_ppo_training_done.png -# -# The key idea behind this observation is that agent's can not freely choose where they want. 
-# -# ./images/adrian_egli_decisions.png -# ./images/adrian_egli_info.png -# ./images/adrian_egli_start.png -# ./images/adrian_egli_target.png -# -# Private submission -# http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/8 - -import numpy as np -from flatland.core.env_observation_builder import ObservationBuilder -from flatland.core.grid.grid4_utils import get_new_position -from flatland.envs.agent_utils import RailAgentStatus -from flatland.envs.rail_env import RailEnvActions - -from src.ppo.agent import Agent - - -# ------------------------------------- USE FAST_METHOD from FLATland master ------------------------------------------ -# Adrian Egli performance fix (the fast methods brings more than 50%) - -def fast_isclose(a, b, rtol): - return (a < (b + rtol)) or (a < (b - rtol)) - - -def fast_clip(position: (int, int), min_value: (int, int), max_value: (int, int)) -> bool: - return ( - max(min_value[0], min(position[0], max_value[0])), - max(min_value[1], min(position[1], max_value[1])) - ) - - -def fast_argmax(possible_transitions: (int, int, int, int)) -> bool: - if possible_transitions[0] == 1: - return 0 - if possible_transitions[1] == 1: - return 1 - if possible_transitions[2] == 1: - return 2 - return 3 - - -def fast_position_equal(pos_1: (int, int), pos_2: (int, int)) -> bool: - return pos_1[0] == pos_2[0] and pos_1[1] == pos_2[1] - - -def fast_count_nonzero(possible_transitions: (int, int, int, int)): - return possible_transitions[0] + possible_transitions[1] + possible_transitions[2] + possible_transitions[3] - - -# ------------------------------- END - USE FAST_METHOD from FLATland master ------------------------------------------ - -class Extra(ObservationBuilder): - - def __init__(self, max_depth): - self.max_depth = max_depth - self.observation_dim = 26 - self.agent = None - self.random_agent_starter = [] - - def build_data(self): - if self.env is not None: - self.env.dev_obs_dict = {} - self.switches = {} - self.switches_neighbours = {} - self.debug_render_list = [] - self.debug_render_path_list = [] - if self.env is not None: - self.find_all_cell_where_agent_can_choose() - - def find_all_cell_where_agent_can_choose(self): - - switches = {} - for h in range(self.env.height): - for w in range(self.env.width): - pos = (h, w) - for dir in range(4): - possible_transitions = self.env.rail.get_transitions(*pos, dir) - num_transitions = fast_count_nonzero(possible_transitions) - if num_transitions > 1: - if pos not in switches.keys(): - switches.update({pos: [dir]}) - else: - switches[pos].append(dir) - - switches_neighbours = {} - for h in range(self.env.height): - for w in range(self.env.width): - # look one step forward - for dir in range(4): - pos = (h, w) - possible_transitions = self.env.rail.get_transitions(*pos, dir) - for d in range(4): - if possible_transitions[d] == 1: - new_cell = get_new_position(pos, d) - if new_cell in switches.keys() and pos not in switches.keys(): - if pos not in switches_neighbours.keys(): - switches_neighbours.update({pos: [dir]}) - else: - switches_neighbours[pos].append(dir) - - self.switches = switches - self.switches_neighbours = switches_neighbours - - def check_agent_descision(self, position, direction): - switches = self.switches - switches_neighbours = self.switches_neighbours - agents_on_switch = False - agents_near_to_switch = False - agents_near_to_switch_all = False - if position in switches.keys(): - agents_on_switch = direction in switches[position] - - if position in switches_neighbours.keys(): - 
new_cell = get_new_position(position, direction) - if new_cell in switches.keys(): - if not direction in switches[new_cell]: - agents_near_to_switch = direction in switches_neighbours[position] - else: - agents_near_to_switch = direction in switches_neighbours[position] - - agents_near_to_switch_all = direction in switches_neighbours[position] - - return agents_on_switch, agents_near_to_switch, agents_near_to_switch_all - - def required_agent_descision(self): - agents_can_choose = {} - agents_on_switch = {} - agents_near_to_switch = {} - agents_near_to_switch_all = {} - for a in range(self.env.get_num_agents()): - ret_agents_on_switch, ret_agents_near_to_switch, ret_agents_near_to_switch_all = \ - self.check_agent_descision( - self.env.agents[a].position, - self.env.agents[a].direction) - agents_on_switch.update({a: ret_agents_on_switch}) - ready_to_depart = self.env.agents[a].status == RailAgentStatus.READY_TO_DEPART - agents_near_to_switch.update({a: (ret_agents_near_to_switch and not ready_to_depart)}) - - agents_can_choose.update({a: agents_on_switch[a] or agents_near_to_switch[a]}) - - agents_near_to_switch_all.update({a: (ret_agents_near_to_switch_all and not ready_to_depart)}) - - return agents_can_choose, agents_on_switch, agents_near_to_switch, agents_near_to_switch_all - - def debug_render(self, env_renderer): - agents_can_choose, agents_on_switch, agents_near_to_switch, agents_near_to_switch_all = \ - self.required_agent_descision() - self.env.dev_obs_dict = {} - for a in range(max(3, self.env.get_num_agents())): - self.env.dev_obs_dict.update({a: []}) - - selected_agent = None - if agents_can_choose[0]: - if self.env.agents[0].position is not None: - self.debug_render_list.append(self.env.agents[0].position) - else: - self.debug_render_list.append(self.env.agents[0].initial_position) - - if self.env.agents[0].position is not None: - self.debug_render_path_list.append(self.env.agents[0].position) - else: - self.debug_render_path_list.append(self.env.agents[0].initial_position) - - env_renderer.gl.agent_colors[0] = env_renderer.gl.rgb_s2i("FF0000") - env_renderer.gl.agent_colors[1] = env_renderer.gl.rgb_s2i("666600") - env_renderer.gl.agent_colors[2] = env_renderer.gl.rgb_s2i("006666") - env_renderer.gl.agent_colors[3] = env_renderer.gl.rgb_s2i("550000") - - self.env.dev_obs_dict[0] = self.debug_render_list - self.env.dev_obs_dict[1] = self.switches.keys() - self.env.dev_obs_dict[2] = self.switches_neighbours.keys() - self.env.dev_obs_dict[3] = self.debug_render_path_list - - def normalize_observation(self, obsData): - return obsData - - def is_collision(self, obsData): - return False - - def reset(self): - self.build_data() - return - - def fast_argmax(self, array): - if array[0] == 1: - return 0 - if array[1] == 1: - return 1 - if array[2] == 1: - return 2 - return 3 - - def _explore(self, handle, new_position, new_direction, depth=0): - has_opp_agent = 0 - has_same_agent = 0 - visited = [] - - # stop exploring (max_depth reached) - if depth >= self.max_depth: - return has_opp_agent, has_same_agent, visited - - # max_explore_steps = 100 - cnt = 0 - while cnt < 100: - cnt += 1 - - visited.append(new_position) - opp_a = self.env.agent_positions[new_position] - if opp_a != -1 and opp_a != handle: - if self.env.agents[opp_a].direction != new_direction: - # opp agent found - has_opp_agent = 1 - return has_opp_agent, has_same_agent, visited - else: - has_same_agent = 1 - return has_opp_agent, has_same_agent, visited - - # convert one-hot encoding to 0,1,2,3 - possible_transitions = 
self.env.rail.get_transitions(*new_position, new_direction) - agents_on_switch, \ - agents_near_to_switch, \ - agents_near_to_switch_all = \ - self.check_agent_descision(new_position, new_direction) - if agents_near_to_switch: - return has_opp_agent, has_same_agent, visited - - if agents_on_switch: - for dir_loop in range(4): - if possible_transitions[dir_loop] == 1: - hoa, hsa, v = self._explore(handle, - get_new_position(new_position, dir_loop), - dir_loop, - depth + 1) - visited.append(v) - has_opp_agent = 0.5 * (has_opp_agent + hoa) - has_same_agent = 0.5 * (has_same_agent + hsa) - return has_opp_agent, has_same_agent, visited - else: - new_direction = fast_argmax(possible_transitions) - new_position = get_new_position(new_position, new_direction) - return has_opp_agent, has_same_agent, visited - - def get(self, handle): - # all values are [0,1] - # observation[0] : 1 path towards target (direction 0) / otherwise 0 -> path is longer or there is no path - # observation[1] : 1 path towards target (direction 1) / otherwise 0 -> path is longer or there is no path - # observation[2] : 1 path towards target (direction 2) / otherwise 0 -> path is longer or there is no path - # observation[3] : 1 path towards target (direction 3) / otherwise 0 -> path is longer or there is no path - # observation[4] : int(agent.status == RailAgentStatus.READY_TO_DEPART) - # observation[5] : int(agent.status == RailAgentStatus.ACTIVE) - # observation[6] : int(agent.status == RailAgentStatus.DONE or agent.status == RailAgentStatus.DONE_REMOVED) - # observation[7] : current agent is located at a switch, where it can take a routing decision - # observation[8] : current agent is located at a cell, where it has to take a stop-or-go decision - # observation[9] : current agent is located one step before/after a switch - # observation[10] : 1 if there is a path (track/branch) otherwise 0 (direction 0) - # observation[11] : 1 if there is a path (track/branch) otherwise 0 (direction 1) - # observation[12] : 1 if there is a path (track/branch) otherwise 0 (direction 2) - # observation[13] : 1 if there is a path (track/branch) otherwise 0 (direction 3) - # observation[14] : If there is a path with step (direction 0) and there is a agent with opposite direction -> 1 - # observation[15] : If there is a path with step (direction 1) and there is a agent with opposite direction -> 1 - # observation[16] : If there is a path with step (direction 2) and there is a agent with opposite direction -> 1 - # observation[17] : If there is a path with step (direction 3) and there is a agent with opposite direction -> 1 - # observation[18] : If there is a path with step (direction 0) and there is a agent with same direction -> 1 - # observation[19] : If there is a path with step (direction 1) and there is a agent with same direction -> 1 - # observation[20] : If there is a path with step (direction 2) and there is a agent with same direction -> 1 - # observation[21] : If there is a path with step (direction 3) and there is a agent with same direction -> 1 - - observation = np.zeros(self.observation_dim) - visited = [] - agent = self.env.agents[handle] - - agent_done = False - if agent.status == RailAgentStatus.READY_TO_DEPART: - agent_virtual_position = agent.initial_position - observation[4] = 1 - elif agent.status == RailAgentStatus.ACTIVE: - agent_virtual_position = agent.position - observation[5] = 1 - else: - observation[6] = 1 - agent_virtual_position = (-1, -1) - agent_done = True - - if not agent_done: - 
visited.append(agent_virtual_position) - distance_map = self.env.distance_map.get() - current_cell_dist = distance_map[handle, - agent_virtual_position[0], agent_virtual_position[1], - agent.direction] - possible_transitions = self.env.rail.get_transitions(*agent_virtual_position, agent.direction) - orientation = agent.direction - if fast_count_nonzero(possible_transitions) == 1: - orientation = np.argmax(possible_transitions) - - for dir_loop, branch_direction in enumerate([(orientation + i) % 4 for i in range(-1, 3)]): - if possible_transitions[branch_direction]: - new_position = get_new_position(agent_virtual_position, branch_direction) - - new_cell_dist = distance_map[handle, - new_position[0], new_position[1], - branch_direction] - if not (np.math.isinf(new_cell_dist) and np.math.isinf(current_cell_dist)): - observation[dir_loop] = int(new_cell_dist < current_cell_dist) - - has_opp_agent, has_same_agent, v = self._explore(handle, new_position, branch_direction) - visited.append(v) - - observation[10 + dir_loop] = 1 - observation[14 + dir_loop] = has_opp_agent - observation[18 + dir_loop] = has_same_agent - - opp_a = self.env.agent_positions[new_position] - if opp_a != -1 and opp_a != handle: - observation[22 + dir_loop] = 1 - - agents_on_switch, \ - agents_near_to_switch, \ - agents_near_to_switch_all = \ - self.check_agent_descision(agent_virtual_position, agent.direction) - observation[7] = int(agents_on_switch) - observation[8] = int(agents_near_to_switch) - observation[9] = int(agents_near_to_switch_all) - - self.env.dev_obs_dict.update({handle: visited}) - - return observation - - def rl_agent_act_ADRIAN(self, observation, info, eps=0.0): - self.loadAgent() - action_dict = {} - for a in range(self.env.get_num_agents()): - if info['action_required'][a]: - action_dict[a] = self.agent.act(observation[a], eps=eps) - # action_dict[a] = np.random.randint(5) - else: - action_dict[a] = RailEnvActions.DO_NOTHING - - return action_dict - - def rl_agent_act(self, observation, info, eps=0.0): - if len(self.random_agent_starter) != self.env.get_num_agents(): - self.random_agent_starter = np.random.random(self.env.get_num_agents()) * 1000.0 - self.loadAgent() - - action_dict = {} - for a in range(self.env.get_num_agents()): - if self.random_agent_starter[a] > self.env._elapsed_steps: - action_dict[a] = RailEnvActions.STOP_MOVING - elif info['action_required'][a]: - action_dict[a] = self.agent.act(observation[a], eps=eps) - # action_dict[a] = np.random.randint(5) - else: - action_dict[a] = RailEnvActions.DO_NOTHING - - return action_dict - - def rl_agent_act_ADRIAN_01(self, observation, info, eps=0.0): - self.loadAgent() - action_dict = {} - active_cnt = 0 - for a in range(self.env.get_num_agents()): - if active_cnt < 10 or self.env.agents[a].status == RailAgentStatus.ACTIVE: - if observation[a][6] == 1: - active_cnt += int(self.env.agents[a].status == RailAgentStatus.ACTIVE) - action_dict[a] = RailEnvActions.STOP_MOVING - else: - active_cnt += int(self.env.agents[a].status < RailAgentStatus.DONE) - if (observation[a][7] + observation[a][8] + observation[a][9] > 0) or \ - (self.env.agents[a].status < RailAgentStatus.ACTIVE): - if info['action_required'][a]: - action_dict[a] = self.agent.act(observation[a], eps=eps) - # action_dict[a] = np.random.randint(5) - else: - action_dict[a] = RailEnvActions.MOVE_FORWARD - else: - action_dict[a] = RailEnvActions.MOVE_FORWARD - else: - action_dict[a] = RailEnvActions.STOP_MOVING - - return action_dict - - def loadAgent(self): - if self.agent is not None: - 
return - self.state_size = self.env.obs_builder.observation_dim - self.action_size = 5 - print("action_size: ", self.action_size) - print("state_size: ", self.state_size) - self.agent = Agent(self.state_size, self.action_size, 0) - self.agent.load('./checkpoints/', 0, 1.0) diff --git a/src/images/adrian_egli_decisions.png b/src/images/adrian_egli_decisions.png deleted file mode 100644 index f5bd276eb753a71f21aaa62fcec8a88cf460b409..0000000000000000000000000000000000000000 Binary files a/src/images/adrian_egli_decisions.png and /dev/null differ diff --git a/src/images/adrian_egli_info.png b/src/images/adrian_egli_info.png deleted file mode 100644 index afe228be94acecd4c6d596e53c7faf5fab47cb06..0000000000000000000000000000000000000000 Binary files a/src/images/adrian_egli_info.png and /dev/null differ diff --git a/src/images/adrian_egli_ppo_training_done.png b/src/images/adrian_egli_ppo_training_done.png deleted file mode 100644 index d163a110b8d09713a99a1accdd0ea2191d151bbd..0000000000000000000000000000000000000000 Binary files a/src/images/adrian_egli_ppo_training_done.png and /dev/null differ diff --git a/src/images/adrian_egli_start.png b/src/images/adrian_egli_start.png deleted file mode 100644 index 977fead60d7dd11d1e0f1e32635b555df64b1a0a..0000000000000000000000000000000000000000 Binary files a/src/images/adrian_egli_start.png and /dev/null differ diff --git a/src/images/adrian_egli_target.png b/src/images/adrian_egli_target.png deleted file mode 100644 index ab357c59d3cf5adbc74b559ffb3cd87400e0cb8d..0000000000000000000000000000000000000000 Binary files a/src/images/adrian_egli_target.png and /dev/null differ diff --git a/src/observations.py b/src/observations.py deleted file mode 100644 index ce47e508b21d7f0cf9a9a88ee864d3da18184cf1..0000000000000000000000000000000000000000 --- a/src/observations.py +++ /dev/null @@ -1,736 +0,0 @@ -""" -Collection of environment-specific ObservationBuilder. -""" -import collections -from typing import Optional, List, Dict, Tuple - -import numpy as np -from flatland.core.env import Environment -from flatland.core.env_observation_builder import ObservationBuilder -from flatland.core.env_prediction_builder import PredictionBuilder -from flatland.core.grid.grid4_utils import get_new_position -from flatland.core.grid.grid_utils import coordinate_to_position -from flatland.envs.agent_utils import RailAgentStatus, EnvAgent -from flatland.utils.ordered_set import OrderedSet - - -class MyTreeObsForRailEnv(ObservationBuilder): - """ - TreeObsForRailEnv object. - - This object returns observation vectors for agents in the RailEnv environment. - The information is local to each agent and exploits the graph structure of the rail - network to simplify the representation of the state of the environment for each agent. - - For details about the features in the tree observation see the get() function. 
- """ - Node = collections.namedtuple('Node', 'dist_min_to_target ' - 'target_encountered ' - 'num_agents_same_direction ' - 'num_agents_opposite_direction ' - 'childs') - - tree_explored_actions_char = ['L', 'F', 'R', 'B'] - - def __init__(self, max_depth: int, predictor: PredictionBuilder = None): - super().__init__() - self.max_depth = max_depth - self.observation_dim = 2 - self.location_has_agent = {} - self.predictor = predictor - self.location_has_target = None - - self.switches_list = {} - self.switches_neighbours_list = [] - self.check_agent_descision = None - - def reset(self): - self.location_has_target = {tuple(agent.target): 1 for agent in self.env.agents} - - def set_switch_and_pre_switch(self, switch_list, pre_switch_list, check_agent_descision): - self.switches_list = switch_list - self.switches_neighbours_list = pre_switch_list - self.check_agent_descision = check_agent_descision - - def get_many(self, handles: Optional[List[int]] = None) -> Dict[int, Node]: - """ - Called whenever an observation has to be computed for the `env` environment, for each agent with handle - in the `handles` list. - """ - - if handles is None: - handles = [] - if self.predictor: - self.max_prediction_depth = 0 - self.predicted_pos = {} - self.predicted_dir = {} - self.predictions = self.predictor.get() - if self.predictions: - for t in range(self.predictor.max_depth + 1): - pos_list = [] - dir_list = [] - for a in handles: - if self.predictions[a] is None: - continue - pos_list.append(self.predictions[a][t][1:3]) - dir_list.append(self.predictions[a][t][3]) - self.predicted_pos.update({t: coordinate_to_position(self.env.width, pos_list)}) - self.predicted_dir.update({t: dir_list}) - self.max_prediction_depth = len(self.predicted_pos) - # Update local lookup table for all agents' positions - # ignore other agents not in the grid (only status active and done) - - self.location_has_agent = {} - self.location_has_agent_direction = {} - self.location_has_agent_speed = {} - self.location_has_agent_malfunction = {} - self.location_has_agent_ready_to_depart = {} - - for _agent in self.env.agents: - if _agent.status in [RailAgentStatus.ACTIVE, RailAgentStatus.DONE] and \ - _agent.position: - self.location_has_agent[tuple(_agent.position)] = 1 - self.location_has_agent_direction[tuple(_agent.position)] = _agent.direction - self.location_has_agent_speed[tuple(_agent.position)] = _agent.speed_data['speed'] - self.location_has_agent_malfunction[tuple(_agent.position)] = _agent.malfunction_data[ - 'malfunction'] - - if _agent.status in [RailAgentStatus.READY_TO_DEPART] and \ - _agent.initial_position: - self.location_has_agent_ready_to_depart[tuple(_agent.initial_position)] = \ - self.location_has_agent_ready_to_depart.get(tuple(_agent.initial_position), 0) + 1 - - observations = super().get_many(handles) - - return observations - - def get(self, handle: int = 0) -> Node: - """ - Computes the current observation for agent `handle` in env - - The observation vector is composed of 4 sequential parts, corresponding to data from the up to 4 possible - movements in a RailEnv (up to because only a subset of possible transitions are allowed in RailEnv). - The possible movements are sorted relative to the current orientation of the agent, rather than NESW as for - the transitions. The order is:: - - [data from 'left'] + [data from 'forward'] + [data from 'right'] + [data from 'back'] - - Each branch data is organized as:: - - [root node information] + - [recursive branch data from 'left'] + - [... 
from 'forward'] + - [... from 'right] + - [... from 'back'] - - Each node information is composed of 9 features: - - #1: - if own target lies on the explored branch the current distance from the agent in number of cells is stored. - - #2: - if another agents target is detected the distance in number of cells from the agents current location\ - is stored - - #3: - if another agent is detected the distance in number of cells from current agent position is stored. - - #4: - possible conflict detected - tot_dist = Other agent predicts to pass along this cell at the same time as the agent, we store the \ - distance in number of cells from current agent position - - 0 = No other agent reserve the same cell at similar time - - #5: - if an not usable switch (for agent) is detected we store the distance. - - #6: - This feature stores the distance in number of cells to the next branching (current node) - - #7: - minimum distance from node to the agent's target given the direction of the agent if this path is chosen - - #8: - agent in the same direction - n = number of agents present same direction \ - (possible future use: number of other agents in the same direction in this branch) - 0 = no agent present same direction - - #9: - agent in the opposite direction - n = number of agents present other direction than myself (so conflict) \ - (possible future use: number of other agents in other direction in this branch, ie. number of conflicts) - 0 = no agent present other direction than myself - - #10: - malfunctioning/blokcing agents - n = number of time steps the oberved agent remains blocked - - #11: - slowest observed speed of an agent in same direction - 1 if no agent is observed - - min_fractional speed otherwise - #12: - number of agents ready to depart but no yet active - - Missing/padding nodes are filled in with -inf (truncated). - Missing values in present node are filled in with +inf (truncated). - - - In case of the root node, the values are [0, 0, 0, 0, distance from agent to target, own malfunction, own speed] - In case the target node is reached, the values are [0, 0, 0, 0, 0]. - """ - - if handle > len(self.env.agents): - print("ERROR: obs _get - handle ", handle, " len(agents)", len(self.env.agents)) - agent = self.env.agents[handle] # TODO: handle being treated as index - - if agent.status == RailAgentStatus.READY_TO_DEPART: - agent_virtual_position = agent.initial_position - elif agent.status == RailAgentStatus.ACTIVE: - agent_virtual_position = agent.position - elif agent.status == RailAgentStatus.DONE: - agent_virtual_position = agent.target - else: - return None - - possible_transitions = self.env.rail.get_transitions(*agent_virtual_position, agent.direction) - num_transitions = np.count_nonzero(possible_transitions) - - # Here information about the agent itself is stored - distance_map = self.env.distance_map.get() - - root_node_observation = MyTreeObsForRailEnv.Node(dist_min_to_target=distance_map[ - (handle, *agent_virtual_position, - agent.direction)], - target_encountered=0, - num_agents_same_direction=0, - num_agents_opposite_direction=0, - childs={}) - - visited = OrderedSet() - - # Start from the current orientation, and see which transitions are available; - # organize them as [left, forward, right, back], relative to the current orientation - # If only one transition is possible, the tree is oriented with this transition as the forward branch. 
- orientation = agent.direction - - if num_transitions == 1: - orientation = np.argmax(possible_transitions) - - for i, branch_direction in enumerate([(orientation + i) % 4 for i in range(-1, 3)]): - if possible_transitions[branch_direction]: - new_cell = get_new_position(agent_virtual_position, branch_direction) - - branch_observation, branch_visited = \ - self._explore_branch(handle, new_cell, branch_direction, 1, 1) - root_node_observation.childs[self.tree_explored_actions_char[i]] = branch_observation - - visited |= branch_visited - else: - # add cells filled with infinity if no transition is possible - root_node_observation.childs[self.tree_explored_actions_char[i]] = -np.inf - self.env.dev_obs_dict[handle] = visited - - return root_node_observation - - def _explore_branch(self, handle, position, direction, tot_dist, depth): - """ - Utility function to compute tree-based observations. - We walk along the branch and collect the information documented in the get() function. - If there is a branching point a new node is created and each possible branch is explored. - """ - - # [Recursive branch opened] - if depth >= self.max_depth + 1: - return [], [] - - # Continue along direction until next switch or - # until no transitions are possible along the current direction (i.e., dead-ends) - # We treat dead-ends as nodes, instead of going back, to avoid loops - exploring = True - - visited = OrderedSet() - agent = self.env.agents[handle] - - other_agent_opposite_direction = 0 - other_agent_same_direction = 0 - - dist_min_to_target = self.env.distance_map.get()[handle, position[0], position[1], direction] - - last_is_dead_end = False - last_is_a_decision_cell = False - target_encountered = 0 - - cnt = 0 - while exploring: - - dist_min_to_target = min(dist_min_to_target, self.env.distance_map.get()[handle, position[0], position[1], - direction]) - - if agent.target == position: - target_encountered = 1 - - new_direction_me = direction - new_cell_me = position - a = self.env.agent_positions[new_cell_me] - if a != -1 and a != handle: - opp_agent = self.env.agents[a] - # look one step forward - # opp_possible_transitions = self.env.rail.get_transitions(*opp_agent.position, opp_agent.direction) - if opp_agent.direction != new_direction_me: # opp_possible_transitions[new_direction_me] == 0: - other_agent_opposite_direction += 1 - else: - other_agent_same_direction += 1 - - # ############################# - # ############################# - if (position[0], position[1], direction) in visited: - break - visited.add((position[0], position[1], direction)) - - # If the target node is encountered, pick that as node. Also, no further branching is possible. - if np.array_equal(position, self.env.agents[handle].target): - last_is_target = True - break - - exploring = False - - # Check number of possible transitions for agent and total number of transitions in cell (type) - possible_transitions = self.env.rail.get_transitions(*position, direction) - num_transitions = np.count_nonzero(possible_transitions) - # cell_transitions = self.env.rail.get_transitions(*position, direction) - transition_bit = bin(self.env.rail.get_full_transitions(*position)) - total_transitions = transition_bit.count("1") - - if num_transitions == 1: - # Check if dead-end, or if we can go forward along direction - nbits = total_transitions - if nbits == 1: - # Dead-end! 
- last_is_dead_end = True - - if self.check_agent_descision is not None: - ret_agents_on_switch, ret_agents_near_to_switch, agents_near_to_switch_all = \ - self.check_agent_descision(position, - direction, - self.switches_list, - self.switches_neighbours_list) - if ret_agents_on_switch: - last_is_a_decision_cell = True - break - - exploring = True - # convert one-hot encoding to 0,1,2,3 - cell_transitions = self.env.rail.get_transitions(*position, direction) - direction = np.argmax(cell_transitions) - position = get_new_position(position, direction) - - cnt += 1 - if cnt > 1000: - exploring = False - - # ############################# - # ############################# - # Modify here to append new / different features for each visited cell! - - node = MyTreeObsForRailEnv.Node(dist_min_to_target=dist_min_to_target, - target_encountered=target_encountered, - num_agents_opposite_direction=other_agent_opposite_direction, - num_agents_same_direction=other_agent_same_direction, - childs={}) - - # ############################# - # ############################# - # Start from the current orientation, and see which transitions are available; - # organize them as [left, forward, right, back], relative to the current orientation - # Get the possible transitions - possible_transitions = self.env.rail.get_transitions(*position, direction) - - for i, branch_direction in enumerate([(direction + 4 + i) % 4 for i in range(-1, 3)]): - if last_is_dead_end and self.env.rail.get_transition((*position, direction), - (branch_direction + 2) % 4): - # Swap forward and back in case of dead-end, so that an agent can learn that going forward takes - # it back - new_cell = get_new_position(position, (branch_direction + 2) % 4) - branch_observation, branch_visited = self._explore_branch(handle, - new_cell, - (branch_direction + 2) % 4, - tot_dist + 1, - depth + 1) - node.childs[self.tree_explored_actions_char[i]] = branch_observation - if len(branch_visited) != 0: - visited |= branch_visited - elif last_is_a_decision_cell and possible_transitions[branch_direction]: - new_cell = get_new_position(position, branch_direction) - branch_observation, branch_visited = self._explore_branch(handle, - new_cell, - branch_direction, - tot_dist + 1, - depth + 1) - node.childs[self.tree_explored_actions_char[i]] = branch_observation - if len(branch_visited) != 0: - visited |= branch_visited - else: - # no exploring possible, add just cells with infinity - node.childs[self.tree_explored_actions_char[i]] = -np.inf - - if depth == self.max_depth: - node.childs.clear() - return node, visited - - def util_print_obs_subtree(self, tree: Node): - """ - Utility function to print tree observations returned by this object. 
- """ - self.print_node_features(tree, "root", "") - for direction in self.tree_explored_actions_char: - self.print_subtree(tree.childs[direction], direction, "\t") - - @staticmethod - def print_node_features(node: Node, label, indent): - print(indent, "Direction ", label, ": ", node.num_agents_same_direction, - ", ", node.num_agents_opposite_direction) - - def print_subtree(self, node, label, indent): - if node == -np.inf or not node: - print(indent, "Direction ", label, ": -np.inf") - return - - self.print_node_features(node, label, indent) - - if not node.childs: - return - - for direction in self.tree_explored_actions_char: - self.print_subtree(node.childs[direction], direction, indent + "\t") - - def set_env(self, env: Environment): - super().set_env(env) - if self.predictor: - self.predictor.set_env(self.env) - - def _reverse_dir(self, direction): - return int((direction + 2) % 4) - - -class GlobalObsForRailEnv(ObservationBuilder): - """ - Gives a global observation of the entire rail environment. - The observation is composed of the following elements: - - - transition map array with dimensions (env.height, env.width, 16),\ - assuming 16 bits encoding of transitions. - - - obs_agents_state: A 3D array (map_height, map_width, 5) with - - first channel containing the agents position and direction - - second channel containing the other agents positions and direction - - third channel containing agent/other agent malfunctions - - fourth channel containing agent/other agent fractional speeds - - fifth channel containing number of other agents ready to depart - - - obs_targets: Two 2D arrays (map_height, map_width, 2) containing respectively the position of the given agent\ - target and the positions of the other agents targets (flag only, no counter!). - """ - - def __init__(self): - super(GlobalObsForRailEnv, self).__init__() - - def set_env(self, env: Environment): - super().set_env(env) - - def reset(self): - self.rail_obs = np.zeros((self.env.height, self.env.width, 16)) - for i in range(self.rail_obs.shape[0]): - for j in range(self.rail_obs.shape[1]): - bitlist = [int(digit) for digit in bin(self.env.rail.get_full_transitions(i, j))[2:]] - bitlist = [0] * (16 - len(bitlist)) + bitlist - self.rail_obs[i, j] = np.array(bitlist) - - def get(self, handle: int = 0) -> (np.ndarray, np.ndarray, np.ndarray): - - agent = self.env.agents[handle] - if agent.status == RailAgentStatus.READY_TO_DEPART: - agent_virtual_position = agent.initial_position - elif agent.status == RailAgentStatus.ACTIVE: - agent_virtual_position = agent.position - elif agent.status == RailAgentStatus.DONE: - agent_virtual_position = agent.target - else: - return None - - obs_targets = np.zeros((self.env.height, self.env.width, 2)) - obs_agents_state = np.zeros((self.env.height, self.env.width, 5)) - 1 - - # TODO can we do this more elegantly? 
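# --------------------------------------------------------------------------
# Aside: reset() above expands each cell's packed transition value into a
# fixed 16-element 0/1 vector.  A small standalone sketch of that expansion
# (pure function, no env required; the function name is an assumption):
import numpy as np

def transitions_to_bits(full_transitions: int, n_bits: int = 16) -> np.ndarray:
    bitlist = [int(digit) for digit in bin(full_transitions)[2:]]
    # left-pad with zeros so every cell yields the same observation width
    bitlist = [0] * (n_bits - len(bitlist)) + bitlist
    return np.array(bitlist)

# transitions_to_bits(0b1000000000100000) has ones at indices 0 and 10.
# --------------------------------------------------------------------------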
- # for r in range(self.env.height): - # for c in range(self.env.width): - # obs_agents_state[(r, c)][4] = 0 - obs_agents_state[:, :, 4] = 0 - - obs_agents_state[agent_virtual_position][0] = agent.direction - obs_targets[agent.target][0] = 1 - - for i in range(len(self.env.agents)): - other_agent: EnvAgent = self.env.agents[i] - - # ignore other agents not in the grid any more - if other_agent.status == RailAgentStatus.DONE_REMOVED: - continue - - obs_targets[other_agent.target][1] = 1 - - # second to fourth channel only if in the grid - if other_agent.position is not None: - # second channel only for other agents - if i != handle: - obs_agents_state[other_agent.position][1] = other_agent.direction - obs_agents_state[other_agent.position][2] = other_agent.malfunction_data['malfunction'] - obs_agents_state[other_agent.position][3] = other_agent.speed_data['speed'] - # fifth channel: all ready to depart on this position - if other_agent.status == RailAgentStatus.READY_TO_DEPART: - obs_agents_state[other_agent.initial_position][4] += 1 - return self.rail_obs, obs_agents_state, obs_targets - - -class LocalObsForRailEnv(ObservationBuilder): - """ - !!!!!!WARNING!!! THIS IS DEPRACTED AND NOT UPDATED TO FLATLAND 2.0!!!!! - Gives a local observation of the rail environment around the agent. - The observation is composed of the following elements: - - - transition map array of the local environment around the given agent, \ - with dimensions (view_height,2*view_width+1, 16), \ - assuming 16 bits encoding of transitions. - - - Two 2D arrays (view_height,2*view_width+1, 2) containing respectively, \ - if they are in the agent's vision range, its target position, the positions of the other targets. - - - A 2D array (view_height,2*view_width+1, 4) containing the one hot encoding of directions \ - of the other agents at their position coordinates, if they are in the agent's vision range. - - - A 4 elements array with one hot encoding of the direction. - - Use the parameters view_width and view_height to define the rectangular view of the agent. - The center parameters moves the agent along the height axis of this rectangle. If it is 0 the agent only has - observation in front of it. - - .. deprecated:: 2.0.0 - """ - - def __init__(self, view_width, view_height, center): - - super(LocalObsForRailEnv, self).__init__() - self.view_width = view_width - self.view_height = view_height - self.center = center - self.max_padding = max(self.view_width, self.view_height - self.center) - - def reset(self): - # We build the transition map with a view_radius empty cells expansion on each side. - # This helps to collect the local transition map view when the agent is close to a border. 
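# --------------------------------------------------------------------------
# Aside: the comment above describes padding the transition map so that a
# local window around an agent near the border can be sliced without bounds
# checks.  A generic sketch of that idea (names, the zero padding value and
# the square window are assumptions, not this class's exact implementation):
import numpy as np

def pad_for_local_view(rail_obs: np.ndarray, radius: int) -> np.ndarray:
    # rail_obs: (height, width, channels); zero-pad the two spatial axes only.
    return np.pad(rail_obs, ((radius, radius), (radius, radius), (0, 0)),
                  mode="constant")

def cut_local_view(padded: np.ndarray, position, radius: int) -> np.ndarray:
    # position is in un-padded coordinates; shifting by `radius` re-centres it.
    row, col = position[0] + radius, position[1] + radius
    return padded[row - radius:row + radius + 1, col - radius:col + radius + 1]

# padded = pad_for_local_view(np.zeros((5, 5, 16)), radius=2)
# cut_local_view(padded, (0, 0), radius=2).shape  -> (5, 5, 16)
# --------------------------------------------------------------------------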
- self.max_padding = max(self.view_width, self.view_height) - self.rail_obs = np.zeros((self.env.height, - self.env.width, 16)) - for i in range(self.env.height): - for j in range(self.env.width): - bitlist = [int(digit) for digit in bin(self.env.rail.get_full_transitions(i, j))[2:]] - bitlist = [0] * (16 - len(bitlist)) + bitlist - self.rail_obs[i, j] = np.array(bitlist) - - def get(self, handle: int = 0) -> (np.ndarray, np.ndarray, np.ndarray, np.ndarray): - agents = self.env.agents - agent = agents[handle] - - # Correct agents position for padding - # agent_rel_pos[0] = agent.position[0] + self.max_padding - # agent_rel_pos[1] = agent.position[1] + self.max_padding - - # Collect visible cells as set to be plotted - visited, rel_coords = self.field_of_view(agent.position, agent.direction, ) - local_rail_obs = None - - # Add the visible cells to the observed cells - self.env.dev_obs_dict[handle] = set(visited) - - # Locate observed agents and their coresponding targets - local_rail_obs = np.zeros((self.view_height, 2 * self.view_width + 1, 16)) - obs_map_state = np.zeros((self.view_height, 2 * self.view_width + 1, 2)) - obs_other_agents_state = np.zeros((self.view_height, 2 * self.view_width + 1, 4)) - _idx = 0 - for pos in visited: - curr_rel_coord = rel_coords[_idx] - local_rail_obs[curr_rel_coord[0], curr_rel_coord[1], :] = self.rail_obs[pos[0], pos[1], :] - if pos == agent.target: - obs_map_state[curr_rel_coord[0], curr_rel_coord[1], 0] = 1 - else: - for tmp_agent in agents: - if pos == tmp_agent.target: - obs_map_state[curr_rel_coord[0], curr_rel_coord[1], 1] = 1 - if pos != agent.position: - for tmp_agent in agents: - if pos == tmp_agent.position: - obs_other_agents_state[curr_rel_coord[0], curr_rel_coord[1], :] = np.identity(4)[ - tmp_agent.direction] - - _idx += 1 - - direction = np.identity(4)[agent.direction] - return local_rail_obs, obs_map_state, obs_other_agents_state, direction - - def get_many(self, handles: Optional[List[int]] = None) -> Dict[ - int, Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]]: - """ - Called whenever an observation has to be computed for the `env` environment, for each agent with handle - in the `handles` list. 
- """ - - return super().get_many(handles) - - def field_of_view(self, position, direction, state=None): - # Compute the local field of view for an agent in the environment - data_collection = False - if state is not None: - temp_visible_data = np.zeros(shape=(self.view_height, 2 * self.view_width + 1, 16)) - data_collection = True - if direction == 0: - origin = (position[0] + self.center, position[1] - self.view_width) - elif direction == 1: - origin = (position[0] - self.view_width, position[1] - self.center) - elif direction == 2: - origin = (position[0] - self.center, position[1] + self.view_width) - else: - origin = (position[0] + self.view_width, position[1] + self.center) - visible = list() - rel_coords = list() - for h in range(self.view_height): - for w in range(2 * self.view_width + 1): - if direction == 0: - if 0 <= origin[0] - h < self.env.height and 0 <= origin[1] + w < self.env.width: - visible.append((origin[0] - h, origin[1] + w)) - rel_coords.append((h, w)) - # if data_collection: - # temp_visible_data[h, w, :] = state[origin[0] - h, origin[1] + w, :] - elif direction == 1: - if 0 <= origin[0] + w < self.env.height and 0 <= origin[1] + h < self.env.width: - visible.append((origin[0] + w, origin[1] + h)) - rel_coords.append((h, w)) - # if data_collection: - # temp_visible_data[h, w, :] = state[origin[0] + w, origin[1] + h, :] - elif direction == 2: - if 0 <= origin[0] + h < self.env.height and 0 <= origin[1] - w < self.env.width: - visible.append((origin[0] + h, origin[1] - w)) - rel_coords.append((h, w)) - # if data_collection: - # temp_visible_data[h, w, :] = state[origin[0] + h, origin[1] - w, :] - else: - if 0 <= origin[0] - w < self.env.height and 0 <= origin[1] - h < self.env.width: - visible.append((origin[0] - w, origin[1] - h)) - rel_coords.append((h, w)) - # if data_collection: - # temp_visible_data[h, w, :] = state[origin[0] - w, origin[1] - h, :] - if data_collection: - return temp_visible_data - else: - return visible, rel_coords - - -def _split_node_into_feature_groups(node: MyTreeObsForRailEnv.Node, dist_min_to_target: int) -> (np.ndarray, np.ndarray, - np.ndarray): - data = np.zeros(2) - - data[0] = 2.0 * int(node.num_agents_opposite_direction > 0) - 1.0 - # data[1] = 2.0 * int(node.num_agents_same_direction > 0) - 1.0 - data[1] = 2.0 * int(node.target_encountered > 0) - 1.0 - - return data - - -def _split_subtree_into_feature_groups(node: MyTreeObsForRailEnv.Node, dist_min_to_target: int, - current_tree_depth: int, - max_tree_depth: int) -> ( - np.ndarray, np.ndarray, np.ndarray): - if node == -np.inf: - remaining_depth = max_tree_depth - current_tree_depth - # reference: https://stackoverflow.com/questions/515214/total-number-of-nodes-in-a-tree-data-structure - num_remaining_nodes = int((4 ** (remaining_depth + 1) - 1) / (4 - 1)) - return [0] * num_remaining_nodes * 2 - - data = _split_node_into_feature_groups(node, dist_min_to_target) - - if not node.childs: - return data - - for direction in MyTreeObsForRailEnv.tree_explored_actions_char: - sub_data = _split_subtree_into_feature_groups(node.childs[direction], - node.dist_min_to_target, - current_tree_depth + 1, - max_tree_depth) - data = np.concatenate((data, sub_data)) - return data - - -def split_tree_into_feature_groups(tree: MyTreeObsForRailEnv.Node, max_tree_depth: int) -> ( - np.ndarray, np.ndarray, np.ndarray): - """ - This function splits the tree into three difference arrays of values - """ - data = _split_node_into_feature_groups(tree, 1000000.0) - - for direction in 
MyTreeObsForRailEnv.tree_explored_actions_char: - sub_data = _split_subtree_into_feature_groups(tree.childs[direction], - 1000000.0, - 1, - max_tree_depth) - data = np.concatenate((data, sub_data)) - - return data - - -def normalize_observation(observation: MyTreeObsForRailEnv.Node, tree_depth: int): - """ - This function normalizes the observation used by the RL algorithm - """ - data = split_tree_into_feature_groups(observation, tree_depth) - normalized_obs = data - - # navigate_info - navigate_info = np.zeros(4) - action_info = np.zeros(4) - np.seterr(all='raise') - try: - dm = observation.dist_min_to_target - if observation.childs['L'] != -np.inf: - navigate_info[0] = dm - observation.childs['L'].dist_min_to_target - action_info[0] = 1 - if observation.childs['F'] != -np.inf: - navigate_info[1] = dm - observation.childs['F'].dist_min_to_target - action_info[1] = 1 - if observation.childs['R'] != -np.inf: - navigate_info[2] = dm - observation.childs['R'].dist_min_to_target - action_info[2] = 1 - if observation.childs['B'] != -np.inf: - navigate_info[3] = dm - observation.childs['B'].dist_min_to_target - action_info[3] = 1 - except: - navigate_info = np.ones(4) - normalized_obs = np.zeros(len(normalized_obs)) - - # navigate_info_2 = np.copy(navigate_info) - # max_v = np.max(navigate_info_2) - # navigate_info_2 = navigate_info_2 / max_v - # navigate_info_2[navigate_info_2 < 1] = -1 - - max_v = np.max(navigate_info) - navigate_info = navigate_info / max_v - navigate_info[navigate_info < 0] = -1 - # navigate_info[abs(navigate_info) < 1] = 0 - # normalized_obs = navigate_info - - # navigate_info = np.concatenate((navigate_info, action_info)) - normalized_obs = np.concatenate((navigate_info, normalized_obs)) - # normalized_obs = np.concatenate((navigate_info, navigate_info_2)) - # print(normalized_obs) - return normalized_obs diff --git a/src/ppo/agent.py b/src/ppo/agent.py deleted file mode 100644 index 86b210ad388f9d9638c5e1e037cdca27ffaf9e73..0000000000000000000000000000000000000000 --- a/src/ppo/agent.py +++ /dev/null @@ -1,106 +0,0 @@ -import pickle - -import torch -# from model import PolicyNetwork -# from replay_memory import Episode, ReplayBuffer -from torch.distributions.categorical import Categorical - -from src.ppo.model import PolicyNetwork -from src.ppo.replay_memory import Episode, ReplayBuffer - -BUFFER_SIZE = 32_000 -BATCH_SIZE = 4096 -GAMMA = 0.98 -LR = 0.5e-4 -CLIP_FACTOR = .005 -UPDATE_EVERY = 30 - -device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -print("device:", device) - - -class Agent: - def __init__(self, state_size, action_size, num_agents): - self.policy = PolicyNetwork(state_size, action_size).to(device) - self.old_policy = PolicyNetwork(state_size, action_size).to(device) - self.optimizer = torch.optim.Adam(self.policy.parameters(), lr=LR) - - self.episodes = [Episode() for _ in range(num_agents)] - self.memory = ReplayBuffer(BUFFER_SIZE) - self.t_step = 0 - - def reset(self): - self.finished = [False] * len(self.episodes) - - # Decide on an action to take in the environment - - def act(self, state, eps=None): - self.policy.eval() - with torch.no_grad(): - output = self.policy(torch.from_numpy(state).float().unsqueeze(0).to(device)) - return Categorical(output).sample().item() - - # Record the results of the agent's action and update the model - - def step(self, handle, state, action, next_state, agent_done, episode_done, collision): - if not self.finished[handle]: - if agent_done: - reward = 1 - elif collision: - reward = -.5 - else: - 
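# --------------------------------------------------------------------------
# Aside: step() assigns +1 when the agent reaches its target, -0.5 on a
# collision and 0 otherwise, and the episode's rewards are discounted once
# the episode closes.  A compact standalone sketch of both pieces (gamma
# mirrors the GAMMA constant above; the function names are assumptions):
def step_reward(agent_done: bool, collision: bool) -> float:
    if agent_done:
        return 1.0
    if collision:
        return -0.5
    return 0.0

def discounted_returns(rewards, gamma=0.98):
    # Backward pass: G_t = r_t + gamma * G_{t+1}
    running = 0.0
    returns = [0.0] * len(rewards)
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

# discounted_returns([0.0, 0.0, 1.0])  -> approximately [0.9604, 0.98, 1.0]
# --------------------------------------------------------------------------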
reward = 0 - - # Push experience into Episode memory - self.episodes[handle].push(state, action, reward, next_state, agent_done or episode_done) - - # When we finish the episode, discount rewards and push the experience into replay memory - if agent_done or episode_done: - self.episodes[handle].discount_rewards(GAMMA) - self.memory.push_episode(self.episodes[handle]) - self.episodes[handle].reset() - self.finished[handle] = True - - # Perform a gradient update every UPDATE_EVERY time steps - self.t_step = (self.t_step + 1) % UPDATE_EVERY - if self.t_step == 0 and len(self.memory) > BATCH_SIZE * 4: - self.learn(*self.memory.sample(BATCH_SIZE, device)) - - def learn(self, states, actions, rewards, next_state, done): - self.policy.train() - - responsible_outputs = torch.gather(self.policy(states), 1, actions) - old_responsible_outputs = torch.gather(self.old_policy(states), 1, actions).detach() - - # rewards = rewards - rewards.mean() - ratio = responsible_outputs / (old_responsible_outputs + 1e-5) - clamped_ratio = torch.clamp(ratio, 1. - CLIP_FACTOR, 1. + CLIP_FACTOR) - loss = -torch.min(ratio * rewards, clamped_ratio * rewards).mean() - - # Compute loss and perform a gradient step - self.old_policy.load_state_dict(self.policy.state_dict()) - self.optimizer.zero_grad() - loss.backward() - self.optimizer.step() - - # Checkpointing methods - - def save(self, path, *data): - torch.save(self.policy.state_dict(), path / 'ppo/model_checkpoint.policy') - torch.save(self.optimizer.state_dict(), path / 'ppo/model_checkpoint.optimizer') - with open(path / 'ppo/model_checkpoint.meta', 'wb') as file: - pickle.dump(data, file) - - def load(self, path, *defaults): - try: - print("Loading model from checkpoint...") - print(path + 'ppo/model_checkpoint.policy') - self.policy.load_state_dict( - torch.load(path + 'ppo/model_checkpoint.policy', map_location=torch.device('cpu'))) - self.optimizer.load_state_dict( - torch.load(path + 'ppo/model_checkpoint.optimizer', map_location=torch.device('cpu'))) - with open(path + 'ppo/model_checkpoint.meta', 'rb') as file: - return pickle.load(file) - except: - print("No checkpoint file was found") - return defaults diff --git a/src/ppo/model.py b/src/ppo/model.py deleted file mode 100644 index 51b86ff16691c03f6a754405352bb4cf48e4b914..0000000000000000000000000000000000000000 --- a/src/ppo/model.py +++ /dev/null @@ -1,20 +0,0 @@ -import torch.nn as nn -import torch.nn.functional as F - - -class PolicyNetwork(nn.Module): - def __init__(self, state_size, action_size, hidsize1=128, hidsize2=128, hidsize3=32): - super().__init__() - self.fc1 = nn.Linear(state_size, hidsize1) - self.fc2 = nn.Linear(hidsize1, hidsize2) - # self.fc3 = nn.Linear(hidsize2, hidsize3) - self.output = nn.Linear(hidsize2, action_size) - self.softmax = nn.Softmax(dim=1) - self.bn0 = nn.BatchNorm1d(state_size, affine=False) - - def forward(self, inputs): - x = self.bn0(inputs.float()) - x = F.relu(self.fc1(x)) - x = F.relu(self.fc2(x)) - # x = F.relu(self.fc3(x)) - return self.softmax(self.output(x)) diff --git a/src/ppo/replay_memory.py b/src/ppo/replay_memory.py deleted file mode 100644 index 3e6619b40169597d7a4b379f4ce2c9ddccd4cd9b..0000000000000000000000000000000000000000 --- a/src/ppo/replay_memory.py +++ /dev/null @@ -1,53 +0,0 @@ -import torch -import random -import numpy as np -from collections import namedtuple, deque, Iterable - - -Transition = namedtuple("Experience", ("state", "action", "reward", "next_state", "done")) - - -class Episode: - memory = [] - - def reset(self): - self.memory = 
[] - - def push(self, *args): - self.memory.append(tuple(args)) - - def discount_rewards(self, gamma): - running_add = 0. - for i, (state, action, reward, *rest) in list(enumerate(self.memory))[::-1]: - running_add = running_add * gamma + reward - self.memory[i] = (state, action, running_add, *rest) - - -class ReplayBuffer: - def __init__(self, buffer_size): - self.memory = deque(maxlen=buffer_size) - - def push(self, state, action, reward, next_state, done): - self.memory.append(Transition(np.expand_dims(state, 0), action, reward, np.expand_dims(next_state, 0), done)) - - def push_episode(self, episode): - for step in episode.memory: - self.push(*step) - - def sample(self, batch_size, device): - experiences = random.sample(self.memory, k=batch_size) - - states = torch.from_numpy(self.stack([e.state for e in experiences])).float().to(device) - actions = torch.from_numpy(self.stack([e.action for e in experiences])).long().to(device) - rewards = torch.from_numpy(self.stack([e.reward for e in experiences])).float().to(device) - next_states = torch.from_numpy(self.stack([e.next_state for e in experiences])).float().to(device) - dones = torch.from_numpy(self.stack([e.done for e in experiences]).astype(np.uint8)).float().to(device) - - return states, actions, rewards, next_states, dones - - def stack(self, states): - sub_dims = states[0].shape[1:] if isinstance(states[0], Iterable) else [1] - return np.reshape(np.array(states), (len(states), *sub_dims)) - - def __len__(self): - return len(self.memory) diff --git a/src/shortest_Distance_walker.py b/src/shortest_Distance_walker.py deleted file mode 100644 index d69ebcb0a98f732cc44849b10858b2e42a376a23..0000000000000000000000000000000000000000 --- a/src/shortest_Distance_walker.py +++ /dev/null @@ -1,69 +0,0 @@ -import numpy as np -from flatland.core.grid.grid4_utils import get_new_position -from flatland.envs.rail_env import RailEnv, RailEnvActions -from flatland.envs.rail_env import fast_count_nonzero, fast_argmax - - -class ShortestDistanceWalker: - def __init__(self, env: RailEnv): - self.env = env - - def walk(self, handle, position, direction): - possible_transitions = self.env.rail.get_transitions(*position, direction) - num_transitions = fast_count_nonzero(possible_transitions) - if num_transitions == 1: - new_direction = fast_argmax(possible_transitions) - new_position = get_new_position(position, new_direction) - dist = self.env.distance_map.get()[handle, new_position[0], new_position[1], new_direction] - return new_position, new_direction, dist, RailEnvActions.MOVE_FORWARD - else: - min_distances = [] - positions = [] - directions = [] - for new_direction in [(direction + i) % 4 for i in range(-1, 2)]: - if possible_transitions[new_direction]: - new_position = get_new_position(position, new_direction) - min_distances.append( - self.env.distance_map.get()[handle, new_position[0], new_position[1], new_direction]) - positions.append(new_position) - directions.append(new_direction) - else: - min_distances.append(np.inf) - positions.append(None) - directions.append(None) - - a = np.argmin(min_distances) - return positions[a], directions[a], min_distances[a], a + 1 - - def callback(self, handle, agent, position, direction, action): - pass - - def walk_to_target(self, handle): - agent = self.env.agents[handle] - if agent.position is not None: - position = agent.position - else: - position = agent.initial_position - direction = agent.direction - while (position != agent.target): - position, direction, dist, action = self.walk(handle, position, 
direction) - if position is None: - break - self.callback(handle, agent, position, direction, action) - - def callback_one_step(self, handle, agent, position, direction, action): - pass - - def walk_one_step(self, handle): - agent = self.env.agents[handle] - if agent.position is not None: - position = agent.position - else: - position = agent.initial_position - direction = agent.direction - if (position != agent.target): - new_position, new_direction, dist, action = self.walk(handle, position, direction) - if new_position is None: - return position, direction, RailEnvActions.STOP_MOVING - self.callback_one_step(handle, agent, new_position, new_direction, action) - return new_position, new_direction, action diff --git a/utils/dead_lock_avoidance_agent.py b/utils/dead_lock_avoidance_agent.py index 43f1b4ae15aebae1348e0eadc5f9cab243511a28..382959aebf06df06faa50364ce569ee4e21f2cbb 100644 --- a/utils/dead_lock_avoidance_agent.py +++ b/utils/dead_lock_avoidance_agent.py @@ -15,21 +15,33 @@ class MyWalker(ShortestDistanceWalker): self.env.width), dtype=int) - 1 + self.full_shortest_distance_agent_map = np.zeros((self.env.get_num_agents(), + self.env.height, + self.env.width), + dtype=int) - 1 + self.agent_positions = agent_positions self.agent_map = {} + def get_action(self, handle, min_distances): + return np.argmin(min_distances) + def getData(self): - return self.shortest_distance_agent_map + return self.shortest_distance_agent_map, self.full_shortest_distance_agent_map def callback(self, handle, agent, position, direction, action): opp_a = self.agent_positions[position] if opp_a != -1 and opp_a != handle: - d = self.agent_map.get(handle, []) - d.append(opp_a) if self.env.agents[opp_a].direction != direction: + d = self.agent_map.get(handle, []) + if opp_a not in d: + d.append(opp_a) self.agent_map.update({handle: d}) - self.shortest_distance_agent_map[(handle, position[0], position[1])] = direction + d = self.agent_map.get(handle, []) + if len(d) == 0: + self.shortest_distance_agent_map[(handle, position[0], position[1])] = 1 + self.full_shortest_distance_agent_map[(handle, position[0], position[1])] = 1 class DeadLockAvoidanceAgent(Policy): @@ -45,16 +57,9 @@ class DeadLockAvoidanceAgent(Policy): pass def act(self, handle, state, eps=0.): - agent = self.env.agents[handle] - #if handle > self.env._elapsed_steps: - # return RailEnvActions.STOP_MOVING - if agent.status == RailAgentStatus.ACTIVE: - self.active_agent_cnt += 1 - #if agent.status > 20: - # return RailEnvActions.STOP_MOVING + # agent = self.env.agents[handle] check = self.agent_can_move.get(handle, None) if check is None: - # print(handle, RailEnvActions.STOP_MOVING) return RailEnvActions.STOP_MOVING return check[3] @@ -63,11 +68,9 @@ class DeadLockAvoidanceAgent(Policy): pass def start_step(self): - self.active_agent_cnt = 0 self.shortest_distance_mapper() def end_step(self): - # print("#A:", self.active_agent_cnt, "/", self.env.get_num_agents(),self.env._elapsed_steps) pass def get_actions(self): @@ -79,7 +82,7 @@ class DeadLockAvoidanceAgent(Policy): agent_positions = np.zeros((self.env.height, self.env.width), dtype=int) - 1 for handle in range(self.env.get_num_agents()): agent = self.env.agents[handle] - if agent.status <= RailAgentStatus.ACTIVE: + if agent.status == RailAgentStatus.ACTIVE: if agent.position is not None: agent_positions[agent.position] = handle @@ -88,20 +91,22 @@ class DeadLockAvoidanceAgent(Policy): agent = self.env.agents[handle] if agent.status <= RailAgentStatus.ACTIVE: my_walker.walk_to_target(handle) - 
self.shortest_distance_agent_map = my_walker.getData() + shortest_distance_agent_map, full_shortest_distance_agent_map = my_walker.getData() self.agent_can_move = {} - agent_positions_map = np.clip(agent_positions + 1, 0, 1) + agent_positions_map = (agent_positions > -1).astype(int) for handle in range(self.env.get_num_agents()): opp_agents = my_walker.agent_map.get(handle, []) - me = np.clip(self.shortest_distance_agent_map[handle] + 1, 0, 1) + me = shortest_distance_agent_map[handle] + delta = me next_step_ok = True next_position, next_direction, action = my_walker.walk_one_step(handle) for opp_a in opp_agents: - opp = np.clip(self.shortest_distance_agent_map[opp_a] + 1, 0, 1) - delta = np.clip(me - opp - agent_positions_map, 0, 1) - if (np.sum(delta) > 1): + opp = full_shortest_distance_agent_map[opp_a] + delta = (delta - opp - agent_positions_map > 0).astype(int) + if (np.sum(delta) < 3): next_step_ok = False + if next_step_ok: self.agent_can_move.update({handle: [next_position[0], next_position[1], next_direction, action]}) @@ -110,7 +115,7 @@ class DeadLockAvoidanceAgent(Policy): b = np.ceil(self.env.get_num_agents() / a) for handle in range(self.env.get_num_agents()): plt.subplot(a, b, handle + 1) - plt.imshow(self.shortest_distance_agent_map[handle]) + plt.imshow(shortest_distance_agent_map[handle]) # plt.colorbar() plt.show(block=False) - plt.pause(0.001) + plt.pause(0.01) diff --git a/utils/shortest_Distance_walker.py b/utils/shortest_Distance_walker.py index d69ebcb0a98f732cc44849b10858b2e42a376a23..bd1d5b33f3a58c15b75106efcb21ef11f7dd22cc 100644 --- a/utils/shortest_Distance_walker.py +++ b/utils/shortest_Distance_walker.py @@ -14,6 +14,7 @@ class ShortestDistanceWalker: if num_transitions == 1: new_direction = fast_argmax(possible_transitions) new_position = get_new_position(position, new_direction) + dist = self.env.distance_map.get()[handle, new_position[0], new_position[1], new_direction] return new_position, new_direction, dist, RailEnvActions.MOVE_FORWARD else: @@ -32,9 +33,12 @@ class ShortestDistanceWalker: positions.append(None) directions.append(None) - a = np.argmin(min_distances) + a = self.get_action(handle, min_distances) return positions[a], directions[a], min_distances[a], a + 1 + def get_action(self, handle, min_distances): + return np.argmin(min_distances) + def callback(self, handle, agent, position, direction, action): pass
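# --------------------------------------------------------------------------
# Aside: the updated shortest_distance_mapper() holds an agent back whenever
# subtracting every opposing agent's full shortest-path map and the currently
# occupied cells from its own shortest-path footprint leaves fewer than three
# exclusive cells.  A standalone sketch of that overlap test on plain 0/1
# numpy grids (the function name and the `min_free` parameter are
# assumptions; the threshold 3 mirrors the check above):
import numpy as np

def can_keep_moving(my_path, opp_paths, occupied, min_free=3):
    delta = my_path.astype(int)
    for opp_path in opp_paths:
        delta = (delta - opp_path - occupied > 0).astype(int)
        if np.sum(delta) < min_free:
            return False   # too little private room left -> risk of dead-lock
    return True

# my_path  = np.array([[1, 1, 1, 1, 0]])
# opp_path = np.array([[0, 0, 1, 1, 1]])
# occupied = np.zeros((1, 5), dtype=int)
# can_keep_moving(my_path, [opp_path], occupied)  -> False (2 exclusive cells)
#
# When the test fails, no entry is written into agent_can_move for that
# handle, so act() above falls back to RailEnvActions.STOP_MOVING.
# --------------------------------------------------------------------------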