diff --git a/torch_training/bla.py b/torch_training/bla.py
deleted file mode 100644
index 80ec308c2b6bc5498d9198e2a03e562b02e7c96d..0000000000000000000000000000000000000000
--- a/torch_training/bla.py
+++ /dev/null
@@ -1,225 +0,0 @@
-import getopt
-import random
-import sys
-from collections import deque
-
-import matplotlib.pyplot as plt
-import numpy as np
-import torch
-from importlib_resources import path
-
-import torch_training.Nets
-from flatland.envs.generators import complex_rail_generator
-from flatland.envs.observations import TreeObsForRailEnv
-from flatland.envs.predictions import ShortestPathPredictorForRailEnv
-from flatland.envs.rail_env import RailEnv
-from flatland.utils.rendertools import RenderTool
-from torch_training.dueling_double_dqn import Agent
-from utils.observation_utils import norm_obs_clip, split_tree
-
-print("multi_agent_trainging.py (1)")
-
-def main(argv):
-    try:
-        opts, args = getopt.getopt(argv, "n:", ["n_trials="])
-    except getopt.GetoptError:
-        print('training_navigation.py -n <n_trials>')
-        sys.exit(2)
-    for opt, arg in opts:
-        if opt in ('-n', '--n_trials'):
-            n_trials = int(arg)
-    print("main1")
-    random.seed(1)
-    np.random.seed(1)
-
-    """
-    env = RailEnv(width=10,
-                  height=20, obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()))
-    env.load("./railway/complex_scene.pkl")
-    file_load = True
-    """
-
-    x_dim = np.random.randint(8, 20)
-    y_dim = np.random.randint(8, 20)
-    n_agents = np.random.randint(3, 8)
-    n_goals = n_agents + np.random.randint(0, 3)
-    min_dist = int(0.75 * min(x_dim, y_dim))
-    print("main2")
-
-    env = RailEnv(width=x_dim,
-                  height=y_dim,
-                  rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
-                                                        max_dist=99999,
-                                                        seed=0),
-                  obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()),
-                  number_of_agents=n_agents)
-    env.reset(True, True)
-    file_load = False
-    observation_helper = TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv())
-    env_renderer = RenderTool(env, gl="PILSVG", )
-    handle = env.get_agent_handles()
-    features_per_node = 9
-    state_size = features_per_node * 85 * 2
-    action_size = 5
-
-    print("main3")
-
-    # We set the number of episodes we would like to train on
-    if 'n_trials' not in locals():
-        n_trials = 30000
-    max_steps = int(3 * (env.height + env.width))
-    eps = 1.
-    eps_end = 0.005
-    eps_decay = 0.9995
-    action_dict = dict()
-    final_action_dict = dict()
-    scores_window = deque(maxlen=100)
-    done_window = deque(maxlen=100)
-    time_obs = deque(maxlen=2)
-    scores = []
-    dones_list = []
-    action_prob = [0] * action_size
-    agent_obs = [None] * env.get_num_agents()
-    agent_next_obs = [None] * env.get_num_agents()
-    agent = Agent(state_size, action_size, "FC", 0)
-    with path(torch_training.Nets, "avoid_checkpoint30000.pth") as file_in:
-        agent.qnetwork_local.load_state_dict(torch.load(file_in))
-
-    demo = False
-    record_images = False
-    frame_step = 0
-
-    print("Going to run training for {} trials...".format(n_trials))
-    for trials in range(1, n_trials + 1):
-
-        if trials % 50 == 0 and not demo:
-            x_dim = np.random.randint(8, 20)
-            y_dim = np.random.randint(8, 20)
-            n_agents = np.random.randint(3, 8)
-            n_goals = n_agents + np.random.randint(0, 3)
-            min_dist = int(0.75 * min(x_dim, y_dim))
-            env = RailEnv(width=x_dim,
-                          height=y_dim,
-                          rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
-                                                                max_dist=99999,
-                                                                seed=0),
-                          obs_builder_object=TreeObsForRailEnv(max_depth=3,
-                                                               predictor=ShortestPathPredictorForRailEnv()),
-                          number_of_agents=n_agents)
-            env.reset(True, True)
-            max_steps = int(3 * (env.height + env.width))
-            agent_obs = [None] * env.get_num_agents()
-            agent_next_obs = [None] * env.get_num_agents()
-        # # Reset environment
-        # if file_load:
-        #     obs = env.reset(False, False)
-        # else:
-        #     obs = env.reset(True, True)
-        # if demo:
-        #     env_renderer.set_new_rail()
-        # obs_original = obs.copy()
-        # final_obs = obs.copy()
-        # final_obs_next = obs.copy()
-        # for a in range(env.get_num_agents()):
-        #     data, distance, agent_data = split_tree(tree=np.array(obs[a]),
-        #                                             current_depth=0)
-        #     data = norm_obs_clip(data)
-        #     distance = norm_obs_clip(distance)
-        #     agent_data = np.clip(agent_data, -1, 1)
-        #     obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
-        #     agent_data = env.agents[a]
-        #     speed = 1  # np.random.randint(1,5)
-        #     agent_data.speed_data['speed'] = 1. / speed
-        #
-        # for i in range(2):
-        #     time_obs.append(obs)
-        # # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
-        # for a in range(env.get_num_agents()):
-        #     agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
-        #
-        # score = 0
-        # env_done = 0
-        # # Run episode
-        # for step in range(max_steps):
-        #     if demo:
-        #         env_renderer.renderEnv(show=True, show_observations=False)
-        #         # observation_helper.util_print_obs_subtree(obs_original[0])
-        #         if record_images:
-        #             env_renderer.gl.saveImage("./Images/flatland_frame_{:04d}.bmp".format(frame_step))
-        #             frame_step += 1
-        #     # print(step)
-        #     # Action
-        #     for a in range(env.get_num_agents()):
-        #         if demo:
-        #             eps = 0
-        #         # action = agent.act(np.array(obs[a]), eps=eps)
-        #         action = agent.act(agent_obs[a], eps=eps)
-        #         action_prob[action] += 1
-        #         action_dict.update({a: action})
-        #     # Environment step
-        #
-        #     next_obs, all_rewards, done, _ = env.step(action_dict)
-        #     # print(all_rewards,action)
-        #     obs_original = next_obs.copy()
-        #     for a in range(env.get_num_agents()):
-        #         data, distance, agent_data = split_tree(tree=np.array(next_obs[a]),
-        #                                                 current_depth=0)
-        #         data = norm_obs_clip(data)
-        #         distance = norm_obs_clip(distance)
-        #         agent_data = np.clip(agent_data, -1, 1)
-        #         next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
-        #     time_obs.append(next_obs)
-        #
-        #     # Update replay buffer and train agent
-        #     for a in range(env.get_num_agents()):
-        #         agent_next_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
-        #         if done[a]:
-        #             final_obs[a] = agent_obs[a].copy()
-        #             final_obs_next[a] = agent_next_obs[a].copy()
-        #             final_action_dict.update({a: action_dict[a]})
-        #         if not demo and not done[a]:
-        #             agent.step(agent_obs[a], action_dict[a], all_rewards[a], agent_next_obs[a], done[a])
-        #         score += all_rewards[a] / env.get_num_agents()
-        #
-        #     agent_obs = agent_next_obs.copy()
-        #     if done['__all__']:
-        #         env_done = 1
-        #         for a in range(env.get_num_agents()):
-        #             agent.step(final_obs[a], final_action_dict[a], all_rewards[a], final_obs_next[a], done[a])
-        #         break
-        # # Epsilon decay
-        # eps = max(eps_end, eps_decay * eps)  # decrease epsilon
-        #
-        # done_window.append(env_done)
-        # scores_window.append(score / max_steps)  # save most recent score
-        # scores.append(np.mean(scores_window))
-        # dones_list.append((np.mean(done_window)))
-
-        print(
-            '\rTraining {} Agents on ({},{}).\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
-                env.get_num_agents(), x_dim, y_dim,
-                trials,
-                np.mean(scores_window),
-                100 * np.mean(done_window),
-                eps, action_prob / np.sum(action_prob)), end=" ")
-
-        if trials % 100 == 0:
-            print(
-                '\rTraining {} Agents.\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
-                    env.get_num_agents(),
-                    trials,
-                    np.mean(scores_window),
-                    100 * np.mean(done_window),
-                    eps,
-                    action_prob / np.sum(action_prob)))
-            torch.save(agent.qnetwork_local.state_dict(),
-                       './Nets/avoid_checkpoint' + str(trials) + '.pth')
-            action_prob = [1] * action_size
-
-print("multi_agent_trainging.py (2)")
-
-if __name__ == '__main__':
-    print("main")
-    main(sys.argv[1:])
-
-print("multi_agent_trainging.py (3)")
\ No newline at end of file
diff --git a/torch_training/multi_agent_training.py b/torch_training/multi_agent_training.py
index 5224ab68ac93905a5cf3cc8f436e9178e0523708..d273bbfdc6ec98da469c7c5c120add4a38026f56 100644
--- a/torch_training/multi_agent_training.py
+++ b/torch_training/multi_agent_training.py
@@ -1,10 +1,10 @@
-import getopt
-import random
 import sys
 from collections import deque
 
+import getopt
 import matplotlib.pyplot as plt
 import numpy as np
+import random
 import torch
 from flatland.envs.generators import complex_rail_generator
 from flatland.envs.observations import TreeObsForRailEnv
@@ -17,8 +17,6 @@ import torch_training.Nets
 from torch_training.dueling_double_dqn import Agent
 from utils.observation_utils import norm_obs_clip, split_tree
 
-print("multi_agent_trainging.py (1)")
-
 
 def main(argv):
     try:
@@ -29,7 +27,6 @@ def main(argv):
     for opt, arg in opts:
         if opt in ('-n', '--n_trials'):
             n_trials = int(arg)
-    print("main1")
     random.seed(1)
     np.random.seed(1)
     """
@@ -66,8 +63,6 @@ def main(argv):
     state_size = features_per_node * 85 * 2
     action_size = 5
 
-    print("main3")
-
     # We set the number of episodes we would like to train on
     if 'n_trials' not in locals():
         n_trials = 60000
@@ -93,7 +88,6 @@ def main(argv):
     record_images = False
     frame_step = 0
 
-    print("Going to run training for {} trials...".format(n_trials))
     for trials in range(1, n_trials + 1):
 
         if trials % 50 == 0 and not demo:
@@ -220,10 +214,5 @@ def main(argv):
     plt.show()
 
 
-print("multi_agent_trainging.py (2)")
-
 if __name__ == '__main__':
-    print("main")
     main(sys.argv[1:])
-
-print("multi_agent_trainging.py (3)")
diff --git a/tox.ini b/tox.ini
index 36b7c10a081b629375b202ad1e642e7366db2abe..da528553c74be43bc54ce7858bb057e117cee11a 100644
--- a/tox.ini
+++ b/tox.ini
@@ -22,8 +22,7 @@ passenv =
 deps =
     -r{toxinidir}/requirements_torch_training.txt
 commands =
-    python -m pip install -r requirements_torch_training.txt
-    python torch_training/bla.py --n_trials=10
+    python torch_training/multi_agent_training.py --n_trials=10
 
 [flake8]
 max-line-length = 120