diff --git a/examples/training_navigation.py b/examples/training_navigation.py
index 52e76450bd1a97487471b9d531d72021b7b4fcef..4b73decfe9dda44d41fc85c9ffa93f26a86dd8e0 100644
--- a/examples/training_navigation.py
+++ b/examples/training_navigation.py
@@ -33,8 +33,8 @@ env = RailEnv(width=10,
 """
 env = RailEnv(width=15,
               height=15,
-              rail_generator=complex_rail_generator(nr_start_goal=15, min_dist=5, max_dist=99999, seed=0),
-              number_of_agents=10)
+              rail_generator=complex_rail_generator(nr_start_goal=3, min_dist=5, max_dist=99999, seed=0),
+              number_of_agents=3)
 
 """
 env = RailEnv(width=20,
@@ -54,15 +54,16 @@ eps = 1.
 eps_end = 0.005
 eps_decay = 0.998
 action_dict = dict()
+final_action_dict = dict()
 scores_window = deque(maxlen=100)
 done_window = deque(maxlen=100)
 scores = []
 dones_list = []
 action_prob = [0] * 4
 agent = Agent(state_size, action_size, "FC", 0)
-agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint15000.pth'))
+# agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint15000.pth'))
 
-demo = True
+demo = False
 
 
 def max_lt(seq, val):
@@ -97,7 +98,8 @@ for trials in range(1, n_trials + 1):
 
     # Reset environment
     obs = env.reset()
-    for a in range(env.number_of_agents):
+    final_obs = obs.copy()
+    for a in range(env.get_num_agents()):
         norm = max(1, max_lt(obs[a], np.inf))
         obs[a] = np.clip(np.array(obs[a]) / norm, -1, 1)
 
@@ -105,14 +107,13 @@ for trials in range(1, n_trials + 1):
 
     score = 0
     env_done = 0
-
     # Run episode
     for step in range(100):
         if demo:
             env_renderer.renderEnv(show=True)
         # print(step)
         # Action
-        for a in range(env.number_of_agents):
+        for a in range(env.get_num_agents()):
             if demo:
                 eps = 0
             action = agent.act(np.array(obs[a]), eps=eps)
@@ -121,18 +122,27 @@ for trials in range(1, n_trials + 1):
             #env.obs_builder.util_print_obs_subtree(tree=obs[a], num_features_per_node=5)
         # Environment step
         next_obs, all_rewards, done, _ = env.step(action_dict)
-        for a in range(env.number_of_agents):
+
+        for a in range(env.get_num_agents()):
             norm = max(1, max_lt(next_obs[a], np.inf))
             next_obs[a] = np.clip(np.array(next_obs[a]) / norm, -1, 1)
         # Update replay buffer and train agent
-        for a in range(env.number_of_agents):
-            if not demo:
+        for a in range(env.get_num_agents()):
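+            # When an agent finishes, keep its last observation and action so the
+            # terminal transition can be replayed once the whole episode is done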
+            if done[a]:
+                final_obs[a] = obs[a]
+                final_action_dict.update({a: action_dict[a]})
+            if not demo and not done[a]:
                 agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
             score += all_rewards[a]
 
+
         obs = next_obs.copy()
         if done['__all__']:
             env_done = 1
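+            # Episode complete: replay each agent's stored terminal transition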
+            for a in range(env.get_num_agents()):
+                agent.step(final_obs[a], final_action_dict[a], all_rewards[a], next_obs[a], done[a])
             break
     # Epsilon decay
     eps = max(eps_end, eps_decay * eps)  # decrease epsilon
@@ -144,7 +151,7 @@ for trials in range(1, n_trials + 1):
 
     print(
         '\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
-            env.number_of_agents,
+            env.get_num_agents(),
             trials,
             np.mean(
                 scores_window),
@@ -155,7 +162,7 @@ for trials in range(1, n_trials + 1):
     if trials % 100 == 0:
         print(
             '\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
-                env.number_of_agents,
+                env.get_num_agents(),
                 trials,
                 np.mean(
                     scores_window),
diff --git a/flatland/envs/generators.py b/flatland/envs/generators.py
index c5e7d67ccea46a4125975a2dd6e6769d7aedd53a..35af8944cb8f883851849bc028a9a8f58fccce4e 100644
--- a/flatland/envs/generators.py
+++ b/flatland/envs/generators.py
@@ -143,7 +143,7 @@ def complex_rail_generator(nr_start_goal=1, nr_extra=10, min_dist=2, max_dist=99
             if len(new_path) >= 2:
                 nr_created += 1
 
-        print("\n> Complex Rail Gen: Created #", len(start_goal), "pairs and #", nr_created, "extra connections")
+        # print("\n> Complex Rail Gen: Created #", len(start_goal), "pairs and #", nr_created, "extra connections")
         # print(start_goal)
 
         agents_position = [sg[0] for sg in start_goal]