diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index f552f7626cfffca9a4422cb9337c9cdc27d83867..7ae26bcc3d4e0f4a5cbbcfafd055e2c084a42345 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -79,7 +79,7 @@ Ready to contribute? Here's how to set up `flatland` for local development.
 5. When you're done making changes, check that your changes pass flake8 and the
    tests, including testing other Python versions with tox::
 
-    $ flake8 flatland tests
+    $ flake8 flatland tests examples
     $ python setup.py test or py.test
     $ tox
 
@@ -125,4 +125,4 @@ $ bumpversion patch # possible: major / minor / patch
 $ git push
 $ git push --tags
 
-Travis will then deploy to PyPI if tests pass. (To be configured properly by Mohanty)
\ No newline at end of file
+Travis will then deploy to PyPI if tests pass. (To be configured properly by Mohanty)
diff --git a/Makefile b/Makefile
index 691bf84c768aee6794420c467b0387390dec6218..6a655bb9f96397d09ced973bf6f6ccab20d4e42e 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ clean-test: ## remove test and coverage artifacts
 	rm -fr .pytest_cache
 
 lint: ## check style with flake8
-	flake8 flatland tests
+	flake8 flatland tests examples
 
 test: ## run tests quickly with the default Python
 	py.test
diff --git a/examples/play_model.py b/examples/play_model.py
index 62726c24c96be0e5dae2f4840e18da452163b7ac..174568177a4a886cfe38e53125d0f73f2dae52de 100644
--- a/examples/play_model.py
+++ b/examples/play_model.py
@@ -1,12 +1,14 @@
-from flatland.envs.rail_env import RailEnv
-from flatland.envs.generators import complex_rail_generator
-from flatland.utils.rendertools import RenderTool
-from flatland.baselines.dueling_double_dqn import Agent
-from collections import deque
-import torch
 import random
-import numpy as np
 import time
+from collections import deque
+
+import numpy as np
+import torch
+
+from flatland.baselines.dueling_double_dqn import Agent
+from flatland.envs.generators import complex_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
 
 
 class Player(object):
@@ -25,7 +27,7 @@ class Player(object):
         self.done_window = deque(maxlen=100)
         self.scores = []
         self.dones_list = []
-        self.action_prob = [0]*4
+        self.action_prob = [0] * 4
         self.agent = Agent(self.state_size, self.action_size, "FC", 0)
         # self.agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
         self.agent.qnetwork_local.load_state_dict(torch.load(
@@ -33,7 +35,7 @@ class Player(object):
 
         self.iFrame = 0
         self.tStart = time.time()
-        
+
         # Reset environment
         # self.obs = self.env.reset()
         self.env.obs_builder.reset()
@@ -51,7 +53,6 @@ class Player(object):
         env = self.env
 
         # Pass the (stored) observation to the agent network and retrieve the action
-        #for handle in env.get_agent_handles():
         for handle in env.get_agent_handles():
             action = self.agent.act(np.array(self.obs[handle]), eps=self.eps)
             self.action_prob[action] += 1
@@ -68,8 +69,8 @@ class Player(object):
         # Update replay buffer and train agent
         for handle in self.env.get_agent_handles():
             self.agent.step(self.obs[handle], self.action_dict[handle],
-                all_rewards[handle], next_obs[handle], done[handle],
-                train=False)
+                            all_rewards[handle], next_obs[handle], done[handle],
+                            train=False)
             self.score += all_rewards[handle]
 
         self.iFrame += 1
@@ -85,7 +86,7 @@ def max_lt(seq, val):
     None is returned if seq was empty or all items in seq were >= val.
     """
 
-    idx = len(seq)-1
+    idx = len(seq) - 1
     while idx >= 0:
         if seq[idx] < val and seq[idx] >= 0:
             return seq[idx]
@@ -94,7 +95,6 @@ def max_lt(seq, val):
 
 
 def main(render=True, delay=0.0):
-
     random.seed(1)
     np.random.seed(1)
 
@@ -118,8 +118,9 @@ def main(render=True, delay=0.0):
     done_window = deque(maxlen=100)
     scores = []
     dones_list = []
-    action_prob = [0]*4
+    action_prob = [0] * 4
     agent = Agent(state_size, action_size, "FC", 0)
+
     # agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
 
     def max_lt(seq, val):
@@ -128,7 +129,7 @@ def main(render=True, delay=0.0):
         None is returned if seq was empty or all items in seq were >= val.
         """
 
-        idx = len(seq)-1
+        idx = len(seq) - 1
         while idx >= 0:
             if seq[idx] < val and seq[idx] >= 0:
                 return seq[idx]
@@ -141,7 +142,8 @@ def main(render=True, delay=0.0):
 
         # Reset environment
         obs = env.reset()
-        env_renderer.set_new_rail()
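+        # Only inform the renderer of the newly generated rail when rendering is enabled.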
+        if render:
+            env_renderer.set_new_rail()
 
         for a in range(env.get_num_agents()):
             norm = max(1, max_lt(obs[a], np.inf))
@@ -165,7 +167,6 @@ def main(render=True, delay=0.0):
 
             if render:
                 env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step, action_dict=action_dict)
-                #time.sleep(10)
                 if delay > 0:
                     time.sleep(delay)
 
@@ -181,7 +182,6 @@ def main(render=True, delay=0.0):
                 agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
                 score += all_rewards[a]
 
-
             obs = next_obs.copy()
             if done['__all__']:
                 env_done = 1
@@ -196,25 +196,25 @@ def main(render=True, delay=0.0):
 
         print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
                '\tEpsilon: {:.2f} \t Action Probabilities: \t {}').format(
-               env.get_num_agents(),
-               trials,
-               np.mean(scores_window),
-               100 * np.mean(done_window),
-               eps, action_prob/np.sum(action_prob)),
+            env.get_num_agents(),
+            trials,
+            np.mean(scores_window),
+            100 * np.mean(done_window),
+            eps, action_prob / np.sum(action_prob)),
             end=" ")
         if trials % 100 == 0:
             tNow = time.time()
             rFps = iFrame / (tNow - tStart)
             print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
                    '\tEpsilon: {:.2f} fps: {:.2f} \t Action Probabilities: \t {}').format(
-                   env.get_num_agents(),
-                   trials,
-                   np.mean(scores_window),
-                   100 * np.mean(done_window),
-                   eps, rFps, action_prob / np.sum(action_prob)))
+                env.get_num_agents(),
+                trials,
+                np.mean(scores_window),
+                100 * np.mean(done_window),
+                eps, rFps, action_prob / np.sum(action_prob)))
             torch.save(agent.qnetwork_local.state_dict(),
-                    '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
-            action_prob = [1]*4
+                       '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
+            action_prob = [1] * 4
 
 
 if __name__ == "__main__":
diff --git a/examples/qt2.py b/examples/qt2.py
index 6074106523c7fbe503f79b6bc7604f055dc27b35..ee3ea0cd123a3e6d0b1fc970eced219ed8503203 100644
--- a/examples/qt2.py
+++ b/examples/qt2.py
@@ -1,9 +1,8 @@
-
-
 import sys
+
 from PyQt5 import QtSvg
-from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QGridLayout, QWidget
 from PyQt5.QtCore import Qt, QByteArray
+from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QGridLayout, QWidget
 
 from flatland.utils import svg
 
@@ -75,4 +74,3 @@ window = MainWindow()
 window.show()
 
 app.exec_()
-
diff --git a/examples/temporary_example.py b/examples/temporary_example.py
index 1f3504f221d59d0205974bb135c2237364a22e07..db909e00e1f1a2c1fdef73ba2a35159a698832a3 100644
--- a/examples/temporary_example.py
+++ b/examples/temporary_example.py
@@ -1,11 +1,10 @@
 import random
+
 import numpy as np
-import matplotlib.pyplot as plt
 
-from flatland.envs.rail_env import *
-from flatland.envs.generators import *
-from flatland.envs.observations import TreeObsForRailEnv
-from flatland.utils.rendertools import *
+from flatland.envs.generators import random_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
 
 random.seed(0)
 np.random.seed(0)
@@ -94,7 +93,7 @@ env = RailEnv(width=7,
 #     print(env.obs_builder.distance_map[0, :, :, i])
 
 # Print the observation vector for agent 0
-obs, all_rewards, done, _ = env.step({0:0})
+obs, all_rewards, done, _ = env.step({0: 0})
 for i in range(env.get_num_agents()):
     env.obs_builder.util_print_obs_subtree(tree=obs[i], num_features_per_node=5)
 
@@ -113,6 +112,7 @@ for step in range(100):
     while i < len(cmds):
         if cmds[i] == 'q':
             import sys
+
             sys.exit()
         elif cmds[i] == 's':
             obs, all_rewards, done, _ = env.step(action_dict)
@@ -120,9 +120,9 @@ for step in range(100):
             print("Rewards: ", all_rewards, "  [done=", done, "]")
         else:
             agent_id = int(cmds[i])
-            action = int(cmds[i+1])
+            action = int(cmds[i + 1])
             action_dict[agent_id] = action
-            i = i+1
+            i = i + 1
         i += 1
 
     env_renderer.renderEnv(show=True)
diff --git a/examples/training_navigation.py b/examples/training_navigation.py
index cabb655e3eb2bc2d12d908559b0102b072163052..85f9531b8820139e5559081feee4a93c4e01ac6c 100644
--- a/examples/training_navigation.py
+++ b/examples/training_navigation.py
@@ -1,11 +1,15 @@
-from flatland.envs.rail_env import *
-from flatland.envs.generators import *
-from flatland.envs.observations import TreeObsForRailEnv
-from flatland.utils.rendertools import *
-from flatland.baselines.dueling_double_dqn import Agent
-from collections import deque
-import torch, random
+import random
 import time
+from collections import deque
+
+import numpy as np
+import torch
+
+from flatland.baselines.dueling_double_dqn import Agent
+from flatland.envs.generators import complex_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
+
 random.seed(1)
 np.random.seed(1)
 
@@ -190,25 +194,34 @@ for trials in range(1, n_trials + 1):
     dones_list.append((np.mean(done_window)))
 
     print(
-        '\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+        ('\rTraining {} Agents.\t' +
+         'Episode {}\t' +
+         'Average Score: {:.0f}\t' +
+         'Dones: {:.2f}%\t' +
+         'Epsilon: {:.2f} \t ' +
+         'Action Probabilities: \t ' +
+         '{}').format(
             env.get_num_agents(),
             trials,
-            np.mean(
-                scores_window),
-            100 * np.mean(
-                done_window),
+            np.mean(scores_window),
+            100 * np.mean(done_window),
             eps, action_prob / np.sum(action_prob)),
         end=" ")
     if trials % 100 == 0:
         print(
-            '\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+            ('\rTraining {} Agents.\t' +
+             'Episode {}\t' +
+             'Average Score: {:.0f}\t' +
+             'Dones: {:.2f}%\t' +
+             'Epsilon: {:.2f} \t ' +
+             'Action Probabilities: \t ' +
+             '{}').format(
                 env.get_num_agents(),
                 trials,
-                np.mean(
-                    scores_window),
-                100 * np.mean(
-                    done_window),
-                eps, action_prob / np.sum(action_prob)))
+                np.mean(scores_window),
+                100 * np.mean(done_window),
+                eps,
+                action_prob / np.sum(action_prob)))
         torch.save(agent.qnetwork_local.state_dict(),
                    '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
         action_prob = [1] * 4
diff --git a/flatland/envs/observations.py b/flatland/envs/observations.py
index 8cddec0151f691339ace3b367b888878eabe0b07..f90b529d51b66a353ba9ee71d77497b277556494 100644
--- a/flatland/envs/observations.py
+++ b/flatland/envs/observations.py
@@ -203,7 +203,7 @@ class TreeObsForRailEnv(ObservationBuilder):
         num_transitions = np.count_nonzero(possible_transitions)
         # Root node - current position
         # observation = [0, 0, 0, 0, self.distance_map[handle, position[0], position[1], orientation]]
-        observation = [0, 0, 0, 0, self.distance_map[(handle, *agent.position, agent.direction)]]
+        observation = [0, 0, 0, 0, self.distance_map[(handle, *agent.position, agent.direction)]]
         root_observation = observation[:]
         visited = set()
         # Start from the current orientation, and see which transitions are available;
@@ -478,11 +478,13 @@ class GlobalObsForRailEnv(ObservationBuilder):
         - transition map array with dimensions (env.height, env.width, 16),
           assuming 16 bits encoding of transitions.
 
-        - Four 2D arrays containing respectively the position of the given agent,
-          the position of its target, the positions of the other agents and of
-          their target.
+        - A 3D array (map_height, map_width, 3) containing respectively the position of the given agent,
+          the position of its target and the positions of the other agents' targets.
 
-        - A 4 elements array with one hot encoding of the direction.
+        - A 3D array (map_height, map_width, 4) containing the one-hot encoding of the directions
+          of the other agents at their position coordinates.
+
+        - A 4-element array with the one-hot encoding of the direction of the agent of interest.
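+
+        Put together, get(handle) returns a tuple of four arrays with shapes
+        (map_height, map_width, 16), (map_height, map_width, 3),
+        (map_height, map_width, 4) and (4,).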
     """
 
     def __init__(self):
@@ -492,32 +494,37 @@ class GlobalObsForRailEnv(ObservationBuilder):
         self.rail_obs = np.zeros((self.env.height, self.env.width, 16))
         for i in range(self.rail_obs.shape[0]):
             for j in range(self.rail_obs.shape[1]):
-                self.rail_obs[i, j] = np.array(
-                    list(f'{self.env.rail.get_transitions((i, j)):016b}')).astype(int)
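+                # Expand the 16-bit transition code into a 16-element 0/1 vector,
+                # most significant bit first, left-padded with zeros.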
+                bitlist = [int(digit) for digit in bin(self.env.rail.get_transitions((i, j)))[2:]]
+                bitlist = [0] * (16 - len(bitlist)) + bitlist
+                self.rail_obs[i, j] = np.array(bitlist)
+                # self.rail_obs[i, j] = np.array(
+                #     list(f'{self.env.rail.get_transitions((i, j)):016b}')).astype(int)
 
         # self.targets = np.zeros(self.env.height, self.env.width)
         # for target_pos in self.env.agents_target:
         #     self.targets[target_pos] += 1
 
     def get(self, handle):
-        obs = np.zeros((4, self.env.height, self.env.width))
+        obs_map_state = np.zeros((self.env.height, self.env.width, 3))
+        obs_other_agents_state = np.zeros((self.env.height, self.env.width, 4))
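+        # obs_map_state channels: 0 = this agent's position, 1 = its target,
+        # 2 = the other agents' targets. obs_other_agents_state stores a one-hot
+        # direction vector at each other agent's position.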
         agents = self.env.agents
         agent = agents[handle]
 
         agent_pos = agents[handle].position
-        obs[0][agent_pos] += 1
-        obs[1][agent.target] += 1
+        obs_map_state[agent_pos][0] += 1
+        obs_map_state[agent.target][1] += 1
 
         for i in range(len(agents)):
             if i != handle:  # TODO: handle used as index...?
                 agent2 = agents[i]
-                obs[3][agent2.position] += 1
-                obs[2][agent2.target] += 1
+                obs_other_agents_state[agent2.position][agent2.direction] = 1
+                obs_map_state[agent2.target][2] += 1
 
         direction = np.zeros(4)
         direction[agent.direction] = 1
 
-        return self.rail_obs, obs, direction
+        return self.rail_obs, obs_map_state, obs_other_agents_state, direction
 
 
 class LocalObsForRailEnv(ObservationBuilder):