diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index c722f3c334d412eb2796cff2d46e4e01f063e18d..81372257bfb43a079f6c18d4889e850871f7fabd 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -25,7 +25,8 @@ tests:
         - apt update
         - apt install -y libgl1-mesa-glx xvfb
         - pip install tox
-        - xvfb-run -s "-screen 0 800x600x24" tox
+        - apt install -y graphviz xdg-utils
+        - xvfb-run tox -v --recreate
 
 build_and_deploy_docs:
     image: "python:latest"
@@ -36,10 +37,12 @@ build_and_deploy_docs:
         - tests
     before_script:
         - pip install awscli
+        - apt update
+        - apt install -y graphviz libgl1-mesa-glx xvfb xdg-utils
     script:
         - pip install -r requirements_dev.txt
         - python setup.py install
-        - make docs
+        - xvfb-run make docs
         - aws s3 cp ./docs/_build/html/ s3://${BUCKET_NAME} --recursive
     environment:
         name: ${CI_COMMIT_REF_SLUG}
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 7ae26bcc3d4e0f4a5cbbcfafd055e2c084a42345..65971d323bb11cd7449be54f22898bedfa6e4b0d 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -79,7 +79,7 @@ Ready to contribute? Here's how to set up `flatland` for local development.
 5. When you're done making changes, check that your changes pass flake8 and the
    tests, including testing other Python versions with tox::
 
-    $ flake8 flatland tests examples
+    $ flake8 flatland tests examples benchmarks
     $ python setup.py test or py.test
     $ tox
 
diff --git a/Makefile b/Makefile
index 6a655bb9f96397d09ced973bf6f6ccab20d4e42e..98dcbb47a03ad7125694e5053f5e973e45b4fba4 100644
--- a/Makefile
+++ b/Makefile
@@ -51,9 +51,10 @@ clean-test: ## remove test and coverage artifacts
 	rm -fr .pytest_cache
 
 lint: ## check style with flake8
-	flake8 flatland tests examples
+	flake8 flatland tests examples benchmarks
 
 test: ## run tests quickly with the default Python
+	echo "$$DISPLAY"
 	py.test
 
 test-all: ## run tests on every Python version with tox
@@ -71,6 +72,7 @@ docs: ## generate Sphinx HTML documentation, including API docs
 	sphinx-apidoc -o docs/ flatland
 	$(MAKE) -C docs clean
 	$(MAKE) -C docs html
+	pydeps --no-config --noshow flatland -o docs/_build/html/flatland.svg
 	$(BROWSER) docs/_build/html/index.html
 
 servedocs: docs ## compile the docs watching for changes
diff --git a/README.rst b/README.rst
index b4d3193c1b4fb55327216164c602d75cc92e3fdb..025d8089759cf4fa2def171904e80dd40736243d 100644
--- a/README.rst
+++ b/README.rst
@@ -13,7 +13,31 @@ Flatland
 
 
 
-Multi Agent Reinforcement Learning on Trains
+Flatland is a toolkit for developing and comparing multi-agent reinforcement learning algorithms on grids.
+The base environment is a two-dimensional grid in which many agents can be placed, and each agent must solve one or more tasks in the grid world.
+In general, agents can navigate freely from cell to cell. However, cell-to-cell navigation can be restricted by transition maps.
+Each cell carries its own transition map. By default, a cell's transition map allows all transitions to its
+eight neighbor cells (up-left, up, up-right, right, down-right, down, down-left, left),
+so agents can move freely from cell to cell.
+
+The implementation is general purpose: it supports any kind of two-dimensional, grid-based environment
+and can be used for any learning task in which a two-dimensional grid forms the basis of the environment.
+
+Flatland provides a Python implementation that is easy to extend, together with baselines for different environments.
+Each environment poses an interesting task to solve. The multi-agent navigation task for railway train dispatching, for example,
+can be adapted to the airplane landing problem and can serve as the basis for many other tasks in transportation and logistics.
+
+Mapping a railway infrastructure onto a grid world is an excellent example of how an agent's movement must be restricted.
+Since trains normally cannot run backwards and have to follow the rails, the transition from one cell to the next also depends on the train's orientation, i.e. its direction of travel.
+Trains can only change their path at switches, of which there are two kinds: the splitting switch, where a train can change rails and thereby its path,
+and the merging (fusion) switch, where two rails come together and trains can change their ordering. The navigation behavior of a train is therefore highly restricted,
+and the railway planning problem, in which many agents share the same infrastructure, is very complex.
+
+Furthermore, each train has a departure location from which it cannot leave before its committed departure time,
+and it must arrive at its destination no later than its committed arrival time. This makes the whole planning problem
+even harder. In such a complex environment cooperation is essential: the agents must learn to cooperate so that all trains (agents) arrive on time.
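+
+A minimal sketch of how an environment is created and driven with random actions
+(the parameters mirror those used in ``examples/play_model.py`` in this repository)::
+
+    import random
+
+    from flatland.envs.generators import complex_rail_generator
+    from flatland.envs.rail_env import RailEnv
+
+    env = RailEnv(width=15, height=15,
+                  rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=20, min_dist=12),
+                  number_of_agents=5)
+    obs = env.reset()
+
+    for step in range(50):
+        # pick a random action (0..3) for every agent
+        action_dict = {a: random.randint(0, 3) for a in range(env.get_num_agents())}
+        obs, all_rewards, done, _ = env.step(action_dict)
+        if done['__all__']:
+            break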
+
+
 
 Getting Started
 ===============
@@ -92,6 +116,10 @@ flatland
 ========
 TODO: explain the interface here
 
+Module Dependencies
+===================
+.. image:: flatland.svg
+
 
 Authors
 --------
@@ -102,6 +130,7 @@ Authors
 * Erik Nygren <erik.nygren@sbb.ch>
 * Adrian Egli <adrian.egli@sbb.ch>
 * Vaibhav Agrawal <theinfamouswayne@gmail.com>
+* Christian Eichenberger <christian.markus.eichenberger@sbb.ch>
 
 
 <please fill yourself in>
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/benchmarks/play_model_benchmark.py b/benchmarks/play_model_benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3c087e8ab8ed33a24bd447cd3203b85eb1a28f2
--- /dev/null
+++ b/benchmarks/play_model_benchmark.py
@@ -0,0 +1,132 @@
+import random
+import time
+from collections import deque
+
+import numpy as np
+from benchmarker import Benchmarker
+
+from flatland.envs.generators import complex_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
+
+
+def main(render=True, delay=0.0):
+    random.seed(1)
+    np.random.seed(1)
+
+    # Example generate a random rail
+    env = RailEnv(width=15, height=15,
+                  rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=20, min_dist=12),
+                  number_of_agents=5)
+
+    if render:
+        env_renderer = RenderTool(env, gl="QTSVG")
+
+    n_trials = 20
+    eps = 1.
+    eps_end = 0.005
+    eps_decay = 0.998
+    action_dict = dict()
+    scores_window = deque(maxlen=100)
+    done_window = deque(maxlen=100)
+    scores = []
+    dones_list = []
+    action_prob = [0] * 4
+
+    def max_lt(seq, val):
+        """
+        Return greatest item in seq for which item < val applies.
+        None is returned if seq was empty or all items in seq were >= val.
+        """
+
+        idx = len(seq) - 1
+        while idx >= 0:
+            if seq[idx] < val and seq[idx] >= 0:
+                return seq[idx]
+            idx -= 1
+        return None
+
+    iFrame = 0
+    tStart = time.time()
+    for trials in range(1, n_trials + 1):
+
+        # Reset environment
+        obs = env.reset()
+        if render:
+            env_renderer.set_new_rail()
+
+        for a in range(env.get_num_agents()):
+            norm = max(1, max_lt(obs[a], np.inf))
+            obs[a] = np.clip(np.array(obs[a]) / norm, -1, 1)
+
+        # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
+
+        score = 0
+        env_done = 0
+
+        # Run episode
+        for step in range(100):
+            # if trials > 114:
+            # env_renderer.renderEnv(show=True)
+            # print(step)
+            # Action
+            for a in range(env.get_num_agents()):
+                action = np.random.randint(0, 4)
+                action_prob[action] += 1
+                action_dict.update({a: action})
+
+            if render:
+                env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step, action_dict=action_dict)
+                if delay > 0:
+                    time.sleep(delay)
+
+            iFrame += 1
+
+            # Environment step
+            next_obs, all_rewards, done, _ = env.step(action_dict)
+            for a in range(env.get_num_agents()):
+                norm = max(1, max_lt(next_obs[a], np.inf))
+                next_obs[a] = np.clip(np.array(next_obs[a]) / norm, -1, 1)
+            # Update replay buffer and train agent
+            for a in range(env.get_num_agents()):
+                # agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
+                score += all_rewards[a]
+
+            obs = next_obs.copy()
+            if done['__all__']:
+                env_done = 1
+                break
+        # Epsilon decay
+        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
+
+        done_window.append(env_done)
+        scores_window.append(score)  # save most recent score
+        scores.append(np.mean(scores_window))
+        dones_list.append((np.mean(done_window)))
+
+        print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
+               '\tEpsilon: {:.2f} \t Action Probabilities: \t {}').format(
+            env.get_num_agents(),
+            trials,
+            np.mean(scores_window),
+            100 * np.mean(done_window),
+            eps, action_prob / np.sum(action_prob)),
+            end=" ")
+        if trials % 100 == 0:
+            tNow = time.time()
+            rFps = iFrame / (tNow - tStart)
+            print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
+                   '\tEpsilon: {:.2f} fps: {:.2f} \t Action Probabilities: \t {}').format(
+                env.get_num_agents(),
+                trials,
+                np.mean(scores_window),
+                100 * np.mean(done_window),
+                eps, rFps, action_prob / np.sum(action_prob)))
+            action_prob = [1] * 4
+
+
+if __name__ == "__main__":
+    with Benchmarker(cycle=20, extra=1) as bench:
+        @bench("Everything")
+        def _(bm):
+            main(render=False, delay=0)
diff --git a/env-data/railway/example_network_000.pkl b/env-data/railway/example_network_000.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..280688c2629331621ab2ea80b4b096226464e653
Binary files /dev/null and b/env-data/railway/example_network_000.pkl differ
diff --git a/env-data/railway/example_network_001.pkl b/env-data/railway/example_network_001.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..801f95149dec6eb4d47fd14e36d30f2541480188
Binary files /dev/null and b/env-data/railway/example_network_001.pkl differ
diff --git a/env-data/railway/example_network_002.pkl b/env-data/railway/example_network_002.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..898d54ebeb823e48790d4661ffe75a6940cd0712
Binary files /dev/null and b/env-data/railway/example_network_002.pkl differ
diff --git a/env-data/tests/test-10x10.mpk b/env-data/tests/test-10x10.mpk
new file mode 100644
index 0000000000000000000000000000000000000000..e7e9ad5a5fd875eacced553d816ee407fa8215d1
Binary files /dev/null and b/env-data/tests/test-10x10.mpk differ
diff --git a/examples/demo.py b/examples/demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e3725bccde4d4635c3a5e5ff275af9606018184
--- /dev/null
+++ b/examples/demo.py
@@ -0,0 +1,218 @@
+import os
+import random
+from collections import deque
+
+import time
+import numpy as np
+import torch
+
+from flatland.baselines.dueling_double_dqn import Agent
+from flatland.envs.generators import complex_rail_generator
+# from flatland.envs.generators import rail_from_list_of_saved_GridTransitionMap_generator
+from flatland.envs.generators import random_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
+
+# ensure that every demo run behaves identically
+random.seed(1)
+np.random.seed(1)
+
+
+class Scenario_Generator:
+    @staticmethod
+    def generate_random_scenario(number_of_agents=3):
+        # Example generate a rail given a manual specification,
+        # a map of tuples (cell_type, rotation)
+        transition_probability = [15,  # empty cell - Case 0
+                                  5,  # Case 1 - straight
+                                  5,  # Case 2 - simple switch
+                                  1,  # Case 3 - diamond crossing
+                                  1,  # Case 4 - single slip
+                                  1,  # Case 5 - double slip
+                                  1,  # Case 6 - symmetrical
+                                  0,  # Case 7 - dead end
+                                  1,  # Case 1b (8)  - simple turn right
+                                  1,  # Case 1c (9)  - simple turn left
+                                  1]  # Case 2b (10) - simple switch mirrored
+
+        # Example generate a random rail
+
+        env = RailEnv(width=20,
+                      height=20,
+                      rail_generator=random_rail_generator(cell_type_relative_proportion=transition_probability),
+                      number_of_agents=number_of_agents)
+
+        return env
+
+    @staticmethod
+    def generate_complex_scenario(number_of_agents=3):
+        env = RailEnv(width=15,
+                      height=15,
+                      rail_generator=complex_rail_generator(nr_start_goal=6, nr_extra=30, min_dist=10, max_dist=99999, seed=0),
+                      number_of_agents=number_of_agents)
+
+        return env
+
+    @staticmethod
+    def load_scenario(filename, number_of_agents=3):
+        env = RailEnv(width=2 * (1 + number_of_agents),
+                      height=1 + number_of_agents)
+
+        """
+        env = RailEnv(width=20,
+                      height=20,
+                      rail_generator=rail_from_list_of_saved_GridTransitionMap_generator(
+                          [filename]),
+                      number_of_agents=number_of_agents)
+        """
+        if os.path.exists(filename):
+            print("load file: ", filename)
+            env.load(filename)
+            env.reset(False, False)
+        else:
+            print("File does not exist:", filename, " Working directory: ", os.getcwd())
+
+        return env
+
+
+def max_lt(seq, val):
+    """
+    Return the greatest non-negative item in seq for which item < val applies.
+    0 is returned if seq was empty or no item in seq satisfied the condition.
+    """
+    max = 0
+    idx = len(seq) - 1
+    while idx >= 0:
+        if seq[idx] < val and seq[idx] >= 0 and seq[idx] > max:
+            max = seq[idx]
+        idx -= 1
+    return max
+
+
+def min_lt(seq, val):
+    """
+    Return smallest item in seq for which item > val applies.
+    np.inf is returned if seq was empty or no item in seq was > val.
+    """
+    min = np.inf
+    idx = len(seq) - 1
+    while idx >= 0:
+        if seq[idx] > val and seq[idx] < min:
+            min = seq[idx]
+        idx -= 1
+    return min
+
+
+def norm_obs_clip(obs, clip_min=-1, clip_max=1):
+    """
+    Normalize an observation by the spread between its min and max values and clip the result.
+    :param obs: observation that should be normalized
+    :param clip_min: minimum value to which the observation is clipped
+    :param clip_max: maximum value to which the observation is clipped
+    :return: normalized and clipped observation
+    """
+    max_obs = max(1, max_lt(obs, 1000))
+    min_obs = max(0, min_lt(obs, 0))
+    if max_obs == min_obs:
+        return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
+    norm = np.abs(max_obs - min_obs)
+    if norm == 0:
+        norm = 1.
+    return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
+
+
+class Demo:
+
+    def __init__(self, env):
+        self.env = env
+        self.create_renderer()
+        self.load_agent()
+
+    def load_agent(self):
+        self.state_size = 105 * 2
+        self.action_size = 4
+        self.agent = Agent(self.state_size, self.action_size, "FC", 0)
+        self.agent.qnetwork_local.load_state_dict(torch.load('./flatland/baselines/Nets/avoid_checkpoint15000.pth'))
+
+    def create_renderer(self):
+        self.renderer = RenderTool(self.env, gl="QTSVG")
+        handle = self.env.get_agent_handles()
+        return handle
+
+    def run_demo(self, max_nbr_of_steps=100):
+        action_dict = dict()
+        time_obs = deque(maxlen=2)
+        action_prob = [0] * 4
+        agent_obs = [None] * self.env.get_num_agents()
+        agent_next_obs = [None] * self.env.get_num_agents()
+
+        # Reset environment
+        obs = self.env.reset(False, False)
+
+        for a in range(self.env.get_num_agents()):
+            data, distance = self.env.obs_builder.split_tree(tree=np.array(obs[a]), num_features_per_node=5, current_depth=0)
+
+            data = norm_obs_clip(data)
+            distance = norm_obs_clip(distance)
+            obs[a] = np.concatenate((data, distance))
+
+        for i in range(2):
+            time_obs.append(obs)
+
+        # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
+        for a in range(self.env.get_num_agents()):
+            agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
+
+        for step in range(max_nbr_of_steps):
+
+            time.sleep(.2)
+
+            # print(step)
+            # Action
+            for a in range(self.env.get_num_agents()):
+                action = self.agent.act(agent_obs[a])
+                action_prob[action] += 1
+                action_dict.update({a: action})
+
+            self.renderer.renderEnv(show=True, action_dict=action_dict)
+
+            # Environment step
+            next_obs, all_rewards, done, _ = self.env.step(action_dict)
+            for a in range(self.env.get_num_agents()):
+                data, distance = self.env.obs_builder.split_tree(tree=np.array(next_obs[a]), num_features_per_node=5,
+                                                                 current_depth=0)
+                data = norm_obs_clip(data)
+                distance = norm_obs_clip(distance)
+                next_obs[a] = np.concatenate((data, distance))
+
+            # Update replay buffer and train agent
+            for a in range(self.env.get_num_agents()):
+                agent_next_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
+
+            time_obs.append(next_obs)
+
+            agent_obs = agent_next_obs.copy()
+            if done['__all__']:
+                break
+
+
+if True:
+    demo_000 = Demo(Scenario_Generator.generate_random_scenario())
+    demo_000.run_demo()
+    demo_000 = None
+
+    demo_001 = Demo(Scenario_Generator.generate_complex_scenario())
+    demo_001.run_demo()
+    demo_001 = None
+
+demo_000 = Demo(Scenario_Generator.load_scenario('./env-data/railway/example_network_000.pkl'))
+demo_000.run_demo()
+demo_000 = None
+
+demo_001 = Demo(Scenario_Generator.load_scenario('./env-data/railway/example_network_001.pkl'))
+demo_001.run_demo()
+demo_001 = None
+
+demo_002 = Demo(Scenario_Generator.load_scenario('./env-data/railway/example_network_002.pkl'))
+demo_002.run_demo()
+demo_002 = None
diff --git a/examples/play_model.py b/examples/play_model.py
index 174568177a4a886cfe38e53125d0f73f2dae52de..34c6aadfeefd44771fd335e2957e1fbd0b2f740f 100644
--- a/examples/play_model.py
+++ b/examples/play_model.py
@@ -1,11 +1,11 @@
+# import torch
 import random
 import time
+# from flatland.baselines.dueling_double_dqn import Agent
 from collections import deque
 
 import numpy as np
-import torch
 
-from flatland.baselines.dueling_double_dqn import Agent
 from flatland.envs.generators import complex_rail_generator
 from flatland.envs.rail_env import RailEnv
 from flatland.utils.rendertools import RenderTool
@@ -28,10 +28,12 @@ class Player(object):
         self.scores = []
         self.dones_list = []
         self.action_prob = [0] * 4
-        self.agent = Agent(self.state_size, self.action_size, "FC", 0)
+
+        # Removing refs to a real agent for now.
+        # self.agent = Agent(self.state_size, self.action_size, "FC", 0)
         # self.agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
-        self.agent.qnetwork_local.load_state_dict(torch.load(
-            '../flatland/flatland/baselines/Nets/avoid_checkpoint15000.pth'))
+        # self.agent.qnetwork_local.load_state_dict(torch.load(
+        #    '../flatland/flatland/baselines/Nets/avoid_checkpoint15000.pth'))
 
         self.iFrame = 0
         self.tStart = time.time()
@@ -49,12 +51,21 @@ class Player(object):
         self.score = 0
         self.env_done = 0
 
+    def reset(self):
+        self.obs = self.env.reset()
+        return self.obs
+
     def step(self):
         env = self.env
 
         # Pass the (stored) observation to the agent network and retrieve the action
         for handle in env.get_agent_handles():
-            action = self.agent.act(np.array(self.obs[handle]), eps=self.eps)
+            # Real Agent
+            # action = self.agent.act(np.array(self.obs[handle]), eps=self.eps)
+            # Random actions
+            action = random.randint(0, 3)
+            # Numpy version uses single random sequence
+            # action = np.random.randint(0, 4, size=1)
             self.action_prob[action] += 1
             self.action_dict.update({handle: action})
 
@@ -67,11 +78,12 @@ class Player(object):
             next_obs[handle] = np.clip(np.array(next_obs[handle]) / norm, -1, 1)
 
         # Update replay buffer and train agent
-        for handle in self.env.get_agent_handles():
-            self.agent.step(self.obs[handle], self.action_dict[handle],
-                            all_rewards[handle], next_obs[handle], done[handle],
-                            train=False)
-            self.score += all_rewards[handle]
+        if False:
+            for handle in self.env.get_agent_handles():
+                self.agent.step(self.obs[handle], self.action_dict[handle],
+                                all_rewards[handle], next_obs[handle], done[handle],
+                                train=False)
+                self.score += all_rewards[handle]
 
         self.iFrame += 1
 
@@ -94,7 +106,50 @@ def max_lt(seq, val):
     return None
 
 
-def main(render=True, delay=0.0):
+def main(render=True, delay=0.0, n_trials=3, n_steps=50, sGL="QT"):
+    random.seed(1)
+    np.random.seed(1)
+
+    # Example generate a random rail
+    env = RailEnv(width=15, height=15,
+                  rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=20, min_dist=12),
+                  number_of_agents=5)
+
+    if render:
+        # env_renderer = RenderTool(env, gl="QTSVG")
+        env_renderer = RenderTool(env, gl=sGL)
+
+    oPlayer = Player(env)
+
+    for trials in range(1, n_trials + 1):
+
+        # Reset environment
+        oPlayer.reset()
+        if render:
+            env_renderer.set_new_rail()
+
+        # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
+
+        # score = 0
+        # env_done = 0
+
+        # Run episode
+        for step in range(n_steps):
+            oPlayer.step()
+            if render:
+                env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step,
+                                       action_dict=oPlayer.action_dict)
+                # time.sleep(10)
+                if delay > 0:
+                    time.sleep(delay)
+
+
+def main_old(render=True, delay=0.0):
+    ''' DEPRECATED main which drives the agent directly.
+        Please use the new main(), which creates a Player object that is also used by the Editor.
+        Please fix any bugs in main() and Player rather than here.
+        Will delete this one shortly.
+    '''
+
     random.seed(1)
     np.random.seed(1)
 
@@ -107,8 +162,6 @@ def main(render=True, delay=0.0):
         env_renderer = RenderTool(env, gl="QTSVG")
         # env_renderer = RenderTool(env, gl="QT")
 
-    state_size = 105
-    action_size = 4
     n_trials = 9999
     eps = 1.
     eps_end = 0.005
@@ -119,8 +172,11 @@ def main(render=True, delay=0.0):
     scores = []
     dones_list = []
     action_prob = [0] * 4
-    agent = Agent(state_size, action_size, "FC", 0)
 
+    # Real Agent
+    # state_size = 105
+    # action_size = 4
+    # agent = Agent(state_size, action_size, "FC", 0)
     # agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
 
     def max_lt(seq, val):
@@ -161,7 +217,7 @@ def main(render=True, delay=0.0):
             # print(step)
             # Action
             for a in range(env.get_num_agents()):
-                action = agent.act(np.array(obs[a]), eps=eps)
+                action = random.randint(0, 3)  # agent.act(np.array(obs[a]), eps=eps)
                 action_prob[action] += 1
                 action_dict.update({a: action})
 
@@ -174,13 +230,16 @@ def main(render=True, delay=0.0):
 
             # Environment step
             next_obs, all_rewards, done, _ = env.step(action_dict)
+
             for a in range(env.get_num_agents()):
                 norm = max(1, max_lt(next_obs[a], np.inf))
                 next_obs[a] = np.clip(np.array(next_obs[a]) / norm, -1, 1)
+
             # Update replay buffer and train agent
-            for a in range(env.get_num_agents()):
-                agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
-                score += all_rewards[a]
+            # only needed for "real" agent
+            # for a in range(env.get_num_agents()):
+            #    agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
+            #    score += all_rewards[a]
 
             obs = next_obs.copy()
             if done['__all__']:
@@ -212,8 +271,8 @@ def main(render=True, delay=0.0):
                 np.mean(scores_window),
                 100 * np.mean(done_window),
                 eps, rFps, action_prob / np.sum(action_prob)))
-            torch.save(agent.qnetwork_local.state_dict(),
-                       '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
+            # torch.save(agent.qnetwork_local.state_dict(),
+            #         '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
             action_prob = [1] * 4
 
 
diff --git a/examples/temporary_example.py b/examples/temporary_example.py
index db909e00e1f1a2c1fdef73ba2a35159a698832a3..862369411056d87d411c3e173bd479e9a7e93e01 100644
--- a/examples/temporary_example.py
+++ b/examples/temporary_example.py
@@ -1,10 +1,9 @@
 import random
 
-import numpy as np
-
 from flatland.envs.generators import random_rail_generator
 from flatland.envs.rail_env import RailEnv
 from flatland.utils.rendertools import RenderTool
+import numpy as np
 
 random.seed(0)
 np.random.seed(0)
diff --git a/examples/tkplay.py b/examples/tkplay.py
new file mode 100644
index 0000000000000000000000000000000000000000..95842e3b430000169093d27c3c9de02ebe037de9
--- /dev/null
+++ b/examples/tkplay.py
@@ -0,0 +1,60 @@
+import time
+import tkinter as tk
+
+from PIL import ImageTk, Image
+
+from examples.play_model import Player
+from flatland.envs.generators import complex_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
+
+
+def tkmain(n_trials=2):
+    # This creates the main window of an application
+    window = tk.Tk()
+    window.title("Join")
+    window.configure(background='grey')
+
+    # Example generate a random rail
+    env = RailEnv(width=15, height=15,
+                  rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=20, min_dist=12),
+                  number_of_agents=5)
+
+    env_renderer = RenderTool(env, gl="PIL")
+
+    oPlayer = Player(env)
+    n_trials = 1
+    n_steps = 20
+    delay = 0
+    for trials in range(1, n_trials + 1):
+
+        # Reset environment
+        oPlayer.reset()
+        env_renderer.set_new_rail()
+
+        first = True
+
+        for step in range(n_steps):
+            oPlayer.step()
+            env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step,
+                                   action_dict=oPlayer.action_dict)
+            img = env_renderer.getImage()
+            img = Image.fromarray(img)
+            tkimg = ImageTk.PhotoImage(img)
+
+            if first:
+                panel = tk.Label(window, image=tkimg)
+                panel.pack(side="bottom", fill="both", expand="yes")
+            else:
+                # update the image in situ
+                panel.configure(image=tkimg)
+                panel.image = tkimg
+
+            window.update()
+            if delay > 0:
+                time.sleep(delay)
+            first = False
+
+
+if __name__ == "__main__":
+    tkmain()
diff --git a/examples/training_navigation.py b/examples/training_navigation.py
index 85f9531b8820139e5559081feee4a93c4e01ac6c..0cb9d275eda2a01932c4f632c1abd4fb662f4037 100644
--- a/examples/training_navigation.py
+++ b/examples/training_navigation.py
@@ -1,5 +1,4 @@
 import random
-import time
 from collections import deque
 
 import numpy as np
@@ -36,8 +35,8 @@ env = RailEnv(width=20,
 """
 env = RailEnv(width=15,
               height=15,
-              rail_generator=complex_rail_generator(nr_start_goal=2, nr_extra=30, min_dist=5, max_dist=99999, seed=0),
-              number_of_agents=3)
+              rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=10, min_dist=10, max_dist=99999, seed=0),
+              number_of_agents=5)
 
 """
 env = RailEnv(width=20,
@@ -47,7 +46,7 @@ env = RailEnv(width=20,
               number_of_agents=3)
 
 """
-env_renderer = RenderTool(env, gl="QT")
+env_renderer = RenderTool(env, gl="QTSVG")
 handle = env.get_agent_handles()
 
 state_size = 105 * 2
@@ -67,7 +66,7 @@ action_prob = [0] * 4
 agent_obs = [None] * env.get_num_agents()
 agent_next_obs = [None] * env.get_num_agents()
 agent = Agent(state_size, action_size, "FC", 0)
-agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint15000.pth'))
+agent.qnetwork_local.load_state_dict(torch.load('./flatland/baselines/Nets/avoid_checkpoint15000.pth'))
 
 demo = True
 
@@ -144,8 +143,7 @@ for trials in range(1, n_trials + 1):
     # Run episode
     for step in range(100):
         if demo:
-            env_renderer.renderEnv(show=True, obsrender=True)
-            time.sleep(2)
+            env_renderer.renderEnv(show=True)
         # print(step)
         # Action
         for a in range(env.get_num_agents()):
@@ -193,29 +191,18 @@ for trials in range(1, n_trials + 1):
     scores.append(np.mean(scores_window))
     dones_list.append((np.mean(done_window)))
 
-    print(
-        '\rTraining {} Agents.\t' +
-        'Episode {}\t' +
-        'Average Score: {:.0f}\t' +
-        'Dones: {:.2f}%\t' +
-        'Epsilon: {:.2f} \t ' +
-        'Action Probabilities: \t ' +
-        '{}'.format(
-            env.get_num_agents(),
-            trials,
-            np.mean(scores_window),
-            100 * np.mean(done_window),
-            eps, action_prob / np.sum(action_prob)),
-        end=" ")
+    print(('\rTraining {} Agents.\t Episode {}\t Average Score: {:.0f}\tDones: {:.2f}%' +
+           '\tEpsilon: {:.2f} \t Action Probabilities: \t {}').format(
+              env.get_num_agents(),
+              trials,
+              np.mean(scores_window),
+              100 * np.mean(done_window),
+              eps, action_prob / np.sum(action_prob)), end=" ")
+
     if trials % 100 == 0:
         print(
-            '\rTraining {} Agents.\t' +
-            'Episode {}\t' +
-            'Average Score: {:.0f}\t' +
-            'Dones: {:.2f}%\t' +
-            'Epsilon: {:.2f} \t ' +
-            'Action Probabilities: \t ' +
-            '{}'.format(
+            ('\rTraining {} Agents.\t Episode {}\t Average Score: {:.0f}\tDones: {:.2f}%' +
+             '\tEpsilon: {:.2f} \t Action Probabilities: \t {}').format(
                 env.get_num_agents(),
                 trials,
                 np.mean(scores_window),
diff --git a/flatland/baselines/dueling_double_dqn.py b/flatland/baselines/dueling_double_dqn.py
index 66fe3a3effec0dfa9dc35d07fec887eaa05be6fc..41a27bf8431df7812f1b4f63e797aa426c17edf1 100644
--- a/flatland/baselines/dueling_double_dqn.py
+++ b/flatland/baselines/dueling_double_dqn.py
@@ -1,12 +1,14 @@
-import numpy as np
-import random
-from collections import namedtuple, deque
+import copy
 import os
-from flatland.baselines.model import QNetwork, QNetwork2
+import random
+from collections import namedtuple, deque
+from collections.abc import Iterable
+
+import numpy as np
 import torch
 import torch.nn.functional as F
 import torch.optim as optim
-import copy
+
+from flatland.baselines.model import QNetwork, QNetwork2
 
 BUFFER_SIZE = int(1e5)  # replay buffer size
 BATCH_SIZE = 512  # minibatch size
@@ -175,16 +177,24 @@ class ReplayBuffer:
         """Randomly sample a batch of experiences from memory."""
         experiences = random.sample(self.memory, k=self.batch_size)
 
-        states = torch.from_numpy(np.vstack([e.state for e in experiences if e is not None])).float().to(device)
-        actions = torch.from_numpy(np.vstack([e.action for e in experiences if e is not None])).long().to(device)
-        rewards = torch.from_numpy(np.vstack([e.reward for e in experiences if e is not None])).float().to(device)
-        next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences if e is not None])).float().to(
-            device)
-        dones = torch.from_numpy(np.vstack([e.done for e in experiences if e is not None]).astype(np.uint8)).float().to(
-            device)
+        states = torch.from_numpy(self.__v_stack_impr([e.state for e in experiences if e is not None])) \
+            .float().to(device)
+        actions = torch.from_numpy(self.__v_stack_impr([e.action for e in experiences if e is not None])) \
+            .long().to(device)
+        rewards = torch.from_numpy(self.__v_stack_impr([e.reward for e in experiences if e is not None])) \
+            .float().to(device)
+        next_states = torch.from_numpy(self.__v_stack_impr([e.next_state for e in experiences if e is not None])) \
+            .float().to(device)
+        dones = torch.from_numpy(self.__v_stack_impr([e.done for e in experiences if e is not None]).astype(np.uint8)) \
+            .float().to(device)
 
         return (states, actions, rewards, next_states, dones)
 
     def __len__(self):
         """Return the current size of internal memory."""
         return len(self.memory)
+
+    def __v_stack_impr(self, states):
+        sub_dim = len(states[0][0]) if isinstance(states[0], Iterable) else 1
+        np_states = np.reshape(np.array(states), (len(states), sub_dim))
+        return np_states
diff --git a/flatland/core/transitions.py b/flatland/core/transitions.py
index 622d900598bba6bbc48750d6bb48923975af9b5e..add047b6c7895e391211258bb10561110e0f1a19 100644
--- a/flatland/core/transitions.py
+++ b/flatland/core/transitions.py
@@ -556,16 +556,16 @@ class RailEnvTransitions(Grid4Transitions):
         self.maskDeadEnds = 0b0010000110000100
 
         # create this to make validation faster
-        self.transitions_all = []
+        self.transitions_all = set()
         for index, trans in enumerate(self.transitions):
-            self.transitions_all.append(trans)
+            self.transitions_all.add(trans)
             if index in (2, 4, 6, 7, 8, 9, 10):
                 for _ in range(3):
                     trans = self.rotate_transition(trans, rotation=90)
-                    self.transitions_all.append(trans)
+                    self.transitions_all.add(trans)
             elif index in (1, 5):
                 trans = self.rotate_transition(trans, rotation=90)
-                self.transitions_all.append(trans)
+                self.transitions_all.add(trans)
 
     def print(self, cell_transition):
         print("  NESW")
@@ -620,10 +620,7 @@ class RailEnvTransitions(Grid4Transitions):
         Boolean
             True or False
         """
-        for trans in self.transitions_all:
-            if cell_transition == trans:
-                return True
-        return False
+        return cell_transition in self.transitions_all
 
     def has_deadend(self, cell_transition):
         if cell_transition & self.maskDeadEnds > 0:
diff --git a/flatland/envs/env_utils.py b/flatland/envs/env_utils.py
index b58604c6d7ededa28a33d30e87e13777a3cd54ec..1482b4438bebd82638b873f3232198172a05e6d0 100644
--- a/flatland/envs/env_utils.py
+++ b/flatland/envs/env_utils.py
@@ -1,12 +1,13 @@
-
 """
 Definition of the RailEnv environment and related level-generation functions.
 
 Generator functions are functions that take width, height and num_resets as arguments and return
 a GridTransitionMap object.
 """
+
 import numpy as np
 
+
 # from flatland.core.env import Environment
 # from flatland.envs.observations import TreeObsForRailEnv
 
@@ -53,7 +54,6 @@ def validate_new_transition(rail_trans, rail_array, prev_pos, current_pos, new_p
         else:
             # check if matches existing layout
             new_trans = rail_trans.set_transition(new_trans, current_dir, new_dir, 1)
-            # new_trans = rail_trans.set_transition(new_trans, mirror(new_dir), mirror(current_dir), 1)
     else:
         # set the forward path
         new_trans = rail_trans.set_transition(new_trans, current_dir, new_dir, 1)
@@ -68,7 +68,6 @@ def validate_new_transition(rail_trans, rail_array, prev_pos, current_pos, new_p
         else:
             # check if matches existing layout
             new_trans_e = rail_trans.set_transition(new_trans_e, new_dir, new_dir, 1)
-            # new_trans_e = rail_trans.set_transition(new_trans_e, mirror(new_dir), mirror(new_dir), 1)
 
         if not rail_trans.is_valid(new_trans_e):
             return False
@@ -90,6 +89,9 @@ class AStarNode():
     def __eq__(self, other):
         return self.pos == other.pos
 
+    def __hash__(self):
+        return hash(self.pos)
+
     def update_if_better(self, other):
         if other.g < self.g:
             self.parent = other.parent
@@ -106,30 +108,23 @@ def a_star(rail_trans, rail_array, start, end):
     rail_shape = rail_array.shape
     start_node = AStarNode(None, start)
     end_node = AStarNode(None, end)
-    open_list = []
-    closed_list = []
+    open_nodes = set()
+    closed_nodes = set()
+    open_nodes.add(start_node)
 
-    open_list.append(start_node)
-
-    # this could be optimized
-    def is_node_in_list(node, the_list):
-        for o_node in the_list:
-            if node == o_node:
-                return o_node
-        return None
-
-    while len(open_list) > 0:
+    while len(open_nodes) > 0:
         # get node with current shortest est. path (lowest f)
-        current_node = open_list[0]
-        current_index = 0
-        for index, item in enumerate(open_list):
+        current_node = None
+        for item in open_nodes:
+            if current_node is None:
+                current_node = item
+                continue
             if item.f < current_node.f:
                 current_node = item
-                current_index = index
 
         # pop current off open list, add to closed list
-        open_list.pop(current_index)
-        closed_list.append(current_node)
+        open_nodes.remove(current_node)
+        closed_nodes.add(current_node)
 
         # found the goal
         if current_node == end_node:
@@ -149,10 +144,7 @@ def a_star(rail_trans, rail_array, start, end):
             prev_pos = None
         for new_pos in [(0, -1), (0, 1), (-1, 0), (1, 0)]:
             node_pos = (current_node.pos[0] + new_pos[0], current_node.pos[1] + new_pos[1])
-            if node_pos[0] >= rail_shape[0] or \
-                    node_pos[0] < 0 or \
-                    node_pos[1] >= rail_shape[1] or \
-                    node_pos[1] < 0:
+            if node_pos[0] >= rail_shape[0] or node_pos[0] < 0 or node_pos[1] >= rail_shape[1] or node_pos[1] < 0:
                 continue
 
             # validate positions
@@ -166,8 +158,7 @@ def a_star(rail_trans, rail_array, start, end):
         # loop through children
         for child in children:
             # already in closed list?
-            closed_node = is_node_in_list(child, closed_list)
-            if closed_node is not None:
+            if child in closed_nodes:
                 continue
 
             # create the f, g, and h values
@@ -180,16 +171,14 @@ def a_star(rail_trans, rail_array, start, end):
             child.f = child.g + child.h
 
             # already in the open list?
-            open_node = is_node_in_list(child, open_list)
-            if open_node is not None:
-                open_node.update_if_better(child)
+            if child in open_nodes:
                 continue
 
             # add the child to the open list
-            open_list.append(child)
+            open_nodes.add(child)
 
         # no full path found
-        if len(open_list) == 0:
+        if len(open_nodes) == 0:
             return []
 
 
@@ -323,8 +312,7 @@ def get_rnd_agents_pos_tgt_dir_on_rail(rail, num_agents):
             valid_starting_directions = []
             for m in valid_movements:
                 new_position = get_new_position(agents_position[i], m[1])
-                if m[0] not in valid_starting_directions and \
-                   _path_exists(rail, new_position, m[0], agents_target[i]):
+                if m[0] not in valid_starting_directions and _path_exists(rail, new_position, m[0], agents_target[i]):
                     valid_starting_directions.append(m[0])
 
             if len(valid_starting_directions) == 0:
diff --git a/flatland/envs/generators.py b/flatland/envs/generators.py
index c1578a816e2e30127fb77dda6e72ab51b2f41cb2..9f2dfee3e89b88009d8489faaa6fb0870e01204b 100644
--- a/flatland/envs/generators.py
+++ b/flatland/envs/generators.py
@@ -9,6 +9,21 @@ from flatland.envs.env_utils import distance_on_rail, connect_rail, get_directio
 from flatland.envs.env_utils import get_rnd_agents_pos_tgt_dir_on_rail
 
 
+def empty_rail_generator():
+    """
+    Returns a generator which returns an empty rail map with no agents.
+    Primarily used by the editor
+    """
+    def generator(width, height, num_agents=0, num_resets=0):
+        rail_trans = RailEnvTransitions()
+        grid_map = GridTransitionMap(width=width, height=height, transitions=rail_trans)
+        rail_array = grid_map.grid
+        rail_array.fill(0)
+
+        return grid_map, [], [], []
+    return generator
+
+
 def complex_rail_generator(nr_start_goal=1, nr_extra=100, min_dist=20, max_dist=99999, seed=0):
     """
     Parameters
@@ -25,6 +40,9 @@ def complex_rail_generator(nr_start_goal=1, nr_extra=100, min_dist=20, max_dist=
     """
 
     def generator(width, height, num_agents, num_resets=0):
+        if num_agents > nr_start_goal:
+            num_agents = nr_start_goal
+            print("complex_rail_generator: num_agents > nr_start_goal, changing num_agents")
         rail_trans = RailEnvTransitions()
         grid_map = GridTransitionMap(width=width, height=height, transitions=rail_trans)
         rail_array = grid_map.grid
@@ -34,41 +52,20 @@ def complex_rail_generator(nr_start_goal=1, nr_extra=100, min_dist=20, max_dist=
 
         # generate rail array
         # step 1:
-        # - generate a list of start and goal positions
-        # - use a min/max distance allowed as input for this
-        # - validate that start/goals are not placed too close to other start/goals
-        #
-        # step 2: (optional)
-        # - place random elements on rails array
-        #   - for instance "train station", etc.
-        #
-        # step 3:
-        # - iterate over all [start, goal] pairs:
-        #   - [first X pairs]
-        #     - draw a rail from [start,goal]
-        #     - draw either vertical or horizontal part first (randomly)
+        # - generate a start and goal position
+        #   - validate min/max distance allowed
+        #   - validate that start/goals are not placed too close to other start/goals
+        #   - draw a rail from [start,goal]
         #     - if rail crosses existing rail then validate new connection
-        #       - if new connection is invalid turn 90 degrees to left/right
-        #       - possibility that this fails to create a path to goal
-        #         - on failure goto step1 and retry with seed+1
-        #     - [avoid crossing other start,goal positions] (optional)
-        #
-        #   - [after X pairs]
-        #     - find closest rail from start (Pa)
-        #       - iterating outwards in a "circle" from start until an existing rail cell is hit
-        #     - connect [start, Pa]
-        #       - validate crossing rails
-        #     - Do A* from Pa to find closest point on rail (Pb) to goal point
-        #       - Basically normal A* but find point on rail which is closest to goal
-        #       - since full path to goal is unlikely
-        #     - connect [Pb, goal]
-        #       - validate crossing rails
+        #     - possibility that this fails to create a path to goal
+        #     - on failure generate new start/goal
         #
-        # step 4: (optional)
-        # - add more rails to map randomly
+        # step 2:
+        # - add more rails to map randomly between cells that have rails
+        #   - validate all new rails, on failure don't add new rails
         #
-        # step 5:
-        # - return transition map + list of [start, goal] points
+        # step 3:
+        # - return transition map + list of [start_pos, start_dir, goal_pos] points
         #
 
         start_goal = []
@@ -146,9 +143,9 @@ def complex_rail_generator(nr_start_goal=1, nr_extra=100, min_dist=20, max_dist=
         # print("\n> Complex Rail Gen: Created #", len(start_goal), "pairs and #", nr_created, "extra connections")
         # print(start_goal)
 
-        agents_position = [sg[0] for sg in start_goal]
-        agents_target = [sg[1] for sg in start_goal]
-        agents_direction = start_dir
+        agents_position = [sg[0] for sg in start_goal[:num_agents]]
+        agents_target = [sg[1] for sg in start_goal[:num_agents]]
+        agents_direction = start_dir[:num_agents]
 
         return grid_map, agents_position, agents_direction, agents_target
 
diff --git a/flatland/envs/observations.py b/flatland/envs/observations.py
index 0fd94a4e21a7b7e226e7ea012f6d3e156888dfb8..21f9cb45900137e2167ab5811630bd05489d8d3a 100644
--- a/flatland/envs/observations.py
+++ b/flatland/envs/observations.py
@@ -195,7 +195,8 @@ class TreeObsForRailEnv(ObservationBuilder):
         # for loc in self.env.agents_position:
         #    self.location_has_agent[(loc[0], loc[1])] = 1
         self.location_has_agent = {tuple(agent.position): 1 for agent in self.env.agents}
-
+        if handle >= len(self.env.agents):
+            print("ERROR: obs _get - handle ", handle, " len(agents)", len(self.env.agents))
         agent = self.env.agents[handle]  # TODO: handle being treated as index
         # position = self.env.agents_position[handle]
         # orientation = self.env.agents_direction[handle]
diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py
index 118ebf4d3ea122519a09c8b7b5a00c53964063a1..74e7526caa93ca8a1821eb5b2a47576231eb95c3 100644
--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -353,6 +353,8 @@ class RailEnv(Environment):
         self.agents = [EnvAgent(d[0], d[1], d[2], d[3], d[4]) for d in data[b"agents"]]
         # setup with loaded data
         self.height, self.width = self.rail.grid.shape
+        self.rail.height = self.height
+        self.rail.width = self.width
         # self.agents = [None] * self.get_num_agents()
         self.dones = dict.fromkeys(list(range(self.get_num_agents())) + ["__all__"], False)
 
diff --git a/flatland/utils/editor.py b/flatland/utils/editor.py
index a33a0116a93b9c1aa25c9da8103925f7c5ed51ae..7e813d763d7f26a96a1e1ca1f4c3d1bceef68ee3 100644
--- a/flatland/utils/editor.py
+++ b/flatland/utils/editor.py
@@ -15,7 +15,7 @@ import os
 # from ipywidgets import IntSlider, link, VBox
 
 from flatland.envs.rail_env import RailEnv, random_rail_generator
-from flatland.envs.generators import complex_rail_generator
+from flatland.envs.generators import complex_rail_generator, empty_rail_generator
 # from flatland.core.transitions import RailEnvTransitions
 from flatland.envs.observations import TreeObsForRailEnv
 import flatland.utils.rendertools as rt
@@ -60,7 +60,7 @@ class View(object):
         self.new_env()
         self.oRT.renderEnv(spacing=False, arrows=False, sRailColor="gray", show=False)
         img = self.oRT.getImage()
-        plt.clf()
+        plt.clf()  # TODO: remove this plt.clf() call
         self.wImage = jpy_canvas.Canvas(img)
         self.yxSize = self.wImage.data.shape[:2]
         self.writableData = np.copy(self.wImage.data)  # writable copy of image - wid_img.data is somehow readonly
@@ -86,6 +86,9 @@ class View(object):
         self.wDebug_move = Checkbox(description="Debug mouse move")
         self.wDebug_move.observe(self.controller.setDebugMove, names="value")
 
+        # Checkbox for rendering observations
+        self.wShowObs = Checkbox(description="Show Agent Observations")
+
         # This is like a cell widget where loggin goes
         self.wOutput = Output()
 
@@ -95,13 +98,15 @@ class View(object):
         self.wFilename.observe(self.controller.setFilename, names="value")
 
         # Size of environment when regenerating
-        self.wSize = IntSlider(value=10, min=5, max=30, step=5, description="Regen Size")
-        self.wSize.observe(self.controller.setRegenSize, names="value")
+        self.wRegenSize = IntSlider(value=10, min=5, max=100, step=5, description="Regen Size",
+            tip="Click Regenerate after changing this")
+        self.wRegenSize.observe(self.controller.setRegenSize, names="value")
 
         # Number of Agents when regenerating
-        self.wNAgents = IntSlider(value=1, min=0, max=20, step=1, description="# Agents")
+        self.wRegenNAgents = IntSlider(value=1, min=0, max=20, step=1, description="# Agents",
+            tip="Click regenerate or reset after changing this")
 
-        self.wRegenMethod = RadioButtons(description="Regen\nMethod", options=["Random Cell", "Path-based"])
+        self.wRegenMethod = RadioButtons(description="Regen\nMethod", options=["Empty", "Random Cell", "Path-based"])
         self.wReplaceAgents = Checkbox(value=True, description="Replace Agents")
 
         self.wTab = Tab()
@@ -109,8 +114,8 @@ class View(object):
         for i, title in enumerate(tab_contents):
             self.wTab.set_title(i, title)
         self.wTab.children = [
-            VBox([self.wDebug, self.wDebug_move]),
-            VBox([self.wRegenMethod, self.wReplaceAgents])]
+            VBox([self.wDebug, self.wDebug_move, self.wShowObs]),
+            VBox([self.wRegenSize, self.wRegenNAgents, self.wRegenMethod, self.wReplaceAgents])]
 
         # Progress bar intended for stepping in the background (not yet working)
         self.wProg_steps = ipywidgets.IntProgress(value=0, min=0, max=20, step=1, description="Step")
@@ -140,8 +145,8 @@ class View(object):
         self.wVbox_controls = VBox([
             self.wFilename,  # self.wDrawMode,
             *self.lwButtons,
-            self.wSize,
-            self.wNAgents,
+            # self.wRegenSize,
+            # self.wRegenNAgents,
             self.wProg_steps,
             self.wTab])
 
@@ -161,13 +166,17 @@ class View(object):
         with self.wOutput:
             # plt.figure(figsize=(10, 10))
             self.oRT.renderEnv(spacing=False, arrows=False, sRailColor="gray",
-                               show=False, iSelectedAgent=self.model.iSelectedAgent)
+                               show=False, iSelectedAgent=self.model.iSelectedAgent,
+                               show_observations=self.show_observations())
             img = self.oRT.getImage()
             # plt.clf()
             # plt.close()
 
             self.wImage.data = img
             self.writableData = np.copy(self.wImage.data)
+
+            # the size should only be updated on regenerate at most
+            self.yxSize = self.wImage.data.shape[:2]
             return img
 
     def redisplayImage(self):
@@ -191,6 +200,13 @@ class View(object):
         else:
             print(*args, **kwargs)
 
+    def show_observations(self):
+        ''' returns whether to show observations - boolean '''
+        return bool(self.wShowObs.value)
+
 
 class Controller(object):
     """
@@ -297,17 +313,17 @@ class Controller(object):
         self.model.clear()
 
     def reset(self, event):
-        self.log("Reset - nAgents:", self.view.wNAgents.value)
+        self.log("Reset - nAgents:", self.view.wRegenNAgents.value)
         self.model.reset(replace_agents=self.view.wReplaceAgents.value,
-                         nAgents=self.view.wNAgents.value)
+                         nAgents=self.view.wRegenNAgents.value)
 
     def restartAgents(self, event):
-        self.log("Restart Agents - nAgents:", self.view.wNAgents.value)
+        self.log("Restart Agents - nAgents:", self.view.wRegenNAgents.value)
         self.model.restartAgents()
 
     def regenerate(self, event):
         method = self.view.wRegenMethod.value
-        nAgents = self.view.wNAgents.value
+        nAgents = self.view.wRegenNAgents.value
         self.model.regenerate(method, nAgents)
 
     def setRegenSize(self, event):
@@ -375,6 +391,43 @@ class EditorModel(object):
     def setDrawMode(self, sDrawMode):
         self.drawMode = sDrawMode
 
+    def interpolate_path(self, rcLast, rcCell):
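+        """Return the grid cells along a straight stroke from rcLast to rcCell
+        (excluding rcLast, including rcCell), filling in "corner" cells so the
+        stroke stays contiguous ("Manhattan interpolation") even when the mouse
+        skips cells during a fast drag.
+        """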
+        if np.array_equal(rcLast, rcCell):
+            return []
+        rcLast = array(rcLast)
+        rcCell = array(rcCell)
+        rcDelta = rcCell - rcLast
+
+        lrcInterp = []  # extra row,col points
+
+        if np.any(np.abs(rcDelta) >= 1):
+            iDim0 = np.argmax(np.abs(rcDelta))  # the dimension with the bigger move
+            iDim1 = 1 - iDim0                   # the dim with the smaller move
+            rcRatio = rcDelta[iDim1] / rcDelta[iDim0]
+            delta0 = rcDelta[iDim0]
+            sgn0 = np.sign(delta0)
+
+            iDelta1 = 0
+
+            # count integers along the larger dimension
+            for iDelta0 in range(sgn0, delta0 + sgn0, sgn0):
+                rDelta1 = iDelta0 * rcRatio
+                
+
+                    rcInterp = (iDelta0, iDelta1)  # fill in the "corner" for "Manhattan interpolation"
+                    lrcInterp.append(rcInterp)
+                    iDelta1 = int(rDelta1)
+
+                rcInterp = (iDelta0, int(rDelta1))
+                lrcInterp.append(rcInterp)
+            g2Interp = array(lrcInterp)
+            if iDim0 == 1:  # if necessary, swap c,r to make r,c
+                g2Interp = g2Interp[:, [1, 0]]
+            g2Interp += rcLast
+            # Convert the array to a list of tuples
+            lrcInterp = list(map(tuple, g2Interp))
+        return lrcInterp
+
     def drag_path_element(self, rcCell):
         """Mouse motion event handler for drawing.
         """
@@ -384,8 +437,9 @@ class EditorModel(object):
         if len(lrcStroke) > 0:
             rcLast = lrcStroke[-1]
             if not np.array_equal(rcLast, rcCell):  # only save at transition
-                lrcStroke.append(rcCell)
-                self.debug("lrcStroke ", len(lrcStroke), rcCell)
+                lrcInterp = self.interpolate_path(rcLast, rcCell)
+                lrcStroke.extend(lrcInterp)
+                self.debug("lrcStroke ", len(lrcStroke), rcCell, "interp:", lrcInterp)
 
         else:
             # This is the first cell in a mouse stroke
@@ -545,7 +599,7 @@ class EditorModel(object):
         self.redraw()
 
     def setFilename(self, filename):
-        self.log("filename = ", filename, type(filename))
+        # self.log("filename = ", filename, type(filename))
         self.env_filename = filename
 
     def load(self):
@@ -567,7 +621,9 @@ class EditorModel(object):
     def regenerate(self, method=None, nAgents=0):
         self.log("Regenerate size", self.regen_size)
 
-        if method is None or method == "Random Cell":
+        if method is None or method == "Empty":
+            fnMethod = empty_rail_generator()
+        elif method == "Random Cell":
             fnMethod = random_rail_generator(cell_type_relative_proportion=[1] * 11)
         else:
             fnMethod = complex_rail_generator(nr_start_goal=5, nr_extra=20, min_dist=12)
@@ -583,6 +639,7 @@ class EditorModel(object):
         self.set_env(self.env)
         self.player = Player(self.env)
         self.view.new_env()
+        # self.view.init_canvas() # Can't do init_canvas - need to keep the same canvas widget!
         self.redraw()
 
     def setRegenSize(self, size):
diff --git a/flatland/utils/graphics_pil.py b/flatland/utils/graphics_pil.py
index 41516fd94737556a4b8abbc7ccfce0fd503a3d6e..b66c8dc55f38c321d038306f933de66493a6e6b3 100644
--- a/flatland/utils/graphics_pil.py
+++ b/flatland/utils/graphics_pil.py
@@ -18,28 +18,32 @@ class PILGL(GraphicsLayer):
         # Total grid size at native scale
         self.widthPx = self.width * self.nPixCell + self.linewidth
         self.heightPx = self.height * self.nPixCell + self.linewidth
-        self.beginFrame()
+        self.layers = []
+        self.draws = []
 
         self.tColBg = (255, 255, 255)     # white background
         # self.tColBg = (220, 120, 40)    # background color
         self.tColRail = (0, 0, 0)         # black rails
         self.tColGrid = (230,) * 3        # light grey for grid
 
-    def plot(self, gX, gY, color=None, linewidth=3, **kwargs):
-        color = self.adaptColor(color)
+        self.beginFrame()
 
-        # print(gX, gY)
+    def plot(self, gX, gY, color=None, linewidth=3, layer=0, opacity=255, **kwargs):
+        color = self.adaptColor(color)
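+        # normalise the color to an RGBA tuple so the requested per-call opacity is applied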
+        if len(color) == 3:
+            color += (opacity,)
+        elif len(color) == 4:
+            color = color[:3] + (opacity,)
         gPoints = np.stack([array(gX), -array(gY)]).T * self.nPixCell
         gPoints = list(gPoints.ravel())
-        # print(gPoints, color)
-        self.draw.line(gPoints, fill=color, width=self.linewidth)
+        self.draws[layer].line(gPoints, fill=color, width=self.linewidth)
 
-    def scatter(self, gX, gY, color=None, marker="o", s=50, *args, **kwargs):
+    def scatter(self, gX, gY, color=None, marker="o", s=50, layer=0, opacity=255, *args, **kwargs):
         color = self.adaptColor(color)
         r = np.sqrt(s)
         gPoints = np.stack([np.atleast_1d(gX), -np.atleast_1d(gY)]).T * self.nPixCell
         for x, y in gPoints:
-            self.draw.rectangle([(x - r, y - r), (x + r, y + r)], fill=color, outline=color)
+            self.draws[layer].rectangle([(x - r, y - r), (x + r, y + r)], fill=color, outline=color)
 
     def text(self, *args, **kwargs):
         pass
@@ -51,8 +55,8 @@ class PILGL(GraphicsLayer):
         pass
 
     def beginFrame(self):
-        self.img = Image.new("RGBA", (self.widthPx, self.heightPx), (255, 255, 255, 255))
-        self.draw = ImageDraw.Draw(self.img)
+        self.create_layer(0)
+        self.create_layer(1)
 
     def show(self, block=False):
         pass
@@ -62,5 +66,35 @@ class PILGL(GraphicsLayer):
         pass
         # plt.pause(seconds)
 
+    def alpha_composite_layers(self):
+        img = self.layers[0]
+        for img2 in self.layers[1:]:
+            img = Image.alpha_composite(img, img2)
+        return img
+
     def getImage(self):
-        return array(self.img)
+        """ return a blended / alpha composited image composed of all the layers,
+            with layer 0 at the "back".
+        """
+        img = self.alpha_composite_layers()
+        return array(img)
+
+    def create_image(self, opacity=255):
+        img = Image.new("RGBA", (self.widthPx, self.heightPx), (255, 255, 255, opacity))
+        return img
+
+    def create_layer(self, iLayer=0):
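+        # create layer iLayer if it does not exist yet (growing the list as needed), otherwise reset it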
+        if len(self.layers) <= iLayer:
+            for i in range(len(self.layers), iLayer+1):
+                if i == 0:
+                    opacity = 255  # "bottom" layer is opaque (for rails)
+                else:
+                    opacity = 0   # subsequent layers are transparent
+                img = self.create_image(opacity)
+                self.layers.append(img)
+                self.draws.append(ImageDraw.Draw(img))
+        else:
+            opacity = 0 if iLayer > 0 else 255
+            self.layers[iLayer] = img = self.create_image(opacity)
+            self.draws[iLayer] = ImageDraw.Draw(img)
+
diff --git a/flatland/utils/render_qt.py b/flatland/utils/render_qt.py
index ea9613968c473c351849509bfc3e277dd7fe0701..73b8ca77a33042bf181097d4b1a0a1afcb48b56e 100644
--- a/flatland/utils/render_qt.py
+++ b/flatland/utils/render_qt.py
@@ -11,6 +11,19 @@ from PyQt5.QtWidgets import QApplication, QMainWindow, QWidget, QGridLayout
 from PyQt5 import QtSvg
 
 
+def transform_string_svg(sSVG):
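+    """ Rewrite the declared XML encoding from ASCII to UTF-8 and return the SVG
+        as utf-8 bytes for QSvgRenderer.load().
+    """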
+    sSVG = sSVG.replace("ASCII", "UTF-8")
+    bySVG = bytearray(sSVG, encoding='utf-8')
+    return bySVG
+
+
+def create_QtSvgWidget_from_svg_string(sSVG):
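+    """ Build a QSvgWidget from an SVG string, printing a warning if Qt cannot parse it. """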
+    svgWidget = QtSvg.QSvgWidget()
+    ret = svgWidget.renderer().load(transform_string_svg(sSVG))
+    if not ret:
+        print("create_QtSvgWidget_from_svg_string : failed to parse:", sSVG)
+    return svgWidget
+
+
 class QTGL(GraphicsLayer):
     def __init__(self, width, height):
         self.cell_pixels = 60
@@ -108,7 +121,7 @@ class QTSVG(GraphicsLayer):
         self.layout = QGridLayout()
         self.layout.setSpacing(0)
         self.wMain.setLayout(self.layout)
-        self.wWinMain.resize(1000, 1000)
+        self.wWinMain.resize(600, 600)
         self.wWinMain.show()
         self.wWinMain.setFocus()
 
@@ -129,13 +142,7 @@ class QTSVG(GraphicsLayer):
         if False:
             for binTrans in self.track.dSvg.keys():
                 sSVG = self.track.dSvg[binTrans].to_string()
-
-                bySVG = bytearray(sSVG, encoding='utf-8')
-
-                svgWidget = QtSvg.QSvgWidget()
-                svgWidget.renderer().load(bySVG)
-                print(iRow, iCol)
-                self.layout.addWidget(svgWidget, iRow, iCol)
+                self.layout.addWidget(create_QtSvgWidget_from_svg_string(sSVG), iRow, iCol)
 
                 iArt += 1
                 iRow = int(iArt / nCols)
@@ -170,9 +177,7 @@ class QTSVG(GraphicsLayer):
     def setRailAt(self, row, col, binTrans):
         if binTrans in self.track.dSvg:
             sSVG = self.track.dSvg[binTrans].to_string()
-            bySVG = bytearray(sSVG, encoding='utf-8')
-            svgWidget = QtSvg.QSvgWidget()
-            svgWidget.renderer().load(bySVG)
+            svgWidget = create_QtSvgWidget_from_svg_string(sSVG)
             self.layout.addWidget(svgWidget, row, col)
             self.lwTrack.append(svgWidget)
         else:
@@ -199,8 +204,7 @@ class QTSVG(GraphicsLayer):
                     agentPrev.direction = iDirOut
                     agentPrev.old_direction = iDirIn
                     sSVG = self.zug.getSvg(iAgent, iDirIn, iDirOut, color=color).to_string()
-                    bySVG = bytearray(sSVG, encoding='utf-8')
-                    wAgent.renderer().load(bySVG)
+                    wAgent.renderer().load(transform_string_svg(sSVG))
                     return
 
         # Ensure we have adequate slots in the list lwAgents
@@ -210,13 +214,10 @@ class QTSVG(GraphicsLayer):
 
         # Create a new widget for the agent
         sSVG = self.zug.getSvg(iAgent, iDirIn, iDirOut, color=color).to_string()
-        bySVG = bytearray(sSVG, encoding='utf-8')
-        svgWidget = QtSvg.QSvgWidget()
-        svgWidget.renderer().load(bySVG)
+        svgWidget = create_QtSvgWidget_from_svg_string(sSVG)
         self.lwAgents[iAgent] = svgWidget
         self.agents_prev[iAgent] = EnvAgent((row, col), iDirOut, (0, 0), old_direction=iDirIn)
         self.layout.addWidget(svgWidget, row, col)
-        # print("Created ", iAgent, row, col)
 
     def show(self, block=False):
         self.wMain.update()
diff --git a/flatland/utils/rendertools.py b/flatland/utils/rendertools.py
index 34f3e9fa6857e86f4d99d211784d983a2e2a1e75..b96531c6f2162553d13ba5b4dc7415626266e11f 100644
--- a/flatland/utils/rendertools.py
+++ b/flatland/utils/rendertools.py
@@ -1,26 +1,29 @@
-from recordtype import recordtype
+import time
+from collections import deque
 
-import numpy as np
-from numpy import array
 # import xarray as xr
 import matplotlib.pyplot as plt
-import time
-from collections import deque
-from flatland.utils.render_qt import QTGL, QTSVG
-from flatland.utils.graphics_pil import PILGL
+import numpy as np
+from numpy import array
+from recordtype import recordtype
+
 from flatland.utils.graphics_layer import GraphicsLayer
+from flatland.utils.graphics_pil import PILGL
+from flatland.utils.render_qt import QTGL, QTSVG
 
 
 # TODO: suggested renaming to RailEnvRenderTool, as it will only work with RailEnv!
 
 
 class MPLGL(GraphicsLayer):
-    def __init__(self, width, height):
+    def __init__(self, width, height, show=False):
         self.width = width
         self.height = height
         self.yxBase = array([6, 21])  # pixel offset
         self.nPixCell = 700 / width
         self.img = None
+        if show:
+            plt.figure(figsize=(10, 10))
 
     def plot(self, *args, **kwargs):
         plt.plot(*args, **kwargs)
@@ -70,6 +73,7 @@ class MPLGL(GraphicsLayer):
     def beginFrame(self):
         self.img = None
         plt.figure(figsize=(10, 10))
+        plt.clf()
         pass
 
     def endFrame(self):
@@ -115,7 +119,7 @@ class RenderTool(object):
     gTheta = np.linspace(0, np.pi / 2, 5)
     gArc = array([np.cos(gTheta), np.sin(gTheta)]).T  # from [1,0] to [0,1]
 
-    def __init__(self, env, gl="MPL"):
+    def __init__(self, env, gl="MPL", show=False):
         self.env = env
         self.iFrame = 0
         self.time1 = time.time()
@@ -123,7 +127,7 @@ class RenderTool(object):
         # self.gl = MPLGL()
 
         if gl == "MPL":
-            self.gl = MPLGL(env.width, env.height)
+            self.gl = MPLGL(env.width, env.height, show=show)
         elif gl == "QT":
             self.gl = QTGL(env.width, env.height)
         elif gl == "PIL":
@@ -219,17 +223,19 @@ class RenderTool(object):
         if static:
             color = self.gl.adaptColor(color, lighten=True)
 
         # print("Agent:", rcPos, iDir, rcDir, xyDir, xyPos)
-        self.gl.scatter(*xyPos, color=color, marker="o", s=100)  # agent location
+        self.gl.scatter(*xyPos, color=color, layer=1, marker="o", s=100)  # agent location
         xyDirLine = array([xyPos, xyPos + xyDir / 2]).T  # line for agent orient.
-        self.gl.plot(*xyDirLine, color=color, lw=5, ms=0, alpha=0.6)
+        self.gl.plot(*xyDirLine, color=color, layer=1, lw=5, ms=0, alpha=0.6)
         if selected:
             self._draw_square(xyPos, 1, color)
 
         if target is not None:
             rcTarget = array(target)
             xyTarget = np.matmul(rcTarget, rt.grc2xy) + rt.xyHalf
-            self._draw_square(xyTarget, 1 / 3, color)
+            self._draw_square(xyTarget, 1 / 3, color, layer=1)
 
     def plotTrans(self, rcPos, gTransRCAg, color="r", depth=None):
         """
@@ -397,13 +403,19 @@ class RenderTool(object):
                 visit = visit.prev
                 xyPrev = xy
 
-    def drawTrans2(
-        self,
-            xyLine, xyCentre,
-            rotation, bDeadEnd=False,
-            sColor="gray",
-            bArrow=True,
-            spacing=0.1):
+    def drawTrans(self, oFrom, oTo, sColor="gray"):
+        self.gl.plot(
+            [oFrom[0], oTo[0]],  # x
+            [oFrom[1], oTo[1]],  # y
+            color=sColor
+        )
+
+    def drawTrans2(self,
+                   xyLine, xyCentre,
+                   rotation, bDeadEnd=False,
+                   sColor="gray",
+                   bArrow=True,
+                   spacing=0.1):
         """
         gLine is a numpy 2d array of points,
         in the plotting space / coords.
@@ -474,8 +486,8 @@ class RenderTool(object):
 
     def renderObs(self, agent_handles, observation_dict):
         """
-        Render the extent of the observation of each agent. All cells that appear in the agent obsrevation will be
-        highlighted.
+        Render the extent of the observation of each agent. All cells that appear in the agent
+        observation will be highlighted.
         :param agent_handles: List of agent indices to adapt color and get correct observation
         :param observation_dict: dictionary containing sets of cells of the agent observation
 
@@ -489,47 +501,13 @@ class RenderTool(object):
             for visited_cell in observation_dict[agent]:
                 cell_coord = array(visited_cell[:2])
                 cell_coord_trans = np.matmul(cell_coord, rt.grc2xy) + rt.xyHalf
-                self._draw_square(cell_coord_trans, 1 / 3, color)
-
-    def renderEnv(
-        self, show=False, curves=True, spacing=False,
-            arrows=False, agents=True, obsrender=True, sRailColor="gray", frames=False, iEpisode=None, iStep=None,
-            iSelectedAgent=None, action_dict=None):
-        """
-        Draw the environment using matplotlib.
-        Draw into the figure if provided.
-
-        Call pyplot.show() if show==True.
-        (Use show=False from a Jupyter notebook with %matplotlib inline)
-        """
+                self._draw_square(cell_coord_trans, 1 / (agent + 1.1), color, layer=1, opacity=100)
 
-        if not self.gl.is_raster():
-            self.renderEnv2(show, curves, spacing,
-                            arrows, agents, sRailColor,
-                            frames, iEpisode, iStep,
-                            iSelectedAgent, action_dict)
-            return
-
-        # cell_size is a bit pointless with matplotlib - it does not relate to pixels,
-        # so for now I've changed it to 1 (from 10)
-        cell_size = 1
-        self.gl.beginFrame()
-
-        # self.gl.clf()
-        # if oFigure is None:
-        #    oFigure = self.gl.figure()
-
-        def drawTrans(oFrom, oTo, sColor="gray"):
-            self.gl.plot(
-                [oFrom[0], oTo[0]],  # x
-                [oFrom[1], oTo[1]],  # y
-                color=sColor
-            )
+    def renderRail(self, spacing=False, sRailColor="gray", curves=True, arrows=False):
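+        """ Draw the cell grid and all rail transitions of self.env onto the graphics layer. """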
 
+        cell_size = 1  # TODO: remove cell_size
         env = self.env
 
-        # t1 = time.time()
-
         # Draw cells grid
         grid_color = [0.95, 0.95, 0.95]
         for r in range(env.height + 1):
@@ -613,7 +591,7 @@ class RenderTool(object):
                                         rotation, spacing=spacing, bArrow=arrows,
                                         sColor=sRailColor)
                                 else:
-                                    drawTrans(from_xy, to_xy, sRailColor)
+                                    self.drawTrans(from_xy, to_xy, sRailColor)
 
                             if False:
                                 print(
@@ -626,10 +604,46 @@ class RenderTool(object):
                                     "rot:", rotation,
                                 )
 
+    def renderEnv(self, show=False, curves=True, spacing=False,
+                  arrows=False, agents=True, renderobs=True, show_observations=True, sRailColor="gray", frames=False,
+                  iEpisode=None, iStep=None,
+                  iSelectedAgent=None, action_dict=None):
+        """
+        Draw the environment using the configured graphics layer (MPL, PIL or QT).
+        Draw into the figure if provided.
+
+        Call pyplot.show() if show==True.
+        (Use show=False from a Jupyter notebook with %matplotlib inline)
+        """
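+
+        # Typical usage (as exercised in tests/test_rendertools.py):
+        #   oRT = RenderTool(env, gl="PIL")
+        #   oRT.renderEnv(show=False)
+        #   image = oRT.getImage()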
+
+        if not self.gl.is_raster():
+            self.renderEnv2(show, curves, spacing,
+                            arrows, agents, renderobs, sRailColor,
+                            frames, iEpisode, iStep,
+                            iSelectedAgent, action_dict)
+
+            return
+
+        if type(self.gl) in (QTGL, PILGL):
+            self.gl.beginFrame()
+
+        if type(self.gl) is MPLGL:
+            # self.gl.clf()
+            self.gl.beginFrame()
+
+        # self.gl.clf()
+        # if oFigure is None:
+        #    oFigure = self.gl.figure()
+
+        env = self.env
+
+        self.renderRail(spacing=spacing, sRailColor=sRailColor, curves=curves, arrows=arrows)
+
         # Draw each agent + its orientation + its target
         if agents:
             self.plotAgents(targets=True, iSelectedAgent=iSelectedAgent)
-        if obsrender:
+        if show_observations:
             self.renderObs(range(env.get_num_agents()), env.dev_obs_dict)
         # Draw some textual information like fps
         yText = [-0.3, -0.6, -0.9]
@@ -657,23 +671,26 @@ class RenderTool(object):
         # TODO: for MPL, we don't want to call clf (called by endframe)
         # for QT, we need to call endFrame()
         # if not show:
-        self.gl.endFrame()
+        if type(self.gl) is QTGL:
+            self.gl.endFrame()
+            if show:
+                self.gl.show(block=False)
 
-        # t2 = time.time()
-        # print(t2 - t1, "seconds")
+        if type(self.gl) is MPLGL:
+            if show:
+                self.gl.show(block=False)
+            # self.gl.endFrame()
 
-        if show:
-            self.gl.show(block=False)
-            self.gl.pause(0.00001)
+        self.gl.pause(0.00001)
 
         return
 
-    def _draw_square(self, center, size, color):
+    def _draw_square(self, center, size, color, opacity=255, layer=0):
         x0 = center[0] - size / 2
         x1 = center[0] + size / 2
         y0 = center[1] - size / 2
         y1 = center[1] + size / 2
-        self.gl.plot([x0, x1, x1, x0, x0], [y0, y0, y1, y1, y0], color=color)
+        self.gl.plot([x0, x1, x1, x0, x0], [y0, y0, y1, y1, y0], color=color, layer=layer, opacity=opacity)
 
     def getImage(self):
         return self.gl.getImage()
@@ -707,10 +724,9 @@ class RenderTool(object):
 
             gP0 = array([gX1, gY1, gZ1])
 
-    def renderEnv2(
-        self, show=False, curves=True, spacing=False, arrows=False, agents=True, renderobs=True, sRailColor="gray",
-            frames=False, iEpisode=None, iStep=None, iSelectedAgent=None,
-            action_dict=dict()):
+    def renderEnv2(self, show=False, curves=True, spacing=False, arrows=False, agents=True, renderobs=True,
+                   sRailColor="gray", frames=False, iEpisode=None, iStep=None, iSelectedAgent=None,
+                   action_dict=dict()):
         """
         Draw the environment using matplotlib.
         Draw into the figure if provided.
@@ -749,9 +765,9 @@ class RenderTool(object):
             if action_isValid:
                 self.gl.setAgentAt(iAgent, *agent.position, agent.direction, new_direction, color=oColor)
             else:
-                pass
-                # print("invalid action - agent ", iAgent, " bend ", agent.direction, new_direction)
-                # self.gl.setAgentAt(iAgent, *agent.position, agent.direction, new_direction)
+                # pass
+                print("invalid action - agent ", iAgent, " bend ", agent.direction, new_direction)
+                self.gl.setAgentAt(iAgent, *agent.position, agent.direction, new_direction)
 
         self.gl.show()
         for i in range(3):
diff --git a/flatland/utils/svg.py b/flatland/utils/svg.py
index 32d5631839f43964cf5ff7d94520aacf67db0488..fb8b987cae83f4b8a3696e0e9496659045709c17 100644
--- a/flatland/utils/svg.py
+++ b/flatland/utils/svg.py
@@ -80,6 +80,7 @@ class Zug(object):
         self.svg_curve2 = SVG("svg/Zug_2_Weiche_#0091ea.svg")
 
     def getSvg(self, iAgent, iDirIn, iDirOut, color=None):
+
         delta_dir = (iDirOut - iDirIn) % 4
         # if delta_dir != 0:
         #    print("Bend:", iAgent, iDirIn, iDirOut)
@@ -105,7 +106,7 @@ class Zug(object):
 class Track(object):
     def __init__(self):
         dFiles = {
-            "": "Background_#91D1DD.svg",
+            "": "Background_#9CCB89.svg",
             "WE": "Gleis_Deadend.svg",
             "WW EE NN SS": "Gleis_Diamond_Crossing.svg",
             "WW EE": "Gleis_horizontal.svg",
@@ -132,7 +133,7 @@ class Track(object):
 
         lDirs = list("NESW")
 
-        svgBG = SVG("./svg/Background_#91D1DD.svg")
+        svgBG = SVG("./svg/Background_#9CCB89.svg")
 
         for sTrans, sFile in dFiles.items():
             svg = SVG("./svg/" + sFile)
diff --git a/images/basic-env.npz b/images/basic-env.npz
index 356da5d70146b3b8081dd99c0fe5e6bd70646e53..8ffaf023e1116b0c92702212ddb04c71b82f0655 100644
Binary files a/images/basic-env.npz and b/images/basic-env.npz differ
diff --git a/make_docs.py b/make_docs.py
index 7ccbdb736b9b53743b58cfd985538705c5e79f08..8cc1124a6fe624fe5afff416450a0a5d30d654ca 100644
--- a/make_docs.py
+++ b/make_docs.py
@@ -25,5 +25,6 @@ os.environ["SPHINXPROJ"] = "flatland"
 os.chdir('docs')
 subprocess.call(['python', '-msphinx', '-M', 'clean', '.', '_build'])
 subprocess.call(['python', '-msphinx', '-M', 'html', '.', '_build'])
+subprocess.call(['python', '-mpydeps', '../flatland', '-o', '_build/html/flatland.svg'])
 
 browser('_build/html/index.html')
diff --git a/notebooks/Editor2.ipynb b/notebooks/Editor2.ipynb
index f2481d086d67376ec6018b758e1a88edd4222183..5dcfd5595bbc7d15e876397e01f9c369abb91c48 100644
--- a/notebooks/Editor2.ipynb
+++ b/notebooks/Editor2.ipynb
@@ -9,9 +9,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 25,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
    "source": [
     "%load_ext autoreload\n",
     "%autoreload 2"
@@ -19,7 +28,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -32,7 +41,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
@@ -54,24 +63,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 28,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "cpu\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from flatland.utils.editor import EditorMVC, EditorModel, View, Controller"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
@@ -93,19 +94,19 @@
    "metadata": {},
    "source": [
     "## Instructions\n",
-    "- Drag to draw\n",
-    "  - improved dead-ends\n",
-    "- Shift-Drag to erase rails\n",
-    "  - erasing dead ends not yet automated - drag right across them\n",
+    "- Drag to draw (improved dead-ends)\n",
+    "- Shift-Drag to erase rails (erasing dead ends not yet automated - drag right across them)\n",
     "- ctrl-click to add agent\n",
     "  - direction chosen randomly to fit rail\n",
     "- ctrl-shift-click to add target for last agent\n",
-    "  - target can be moved by repeating "
+    "  - target can be moved by repeating\n",
+    "- To resize the env (cannot preserve work):\n",
+    "  - select \"Regen\" tab, set regen size slider, click regenerate."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 30,
    "metadata": {
     "scrolled": false
    },
@@ -113,7 +114,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6b1f996bbb834fcc962c80041465ac2d",
+       "model_id": "47af532101994c36a053e16a9b31dcd6",
        "version_major": 2,
        "version_minor": 0
       },
@@ -131,7 +132,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 31,
    "metadata": {
     "scrolled": false
    },
@@ -139,7 +140,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "bf8e0dd6aa564b42a5ec3aa19c31a679",
+       "model_id": "949dc7440647445e82dd1ca0f250e5ca",
        "version_major": 2,
        "version_minor": 0
       },
@@ -155,6 +156,26 @@
     "mvc.view.wOutput.clear_output()\n",
     "mvc.view.wOutput"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0, 0)"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(mvc.editor.env.agents), len(mvc.editor.env.agents_static)"
+   ]
   }
  ],
  "metadata": {
@@ -174,7 +195,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.6.8"
   },
   "latex_envs": {
    "LaTeX_envs_menu_present": true,
diff --git a/requirements_dev.txt b/requirements_dev.txt
index 08fcc9d17a409f4df71b0b67e037dde5ae042f8a..67b2a0e632283c89d07d778f935f0328559ffb8a 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -2,13 +2,16 @@ bumpversion==0.5.3
 wheel==0.32.1
 watchdog==0.9.0
 flake8==3.5.0
+pydeps==1.7.2
 tox==3.5.2
 coverage==4.5.1
 Sphinx==1.8.1
 twine==1.12.1
+benchmarker==4.0.1
 
 pytest==3.8.2
 pytest-runner==4.2
+pytest-xvfb==1.2.0
 sphinx-rtd-theme==0.4.3
 
 numpy==1.16.2
diff --git a/tests/test_player.py b/tests/test_player.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b2745f2e372ca80cd2fb5cf9dcaa3db96fb910a
--- /dev/null
+++ b/tests/test_player.py
@@ -0,0 +1,8 @@
+
+# from examples.play_model import main
+from examples.tkplay import tkmain
+
+
+def test_main():
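+    # smoke test: run the tkinter-based player for two short trials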
+    tkmain(n_trials=2)
+
diff --git a/tests/test_rendertools.py b/tests/test_rendertools.py
index 245f2f327524653b3cf03bf921f6db6b0d4b51fb..8204a305328df746a772d034f3c763c848cceb93 100644
--- a/tests/test_rendertools.py
+++ b/tests/test_rendertools.py
@@ -4,18 +4,14 @@
 Tests for `flatland` package.
 """
 
-from flatland.envs.rail_env import RailEnv, random_rail_generator
-import numpy as np
-#<<<<<<< HEAD
-#=======
-# import os
-#>>>>>>> dc2fa1ee0244b15c76d89ab768c5e1bbd2716147
 import sys
 
 import matplotlib.pyplot as plt
+import numpy as np
 
 import flatland.utils.rendertools as rt
 from flatland.envs.observations import TreeObsForRailEnv
+from flatland.envs.rail_env import RailEnv, random_rail_generator
 
 
 def checkFrozenImage(oRT, sFileImage, resave=False):
@@ -50,8 +46,8 @@ def test_render_env(save_new_images=False):
                    )
     sfTestEnv = "env-data/tests/test1.npy"
     oEnv.rail.load_transition_map(sfTestEnv)
-    oRT = rt.RenderTool(oEnv)
-    oRT.renderEnv()
+    oRT = rt.RenderTool(oEnv, gl="PIL", show=False)
+    oRT.renderEnv(show=False)
 
     checkFrozenImage(oRT, "basic-env.npz", resave=save_new_images)
 
diff --git a/tox.ini b/tox.ini
index 1c9a170724b2ed111eac11061ba598eea52a571c..cb8d7db144975245c5f087ecd1dfe473baaf183f 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py36, py37, flake8, docs, coverage
+envlist = py36, py37, flake8, docs, coverage, benchmark, sh
 
 [travis]
 python =
@@ -8,29 +8,45 @@ python =
 
 [flake8]
 max-line-length = 120
-ignore = E121 E126 E123 E128 E133 E226 E241 E242 E704 W291 W293 W391 W503 W504 W505 
+ignore = E121 E126 E123 E128 E133 E226 E241 E242 E704 W291 W293 W391 W503 W504 W505
 
 [testenv:flake8]
 basepython = python
 deps = flake8
-commands = flake8 flatland
+passenv = DISPLAY
+commands = flake8 flatland tests examples benchmarks
 
 [testenv:docs]
 basepython = python
 whitelist_externals = make
+passenv =
+    DISPLAY
 commands = make docs
 
 [testenv:coverage]
 basepython = python
 whitelist_externals = make
-commands = 
+passenv = DISPLAY
+commands =
     pip install -U pip
     pip install -r requirements_dev.txt
     make coverage
 
+[testenv:benchmark]
+basepython = python
+setenv =
+    PYTHONPATH = {toxinidir}
+passenv = DISPLAY
+whitelist_externals = sh
+commands =
+    sh -c 'ls benchmarks/*.py | xargs -n 1 python'
+
 [testenv]
+whitelist_externals = sh
+                      pip
 setenv =
     PYTHONPATH = {toxinidir}
+passenv = DISPLAY
 deps =
     -r{toxinidir}/requirements_dev.txt
 ; If you want to make tox run the tests with the same versions, create a
@@ -39,6 +55,7 @@ deps =
 commands =
     pip install -U pip
     pip install -r requirements_dev.txt
+    sh -c 'echo DISPLAY: $DISPLAY'
     py.test --basetemp={envtmpdir}