diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 1d212e6d5a689051d40539f0db7fb3dc0ca6841a..698ceca148d712874f7d3c59ff29d029e7dfeb33 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -55,7 +55,7 @@ build_and_deploy_docs:
         - echo "Bucket=${BUCKET_NAME}"
         - echo "AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}"
         - echo "CI_COMMIT_REF_SLUG=${CI_COMMIT_REF_SLUG}"
-        - xvfb-run tox -v -e docs
+        - xvfb-run tox -v -e docs --recreate
         - aws s3 cp ./docs/_build/html/ s3://${BUCKET_NAME} --recursive
     environment:
         name: ${CI_COMMIT_REF_SLUG}
diff --git a/AUTHORS.md b/AUTHORS.md
new file mode 100644
index 0000000000000000000000000000000000000000..272f8db9e2713c06d5489f17031ae2922409402b
--- /dev/null
+++ b/AUTHORS.md
@@ -0,0 +1,27 @@
+Credits
+=======
+
+Development
+-----------
+
+* Christian Baumberger <christian.baumberger@sbb.ch>
+* Christian Eichenberger <christian.markus.eichenberger@sbb.ch>
+* Adrian Egli <adrian.egli@sbb.ch>
+* Mattias Ljungström
+* Sharada Mohanty <mohanty@aicrowd.com>
+* Guillaume Mollard <guillaume.mollard2@gmail.com>
+* Erik Nygren <erik.nygren@sbb.ch>
+* Giacomo Spigler <giacomo.spigler@gmail.com>
+* Jeremy Watson
+
+
+Acknowledgements
+----------------
+* Vaibhav Agrawal <theinfamouswayne@gmail.com>
+* Anurag Ghosh
+
+
+Contributors
+------------
+
+None yet. Why not be the first?
diff --git a/AUTHORS.rst b/AUTHORS.rst
deleted file mode 100644
index f7ab2e089c2315142abbdbe52cfe17492218d341..0000000000000000000000000000000000000000
--- a/AUTHORS.rst
+++ /dev/null
@@ -1,23 +0,0 @@
-=======
-Credits
-=======
-
-Development
-----------------
-
-* S.P. Mohanty <mohanty@aicrowd.com>
-
-* G Spigler <giacomo.spigler@gmail.com>
-
-* A Egli <adrian.egli@sbb.ch>
-
-* E Nygren <erik.nygren@sbb.ch>
-
-* Ch. Eichenberger <christian.markus.eichenberger@sbb.ch>
-
-* Mattias Ljungström
-
-Contributors
-------------
-
-None yet. Why not be the first?
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index d015f7a6a5096b2fd55f7d4dc3ab1b6a1ff91426..b219cfeeb6bf7b634107946adacec7d335cbfe1b 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -135,3 +135,70 @@ $ git push
 $ git push --tags
 
 TODO: Travis will then deploy to PyPI if tests pass. (To be configured properly by Mohanty)
+
+
+Local Evaluation
+----------------
+
+This document explains how to locally evaluate your submissions before making
+an official submission to the competition.
+
+Requirements
+~~~~~~~~~~~~
+
+* **flatland-rl** : We expect that you have `flatland-rl` installed by following the instructions in `README.md <README.md>`_.
+
+* **redis** : Additionally you will also need to have  `redis installed <https://redis.io/topics/quickstart>`_ and **should have it running in the background.**
+
+Test Data
+~~~~~~~~~
+
+* **test env data** : You can `download and untar the test-env-data <https://www.aicrowd.com/challenges/flatland-challenge/dataset_files>`_, at a location of your choice, let's say `/path/to/test-env-data/`. After untarring the folder, the folder structure should look something like:
+
+
+.. code-block:: console
+
+    .
+    └── test-env-data
+        ├── Test_0
+        │   ├── Level_0.pkl
+        │   └── Level_1.pkl
+        ├── Test_1
+        │   ├── Level_0.pkl
+        │   └── Level_1.pkl
+        ├..................
+        ├..................
+        ├── Test_8
+        │   ├── Level_0.pkl
+        │   └── Level_1.pkl
+        └── Test_9
+            ├── Level_0.pkl
+            └── Level_1.pkl
+
+Evaluation Service
+~~~~~~~~~~~~~~~~~~
+
+* **start evaluation service** : Then you can start the evaluator by running :
+
+.. code-block:: console
+
+    flatland-evaluator --tests /path/to/test-env-data/
+
+RemoteClient
+~~~~~~~~~~~~
+
+* **run client** : Some `sample submission code can be found in the starter-kit <https://github.com/AIcrowd/flatland-challenge-starter-kit/>`_, but before you can run your code locally using `FlatlandRemoteClient`, you will have to set the `AICROWD_TESTS_FOLDER` environment variable to the location where you previously untarred the folder with `the test-env-data`:
+
+
+.. code-block:: console
+
+    export AICROWD_TESTS_FOLDER="/path/to/test-env-data/"
+
+    # or on Windows :
+    #
+    # set AICROWD_TESTS_FOLDER "\path\to\test-env-data\"
+
+    # and then finally run your code
+    python run.py
+
+
diff --git a/MANIFEST.in b/MANIFEST.in
index ca50ea340f7f443d230f1f473a34331525fdbef1..6669a47ef184b5f70befc0a34088ee388435ba82 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,8 +1,9 @@
-include AUTHORS.rst
+include AUTHORS.md
 include CONTRIBUTING.rst
-include HISTORY.rst
+include changelog.md
 include LICENSE
-include README.rst
+include README.md
+
 include requirements_dev.txt
 include requirements_continuous_integration.txt
 
@@ -16,4 +17,4 @@ recursive-include tests *
 recursive-exclude * __pycache__
 recursive-exclude * *.py[co]
 
-recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif
+recursive-include docs *.rst *.md conf.py *.jpg *.png *.gif
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..14102defb5f7508d9ceb04e3cd0f1a441c4ca172
--- /dev/null
+++ b/README.md
@@ -0,0 +1,153 @@
+Flatland
+========
+
+![Test Running](https://gitlab.aicrowd.com/flatland/flatland/badges/master/pipeline.svg)![Test Coverage](https://gitlab.aicrowd.com/flatland/flatland/badges/master/coverage.svg "asdff")
+
+
+![Flatland](https://i.imgur.com/0rnbSLY.gif)
+
+## About Flatland
+
+Flatland is an open-source toolkit for developing and comparing Multi Agent Reinforcement Learning algorithms in little (or ridiculously large !) gridworlds.
+
+The base environment is a two-dimensional grid in which many agents can be placed, and each agent must solve one or more navigational tasks in the grid world. More details about the environment and the problem statement can be found in the [official docs](http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/).
+
+This library was developed by [SBB](<https://www.sbb.ch/en/>), [AIcrowd](https://www.aicrowd.com/) and numerous contributors and AIcrowd research fellows from the AIcrowd community. 
+
+This library was developed specifically for the [Flatland Challenge](https://www.aicrowd.com/challenges/flatland-challenge), in which we strongly encourage you to take part.
+
+**NOTE This document is best viewed in the official documentation site at** [Flatland-RL Docs](http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/)
+
+
+## Installation
+### Installation Prerequisites
+
+* Install [Anaconda](https://www.anaconda.com/distribution/) by following the instructions [here](https://www.anaconda.com/distribution/).
+* Create a new conda environment:
+
+```console
+$ conda create python=3.6 --name flatland-rl
+$ conda activate flatland-rl
+```
+
+* Install the necessary dependencies
+
+```console
+$ conda install -c conda-forge cairosvg pycairo
+$ conda install -c anaconda tk  
+```
+
+### Install Flatland
+#### Stable Release
+
+To install flatland, run this command in your terminal:
+
+```console
+$ pip install flatland-rl
+```
+
+This is the preferred method to install flatland, as it will always install the most recent stable release.
+
+If you don't have [pip](https://pip.pypa.io) installed, this
+[Python installation guide](http://docs.python-guide.org/en/latest/starting/installation/)
+can guide you through the process.
+
+
+
+
+#### From sources
+
+The sources for flatland can be downloaded from [gitlab](https://gitlab.aicrowd.com/flatland/flatland)
+
+You can clone the public repository:
+```console
+$ git clone git@gitlab.aicrowd.com:flatland/flatland.git
+```
+
+Once you have a copy of the source, you can install it with:
+
+```console
+$ python setup.py install
+```
+
+### Test installation
+
+Test that the installation works
+
+```console
+$ flatland-demo
+```
+
+
+
+### Jupyter Canvas Widget
+If you work with jupyter notebook you need to install the Jupyter Canvas Widget. To install the Jupyter Canvas Widget read also
+https://github.com/Who8MyLunch/Jupyter_Canvas_Widget#installation
+
+## Basic Usage
+
+Basic usage of the RailEnv environment used by the Flatland Challenge
+
+
+```python
+import numpy as np
+import time
+from flatland.envs.rail_generators import complex_rail_generator
+from flatland.envs.schedule_generators import complex_schedule_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
+
+NUMBER_OF_AGENTS = 10
+env = RailEnv(
+            width=20,
+            height=20,
+            rail_generator=complex_rail_generator(
+                                    nr_start_goal=10,
+                                    nr_extra=1,
+                                    min_dist=8,
+                                    max_dist=99999,
+                                    seed=0),
+            schedule_generator=complex_schedule_generator(),
+            number_of_agents=NUMBER_OF_AGENTS)
+
+env_renderer = RenderTool(env)
+
+def my_controller():
+    """
+    You are supposed to write this controller
+    """
+    _action = {}
+    for _idx in range(NUMBER_OF_AGENTS):
+        _action[_idx] = np.random.randint(0, 5)
+    return _action
+
+for step in range(100):
+
+    _action = my_controller()
+    obs, all_rewards, done, _ = env.step(_action)
+    print("Rewards: {}, [done={}]".format( all_rewards, done))
+    env_renderer.render_env(show=True, frames=False, show_observations=False)
+    time.sleep(0.3)
+```
+
+and **ideally** you should see something along the lines of
+
+![Flatland](https://i.imgur.com/VrTQVeM.gif)
+
+Best of Luck !!
+
+## Communication
+* [Official Documentation](http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/)
+* [Discussion Forum](https://discourse.aicrowd.com/c/flatland-challenge)
+* [Issue Tracker](https://gitlab.aicrowd.com/flatland/flatland/issues/)
+
+
+## Contributions
+Please follow the [Contribution Guidelines](http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/contributing.html) for more details on how you can successfully contribute to the project. We enthusiastically look forward to your contributions.
+
+## Partners
+<a href="https://sbb.ch" target="_blank"><img src="https://i.imgur.com/OSCXtde.png" alt="SBB"/></a>
+<a href="https://www.aicrowd.com"  target="_blank"><img src="https://avatars1.githubusercontent.com/u/44522764?s=200&v=4" alt="AICROWD"/></a>
+
+
+
diff --git a/README.rst b/README.rst
deleted file mode 100644
index e0376fe7b201ff18ce88c3d69187c011df86debf..0000000000000000000000000000000000000000
--- a/README.rst
+++ /dev/null
@@ -1,147 +0,0 @@
-========
-Flatland
-========
-
-
-
-.. image:: https://gitlab.aicrowd.com/flatland/flatland/badges/master/pipeline.svg
-     :target: https://gitlab.aicrowd.com/flatland/flatland/pipelines
-     :alt: Test Running
-
-.. image:: https://gitlab.aicrowd.com/flatland/flatland/badges/master/coverage.svg
-     :target: https://gitlab.aicrowd.com/flatland/flatland/pipelines
-     :alt: Test Coverage
-
-'
-
-.. image:: https://i.imgur.com/0rnbSLY.gif
-  :width: 800
-  :align: center
-
-Flatland is a opensource toolkit for developing and comparing Multi Agent Reinforcement Learning algorithms in little (or ridiculously large !) gridworlds.
-The base environment is a two-dimensional grid in which many agents can be placed, and each agent must solve one or more navigational tasks in the grid world. More details about the environment and the problem statement can be found in the `official docs <http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/>`_.
-
-This library was developed by `SBB <https://www.sbb.ch/en/>`_ , `AIcrowd <https://www.aicrowd.com/>`_ and numerous contributors and AIcrowd research fellows from the AIcrowd community.
-
-This library was developed specifically for the `Flatland Challenge <https://www.aicrowd.com/challenges/flatland-challenge>`_ in which we strongly encourage you to take part in.
-
-
-**NOTE This document is best viewed in the official documentation site at** `Flatland-RL Docs <http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/readme.html>`_
-
-Contents
-===========
-* `Official Documentation <http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/readme.html>`_
-* `About Flatland <http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/about_flatland.html>`_
-* `Installation <http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/installation.html>`_
-* `Getting Started <http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/gettingstarted.html>`_
-* `Frequently Asked Questions <http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/FAQ.html>`_
-* `Code Docs <http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/modules.html>`_
-* `Contributing Guidelines <http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/contributing.html>`_
-* `Discussion Forum <https://discourse.aicrowd.com/c/flatland-challenge>`_
-* `Issue Tracker <https://gitlab.aicrowd.com/flatland/flatland/issues/>`_
-
-Quick Start
-===========
-
-* Install `Anaconda <https://www.anaconda.com/distribution/>`_ by following the instructions `here <https://www.anaconda.com/distribution/>`_
-* Install the dependencies and the library
-
-.. code-block:: console
-
-    $ conda create python=3.6 --name flatland-rl
-    $ conda activate flatland-rl
-    $ conda install -c conda-forge cairosvg pycairo
-    $ conda install -c anaconda tk
-    $ pip install flatland-rl
-
-* Test that the installation works
-
-.. code-block:: console
-
-    $ flatland-demo
-
-
-Basic Usage
-============
-
-Basic usage of the RailEnv environment used by the Flatland Challenge
-
-.. code-block:: python
-
-    import numpy as np
-    import time
-    from flatland.envs.rail_generators import complex_rail_generator
-    from flatland.envs.schedule_generators import complex_schedule_generator
-    from flatland.envs.rail_env import RailEnv
-    from flatland.utils.rendertools import RenderTool
-
-    NUMBER_OF_AGENTS = 10
-    env = RailEnv(
-                width=20,
-                height=20,
-                rail_generator=complex_rail_generator(
-                                        nr_start_goal=10,
-                                        nr_extra=1,
-                                        min_dist=8,
-                                        max_dist=99999,
-                                        seed=0),
-                schedule_generator=complex_schedule_generator(),
-                number_of_agents=NUMBER_OF_AGENTS)
-
-    env_renderer = RenderTool(env)
-
-    def my_controller():
-        """
-        You are supposed to write this controller
-        """
-        _action = {}
-        for _idx in range(NUMBER_OF_AGENTS):
-            _action[_idx] = np.random.randint(0, 5)
-        return _action
-
-    for step in range(100):
-
-        _action = my_controller()
-        obs, all_rewards, done, _ = env.step(_action)
-        print("Rewards: {}, [done={}]".format( all_rewards, done))
-        env_renderer.render_env(show=True, frames=False, show_observations=False)
-        time.sleep(0.3)
-
-and **ideally** you should see something along the lines of
-
-.. image:: https://i.imgur.com/VrTQVeM.gif
-  :align: center
-  :width: 600px
-
-Best of Luck !!
-
-Contributions
-=============
-Flatland is an opensource project, and we very much value all and any contributions you make towards the project.
-Please follow the `Contribution Guidelines <http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/contributing.html>`_ for more details on how you can successfully contribute to the project. We enthusiastically look forward to your contributions.
-
-Partners
-============
-.. image:: https://i.imgur.com/OSCXtde.png
-   :target: https://sbb.ch
-.. image:: https://avatars1.githubusercontent.com/u/44522764?s=200&v=4
-   :target: https://www.aicrowd.com
-
-
-Authors
-============
-
-* Christian Eichenberger <christian.markus.eichenberger@sbb.ch>
-* Adrian Egli <adrian.egli@sbb.ch>
-* Mattias Ljungström
-* Sharada Mohanty <mohanty@aicrowd.com>
-* Guillaume Mollard <guillaume.mollard2@gmail.com>
-* Erik Nygren <erik.nygren@sbb.ch>
-* Giacomo Spigler <giacomo.spigler@gmail.com>
-* Jeremy Watson
-
-
-Acknowledgements
-====================
-* Vaibhav Agrawal <theinfamouswayne@gmail.com>
-* Anurag Ghosh
diff --git a/changelog.md b/changelog.md
index cad8ee000506dea3abf05dedb27a43aaaf0bf8b7..4cb76d2e326717b0e515f55e32c33e61c62631e7 100644
--- a/changelog.md
+++ b/changelog.md
@@ -38,7 +38,8 @@ The stock `ShortestPathPredictorForRailEnv` now respects the different agent spe
   - `rail_generator` now only returns the grid and optionally hints (a python dictionary); the hints are currently use for distance_map and communication of start and goal position in complex rail generator.
   - `schedule_generator` takes a `GridTransitionMap` and the number of agents and optionally the `agents_hints` field of the hints dictionary.
   - Inrodcution of types hints:
-```
+
+```python
 RailGeneratorProduct = Tuple[GridTransitionMap, Optional[Any]]
 RailGenerator = Callable[[int, int, int, int], RailGeneratorProduct]
 AgentPosition = Tuple[int, int]
@@ -62,7 +63,7 @@ To set up multiple speeds you have to modify the `agent.speed_data` within your
 Just like in real-worl transportation systems we introduced stochastic events to disturb normal traffic flow. Currently we implemented a malfunction process that stops agents at random time intervalls for a random time of duration.
 Currently the Flatland environment can be initiated with the following poisson process parameters:
 
-```
+```python
 # Use a the malfunction generator to break agents from time to time
 stochastic_data = {'prop_malfunction': 0.1,  # Percentage of defective agents
                    'malfunction_rate': 30,  # Rate of malfunction occurence
diff --git a/docs/readme.rst b/docs/01_readme.rst
similarity index 100%
rename from docs/readme.rst
rename to docs/01_readme.rst
diff --git a/docs/03_tutorials.rst b/docs/03_tutorials.rst
new file mode 100644
index 0000000000000000000000000000000000000000..e862221d8c405cc7000399e4ffd7f092bdc4bc22
--- /dev/null
+++ b/docs/03_tutorials.rst
@@ -0,0 +1,5 @@
+.. include:: tutorials/01_gettingstarted.rst
+.. include:: tutorials/02_observationbuilder.rst
+.. include:: tutorials/03_rail_and_schedule_generator.rst
+.. include:: tutorials/04_stochasticity.rst
+.. include:: tutorials/05_multispeed.rst
diff --git a/docs/03_tutorials_toc.rst b/docs/03_tutorials_toc.rst
new file mode 100644
index 0000000000000000000000000000000000000000..8fa32c816c3c2b47a6972115b2aba906de4e9229
--- /dev/null
+++ b/docs/03_tutorials_toc.rst
@@ -0,0 +1,7 @@
+Tutorials
+=========
+
+.. toctree::
+   :maxdepth: 2
+
+   03_tutorials
diff --git a/docs/04_specifications.rst b/docs/04_specifications.rst
new file mode 100644
index 0000000000000000000000000000000000000000..4a7ffee65dac39e4d22ccfad47abecc7b6bb616f
--- /dev/null
+++ b/docs/04_specifications.rst
@@ -0,0 +1,7 @@
+.. include:: specifications/intro.rst
+.. include:: specifications/core.rst
+.. include:: specifications/railway.rst
+.. include:: specifications/intro_observation_actions.rst
+.. include:: specifications/rendering.rst
+.. include:: specifications/visualization.rst
+.. include:: specifications/FAQ.rst
diff --git a/docs/04_specifications_toc.rst b/docs/04_specifications_toc.rst
new file mode 100644
index 0000000000000000000000000000000000000000..b7155b04a8557025cdbd05aba51ed1a31b1801ba
--- /dev/null
+++ b/docs/04_specifications_toc.rst
@@ -0,0 +1,10 @@
+Specifications
+==============
+
+
+.. toctree::
+   :maxdepth: 2
+
+   04_specifications
+
+
diff --git a/docs/contributing.rst b/docs/06_contributing.rst
similarity index 100%
rename from docs/contributing.rst
rename to docs/06_contributing.rst
diff --git a/docs/changelog_index.rst b/docs/07_changes.rst
similarity index 57%
rename from docs/changelog_index.rst
rename to docs/07_changes.rst
index 081c500ffe5427b3dc9987c5fd0ed2ae6482ba6a..9db3a352f3107591da74799905e191715aef844a 100644
--- a/docs/changelog_index.rst
+++ b/docs/07_changes.rst
@@ -4,5 +4,4 @@ Changes
 .. toctree::
    :maxdepth: 2
 
-   changelog.md
-   flatland_2.0.md
+   07_changes_include.rst
diff --git a/docs/07_changes_include.rst b/docs/07_changes_include.rst
new file mode 100644
index 0000000000000000000000000000000000000000..33ca17b289dfa7fd53d356c9b8c20c22a957ae75
--- /dev/null
+++ b/docs/07_changes_include.rst
@@ -0,0 +1,2 @@
+.. include:: ../changelog.rst
+.. include:: ../flatland_2.0.rst
diff --git a/docs/authors.rst b/docs/08_authors.rst
similarity index 100%
rename from docs/authors.rst
rename to docs/08_authors.rst
diff --git a/docs/about_flatland.rst b/docs/about_flatland.rst
deleted file mode 100644
index 84f2e329daf41579c8dffa4836387e5ac8f058f5..0000000000000000000000000000000000000000
--- a/docs/about_flatland.rst
+++ /dev/null
@@ -1,44 +0,0 @@
-About Flatland
-==============
-
-.. image:: https://i.imgur.com/rKGEmsk.gif
-  :align: center
-
-
-
-Flatland is a toolkit for developing and comparing multi agent reinforcement learning algorithms on grids.
-The base environment is a two-dimensional grid in which many agents can be placed. Each agent must solve one or more tasks in the grid world.
-In general, agents can freely navigate from cell to cell. However, cell-to-cell navigation can be restricted by transition maps.
-Each cell can hold an own transition map. By default, each cell has a default transition map defined which allows all transitions to its
-eight neighbor cells (go up and left, go up, go up and right, go right, go down and right, go down, go down and left, go left).
-So, the agents can freely move from cell to cell.
-
-The general purpose of the implementation allows to implement any kind of two-dimensional gird based environments.
-It can be used for many learning task where a two-dimensional grid could be the base of the environment.
-
-Flatland delivers a python implementation which can be easily extended. And it provides different baselines for different environments.
-Each environment enables an interesting task to solve. For example, the mutli-agent navigation task for railway train dispatching is a very exciting topic.
-It can be easily extended or adapted to the airplane landing problem. This can further be the basic implementation for many other tasks in transportation and logistics.
-
-Mapping a railway infrastructure into a grid world is an excellent example showing how the movement of an agent must be restricted.
-As trains can normally not run backwards and they have to follow rails the transition for one cell to the other depends also on train's orientation, respectively on train's travel direction.
-Trains can only change the traveling path at switches. There are two variants of switches. The first kind of switch is the splitting "switch", where trains can change rails and in consequence they can change the traveling path.
-The second kind of switch is the fusion switch, where train can change the sequence. That means two rails come together. Thus, the navigation behavior of a train is very restricted.
-The railway planning problem where many agents share same infrastructure is a very complex problem.
-
-Furthermore, trains have a departing location where they cannot depart earlier than the committed departure time.
-Then they must arrive at destination not later than the committed arrival time. This makes the whole planning problem
-very complex. In such a complex environment cooperation is essential. Thus, agents must learn to cooperate in a way that all trains (agents) arrive on time.
-
-This library was developed by `SBB <https://www.sbb.ch/en/>`_ , `AIcrowd <https://www.aicrowd.com/>`_ and numerous contributors and AIcrowd research fellows from the AIcrowd community. 
-
-This library was developed specifically for the `Flatland Challenge <https://www.aicrowd.com/challenges/flatland-challenge>`_ in which we strongly encourage you to take part in. 
-
-
-.. image:: https://i.imgur.com/pucB84T.gif
-  :align: center
-  :width: 600px
-
-.. image:: https://i.imgur.com/xgWGRse.gif
-  :align: center
-  :width: 600px
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
index 66f5183f192dc083f87fc9a0175c9f6fe733545e..4bec1b3242658dedf7add9aea2bc56fac028ce00 100755
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -33,7 +33,7 @@ sys.path.insert(0, os.path.abspath('..'))
 
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['recommonmark', 'sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx.ext.intersphinx', 'numpydoc']
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx.ext.intersphinx', 'numpydoc']
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
diff --git a/docs/index.rst b/docs/index.rst
index ba35554ab50a17f118e744eefd3a77967828a17b..94efbc91d33db3b4c459d31665d98f1c7a333b54 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,26 +1,19 @@
 Welcome to flatland's documentation!
 ======================================
 
+.. include:: ../README.rst
+
 .. toctree::
    :maxdepth: 2
    :caption: Contents:
 
-   readme
-   installation
-   about_flatland
-   gettingstarted
-   intro_observationbuilder
-   intro_observation_actions
-   specifications_index
-   modules
-   FAQ
-   localevaluation
-   contributing
-   changelog_index
-   authors
-
-
-
+   01_readme
+   03_tutorials_toc
+   04_specifications_toc
+   05_apidoc
+   06_contributing
+   07_changes
+   08_authors
 
 Indices and tables
 ==================
diff --git a/docs/installation.rst b/docs/installation.rst
deleted file mode 100644
index 99bee32b887eb759653761a4176cffb531217ea7..0000000000000000000000000000000000000000
--- a/docs/installation.rst
+++ /dev/null
@@ -1,70 +0,0 @@
-.. highlight:: shell
-
-============
-Installation
-============
-
-Software Runtime & Dependencies
--------------------------------
-
-This is the recommended way of installation and running flatland's dependencies.
-
-* Install `Anaconda <https://www.anaconda.com/distribution/>`_ by following the instructions `here <https://www.anaconda.com/distribution/>`_
-* Create a new conda environment 
-
-.. code-block:: console
-
-    $ conda create python=3.6 --name flatland-rl
-    $ conda activate flatland-rl
-
-* Install the necessary dependencies
-
-.. code-block:: console
-
-    $ conda install -c conda-forge cairosvg pycairo
-    $ conda install -c anaconda tk  
-
-
-Stable release
---------------
-
-To install flatland, run this command in your terminal:
-
-.. code-block:: console
-
-    $ pip install flatland-rl
-
-This is the preferred method to install flatland, as it will always install the most recent stable release.
-
-If you don't have `pip`_ installed, this `Python installation guide`_ can guide
-you through the process.
-
-.. _pip: https://pip.pypa.io
-.. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/
-
-
-From sources
-------------
-
-The sources for flatland can be downloaded from the `Gitlab repo`_.
-
-You can clone the public repository:
-
-.. code-block:: console
-
-    $ git clone git@gitlab.aicrowd.com:flatland/flatland.git
-
-Once you have a copy of the source, you can install it with:
-
-.. code-block:: console
-
-    $ python setup.py install
-
-
-.. _Gitlab repo: https://gitlab.aicrowd.com/flatland/flatland
-
-
-Jupyter Canvas Widget
----------------------
-If you work with jupyter notebook you need to install the Jupyer Canvas Widget. To install the Jupyter Canvas Widget read also
-https://github.com/Who8MyLunch/Jupyter_Canvas_Widget#installation
diff --git a/docs/localevaluation.rst b/docs/localevaluation.rst
deleted file mode 100644
index 10f9001ba1722e93d7ecf6347fb6099d31f65ed7..0000000000000000000000000000000000000000
--- a/docs/localevaluation.rst
+++ /dev/null
@@ -1,65 +0,0 @@
-================
-Local Evaluation
-================
-
-This document explains you how to locally evaluate your submissions before making 
-an official submission to the competition.
-
-Requirements
-------------
-
-* **flatland-rl** : We expect that you have `flatland-rl` installed by following the instructions in  :doc:`installation`.
-
-* **redis** : Additionally you will also need to have  `redis installed <https://redis.io/topics/quickstart>`_ and **should have it running in the background.**
-
-Test Data
----------
-
-* **test env data** : You can `download and untar the test-env-data <https://www.aicrowd.com/challenges/flatland-challenge/dataset_files>`, at a location of your choice, lets say `/path/to/test-env-data/`. After untarring the folder, the folder structure should look something like:
-
-
-.. code-block:: console
-
-    .
-    └── test-env-data
-        ├── Test_0
-        │   ├── Level_0.pkl
-        │   └── Level_1.pkl
-        ├── Test_1
-        │   ├── Level_0.pkl
-        │   └── Level_1.pkl
-        ├..................
-        ├..................
-        ├── Test_8
-        │   ├── Level_0.pkl
-        │   └── Level_1.pkl
-        └── Test_9
-            ├── Level_0.pkl
-            └── Level_1.pkl
-
-Evaluation Service
-------------------
-
-* **start evaluation service** : Then you can start the evaluator by running : 
-
-.. code-block:: console
-
-    flatland-evaluator --tests /path/to/test-env-data/
-
-RemoteClient
-------------
-
-* **run client** : Some `sample submission code can be found in the starter-kit <https://github.com/AIcrowd/flatland-challenge-starter-kit/>`_, but before you can run your code locally using `FlatlandRemoteClient`, you will have to set the `AICROWD_TESTS_FOLDER` environment variable to the location where you previous untarred the folder with `the test-env-data`:
-
-
-.. code-block:: console
-
-    export AICROWD_TESTS_FOLDER="/path/to/test-env-data/"
-
-    # or on Windows :
-    # 
-    # set AICROWD_TESTS_FOLDER "\path\to\test-env-data\"
-
-    # and then finally run your code
-    python run.py
-
diff --git a/docs/FAQ.rst b/docs/specifications/FAQ.rst
similarity index 100%
rename from docs/FAQ.rst
rename to docs/specifications/FAQ.rst
diff --git a/docs/specifications/core.md b/docs/specifications/core.md
index cbfcf3bc3f1a9eb11c7c8dac489aee10ffff7ee6..0c3a100e0db4db39312676d5e879227899adaca5 100644
--- a/docs/specifications/core.md
+++ b/docs/specifications/core.md
@@ -1,5 +1,6 @@
-# Core Specifications
-## Environment Class Overview
+## Core Specifications
+
+### Environment Class Overview
 
 The Environment class contains all necessary functions for the interactions between the agents and the environment. The base Environment class is derived from rllib.env.MultiAgentEnv (https://github.com/ray-project/ray).
 
diff --git a/docs/specifications/intro.md b/docs/specifications/intro.md
new file mode 100644
index 0000000000000000000000000000000000000000..c6b8d792edf9759ae129acdf80804f41e7de0650
--- /dev/null
+++ b/docs/specifications/intro.md
@@ -0,0 +1,9 @@
+## Intro
+
+In a human-readable language, specifications provide
+- code base overview (hand-drawn concept)
+- key concepts (generators, envs) and how are they linked
+- link relevant code base
+
+![Overview](img/UML_flatland.png)
+[Diagram Source](https://confluence.sbb.ch/x/pQfsSw)
diff --git a/docs/intro_observation_actions.rst b/docs/specifications/intro_observation_actions.rst
similarity index 97%
rename from docs/intro_observation_actions.rst
rename to docs/specifications/intro_observation_actions.rst
index 70723e3000ba0ab9c1a90af5971f6294084958eb..1d9fd9c1b61c439de9ee03af27a715bf76f5b821 100644
--- a/docs/intro_observation_actions.rst
+++ b/docs/specifications/intro_observation_actions.rst
@@ -1,10 +1,10 @@
-=============================
+
 Observation and Action Spaces
-=============================
+-----------------------------
 This is an introduction to the three standard observations and the action space of **Flatland**.
 
 Action Space
-============
+^^^^^^^^^^^^
 Flatland is a railway simulation. Thus the actions of an agent are strongly limited to the railway network. This means that in many cases not all actions are valid.
 The possible actions of an agent are
 
@@ -15,7 +15,7 @@ The possible actions of an agent are
 - ``4`` **Stop**: This action causes the agent to stop.
 
 Observation Spaces
-==================
+^^^^^^^^^^^^^^^^^^
 In the **Flatland** environment we have included three basic observations to get started. The figure below illustrates the observation range of the different basic observation: ``Global``, ``Local Grid`` and ``Local Tree``.
 
 .. image:: https://i.imgur.com/oo8EIYv.png
@@ -24,7 +24,7 @@ In the **Flatland** environment we have included three basic observations to get
 
    
 Global Observation
-------------------
+~~~~~~~~~~~~~~~~~~
 Gives a global observation of the entire rail environment.
 
 The observation is composed of the following elements:
@@ -37,7 +37,7 @@ We encourage you to enhance this observation with any layer you think might help
 It would also be possible to construct a global observation for a super agent that controls all agents at once.
 
 Local Grid Observation
-----------------------
+~~~~~~~~~~~~~~~~~~~~~~
 Gives a local observation of the rail environment around the agent.
 The observation is composed of the following elements:
 
@@ -50,7 +50,7 @@ Be aware that this observation **does not** contain any clues about target locat
 We encourage you to come up with creative ways to overcome this problem. In the tree observation below we introduce the concept of distance maps.
 
 Tree Observation
-----------------
+~~~~~~~~~~~~~~~~
 The tree observation is built by exploiting the graph structure of the railway network. The observation is generated by spanning a **4 branched tree** from the current position of the agent. Each branch follows the allowed transitions (backward branch only allowed at dead-ends) until a cell with multiple allowed transitions is reached. Here the information gathered along the branch is stored as a node in the tree.
 The figure below illustrates how the tree observation is built:
 
@@ -73,7 +73,7 @@ The right side of the figure shows the resulting tree of the railway network on
     
     
 Node Information
-----------------
+~~~~~~~~~~~~~~~~
 Each node is filled with information gathered along the path to the node. Currently each node contains 9 features:
 
 - 1: if own target lies on the explored branch the current distance from the agent in number of cells is stored.
diff --git a/docs/specifications/railway.md b/docs/specifications/railway.md
index 36cfb7364f06fa72806c96d5150ff856d4ebca3f..04867f08f4b3a2948dd09983d552d5f33222cf4f 100644
--- a/docs/specifications/railway.md
+++ b/docs/specifications/railway.md
@@ -1,6 +1,6 @@
-# Railway Specifications
+## Railway Specifications
 
-## Overview
+### Overview
 
 Flatland is usually a two-dimensional environment intended for multi-agent problems, in particular it should serve as a benchmark for many multi-agent reinforcement learning approaches.
 
@@ -9,7 +9,7 @@ The environment can host a broad array of diverse problems reaching from disease
 This documentation illustrates the dynamics and possibilities of Flatland environment and introduces the details of the train traffic management implementation.
 
 
-## Environment
+### Environment
 
 Before describing the Flatland at hand, let us first define terms which will be used in this specification. Flatland is grid-like n-dimensional space of any size. A cell is the elementary element of the grid.  The cell is defined as a location where any objects can be located at. The term agent is defined as an entity that can move within the grid and must solve tasks. An agent can move in any arbitrary direction on well-defined transitions from cells to cell. The cell where the agent is located at must have enough capacity to hold the agent on. Every agent reserves exact one capacity or resource. The capacity of a cell is usually one. Thus usually only one agent can be at same time located at a given cell. The agent movement possibility can be restricted by limiting the allowed transitions. 
 
@@ -22,7 +22,7 @@ Flatland supports many different types of agents. In consequence the cell type c
 For each agent type Flatland can have a different action space. 
 
 
-### Grid
+#### Grid
 
 A rectangular grid of integer shape (dim_x, dim_y) defines the spatial dimensions of the environment.
 
@@ -40,9 +40,9 @@ Two cells $`i`$ and $`j`$ ($`i \neq j`$) are considered neighbors when the Eucli
 For each cell the allowed transitions to all neighboring 4 cells are defined. This can be extended to include transition probabilities as well.
 
 
-### Tile Types 
+#### Tile Types 
 
-##### Railway Grid
+###### Railway Grid
 
 Each Cell within the simulation grid consists of a distinct tile type which in turn limit the movement possibilities of the agent through the cell. For railway specific problem 8 basic tile types can be defined which describe a rail network. As a general fact in railway network when on navigation choice must be taken at maximum two options are available. 
 
@@ -73,7 +73,7 @@ In Case 5 coming from all direction a navigation choice must be taken.
 Case 7 represents a deadend, thus only stop or backwards motion is possible when an agent occupies this cell. 
 
 
-##### Tile Types of Wall-Based Cell Games (Theseus and Minotaur's puzzle, Labyrinth Game)
+###### Tile Types of Wall-Based Cell Games (Theseus and Minotaur's puzzle, Labyrinth Game)
 
 The Flatland approach can also be used the describe a variety of cell based logic games. While not going into any detail at all it is still worthwhile noting that the games are usually visualized using cell grid with wall describing forbidden transitions (negative formulation). 
 
@@ -82,22 +82,22 @@ The Flatland approach can also be used the describe a variety of cell based logi
 Left: Wall-based Grid definition (negative definition), Right: lane-based Grid definition (positive definition) 
 
 
-# Train Traffic Management
+## Train Traffic Management
 
 
-### Problem Definition
+#### Problem Definition
 
 Additionally, due to the dynamics of train traffic, each transition probability is symmetric in this environment. This means that neighboring cells will always have the same transition probability to each other.
 
 Furthermore, each cell is exclusive and can only be occupied by one agent at any given time.
 
 
-## Observations
+### Observations
 
 In this early stage of the project it is very difficult to come up with the necessary observation space in order to solve all train related problems. Given our early experiments we therefore propose different observation methods and hope to investigate further options with the crowdsourcing challenge. Below we compare global observation with local observations and discuss the differences in performance and flexibility.
 
 
-### Global Observation
+#### Global Observation
 
 Global observations, specifically on a grid like environment, benefit from the vast research results on learning from pixels and the advancements in convolutional neural network algorithms. The observation can simply be generated from the environment state and not much additional computation is necessary to generate the state.
 
@@ -108,7 +108,7 @@ However, we run into problems when scalability and flexibility become an importa
 Given the complexity of real-world railway networks (especially in Switzerland), we do not believe that a global observation is suited for this problem.
 
 
-### Local Observation
+#### Local Observation
 
 Given that scalability and speed are the main requirements for our use cases local observations offer an interesting novel approach. Local observations require some additional computations to be extracted from the environment state but could in theory be performed in parallel for each agent.
 
@@ -117,7 +117,7 @@ With early experiments (presentation GTC, details below) we could show that even
 Below we highlight two different forms of local observations and elaborate on their benefits.
 
 
-#### Local Field of View
+##### Local Field of View
 
 This form of observation is very similar to the global view approach, in that it consists of a grid like input. In this setup each agent has its own observation that depends on its current location in the environment.
 
@@ -129,7 +129,7 @@ Given an agents location, the observation is simply a $`n \times m`$ grid around
 
 ![local_grid](https://drive.google.com/uc?export=view&id=1kZzinMOs7hlPaSJJeIiaQ7lAz2erXuHx)
 
-#### Tree Search
+##### Tree Search
 
 From our past experiences and the nature of railway networks (they are a graph) it seems most suitable to use a local tree search as an observation for the agents.
 
@@ -148,7 +148,7 @@ _Figure 3: A local tree search moves along the allowed transitions, originating
 We have gained some insights into using and aggregating the information along the tree search. This should be part of the early investigation while implementing Flatland. One possibility would also be to leave this up to the participants of the Flatland challenge.
 
 
-### Communication
+#### Communication
 
 Given the complexity and the high dependence of the multi-agent system a communication form might be necessary. This needs to be investigated und following constraints:
 
@@ -158,15 +158,15 @@ Given the complexity and the high dependence of the multi-agent system a communi
 Depending on the game configuration every agent can be informed about the position of the other agents present in the respective observation range. For a local observation space the agent knows the distance to the next agent (defined with the agent type) in each direction. If no agent is present the the distance can simply be -1 or null. 
 
 
-### Action Negotiation 
+#### Action Negotiation 
 
 In order to avoid illicit situations ( for example agents crashing into each other) the intended actions for each agent in the observation range is known. Depending on the known movement intentions new movement intention must be generated by the agents. This is called a negotiation round. After a fixed amount of negotiation round the last intended action is executed for each agent. An illicit situation results in ending the game with a fixed low rewards. 
 
 
-## Actions
+### Actions
 
 
-### Navigation
+#### Navigation
 
 The agent can be located at any cell except on case 0 cells. The agent can move along the rails to another unoccupied cell or it can just wait where he is currently located at.  
 
@@ -179,7 +179,7 @@ An agent can move with a definable maximum speed. The default and absolute maxim
 An agent can be defined to be picked up/dropped off by another agent or to pick up/drop off another agent. When agent A is picked up by another agent B it is said that A is linked to B. The linked agent loses all its navigation possibilities. On the other side it inherits the position from the linking agent for the time being linked. Linking and unlinking between two agents is only possible the participating agents have the same space-time coordinates for the linking and unlinking action.  
 
 
-### Transportation
+#### Transportation
 
 In railway the transportation of goods or passengers is essential. Consequently agents can transport goods or passengers. It's depending on the agent's type. If the agent is a freight train, it will transport goods. It's passenger train it will transport passengers only.  But the transportation capacity for both kind of trains limited. Passenger trains have a maximum number of seats restriction. The freight trains have a maximal number of tons restriction. 
 
@@ -188,7 +188,7 @@ Passenger can take or switch trains only at stations. Passengers are agents with
 Goods will be only transported over the railway network. Goods are agents with transportation needs. They can start their transportation chain at any station. Each good has a station as the destination attached. The destination is the end of the transportation. It's the transportation goal. Once a good reach its destination it will disappear. Disappearing mean the goods leave Flatland. Goods can't move independently on the grid. They can only move by using trains. They can switch trains at any stations. The goal of the system is to find for goods the right trains to get a feasible transportation chain.  The quality of the transportation chain is measured by the reward function.
 
 
-## Environment Rules
+### Environment Rules
 
 *   Depending the cell type a cell must have a given number of neighbouring cells of a given type. \
 
@@ -199,7 +199,7 @@ Goods will be only transported over the railway network. Goods are agents with t
 *   Agents related to each other through transport (one carries another) must be at the same place the same time.
 
 
-## Environment Configuration
+### Environment Configuration
 
 The environment should allow for a broad class of problem instances. Thus the configuration file for each problem instance should contain:
 
@@ -231,10 +231,10 @@ Observation Type: Local, Targets known
 It should be check prior to solving the problem that the Goal location for each agent can be reached.
 
 
-## Reward Function
+### Reward Function
 
 
-### Railway-specific Use-Cases
+#### Railway-specific Use-Cases
 
 A first idea for a Cost function for generic applicability is as follows. For each agent and each goal sum up 
 
@@ -246,15 +246,15 @@ A first idea for a Cost function for generic applicability is as follows. For ea
 An additional refinement proven meaningful for situations where not target time is given is to weight the longest arrival time higher as the sum off all arrival times. 
 
 
-### Further Examples (Games)
+#### Further Examples (Games)
 
 
-## Initialization
+### Initialization
 
 Given that we want a generalizable agent to solve the problem, training must be performed on a diverse training set. We therefore need a level generator which can create novel tasks for to be solved in a reliable and fast fashion. 
 
 
-### Level Generator
+#### Level Generator
 
 Each problem instance can have its own level generator.
 
@@ -279,63 +279,63 @@ The output of the level generator should be:
 *   Initial rewards, positions and observations
 
 
-## Railway Use Cases
+### Railway Use Cases
 
 In this section we define a few simple tasks related to railway traffic that we believe would be well suited for a crowdsourcing challenge. The tasks are ordered according to their complexity. The Flatland repo must at least support all these types of use cases.
 
 
-### Simple Navigation
+#### Simple Navigation
 
 In order to onboard the broad reinforcement learning community this task is intended as an introduction to the Railway@Flatland environment. 
 
 
-#### Task
+##### Task
 
 A single agent is placed at an arbitrary (permitted) cell and is given a target cell (reachable by the rules of Flatand). The task is to arrive at the target destination in as little time steps as possible.
 
 
-#### Actions
+##### Actions
 
 In this task an agent can perform transitions ( max 3 possibilities) or stop. Therefore, the agent can chose an action in the range $`a \in [0,4] `$.
 
 
-#### Reward
+##### Reward
 
 The reward is -1 for each time step and 10 if the agent stops at the destination. We might add -1 for invalid moves to speed up exploration and learning.
 
 
-#### Observation
+##### Observation
 
 If we chose a local observation scheme, we need to provide some information about the distance to the target to the agent. This could either be achieved by a distance map, by using waypoints or providing a broad sense of direction to the agent.
 
 
-### Multi Agent Navigation and Dispatching
+#### Multi Agent Navigation and Dispatching
 
 This task is intended as a natural extension of the navigation task.
 
 
-#### Task
+##### Task
 
 A number of agents ($`n`$-agents) are placed at an arbitrary (permitted) cell and given individual target cells (reachable by the rules of Flatand). The task is to arrive at the target destination in as little time steps as possible as a group. This means that the goal is to minimize the longest path of *ALL* agents.
 
 
-#### Actions
+##### Actions
 
 In this task an agent can perform transitions ( max 3 possibilities) or stop. Therefore, the agent can chose an action in the range $`a \in [0,4] `$.
 
-#### Reward
+##### Reward
 
 The reward is -1 for each time step and 10 if all the agents stop at the destination. We can further punish collisions between agents and illegal moves to speed up learning.
 
 
-#### Observation
+##### Observation
 
 If we chose a local observation scheme, we need to provide some information about the distance to the target to the agent. This could either be achieved by a distance map or by using waypoints.
 
 The agents must see each other in their tree searches.
 
 
-#### Previous learnings
+##### Previous learnings
 
 Training an agent by himself first to understand the main task turned out to be beneficial.
 
@@ -344,15 +344,348 @@ It might be necessary to add the "intended" paths of each agent to the observati
 A communication layer might be necessary to improve agent performance.
 
 
-### Multi Agent Navigation and Dispatching with Schedule
+#### Multi Agent Navigation and Dispatching with Schedule
 
 
-### Transport Chains (Transportation of goods and passengers)
+#### Transport Chains (Transportation of goods and passengers)
 
-## Benefits of Transition Model
+### Benefits of Transition Model
 
 Using a grid world with 8 transition possibilities to the neighboring cells constitutes a very flexible environment, which can model many different types of problems.
 
 Considering the recent advancements in machine learning, this approach also allows to make use of convolutions in order to process observation states of agents. For the specific case of railway simulation the grid world unfortunately also brings a few drawbacks.
 
 Most notably the railway network only offers action possibilities at elements where there are more than two transition probabilities. Thus, if using a less dense graph than a grid, the railway network could be represented in a simpler graph. However, we believe that moving from grid-like example where many transitions are allowed towards the railway network with fewer transitions would be the simplest approach for the broad reinforcement learning community.
+
+
+
+
+## Rail Generators and Schedule Generators
+The separation between rail generator and schedule generator reflects the organisational separation in the railway domain
+- Infrastructure Manager (IM): is responsible for the layout and maintenance of tracks
+- Railway Undertaking (RU): operates trains on the infrastructure
+Usually, there is a third organisation, which ensures discrimination-free access to the infrastructure for concurrent requests for the infrastructure in a **schedule planning phase**.
+However, in the **Flat**land challenge, we focus on the re-scheduling problem during live operations.
+
+Technically, 
+```python 
+RailGeneratorProduct = Tuple[GridTransitionMap, Optional[Any]]
+RailGenerator = Callable[[int, int, int, int], RailGeneratorProduct]
+
+AgentPosition = Tuple[int, int]
+ScheduleGeneratorProduct = Tuple[List[AgentPosition], List[AgentPosition], List[AgentPosition], List[float]]
+ScheduleGenerator = Callable[[GridTransitionMap, int, Optional[Any]], ScheduleGeneratorProduct]
+```
+
+We can then produce `RailGenerator`s by currying:
+```python
+def sparse_rail_generator(num_cities=5, num_intersections=4, num_trainstations=2, min_node_dist=20, node_radius=2,
+                          num_neighb=3, grid_mode=False, enhance_intersection=False, seed=0):
+
+    def generator(width, height, num_agents, num_resets=0):
+    
+        # generate the grid and (optionally) some hints for the schedule_generator
+        ...
+         
+        return grid_map, {'agents_hints': {
+            'num_agents': num_agents,
+            'agent_start_targets_nodes': agent_start_targets_nodes,
+            'train_stations': train_stations
+        }}
+
+    return generator
+```
+And, similarly, `ScheduleGenerator`s:
+```python
+def sparse_schedule_generator(speed_ratio_map: Mapping[float, float] = None) -> ScheduleGenerator:
+    def generator(rail: GridTransitionMap, num_agents: int, hints: Any = None):
+        # place agents:
+        # - initial position
+        # - initial direction
+        # - (initial) speed
+        # - malfunction
+        ...
+                
+        return agents_position, agents_direction, agents_target, speeds, agents_malfunction
+
+    return generator
+```
+Notice that the `rail_generator` may pass `agents_hints` to the `schedule_generator` which the latter may interpret.
+For instance, the way the `sparse_rail_generator` generates the grid, it already determines the agent's start and target.
+Hence, `rail_generator` and `schedule_generator` have to match if `schedule_generator` presupposes some specific `agents_hints`.
+
+The environment's `reset` takes care of applying the two generators:
+```python
+    def __init__(self,
+            ...
+             rail_generator: RailGenerator = random_rail_generator(),
+             schedule_generator: ScheduleGenerator = random_schedule_generator(),
+             ...
+             ):
+        self.rail_generator: RailGenerator = rail_generator
+        self.schedule_generator: ScheduleGenerator = schedule_generator
+        
+    def reset(self, regen_rail=True, replace_agents=True):
+        rail, optionals = self.rail_generator(self.width, self.height, self.get_num_agents(), self.num_resets)
+
+        ...
+
+        if replace_agents:
+            agents_hints = None
+            if optionals and 'agents_hints' in optionals:
+                agents_hints = optionals['agents_hints']
+            self.agents_static = EnvAgentStatic.from_lists(
+                *self.schedule_generator(self.rail, self.get_num_agents(), hints=agents_hints))
+```
+
+
+### RailEnv Speeds
+One of the main contributions to the complexity of railway network operations stems from the fact that all trains travel at different speeds while sharing a very limited railway network. 
+
+The different speed profiles can be generated using the `schedule_generator`, where you can actually choose as many different speeds as you like.
+Keep in mind that the *fastest speed* is 1 and all slower speeds must be between 0 and 1.
+For the submission scoring you can assume that there will be no more than 5 speed profiles.
+
+
+Currently (as of **Flat**land 2.0), an agent keeps its speed over the whole episode. 
+
+Because the different speeds are implemented as fractions the agents ability to perform actions has been updated. 
+We **do not allow actions to change within the cell**.
+This means that each agent can only choose an action to be taken when entering a cell (i.e. positional fraction is 0).
+There are some real railway-specific considerations, such as reserved blocks, that are similar to this behavior.
+But more importantly we disabled this to simplify the use of machine learning algorithms with the environment. 
+If we allow stop actions in the middle of cells, then the controller needs to make many more observations and not only at cell changes.
+(Not set in stone and could be updated if the need arises).
+
+The chosen action is then executed when a step to the next cell is valid. For example
+
+- Agent enters switch and chooses to deviate left. Agent fractional speed is 1/4 and thus the agent will take 4 time steps to complete its journey through the cell. On the 4th time step the agent will leave the cell deviating left as chosen at the entry of the cell.
+    - All actions chosen by the agent during its travels within a cell are ignored
+    - Agents can make observations at any time step. Make sure to discard observations without any information. See this [example](https://gitlab.aicrowd.com/flatland/baselines/blob/master/torch_training/training_navigation.py) for a simple implementation.
+- The environment checks if agent is allowed to move to next cell only at the time of the switch to the next cell
+
+In your controller, you can check whether an agent requires an action by checking `info`: 
+```python
+obs, rew, done, info = env.step(actions) 
+...
+action_dict = dict()
+for a in range(env.get_num_agents()):
+    if info['action_required'][a]:
+        action_dict.update({a: ...})
+
+```
+Notice the following about `info['action_required'][a]`:
+* if the agent breaks down (see stochasticity below) on entering the cell (no distance elapsed in the cell), an action is required as long as the agent is broken down;
+when it gets back to work, the action chosen just before will be taken and executed at the end of the cell; you may check whether the agent
+gets healthy again in the next step by checking `info['malfunction'][a] == 1`.
+* when the agent has spent enough time in the cell, the next cell may not be free and the agent has to wait. 
+
+
+Later versions of **Flat**land might have varying speeds during episodes.
+Therefore, we return the agents' speed - in your controller, you can get the agents' speed from the `info` returned by `step`: 
+```python
+obs, rew, done, info = env.step(actions) 
+...
+for a in range(env.get_num_agents()):
+    speed = info['speed'][a]
+```
+Notice that we do not guarantee that the speed will be computed at each step, but if not costly we will return it at each step.
+
+
+
+
+
+
+
+
+
+### RailEnv Malfunctioning / Stochasticity
+
+Stochastic events may happen during the episodes. 
+This is very common for railway networks where the initial plan usually needs to be rescheduled during operations as minor events such as delayed departure from trainstations, malfunctions on trains or infrastructure or just the weather lead to delayed trains.
+
+We implemented a Poisson process to simulate delays by stopping agents at random times for random durations. The parameters necessary for the stochastic events can be provided when creating the environment.
+
+```python
+## Use the malfunction generator to break agents from time to time
+
+stochastic_data = {
+    'prop_malfunction': 0.5,  # Percentage of defective agents
+    'malfunction_rate': 30,  # Rate of malfunction occurrence
+    'min_duration': 3,  # Minimal duration of malfunction
+    'max_duration': 10  # Max duration of malfunction
+}
+```
+
+The parameters are as follows:
+
+- `prop_malfunction` is the proportion of agents that can malfunction. `1.0` means that each agent can break.
+- `malfunction_rate` is the mean rate of the Poisson process in number of environment steps.
+- `min_duration` and `max_duration` set the range of malfunction durations. They are sampled uniformly.
+
+You can introduce stochasticity by simply creating the env as follows:
+
+```python
+env = RailEnv(
+    ...
+    stochastic_data=stochastic_data,  # Malfunction data generator
+    ...    
+)
+```
+In your controller, you can check whether an agent is malfunctioning: 
+```python
+obs, rew, done, info = env.step(actions) 
+...
+action_dict = dict()
+for a in range(env.get_num_agents()):
+    if info['malfunction'][a] == 0:
+        action_dict.update({a: ...})
+
+## Custom observation builder
+tree_observation = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())
+
+## Different agent types (trains) with different speeds.
+speed_ration_map = {1.: 0.25,  # Fast passenger train
+                    1. / 2.: 0.25,  # Fast freight train
+                    1. / 3.: 0.25,  # Slow commuter train
+                    1. / 4.: 0.25}  # Slow freight train
+
+env = RailEnv(width=50,
+              height=50,
+              rail_generator=sparse_rail_generator(num_cities=20,  # Number of cities in map (where train stations are)
+                                                   num_intersections=5,  # Number of intersections (no start / target)
+                                                   num_trainstations=15,  # Number of possible start/targets on map
+                                                   min_node_dist=3,  # Minimal distance of nodes
+                                                   node_radius=2,  # Proximity of stations to city center
+                                                   num_neighb=4,  # Number of connections to other cities/intersections
+                                                   seed=15,  # Random seed
+                                                   grid_mode=True,
+                                                   enhance_intersection=True
+                                                   ),
+              schedule_generator=sparse_schedule_generator(speed_ration_map),
+              number_of_agents=10,
+              stochastic_data=stochastic_data,  # Malfunction data generator
+              obs_builder_object=tree_observation)
+```
+
+
+### Observation Builders
+Every `RailEnv` has an `obs_builder`. The `obs_builder` has full access to the `RailEnv`. 
+The `obs_builder` is called in the `step()` function to produce the observations.
+
+```python
+env = RailEnv(
+    ...
+    obs_builder_object=TreeObsForRailEnv(
+        max_depth=2,
+        predictor=ShortestPathPredictorForRailEnv(max_depth=10)
+    ),
+    ...                   
+)
+```
+
+The two principal observation builders provided are global and tree.
+
+#### Global Observation Builder
+`GlobalObsForRailEnv` gives a global observation of the entire rail environment.
+* transition map array with dimensions (env.height, env.width, 16), assuming 16 bits encoding of transitions.
+
+* Two 2D arrays (map_height, map_width, 2) containing respectively the position of the given agent target and the positions of the other agents targets.
+
+* A 3D array (map_height, map_width, 4) with
+  - first channel containing the agents position and direction
+  - second channel containing the other agents positions and directions
+  - third channel containing agent malfunctions
+  - fourth channel containing agent fractional speeds
+            
+#### Tree Observation Builder
+`TreeObsForRailEnv` computes the current observation for each agent.
+
+The observation vector is composed of 4 sequential parts, corresponding to data from the up to 4 possible
+movements in a `RailEnv` (up to because only a subset of possible transitions are allowed in RailEnv).
+The possible movements are sorted relative to the current orientation of the agent, rather than NESW as for
+the transitions. The order is:
+
+```console
+    [data from 'left'] + [data from 'forward'] + [data from 'right'] + [data from 'back']
+```
+
+Each branch data is organized as:
+
+```console
+    [root node information] +
+    [recursive branch data from 'left'] +
+    [... from 'forward'] +
+    [... from 'right'] +
+    [... from 'back']
+```
+
+Each node information is composed of 11 features:
+
+1. if own target lies on the explored branch the current distance from the agent in number of cells is stored.
+
+2. if another agents target is detected the distance in number of cells from the agents current location
+    is stored
+
+3. if another agent is detected the distance in number of cells from current agent position is stored.
+
+4. possible conflict detected
+    tot_dist = Other agent predicts to pass along this cell at the same time as the agent, we store the
+     distance in number of cells from current agent position
+```console
+    0 = No other agent reserves the same cell at a similar time
+```
+5. if a switch that cannot be used by the agent is detected, we store the distance.
+
+6. This feature stores the distance in number of cells to the next branching point (current node)
+
+7. minimum distance from node to the agent's target given the direction of the agent if this path is chosen
+
+8. agent in the same direction
+```console
+    n = number of agents present same direction
+        (possible future use: number of other agents in the same direction in this branch)
+    0 = no agent present same direction
+```
+9. agent in the opposite direction
+```console
+    n = number of agents present other direction than myself (so conflict)
+        (possible future use: number of other agents in other direction in this branch, ie. number of conflicts)
+    0 = no agent present other direction than myself
+```
+
+10. malfunctioning/blocking agents
+```console
+    n = number of time steps the observed agent remains blocked
+```
+
+11. slowest observed speed of an agent in same direction
+```console
+    1 if no agent is observed
+
+    min_fractional speed otherwise
+```
+Missing/padding nodes are filled in with -inf (truncated).
+Missing values in present node are filled in with +inf (truncated).
+
+
+In case of the root node, the values are [0, 0, 0, 0, distance from agent to target, own malfunction, own speed]
+In case the target node is reached, the values are [0, 0, 0, 0, 0].
+
+
+### Predictors
+Predictors make predictions on future agents' moves based on the current state of the environment.
+They are decoupled from observation builders in order to encapsulate the functionality and to make it re-usable.
+
+For instance, `TreeObsForRailEnv` optionally uses the predicted trajectories while exploring
+the branches of an agent's future moves to detect future conflicts.
+
+The general call structure is as follows:
+```python
+RailEnv.step() 
+               -> ObservationBuilder.get_many() 
+                                                ->  self.predictor.get()
+                                                    self.get()
+                                                    self.get()
+                                                    ...
+```
diff --git a/docs/specifications/rendering.md b/docs/specifications/rendering.md
index baba1c06e16da99f20ee61e6feb6d031f0a415c2..0080acbaaafef2cf1e6bc218703e999206b9eb67 100644
--- a/docs/specifications/rendering.md
+++ b/docs/specifications/rendering.md
@@ -1,14 +1,14 @@
-# Rendering Specifications
+## Rendering Specifications
 
-## Scope
+### Scope
 This doc specifies the software to meet the requirements in the Visualization requirements doc.
 
-## References
+### References
 - [Visualization Requirements](visualization)
 - [Core Spec](./core)
 
-## Interfaces
-### Interface with Environment Component
+### Interfaces
+#### Interface with Environment Component
 
 - Environment produces the Env Snapshot data structure (TBD)
 - Renderer reads the Env Snapshot
@@ -28,9 +28,9 @@ This doc specifies the software to meet the requirements in the Visualization re
     - Or, render frames without blocking environment
         - Render frames in separate process / thread
 
-#### Environment Snapshot
+##### Environment Snapshot
 
-### Data Structure
+#### Data Structure
 
 A definitions of the data structure is to be defined in Core requirements or Interfaces doc.
 
@@ -50,7 +50,7 @@ Top-level dictionary
         - Tree-based observation
             - TBD
 
-### Existing Tools / Libraries
+#### Existing Tools / Libraries
 1. Pygame
     1. Very easy to use. Like dead simple to add sprites etc. [Link](https://studywolf.wordpress.com/2015/03/06/arm-visualization-with pygame/)
     2. No inbuilt support for threads/processes. Does get faster if using pypy/pysco.
@@ -58,18 +58,18 @@ Top-level dictionary
     1. Somewhat simple, a little more verbose to use the different modules.
     2. Multi-threaded via QThread! Yay! (Doesn’t block main thread that does the real work), [Link](https://nikolak.com/pyqt-threading-tutorial/)
 
-#### How to structure the code
+##### How to structure the code
 
 1. Define draw functions/classes for each primitive
     1. Primitives: Agents (Trains), Railroad, Grass, Houses etc.
 2. Background. Initialize the background before starting the episode.
     1. Static objects in the scenes, directly draw those primitives once and cache.
 
-#### Proposed Interfaces
+##### Proposed Interfaces
 To-be-filled
 
-### Technical Graphics Considerations
+#### Technical Graphics Considerations
 
-#### Overlay dynamic primitives over the background at each time step.
+##### Overlay dynamic primitives over the background at each time step.
 
 No point trying to figure out changes. Need to explicitly draw every primitive anyways (that’s how these renders work).
diff --git a/docs/specifications/specifications.md b/docs/specifications/specifications.md
deleted file mode 100644
index 2b7484425d84345ace351b8b6ce681d789c75fc8..0000000000000000000000000000000000000000
--- a/docs/specifications/specifications.md
+++ /dev/null
@@ -1,337 +0,0 @@
-Flatland Specs
-==========================
-
-What are **Flatland** specs about?
----------------------------------
-In a humand-readable language, they provide
-* code base overview (hand-drawn concept)
-* key concepts (generators, envs) and how are they linked
-* link relevant code base
-
-## Overview
-![UML_flatland.png](img/UML_flatland.png)
-[Diagram Source](https://confluence.sbb.ch/x/pQfsSw)
-
-
-
-## Rail Generators and Schedule Generators
-The separation between rail generator and schedule generator reflects the organisational separation in the railway domain
-- Infrastructure Manager (IM): is responsible for the layout and maintenance of tracks
-- Railway Undertaking (RU): operates trains on the infrastructure
-Usually, there is a third organisation, which ensures discrimination-free access to the infrastructure for concurrent requests for the infrastructure in a **schedule planning phase**.
-However, in the **Flat**land challenge, we focus on the re-scheduling problem during live operations.
-
-Technically, 
-``` 
-RailGeneratorProduct = Tuple[GridTransitionMap, Optional[Any]]
-RailGenerator = Callable[[int, int, int, int], RailGeneratorProduct]
-
-AgentPosition = Tuple[int, int]
-ScheduleGeneratorProduct = Tuple[List[AgentPosition], List[AgentPosition], List[AgentPosition], List[float]]
-ScheduleGenerator = Callable[[GridTransitionMap, int, Optional[Any]], ScheduleGeneratorProduct]
-```
-
-We can then produce `RailGenerator`s by currying:
-```
-def sparse_rail_generator(num_cities=5, num_intersections=4, num_trainstations=2, min_node_dist=20, node_radius=2,
-                          num_neighb=3, grid_mode=False, enhance_intersection=False, seed=0):
-
-    def generator(width, height, num_agents, num_resets=0):
-    
-        # generate the grid and (optionally) some hints for the schedule_generator
-        ...
-         
-        return grid_map, {'agents_hints': {
-            'num_agents': num_agents,
-            'agent_start_targets_nodes': agent_start_targets_nodes,
-            'train_stations': train_stations
-        }}
-
-    return generator
-```
-And, similarly, `ScheduleGenerator`s:
-```
-def sparse_schedule_generator(speed_ratio_map: Mapping[float, float] = None) -> ScheduleGenerator:
-    def generator(rail: GridTransitionMap, num_agents: int, hints: Any = None):
-        # place agents:
-        # - initial position
-        # - initial direction
-        # - (initial) speed
-        # - malfunction
-        ...
-                
-        return agents_position, agents_direction, agents_target, speeds, agents_malfunction
-
-    return generator
-```
-Notice that the `rail_generator` may pass `agents_hints` to the  `schedule_generator` which the latter may interpret.
-For instance, the way the `sparse_rail_generator` generates the grid, it already determines the agent's goal and target.
-Hence, `rail_generator` and `schedule_generator` have to match if `schedule_generator` presupposes some specific `agents_hints`.
-
-The environment's `reset` takes care of applying the two generators:
-```
-    def __init__(self,
-            ...
-             rail_generator: RailGenerator = random_rail_generator(),
-             schedule_generator: ScheduleGenerator = random_schedule_generator(),
-             ...
-             ):
-        self.rail_generator: RailGenerator = rail_generator
-        self.schedule_generator: ScheduleGenerator = schedule_generator
-        
-    def reset(self, regen_rail=True, replace_agents=True):
-        rail, optionals = self.rail_generator(self.width, self.height, self.get_num_agents(), self.num_resets)
-
-        ...
-
-        if replace_agents:
-            agents_hints = None
-            if optionals and 'agents_hints' in optionals:
-                agents_hints = optionals['agents_hints']
-            self.agents_static = EnvAgentStatic.from_lists(
-                *self.schedule_generator(self.rail, self.get_num_agents(), hints=agents_hints))
-```
-
-
-## RailEnv Speeds
-One of the main contributions to the complexity of railway network operations stems from the fact that all trains travel at different speeds while sharing a very limited railway network. 
-
-The different speed profiles can be generated using the `schedule_generator`, where you can actually chose as many different speeds as you like. 
-Keep in mind that the *fastest speed* is 1 and all slower speeds must be between 1 and 0. 
-For the submission scoring you can assume that there will be no more than 5 speed profiles.
-
-
-Currently (as of **Flat**land 2.0), an agent keeps its speed over the whole episode. 
-
-Because the different speeds are implemented as fractions the agents ability to perform actions has been updated. 
-We **do not allow actions to change within the cell **. 
-This means that each agent can only chose an action to be taken when entering a cell (ie. positional fraction is 0). 
-There is some real railway specific considerations such as reserved blocks that are similar to this behavior. 
-But more importantly we disabled this to simplify the use of machine learning algorithms with the environment. 
-If we allow stop actions in the middle of cells. then the controller needs to make much more observations and not only at cell changes. 
-(Not set in stone and could be updated if the need arises).
-
-The chosen action is then executed when a step to the next cell is valid. For example
-
-- Agent enters switch and choses to deviate left. Agent fractional speed is 1/4 and thus the agent will take 4 time steps to complete its journey through the cell. On the 4th time step the agent will leave the cell deviating left as chosen at the entry of the cell.
-    - All actions chosen by the agent during its travels within a cell are ignored
-    - Agents can make observations at any time step. Make sure to discard observations without any information. See this [example](https://gitlab.aicrowd.com/flatland/baselines/blob/master/torch_training/training_navigation.py) for a simple implementation.
-- The environment checks if agent is allowed to move to next cell only at the time of the switch to the next cell
-
-In your controller, you can check whether an agent requires an action by checking `info`: 
-```
-obs, rew, done, info = env.step(actions) 
-...
-action_dict = dict()
-for a in range(env.get_num_agents()):
-    if info['action_required'][a]:
-        action_dict.update({a: ...})
-
-```
-Notice that `info['action_required'][a]` 
-* if the agent breaks down (see stochasticity below) on entering the cell (no distance elpased in the cell), an action required as long as the agent is broken down;
-when it gets back to work, the action chosen just before will be taken and executed at the end of the cell; you may check whether the agent
-gets healthy again in the next step by checking `info['malfunction'][a] == 1`.
-* when the agent has spent enough time in the cell, the next cell may not be free and the agent has to wait. 
-
-
-Since later versions of **Flat**land might have varying speeds during episodes. 
-Therefore, we return the agents' speed - in your controller, you can get the agents' speed from the `info` returned by `step`: 
-```
-obs, rew, done, info = env.step(actions) 
-...
-for a in range(env.get_num_agents()):
-    speed = info['speed'][a]
-```
-Notice that we do not guarantee that the speed will be computed at each step, but if not costly we will return it at each step.
-
-
-
-
-
-
-
-
-
-## RailEnv Malfunctioning / Stochasticity
-
-Stochastic events may happen during the episodes. 
-This is very common for railway networks where the initial plan usually needs to be rescheduled during operations as minor events such as delayed departure from trainstations, malfunctions on trains or infrastructure or just the weather lead to delayed trains.
-
-We implemted a poisson process to simulate delays by stopping agents at random times for random durations. The parameters necessary for the stochastic events can be provided when creating the environment.
-
-```
-# Use a the malfunction generator to break agents from time to time
-
-stochastic_data = {
-    'prop_malfunction': 0.5,  # Percentage of defective agents
-    'malfunction_rate': 30,  # Rate of malfunction occurence
-    'min_duration': 3,  # Minimal duration of malfunction
-    'max_duration': 10  # Max duration of malfunction
-}
-```
-
-The parameters are as follows:
-
-- `prop_malfunction` is the proportion of agents that can malfunction. `1.0` means that each agent can break.
-- `malfunction_rate` is the mean rate of the poisson process in number of environment steps.
-- `min_duration` and `max_duration` set the range of malfunction durations. They are sampled uniformly
-
-You can introduce stochasticity by simply creating the env as follows:
-
-```
-env = RailEnv(
-    ...
-    stochastic_data=stochastic_data,  # Malfunction data generator
-    ...    
-)
-```
-In your controller, you can check whether an agent is malfunctioning: 
-```
-obs, rew, done, info = env.step(actions) 
-...
-action_dict = dict()
-for a in range(env.get_num_agents()):
-    if info['malfunction'][a] == 0:
-        action_dict.update({a: ...})
-
-# Custom observation builder
-tree_observation = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())
-
-# Different agent types (trains) with different speeds.
-speed_ration_map = {1.: 0.25,  # Fast passenger train
-                    1. / 2.: 0.25,  # Fast freight train
-                    1. / 3.: 0.25,  # Slow commuter train
-                    1. / 4.: 0.25}  # Slow freight train
-
-env = RailEnv(width=50,
-              height=50,
-              rail_generator=sparse_rail_generator(num_cities=20,  # Number of cities in map (where train stations are)
-                                                   num_intersections=5,  # Number of intersections (no start / target)
-                                                   num_trainstations=15,  # Number of possible start/targets on map
-                                                   min_node_dist=3,  # Minimal distance of nodes
-                                                   node_radius=2,  # Proximity of stations to city center
-                                                   num_neighb=4,  # Number of connections to other cities/intersections
-                                                   seed=15,  # Random seed
-                                                   grid_mode=True,
-                                                   enhance_intersection=True
-                                                   ),
-              schedule_generator=sparse_schedule_generator(speed_ration_map),
-              number_of_agents=10,
-              stochastic_data=stochastic_data,  # Malfunction data generator
-              obs_builder_object=tree_observation)
-```
-
-
-## Observation Builders
-Every `RailEnv` has an `obs_builder`. The `obs_builder` has full access to the `RailEnv`. 
-The `obs_builder` is called in the `step()` function to produce the observations.
-
-```
-env = RailEnv(
-    ...
-    obs_builder_object=TreeObsForRailEnv(
-        max_depth=2,
-       predictor=ShortestPathPredictorForRailEnv(max_depth=10)
-    ),
-    ...                   
-)
-```
-
-The two principal observation builders provided are global and tree.
-
-### Global Observation Builder
-`GlobalObsForRailEnv` gives a global observation of the entire rail environment.
-* transition map array with dimensions (env.height, env.width, 16),
-          assuming 16 bits encoding of transitions.
-
-* Two 2D arrays (map_height, map_width, 2) containing respectively the position of the given agent
-         target and the positions of the other agents targets.
-
-* A 3D array (map_height, map_width, 4) wtih
-            - first channel containing the agents position and direction
-            - second channel containing the other agents positions and diretions
-            - third channel containing agent malfunctions
-            - fourth channel containing agent fractional speeds
-            
-### Tree Observation Builder
-`TreeObsForRailEnv` computes the current observation for each agent.
-
-The observation vector is composed of 4 sequential parts, corresponding to data from the up to 4 possible
-movements in a `RailEnv` (up to because only a subset of possible transitions are allowed in RailEnv).
-The possible movements are sorted relative to the current orientation of the agent, rather than NESW as for
-the transitions. The order is:
-
-    [data from 'left'] + [data from 'forward'] + [data from 'right'] + [data from 'back']
-
-Each branch data is organized as:
-
-    [root node information] +
-    [recursive branch data from 'left'] +
-    [... from 'forward'] +
-    [... from 'right] +
-    [... from 'back']
-
-Each node information is composed of 9 features:
-
-1. if own target lies on the explored branch the current distance from the agent in number of cells is stored.
-
-2. if another agents target is detected the distance in number of cells from the agents current location
-    is stored
-
-3. if another agent is detected the distance in number of cells from current agent position is stored.
-
-4. possible conflict detected
-    tot_dist = Other agent predicts to pass along this cell at the same time as the agent, we store the
-     distance in number of cells from current agent position
-
-    0 = No other agent reserve the same cell at similar time
-
-5. if an not usable switch (for agent) is detected we store the distance.
-
-6. This feature stores the distance in number of cells to the next branching  (current node)
-
-7. minimum distance from node to the agent's target given the direction of the agent if this path is chosen
-
-8. agent in the same direction
-    n = number of agents present same direction
-        (possible future use: number of other agents in the same direction in this branch)
-    0 = no agent present same direction
-
-9. agent in the opposite direction
-    n = number of agents present other direction than myself (so conflict)
-        (possible future use: number of other agents in other direction in this branch, ie. number of conflicts)
-    0 = no agent present other direction than myself
-
-10. malfunctioning/blokcing agents
-    n = number of time steps the oberved agent remains blocked
-
-11. slowest observed speed of an agent in same direction
-    1 if no agent is observed
-
-    min_fractional speed otherwise
-
-Missing/padding nodes are filled in with -inf (truncated).
-Missing values in present node are filled in with +inf (truncated).
-
-
-In case of the root node, the values are [0, 0, 0, 0, distance from agent to target, own malfunction, own speed]
-In case the target node is reached, the values are [0, 0, 0, 0, 0].
-
-
-## Predictors
-Predictors make predictions on future agents' moves based on the current state of the environment.
-They are decoupled from observation builders in order to be encapsulate the functionality and to make it re-usable.
-
-For instance, `TreeObsForRailEnv` optionally uses the predicted the predicted trajectories while exploring
-the branches of an agent's future moves to detect future conflicts.
-
-The general call structure is as follows:
-```
-RailEnv.step() 
-               -> ObservationBuilder.get_many() 
-                                                ->  self.predictor.get()
-                                                    self.get()
-                                                    self.get()
-                                                    ...
-```
diff --git a/docs/specifications/visualization.md b/docs/specifications/visualization.md
index 8dd5255a185e8ce72590d925710227ec11770bfc..36aae090cd10c957faf1f850c1c8596c54749e6a 100644
--- a/docs/specifications/visualization.md
+++ b/docs/specifications/visualization.md
@@ -1,14 +1,14 @@
-# Visualization
+## Visualization
 
 ![logo](https://drive.google.com/uc?export=view&id=1rstqMPJXFJd9iD46z1A5Rus-W0Ww6O8i)
 
 
-# Introduction & Scope
+### Introduction & Scope
 
 Broad requirements for human-viewable display of a single Flatland Environment.
 
 
-## Context
+#### Context
 
 Shows this software component in relation to some of the other components.  We name the component the "Renderer".  Multiple agents interact with a single Environment.  A renderer interacts with the environment, and displays on screen, and/or into movie or image files.
 
@@ -20,10 +20,10 @@ Shows this software component in relation to some of the other components.  We n
 ![drawing](https://docs.google.com/a/google.com/drawings/d/12345/export/png)
 
 
-# Requirements
+### Requirements
 
 
-## Primary Requirements
+#### Primary Requirements
 
 
 
@@ -39,7 +39,7 @@ Shows this software component in relation to some of the other components.  We n
     7. Should not drive the "main loop" of Inference or training 
 
 
-## Secondary / Optional Requirements 
+#### Secondary / Optional Requirements 
 
 
 
@@ -68,7 +68,7 @@ Shows this software component in relation to some of the other components.  We n
     15. Browser
 
 
-## Performance Metrics
+#### Performance Metrics
 
 Here are some performance metrics which the Renderer should meet.
 
@@ -78,7 +78,7 @@ Here are some performance metrics which the Renderer should meet.
    <td>
    </td>
    <td><p style="text-align: right">
-# Per second</p>
+### Per second</p>
 
    </td>
    <td><p style="text-align: right">
@@ -144,15 +144,15 @@ Prototype time (ms)</p>
 
 
 
-## Example Visualization
+#### Example Visualization
 
 
-# Reference Documents
+### Reference Documents
 
 Link to this doc: https://docs.google.com/document/d/1Y4Mw0Q6r8PEOvuOZMbxQX-pV2QKDuwbZJBvn18mo9UU/edit#
 
 
-## Core Specification
+#### Core Specification
 
 This specifies the system containing the environment and agents - this will be able to run independently of the renderer.
 
@@ -161,24 +161,24 @@ This specifies the system containing the environment and agents - this will be a
 The data structure which the renderer needs to read initially resides here.
 
 
-## Visualization Specification
+#### Visualization Specification
 
 This will specify the software which will meet the requirements documented here.
 
 [https://docs.google.com/document/d/1XYOe_aUIpl1h_RdHnreACvevwNHAZWT0XHDL0HsfzRY/edit#](https://docs.google.com/document/d/1XYOe_aUIpl1h_RdHnreACvevwNHAZWT0XHDL0HsfzRY/edit#)
 
 
-## Interface Specification
+#### Interface Specification
 
 This will specify the interfaces through which the different components communicate
 
 
-# Non-requirements - to be deleted below here.
+### Non-requirements - to be deleted below here.
 
 The below has been copied into the spec doc.    Comments may be lost.  I'm only preserving it to save the comments for a few days - they don't cut & paste into the other doc!
 
 
-## Interface with Environment Component
+#### Interface with Environment Component
 
 
 
@@ -201,7 +201,7 @@ The below has been copied into the spec doc.    Comments may be lost.  I'm only
         *   Render frames in separate process / thread
 
 
-#### Environment Snapshot
+###### Environment Snapshot
 
 **Data Structure**
 
@@ -227,7 +227,7 @@ Top-level dictionary
             *   TBD
 
 
-## Investigation into Existing Tools / Libraries
+#### Investigation into Existing Tools / Libraries
 
 
 
@@ -252,9 +252,9 @@ Top-level dictionary
 To-be-filled
 
 
-## Technical Graphics Considerations
+#### Technical Graphics Considerations
 
 
-#### Overlay dynamic primitives over the background at each time step.
+###### Overlay dynamic primitives over the background at each time step.
 
 No point trying to figure out changes. Need to explicitly draw every primitive anyways (that's how these renders work).
diff --git a/docs/specifications_index.rst b/docs/specifications_index.rst
deleted file mode 100644
index ce42dad5f3885bb425ed2cde88e0ea5dfb3eb358..0000000000000000000000000000000000000000
--- a/docs/specifications_index.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-Flatland Specs
-==============
-
-.. toctree::
-   :maxdepth: 2
-
-   specifications/specifications.md
-   specifications/core.md
-   specifications/railway.md
-   specifications/rendering.md
-   specifications/specifications.md
-   specifications/visualization.md
diff --git a/docs/gettingstarted.rst b/docs/tutorials/01_gettingstarted.rst
similarity index 72%
rename from docs/gettingstarted.rst
rename to docs/tutorials/01_gettingstarted.rst
index 8bde9adfa41089ab932a2d753befdc16b0a5d7ec..9ca370a0e1ed6a698d3e37111c70b97ac0ad2aa8 100644
--- a/docs/gettingstarted.rst
+++ b/docs/tutorials/01_gettingstarted.rst
@@ -1,6 +1,5 @@
-===============
-Getting Started
-===============
+Getting Started Tutorial
+========================
 
 Overview
 --------
@@ -16,9 +15,8 @@ To use flatland in a project:
     import flatland
 
 
-Part 1 : Basic Usage
---------------------
-
+Simple Example 1 : Basic Usage
+------------------------------
 The basic usage of RailEnv environments consists in creating a RailEnv object
 endowed with a rail generator, that generates new rail networks on each reset,
 and an observation generator object, that is supplied with environment-specific
@@ -120,7 +118,8 @@ The complete code for this part of the Getting Started guide can be found in
 
 
 Part 2 : Training a Simple an Agent on Flatland
------------------------------------------------
+---------------------------------------------------------
+
 This is a brief tutorial on how to train an agent on Flatland.
 Here we use a simple random agent to illustrate the process on how to interact with the environment.
 The corresponding code can be found in examples/training_example.py and in the baselines repository
@@ -187,77 +186,4 @@ This dictionary is then passed to the environment which checks the validity of a
 The environment returns an array of new observations, reward dictionary for all agents as well as a flag for which agents are done.
 This information can be used to update the policy of your agent and if done['__all__'] == True the episode terminates.
 
-Part 3 : Customizing Observations and Level Generators
-------------------------------------------------------
-
-Example code for generating custom observations given a RailEnv and to generate
-random rail maps are available in examples/custom_observation_example.py and
-examples/custom_railmap_example.py .
-
-Custom observations can be produced by deriving a new object from the
-core.env_observation_builder.ObservationBuilder base class, for example as follows:
-
-.. code-block:: python
-
-    class CustomObs(ObservationBuilder):
-        def __init__(self):
-            self.observation_space = [5]
-
-        def reset(self):
-            return
-
-        def get(self, handle):
-            observation = handle*np.ones((5,))
-            return observation
-
-It is important that an observation_space is defined with a list of dimensions
-of the returned observation tensors. get() returns the observation for each agent,
-of handle 'handle'.
-
-A RailEnv environment can then be created as usual:
-
-.. code-block:: python
-
-    env = RailEnv(width=7,
-                  height=7,
-                  rail_generator=random_rail_generator(),
-                  number_of_agents=3,
-                  obs_builder_object=CustomObs())
-
-As for generating custom rail maps, the RailEnv class accepts a rail_generator
-argument that must be a function with arguments `width`, `height`, `num_agents`,
-and `num_resets=0`, and that has to return a GridTransitionMap object (the rail map),
-and three lists of tuples containing the (row,column) coordinates of each of
-num_agent agents, their initial orientation **(0=North, 1=East, 2=South, 3=West)**,
-and the position of their targets.
-
-For example, the following custom rail map generator returns an empty map of
-size (height, width), with no agents (regardless of num_agents):
-
-.. code-block:: python
-
-    def custom_rail_generator():
-        def generator(width, height, num_agents=0, num_resets=0):
-            rail_trans = RailEnvTransitions()
-            grid_map = GridTransitionMap(width=width, height=height, transitions=rail_trans)
-            rail_array = grid_map.grid
-            rail_array.fill(0)
-
-            agents_positions = []
-            agents_direction = []
-            agents_target = []
-
-            return grid_map, agents_positions, agents_direction, agents_target
-        return generator
-
-It is worth to note that helpful utilities to manage RailEnv environments and their
-related data structures are available in 'envs.env_utils'. In particular,
-envs.env_utils.get_rnd_agents_pos_tgt_dir_on_rail is fairly handy to fill in
-random (but consistent) agents along with their targets and initial directions,
-given a rail map (GridTransitionMap object) and the desired number of agents:
-
-.. code-block:: python
-
-    agents_position, agents_direction, agents_target = get_rnd_agents_pos_tgt_dir_on_rail(
-        rail_map,
-        num_agents)
+The full source code of this example can be found in `examples/training_example.py <https://gitlab.aicrowd.com/flatland/flatland/blob/master/examples/training_example.py>`_.
diff --git a/docs/intro_observationbuilder.rst b/docs/tutorials/02_observationbuilder.rst
similarity index 99%
rename from docs/intro_observationbuilder.rst
rename to docs/tutorials/02_observationbuilder.rst
index 50d65fd4771e5a5362aa7250e6f50c9f884299f4..fd5decae31139a883289155131d71fd6870e0a74 100644
--- a/docs/intro_observationbuilder.rst
+++ b/docs/tutorials/02_observationbuilder.rst
@@ -1,6 +1,5 @@
-==============================================================
-Getting Started with custom observations and custom predictors
-==============================================================
+Custom observations and custom predictors Tutorial
+==================================================
 
 Overview
 --------
diff --git a/docs/tutorials/03_rail_and_schedule_generator.md b/docs/tutorials/03_rail_and_schedule_generator.md
new file mode 100644
index 0000000000000000000000000000000000000000..5a236a6d507d6b743430ef34abbb6c0456dd0d7c
--- /dev/null
+++ b/docs/tutorials/03_rail_and_schedule_generator.md
@@ -0,0 +1,72 @@
+# Level Generation Tutorial
+
+We are currently working on different new level generators and you can expect that the levels in the submission testing will not all come from just one but rather different level generators to be sure that the controllers can handle any railway specific challenge.
+
+Let's have a look at the `sparse_rail_generator`.
+
+## Sparse Rail Generator
+![Example_Sparse](https://i.imgur.com/DP8sIyx.png)
+
+The idea behind the sparse rail generator is to mimic classic railway structures where dense nodes (cities) are sparsely connected to each other and where you have to manage traffic flow between the nodes efficiently. 
+The cities in this level generator are much simplified in comparison to real city networks but it mimics parts of the problems faced in daily operations of any railway company.
+
+There are a few parameters you can tune to build your own map and test different complexity levels of the levels. 
+**Warning** some combinations of parameters do not go well together and will lead to infeasible level generation. 
+In the worst case, the level generator currently issues a warning when it cannot build the environment according to the parameters provided. 
+This will lead to a crash of the whole env. 
+We are currently working on improvements here and are **happy for any suggestions from your side**.
+
+To build an environment you instantiate a `RailEnv` as follows:
+
+```python
+# Initialize the generator
+rail_generator=sparse_rail_generator(
+    num_cities=10,  # Number of cities in map
+    num_intersections=10,  # Number of intersections in map
+    num_trainstations=50,  # Number of possible start/targets on map
+    min_node_dist=6,  # Minimal distance of nodes
+    node_radius=3,  # Proximity of stations to city center
+    num_neighb=3,  # Number of connections to other cities
+    seed=5,  # Random seed
+    grid_mode=False  # Ordered distribution of nodes
+)
+
+# Build the environment
+env = RailEnv(
+    width=50,
+    height=50,
+    rail_generator=rail_generator,
+    schedule_generator=sparse_schedule_generator(),
+    number_of_agents=10,
+    obs_builder_object=TreeObsForRailEnv(max_depth=3,predictor=shortest_path_predictor)
+)
+```
+
+You can see that you now need both a `rail_generator` and a `schedule_generator` to generate a level. These need to work nicely together. The `rail_generator` will only generate the railway infrastructure and provide hints to the `schedule_generator` about where to place agents. The `schedule_generator` will then generate a schedule, meaning it places agents at different train stations and gives them tasks by providing individual targets.
+
+You can tune the following parameters in the `sparse_rail_generator`:
+
+- `num_cities` is the number of cities on a map. Cities are the only nodes that can host start and end points for agent tasks (Train stations). Here you have to be careful that the number is not too high as all the cities have to fit on the map. When `grid_mode=False` you have to be careful when choosing `min_node_dist` because levels will fail if not all cities (and intersections) can be placed with at least `min_node_dist` between them.
+- `num_intersections` is the number of nodes that don't hold any trainstations. They are also the first priority that a city connects to. We use these to allow for sparse connections between cities.
+- `num_trainstations` defines the *Total* number of trainstations in the network. This also sets the max number of allowed agents in the environment. This is also a delicate parameter as there is only a limited amount of space available around nodes and thus if the number is too high the level generation will fail. *Important*: Only the number of agents provided to the environment will actually produce active train stations. The others will just be present as dead-ends (See figures below).
+- `min_node_dist` is only used if `grid_mode=False` and represents the minimal distance between two nodes.
+- `node_radius` defines the extent of a city. Each trainstation is placed at a distance to the closest city node that is smaller or equal to this number.
+- `num_neighb` defines the number of neighbouring nodes that connect to each other. Thus this changes the connectivity and thus the amount of alternative routes in the network.
+- `grid_mode` True -> Nodes evenly distributed in env, False -> Random distribution of nodes
+- `enhance_intersection`: True -> Extra rail elements added at intersections
+- `seed` is used to initialize the random generator
+
+
+If you run into any bugs with sets of parameters please let us know.
+
+Here is a network with `grid_mode=False` and the parameters from above.
+
+![sparse_random](https://i.imgur.com/Xg7nifF.png)
+
+and here with `grid_mode=True`
+
+![sparse_ordered](https://i.imgur.com/jyA7Pt4.png)
+
+## Example code
+
+To see all the changes in action you can just run the `flatland_example_2_0.py` file in the examples folder. The file can be found [here](https://gitlab.aicrowd.com/flatland/flatland/blob/master/examples/flatland_2_0_example.py).
diff --git a/docs/tutorials/04_stochasticity.md b/docs/tutorials/04_stochasticity.md
new file mode 100644
index 0000000000000000000000000000000000000000..201e359b923c2b91d0b125f24011c8abfcf67fb2
--- /dev/null
+++ b/docs/tutorials/04_stochasticity.md
@@ -0,0 +1,74 @@
+# Stochasticity Tutorial
+
+Another area where we improved **Flat**land 2.0 is the stochastic events added during the episodes. 
+This is very common for railway networks where the initial plan usually needs to be rescheduled during operations as minor events such as delayed departure from trainstations, malfunctions on trains or infrastructure or just the weather lead to delayed trains.
+
+We implemented a Poisson process to simulate delays by stopping agents at random times for random durations. The parameters necessary for the stochastic events can be provided when creating the environment.
+
+```python
+# Use a the malfunction generator to break agents from time to time
+
+stochastic_data = {
+    'prop_malfunction': 0.5,  # Percentage of defective agents
+    'malfunction_rate': 30,  # Rate of malfunction occurrence
+    'min_duration': 3,  # Minimal duration of malfunction
+    'max_duration': 10  # Max duration of malfunction
+}
+```
+
+The parameters are as follows:
+
+- `prop_malfunction` is the proportion of agents that can malfunction. `1.0` means that each agent can break.
+- `malfunction_rate` is the mean rate of the Poisson process in number of environment steps.
+- `min_duration` and `max_duration` set the range of malfunction durations. They are sampled uniformly
+
+You can introduce stochasticity by simply creating the env as follows:
+
+```python
+env = RailEnv(
+    ...
+    stochastic_data=stochastic_data,  # Malfunction data generator
+    ...    
+)
+```
+In your controller, you can check whether an agent is malfunctioning: 
+```python
+obs, rew, done, info = env.step(actions) 
+...
+action_dict = dict()
+for a in range(env.get_num_agents()):
+    if info['malfunction'][a] == 0:
+        action_dict.update({a: ...})
+
+# Custom observation builder
+tree_observation = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())
+
+# Different agent types (trains) with different speeds.
+speed_ration_map = {1.: 0.25,  # Fast passenger train
+                    1. / 2.: 0.25,  # Fast freight train
+                    1. / 3.: 0.25,  # Slow commuter train
+                    1. / 4.: 0.25}  # Slow freight train
+
+env = RailEnv(width=50,
+              height=50,
+              rail_generator=sparse_rail_generator(num_cities=20,  # Number of cities in map (where train stations are)
+                                                   num_intersections=5,  # Number of intersections (no start / target)
+                                                   num_trainstations=15,  # Number of possible start/targets on map
+                                                   min_node_dist=3,  # Minimal distance of nodes
+                                                   node_radius=2,  # Proximity of stations to city center
+                                                   num_neighb=4,  # Number of connections to other cities/intersections
+                                                   seed=15,  # Random seed
+                                                   grid_mode=True,
+                                                   enhance_intersection=True
+                                                   ),
+              schedule_generator=sparse_schedule_generator(speed_ration_map),
+              number_of_agents=10,
+              stochastic_data=stochastic_data,  # Malfunction data generator
+              obs_builder_object=tree_observation)
+```
+
+You will quickly realize that this will lead to unforeseen difficulties which means that **your controller** needs to observe the environment at all times to be able to react to the stochastic events.
+
+## Example code
+
+To see all the changes in action you can just run the `flatland_example_2_0.py` file in the examples folder. The file can be found [here](https://gitlab.aicrowd.com/flatland/flatland/blob/master/examples/flatland_2_0_example.py).
diff --git a/docs/tutorials/05_multispeed.md b/docs/tutorials/05_multispeed.md
new file mode 100644
index 0000000000000000000000000000000000000000..99db7ee6a275fe317c891cdebfc8d6aaf99d0b8c
--- /dev/null
+++ b/docs/tutorials/05_multispeed.md
@@ -0,0 +1,128 @@
+# Different speed profiles Tutorial
+
+One of the main contributions to the complexity of railway network operations stems from the fact that all trains travel at different speeds while sharing a very limited railway network. 
+In **Flat**land 2.0 this feature will be enabled as well and will lead to much more complex configurations. Here we count on your support if you find bugs or improvements  :).
+
+The different speed profiles can be generated using the `schedule_generator`, where you can actually choose as many different speeds as you like. 
+Keep in mind that the *fastest speed* is 1 and all slower speeds must be between 1 and 0. 
+For the submission scoring you can assume that there will be no more than 5 speed profiles.
+
+
+ 
+Later versions of **Flat**land might have varying speeds during episodes. Therefore, we return the agent speeds. 
+Notice that we do not guarantee that the speed will be computed at each step, but if not costly we will return it at each step.
+In your controller, you can get the agents' speed from the `info` returned by `step`: 
+```python
+obs, rew, done, info = env.step(actions) 
+...
+for a in range(env.get_num_agents()):
+    speed = info['speed'][a]
+```
+
+## Actions and observation with different speed levels
+
+Because the different speeds are implemented as fractions, the agent's ability to perform actions has been updated. 
+We **do not allow actions to change within the cell**. 
+This means that each agent can only choose an action to be taken when entering a cell. 
+This action is then executed when a step to the next cell is valid. For example
+
+- Agent enters switch and chooses to deviate left. Agent fractional speed is 1/4 and thus the agent will take 4 time steps to complete its journey through the cell. On the 4th time step the agent will leave the cell deviating left as chosen at the entry of the cell.
+    - All actions chosen by the agent during its travels within a cell are ignored
+    - Agents can make observations at any time step. Make sure to discard observations without any information. See this [example](https://gitlab.aicrowd.com/flatland/baselines/blob/master/torch_training/training_navigation.py) for a simple implementation.
+- The environment checks if agent is allowed to move to next cell only at the time of the switch to the next cell
+
+In your controller, you can check whether an agent requires an action by checking `info`: 
+```python
+obs, rew, done, info = env.step(actions) 
+...
+action_dict = dict()
+for a in range(env.get_num_agents()):
+    if info['action_required'][a] and info['malfunction'][a] == 0:
+        action_dict.update({a: ...})
+
+```
+Notice that `info['action_required'][a]` does not mean that the action will have an effect: 
+if the next cell is blocked or the agent breaks down, the action cannot be performed and an action will be required again in the next step. 
+
+## Rail Generators and Schedule Generators
+The separation between rail generator and schedule generator reflects the organisational separation in the railway domain
+- Infrastructure Manager (IM): is responsible for the layout and maintenance of tracks
+- Railway Undertaking (RU): operates trains on the infrastructure
+Usually, there is a third organisation, which ensures discrimination-free access to the infrastructure for concurrent requests for the infrastructure in a **schedule planning phase**.
+However, in the **Flat**land challenge, we focus on the re-scheduling problem during live operations.
+
+Technically, 
+```python
+RailGeneratorProduct = Tuple[GridTransitionMap, Optional[Any]]
+RailGenerator = Callable[[int, int, int, int], RailGeneratorProduct]
+
+AgentPosition = Tuple[int, int]
+ScheduleGeneratorProduct = Tuple[List[AgentPosition], List[AgentPosition], List[AgentPosition], List[float]]
+ScheduleGenerator = Callable[[GridTransitionMap, int, Optional[Any]], ScheduleGeneratorProduct]
+```
+
+We can then produce `RailGenerator`s by currying:
+```python
+def sparse_rail_generator(num_cities=5, num_intersections=4, num_trainstations=2, min_node_dist=20, node_radius=2,
+                          num_neighb=3, grid_mode=False, enhance_intersection=False, seed=0):
+
+    def generator(width, height, num_agents, num_resets=0):
+    
+        # generate the grid and (optionally) some hints for the schedule_generator
+        ...
+         
+        return grid_map, {'agents_hints': {
+            'num_agents': num_agents,
+            'agent_start_targets_nodes': agent_start_targets_nodes,
+            'train_stations': train_stations
+        }}
+
+    return generator
+```
+And, similarly, `ScheduleGenerator`s:
+```python
+def sparse_schedule_generator(speed_ratio_map: Mapping[float, float] = None) -> ScheduleGenerator:
+    def generator(rail: GridTransitionMap, num_agents: int, hints: Any = None):
+        # place agents:
+        # - initial position
+        # - initial direction
+        # - (initial) speed
+        # - malfunction
+        ...
+                
+        return agents_position, agents_direction, agents_target, speeds, agents_malfunction
+
+    return generator
+```
+Notice that the `rail_generator` may pass `agents_hints` to the  `schedule_generator` which the latter may interpret.
+For instance, the way the `sparse_rail_generator` generates the grid, it already determines the agent's goal and target.
+Hence, `rail_generator` and `schedule_generator` have to match if `schedule_generator` presupposes some specific `agents_hints`.
+
+The environment's `reset` takes care of applying the two generators:
+```python
+    def __init__(self,
+            ...
+             rail_generator: RailGenerator = random_rail_generator(),
+             schedule_generator: ScheduleGenerator = random_schedule_generator(),
+             ...
+             ):
+        self.rail_generator: RailGenerator = rail_generator
+        self.schedule_generator: ScheduleGenerator = schedule_generator
+        
+    def reset(self, regen_rail=True, replace_agents=True):
+        rail, optionals = self.rail_generator(self.width, self.height, self.get_num_agents(), self.num_resets)
+
+        ...
+
+        if replace_agents:
+            agents_hints = None
+            if optionals and 'agents_hints' in optionals:
+                agents_hints = optionals['agents_hints']
+            self.agents_static = EnvAgentStatic.from_lists(
+                *self.schedule_generator(self.rail, self.get_num_agents(), hints=agents_hints))
+```
+
+
+## Example code
+
+To see all the changes in action you can just run the `flatland_example_2_0.py` file in the examples folder. The file can be found [here](https://gitlab.aicrowd.com/flatland/flatland/blob/master/examples/flatland_2_0_example.py).
diff --git a/env_data/tests/test_002.pkl b/env_data/tests/test_002.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e46a5c088ef555109c352b7a4d5eaf8dfbaf8700
Binary files /dev/null and b/env_data/tests/test_002.pkl differ
diff --git a/flatland/core/grid/grid4.py b/flatland/core/grid/grid4.py
index 2d7284dc74cf2411ee2f0466d0d653bffb2132dc..819fc8da9102b688138cb06f59833e718188a411 100644
--- a/flatland/core/grid/grid4.py
+++ b/flatland/core/grid/grid4.py
@@ -1,5 +1,5 @@
 from enum import IntEnum
-from typing import Type
+from typing import Type, List
 
 import numpy as np
 
@@ -238,5 +238,6 @@ class Grid4Transitions(Transitions):
         cell_transition &= cell_transition & (~self.maskDeadEnds) & 0xffff
         return cell_transition
 
-    def get_entry_directions(self, cell_transition):
+    @staticmethod
+    def get_entry_directions(cell_transition) -> List[int]:
         return [(cell_transition >> ((3 - orientation) * 4)) & 15 > 0 for orientation in range(4)]
diff --git a/flatland/core/grid/grid_utils.py b/flatland/core/grid/grid_utils.py
index fe4a381fb8458aa61bd54be9f2ba105297400402..6004c53567f46feb30e283f0b9dbd946ee8f1375 100644
--- a/flatland/core/grid/grid_utils.py
+++ b/flatland/core/grid/grid_utils.py
@@ -23,9 +23,9 @@ class Vec2dOperations:
 
         :param node_a: tuple with coordinate (x,y) or 2d vector
         :param node_b: tuple with coordinate (x,y) or 2d vector
+
         :return:
-            -------
-        check if node_a and nobe_b are equal
+            check if node_a and node_b are equal
         """
         return node_a[0] == node_b[0] and node_a[1] == node_b[1]
 
@@ -36,9 +36,9 @@ class Vec2dOperations:
 
         :param node_a: tuple with coordinate (x,y) or 2d vector
         :param node_b: tuple with coordinate (x,y) or 2d vector
+
         :return:
-            -------
-        tuple with coordinate (x,y) or 2d vector
+            tuple with coordinate (x,y) or 2d vector
         """
         return node_a[0] - node_b[0], node_a[1] - node_b[1]
 
@@ -49,9 +49,8 @@ class Vec2dOperations:
 
         :param node_a: tuple with coordinate (x,y) or 2d vector
         :param node_b: tuple with coordinate (x,y) or 2d vector
-        :return:
-            -------
-        tuple with coordinate (x,y) or 2d vector
+
+        :return: tuple with coordinate (x,y) or 2d vector
         """
         return node_a[0] + node_b[0], node_a[1] + node_b[1]
 
@@ -61,9 +60,8 @@ class Vec2dOperations:
         vector operation : rotates the 2D vector +90°
 
         :param node: tuple with coordinate (x,y) or 2d vector
-        :return:
-            -------
-        tuple with coordinate (x,y) or 2d vector
+
+        :return: tuple with coordinate (x,y) or 2d vector
         """
         return node[1], -node[0]
 
@@ -74,9 +72,9 @@ class Vec2dOperations:
         [see: https://lyfat.wordpress.com/2012/05/22/euclidean-vs-chebyshev-vs-manhattan-distance/]
 
         :param node: tuple with coordinate (x,y) or 2d vector
+
         :return:
-            -------
-        tuple with coordinate (x,y) or 2d vector
+            tuple with coordinate (x,y) or 2d vector
         """
         return np.sqrt(node[0] * node[0] + node[1] * node[1])
 
@@ -126,7 +124,7 @@ class Vec2dOperations:
         calculates the chebyshev norm of the 2d vector
         [see: https://lyfat.wordpress.com/2012/05/22/euclidean-vs-chebyshev-vs-manhattan-distance/]
 
-        :Parameters
+        Parameters
         ----------
         node_a
             tuple with coordinate (x,y) or 2d vector
@@ -144,12 +142,11 @@ class Vec2dOperations:
     @staticmethod
     def normalize(node: Vector2D) -> Tuple[float, float]:
         """
-        normalize the 2d vector = v/|v|
+        normalize the 2d vector = `v/|v|`
 
         :param node: tuple with coordinate (x,y) or 2d vector
-        :return:
-            -------
-        tuple with coordinate (x,y) or 2d vector
+
+        :return: tuple with coordinate (x,y) or 2d vector
         """
         n = Vec2dOperations.get_norm(node)
         if n > 0.0:
@@ -163,9 +160,8 @@ class Vec2dOperations:
 
          :param node: tuple with coordinate (x,y) or 2d vector
          :param scale: scalar to scale
-         :return:
-             -------
-         tuple with coordinate (x,y) or 2d vector
+
+         :return: tuple with coordinate (x,y) or 2d vector
          """
         return node[0] * scale, node[1] * scale
 
@@ -175,9 +171,8 @@ class Vec2dOperations:
          rounds the x and y coordinate and convert them to an integer values
 
          :param node: tuple with coordinate (x,y) or 2d vector
-         :return:
-             -------
-         tuple with coordinate (x,y) or 2d vector
+
+         :return: tuple with coordinate (x,y) or 2d vector
          """
         return int(np.round(node[0])), int(np.round(node[1]))
 
@@ -187,9 +182,9 @@ class Vec2dOperations:
          ceiling the x and y coordinate and convert them to an integer values
 
          :param node: tuple with coordinate (x,y) or 2d vector
+
          :return:
-             -------
-         tuple with coordinate (x,y) or 2d vector
+            tuple with coordinate (x,y) or 2d vector
          """
         return int(np.ceil(node[0])), int(np.ceil(node[1]))
 
@@ -199,9 +194,9 @@ class Vec2dOperations:
          floor the x and y coordinate and convert them to an integer values
 
          :param node: tuple with coordinate (x,y) or 2d vector
+
          :return:
-             -------
-         tuple with coordinate (x,y) or 2d vector
+            tuple with coordinate (x,y) or 2d vector
          """
         return int(np.floor(node[0])), int(np.floor(node[1]))
 
@@ -213,9 +208,9 @@ class Vec2dOperations:
          :param node: tuple with coordinate (x,y) or 2d vector
          :param min_value: scalar value
          :param max_value: scalar value
+
          :return:
-             -------
-         tuple with coordinate (x,y) or 2d vector
+            tuple with coordinate (x,y) or 2d vector
          """
         return max(min_value, min(max_value, node[0])), max(min_value, min(max_value, node[1]))
 
@@ -226,9 +221,9 @@ class Vec2dOperations:
 
          :param node: tuple with coordinate (x,y) or 2d vector
          :param rot_in_degree:  angle in degree
+
          :return:
-             -------
-         tuple with coordinate (x,y) or 2d vector
+            tuple with coordinate (x,y) or 2d vector
          """
         alpha = rot_in_degree / 180.0 * np.pi
         x0 = node[0]
diff --git a/flatland/core/transition_map.py b/flatland/core/transition_map.py
index 07678add5549c3ac13df876132ed3bcdbf5bec5e..9db7f3c7775a01824a849d8dc126fbbb3955d212 100644
--- a/flatland/core/transition_map.py
+++ b/flatland/core/transition_map.py
@@ -14,7 +14,7 @@ from flatland.core.grid.rail_env_grid import RailEnvTransitions
 from flatland.core.transitions import Transitions
 from flatland.utils.ordered_set import OrderedSet
 
-
+# TODO are these general classes or for grid4 only?
 class TransitionMap:
     """
     Base TransitionMap class.
diff --git a/flatland/envs/distance_map.py b/flatland/envs/distance_map.py
index c9c6b00375ef4577880e2b8c98c2ff9dc946a7fa..22721407f059ff2e02d907110cb9f982aa9d599e 100644
--- a/flatland/envs/distance_map.py
+++ b/flatland/envs/distance_map.py
@@ -18,27 +18,21 @@ class DistanceMap:
         self.agents: List[EnvAgent] = agents
         self.rail: Optional[GridTransitionMap] = None
 
-    """
-    Set the distance map
-    """
     def set(self, distance_map: np.ndarray):
+        """
+        Set the distance map
+        """
         self.distance_map = distance_map
 
-    """
-    Get the distance map
-    """
     def get(self) -> np.ndarray:
-
+        """
+        Get the distance map
+        """
         if self.reset_was_called:
             self.reset_was_called = False
 
             nb_agents = len(self.agents)
             compute_distance_map = True
-            if self.agents_previous_computation is not None and nb_agents == len(self.agents_previous_computation):
-                compute_distance_map = False
-                for i in range(nb_agents):
-                    if self.agents[i].target != self.agents_previous_computation[i].target:
-                        compute_distance_map = True
             # Don't compute the distance map if it was loaded
             if self.agents_previous_computation is None and self.distance_map is not None:
                 compute_distance_map = False
@@ -51,12 +45,12 @@ class DistanceMap:
 
         return self.distance_map
 
-    """
-    Reset the distance map
-    """
     def reset(self, agents: List[EnvAgent], rail: GridTransitionMap):
+        """
+        Reset the distance map
+        """
         self.reset_was_called = True
-        self.agents = agents
+        self.agents: List[EnvAgent] = agents
         self.rail = rail
         self.env_height = rail.height
         self.env_width = rail.width
@@ -110,7 +104,8 @@ class DistanceMap:
 
         return max_distance
 
-    def _get_and_update_neighbors(self, rail: GridTransitionMap, position, target_nr, current_distance, enforce_target_direction=-1):
+    def _get_and_update_neighbors(self, rail: GridTransitionMap, position, target_nr, current_distance,
+                                  enforce_target_direction=-1):
         """
         Utility function used by _distance_map_walker to perform a BFS walk over the rail, filling in the
         minimum distances from each target cell.
@@ -134,8 +129,7 @@ class DistanceMap:
                 for agent_orientation in range(4):
                     # Is a transition along movement `desired_movement_from_new_cell' to the current cell possible?
                     is_valid = rail.get_transition((new_cell[0], new_cell[1], agent_orientation),
-                                                            desired_movement_from_new_cell)
-                    # is_valid = True
+                                                   desired_movement_from_new_cell)
 
                     if is_valid:
                         """
diff --git a/flatland/envs/predictions.py b/flatland/envs/predictions.py
index 77707b9f110376ddf2638b830830ff1a1c1edbf6..76095a2a2e1d9532951600118c6a777612641101 100644
--- a/flatland/envs/predictions.py
+++ b/flatland/envs/predictions.py
@@ -5,8 +5,9 @@ Collection of environment-specific PredictionBuilder.
 import numpy as np
 
 from flatland.core.env_prediction_builder import PredictionBuilder
-from flatland.core.grid.grid4_utils import get_new_position
+from flatland.envs.distance_map import DistanceMap
 from flatland.envs.rail_env import RailEnvActions
+from flatland.envs.rail_env_shortest_paths import get_shortest_paths
 from flatland.utils.ordered_set import OrderedSet
 
 
@@ -59,7 +60,7 @@ class DummyPredictorForRailEnv(PredictionBuilder):
 
                     continue
                 for action in action_priorities:
-                    cell_isFree, new_cell_isValid, new_direction, new_position, transition_isValid = \
+                    cell_is_free, new_cell_isValid, new_direction, new_position, transition_isValid = \
                         self.env._check_action_on_agent(action, agent)
                     if all([new_cell_isValid, transition_isValid]):
                         # move and change direction to face the new_direction that was
@@ -92,6 +93,9 @@ class ShortestPathPredictorForRailEnv(PredictionBuilder):
         """
         Called whenever get_many in the observation build is called.
         Requires distance_map to extract the shortest path.
+        Does not take into account future positions of other agents!
+
+        If there is no shortest path, the agent just stands still and stops moving.
 
         Parameters
         ----------
@@ -106,14 +110,15 @@ class ShortestPathPredictorForRailEnv(PredictionBuilder):
             - position axis 0
             - position axis 1
             - direction
-            - action taken to come here
+            - action taken to come here (not implemented yet)
             The prediction at 0 is the current position, direction etc.
         """
         agents = self.env.agents
         if handle:
             agents = [self.env.agents[handle]]
-        distance_map = self.env.distance_map
-        assert distance_map is not None
+        distance_map: DistanceMap = self.env.distance_map
+
+        shortest_paths = get_shortest_paths(distance_map, max_depth=self.max_depth)
 
         prediction_dict = {}
         for agent in agents:
@@ -123,52 +128,35 @@ class ShortestPathPredictorForRailEnv(PredictionBuilder):
             times_per_cell = int(np.reciprocal(agent_speed))
             prediction = np.zeros(shape=(self.max_depth + 1, 5))
             prediction[0] = [0, *_agent_initial_position, _agent_initial_direction, 0]
+
+            shortest_path = shortest_paths[agent.handle]
+
+            # if there is a shortest path, remove the initial position
+            if shortest_path:
+                shortest_path = shortest_path[1:]
+
             new_direction = _agent_initial_direction
             new_position = _agent_initial_position
             visited = OrderedSet()
             for index in range(1, self.max_depth + 1):
-                # if we're at the target, stop moving...
-                if agent.position == agent.target:
-                    prediction[index] = [index, *agent.target, agent.direction, RailEnvActions.STOP_MOVING]
-                    visited.add((agent.position[0], agent.position[1], agent.direction))
-                    continue
-                if not agent.moving:
-                    prediction[index] = [index, *agent.position, agent.direction, RailEnvActions.STOP_MOVING]
-                    visited.add((agent.position[0], agent.position[1], agent.direction))
+                # if we're at the target or not moving, stop moving until max_depth is reached
+                if new_position == agent.target or not agent.moving or not shortest_path:
+                    prediction[index] = [index, *new_position, new_direction, RailEnvActions.STOP_MOVING]
+                    visited.add((*new_position, agent.direction))
                     continue
-                # Take shortest possible path
-                cell_transitions = self.env.rail.get_transitions(*agent.position, agent.direction)
-
-                if np.sum(cell_transitions) == 1 and index % times_per_cell == 0:
-                    new_direction = np.argmax(cell_transitions)
-                    new_position = get_new_position(agent.position, new_direction)
-                elif np.sum(cell_transitions) > 1 and index % times_per_cell == 0:
-                    min_dist = np.inf
-                    no_dist_found = True
-                    for direction in range(4):
-                        if cell_transitions[direction] == 1:
-                            neighbour_cell = get_new_position(agent.position, direction)
-                            target_dist = distance_map.get()[agent.handle, neighbour_cell[0], neighbour_cell[1], direction]
-                            if target_dist < min_dist or no_dist_found:
-                                min_dist = target_dist
-                                new_direction = direction
-                                no_dist_found = False
-                    new_position = get_new_position(agent.position, new_direction)
-                elif index % times_per_cell == 0:
-                    raise Exception("No transition possible {}".format(cell_transitions))
-
-                # update the agent's position and direction
-                agent.position = new_position
-                agent.direction = new_direction
+
+                if index % times_per_cell == 0:
+                    new_position = shortest_path[0].position
+                    new_direction = shortest_path[0].direction
+
+                    shortest_path = shortest_path[1:]
 
                 # prediction is ready
                 prediction[index] = [index, *new_position, new_direction, 0]
-                visited.add((new_position[0], new_position[1], new_direction))
+                visited.add((*new_position, new_direction))
+
+            # TODO: very bad side effects for visualization only: hand the dev_pred_dict back instead of setting on env!
             self.env.dev_pred_dict[agent.handle] = visited
             prediction_dict[agent.handle] = prediction
 
-            # cleanup: reset initial position
-            agent.position = _agent_initial_position
-            agent.direction = _agent_initial_direction
-
         return prediction_dict
diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py
index d0add3086014c7ad07c29e01588cba380c26cbd7..6e021183a77642c8547c5efc3a8c97764fa078d0 100644
--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -4,7 +4,7 @@ Definition of the RailEnv environment.
 # TODO:  _ this is a global method --> utils or remove later
 import warnings
 from enum import IntEnum
-from typing import List, Set, NamedTuple, Optional, Tuple, Dict
+from typing import List, NamedTuple, Optional, Tuple, Dict
 
 import msgpack
 import msgpack_numpy as m
@@ -12,7 +12,7 @@ import numpy as np
 
 from flatland.core.env import Environment
 from flatland.core.env_observation_builder import ObservationBuilder
-from flatland.core.grid.grid4 import Grid4TransitionsEnum
+from flatland.core.grid.grid4 import Grid4TransitionsEnum, Grid4Transitions
 from flatland.core.grid.grid4_utils import get_new_position
 from flatland.core.transition_map import GridTransitionMap
 from flatland.envs.agent_utils import EnvAgentStatic, EnvAgent
@@ -20,7 +20,6 @@ from flatland.envs.distance_map import DistanceMap
 from flatland.envs.observations import TreeObsForRailEnv
 from flatland.envs.rail_generators import random_rail_generator, RailGenerator
 from flatland.envs.schedule_generators import random_schedule_generator, ScheduleGenerator
-from flatland.utils.ordered_set import OrderedSet
 
 m.patch()
 
@@ -241,9 +240,6 @@ class RailEnv(Environment):
         #  can we not put 'self.rail_generator(..)' into 'if regen_rail or self.rail is None' condition?
         rail, optionals = self.rail_generator(self.width, self.height, self.get_num_agents(), self.num_resets)
 
-        if optionals and 'distance_map' in optionals:
-            self.distance_map.set(optionals['distance_map'])
-
         if regen_rail or self.rail is None:
             self.rail = rail
             self.height, self.width = self.rail.grid.shape
@@ -253,6 +249,11 @@ class RailEnv(Environment):
                     check = self.rail.cell_neighbours_valid(rc_pos, True)
                     if not check:
                         warnings.warn("Invalid grid at {} -> {}".format(rc_pos, check))
+        # TODO https://gitlab.aicrowd.com/flatland/flatland/issues/172
+        #  hacky: we must re-compute the distance map and not use the initial distance_map loaded from file by
+        #  rail_from_file!!!
+        elif optionals and 'distance_map' in optionals:
+            self.distance_map.set(optionals['distance_map'])
 
         if replace_agents:
             agents_hints = None
@@ -587,64 +588,13 @@ class RailEnv(Environment):
             transition_valid = True
         return new_direction, transition_valid
 
-    @staticmethod
-    def get_valid_move_actions_(agent_direction: Grid4TransitionsEnum,
-                                agent_position: Tuple[int, int],
-                                rail: GridTransitionMap) -> Set[RailEnvNextAction]:
-        """
-        Get the valid move actions (forward, left, right) for an agent.
-
-        Parameters
-        ----------
-        agent_direction : Grid4TransitionsEnum
-        agent_position: Tuple[int,int]
-        rail : GridTransitionMap
-
-
-        Returns
-        -------
-        Set of `RailEnvNextAction` (tuples of (action,position,direction))
-            Possible move actions (forward,left,right) and the next position/direction they lead to.
-            It is not checked that the next cell is free.
-        """
-        valid_actions: Set[RailEnvNextAction] = OrderedSet()
-        possible_transitions = rail.get_transitions(*agent_position, agent_direction)
-        num_transitions = np.count_nonzero(possible_transitions)
-        # Start from the current orientation, and see which transitions are available;
-        # organize them as [left, forward, right], relative to the current orientation
-        # If only one transition is possible, the forward branch is aligned with it.
-        if rail.is_dead_end(agent_position):
-            action = RailEnvActions.MOVE_FORWARD
-            exit_direction = (agent_direction + 2) % 4
-            if possible_transitions[exit_direction]:
-                new_position = get_new_position(agent_position, exit_direction)
-                valid_actions.add(RailEnvNextAction(action, new_position, exit_direction))
-        elif num_transitions == 1:
-            action = RailEnvActions.MOVE_FORWARD
-            for new_direction in [(agent_direction + i) % 4 for i in range(-1, 2)]:
-                if possible_transitions[new_direction]:
-                    new_position = get_new_position(agent_position, new_direction)
-                    valid_actions.add(RailEnvNextAction(action, new_position, new_direction))
-        else:
-            for new_direction in [(agent_direction + i) % 4 for i in range(-1, 2)]:
-                if possible_transitions[new_direction]:
-                    if new_direction == agent_direction:
-                        action = RailEnvActions.MOVE_FORWARD
-                    elif new_direction == (agent_direction + 1) % 4:
-                        action = RailEnvActions.MOVE_RIGHT
-                    elif new_direction == (agent_direction - 1) % 4:
-                        action = RailEnvActions.MOVE_LEFT
-                    else:
-                        raise Exception("Illegal state")
-
-                    new_position = get_new_position(agent_position, new_direction)
-                    valid_actions.add(RailEnvNextAction(action, new_position, new_direction))
-        return valid_actions
-
     def _get_observations(self):
         self.obs_dict = self.obs_builder.get_many(list(range(self.get_num_agents())))
         return self.obs_dict
 
+    def get_valid_directions_on_grid(self, row: int, col: int) -> List[int]:
+        return Grid4Transitions.get_entry_directions(self.rail.get_full_transitions(row,col))
+
     def get_full_state_msg(self):
         grid_data = self.rail.grid.tolist()
         agent_static_data = [agent.to_list() for agent in self.agents_static]
diff --git a/flatland/envs/rail_env_shortest_paths.py b/flatland/envs/rail_env_shortest_paths.py
new file mode 100644
index 0000000000000000000000000000000000000000..793601d4d18ac38b729d15883089d5acbfc41ed3
--- /dev/null
+++ b/flatland/envs/rail_env_shortest_paths.py
@@ -0,0 +1,140 @@
+import math
+from typing import Dict, List, Optional, NamedTuple, Tuple, Set
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from flatland.core.grid.grid4 import Grid4TransitionsEnum
+from flatland.core.grid.grid4_utils import get_new_position
+from flatland.core.transition_map import GridTransitionMap
+from flatland.envs.distance_map import DistanceMap
+from flatland.envs.rail_env import RailEnvNextAction, RailEnvActions
+from flatland.utils.ordered_set import OrderedSet
+
+WalkingElement = \
+    NamedTuple('WalkingElement',
+               [('position', Tuple[int, int]), ('direction', int), ('next_action_element', RailEnvNextAction)])
+
+
+def get_valid_move_actions_(agent_direction: Grid4TransitionsEnum,
+                            agent_position: Tuple[int, int],
+                            rail: GridTransitionMap) -> Set[RailEnvNextAction]:
+    """
+    Get the valid move actions (forward, left, right) for an agent.
+
+    Parameters
+    ----------
+    agent_direction : Grid4TransitionsEnum
+    agent_position: Tuple[int,int]
+    rail : GridTransitionMap
+
+
+    Returns
+    -------
+    Set of `RailEnvNextAction` (tuples of (action,position,direction))
+        Possible move actions (forward,left,right) and the next position/direction they lead to.
+        It is not checked that the next cell is free.
+    """
+    valid_actions: Set[RailEnvNextAction] = OrderedSet()
+    possible_transitions = rail.get_transitions(*agent_position, agent_direction)
+    num_transitions = np.count_nonzero(possible_transitions)
+    # Start from the current orientation, and see which transitions are available;
+    # organize them as [left, forward, right], relative to the current orientation
+    # If only one transition is possible, the forward branch is aligned with it.
+    if rail.is_dead_end(agent_position):
+        action = RailEnvActions.MOVE_FORWARD
+        exit_direction = (agent_direction + 2) % 4
+        if possible_transitions[exit_direction]:
+            new_position = get_new_position(agent_position, exit_direction)
+            valid_actions.add(RailEnvNextAction(action, new_position, exit_direction))
+    elif num_transitions == 1:
+        action = RailEnvActions.MOVE_FORWARD
+        for new_direction in [(agent_direction + i) % 4 for i in range(-1, 2)]:
+            if possible_transitions[new_direction]:
+                new_position = get_new_position(agent_position, new_direction)
+                valid_actions.add(RailEnvNextAction(action, new_position, new_direction))
+    else:
+        for new_direction in [(agent_direction + i) % 4 for i in range(-1, 2)]:
+            if possible_transitions[new_direction]:
+                if new_direction == agent_direction:
+                    action = RailEnvActions.MOVE_FORWARD
+                elif new_direction == (agent_direction + 1) % 4:
+                    action = RailEnvActions.MOVE_RIGHT
+                elif new_direction == (agent_direction - 1) % 4:
+                    action = RailEnvActions.MOVE_LEFT
+                else:
+                    raise Exception("Illegal state")
+
+                new_position = get_new_position(agent_position, new_direction)
+                valid_actions.add(RailEnvNextAction(action, new_position, new_direction))
+    return valid_actions
+
+
+# N.B. get_shortest_paths is not part of distance_map since it refers to RailEnvActions (would lead to circularity!)
+def get_shortest_paths(distance_map: DistanceMap, max_depth: Optional[int] = None) \
+    -> Dict[int, Optional[List[WalkingElement]]]:
+    """
+    Computes the shortest path for each agent to its target and the action to be taken to do so.
+    The paths are derived from a `DistanceMap`.
+
+    If there is no path (rail disconnected), the path is given as None.
+    The agent state (moving or not) and its speed are not taken into account.
+
+    Parameters
+    ----------
+    distance_map
+
+    Returns
+    -------
+        Dict[int, Optional[List[WalkingElement]]]
+
+    """
+    shortest_paths = dict()
+
+    def _shortest_path_for_agent(agent):
+        position = agent.position
+        direction = agent.direction
+        shortest_paths[agent.handle] = []
+        distance = math.inf
+        depth = 0
+        while (position != agent.target and (max_depth is None or depth < max_depth)):
+            next_actions = get_valid_move_actions_(direction, position, distance_map.rail)
+            best_next_action = None
+            for next_action in next_actions:
+                next_action_distance = distance_map.get()[
+                    agent.handle, next_action.next_position[0],
+                    next_action.next_position[1], next_action.next_direction]
+                if next_action_distance < distance:
+                    best_next_action = next_action
+                    distance = next_action_distance
+
+            shortest_paths[agent.handle].append(WalkingElement(position, direction, best_next_action))
+            depth += 1
+
+            # if there is no way to continue, the rail must be disconnected!
+            # (or distance map is incorrect)
+            if best_next_action is None:
+                shortest_paths[agent.handle] = None
+                return
+
+            position = best_next_action.next_position
+            direction = best_next_action.next_direction
+        if max_depth is None or depth < max_depth:
+            shortest_paths[agent.handle].append(
+                WalkingElement(position, direction,
+                               RailEnvNextAction(RailEnvActions.STOP_MOVING, position, direction)))
+
+    for agent in distance_map.agents:
+        _shortest_path_for_agent(agent)
+
+    return shortest_paths
+
+
+def visualize_distance_map(distance_map: DistanceMap, agent_handle: int = 0):
+    if agent_handle >= distance_map.get().shape[0]:
+        print("Error: agent_handle cannot be larger than actual number of agents")
+        return
+    # take min value of all 4 directions
+    min_distance_map = np.min(distance_map.get(), axis=3)
+    plt.imshow(min_distance_map[agent_handle][:][:])
+    plt.show()
diff --git a/flatland/envs/rail_env_utils.py b/flatland/envs/rail_env_utils.py
index 69cfce764fe124d9e3eb05019e1c734f2285bdb5..dc1cff12c0c8b1860859208a13d6403734a2d2ad 100644
--- a/flatland/envs/rail_env_utils.py
+++ b/flatland/envs/rail_env_utils.py
@@ -1,7 +1,3 @@
-import numpy as np
-import matplotlib.pyplot as plt
-
-from flatland.envs.distance_map import DistanceMap
 from flatland.envs.observations import TreeObsForRailEnv
 from flatland.envs.predictions import ShortestPathPredictorForRailEnv
 from flatland.envs.rail_env import RailEnv
@@ -21,13 +17,3 @@ def load_flatland_environment_from_file(file_name, load_from_package=None, obs_b
                           schedule_generator=schedule_from_file(file_name, load_from_package),
                           obs_builder_object=obs_builder_object)
     return environment
-
-
-def visualize_distance_map(distance_map: DistanceMap, agent_handle: int = 0):
-    if agent_handle >= distance_map.get().shape[0]:
-        print("Error: agent_handle cannot be larger than actual number of agents")
-        return
-    # take min value of all 4 directions
-    min_distance_map = np.min(distance_map.get(), axis=3)
-    plt.imshow(min_distance_map[agent_handle][:][:])
-    plt.show()
diff --git a/flatland/envs/rail_generators_city_generator.py b/flatland/envs/rail_generators_city_generator.py
index 53cdaef1e0bbe794f38f72cb77089f296e0c9cf5..ecea9f902d509572afd9087a6e9b64bee144b3f2 100644
--- a/flatland/envs/rail_generators_city_generator.py
+++ b/flatland/envs/rail_generators_city_generator.py
@@ -37,10 +37,10 @@ def city_generator(num_cities: int = 5,
     :param a_star_distance_function: Heuristic how the distance between two nodes get estimated in the "a-star" path
     :param seed: Random Seed
     :param print_out_info: print debug info if True
-    :return:
-        -------
-    numpy.ndarray of type numpy.uint16
-        The matrix with the correct 16-bit bitmaps for each cell.
+
+    :return: The matrix with the correct 16-bit bitmaps for each cell.
+    :rtype: numpy.ndarray of type numpy.uint16
+
     """
 
     def do_generate_city_locations(width: int,
@@ -269,8 +269,8 @@ def city_generator(num_cities: int = 5,
                          intern_connect_max_nbr_of_shortes_city: int):
         city_edges = []
 
-        s_nodes:IntVector2DArrayArray  = copy.deepcopy(org_s_nodes)
-        e_nodes:IntVector2DArrayArray = copy.deepcopy(org_e_nodes)
+        s_nodes: IntVector2DArrayArray = copy.deepcopy(org_s_nodes)
+        e_nodes: IntVector2DArrayArray = copy.deepcopy(org_e_nodes)
 
         for nbr_connected in range(intern_connect_max_nbr_of_shortes_city):
             for city_loop in range(len(s_nodes)):
diff --git a/flatland/evaluators/service.py b/flatland/evaluators/service.py
index 8967b52d9d6ee70a7eb8af257ef6b4e25b531314..4f273be466a1c2f95b55cafece4783bad91e0d2c 100644
--- a/flatland/evaluators/service.py
+++ b/flatland/evaluators/service.py
@@ -44,10 +44,9 @@ class FlatlandRemoteEvaluationService:
     """
     A remote evaluation service which exposes the following interfaces
     of a RailEnv :
-        - env_create
-        - env_step
-    and an additional `env_submit` to cater to score computation and 
-    on-episode-complete post processings.
+    - env_create
+    - env_step
+    and an additional `env_submit` to cater to score computation and on-episode-complete post processings.
 
     This service is designed to be used in conjunction with 
     `FlatlandRemoteClient` and both the srevice and client maintain a 
@@ -148,17 +147,17 @@ class FlatlandRemoteEvaluationService:
         for evaluation. The folder structure expected at the `test_env_folder`
         is similar to :
 
-        .
-        ├── Test_0
-        │   ├── Level_1.pkl
-        │   ├── .......
-        │   ├── .......
-        │   └── Level_99.pkl
-        └── Test_1
-            ├── Level_1.pkl
-            ├── .......
-            ├── .......
-            └── Level_99.pkl 
+            .
+            ├── Test_0
+            │   ├── Level_1.pkl
+            │   ├── .......
+            │   ├── .......
+            │   └── Level_99.pkl
+            └── Test_1
+                ├── Level_1.pkl
+                ├── .......
+                ├── .......
+                └── Level_99.pkl
         """
         env_paths = sorted(glob.glob(
             os.path.join(
@@ -291,9 +290,7 @@ class FlatlandRemoteEvaluationService:
     def handle_env_create(self, command):
         """
         Handles a ENV_CREATE command from the client
-        TODO:   
-            Add a high level summary of everything thats 
-            hapenning here.
+        TODO: Add a high level summary of everything that's happening here.
         """
         self.simulation_count += 1
         if self.simulation_count < len(self.env_file_paths):
@@ -374,9 +371,7 @@ class FlatlandRemoteEvaluationService:
     def handle_env_step(self, command):
         """
         Handles a ENV_STEP command from the client
-        TODO:   
-            Add a high level summary of everything thats 
-            hapenning here.
+        TODO: Add a high level summary of everything that's happening here.
         """
         _payload = command['payload']
 
@@ -449,9 +444,7 @@ class FlatlandRemoteEvaluationService:
     def handle_env_submit(self, command):
         """
         Handles a ENV_SUBMIT command from the client
-        TODO:   
-            Add a high level summary of everything thats 
-            hapenning here.
+        TODO: Add a high level summary of everything that's happening here.
         """
         _payload = command['payload']
 
diff --git a/flatland/utils/editor.py b/flatland/utils/editor.py
index af1aad222919b00b716dd9da0f3be9534d54e411..e5d55bb51241307d0bf9251eb20ea36faac479ba 100644
--- a/flatland/utils/editor.py
+++ b/flatland/utils/editor.py
@@ -696,10 +696,10 @@ class EditorModel(object):
 
     def click_agent(self, cell_row_col):
         """ The user has clicked on a cell -
-            - If there is an agent, select it
-                - If that agent was already selected, then deselect it
-            - If there is no agent selected, and no agent in the cell, create one
-            - If there is an agent selected, and no agent in the cell, move the selected agent to the cell
+            * If there is an agent, select it
+              * If that agent was already selected, then deselect it
+            * If there is no agent selected, and no agent in the cell, create one
+            * If there is an agent selected, and no agent in the cell, move the selected agent to the cell
         """
 
         # Has the user clicked on an existing agent?
diff --git a/flatland/utils/graphics_pil.py b/flatland/utils/graphics_pil.py
index 92a0f84f35fa942b03236c6add6e722475a2d842..4dad2ca872725517ffd47308f088f098b6abe1aa 100644
--- a/flatland/utils/graphics_pil.py
+++ b/flatland/utils/graphics_pil.py
@@ -174,7 +174,6 @@ class PILGL(GraphicsLayer):
         self.draws[layer].text(xyPixLeftTop, strText, font=self.font, fill=(0, 0, 0, 255))
 
     def text_rowcol(self, rcTopLeft, strText, layer=AGENT_LAYER):
-        print("Text:", "rc:", rcTopLeft, "text:", strText, "layer:", layer)
         xyPixLeftTop = tuple((array(rcTopLeft) * self.nPixCell)[[1, 0]])
         self.text(*xyPixLeftTop, strText, layer)
 
@@ -606,7 +605,6 @@ class PILSVG(PILGL):
             self.draw_image_row_col(bg_svg, (row, col), layer=PILGL.SELECTED_AGENT_LAYER)
 
         if show_debug:
-            print("Call text:")
             self.text_rowcol((row + 0.2, col + 0.2,), str(agent_idx))
 
     def set_cell_occupied(self, agent_idx, row, col):
diff --git a/flatland/utils/simple_rail.py b/flatland/utils/simple_rail.py
index 6da29d7f6d1a52c42dd006b84f94a959990e0932..a12c26e66fdf5a9ff102bb79440bd4f4b805e819 100644
--- a/flatland/utils/simple_rail.py
+++ b/flatland/utils/simple_rail.py
@@ -45,6 +45,46 @@ def make_simple_rail() -> Tuple[GridTransitionMap, np.array]:
     return rail, rail_map
 
 
+def make_disconnected_simple_rail() -> Tuple[GridTransitionMap, np.array]:
+    # We instantiate a very simple rail network on a 7x10 grid:
+    # Note that the cells have invalid RailEnvTransitions!
+    #        |
+    #        |
+    #        |
+    # _ _ _ _\ _    _  _ _ _
+    #                /
+    #                |
+    #                |
+    #                |
+    transitions = RailEnvTransitions()
+    cells = transitions.transition_list
+    empty = cells[0]
+    dead_end_from_south = cells[7]
+    dead_end_from_west = transitions.rotate_transition(dead_end_from_south, 90)
+    dead_end_from_north = transitions.rotate_transition(dead_end_from_south, 180)
+    dead_end_from_east = transitions.rotate_transition(dead_end_from_south, 270)
+    vertical_straight = cells[1]
+    horizontal_straight = transitions.rotate_transition(vertical_straight, 90)
+    simple_switch_north_left = cells[2]
+    simple_switch_north_right = cells[10]
+    simple_switch_east_west_north = transitions.rotate_transition(simple_switch_north_right, 270)
+    simple_switch_east_west_south = transitions.rotate_transition(simple_switch_north_left, 270)
+    rail_map = np.array(
+        [[empty] * 3 + [dead_end_from_south] + [empty] * 6] +
+        [[empty] * 3 + [vertical_straight] + [empty] * 6] * 2 +
+        [[dead_end_from_east] + [horizontal_straight] * 2 +
+         [simple_switch_east_west_north] +
+         [dead_end_from_west] + [dead_end_from_east] + [simple_switch_east_west_south] +
+         [horizontal_straight] * 2 + [dead_end_from_west]] +
+        [[empty] * 6 + [vertical_straight] + [empty] * 3] * 2 +
+        [[empty] * 6 + [dead_end_from_north] + [empty] * 3], dtype=np.uint16)
+    rail = GridTransitionMap(width=rail_map.shape[1],
+                             height=rail_map.shape[0], transitions=transitions)
+    rail.grid = rail_map
+    return rail, rail_map
+
+
+
 def make_simple_rail2() -> Tuple[GridTransitionMap, np.array]:
     # We instantiate a very simple rail network on a 7x10 grid:
     #        |
diff --git a/flatland_2.0.md b/flatland_2.0.md
index bb8fde87654684312400d7901824bf9f4b85007d..501369bda9b3f4aed3692b3fb6070a5d6c5bf3e7 100644
--- a/flatland_2.0.md
+++ b/flatland_2.0.md
@@ -11,273 +11,15 @@ Thus the following changes are coming in the next version to be closer to real r
 - **Stochastic Events** cause agents to stop and get stuck for different numbers of time steps.
 - **Different Speed Classes** allow agents to move at different speeds and thus enhance complexity in the search for optimal solutions.
 
-Below we explain these changes in more detail and how you can play with their parametrization. We appreciate *your feedback* on the performance and the difficulty on these levels to help us shape the best possible **Flat**land 2.0 environment.
 
-## Generate levels
-
-We are currently working on different new level generators and you can expect that the levels in the submission testing will not all come from just one but rather different level generators to be sure that the controllers can handle any railway specific challenge.
-
-Let's have a look at the `sparse_rail_generator`.
-
-### Sparse Rail Generator
-![Example_Sparse](https://i.imgur.com/DP8sIyx.png)
-
-The idea behind the sparse rail generator is to mimic classic railway structures where dense nodes (cities) are sparsely connected to each other and where you have to manage traffic flow between the nodes efficiently. 
-The cities in this level generator are much simplified in comparison to real city networks but it mimics parts of the problems faced in daily operations of any railway company.
-
-There are a few parameters you can tune to build your own map and test different complexity levels of the levels. 
-**Warning** some combinations of parameters do not go well together and will lead to infeasible level generation. 
-In the worst case, the level generator currently issues a warning when it cannot build the environment according to the parameters provided. 
-This will lead to a crash of the whole env. 
-We are currently working on improvements here and are **happy for any suggestions from your side**.
-
-To build an environment you instantiate a `RailEnv` as follows:
-
-```
-# Initialize the generator
-rail_generator=sparse_rail_generator(
-    num_cities=10,  # Number of cities in map
-    num_intersections=10,  # Number of interesections in map
-    num_trainstations=50,  # Number of possible start/targets on map
-    min_node_dist=6,  # Minimal distance of nodes
-    node_radius=3,  # Proximity of stations to city center
-    num_neighb=3,  # Number of connections to other cities
-    seed=5,  # Random seed
-    grid_mode=False  # Ordered distribution of nodes
-)
-
-# Build the environment
-env = RailEnv(
-    width=50,
-    height=50,
-    rail_generator=rail_generator
-    schedule_generator=sparse_schedule_generator(),
-    number_of_agents=10,
-    obs_builder_object=TreeObsForRailEnv(max_depth=3,predictor=shortest_path_predictor)
-)
-```
-
-You can see that you now need both a `rail_generator` and a `schedule_generator` to generate a level. These need to work nicely together. The `rail_generator` will only generate the railway infrastructure and provide hints to the `schedule_generator` about where to place agents. The `schedule_generator` will then generate a schedule, meaning it places agents at different train stations and gives them tasks by providing individual targets.
-
-You can tune the following parameters in the `sparse_rail_generator`:
-
-- `num_cities` is the number of cities on a map. Cities are the only nodes that can host start and end points for agent tasks (Train stations). Here you have to be carefull that the number is not too high as all the cities have to fit on the map. When `grid_mode=False` you have to be carefull when chosing `min_node_dist` because leves will fails if not all cities (and intersections) can be placed with at least `min_node_dist` between them.
-- `num_intersections` is the number of nodes that don't hold any trainstations. They are also the first priority that a city connects to. We use these to allow for sparse connections between cities.
-- `num_trainstations` defines the *Total* number of trainstations in the network. This also sets the max number of allowed agents in the environment. This is also a delicate parameter as there is only a limitid amount of space available around nodes and thus if the number is too high the level generation will fail. *Important*: Only the number of agents provided to the environment will actually produce active train stations. The others will just be present as dead-ends (See figures below).
-- `min_node_dist` is only used if `grid_mode=False` and represents the minimal distance between two nodes.
-- `node_radius` defines the extent of a city. Each trainstation is placed at a distance to the closes city node that is smaller or equal to this number.
-- `num_neighb`defines the number of neighbouring nodes that connect to each other. Thus this changes the connectivity and thus the amount of alternative routes in the network.
-- `grid_mode` True -> Nodes evenly distriubted in env, False-> Random distribution of nodes
-- `enhance_intersection`: True -> Extra rail elements added at intersections
-- `seed` is used to initialize the random generator
-
-
-If you run into any bugs with sets of parameters please let us know.
-
-Here is a network with `grid_mode=False` and the parameters from above.
-
-![sparse_random](https://i.imgur.com/Xg7nifF.png)
-
-and here with `grid_mode=True`
-
-![sparse_ordered](https://i.imgur.com/jyA7Pt4.png)
-
-## Add Stochasticity
-
-Another area where we improved **Flat**land 2.0 are stochastic events added during the episodes. 
-This is very common for railway networks where the initial plan usually needs to be rescheduled during operations as minor events such as delayed departure from trainstations, malfunctions on trains or infrastructure or just the weather lead to delayed trains.
-
-We implemted a poisson process to simulate delays by stopping agents at random times for random durations. The parameters necessary for the stochastic events can be provided when creating the environment.
-
-```
-# Use a the malfunction generator to break agents from time to time
-
-stochastic_data = {
-    'prop_malfunction': 0.5,  # Percentage of defective agents
-    'malfunction_rate': 30,  # Rate of malfunction occurence
-    'min_duration': 3,  # Minimal duration of malfunction
-    'max_duration': 10  # Max duration of malfunction
-}
-```
-
-The parameters are as follows:
-
-- `prop_malfunction` is the proportion of agents that can malfunction. `1.0` means that each agent can break.
-- `malfunction_rate` is the mean rate of the poisson process in number of environment steps.
-- `min_duration` and `max_duration` set the range of malfunction durations. They are sampled uniformly
-
-You can introduce stochasticity by simply creating the env as follows:
-
-```
-env = RailEnv(
-    ...
-    stochastic_data=stochastic_data,  # Malfunction data generator
-    ...    
-)
-```
-In your controller, you can check whether an agent is malfunctioning: 
-```
-obs, rew, done, info = env.step(actions) 
-...
-action_dict = dict()
-for a in range(env.get_num_agents()):
-    if info['malfunction'][a] == 0:
-        action_dict.update({a: ...})
-
-# Custom observation builder
-tree_observation = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())
-
-# Different agent types (trains) with different speeds.
-speed_ration_map = {1.: 0.25,  # Fast passenger train
-                    1. / 2.: 0.25,  # Fast freight train
-                    1. / 3.: 0.25,  # Slow commuter train
-                    1. / 4.: 0.25}  # Slow freight train
-
-env = RailEnv(width=50,
-              height=50,
-              rail_generator=sparse_rail_generator(num_cities=20,  # Number of cities in map (where train stations are)
-                                                   num_intersections=5,  # Number of intersections (no start / target)
-                                                   num_trainstations=15,  # Number of possible start/targets on map
-                                                   min_node_dist=3,  # Minimal distance of nodes
-                                                   node_radius=2,  # Proximity of stations to city center
-                                                   num_neighb=4,  # Number of connections to other cities/intersections
-                                                   seed=15,  # Random seed
-                                                   grid_mode=True,
-                                                   enhance_intersection=True
-                                                   ),
-              schedule_generator=sparse_schedule_generator(speed_ration_map),
-              number_of_agents=10,
-              stochastic_data=stochastic_data,  # Malfunction data generator
-              obs_builder_object=tree_observation)
-```
-
-You will quickly realize that this will lead to unforeseen difficulties which means that **your controller** needs to observe the environment at all times to be able to react to the stochastic events.
-
-## Add different speed profiles
-
-One of the main contributions to the complexity of railway network operations stems from the fact that all trains travel at different speeds while sharing a very limited railway network. 
-In **Flat**land 2.0 this feature will be enabled as well and will lead to much more complex configurations. Here we count on your support if you find bugs or improvements  :).
-
-The different speed profiles can be generated using the `schedule_generator`, where you can actually chose as many different speeds as you like. 
-Keep in mind that the *fastest speed* is 1 and all slower speeds must be between 1 and 0. 
-For the submission scoring you can assume that there will be no more than 5 speed profiles.
-
-
- 
-Later versions of **Flat**land might have varying speeds during episodes. Therefore, we return the agent speeds. 
-Notice that we do not guarantee that the speed will be computed at each step, but if not costly we will return it at each step.
-In your controller, you can get the agents' speed from the `info` returned by `step`: 
-```
-obs, rew, done, info = env.step(actions) 
-...
-for a in range(env.get_num_agents()):
-    speed = info['speed'][a]
-```
-
-## Actions and observation with different speed levels
-
-Because the different speeds are implemented as fractions the agents ability to perform actions has been updated. 
-We **do not allow actions to change within the cell **. 
-This means that each agent can only chose an action to be taken when entering a cell. 
-This action is then executed when a step to the next cell is valid. For example
-
-- Agent enters switch and choses to deviate left. Agent fractional speed is 1/4 and thus the agent will take 4 time steps to complete its journey through the cell. On the 4th time step the agent will leave the cell deviating left as chosen at the entry of the cell.
-    - All actions chosen by the agent during its travels within a cell are ignored
-    - Agents can make observations at any time step. Make sure to discard observations without any information. See this [example](https://gitlab.aicrowd.com/flatland/baselines/blob/master/torch_training/training_navigation.py) for a simple implementation.
-- The environment checks if agent is allowed to move to next cell only at the time of the switch to the next cell
-
-In your controller, you can check whether an agent requires an action by checking `info`: 
-```
-obs, rew, done, info = env.step(actions) 
-...
-action_dict = dict()
-for a in range(env.get_num_agents()):
-    if info['action_required'][a] and info['malfunction'][a] == 0:
-        action_dict.update({a: ...})
-
-```
-Notice that `info['action_required'][a]` does not mean that the action will have an effect: 
-if the next cell is blocked or the agent breaks down, the action cannot be performed and an action will be required again in the next step. 
-
-## Rail Generators and Schedule Generators
-The separation between rail generator and schedule generator reflects the organisational separation in the railway domain
-- Infrastructure Manager (IM): is responsible for the layout and maintenance of tracks
-- Railway Undertaking (RU): operates trains on the infrastructure
-Usually, there is a third organisation, which ensures discrimination-free access to the infrastructure for concurrent requests for the infrastructure in a **schedule planning phase**.
-However, in the **Flat**land challenge, we focus on the re-scheduling problem during live operations.
-
-Technically, 
-``` 
-RailGeneratorProduct = Tuple[GridTransitionMap, Optional[Any]]
-RailGenerator = Callable[[int, int, int, int], RailGeneratorProduct]
-
-AgentPosition = Tuple[int, int]
-ScheduleGeneratorProduct = Tuple[List[AgentPosition], List[AgentPosition], List[AgentPosition], List[float]]
-ScheduleGenerator = Callable[[GridTransitionMap, int, Optional[Any]], ScheduleGeneratorProduct]
-```
-
-We can then produce `RailGenerator`s by currying:
-```
-def sparse_rail_generator(num_cities=5, num_intersections=4, num_trainstations=2, min_node_dist=20, node_radius=2,
-                          num_neighb=3, grid_mode=False, enhance_intersection=False, seed=0):
-
-    def generator(width, height, num_agents, num_resets=0):
-    
-        # generate the grid and (optionally) some hints for the schedule_generator
-        ...
-         
-        return grid_map, {'agents_hints': {
-            'num_agents': num_agents,
-            'agent_start_targets_nodes': agent_start_targets_nodes,
-            'train_stations': train_stations
-        }}
-
-    return generator
-```
-And, similarly, `ScheduleGenerator`s:
-```
-def sparse_schedule_generator(speed_ratio_map: Mapping[float, float] = None) -> ScheduleGenerator:
-    def generator(rail: GridTransitionMap, num_agents: int, hints: Any = None):
-        # place agents:
-        # - initial position
-        # - initial direction
-        # - (initial) speed
-        # - malfunction
-        ...
-                
-        return agents_position, agents_direction, agents_target, speeds, agents_malfunction
-
-    return generator
-```
-Notice that the `rail_generator` may pass `agents_hints` to the  `schedule_generator` which the latter may interpret.
-For instance, the way the `sparse_rail_generator` generates the grid, it already determines the agent's goal and target.
-Hence, `rail_generator` and `schedule_generator` have to match if `schedule_generator` presupposes some specific `agents_hints`.
-
-The environment's `reset` takes care of applying the two generators:
-```
-    def __init__(self,
-            ...
-             rail_generator: RailGenerator = random_rail_generator(),
-             schedule_generator: ScheduleGenerator = random_schedule_generator(),
-             ...
-             ):
-        self.rail_generator: RailGenerator = rail_generator
-        self.schedule_generator: ScheduleGenerator = schedule_generator
-        
-    def reset(self, regen_rail=True, replace_agents=True):
-        rail, optionals = self.rail_generator(self.width, self.height, self.get_num_agents(), self.num_resets)
-
-        ...
-
-        if replace_agents:
-            agents_hints = None
-            if optionals and 'agents_hints' in optionals:
-                agents_hints = optionals['agents_hints']
-            self.agents_static = EnvAgentStatic.from_lists(
-                *self.schedule_generator(self.rail, self.get_num_agents(), hints=agents_hints))
-```
+We explain these changes in more detail and how you can play with their parametrization in Tutorials 3--5:
+* [Tutorials](https://gitlab.aicrowd.com/flatland/flatland/tree/master/docs/tutorials)
 
+We appreciate *your feedback* on the performance and the difficulty on these levels to help us shape the best possible **Flat**land 2.0 environment.
 
 ## Example code
 
-To see all the changes in action you can just run the `flatland_example_2_0.py` file in the examples folder. The file can be found [here](https://gitlab.aicrowd.com/flatland/flatland/blob/master/examples/flatland_2_0_example.py).
+To see all the changes in action you can just run the 
+* [examples/flatland_2_0_example.py](https://gitlab.aicrowd.com/flatland/flatland/blob/master/examples/flatland_2_0_example.py) 
+
+example.
diff --git a/make_docs.py b/make_docs.py
index 0a805e591647162e2757b60a7bcd9f75f4d1d0b9..81fe58736c68817164dc5e2471827ad071aaca31 100644
--- a/make_docs.py
+++ b/make_docs.py
@@ -20,23 +20,36 @@ def remove_exists(filename):
 
 # clean docs config and html files, and rebuild everything
 # wildcards do not work under Windows
-for file in glob.glob(r'./docs/flatland*.rst'):
-    remove_exists(file)
+for image_file in glob.glob(r'./docs/flatland*.rst'):
+    remove_exists(image_file)
 remove_exists('docs/modules.rst')
 
-# copy md files from root folder into docs folder
-for file in glob.glob(r'./*.md'):
-    print(file)
-    shutil.copy(file, 'docs/')
+for md_file in glob.glob(r'./*.md') + glob.glob(r'./docs/specifications/*.md') + glob.glob(r'./docs/tutorials/*.md'):
+    from m2r import parse_from_file
 
-subprocess.call(['sphinx-apidoc', '--force', '-a', '-e', '-o', 'docs/', 'flatland', '-H', 'Flatland Reference'])
+    rst_content = parse_from_file(md_file)
+    rst_file = md_file.replace(".md", ".rst")
+    remove_exists(rst_file)
+    with open(rst_file, 'w') as out:
+        print("m2r {}->{}".format(md_file, rst_file))
+
+        out.write(rst_content)
+        out.flush()
+
+subprocess.call(['sphinx-apidoc', '--force', '-a', '-e', '-o', 'docs/', 'flatland', '-H', 'API Reference', '--tocfile',
+                 '05_apidoc'])
 
 os.environ["SPHINXPROJ"] = "Flatland"
 os.chdir('docs')
 subprocess.call(['python', '-msphinx', '-M', 'clean', '.', '_build'])
-# TODO fix sphinx warnings instead of suppressing them...
+img_dest = '_build/html/img'
+if not os.path.exists(img_dest):
+    os.makedirs(img_dest)
+for image_file in glob.glob(r'./specifications/img/*'):
+    shutil.copy(image_file, img_dest)
+
 subprocess.call(['python', '-msphinx', '-M', 'html', '.', '_build'])
-# subprocess.call(['python', '-msphinx', '-M', 'html', '.', '_build', '-Q'])
+
 
 # we do not currrently use pydeps, commented out https://gitlab.aicrowd.com/flatland/flatland/issues/149
 # subprocess.call(['python', '-mpydeps', '../flatland', '-o', '_build/html/flatland.svg', '--no-config', '--noshow'])
diff --git a/requirements_continuous_integration.txt b/requirements_continuous_integration.txt
index 0054fec1724765b8ffa8be620c5d4c1ac4cfd6b3..06199e95d5dbba7bf58f40abe7e26e54c39078b2 100644
--- a/requirements_continuous_integration.txt
+++ b/requirements_continuous_integration.txt
@@ -18,4 +18,4 @@ jupyter-core>=4.5.0
 notebook>=5.7.8
 pytest-xvfb>=1.2.0
 git+https://github.com/who8mylunch/Jupyter_Canvas_Widget.git@bd151ae1509c50b5809944dd3294f58b7b069c86
-recommonmark>=0.6.0
+m2r>=0.2.1
diff --git a/setup.py b/setup.py
index 8461a3fb422c59b07a519eaab9b421a7c5f51727..dd5629062f72b6d832f38466f0028af7a250f2ff 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@ import sys
 from setuptools import setup, find_packages
 
 assert sys.version_info >= (3, 6)
-with open('README.rst') as readme_file:
+with open('README.md') as readme_file:
     readme = readme_file.read()
 
 
diff --git a/tests/test_flatland_core_transition_map.py b/tests/test_flatland_core_transition_map.py
index 930cc24fb4a9be817c14f2cca149747ac6ca370b..0913e45959d08230a815c33d98fb6de8eb99d956 100644
--- a/tests/test_flatland_core_transition_map.py
+++ b/tests/test_flatland_core_transition_map.py
@@ -174,7 +174,7 @@ def test_get_entry_directions():
     north_west_turn = transitions.rotate_transition(south_east_turn, 180)
 
     def _assert(transition, expected):
-        actual = transitions.get_entry_directions(transition)
+        actual = Grid4Transitions.get_entry_directions(transition)
         assert actual == expected, "Found {}, expected {}.".format(actual, expected)
 
     _assert(south_east_turn, [True, False, False, True])
diff --git a/tests/test_flatland_envs_predictions.py b/tests/test_flatland_envs_predictions.py
index 9ef122e6aa8325d3ec70307332cc7235090270de..f4ab68bc45a82b8f196fcfbebb26fd68a36c37a4 100644
--- a/tests/test_flatland_envs_predictions.py
+++ b/tests/test_flatland_envs_predictions.py
@@ -7,7 +7,8 @@ import numpy as np
 from flatland.core.grid.grid4 import Grid4TransitionsEnum
 from flatland.envs.observations import TreeObsForRailEnv
 from flatland.envs.predictions import DummyPredictorForRailEnv, ShortestPathPredictorForRailEnv
-from flatland.envs.rail_env import RailEnv
+from flatland.envs.rail_env import RailEnv, RailEnvActions, RailEnvNextAction
+from flatland.envs.rail_env_shortest_paths import get_shortest_paths, WalkingElement
 from flatland.envs.rail_generators import rail_from_grid_transition_map
 from flatland.envs.schedule_generators import random_schedule_generator
 from flatland.utils.rendertools import RenderTool
@@ -142,6 +143,21 @@ def test_shortest_path_predictor(rendering=False):
         1], agent.direction] == 5.0, "found {} instead of {}".format(
         distance_map[agent.handle, agent.position[0], agent.position[1], agent.direction], 5.0)
 
+    paths = get_shortest_paths(env.distance_map)[0]
+    assert paths == [
+        WalkingElement((5, 6), 0, RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD, next_position=(4, 6),
+                                                    next_direction=0)),
+        WalkingElement((4, 6), 0, RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD, next_position=(3, 6),
+                                                    next_direction=0)),
+        WalkingElement((3, 6), 0, RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD, next_position=(3, 7),
+                                                    next_direction=1)),
+        WalkingElement((3, 7), 1, RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD, next_position=(3, 8),
+                                                    next_direction=1)),
+        WalkingElement((3, 8), 1, RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD, next_position=(3, 9),
+                                                    next_direction=1)),
+        WalkingElement((3, 9), 1, RailEnvNextAction(action=RailEnvActions.STOP_MOVING, next_position=(3, 9),
+                                                    next_direction=1))]
+
     # extract the data
     predictions = env.obs_builder.predictions
     positions = np.array(list(map(lambda prediction: [*prediction[1:3]], predictions[0])))
@@ -220,12 +236,13 @@ def test_shortest_path_predictor(rendering=False):
         [20.],
     ])
 
+    assert np.array_equal(time_offsets, expected_time_offsets), \
+        "time_offsets {}, expected {}".format(time_offsets, expected_time_offsets)
+
     assert np.array_equal(positions, expected_positions), \
         "positions {}, expected {}".format(positions, expected_positions)
     assert np.array_equal(directions, expected_directions), \
         "directions {}, expected {}".format(directions, expected_directions)
-    assert np.array_equal(time_offsets, expected_time_offsets), \
-        "time_offsets {}, expected {}".format(time_offsets, expected_time_offsets)
 
 
 def test_shortest_path_predictor_conflicts(rendering=False):
diff --git a/tests/test_flatland_envs_rail_env.py b/tests/test_flatland_envs_rail_env.py
index 0114730a2ac1d0df618eea773dfcf1cd7175dee2..0fefd3e212ddb5f084c1e219f4063079e03dabdf 100644
--- a/tests/test_flatland_envs_rail_env.py
+++ b/tests/test_flatland_envs_rail_env.py
@@ -6,11 +6,13 @@ from flatland.core.grid.rail_env_grid import RailEnvTransitions
 from flatland.core.transition_map import GridTransitionMap
 from flatland.envs.agent_utils import EnvAgent
 from flatland.envs.agent_utils import EnvAgentStatic
-from flatland.envs.observations import GlobalObsForRailEnv
+from flatland.envs.observations import GlobalObsForRailEnv, TreeObsForRailEnv
+from flatland.envs.predictions import ShortestPathPredictorForRailEnv
 from flatland.envs.rail_env import RailEnv
 from flatland.envs.rail_generators import complex_rail_generator
 from flatland.envs.rail_generators import rail_from_grid_transition_map
 from flatland.envs.schedule_generators import random_schedule_generator, complex_schedule_generator
+from flatland.utils.simple_rail import make_simple_rail
 
 """Tests for `flatland` package."""
 
@@ -212,3 +214,36 @@ def test_dead_end():
 
     rail_env.reset()
     rail_env.agents = [EnvAgent(position=(2, 0), direction=0, target=(4, 0), moving=False)]
+
+
+def test_get_entry_directions():
+    rail, rail_map = make_simple_rail()
+    env = RailEnv(width=rail_map.shape[1],
+                  height=rail_map.shape[0],
+                  rail_generator=rail_from_grid_transition_map(rail),
+                  schedule_generator=random_schedule_generator(),
+                  number_of_agents=1,
+                  obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv()),
+                  )
+
+    def _assert(position, expected):
+        actual = env.get_valid_directions_on_grid(*position)
+        assert actual == expected, "[{},{}] actual={}, expected={}".format(*position, actual, expected)
+
+    # north dead end
+    _assert((0, 3), [True, False, False, False])
+
+    # west dead end
+    _assert((3, 0), [False, False, False, True])
+
+    # switch
+    _assert((3, 3), [False, True, True, True])
+
+    # horizontal
+    _assert((3, 2), [False, True, False, True])
+
+    # vertical
+    _assert((2, 3), [True, False, True, False])
+
+    # nowhere
+    _assert((0, 0), [False, False, False, False])
diff --git a/tests/test_flatland_envs_rail_env_shortest_paths.py b/tests/test_flatland_envs_rail_env_shortest_paths.py
new file mode 100644
index 0000000000000000000000000000000000000000..4600c4a3002995e1238a0ccbda762501ac985408
--- /dev/null
+++ b/tests/test_flatland_envs_rail_env_shortest_paths.py
@@ -0,0 +1,194 @@
+import numpy as np
+
+from flatland.core.grid.grid4 import Grid4TransitionsEnum
+from flatland.envs.observations import TreeObsForRailEnv
+from flatland.envs.predictions import DummyPredictorForRailEnv
+from flatland.envs.rail_env import RailEnvNextAction, RailEnvActions, RailEnv
+from flatland.envs.rail_env_shortest_paths import get_shortest_paths, WalkingElement
+from flatland.envs.rail_env_utils import load_flatland_environment_from_file
+from flatland.envs.rail_generators import rail_from_grid_transition_map
+from flatland.envs.schedule_generators import random_schedule_generator
+from flatland.utils.simple_rail import make_disconnected_simple_rail
+
+
+def test_get_shortest_paths_unreachable():
+    rail, rail_map = make_disconnected_simple_rail()
+
+    env = RailEnv(width=rail_map.shape[1],
+                  height=rail_map.shape[0],
+                  rail_generator=rail_from_grid_transition_map(rail),
+                  schedule_generator=random_schedule_generator(),
+                  number_of_agents=1,
+                  obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=DummyPredictorForRailEnv(max_depth=10)),
+                  )
+
+    # set the initial position
+    agent = env.agents_static[0]
+    agent.position = (3, 1)  # west dead-end
+    agent.direction = Grid4TransitionsEnum.WEST
+    agent.target = (3, 9)  # east dead-end
+    agent.moving = True
+
+    # reset to set agents from agents_static
+    env.reset(False, False)
+
+    actual = get_shortest_paths(env.distance_map)
+    expected = {0: None}
+
+    assert actual == expected, "actual={},expected={}".format(actual, expected)
+
+
+def test_get_shortest_paths():
+    env = load_flatland_environment_from_file('test_002.pkl', 'env_data.tests')
+    actual = get_shortest_paths(env.distance_map)
+
+    expected = {
+        0: [
+            WalkingElement(position=(1, 1), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(1, 2), next_direction=1)),
+            WalkingElement(position=(1, 2), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(1, 3), next_direction=1)),
+            WalkingElement(position=(1, 3), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 3), next_direction=2)),
+            WalkingElement(position=(2, 3), direction=2,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 4), next_direction=1)),
+            WalkingElement(position=(2, 4), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 5), next_direction=1)),
+            WalkingElement(position=(2, 5), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 6), next_direction=1)),
+            WalkingElement(position=(2, 6), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 7), next_direction=1)),
+            WalkingElement(position=(2, 7), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 8), next_direction=1)),
+            WalkingElement(position=(2, 8), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 9), next_direction=1)),
+            WalkingElement(position=(2, 9), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 10), next_direction=1)),
+            WalkingElement(position=(2, 10), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 11), next_direction=1)),
+            WalkingElement(position=(2, 11), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 12), next_direction=1)),
+            WalkingElement(position=(2, 12), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 13), next_direction=1)),
+            WalkingElement(position=(2, 13), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 14), next_direction=1)),
+            WalkingElement(position=(2, 14), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 15), next_direction=1)),
+            WalkingElement(position=(2, 15), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 16), next_direction=1)),
+            WalkingElement(position=(2, 16), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 17), next_direction=1)),
+            WalkingElement(position=(2, 17), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 18), next_direction=1)),
+            WalkingElement(position=(2, 18), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.STOP_MOVING,
+                                                                 next_position=(2, 18), next_direction=1))],
+        1: [
+            WalkingElement(position=(3, 18), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(3, 17), next_direction=3)),
+            WalkingElement(position=(3, 17), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(3, 16), next_direction=3)),
+            WalkingElement(position=(3, 16), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 16), next_direction=0)),
+            WalkingElement(position=(2, 16), direction=0,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 15), next_direction=3)),
+            WalkingElement(position=(2, 15), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 14), next_direction=3)),
+            WalkingElement(position=(2, 14), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 13), next_direction=3)),
+            WalkingElement(position=(2, 13), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 12), next_direction=3)),
+            WalkingElement(position=(2, 12), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 11), next_direction=3)),
+            WalkingElement(position=(2, 11), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 10), next_direction=3)),
+            WalkingElement(position=(2, 10), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 9), next_direction=3)),
+            WalkingElement(position=(2, 9), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 8), next_direction=3)),
+            WalkingElement(position=(2, 8), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 7), next_direction=3)),
+            WalkingElement(position=(2, 7), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 6), next_direction=3)),
+            WalkingElement(position=(2, 6), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 5), next_direction=3)),
+            WalkingElement(position=(2, 5), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 4), next_direction=3)),
+            WalkingElement(position=(2, 4), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 3), next_direction=3)),
+            WalkingElement(position=(2, 3), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 2), next_direction=3)),
+            WalkingElement(position=(2, 2), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(2, 1), next_direction=3)),
+            WalkingElement(position=(2, 1), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.STOP_MOVING,
+                                                                 next_position=(2, 1), next_direction=3))]
+    }
+
+    for agent_handle in expected:
+        assert np.array_equal(actual[agent_handle], expected[agent_handle]), \
+            "[{}] actual={},expected={}".format(agent_handle, actual[agent_handle], expected[agent_handle])
+
+
+def test_get_shortest_paths_max_depth():
+    env = load_flatland_environment_from_file('test_002.pkl', 'env_data.tests')
+    actual = get_shortest_paths(env.distance_map, max_depth=2)
+
+    expected = {
+        0: [
+            WalkingElement(position=(1, 1), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(1, 2), next_direction=1)),
+            WalkingElement(position=(1, 2), direction=1,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(1, 3), next_direction=1))
+        ],
+        1: [
+            WalkingElement(position=(3, 18), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(3, 17), next_direction=3)),
+            WalkingElement(position=(3, 17), direction=3,
+                           next_action_element=RailEnvNextAction(action=RailEnvActions.MOVE_FORWARD,
+                                                                 next_position=(3, 16), next_direction=3)),
+        ]
+    }
+
+    for agent_handle in expected:
+        assert np.array_equal(actual[agent_handle], expected[agent_handle]), \
+            "[{}] actual={},expected={}".format(agent_handle, actual[agent_handle], expected[agent_handle])