diff --git a/Makefile b/Makefile
index c84991f6757fafa0ef6198205bcf92d886a0e060..35704d190e0f0fc1de0dd219f79bf462e7d8bda6 100644
--- a/Makefile
+++ b/Makefile
@@ -61,20 +61,11 @@ test-all: ## run tests on every Python version with tox
 	tox
 
 coverage: ## check code coverage quickly with the default Python
-	coverage run --source flatland -m pytest
-	coverage report -m
-	coverage html
-	$(BROWSER) htmlcov/index.html
+	python make_coverage.py
 
 docs: ## generate Sphinx HTML documentation, including API docs
-	rm -f docs/flatland*.rst
-	rm -f docs/modules.rst
-	sphinx-apidoc --force -a -e -o docs/ flatland -H "Flatland Reference"
-	$(MAKE) -C docs clean
-	cp *.md docs
-	$(MAKE) -C docs html
-	pydeps --no-config --noshow flatland -o docs/_build/html/flatland.svg
-	$(BROWSER) docs/_build/html/index.html
+	python make_docs.py
 
+# docs/Makefile is removed by this change, so rebuild with the Sphinx command that Makefile used to run.
 servedocs: docs ## compile the docs watching for changes
-	watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
+	watchmedo shell-command -p '*.rst' -c 'cd docs && python -msphinx -M html . _build -Q' -R -D .
diff --git a/benchmarks/benchmark_utils.py b/benchmarks/benchmark_utils.py
index 720590be315ca03abd07838c4475a5c586edf8dd..42792f98c427589e3b21d230b6451f85b984917e 100644
--- a/benchmarks/benchmark_utils.py
+++ b/benchmarks/benchmark_utils.py
@@ -12,7 +12,7 @@ def swap_attr(obj, attr, new_val):
             ...
 
         This will set obj.attr to 5 for the duration of the with: block,
-        restoring the old value at the end of the block. If `attr` doesn't
+        restoring the old value at the end of the block. If `attr` doesn't
         exist on `obj`, it will be created and then deleted at the end of the
         block.
 
diff --git a/benchmarks/run_all_examples.py b/benchmarks/run_all_examples.py
index 509e232416db525c28ce3d69ac84ad1657c533eb..1b3e3be066989e18af3f36e1dd73ded37c0bc6cf 100644
--- a/benchmarks/run_all_examples.py
+++ b/benchmarks/run_all_examples.py
@@ -18,6 +18,7 @@ for entry in [entry for entry in importlib_resources.contents('examples') if
     with path('examples', entry) as file_in:
         print("")
         print("")
+
         print("")
         print("*****************************************************************")
         print("Running {}".format(entry))
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index eb44e95ecbb9bc4079613db7f5d1e95ed4359691..0000000000000000000000000000000000000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-# Minimal makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-# TODO fix sphinx warnings instead of suppressing them...
-SPHINXOPTS    = -Q
-SPHINXBUILD   = python -msphinx
-SPHINXPROJ    = flatland
-SOURCEDIR     = .
-BUILDDIR      = _build
-
-# Put it first so that "make" without argument is like "make help".
-help:
-	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/changelog_index.rst b/docs/changelog_index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..081c500ffe5427b3dc9987c5fd0ed2ae6482ba6a
--- /dev/null
+++ b/docs/changelog_index.rst
@@ -0,0 +1,8 @@
+Changes
+=======
+
+.. toctree::
+   :maxdepth: 2
+
+   changelog.md
+   flatland_2.0.md
diff --git a/docs/conf.py b/docs/conf.py
index b0366ddb9abee1f32c9e8b17d7e5e78e2c2ad2c6..66f5183f192dc083f87fc9a0175c9f6fe733545e 100755
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -33,7 +33,7 @@ sys.path.insert(0, os.path.abspath('..'))
 
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['recommonmark', 'sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx.ext.intersphinx']
+extensions = ['recommonmark', 'sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx.ext.intersphinx', 'numpydoc']
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -48,7 +48,6 @@ source_suffix = {
     '.md': 'markdown',
 }
 
-
 # The master toctree document.
 master_doc = 'index'
 
@@ -159,3 +158,6 @@ texinfo_documents = [
      'One line description of project.',
      'Miscellaneous'),
 ]
+
+# https://stackoverflow.com/questions/12206334/sphinx-autosummary-toctree-contains-reference-to-nonexisting-document-warnings
+numpydoc_show_class_members = False
diff --git a/docs/gettingstarted.rst b/docs/gettingstarted.rst
index 0cfa43bb130880b84a71f2ce982cfefefb9deffd..8bde9adfa41089ab932a2d753befdc16b0a5d7ec 100644
--- a/docs/gettingstarted.rst
+++ b/docs/gettingstarted.rst
@@ -1,9 +1,9 @@
-=====
+===============
 Getting Started
-=====
+===============
 
 Overview
---------------
+--------
 
 Following are three short tutorials to help new users get acquainted with how
 to create RailEnvs, how to train simple DQN agents on them, and how to customize
@@ -17,7 +17,7 @@ To use flatland in a project:
 
 
 Part 1 : Basic Usage
---------------
+--------------------
 
 The basic usage of RailEnv environments consists in creating a RailEnv object
 endowed with a rail generator, that generates new rail networks on each reset,
@@ -120,7 +120,7 @@ The complete code for this part of the Getting Started guide can be found in
 
 
 Part 2 : Training a Simple an Agent on Flatland
---------------
+-----------------------------------------------
 This is a brief tutorial on how to train an agent on Flatland.
 Here we use a simple random agent to illustrate the process on how to interact with the environment.
 The corresponding code can be found in examples/training_example.py and in the baselines repository
@@ -153,7 +153,7 @@ The difficulty of a railway network depends on the dimensions (`width` x `height
 By varying the number of start and goal connections (nr_start_goal) and the number of extra railway elements added (nr_extra)
 the number of alternative paths of each agents can be modified. The more possible paths an agent has to reach its target the easier the task becomes.
 Here we don't specify any observation builder but rather use the standard tree observation. If you would like to use a custom obervation please follow
- the instructions in the next tutorial.
+the instructions in the next tutorial.
 Feel free to vary these parameters to see how your own agent holds up on different setting. The evalutation set of railway configurations will
 cover the whole spectrum from easy to complex tasks.
 
@@ -188,7 +188,7 @@ The environment returns an array of new observations, reward dictionary for all
 This information can be used to update the policy of your agent and if done['__all__'] == True the episode terminates.
 
 Part 3 : Customizing Observations and Level Generators
---------------
+------------------------------------------------------
 
 Example code for generating custom observations given a RailEnv and to generate
 random rail maps are available in examples/custom_observation_example.py and
@@ -257,6 +257,7 @@ random (but consistent) agents along with their targets and initial directions,
 given a rail map (GridTransitionMap object) and the desired number of agents:
 
 .. code-block:: python
+
     agents_position, agents_direction, agents_target = get_rnd_agents_pos_tgt_dir_on_rail(
         rail_map,
         num_agents)
diff --git a/docs/index.rst b/docs/index.rst
index f644f73e56b74ee5ea765e15d3ed5bef832614e9..ba35554ab50a17f118e744eefd3a77967828a17b 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -10,13 +10,13 @@ Welcome to flatland's documentation!
    about_flatland
    gettingstarted
    intro_observationbuilder
-   specifications/specifications.md
+   intro_observation_actions
+   specifications_index
    modules
    FAQ
    localevaluation
    contributing
-   changelog.md
-   flatland_2.0.md
+   changelog_index
    authors
 
 
diff --git a/docs/observation_actions.rst b/docs/intro_observation_actions.rst
similarity index 100%
rename from docs/observation_actions.rst
rename to docs/intro_observation_actions.rst
diff --git a/docs/intro_observationbuilder.rst b/docs/intro_observationbuilder.rst
index 4386f9e07df07693d2a4e72fa1920e4275972c67..50d65fd4771e5a5362aa7250e6f50c9f884299f4 100644
--- a/docs/intro_observationbuilder.rst
+++ b/docs/intro_observationbuilder.rst
@@ -1,9 +1,9 @@
-=====
+==============================================================
 Getting Started with custom observations and custom predictors
-=====
+==============================================================
 
 Overview
---------------
+--------
 
 One of the main objectives of the Flatland-Challenge_ is to find a suitable observation (relevant features for the problem at hand) to solve the task. Therefore **Flatland** was built with as much flexibility as possible when it comes to building your custom observations: observations in Flatland environments are fully customizable.
 Whenever an environment needs to compute new observations for each agent, it queries an object derived from the :code:`ObservationBuilder` base class, which takes the current state of the environment and returns the desired observation.
@@ -12,7 +12,7 @@ Whenever an environment needs to compute new observations for each agent, it que
 .. _Flatland-Challenge: https://www.aicrowd.com/challenges/flatland-challenge
 
 Example 1 : Simple (but useless) observation
---------------------------------------------------------
+--------------------------------------------
 In this first example we implement all the functions necessary for the observation builder to be valid and work with **Flatland**.
 Custom observation builder objects need to derive from the `flatland.core.env_observation_builder.ObservationBuilder`_
 base class and must implement two methods, :code:`reset(self)` and :code:`get(self, handle)`.
@@ -300,9 +300,9 @@ When building your custom observation builder, you might want to aggregate and d
 Transitions maps
 ~~~~~~~~~~~~~~~~
 
-The transition maps build the base for all movement in the environment. They contain all the information about allowed transitions for the agent at any given position. Because railway movement is limited to the railway tracks, these are important features for any controller that want to interact with the environment. All functionality and features of a transition map can be found here_
+The transition maps build the base for all movement in the environment. They contain all the information about allowed transitions for the agent at any given position. Because railway movement is limited to the railway tracks, these are important features for any controller that wants to interact with the environment. All functionality and features of a transition map can be found here_.
 
-.. _here:https://gitlab.aicrowd.com/flatland/flatland/blob/master/flatland/core/transition_map.py
+.. _here: https://gitlab.aicrowd.com/flatland/flatland/blob/master/flatland/core/transition_map.py
 
 **Get Transitions for cell**
 
@@ -312,7 +312,7 @@ To access the possible transitions at any given cell there are different possibi
 
 2. When more detailed information about the cell in general is necessary you can also get the full transitions of a cell by calling :code:`transition_int = env.rail.get_full_transitions(*position)`. This will return an :code:`int16` for the cell representing the allowed transitions. To understand the transitions returned it is best to represent it as a binary number :code:`bin(transition_int)`, where the bits have to following meaning: :code:`NN NE NS NW EN EE ES EW SN SE SS SW WN WE WS WW`. For example the binary code :code:`1000 0000 0010 0000`, represents a straigt where an agent facing north can transition north and an agent facing south can transition south and no other transitions are possible. To get a better feeling what the binary representations of the elements look like go to this Link_
 
-.. _Link:https://gitlab.aicrowd.com/flatland/flatland/blob/master/flatland/core/grid/rail_env_grid.py#L29
+.. _Link: https://gitlab.aicrowd.com/flatland/flatland/blob/master/flatland/core/grid/rail_env_grid.py#L29
 
 
 These two objects can be used for example to detect switches that are usable by other agents but not the observing agent itself. This can be an important feature when actions have to be taken in order to avoid conflicts.
@@ -350,9 +350,11 @@ All the agent in the initiated environment can be found in the :code:`env.agents
 Beyond the basic agent information we can also access more details about the agents type by looking at speed data:
 
 - Agent max speed :code:`agent.speed_data["speed"]` wich defines the traveling speed when the agent is moving.
-- Agent position fraction :code:``agent.speed_data["position_fraction"]` which is a number between 0 and 1 and indicates when the move to the next cell will occur. Each speed of an agent is 1 or a smaller fraction. At each :code:`env.step()` the agent moves at its fractional speed forwards and only changes to the next cell when the cumulated fractions are :code:`agent.speed_data["position_fraction"] >= 1.`
-- Agent can move at different speed which can be set up by modifying the agent.speed_data within the schedule_generator. For example refer this Link_
-.. _Link:https://gitlab.aicrowd.com/flatland/flatland/blob/master/flatland/envs/schedule_generators.py#L59
+- Agent position fraction :code:`agent.speed_data["position_fraction"]` which is a number between 0 and 1 and indicates when the move to the next cell will occur. Each speed of an agent is 1 or a smaller fraction. At each :code:`env.step()` the agent moves at its fractional speed forwards and only changes to the next cell when the cumulated fractions are :code:`agent.speed_data["position_fraction"] >= 1.`
+- Agent can move at different speed which can be set up by modifying the agent.speed_data within the schedule_generator. For example refer to this Link_Schedule_Generators_.
+
+.. _Link_Schedule_Generators: https://gitlab.aicrowd.com/flatland/flatland/blob/master/flatland/envs/schedule_generators.py#L59
+
 **Agent malfunction information**
 
 Similar to the speed data you can also access individual data about the malfunctions of an agent. All data is available through :code:`agent.malfunction_data` with:
diff --git a/docs/localevaluation.rst b/docs/localevaluation.rst
index abfab39aaa33e836981d8e890bd1a9070d3dcae0..10f9001ba1722e93d7ecf6347fb6099d31f65ed7 100644
--- a/docs/localevaluation.rst
+++ b/docs/localevaluation.rst
@@ -1,22 +1,21 @@
-=====
+================
 Local Evaluation
-=====
+================
 
 This document explains you how to locally evaluate your submissions before making 
 an official submission to the competition.
 
 Requirements
---------------
+------------
 
 * **flatland-rl** : We expect that you have `flatland-rl` installed by following the instructions in  :doc:`installation`.
 
 * **redis** : Additionally you will also need to have  `redis installed <https://redis.io/topics/quickstart>`_ and **should have it running in the background.**
 
 Test Data
---------------
+---------
 
-* **test env data** : You can `download and untar the test-env-data <https://www.aicrowd.com/challenges/flatland-challenge/dataset_files>`_, 
-at a location of your choice, lets say `/path/to/test-env-data/`. After untarring the folder, the folder structure should look something like : 
+* **test env data** : You can `download and untar the test-env-data <https://www.aicrowd.com/challenges/flatland-challenge/dataset_files>`_, at a location of your choice, let's say `/path/to/test-env-data/`. After untarring the folder, the folder structure should look something like:
 
 
 .. code-block:: console
@@ -48,11 +47,10 @@ Evaluation Service
     flatland-evaluator --tests /path/to/test-env-data/
 
 RemoteClient
-------------------
+------------
+
+* **run client** : Some `sample submission code can be found in the starter-kit <https://github.com/AIcrowd/flatland-challenge-starter-kit/>`_, but before you can run your code locally using `FlatlandRemoteClient`, you will have to set the `AICROWD_TESTS_FOLDER` environment variable to the location where you previously untarred the folder with `the test-env-data`:
 
-* **run client** : Some `sample submission code can be found in the starter-kit <https://github.com/AIcrowd/flatland-challenge-starter-kit/>`_, 
-but before you can run your code locally using `FlatlandRemoteClient`, you will have to set the `AICROWD_TESTS_FOLDER` environment variable to the location where you 
-previous untarred the folder with `the test-env-data`:
 
 .. code-block:: console
 
@@ -64,3 +62,4 @@ previous untarred the folder with `the test-env-data`:
 
     # and then finally run your code
     python run.py
+
diff --git a/docs/make.bat b/docs/make.bat
deleted file mode 100644
index 40b4f60ada7eb91f7153d03787084019bf219fb9..0000000000000000000000000000000000000000
--- a/docs/make.bat
+++ /dev/null
@@ -1,36 +0,0 @@
-@ECHO OFF
-
-pushd %~dp0
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
-	set SPHINXBUILD=python -msphinx
-)
-set SOURCEDIR=.
-set BUILDDIR=_build
-set SPHINXPROJ=flatland
-
-if "%1" == "" goto help
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
-	echo.
-	echo.The Sphinx module was not found. Make sure you have Sphinx installed,
-	echo.then set the SPHINXBUILD environment variable to point to the full
-	echo.path of the 'sphinx-build' executable. Alternatively you may add the
-	echo.Sphinx directory to PATH.
-	echo.
-	echo.If you don't have Sphinx installed, grab it from
-	echo.http://sphinx-doc.org/
-	exit /b 1
-)
-
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
-goto end
-
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
-
-:end
-popd
diff --git a/docs/specifications/rendering.md b/docs/specifications/rendering.md
index 46f53d0e2fb882fd39f039e9c5055f0aa3cbd2f5..baba1c06e16da99f20ee61e6feb6d031f0a415c2 100644
--- a/docs/specifications/rendering.md
+++ b/docs/specifications/rendering.md
@@ -4,8 +4,8 @@
 This doc specifies the software to meet the requirements in the Visualization requirements doc.
 
 ## References
-- [Visualization Requirements](Specifications/Visualization)
-- [Core Spec](Specifications/Core)
+- [Visualization Requirements](visualization)
+- [Core Spec](./core)
 
 ## Interfaces
 ### Interface with Environment Component
@@ -72,4 +72,4 @@ To-be-filled
 
 #### Overlay dynamic primitives over the background at each time step.
 
-No point trying to figure out changes. Need to explicitly draw every primitive anyways (that’s how these renders work).
\ No newline at end of file
+No point trying to figure out changes. Need to explicitly draw every primitive anyways (that’s how these renders work).
diff --git a/docs/specifications/specifications.md b/docs/specifications/specifications.md
index ec680147a4cbb2b9bf2962b7bbabd9d5312aecac..7d00259450cfe3217faa839df23cc00fd29ba3eb 100644
--- a/docs/specifications/specifications.md
+++ b/docs/specifications/specifications.md
@@ -11,7 +11,7 @@ In a humand-readable language, they provide
 ## Overview
 ![UML_flatland.png](img/UML_flatland.png)
 [Diagram Source](https://confluence.sbb.ch/x/pQfsSw)
-## [Core](core)
+
 
 
 ## Rail Generators and Schedule Generators
@@ -330,6 +330,3 @@ RailEnv.step()
                                                     self.get()
                                                     ...
 ```
-
-## [Rendering](rendering)
-## [Railway](railway)
diff --git a/docs/specifications_index.rst b/docs/specifications_index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..ce42dad5f3885bb425ed2cde88e0ea5dfb3eb358
--- /dev/null
+++ b/docs/specifications_index.rst
@@ -0,0 +1,11 @@
+Flatland Specs
+==============
+
+.. toctree::
+   :maxdepth: 2
+
+   specifications/specifications.md
+   specifications/core.md
+   specifications/railway.md
+   specifications/rendering.md
+   specifications/visualization.md
diff --git a/examples/custom_observation_example_03_ObservePredictions.py b/examples/custom_observation_example_03_ObservePredictions.py
index b6027184c631c79c15f967e8867b8b5f3c8ba0f6..9238a2af4137e37e9d79bc3c1aaade2bb987403e 100644
--- a/examples/custom_observation_example_03_ObservePredictions.py
+++ b/examples/custom_observation_example_03_ObservePredictions.py
@@ -11,6 +11,7 @@ from flatland.envs.predictions import ShortestPathPredictorForRailEnv
 from flatland.envs.rail_env import RailEnv
 from flatland.envs.rail_generators import complex_rail_generator
 from flatland.envs.schedule_generators import complex_schedule_generator
+from flatland.utils.ordered_set import OrderedSet
 from flatland.utils.rendertools import RenderTool
 
 random.seed(100)
@@ -82,7 +83,7 @@ class ObservePredictions(TreeObsForRailEnv):
         # We are going to track what cells where considered while building the obervation and make them accesible
         # For rendering
 
-        visited = set()
+        visited = OrderedSet()
         for _idx in range(10):
             # Check if any of the other prediction overlap with agents own predictions
             x_coord = self.predictions[handle][_idx][1]
diff --git a/flatland/core/env.py b/flatland/core/env.py
index 1bc5b6f3eba4ee4713bd3c8d6b88440006c215a5..2dc983c5edd49e4ebb5033877b69f36f293141b2 100644
--- a/flatland/core/env.py
+++ b/flatland/core/env.py
@@ -15,6 +15,7 @@ class Environment:
 
     Agents are identified by agent ids (handles).
     Examples:
+
         >>> obs = env.reset()
         >>> print(obs)
         {
@@ -40,6 +41,7 @@ class Environment:
             "train_0": {},  # info for train_0
             "train_1": {},  # info for train_1
         }
+
     """
 
     def __init__(self):
@@ -51,7 +53,8 @@ class Environment:
         """
         Resets the env and returns observations from agents in the environment.
 
-        Returns:
+        Returns
+        -------
         obs : dict
             New observations for each agent.
         """
@@ -66,7 +69,7 @@ class Environment:
         The returns are dicts mapping from agent_id strings to values.
 
         Parameters
-        -------
+        ----------
         action_dict : dict
             Dictionary of actions to execute, indexed by agent id.
 
diff --git a/flatland/core/env_observation_builder.py b/flatland/core/env_observation_builder.py
index 4acdf16f292a1b3ef5b78620e588dea8c3ff27e3..ba79e7fc0e46d8182951b5a8e2520d7ffc9eacb0 100644
--- a/flatland/core/env_observation_builder.py
+++ b/flatland/core/env_observation_builder.py
@@ -2,10 +2,11 @@
 ObservationBuilder objects are objects that can be passed to environments designed for customizability.
 The ObservationBuilder-derived custom classes implement 2 functions, reset() and get() or get(handle).
 
-+ Reset() is called after each environment reset, to allow for pre-computing relevant data.
++ `reset()` is called after each environment reset, to allow for pre-computing relevant data.
+
++ `get()` is called whenever an observation has to be computed, potentially for each agent independently in case of \
+multi-agent environments.
 
-+ Get() is called whenever an observation has to be computed, potentially for each agent independently in
-case of multi-agent environments.
 """
 import numpy as np
 
@@ -14,7 +15,7 @@ class ObservationBuilder:
     """
     ObservationBuilder base class.
 
-    Derived objects must implement and `observation_space' attribute as a tuple with the dimensions of the returned
+    Derived objects must implement an `observation_space` attribute as a tuple with the dimensions of the returned
     observations.
     """
 
@@ -32,19 +33,19 @@ class ObservationBuilder:
 
     def get_many(self, handles=[]):
         """
-        Called whenever an observation has to be computed for the `env' environment, for each agent with handle
-        in the `handles' list.
+        Called whenever an observation has to be computed for the `env` environment, for each agent with handle
+        in the `handles` list.
 
         Parameters
-        -------
-        handles : list of handles (optional)
+        ----------
+        handles : list of handles, optional
             List with the handles of the agents for which to compute the observation vector.
 
         Returns
         -------
         function
             A dictionary of observation structures, specific to the corresponding environment, with handles from
-            `handles' as keys.
+            `handles` as keys.
         """
         observations = {}
         for h in handles:
@@ -53,12 +54,12 @@ class ObservationBuilder:
 
     def get(self, handle=0):
         """
-        Called whenever an observation has to be computed for the `env' environment, possibly
-        for each agent independently (agent id `handle').
+        Called whenever an observation has to be computed for the `env` environment, possibly
+        for each agent independently (agent id `handle`).
 
         Parameters
-        -------
-        handle : int (optional)
+        ----------
+        handle : int, optional
             Handle of the agent for which to compute the observation vector.
 
         Returns
diff --git a/flatland/core/env_prediction_builder.py b/flatland/core/env_prediction_builder.py
index 13eb38140fb25730d1817e2db7a17c350a2260c7..c1026fe02bfc2cfb30fd57ef570022ca3b15f6f0 100644
--- a/flatland/core/env_prediction_builder.py
+++ b/flatland/core/env_prediction_builder.py
@@ -3,9 +3,9 @@ PredictionBuilder objects are objects that can be passed to environments designe
 The PredictionBuilder-derived custom classes implement 2 functions, reset() and get([handle]).
 If predictions are not required in every step or not for all agents, then
 
-+ Reset() is called after each environment reset, to allow for pre-computing relevant data.
++ `reset()` is called after each environment reset, to allow for pre-computing relevant data.
 
-+ Get() is called whenever an step has to be computed, potentially for each agent independently in
++ `get()` is called whenever a step has to be computed, potentially for each agent independently in \
 case of multi-agent environments.
 """
 
@@ -33,11 +33,11 @@ class PredictionBuilder:
         Called whenever get_many in the observation build is called.
 
         Parameters
-        -------
+        ----------
         custom_args: dict
             Implementation-dependent custom arguments, see the sub-classes.
 
-        handle : int (optional)
+        handle : int, optional
             Handle of the agent for which to compute the observation vector.
 
         Returns
diff --git a/flatland/core/grid/grid4.py b/flatland/core/grid/grid4.py
index b4b5b17c9b1e6f5c7abd3c82a1fbf1e5ee96fcfe..da721dd99fbc0ea46124dcd036425df3206aa339 100644
--- a/flatland/core/grid/grid4.py
+++ b/flatland/core/grid/grid4.py
@@ -24,9 +24,9 @@ class Grid4Transitions(Transitions):
     """
     Grid4Transitions class derived from Transitions.
 
-    Special case of `Transitions' over a 2D-grid (FlatLand).
+    Special case of `Transitions` over a 2D-grid (FlatLand).
     Transitions are possible to neighboring cells on the grid if allowed.
-    GridTransitions keeps track of valid transitions supplied as `transitions'
+    GridTransitions keeps track of valid transitions supplied as `transitions`
     list, each represented as a bitmap of 16 bits.
 
     Whether a transition is allowed or not depends on which direction an agent
@@ -67,8 +67,8 @@ class Grid4Transitions(Transitions):
         """
         Get the 4 possible transitions ((N,E,S,W), 4 elements tuple
         if no diagonal transitions allowed) available for an agent oriented
-        in direction `orientation' and inside a cell with
-        transitions `cell_transition'.
+        in direction `orientation` and inside a cell with
+        transitions `cell_transition`.
 
         Parameters
         ----------
@@ -90,9 +90,9 @@ class Grid4Transitions(Transitions):
         """
         Set the possible transitions (e.g., (N,E,S,W), 4 elements tuple
         if no diagonal transitions allowed) available for an agent
-        oriented in direction `orientation' and inside a cell with transitions
-        `cell_transition'. A new `cell_transition' is returned with
-        the specified bits replaced by `new_transitions'.
+        oriented in direction `orientation` and inside a cell with transitions
+        `cell_transition`. A new `cell_transition` is returned with
+        the specified bits replaced by `new_transitions`.
 
         Parameters
         ----------
@@ -107,8 +107,8 @@ class Grid4Transitions(Transitions):
         -------
         int
             An updated bitmap that replaces the original transitions validity
-            of `cell_transition' with `new_transitions', for the appropriate
-            `orientation'.
+            of `cell_transition` with `new_transitions`, for the appropriate
+            `orientation`.
 
         """
         mask = (1 << ((4 - orientation) * 4)) - (1 << ((3 - orientation) * 4))
@@ -127,8 +127,8 @@ class Grid4Transitions(Transitions):
     def get_transition(self, cell_transition, orientation, direction):
         """
         Get the transition bit (1 value) that determines whether an agent
-        oriented in direction `orientation' and inside a cell with transitions
-        `cell_transition' can move to the cell in direction `direction'
+        oriented in direction `orientation` and inside a cell with transitions
+        `cell_transition` can move to the cell in direction `direction`
         relative to the current cell.
 
         Parameters
@@ -151,8 +151,8 @@ class Grid4Transitions(Transitions):
     def set_transition(self, cell_transition, orientation, direction, new_transition, remove_deadends=False):
         """
         Set the transition bit (1 value) that determines whether an agent
-        oriented in direction `orientation' and inside a cell with transitions
-        `cell_transition' can move to the cell in direction `direction'
+        oriented in direction `orientation` and inside a cell with transitions
+        `cell_transition` can move to the cell in direction `direction`
         relative to the current cell.
 
         Parameters
@@ -171,8 +171,8 @@ class Grid4Transitions(Transitions):
         -------
         int
             An updated bitmap that replaces the original transitions validity
-            of `cell_transition' with `new_transitions', for the appropriate
-            `orientation'.
+            of `cell_transition` with `new_transitions`, for the appropriate
+            `orientation`.
 
         """
         if new_transition:
@@ -196,7 +196,7 @@ class Grid4Transitions(Transitions):
             16 bits used to encode the valid transitions for a cell.
         rotation : int
             Angle by which to clock-wise rotate the transition bits in
-            `cell_transition' by. I.e., rotation={0, 90, 180, 270} degrees.
+            `cell_transition` by. I.e., rotation={0, 90, 180, 270} degrees.
 
         Returns
         -------
diff --git a/flatland/core/grid/grid8.py b/flatland/core/grid/grid8.py
index 2ba379a5ecb4099de999905d34daf91ccccac640..a4dcc3d215a3ff0fd699924e489d2fb17191a37c 100644
--- a/flatland/core/grid/grid8.py
+++ b/flatland/core/grid/grid8.py
@@ -20,9 +20,9 @@ class Grid8Transitions(Transitions):
     """
     Grid8Transitions class derived from Transitions.
 
-    Special case of `Transitions' over a 2D-grid (FlatLand).
+    Special case of `Transitions` over a 2D-grid (FlatLand).
     Transitions are possible to neighboring cells on the grid if allowed.
-    GridTransitions keeps track of valid transitions supplied as `transitions'
+    GridTransitions keeps track of valid transitions supplied as `transitions`
     list, each represented as a bitmap of 64 bits.
 
     0=North, 1=North-East, etc.
@@ -82,8 +82,8 @@ class Grid8Transitions(Transitions):
         -------
         int
             An updated bitmap that replaces the original transitions validity
-            of `cell_transition' with `new_transitions', for the appropriate
-            `orientation'.
+            of `cell_transition` with `new_transitions`, for the appropriate
+            `orientation`.
 
         """
         mask = (1 << ((8 - orientation) * 8)) - (1 << ((7 - orientation) * 8))
@@ -106,8 +106,8 @@ class Grid8Transitions(Transitions):
     def get_transition(self, cell_transition, orientation, direction):
         """
         Get the transition bit (1 value) that determines whether an agent
-        oriented in direction `orientation' and inside a cell with transitions
-        `cell_transition' can move to the cell in direction `direction'
+        oriented in direction `orientation` and inside a cell with transitions
+        `cell_transition` can move to the cell in direction `direction`
         relative to the current cell.
 
         Parameters
@@ -131,8 +131,8 @@ class Grid8Transitions(Transitions):
 
         """
         Set the transition bit (1 value) that determines whether an agent
-        oriented in direction `orientation' and inside a cell with transitions
-        `cell_transition' can move to the cell in direction `direction'
+        oriented in direction `orientation` and inside a cell with transitions
+        `cell_transition` can move to the cell in direction `direction`
         relative to the current cell.
 
         Parameters
@@ -150,8 +150,8 @@ class Grid8Transitions(Transitions):
         -------
         int
             An updated bitmap that replaces the original transitions validity
-            of `cell_transition' with `new_transitions', for the appropriate
-            `orientation'.
+            of `cell_transition` with `new_transitions`, for the appropriate
+            `orientation`.
 
         """
         if new_transition:
@@ -172,7 +172,7 @@ class Grid8Transitions(Transitions):
             64 bits used to encode the valid transitions for a cell.
         rotation : int
             Angle by which to clock-wise rotate the transition bits in
-            `cell_transition' by. I.e., rotation={0, 45, 90, 135, 180,
+            `cell_transition` by. I.e., rotation={0, 45, 90, 135, 180,
             225, 270, 315} degrees.
 
         Returns
diff --git a/flatland/core/grid/grid_utils.py b/flatland/core/grid/grid_utils.py
index 87f0d78df75a30194d8182b79708edbe55c0fa20..5051a9357b80cc581c76a02a3d2ef1d0c46b56e9 100644
--- a/flatland/core/grid/grid_utils.py
+++ b/flatland/core/grid/grid_utils.py
@@ -217,25 +217,27 @@ class Vec2dOperations:
         return x1, y1
 
 
-def position_to_coordinate(depth, positions):
-    """Converts coordinates to positions:
-         [ (0,0) (0,1) ..  (0,w-1)
-           (1,0) (1,1)     (1,w-1)
-           ...
-           (d-1,0) (d-1,1)     (d-1,w-1)
-          ]
+def position_to_coordinate(depth: int, positions):
+    """Converts coordinates to positions::
+
+        [ (0,0) (0,1) ..  (0,w-1)
+          (1,0) (1,1)     (1,w-1)
+            ...
+          (d-1,0) (d-1,1)     (d-1,w-1)
+        ]
 
          -->
 
-         [ 0      d    ..  (w-1)*d
-           1      d+1
-           ...
-           d-1    2d-1     w*d-1
-         ]
+        [ 0      d    ..  (w-1)*d
+          1      d+1
+          ...
+          d-1    2d-1     w*d-1
+        ]
 
-    :param depth:
-    :param positions:
-    :return:
+    Parameters
+    ----------
+    depth : int
+    positions : List[Tuple[int,int]]
     """
     coords = ()
     for p in positions:
@@ -245,7 +247,8 @@ def position_to_coordinate(depth, positions):
 
 def coordinate_to_position(depth, coords):
     """
-    Converts positions to coordinates:
+    Converts positions to coordinates::
+
          [ 0      d    ..  (w-1)*d
            1      d+1
            ...
diff --git a/flatland/core/grid/rail_env_grid.py b/flatland/core/grid/rail_env_grid.py
index 680e945316ab3a4876bd36fa8e6b001ea346cd26..ffc0e5bfe9dc992052356ed45dc7fe6cf98adfee 100644
--- a/flatland/core/grid/rail_env_grid.py
+++ b/flatland/core/grid/rail_env_grid.py
@@ -1,13 +1,12 @@
 from flatland.core.grid.grid4 import Grid4Transitions
+from flatland.utils.ordered_set import OrderedSet
 
 
 class RailEnvTransitions(Grid4Transitions):
     """
-    Special case of `GridTransitions' over a 2D-grid, with a pre-defined set
+    Special case of `GridTransitions` over a 2D-grid, with a pre-defined set
     of transitions mimicking the types of real Swiss rail connections.
 
-    --------------------------------------------------------------------------
-
     As no diagonal transitions are allowed in the RailEnv environment, the
     possible transitions for RailEnv from a cell to its neighboring ones
     are represented over 16 bits.
@@ -44,7 +43,7 @@ class RailEnvTransitions(Grid4Transitions):
         )
 
         # create this to make validation faster
-        self.transitions_all = set()
+        self.transitions_all = OrderedSet()
         for index, trans in enumerate(self.transitions):
             self.transitions_all.add(trans)
             if index in (2, 4, 6, 7, 8, 9, 10):
diff --git a/flatland/core/transition_map.py b/flatland/core/transition_map.py
index f860b77c6ecf5088ad8c47fd2b0665a88aeeef0c..07678add5549c3ac13df876132ed3bcdbf5bec5e 100644
--- a/flatland/core/transition_map.py
+++ b/flatland/core/transition_map.py
@@ -12,6 +12,7 @@ from flatland.core.grid.grid_utils import IntVector2DArray, IntVector2D
 from flatland.core.grid.grid_utils import Vec2dOperations as Vec2d
 from flatland.core.grid.rail_env_grid import RailEnvTransitions
 from flatland.core.transitions import Transitions
+from flatland.utils.ordered_set import OrderedSet
 
 
 class TransitionMap:
@@ -25,7 +26,7 @@ class TransitionMap:
     def get_transitions(self, cell_id):
         """
         Return a tuple of transitions available in a cell specified by
-        `cell_id' (e.g., a tuple of size of the maximum number of transitions,
+        `cell_id` (e.g., a tuple of size of the maximum number of transitions,
         with values 0 or 1, or potentially in between,
         for stochastic transitions).
 
@@ -45,8 +46,8 @@ class TransitionMap:
 
     def set_transitions(self, cell_id, new_transitions):
         """
-        Replaces the available transitions in cell `cell_id' with the tuple
-        `new_transitions'. `new_transitions' must have
+        Replaces the available transitions in cell `cell_id` with the tuple
+        `new_transitions`. `new_transitions` must have
         one element for each possible transition.
 
         Parameters
@@ -62,8 +63,8 @@ class TransitionMap:
 
     def get_transition(self, cell_id, transition_index):
         """
-        Return the status of whether an agent in cell `cell_id' can perform a
-        movement along transition `transition_index (e.g., the NESW direction
+        Return the status of whether an agent in cell `cell_id` can perform a
+        movement along transition `transition_index` (e.g., the NESW direction
         of movement, for agents on a grid).
 
         Parameters
@@ -87,8 +88,8 @@ class TransitionMap:
 
     def set_transition(self, cell_id, transition_index, new_transition):
         """
-        Replaces the validity of transition to `transition_index' in cell
-        `cell_id' with the new `new_transition'.
+        Replaces the validity of transition to `transition_index` in cell
+        `cell_id` with the new `new_transition`.
 
 
         Parameters
@@ -158,7 +159,7 @@ class GridTransitionMap(TransitionMap):
     def get_transitions(self, row, column, orientation):
         """
         Return a tuple of transitions available in a cell specified by
-        `cell_id' (e.g., a tuple of size of the maximum number of transitions,
+        `cell_id` (e.g., a tuple of size of the maximum number of transitions,
         with values 0 or 1, or potentially in between,
         for stochastic transitions).
 
@@ -180,8 +181,8 @@ class GridTransitionMap(TransitionMap):
 
     def set_transitions(self, cell_id, new_transitions):
         """
-        Replaces the available transitions in cell `cell_id' with the tuple
-        `new_transitions'. `new_transitions' must have
+        Replaces the available transitions in cell `cell_id` with the tuple
+        `new_transitions`. `new_transitions` must have
         one element for each possible transition.
 
         Parameters
@@ -206,8 +207,8 @@ class GridTransitionMap(TransitionMap):
 
     def get_transition(self, cell_id, transition_index):
         """
-        Return the status of whether an agent in cell `cell_id' can perform a
-        movement along transition `transition_index (e.g., the NESW direction
+        Return the status of whether an agent in cell `cell_id` can perform a
+        movement along transition `transition_index` (e.g., the NESW direction
         of movement, for agents on a grid).
 
         Parameters
@@ -234,8 +235,8 @@ class GridTransitionMap(TransitionMap):
 
     def set_transition(self, cell_id, transition_index, new_transition, remove_deadends=False):
         """
-        Replaces the validity of transition to `transition_index' in cell
-        `cell_id' with the new `new_transition'.
+        Replaces the validity of transition to `transition_index` in cell
+        `cell_id` with the new `new_transition`.
 
 
         Parameters
@@ -263,7 +264,7 @@ class GridTransitionMap(TransitionMap):
 
     def save_transition_map(self, filename):
         """
-        Save the transitions grid as `filename', in npy format.
+        Save the transitions grid as `filename`, in npy format.
 
         Parameters
         ----------
@@ -275,9 +276,9 @@ class GridTransitionMap(TransitionMap):
 
     def load_transition_map(self, package, resource):
         """
-        Load the transitions grid from `filename' (npy format).
+        Load the transitions grid from `filename` (npy format).
         The load function only updates the transitions grid, and possibly width and height, but the object has to be
-        initialized with the correct `transitions' object anyway.
+        initialized with the correct `transitions` object anyway.
 
         Parameters
         ----------
@@ -287,7 +288,7 @@ class GridTransitionMap(TransitionMap):
             Name of the file from which to load the transitions grid within the package.
         override_gridsize : bool
             If override_gridsize=True, the width and height of the GridTransitionMap object are replaced with the size
-            of the map loaded from `filename'. If override_gridsize=False, the transitions grid is either cropped (if
+            of the map loaded from `filename`. If override_gridsize=False, the transitions grid is either cropped (if
             the grid size is larger than (height,width) ) or padded with zeros (if the grid size is smaller than
             (height,width) )
 
@@ -338,7 +339,7 @@ class GridTransitionMap(TransitionMap):
         tmp = self.get_full_transitions(rcPos[0], rcPos[1])
 
         def is_simple_turn(trans):
-            all_simple_turns = set()
+            all_simple_turns = OrderedSet()
             for trans in [int('0100000000000010', 2),  # Case 1b (8)  - simple turn right
                           int('0001001000000000', 2)  # Case 1c (9)  - simple turn left]:
                           ]:
@@ -353,7 +354,7 @@ class GridTransitionMap(TransitionMap):
         # print("_path_exists({},{},{}".format(start, direction, end))
         # BFS - Check if a path exists between the 2 nodes
 
-        visited = set()
+        visited = OrderedSet()
         stack = [(start, direction)]
         while stack:
             node = stack.pop()
@@ -377,8 +378,7 @@ class GridTransitionMap(TransitionMap):
         """
         Check validity of cell at rcPos = tuple(row, column)
         Checks that:
-        - surrounding cells have inbound transitions for all the
-            outbound transitions of this cell.
+        - surrounding cells have inbound transitions for all the outbound transitions of this cell.
 
         These are NOT checked - see transition.is_valid:
         - all transitions have the mirror transitions (N->E <=> W->S)
@@ -429,8 +429,7 @@ class GridTransitionMap(TransitionMap):
         """
         Check validity of cell at rcPos = tuple(row, column)
         Checks that:
-        - surrounding cells have inbound transitions for all the
-            outbound transitions of this cell.
+        - surrounding cells have inbound transitions for all the outbound transitions of this cell.
 
         These are NOT checked - see transition.is_valid:
         - all transitions have the mirror transitions (N->E <=> W->S)
diff --git a/flatland/core/transitions.py b/flatland/core/transitions.py
index 5049c23bff3667a08375fee270a8867ca013c467..995c2d4e7a911c4e01b64b0673c2bdf1420d324c 100644
--- a/flatland/core/transitions.py
+++ b/flatland/core/transitions.py
@@ -12,7 +12,7 @@ class Transitions:
 
     Generic class that implements checks to control whether a
     certain transition is allowed (agent facing a direction
-    `orientation' and moving into direction `orientation')
+    `orientation` and moving into direction `direction`)
     """
 
     def get_type(self):
@@ -21,7 +21,7 @@ class Transitions:
     def get_transitions(self, cell_transition, orientation):
         """
         Return a tuple of transitions available in a cell specified by
-        `cell_transition' for an agent facing direction `orientation'
+        `cell_transition` for an agent facing direction `orientation`
         (e.g., a tuple of size of the maximum number of transitions,
         with values 0 or 1, or potentially in between,
         for stochastic transitions).
@@ -45,9 +45,9 @@ class Transitions:
 
     def set_transitions(self, cell_transition, orientation, new_transitions):
         """
-        Return a `cell_transition' specification where the transitions
-        available for an agent facing direction `orientation' are replaced
-        with the tuple `new_transitions'. `new_orientations' must have
+        Return a `cell_transition` specification where the transitions
+        available for an agent facing direction `orientation` are replaced
+        with the tuple `new_transitions`. `new_transitions` must have
         one element for each possible transition.
 
         Parameters
@@ -65,8 +65,8 @@ class Transitions:
         -------
         [cell-content]
             An updated class-specific object that replaces the original
-            transitions validity of `cell_transition' with `new_transitions',
-            for the appropriate `orientation'.
+            transitions validity of `cell_transition` with `new_transitions`,
+            for the appropriate `orientation`.
 
         """
         raise NotImplementedError()
@@ -74,8 +74,8 @@ class Transitions:
     def get_transition(self, cell_transition, orientation, direction):
         """
         Return the status of whether an agent oriented in directions
-        `orientation' and inside a cell with transitions `cell_transition'
-        can move to the cell in direction `direction' relative
+        `orientation` and inside a cell with transitions `cell_transition`
+        can move to the cell in direction `direction` relative
         to the current cell.
 
         Parameters
@@ -101,11 +101,11 @@ class Transitions:
     def set_transition(self, cell_transition, orientation, direction,
                        new_transition):
         """
-        Return a `cell_transition' specification where the status of
-        whether an agent oriented in direction `orientation' and inside
-        a cell with transitions `cell_transition' can move to the cell
-        in direction `direction' relative to the current cell is set
-        to `new_transition'.
+        Return a `cell_transition` specification where the status of
+        whether an agent oriented in direction `orientation` and inside
+        a cell with transitions `cell_transition` can move to the cell
+        in direction `direction` relative to the current cell is set
+        to `new_transition`.
 
         Parameters
         ----------
@@ -125,8 +125,8 @@ class Transitions:
         -------
         [cell-content]
             An updated class-specific object that replaces the original
-            transitions validity of `cell_transition' with `new_transitions',
-            for the appropriate `orientation' to `direction'.
+            transitions validity of `cell_transition` with `new_transitions`,
+            for the appropriate `orientation` to `direction`.
 
         """
         raise NotImplementedError()
diff --git a/flatland/envs/agent_utils.py b/flatland/envs/agent_utils.py
index b228e10b6c146f5692166e179bb9f574a68c9134..f659ec8436a941606b6d649e24d2481e5be9b66d 100644
--- a/flatland/envs/agent_utils.py
+++ b/flatland/envs/agent_utils.py
@@ -1,8 +1,11 @@
 from itertools import starmap
+from typing import Tuple
 
 import numpy as np
 from attr import attrs, attrib, Factory
 
+from flatland.core.grid.grid4 import Grid4TransitionsEnum
+
 
 @attrs
 class EnvAgentStatic(object):
@@ -11,10 +14,10 @@ class EnvAgentStatic(object):
         rather than where it is at the moment.
         The target should also be stored here.
     """
-    position = attrib()
-    direction = attrib()
-    target = attrib()
-    moving = attrib(default=False)
+    position = attrib(type=Tuple[int, int])
+    direction = attrib(type=Grid4TransitionsEnum)
+    target = attrib(type=Tuple[int, int])
+    moving = attrib(default=False, type=bool)
 
     # speed_data: speed is added to position_fraction on each moving step, until position_fraction>=1.0,
     # after which 'transition_action_on_cellexit' is executed (equivalent to executing that action in the previous
@@ -27,7 +30,8 @@ class EnvAgentStatic(object):
     # number of time the agent had to stop, since the last time it broke down
     malfunction_data = attrib(
         default=Factory(
-            lambda: dict({'malfunction': 0, 'malfunction_rate': 0, 'next_malfunction': 0, 'nr_malfunctions': 0})))
+            lambda: dict({'malfunction': 0, 'malfunction_rate': 0, 'next_malfunction': 0, 'nr_malfunctions': 0,
+                          'moving_before_malfunction': False})))
 
     @classmethod
     def from_lists(cls, positions, directions, targets, speeds=None, malfunction_rates=None):
diff --git a/flatland/envs/observations.py b/flatland/envs/observations.py
index a833fc01949d4184d5ca2442c6bb429d697318f3..2a1c52207630a72f2749ba22ab7c46241839d4ab 100644
--- a/flatland/envs/observations.py
+++ b/flatland/envs/observations.py
@@ -6,9 +6,9 @@ import pprint
 import numpy as np
 
 from flatland.core.env_observation_builder import ObservationBuilder
-from flatland.core.grid.grid4 import Grid4TransitionsEnum
 from flatland.core.grid.grid4_utils import get_new_position
 from flatland.core.grid.grid_utils import coordinate_to_position
+from flatland.utils.ordered_set import OrderedSet
 
 
 class TreeObsForRailEnv(ObservationBuilder):
@@ -45,8 +45,8 @@ class TreeObsForRailEnv(ObservationBuilder):
 
     def get_many(self, handles=None):
         """
-        Called whenever an observation has to be computed for the `env' environment, for each agent with handle
-        in the `handles' list.
+        Called whenever an observation has to be computed for the `env` environment, for each agent with handle
+        in the `handles` list.
         """
 
         if handles is None:
@@ -74,15 +74,17 @@ class TreeObsForRailEnv(ObservationBuilder):
 
     def get(self, handle):
         """
-        Computes the current observation for agent `handle' in env
+        Computes the current observation for agent `handle` in env
 
         The observation vector is composed of 4 sequential parts, corresponding to data from the up to 4 possible
         movements in a RailEnv (up to because only a subset of possible transitions are allowed in RailEnv).
         The possible movements are sorted relative to the current orientation of the agent, rather than NESW as for
-        the transitions. The order is:
+        the transitions. The order is::
+
             [data from 'left'] + [data from 'forward'] + [data from 'right'] + [data from 'back']
 
-        Each branch data is organized as:
+        Each branch data is organized as::
+
             [root node information] +
             [recursive branch data from 'left'] +
             [... from 'forward'] +
@@ -91,39 +93,50 @@ class TreeObsForRailEnv(ObservationBuilder):
 
         Each node information is composed of 9 features:
 
-        #1: if own target lies on the explored branch the current distance from the agent in number of cells is stored.
+        #1:
+            if own target lies on the explored branch the current distance from the agent in number of cells is stored.
 
-        #2: if another agents target is detected the distance in number of cells from the agents current location
+        #2:
+            if another agent's target is detected the distance in number of cells from the agent's current location\
             is stored
 
-        #3: if another agent is detected the distance in number of cells from current agent position is stored.
+        #3:
+            if another agent is detected the distance in number of cells from current agent position is stored.
 
-        #4: possible conflict detected
-            tot_dist = Other agent predicts to pass along this cell at the same time as the agent, we store the
+        #4:
+            possible conflict detected
+            tot_dist = Other agent predicts to pass along this cell at the same time as the agent, we store the \
              distance in number of cells from current agent position
 
             0 = No other agent reserve the same cell at similar time
 
-        #5: if an not usable switch (for agent) is detected we store the distance.
+        #5:
+            if an unusable switch (for the agent) is detected, we store the distance.
 
-        #6: This feature stores the distance in number of cells to the next branching  (current node)
+        #6:
+            This feature stores the distance in number of cells to the next branching  (current node)
 
-        #7: minimum distance from node to the agent's target given the direction of the agent if this path is chosen
+        #7:
+            minimum distance from node to the agent's target given the direction of the agent if this path is chosen
 
-        #8: agent in the same direction
-            n = number of agents present same direction
+        #8:
+            agent in the same direction
+            n = number of agents present same direction \
                 (possible future use: number of other agents in the same direction in this branch)
             0 = no agent present same direction
 
-        #9: agent in the opposite direction
-            n = number of agents present other direction than myself (so conflict)
+        #9:
+            agent in the opposite direction
+            n = number of agents present other direction than myself (so conflict) \
                 (possible future use: number of other agents in other direction in this branch, ie. number of conflicts)
             0 = no agent present other direction than myself
 
-        #10: malfunctioning/blokcing agents
+        #10:
+            malfunctioning/blocking agents
             n = number of time steps the oberved agent remains blocked
 
-        #11: slowest observed speed of an agent in same direction
+        #11:
+            slowest observed speed of an agent in same direction
             1 if no agent is observed
 
             min_fractional speed otherwise
@@ -154,7 +167,7 @@ class TreeObsForRailEnv(ObservationBuilder):
         observation = [0, 0, 0, 0, 0, 0, self.env.distance_map.get()[(handle, *agent.position, agent.direction)], 0, 0,
                        agent.malfunction_data['malfunction'], agent.speed_data['speed']]
 
-        visited = set()
+        visited = OrderedSet()
 
         # Start from the current orientation, and see which transitions are available;
         # organize them as [left, forward, right, back], relative to the current orientation
@@ -170,7 +183,7 @@ class TreeObsForRailEnv(ObservationBuilder):
                 branch_observation, branch_visited = \
                     self._explore_branch(handle, new_cell, branch_direction, 1, 1)
                 observation = observation + branch_observation
-                visited = visited.union(branch_visited)
+                visited |= branch_visited
             else:
                 # add cells filled with infinity if no transition is possible
                 observation = observation + [-np.inf] * self._num_cells_to_fill_in(self.max_depth)
@@ -207,7 +220,7 @@ class TreeObsForRailEnv(ObservationBuilder):
         last_is_terminal = False  # wrong cell OR cycle;  either way, we don't want the agent to land here
         last_is_target = False
 
-        visited = set()
+        visited = OrderedSet()
         agent = self.env.agents[handle]
         time_per_cell = np.reciprocal(agent.speed_data["speed"])
         own_target_encountered = np.inf
@@ -335,7 +348,7 @@ class TreeObsForRailEnv(ObservationBuilder):
                     last_is_dead_end = True
 
                 if not last_is_dead_end:
-                    # Keep walking through the tree along `direction'
+                    # Keep walking through the tree along `direction`
                     exploring = True
                     # convert one-hot encoding to 0,1,2,3
                     direction = np.argmax(cell_transitions)
@@ -354,7 +367,7 @@ class TreeObsForRailEnv(ObservationBuilder):
                 last_is_terminal = True
                 break
 
-        # `position' is either a terminal node or a switch
+        # `position` is either a terminal node or a switch
 
         # #############################
         # #############################
@@ -420,7 +433,7 @@ class TreeObsForRailEnv(ObservationBuilder):
                                                                           depth + 1)
                 observation = observation + branch_observation
                 if len(branch_visited) != 0:
-                    visited = visited.union(branch_visited)
+                    visited |= branch_visited
             elif last_is_switch and possible_transitions[branch_direction]:
                 new_cell = get_new_position(position, branch_direction)
                 branch_observation, branch_visited = self._explore_branch(handle,
@@ -430,7 +443,7 @@ class TreeObsForRailEnv(ObservationBuilder):
                                                                           depth + 1)
                 observation = observation + branch_observation
                 if len(branch_visited) != 0:
-                    visited = visited.union(branch_visited)
+                    visited |= branch_visited
             else:
                 # no exploring possible, add just cells with infinity
                 observation = observation + [-np.inf] * self._num_cells_to_fill_in(self.max_depth - depth)
@@ -488,10 +501,10 @@ class GlobalObsForRailEnv(ObservationBuilder):
     Gives a global observation of the entire rail environment.
     The observation is composed of the following elements:
 
-        - transition map array with dimensions (env.height, env.width, 16),
+        - transition map array with dimensions (env.height, env.width, 16),\
           assuming 16 bits encoding of transitions.
 
-        - Two 2D arrays (map_height, map_width, 2) containing respectively the position of the given agent
+        - Two 2D arrays (map_height, map_width, 2) containing respectively the position of the given agent\
          target and the positions of the other agents targets.
 
         - A 3D array (map_height, map_width, 4) wtih
@@ -545,14 +558,14 @@ class LocalObsForRailEnv(ObservationBuilder):
     Gives a local observation of the rail environment around the agent.
     The observation is composed of the following elements:
 
-        - transition map array of the local environment around the given agent,
-          with dimensions (view_height,2*view_width+1, 16),
+        - transition map array of the local environment around the given agent, \
+          with dimensions (view_height,2*view_width+1, 16), \
           assuming 16 bits encoding of transitions.
 
-        - Two 2D arrays (view_height,2*view_width+1, 2) containing respectively,
+        - Two 2D arrays (view_height,2*view_width+1, 2) containing respectively, \
         if they are in the agent's vision range, its target position, the positions of the other targets.
 
-        - A 2D array (view_height,2*view_width+1, 4) containing the one hot encoding of directions
+        - A 2D array (view_height,2*view_width+1, 4) containing the one hot encoding of directions \
           of the other agents at their position coordinates, if they are in the agent's vision range.
 
         - A 4 elements array with one hot encoding of the direction.
@@ -560,6 +573,8 @@ class LocalObsForRailEnv(ObservationBuilder):
     Use the parameters view_width and view_height to define the rectangular view of the agent.
     The center parameters moves the agent along the height axis of this rectangle. If it is 0 the agent only has
     observation in front of it.
+
+    .. deprecated:: 2.0.0
     """
 
     def __init__(self, view_width, view_height, center):
@@ -624,8 +639,8 @@ class LocalObsForRailEnv(ObservationBuilder):
 
     def get_many(self, handles=None):
         """
-        Called whenever an observation has to be computed for the `env' environment, for each agent with handle
-        in the `handles' list.
+        Called whenever an observation has to be computed for the `env` environment, for each agent with handle
+        in the `handles` list.
         """
 
         observations = {}
diff --git a/flatland/envs/predictions.py b/flatland/envs/predictions.py
index c77b57871fcf30b231132c364c115a38d6de3889..ccf4b967c3eabe2ff85dc4084720aa8fc3ca9628 100644
--- a/flatland/envs/predictions.py
+++ b/flatland/envs/predictions.py
@@ -7,6 +7,7 @@ import numpy as np
 from flatland.core.env_prediction_builder import PredictionBuilder
 from flatland.core.grid.grid4_utils import get_new_position
 from flatland.envs.rail_env import RailEnvActions
+from flatland.utils.ordered_set import OrderedSet
 
 
 class DummyPredictorForRailEnv(PredictionBuilder):
@@ -22,8 +23,10 @@ class DummyPredictorForRailEnv(PredictionBuilder):
         Called whenever get_many in the observation build is called.
 
         Parameters
-        -------
-        handle : int (optional)
+        ----------
+        custom_args: dict
+            Not used in this dummy implementation.
+        handle : int, optional
             Handle of the agent for which to compute the observation vector.
 
         Returns
@@ -94,8 +97,10 @@ class ShortestPathPredictorForRailEnv(PredictionBuilder):
         Requires distance_map to extract the shortest path.
 
         Parameters
-        -------
-        handle : int (optional)
+        ----------
+        custom_args: dict
+            - distance_map : dict
+        handle : int, optional
             Handle of the agent for which to compute the observation vector.
 
         Returns
@@ -125,7 +130,7 @@ class ShortestPathPredictorForRailEnv(PredictionBuilder):
             prediction[0] = [0, *_agent_initial_position, _agent_initial_direction, 0]
             new_direction = _agent_initial_direction
             new_position = _agent_initial_position
-            visited = set()
+            visited = OrderedSet()
             for index in range(1, self.max_depth + 1):
                 # if we're at the target, stop moving...
                 if agent.position == agent.target:
diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py
index f5881836072ec09a2fe4ee9a70482566867362c4..294ffab233458f1f3b98c18be50743ba65bd2d73 100644
--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -4,13 +4,14 @@ Definition of the RailEnv environment.
 # TODO:  _ this is a global method --> utils or remove later
 import warnings
 from enum import IntEnum
-from typing import List
+from typing import List, Set, NamedTuple, Optional, Tuple, Dict
 
 import msgpack
 import msgpack_numpy as m
 import numpy as np
 
 from flatland.core.env import Environment
+from flatland.core.grid.grid4 import Grid4TransitionsEnum
 from flatland.core.grid.grid4_utils import get_new_position
 from flatland.core.transition_map import GridTransitionMap
 from flatland.envs.agent_utils import EnvAgentStatic, EnvAgent
@@ -18,6 +19,7 @@ from flatland.envs.distance_map import DistanceMap
 from flatland.envs.observations import TreeObsForRailEnv
 from flatland.envs.rail_generators import random_rail_generator, RailGenerator
 from flatland.envs.schedule_generators import random_schedule_generator, ScheduleGenerator
+from flatland.utils.ordered_set import OrderedSet
 
 m.patch()
 
@@ -40,6 +42,11 @@ class RailEnvActions(IntEnum):
         }[a]
 
 
+RailEnvGridPos = NamedTuple('RailEnvGridPos', [('r', int), ('c', int)])
+RailEnvNextAction = NamedTuple('RailEnvNextAction', [('action', RailEnvActions), ('next_position', RailEnvGridPos),
+                                                     ('next_direction', Grid4TransitionsEnum)])
+
+
 class RailEnv(Environment):
     """
     RailEnv environment class.
@@ -115,7 +122,7 @@ class RailEnv(Environment):
         Environment init.
 
         Parameters
-        -------
+        ----------
         rail_generator : function
             The rail_generator function is a function that takes the width,
             height and agents handles of a  rail environment, along with the number of times
@@ -140,16 +147,13 @@ class RailEnv(Environment):
             ObservationBuilder-derived object that takes builds observation
             vectors for each agent.
         max_episode_steps : int or None
-
-        file_name: you can load a pickle file. from previously saved *.pkl file
-
         """
         super().__init__()
 
         self.rail_generator: RailGenerator = rail_generator
         self.schedule_generator: ScheduleGenerator = schedule_generator
         self.rail_generator = rail_generator
-        self.rail: GridTransitionMap = None
+        self.rail: Optional[GridTransitionMap] = None
         self.width = width
         self.height = height
 
@@ -265,7 +269,10 @@ class RailEnv(Environment):
 
             agent.malfunction_data['malfunction'] = 0
 
-            self._agent_new_malfunction(i_agent, RailEnvActions.DO_NOTHING)
+            initial_malfunction = self._agent_malfunction(i_agent)
+
+            if initial_malfunction:
+                agent.speed_data['transition_action_on_cellexit'] = RailEnvActions.DO_NOTHING
 
         self.num_resets += 1
         self._elapsed_steps = 0
@@ -281,7 +288,7 @@ class RailEnv(Environment):
         # Return the new observation vectors for each agent
         return self._get_observations()
 
-    def _agent_new_malfunction(self, i_agent, action) -> bool:
+    def _agent_malfunction(self, i_agent) -> bool:
         """
         Returns true if the agent enters into malfunction. (False, if not broken down or already broken down before).
         """
@@ -308,12 +315,28 @@ class RailEnv(Environment):
             num_broken_steps = np.random.randint(self.min_number_of_steps_broken,
                                                  self.max_number_of_steps_broken + 1) + 1
             agent.malfunction_data['malfunction'] = num_broken_steps
+            agent.malfunction_data['moving_before_malfunction'] = agent.moving
 
             return True
+        else:
+            # The train was broken before...
+            if agent.malfunction_data['malfunction'] > 0:
+
+                # Last step of malfunction --> Agent starts moving again after getting fixed
+                if agent.malfunction_data['malfunction'] < 2:
+                    agent.malfunction_data['malfunction'] -= 1
+
+                    # restore moving state before malfunction without further penalty
+                    self.agents[i_agent].moving = agent.malfunction_data['moving_before_malfunction']
+
+                else:
+                    agent.malfunction_data['malfunction'] -= 1
+
+                    # Nothing left to do with broken agent
+                    return True
         return False
 
-    # TODO refactor to decrease length of this method!
-    def step(self, action_dict_):
+    def step(self, action_dict_: Dict[int, RailEnvActions]):
         self._elapsed_steps += 1
 
         # Reset the step rewards
@@ -321,8 +344,9 @@ class RailEnv(Environment):
         for i_agent in range(self.get_num_agents()):
             self.rewards_dict[i_agent] = 0
 
+        # If we're done, set reward and info_dict and step() is done.
         if self.dones["__all__"]:
-            self.rewards_dict = {i: r + self.global_reward for i, r in self.rewards_dict.items()}
+            self.rewards_dict = {i: self.global_reward for i in range(self.get_num_agents())}
             info_dict = {
                 'action_required': {i: False for i in range(self.get_num_agents())},
                 'malfunction': {i: 0 for i in range(self.get_num_agents())},
@@ -330,166 +354,173 @@ class RailEnv(Environment):
             }
             return self._get_observations(), self.rewards_dict, self.dones, info_dict
 
+        # Perform step on all agents
         for i_agent in range(self.get_num_agents()):
+            self._step_agent(i_agent, action_dict_.get(i_agent))
 
-            if self.dones[i_agent]:  # this agent has already completed...
-                continue
-
-            agent = self.agents[i_agent]
-            agent.old_direction = agent.direction
-            agent.old_position = agent.position
+        # Check for end of episode + set global reward to all rewards!
+        if np.all([np.array_equal(agent.position, agent.target) for agent in self.agents]):
+            self.dones["__all__"] = True
+            self.rewards_dict = {i: self.global_reward for i in range(self.get_num_agents())}
 
-            # No action has been supplied for this agent -> set DO_NOTHING as default
-            if i_agent not in action_dict_:
-                action = RailEnvActions.DO_NOTHING
-            else:
-                action = action_dict_[i_agent]
+        if (self._max_episode_steps is not None) and (self._elapsed_steps >= self._max_episode_steps):
+            self.dones["__all__"] = True
+            for k in self.dones.keys():
+                self.dones[k] = True
 
-            if action < 0 or action > len(RailEnvActions):
-                print('ERROR: illegal action=', action,
-                      'for agent with index=', i_agent,
-                      '"DO NOTHING" will be executed instead')
-                action = RailEnvActions.DO_NOTHING
+        action_required_agents = {
+            i: self.agents[i].speed_data['position_fraction'] == 0.0 for i in range(self.get_num_agents())
+        }
+        malfunction_agents = {
+            i: self.agents[i].malfunction_data['malfunction'] for i in range(self.get_num_agents())
+        }
+        speed_agents = {i: self.agents[i].speed_data['speed'] for i in range(self.get_num_agents())}
 
-            # Check if agent breaks at this step
-            new_malfunction = self._agent_new_malfunction(i_agent, action)
+        info_dict = {
+            'action_required': action_required_agents,
+            'malfunction': malfunction_agents,
+            'speed': speed_agents
+        }
 
-            # Is the agent at the beginning of the cell? Then, it can take an action
-            # Design choice (Erik+Christian):
-            #  as long as we're broken down at the beginning of the cell, we can choose other actions!
-            if agent.speed_data['position_fraction'] == 0.0:
-                if action == RailEnvActions.DO_NOTHING and agent.moving:
-                    # Keep moving
-                    action = RailEnvActions.MOVE_FORWARD
+        return self._get_observations(), self.rewards_dict, self.dones, info_dict
 
-                if action == RailEnvActions.STOP_MOVING and agent.moving:
-                    # Only allow halting an agent on entering new cells.
-                    agent.moving = False
-                    self.rewards_dict[i_agent] += self.stop_penalty
+    def _step_agent(self, i_agent, action: Optional[RailEnvActions] = None):
+        """
+        Performs a step on a single agent, applying step, start and stop penalties, in the following sub steps:
+        - malfunction
+        - action handling if at the beginning of cell
+        - movement
 
-                if not agent.moving and not (
-                    action == RailEnvActions.DO_NOTHING or action == RailEnvActions.STOP_MOVING):
-                    # Allow agent to start with any forward or direction action
-                    agent.moving = True
-                    self.rewards_dict[i_agent] += self.start_penalty
+        Parameters
+        ----------
+        i_agent : int
+        action : Optional[RailEnvActions]
 
-                # Store the action
-                if agent.moving and action not in [RailEnvActions.DO_NOTHING, RailEnvActions.STOP_MOVING]:
-                    _, new_cell_valid, new_direction, new_position, transition_valid = \
-                        self._check_action_on_agent(action, agent)
+        """
+        if self.dones[i_agent]:  # this agent has already completed...
+            return
 
-                    if all([new_cell_valid, transition_valid]):
-                        agent.speed_data['transition_action_on_cellexit'] = action
-                    else:
-                        # But, if the chosen invalid action was LEFT/RIGHT, and the agent is moving,
-                        # try to keep moving forward!
-                        if (action == RailEnvActions.MOVE_LEFT or action == RailEnvActions.MOVE_RIGHT):
-                            _, new_cell_valid, new_direction, new_position, transition_valid = \
-                                self._check_action_on_agent(RailEnvActions.MOVE_FORWARD, agent)
-
-                            if all([new_cell_valid, transition_valid]):
-                                agent.speed_data['transition_action_on_cellexit'] = RailEnvActions.MOVE_FORWARD
-                            else:
-                                # If the agent cannot move due to an invalid transition, we set its state to not moving
-                                self.rewards_dict[i_agent] += self.invalid_action_penalty
-                                self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
-                                self.rewards_dict[i_agent] += self.stop_penalty
-                                agent.moving = False
-
-                        else:
-                            # If the agent cannot move due to an invalid transition, we set its state to not moving
-                            self.rewards_dict[i_agent] += self.invalid_action_penalty
-                            self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
-                            self.rewards_dict[i_agent] += self.stop_penalty
-                            agent.moving = False
-
-            # if we've just broken in this step, nothing else to do
-            if new_malfunction:
-                continue
+        agent = self.agents[i_agent]
+        agent.old_direction = agent.direction
+        agent.old_position = agent.position
 
-            # The train was broken before...
-            if agent.malfunction_data['malfunction'] > 0:
+        # is the agent malfunctioning?
+        malfunction = self._agent_malfunction(i_agent)
 
-                # Last step of malfunction --> Agent starts moving again after getting fixed
-                if agent.malfunction_data['malfunction'] < 2:
-                    agent.malfunction_data['malfunction'] -= 1
-                    self.agents[i_agent].moving = True
-                    action = RailEnvActions.DO_NOTHING
+        # if agent is broken, actions are ignored and agent does not move;
+        # only the step penalty is applied in this step (no start/stop/invalid-action penalty)!
+        if malfunction:
+            self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
+            return
 
-                else:
-                    agent.malfunction_data['malfunction'] -= 1
+        # Is the agent at the beginning of the cell? Then, it can take an action.
+        if agent.speed_data['position_fraction'] == 0.0:
+            # No action has been supplied for this agent -> set DO_NOTHING as default
+            if action is None:
+                action = RailEnvActions.DO_NOTHING
 
-                    # Broken agents are stopped
-                    self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
-                    self.agents[i_agent].moving = False
+            if action < 0 or action > len(RailEnvActions):
+                print('ERROR: illegal action=', action,
+                      'for agent with index=', i_agent,
+                      '"DO NOTHING" will be executed instead')
+                action = RailEnvActions.DO_NOTHING
 
-                    # Nothing left to do with broken agent
-                    continue
+            if action == RailEnvActions.DO_NOTHING and agent.moving:
+                # Keep moving
+                action = RailEnvActions.MOVE_FORWARD
 
-            # Now perform a movement.
-            # If agent.moving, increment the position_fraction by the speed of the agent
-            # If the new position fraction is >= 1, reset to 0, and perform the stored
-            #   transition_action_on_cellexit if the cell is free.
-            if agent.moving:
+            if action == RailEnvActions.STOP_MOVING and agent.moving:
+                # Only allow halting an agent on entering new cells.
+                agent.moving = False
+                self.rewards_dict[i_agent] += self.stop_penalty
 
-                agent.speed_data['position_fraction'] += agent.speed_data['speed']
-                if agent.speed_data['position_fraction'] >= 1.0:
-                    # Perform stored action to transition to the next cell as soon as cell is free
-                    # Notice that we've already check new_cell_valid and transition valid when we stored the action,
-                    # so we only have to check cell_free now!
+            if not agent.moving and not (
+                action == RailEnvActions.DO_NOTHING or action == RailEnvActions.STOP_MOVING):
+                # Allow agent to start with any forward or direction action
+                agent.moving = True
+                self.rewards_dict[i_agent] += self.start_penalty
 
-                    # cell and transition validity was checked when we stored transition_action_on_cellexit!
-                    cell_free, new_cell_valid, new_direction, new_position, transition_valid = self._check_action_on_agent(
-                        agent.speed_data['transition_action_on_cellexit'], agent)
+            # Store the action if action is moving
+            # If not moving, the action will be stored when the agent starts moving again.
+            if agent.moving:
+                _action_stored = False
+                _, new_cell_valid, new_direction, new_position, transition_valid = \
+                    self._check_action_on_agent(action, agent)
 
-                    if cell_free:
-                        agent.position = new_position
-                        agent.direction = new_direction
-                        agent.speed_data['position_fraction'] = 0.0
+                if all([new_cell_valid, transition_valid]):
+                    agent.speed_data['transition_action_on_cellexit'] = action
+                    _action_stored = True
+                else:
+                    # But, if the chosen invalid action was LEFT/RIGHT, and the agent is moving,
+                    # try to keep moving forward!
+                    if (action == RailEnvActions.MOVE_LEFT or action == RailEnvActions.MOVE_RIGHT):
+                        _, new_cell_valid, new_direction, new_position, transition_valid = \
+                            self._check_action_on_agent(RailEnvActions.MOVE_FORWARD, agent)
+
+                        if all([new_cell_valid, transition_valid]):
+                            agent.speed_data['transition_action_on_cellexit'] = RailEnvActions.MOVE_FORWARD
+                            _action_stored = True
+
+                if not _action_stored:
+                    # If the agent cannot move due to an invalid transition, we set its state to not moving
+                    self.rewards_dict[i_agent] += self.invalid_action_penalty
+                    self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
+                    self.rewards_dict[i_agent] += self.stop_penalty
+                    agent.moving = False
 
+        # Now perform a movement.
+        # If agent.moving, increment the position_fraction by the speed of the agent
+        # If the new position fraction is >= 1, reset to 0, and perform the stored
+        #   transition_action_on_cellexit if the cell is free.
+        if agent.moving:
+            agent.speed_data['position_fraction'] += agent.speed_data['speed']
+            if agent.speed_data['position_fraction'] >= 1.0:
+                # Perform stored action to transition to the next cell as soon as cell is free
+                # Notice that we've already checked new_cell_valid and transition valid when we stored the action,
+                # so we only have to check cell_free now!
+
+                # cell and transition validity was checked when we stored transition_action_on_cellexit!
+                cell_free, new_cell_valid, new_direction, new_position, transition_valid = self._check_action_on_agent(
+                    agent.speed_data['transition_action_on_cellexit'], agent)
+
+                # N.B. validity of new_cell and transition should have been verified before the action was stored!
+                assert new_cell_valid
+                assert transition_valid
+                if cell_free:
+                    agent.position = new_position
+                    agent.direction = new_direction
+                    agent.speed_data['position_fraction'] = 0.0
+
+            # has the agent reached its target?
             if np.equal(agent.position, agent.target).all():
                 self.dones[i_agent] = True
                 agent.moving = False
             else:
                 self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
 
-        # Check for end of episode + add global reward to all rewards!
-        if np.all([np.array_equal(agent2.position, agent2.target) for agent2 in self.agents]):
-            self.dones["__all__"] = True
-            self.rewards_dict = {i: 0 * r + self.global_reward for i, r in self.rewards_dict.items()}
-
-        if (self._max_episode_steps is not None) and (self._elapsed_steps >= self._max_episode_steps):
-            self.dones["__all__"] = True
-            for k in self.dones.keys():
-                self.dones[k] = True
-
-        action_required_agents = {
-            i: self.agents[i].speed_data['position_fraction'] == 0.0 for i in range(self.get_num_agents())
-        }
-        malfunction_agents = {
-            i: self.agents[i].malfunction_data['malfunction'] for i in range(self.get_num_agents())
-        }
-        speed_agents = {i: self.agents[i].speed_data['speed'] for i in range(self.get_num_agents())}
+    def _check_action_on_agent(self, action: RailEnvActions, agent: EnvAgent):
+        """
 
-        info_dict = {
-            'action_required': action_required_agents,
-            'malfunction': malfunction_agents,
-            'speed': speed_agents
-        }
+        Parameters
+        ----------
+        action : RailEnvActions
+        agent : EnvAgent
 
-        return self._get_observations(), self.rewards_dict, self.dones, info_dict
+        Returns
+        -------
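+        tuple
+            (cell_free, new_cell_valid, new_direction, new_position, transition_valid), where
+            - transition_valid: the transition allows the new_direction in the cell,
+            - new_cell_valid: the new cell is still in the grid and not empty (case 0),
+            - cell_free: the cell is free, i.e., no agent is currently in that cell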
 
-    def _check_action_on_agent(self, action, agent):
 
+        """
         # compute number of possible transitions in the current
         # cell used to check for invalid actions
         new_direction, transition_valid = self.check_action(agent, action)
         new_position = get_new_position(agent.position, new_direction)
 
-        # Is it a legal move?
-        # 1) transition allows the new_direction in the cell,
-        # 2) the new cell is not empty (case 0),
-        # 3) the cell is free, i.e., no agent is currently in that cell
         new_cell_valid = (
             np.array_equal(  # Check the new position is still in the grid
                 new_position,
@@ -505,11 +536,24 @@ class RailEnv(Environment):
 
         # Check the new position is not the same as any of the existing agent positions
         # (including itself, for simplicity, since it is moving)
-        cell_free = not np.any(
-            np.equal(new_position, [agent2.position for agent2 in self.agents]).all(1))
+        cell_free = not np.any(np.equal(new_position, [agent2.position for agent2 in self.agents]).all(1))
         return cell_free, new_cell_valid, new_direction, new_position, transition_valid
 
-    def check_action(self, agent, action):
+    def check_action(self, agent: EnvAgent, action: RailEnvActions):
+        """
+
+        Parameters
+        ----------
+        agent : EnvAgent
+        action : RailEnvActions
+
+        Returns
+        -------
+        Tuple[Grid4TransitionsEnum, bool]
+            (new_direction, transition_valid)
+
+
+        """
         transition_valid = None
         possible_transitions = self.rail.get_transitions(*agent.position, agent.direction)
         num_transitions = np.count_nonzero(possible_transitions)
@@ -527,15 +571,68 @@ class RailEnv(Environment):
 
         new_direction %= 4
 
-        if action == RailEnvActions.MOVE_FORWARD:
-            if num_transitions == 1:
-                # - dead-end, straight line or curved line;
-                # new_direction will be the only valid transition
-                # - take only available transition
-                new_direction = np.argmax(possible_transitions)
-                transition_valid = True
+        if action == RailEnvActions.MOVE_FORWARD and num_transitions == 1:
+            # - dead-end, straight line or curved line;
+            # new_direction will be the only valid transition
+            # - take only available transition
+            new_direction = np.argmax(possible_transitions)
+            transition_valid = True
         return new_direction, transition_valid
 
+    @staticmethod
+    def get_valid_move_actions_(agent_direction: Grid4TransitionsEnum,
+                                agent_position: Tuple[int, int],
+                                rail: GridTransitionMap) -> Set[RailEnvNextAction]:
+        """
+        Get the valid move actions (forward, left, right) for an agent.
+
+        Parameters
+        ----------
+        agent_direction : Grid4TransitionsEnum
+        agent_position: Tuple[int,int]
+        rail : GridTransitionMap
+
+
+        Returns
+        -------
+        Set of `RailEnvNextAction` (tuples of (action,position,direction))
+            Possible move actions (forward,left,right) and the next position/direction they lead to.
+            It is not checked that the next cell is free.
+        """
+        valid_actions: Set[RailEnvNextAction] = OrderedSet()
+        possible_transitions = rail.get_transitions(*agent_position, agent_direction)
+        num_transitions = np.count_nonzero(possible_transitions)
+        # Start from the current orientation, and see which transitions are available;
+        # organize them as [left, forward, right], relative to the current orientation
+        # If only one transition is possible, the forward branch is aligned with it.
+        if rail.is_dead_end(agent_position):
+            action = RailEnvActions.MOVE_FORWARD
+            exit_direction = (agent_direction + 2) % 4
+            if possible_transitions[exit_direction]:
+                new_position = get_new_position(agent_position, exit_direction)
+                valid_actions.add(RailEnvNextAction(action, new_position, exit_direction))
+        elif num_transitions == 1:
+            action = RailEnvActions.MOVE_FORWARD
+            for new_direction in [(agent_direction + i) % 4 for i in range(-1, 2)]:
+                if possible_transitions[new_direction]:
+                    new_position = get_new_position(agent_position, new_direction)
+                    valid_actions.add(RailEnvNextAction(action, new_position, new_direction))
+        else:
+            for new_direction in [(agent_direction + i) % 4 for i in range(-1, 2)]:
+                if possible_transitions[new_direction]:
+                    if new_direction == agent_direction:
+                        action = RailEnvActions.MOVE_FORWARD
+                    elif new_direction == (agent_direction + 1) % 4:
+                        action = RailEnvActions.MOVE_RIGHT
+                    elif new_direction == (agent_direction - 1) % 4:
+                        action = RailEnvActions.MOVE_LEFT
+                    else:
+                        raise Exception("Illegal state")
+
+                    new_position = get_new_position(agent_position, new_direction)
+                    valid_actions.add(RailEnvNextAction(action, new_position, new_direction))
+        return valid_actions
+
     def _get_observations(self):
         self.obs_dict = self.obs_builder.get_many(list(range(self.get_num_agents())))
         return self.obs_dict
diff --git a/flatland/envs/rail_generators.py b/flatland/envs/rail_generators.py
index 345039cfb86862ad8c0fa7d4cff925d7a742462a..60c606f789f0d83f04fd5c549e155f437c977b7d 100644
--- a/flatland/envs/rail_generators.py
+++ b/flatland/envs/rail_generators.py
@@ -38,8 +38,10 @@ def complex_rail_generator(nr_start_goal=1,
                            max_dist=99999,
                            seed=0) -> RailGenerator:
     """
+    complex_rail_generator
+
     Parameters
-    -------
+    ----------
     width : int
         The width (number of cells) of the grid to generate.
     height : int
@@ -165,7 +167,7 @@ def rail_from_manual_specifications_generator(rail_spec):
     transitions specifications.
 
     Parameters
-    -------
+    ----------
     rail_spec : list of list of tuples
         List (rows) of lists (columns) of tuples, each specifying a rail_spec_of_cell for
         the RailEnv environment as (cell_type, rotation), with rotation being
@@ -207,7 +209,7 @@ def rail_from_file(filename) -> RailGenerator:
     Utility to load pickle file
 
     Parameters
-    -------
+    ----------
     filename : Pickle file generated by env.save() or editor
 
     Returns
@@ -241,14 +243,14 @@ def rail_from_grid_transition_map(rail_map) -> RailGenerator:
     16-bit transitions specifications.
 
     Parameters
-    -------
+    ----------
     rail_map : GridTransitionMap object
         GridTransitionMap object to return when the generator is called.
 
     Returns
     -------
     function
-        Generator function that always returns the given `rail_map' object.
+        Generator function that always returns the given `rail_map` object.
     """
 
     def generator(width: int, height: int, num_agents: int, num_resets: int = 0) -> RailGeneratorProduct:
@@ -261,13 +263,13 @@ def random_rail_generator(cell_type_relative_proportion=[1.0] * 11) -> RailGener
     """
     Dummy random level generator:
     - fill in cells at random in [width-2, height-2]
-    - keep filling cells in among the unfilled ones, such that all transitions
-      are legit;  if no cell can be filled in without violating some
-      transitions, pick one among those that can satisfy most transitions
-      (1,2,3 or 4), and delete (+mark to be re-filled) the cells that were
+    - keep filling cells in among the unfilled ones, such that all transitions\
+      are legit;  if no cell can be filled in without violating some\
+      transitions, pick one among those that can satisfy most transitions\
+      (1,2,3 or 4), and delete (+mark to be re-filled) the cells that were\
       incompatible.
-    - keep trying for a total number of insertions
-      (e.g., (W-2)*(H-2)*MAX_REPETITIONS ); if no solution is found, empty the
+    - keep trying for a total number of insertions\
+      (e.g., (W-2)*(H-2)*MAX_REPETITIONS ); if no solution is found, empty the\
       board and try again from scratch.
     - finally pad the border of the map with dead-ends to avoid border issues.
 
@@ -277,7 +279,7 @@ def random_rail_generator(cell_type_relative_proportion=[1.0] * 11) -> RailGener
     found to turn most un-genereatable levels into valid ones.
 
     Parameters
-    -------
+    ----------
     width : int
         The width (number of cells) of the grid to generate.
     height : int
@@ -527,11 +529,12 @@ def random_rail_generator(cell_type_relative_proportion=[1.0] * 11) -> RailGener
 
 
 def sparse_rail_generator(num_cities=5, num_intersections=4, num_trainstations=2, min_node_dist=20, node_radius=2,
-                          num_neighb=3, grid_mode=False, enhance_intersection=False, seed=0):
+                          num_neighb=3, grid_mode=False, enhance_intersection=False, seed=0) -> RailGenerator:
     """
     This is a level generator which generates complex sparse rail configurations
 
     :param num_cities: Number of city node (can hold trainstations)
+    :type num_cities: int
     :param num_intersections: Number of intersection that city nodes can connect to
     :param num_trainstations: Total number of trainstations in env
     :param min_node_dist: Minimal distance between nodes
@@ -540,13 +543,10 @@ def sparse_rail_generator(num_cities=5, num_intersections=4, num_trainstations=2
     :param grid_mode: True -> NOdes evenly distirbuted in env, False-> Random distribution of nodes
     :param enhance_intersection: True -> Extra rail elements added at intersections
     :param seed: Random Seed
-    :return:
-        -------
-    numpy.ndarray of type numpy.uint16
-        The matrix with the correct 16-bit bitmaps for each cell.
+    :return: RailGenerator -- function (width, height, num_agents, num_resets) returning a RailGeneratorProduct.
     """
 
-    def generator(width, height, num_agents, num_resets=0):
+    def generator(width, height, num_agents, num_resets=0) -> RailGeneratorProduct:
 
         if num_agents > num_trainstations:
             num_agents = num_trainstations
diff --git a/flatland/envs/schedule_generators.py b/flatland/envs/schedule_generators.py
index ba703acf5e35e502d0c6248571a9a795a1676352..b3576a2bec77f75afc9331cc6c190649590a990c 100644
--- a/flatland/envs/schedule_generators.py
+++ b/flatland/envs/schedule_generators.py
@@ -17,7 +17,7 @@ ScheduleGenerator = Callable[[GridTransitionMap, int, Optional[Any]], ScheduleGe
 def speed_initialization_helper(nb_agents: int, speed_ratio_map: Mapping[float, float] = None) -> List[float]:
     """
     Parameters
-    -------
+    ----------
     nb_agents : int
         The number of agents to generate a speed for
     speed_ratio_map : Mapping[float,float]
@@ -115,22 +115,19 @@ def sparse_schedule_generator(speed_ratio_map: Mapping[float, float] = None) ->
     return generator
 
 
-def random_schedule_generator(speed_ratio_map: Mapping[float, float] = None) -> ScheduleGenerator:
+def random_schedule_generator(speed_ratio_map: Optional[Mapping[float, float]] = None) -> ScheduleGenerator:
     """
-    Given a `rail' GridTransitionMap, return a random placement of agents (initial position, direction and target).
+    Given a `rail` GridTransitionMap, return a random placement of agents (initial position, direction and target).
 
     Parameters
-    -------
-        rail : GridTransitionMap
-            The railway to place agents on.
-        num_agents : int
-            The number of agents to generate a speed for
-        speed_ratio_map : Mapping[float,float]
-            A map of speeds mappint to their ratio of appearance. The ratios must sum up to 1.
+    ----------
+        speed_ratio_map : Optional[Mapping[float, float]]
+            A map of speeds mapping to their ratio of appearance. The ratios must sum up to 1.
+
     Returns
     -------
         Tuple[List[Tuple[int,int]], List[Tuple[int,int]], List[Tuple[int,int]], List[float]]
-        initial positions, directions, targets speeds
+            initial positions, directions, targets, speeds
     """
 
     def generator(rail: GridTransitionMap, num_agents: int, hints: Any = None) -> ScheduleGeneratorProduct:
@@ -213,7 +210,7 @@ def schedule_from_file(filename) -> ScheduleGenerator:
     Utility to load pickle file
 
     Parameters
-    -------
+    ----------
     input_file : Pickle file generated by env.save() or editor
 
     Returns
diff --git a/flatland/utils/ordered_set.py b/flatland/utils/ordered_set.py
new file mode 100644
index 0000000000000000000000000000000000000000..1bd1689488f566872445334ac6e3bb8362daa347
--- /dev/null
+++ b/flatland/utils/ordered_set.py
@@ -0,0 +1,77 @@
+# in order for enumeration to be deterministic for testing purposes
+# https://stackoverflow.com/questions/1653970/does-python-have-an-ordered-set
+from collections import OrderedDict
+from collections.abc import MutableSet
+
+
+class OrderedSet(OrderedDict, MutableSet):
+    """Insertion-ordered set whose elements are stored as the keys of an OrderedDict.
+
+    Iterating yields the elements in the order they were first added.
+    """
+
+    def __init__(self, iterable=()):
+        """Create the set, optionally pre-filled with the elements of `iterable`.
+
+        OrderedDict.__init__ would interpret a positional argument as key/value pairs
+        instead of elements, which also breaks the set operators inherited from
+        collections.abc.Set because they build their result via ``cls(iterable)``.
+        """
+        super().__init__()
+        self.update(iterable)
+
+    def update(self, *args, **kwargs):
+        """Add every element of each iterable in `args`; keyword arguments are rejected."""
+        if kwargs:
+            raise TypeError("update() takes no keyword arguments")
+
+        for s in args:
+            for e in s:
+                self.add(e)
+
+    def add(self, elem):
+        """Insert `elem`; an element that is already present keeps its position."""
+        self[elem] = None
+
+    def discard(self, elem):
+        """Remove `elem` if it is present, otherwise do nothing."""
+        self.pop(elem, None)
+
+    def __le__(self, other):
+        """Subset test: every element of this set is contained in `other`."""
+        return all(e in other for e in self)
+
+    def __lt__(self, other):
+        """Subset of `other` and not equal to it (per the inherited ``!=``)."""
+        return self <= other and self != other
+
+    def __ge__(self, other):
+        """Superset test: every element of `other` is contained in this set."""
+        return all(e in self for e in other)
+
+    def __gt__(self, other):
+        """Superset of `other` and not equal to it (per the inherited ``!=``)."""
+        return self >= other and self != other
+
+    def __repr__(self):
+        return 'OrderedSet([%s])' % (', '.join(map(repr, self.keys())))
+
+    def __str__(self):
+        return '{%s}' % (', '.join(map(repr, self.keys())))
+
+    # dict/OrderedDict define | and |= as mapping merge since Python 3.9 and precede
+    # MutableSet in the MRO; pin the set implementations so union accepts any iterable.
+    __or__ = MutableSet.__or__
+    __ror__ = MutableSet.__ror__
+    __ior__ = MutableSet.__ior__
+
+    # named counterparts of the set operators, exposed as bound operator methods
+    difference = property(lambda self: self.__sub__)
+    difference_update = property(lambda self: self.__isub__)
+    intersection = property(lambda self: self.__and__)
+    intersection_update = property(lambda self: self.__iand__)
+    issubset = property(lambda self: self.__le__)
+    issuperset = property(lambda self: self.__ge__)
+    symmetric_difference = property(lambda self: self.__xor__)
+    symmetric_difference_update = property(lambda self: self.__ixor__)
+    union = property(lambda self: self.__or__)
diff --git a/flatland/utils/rendertools.py b/flatland/utils/rendertools.py
index 802b361b623cdaea08271f5748ac86194056bdf2..99958bf38449ef8eb58c519990f1975106409c4e 100644
--- a/flatland/utils/rendertools.py
+++ b/flatland/utils/rendertools.py
@@ -560,7 +560,7 @@ class RenderTool(object):
                 position = agent.position
                 direction = agent.direction
                 for possible_directions in range(4):
-                    # Is a transition along movement `desired_movement_from_new_cell' to the current cell possible?
+                    # Is a transition along movement `desired_movement_from_new_cell` to the current cell possible?
                     isValid = env.rail.get_transition((*agent.position, agent.direction), possible_directions)
                     if isValid:
                         direction = possible_directions
diff --git a/make_docs.py b/make_docs.py
index be36a7bb7caed714445399b73f958ab42172242c..0a805e591647162e2757b60a7bcd9f75f4d1d0b9 100644
--- a/make_docs.py
+++ b/make_docs.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
-
+import glob
 import os
+import shutil
 import subprocess
 import webbrowser
 from urllib.request import pathname2url
@@ -18,16 +19,26 @@ def remove_exists(filename):
 
 
 # clean docs config and html files, and rebuild everything
-remove_exists('docs/flatland.rst')
+# wildcards do not work under Windows
+for file in glob.glob(r'./docs/flatland*.rst'):
+    remove_exists(file)
 remove_exists('docs/modules.rst')
 
-subprocess.call(['sphinx-apidoc', '-o', 'docs/', 'flatland'])
+# copy md files from root folder into docs folder
+for file in glob.glob(r'./*.md'):
+    print(file)
+    shutil.copy(file, 'docs/')
+
+subprocess.call(['sphinx-apidoc', '--force', '-a', '-e', '-o', 'docs/', 'flatland', '-H', 'Flatland Reference'])
 
-os.environ["SPHINXPROJ"] = "flatland"
+os.environ["SPHINXPROJ"] = "Flatland"
 os.chdir('docs')
 subprocess.call(['python', '-msphinx', '-M', 'clean', '.', '_build'])
 # TODO fix sphinx warnings instead of suppressing them...
-subprocess.call(['python', '-msphinx', '-M', 'html', '.', '_build', '-Q'])
-subprocess.call(['python', '-mpydeps', '../flatland', '-o', '_build/html/flatland.svg', '--no-config', '--noshow'])
+subprocess.call(['python', '-msphinx', '-M', 'html', '.', '_build'])
+# subprocess.call(['python', '-msphinx', '-M', 'html', '.', '_build', '-Q'])
+
+# we do not currently use pydeps, commented out https://gitlab.aicrowd.com/flatland/flatland/issues/149
+# subprocess.call(['python', '-mpydeps', '../flatland', '-o', '_build/html/flatland.svg', '--no-config', '--noshow'])
 
 browser('_build/html/index.html')
diff --git a/requirements_continuous_integration.txt b/requirements_continuous_integration.txt
index db4885e736a6225c46624f3b20266c403d935626..0054fec1724765b8ffa8be620c5d4c1ac4cfd6b3 100644
--- a/requirements_continuous_integration.txt
+++ b/requirements_continuous_integration.txt
@@ -4,8 +4,10 @@ wheel>=0.32.1
 watchdog>=0.9.0
 benchmarker>=4.0.1
 coverage>=4.5.1
-Sphinx>=1.8.1
+# pin sphinx to <2.0 because of https://github.com/readthedocs/sphinx_rtd_theme/issues/746
+Sphinx>=1.8.1,<2.0
 sphinx-rtd-theme>=0.4.3
+numpydoc>=0.9.1
 docutils>=0.15.2
 flake8>=3.7.7
 flake8-eradicate>=0.2.0
diff --git a/tests/test_flatland_envs_observations.py b/tests/test_flatland_envs_observations.py
index 46000de429092d3fe4effe87382d1e12bc2c3401..d2663916a17a70597d10e489da7aead4f8932dc4 100644
--- a/tests/test_flatland_envs_observations.py
+++ b/tests/test_flatland_envs_observations.py
@@ -59,7 +59,7 @@ def _step_along_shortest_path(env, obs_builder, rail):
 
                 # Check all possible transitions in new_cell
                 for agent_orientation in range(4):
-                    # Is a transition along movement `entry_direction' to the neighbour possible?
+                    # Is a transition along movement `entry_direction` to the neighbour possible?
                     is_valid = obs_builder.env.rail.get_transition((neighbour[0], neighbour[1], agent_orientation),
                                                                    desired_movement_from_new_cell)
                     if is_valid:
diff --git a/tests/test_flatland_envs_sparse_rail_generator.py b/tests/test_flatland_envs_sparse_rail_generator.py
index c32b3c3e3309069a3232393a45997434af3cfb96..2d22e39a925ad0e3bc35e7094b4bbb711ae852e2 100644
--- a/tests/test_flatland_envs_sparse_rail_generator.py
+++ b/tests/test_flatland_envs_sparse_rail_generator.py
@@ -1,3 +1,5 @@
+import random
+
 import numpy as np
 
 from flatland.core.grid.grid_utils import Vec2dOperations as Vec2d
@@ -35,6 +37,793 @@ def test_sparse_rail_generator():
     assert s1 == 31
 
 
+def test_sparse_rail_generator_deterministic():
+    """Check that sparse_rail_generator runs deterministically across different Python versions!"""
+    random.seed(0)
+    np.random.seed(0)
+
+    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
+                       'malfunction_rate': 70,  # Rate of malfunction occurrence
+                       'min_duration': 2,  # Minimal duration of malfunction
+                       'max_duration': 5  # Max duration of malfunction
+                       }
+
+    speed_ration_map = {1.: 1.,  # Fast passenger train
+                        1. / 2.: 0.,  # Fast freight train
+                        1. / 3.: 0.,  # Slow commuter train
+                        1. / 4.: 0.}  # Slow freight train
+
+    env = RailEnv(width=25,
+                  height=30,
+                  rail_generator=sparse_rail_generator(num_cities=5,
+                                                       # Number of cities in map (where train stations are)
+                                                       num_intersections=4,
+                                                       # Number of intersections (no start / target)
+                                                       num_trainstations=25,  # Number of possible start/targets on map
+                                                       min_node_dist=6,  # Minimal distance of nodes
+                                                       node_radius=3,  # Proximity of stations to city center
+                                                       num_neighb=3,
+                                                       # Number of connections to other cities/intersections
+                                                       seed=215545,  # Random seed
+                                                       grid_mode=True,
+                                                       enhance_intersection=False
+                                                       ),
+                  schedule_generator=sparse_schedule_generator(speed_ration_map),
+                  number_of_agents=1,
+                  stochastic_data=stochastic_data,  # Malfunction data generator
+                  )
+    assert env.rail.get_full_transitions(0, 0) == 0, "[0][0]"
+    assert env.rail.get_full_transitions(0, 1) == 0, "[0][1]"
+    assert env.rail.get_full_transitions(0, 2) == 0, "[0][2]"
+    assert env.rail.get_full_transitions(0, 3) == 0, "[0][3]"
+    assert env.rail.get_full_transitions(0, 4) == 0, "[0][4]"
+    assert env.rail.get_full_transitions(0, 5) == 0, "[0][5]"
+    assert env.rail.get_full_transitions(0, 6) == 0, "[0][6]"
+    assert env.rail.get_full_transitions(0, 7) == 0, "[0][7]"
+    assert env.rail.get_full_transitions(0, 8) == 0, "[0][8]"
+    assert env.rail.get_full_transitions(0, 9) == 0, "[0][9]"
+    assert env.rail.get_full_transitions(0, 10) == 0, "[0][10]"
+    assert env.rail.get_full_transitions(0, 11) == 0, "[0][11]"
+    assert env.rail.get_full_transitions(0, 12) == 0, "[0][12]"
+    assert env.rail.get_full_transitions(0, 13) == 0, "[0][13]"
+    assert env.rail.get_full_transitions(0, 14) == 0, "[0][14]"
+    assert env.rail.get_full_transitions(0, 15) == 0, "[0][15]"
+    assert env.rail.get_full_transitions(0, 16) == 0, "[0][16]"
+    assert env.rail.get_full_transitions(0, 17) == 0, "[0][17]"
+    assert env.rail.get_full_transitions(0, 18) == 0, "[0][18]"
+    assert env.rail.get_full_transitions(0, 19) == 0, "[0][19]"
+    assert env.rail.get_full_transitions(0, 20) == 0, "[0][20]"
+    assert env.rail.get_full_transitions(0, 21) == 0, "[0][21]"
+    assert env.rail.get_full_transitions(0, 22) == 0, "[0][22]"
+    assert env.rail.get_full_transitions(0, 23) == 0, "[0][23]"
+    assert env.rail.get_full_transitions(0, 24) == 0, "[0][24]"
+    assert env.rail.get_full_transitions(1, 0) == 0, "[1][0]"
+    assert env.rail.get_full_transitions(1, 1) == 0, "[1][1]"
+    assert env.rail.get_full_transitions(1, 2) == 0, "[1][2]"
+    assert env.rail.get_full_transitions(1, 3) == 0, "[1][3]"
+    assert env.rail.get_full_transitions(1, 4) == 0, "[1][4]"
+    assert env.rail.get_full_transitions(1, 5) == 0, "[1][5]"
+    assert env.rail.get_full_transitions(1, 6) == 0, "[1][6]"
+    assert env.rail.get_full_transitions(1, 7) == 0, "[1][7]"
+    assert env.rail.get_full_transitions(1, 8) == 0, "[1][8]"
+    assert env.rail.get_full_transitions(1, 9) == 0, "[1][9]"
+    assert env.rail.get_full_transitions(1, 10) == 0, "[1][10]"
+    assert env.rail.get_full_transitions(1, 11) == 0, "[1][11]"
+    assert env.rail.get_full_transitions(1, 12) == 0, "[1][12]"
+    assert env.rail.get_full_transitions(1, 13) == 0, "[1][13]"
+    assert env.rail.get_full_transitions(1, 14) == 0, "[1][14]"
+    assert env.rail.get_full_transitions(1, 15) == 0, "[1][15]"
+    assert env.rail.get_full_transitions(1, 16) == 0, "[1][16]"
+    assert env.rail.get_full_transitions(1, 17) == 0, "[1][17]"
+    assert env.rail.get_full_transitions(1, 18) == 0, "[1][18]"
+    assert env.rail.get_full_transitions(1, 19) == 0, "[1][19]"
+    assert env.rail.get_full_transitions(1, 20) == 0, "[1][20]"
+    assert env.rail.get_full_transitions(1, 21) == 0, "[1][21]"
+    assert env.rail.get_full_transitions(1, 22) == 0, "[1][22]"
+    assert env.rail.get_full_transitions(1, 23) == 0, "[1][23]"
+    assert env.rail.get_full_transitions(1, 24) == 0, "[1][24]"
+    assert env.rail.get_full_transitions(2, 0) == 0, "[2][0]"
+    assert env.rail.get_full_transitions(2, 1) == 0, "[2][1]"
+    assert env.rail.get_full_transitions(2, 2) == 0, "[2][2]"
+    assert env.rail.get_full_transitions(2, 3) == 0, "[2][3]"
+    assert env.rail.get_full_transitions(2, 4) == 0, "[2][4]"
+    assert env.rail.get_full_transitions(2, 5) == 0, "[2][5]"
+    assert env.rail.get_full_transitions(2, 6) == 0, "[2][6]"
+    assert env.rail.get_full_transitions(2, 7) == 0, "[2][7]"
+    assert env.rail.get_full_transitions(2, 8) == 0, "[2][8]"
+    assert env.rail.get_full_transitions(2, 9) == 0, "[2][9]"
+    assert env.rail.get_full_transitions(2, 10) == 0, "[2][10]"
+    assert env.rail.get_full_transitions(2, 11) == 0, "[2][11]"
+    assert env.rail.get_full_transitions(2, 12) == 0, "[2][12]"
+    assert env.rail.get_full_transitions(2, 13) == 0, "[2][13]"
+    assert env.rail.get_full_transitions(2, 14) == 0, "[2][14]"
+    assert env.rail.get_full_transitions(2, 15) == 0, "[2][15]"
+    assert env.rail.get_full_transitions(2, 16) == 0, "[2][16]"
+    assert env.rail.get_full_transitions(2, 17) == 0, "[2][17]"
+    assert env.rail.get_full_transitions(2, 18) == 0, "[2][18]"
+    assert env.rail.get_full_transitions(2, 19) == 0, "[2][19]"
+    assert env.rail.get_full_transitions(2, 20) == 0, "[2][20]"
+    assert env.rail.get_full_transitions(2, 21) == 0, "[2][21]"
+    assert env.rail.get_full_transitions(2, 22) == 0, "[2][22]"
+    assert env.rail.get_full_transitions(2, 23) == 0, "[2][23]"
+    assert env.rail.get_full_transitions(2, 24) == 0, "[2][24]"
+    assert env.rail.get_full_transitions(3, 0) == 0, "[3][0]"
+    assert env.rail.get_full_transitions(3, 1) == 0, "[3][1]"
+    assert env.rail.get_full_transitions(3, 2) == 0, "[3][2]"
+    assert env.rail.get_full_transitions(3, 3) == 16386, "[3][3]"
+    assert env.rail.get_full_transitions(3, 4) == 1025, "[3][4]"
+    assert env.rail.get_full_transitions(3, 5) == 1025, "[3][5]"
+    assert env.rail.get_full_transitions(3, 6) == 1025, "[3][6]"
+    assert env.rail.get_full_transitions(3, 7) == 1025, "[3][7]"
+    assert env.rail.get_full_transitions(3, 8) == 1025, "[3][8]"
+    assert env.rail.get_full_transitions(3, 9) == 1025, "[3][9]"
+    assert env.rail.get_full_transitions(3, 10) == 1025, "[3][10]"
+    assert env.rail.get_full_transitions(3, 11) == 1025, "[3][11]"
+    assert env.rail.get_full_transitions(3, 12) == 4608, "[3][12]"
+    assert env.rail.get_full_transitions(3, 13) == 0, "[3][13]"
+    assert env.rail.get_full_transitions(3, 14) == 0, "[3][14]"
+    assert env.rail.get_full_transitions(3, 15) == 0, "[3][15]"
+    assert env.rail.get_full_transitions(3, 16) == 0, "[3][16]"
+    assert env.rail.get_full_transitions(3, 17) == 0, "[3][17]"
+    assert env.rail.get_full_transitions(3, 18) == 0, "[3][18]"
+    assert env.rail.get_full_transitions(3, 19) == 0, "[3][19]"
+    assert env.rail.get_full_transitions(3, 20) == 0, "[3][20]"
+    assert env.rail.get_full_transitions(3, 21) == 0, "[3][21]"
+    assert env.rail.get_full_transitions(3, 22) == 8192, "[3][22]"
+    assert env.rail.get_full_transitions(3, 23) == 0, "[3][23]"
+    assert env.rail.get_full_transitions(3, 24) == 0, "[3][24]"
+    assert env.rail.get_full_transitions(4, 0) == 0, "[4][0]"
+    assert env.rail.get_full_transitions(4, 1) == 0, "[4][1]"
+    assert env.rail.get_full_transitions(4, 2) == 0, "[4][2]"
+    assert env.rail.get_full_transitions(4, 3) == 32800, "[4][3]"
+    assert env.rail.get_full_transitions(4, 4) == 0, "[4][4]"
+    assert env.rail.get_full_transitions(4, 5) == 0, "[4][5]"
+    assert env.rail.get_full_transitions(4, 6) == 0, "[4][6]"
+    assert env.rail.get_full_transitions(4, 7) == 0, "[4][7]"
+    assert env.rail.get_full_transitions(4, 8) == 0, "[4][8]"
+    assert env.rail.get_full_transitions(4, 9) == 0, "[4][9]"
+    assert env.rail.get_full_transitions(4, 10) == 0, "[4][10]"
+    assert env.rail.get_full_transitions(4, 11) == 0, "[4][11]"
+    assert env.rail.get_full_transitions(4, 12) == 32800, "[4][12]"
+    assert env.rail.get_full_transitions(4, 13) == 0, "[4][13]"
+    assert env.rail.get_full_transitions(4, 14) == 0, "[4][14]"
+    assert env.rail.get_full_transitions(4, 15) == 0, "[4][15]"
+    assert env.rail.get_full_transitions(4, 16) == 0, "[4][16]"
+    assert env.rail.get_full_transitions(4, 17) == 0, "[4][17]"
+    assert env.rail.get_full_transitions(4, 18) == 0, "[4][18]"
+    assert env.rail.get_full_transitions(4, 19) == 0, "[4][19]"
+    assert env.rail.get_full_transitions(4, 20) == 0, "[4][20]"
+    assert env.rail.get_full_transitions(4, 21) == 0, "[4][21]"
+    assert env.rail.get_full_transitions(4, 22) == 32800, "[4][22]"
+    assert env.rail.get_full_transitions(4, 23) == 0, "[4][23]"
+    assert env.rail.get_full_transitions(4, 24) == 0, "[4][24]"
+    assert env.rail.get_full_transitions(5, 0) == 0, "[5][0]"
+    assert env.rail.get_full_transitions(5, 1) == 0, "[5][1]"
+    assert env.rail.get_full_transitions(5, 2) == 0, "[5][2]"
+    assert env.rail.get_full_transitions(5, 3) == 32800, "[5][3]"
+    assert env.rail.get_full_transitions(5, 4) == 0, "[5][4]"
+    assert env.rail.get_full_transitions(5, 5) == 0, "[5][5]"
+    assert env.rail.get_full_transitions(5, 6) == 0, "[5][6]"
+    assert env.rail.get_full_transitions(5, 7) == 0, "[5][7]"
+    assert env.rail.get_full_transitions(5, 8) == 0, "[5][8]"
+    assert env.rail.get_full_transitions(5, 9) == 0, "[5][9]"
+    assert env.rail.get_full_transitions(5, 10) == 0, "[5][10]"
+    assert env.rail.get_full_transitions(5, 11) == 0, "[5][11]"
+    assert env.rail.get_full_transitions(5, 12) == 32800, "[5][12]"
+    assert env.rail.get_full_transitions(5, 13) == 0, "[5][13]"
+    assert env.rail.get_full_transitions(5, 14) == 0, "[5][14]"
+    assert env.rail.get_full_transitions(5, 15) == 0, "[5][15]"
+    assert env.rail.get_full_transitions(5, 16) == 0, "[5][16]"
+    assert env.rail.get_full_transitions(5, 17) == 0, "[5][17]"
+    assert env.rail.get_full_transitions(5, 18) == 0, "[5][18]"
+    assert env.rail.get_full_transitions(5, 19) == 0, "[5][19]"
+    assert env.rail.get_full_transitions(5, 20) == 0, "[5][20]"
+    assert env.rail.get_full_transitions(5, 21) == 0, "[5][21]"
+    assert env.rail.get_full_transitions(5, 22) == 32800, "[5][22]"
+    assert env.rail.get_full_transitions(5, 23) == 0, "[5][23]"
+    assert env.rail.get_full_transitions(5, 24) == 0, "[5][24]"
+    assert env.rail.get_full_transitions(6, 0) == 0, "[6][0]"
+    assert env.rail.get_full_transitions(6, 1) == 0, "[6][1]"
+    assert env.rail.get_full_transitions(6, 2) == 0, "[6][2]"
+    assert env.rail.get_full_transitions(6, 3) == 32800, "[6][3]"
+    assert env.rail.get_full_transitions(6, 4) == 0, "[6][4]"
+    assert env.rail.get_full_transitions(6, 5) == 0, "[6][5]"
+    assert env.rail.get_full_transitions(6, 6) == 0, "[6][6]"
+    assert env.rail.get_full_transitions(6, 7) == 0, "[6][7]"
+    assert env.rail.get_full_transitions(6, 8) == 0, "[6][8]"
+    assert env.rail.get_full_transitions(6, 9) == 0, "[6][9]"
+    assert env.rail.get_full_transitions(6, 10) == 0, "[6][10]"
+    assert env.rail.get_full_transitions(6, 11) == 0, "[6][11]"
+    assert env.rail.get_full_transitions(6, 12) == 32800, "[6][12]"
+    assert env.rail.get_full_transitions(6, 13) == 0, "[6][13]"
+    assert env.rail.get_full_transitions(6, 14) == 0, "[6][14]"
+    assert env.rail.get_full_transitions(6, 15) == 0, "[6][15]"
+    assert env.rail.get_full_transitions(6, 16) == 0, "[6][16]"
+    assert env.rail.get_full_transitions(6, 17) == 0, "[6][17]"
+    assert env.rail.get_full_transitions(6, 18) == 0, "[6][18]"
+    assert env.rail.get_full_transitions(6, 19) == 0, "[6][19]"
+    assert env.rail.get_full_transitions(6, 20) == 0, "[6][20]"
+    assert env.rail.get_full_transitions(6, 21) == 0, "[6][21]"
+    assert env.rail.get_full_transitions(6, 22) == 32800, "[6][22]"
+    assert env.rail.get_full_transitions(6, 23) == 0, "[6][23]"
+    assert env.rail.get_full_transitions(6, 24) == 0, "[6][24]"
+    assert env.rail.get_full_transitions(7, 0) == 0, "[7][0]"
+    assert env.rail.get_full_transitions(7, 1) == 0, "[7][1]"
+    assert env.rail.get_full_transitions(7, 2) == 0, "[7][2]"
+    assert env.rail.get_full_transitions(7, 3) == 32800, "[7][3]"
+    assert env.rail.get_full_transitions(7, 4) == 0, "[7][4]"
+    assert env.rail.get_full_transitions(7, 5) == 0, "[7][5]"
+    assert env.rail.get_full_transitions(7, 6) == 0, "[7][6]"
+    assert env.rail.get_full_transitions(7, 7) == 0, "[7][7]"
+    assert env.rail.get_full_transitions(7, 8) == 0, "[7][8]"
+    assert env.rail.get_full_transitions(7, 9) == 0, "[7][9]"
+    assert env.rail.get_full_transitions(7, 10) == 0, "[7][10]"
+    assert env.rail.get_full_transitions(7, 11) == 0, "[7][11]"
+    assert env.rail.get_full_transitions(7, 12) == 32800, "[7][12]"
+    assert env.rail.get_full_transitions(7, 13) == 0, "[7][13]"
+    assert env.rail.get_full_transitions(7, 14) == 0, "[7][14]"
+    assert env.rail.get_full_transitions(7, 15) == 0, "[7][15]"
+    assert env.rail.get_full_transitions(7, 16) == 0, "[7][16]"
+    assert env.rail.get_full_transitions(7, 17) == 0, "[7][17]"
+    assert env.rail.get_full_transitions(7, 18) == 0, "[7][18]"
+    assert env.rail.get_full_transitions(7, 19) == 0, "[7][19]"
+    assert env.rail.get_full_transitions(7, 20) == 0, "[7][20]"
+    assert env.rail.get_full_transitions(7, 21) == 0, "[7][21]"
+    assert env.rail.get_full_transitions(7, 22) == 32800, "[7][22]"
+    assert env.rail.get_full_transitions(7, 23) == 0, "[7][23]"
+    assert env.rail.get_full_transitions(7, 24) == 0, "[7][24]"
+    assert env.rail.get_full_transitions(8, 0) == 0, "[8][0]"
+    assert env.rail.get_full_transitions(8, 1) == 0, "[8][1]"
+    assert env.rail.get_full_transitions(8, 2) == 0, "[8][2]"
+    assert env.rail.get_full_transitions(8, 3) == 32800, "[8][3]"
+    assert env.rail.get_full_transitions(8, 4) == 0, "[8][4]"
+    assert env.rail.get_full_transitions(8, 5) == 8192, "[8][5]"
+    assert env.rail.get_full_transitions(8, 6) == 0, "[8][6]"
+    assert env.rail.get_full_transitions(8, 7) == 0, "[8][7]"
+    assert env.rail.get_full_transitions(8, 8) == 0, "[8][8]"
+    assert env.rail.get_full_transitions(8, 9) == 8192, "[8][9]"
+    assert env.rail.get_full_transitions(8, 10) == 8192, "[8][10]"
+    assert env.rail.get_full_transitions(8, 11) == 0, "[8][11]"
+    assert env.rail.get_full_transitions(8, 12) == 32800, "[8][12]"
+    assert env.rail.get_full_transitions(8, 13) == 8192, "[8][13]"
+    assert env.rail.get_full_transitions(8, 14) == 0, "[8][14]"
+    assert env.rail.get_full_transitions(8, 15) == 0, "[8][15]"
+    assert env.rail.get_full_transitions(8, 16) == 0, "[8][16]"
+    assert env.rail.get_full_transitions(8, 17) == 0, "[8][17]"
+    assert env.rail.get_full_transitions(8, 18) == 0, "[8][18]"
+    assert env.rail.get_full_transitions(8, 19) == 0, "[8][19]"
+    assert env.rail.get_full_transitions(8, 20) == 0, "[8][20]"
+    assert env.rail.get_full_transitions(8, 21) == 0, "[8][21]"
+    assert env.rail.get_full_transitions(8, 22) == 32800, "[8][22]"
+    assert env.rail.get_full_transitions(8, 23) == 0, "[8][23]"
+    assert env.rail.get_full_transitions(8, 24) == 0, "[8][24]"
+    assert env.rail.get_full_transitions(9, 0) == 8192, "[9][0]"
+    assert env.rail.get_full_transitions(9, 1) == 0, "[9][1]"
+    assert env.rail.get_full_transitions(9, 2) == 0, "[9][2]"
+    assert env.rail.get_full_transitions(9, 3) == 32800, "[9][3]"
+    assert env.rail.get_full_transitions(9, 4) == 8192, "[9][4]"
+    assert env.rail.get_full_transitions(9, 5) == 32800, "[9][5]"
+    assert env.rail.get_full_transitions(9, 6) == 0, "[9][6]"
+    assert env.rail.get_full_transitions(9, 7) == 0, "[9][7]"
+    assert env.rail.get_full_transitions(9, 8) == 0, "[9][8]"
+    assert env.rail.get_full_transitions(9, 9) == 72, "[9][9]"
+    assert env.rail.get_full_transitions(9, 10) == 37408, "[9][10]"
+    assert env.rail.get_full_transitions(9, 11) == 0, "[9][11]"
+    assert env.rail.get_full_transitions(9, 12) == 49186, "[9][12]"
+    assert env.rail.get_full_transitions(9, 13) == 3089, "[9][13]"
+    assert env.rail.get_full_transitions(9, 14) == 4608, "[9][14]"
+    assert env.rail.get_full_transitions(9, 15) == 0, "[9][15]"
+    assert env.rail.get_full_transitions(9, 16) == 0, "[9][16]"
+    assert env.rail.get_full_transitions(9, 17) == 0, "[9][17]"
+    assert env.rail.get_full_transitions(9, 18) == 0, "[9][18]"
+    assert env.rail.get_full_transitions(9, 19) == 0, "[9][19]"
+    assert env.rail.get_full_transitions(9, 20) == 0, "[9][20]"
+    assert env.rail.get_full_transitions(9, 21) == 0, "[9][21]"
+    assert env.rail.get_full_transitions(9, 22) == 32800, "[9][22]"
+    assert env.rail.get_full_transitions(9, 23) == 0, "[9][23]"
+    assert env.rail.get_full_transitions(9, 24) == 0, "[9][24]"
+    assert env.rail.get_full_transitions(10, 0) == 32800, "[10][0]"
+    assert env.rail.get_full_transitions(10, 1) == 0, "[10][1]"
+    assert env.rail.get_full_transitions(10, 2) == 0, "[10][2]"
+    assert env.rail.get_full_transitions(10, 3) == 32800, "[10][3]"
+    assert env.rail.get_full_transitions(10, 4) == 32800, "[10][4]"
+    assert env.rail.get_full_transitions(10, 5) == 32800, "[10][5]"
+    assert env.rail.get_full_transitions(10, 6) == 0, "[10][6]"
+    assert env.rail.get_full_transitions(10, 7) == 0, "[10][7]"
+    assert env.rail.get_full_transitions(10, 8) == 0, "[10][8]"
+    assert env.rail.get_full_transitions(10, 9) == 4, "[10][9]"
+    assert env.rail.get_full_transitions(10, 10) == 1097, "[10][10]"
+    assert env.rail.get_full_transitions(10, 11) == 1025, "[10][11]"
+    assert env.rail.get_full_transitions(10, 12) == 37408, "[10][12]"
+    assert env.rail.get_full_transitions(10, 13) == 0, "[10][13]"
+    assert env.rail.get_full_transitions(10, 14) == 128, "[10][14]"
+    assert env.rail.get_full_transitions(10, 15) == 0, "[10][15]"
+    assert env.rail.get_full_transitions(10, 16) == 0, "[10][16]"
+    assert env.rail.get_full_transitions(10, 17) == 0, "[10][17]"
+    assert env.rail.get_full_transitions(10, 18) == 0, "[10][18]"
+    assert env.rail.get_full_transitions(10, 19) == 0, "[10][19]"
+    assert env.rail.get_full_transitions(10, 20) == 0, "[10][20]"
+    assert env.rail.get_full_transitions(10, 21) == 0, "[10][21]"
+    assert env.rail.get_full_transitions(10, 22) == 32800, "[10][22]"
+    assert env.rail.get_full_transitions(10, 23) == 0, "[10][23]"
+    assert env.rail.get_full_transitions(10, 24) == 0, "[10][24]"
+    assert env.rail.get_full_transitions(11, 0) == 16458, "[11][0]"
+    assert env.rail.get_full_transitions(11, 1) == 17411, "[11][1]"
+    assert env.rail.get_full_transitions(11, 2) == 1025, "[11][2]"
+    assert env.rail.get_full_transitions(11, 3) == 52275, "[11][3]"
+    assert env.rail.get_full_transitions(11, 4) == 3089, "[11][4]"
+    assert env.rail.get_full_transitions(11, 5) == 2064, "[11][5]"
+    assert env.rail.get_full_transitions(11, 6) == 0, "[11][6]"
+    assert env.rail.get_full_transitions(11, 7) == 0, "[11][7]"
+    assert env.rail.get_full_transitions(11, 8) == 0, "[11][8]"
+    assert env.rail.get_full_transitions(11, 9) == 0, "[11][9]"
+    assert env.rail.get_full_transitions(11, 10) == 0, "[11][10]"
+    assert env.rail.get_full_transitions(11, 11) == 0, "[11][11]"
+    assert env.rail.get_full_transitions(11, 12) == 32800, "[11][12]"
+    assert env.rail.get_full_transitions(11, 13) == 0, "[11][13]"
+    assert env.rail.get_full_transitions(11, 14) == 0, "[11][14]"
+    assert env.rail.get_full_transitions(11, 15) == 0, "[11][15]"
+    assert env.rail.get_full_transitions(11, 16) == 0, "[11][16]"
+    assert env.rail.get_full_transitions(11, 17) == 0, "[11][17]"
+    assert env.rail.get_full_transitions(11, 18) == 0, "[11][18]"
+    assert env.rail.get_full_transitions(11, 19) == 0, "[11][19]"
+    assert env.rail.get_full_transitions(11, 20) == 0, "[11][20]"
+    assert env.rail.get_full_transitions(11, 21) == 0, "[11][21]"
+    assert env.rail.get_full_transitions(11, 22) == 32800, "[11][22]"
+    assert env.rail.get_full_transitions(11, 23) == 0, "[11][23]"
+    assert env.rail.get_full_transitions(11, 24) == 0, "[11][24]"
+    assert env.rail.get_full_transitions(12, 0) == 128, "[12][0]"
+    assert env.rail.get_full_transitions(12, 1) == 128, "[12][1]"
+    assert env.rail.get_full_transitions(12, 2) == 0, "[12][2]"
+    assert env.rail.get_full_transitions(12, 3) == 49186, "[12][3]"
+    assert env.rail.get_full_transitions(12, 4) == 1025, "[12][4]"
+    assert env.rail.get_full_transitions(12, 5) == 1025, "[12][5]"
+    assert env.rail.get_full_transitions(12, 6) == 1025, "[12][6]"
+    assert env.rail.get_full_transitions(12, 7) == 1025, "[12][7]"
+    assert env.rail.get_full_transitions(12, 8) == 1025, "[12][8]"
+    assert env.rail.get_full_transitions(12, 9) == 1025, "[12][9]"
+    assert env.rail.get_full_transitions(12, 10) == 1025, "[12][10]"
+    assert env.rail.get_full_transitions(12, 11) == 1025, "[12][11]"
+    assert env.rail.get_full_transitions(12, 12) == 34864, "[12][12]"
+    assert env.rail.get_full_transitions(12, 13) == 0, "[12][13]"
+    assert env.rail.get_full_transitions(12, 14) == 0, "[12][14]"
+    assert env.rail.get_full_transitions(12, 15) == 0, "[12][15]"
+    assert env.rail.get_full_transitions(12, 16) == 0, "[12][16]"
+    assert env.rail.get_full_transitions(12, 17) == 0, "[12][17]"
+    assert env.rail.get_full_transitions(12, 18) == 0, "[12][18]"
+    assert env.rail.get_full_transitions(12, 19) == 0, "[12][19]"
+    assert env.rail.get_full_transitions(12, 20) == 0, "[12][20]"
+    assert env.rail.get_full_transitions(12, 21) == 0, "[12][21]"
+    assert env.rail.get_full_transitions(12, 22) == 32800, "[12][22]"
+    assert env.rail.get_full_transitions(12, 23) == 0, "[12][23]"
+    assert env.rail.get_full_transitions(12, 24) == 0, "[12][24]"
+    assert env.rail.get_full_transitions(13, 0) == 0, "[13][0]"
+    assert env.rail.get_full_transitions(13, 1) == 0, "[13][1]"
+    assert env.rail.get_full_transitions(13, 2) == 0, "[13][2]"
+    assert env.rail.get_full_transitions(13, 3) == 32800, "[13][3]"
+    assert env.rail.get_full_transitions(13, 4) == 0, "[13][4]"
+    assert env.rail.get_full_transitions(13, 5) == 0, "[13][5]"
+    assert env.rail.get_full_transitions(13, 6) == 0, "[13][6]"
+    assert env.rail.get_full_transitions(13, 7) == 0, "[13][7]"
+    assert env.rail.get_full_transitions(13, 8) == 0, "[13][8]"
+    assert env.rail.get_full_transitions(13, 9) == 0, "[13][9]"
+    assert env.rail.get_full_transitions(13, 10) == 0, "[13][10]"
+    assert env.rail.get_full_transitions(13, 11) == 0, "[13][11]"
+    assert env.rail.get_full_transitions(13, 12) == 32800, "[13][12]"
+    assert env.rail.get_full_transitions(13, 13) == 0, "[13][13]"
+    assert env.rail.get_full_transitions(13, 14) == 0, "[13][14]"
+    assert env.rail.get_full_transitions(13, 15) == 0, "[13][15]"
+    assert env.rail.get_full_transitions(13, 16) == 0, "[13][16]"
+    assert env.rail.get_full_transitions(13, 17) == 0, "[13][17]"
+    assert env.rail.get_full_transitions(13, 18) == 0, "[13][18]"
+    assert env.rail.get_full_transitions(13, 19) == 0, "[13][19]"
+    assert env.rail.get_full_transitions(13, 20) == 0, "[13][20]"
+    assert env.rail.get_full_transitions(13, 21) == 0, "[13][21]"
+    assert env.rail.get_full_transitions(13, 22) == 32800, "[13][22]"
+    assert env.rail.get_full_transitions(13, 23) == 0, "[13][23]"
+    assert env.rail.get_full_transitions(13, 24) == 0, "[13][24]"
+    assert env.rail.get_full_transitions(14, 0) == 0, "[14][0]"
+    assert env.rail.get_full_transitions(14, 1) == 0, "[14][1]"
+    assert env.rail.get_full_transitions(14, 2) == 0, "[14][2]"
+    assert env.rail.get_full_transitions(14, 3) == 32800, "[14][3]"
+    assert env.rail.get_full_transitions(14, 4) == 0, "[14][4]"
+    assert env.rail.get_full_transitions(14, 5) == 0, "[14][5]"
+    assert env.rail.get_full_transitions(14, 6) == 0, "[14][6]"
+    assert env.rail.get_full_transitions(14, 7) == 0, "[14][7]"
+    assert env.rail.get_full_transitions(14, 8) == 0, "[14][8]"
+    assert env.rail.get_full_transitions(14, 9) == 0, "[14][9]"
+    assert env.rail.get_full_transitions(14, 10) == 0, "[14][10]"
+    assert env.rail.get_full_transitions(14, 11) == 0, "[14][11]"
+    assert env.rail.get_full_transitions(14, 12) == 32800, "[14][12]"
+    assert env.rail.get_full_transitions(14, 13) == 0, "[14][13]"
+    assert env.rail.get_full_transitions(14, 14) == 0, "[14][14]"
+    assert env.rail.get_full_transitions(14, 15) == 0, "[14][15]"
+    assert env.rail.get_full_transitions(14, 16) == 0, "[14][16]"
+    assert env.rail.get_full_transitions(14, 17) == 0, "[14][17]"
+    assert env.rail.get_full_transitions(14, 18) == 0, "[14][18]"
+    assert env.rail.get_full_transitions(14, 19) == 0, "[14][19]"
+    assert env.rail.get_full_transitions(14, 20) == 0, "[14][20]"
+    assert env.rail.get_full_transitions(14, 21) == 0, "[14][21]"
+    assert env.rail.get_full_transitions(14, 22) == 32800, "[14][22]"
+    assert env.rail.get_full_transitions(14, 23) == 0, "[14][23]"
+    assert env.rail.get_full_transitions(14, 24) == 0, "[14][24]"
+    assert env.rail.get_full_transitions(15, 0) == 0, "[15][0]"
+    assert env.rail.get_full_transitions(15, 1) == 0, "[15][1]"
+    assert env.rail.get_full_transitions(15, 2) == 0, "[15][2]"
+    assert env.rail.get_full_transitions(15, 3) == 32800, "[15][3]"
+    assert env.rail.get_full_transitions(15, 4) == 0, "[15][4]"
+    assert env.rail.get_full_transitions(15, 5) == 0, "[15][5]"
+    assert env.rail.get_full_transitions(15, 6) == 0, "[15][6]"
+    assert env.rail.get_full_transitions(15, 7) == 0, "[15][7]"
+    assert env.rail.get_full_transitions(15, 8) == 0, "[15][8]"
+    assert env.rail.get_full_transitions(15, 9) == 0, "[15][9]"
+    assert env.rail.get_full_transitions(15, 10) == 0, "[15][10]"
+    assert env.rail.get_full_transitions(15, 11) == 0, "[15][11]"
+    assert env.rail.get_full_transitions(15, 12) == 32800, "[15][12]"
+    assert env.rail.get_full_transitions(15, 13) == 0, "[15][13]"
+    assert env.rail.get_full_transitions(15, 14) == 0, "[15][14]"
+    assert env.rail.get_full_transitions(15, 15) == 0, "[15][15]"
+    assert env.rail.get_full_transitions(15, 16) == 0, "[15][16]"
+    assert env.rail.get_full_transitions(15, 17) == 0, "[15][17]"
+    assert env.rail.get_full_transitions(15, 18) == 0, "[15][18]"
+    assert env.rail.get_full_transitions(15, 19) == 0, "[15][19]"
+    assert env.rail.get_full_transitions(15, 20) == 0, "[15][20]"
+    assert env.rail.get_full_transitions(15, 21) == 0, "[15][21]"
+    assert env.rail.get_full_transitions(15, 22) == 32800, "[15][22]"
+    assert env.rail.get_full_transitions(15, 23) == 0, "[15][23]"
+    assert env.rail.get_full_transitions(15, 24) == 0, "[15][24]"
+    assert env.rail.get_full_transitions(16, 0) == 0, "[16][0]"
+    assert env.rail.get_full_transitions(16, 1) == 0, "[16][1]"
+    assert env.rail.get_full_transitions(16, 2) == 0, "[16][2]"
+    assert env.rail.get_full_transitions(16, 3) == 32800, "[16][3]"
+    assert env.rail.get_full_transitions(16, 4) == 0, "[16][4]"
+    assert env.rail.get_full_transitions(16, 5) == 0, "[16][5]"
+    assert env.rail.get_full_transitions(16, 6) == 0, "[16][6]"
+    assert env.rail.get_full_transitions(16, 7) == 0, "[16][7]"
+    assert env.rail.get_full_transitions(16, 8) == 0, "[16][8]"
+    assert env.rail.get_full_transitions(16, 9) == 0, "[16][9]"
+    assert env.rail.get_full_transitions(16, 10) == 0, "[16][10]"
+    assert env.rail.get_full_transitions(16, 11) == 0, "[16][11]"
+    assert env.rail.get_full_transitions(16, 12) == 32800, "[16][12]"
+    assert env.rail.get_full_transitions(16, 13) == 0, "[16][13]"
+    assert env.rail.get_full_transitions(16, 14) == 0, "[16][14]"
+    assert env.rail.get_full_transitions(16, 15) == 0, "[16][15]"
+    assert env.rail.get_full_transitions(16, 16) == 0, "[16][16]"
+    assert env.rail.get_full_transitions(16, 17) == 0, "[16][17]"
+    assert env.rail.get_full_transitions(16, 18) == 0, "[16][18]"
+    assert env.rail.get_full_transitions(16, 19) == 0, "[16][19]"
+    assert env.rail.get_full_transitions(16, 20) == 0, "[16][20]"
+    assert env.rail.get_full_transitions(16, 21) == 0, "[16][21]"
+    assert env.rail.get_full_transitions(16, 22) == 32800, "[16][22]"
+    assert env.rail.get_full_transitions(16, 23) == 0, "[16][23]"
+    assert env.rail.get_full_transitions(16, 24) == 0, "[16][24]"
+    assert env.rail.get_full_transitions(17, 0) == 0, "[17][0]"
+    assert env.rail.get_full_transitions(17, 1) == 0, "[17][1]"
+    assert env.rail.get_full_transitions(17, 2) == 0, "[17][2]"
+    assert env.rail.get_full_transitions(17, 3) == 32800, "[17][3]"
+    assert env.rail.get_full_transitions(17, 4) == 0, "[17][4]"
+    assert env.rail.get_full_transitions(17, 5) == 0, "[17][5]"
+    assert env.rail.get_full_transitions(17, 6) == 0, "[17][6]"
+    assert env.rail.get_full_transitions(17, 7) == 0, "[17][7]"
+    assert env.rail.get_full_transitions(17, 8) == 0, "[17][8]"
+    assert env.rail.get_full_transitions(17, 9) == 0, "[17][9]"
+    assert env.rail.get_full_transitions(17, 10) == 0, "[17][10]"
+    assert env.rail.get_full_transitions(17, 11) == 0, "[17][11]"
+    assert env.rail.get_full_transitions(17, 12) == 32800, "[17][12]"
+    assert env.rail.get_full_transitions(17, 13) == 0, "[17][13]"
+    assert env.rail.get_full_transitions(17, 14) == 0, "[17][14]"
+    assert env.rail.get_full_transitions(17, 15) == 0, "[17][15]"
+    assert env.rail.get_full_transitions(17, 16) == 0, "[17][16]"
+    assert env.rail.get_full_transitions(17, 17) == 0, "[17][17]"
+    assert env.rail.get_full_transitions(17, 18) == 0, "[17][18]"
+    assert env.rail.get_full_transitions(17, 19) == 0, "[17][19]"
+    assert env.rail.get_full_transitions(17, 20) == 0, "[17][20]"
+    assert env.rail.get_full_transitions(17, 21) == 0, "[17][21]"
+    assert env.rail.get_full_transitions(17, 22) == 32800, "[17][22]"
+    assert env.rail.get_full_transitions(17, 23) == 0, "[17][23]"
+    assert env.rail.get_full_transitions(17, 24) == 0, "[17][24]"
+    assert env.rail.get_full_transitions(18, 0) == 0, "[18][0]"
+    assert env.rail.get_full_transitions(18, 1) == 0, "[18][1]"
+    assert env.rail.get_full_transitions(18, 2) == 0, "[18][2]"
+    assert env.rail.get_full_transitions(18, 3) == 32800, "[18][3]"
+    assert env.rail.get_full_transitions(18, 4) == 0, "[18][4]"
+    assert env.rail.get_full_transitions(18, 5) == 0, "[18][5]"
+    assert env.rail.get_full_transitions(18, 6) == 0, "[18][6]"
+    assert env.rail.get_full_transitions(18, 7) == 0, "[18][7]"
+    assert env.rail.get_full_transitions(18, 8) == 0, "[18][8]"
+    assert env.rail.get_full_transitions(18, 9) == 0, "[18][9]"
+    assert env.rail.get_full_transitions(18, 10) == 0, "[18][10]"
+    assert env.rail.get_full_transitions(18, 11) == 0, "[18][11]"
+    assert env.rail.get_full_transitions(18, 12) == 32800, "[18][12]"
+    assert env.rail.get_full_transitions(18, 13) == 0, "[18][13]"
+    assert env.rail.get_full_transitions(18, 14) == 0, "[18][14]"
+    assert env.rail.get_full_transitions(18, 15) == 0, "[18][15]"
+    assert env.rail.get_full_transitions(18, 16) == 0, "[18][16]"
+    assert env.rail.get_full_transitions(18, 17) == 0, "[18][17]"
+    assert env.rail.get_full_transitions(18, 18) == 0, "[18][18]"
+    assert env.rail.get_full_transitions(18, 19) == 0, "[18][19]"
+    assert env.rail.get_full_transitions(18, 20) == 0, "[18][20]"
+    assert env.rail.get_full_transitions(18, 21) == 0, "[18][21]"
+    assert env.rail.get_full_transitions(18, 22) == 32800, "[18][22]"
+    assert env.rail.get_full_transitions(18, 23) == 0, "[18][23]"
+    assert env.rail.get_full_transitions(18, 24) == 0, "[18][24]"
+    assert env.rail.get_full_transitions(19, 0) == 0, "[19][0]"
+    assert env.rail.get_full_transitions(19, 1) == 0, "[19][1]"
+    assert env.rail.get_full_transitions(19, 2) == 0, "[19][2]"
+    assert env.rail.get_full_transitions(19, 3) == 32872, "[19][3]"
+    assert env.rail.get_full_transitions(19, 4) == 1025, "[19][4]"
+    assert env.rail.get_full_transitions(19, 5) == 1025, "[19][5]"
+    assert env.rail.get_full_transitions(19, 6) == 1025, "[19][6]"
+    assert env.rail.get_full_transitions(19, 7) == 1025, "[19][7]"
+    assert env.rail.get_full_transitions(19, 8) == 1025, "[19][8]"
+    assert env.rail.get_full_transitions(19, 9) == 1025, "[19][9]"
+    assert env.rail.get_full_transitions(19, 10) == 1025, "[19][10]"
+    assert env.rail.get_full_transitions(19, 11) == 1025, "[19][11]"
+    assert env.rail.get_full_transitions(19, 12) == 6672, "[19][12]"
+    assert env.rail.get_full_transitions(19, 13) == 0, "[19][13]"
+    assert env.rail.get_full_transitions(19, 14) == 0, "[19][14]"
+    assert env.rail.get_full_transitions(19, 15) == 0, "[19][15]"
+    assert env.rail.get_full_transitions(19, 16) == 0, "[19][16]"
+    assert env.rail.get_full_transitions(19, 17) == 0, "[19][17]"
+    assert env.rail.get_full_transitions(19, 18) == 0, "[19][18]"
+    assert env.rail.get_full_transitions(19, 19) == 0, "[19][19]"
+    assert env.rail.get_full_transitions(19, 20) == 0, "[19][20]"
+    assert env.rail.get_full_transitions(19, 21) == 0, "[19][21]"
+    assert env.rail.get_full_transitions(19, 22) == 32800, "[19][22]"
+    assert env.rail.get_full_transitions(19, 23) == 0, "[19][23]"
+    assert env.rail.get_full_transitions(19, 24) == 0, "[19][24]"
+    assert env.rail.get_full_transitions(20, 0) == 0, "[20][0]"
+    assert env.rail.get_full_transitions(20, 1) == 0, "[20][1]"
+    assert env.rail.get_full_transitions(20, 2) == 0, "[20][2]"
+    assert env.rail.get_full_transitions(20, 3) == 32800, "[20][3]"
+    assert env.rail.get_full_transitions(20, 4) == 0, "[20][4]"
+    assert env.rail.get_full_transitions(20, 5) == 0, "[20][5]"
+    assert env.rail.get_full_transitions(20, 6) == 0, "[20][6]"
+    assert env.rail.get_full_transitions(20, 7) == 0, "[20][7]"
+    assert env.rail.get_full_transitions(20, 8) == 0, "[20][8]"
+    assert env.rail.get_full_transitions(20, 9) == 0, "[20][9]"
+    assert env.rail.get_full_transitions(20, 10) == 0, "[20][10]"
+    assert env.rail.get_full_transitions(20, 11) == 0, "[20][11]"
+    assert env.rail.get_full_transitions(20, 12) == 32800, "[20][12]"
+    assert env.rail.get_full_transitions(20, 13) == 0, "[20][13]"
+    assert env.rail.get_full_transitions(20, 14) == 0, "[20][14]"
+    assert env.rail.get_full_transitions(20, 15) == 0, "[20][15]"
+    assert env.rail.get_full_transitions(20, 16) == 0, "[20][16]"
+    assert env.rail.get_full_transitions(20, 17) == 0, "[20][17]"
+    assert env.rail.get_full_transitions(20, 18) == 0, "[20][18]"
+    assert env.rail.get_full_transitions(20, 19) == 0, "[20][19]"
+    assert env.rail.get_full_transitions(20, 20) == 0, "[20][20]"
+    assert env.rail.get_full_transitions(20, 21) == 0, "[20][21]"
+    assert env.rail.get_full_transitions(20, 22) == 32800, "[20][22]"
+    assert env.rail.get_full_transitions(20, 23) == 0, "[20][23]"
+    assert env.rail.get_full_transitions(20, 24) == 0, "[20][24]"
+    assert env.rail.get_full_transitions(21, 0) == 0, "[21][0]"
+    assert env.rail.get_full_transitions(21, 1) == 0, "[21][1]"
+    assert env.rail.get_full_transitions(21, 2) == 0, "[21][2]"
+    assert env.rail.get_full_transitions(21, 3) == 32800, "[21][3]"
+    assert env.rail.get_full_transitions(21, 4) == 0, "[21][4]"
+    assert env.rail.get_full_transitions(21, 5) == 0, "[21][5]"
+    assert env.rail.get_full_transitions(21, 6) == 0, "[21][6]"
+    assert env.rail.get_full_transitions(21, 7) == 0, "[21][7]"
+    assert env.rail.get_full_transitions(21, 8) == 0, "[21][8]"
+    assert env.rail.get_full_transitions(21, 9) == 0, "[21][9]"
+    assert env.rail.get_full_transitions(21, 10) == 0, "[21][10]"
+    assert env.rail.get_full_transitions(21, 11) == 0, "[21][11]"
+    assert env.rail.get_full_transitions(21, 12) == 32800, "[21][12]"
+    assert env.rail.get_full_transitions(21, 13) == 0, "[21][13]"
+    assert env.rail.get_full_transitions(21, 14) == 0, "[21][14]"
+    assert env.rail.get_full_transitions(21, 15) == 0, "[21][15]"
+    assert env.rail.get_full_transitions(21, 16) == 0, "[21][16]"
+    assert env.rail.get_full_transitions(21, 17) == 0, "[21][17]"
+    assert env.rail.get_full_transitions(21, 18) == 0, "[21][18]"
+    assert env.rail.get_full_transitions(21, 19) == 0, "[21][19]"
+    assert env.rail.get_full_transitions(21, 20) == 0, "[21][20]"
+    assert env.rail.get_full_transitions(21, 21) == 0, "[21][21]"
+    assert env.rail.get_full_transitions(21, 22) == 32800, "[21][22]"
+    assert env.rail.get_full_transitions(21, 23) == 0, "[21][23]"
+    assert env.rail.get_full_transitions(21, 24) == 0, "[21][24]"
+    assert env.rail.get_full_transitions(22, 0) == 0, "[22][0]"
+    assert env.rail.get_full_transitions(22, 1) == 0, "[22][1]"
+    assert env.rail.get_full_transitions(22, 2) == 0, "[22][2]"
+    assert env.rail.get_full_transitions(22, 3) == 32800, "[22][3]"
+    assert env.rail.get_full_transitions(22, 4) == 0, "[22][4]"
+    assert env.rail.get_full_transitions(22, 5) == 0, "[22][5]"
+    assert env.rail.get_full_transitions(22, 6) == 0, "[22][6]"
+    assert env.rail.get_full_transitions(22, 7) == 0, "[22][7]"
+    assert env.rail.get_full_transitions(22, 8) == 0, "[22][8]"
+    assert env.rail.get_full_transitions(22, 9) == 0, "[22][9]"
+    assert env.rail.get_full_transitions(22, 10) == 0, "[22][10]"
+    assert env.rail.get_full_transitions(22, 11) == 0, "[22][11]"
+    assert env.rail.get_full_transitions(22, 12) == 32800, "[22][12]"
+    assert env.rail.get_full_transitions(22, 13) == 0, "[22][13]"
+    assert env.rail.get_full_transitions(22, 14) == 0, "[22][14]"
+    assert env.rail.get_full_transitions(22, 15) == 0, "[22][15]"
+    assert env.rail.get_full_transitions(22, 16) == 0, "[22][16]"
+    assert env.rail.get_full_transitions(22, 17) == 0, "[22][17]"
+    assert env.rail.get_full_transitions(22, 18) == 0, "[22][18]"
+    assert env.rail.get_full_transitions(22, 19) == 0, "[22][19]"
+    assert env.rail.get_full_transitions(22, 20) == 0, "[22][20]"
+    assert env.rail.get_full_transitions(22, 21) == 0, "[22][21]"
+    assert env.rail.get_full_transitions(22, 22) == 32800, "[22][22]"
+    assert env.rail.get_full_transitions(22, 23) == 0, "[22][23]"
+    assert env.rail.get_full_transitions(22, 24) == 0, "[22][24]"
+    assert env.rail.get_full_transitions(23, 0) == 0, "[23][0]"
+    assert env.rail.get_full_transitions(23, 1) == 0, "[23][1]"
+    assert env.rail.get_full_transitions(23, 2) == 0, "[23][2]"
+    assert env.rail.get_full_transitions(23, 3) == 32800, "[23][3]"
+    assert env.rail.get_full_transitions(23, 4) == 0, "[23][4]"
+    assert env.rail.get_full_transitions(23, 5) == 0, "[23][5]"
+    assert env.rail.get_full_transitions(23, 6) == 0, "[23][6]"
+    assert env.rail.get_full_transitions(23, 7) == 0, "[23][7]"
+    assert env.rail.get_full_transitions(23, 8) == 0, "[23][8]"
+    assert env.rail.get_full_transitions(23, 9) == 0, "[23][9]"
+    assert env.rail.get_full_transitions(23, 10) == 0, "[23][10]"
+    assert env.rail.get_full_transitions(23, 11) == 0, "[23][11]"
+    assert env.rail.get_full_transitions(23, 12) == 32800, "[23][12]"
+    assert env.rail.get_full_transitions(23, 13) == 0, "[23][13]"
+    assert env.rail.get_full_transitions(23, 14) == 0, "[23][14]"
+    assert env.rail.get_full_transitions(23, 15) == 0, "[23][15]"
+    assert env.rail.get_full_transitions(23, 16) == 0, "[23][16]"
+    assert env.rail.get_full_transitions(23, 17) == 0, "[23][17]"
+    assert env.rail.get_full_transitions(23, 18) == 0, "[23][18]"
+    assert env.rail.get_full_transitions(23, 19) == 0, "[23][19]"
+    assert env.rail.get_full_transitions(23, 20) == 0, "[23][20]"
+    assert env.rail.get_full_transitions(23, 21) == 0, "[23][21]"
+    assert env.rail.get_full_transitions(23, 22) == 32800, "[23][22]"
+    assert env.rail.get_full_transitions(23, 23) == 0, "[23][23]"
+    assert env.rail.get_full_transitions(23, 24) == 0, "[23][24]"
+    assert env.rail.get_full_transitions(24, 0) == 0, "[24][0]"
+    assert env.rail.get_full_transitions(24, 1) == 0, "[24][1]"
+    assert env.rail.get_full_transitions(24, 2) == 0, "[24][2]"
+    assert env.rail.get_full_transitions(24, 3) == 32800, "[24][3]"
+    assert env.rail.get_full_transitions(24, 4) == 0, "[24][4]"
+    assert env.rail.get_full_transitions(24, 5) == 0, "[24][5]"
+    assert env.rail.get_full_transitions(24, 6) == 0, "[24][6]"
+    assert env.rail.get_full_transitions(24, 7) == 0, "[24][7]"
+    assert env.rail.get_full_transitions(24, 8) == 0, "[24][8]"
+    assert env.rail.get_full_transitions(24, 9) == 8192, "[24][9]"
+    assert env.rail.get_full_transitions(24, 10) == 0, "[24][10]"
+    assert env.rail.get_full_transitions(24, 11) == 0, "[24][11]"
+    assert env.rail.get_full_transitions(24, 12) == 32800, "[24][12]"
+    assert env.rail.get_full_transitions(24, 13) == 0, "[24][13]"
+    assert env.rail.get_full_transitions(24, 14) == 0, "[24][14]"
+    assert env.rail.get_full_transitions(24, 15) == 0, "[24][15]"
+    assert env.rail.get_full_transitions(24, 16) == 0, "[24][16]"
+    assert env.rail.get_full_transitions(24, 17) == 0, "[24][17]"
+    assert env.rail.get_full_transitions(24, 18) == 0, "[24][18]"
+    assert env.rail.get_full_transitions(24, 19) == 0, "[24][19]"
+    assert env.rail.get_full_transitions(24, 20) == 0, "[24][20]"
+    assert env.rail.get_full_transitions(24, 21) == 0, "[24][21]"
+    assert env.rail.get_full_transitions(24, 22) == 32800, "[24][22]"
+    assert env.rail.get_full_transitions(24, 23) == 0, "[24][23]"
+    assert env.rail.get_full_transitions(24, 24) == 0, "[24][24]"
+    assert env.rail.get_full_transitions(25, 0) == 0, "[25][0]"
+    assert env.rail.get_full_transitions(25, 1) == 0, "[25][1]"
+    assert env.rail.get_full_transitions(25, 2) == 0, "[25][2]"
+    assert env.rail.get_full_transitions(25, 3) == 32800, "[25][3]"
+    assert env.rail.get_full_transitions(25, 4) == 0, "[25][4]"
+    assert env.rail.get_full_transitions(25, 5) == 8192, "[25][5]"
+    assert env.rail.get_full_transitions(25, 6) == 0, "[25][6]"
+    assert env.rail.get_full_transitions(25, 7) == 0, "[25][7]"
+    assert env.rail.get_full_transitions(25, 8) == 0, "[25][8]"
+    assert env.rail.get_full_transitions(25, 9) == 32800, "[25][9]"
+    assert env.rail.get_full_transitions(25, 10) == 0, "[25][10]"
+    assert env.rail.get_full_transitions(25, 11) == 8192, "[25][11]"
+    assert env.rail.get_full_transitions(25, 12) == 32800, "[25][12]"
+    assert env.rail.get_full_transitions(25, 13) == 0, "[25][13]"
+    assert env.rail.get_full_transitions(25, 14) == 0, "[25][14]"
+    assert env.rail.get_full_transitions(25, 15) == 0, "[25][15]"
+    assert env.rail.get_full_transitions(25, 16) == 0, "[25][16]"
+    assert env.rail.get_full_transitions(25, 17) == 0, "[25][17]"
+    assert env.rail.get_full_transitions(25, 18) == 0, "[25][18]"
+    assert env.rail.get_full_transitions(25, 19) == 0, "[25][19]"
+    assert env.rail.get_full_transitions(25, 20) == 0, "[25][20]"
+    assert env.rail.get_full_transitions(25, 21) == 0, "[25][21]"
+    assert env.rail.get_full_transitions(25, 22) == 32800, "[25][22]"
+    assert env.rail.get_full_transitions(25, 23) == 0, "[25][23]"
+    assert env.rail.get_full_transitions(25, 24) == 0, "[25][24]"
+    assert env.rail.get_full_transitions(26, 0) == 8192, "[26][0]"
+    assert env.rail.get_full_transitions(26, 1) == 4, "[26][1]"
+    assert env.rail.get_full_transitions(26, 2) == 4608, "[26][2]"
+    assert env.rail.get_full_transitions(26, 3) == 32800, "[26][3]"
+    assert env.rail.get_full_transitions(26, 4) == 0, "[26][4]"
+    assert env.rail.get_full_transitions(26, 5) == 32800, "[26][5]"
+    assert env.rail.get_full_transitions(26, 6) == 0, "[26][6]"
+    assert env.rail.get_full_transitions(26, 7) == 0, "[26][7]"
+    assert env.rail.get_full_transitions(26, 8) == 0, "[26][8]"
+    assert env.rail.get_full_transitions(26, 9) == 32800, "[26][9]"
+    assert env.rail.get_full_transitions(26, 10) == 0, "[26][10]"
+    assert env.rail.get_full_transitions(26, 11) == 32800, "[26][11]"
+    assert env.rail.get_full_transitions(26, 12) == 32800, "[26][12]"
+    assert env.rail.get_full_transitions(26, 13) == 0, "[26][13]"
+    assert env.rail.get_full_transitions(26, 14) == 0, "[26][14]"
+    assert env.rail.get_full_transitions(26, 15) == 0, "[26][15]"
+    assert env.rail.get_full_transitions(26, 16) == 0, "[26][16]"
+    assert env.rail.get_full_transitions(26, 17) == 0, "[26][17]"
+    assert env.rail.get_full_transitions(26, 18) == 0, "[26][18]"
+    assert env.rail.get_full_transitions(26, 19) == 0, "[26][19]"
+    assert env.rail.get_full_transitions(26, 20) == 0, "[26][20]"
+    assert env.rail.get_full_transitions(26, 21) == 0, "[26][21]"
+    assert env.rail.get_full_transitions(26, 22) == 32800, "[26][22]"
+    assert env.rail.get_full_transitions(26, 23) == 0, "[26][23]"
+    assert env.rail.get_full_transitions(26, 24) == 0, "[26][24]"
+    assert env.rail.get_full_transitions(27, 0) == 72, "[27][0]"
+    assert env.rail.get_full_transitions(27, 1) == 17411, "[27][1]"
+    assert env.rail.get_full_transitions(27, 2) == 1097, "[27][2]"
+    assert env.rail.get_full_transitions(27, 3) == 1097, "[27][3]"
+    assert env.rail.get_full_transitions(27, 4) == 5633, "[27][4]"
+    assert env.rail.get_full_transitions(27, 5) == 3089, "[27][5]"
+    assert env.rail.get_full_transitions(27, 6) == 1025, "[27][6]"
+    assert env.rail.get_full_transitions(27, 7) == 1025, "[27][7]"
+    assert env.rail.get_full_transitions(27, 8) == 1025, "[27][8]"
+    assert env.rail.get_full_transitions(27, 9) == 1097, "[27][9]"
+    assert env.rail.get_full_transitions(27, 10) == 17411, "[27][10]"
+    assert env.rail.get_full_transitions(27, 11) == 1097, "[27][11]"
+    assert env.rail.get_full_transitions(27, 12) == 1097, "[27][12]"
+    assert env.rail.get_full_transitions(27, 13) == 5633, "[27][13]"
+    assert env.rail.get_full_transitions(27, 14) == 1025, "[27][14]"
+    assert env.rail.get_full_transitions(27, 15) == 1025, "[27][15]"
+    assert env.rail.get_full_transitions(27, 16) == 1025, "[27][16]"
+    assert env.rail.get_full_transitions(27, 17) == 1025, "[27][17]"
+    assert env.rail.get_full_transitions(27, 18) == 1025, "[27][18]"
+    assert env.rail.get_full_transitions(27, 19) == 1025, "[27][19]"
+    assert env.rail.get_full_transitions(27, 20) == 1025, "[27][20]"
+    assert env.rail.get_full_transitions(27, 21) == 1025, "[27][21]"
+    assert env.rail.get_full_transitions(27, 22) == 2064, "[27][22]"
+    assert env.rail.get_full_transitions(27, 23) == 0, "[27][23]"
+    assert env.rail.get_full_transitions(27, 24) == 0, "[27][24]"
+    assert env.rail.get_full_transitions(28, 0) == 0, "[28][0]"
+    assert env.rail.get_full_transitions(28, 1) == 32800, "[28][1]"
+    assert env.rail.get_full_transitions(28, 2) == 0, "[28][2]"
+    assert env.rail.get_full_transitions(28, 3) == 0, "[28][3]"
+    assert env.rail.get_full_transitions(28, 4) == 72, "[28][4]"
+    assert env.rail.get_full_transitions(28, 5) == 256, "[28][5]"
+    assert env.rail.get_full_transitions(28, 6) == 0, "[28][6]"
+    assert env.rail.get_full_transitions(28, 7) == 0, "[28][7]"
+    assert env.rail.get_full_transitions(28, 8) == 0, "[28][8]"
+    assert env.rail.get_full_transitions(28, 9) == 0, "[28][9]"
+    assert env.rail.get_full_transitions(28, 10) == 32800, "[28][10]"
+    assert env.rail.get_full_transitions(28, 11) == 0, "[28][11]"
+    assert env.rail.get_full_transitions(28, 12) == 16386, "[28][12]"
+    assert env.rail.get_full_transitions(28, 13) == 34864, "[28][13]"
+    assert env.rail.get_full_transitions(28, 14) == 0, "[28][14]"
+    assert env.rail.get_full_transitions(28, 15) == 0, "[28][15]"
+    assert env.rail.get_full_transitions(28, 16) == 0, "[28][16]"
+    assert env.rail.get_full_transitions(28, 17) == 0, "[28][17]"
+    assert env.rail.get_full_transitions(28, 18) == 0, "[28][18]"
+    assert env.rail.get_full_transitions(28, 19) == 0, "[28][19]"
+    assert env.rail.get_full_transitions(28, 20) == 0, "[28][20]"
+    assert env.rail.get_full_transitions(28, 21) == 0, "[28][21]"
+    assert env.rail.get_full_transitions(28, 22) == 0, "[28][22]"
+    assert env.rail.get_full_transitions(28, 23) == 0, "[28][23]"
+    assert env.rail.get_full_transitions(28, 24) == 0, "[28][24]"
+    assert env.rail.get_full_transitions(29, 0) == 0, "[29][0]"
+    assert env.rail.get_full_transitions(29, 1) == 128, "[29][1]"
+    assert env.rail.get_full_transitions(29, 2) == 0, "[29][2]"
+    assert env.rail.get_full_transitions(29, 3) == 0, "[29][3]"
+    assert env.rail.get_full_transitions(29, 4) == 0, "[29][4]"
+    assert env.rail.get_full_transitions(29, 5) == 0, "[29][5]"
+    assert env.rail.get_full_transitions(29, 6) == 0, "[29][6]"
+    assert env.rail.get_full_transitions(29, 7) == 0, "[29][7]"
+    assert env.rail.get_full_transitions(29, 8) == 0, "[29][8]"
+    assert env.rail.get_full_transitions(29, 9) == 0, "[29][9]"
+    assert env.rail.get_full_transitions(29, 10) == 128, "[29][10]"
+    assert env.rail.get_full_transitions(29, 11) == 0, "[29][11]"
+    assert env.rail.get_full_transitions(29, 12) == 128, "[29][12]"
+    assert env.rail.get_full_transitions(29, 13) == 128, "[29][13]"
+    assert env.rail.get_full_transitions(29, 14) == 0, "[29][14]"
+    assert env.rail.get_full_transitions(29, 15) == 0, "[29][15]"
+    assert env.rail.get_full_transitions(29, 16) == 0, "[29][16]"
+    assert env.rail.get_full_transitions(29, 17) == 0, "[29][17]"
+    assert env.rail.get_full_transitions(29, 18) == 0, "[29][18]"
+    assert env.rail.get_full_transitions(29, 19) == 0, "[29][19]"
+    assert env.rail.get_full_transitions(29, 20) == 0, "[29][20]"
+    assert env.rail.get_full_transitions(29, 21) == 0, "[29][21]"
+    assert env.rail.get_full_transitions(29, 22) == 0, "[29][22]"
+    assert env.rail.get_full_transitions(29, 23) == 0, "[29][23]"
+    assert env.rail.get_full_transitions(29, 24) == 0, "[29][24]"
+
+
 def test_rail_env_action_required_info():
     np.random.seed(0)
     speed_ration_map = {1.: 0.25,  # Fast passenger train
diff --git a/tests/test_flatland_malfunction.py b/tests/test_flatland_malfunction.py
index 81b61381ed67d927cac44f4c9733d8a040903ef5..fde9df58663993ae170c4c1e3fea55637feb4282 100644
--- a/tests/test_flatland_malfunction.py
+++ b/tests/test_flatland_malfunction.py
@@ -1,10 +1,16 @@
+import random
+
 import numpy as np
 
+from flatland.core.grid.grid4 import Grid4TransitionsEnum
 from flatland.core.grid.grid4_utils import get_new_position
+from flatland.envs.agent_utils import EnvAgent
 from flatland.envs.observations import TreeObsForRailEnv
-from flatland.envs.rail_env import RailEnv
-from flatland.envs.rail_generators import complex_rail_generator
-from flatland.envs.schedule_generators import complex_schedule_generator
+from flatland.envs.rail_env import RailEnv, RailEnvActions
+from flatland.envs.rail_generators import complex_rail_generator, sparse_rail_generator
+from flatland.envs.schedule_generators import complex_schedule_generator, sparse_schedule_generator
+from flatland.utils.rendertools import RenderTool
+from test_utils import Replay
 
 
 class SingleAgentNavigationObs(TreeObsForRailEnv):
@@ -42,7 +48,7 @@ class SingleAgentNavigationObs(TreeObsForRailEnv):
             for direction in [(agent.direction + i) % 4 for i in range(-1, 2)]:
                 if possible_transitions[direction]:
                     new_position = get_new_position(agent.position, direction)
-                    min_distances.append(self.env.distance_map.get()[handle, new_position[0], new_position[1], direction])
+                    min_distances.append(self.distance_map[handle, new_position[0], new_position[1], direction])
                 else:
                     min_distances.append(np.inf)
 
@@ -121,6 +127,7 @@ def test_malfunction_process_statistically():
                        'min_duration': 3,
                        'max_duration': 3}
     np.random.seed(5)
+    random.seed(0)
 
     env = RailEnv(width=20,
                   height=20,
@@ -144,5 +151,328 @@ def test_malfunction_process_statistically():
         env.step(action_dict)
 
     # check that generation of malfunctions works as expected
-    # results are different in py36 and py37, therefore no exact test on nb_malfunction
-    assert nb_malfunction > 150
+    assert nb_malfunction == 156, "nb_malfunction={}".format(nb_malfunction)
+
+
+def test_initial_malfunction(rendering=True):
+    random.seed(0)
+    np.random.seed(0)
+
+    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
+                       'malfunction_rate': 70,  # Rate of malfunction occurrence
+                       'min_duration': 2,  # Minimal duration of malfunction
+                       'max_duration': 5  # Max duration of malfunction
+                       }
+
+    speed_ration_map = {1.: 1.,  # Fast passenger train
+                        1. / 2.: 0.,  # Fast freight train
+                        1. / 3.: 0.,  # Slow commuter train
+                        1. / 4.: 0.}  # Slow freight train
+
+    env = RailEnv(width=25,
+                  height=30,
+                  rail_generator=sparse_rail_generator(num_cities=5,
+                                                       # Number of cities in map (where train stations are)
+                                                       num_intersections=4,
+                                                       # Number of intersections (no start / target)
+                                                       num_trainstations=25,  # Number of possible start/targets on map
+                                                       min_node_dist=6,  # Minimal distance of nodes
+                                                       node_radius=3,  # Proximity of stations to city center
+                                                       num_neighb=3,
+                                                       # Number of connections to other cities/intersections
+                                                       seed=215545,  # Random seed
+                                                       grid_mode=True,
+                                                       enhance_intersection=False
+                                                       ),
+                  schedule_generator=sparse_schedule_generator(speed_ration_map),
+                  number_of_agents=1,
+                  stochastic_data=stochastic_data,  # Malfunction data generator
+                  )
+
+    if rendering:
+        renderer = RenderTool(env)
+        renderer.render_env(show=True, frames=False, show_observations=False)
+    _action = dict()
+
+    replay_steps = [
+        Replay(
+            position=(28, 5),
+            direction=Grid4TransitionsEnum.EAST,
+            action=RailEnvActions.MOVE_FORWARD,
+            malfunction=3
+        ),
+        Replay(
+            position=(28, 5),
+            direction=Grid4TransitionsEnum.EAST,
+            action=RailEnvActions.MOVE_FORWARD,
+            malfunction=2
+        ),
+        # malfunction stops in the next step and we're still at the beginning of the cell
+        # --> if we take action MOVE_FORWARD, agent should restart and move to the next cell
+        Replay(
+            position=(28, 5),
+            direction=Grid4TransitionsEnum.EAST,
+            action=RailEnvActions.MOVE_FORWARD,
+            malfunction=1
+        ),
+        Replay(
+            position=(28, 4),
+            direction=Grid4TransitionsEnum.WEST,
+            action=RailEnvActions.MOVE_FORWARD,
+            malfunction=0
+        ),
+        Replay(
+            position=(27, 4),
+            direction=Grid4TransitionsEnum.NORTH,
+            action=RailEnvActions.MOVE_FORWARD,
+            malfunction=0
+        )
+    ]
+
+    info_dict = {
+        'action_required': [True]
+    }
+
+    for i, replay in enumerate(replay_steps):
+
+        def _assert(actual, expected, msg):
+            assert actual == expected, "[{}] {}:  actual={}, expected={}".format(i, msg, actual, expected)
+
+        agent: EnvAgent = env.agents[0]
+
+        _assert(agent.position, replay.position, 'position')
+        _assert(agent.direction, replay.direction, 'direction')
+        _assert(agent.malfunction_data['malfunction'], replay.malfunction, 'malfunction')
+
+        if replay.action is not None:
+            assert info_dict['action_required'][0] == True, "[{}] expecting action_required={}".format(i, True)
+            _, _, _, info_dict = env.step({0: replay.action})
+
+        else:
+            assert info_dict['action_required'][0] == False, "[{}] expecting action_required={}".format(i, False)
+            _, _, _, info_dict = env.step({})
+
+        if rendering:
+            renderer.render_env(show=True, show_observations=True)
+
+
+def test_initial_malfunction_stop_moving(rendering=True):
+    random.seed(0)
+    np.random.seed(0)
+
+    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
+                       'malfunction_rate': 70,  # Rate of malfunction occurrence
+                       'min_duration': 2,  # Minimal duration of malfunction
+                       'max_duration': 5  # Max duration of malfunction
+                       }
+
+    speed_ration_map = {1.: 1.,  # Fast passenger train
+                        1. / 2.: 0.,  # Fast freight train
+                        1. / 3.: 0.,  # Slow commuter train
+                        1. / 4.: 0.}  # Slow freight train
+
+    env = RailEnv(width=25,
+                  height=30,
+                  rail_generator=sparse_rail_generator(num_cities=5,
+                                                       # Number of cities in map (where train stations are)
+                                                       num_intersections=4,
+                                                       # Number of intersections (no start / target)
+                                                       num_trainstations=25,  # Number of possible start/targets on map
+                                                       min_node_dist=6,  # Minimal distance of nodes
+                                                       node_radius=3,  # Proximity of stations to city center
+                                                       num_neighb=3,
+                                                       # Number of connections to other cities/intersections
+                                                       seed=215545,  # Random seed
+                                                       grid_mode=True,
+                                                       enhance_intersection=False
+                                                       ),
+                  schedule_generator=sparse_schedule_generator(speed_ration_map),
+                  number_of_agents=1,
+                  stochastic_data=stochastic_data,  # Malfunction data generator
+                  )
+
+    if rendering:
+        renderer = RenderTool(env)
+        renderer.render_env(show=True, frames=False, show_observations=False)
+    _action = dict()
+
+    replay_steps = [
+        Replay(
+            position=(28, 5),
+            direction=Grid4TransitionsEnum.EAST,
+            action=RailEnvActions.DO_NOTHING,
+            malfunction=3
+        ),
+        Replay(
+            position=(28, 5),
+            direction=Grid4TransitionsEnum.EAST,
+            action=RailEnvActions.DO_NOTHING,
+            malfunction=2
+        ),
+        # malfunction stops in the next step and we're still at the beginning of the cell
+        # --> if we take action STOP_MOVING, agent should recover but stay stopped in this cell
+        #
+        Replay(
+            position=(28, 5),
+            direction=Grid4TransitionsEnum.EAST,
+            action=RailEnvActions.STOP_MOVING,
+            malfunction=1
+        ),
+        # we have stopped and do nothing --> should stand still
+        Replay(
+            position=(28, 5),
+            direction=Grid4TransitionsEnum.EAST,
+            action=RailEnvActions.DO_NOTHING,
+            malfunction=0
+        ),
+        # we start to move forward --> should go to next cell now
+        Replay(
+            position=(28, 5),
+            direction=Grid4TransitionsEnum.EAST,
+            action=RailEnvActions.MOVE_FORWARD,
+            malfunction=0
+        ),
+        Replay(
+            position=(28, 4),
+            direction=Grid4TransitionsEnum.WEST,
+            action=RailEnvActions.MOVE_FORWARD,
+            malfunction=0
+        )
+    ]
+
+    info_dict = {
+        'action_required': [True]
+    }
+
+    for i, replay in enumerate(replay_steps):
+
+        def _assert(actual, expected, msg):
+            assert actual == expected, "[{}] {}:  actual={}, expected={}".format(i, msg, actual, expected)
+
+        agent: EnvAgent = env.agents[0]
+
+        _assert(agent.position, replay.position, 'position')
+        _assert(agent.direction, replay.direction, 'direction')
+        _assert(agent.malfunction_data['malfunction'], replay.malfunction, 'malfunction')
+
+        if replay.action is not None:
+            assert info_dict['action_required'][0] == True, "[{}] expecting action_required={}".format(i, True)
+            _, _, _, info_dict = env.step({0: replay.action})
+
+        else:
+            assert info_dict['action_required'][0] == False, "[{}] expecting action_required={}".format(i, False)
+            _, _, _, info_dict = env.step({})
+
+        if rendering:
+            renderer.render_env(show=True, show_observations=True)
+
+
+def test_initial_malfunction_do_nothing(rendering=True):
+    random.seed(0)
+    np.random.seed(0)
+
+    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
+                       'malfunction_rate': 70,  # Rate of malfunction occurrence
+                       'min_duration': 2,  # Minimal duration of malfunction
+                       'max_duration': 5  # Max duration of malfunction
+                       }
+
+    speed_ration_map = {1.: 1.,  # Fast passenger train
+                        1. / 2.: 0.,  # Fast freight train
+                        1. / 3.: 0.,  # Slow commuter train
+                        1. / 4.: 0.}  # Slow freight train
+
+    env = RailEnv(width=25,
+                  height=30,
+                  rail_generator=sparse_rail_generator(num_cities=5,
+                                                       # Number of cities in map (where train stations are)
+                                                       num_intersections=4,
+                                                       # Number of intersections (no start / target)
+                                                       num_trainstations=25,  # Number of possible start/targets on map
+                                                       min_node_dist=6,  # Minimal distance of nodes
+                                                       node_radius=3,  # Proximity of stations to city center
+                                                       num_neighb=3,
+                                                       # Number of connections to other cities/intersections
+                                                       seed=215545,  # Random seed
+                                                       grid_mode=True,
+                                                       enhance_intersection=False
+                                                       ),
+                  schedule_generator=sparse_schedule_generator(speed_ration_map),
+                  number_of_agents=1,
+                  stochastic_data=stochastic_data,  # Malfunction data generator
+                  )
+
+    if rendering:
+        renderer = RenderTool(env)
+        renderer.render_env(show=True, frames=False, show_observations=False)
+    _action = dict()
+
+    replay_steps = [
+        Replay(
+            position=(28, 5),
+            direction=Grid4TransitionsEnum.EAST,
+            action=RailEnvActions.DO_NOTHING,
+            malfunction=3
+        ),
+        Replay(
+            position=(28, 5),
+            direction=Grid4TransitionsEnum.EAST,
+            action=RailEnvActions.DO_NOTHING,
+            malfunction=2
+        ),
+        # malfunction stops in the next step and we're still at the beginning of the cell
+        # --> if we take action DO_NOTHING, agent should restart without moving
+        #
+        Replay(
+            position=(28, 5),
+            direction=Grid4TransitionsEnum.EAST,
+            action=RailEnvActions.DO_NOTHING,
+            malfunction=1
+        ),
+        # we haven't started moving yet --> stay here
+        Replay(
+            position=(28, 5),
+            direction=Grid4TransitionsEnum.EAST,
+            action=RailEnvActions.DO_NOTHING,
+            malfunction=0
+        ),
+        # we start to move forward --> should go to next cell now
+        Replay(
+            position=(28, 5),
+            direction=Grid4TransitionsEnum.EAST,
+            action=RailEnvActions.MOVE_FORWARD,
+            malfunction=0
+        ),
+        Replay(
+            position=(28, 4),
+            direction=Grid4TransitionsEnum.WEST,
+            action=RailEnvActions.MOVE_FORWARD,
+            malfunction=0
+        )
+    ]
+
+    info_dict = {
+        'action_required': [True]
+    }
+
+    for i, replay in enumerate(replay_steps):
+
+        def _assert(actual, expected, msg):
+            assert actual == expected, "[{}] {}:  actual={}, expected={}".format(i, msg, actual, expected)
+
+        agent: EnvAgent = env.agents[0]
+
+        _assert(agent.position, replay.position, 'position')
+        _assert(agent.direction, replay.direction, 'direction')
+        _assert(agent.malfunction_data['malfunction'], replay.malfunction, 'malfunction')
+
+        if replay.action is not None:
+            assert info_dict['action_required'][0] == True, "[{}] expecting action_required={}".format(i, True)
+            _, _, _, info_dict = env.step({0: replay.action})
+
+        else:
+            assert info_dict['action_required'][0] == False, "[{}] expecting action_required={}".format(i, False)
+            _, _, _, info_dict = env.step({})
+
+        if rendering:
+            renderer.render_env(show=True, show_observations=True)
diff --git a/tests/test_multi_speed.py b/tests/test_multi_speed.py
index 86edc08c07552488e72537ec9b1f3b0b7625efed..1cf0c325ac48e9e3d5ac04fb51b5f8462c867726 100644
--- a/tests/test_multi_speed.py
+++ b/tests/test_multi_speed.py
@@ -1,7 +1,4 @@
-from typing import List
-
 import numpy as np
-from attr import attrib, attrs
 
 from flatland.core.grid.grid4 import Grid4TransitionsEnum
 from flatland.envs.agent_utils import EnvAgent, EnvAgentStatic
@@ -12,6 +9,7 @@ from flatland.envs.rail_generators import complex_rail_generator, rail_from_grid
 from flatland.envs.schedule_generators import complex_schedule_generator, random_schedule_generator
 from flatland.utils.rendertools import RenderTool
 from flatland.utils.simple_rail import make_simple_rail
+from test_utils import ReplayConfig, Replay
 
 np.random.seed(1)
 
@@ -97,21 +95,8 @@ def test_multi_speed_init():
                 old_pos[i_agent] = env.agents[i_agent].position
 
 
-@attrs
-class Replay(object):
-    position = attrib()
-    direction = attrib()
-    action = attrib(type=RailEnvActions)
-    malfunction = attrib(default=0, type=int)
-
-
-@attrs
-class TestConfig(object):
-    replay = attrib(type=List[Replay])
-    target = attrib()
-    speed = attrib(type=float)
-
-
+# TODO test penalties!
+# TODO test invalid actions!
 def test_multispeed_actions_no_malfunction_no_blocking(rendering=True):
     """Test that actions are correctly performed on cell exit for a single agent."""
     rail, rail_map = make_simple_rail()
@@ -132,7 +117,7 @@ def test_multispeed_actions_no_malfunction_no_blocking(rendering=True):
     if rendering:
         renderer = RenderTool(env, gl="PILSVG")
 
-    test_config = TestConfig(
+    test_config = ReplayConfig(
         replay=[
             Replay(
                 position=(3, 9),  # east dead-end
@@ -179,6 +164,7 @@ def test_multispeed_actions_no_malfunction_no_blocking(rendering=True):
                 direction=Grid4TransitionsEnum.SOUTH,
                 action=RailEnvActions.STOP_MOVING
             ),
+            #
             Replay(
                 position=(4, 6),
                 direction=Grid4TransitionsEnum.SOUTH,
@@ -205,7 +191,6 @@ def test_multispeed_actions_no_malfunction_no_blocking(rendering=True):
         speed=0.5
     )
 
-    # TODO test penalties!
     agentStatic: EnvAgentStatic = env.agents_static[0]
     info_dict = {
         'action_required': [True]
@@ -230,7 +215,7 @@ def test_multispeed_actions_no_malfunction_no_blocking(rendering=True):
         _assert(agent.position, replay.position, 'position')
         _assert(agent.direction, replay.direction, 'direction')
 
-        if replay.action:
+        if replay.action is not None:
             assert info_dict['action_required'][0] == True, "[{}] expecting action_required={}".format(i, True)
             _, _, _, info_dict = env.step({0: replay.action})
 
@@ -263,7 +248,7 @@ def test_multispeed_actions_no_malfunction_blocking(rendering=True):
         renderer = RenderTool(env, gl="PILSVG")
 
     test_configs = [
-        TestConfig(
+        ReplayConfig(
             replay=[
                 Replay(
                     position=(3, 8),
@@ -331,7 +316,7 @@ def test_multispeed_actions_no_malfunction_blocking(rendering=True):
             ],
             target=(3, 0),  # west dead-end
             speed=1 / 3),
-        TestConfig(
+        ReplayConfig(
             replay=[
                 Replay(
                     position=(3, 9),  # east dead-end
@@ -438,13 +423,13 @@ def test_multispeed_actions_no_malfunction_blocking(rendering=True):
             _assert(a, agent.position, replay.position, 'position')
             _assert(a, agent.direction, replay.direction, 'direction')
 
-
-
-            if replay.action:
-                assert info_dict['action_required'][a] == True, "[{}] agent {} expecting action_required={}".format(step, a, True)
+            if replay.action is not None:
+                assert info_dict['action_required'][a] == True, "[{}] agent {} expecting action_required={}".format(
+                    step, a, True)
                 action_dict[a] = replay.action
             else:
-                assert info_dict['action_required'][a] == False, "[{}] agent {} expecting action_required={}".format(step, a, False)
+                assert info_dict['action_required'][a] == False, "[{}] agent {} expecting action_required={}".format(
+                    step, a, False)
         _, _, _, info_dict = env.step(action_dict)
 
         if rendering:
@@ -471,7 +456,7 @@ def test_multispeed_actions_malfunction_no_blocking(rendering=True):
     if rendering:
         renderer = RenderTool(env, gl="PILSVG")
 
-    test_config = TestConfig(
+    test_config = ReplayConfig(
         replay=[
             Replay(
                 position=(3, 9),  # east dead-end
@@ -493,7 +478,7 @@ def test_multispeed_actions_malfunction_no_blocking(rendering=True):
                 position=(3, 8),
                 direction=Grid4TransitionsEnum.WEST,
                 action=None,
-                malfunction=2 # recovers in two steps from now!
+                malfunction=2  # recovers in two steps from now!
             ),
             # agent recovers in this step
             Replay(
@@ -515,7 +500,7 @@ def test_multispeed_actions_malfunction_no_blocking(rendering=True):
                 position=(3, 6),
                 direction=Grid4TransitionsEnum.WEST,
                 action=RailEnvActions.MOVE_FORWARD,
-                malfunction=2 # recovers in two steps from now!
+                malfunction=2  # recovers in two steps from now!
             ),
             # agent recovers in this step; since we're at the beginning, we provide a different action although we're broken!
             Replay(
@@ -548,9 +533,20 @@ def test_multispeed_actions_malfunction_no_blocking(rendering=True):
                 direction=Grid4TransitionsEnum.SOUTH,
                 action=None
             ),
+            # DO_NOTHING keeps moving!
             Replay(
                 position=(5, 6),
                 direction=Grid4TransitionsEnum.SOUTH,
+                action=RailEnvActions.DO_NOTHING
+            ),
+            Replay(
+                position=(5, 6),
+                direction=Grid4TransitionsEnum.SOUTH,
+                action=None
+            ),
+            Replay(
+                position=(6, 6),
+                direction=Grid4TransitionsEnum.SOUTH,
                 action=RailEnvActions.MOVE_FORWARD
             ),
 
@@ -584,10 +580,11 @@ def test_multispeed_actions_malfunction_no_blocking(rendering=True):
         _assert(agent.position, replay.position, 'position')
         _assert(agent.direction, replay.direction, 'direction')
 
-        if replay.malfunction:
-            agent.malfunction_data['malfunction'] = 2
+        if replay.malfunction > 0:
+            agent.malfunction_data['malfunction'] = replay.malfunction
+            agent.malfunction_data['moving_before_malfunction'] = agent.moving
 
-        if replay.action:
+        if replay.action is not None:
             assert info_dict['action_required'][0] == True, "[{}] expecting action_required={}".format(i, True)
             _, _, _, info_dict = env.step({0: replay.action})
 
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..6347bd0f5048350c099ba2568dac7caba74baf2d
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,22 @@
+"""Test Utils."""
+from typing import List, Tuple
+
+from attr import attrs, attrib
+
+from flatland.core.grid.grid4 import Grid4TransitionsEnum
+from flatland.envs.rail_env import RailEnvActions
+
+
+@attrs
+class Replay(object):
+    position = attrib(type=Tuple[int, int])
+    direction = attrib(type=Grid4TransitionsEnum)
+    action = attrib(type=RailEnvActions)
+    malfunction = attrib(default=0, type=int)
+
+
+@attrs
+class ReplayConfig(object):
+    replay = attrib(type=List[Replay])
+    target = attrib(type=Tuple[int, int])
+    speed = attrib(type=float)
diff --git a/tox.ini b/tox.ini
index 0b445e902454376b22ef95cd31d11800f659f3a8..b7a359b43c79cbb5d274755b35fd9f6a7873daff 100644
--- a/tox.ini
+++ b/tox.ini
@@ -21,8 +21,6 @@ commands =
     flake8 flatland tests examples benchmarks
 
 [testenv:docs]
-; TODO https://gitlab.aicrowd.com/flatland/flatland/issues/33 docs requires make installed, therefore exclude Windows
-platform = linux|linux2|darwin
 ; use python3.6 because of incompatibility under Windows of the pycairo installed through conda for py37
 basepython = python3.6
 whitelist_externals = make
@@ -46,8 +44,6 @@ commands =
     make docs
 
 [testenv:coverage]
-; TODO https://gitlab.aicrowd.com/flatland/flatland/issues/33 coverage requires make, therefore exclude Windows
-platform = linux|linux2|darwin
 ; use python3.6 because of incompatibility under Windows of the pycairo installed through conda for py37
 basepython = python3.6
 whitelist_externals = make
@@ -68,7 +64,7 @@ deps =
     -r{toxinidir}/requirements_continuous_integration.txt
 changedir = {toxinidir}
 commands =
-    make coverage
+    python make_coverage.py
 
 [testenv:benchmarks]
 ; use python3.6 because of incompatibility under Windows of the pycairo installed through conda for py37