diff --git a/README.rst b/README.rst index 6811219991cfb849f7d1a2288ab413a4c889c0d7..23d96aa7083814f85f62f9868da284d01912e879 100644 --- a/README.rst +++ b/README.rst @@ -16,7 +16,7 @@ Flatland :align: center Flatland is a opensource toolkit for developing and comparing Multi Agent Reinforcement Learning algorithms in little (or ridiculously large !) gridworlds. -The base environment is a two-dimensional grid in which many agents can be placed, and each agent must solve one or more navigational tasks in the grid world. More details about the environment and the problem statement can be found in the official docs `here <http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/>`_. +The base environment is a two-dimensional grid in which many agents can be placed, and each agent must solve one or more navigational tasks in the grid world. More details about the environment and the problem statement can be found in the `official docs <http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/>`_. This library was developed by `SBB <https://www.sbb.ch/en/>`_ , `AIcrowd <https://www.aicrowd.com/>`_ and numerous contributors and AIcrowd research fellows from the AIcrowd community. 
diff --git a/examples/training_example.py b/examples/training_example.py index dd9ded92510be8ec5fa6c222b7259157db920430..313920939aabb8bc63b2198ff77d27a24d699468 100644 --- a/examples/training_example.py +++ b/examples/training_example.py @@ -80,7 +80,6 @@ for trials in range(1, n_trials + 1): # Environment step which returns the observations for all agents, their corresponding # reward and whether their are done next_obs, all_rewards, done, _ = env.step(action_dict) - # Update replay buffer and train agent for a in range(env.get_num_agents()): agent.step((obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])) diff --git a/flatland/__init__.py b/flatland/__init__.py index cccf18e6fc8ff614ce5f8b6eec433a883d474d81..feccf8656b98057e0c7486483d2a6c6281077a37 100644 --- a/flatland/__init__.py +++ b/flatland/__init__.py @@ -4,4 +4,4 @@ __author__ = """S.P. Mohanty""" __email__ = 'mohanty@aicrowd.com' -__version__ = '0.1.2' +__version__ = '0.2.0' diff --git a/flatland/envs/observations.py b/flatland/envs/observations.py index add983c032f6a91c624faa3bfc2fada5036a139b..2192e61f7b162471d2af54a0a5e100a620ad3517 100644 --- a/flatland/envs/observations.py +++ b/flatland/envs/observations.py @@ -209,8 +209,7 @@ class TreeObsForRailEnv(ObservationBuilder): #1: if own target lies on the explored branch the current distance from the agent in number of cells is stored. #2: if another agents target is detected the distance in number of cells from the agents current locaiton - is stored - + is stored #3: if another agent is detected the distance in number of cells from current agent position is stored. 
diff --git a/flatland/envs/predictions.py b/flatland/envs/predictions.py index 2605e84cc96a19e246f4015c97ee4a31d92f6acb..ca675ce209bc7e4aeb96305eb945751bb903b8ed 100644 --- a/flatland/envs/predictions.py +++ b/flatland/envs/predictions.py @@ -140,13 +140,15 @@ class ShortestPathPredictorForRailEnv(PredictionBuilder): new_position = get_new_position(agent.position, new_direction) elif np.sum(cell_transitions) > 1: min_dist = np.inf + no_dist_found = True for direction in range(4): if cell_transitions[direction] == 1: neighbour_cell = get_new_position(agent.position, direction) target_dist = distance_map[agent.handle, neighbour_cell[0], neighbour_cell[1], direction] - if target_dist < min_dist: + if target_dist < min_dist or no_dist_found: min_dist = target_dist new_direction = direction + no_dist_found = False new_position = get_new_position(agent.position, new_direction) else: raise Exception("No transition possible {}".format(cell_transitions)) diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py index 4e8832ecaa665c97f566829d520f07f07f2df381..127f51c2dcc1d6a6dd29f99398e702e09f9a02a5 100644 --- a/flatland/envs/rail_env.py +++ b/flatland/envs/rail_env.py @@ -4,6 +4,7 @@ Definition of the RailEnv environment and related level-generation functions. Generator functions are functions that take width, height and num_resets as arguments and return a GridTransitionMap object. """ +# TODO: _ this is a global method --> utils or remove later from enum import IntEnum @@ -84,6 +85,7 @@ class RailEnv(Environment): a GridTransitionMap object rail_from_manual_sp ecifications_generator(rail_spec) : generate a rail from a rail specifications array + TODO: generate_rail_from_saved_list or from list of ndarray bitmaps --- width : int The width of the rail map. Potentially in the future, a range of widths to sample from. 
@@ -107,7 +109,7 @@ class RailEnv(Environment): self.obs_builder._set_env(self) self.action_space = [1] - self.observation_space = self.obs_builder.observation_space + self.observation_space = self.obs_builder.observation_space # updated on resets? self.rewards = [0] * number_of_agents self.done = False @@ -163,8 +165,8 @@ class RailEnv(Environment): self.restart_agents() - for iAgent in range(self.get_num_agents()): - agent = self.agents[iAgent] + for i_agent in range(self.get_num_agents()): + agent = self.agents[i_agent] agent.speed_data['position_fraction'] = 0.0 self.num_resets += 1 @@ -200,7 +202,9 @@ class RailEnv(Environment): self.rewards_dict = {i: r + global_reward for i, r in self.rewards_dict.items()} return self._get_observations(), self.rewards_dict, self.dones, {} - for i_agent, agent in enumerate(self.agents): + # for i in range(len(self.agents_handles)): + for i_agent in range(self.get_num_agents()): + agent = self.agents[i_agent] agent.old_direction = agent.direction agent.old_position = agent.position if self.dones[i_agent]: # this agent has already completed... @@ -227,7 +231,7 @@ class RailEnv(Environment): self.rewards_dict[i_agent] += stop_penalty if not agent.moving and not (action == RailEnvActions.DO_NOTHING or action == RailEnvActions.STOP_MOVING): - # Only allow agent to start moving by pressing forward. 
+ # Allow agent to start with any forward or direction action agent.moving = True self.rewards_dict[i_agent] += start_penalty @@ -245,10 +249,10 @@ class RailEnv(Environment): action_selected = False if agent.speed_data['position_fraction'] == 0.: if action != RailEnvActions.DO_NOTHING and action != RailEnvActions.STOP_MOVING: - cell_isFree, new_cell_isValid, new_direction, new_position, transition_isValid = \ + cell_free, new_cell_valid, new_direction, new_position, transition_valid = \ self._check_action_on_agent(action, agent) - if all([new_cell_isValid, transition_isValid]): + if all([new_cell_valid, transition_valid]): agent.speed_data['transition_action_on_cellexit'] = action action_selected = True @@ -256,26 +260,26 @@ class RailEnv(Environment): # But, if the chosen invalid action was LEFT/RIGHT, and the agent is moving, # try to keep moving forward! if (action == RailEnvActions.MOVE_LEFT or action == RailEnvActions.MOVE_RIGHT) and agent.moving: - cell_isFree, new_cell_isValid, new_direction, new_position, transition_isValid = \ + cell_free, new_cell_valid, new_direction, new_position, transition_valid = \ self._check_action_on_agent(RailEnvActions.MOVE_FORWARD, agent) - if all([new_cell_isValid, transition_isValid]): + if all([new_cell_valid, transition_valid]): agent.speed_data['transition_action_on_cellexit'] = RailEnvActions.MOVE_FORWARD action_selected = True else: # TODO: an invalid action was chosen after entering the cell. The agent cannot move. self.rewards_dict[i_agent] += invalid_action_penalty - agent.moving = False + self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed'] self.rewards_dict[i_agent] += stop_penalty - + agent.moving = False continue else: # TODO: an invalid action was chosen after entering the cell. The agent cannot move. 
self.rewards_dict[i_agent] += invalid_action_penalty - agent.moving = False + self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed'] self.rewards_dict[i_agent] += stop_penalty - + agent.moving = False continue if agent.moving and (action_selected or agent.speed_data['position_fraction'] > 0.0): @@ -287,10 +291,10 @@ class RailEnv(Environment): # Now 'transition_action_on_cellexit' will be guaranteed to be valid; it was checked on entering # the cell - cell_isFree, new_cell_isValid, new_direction, new_position, transition_isValid = \ + cell_free, new_cell_valid, new_direction, new_position, transition_valid = \ self._check_action_on_agent(agent.speed_data['transition_action_on_cellexit'], agent) - if all([new_cell_isValid, transition_isValid, cell_isFree]): + if all([new_cell_valid, transition_valid, cell_free]): agent.position = new_position agent.direction = new_direction agent.speed_data['position_fraction'] = 0.0 @@ -310,14 +314,14 @@ class RailEnv(Environment): def _check_action_on_agent(self, action, agent): # compute number of possible transitions in the current # cell used to check for invalid actions - new_direction, transition_isValid = self.check_action(agent, action) + new_direction, transition_valid = self.check_action(agent, action) new_position = get_new_position(agent.position, new_direction) # Is it a legal move? # 1) transition allows the new_direction in the cell, # 2) the new cell is not empty (case 0), # 3) the cell is free, i.e., no agent is currently in that cell - new_cell_isValid = ( + new_cell_valid = ( np.array_equal( # Check the new position is still in the grid new_position, np.clip(new_position, [0, 0], [self.height - 1, self.width - 1])) @@ -325,19 +329,19 @@ class RailEnv(Environment): self.rail.get_full_transitions(*new_position) > 0) # If transition validity hasn't been checked yet. 
- if transition_isValid is None: - transition_isValid = self.rail.get_transition( + if transition_valid is None: + transition_valid = self.rail.get_transition( (*agent.position, agent.direction), new_direction) # Check the new position is not the same as any of the existing agent positions # (including itself, for simplicity, since it is moving) - cell_isFree = not np.any( + cell_free = not np.any( np.equal(new_position, [agent2.position for agent2 in self.agents]).all(1)) - return cell_isFree, new_cell_isValid, new_direction, new_position, transition_isValid + return cell_free, new_cell_valid, new_direction, new_position, transition_valid def check_action(self, agent, action): - transition_isValid = None + transition_valid = None possible_transitions = self.rail.get_transitions(*agent.position, agent.direction) num_transitions = np.count_nonzero(possible_transitions) @@ -345,12 +349,12 @@ class RailEnv(Environment): if action == RailEnvActions.MOVE_LEFT: new_direction = agent.direction - 1 if num_transitions <= 1: - transition_isValid = False + transition_valid = False elif action == RailEnvActions.MOVE_RIGHT: new_direction = agent.direction + 1 if num_transitions <= 1: - transition_isValid = False + transition_valid = False new_direction %= 4 @@ -360,8 +364,8 @@ class RailEnv(Environment): # new_direction will be the only valid transition # - take only available transition new_direction = np.argmax(possible_transitions) - transition_isValid = True - return new_direction, transition_isValid + transition_valid = True + return new_direction, transition_valid def _get_observations(self): self.obs_dict = self.obs_builder.get_many(list(range(self.get_num_agents()))) diff --git a/setup.cfg b/setup.cfg index dd16bd77df8727f52ed2f3102b1042799271a611..d83f09f0c50a153fce8c40c22cf7d496eb02304b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.2 +current_version = 0.2.0 commit = True tag = True diff --git a/setup.py b/setup.py index 
3cc28b34f7acf0d05958606c79beef439632035f..900dada07e587333db5459bc4a482be080f241b9 100644 --- a/setup.py +++ b/setup.py @@ -114,6 +114,6 @@ setup( test_suite='tests', tests_require=test_requirements, url='https://gitlab.aicrowd.com/flatland/flatland', - version='0.1.2', + version='0.2.0', zip_safe=False, ) diff --git a/tests/test_distance_map.py b/tests/test_distance_map.py new file mode 100644 index 0000000000000000000000000000000000000000..79f4bab164312f757ad584bd3708a7af3fb7a97e --- /dev/null +++ b/tests/test_distance_map.py @@ -0,0 +1,56 @@ +import numpy as np + +from flatland.core.grid.grid4 import Grid4Transitions +from flatland.core.transition_map import GridTransitionMap +from flatland.envs.generators import rail_from_GridTransitionMap_generator +from flatland.envs.observations import TreeObsForRailEnv +from flatland.envs.predictions import ShortestPathPredictorForRailEnv +from flatland.envs.rail_env import RailEnv + + +def test_walker(): + # _ _ _ + + cells = [int('0000000000000000', 2), # empty cell - Case 0 + int('1000000000100000', 2), # Case 1 - straight + int('1001001000100000', 2), # Case 2 - simple switch + int('1000010000100001', 2), # Case 3 - diamond crossing + int('1001011000100001', 2), # Case 4 - single slip switch + int('1100110000110011', 2), # Case 5 - double slip switch + int('0101001000000010', 2), # Case 6 - symmetrical switch + int('0010000000000000', 2)] # Case 7 - dead end + transitions = Grid4Transitions([]) + dead_end_from_south = cells[7] + dead_end_from_west = transitions.rotate_transition(dead_end_from_south, 90) + dead_end_from_east = transitions.rotate_transition(dead_end_from_south, 270) + vertical_straight = cells[1] + horizontal_straight = transitions.rotate_transition(vertical_straight, 90) + + rail_map = np.array( + [[dead_end_from_east] + [horizontal_straight] + [dead_end_from_west]], dtype=np.uint16) + rail = GridTransitionMap(width=rail_map.shape[1], + height=rail_map.shape[0], transitions=transitions) + rail.grid = 
rail_map + env = RailEnv(width=rail_map.shape[1], + height=rail_map.shape[0], + rail_generator=rail_from_GridTransitionMap_generator(rail), + number_of_agents=1, + obs_builder_object=TreeObsForRailEnv(max_depth=2, + predictor=ShortestPathPredictorForRailEnv(max_depth=10)), + ) + # reset to initialize agents_static + env.reset() + + # set initial position and direction for testing... + env.agents_static[0].position = (0, 1) + env.agents_static[0].direction = 1 + env.agents_static[0].target = (0, 0) + + # reset to set agents from agents_static + env.reset(False, False) + obs_builder: TreeObsForRailEnv = env.obs_builder + + print(obs_builder.distance_map[(0, *[0, 1], 1)]) + assert obs_builder.distance_map[(0, *[0, 1], 1)] == 3 + print(obs_builder.distance_map[(0, *[0, 2], 3)]) + assert obs_builder.distance_map[(0, *[0, 2], 1)] == 2 # does not work yet, Erik's proposal.