diff --git a/examples/debugging_example_DELETE.py b/examples/debugging_example_DELETE.py
new file mode 100644
index 0000000000000000000000000000000000000000..8df84833de1c1a0dbb3974cab6bf9b722049b994
--- /dev/null
+++ b/examples/debugging_example_DELETE.py
@@ -0,0 +1,87 @@
+import random
+import time
+
+import numpy as np
+
+from flatland.core.env_observation_builder import ObservationBuilder
+from flatland.core.grid.grid_utils import coordinate_to_position
+from flatland.envs.generators import random_rail_generator, complex_rail_generator
+from flatland.envs.observations import TreeObsForRailEnv
+from flatland.envs.predictions import ShortestPathPredictorForRailEnv
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
+
+random.seed(1)
+np.random.seed(1)
+
+class SingleAgentNavigationObs(TreeObsForRailEnv):
+    """
+    We derive our observation builder from TreeObsForRailEnv, to exploit the existing implementation to compute
+    the minimum distances from each grid node to each agent's target.
+
+    We then build a representation vector with 3 binary components, indicating which of the 3 available directions
+    for each agent (Left, Forward, Right) leads to the shortest path to its target.
+    E.g., if taking the Left branch (if available) is the shortest route to the agent's target, the observation vector
+    will be [1, 0, 0].
+    """
+    def __init__(self):
+        super().__init__(max_depth=0)
+        self.observation_space = [3]
+
+    def reset(self):
+        # Recompute the distance map, if the environment has changed.
+        super().reset()
+
+    def get(self, handle):
+        agent = self.env.agents[handle]
+
+        possible_transitions = self.env.rail.get_transitions(*agent.position, agent.direction)
+        num_transitions = np.count_nonzero(possible_transitions)
+
+        # Start from the current orientation, and see which transitions are available;
+        # organize them as [left, forward, right], relative to the current orientation
+        # If only one transition is possible, the forward branch is aligned with it.
+        if num_transitions == 1:
+            observation = [0, 1, 0]
+        else:
+            min_distances = []
+            for direction in [(agent.direction + i) % 4 for i in range(-1, 2)]:
+                if possible_transitions[direction]:
+                    new_position = self._new_position(agent.position, direction)
+                    min_distances.append(self.distance_map[handle, new_position[0], new_position[1], direction])
+                else:
+                    min_distances.append(np.inf)
+
+            observation = [0, 0, 0]
+            observation[np.argmin(min_distances)] = 1
+
+        return observation
+
+
+env = RailEnv(width=14,
+              height=14,
+              rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=5, max_dist=99999, seed=0),
+              number_of_agents=2,
+              obs_builder_object=SingleAgentNavigationObs())
+
+obs = env.reset()
+env_renderer = RenderTool(env, gl="PILSVG")
+env_renderer.render_env(show=True, frames=True, show_observations=False)
+for step in range(100):
+    actions = {}
+    for i in range(len(obs)):
+        actions[i] = np.argmax(obs[i])+1
+
+    if step%5 == 0:
+        print("Agent halts")
+        actions[0] = 4  # Halt
+
+    obs, all_rewards, done, _ = env.step(actions)
+    print("Agent 0 broken-ness: ", env.agents[0].broken_data['broken'])
+
+    env_renderer.render_env(show=True, frames=True, show_observations=False)
+    time.sleep(0.5)
+    if done["__all__"]:
+        break
+env_renderer.close_window()
+
diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py
index ca003a80460a2ccdd4bfb01402a9cddc06261e1e..79c86955819e6d353b4e31369d8304f4155b4357
--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -75,6 +75,20 @@ class RailEnv(Environment):
     - stop_penalty = 0  # penalty for stopping a moving agent
     - start_penalty = 0  # penalty for starting a stopped agent

+    Stochastic breaking of trains:
+    Trains in RailEnv can break down if they are halted too often (either by their own choice, or because an invalid
+    action or cell is selected).
+
+    Every time an agent stops, it has a certain probability of breaking. The probability is the product of 2
+    distributions: the first distribution selects the average number of trains that will break during an episode
+    (e.g., max(1, 10% of the trains)). The second distribution is a Poisson distribution with mean set to the average
+    number of stops at which a train breaks.
+    If a random number in [0, 1] is lower than the product of the 2 distributions, the train breaks.
+    A broken train samples a random number of steps it will stay broken for, during which all its actions are ignored.
+
+    TODO: currently, the parameters that control the stochasticity of the environment are hard-coded in init().
+    For Round 2, they will be passed to the constructor as arguments, to allow for more flexibility.
+
     """

     def __init__(self,
@@ -151,6 +165,15 @@ class RailEnv(Environment):

         self.valid_positions = None

+        # Stochastic train breaking parameters
+        self.min_average_broken_trains = 1
+        self.average_proportion_of_broken_trains = 0.1  # ~10% of the trains can be expected to break down in an episode
+        self.mean_number_halts_to_break = 3
+
+        # Uniform distribution for the number of steps a train stays broken
+        self.min_number_of_steps_broken = 4
+        self.max_number_of_steps_broken = 8
+
     # no more agent_handles
     def get_agent_handles(self):
         return range(self.get_num_agents())
@@ -212,6 +235,26 @@
         # Return the new observation vectors for each agent
         return self._get_observations()

+    def _agent_stopped(self, i_agent):
+        self.agents[i_agent].broken_data['number_of_halts'] += 1
+
+        def poisson_pdf(x, mean):
+            return np.power(mean, x) * np.exp(-mean) / np.prod(range(2, x + 1))
+
+        p1_prob_train_i_breaks = max(self.min_average_broken_trains / len(self.agents),
+                                     self.average_proportion_of_broken_trains)
+        p2_prob_train_breaks_at_halt_j = poisson_pdf(self.agents[i_agent].broken_data['number_of_halts'],
+                                                     self.mean_number_halts_to_break)
+
+        s1 = np.random.random()
+        s2 = np.random.random()
+
+        if s1 * s2 <= p1_prob_train_i_breaks * p2_prob_train_breaks_at_halt_j:
+            # +1 because the counter is decreased at the beginning of step()
+            num_broken_steps = np.random.randint(self.min_number_of_steps_broken, self.max_number_of_steps_broken+1) + 1
+            self.agents[i_agent].broken_data['broken'] = num_broken_steps
+            self.agents[i_agent].broken_data['number_of_halts'] = 0
+
     def step(self, action_dict_):
         self._elapsed_steps += 1

@@ -240,10 +283,19 @@
             agent = self.agents[i_agent]
             agent.old_direction = agent.direction
             agent.old_position = agent.position
+
+            if agent.broken_data['broken'] > 0:
+                agent.broken_data['broken'] -= 1
+
             if self.dones[i_agent]:  # this agent has already completed...
                 continue

-            if i_agent not in action_dict:  # no action has been supplied for this agent
+            # No action has been supplied for this agent
+            if i_agent not in action_dict:
+                action_dict[i_agent] = RailEnvActions.DO_NOTHING
+
+            # The train is broken
+            if agent.broken_data['broken'] > 0:
                 action_dict[i_agent] = RailEnvActions.DO_NOTHING

             if action_dict[i_agent] < 0 or action_dict[i_agent] > len(RailEnvActions):
@@ -262,6 +314,7 @@
                 # Only allow halting an agent on entering new cells.
                 agent.moving = False
                 self.rewards_dict[i_agent] += stop_penalty
+                self._agent_stopped(i_agent)

             if not agent.moving and not (action == RailEnvActions.DO_NOTHING or action == RailEnvActions.STOP_MOVING):
                 # Allow agent to start with any forward or direction action
@@ -305,6 +358,8 @@
                             self.rewards_dict[i_agent] += invalid_action_penalty
                             self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed']
                             self.rewards_dict[i_agent] += stop_penalty
+                            if agent.moving:
+                                self._agent_stopped(i_agent)
                             agent.moving = False
                             continue
                    else:
@@ -312,6 +367,8 @@
                         self.rewards_dict[i_agent] += invalid_action_penalty
                         self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed']
                         self.rewards_dict[i_agent] += stop_penalty
+                        if agent.moving:
+                            self._agent_stopped(i_agent)
                         agent.moving = False
                         continue

@@ -331,12 +388,15 @@
                         agent.position = new_position
                         agent.direction = new_direction
                         agent.speed_data['position_fraction'] = 0.0
                     else:
-                        # If the agent cannot move due to any reason, we set its state to not moving.
+                        # If the agent cannot move due to any reason, we set its state to not moving
+                        if agent.moving:
+                            self._agent_stopped(i_agent)
                         agent.moving = False

             if np.equal(agent.position, agent.target).all():
                 self.dones[i_agent] = True
                 agent.moving = False
+                # Do not call self._agent_stopped, as the agent has terminated its task
             else:
                 self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed']
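
For reference, the halt-driven breaking check introduced in _agent_stopped above can be exercised in isolation. The sketch below is not part of the changeset: maybe_break is a hypothetical helper name, and the module-level constants simply mirror the default values hard-coded in __init__ above.

import numpy as np

# Defaults mirroring the values hard-coded in RailEnv.__init__ in the diff above.
MIN_AVERAGE_BROKEN_TRAINS = 1
AVERAGE_PROPORTION_OF_BROKEN_TRAINS = 0.1
MEAN_NUMBER_HALTS_TO_BREAK = 3
MIN_NUMBER_OF_STEPS_BROKEN = 4
MAX_NUMBER_OF_STEPS_BROKEN = 8


def poisson_pdf(x, mean):
    # Poisson PMF mean**x * exp(-mean) / x!; np.prod over an empty range is 1.0, so x = 0 and x = 1 are handled.
    return np.power(mean, x) * np.exp(-mean) / np.prod(range(2, x + 1))


def maybe_break(num_agents, number_of_halts):
    # Returns the number of steps the train stays broken, or 0 if it does not break at this halt.
    p1_prob_train_breaks = max(MIN_AVERAGE_BROKEN_TRAINS / num_agents, AVERAGE_PROPORTION_OF_BROKEN_TRAINS)
    p2_prob_breaks_at_this_halt = poisson_pdf(number_of_halts, MEAN_NUMBER_HALTS_TO_BREAK)
    if np.random.random() * np.random.random() <= p1_prob_train_breaks * p2_prob_breaks_at_this_halt:
        return np.random.randint(MIN_NUMBER_OF_STEPS_BROKEN, MAX_NUMBER_OF_STEPS_BROKEN + 1)
    return 0


if __name__ == "__main__":
    np.random.seed(1)
    # Mimic the example script: a 2-agent episode in which agent 0 halts every 5 steps.
    halts = 0
    for step in range(100):
        if step % 5 == 0:
            halts += 1
            broken_steps = maybe_break(num_agents=2, number_of_halts=halts)
            if broken_steps > 0:
                print("Train breaks at halt", halts, "and stays broken for", broken_steps, "steps")
                halts = 0  # the halt counter is reset when the train breaks, as in _agent_stopped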