Compare revisions

a6c4ae6a · a6c4ae6a · a6c4ae6a · a6c4ae6a · a6c4ae6a · a6c4ae6a
--- a/docs/interface/wrappers.rst
+++ b/docs/interface/wrappers.rst
+
+Environment Wrappers
+====================
+
+..
+
+   We provide various environment wrappers to work with both the rail env and the petting zoo interface.
+
+
+Background
+----------
+
+These wrappers change certain aspects of the environment's behavior, which can help to achieve better reinforcement learning training.
+
+Supported Inbuilt Wrappers
+--------------------------
+
+We provide two sample wrappers: the ShortestPathAction wrapper and the SkipNoChoice wrapper. The wrappers require many env properties that are only created on environment reset. Hence, the rail env must be reset before it is wrapped. To use a wrapper, simply pass it the reset rail env. Code samples are shown below for each wrapper.
+
+ShortestPathAction Wrapper
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To use the ShortestPathAction Wrapper, simply wrap the rail env as follows:
+
+.. code-block:: python
+
+   rail_env.reset(random_seed=1)
+   rail_env = ShortestPathActionWrapper(rail_env)
+
+The shortest path action wrapper maps the existing action space onto 3 actions - Shortest Path (\ ``0``\ ), Next Shortest Path (\ ``1``\ ) and Stop (\ ``2``\ ).  Hence, we must ensure that the predicted action is always one of these three actions (``0``, ``1`` or ``2``). To route all agents along the shortest path, pass ``0`` as the action.
+
+SkipNoChoice Wrapper
+^^^^^^^^^^^^^^^^^^^^
+
+To use the SkipNoChoice Wrapper, simply wrap the rail env as follows:
+
+.. code-block:: python
+
+   rail_env.reset(random_seed=1)
+   rail_env = SkipNoChoiceCellsWrapper(rail_env, accumulate_skipped_rewards=False, discounting=0.0)
--- a/env_data/tests/Level_distance_map_shortest_path.pkl
+++ b/env_data/tests/Level_distance_map_shortest_path.pkl
--- a/env_data/tests/Test_2_Level_0.pkl
+++ b/env_data/tests/Test_2_Level_0.pkl
--- a/env_data/tests/Test_9_Level_1.pkl
+++ b/env_data/tests/Test_9_Level_1.pkl
--- a/env_data/tests/service_test/Test_0/Level_0.pkl
+++ b/env_data/tests/service_test/Test_0/Level_0.pkl
--- a/env_data/tests/service_test/Test_0/Level_1.pkl
+++ b/env_data/tests/service_test/Test_0/Level_1.pkl
--- a/env_data/tests/service_test/metadata.csv
+++ b/env_data/tests/service_test/metadata.csv
+test_id,env_id,n_agents,x_dim,y_dim,n_cities,max_rail_pairs_in_city,n_envs_run,seed,grid_mode,max_rails_between_cities,malfunction_duration_min,malfunction_duration_max,malfunction_interval,speed_ratios
+Test_0,Level_0,7,30,30,2,2,10,335971,False,2,20,50,540,"{1.0: 0.25, 0.5: 0.25, 0.33: 0.25, 0.25: 0.25}"
+Test_0,Level_1,7,30,30,2,2,10,335972,False,2,20,50,540,"{1.0: 0.25, 0.5: 0.25, 0.33: 0.25, 0.25: 0.25}"
--- a/env_data/tests/test-10x10.mpk
+++ b/env_data/tests/test-10x10.mpk
--- a/env_data/tests/test_002.pkl
+++ b/env_data/tests/test_002.pkl
--- a/env_data/tests/test_env_loop.pkl
+++ b/env_data/tests/test_env_loop.pkl
--- a/examples/complex_rail_benchmark.py
+++ b/examples/complex_rail_benchmark.py
-"""Run benchmarks on complex rail flatland."""
-import random
-
-import numpy as np
-
-from flatland.envs.rail_env import RailEnv
-from flatland.envs.rail_generators import complex_rail_generator
-from flatland.envs.schedule_generators import complex_schedule_generator
-
-
-def run_benchmark():
-    """Run benchmark on a small number of agents in complex rail environment."""
-    random.seed(1)
-    np.random.seed(1)
-
-    # Example generate a random rail
-    env = RailEnv(width=15, height=15,
-                  rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=20, min_dist=12),
-                  schedule_generator=complex_schedule_generator(),
-                  number_of_agents=5)
-    env.reset()
-
-    n_trials = 20
-    action_dict = dict()
-    action_prob = [0] * 4
-
-    for trials in range(1, n_trials + 1):
-
-        # Reset environment
-        obs, info = env.reset()
-
-        # Run episode
-        for step in range(100):
-            # Action
-            for a in range(env.get_num_agents()):
-                action = np.random.randint(0, 4)
-                action_prob[action] += 1
-                action_dict.update({a: action})
-
-            # Environment step
-            next_obs, all_rewards, done, _ = env.step(action_dict)
-
-            if done['__all__']:
-                break
-        if trials % 100 == 0:
-            action_prob = [1] * 4
-
-
-if __name__ == "__main__":
-    run_benchmark()
--- a/examples/custom_observation_example_01_SimpleObs.py
+++ b/examples/custom_observation_example_01_SimpleObs.py
@@ -3,8 +3,9 @@ import random
 import numpy as np

 from flatland.core.env_observation_builder import ObservationBuilder
+from flatland.envs.line_generators import sparse_line_generator
 from flatland.envs.rail_env import RailEnv
-from flatland.envs.rail_generators import random_rail_generator
+from flatland.envs.rail_generators import sparse_rail_generator

 random.seed(100)
 np.random.seed(100)
@@ -27,12 +28,31 @@ class SimpleObs(ObservationBuilder):
        return observation


+def create_env():
+    nAgents = 3
+    n_cities = 2
+    max_rails_between_cities = 2
+    max_rails_in_city = 4
+    seed = 0
+    env = RailEnv(
+        width=20,
+        height=30,
+        rail_generator=sparse_rail_generator(
+            max_num_cities=n_cities,
+            seed=seed,
+            grid_mode=True,
+            max_rails_between_cities=max_rails_between_cities,
+            max_rail_pairs_in_city=max_rails_in_city
+        ),
+        line_generator=sparse_line_generator(),
+        number_of_agents=nAgents,
+        obs_builder_object=SimpleObs()
+    )
+    return env
+
+
 def main():
-    env = RailEnv(width=7,
-                  height=7,
-                  rail_generator=random_rail_generator(),
-                  number_of_agents=3,
-                  obs_builder_object=SimpleObs())
+    env = create_env()
    env.reset()

    # Print the observation vector for each agents

--- a/examples/custom_observation_example_02_SingleAgentNavigationObs.py
+++ b/examples/custom_observation_example_02_SingleAgentNavigationObs.py
@@ -8,9 +8,9 @@ import numpy as np

 from flatland.core.env_observation_builder import ObservationBuilder
 from flatland.core.grid.grid4_utils import get_new_position
+from flatland.envs.line_generators import sparse_line_generator
 from flatland.envs.rail_env import RailEnv
-from flatland.envs.rail_generators import complex_rail_generator
-from flatland.envs.schedule_generators import complex_schedule_generator
+from flatland.envs.rail_generators import sparse_rail_generator
 from flatland.utils.misc import str2bool
 from flatland.utils.rendertools import RenderTool

@@ -63,40 +63,71 @@ class SingleAgentNavigationObs(ObservationBuilder):
        return observation


+def create_env():
+    nAgents = 1
+    n_cities = 2
+    max_rails_between_cities = 2
+    max_rails_in_city = 4
+    seed = 0
+    env = RailEnv(
+        width=30,
+        height=40,
+        rail_generator=sparse_rail_generator(
+            max_num_cities=n_cities,
+            seed=seed,
+            grid_mode=True,
+            max_rails_between_cities=max_rails_between_cities,
+            max_rail_pairs_in_city=max_rails_in_city
+        ),
+        line_generator=sparse_line_generator(),
+        number_of_agents=nAgents,
+        obs_builder_object=SingleAgentNavigationObs()
+    )
+    return env
+
+
+def custom_observation_example_02_SingleAgentNavigationObs(sleep_for_animation, do_rendering):
+    env = create_env()
+    obs, info = env.reset()
+
+    env_renderer = None
+    if do_rendering:
+        env_renderer = RenderTool(env)
+        env_renderer.render_env(show=True, frames=True, show_observations=False)
+
+    for step in range(100):
+        action = np.argmax(obs[0]) + 1
+        obs, all_rewards, done, _ = env.step({0: action})
+        print("Rewards: ", all_rewards, "  [done=", done, "]")
+
+        if env_renderer is not None:
+            env_renderer.render_env(show=True, frames=True, show_observations=True)
+        if sleep_for_animation:
+            time.sleep(0.1)
+        if done["__all__"]:
+            break
+    if env_renderer is not None:
+        env_renderer.close_window()
+
+
 def main(args):
    try:
-        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
+        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", "do_rendering=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
+    do_rendering = True
    for o, a in opts:
        if o in ("--sleep-for-animation"):
            sleep_for_animation = str2bool(a)
+        elif o in ("--do_rendering"):
+            do_rendering = str2bool(a)
        else:
            assert False, "unhandled option"

-    env = RailEnv(width=7,
-                  height=7,
-                  rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=5, max_dist=99999,
-                                                        seed=1),
-                  schedule_generator=complex_schedule_generator(),
-                  number_of_agents=1,
-                  obs_builder_object=SingleAgentNavigationObs())
-
-    obs, info = env.reset()
-    env_renderer = RenderTool(env, gl="PILSVG")
-    env_renderer.render_env(show=True, frames=True, show_observations=True)
-    for step in range(100):
-        action = np.argmax(obs[0]) + 1
-        obs, all_rewards, done, _ = env.step({0: action})
-        print("Rewards: ", all_rewards, "  [done=", done, "]")
-        env_renderer.render_env(show=True, frames=True, show_observations=True)
-        if sleep_for_animation:
-            time.sleep(0.1)
-        if done["__all__"]:
-            break
-    env_renderer.close_window()
+    # execute example
+    custom_observation_example_02_SingleAgentNavigationObs(sleep_for_animation, do_rendering)


 if __name__ == '__main__':

--- a/examples/custom_observation_example_03_ObservePredictions.py
+++ b/examples/custom_observation_example_03_ObservePredictions.py
@@ -9,10 +9,10 @@ import numpy as np
 from flatland.core.env import Environment
 from flatland.core.env_observation_builder import ObservationBuilder
 from flatland.core.grid.grid_utils import coordinate_to_position
+from flatland.envs.line_generators import sparse_line_generator
 from flatland.envs.predictions import ShortestPathPredictorForRailEnv
 from flatland.envs.rail_env import RailEnv
-from flatland.envs.rail_generators import complex_rail_generator
-from flatland.envs.schedule_generators import complex_schedule_generator
+from flatland.envs.rail_generators import sparse_rail_generator
 from flatland.utils.misc import str2bool
 from flatland.utils.ordered_set import OrderedSet
 from flatland.utils.rendertools import RenderTool
@@ -102,19 +102,30 @@ class ObservePredictions(ObservationBuilder):
            self.predictor.set_env(self.env)


-def main(args):
-    try:
-        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
-    except getopt.GetoptError as err:
-        print(str(err))  # will print something like "option -a not recognized"
-        sys.exit(2)
-    sleep_for_animation = True
-    for o, a in opts:
-        if o in ("--sleep-for-animation"):
-            sleep_for_animation = str2bool(a)
-        else:
-            assert False, "unhandled option"
-
+def create_env(custom_obs_builder):
+    nAgents = 3
+    n_cities = 2
+    max_rails_between_cities = 4
+    max_rails_in_city = 2
+    seed = 0
+    env = RailEnv(
+        width=30,
+        height=30,
+        rail_generator=sparse_rail_generator(
+            max_num_cities=n_cities,
+            seed=seed,
+            grid_mode=True,
+            max_rails_between_cities=max_rails_between_cities,
+            max_rail_pairs_in_city=max_rails_in_city
+        ),
+        line_generator=sparse_line_generator(),
+        number_of_agents=nAgents,
+        obs_builder_object=custom_obs_builder
+    )
+    return env
+
+
+def custom_observation_example_03_ObservePredictions(sleep_for_animation, do_rendering):
    # Initiate the Predictor
    custom_predictor = ShortestPathPredictorForRailEnv(10)

@@ -122,19 +133,14 @@ def main(args):
    custom_obs_builder = ObservePredictions(custom_predictor)

    # Initiate Environment
-    env = RailEnv(width=10,
-                  height=10,
-                  rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=1, min_dist=8, max_dist=99999,
-                                                        seed=1),
-                  schedule_generator=complex_schedule_generator(),
-                  number_of_agents=3,
-                  obs_builder_object=custom_obs_builder)
-
+    env = create_env(custom_obs_builder)
    obs, info = env.reset()
-    env_renderer = RenderTool(env, gl="PILSVG")

-    # We render the initial step and show the obsered cells as colored boxes
-    env_renderer.render_env(show=True, frames=True, show_observations=True, show_predictions=False)
+    env_renderer = None
+    if do_rendering:
+        env_renderer = RenderTool(env)
+        # We render the initial step and show the obsered cells as colored boxes
+        env_renderer.render_env(show=True, frames=True, show_observations=True, show_predictions=False)

    action_dict = {}
    for step in range(100):
@@ -143,10 +149,37 @@ def main(args):
            action_dict[a] = action
        obs, all_rewards, done, _ = env.step(action_dict)
        print("Rewards: ", all_rewards, "  [done=", done, "]")
-        env_renderer.render_env(show=True, frames=True, show_observations=True, show_predictions=False)
+        if env_renderer is not None:
+            env_renderer.render_env(show=True, frames=True, show_observations=True, show_predictions=False)
        if sleep_for_animation:
            time.sleep(0.5)

+        if done["__all__"]:
+            print("All done!")
+            break
+
+    if env_renderer is not None:
+        env_renderer.close_window()
+
+
+def main(args):
+    try:
+        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", "do_rendering=", ""])
+    except getopt.GetoptError as err:
+        print(str(err))  # will print something like "option -a not recognized"
+        sys.exit(2)
+    sleep_for_animation = True
+    do_rendering = True
+    for o, a in opts:
+        if o in ("--sleep-for-animation"):
+            sleep_for_animation = str2bool(a)
+        elif o in ("--do_rendering"):
+            do_rendering = str2bool(a)
+        else:
+            assert False, "unhandled option"
+
+    # execute example
+    custom_observation_example_03_ObservePredictions(sleep_for_animation, do_rendering)

 if __name__ == '__main__':
    if 'argv' in globals():

--- a/examples/custom_railmap_example.py
+++ b/examples/custom_railmap_example.py
+import getopt
 import random
-from typing import Any
+import sys
+import time
+from typing import Tuple

 import numpy as np

+from flatland.core.env_observation_builder import DummyObservationBuilder
 from flatland.core.grid.rail_env_grid import RailEnvTransitions
 from flatland.core.transition_map import GridTransitionMap
+from flatland.envs.line_generators import sparse_line_generator
 from flatland.envs.rail_env import RailEnv
-from flatland.envs.rail_generators import RailGenerator, RailGeneratorProduct
-from flatland.envs.schedule_generators import ScheduleGenerator
-from flatland.envs.schedule_utils import Schedule
+from flatland.envs.rail_generators import rail_from_grid_transition_map
+from flatland.utils.misc import str2bool
 from flatland.utils.rendertools import RenderTool

-random.seed(100)
-np.random.seed(100)

+def custom_rail_map() -> Tuple[GridTransitionMap, np.ndarray, dict]:
+    """Build a small hand-crafted rail network.
+
+    Returns a 3-tuple ``(rail, rail_map, optionals)``:
+
+    * ``rail`` - a ``GridTransitionMap`` whose ``grid`` is ``rail_map``,
+    * ``rail_map`` - the ``np.uint16`` array of cell transitions
+      (7 rows x 10 columns),
+    * ``optionals`` - ``{'agents_hints': {...}}`` holding the city positions,
+      train stations and city orientations defined below.
+    """
+    # We instantiate a very simple rail network on a 7x10 grid:
+    #  0 1 2 3 4 5 6 7 8 9  10
+    # 0        /-------------\
+    # 1        |             |
+    # 2        |             |
+    # 3 _ _ _ /_  _ _        |
+    # 4              \   ___ /
+    # 5               |/
+    # 6               |
+    # 7               |
+    transitions = RailEnvTransitions()
+    cells = transitions.transition_list

-def custom_rail_generator() -> RailGenerator:
-    def generator(width: int, height: int, num_agents: int = 0, num_resets: int = 0) -> RailGeneratorProduct:
-        rail_trans = RailEnvTransitions()
-        grid_map = GridTransitionMap(width=width, height=height, transitions=rail_trans)
-        rail_array = grid_map.grid
-        rail_array.fill(0)
-        new_tran = rail_trans.set_transition(1, 1, 1, 1)
-        print(new_tran)
-        rail_array[0, 0] = new_tran
-        rail_array[0, 1] = new_tran
-        return grid_map, None
+    # Named cell types: base entries of the transition list plus rotated variants.
+    empty = cells[0]
+    dead_end_from_south = cells[7]
+    right_turn_from_south = cells[8]
+    right_turn_from_west = transitions.rotate_transition(right_turn_from_south, 90)
+    right_turn_from_north = transitions.rotate_transition(right_turn_from_south, 180)
+    dead_end_from_west = transitions.rotate_transition(dead_end_from_south, 90)
+    dead_end_from_north = transitions.rotate_transition(dead_end_from_south, 180)
+    dead_end_from_east = transitions.rotate_transition(dead_end_from_south, 270)
+    vertical_straight = cells[1]
+    simple_switch_north_left = cells[2]
+    simple_switch_north_right = cells[10]
+    simple_switch_left_east = transitions.rotate_transition(simple_switch_north_left, 90)
+    horizontal_straight = transitions.rotate_transition(vertical_straight, 90)
+    double_switch_south_horizontal_straight = horizontal_straight + cells[6]
+    double_switch_north_horizontal_straight = transitions.rotate_transition(
+        double_switch_south_horizontal_straight, 180)
+    # Each inner list is one grid row; every row has 10 cells.
+    rail_map = np.array(
+        [[empty] * 3 + [right_turn_from_south] + [horizontal_straight] * 5 + [right_turn_from_west]] +
+        [[empty] * 3 + [vertical_straight] + [empty] * 5 + [vertical_straight]] * 2 +
+        [[dead_end_from_east] + [horizontal_straight] * 2 + [simple_switch_left_east] + [horizontal_straight] * 2 + [
+            right_turn_from_west] + [empty] * 2 + [vertical_straight]] +
+        [[empty] * 6 + [simple_switch_north_right] + [horizontal_straight] * 2 + [right_turn_from_north]] +
+        [[empty] * 6 + [vertical_straight] + [empty] * 3] +
+        [[empty] * 6 + [dead_end_from_north] + [empty] * 3], dtype=np.uint16)
+    rail = GridTransitionMap(width=rail_map.shape[1],
+                             height=rail_map.shape[0], transitions=transitions)
+    rail.grid = rail_map
+    # Hints that are handed to the rail generator together with the map.
+    city_positions = [(0, 3), (6, 6)]
+    train_stations = [
+        [((0, 3), 0)],
+        [((6, 6), 0)],
+    ]
+    city_orientations = [0, 2]
+    agents_hints = {'city_positions': city_positions,
+                    'train_stations': train_stations,
+                    'city_orientations': city_orientations
+                    }
+    optionals = {'agents_hints': agents_hints}
+    return rail, rail_map, optionals

-    return generator

+def create_env():
+    """Build a two-agent ``RailEnv`` on top of the hand-crafted rail map.
+
+    The environment's width and height are read from the array returned by
+    ``custom_rail_map`` so that they always match the map.
+    """
+    transition_map, grid, hints = custom_rail_map()
+    n_rows, n_cols = grid.shape[0], grid.shape[1]
+    return RailEnv(
+        width=n_cols,
+        height=n_rows,
+        rail_generator=rail_from_grid_transition_map(transition_map, hints),
+        line_generator=sparse_line_generator(),
+        number_of_agents=2,
+        obs_builder_object=DummyObservationBuilder(),
+    )

-def custom_schedule_generator() -> ScheduleGenerator:
-    def generator(rail: GridTransitionMap, num_agents: int, hints: Any = None,
-                  num_resets: int = 0) -> Schedule:
-        agents_positions = []
-        agents_direction = []
-        agents_target = []
-        speeds = []
-        return Schedule(agent_positions=agents_positions, agent_directions=agents_direction,
-                        agent_targets=agents_target, agent_speeds=speeds, agent_malfunction_rates=None)

-    return generator
+def custom_railmap_example(sleep_for_animation, do_rendering):
+    """Create the custom rail map environment, reset it and optionally show it.
+
+    :param sleep_for_animation: when truthy, sleep for one second at the end
+    :param do_rendering: when truthy, render the environment once and then
+        close the render window again
+    """
+    # Seed both random number generators before the environment is created.
+    random.seed(100)
+    np.random.seed(100)

+    env = create_env()
+    env.reset()

-env = RailEnv(width=6,
-              height=4,
-              rail_generator=custom_rail_generator(),
-              schedule_generator=custom_schedule_generator(),
-              number_of_agents=1)
+    if do_rendering:
+        env_renderer = RenderTool(env)
+        env_renderer.render_env(show=True, show_observations=False)
+        env_renderer.close_window()

-env.reset()
+    if sleep_for_animation:
+        time.sleep(1)

-env_renderer = RenderTool(env)
-env_renderer.render_env(show=True)
+    # uncomment to keep the renderer open
+    # input("Press Enter to continue...")

-# uncomment to keep the renderer open
-# input("Press Enter to continue...")
+
+def main(args):
+    """Parse the command-line options and run the custom rail map example.
+
+    Recognised long options are ``--sleep-for-animation=<value>`` and
+    ``--do_rendering=<value>``; each value is converted with ``str2bool`` and
+    both settings default to ``True``.
+
+    :param args: list of command-line arguments (without the program name)
+    """
+    try:
+        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", "do_rendering=", ""])
+    except getopt.GetoptError as err:
+        print(str(err))  # will print something like "option -a not recognized"
+        sys.exit(2)
+    sleep_for_animation = True
+    do_rendering = True
+    for o, a in opts:
+        # Compare with ``==``: ``o in ("--name")`` is a substring test because
+        # parentheses alone do not create a tuple.
+        if o == "--sleep-for-animation":
+            sleep_for_animation = str2bool(a)
+        elif o == "--do_rendering":
+            do_rendering = str2bool(a)
+        else:
+            assert False, "unhandled option"
+
+    # execute example
+    custom_railmap_example(sleep_for_animation, do_rendering)
+
+
+# Script entry point: use an ``argv`` global if one exists in the module
+# namespace (NOTE(review): presumably injected by a test harness - confirm),
+# otherwise fall back to the real command-line arguments.
+if __name__ == '__main__':
+    if 'argv' in globals():
+        main(argv)
+    else:
+        main(sys.argv[1:])
--- a/examples/flatland_2_0_example.py
+++ b/examples/flatland_2_0_example.py
-import time
-
-import numpy as np
-
-from flatland.envs.observations import TreeObsForRailEnv, GlobalObsForRailEnv
-from flatland.envs.predictions import ShortestPathPredictorForRailEnv
-from flatland.envs.rail_env import RailEnv
-from flatland.envs.rail_generators import sparse_rail_generator
-from flatland.envs.schedule_generators import sparse_schedule_generator
-from flatland.utils.rendertools import RenderTool, AgentRenderVariant
-
-np.random.seed(1)
-
-# Use the new sparse_rail_generator to generate feasible network configurations with corresponding tasks
-# Training on simple small tasks is the best way to get familiar with the environment
-
-# Use a the malfunction generator to break agents from time to time
-stochastic_data = {'prop_malfunction': 0.3,  # Percentage of defective agents
-                   'malfunction_rate': 30,  # Rate of malfunction occurence
-                   'min_duration': 3,  # Minimal duration of malfunction
-                   'max_duration': 20  # Max duration of malfunction
-                   }
-
-# Custom observation builder
-TreeObservation = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())
-
-# Different agent types (trains) with different speeds.
-speed_ration_map = {1.: 0.25,  # Fast passenger train
-                    1. / 2.: 0.25,  # Fast freight train
-                    1. / 3.: 0.25,  # Slow commuter train
-                    1. / 4.: 0.25}  # Slow freight train
-
-env = RailEnv(width=100,
-              height=100,
-              rail_generator=sparse_rail_generator(max_num_cities=30,
-                                                   # Number of cities in map (where train stations are)
-                                                   seed=14,  # Random seed
-                                                   grid_mode=False,
-                                                   max_rails_between_cities=2,
-                                                   max_rails_in_city=8,
-                                                   ),
-              schedule_generator=sparse_schedule_generator(speed_ration_map),
-              number_of_agents=100,
-              stochastic_data=stochastic_data,  # Malfunction data generator
-              obs_builder_object=GlobalObsForRailEnv(),
-              remove_agents_at_target=True
-              )
-
-# RailEnv.DEPOT_POSITION = lambda agent, agent_handle : (agent_handle % env.height,0)
-
-env_renderer = RenderTool(env, gl="PILSVG",
-                          agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS_AND_BOX,
-                          show_debug=True,
-                          screen_height=1000,
-                          screen_width=1000)
-
-
-# Import your own Agent or use RLlib to train agents on Flatland
-# As an example we use a random agent instead
-class RandomAgent:
-
-    def __init__(self, state_size, action_size):
-        self.state_size = state_size
-        self.action_size = action_size
-
-    def act(self, state):
-        """
-        :param state: input is the observation of the agent
-        :return: returns an action
-        """
-        return 2  # np.random.choice(np.arange(self.action_size))
-
-    def step(self, memories):
-        """
-        Step function to improve agent by adjusting policy given the observations
-
-        :param memories: SARS Tuple to be
-        :return:
-        """
-        return
-
-    def save(self, filename):
-        # Store the current policy
-        return
-
-    def load(self, filename):
-        # Load a policy
-        return
-
-
-# Initialize the agent with the parameters corresponding to the environment and observation_builder
-# Set action space to 4 to remove stop action
-agent = RandomAgent(218, 4)
-
-# Empty dictionary for all agent action
-action_dict = dict()
-
-print("Start episode...")
-# Reset environment and get initial observations for all agents
-start_reset = time.time()
-obs, info = env.reset()
-end_reset = time.time()
-print(end_reset - start_reset)
-print(env.get_num_agents(), )
-# Reset the rendering sytem
-env_renderer.reset()
-
-# Here you can also further enhance the provided observation by means of normalization
-# See training navigation example in the baseline repository
-
-score = 0
-# Run episode
-frame_step = 0
-for step in range(500):
-    # Chose an action for each agent in the environment
-    for a in range(env.get_num_agents()):
-        action = agent.act(obs[a])
-        action_dict.update({a: action})
-
-    # Environment step which returns the observations for all agents, their corresponding
-    # reward and whether their are done
-    next_obs, all_rewards, done, _ = env.step(action_dict)
-    env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
-    frame_step += 1
-    # Update replay buffer and train agent
-    for a in range(env.get_num_agents()):
-        agent.step((obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a]))
-        score += all_rewards[a]
-
-    obs = next_obs.copy()
-    if done['__all__']:
-        break
-
-print('Episode: Steps {}\t Score = {}'.format(step, score))
--- a/examples/flatland_3_0_example.py
+++ b/examples/flatland_3_0_example.py
+import getopt
+import sys
+import time
+
+import numpy as np
+
+from flatland.envs.line_generators import sparse_line_generator
+from flatland.envs.malfunction_generators import MalfunctionParameters
+from flatland.envs.observations import TreeObsForRailEnv
+from flatland.envs.persistence import RailEnvPersister
+from flatland.envs.predictions import ShortestPathPredictorForRailEnv
+from flatland.envs.rail_env import RailEnv
+from flatland.envs.rail_generators import sparse_rail_generator
+from flatland.utils.misc import str2bool
+from flatland.utils.rendertools import RenderTool, AgentRenderVariant
+
+
+# Import your own Agent or use RLlib to train agents on Flatland
+# As an example we use a random agent instead
+class RandomAgent:
+    """Placeholder agent used by the example.
+
+    Despite its name, ``act`` currently always returns the constant action
+    ``2``; the random choice is commented out. ``step``, ``save`` and ``load``
+    are no-ops that only mark where a real agent would learn and persist.
+    """
+
+    def __init__(self, state_size, action_size):
+        # Both sizes are only stored; nothing in this class reads them.
+        self.state_size = state_size
+        self.action_size = action_size
+
+    def act(self, state):
+        """
+        :param state: input is the observation of the agent (ignored here)
+        :return: returns an action (always ``2``)
+        """
+        # NOTE(review): 2 is presumably the "move forward" action - confirm
+        # against the environment's action enum.
+        return 2  # np.random.choice(np.arange(self.action_size))
+
+    def step(self, memories):
+        """
+        Step function to improve agent by adjusting policy given the observations
+
+        :param memories: tuple of (state, action, reward, next state, done)
+            as passed by the episode loop; ignored by this placeholder
+        :return: None
+        """
+        return
+
+    def save(self, filename):
+        # Store the current policy (no-op in this placeholder)
+        return
+
+    def load(self, filename):
+        # Load a policy (no-op in this placeholder)
+        return
+
+
+def create_env():
+    """Create a small sparse-rail environment with tree observations.
+
+    The environment is 20 cells wide and 30 cells high, holds 3 agents and
+    uses ``TreeObsForRailEnv(max_depth=3)`` with a shortest-path predictor as
+    its observation builder.
+
+    :return: the constructed (not yet reset) ``RailEnv``
+    """
+    # Use the sparse_rail_generator to generate feasible network configurations with corresponding tasks
+    # Training on simple small tasks is the best way to get familiar with the environment
+
+    # NOTE(review): this function previously built
+    # MalfunctionParameters(malfunction_rate=30, min_duration=3, max_duration=20)
+    # and a second TreeObsForRailEnv(max_depth=2) without passing either to
+    # RailEnv. Those dead locals are removed; malfunctions are therefore NOT
+    # active in this example. Pass a malfunction generator to RailEnv if they
+    # are wanted.
+    nAgents = 3
+    n_cities = 2
+    max_rails_between_cities = 2
+    max_rails_in_city = 4
+    seed = 0
+    env = RailEnv(
+        width=20,
+        height=30,
+        rail_generator=sparse_rail_generator(
+            max_num_cities=n_cities,
+            seed=seed,
+            grid_mode=True,
+            max_rails_between_cities=max_rails_between_cities,
+            max_rail_pairs_in_city=max_rails_in_city
+        ),
+        line_generator=sparse_line_generator(),
+        number_of_agents=nAgents,
+        obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv())
+    )
+    return env
+
+
+def flatland_3_0_example(sleep_for_animation, do_rendering):
+    """Run one episode of at most 500 steps and save the environment.
+
+    The episode is driven by ``RandomAgent``; afterwards the environment is
+    written to ``saved_episode_2.pkl`` via ``RailEnvPersister.save``.
+
+    :param sleep_for_animation: accepted for signature parity with the other
+        examples; this function does not currently read it
+    :param do_rendering: when truthy, render every step in a ``RenderTool``
+        window and close the window at the end
+    """
+    np.random.seed(1)
+
+    env = create_env()
+    env.reset()
+
+    env_renderer = None
+    if do_rendering:
+        env_renderer = RenderTool(env, gl="PILSVG",
+                                  agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS_AND_BOX,
+                                  show_debug=True,
+                                  screen_height=1000,
+                                  screen_width=1000)
+
+    # Initialize the agent with the parameters corresponding to the environment and observation_builder
+    # Set action space to 4 to remove stop action
+    agent = RandomAgent(218, 4)
+
+    # Empty dictionary for all agent actions
+    action_dict = dict()
+
+    print("Start episode...")
+
+    # Reset environment and get initial observations for all agents;
+    # the reset is timed and its duration printed.
+    start_reset = time.time()
+    obs, info = env.reset()
+    end_reset = time.time()
+    print(end_reset - start_reset)
+    print(env.get_num_agents())
+
+    # Reset the rendering system
+    if env_renderer is not None:
+        env_renderer.reset()
+
+    # Here you can also further enhance the provided observation by means of normalization
+    # See training navigation example in the baseline repository
+
+    score = 0
+    # Run episode
+    for step in range(500):
+        # Choose an action for each agent in the environment
+        for a in range(env.get_num_agents()):
+            action_dict[a] = agent.act(obs[a])
+
+        # Environment step which returns the observations for all agents, their corresponding
+        # reward and whether they are done
+        next_obs, all_rewards, done, _ = env.step(action_dict)
+        if env_renderer is not None:
+            env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
+
+        # Update replay buffer and train agent
+        for a in range(env.get_num_agents()):
+            agent.step((obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a]))
+            score += all_rewards[a]
+
+        obs = next_obs.copy()
+        if done['__all__']:
+            break
+
+    if env_renderer is not None:
+        env_renderer.close_window()
+
+    print('Episode: Steps {}\t Score = {}'.format(step, score))
+    RailEnvPersister.save(env, "saved_episode_2.pkl")
+
+
+def main(args):
+    """
+    Parse the command-line options and run the example.
+
+    Recognised long options, each taking a value that is converted with ``str2bool``:
+    ``--sleep-for-animation`` and ``--do_rendering``. Both default to ``True``.
+
+    :param args: argument list without the program name (e.g. ``sys.argv[1:]``)
+    :return: None; on an option-parsing error the message is printed and the process exits with status 2
+    """
+    try:
+        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", "do_rendering="])
+    except getopt.GetoptError as err:
+        print(str(err))  # will print something like "option -a not recognized"
+        sys.exit(2)
+    sleep_for_animation = True
+    do_rendering = True
+    for o, a in opts:
+        # Compare with ``==``: ``o in ("--name")`` is a substring test on a plain string,
+        # because parentheses without a trailing comma do not create a tuple.
+        if o == "--sleep-for-animation":
+            sleep_for_animation = str2bool(a)
+        elif o == "--do_rendering":
+            do_rendering = str2bool(a)
+        else:
+            assert False, "unhandled option"
+
+    # execute example
+    flatland_3_0_example(sleep_for_animation, do_rendering)
+
+
+if __name__ == '__main__':
+    # Use a module-level ``argv`` when one is present in globals(); otherwise fall back
+    # to the real command line without the program name.
+    main(argv if 'argv' in globals() else sys.argv[1:])
--- a/examples/flatland_performance_profiling.py
+++ b/examples/flatland_performance_profiling.py
+import cProfile
+import pstats
+
+import numpy as np
+
+from flatland.core.env_observation_builder import DummyObservationBuilder
+from flatland.envs.line_generators import sparse_line_generator
+from flatland.envs.malfunction_generators import MalfunctionParameters, ParamMalfunctionGen
+from flatland.envs.observations import TreeObsForRailEnv
+from flatland.envs.predictions import ShortestPathPredictorForRailEnv
+from flatland.envs.rail_env import RailEnv
+from flatland.envs.rail_generators import sparse_rail_generator
+from flatland.utils.rendertools import RenderTool, AgentRenderVariant
+
+
+class RandomAgent:
+    """Agent that picks its action at random and ignores the observation."""
+
+    def __init__(self, action_size):
+        # number of discrete actions; act() samples from 0 .. action_size - 1
+        self.action_size = action_size
+
+    def act(self, state):
+        """
+        Sample a random action.
+
+        :param state: observation of the agent (not used for the choice)
+        :return: one element of ``np.arange(self.action_size)`` chosen by ``np.random.choice``
+        """
+        candidate_actions = np.arange(self.action_size)
+        return np.random.choice(candidate_actions)
+
+
+def get_rail_env(nAgents=70, use_dummy_obs=False, width=300, height=300):
+    """
+    Construct the RailEnv used by the profiling script (the env is not reset here).
+
+    :param nAgents: number of trains to create
+    :param use_dummy_obs: if True, a DummyObservationBuilder is used instead of the tree observation
+    :param width: width passed to RailEnv
+    :param height: height passed to RailEnv
+    :return: the newly created RailEnv
+    """
+    seed = 1  # Random seed, used for both the rail generator and the environment
+
+    # Rail generator: 5 cities, at most 2 rails between two cities and at most
+    # 2 pairs of tracks within a city.
+    # Even tracks are used as start points, odd tracks are used as endpoints
+    rail_generator = sparse_rail_generator(
+        max_num_cities=5,
+        seed=seed,
+        max_rails_between_cities=2,
+        max_rail_pairs_in_city=2,
+    )
+
+    # Line generator: sparse_line_generator accepts a dictionary which maps speeds to
+    # probabilities. Four train types share the probability mass equally:
+    # 1 (fast passenger), 1/2 (fast freight), 1/3 (slow commuter), 1/4 (slow freight).
+    speed_probability_map = {1. / slowdown: 0.25 for slowdown in (1, 2, 3, 4)}
+    line_generator = sparse_line_generator(speed_probability_map)
+
+    # Malfunction generator: rate of occurrence plus minimal / maximal duration
+    malfunction_generator = ParamMalfunctionGen(
+        MalfunctionParameters(
+            malfunction_rate=1 / 10000,
+            min_duration=15,
+            max_duration=50
+        )
+    )
+
+    # Observation builder: the tree observation returns a tree of possible paths from the
+    # current position (tree depth 3). The predictor is specific to the tree observation.
+    predictor = ShortestPathPredictorForRailEnv(max_depth=50)
+    observation_builder = TreeObsForRailEnv(max_depth=3, predictor=predictor)
+
+    # The tree builder above is still created first; it is simply replaced on request.
+    if use_dummy_obs:
+        observation_builder = DummyObservationBuilder()
+
+    return RailEnv(
+        width=width,
+        height=height,
+        rail_generator=rail_generator,
+        line_generator=line_generator,
+        number_of_agents=nAgents,
+        random_seed=seed,
+        obs_builder_object=observation_builder,
+        malfunction_generator=malfunction_generator
+    )
+
+
+def run_simulation(env_fast: RailEnv, do_rendering):
+    """
+    Step the environment 200 times with random actions, optionally rendering every step.
+
+    :param env_fast: rail environment to step (the caller is expected to have reset it)
+    :param do_rendering: if truthy, a RenderTool window is opened, updated after each step
+        and closed when the run ends
+    :return: None
+    """
+    agent = RandomAgent(action_size=5)
+    max_steps = 200
+
+    # Kept local so the function does not depend on a module-level ``action_dict``,
+    # which only exists when the file is executed as a script.
+    action_dict = dict()
+
+    env_renderer = None
+    if do_rendering:
+        env_renderer = RenderTool(env_fast,
+                                  gl="PGL",
+                                  show_debug=True,
+                                  agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS)
+        env_renderer.set_new_rail()
+        env_renderer.reset()
+
+    try:
+        for _ in range(max_steps):
+            # Choose an action for each agent in the environment
+            # (the agent ignores its input, so the handle is passed in place of an observation)
+            for handle in range(env_fast.get_num_agents()):
+                action_dict[handle] = agent.act(handle)
+
+            # The step results are not needed here: the run always lasts max_steps steps
+            env_fast.step(action_dict)
+            if env_renderer is not None:
+                env_renderer.render_env(
+                    show=True,
+                    frames=False,
+                    show_observations=True,
+                    show_predictions=False
+                )
+    finally:
+        # Close the window even when stepping or rendering raised
+        if env_renderer is not None:
+            env_renderer.close_window()
+
+
+# Master switch: when False no profiler is created and all PROFILE_* switches are ignored
+USE_PROFILER = True
+
+# Which phases are measured by the profiler
+PROFILE_CREATE = False
+PROFILE_RESET = False
+PROFILE_STEP = True
+PROFILE_OBSERVATION = False
+
+# Optionally run a random-agent simulation (with or without rendering) after profiling
+RUN_SIMULATION = False
+DO_RENDERING = False
+
+if __name__ == "__main__":
+    print("Start ...")
+    # The profiler only exists when USE_PROFILER is set; every phase below checks for it
+    # so that switching USE_PROFILER off cannot fail on an undefined ``profiler``.
+    profiler = cProfile.Profile() if USE_PROFILER else None
+
+    print("Create env ... ")
+    if profiler is not None and PROFILE_CREATE:
+        profiler.enable()
+    env_fast = get_rail_env(nAgents=200, use_dummy_obs=False, width=100, height=100)
+    if profiler is not None and PROFILE_CREATE:
+        profiler.disable()
+
+    print("Reset env ... ")
+    if profiler is not None and PROFILE_RESET:
+        profiler.enable()
+    env_fast.reset(random_seed=1)
+    if profiler is not None and PROFILE_RESET:
+        profiler.disable()
+
+    print("Make actions ... ")
+    action_dict = {agent.handle: 0 for agent in env_fast.agents}
+
+    print("Step env ... ")
+    if profiler is not None and PROFILE_STEP:
+        profiler.enable()
+    for i in range(1):
+        env_fast.step(action_dict)
+    if profiler is not None and PROFILE_STEP:
+        profiler.disable()
+
+    if profiler is not None and PROFILE_OBSERVATION:
+        profiler.enable()
+
+    print("get observation ... ")
+    obs = env_fast._get_observations()
+
+    if profiler is not None and PROFILE_OBSERVATION:
+        profiler.disable()
+
+    if profiler is not None:
+        # (pstats sort key, number of rows to print, enabled)
+        reports = (
+            ('tottime', 20, False),
+            ('cumtime', 200, True),
+            ('ncalls', 200, False),
+        )
+        for sort_key, row_limit, enabled in reports:
+            if not enabled:
+                continue
+            print("---- " + sort_key)
+            stats = pstats.Stats(profiler).sort_stats(sort_key)
+            stats.print_stats(row_limit)
+
+    print("... end ")
+
+    if RUN_SIMULATION:
+        run_simulation(env_fast, DO_RENDERING)
--- a/examples/introduction_flatland_2_1.py
+++ b/examples/introduction_flatland_2_1.py
+import os
+
 import numpy as np

+from flatland.envs.line_generators import sparse_line_generator
 # In Flatland you can use custom observation builders and predicitors
 # Observation builders generate the observation needed by the controller
 # Preditctors can be used to do short time prediction which can help in avoiding conflicts in the network
+from flatland.envs.malfunction_generators import MalfunctionParameters, ParamMalfunctionGen
 from flatland.envs.observations import GlobalObsForRailEnv
 # First of all we import the Flatland rail environment
 from flatland.envs.rail_env import RailEnv
 from flatland.envs.rail_env import RailEnvActions
 from flatland.envs.rail_generators import sparse_rail_generator
-from flatland.envs.schedule_generators import sparse_schedule_generator
 # We also include a renderer because we want to visualize what is going on in the environment
 from flatland.utils.rendertools import RenderTool, AgentRenderVariant

@@ -28,9 +31,11 @@ from flatland.utils.rendertools import RenderTool, AgentRenderVariant
 # The railway infrastructure can be build using any of the provided generators in env/rail_generators.py
 # Here we use the sparse_rail_generator with the following parameters

+DO_RENDERING = False
+
 width = 16 * 7  # With of map
 height = 9 * 7  # Height of map
-nr_trains = 20  # Number of trains that have an assigned task in the env
+nr_trains = 50  # Number of trains that have an assigned task in the env
 cities_in_map = 20  # Number of cities where agents can start or end
 seed = 14  # Random seed
 grid_distribution_of_cities = False  # Type of city distribution, if False cities are randomly placed
@@ -41,9 +46,17 @@ rail_generator = sparse_rail_generator(max_num_cities=cities_in_map,
                                       seed=seed,
                                       grid_mode=grid_distribution_of_cities,
                                       max_rails_between_cities=max_rails_between_cities,
-                                       max_rails_in_city=max_rail_in_cities,
+                                       max_rail_pairs_in_city=max_rail_in_cities,
                                       )

+# rail_generator = SparseRailGen(max_num_cities=cities_in_map,
+#                                       seed=seed,
+#                                       grid_mode=grid_distribution_of_cities,
+#                                       max_rails_between_cities=max_rails_between_cities,
+#                                       max_rails_in_city=max_rail_in_cities,
+#                                       )
+
+
 # The schedule generator can make very basic schedules with a start point, end point and a speed profile for each agent.
 # The speed profiles can be adjusted directly as well as shown later on. We start by introducing a statistical
 # distribution of speed profiles
@@ -56,15 +69,15 @@ speed_ration_map = {1.: 0.25,  # Fast passenger train

 # We can now initiate the schedule generator with the given speed profiles

-schedule_generator = sparse_schedule_generator(speed_ration_map)
+line_generator = sparse_line_generator(speed_ration_map)

 # We can furthermore pass stochastic data to the RailEnv constructor which will allow for stochastic malfunctions
 # during an episode.

-stochastic_data = {'malfunction_rate': 100,  # Rate of malfunction occurence of single agent
-                   'min_duration': 15,  # Minimal duration of malfunction
-                   'max_duration': 50  # Max duration of malfunction
-                   }
+stochastic_data = MalfunctionParameters(malfunction_rate=1 / 10000,  # Rate of malfunction occurence
+                                        min_duration=15,  # Minimal duration of malfunction
+                                        max_duration=50  # Max duration of malfunction
+                                        )

 # Custom observation builder without predictor
 observation_builder = GlobalObsForRailEnv()
@@ -76,20 +89,21 @@ observation_builder = GlobalObsForRailEnv()
 env = RailEnv(width=width,
              height=height,
              rail_generator=rail_generator,
-              schedule_generator=schedule_generator,
+              line_generator=line_generator,
              number_of_agents=nr_trains,
-              stochastic_data=stochastic_data,  # Malfunction data generator
              obs_builder_object=observation_builder,
-              remove_agents_at_target=True  # Removes agents at the end of their journey to make space for others
-              )
+              malfunction_generator=ParamMalfunctionGen(stochastic_data),
+              remove_agents_at_target=True)
 env.reset()

 # Initiate the renderer
-env_renderer = RenderTool(env, gl="PILSVG",
-                          agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS_AND_BOX,
-                          show_debug=False,
-                          screen_height=600,  # Adjust these parameters to fit your resolution
-                          screen_width=800)  # Adjust these parameters to fit your resolution
+env_renderer = None
+if DO_RENDERING:
+    env_renderer = RenderTool(env,
+                              agent_render_variant=AgentRenderVariant.ONE_STEP_BEHIND,
+                              show_debug=False,
+                              screen_height=600,  # Adjust these parameters to fit your resolution
+                              screen_width=800)  # Adjust these parameters to fit your resolution


 # The first thing we notice is that some agents don't have feasible paths to their target.
@@ -148,7 +162,7 @@ print("\n Their current statuses are:")
 print("============================")

 for agent_idx, agent in enumerate(env.agents):
-    print("Agent {} status is: {} with its current position being {}".format(agent_idx, str(agent.status),
+    print("Agent {} status is: {} with its current position being {}".format(agent_idx, str(agent.state),
                                                                             str(agent.position)))

 # The agent needs to take any action [1,2,3] except do_nothing or stop to enter the level
@@ -180,7 +194,7 @@ print("========================================================")
 for agent_id in agents_with_same_start:
    print(
        "Agent {} status is: {} with the current position being {}.".format(
-            agent_id, str(env.agents[agent_id].status),
+            agent_id, str(env.agents[agent_id].state),
            str(env.agents[agent_id].position)))

 # As you see only the agents with lower indexes moved. As soon as the cell is free again the agents can attempt
@@ -197,8 +211,8 @@ print("=========================================")

 for agent_idx, agent in enumerate(env.agents):
    print(
-        "Agent {} speed is: {:.2f} with the current fractional position being {}".format(
-            agent_idx, agent.speed_data['speed'], agent.speed_data['position_fraction']))
+        "Agent {} speed is: {:.2f} with the current fractional position being {}/{}".format(
+            agent_idx, agent.speed_counter.speed, agent.speed_counter.counter, agent.speed_counter.max_count))

 # New the agents can also have stochastic malfunctions happening which will lead to them being unable to move
 # for a certain amount of time steps. The malfunction data of the agents can easily be accessed as follows
@@ -208,7 +222,7 @@ print("========================================")
 for agent_idx, agent in enumerate(env.agents):
     print(
         "Agent {} is OK = {}".format(
-            agent_idx, agent.malfunction_data['malfunction'] < 1))
+            # "OK" means the agent is currently NOT in a malfunction, hence the negation
+            agent_idx, not agent.malfunction_handler.in_malfunction))

 # Now that you have seen these novel concepts that were introduced you will realize that agents don't need to take
 # an action at every time step as it will only change the outcome when actions are chosen at cell entry.
@@ -234,7 +248,8 @@ for info in information['action_required']:
 print("\nStart episode...")

 # Reset the rendering system
-env_renderer.reset()
+if env_renderer is not None:
+    env_renderer.reset()

 # Here you can also further enhance the provided observation by means of normalization
 # See training navigation example in the baseline repository
@@ -244,7 +259,9 @@ score = 0
 # Run episode
 frame_step = 0

-for step in range(500):
+os.makedirs("tmp/frames", exist_ok=True)
+
+for step in range(200):
    # Chose an action for each agent in the environment
    for a in range(env.get_num_agents()):
        action = controller.act(observations[a])
@@ -255,8 +272,10 @@ for step in range(500):

    next_obs, all_rewards, done, _ = env.step(action_dict)

-    env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
-    # env_renderer.gl.save_image('./misc/Fames2/flatland_frame_{:04d}.png'.format(step))
+    if env_renderer is not None:
+        env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
+        env_renderer.gl.save_image('tmp/frames/flatland_frame_{:04d}.png'.format(step))
+
    frame_step += 1
    # Update replay buffer and train agent
    for a in range(env.get_num_agents()):
@@ -267,3 +286,7 @@ for step in range(500):
    if done['__all__']:
        break
    print('Episode: Steps {}\t Score = {}'.format(step, score))
+
+# close the renderer / rendering window
+if env_renderer is not None:
+    env_renderer.close_window()
--- a/examples/simple_example_1.py
+++ b/examples/simple_example_1.py
-from flatland.envs.rail_env import RailEnv
-from flatland.envs.rail_generators import rail_from_manual_specifications_generator
-from flatland.utils.rendertools import RenderTool
-
-# Example generate a rail given a manual specification,
-# a map of tuples (cell_type, rotation)
-specs = [[(0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0)],
-         [(0, 0), (0, 0), (0, 0), (0, 0), (7, 0), (0, 0)],
-         [(7, 270), (1, 90), (1, 90), (1, 90), (2, 90), (7, 90)],
-         [(0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0)]]
-
-env = RailEnv(width=6,
-              height=4,
-              rail_generator=rail_from_manual_specifications_generator(specs),
-              number_of_agents=1)
-
-env.reset()
-
-env_renderer = RenderTool(env)
-env_renderer.render_env(show=True, show_predictions=False, show_observations=False)
-
-# uncomment to keep the renderer open
-#input("Press Enter to continue...")
No results found