Compare revisions

a6c4ae6a · a6c4ae6a · a6c4ae6a · a6c4ae6a · fe418775 · fe418775
--- a/examples/flatland_3_0_example.py
+++ b/examples/flatland_3_0_example.py
+import getopt
+import sys
+import time
+
+import numpy as np
+
+from flatland.envs.line_generators import sparse_line_generator
+from flatland.envs.malfunction_generators import MalfunctionParameters
+from flatland.envs.observations import TreeObsForRailEnv
+from flatland.envs.persistence import RailEnvPersister
+from flatland.envs.predictions import ShortestPathPredictorForRailEnv
+from flatland.envs.rail_env import RailEnv
+from flatland.envs.rail_generators import sparse_rail_generator
+from flatland.utils.misc import str2bool
+from flatland.utils.rendertools import RenderTool, AgentRenderVariant
+
+
+# Import your own Agent or use RLlib to train agents on Flatland
+# As an example we use a random agent instead
+# Minimal stand-in agent exposing act/step/save/load; step, save and load are no-op stubs.
+class RandomAgent:
+
+    # state_size and action_size are stored as given; no method of this class reads them back.
+    def __init__(self, state_size, action_size):
+        self.state_size = state_size
+        self.action_size = action_size
+
+    # Despite the class name, the returned action is the constant 2 (the random draw is
+    # commented out) and `state` is ignored.
+    # NOTE(review): 2 is presumably RailEnvActions.MOVE_FORWARD - confirm against RailEnvActions.
+    def act(self, state):
+        """
+        :param state: input is the observation of the agent
+        :return: returns an action
+        """
+        return 2  # np.random.choice(np.arange(self.action_size))
+
+    # Training hook: receives one tuple per agent and step, and does nothing with it.
+    def step(self, memories):
+        """
+        Step function to improve agent by adjusting policy given the observations
+
+        :param memories: SARS Tuple to be
+        :return:
+        """
+        return
+
+    # Placeholder: nothing is written to `filename`.
+    def save(self, filename):
+        # Store the current policy
+        return
+
+    # Placeholder: nothing is read from `filename`.
+    def load(self, filename):
+        # Load a policy
+        return
+
+
+def create_env():
+    """
+    Build the small rail environment used by this example.
+
+    The rail layout comes from ``sparse_rail_generator`` (grid mode, seed 0) on a map of
+    width 20 and height 30, agent tasks come from ``sparse_line_generator`` and the
+    observation builder is a depth-3 ``TreeObsForRailEnv`` with the shortest-path predictor.
+
+    No malfunction generator is passed to ``RailEnv`` here. The environment is returned
+    without being reset.
+
+    :return: the constructed ``RailEnv``
+    """
+    # Use the new sparse_rail_generator to generate feasible network configurations with corresponding tasks
+    # Training on simple small tasks is the best way to get familiar with the environment
+    nAgents = 3
+    n_cities = 2
+    max_rails_between_cities = 2
+    max_rails_in_city = 4
+    seed = 0
+    env = RailEnv(
+        width=20,
+        height=30,
+        rail_generator=sparse_rail_generator(
+            max_num_cities=n_cities,
+            seed=seed,
+            grid_mode=True,
+            max_rails_between_cities=max_rails_between_cities,
+            max_rail_pairs_in_city=max_rails_in_city
+        ),
+        line_generator=sparse_line_generator(),
+        number_of_agents=nAgents,
+        # Custom observation builder
+        obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv())
+    )
+    return env
+
+
+# Run one episode of at most 500 steps on the environment from create_env(), optionally
+# rendering every step, then print the step/score summary and persist the environment to
+# "saved_episode_2.pkl" via RailEnvPersister.save.
+#
+# :param sleep_for_animation: accepted for the command-line interface but never read in this body
+# :param do_rendering: when truthy a RenderTool is created and used after every step
+def flatland_3_0_example(sleep_for_animation, do_rendering):
+    np.random.seed(1)
+
+    env = create_env()
+    # NOTE(review): the environment is reset here and again below (where the observations are
+    # taken); confirm whether this first reset is needed before the renderer is constructed.
+    env.reset()
+
+    env_renderer = None
+    if do_rendering:
+        env_renderer = RenderTool(env, gl="PILSVG",
+                                  agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS_AND_BOX,
+                                  show_debug=True,
+                                  screen_height=1000,
+                                  screen_width=1000)
+
+    # Initialize the agent with the parameters corresponding to the environment and observation_builder
+    # Set action space to 4 to remove stop action
+    agent = RandomAgent(218, 4)
+
+    # Empty dictionary for all agent actions
+    action_dict = dict()
+
+    print("Start episode...")
+
+    # Reset environment and get initial observations for all agents; the reset is timed and
+    # the elapsed seconds are printed
+    start_reset = time.time()
+    obs, info = env.reset()
+    end_reset = time.time()
+    print(end_reset - start_reset)
+    print(env.get_num_agents(), )
+
+    # Reset the rendering system
+    if env_renderer is not None:
+        env_renderer.reset()
+
+    # Here you can also further enhance the provided observation by means of normalization
+    # See training navigation example in the baseline repository
+
+    score = 0
+    # Run episode
+    # frame_step is incremented every step but not read anywhere in this function
+    frame_step = 0
+    for step in range(500):
+        # Choose an action for each agent in the environment
+        for a in range(env.get_num_agents()):
+            action = agent.act(obs[a])
+            action_dict.update({a: action})
+
+        # Environment step which returns the observations for all agents, their corresponding
+        # reward and whether they are done
+        next_obs, all_rewards, done, _ = env.step(action_dict)
+        if env_renderer is not None:
+            env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
+
+        frame_step += 1
+        # Update replay buffer and train agent
+        for a in range(env.get_num_agents()):
+            agent.step((obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a]))
+            score += all_rewards[a]
+
+        obs = next_obs.copy()
+        # done['__all__'] ends the episode early
+        if done['__all__']:
+            break
+
+    if env_renderer is not None:
+        env_renderer.close_window()
+
+    # `step` is the index of the last executed step, so the printed value is one less than
+    # the number of steps that were run
+    print('Episode: Steps {}\t Score = {}'.format(step, score))
+    RailEnvPersister.save(env, "saved_episode_2.pkl")
+
+
+def main(args):
+    """
+    Parse the command-line options and run the example.
+
+    Recognised long options, each taking a value that is converted with ``str2bool``:
+    ``--sleep-for-animation`` and ``--do_rendering``. Both default to ``True`` when absent.
+    An option that getopt does not recognise prints the error and exits with status 2.
+
+    :param args: argument list without the program name, e.g. ``sys.argv[1:]``
+    """
+    try:
+        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", "do_rendering="])
+    except getopt.GetoptError as err:
+        print(str(err))  # will print something like "option -a not recognized"
+        sys.exit(2)
+    sleep_for_animation = True
+    do_rendering = True
+    for o, a in opts:
+        # Compare with ==: `o in ("--name")` is a substring test against a plain string,
+        # because parentheses without a trailing comma do not create a tuple.
+        if o == "--sleep-for-animation":
+            sleep_for_animation = str2bool(a)
+        elif o == "--do_rendering":
+            do_rendering = str2bool(a)
+        else:
+            # Explicit raise instead of `assert False`, which is stripped under `python -O`.
+            raise AssertionError("unhandled option")
+
+    # execute example
+    flatland_3_0_example(sleep_for_animation, do_rendering)
+
+
+if __name__ == '__main__':
+    # NOTE(review): `argv` is not defined anywhere in this file; presumably a caller that
+    # executes the script with a prepared globals dict injects it to override the real
+    # command line - confirm. Otherwise the process arguments (without program name) are used.
+    if 'argv' in globals():
+        main(argv)
+    else:
+        main(sys.argv[1:])
--- a/examples/flatland_performance_profiling.py
+++ b/examples/flatland_performance_profiling.py
+import cProfile
+import pstats
+
+import numpy as np
+
+from flatland.core.env_observation_builder import DummyObservationBuilder
+from flatland.envs.line_generators import sparse_line_generator
+from flatland.envs.malfunction_generators import MalfunctionParameters, ParamMalfunctionGen
+from flatland.envs.observations import TreeObsForRailEnv
+from flatland.envs.predictions import ShortestPathPredictorForRailEnv
+from flatland.envs.rail_env import RailEnv
+from flatland.envs.rail_generators import sparse_rail_generator
+from flatland.utils.rendertools import RenderTool, AgentRenderVariant
+
+
+# Agent that picks a uniformly random integer action in [0, action_size) on every call.
+class RandomAgent:
+    def __init__(self, action_size):
+        self.action_size = action_size
+
+    # `state` is ignored; the draw uses numpy's global random state.
+    def act(self, state):
+        """
+        :param state: input is the observation of the agent
+        :return: returns an action
+        """
+        return np.random.choice(np.arange(self.action_size))
+
+
+# Build the RailEnv used for profiling: sparse rail generator, sparse line generator with four
+# equally likely speed classes, a parametrised malfunction generator and either a tree
+# observation or a dummy observation builder. The environment is returned without being reset.
+#
+# :param nAgents: number of trains to create
+# :param use_dummy_obs: when truthy, DummyObservationBuilder replaces the tree observation
+# :param width: map width passed to RailEnv
+# :param height: map height passed to RailEnv
+# :return: the constructed RailEnv
+def get_rail_env(nAgents=70, use_dummy_obs=False, width=300, height=300):
+    # Rail Generator:
+
+    num_cities = 5  # Number of cities to place on the map
+    seed = 1  # Random seed
+    max_rails_between_cities = 2  # Maximum number of rails connecting 2 cities
+    max_rail_pairs_in_cities = 2  # Maximum number of pairs of tracks within a city
+    # (Even tracks are used as start points, odd tracks are used as endpoints)
+
+    rail_generator = sparse_rail_generator(
+        max_num_cities=num_cities,
+        seed=seed,
+        max_rails_between_cities=max_rails_between_cities,
+        max_rail_pairs_in_city=max_rail_pairs_in_cities,
+    )
+
+    # Line Generator
+
+    # sparse_line_generator accepts a dictionary which maps speeds to probabilities.
+    # Different agent types (trains) with different speeds.
+    speed_probability_map = {
+        1.: 0.25,  # Fast passenger train
+        1. / 2.: 0.25,  # Fast freight train
+        1. / 3.: 0.25,  # Slow commuter train
+        1. / 4.: 0.25  # Slow freight train
+    }
+
+    line_generator = sparse_line_generator(speed_probability_map)
+
+    # Malfunction Generator:
+
+    stochastic_data = MalfunctionParameters(
+        malfunction_rate=1 / 10000,  # Rate of malfunction occurrence
+        min_duration=15,  # Minimal duration of malfunction
+        max_duration=50  # Max duration of malfunction
+    )
+
+    malfunction_generator = ParamMalfunctionGen(stochastic_data)
+
+    # Observation Builder
+
+    # tree observation returns a tree of possible paths from the current position.
+    max_depth = 3  # Max depth of the tree
+    predictor = ShortestPathPredictorForRailEnv(
+        max_depth=50)  # (Specific to Tree Observation - read code)
+
+    observation_builder = TreeObsForRailEnv(
+        max_depth=max_depth,
+        predictor=predictor
+    )
+
+    # The tree observation built above is discarded when the dummy builder is requested
+    if use_dummy_obs:
+        observation_builder = DummyObservationBuilder()
+
+    number_of_agents = nAgents  # Number of trains to create
+    # Same value as the rail generator seed assigned above; this assignment is redundant
+    seed = 1  # Random seed
+
+    env = RailEnv(
+        width=width,
+        height=height,
+        rail_generator=rail_generator,
+        line_generator=line_generator,
+        number_of_agents=number_of_agents,
+        random_seed=seed,
+        obs_builder_object=observation_builder,
+        malfunction_generator=malfunction_generator
+    )
+    return env
+
+
+def run_simulation(env_fast: RailEnv, do_rendering):
+    """
+    Drive ``env_fast`` for 200 steps with random actions, optionally rendering each step.
+
+    The environment is expected to have been reset by the caller; this function only
+    calls ``step`` on it.
+
+    :param env_fast: the environment to step
+    :param do_rendering: when truthy a ``RenderTool`` window is opened, updated after every
+        step and closed at the end
+    """
+    agent = RandomAgent(action_size=5)
+    max_steps = 200
+    # Built locally so the function does not depend on a module-level `action_dict`
+    # that only exists when this file is executed as a script.
+    action_dict = dict()
+
+    env_renderer = None
+    if do_rendering:
+        env_renderer = RenderTool(env_fast,
+                                  gl="PGL",
+                                  show_debug=True,
+                                  agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS)
+        env_renderer.set_new_rail()
+        env_renderer.reset()
+    for _ in range(max_steps):
+
+        # Choose an action for each agent in the environment; the agent ignores its
+        # argument, so the handle is passed in place of an observation
+        for handle in range(env_fast.get_num_agents()):
+            action_dict[handle] = agent.act(handle)
+
+        # The step results are not needed here, only the work done by the environment
+        env_fast.step(action_dict)
+        if env_renderer is not None:
+            env_renderer.render_env(
+                show=True,
+                frames=False,
+                show_observations=True,
+                show_predictions=False
+            )
+
+    if env_renderer is not None:
+        env_renderer.close_window()
+
+
+USE_PROFILER = True
+
+PROFILE_CREATE = False
+PROFILE_RESET = False
+PROFILE_STEP = True
+PROFILE_OBSERVATION = False
+
+RUN_SIMULATION = False
+DO_RENDERING = False
+
+if __name__ == "__main__":
+    print("Start ...")
+    # Create the profiler only when requested. Every enable()/disable() below is guarded on
+    # it, so USE_PROFILER = False works regardless of how the PROFILE_* switches are set.
+    profiler = cProfile.Profile() if USE_PROFILER else None
+
+    print("Create env ... ")
+    if profiler is not None and PROFILE_CREATE:
+        profiler.enable()
+    env_fast = get_rail_env(nAgents=200, use_dummy_obs=False, width=100, height=100)
+    if profiler is not None and PROFILE_CREATE:
+        profiler.disable()
+
+    print("Reset env ... ")
+    if profiler is not None and PROFILE_RESET:
+        profiler.enable()
+    env_fast.reset(random_seed=1)
+    if profiler is not None and PROFILE_RESET:
+        profiler.disable()
+
+    print("Make actions ... ")
+    # Action 0 for every agent
+    action_dict = {agent.handle: 0 for agent in env_fast.agents}
+
+    print("Step env ... ")
+    if profiler is not None and PROFILE_STEP:
+        profiler.enable()
+    for i in range(1):
+        env_fast.step(action_dict)
+    if profiler is not None and PROFILE_STEP:
+        profiler.disable()
+
+    if profiler is not None and PROFILE_OBSERVATION:
+        profiler.enable()
+
+    print("get observation ... ")
+    obs = env_fast._get_observations()
+
+    if profiler is not None and PROFILE_OBSERVATION:
+        profiler.disable()
+
+    if profiler is not None:
+        # The `if False` / `if True` switches select which report is printed; edit them by hand.
+        if False:
+            print("---- tottime")
+            stats = pstats.Stats(profiler).sort_stats('tottime')  # ncalls, 'cumtime'...
+            stats.print_stats(20)
+
+        if True:
+            print("---- cumtime")
+            stats = pstats.Stats(profiler).sort_stats('cumtime')  # ncalls, 'cumtime'...
+            stats.print_stats(200)
+
+        if False:
+            print("---- ncalls")
+            stats = pstats.Stats(profiler).sort_stats('ncalls')  # ncalls, 'cumtime'...
+            stats.print_stats(200)
+
+    print("... end ")
+
+    if RUN_SIMULATION:
+        run_simulation(env_fast, DO_RENDERING)
--- a/examples/introduction_flatland_3.py
+++ b/examples/introduction_flatland_3.py
+import os
+
+import numpy as np
+
+from flatland.envs.line_generators import sparse_line_generator
+# In Flatland you can use custom observation builders and predictors
+# Observation builders generate the observation needed by the controller
+# Predictors can be used to do short time prediction which can help in avoiding conflicts in the network
+from flatland.envs.malfunction_generators import MalfunctionParameters, ParamMalfunctionGen
+from flatland.envs.observations import GlobalObsForRailEnv
+# First of all we import the Flatland rail environment
+from flatland.envs.rail_env import RailEnv
+from flatland.envs.rail_env import RailEnvActions
+from flatland.envs.rail_generators import sparse_rail_generator
+# We also include a renderer because we want to visualize what is going on in the environment
+from flatland.utils.rendertools import RenderTool, AgentRenderVariant
+
+# This is an introduction example for the Flatland 2.1.* version.
+# Changes and highlights of this version include
+# - Stochastic events (malfunctions)
+# - Different travel speeds for different agents
+# - Levels are generated using a novel generator to reflect more realistic railway networks
+# - Agents start outside of the environment and enter at their own time
+# - Agents leave the environment after they have reached their goal
+# Use the new sparse_rail_generator to generate feasible network configurations with corresponding tasks
+# Training on simple small tasks is the best way to get familiar with the environment
+# We start by importing the necessary rail and schedule generators
+# The rail generator will generate the railway infrastructure
+# The schedule generator will assign tasks to all the agent within the railway network
+
+# The railway infrastructure can be build using any of the provided generators in env/rail_generators.py
+# Here we use the sparse_rail_generator with the following parameters
+
+DO_RENDERING = False
+
+width = 16 * 7  # With of map
+height = 9 * 7  # Height of map
+nr_trains = 50  # Number of trains that have an assigned task in the env
+cities_in_map = 20  # Number of cities where agents can start or end
+seed = 14  # Random seed
+grid_distribution_of_cities = False  # Type of city distribution, if False cities are randomly placed
+max_rails_between_cities = 2  # Max number of tracks allowed between cities. This is number of entry point to a city
+max_rail_in_cities = 6  # Max number of parallel tracks within a city, representing a realistic trainstation
+
+rail_generator = sparse_rail_generator(max_num_cities=cities_in_map,
+                                       seed=seed,
+                                       grid_mode=grid_distribution_of_cities,
+                                       max_rails_between_cities=max_rails_between_cities,
+                                       max_rail_pairs_in_city=max_rail_in_cities,
+                                       )
+
+# rail_generator = SparseRailGen(max_num_cities=cities_in_map,
+#                                       seed=seed,
+#                                       grid_mode=grid_distribution_of_cities,
+#                                       max_rails_between_cities=max_rails_between_cities,
+#                                       max_rails_in_city=max_rail_in_cities,
+#                                       )
+
+
+# The schedule generator can make very basic schedules with a start point, end point and a speed profile for each agent.
+# The speed profiles can be adjusted directly as well as shown later on. We start by introducing a statistical
+# distribution of speed profiles
+
+# Different agent types (trains) with different speeds.
+speed_ration_map = {1.: 0.25,  # Fast passenger train
+                    1. / 2.: 0.25,  # Fast freight train
+                    1. / 3.: 0.25,  # Slow commuter train
+                    1. / 4.: 0.25}  # Slow freight train
+
+# We can now initiate the schedule generator with the given speed profiles
+
+line_generator = sparse_line_generator(speed_ration_map)
+
+# We can furthermore pass stochastic data to the RailEnv constructor which will allow for stochastic malfunctions
+# during an episode.
+
+stochastic_data = MalfunctionParameters(malfunction_rate=1 / 10000,  # Rate of malfunction occurence
+                                        min_duration=15,  # Minimal duration of malfunction
+                                        max_duration=50  # Max duration of malfunction
+                                        )
+
+# Custom observation builder without predictor
+observation_builder = GlobalObsForRailEnv()
+
+# Custom observation builder with predictor, uncomment line below if you want to try this one
+# observation_builder = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())
+
+# Construct the environment with the given observation, generators, predictors, and stochastic data
+env = RailEnv(width=width,
+              height=height,
+              rail_generator=rail_generator,
+              line_generator=line_generator,
+              number_of_agents=nr_trains,
+              obs_builder_object=observation_builder,
+              malfunction_generator=ParamMalfunctionGen(stochastic_data),
+              remove_agents_at_target=True)
+env.reset()
+
+# Initiate the renderer
+env_renderer = None
+if DO_RENDERING:
+    env_renderer = RenderTool(env,
+                              agent_render_variant=AgentRenderVariant.ONE_STEP_BEHIND,
+                              show_debug=False,
+                              screen_height=600,  # Adjust these parameters to fit your resolution
+                              screen_width=800)  # Adjust these parameters to fit your resolution
+
+
+# The first thing we notice is that some agents don't have feasible paths to their target.
+# We first look at the map we have created
+
+# env_renderer.render_env(show=True)
+# time.sleep(2)
+# Import your own Agent or use RLlib to train agents on Flatland
+# As an example we use a random agent instead
+# Random controller exposing act/step/save/load; step, save and load are no-op stubs.
+class RandomAgent:
+
+    # state_size and action_size are stored as given; no method of this class reads them back.
+    def __init__(self, state_size, action_size):
+        self.state_size = state_size
+        self.action_size = action_size
+
+    # `state` is ignored. The action is drawn uniformly (numpy global random state) from
+    # MOVE_FORWARD, MOVE_RIGHT, MOVE_LEFT and STOP_MOVING; no other action is ever returned.
+    def act(self, state):
+        """
+        :param state: input is the observation of the agent
+        :return: returns an action
+        """
+        return np.random.choice([RailEnvActions.MOVE_FORWARD, RailEnvActions.MOVE_RIGHT, RailEnvActions.MOVE_LEFT,
+                                 RailEnvActions.STOP_MOVING])
+
+    # Training hook: receives one tuple per agent and step, and does nothing with it.
+    def step(self, memories):
+        """
+        Step function to improve agent by adjusting policy given the observations
+
+        :param memories: SARS Tuple to be
+        :return:
+        """
+        return
+
+    # Placeholder: nothing is written to `filename`.
+    def save(self, filename):
+        # Store the current policy
+        return
+
+    # Placeholder: nothing is read from `filename`.
+    def load(self, filename):
+        # Load a policy
+        return
+
+
+# Initialize the agent with the parameters corresponding to the environment and observation_builder
+controller = RandomAgent(218, env.action_space[0])
+
+# We start by looking at the information of each agent
+# We can see the task assigned to the agent by looking at
+print("\n Agents in the environment have to solve the following tasks: \n")
+for agent_idx, agent in enumerate(env.agents):
+    print(
+        "The agent with index {} has the task to go from its initial position {}, facing in the direction {} to its target at {}.".format(
+            agent_idx, agent.initial_position, agent.direction, agent.target))
+
+# The agent will always have a status indicating if it is currently present in the environment or done or active
+# For example we see that agent with index 0 is currently not active
+print("\n Their current statuses are:")
+print("============================")
+
+for agent_idx, agent in enumerate(env.agents):
+    print("Agent {} status is: {} with its current position being {}".format(agent_idx, str(agent.state),
+                                                                             str(agent.position)))
+
+# The agent needs to take any action [1,2,3] except do_nothing or stop to enter the level
+# If the starting cell is free they will enter the level
+# If multiple agents want to enter the same cell at the same time the lower index agent will enter first.
+
+# Let's check if there are any agents with the same start location
+agents_with_same_start = set()
+print("\n The following agents have the same initial position:")
+print("=====================================================")
+for agent_idx, agent in enumerate(env.agents):
+    for agent_2_idx, agent2 in enumerate(env.agents):
+        if agent_idx != agent_2_idx and agent.initial_position == agent2.initial_position:
+            print("Agent {} as the same initial position as agent {}".format(agent_idx, agent_2_idx))
+            agents_with_same_start.add(agent_idx)
+
+# Lets try to enter with all of these agents at the same time
+action_dict = dict()
+
+for agent_id in agents_with_same_start:
+    action_dict[agent_id] = 1  # Try to move with the agents
+
+# Do a step in the environment to see what agents entered:
+env.step(action_dict)
+
+# Current state and position of the agents after all agents with same start position tried to move
+print("\n This happened when all tried to enter at the same time:")
+print("========================================================")
+for agent_id in agents_with_same_start:
+    print(
+        "Agent {} status is: {} with the current position being {}.".format(
+            agent_id, str(env.agents[agent_id].state),
+            str(env.agents[agent_id].position)))
+
+# As you see only the agents with lower indexes moved. As soon as the cell is free again the agents can attempt
+# to start again.
+
+# You will also notice, that the agents move at different speeds once they are on the rail.
+# The agents will always move at full speed when moving, never a speed inbetween.
+# The fastest an agent can go is 1, meaning that it moves to the next cell at every time step
+# All slower speeds indicate the fraction of a cell that is moved at each time step
+# Lets look at the current speed data of the agents:
+
+print("\n The speed information of the agents are:")
+print("=========================================")
+
+for agent_idx, agent in enumerate(env.agents):
+    print(
+        "Agent {} speed is: {:.2f} with the current fractional position being {}/{}".format(
+            agent_idx, agent.speed_counter.speed, agent.speed_counter.counter, agent.speed_counter.max_count))
+
+# Now the agents can also have stochastic malfunctions happening which will lead to them being unable to move
+# for a certain amount of time steps. The malfunction data of the agents can easily be accessed as follows
+print("\n The malfunction data of the agents are:")
+print("========================================")
+
+for agent_idx, agent in enumerate(env.agents):
+    # "OK" means the agent is *not* currently malfunctioning, so the flag has to be negated
+    print(
+        "Agent {} is OK = {}".format(
+            agent_idx, not agent.malfunction_handler.in_malfunction))
+
+# Now that you have seen these novel concepts that were introduced you will realize that agents don't need to take
+# an action at every time step as it will only change the outcome when actions are chosen at cell entry.
+# Therefore the environment provides information about what agents need to provide an action in the next step.
+# You can access this in the following way.
+
+# Chose an action for each agent
+for a in range(env.get_num_agents()):
+    action = controller.act(0)
+    action_dict.update({a: action})
+# Do the environment step
+observations, rewards, dones, information = env.step(action_dict)
+print("\n The following agents can register an action:")
+print("========================================")
+for info in information['action_required']:
+    print("Agent {} needs to submit an action.".format(info))
+
+# We recommend that you monitor the malfunction data and the action required in order to optimize your training
+# and controlling code.
+
+# Let us now look at an episode playing out with random actions performed
+
+print("\nStart episode...")
+
+# Reset the rendering system
+if env_renderer is not None:
+    env_renderer.reset()
+
+# Here you can also further enhance the provided observation by means of normalization
+# See training navigation example in the baseline repository
+
+
+score = 0
+# Run episode
+frame_step = 0
+
+os.makedirs("tmp/frames", exist_ok=True)
+
+for step in range(200):
+    # Chose an action for each agent in the environment
+    for a in range(env.get_num_agents()):
+        action = controller.act(observations[a])
+        action_dict.update({a: action})
+
+    # Environment step which returns the observations for all agents, their corresponding
+    # reward and whether their are done
+
+    next_obs, all_rewards, done, _ = env.step(action_dict)
+
+    if env_renderer is not None:
+        env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
+        env_renderer.gl.save_image('tmp/frames/flatland_frame_{:04d}.png'.format(step))
+
+    frame_step += 1
+    # Update replay buffer and train agent
+    for a in range(env.get_num_agents()):
+        controller.step((observations[a], action_dict[a], all_rewards[a], next_obs[a], done[a]))
+        score += all_rewards[a]
+
+    observations = next_obs.copy()
+    if done['__all__']:
+        break
+    print('Episode: Steps {}\t Score = {}'.format(step, score))
+
+# close the renderer / rendering window
+if env_renderer is not None:
+    env_renderer.close_window()
--- a/examples/misc/generate_video/video_generation.md
+++ b/examples/misc/generate_video/video_generation.md
+# Making Videos from Env
+
+In order to generate videos or GIFs, it is easiest to generate image files and then run ffmpeg to generate a video.
+
+## 1. Generating Images from Env
+
+Start by importing the renderer and instantiating it
+
+```
+from flatland.utils.rendertools import RenderTool
+env_renderer = RenderTool(env, gl="PILSVG", )
+```
+
+If the environment changes don't forget to reset the renderer
+```
+env_renderer.reset()
+```
+
+You can now record an image after every step. It is best to use a format similar to the one below, where `frame_step` is counting the number of steps.
+```
+env_renderer.gl.save_image("./Images/Avoiding/flatland_frame_{:04d}.bmp".format(frame_step))
+```
+
+Once the images have been saved to the folder you can run a shell from that folder and run the following commands.
+
+Generate an mp4 out of the images:
+```
+ffmpeg -y -framerate 12 -i flatland_frame_%04d.bmp -hide_banner -c:v libx264 -pix_fmt yuv420p test.mp4
+```
+
+Generate a palette out of the video necessary to generate beautiful gifs:
+```
+ffmpeg  -i test.mp4 -filter_complex "[0:v] palettegen" palette.png
+```
+and finally generate the gif
+```
+ffmpeg -i test.mp4 -i palette.png -filter_complex "[0:v][1:v] paletteuse" single_agent_navigation.gif
+```
--- a/examples/play_model.py
+++ b/examples/play_model.py
-# import torch
-import random
-import time
-# from flatland.baselines.dueling_double_dqn import Agent
-from collections import deque
-
-import numpy as np
-
-from flatland.envs.generators import complex_rail_generator
-from flatland.envs.rail_env import RailEnv
-from flatland.utils.rendertools import RenderTool
-
-
-class Player(object):
-    def __init__(self, env):
-        self.env = env
-        self.handle = env.get_agent_handles()
-
-        self.state_size = 105
-        self.action_size = 4
-        self.n_trials = 9999
-        self.eps = 1.
-        self.eps_end = 0.005
-        self.eps_decay = 0.998
-        self.action_dict = dict()
-        self.scores_window = deque(maxlen=100)
-        self.done_window = deque(maxlen=100)
-        self.scores = []
-        self.dones_list = []
-        self.action_prob = [0] * 4
-
-        # Removing refs to a real agent for now.
-        # self.agent = Agent(self.state_size, self.action_size, "FC", 0)
-        # self.agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
-        # self.agent.qnetwork_local.load_state_dict(torch.load(
-        #    '../flatland/flatland/baselines/Nets/avoid_checkpoint15000.pth'))
-
-        self.iFrame = 0
-        self.tStart = time.time()
-
-        # Reset environment
-        # self.obs = self.env.reset()
-        self.env.obs_builder.reset()
-        self.obs = self.env._get_observations()
-        for envAgent in range(self.env.get_num_agents()):
-            norm = max(1, max_lt(self.obs[envAgent], np.inf))
-            self.obs[envAgent] = np.clip(np.array(self.obs[envAgent]) / norm, -1, 1)
-
-        # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
-
-        self.score = 0
-        self.env_done = 0
-
-    def reset(self):
-        self.obs = self.env.reset()
-        return self.obs
-
-    def step(self):
-        env = self.env
-
-        # Pass the (stored) observation to the agent network and retrieve the action
-        for handle in env.get_agent_handles():
-            # Real Agent
-            # action = self.agent.act(np.array(self.obs[handle]), eps=self.eps)
-            # Random actions
-            action = random.randint(0, 3)
-            # Numpy version uses single random sequence
-            # action = np.random.randint(0, 4, size=1)
-            self.action_prob[action] += 1
-            self.action_dict.update({handle: action})
-
-        # Environment step - pass the agent actions to the environment,
-        # retrieve the response - observations, rewards, dones
-        next_obs, all_rewards, done, _ = self.env.step(self.action_dict)
-
-        for handle in env.get_agent_handles():
-            norm = max(1, max_lt(next_obs[handle], np.inf))
-            next_obs[handle] = np.clip(np.array(next_obs[handle]) / norm, -1, 1)
-
-        # Update replay buffer and train agent
-        if False:
-            for handle in self.env.get_agent_handles():
-                self.agent.step(self.obs[handle], self.action_dict[handle],
-                                all_rewards[handle], next_obs[handle], done[handle],
-                                train=False)
-                self.score += all_rewards[handle]
-
-        self.iFrame += 1
-
-        self.obs = next_obs.copy()
-        if done['__all__']:
-            self.env_done = 1
-
-
-def max_lt(seq, val):
-    """
-    Return greatest item in seq for which item < val applies.
-    None is returned if seq was empty or all items in seq were >= val.
-    """
-
-    idx = len(seq) - 1
-    while idx >= 0:
-        if seq[idx] < val and seq[idx] >= 0:
-            return seq[idx]
-        idx -= 1
-    return None
-
-
-def main(render=True, delay=0.0, n_trials=3, n_steps=50, sGL="QT"):
-    random.seed(1)
-    np.random.seed(1)
-
-    # Example generate a random rail
-    env = RailEnv(width=15, height=15,
-                  rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=20, min_dist=12),
-                  number_of_agents=5)
-
-    if render:
-        # env_renderer = RenderTool(env, gl="QTSVG")
-        env_renderer = RenderTool(env, gl=sGL)
-
-    oPlayer = Player(env)
-
-    for trials in range(1, n_trials + 1):
-
-        # Reset environment
-        oPlayer.reset()
-        env_renderer.set_new_rail()
-
-        # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
-
-        # score = 0
-        # env_done = 0
-
-        # Run episode
-        for step in range(n_steps):
-            oPlayer.step()
-            if render:
-                env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step,
-                                       action_dict=oPlayer.action_dict)
-                # time.sleep(10)
-                if delay > 0:
-                    time.sleep(delay)
-
-
-def main_old(render=True, delay=0.0):
-    ''' DEPRECATED main which drives agent directly
-        Please use the new main() which creates a Player object which is also used by the Editor.
-        Please fix any bugs in main() and Player rather than here.
-        Will delete this one shortly.
-    '''
-
-    random.seed(1)
-    np.random.seed(1)
-
-    # Example generate a random rail
-    env = RailEnv(width=15, height=15,
-                  rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=20, min_dist=12),
-                  number_of_agents=5)
-
-    if render:
-        env_renderer = RenderTool(env, gl="QTSVG")
-        # env_renderer = RenderTool(env, gl="QT")
-
-    n_trials = 9999
-    eps = 1.
-    eps_end = 0.005
-    eps_decay = 0.998
-    action_dict = dict()
-    scores_window = deque(maxlen=100)
-    done_window = deque(maxlen=100)
-    scores = []
-    dones_list = []
-    action_prob = [0] * 4
-
-    # Real Agent
-    # state_size = 105
-    # action_size = 4
-    # agent = Agent(state_size, action_size, "FC", 0)
-    # agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
-
-    def max_lt(seq, val):
-        """
-        Return greatest item in seq for which item < val applies.
-        None is returned if seq was empty or all items in seq were >= val.
-        """
-
-        idx = len(seq) - 1
-        while idx >= 0:
-            if seq[idx] < val and seq[idx] >= 0:
-                return seq[idx]
-            idx -= 1
-        return None
-
-    iFrame = 0
-    tStart = time.time()
-    for trials in range(1, n_trials + 1):
-
-        # Reset environment
-        obs = env.reset()
-        if render:
-            env_renderer.set_new_rail()
-
-        for a in range(env.get_num_agents()):
-            norm = max(1, max_lt(obs[a], np.inf))
-            obs[a] = np.clip(np.array(obs[a]) / norm, -1, 1)
-
-        # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
-
-        score = 0
-        env_done = 0
-
-        # Run episode
-        for step in range(100):
-            # if trials > 114:
-            # env_renderer.renderEnv(show=True)
-            # print(step)
-            # Action
-            for a in range(env.get_num_agents()):
-                action = random.randint(0, 3)  # agent.act(np.array(obs[a]), eps=eps)
-                action_prob[action] += 1
-                action_dict.update({a: action})
-
-            if render:
-                env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step, action_dict=action_dict)
-                if delay > 0:
-                    time.sleep(delay)
-
-            iFrame += 1
-
-            # Environment step
-            next_obs, all_rewards, done, _ = env.step(action_dict)
-
-            for a in range(env.get_num_agents()):
-                norm = max(1, max_lt(next_obs[a], np.inf))
-                next_obs[a] = np.clip(np.array(next_obs[a]) / norm, -1, 1)
-
-            # Update replay buffer and train agent
-            # only needed for "real" agent
-            # for a in range(env.get_num_agents()):
-            #    agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
-            #    score += all_rewards[a]
-
-            obs = next_obs.copy()
-            if done['__all__']:
-                env_done = 1
-                break
-        # Epsilon decay
-        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
-
-        done_window.append(env_done)
-        scores_window.append(score)  # save most recent score
-        scores.append(np.mean(scores_window))
-        dones_list.append((np.mean(done_window)))
-
-        print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
-               '\tEpsilon: {:.2f} \t Action Probabilities: \t {}').format(
-            env.get_num_agents(),
-            trials,
-            np.mean(scores_window),
-            100 * np.mean(done_window),
-            eps, action_prob / np.sum(action_prob)),
-            end=" ")
-        if trials % 100 == 0:
-            tNow = time.time()
-            rFps = iFrame / (tNow - tStart)
-            print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
-                   '\tEpsilon: {:.2f} fps: {:.2f} \t Action Probabilities: \t {}').format(
-                env.get_num_agents(),
-                trials,
-                np.mean(scores_window),
-                100 * np.mean(done_window),
-                eps, rFps, action_prob / np.sum(action_prob)))
-            # torch.save(agent.qnetwork_local.state_dict(),
-            #         '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
-            action_prob = [1] * 4
-
-
-if __name__ == "__main__":
-    main(render=True, delay=0)
--- a/examples/qt2.py
+++ b/examples/qt2.py
-import sys
-
-from PyQt5 import QtSvg
-from PyQt5.QtCore import Qt, QByteArray
-from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QGridLayout, QWidget
-
-from flatland.utils import svg
-
-
-# Subclass QMainWindow to customise your application's main window
-class MainWindow(QMainWindow):
-
-    def __init__(self, *args, **kwargs):
-        super(MainWindow, self).__init__(*args, **kwargs)
-
-        self.setWindowTitle("My Awesome App")
-
-        layout = QGridLayout()
-        layout.setSpacing(0)
-
-        wMain = QWidget(self)
-
-        wMain.setLayout(layout)
-
-        label = QLabel("This is a PyQt5 window!")
-
-        # The `Qt` namespace has a lot of attributes to customise
-        # widgets. See: http://doc.qt.io/qt-5/qt.html
-        label.setAlignment(Qt.AlignCenter)
-        layout.addWidget(label, 0, 0)
-
-        svgWidget = QtSvg.QSvgWidget("./svg/Gleis_vertikal.svg")
-        layout.addWidget(svgWidget, 1, 0)
-
-        if True:
-            track = svg.Track()
-
-            svgWidget = None
-            iRow = 0
-            iCol = 2
-            iArt = 0
-            nCols = 3
-            for binTrans in list(track.dSvg.keys())[:2]:
-                sSVG = track.dSvg[binTrans].to_string()
-
-                bySVG = bytearray(sSVG, encoding='utf-8')
-
-                # with open(sfPath, "r") as fIn:
-                #    sSVG = fIn.read()
-                # bySVG = bytearray(sSVG, encoding='utf-8')
-
-                svgWidget = QtSvg.QSvgWidget()
-                oQB = QByteArray(bySVG)
-
-                bSuccess = svgWidget.renderer().load(oQB)
-                # print(x0, y0, x1, y1)
-                print(iRow, iCol, bSuccess)
-                print("\n\n\n", bySVG.decode("utf-8"))
-                # svgWidget.setGeometry(x0, y0, x1, y1)
-                layout.addWidget(svgWidget, iRow, iCol)
-
-                iArt += 1
-                iRow = int(iArt / nCols)
-                iCol = iArt % nCols
-
-        # Set the central widget of the Window. Widget will expand
-        # to take up all the space in the window by default.
-        self.setCentralWidget(wMain)
-
-
-app = QApplication(sys.argv)
-
-window = MainWindow()
-window.show()
-
-app.exec_()
--- a/examples/simple_example_1.py
+++ b/examples/simple_example_1.py
-import random
-
-from flatland.envs.generators import random_rail_generator, rail_from_manual_specifications_generator
-from flatland.envs.rail_env import RailEnv
-from flatland.envs.observations import TreeObsForRailEnv
-from flatland.utils.rendertools import RenderTool
-import numpy as np
-
-# Example generate a rail given a manual specification,
-# a map of tuples (cell_type, rotation)
-specs = [[(0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0)],
-         [(0, 0), (0, 0), (0, 0), (0, 0), (7, 0), (0, 0)],
-         [(7, 270), (1, 90), (1, 90), (1, 90), (2, 90), (7, 90)],
-         [(0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0)]]
-
-# CURVED RAIL + DEAD-ENDS TEST
-# specs = [[(0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0)],
-#          [(7, 270), (1, 90), (1, 90), (8, 90), (0, 0), (0, 0)],
-#          [(0, 0),   (7, 270),(1, 90), (8, 180), (0, 00), (0, 0)],
-#          [(0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0)]]
-
-env = RailEnv(width=6,
-              height=4,
-              rail_generator=rail_from_manual_specifications_generator(specs),
-              number_of_agents=1,
-              obs_builder_object=TreeObsForRailEnv(max_depth=2))
-
-env.reset()
-
-env_renderer = RenderTool(env, gl="QT")
-env_renderer.renderEnv(show=True)
-
-input("Press Enter to continue...")
--- a/examples/simple_example_2.py
+++ b/examples/simple_example_2.py
-import random
-
-from flatland.envs.generators import random_rail_generator, rail_from_list_of_saved_GridTransitionMap_generator
-from flatland.envs.rail_env import RailEnv
-from flatland.envs.observations import TreeObsForRailEnv
-from flatland.utils.rendertools import RenderTool
-import numpy as np
-
-random.seed(100)
-np.random.seed(100)
-
-# Relative weights of each cell type to be used by the random rail generators.
-transition_probability = [1.0,  # empty cell - Case 0
-                          1.0,  # Case 1 - straight
-                          1.0,  # Case 2 - simple switch
-                          0.3,  # Case 3 - diamond drossing
-                          0.5,  # Case 4 - single slip
-                          0.5,  # Case 5 - double slip
-                          0.2,  # Case 6 - symmetrical
-                          0.0,  # Case 7 - dead end
-                          0.2,  # Case 8 - turn left
-                          0.2,  # Case 9 - turn right
-                          1.0]  # Case 10 - mirrored switch
-
-# Example generate a random rail
-env = RailEnv(width=10,
-              height=10,
-              rail_generator=random_rail_generator(cell_type_relative_proportion=transition_probability),
-              number_of_agents=3,
-              obs_builder_object=TreeObsForRailEnv(max_depth=2))
-
-# env = RailEnv(width=10,
-#               height=10,
-#               rail_generator=rail_from_list_of_saved_GridTransitionMap_generator(['examples/sample_10_10_rail.npy']),
-#               number_of_agents=3,
-#               obs_builder_object=TreeObsForRailEnv(max_depth=2))
-
-env.reset()
-
-env_renderer = RenderTool(env, gl="QT")
-env_renderer.renderEnv(show=True)
-
-input("Press Enter to continue...")
--- a/examples/simple_example_3.py
+++ b/examples/simple_example_3.py
-import random
-
-from flatland.envs.generators import random_rail_generator, random_rail_generator
-from flatland.envs.rail_env import RailEnv
-from flatland.utils.rendertools import RenderTool
-import numpy as np
-
-random.seed(100)
-np.random.seed(100)
-
-env = RailEnv(width=7,
-              height=7,
-              rail_generator=random_rail_generator(),
-              number_of_agents=2)
-
-# Print the distance map of each cell to the target of the first agent
-# for i in range(4):
-#     print(env.obs_builder.distance_map[0, :, :, i])
-
-# Print the observation vector for agent 0
-obs, all_rewards, done, _ = env.step({0: 0})
-for i in range(env.get_num_agents()):
-    env.obs_builder.util_print_obs_subtree(tree=obs[i], num_features_per_node=5)
-
-env_renderer = RenderTool(env, gl="QT")
-env_renderer.renderEnv(show=True)
-
-print("Manual control: s=perform step, q=quit, [agent id] [1-2-3 action] \
-       (turnleft+move, move to front, turnright+move)")
-for step in range(100):
-    cmd = input(">> ")
-    cmds = cmd.split(" ")
-
-    action_dict = {}
-
-    i = 0
-    while i < len(cmds):
-        if cmds[i] == 'q':
-            import sys
-
-            sys.exit()
-        elif cmds[i] == 's':
-            obs, all_rewards, done, _ = env.step(action_dict)
-            action_dict = {}
-            print("Rewards: ", all_rewards, "  [done=", done, "]")
-        else:
-            agent_id = int(cmds[i])
-            action = int(cmds[i + 1])
-            action_dict[agent_id] = action
-            i = i + 1
-        i += 1
-
-    env_renderer.renderEnv(show=True)
--- a/examples/tkplay.py
+++ b/examples/tkplay.py
-import time
-import tkinter as tk
-
-from PIL import ImageTk, Image
-
-from examples.play_model import Player
-from flatland.envs.generators import complex_rail_generator
-from flatland.envs.rail_env import RailEnv
-from flatland.utils.rendertools import RenderTool
-
-
-def tkmain(n_trials=2):
-    # This creates the main window of an application
-    window = tk.Tk()
-    window.title("Join")
-    window.configure(background='grey')
-
-    # Example generate a random rail
-    env = RailEnv(width=15, height=15,
-                  rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=20, min_dist=12),
-                  number_of_agents=5)
-
-    env_renderer = RenderTool(env, gl="PIL")
-
-    oPlayer = Player(env)
-    n_trials = 1
-    n_steps = 20
-    delay = 0
-    for trials in range(1, n_trials + 1):
-
-        # Reset environment8
-        oPlayer.reset()
-        env_renderer.set_new_rail()
-
-        first = True
-
-        for step in range(n_steps):
-            oPlayer.step()
-            env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step,
-                                   action_dict=oPlayer.action_dict)
-            img = env_renderer.getImage()
-            img = Image.fromarray(img)
-            tkimg = ImageTk.PhotoImage(img)
-
-            if first:
-                panel = tk.Label(window, image=tkimg)
-                panel.pack(side="bottom", fill="both", expand="yes")
-            else:
-                # update the image in situ
-                panel.configure(image=tkimg)
-                panel.image = tkimg
-
-            window.update()
-            if delay > 0:
-                time.sleep(delay)
-            first = False
-
-
-if __name__ == "__main__":
-    tkmain()
--- a/examples/training_example.py
+++ b/examples/training_example.py
-from flatland.envs.generators import complex_rail_generator
-from flatland.envs.rail_env import RailEnv
+import getopt
+import sys
+
 import numpy as np
+
+from flatland.envs.line_generators import sparse_line_generator
+from flatland.envs.observations import TreeObsForRailEnv
+from flatland.envs.predictions import ShortestPathPredictorForRailEnv
+from flatland.envs.rail_env import RailEnv
+from flatland.envs.rail_generators import sparse_rail_generator
+from flatland.utils.misc import str2bool
 from flatland.utils.rendertools import RenderTool

-np.random.seed(1)

-# Use the complex_rail_generator to generate feasible network configurations with corresponding tasks
-# Training on simple small tasks is the best way to get familiar with the environment
-#
-env = RailEnv(width=15,
-              height=15,
-              rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=10, min_dist=10, max_dist=99999, seed=0),
-              number_of_agents=5)
+def create_env():
+    # Build the example environment: a 30x40 RailEnv with a single agent on a
+    # sparse_rail_generator layout (2 cities, grid mode, seed 0), lines from
+    # sparse_line_generator, and depth-2 tree observations backed by a
+    # shortest-path predictor. The env is returned without being reset.
+    nAgents = 1
+    n_cities = 2
+    max_rails_between_cities = 2
+    max_rails_in_city = 4  # passed below as max_rail_pairs_in_city
+    seed = 0
+    env = RailEnv(
+        width=30,
+        height=40,
+        rail_generator=sparse_rail_generator(
+            max_num_cities=n_cities,
+            seed=seed,
+            grid_mode=True,
+            max_rails_between_cities=max_rails_between_cities,
+            max_rail_pairs_in_city=max_rails_in_city
+        ),
+        line_generator=sparse_line_generator(),
+        number_of_agents=nAgents,
+        obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())
+    )
+    return env


 # Import your own Agent or use RLlib to train agents on Flatland
@@ -40,41 +61,94 @@ class RandomAgent:
        """
        return

-    def save(self):
+    def save(self, filename):
        # Store the current policy
        return

+    def load(self, filename):
+        # Load a policy
+        return

-# Load the agent with the parameters corresponding to the environment and observation_builder
-agent = RandomAgent(env.get_observation_size(), env.get_action_size())
-n_trials = 1000
-
-# Empty dictionary for all agent action
-action_dict = dict()
-
-for trials in range(1, n_trials + 1):
-
-    # Reset environment and get initial observations for all agents
-    obs = env.reset()
-    # Here you can also further enhance the provided observation by means of normalization
-    # See training navigation example in the baseline repository
-
-    score = 0
-    # Run episode
-    for step in range(100):
-        # Chose an action for each agent in the environment
-        for a in range(env.get_num_agents()):
-            action = agent.act(obs[a])
-            action_dict.update({a: action})
-
-        # Environment step which returns the observations for all agents, their corresponding
-        # reward and whether their are done
-        next_obs, all_rewards, done, _ = env.step(action_dict)
-
-        # Update replay buffer and train agent
-        agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
-        score += all_rewards[a]

-        obs = next_obs.copy()
-        if done['__all__']:
-            break
+def training_example(sleep_for_animation, do_rendering):
+    # Runs n_trials episodes of at most 500 steps each; NOTE: sleep_for_animation is accepted but never read here.
+    np.random.seed(1)
+    # create_env() uses the sparse_rail_generator to generate feasible network configurations with corresponding tasks
+    # Training on simple small tasks is the best way to get familiar with the environment
+    env = create_env()
+    env.reset()
+
+    env_renderer = None
+    if do_rendering:
+        env_renderer = RenderTool(env)
+
+    # State/action sizes are hard-coded; presumably they match the tree observation and action space - TODO confirm
+    agent = RandomAgent(218, 5)
+    n_trials = 5
+
+    # Empty dictionary for all agent actions
+    action_dict = dict()
+    print("Starting Training...")
+
+    for trials in range(1, n_trials + 1):
+
+        # Reset environment and get initial observations for all agents
+        obs, info = env.reset()
+
+        if env_renderer is not None:
+            env_renderer.reset()
+
+        # Here you can also further enhance the provided observation by means of normalization
+        # See training navigation example in the baseline repository
+
+        score = 0
+        # Run episode
+        for step in range(500):
+            # Choose an action for each agent in the environment
+            for a in range(env.get_num_agents()):
+                action = agent.act(obs[a])
+                action_dict.update({a: action})
+            # Environment step which returns the observations for all agents, their corresponding
+            # reward and whether they are done
+            next_obs, all_rewards, done, _ = env.step(action_dict)
+            if env_renderer is not None:
+                env_renderer.render_env(show=True, show_observations=True, show_predictions=False)
+
+            # Update replay buffer and train agent
+            for a in range(env.get_num_agents()):
+                agent.step((obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a]))
+                score += all_rewards[a]
+            obs = next_obs.copy()
+            if done['__all__']:
+                break
+        print('Episode Nr. {}\t Score = {}'.format(trials, score))
+
+    if env_renderer is not None:
+        env_renderer.close_window()
+
+
+def main(args):
+    """Parse --sleep-for-animation=<bool> / --do_rendering=<bool> (both default True) and run the example."""
+    try:
+        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", "do_rendering="])
+    except getopt.GetoptError as err:
+        print(str(err))  # will print something like "option -a not recognized"
+        sys.exit(2)
+    sleep_for_animation = True
+    do_rendering = True
+    for o, a in opts:
+        # Compare with ==: ("--x") is a plain str, so `o in ("--x")` was a substring test.
+        if o == "--sleep-for-animation":
+            sleep_for_animation = str2bool(a)
+        elif o == "--do_rendering":
+            do_rendering = str2bool(a)
+        else:
+            assert False, "unhandled option"
+    training_example(sleep_for_animation, do_rendering)
+
+
+if __name__ == '__main__':
+    if 'argv' in globals():  # NOTE(review): presumably lets a harness that execs this file inject `argv` - confirm
+        main(argv)
+    else:
+        main(sys.argv[1:])
--- a/examples/training_navigation.py
+++ b/examples/training_navigation.py
-import random
-from collections import deque
-
-import numpy as np
-import torch
-
-from flatland.baselines.dueling_double_dqn import Agent
-from flatland.envs.generators import complex_rail_generator
-from flatland.envs.rail_env import RailEnv
-from flatland.utils.rendertools import RenderTool
-
-random.seed(1)
-np.random.seed(1)
-
-# Example generate a rail given a manual specification,
-# a map of tuples (cell_type, rotation)
-transition_probability = [15,  # empty cell - Case 0
-                          5,  # Case 1 - straight
-                          5,  # Case 2 - simple switch
-                          1,  # Case 3 - diamond crossing
-                          1,  # Case 4 - single slip
-                          1,  # Case 5 - double slip
-                          1,  # Case 6 - symmetrical
-                          0,  # Case 7 - dead end
-                          1,  # Case 1b (8)  - simple turn right
-                          1,  # Case 1c (9)  - simple turn left
-                          1]  # Case 2b (10) - simple switch mirrored
-
-# Example generate a random rail
-"""
-env = RailEnv(width=20,
-              height=20,
-              rail_generator=random_rail_generator(cell_type_relative_proportion=transition_probability),
-              number_of_agents=1)
-"""
-env = RailEnv(width=15,
-              height=15,
-              rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=10, min_dist=10, max_dist=99999, seed=0),
-              number_of_agents=5)
-
-"""
-env = RailEnv(width=20,
-              height=20,
-              rail_generator=rail_from_list_of_saved_GridTransitionMap_generator(
-                      ['../notebooks/temp.npy']),
-              number_of_agents=3)
-
-"""
-env_renderer = RenderTool(env, gl="QTSVG")
-handle = env.get_agent_handles()
-
-state_size = 105 * 2
-action_size = 4
-n_trials = 15000
-eps = 1.
-eps_end = 0.005
-eps_decay = 0.9995
-action_dict = dict()
-final_action_dict = dict()
-scores_window = deque(maxlen=100)
-done_window = deque(maxlen=100)
-time_obs = deque(maxlen=2)
-scores = []
-dones_list = []
-action_prob = [0] * 4
-agent_obs = [None] * env.get_num_agents()
-agent_next_obs = [None] * env.get_num_agents()
-agent = Agent(state_size, action_size, "FC", 0)
-agent.qnetwork_local.load_state_dict(torch.load('./flatland/baselines/Nets/avoid_checkpoint15000.pth'))
-
-demo = True
-
-
-def max_lt(seq, val):
-    """
-    Return greatest item in seq for which item < val applies.
-    None is returned if seq was empty or all items in seq were >= val.
-    """
-    max = 0
-    idx = len(seq) - 1
-    while idx >= 0:
-        if seq[idx] < val and seq[idx] >= 0 and seq[idx] > max:
-            max = seq[idx]
-        idx -= 1
-    return max
-
-
-def min_lt(seq, val):
-    """
-    Return smallest item in seq for which item > val applies.
-    None is returned if seq was empty or all items in seq were >= val.
-    """
-    min = np.inf
-    idx = len(seq) - 1
-    while idx >= 0:
-        if seq[idx] > val and seq[idx] < min:
-            min = seq[idx]
-        idx -= 1
-    return min
-
-
-def norm_obs_clip(obs, clip_min=-1, clip_max=1):
-    """
-    This function returns the difference between min and max value of an observation
-    :param obs: Observation that should be normalized
-    :param clip_min: min value where observation will be clipped
-    :param clip_max: max value where observation will be clipped
-    :return: returnes normalized and clipped observatoin
-    """
-    max_obs = max(1, max_lt(obs, 1000))
-    min_obs = max(0, min_lt(obs, 0))
-    if max_obs == min_obs:
-        return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
-    norm = np.abs(max_obs - min_obs)
-    if norm == 0:
-        norm = 1.
-    return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
-
-
-for trials in range(1, n_trials + 1):
-
-    # Reset environment
-    obs = env.reset()
-
-    final_obs = obs.copy()
-    final_obs_next = obs.copy()
-
-    for a in range(env.get_num_agents()):
-        data, distance = env.obs_builder.split_tree(tree=np.array(obs[a]), num_features_per_node=5, current_depth=0)
-
-        data = norm_obs_clip(data)
-        distance = norm_obs_clip(distance)
-        obs[a] = np.concatenate((data, distance))
-
-    for i in range(2):
-        time_obs.append(obs)
-    # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
-    for a in range(env.get_num_agents()):
-        agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
-
-    score = 0
-    env_done = 0
-    # Run episode
-    for step in range(100):
-        if demo:
-            env_renderer.renderEnv(show=True)
-        # print(step)
-        # Action
-        for a in range(env.get_num_agents()):
-            if demo:
-                eps = 0
-            # action = agent.act(np.array(obs[a]), eps=eps)
-            action = agent.act(agent_obs[a])
-            action_prob[action] += 1
-            action_dict.update({a: action})
-
-        # Environment step
-        next_obs, all_rewards, done, _ = env.step(action_dict)
-        for a in range(env.get_num_agents()):
-            data, distance = env.obs_builder.split_tree(tree=np.array(next_obs[a]), num_features_per_node=5,
-                                                        current_depth=0)
-            data = norm_obs_clip(data)
-            distance = norm_obs_clip(distance)
-            next_obs[a] = np.concatenate((data, distance))
-
-        time_obs.append(next_obs)
-
-        # Update replay buffer and train agent
-        for a in range(env.get_num_agents()):
-            agent_next_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
-
-            if done[a]:
-                final_obs[a] = agent_obs[a].copy()
-                final_obs_next[a] = agent_next_obs[a].copy()
-                final_action_dict.update({a: action_dict[a]})
-            if not demo and not done[a]:
-                agent.step(agent_obs[a], action_dict[a], all_rewards[a], agent_next_obs[a], done[a])
-            score += all_rewards[a]
-
-        agent_obs = agent_next_obs.copy()
-        if done['__all__']:
-            env_done = 1
-            for a in range(env.get_num_agents()):
-                agent.step(final_obs[a], final_action_dict[a], all_rewards[a], final_obs_next[a], done[a])
-            break
-    # Epsilon decay
-    eps = max(eps_end, eps_decay * eps)  # decrease epsilon
-
-    done_window.append(env_done)
-    scores_window.append(score)  # save most recent score
-    scores.append(np.mean(scores_window))
-    dones_list.append((np.mean(done_window)))
-
-    print('\rTraining {} Agents.\t Episode {}\t Average Score: {:.0f}\tDones: {:.2f}%' +
-          '\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
-              env.get_num_agents(),
-              trials,
-              np.mean(scores_window),
-              100 * np.mean(done_window),
-              eps, action_prob / np.sum(action_prob)), end=" ")
-
-    if trials % 100 == 0:
-        print(
-            '\rTraining {} Agents.\t Episode {}\t Average Score: {:.0f}\tDones: {:.2f}%' +
-            '\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
-                env.get_num_agents(),
-                trials,
-                np.mean(scores_window),
-                100 * np.mean(done_window),
-                eps,
-                action_prob / np.sum(action_prob)))
-        torch.save(agent.qnetwork_local.state_dict(),
-                   '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
-        action_prob = [1] * 4
--- a/flatland/__init__.py
+++ b/flatland/__init__.py
@@ -4,4 +4,4 @@

 __author__ = """S.P. Mohanty"""
 __email__ = 'mohanty@aicrowd.com'
-__version__ = '0.1.1'
+__version__ = '3.0.15'
--- a/flatland/action_plan/__init__.py
+++ b/flatland/action_plan/__init__.py
--- a/flatland/action_plan/action_plan.py
+++ b/flatland/action_plan/action_plan.py
+import pprint
+from typing import Dict, List, Optional, NamedTuple
+
+import numpy as np
+
+from flatland.core.grid.grid_utils import Vec2dOperations as Vec2d
+from flatland.envs.rail_env import RailEnv, RailEnvActions
+from flatland.envs.rail_env_shortest_paths import get_action_for_move
+from flatland.envs.rail_trainrun_data_structures import Waypoint, Trainrun, TrainrunWaypoint
+
+# ---- ActionPlan ---------------
+# One entry of an agent's plan: the action the agent has to take at the given time step.
+class ActionPlanElement(NamedTuple):
+    scheduled_at: int
+    action: RailEnvActions
+
+
+# An action plan gathers all the actions of a single agent together with their time steps.
+ActionPlan = List[ActionPlanElement]
+
+# An action plan dict holds the action plan of every agent; the key is the agent handle.
+ActionPlanDict = Dict[int, ActionPlan]
+
+
+class ControllerFromTrainruns:
+    """Takes train runs, derives the actions from them and re-acts them.
+
+    One `ActionPlan` per entry of `trainrun_dict` is built at construction time; `act()` then
+    returns the actions that are scheduled exactly at a given time step.
+    """
+    # shared pretty-printer used to format plans in assertion messages
+    pp = pprint.PrettyPrinter(indent=4)
+
+    def __init__(self,
+                 env: RailEnv,
+                 trainrun_dict: Dict[int, Trainrun]):
+        """
+        Parameters
+        ----------
+        env
+            environment whose agents and rail are used to derive the actions
+        trainrun_dict
+            the train run (scheduled waypoints) of every agent, keyed by agent id
+        """
+        self.env: RailEnv = env
+        self.trainrun_dict: Dict[int, Trainrun] = trainrun_dict
+        # NOTE(review): despite the `ActionPlanDict` annotation this is a *list* in the iteration order
+        # of `trainrun_dict`. It is indexed by agent id further down, which presumably relies on the
+        # keys being 0..n-1 in insertion order -- confirm against callers before changing the type.
+        self.action_plan: ActionPlanDict = [self._create_action_plan_for_agent(agent_id, chosen_path)
+                                            for agent_id, chosen_path in trainrun_dict.items()]
+
+    def get_waypoint_before_or_at_step(self, agent_id: int, step: int) -> Waypoint:
+        """
+        Get the waypoint from which the position of the agent at `step` can be extracted.
+
+        Parameters
+        ----------
+        agent_id
+        step
+
+        Returns
+        -------
+        Waypoint
+            The last waypoint scheduled at or before `step`. Its position is `None` up to and including
+            the first scheduled time step and from the last scheduled time step onwards.
+
+        """
+        trainrun = self.trainrun_dict[agent_id]
+        entry_time_step = trainrun[0].scheduled_at
+        # the agent has no position before and at choosing to enter the grid
+        # (one tick elapses before the agent enters the grid)
+        if step <= entry_time_step:
+            return Waypoint(position=None, direction=self.env.agents[agent_id].initial_direction)
+
+        # the agent loses its position as soon as the target cell is reached
+        exit_time_step = trainrun[-1].scheduled_at
+        if step >= exit_time_step:
+            return Waypoint(position=None, direction=trainrun[-1].waypoint.direction)
+
+        waypoint = None
+        for trainrun_waypoint in trainrun:
+            if step < trainrun_waypoint.scheduled_at:
+                # the previous waypoint is the one the agent is still at
+                return waypoint
+            waypoint = trainrun_waypoint.waypoint
+        assert waypoint is not None
+        return waypoint
+
+    def get_action_at_step(self, agent_id: int, current_step: int) -> Optional[RailEnvActions]:
+        """
+        Get the current action if any is defined in the `ActionPlan`.
+        ASSUMPTION we assume the env has `remove_agents_at_target=True` and `activate_agents=False`!!
+
+        Parameters
+        ----------
+        agent_id
+        current_step
+
+        Returns
+        -------
+        RailEnvActions, optional
+            The action scheduled exactly at `current_step`, `None` if there is none.
+
+        """
+        for action_plan_element in self.action_plan[agent_id]:
+            scheduled_at = action_plan_element.scheduled_at
+            # the first element scheduled later than the current step ends the search
+            if scheduled_at > current_step:
+                return None
+            if scheduled_at == current_step:
+                return action_plan_element.action
+        return None
+
+    def act(self, current_step: int) -> Dict[int, RailEnvActions]:
+        """
+        Get the action dictionary to be replayed at the current step.
+        Returns only action where required (no action for done agents or those not at the beginning of the cell).
+
+        ASSUMPTION we assume the env has `remove_agents_at_target=True` and `activate_agents=False`!!
+
+        Parameters
+        ----------
+        current_step: int
+
+        Returns
+        -------
+        Dict[int, RailEnvActions]
+
+        """
+        action_dict = {}
+        for agent_id, _ in enumerate(self.env.agents):
+            action: Optional[RailEnvActions] = self.get_action_at_step(agent_id, current_step)
+            if action is not None:
+                action_dict[agent_id] = action
+        return action_dict
+
+    def print_action_plan(self):
+        """Pretty-prints `ActionPlanDict` of this `ControllerFromTrainruns`  to stdout."""
+        self.__class__.print_action_plan_dict(self.action_plan)
+
+    @staticmethod
+    def print_action_plan_dict(action_plan: ActionPlanDict):
+        """Pretty-prints `ActionPlanDict` to stdout."""
+        for agent_id, plan in enumerate(action_plan):
+            print("{}: ".format(agent_id))
+            for step in plan:
+                print("  {}".format(step))
+
+    @staticmethod
+    def assert_actions_plans_equal(expected_action_plan: ActionPlanDict, actual_action_plan: ActionPlanDict):
+        """Assert that both action plans are equal, failing with a message that locates the first difference.
+
+        Parameters
+        ----------
+        expected_action_plan
+        actual_action_plan
+
+        Raises
+        ------
+        AssertionError
+            if the number of agents, the length of an agent's plan or any plan element differs
+        """
+        assert len(expected_action_plan) == len(actual_action_plan)
+        # index by position 0..n-1 on purpose: this is how both containers are addressed everywhere else
+        for k in range(len(expected_action_plan)):
+            expected_plan = expected_action_plan[k]
+            actual_plan = actual_action_plan[k]
+            assert len(expected_plan) == len(actual_plan), \
+                "len for agent {} should be the same.\n\n  expected ({}) = {}\n\n  actual ({}) = {}".format(
+                    k,
+                    len(expected_plan),
+                    ControllerFromTrainruns.pp.pformat(expected_plan),
+                    len(actual_plan),
+                    ControllerFromTrainruns.pp.pformat(actual_plan))
+            for i, (expected_element, actual_element) in enumerate(zip(expected_plan, actual_plan)):
+                assert expected_element == actual_element, \
+                    "not the same at agent {} at step {}\n\n  expected = {}\n\n  actual = {}".format(
+                        k, i,
+                        ControllerFromTrainruns.pp.pformat(expected_element),
+                        ControllerFromTrainruns.pp.pformat(actual_element))
+        assert expected_action_plan == actual_action_plan, \
+            "expected {}, found {}".format(expected_action_plan, actual_action_plan)
+
+    def _create_action_plan_for_agent(self, agent_id, trainrun) -> ActionPlan:
+        """Translate the train run of one agent into its list of `ActionPlanElement`s.
+
+        Parameters
+        ----------
+        agent_id
+            index of the agent in `self.env.agents`
+        trainrun
+            scheduled waypoints of the agent; iteration stops at the first waypoint on the agent's target
+
+        Returns
+        -------
+        ActionPlan
+        """
+        action_plan = []
+        agent = self.env.agents[agent_id]
+        # derived from the agent's speed counter; used below as the lead time of a move before the next entry
+        minimum_cell_time = agent.speed_counter.max_count + 1
+        for path_loop, trainrun_waypoint in enumerate(trainrun):
+            position = trainrun_waypoint.waypoint.position
+
+            # nothing to do any more once the target is reached
+            if Vec2d.is_equal(agent.target, position):
+                break
+
+            next_trainrun_waypoint: TrainrunWaypoint = trainrun[path_loop + 1]
+            next_position = next_trainrun_waypoint.waypoint.position
+
+            if path_loop == 0:
+                self._add_action_plan_elements_for_first_path_element_of_agent(
+                    action_plan,
+                    trainrun_waypoint,
+                    next_trainrun_waypoint,
+                    minimum_cell_time
+                )
+                continue
+
+            just_before_target = Vec2d.is_equal(agent.target, next_position)
+
+            self._add_action_plan_elements_for_current_path_element(
+                action_plan,
+                minimum_cell_time,
+                trainrun_waypoint,
+                next_trainrun_waypoint)
+
+            # add a final element
+            if just_before_target:
+                self._add_action_plan_elements_for_target_at_path_element_just_before_target(
+                    action_plan,
+                    minimum_cell_time,
+                    trainrun_waypoint,
+                    next_trainrun_waypoint)
+        return action_plan
+
+    def _add_action_plan_elements_for_current_path_element(self,
+                                                           action_plan: ActionPlan,
+                                                           minimum_cell_time: int,
+                                                           trainrun_waypoint: TrainrunWaypoint,
+                                                           next_trainrun_waypoint: TrainrunWaypoint):
+        """Append the element(s) that move the agent from `trainrun_waypoint` to `next_trainrun_waypoint`.
+
+        Appends to `action_plan` in place: either the move scheduled at the entry of the current cell or,
+        if the agent has to wait, a stop at the entry followed by the move `minimum_cell_time` before
+        the next entry.
+        """
+        scheduled_at = trainrun_waypoint.scheduled_at
+        next_entry_value = next_trainrun_waypoint.scheduled_at
+
+        position = trainrun_waypoint.waypoint.position
+        direction = trainrun_waypoint.waypoint.direction
+        next_position = next_trainrun_waypoint.waypoint.position
+        next_direction = next_trainrun_waypoint.waypoint.direction
+        next_action = get_action_for_move(position,
+                                          direction,
+                                          next_position,
+                                          next_direction,
+                                          self.env.rail)
+
+        # if the next entry is later than minimum_cell_time, then stop here and
+        # move minimum_cell_time before the exit
+        # we have to do this since agents in the RailEnv are processed in the step() in the order of their handle
+        if next_entry_value > scheduled_at + minimum_cell_time:
+            action_plan.append(ActionPlanElement(scheduled_at, RailEnvActions.STOP_MOVING))
+            action_plan.append(ActionPlanElement(next_entry_value - minimum_cell_time, next_action))
+        else:
+            action_plan.append(ActionPlanElement(scheduled_at, next_action))
+
+    def _add_action_plan_elements_for_target_at_path_element_just_before_target(self,
+                                                                                action_plan: ActionPlan,
+                                                                                minimum_cell_time: int,
+                                                                                trainrun_waypoint: TrainrunWaypoint,
+                                                                                next_trainrun_waypoint: TrainrunWaypoint):
+        """Append the final stop, scheduled `minimum_cell_time` after the entry of the cell before the target.
+
+        `next_trainrun_waypoint` is not used; it is kept for a signature parallel to the sibling helpers.
+        """
+        scheduled_at = trainrun_waypoint.scheduled_at
+
+        action_plan.append(ActionPlanElement(scheduled_at + minimum_cell_time, RailEnvActions.STOP_MOVING))
+
+    def _add_action_plan_elements_for_first_path_element_of_agent(self,
+                                                                  action_plan: ActionPlan,
+                                                                  trainrun_waypoint: TrainrunWaypoint,
+                                                                  next_trainrun_waypoint: TrainrunWaypoint,
+                                                                  minimum_cell_time: int):
+        """Append the elements for the first waypoint: entering the grid and leaving the first cell.
+
+        Appends to `action_plan` in place, in this order: an optional initial do-nothing at step 0, the
+        move that enters the grid, an optional stop right after entering, and the move towards
+        `next_trainrun_waypoint`.
+        """
+        scheduled_at = trainrun_waypoint.scheduled_at
+        position = trainrun_waypoint.waypoint.position
+        direction = trainrun_waypoint.waypoint.direction
+        next_position = next_trainrun_waypoint.waypoint.position
+        next_direction = next_trainrun_waypoint.waypoint.direction
+
+        # add initial do nothing if we do not enter immediately, actually not necessary
+        if scheduled_at > 0:
+            action_plan.append(ActionPlanElement(0, RailEnvActions.DO_NOTHING))
+
+        # add action to enter the grid
+        action_plan.append(ActionPlanElement(scheduled_at, RailEnvActions.MOVE_FORWARD))
+
+        next_action = get_action_for_move(position,
+                                          direction,
+                                          next_position,
+                                          next_direction,
+                                          self.env.rail)
+
+        # if the agent is blocked in the cell, we have to call stop upon entering!
+        if next_trainrun_waypoint.scheduled_at > scheduled_at + 1 + minimum_cell_time:
+            action_plan.append(ActionPlanElement(scheduled_at + 1, RailEnvActions.STOP_MOVING))
+
+        # execute the action exactly minimum_cell_time before the entry into the next cell
+        action_plan.append(ActionPlanElement(next_trainrun_waypoint.scheduled_at - minimum_cell_time, next_action))
--- a/flatland/action_plan/action_plan_player.py
+++ b/flatland/action_plan/action_plan_player.py
+from typing import Callable
+
+from flatland.action_plan.action_plan import ControllerFromTrainruns
+from flatland.envs.rail_env import RailEnv
+from flatland.envs.rail_trainrun_data_structures import Waypoint
+
+# Type of the render hook accepted by `replay_verify`: it is called with the env and returns nothing.
+ControllerFromTrainrunsReplayerRenderCallback = Callable[[RailEnv], None]
+
+
+class ControllerFromTrainrunsReplayer():
+    """Verifies a `ControllerFromTrainruns` by replaying it against a FLATland env without malfunction."""
+
+    @staticmethod
+    def replay_verify(ctl: ControllerFromTrainruns, env: RailEnv,
+                      call_back: ControllerFromTrainrunsReplayerRenderCallback = lambda *a, **k: None):
+        """Replay the controller's actions step by step and check every agent's position on the way.
+
+        Parameters
+        ----------
+        ctl
+            controller providing the expected waypoints and the actions per time step
+        env
+            environment the actions are applied to; it is stepped until all agents are done or the
+            step counter exceeds `env._max_episode_steps`
+        call_back
+            Called once before the first step and after each step() call. The env is passed to it.
+
+        Raises
+        ------
+        AssertionError
+            if an agent is not at the position expected by the controller before a step
+        """
+        call_back(env)
+        time_step = 0
+        while True:
+            if env.dones['__all__'] or time_step > env._max_episode_steps:
+                break
+
+            # before stepping, every agent must be where the controller expects it
+            for agent_id, agent in enumerate(env.agents):
+                expected: Waypoint = ctl.get_waypoint_before_or_at_step(agent_id, time_step)
+                assert agent.position == expected.position, \
+                    "before {}, agent {} at {}, expected {}".format(
+                        time_step, agent_id, agent.position, expected.position)
+
+            _obs, _rewards, _dones, _info = env.step(ctl.act(time_step))
+            call_back(env)
+            time_step += 1
--- a/flatland/baselines/Nets/avoid_checkpoint15000.pth
+++ b/flatland/baselines/Nets/avoid_checkpoint15000.pth
--- a/flatland/baselines/dueling_double_dqn.py
+++ b/flatland/baselines/dueling_double_dqn.py
-import copy
-import os
-import random
-from collections import namedtuple, deque, Iterable
-
-import numpy as np
-import torch
-import torch.nn.functional as F
-import torch.optim as optim
-
-from flatland.baselines.model import QNetwork, QNetwork2
-
-BUFFER_SIZE = int(1e5)  # replay buffer size
-BATCH_SIZE = 512  # minibatch size
-GAMMA = 0.99  # discount factor 0.99
-TAU = 1e-3  # for soft update of target parameters
-LR = 0.5e-4  # learning rate 5
-UPDATE_EVERY = 10  # how often to update the network
-double_dqn = True  # If using double dqn algorithm
-input_channels = 5  # Number of Input channels
-
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-device = torch.device("cpu")
-print(device)
-
-
-class Agent:
-    """Interacts with and learns from the environment."""
-
-    def __init__(self, state_size, action_size, net_type, seed, double_dqn=True, input_channels=5):
-        """Initialize an Agent object.
-
-        Params
-        ======
-            state_size (int): dimension of each state
-            action_size (int): dimension of each action
-            seed (int): random seed
-        """
-        self.state_size = state_size
-        self.action_size = action_size
-        self.seed = random.seed(seed)
-        self.version = net_type
-        self.double_dqn = double_dqn
-        # Q-Network
-        if self.version == "Conv":
-            self.qnetwork_local = QNetwork2(state_size, action_size, seed, input_channels).to(device)
-            self.qnetwork_target = copy.deepcopy(self.qnetwork_local)
-        else:
-            self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
-            self.qnetwork_target = copy.deepcopy(self.qnetwork_local)
-
-        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
-
-        # Replay memory
-        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
-        # Initialize time step (for updating every UPDATE_EVERY steps)
-        self.t_step = 0
-
-    def save(self, filename):
-        torch.save(self.qnetwork_local.state_dict(), filename + ".local")
-        torch.save(self.qnetwork_target.state_dict(), filename + ".target")
-
-    def load(self, filename):
-        if os.path.exists(filename + ".local"):
-            self.qnetwork_local.load_state_dict(torch.load(filename + ".local"))
-        if os.path.exists(filename + ".target"):
-            self.qnetwork_target.load_state_dict(torch.load(filename + ".target"))
-
-    def step(self, state, action, reward, next_state, done, train=True):
-        # Save experience in replay memory
-        self.memory.add(state, action, reward, next_state, done)
-
-        # Learn every UPDATE_EVERY time steps.
-        self.t_step = (self.t_step + 1) % UPDATE_EVERY
-        if self.t_step == 0:
-            # If enough samples are available in memory, get random subset and learn
-            if len(self.memory) > BATCH_SIZE:
-                experiences = self.memory.sample()
-                if train:
-                    self.learn(experiences, GAMMA)
-
-    def act(self, state, eps=0.):
-        """Returns actions for given state as per current policy.
-
-        Params
-        ======
-            state (array_like): current state
-            eps (float): epsilon, for epsilon-greedy action selection
-        """
-        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
-        self.qnetwork_local.eval()
-        with torch.no_grad():
-            action_values = self.qnetwork_local(state)
-        self.qnetwork_local.train()
-
-        # Epsilon-greedy action selection
-        if random.random() > eps:
-            return np.argmax(action_values.cpu().data.numpy())
-        else:
-            return random.choice(np.arange(self.action_size))
-
-    def learn(self, experiences, gamma):
-
-        """Update value parameters using given batch of experience tuples.
-
-        Params
-        ======
-            experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples
-            gamma (float): discount factor
-        """
-        states, actions, rewards, next_states, dones = experiences
-
-        # Get expected Q values from local model
-        Q_expected = self.qnetwork_local(states).gather(1, actions)
-
-        if self.double_dqn:
-            # Double DQN
-            q_best_action = self.qnetwork_local(next_states).max(1)[1]
-            Q_targets_next = self.qnetwork_target(next_states).gather(1, q_best_action.unsqueeze(-1))
-        else:
-            # DQN
-            Q_targets_next = self.qnetwork_target(next_states).detach().max(1)[0].unsqueeze(-1)
-
-            # Compute Q targets for current states
-
-        Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))
-
-        # Compute loss
-        loss = F.mse_loss(Q_expected, Q_targets)
-        # Minimize the loss
-        self.optimizer.zero_grad()
-        loss.backward()
-        self.optimizer.step()
-
-        # ------------------- update target network ------------------- #
-        self.soft_update(self.qnetwork_local, self.qnetwork_target, TAU)
-
-    def soft_update(self, local_model, target_model, tau):
-        """Soft update model parameters.
-        θ_target = τ*θ_local + (1 - τ)*θ_target
-
-        Params
-        ======
-            local_model (PyTorch model): weights will be copied from
-            target_model (PyTorch model): weights will be copied to
-            tau (float): interpolation parameter
-        """
-        for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
-            target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)
-
-
-class ReplayBuffer:
-    """Fixed-size buffer to store experience tuples."""
-
-    def __init__(self, action_size, buffer_size, batch_size, seed):
-        """Initialize a ReplayBuffer object.
-
-        Params
-        ======
-            action_size (int): dimension of each action
-            buffer_size (int): maximum size of buffer
-            batch_size (int): size of each training batch
-            seed (int): random seed
-        """
-        self.action_size = action_size
-        self.memory = deque(maxlen=buffer_size)
-        self.batch_size = batch_size
-        self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])
-        self.seed = random.seed(seed)
-
-    def add(self, state, action, reward, next_state, done):
-        """Add a new experience to memory."""
-        e = self.experience(np.expand_dims(state, 0), action, reward, np.expand_dims(next_state, 0), done)
-        self.memory.append(e)
-
-    def sample(self):
-        """Randomly sample a batch of experiences from memory."""
-        experiences = random.sample(self.memory, k=self.batch_size)
-
-        states = torch.from_numpy(self.__v_stack_impr([e.state for e in experiences if e is not None])) \
-            .float().to(device)
-        actions = torch.from_numpy(self.__v_stack_impr([e.action for e in experiences if e is not None])) \
-            .long().to(device)
-        rewards = torch.from_numpy(self.__v_stack_impr([e.reward for e in experiences if e is not None])) \
-            .float().to(device)
-        next_states = torch.from_numpy(self.__v_stack_impr([e.next_state for e in experiences if e is not None])) \
-            .float().to(device)
-        dones = torch.from_numpy(self.__v_stack_impr([e.done for e in experiences if e is not None]).astype(np.uint8)) \
-            .float().to(device)
-
-        return (states, actions, rewards, next_states, dones)
-
-    def __len__(self):
-        """Return the current size of internal memory."""
-        return len(self.memory)
-
-    def __v_stack_impr(self, states):
-        sub_dim = len(states[0][0]) if isinstance(states[0], Iterable) else 1
-        np_states = np.reshape(np.array(states), (len(states), sub_dim))
-        return np_states
--- a/flatland/baselines/model.py
+++ b/flatland/baselines/model.py
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-class QNetwork(nn.Module):
-    def __init__(self, state_size, action_size, seed, hidsize1=128, hidsize2=128):
-        super(QNetwork, self).__init__()
-
-        self.fc1_val = nn.Linear(state_size, hidsize1)
-        self.fc2_val = nn.Linear(hidsize1, hidsize2)
-        self.fc3_val = nn.Linear(hidsize2, 1)
-
-        self.fc1_adv = nn.Linear(state_size, hidsize1)
-        self.fc2_adv = nn.Linear(hidsize1, hidsize2)
-        self.fc3_adv = nn.Linear(hidsize2, action_size)
-
-    def forward(self, x):
-        val = F.relu(self.fc1_val(x))
-        val = F.relu(self.fc2_val(val))
-        val = self.fc3_val(val)
-
-        # advantage calculation
-        adv = F.relu(self.fc1_adv(x))
-        adv = F.relu(self.fc2_adv(adv))
-        adv = self.fc3_adv(adv)
-        return val + adv - adv.mean()
-
-
-class QNetwork2(nn.Module):
-    def __init__(self, state_size, action_size, seed, input_channels, hidsize1=128, hidsize2=64):
-        super(QNetwork2, self).__init__()
-        self.conv1 = nn.Conv2d(input_channels, 16, kernel_size=3, stride=1)
-        self.bn1 = nn.BatchNorm2d(16)
-        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=3)
-        self.bn2 = nn.BatchNorm2d(32)
-        self.conv3 = nn.Conv2d(32, 64, kernel_size=5, stride=3)
-        self.bn3 = nn.BatchNorm2d(64)
-
-        self.fc1_val = nn.Linear(6400, hidsize1)
-        self.fc2_val = nn.Linear(hidsize1, hidsize2)
-        self.fc3_val = nn.Linear(hidsize2, 1)
-
-        self.fc1_adv = nn.Linear(6400, hidsize1)
-        self.fc2_adv = nn.Linear(hidsize1, hidsize2)
-        self.fc3_adv = nn.Linear(hidsize2, action_size)
-
-    def forward(self, x):
-        x = F.relu(self.conv1(x))
-        x = F.relu(self.conv2(x))
-        x = F.relu(self.conv3(x))
-
-        # value function approximation
-        val = F.relu(self.fc1_val(x.view(x.size(0), -1)))
-        val = F.relu(self.fc2_val(val))
-        val = self.fc3_val(val)
-
-        # advantage calculation
-        adv = F.relu(self.fc1_adv(x.view(x.size(0), -1)))
-        adv = F.relu(self.fc2_adv(adv))
-        adv = self.fc3_adv(adv)
-        return val + adv - adv.mean()
--- a/flatland/cli.py
+++ b/flatland/cli.py
@@ -2,17 +2,110 @@

 """Console script for flatland."""
 import sys
+import time
+
 import click
+import numpy as np
+import redis
+
+from flatland.envs.rail_env import RailEnv
+from flatland.envs.rail_generators import sparse_rail_generator
+from flatland.envs.line_generators import sparse_line_generator
+from flatland.evaluators.service import FlatlandRemoteEvaluationService, FLATLAND_RL_SERVICE_ID
+from flatland.utils.rendertools import RenderTool


 @click.command()
-def main(args=None):
-    """Console script for flatland."""
-    click.echo("Replace this message by putting your code into "
-               "flatland.cli.main")
-    click.echo("See click documentation at http://click.pocoo.org/")
+def demo(args=None):
+    """Demo script to check installation"""
+    # small sparse network with five agents
+    env = RailEnv(
+        width=30,
+        height=30,
+        rail_generator=sparse_rail_generator(
+            max_num_cities=3,
+            grid_mode=False,
+            max_rails_between_cities=4,
+            max_rail_pairs_in_city=2,
+            seed=0
+        ),
+        line_generator=sparse_line_generator(),
+        number_of_agents=5)
+
+    env._max_episode_steps = int(15 * (env.width + env.height))
+    env_renderer = RenderTool(env)
+
+    env.reset()
+    episode_done = False
+    # Run a single episode here
+    while not episode_done:
+        # Compute Action: one random action id in [0, 5) per agent
+        actions = {idx: np.random.randint(0, 5) for idx, _ in enumerate(env.agents)}
+        _, _, done, _ = env.step(actions)
+        episode_done = done['__all__']
+        # render also the state after the final step, then slow down so the episode can be watched
+        env_renderer.render_env(
+            show=True,
+            frames=False,
+            show_observations=False,
+            show_predictions=False
+        )
+        time.sleep(0.1)
+
     return 0


+@click.command()
+@click.option('--tests',
+              type=click.Path(exists=True),
+              help="Path to folder containing Flatland tests",
+              required=True
+              )
+@click.option('--service_id',
+              default=FLATLAND_RL_SERVICE_ID,
+              help="Evaluation Service ID. This has to match the service id on the client.",
+              required=False
+              )
+@click.option('--shuffle',
+              type=bool,
+              default=False,
+              help="Shuffle the environments before starting evaluation.",
+              required=False
+              )
+@click.option('--disable_timeouts',
+              default=False,
+              help="Disable all evaluation timeouts.",
+              required=False
+              )
+@click.option('--results_path',
+              type=click.Path(exists=False),
+              default=None,
+              help="Path where the evaluator should write the results metadata.",
+              required=False
+              )
+def evaluator(tests, service_id, shuffle, disable_timeouts, results_path):
+    """Start the Flatland remote evaluation service for the given test folder."""
+    # fail early with a readable message if the redis server does not answer a ping
+    try:
+        redis_connection = redis.Redis()
+        redis_connection.ping()
+    except redis.exceptions.ConnectionError as e:
+        # chain the original error so the underlying connection failure stays visible in the traceback
+        raise Exception(
+            "\nRedis server does not seem to be running on your localhost.\n"
+            "Please ensure that you have a redis server running on your localhost"
+        ) from e
+
+    grader = FlatlandRemoteEvaluationService(
+        test_env_folder=tests,
+        flatland_rl_service_id=service_id,
+        visualize=False,
+        result_output_path=results_path,
+        verbose=False,
+        shuffle=shuffle,
+        disable_timeouts=disable_timeouts
+    )
+    grader.run()
+
+
 if __name__ == "__main__":
-    sys.exit(main())  # pragma: no cover
+    sys.exit(demo())  # pragma: no cover
No results found