From 098e4652e7bfe864347908dbacf937ef76b8b11d Mon Sep 17 00:00:00 2001 From: MLErik <baerenjesus@gmail.com> Date: Wed, 9 Oct 2019 12:27:53 -0400 Subject: [PATCH] updated introduction file --- examples/introduction_flatland_2_1.py | 134 ----------------- examples/introduction_flatland_2_1_1.py | 192 ++++++++++++++++++++++++ flatland/envs/rail_env.py | 2 +- 3 files changed, 193 insertions(+), 135 deletions(-) delete mode 100644 examples/introduction_flatland_2_1.py create mode 100644 examples/introduction_flatland_2_1_1.py diff --git a/examples/introduction_flatland_2_1.py b/examples/introduction_flatland_2_1.py deleted file mode 100644 index 5ece03e9..00000000 --- a/examples/introduction_flatland_2_1.py +++ /dev/null @@ -1,134 +0,0 @@ -import time - -import numpy as np - -from flatland.envs.observations import TreeObsForRailEnv, GlobalObsForRailEnv -from flatland.envs.predictions import ShortestPathPredictorForRailEnv -from flatland.envs.rail_env import RailEnv -from flatland.envs.rail_generators import sparse_rail_generator -from flatland.envs.schedule_generators import sparse_schedule_generator -from flatland.utils.rendertools import RenderTool, AgentRenderVariant - -np.random.seed(1) - -# Use the new sparse_rail_generator to generate feasible network configurations with corresponding tasks -# Training on simple small tasks is the best way to get familiar with the environment - -# Use a the malfunction generator to break agents from time to time -stochastic_data = {'prop_malfunction': 0.3, # Percentage of defective agents - 'malfunction_rate': 30, # Rate of malfunction occurence - 'min_duration': 3, # Minimal duration of malfunction - 'max_duration': 20 # Max duration of malfunction - } - -# Custom observation builder -TreeObservation = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv()) - -# Different agent types (trains) with different speeds. -speed_ration_map = {1.: 0.25, # Fast passenger train - 1. / 2.: 0.25, # Fast freight train - 1. / 3.: 0.25, # Slow commuter train - 1. 
/ 4.: 0.25}  # Slow freight train
-
-env = RailEnv(width=100,
-              height=100,
-              rail_generator=sparse_rail_generator(max_num_cities=30,
-                                                   # Number of cities in map (where train stations are)
-                                                   seed=14,  # Random seed
-                                                   grid_mode=False,
-                                                   max_rails_between_cities=2,
-                                                   max_rails_in_city=8,
-                                                   ),
-              schedule_generator=sparse_schedule_generator(speed_ration_map),
-              number_of_agents=100,
-              stochastic_data=stochastic_data,  # Malfunction data generator
-              obs_builder_object=GlobalObsForRailEnv(),
-              remove_agents_at_target=True
-              )
-
-# RailEnv.DEPOT_POSITION = lambda agent, agent_handle : (agent_handle % env.height,0)
-
-env_renderer = RenderTool(env, gl="PILSVG",
-                          agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS_AND_BOX,
-                          show_debug=True,
-                          screen_height=1000,
-                          screen_width=1000)
-
-
-# Import your own Agent or use RLlib to train agents on Flatland
-# As an example we use a random agent instead
-class RandomAgent:
-
-    def __init__(self, state_size, action_size):
-        self.state_size = state_size
-        self.action_size = action_size
-
-    def act(self, state):
-        """
-        :param state: input is the observation of the agent
-        :return: returns an action
-        """
-        return 2  # np.random.choice(np.arange(self.action_size))
-
-    def step(self, memories):
-        """
-        Step function to improve agent by adjusting policy given the observations
-
-        :param memories: SARS Tuple to be
-        :return:
-        """
-        return
-
-    def save(self, filename):
-        # Store the current policy
-        return
-
-    def load(self, filename):
-        # Load a policy
-        return
-
-
-# Initialize the agent with the parameters corresponding to the environment and observation_builder
-# Set action space to 4 to remove stop action
-agent = RandomAgent(218, 4)
-
-# Empty dictionary for all agent action
-action_dict = dict()
-
-print("Start episode...")
-# Reset environment and get initial observations for all agents
-start_reset = time.time()
-obs, info = env.reset()
-end_reset = time.time()
-print(end_reset - start_reset)
-print(env.get_num_agents(), )
-# Reset the rendering sytem
-env_renderer.reset()
-
-# Here you can also further enhance the provided observation by means of normalization
-# See training navigation example in the baseline repository
-
-score = 0
-# Run episode
-frame_step = 0
-for step in range(500):
-    # Chose an action for each agent in the environment
-    for a in range(env.get_num_agents()):
-        action = agent.act(obs[a])
-        action_dict.update({a: action})
-
-    # Environment step which returns the observations for all agents, their corresponding
-    # reward and whether their are done
-    next_obs, all_rewards, done, _ = env.step(action_dict)
-    env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
-    frame_step += 1
-    # Update replay buffer and train agent
-    for a in range(env.get_num_agents()):
-        agent.step((obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a]))
-        score += all_rewards[a]
-
-    obs = next_obs.copy()
-    if done['__all__']:
-        break
-
-print('Episode: Steps {}\t Score = {}'.format(step, score))
diff --git a/examples/introduction_flatland_2_1_1.py b/examples/introduction_flatland_2_1_1.py
new file mode 100644
index 00000000..ae7a4082
--- /dev/null
+++ b/examples/introduction_flatland_2_1_1.py
@@ -0,0 +1,192 @@
+import time
+
+# In Flatland you can use custom observation builders and predictors
+# Observation builders generate the observation needed by the controller
+# Predictors can be used for short-term predictions, which can help to avoid conflicts in the network
+from flatland.envs.observations import GlobalObsForRailEnv
+# First of all we import the Flatland rail environment
+from flatland.envs.rail_env import RailEnv
+from flatland.envs.rail_generators import sparse_rail_generator
+from flatland.envs.schedule_generators import sparse_schedule_generator
+# We also include a renderer because we want to visualize what is going on in the environment
+from flatland.utils.rendertools import RenderTool, AgentRenderVariant
+
+# This is an introduction example for the Flatland 2.1.1 version.
+# Changes and highlights of this version include:
+# - Stochastic events (malfunctions)
+# - Different travel speeds for different agents
+# - Levels are generated using a novel generator to reflect more realistic railway networks
+# - Agents start outside of the environment and enter it at their own time
+# - Agents leave the environment after they have reached their goal
+# Use the new sparse_rail_generator to generate feasible network configurations with corresponding tasks
+# Training on simple, small tasks is the best way to get familiar with the environment
+# We start by importing the necessary rail and schedule generators
+# The rail generator will generate the railway infrastructure
+# The schedule generator will assign tasks to all the agents within the railway network
+
+# The railway infrastructure can be built using any of the provided generators in env/rail_generators.py
+# Here we use the sparse_rail_generator with the following parameters
+
+width = 100  # Width of the map
+height = 100  # Height of the map
+nr_trains = 10  # Number of trains that have an assigned task in the env
+cities_in_map = 20  # Number of cities where agents can start or end
+seed = 14  # Random seed
+grid_distribution_of_cities = False  # Type of city distribution; if False, cities are placed randomly
+max_rails_between_cities = 2  # Max number of tracks allowed between cities. This is the number of entry points to a city
+max_rail_in_cities = 6  # Max number of parallel tracks within a city, representing a realistic train station
+
+rail_generator = sparse_rail_generator(max_num_cities=cities_in_map,
+                                       seed=seed,
+                                       grid_mode=grid_distribution_of_cities,
+                                       max_rails_between_cities=max_rails_between_cities,
+                                       max_rails_in_city=max_rail_in_cities,
+                                       )
+
+# The schedule generator can make very basic schedules with a start point, end point and a speed profile for each agent.
+# The speed profiles can also be adjusted directly, as shown later on. We start by introducing a statistical
+# distribution of speed profiles
+
+# Different agent types (trains) with different speeds.
+speed_ration_map = {1.: 0.25,  # Fast passenger train
+                    1. / 2.: 0.25,  # Fast freight train
+                    1. / 3.: 0.25,  # Slow commuter train
+                    1. / 4.: 0.25}  # Slow freight train
+
+# We can now initialize the schedule generator with the given speed profiles
+
+schedule_generator = sparse_schedule_generator(speed_ration_map)
+
+# We can furthermore pass stochastic data to the RailEnv constructor, which will allow for stochastic malfunctions
+# during an episode.
+
+stochastic_data = {'prop_malfunction': 0.3,  # Percentage of defective agents
+                   'malfunction_rate': 30,  # Rate of malfunction occurrence
+                   'min_duration': 3,  # Minimal duration of malfunction
+                   'max_duration': 20  # Max duration of malfunction
+                   }
+
+# Custom observation builder without predictor
+observation_builder = GlobalObsForRailEnv()
+
+# Custom observation builder with predictor; to try it, import TreeObsForRailEnv and ShortestPathPredictorForRailEnv and uncomment the line below
+# observation_builder = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())
+
+# Construct the environment with the given observation builder, generators, predictor, and stochastic data
+env = RailEnv(width=width,
+              height=height,
+              rail_generator=rail_generator,
+              schedule_generator=schedule_generator,
+              number_of_agents=nr_trains,
+              stochastic_data=stochastic_data,  # Malfunction data generator
+              obs_builder_object=observation_builder,
+              remove_agents_at_target=True  # Removes agents at the end of their journey to make space for others
+              )
+
+# Initialize the renderer
+env_renderer = RenderTool(env, gl="PILSVG",
+                          agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS_AND_BOX,
+                          show_debug=False,
+                          screen_height=1000,  # Adjust these parameters to fit your resolution
+                          screen_width=1000)  # Adjust these parameters to fit your resolution
+
+
+# We first look at the map we have created
+
+# env_renderer.render_env(show=True)
+# time.sleep(2)
+# Import your own Agent or use RLlib to train agents on Flatland
+# As an example we use a random agent instead
+class RandomAgent:
+
+    def __init__(self, state_size, action_size):
+        self.state_size = state_size
+        self.action_size = action_size
+
+    def act(self, state):
+        """
+        :param state: input is the observation of the agent
+        :return: returns an action
+        """
+        return 2  # np.random.choice(np.arange(self.action_size))
+
+    def step(self, memories):
+        """
+        Step function to improve the agent by adjusting its policy given the observations
+
+        :param memories: SARS tuple to be used for learning
+        :return:
+        """
+        return
+
+    def save(self, filename):
+        # Store the current policy
+        return
+
+    def load(self, filename):
+        # Load a policy
+        return
+
+# Initialize the agent with the parameters corresponding to the environment and observation_builder
+controller = RandomAgent(218, env.action_space[0])
+
+# We start by looking at the information of each agent
+# We can see the task assigned to each agent by looking at its initial position, direction and target
+print("Agents in the environment have to solve the following tasks: \n")
+for agent_idx, agent in enumerate(env.agents):
+    print(
+        "The agent with index {} has the task to go from its initial position {}, facing in the direction {}, to its target at {}.".format(
+            agent_idx, agent.initial_position, agent.direction, agent.target))
+
+# Each agent also has a status indicating whether it is currently active in the environment, done, or not yet present
+# For example, we see that the agent with index 0 is currently not active
print("Their current statuses are: \n")
+for agent_idx, agent in enumerate(env.agents):
+    print("Agent {} status is: {} with its current position being {}".format(agent_idx, str(agent.status),
+                                                                              str(agent.position)))
+
+# An agent needs to take one of the actions [1, 2, 3] (anything except do_nothing or stop) to enter the level
+# If the starting cell is free, the agent will enter the level
+# If multiple agents want to enter the same cell at the same time, the agent with the lower index will enter first.
+
+
+# Empty dictionary for all agent actions
+action_dict = dict()
+
+print("Start episode...")
+# Reset environment and get initial observations for all agents
+start_reset = time.time()
+obs, info = env.reset()
+end_reset = time.time()
+print(end_reset - start_reset)
+print(env.get_num_agents(), )
+# Reset the rendering system
+env_renderer.reset()
+
+# Here you can also further enhance the provided observation by means of normalization
+# See the training navigation example in the baseline repository
+
+score = 0
+# Run episode
+frame_step = 0
+for step in range(500):
+    # Choose an action for each agent in the environment
+    for a in range(env.get_num_agents()):
+        action = controller.act(obs[a])
+        action_dict.update({a: action})
+
+    # Environment step which returns the observations for all agents, their corresponding
+    # rewards and whether they are done
+    next_obs, all_rewards, done, _ = env.step(action_dict)
+    env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
+    frame_step += 1
+    # Update replay buffer and train agent
+    for a in range(env.get_num_agents()):
+        controller.step((obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a]))
+        score += all_rewards[a]
+
+    obs = next_obs.copy()
+    if done['__all__']:
+        break
+
+print('Episode: Steps {}\t Score = {}'.format(step, score))
diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py
index 86987a56..eec88742 100644
--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -186,7 +186,7 @@ class RailEnv(Environment):
         self.num_resets = 0
         self.distance_map = DistanceMap(self.agents, self.height, self.width)
 
-        self.action_space = [1]
+        self.action_space = [5]
 
         self._seed()
--
GitLab