import numpy as np
import time

# In Flatland you can use custom observation builders and predictors.
# Observation builders generate the observation needed by the controller.
# Predictors can be used for short-term prediction, which can help in avoiding conflicts in the network.
from flatland.envs.observations import GlobalObsForRailEnv
# First of all we import the Flatland rail environment
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_env import RailEnvActions, RailAgentStatus
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
# We also include a renderer because we want to visualize what is going on in the environment
from flatland.utils.rendertools import RenderTool, AgentRenderVariant
from flatland.envs.malfunction_generators import malfunction_from_params
from libs.graph import BuildGraphFromEnvironment, GraphPathsLocker
from libs.graph_agent import GraphAgent, AgentsList
import os

width = 40  # Width of map
height = 40  # Height of map
nr_trains = 8  # Number of trains that have an assigned task in the env
cities_in_map = 5  # Number of cities where agents can start or end
seed = 14  # Random seed

width = 150  # Width of map
height = 150  # Height of map
nr_trains = 100  # Number of trains that have an assigned task in the env
cities_in_map = 100  # Number of cities where agents can start or end
seed = 14  # Random seed

# width = 26  # Width of map
# height = 26  # Height of map
# nr_trains = 1  # Number of trains that have an assigned task in the env
# cities_in_map = 2  # Number of cities where agents can start or end
# seed = 14  # Random seed

# width = 40  # Width of map
# height = 40  # Height of map
# nr_trains = 5  # Number of trains that have an assigned task in the env
# cities_in_map = 5  # Number of cities where agents can start or end
# seed = 14  # Random seed

# width = 30  # Width of map
# height = 30  # Height of map
# nr_trains = 3  # Number of trains that have an assigned task in the env
# cities_in_map = 100  # Number of cities where agents can start or end
# seed = 14  # Random seed

# The settings below are the ones actually used; the assignments above are kept as alternative configurations.
width = 80  # Width of map
height = 80  # Height of map
nr_trains = 50  # Number of trains that have an assigned task in the env
cities_in_map = 100  # Number of cities where agents can start or end
seed = 14  # Random seed

grid_distribution_of_cities = False  # Type of city distribution, if False cities are randomly placed
max_rails_between_cities = 2  # Max number of tracks allowed between cities; this is the number of entry points to a city
max_rail_in_cities = 6  # Max number of parallel tracks within a city, representing a realistic train station

rail_generator = sparse_rail_generator(max_num_cities=cities_in_map,
                                       seed=seed,
                                       grid_mode=grid_distribution_of_cities,
                                       max_rails_between_cities=max_rails_between_cities,
                                       max_rails_in_city=max_rail_in_cities,
                                       )

# The schedule generator can make very basic schedules with a start point, end point and a speed profile for each agent.
# The speed profiles can be adjusted directly as well, as shown later on. We start by introducing a statistical
# distribution of speed profiles.

# Different agent types (trains) with different speeds.
speed_ration_map = {1.: 0.25,  # Fast passenger train
                    1. / 2.: 0.25,  # Fast freight train
                    1. / 3.: 0.25,  # Slow commuter train
                    1. / 4.: 0.25}  # Slow freight train
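# A quick optional sanity check: the ratios above are meant to describe a statistical distribution over
# speed profiles, so they should sum to 1.0 before we hand them to the schedule generator.
assert abs(sum(speed_ration_map.values()) - 1.0) < 1e-9, "speed profile ratios should sum to 1.0"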
# We can now initiate the schedule generator with the given speed profiles
schedule_generator = sparse_schedule_generator(speed_ration_map)

# We can furthermore pass stochastic data to the RailEnv constructor, which will allow for stochastic malfunctions
# during an episode.
stochastic_data = {'malfunction_rate': 100,  # Rate of malfunction occurrence for a single agent
                   'prop_malfunction': 0.01,
                   'min_duration': 15,  # Minimal duration of malfunction
                   'max_duration': 50  # Max duration of malfunction
                   }

# Custom observation builder without predictor
observation_builder = GlobalObsForRailEnv()

# Custom observation builder with predictor, uncomment the line below if you want to try this one
# observation_builder = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())

# Construct the environment with the given observation builder, generators, predictors, and stochastic data
env = RailEnv(width=width,
              height=height,
              rail_generator=rail_generator,
              schedule_generator=schedule_generator,
              number_of_agents=nr_trains,
              malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),  # Malfunction data generator
              obs_builder_object=observation_builder,
              remove_agents_at_target=True  # Removes agents at the end of their journey to make space for others
              )
env.reset()

# Initiate the renderer
env_renderer = RenderTool(env, gl="PILSVG",
                          agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS_AND_BOX,
                          show_debug=False,
                          screen_height=1920,  # Adjust these parameters to fit your resolution
                          screen_width=1080)  # Adjust these parameters to fit your resolution

# The first thing we notice is that some agents don't have feasible paths to their target.
# We first look at the map we have created:
# env_renderer.render_env(show=True)

timev = time.time()
graph = BuildGraphFromEnvironment(env)
locker = GraphPathsLocker(env.height, env.width)
controllers = [GraphAgent(graph.vs, graph.es, graph.rev_es, graph.calc_distances(agent.target),
                          agent.initial_position, agent.direction, agent.target, locker,
                          env=env, agent_id=i)
               for i, agent in enumerate(env.agents)]
alist = AgentsList(controllers, env.agents, max(5, int(round(0.1 * (env.width + env.height) / 2))))
print("Time for graph and agents:", time.time() - timev)

# We start by looking at the information of each agent.
# We can see the task assigned to the agent by looking at its initial position, direction and target.
print("\n Agents in the environment have to solve the following tasks: \n")
for agent_idx, agent in enumerate(env.agents):
    print(
        "The agent with index {} has the task to go from its initial position {}, facing in the direction {}, to its target at {}.".format(
            agent_idx, agent.initial_position, agent.direction, agent.target))

# Each agent has a status indicating whether it is still waiting to enter the environment, currently active, or done.
# For example, we see that the agent with index 0 is currently not active.
print("\n Their current statuses are:")
print("============================")
for agent_idx, agent in enumerate(env.agents):
    print("Agent {} status is: {} with its current position being {}".format(agent_idx, str(agent.status),
                                                                              str(agent.position)))
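# As a quick, optional summary, we can also count how many agents are in each RailAgentStatus;
# this only relies on agent.status, which the loop above already prints.
from collections import Counter

status_counts = Counter(agent.status for agent in env.agents)
print("Status summary:", {str(status): count for status, count in status_counts.items()})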
# To enter the level, an agent needs to take any action in [1, 2, 3], i.e. anything except do_nothing or stop.
# If the starting cell is free, the agent will enter the level.
# If multiple agents want to enter the same cell at the same time, the agent with the lower index will enter first.

# Let's check if there are any agents with the same start location
agents_with_same_start = set()
print("\n The following agents have the same initial position:")
print("=====================================================")
for agent_idx, agent in enumerate(env.agents):
    for agent_2_idx, agent2 in enumerate(env.agents):
        if agent_idx != agent_2_idx and agent.initial_position == agent2.initial_position:
            print("Agent {} has the same initial position as agent {}".format(agent_idx, agent_2_idx))
            agents_with_same_start.add(agent_idx)

# Let's try to enter with all of these agents at the same time
action_dict = dict()

# for agent_id in agents_with_same_start:
#     action_dict[agent_id] = 1  # Try to move with the agents

# Do a step in the environment to see which agents entered:
# env.step(action_dict)

# Current state and position of the agents after all agents with the same start position tried to move
# print("\n This happened when all tried to enter at the same time:")
# print("========================================================")
# for agent_id in agents_with_same_start:
#     print(
#         "Agent {} status is: {} with the current position being {}.".format(
#             agent_id, str(env.agents[agent_id].status),
#             str(env.agents[agent_id].position)))

# As you can see, only the agents with lower indexes moved. As soon as the cell is free again, the remaining agents
# can attempt to start again.

# You will also notice that the agents move at different speeds once they are on the rail.
# An agent always moves at its full speed when moving, never at a speed in between.
# The fastest an agent can go is 1, meaning that it moves to the next cell at every time step.
# All slower speeds indicate the fraction of a cell that is traversed at each time step.
# Let's look at the current speed data of the agents:
print("\n The speed information of the agents are:")
print("=========================================")
for agent_idx, agent in enumerate(env.agents):
    print(
        "Agent {} speed is: {:.2f} with the current fractional position being {}".format(
            agent_idx, agent.speed_data['speed'], agent.speed_data['position_fraction']))

# The agents can also suffer stochastic malfunctions, which leave them unable to move for a certain number of
# time steps. The malfunction data of the agents can easily be accessed as follows:
print("\n The malfunction data of the agents are:")
print("========================================")
for agent_idx, agent in enumerate(env.agents):
    print(
        "Agent {} is OK = {}".format(
            agent_idx, agent.malfunction_data['malfunction'] < 1))
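# As a small illustration, we can also list which agents are currently broken. Here we assume that
# malfunction_data['malfunction'] holds the number of remaining blocked steps, consistent with how it is
# read in the loop above.
broken_agents = {agent_idx: agent.malfunction_data['malfunction']
                 for agent_idx, agent in enumerate(env.agents)
                 if agent.malfunction_data['malfunction'] > 0}
print("Currently malfunctioning agents (remaining blocked steps):", broken_agents if broken_agents else "none")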
# Now that you have seen these concepts, you will realize that agents don't need to take an action at every time
# step, since an action only changes the outcome when it is chosen at cell entry.
# Therefore the environment provides information about which agents need to provide an action in the next step.
# You can access this in the following way.

# Choose an action for each agent
# for a in range(env.get_num_agents()):
#     action = controller.act(0)
#     action_dict.update({a: action})
# for i, a in enumerate(env.agents):
#     action = controllers[i].act(a)
#     action_dict.update({i: action})

# Do the environment step
observations, rewards, dones, information = env.step(action_dict)

print("\n The following agents can register an action:")
print("========================================")
for agent_handle, action_required in information['action_required'].items():
    if action_required:
        print("Agent {} needs to submit an action.".format(agent_handle))

# We recommend that you monitor the malfunction data and the action-required information in order to optimize your
# training and controlling code.

# Let us now look at an episode playing out with the graph-based controllers choosing the actions
print("\nStart episode...")

# Reset the rendering system
env_renderer.reset()

# Here you can also further enhance the provided observation by means of normalization
# See the training navigation example in the baseline repository

score = 0

# Run episode
frame_step = 0

# for step in range(500):
step = 0
while True:
    step += 1
    # Choose an action for each agent in the environment
    # for a in range(env.get_num_agents()):
    #     action = controller.act(observations[a])
    #     action_dict.update({a: action})
    # for i, a in enumerate(env.agents):
    for i in alist.active():
        a = env.agents[i]
        if a.speed_data['position_fraction'] == 0.0:
            action = controllers[i].act(a)
            action_dict.update({i: action})
        # env.agents[a].position = env.agents[a].target

    # Environment step which returns the observations for all agents, their corresponding
    # rewards and whether they are done
    next_obs, all_rewards, done, _ = env.step(action_dict)

    # env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
    env_renderer.render_env(show=True, show_observations=True, show_predictions=True)
    # os.makedirs('./misc/Fames2/', exist_ok=True)
    # env_renderer.gl.save_image('./misc/Fames2/flatland_frame_{:04d}.png'.format(step))

    frame_step += 1
    score += np.sum(list(all_rewards.values()))
    # observations = next_obs.copy()

    if done['__all__']:
        break

    finished = np.sum([a.status == RailAgentStatus.DONE or a.status == RailAgentStatus.DONE_REMOVED
                       for a in env.agents])
    print('Episode: Steps {}\t Score = {}\t Finished = {}\t Not started = {}'.format(step, score, finished,
                                                                                     alist.not_started()))

finished = np.sum([a.status == RailAgentStatus.DONE or a.status == RailAgentStatus.DONE_REMOVED
                   for a in env.agents])
print(f'Trains finished {finished}/{len(env.agents)} = {finished * 100 / len(env.agents):.2f}%')
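# One additional summary that can help when comparing runs of different length and size is a normalized score.
# Dividing by the number of steps times the number of agents is just one reasonable choice, not something the
# environment reports itself.
if step > 0 and env.get_num_agents() > 0:
    normalized_score = score / (step * env.get_num_agents())
    print(f'Normalized score = {normalized_score:.4f}')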