import numpy as np
import os

# In Flatland you can use custom observation builders and predictors
# Observation builders generate the observation needed by the controller
# Predictors can be used to do short-term prediction, which can help in avoiding conflicts in the network
from flatland.envs.malfunction_generators import malfunction_from_params, MalfunctionParameters, ParamMalfunctionGen

from flatland.envs.observations import GlobalObsForRailEnv
# First of all we import the Flatland rail environment
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_env import RailEnvActions
from flatland.envs.rail_generators import sparse_rail_generator
#from flatland.envs.sparse_rail_gen import SparseRailGen
from flatland.envs.line_generators import sparse_line_generator
# We also include a renderer because we want to visualize what is going on in the environment
from flatland.utils.rendertools import RenderTool, AgentRenderVariant

# This is an introduction example for the Flatland 3 version.
# Changes and highlights of this version include
# - Stochastic events (malfunctions)
# - Different travel speeds for different agents
# - Levels are generated using a novel generator to reflect more realistic railway networks
# - Agents start outside of the environment and enter at their own time
# - Agents leave the environment after they have reached their goal
# Use the new sparse_rail_generator to generate feasible network configurations with corresponding tasks
# Training on simple small tasks is the best way to get familiar with the environment
# We start by importing the necessary rail and schedule generators
# The rail generator will generate the railway infrastructure
# The schedule generator will assign tasks to all the agents within the railway network

# The railway infrastructure can be built using any of the provided generators in env/rail_generators.py
# Here we use the sparse_rail_generator with the following parameters

width = 16 * 7  # Width of map
height = 9 * 7  # Height of map
nr_trains = 50  # Number of trains that have an assigned task in the env
cities_in_map = 20  # Number of cities where agents can start or end
seed = 14  # Random seed
grid_distribution_of_cities = False  # Type of city distribution, if False cities are randomly placed
max_rails_between_cities = 2  # Max number of tracks allowed between cities. This is the number of entry points to a city
max_rail_in_cities = 6  # Max number of parallel tracks within a city, representing a realistic train station

rail_generator = sparse_rail_generator(max_num_cities=cities_in_map,
                                       seed=seed,
                                       grid_mode=grid_distribution_of_cities,
                                       max_rails_between_cities=max_rails_between_cities,
                                       max_rail_pairs_in_city=max_rail_in_cities,
                                       )

#rail_generator = SparseRailGen(max_num_cities=cities_in_map,
#                                       seed=seed,
#                                       grid_mode=grid_distribution_of_cities,
#                                       max_rails_between_cities=max_rails_between_cities,
#                                       max_rails_in_city=max_rail_in_cities,
#                                       )


# The schedule generator can make very basic schedules with a start point, end point and a speed profile for each agent.
# The speed profiles can also be adjusted directly, as shown later on. We start by introducing a statistical
# distribution of speed profiles

# Different agent types (trains) with different speeds.
speed_ratio_map = {1.: 0.25,  # Fast passenger train
                   1. / 2.: 0.25,  # Fast freight train
                   1. / 3.: 0.25,  # Slow commuter train
                   1. / 4.: 0.25}  # Slow freight train

# We can now initialize the line generator with the given speed profiles

line_generator = sparse_line_generator(speed_ratio_map)

# We can furthermore pass stochastic data to the RailEnv constructor which will allow for stochastic malfunctions
# during an episode.

stochastic_data = MalfunctionParameters(malfunction_rate=1/10000,  # Rate of malfunction occurrence
                                        min_duration=15,  # Minimal duration of malfunction
                                        max_duration=50  # Max duration of malfunction
                                        )
# Custom observation builder without predictor
observation_builder = GlobalObsForRailEnv()

# Custom observation builder with predictor, uncomment line below if you want to try this one
# observation_builder = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())
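# (trying the tree observation also requires these imports, assuming the standard
#  Flatland module layout:
#  from flatland.envs.observations import TreeObsForRailEnv
#  from flatland.envs.predictions import ShortestPathPredictorForRailEnv)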

# Construct the environment with the given observation, generators, predictors, and stochastic data
env = RailEnv(width=width,
              height=height,
              rail_generator=rail_generator,
              line_generator=line_generator,
              number_of_agents=nr_trains,
              obs_builder_object=observation_builder,
              #malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
              malfunction_generator=ParamMalfunctionGen(stochastic_data),
              remove_agents_at_target=True)
env.reset()
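
# Note: env.reset() also returns the initial observations and an info dict; a
# sketch of capturing them right away (assuming the usual Flatland reset signature):
# observations, info = env.reset()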

# Initialize the renderer
env_renderer = RenderTool(env,
                          agent_render_variant=AgentRenderVariant.ONE_STEP_BEHIND,
                          show_debug=False,
                          screen_height=600,  # Adjust these parameters to fit your resolution
                          screen_width=800)  # Adjust these parameters to fit your resolution


# We first look at the map we have created
# The first thing we notice is that some agents don't have feasible paths to their target.

# env_renderer.render_env(show=True)
# time.sleep(2)
# Import your own Agent or use RLlib to train agents on Flatland
# As an example we use a random agent instead
class RandomAgent:

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size

    def act(self, state):
        """
        :param state: input is the observation of the agent
        :return: returns an action
        """
        return np.random.choice([RailEnvActions.MOVE_FORWARD, RailEnvActions.MOVE_RIGHT, RailEnvActions.MOVE_LEFT,
                                 RailEnvActions.STOP_MOVING])

    def step(self, memories):
        """
        Step function to improve agent by adjusting policy given the observations

        :param memories: SARS tuple to be used to improve the agent's policy
        :return:
        """
        return

    def save(self, filename):
        # Store the current policy
        return

    def load(self, filename):
        # Load a policy
        return


# Initialize the agent with the parameters corresponding to the environment and observation_builder
# (the state size of 218 is a placeholder here; the random agent ignores its observation)
controller = RandomAgent(218, env.action_space[0])

# We start by looking at the information of each agent
# We can see the task assigned to the agent by looking at
print("\n Agents in the environment have to solve the following tasks: \n")
for agent_idx, agent in enumerate(env.agents):
    print(
        "The agent with index {} has the task to go from its initial position {}, facing in the direction {} to its target at {}.".format(
            agent_idx, agent.initial_position, agent.direction, agent.target))

# Each agent has a status indicating whether it is not yet active in the environment, active, or done
# For example we see that the agent with index 0 is currently not active
print("\n Their current statuses are:")
print("============================")

for agent_idx, agent in enumerate(env.agents):
    print("Agent {} status is: {} with its current position being {}".format(agent_idx, str(agent.status),
                                                                             str(agent.position)))
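
# The status values come from an enum, so you can also filter on them directly.
# A sketch, assuming the RailAgentStatus enum that backs agent.status:
# from flatland.envs.agent_utils import RailAgentStatus
# waiting = [idx for idx, agent in enumerate(env.agents)
#            if agent.status == RailAgentStatus.READY_TO_DEPART]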

# To enter the level an agent needs to take one of the moving actions [1,2,3]; do_nothing or stop will not let it enter
# If the starting cell is free the agent will enter the level
# If multiple agents want to enter the same cell at the same time, the agent with the lower index enters first.

# Let's check if there are any agents with the same start location
agents_with_same_start = set()
print("\n The following agents have the same initial position:")
print("=====================================================")
for agent_idx, agent in enumerate(env.agents):
    for agent_2_idx, agent2 in enumerate(env.agents):
        if agent_idx != agent_2_idx and agent.initial_position == agent2.initial_position:
            print("Agent {} as the same initial position as agent {}".format(agent_idx, agent_2_idx))
            agents_with_same_start.add(agent_idx)

# Let's try to enter with all of these agents at the same time
action_dict = dict()

for agent_id in agents_with_same_start:
    action_dict[agent_id] = RailEnvActions.MOVE_FORWARD  # Any moving action lets the agent try to enter

# Do a step in the environment to see what agents entered:
env.step(action_dict)

# Current state and position of the agents after all agents with same start position tried to move
print("\n This happened when all tried to enter at the same time:")
print("========================================================")
for agent_id in agents_with_same_start:
    print(
        "Agent {} status is: {} with the current position being {}.".format(
            agent_id, str(env.agents[agent_id].status),
            str(env.agents[agent_id].position)))

# As you can see, only the agents with lower indices moved. As soon as the cell is free again the remaining agents
# can attempt to start.

# You will also notice that the agents move at different speeds once they are on the rail.
# When moving, an agent always moves at its assigned speed, never at a speed in between.
# The fastest an agent can go is 1, meaning that it moves to the next cell at every time step
# All slower speeds indicate the fraction of a cell that is traversed at each time step
# Let's look at the current speed data of the agents:

print("\n The speed information of the agents are:")
print("=========================================")

for agent_idx, agent in enumerate(env.agents):
    print(
        "Agent {} speed is: {:.2f} with the current fractional position being {}".format(
            agent_idx, agent.speed_data['speed'], agent.speed_data['position_fraction']))
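
# As promised above, a speed profile can also be adjusted directly. A sketch,
# assuming speed_data stays mutable after reset (the value here is hypothetical):
# env.agents[0].speed_data['speed'] = 1. / 2.  # turn agent 0 into a half-speed train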

# Agents can also suffer stochastic malfunctions, which leave them unable to move
# for a certain number of time steps. The malfunction data of the agents can easily be accessed as follows
print("\n The malfunction data of the agents are:")
print("========================================")

for agent_idx, agent in enumerate(env.agents):
    print(
        "Agent {} is OK = {}".format(
            agent_idx, agent.malfunction_data['malfunction'] < 1))
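
# A controller can use this to react to malfunctions, e.g. by not issuing moving
# actions to broken agents. A sketch, assuming malfunction_data['malfunction']
# counts the remaining broken steps:
# for agent_idx, agent in enumerate(env.agents):
#     if agent.malfunction_data['malfunction'] > 0:
#         action_dict[agent_idx] = RailEnvActions.DO_NOTHING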

# Now that you have seen these novel concepts you will realize that agents don't need to take
# an action at every time step, since an action only changes the outcome when it is chosen at cell entry.
# Therefore the environment provides information about which agents need to provide an action in the next step.
# You can access this in the following way.

# Choose an action for each agent
for a in range(env.get_num_agents()):
    action = controller.act(0)
    action_dict.update({a: action})
# Do the environment step
observations, rewards, dones, information = env.step(action_dict)
print("\n The following agents can register an action:")
print("========================================")
for agent_id, required in information['action_required'].items():
    if required:
        print("Agent {} needs to submit an action.".format(agent_id))

# We recommend that you monitor the malfunction data and the action_required information in order to optimize your
# training and controlling code.

# Let us now look at an episode playing out with random actions performed

print("\nStart episode...")

# Reset the rendering system
env_renderer.reset()

# Here you can also further enhance the provided observation by means of normalization
# See the training navigation example in the baselines repository


score = 0
frame_step = 0

os.makedirs("tmp/frames", exist_ok=True)

# Run episode
for step in range(500):
    # Choose an action for each agent in the environment
    for a in range(env.get_num_agents()):
        action = controller.act(observations[a])
        action_dict.update({a: action})

    # Environment step which returns the observations for all agents, their corresponding
    # rewards and whether they are done

    next_obs, all_rewards, done, _ = env.step(action_dict)

    env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
    env_renderer.gl.save_image('tmp/frames/flatland_frame_{:04d}.png'.format(step))
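    # (the saved frames can later be combined into a video with an external tool,
    #  e.g. ffmpeg -framerate 10 -i tmp/frames/flatland_frame_%04d.png episode.mp4)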
    frame_step += 1
    # Update replay buffer and train agent
    for a in range(env.get_num_agents()):
        controller.step((observations[a], action_dict[a], all_rewards[a], next_obs[a], done[a]))
        score += all_rewards[a]

    observations = next_obs.copy()
    if done['__all__']:
        break
    print('Episode: Steps {}\t Score = {}'.format(step, score))