test_flatland_malfunction.py 17.3 KB
Newer Older
u214892's avatar
u214892 committed
1
import random
2
from typing import Dict, List
u214892's avatar
u214892 committed
3

4
5
import numpy as np

6
from flatland.core.env_observation_builder import ObservationBuilder
u214892's avatar
u214892 committed
7
from flatland.core.grid.grid4 import Grid4TransitionsEnum
8
from flatland.core.grid.grid4_utils import get_new_position
u214892's avatar
u214892 committed
9
10
11
from flatland.envs.rail_env import RailEnv, RailEnvActions
from flatland.envs.rail_generators import complex_rail_generator, sparse_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator, sparse_schedule_generator
12
from test_utils import Replay, ReplayConfig, run_replay_config, set_penalties_for_replay
13
14


15
class SingleAgentNavigationObs(ObservationBuilder):
    """
    One-hot observation over the three headings relative to the agent: [Left, Forward, Right].

    The entry set to 1 marks the branch that leads to the shortest path to the agent's target,
    as read from the environment's distance map. E.g. [1, 0, 0] means that taking the left
    branch is the shortest route.
    """

    def __init__(self):
        super().__init__()

    def reset(self):
        # Nothing to reset: this builder keeps no state of its own.
        pass

    def get(self, handle: int = 0) -> List[int]:
        """
        Build the observation for the agent with the given handle.

        Returns a list of three ints ordered [left, forward, right] with exactly one entry set to 1.
        """
        agent = self.env.agents[handle]
        transitions = self.env.rail.get_transitions(*agent.position, agent.direction)

        # With a single possible transition the forward slot is aligned with it.
        if np.count_nonzero(transitions) == 1:
            return [0, 1, 0]

        # Walk the headings left, forward, right relative to the current orientation and
        # collect the distance-map value of the cell reached through each of them.
        distances = []
        for offset in (-1, 0, 1):
            heading = (agent.direction + offset) % 4
            if not transitions[heading]:
                # Branch not available: it can never be the minimum.
                distances.append(np.inf)
                continue
            next_cell = get_new_position(agent.position, heading)
            distances.append(
                self.env.distance_map.get()[handle, next_cell[0], next_cell[1], heading])

        one_hot = [0, 0, 0]
        one_hot[np.argmin(distances)] = 1
        return one_hot


def test_malfunction_process():
    """Force agent 0 to malfunction every fifth step and check that it stands still while broken."""
    # Fixed malfunction duration for this test (min_duration == max_duration)
    stochastic_data = {
        'prop_malfunction': 1.,
        'malfunction_rate': 1000,
        'min_duration': 3,
        'max_duration': 3,
    }
    np.random.seed(5)

    rail_generator = complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=5, max_dist=99999,
                                            seed=0)
    env = RailEnv(width=20,
                  height=20,
                  rail_generator=rail_generator,
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=2,
                  obs_builder_object=SingleAgentNavigationObs(),
                  stochastic_data=stochastic_data)

    obs = env.reset()

    # An initial countdown to the next malfunction must have been assigned
    assert env.agents[0].malfunction_data['next_malfunction'] > 0

    agent_halts = 0
    total_down_time = 0
    previous_position = env.agents[0].position

    for step in range(100):
        # Follow the one-hot observation; the +1 shifts the index onto the action numbering.
        actions = {i: np.argmax(obs[i]) + 1 for i in range(len(obs))}

        if step % 5 == 0:
            # Stop the agent and set it to be malfunctioning
            forced_data = env.agents[0].malfunction_data
            forced_data['malfunction'] = -1
            forced_data['next_malfunction'] = 0
            agent_halts += 1

        obs, _rewards, _done, _info = env.step(actions)

        first_agent = env.agents[0]
        remaining_malfunction = first_agent.malfunction_data['malfunction']

        if remaining_malfunction > 0:
            # The agent must not move while it is malfunctioning
            assert previous_position == first_agent.position

        previous_position = first_agent.position
        total_down_time += remaining_malfunction

    # Check that the appropriate number of malfunctions is achieved
    assert env.agents[0].malfunction_data['nr_malfunctions'] == 21

    # Check that 20 stops were performed
    assert agent_halts == 20

    # Check that the agent actually spent time standing around while malfunctioning
    assert total_down_time > 0
u214892's avatar
u214892 committed
114
115
116
117
118
119
120
121
122
123


def test_malfunction_process_statistically():
    """Tests that malfunctions are produced by stochastic_data!"""
    # Fixed malfunction duration for this test (min_duration == max_duration)
    stochastic_data = {
        'prop_malfunction': 1.,
        'malfunction_rate': 2,
        'min_duration': 3,
        'max_duration': 3,
    }
    # Seed both random sources so that the expected count below is reproducible.
    np.random.seed(5)
    random.seed(0)

    rail_generator = complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=5, max_dist=99999,
                                            seed=0)
    schedule_generator = complex_schedule_generator()
    env = RailEnv(width=20,
                  height=20,
                  rail_generator=rail_generator,
                  schedule_generator=schedule_generator,
                  number_of_agents=2,
                  obs_builder_object=SingleAgentNavigationObs(),
                  stochastic_data=stochastic_data)

    env.reset()

    # Number of (agent, step) pairs in which the agent was found malfunctioning.
    malfunction_count = 0
    for _ in range(100):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            malfunction_count += int(agent.malfunction_data['malfunction'] > 0)
            # Pick a random action for every agent
            action_dict[agent.handle] = RailEnvActions(np.random.randint(4))

        env.step(action_dict)

    # check that generation of malfunctions works as expected
    assert malfunction_count == 156, "nb_malfunction={}".format(malfunction_count)
u214892's avatar
u214892 committed
149
150


151
def test_initial_malfunction():
    """Replay an initial malfunction during which the agent keeps requesting MOVE_FORWARD."""
    random.seed(0)
    np.random.seed(0)

    stochastic_data = {
        'prop_malfunction': 1.,  # Percentage of defective agents
        'malfunction_rate': 70,  # Rate of malfunction occurrence
        'min_duration': 2,  # Minimal duration of malfunction
        'max_duration': 5,  # Max duration of malfunction
    }

    # All agents get speed 1.0
    speed_ratio_map = {
        1.: 1.,  # Fast passenger train
        1. / 2.: 0.,  # Fast freight train
        1. / 3.: 0.,  # Slow commuter train
        1. / 4.: 0.,  # Slow freight train
    }

    rail_generator = sparse_rail_generator(max_num_cities=5,
                                           max_rails_between_cities=3,
                                           seed=215545,
                                           grid_mode=True)
    schedule_generator = sparse_schedule_generator(speed_ratio_map)
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_generator,
                  schedule_generator=schedule_generator,
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    set_penalties_for_replay(env)

    east = Grid4TransitionsEnum.EAST
    west = Grid4TransitionsEnum.WEST
    north = Grid4TransitionsEnum.NORTH
    forward = RailEnvActions.MOVE_FORWARD

    steps = [
        # full step penalty when malfunctioning
        Replay(position=(28, 5), direction=east, action=forward,
               set_malfunction=3, malfunction=3,
               reward=env.step_penalty),
        # full step penalty when malfunctioning
        Replay(position=(28, 5), direction=east, action=forward,
               malfunction=2,
               reward=env.step_penalty),
        # malfunction stops in the next step and we're still at the beginning of the cell
        # --> if we take action MOVE_FORWARD, agent should restart and move to the next cell
        # malfunctioning ends: starting and running at speed 1.0
        Replay(position=(28, 5), direction=east, action=forward,
               malfunction=1,
               reward=env.start_penalty + env.step_penalty * 1.0),
        # running at speed 1.0
        Replay(position=(28, 4), direction=west, action=forward,
               malfunction=0,
               reward=env.step_penalty * 1.0),
        # running at speed 1.0
        Replay(position=(27, 4), direction=north, action=forward,
               malfunction=0,
               reward=env.step_penalty * 1.0),
    ]

    replay_config = ReplayConfig(
        replay=steps,
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target
    )
    run_replay_config(env, [replay_config])


def test_initial_malfunction_stop_moving():
    """Replay an initial malfunction that ends while the agent requests STOP_MOVING."""
    random.seed(0)
    np.random.seed(0)

    stochastic_data = {
        'prop_malfunction': 1.,  # Percentage of defective agents
        'malfunction_rate': 70,  # Rate of malfunction occurrence
        'min_duration': 2,  # Minimal duration of malfunction
        'max_duration': 5,  # Max duration of malfunction
    }

    # All agents get speed 1.0
    speed_ratio_map = {
        1.: 1.,  # Fast passenger train
        1. / 2.: 0.,  # Fast freight train
        1. / 3.: 0.,  # Slow commuter train
        1. / 4.: 0.,  # Slow freight train
    }

    rail_generator = sparse_rail_generator(max_num_cities=5,
                                           max_rails_between_cities=3,
                                           seed=215545,
                                           grid_mode=True)
    schedule_generator = sparse_schedule_generator(speed_ratio_map)
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_generator,
                  schedule_generator=schedule_generator,
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    set_penalties_for_replay(env)

    east = Grid4TransitionsEnum.EAST
    west = Grid4TransitionsEnum.WEST
    start_cell = (28, 5)

    steps = [
        # full step penalty when stopped
        Replay(position=start_cell, direction=east, action=RailEnvActions.DO_NOTHING,
               set_malfunction=3, malfunction=3,
               reward=env.step_penalty),
        # full step penalty when stopped
        Replay(position=start_cell, direction=east, action=RailEnvActions.DO_NOTHING,
               malfunction=2,
               reward=env.step_penalty),
        # malfunction stops in the next step and we're still at the beginning of the cell
        # --> if we take action STOP_MOVING, agent should restart without moving
        # (full step penalty while stopped)
        Replay(position=start_cell, direction=east, action=RailEnvActions.STOP_MOVING,
               malfunction=1,
               reward=env.step_penalty),
        # we have stopped and do nothing --> should stand still (full step penalty while stopped)
        Replay(position=start_cell, direction=east, action=RailEnvActions.DO_NOTHING,
               malfunction=0,
               reward=env.step_penalty),
        # we start to move forward --> should go to next cell now
        # (start penalty plus step penalty at speed 1.0)
        Replay(position=start_cell, direction=east, action=RailEnvActions.MOVE_FORWARD,
               malfunction=0,
               reward=env.start_penalty + env.step_penalty * 1.0),
        # step penalty at speed 1.0
        Replay(position=(28, 4), direction=west, action=RailEnvActions.MOVE_FORWARD,
               malfunction=0,
               reward=env.step_penalty * 1.0),
    ]

    replay_config = ReplayConfig(
        replay=steps,
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target
    )

    run_replay_config(env, [replay_config])
309
310


311
def test_initial_malfunction_do_nothing():
    """Replay an initial malfunction that ends while the agent requests DO_NOTHING."""
    random.seed(0)
    np.random.seed(0)

    stochastic_data = {
        'prop_malfunction': 1.,  # Percentage of defective agents
        'malfunction_rate': 70,  # Rate of malfunction occurrence
        'min_duration': 2,  # Minimal duration of malfunction
        'max_duration': 5,  # Max duration of malfunction
    }

    # All agents get speed 1.0
    speed_ratio_map = {
        1.: 1.,  # Fast passenger train
        1. / 2.: 0.,  # Fast freight train
        1. / 3.: 0.,  # Slow commuter train
        1. / 4.: 0.,  # Slow freight train
    }

    rail_generator = sparse_rail_generator(max_num_cities=5,
                                           max_rails_between_cities=3,
                                           seed=215545,
                                           grid_mode=True)
    schedule_generator = sparse_schedule_generator(speed_ratio_map)
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_generator,
                  schedule_generator=schedule_generator,
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    set_penalties_for_replay(env)

    east = Grid4TransitionsEnum.EAST
    west = Grid4TransitionsEnum.WEST
    idle = RailEnvActions.DO_NOTHING
    forward = RailEnvActions.MOVE_FORWARD
    start_cell = (28, 5)

    steps = [
        # full step penalty while malfunctioning
        Replay(position=start_cell, direction=east, action=idle,
               set_malfunction=3, malfunction=3,
               reward=env.step_penalty),
        # full step penalty while malfunctioning
        Replay(position=start_cell, direction=east, action=idle,
               malfunction=2,
               reward=env.step_penalty),
        # malfunction stops in the next step and we're still at the beginning of the cell
        # --> if we take action DO_NOTHING, agent should restart without moving
        # (full step penalty while stopped)
        Replay(position=start_cell, direction=east, action=idle,
               malfunction=1,
               reward=env.step_penalty),
        # we haven't started moving yet --> stay here (full step penalty while stopped)
        Replay(position=start_cell, direction=east, action=idle,
               malfunction=0,
               reward=env.step_penalty),
        # we start to move forward --> should go to next cell now
        # (start penalty + step penalty for speed 1.0)
        Replay(position=start_cell, direction=east, action=forward,
               malfunction=0,
               reward=env.start_penalty + env.step_penalty * 1.0),
        # step penalty for speed 1.0
        Replay(position=(28, 4), direction=west, action=forward,
               malfunction=0,
               reward=env.step_penalty * 1.0),
    ]

    replay_config = ReplayConfig(
        replay=steps,
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target
    )

    run_replay_config(env, [replay_config])
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411


def test_initial_nextmalfunction_not_below_zero():
    """Regression test: 'next_malfunction' must not be negative after the first step."""
    random.seed(0)
    np.random.seed(0)

    stochastic_data = {
        'prop_malfunction': 1.,  # Percentage of defective agents
        'malfunction_rate': 0.5,  # Rate of malfunction occurrence
        'min_duration': 5,  # Minimal duration of malfunction
        'max_duration': 5,  # Max duration of malfunction
    }

    # All agents get speed 1.0
    speed_ratio_map = {
        1.: 1.,  # Fast passenger train
        1. / 2.: 0.,  # Fast freight train
        1. / 3.: 0.,  # Slow commuter train
        1. / 4.: 0.,  # Slow freight train
    }

    rail_generator = sparse_rail_generator(max_num_cities=5,
                                           max_rails_between_cities=3,
                                           seed=215545,
                                           grid_mode=True)
    schedule_generator = sparse_schedule_generator(speed_ratio_map)
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_generator,
                  schedule_generator=schedule_generator,
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )

    # Grab the agent before stepping, then step once without any actions.
    agent = env.agents[0]
    env.step({})

    # next_malfunction was -1 before the bugfix https://gitlab.aicrowd.com/flatland/flatland/issues/186
    next_malfunction = agent.malfunction_data['next_malfunction']
    assert next_malfunction >= 0, \
        "next_malfunction should be >=0, found {}".format(next_malfunction)