test_flatland_malfunction.py 18.8 KB
Newer Older
u214892's avatar
u214892 committed
1
import random
from typing import Dict, List, Optional

import numpy as np

from flatland.core.env_observation_builder import ObservationBuilder
from flatland.core.grid.grid4 import Grid4TransitionsEnum
from flatland.core.grid.grid4_utils import get_new_position
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.rail_env import RailEnv, RailEnvActions
from flatland.envs.rail_generators import complex_rail_generator, sparse_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator, sparse_schedule_generator

from test_utils import Replay, ReplayConfig, run_replay_config, set_penalties_for_replay
14
15


16
class SingleAgentNavigationObs(ObservationBuilder):
    """
    Observation builder that marks which branch leads to the agent's target on the shortest path.

    The observation is a vector with 3 binary components, one for each of the 3 directions
    (Left, Forward, Right) relative to the agent's current orientation.
    E.g., if taking the Left branch (if available) is the shortest route to the agent's target, the observation
    vector will be [1, 0, 0].
    """

    def __init__(self):
        super().__init__()

    def reset(self):
        """Nothing to reset: this builder defines no state of its own."""
        pass

    def get(self, handle: int = 0) -> Optional[List[int]]:
        """
        Build the observation for one agent.

        :param handle: index of the agent in `self.env.agents`
        :return: a list `[left, forward, right]` with exactly one entry set to 1, or `None` if the agent's
                 status is none of READY_TO_DEPART, ACTIVE or DONE
        """
        agent = self.env.agents[handle]

        # The cell used for the lookup depends on the agent's status: the start cell before departure,
        # the current cell while active, the target cell once done.
        if agent.status == RailAgentStatus.READY_TO_DEPART:
            agent_virtual_position = agent.initial_position
        elif agent.status == RailAgentStatus.ACTIVE:
            agent_virtual_position = agent.position
        elif agent.status == RailAgentStatus.DONE:
            agent_virtual_position = agent.target
        else:
            # no observation for any other status
            return None

        possible_transitions = self.env.rail.get_transitions(*agent_virtual_position, agent.direction)
        num_transitions = np.count_nonzero(possible_transitions)

        # If only one transition is possible, the forward branch is aligned with it.
        if num_transitions == 1:
            return [0, 1, 0]

        # Start from the current orientation, and see which transitions are available;
        # organize them as [left, forward, right], relative to the current orientation.
        min_distances = []
        for direction in [(agent.direction + i) % 4 for i in range(-1, 2)]:
            if possible_transitions[direction]:
                new_position = get_new_position(agent_virtual_position, direction)
                min_distances.append(
                    self.env.distance_map.get()[handle, new_position[0], new_position[1], direction])
            else:
                # unavailable branches must never win the argmin below
                min_distances.append(np.inf)

        observation = [0, 0, 0]
        observation[np.argmin(min_distances)] = 1
        return observation


def test_malfunction_process():
    """
    Force a malfunction on agent 0 every fifth step and check the resulting bookkeeping.

    While agent 0 reports a positive malfunction counter, its position must not change between steps.
    After 100 steps the recorded number of malfunctions, the number of forced halts and the accumulated
    malfunction counter values are checked.
    """
    # Set fixed malfunction duration for this test (min_duration == max_duration)
    stochastic_data = {
        'prop_malfunction': 1.,
        'malfunction_rate': 1000,
        'min_duration': 3,
        'max_duration': 3,
    }
    np.random.seed(5)

    rail_generator = complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=5, max_dist=99999, seed=0)
    env = RailEnv(
        width=20,
        height=20,
        rail_generator=rail_generator,
        schedule_generator=complex_schedule_generator(),
        number_of_agents=2,
        obs_builder_object=SingleAgentNavigationObs(),
        stochastic_data=stochastic_data,
    )

    obs = env.reset(False, False, True)

    # Check that an initial duration for malfunction was assigned
    assert env.agents[0].malfunction_data['next_malfunction'] > 0

    for agent in env.agents:
        agent.status = RailAgentStatus.ACTIVE

    agent_halts = 0
    total_down_time = 0
    agent_old_position = env.agents[0].position

    for step in range(100):
        # The observation is one-hot over [left, forward, right]; the action is its index shifted by one.
        actions = {i: np.argmax(obs[i]) + 1 for i in range(len(obs))}

        if step % 5 == 0:
            # Stop the agent and set it to be malfunctioning
            forced = env.agents[0].malfunction_data
            forced['malfunction'] = -1
            forced['next_malfunction'] = 0
            agent_halts += 1

        obs, _, _, _ = env.step(actions)

        tracked = env.agents[0]
        remaining = tracked.malfunction_data['malfunction']
        if remaining > 0:
            # Check that agent is not moving while malfunctioning
            assert agent_old_position == tracked.position

        agent_old_position = tracked.position
        total_down_time += remaining

    # Check that the appropriate number of malfunctions is achieved
    nr_malfunctions = env.agents[0].malfunction_data['nr_malfunctions']
    assert nr_malfunctions == 21, "Actual {}".format(nr_malfunctions)

    # Check that 20 stops were performed
    assert agent_halts == 20

    # Check that some malfunction time was accumulated
    assert total_down_time > 0
u214892's avatar
u214892 committed
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145


def test_malfunction_process_statistically():
    """Tests that malfunctions are produced by stochastic_data!"""
    # Set fixed malfunction duration for this test (min_duration == max_duration)
    stochastic_data = {
        'prop_malfunction': 1.,
        'malfunction_rate': 2,
        'min_duration': 3,
        'max_duration': 3,
    }

    rail_generator = complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=5, max_dist=99999, seed=0)
    env = RailEnv(
        width=20,
        height=20,
        rail_generator=rail_generator,
        schedule_generator=complex_schedule_generator(),
        number_of_agents=2,
        obs_builder_object=SingleAgentNavigationObs(),
        stochastic_data=stochastic_data,
    )

    # Seed both generators before the reset so that the run below is reproducible
    np.random.seed(5)
    random.seed(0)
    env.reset(False, False, True)

    nb_malfunction = 0
    for _ in range(100):
        # Count every agent that is malfunctioning before this step is taken
        nb_malfunction += sum(1 for agent in env.agents if agent.malfunction_data['malfunction'] > 0)

        # We randomly select an action for each agent
        action_dict: Dict[int, RailEnvActions] = {
            agent.handle: RailEnvActions(np.random.randint(4)) for agent in env.agents
        }

        env.step(action_dict)

    # check that generation of malfunctions works as expected
    assert nb_malfunction == 128, "nb_malfunction={}".format(nb_malfunction)
u214892's avatar
u214892 committed
162
163


164
def test_initial_malfunction():
    """
    Replay a single agent that gets a malfunction of 3 set in the first step and always chooses MOVE_FORWARD.

    The replay lists (28, 5) while the malfunction value goes 3, 2, 1, then (28, 6) heading EAST and
    (27, 6) heading NORTH. How each `Replay` field is checked is defined by `run_replay_config` in
    `test_utils` (not visible here).
    """
    stochastic_data = {
        'prop_malfunction': 1.,  # Percentage of defective agents
        'malfunction_rate': 70,  # Rate of malfunction occurence
        'min_duration': 2,  # Minimal duration of malfunction
        'max_duration': 5,  # Max duration of malfunction
    }

    speed_ration_map = {
        1.: 1.,  # Fast passenger train
        1. / 2.: 0.,  # Fast freight train
        1. / 3.: 0.,  # Slow commuter train
        1. / 4.: 0.,  # Slow freight train
    }

    np.random.seed(5)
    random.seed(0)

    rail_generator = sparse_rail_generator(max_num_cities=5, max_rails_between_cities=3, seed=215545,
                                           grid_mode=True)
    env = RailEnv(
        width=25,
        height=30,
        rail_generator=rail_generator,
        schedule_generator=sparse_schedule_generator(speed_ration_map),
        number_of_agents=1,
        stochastic_data=stochastic_data,  # Malfunction data generator
    )
    set_penalties_for_replay(env)

    east = Grid4TransitionsEnum.EAST
    north = Grid4TransitionsEnum.NORTH
    forward = RailEnvActions.MOVE_FORWARD
    start_cell = (28, 5)

    malfunction_reward = env.step_penalty  # full step penalty when malfunctioning
    restart_reward = env.start_penalty + env.step_penalty * 1.0  # starting and running at speed 1.0
    running_reward = env.step_penalty * 1.0  # running at speed 1.0

    steps = [
        Replay(position=start_cell, direction=east, action=forward, set_malfunction=3, malfunction=3,
               reward=malfunction_reward),
        Replay(position=start_cell, direction=east, action=forward, malfunction=2, reward=malfunction_reward),
        # malfunction stops in the next step and we're still at the beginning of the cell
        # --> if we take action MOVE_FORWARD, agent should restart and move to the next cell
        Replay(position=start_cell, direction=east, action=forward, malfunction=1, reward=restart_reward),
        Replay(position=(28, 6), direction=east, action=forward, malfunction=0, reward=running_reward),
        Replay(position=(27, 6), direction=north, action=forward, malfunction=0, reward=running_reward),
    ]

    replay_config = ReplayConfig(
        replay=steps,
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=start_cell,
        initial_direction=east,
    )

    run_replay_config(env, [replay_config])


def test_initial_malfunction_stop_moving():
    """
    Replay a single agent that gets a malfunction of 3 set in the first step and answers with STOP_MOVING.

    The replay lists no position while READY_TO_DEPART, then (28, 5) until MOVE_FORWARD is chosen again,
    and finally (28, 6). The replay is run with `activate_agents=False`. How each `Replay` field is checked
    is defined by `run_replay_config` in `test_utils` (not visible here).
    """
    # NOTE(review): no random seed is set in this test - confirm that the replay does not depend on one.
    stochastic_data = {
        'prop_malfunction': 1.,  # Percentage of defective agents
        'malfunction_rate': 70,  # Rate of malfunction occurence
        'min_duration': 2,  # Minimal duration of malfunction
        'max_duration': 5,  # Max duration of malfunction
    }

    speed_ration_map = {
        1.: 1.,  # Fast passenger train
        1. / 2.: 0.,  # Fast freight train
        1. / 3.: 0.,  # Slow commuter train
        1. / 4.: 0.,  # Slow freight train
    }

    rail_generator = sparse_rail_generator(max_num_cities=5, max_rails_between_cities=3, seed=215545,
                                           grid_mode=True)
    env = RailEnv(
        width=25,
        height=30,
        rail_generator=rail_generator,
        schedule_generator=sparse_schedule_generator(speed_ration_map),
        number_of_agents=1,
        stochastic_data=stochastic_data,  # Malfunction data generator
    )
    set_penalties_for_replay(env)

    east = Grid4TransitionsEnum.EAST
    active = RailAgentStatus.ACTIVE
    start_cell = (28, 5)

    stopped_reward = env.step_penalty  # full step penalty when stopped
    restart_reward = env.start_penalty + env.step_penalty * 1.0  # start penalty + step penalty at speed 1.0
    running_reward = env.step_penalty * 1.0  # step penalty at speed 1.0

    steps = [
        Replay(position=None, direction=east, action=RailEnvActions.MOVE_FORWARD, set_malfunction=3,
               malfunction=3, reward=stopped_reward, status=RailAgentStatus.READY_TO_DEPART),
        Replay(position=start_cell, direction=east, action=RailEnvActions.DO_NOTHING, malfunction=2,
               reward=stopped_reward, status=active),
        # malfunction stops in the next step and we're still at the beginning of the cell
        # --> if we take action STOP_MOVING, agent should restart without moving
        Replay(position=start_cell, direction=east, action=RailEnvActions.STOP_MOVING, malfunction=1,
               reward=stopped_reward, status=active),
        # we have stopped and do nothing --> should stand still
        Replay(position=start_cell, direction=east, action=RailEnvActions.DO_NOTHING, malfunction=0,
               reward=stopped_reward, status=active),
        # we start to move forward --> should go to next cell now
        Replay(position=start_cell, direction=east, action=RailEnvActions.MOVE_FORWARD, malfunction=0,
               reward=restart_reward, status=active),
        Replay(position=(28, 6), direction=east, action=RailEnvActions.MOVE_FORWARD, malfunction=0,
               reward=running_reward, status=active),
    ]

    replay_config = ReplayConfig(
        replay=steps,
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=start_cell,
        initial_direction=east,
    )
    run_replay_config(env, [replay_config], activate_agents=False)
327
328


329
def test_initial_malfunction_do_nothing():
    """
    Replay a single agent that gets a malfunction of 3 set in the first step and answers with DO_NOTHING.

    The replay lists no position while READY_TO_DEPART, then (28, 5) until MOVE_FORWARD is chosen,
    and finally (28, 6). The replay is run with `activate_agents=False`. How each `Replay` field is checked
    is defined by `run_replay_config` in `test_utils` (not visible here).
    """
    random.seed(0)
    np.random.seed(0)

    stochastic_data = {
        'prop_malfunction': 1.,  # Percentage of defective agents
        'malfunction_rate': 70,  # Rate of malfunction occurence
        'min_duration': 2,  # Minimal duration of malfunction
        'max_duration': 5,  # Max duration of malfunction
    }

    speed_ration_map = {
        1.: 1.,  # Fast passenger train
        1. / 2.: 0.,  # Fast freight train
        1. / 3.: 0.,  # Slow commuter train
        1. / 4.: 0.,  # Slow freight train
    }

    rail_generator = sparse_rail_generator(max_num_cities=5, max_rails_between_cities=3, seed=215545,
                                           grid_mode=True)
    env = RailEnv(
        width=25,
        height=30,
        rail_generator=rail_generator,
        schedule_generator=sparse_schedule_generator(speed_ration_map),
        number_of_agents=1,
        stochastic_data=stochastic_data,  # Malfunction data generator
    )
    set_penalties_for_replay(env)

    east = Grid4TransitionsEnum.EAST
    active = RailAgentStatus.ACTIVE
    idle = RailEnvActions.DO_NOTHING
    forward = RailEnvActions.MOVE_FORWARD
    start_cell = (28, 5)

    standing_reward = env.step_penalty  # full step penalty while malfunctioning or stopped
    restart_reward = env.start_penalty + env.step_penalty * 1.0  # start penalty + step penalty for speed 1.0
    running_reward = env.step_penalty * 1.0  # step penalty for speed 1.0

    steps = [
        Replay(position=None, direction=east, action=forward, set_malfunction=3, malfunction=3,
               reward=standing_reward, status=RailAgentStatus.READY_TO_DEPART),
        Replay(position=start_cell, direction=east, action=idle, malfunction=2, reward=standing_reward,
               status=active),
        # malfunction stops in the next step and we're still at the beginning of the cell
        # --> if we take action DO_NOTHING, agent should restart without moving
        Replay(position=start_cell, direction=east, action=idle, malfunction=1, reward=standing_reward,
               status=active),
        # we haven't started moving yet --> stay here
        Replay(position=start_cell, direction=east, action=idle, malfunction=0, reward=standing_reward,
               status=active),
        # we start to move forward --> should go to next cell now
        Replay(position=start_cell, direction=east, action=forward, malfunction=0, reward=restart_reward,
               status=active),
        Replay(position=(28, 6), direction=east, action=forward, malfunction=0, reward=running_reward,
               status=active),
    ]

    replay_config = ReplayConfig(
        replay=steps,
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=start_cell,
        initial_direction=east,
    )

    run_replay_config(env, [replay_config], activate_agents=False)
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438


def test_initial_nextmalfunction_not_below_zero():
    """Regression test: after one step with no actions, `next_malfunction` of the agent must not be negative."""
    random.seed(0)
    np.random.seed(0)

    stochastic_data = {
        'prop_malfunction': 1.,  # Percentage of defective agents
        'malfunction_rate': 0.5,  # Rate of malfunction occurence
        'min_duration': 5,  # Minimal duration of malfunction
        'max_duration': 5,  # Max duration of malfunction
    }

    speed_ration_map = {
        1.: 1.,  # Fast passenger train
        1. / 2.: 0.,  # Fast freight train
        1. / 3.: 0.,  # Slow commuter train
        1. / 4.: 0.,  # Slow freight train
    }

    rail_generator = sparse_rail_generator(max_num_cities=5, max_rails_between_cities=3, seed=215545,
                                           grid_mode=True)
    env = RailEnv(
        width=25,
        height=30,
        rail_generator=rail_generator,
        schedule_generator=sparse_schedule_generator(speed_ration_map),
        number_of_agents=1,
        stochastic_data=stochastic_data,  # Malfunction data generator
    )

    # Grab the agent before stepping, then advance the environment once with an empty action dict
    agent = env.agents[0]
    env.step({})

    # next_malfunction was -1 before the bugfix https://gitlab.aicrowd.com/flatland/flatland/issues/186
    next_malfunction = agent.malfunction_data['next_malfunction']
    assert next_malfunction >= 0, "next_malfunction should be >=0, found {}".format(next_malfunction)