test_flatland_malfunction.py 20.4 KB
Newer Older
u214892's avatar
u214892 committed
1
import random
2
from typing import Dict, List
u214892's avatar
u214892 committed
3

4
import numpy as np
5
from test_utils import Replay, ReplayConfig, run_replay_config, set_penalties_for_replay
6

7
from flatland.core.env_observation_builder import ObservationBuilder
u214892's avatar
u214892 committed
8
from flatland.core.grid.grid4 import Grid4TransitionsEnum
9
from flatland.core.grid.grid4_utils import get_new_position
u214892's avatar
u214892 committed
10
from flatland.envs.agent_utils import RailAgentStatus
u214892's avatar
u214892 committed
11
from flatland.envs.rail_env import RailEnv, RailEnvActions
12
13
14
from flatland.envs.rail_generators import rail_from_grid_transition_map
from flatland.envs.schedule_generators import random_schedule_generator
from flatland.utils.simple_rail import make_simple_rail2
15
16


17
class SingleAgentNavigationObs(ObservationBuilder):
    """
    Observation builder producing a 3-element binary vector per agent.

    The components correspond to the relative directions [Left, Forward, Right];
    the component lying on the shortest path to the agent's target is set to 1.
    E.g., if taking the Left branch (if available) is the shortest route to the
    agent's target, the observation vector will be [1, 0, 0].
    """

    def __init__(self):
        super().__init__()

    def reset(self):
        # No per-episode state to clear.
        pass

    def get(self, handle: int = 0) -> List[int]:
        agent = self.env.agents[handle]

        # Resolve the cell the agent is (virtually) occupying, depending on status.
        if agent.status == RailAgentStatus.READY_TO_DEPART:
            virtual_position = agent.initial_position
        elif agent.status == RailAgentStatus.ACTIVE:
            virtual_position = agent.position
        elif agent.status == RailAgentStatus.DONE:
            virtual_position = agent.target
        else:
            return None

        transitions = self.env.rail.get_transitions(*virtual_position, agent.direction)

        # A single possible transition is, by convention, the "forward" branch.
        if np.count_nonzero(transitions) == 1:
            return [0, 1, 0]

        # Rank the [left, forward, right] branches by distance-map value; blocked
        # branches get infinite distance so they can never be chosen.
        branch_distances = []
        for heading in ((agent.direction + offset) % 4 for offset in (-1, 0, 1)):
            if transitions[heading]:
                next_cell = get_new_position(virtual_position, heading)
                branch_distances.append(
                    self.env.distance_map.get()[handle, next_cell[0], next_cell[1], heading])
            else:
                branch_distances.append(np.inf)

        observation = [0, 0, 0]
        observation[np.argmin(branch_distances)] = 1
        return observation


def test_malfunction_process():
    """Forced malfunctions must halt the agent for their full duration.

    Every 5th step the agent is manually put into malfunction; the test checks
    the agent never moves while malfunctioning and that the malfunction
    counters add up.
    """
    # Set fixed malfunction duration for this test
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 1000,
                       'min_duration': 3,
                       'max_duration': 3}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    # reset to initialize agents_static
    # NOTE(review): resolved a leftover merge conflict here; HEAD used
    # random_seed=10, the incoming branch random_seed=0 — confirm the expected
    # counts asserted below hold for this seed.
    obs, info = env.reset(False, False, True, random_seed=10)

    print(env.agents[0].malfunction_data)
    # Check that an initial duration for malfunction was assigned
    assert env.agents[0].malfunction_data['next_malfunction'] > 0
    for agent in env.agents:
        agent.status = RailAgentStatus.ACTIVE

    agent_halts = 0
    total_down_time = 0
    agent_old_position = env.agents[0].position

    # Move target to unreachable position in order to not interfere with test
    env.agents[0].target = (0, 0)
    for step in range(100):
        actions = {}

        # Follow the observation: argmax of [L, F, R] + 1 maps onto the
        # corresponding movement action.
        for i in range(len(obs)):
            actions[i] = np.argmax(obs[i]) + 1

        if step % 5 == 0:
            # Stop the agent and set it to be malfunctioning
            env.agents[0].malfunction_data['malfunction'] = -1
            env.agents[0].malfunction_data['next_malfunction'] = 0
            agent_halts += 1

        obs, all_rewards, done, _ = env.step(actions)

        agent_malfunctioning = env.agents[0].malfunction_data['malfunction'] > 0

        if agent_malfunctioning:
            # Check that agent is not moving while malfunctioning
            assert agent_old_position == env.agents[0].position

        agent_old_position = env.agents[0].position
        total_down_time += env.agents[0].malfunction_data['malfunction']

    # Check that the appropriate number of malfunctions is achieved
    assert env.agents[0].malfunction_data['nr_malfunctions'] == 21, "Actual {}".format(
        env.agents[0].malfunction_data['nr_malfunctions'])

    # Check that 20 stops where performed
    assert agent_halts == 20

    # Check that malfunctioning data was standing around
    assert total_down_time > 0


def test_malfunction_process_statistically():
    """Tests that malfunctions are produced by stochastic_data!"""
    # Set fixed malfunction duration for this test
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 2,
                       'min_duration': 3,
                       'max_duration': 3}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    # reset to initialize agents_static
    # NOTE(review): resolved a leftover merge conflict here; HEAD used
    # env.reset(True, True, False, random_seed=10), the incoming branch
    # env.reset(False, False, False, random_seed=0) — confirm the malfunction
    # count asserted below holds for this choice.
    env.reset(True, True, False, random_seed=10)

    # Park the target out of reach so navigation does not end the episode early.
    env.agents[0].target = (0, 0)
    nb_malfunction = 0
    for step in range(20):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            # We randomly select an action
            action_dict[agent.handle] = RailEnvActions(np.random.randint(4))

        env.step(action_dict)
    # check that generation of malfunctions works as expected
    assert env.agents[0].malfunction_data["nr_malfunctions"] == 5


177
def test_malfunction_before_entry():
    """Tests that malfunctions are produced by stochastic_data!"""
    # Set fixed malfunction duration for this test
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 2,
                       'min_duration': 10,
                       'max_duration': 10}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2),  # seed 12
                  number_of_agents=10,
                  random_seed=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    # reset to initialize agents_static
    env.reset(False, False, False, random_seed=10)
    env.agents[0].target = (0, 0)

    # Malfunction durations assigned to agents 1..9 right after reset.
    expected_initial = [11, 11, 11, 11, 0, 11, 11, 11, 0]
    for handle, expected in enumerate(expected_initial, start=1):
        assert env.agents[handle].malfunction_data['malfunction'] == expected

    for step in range(20):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            # Fixed action for every agent: DO_NOTHING for the first 10 steps,
            # MOVE_FORWARD afterwards.
            action_dict[agent.handle] = RailEnvActions(2)
            if step < 10:
                action_dict[agent.handle] = RailEnvActions(0)

        env.step(action_dict)

    # Remaining malfunction durations for agents 1..9 after the 20 steps.
    expected_remaining = [1, 1, 1, 1, 2, 1, 1, 1, 3]
    for handle, expected in enumerate(expected_remaining, start=1):
        assert env.agents[handle].malfunction_data['malfunction'] == expected

    # Print for test generation
    # for a in range(env.get_num_agents()):
    #    print("assert env.agents[{}].malfunction_data['malfunction'] == {}".format(a,
    #                                                                               env.agents[a].malfunction_data[
    #                                                                                   'malfunction']))
232
233


234
def test_initial_malfunction():
    """Replay test: an agent malfunctioning from its very first step.

    The agent is forced into a 3-step malfunction at the start; while it lasts
    the agent must stand still and receive the full step penalty, then restart
    and advance one cell per MOVE_FORWARD at speed 1.0.
    """

    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 70,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )

    # reset to initialize agents_static
    env.reset(False, False, True, random_seed=10)
    env.agents[0].target = (0, 5)
    set_penalties_for_replay(env)
    # Expected step-by-step trace: position, heading, action, remaining
    # malfunction duration and per-step reward.
    replay_config = ReplayConfig(
        replay=[
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                set_malfunction=3,  # force a malfunction of 3 steps at this point
                malfunction=3,
                reward=env.step_penalty  # full step penalty when malfunctioning
            ),
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=2,
                reward=env.step_penalty  # full step penalty when malfunctioning
            ),
            # malfunction stops in the next step and we're still at the beginning of the cell
            # --> if we take action MOVE_FORWARD, agent should restart and move to the next cell
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=1,
                reward=env.start_penalty + env.step_penalty * 1.0
                # malfunctioning ends: starting and running at speed 1.0
            ),
            Replay(
                position=(3, 3),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.step_penalty * 1.0  # running at speed 1.0
            ),
            Replay(
                position=(3, 4),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.step_penalty * 1.0  # running at speed 1.0
            )
        ],
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=(3, 2),
        initial_direction=Grid4TransitionsEnum.EAST,
    )
    run_replay_config(env, [replay_config])
305
306
307


def test_initial_malfunction_stop_moving():
    """Replay test: STOP_MOVING issued while an initial malfunction is active.

    The agent starts READY_TO_DEPART in a forced 3-step malfunction; once the
    malfunction ends, STOP_MOVING / DO_NOTHING keep it in place and a later
    MOVE_FORWARD restarts it (start penalty plus step penalty at speed 1.0).
    """
    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 70,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    # reset to initialize agents_static
    # NOTE(review): no env.reset() call is visible here despite the comment
    # above — confirm against the original; run_replay_config(...,
    # activate_agents=False) below appears to drive the episode itself.

    print(env.agents[0].initial_position, env.agents[0].direction, env.agents[0].position, env.agents[0].status)

    set_penalties_for_replay(env)
    # Expected step-by-step trace: position (None while not yet on the grid),
    # heading, action, remaining malfunction duration, reward and agent status.
    replay_config = ReplayConfig(
        replay=[
            Replay(
                position=None,
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                set_malfunction=3,  # force a malfunction of 3 steps at this point
                malfunction=3,
                reward=env.step_penalty,  # full step penalty when stopped
                status=RailAgentStatus.READY_TO_DEPART
            ),
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.DO_NOTHING,
                malfunction=2,
                reward=env.step_penalty,  # full step penalty when stopped
                status=RailAgentStatus.ACTIVE
            ),
            # malfunction stops in the next step and we're still at the beginning of the cell
            # --> if we take action STOP_MOVING, agent should restart without moving
            #
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.STOP_MOVING,
                malfunction=1,
                reward=env.step_penalty,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            ),
            # we have stopped and do nothing --> should stand still
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.DO_NOTHING,
                malfunction=0,
                reward=env.step_penalty,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            ),
            # we start to move forward --> should go to next cell now
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.start_penalty + env.step_penalty * 1.0,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            ),
            Replay(
                position=(3, 3),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.step_penalty * 1.0,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            )
        ],
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=(3, 2),
        initial_direction=Grid4TransitionsEnum.EAST,
    )

    run_replay_config(env, [replay_config], activate_agents=False)
393
394


395
def test_initial_malfunction_do_nothing():
    """Replay test: DO_NOTHING issued throughout an initial malfunction.

    Like test_initial_malfunction_stop_moving, but the agent only ever sends
    DO_NOTHING until the malfunction is over; it must stay in its cell and then
    start moving on the first MOVE_FORWARD.
    """
    random.seed(0)
    np.random.seed(0)

    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 70,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    # reset to initialize agents_static
    env.reset()
    set_penalties_for_replay(env)
    # Expected step-by-step trace: position (None while not yet on the grid),
    # heading, action, remaining malfunction duration, reward and agent status.
    replay_config = ReplayConfig(
        replay=[
            Replay(
                position=None,
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                set_malfunction=3,  # force a malfunction of 3 steps at this point
                malfunction=3,
                reward=env.step_penalty,  # full step penalty while malfunctioning
                status=RailAgentStatus.READY_TO_DEPART
            ),
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.DO_NOTHING,
                malfunction=2,
                reward=env.step_penalty,  # full step penalty while malfunctioning
                status=RailAgentStatus.ACTIVE
            ),
            # malfunction stops in the next step and we're still at the beginning of the cell
            # --> if we take action DO_NOTHING, agent should restart without moving
            #
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.DO_NOTHING,
                malfunction=1,
                reward=env.step_penalty,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            ),
            # we haven't started moving yet --> stay here
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.DO_NOTHING,
                malfunction=0,
                reward=env.step_penalty,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            ),

            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.start_penalty + env.step_penalty * 1.0,  # start penalty + step penalty for speed 1.0
                status=RailAgentStatus.ACTIVE
            ),  # we start to move forward --> should go to next cell now
            Replay(
                position=(3, 3),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.step_penalty * 1.0,  # step penalty for speed 1.0
                status=RailAgentStatus.ACTIVE
            )
        ],
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=(3, 2),
        initial_direction=Grid4TransitionsEnum.EAST,
    )
    run_replay_config(env, [replay_config], activate_agents=False)
481
482
483
484
485
486
487


def test_initial_nextmalfunction_not_below_zero():
    """Regression test: next_malfunction must never go negative after a step."""
    random.seed(0)
    np.random.seed(0)

    # Malfunction parameters: every agent defective, short fixed-range outages.
    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 70,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    # reset to initialize agents_static
    env.reset()

    agent = env.agents[0]
    # One empty step is enough to trigger the counter update.
    env.step({})
    # was next_malfunction was -1 befor the bugfix https://gitlab.aicrowd.com/flatland/flatland/issues/186
    counter = agent.malfunction_data['next_malfunction']
    assert counter >= 0, \
        "next_malfunction should be >=0, found {}".format(counter)