test_flatland_malfunction.py 24.1 KB
Newer Older
u214892's avatar
u214892 committed
1
import random
2
from typing import Dict, List
u214892's avatar
u214892 committed
3

4
import numpy as np
5
from test_utils import Replay, ReplayConfig, run_replay_config, set_penalties_for_replay
6

7
from flatland.core.env_observation_builder import ObservationBuilder
u214892's avatar
u214892 committed
8
from flatland.core.grid.grid4 import Grid4TransitionsEnum
9
from flatland.core.grid.grid4_utils import get_new_position
u214892's avatar
u214892 committed
10
from flatland.envs.agent_utils import RailAgentStatus
u214892's avatar
u214892 committed
11
from flatland.envs.rail_env import RailEnv, RailEnvActions
12
13
14
from flatland.envs.rail_generators import rail_from_grid_transition_map
from flatland.envs.schedule_generators import random_schedule_generator
from flatland.utils.simple_rail import make_simple_rail2
15
16


17
class SingleAgentNavigationObs(ObservationBuilder):
    """
    Observation builder producing a 3-component binary vector per agent,
    flagging which of the relative branches (Left, Forward, Right) lies on
    the shortest path to the agent's target.
    E.g. if the Left branch (when available) is the shortest route, the
    observation is [1, 0, 0].
    """

    def __init__(self):
        super().__init__()

    def reset(self):
        # No per-episode state to clear.
        pass

    def get(self, handle: int = 0) -> List[int]:
        agent = self.env.agents[handle]

        # Pick the cell the agent effectively occupies for planning purposes.
        if agent.status == RailAgentStatus.READY_TO_DEPART:
            agent_virtual_position = agent.initial_position
        elif agent.status == RailAgentStatus.ACTIVE:
            agent_virtual_position = agent.position
        elif agent.status == RailAgentStatus.DONE:
            agent_virtual_position = agent.target
        else:
            # DONE_REMOVED (or unknown) agents get no observation.
            return None

        possible_transitions = self.env.rail.get_transitions(*agent_virtual_position, agent.direction)
        num_transitions = np.count_nonzero(possible_transitions)

        # Organize branches as [left, forward, right] relative to the current
        # orientation. With a single feasible transition, treat it as forward.
        if num_transitions == 1:
            observation = [0, 1, 0]
        else:
            branch_distances = []
            for rel_dir in [(agent.direction + offset) % 4 for offset in (-1, 0, 1)]:
                if possible_transitions[rel_dir]:
                    next_cell = get_new_position(agent_virtual_position, rel_dir)
                    branch_distances.append(
                        self.env.distance_map.get()[handle, next_cell[0], next_cell[1], rel_dir])
                else:
                    # Infeasible branch never wins the argmin below.
                    branch_distances.append(np.inf)

            observation = [0, 0, 0]
            observation[np.argmin(branch_distances)] = 1

        return observation


def test_malfunction_process():
    """Force a malfunction every 5th step; agent must halt and downtime must accumulate."""
    # Fixed malfunction duration keeps the bookkeeping deterministic.
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 1000,
                       'min_duration': 3,
                       'max_duration': 3}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    env.reset()
    # Second reset initializes agents_static.
    obs, info = env.reset(False, False, True, random_seed=10)

    # An initial malfunction countdown must have been assigned.
    assert env.agents[0].malfunction_data['next_malfunction'] > 0
    for agent in env.agents:
        agent.status = RailAgentStatus.ACTIVE

    agent_halts = 0
    total_down_time = 0
    agent_old_position = env.agents[0].position

    # Move target to unreachable position in order to not interfere with test.
    env.agents[0].target = (0, 0)
    for step in range(100):
        # Follow the shortest-path observation (argmax index shifted by 1 into an action).
        actions = {i: np.argmax(obs[i]) + 1 for i in range(len(obs))}

        if step % 5 == 0:
            # Force the agent into a malfunction right now.
            env.agents[0].malfunction_data['malfunction'] = -1
            env.agents[0].malfunction_data['next_malfunction'] = 0
            agent_halts += 1

        obs, all_rewards, done, _ = env.step(actions)

        agent_malfunctioning = env.agents[0].malfunction_data['malfunction'] > 0

        if agent_malfunctioning:
            # A malfunctioning agent must stand still.
            assert agent_old_position == env.agents[0].position

        agent_old_position = env.agents[0].position
        total_down_time += env.agents[0].malfunction_data['malfunction']

    # Check that the appropriate number of malfunctions is achieved.
    assert env.agents[0].malfunction_data['nr_malfunctions'] == 20, "Actual {}".format(
        env.agents[0].malfunction_data['nr_malfunctions'])

    # Check that 20 stops were performed.
    assert agent_halts == 20

    # The agent accumulated some downtime along the way.
    assert total_down_time > 0
u214892's avatar
u214892 committed
134
135
136
137
138
139


def test_malfunction_process_statistically():
    """Tests that malfunctions are produced by stochastic_data!"""
    # Fixed malfunction duration so the recorded schedule below stays valid.
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 5,
                       'min_duration': 5,
                       'max_duration': 5}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=10,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    env.reset()
    # Second reset initializes agents_static.
    env.reset(True, True, False, random_seed=10)

    env.agents[0].target = (0, 0)
    # Expected per-step malfunction countdowns, one row per agent,
    # recorded from a seeded run (see the commented generator line below).
    agent_malfunction_list = [[0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 6, 5, 4, 3, 2],
                              [0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1],
                              [0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 6, 5],
                              [0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0, 0],
                              [6, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                              [6, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2],
                              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                              [0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 6, 5, 4],
                              [0, 0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0],
                              [6, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

    for step in range(20):
        action_dict: Dict[int, RailEnvActions] = {}
        for handle in range(env.get_num_agents()):
            # We randomly select an action.
            action_dict[handle] = RailEnvActions(np.random.randint(4))
            # For generating tests only:
            # agent_malfunction_list[handle].append(env.agents[handle].malfunction_data['malfunction'])
            assert env.agents[handle].malfunction_data['malfunction'] == agent_malfunction_list[handle][step]
        env.step(action_dict)
179

u214892's avatar
u214892 committed
180

181
def test_malfunction_before_entry():
    """Tests that malfunctions are produced by stochastic_data!"""
    # Fixed malfunction duration for this test.
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 1,
                       'min_duration': 10,
                       'max_duration': 10}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2),  # seed 12
                  number_of_agents=10,
                  random_seed=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    env.reset()
    # Second reset initializes agents_static.
    env.reset(False, False, False, random_seed=10)
    env.agents[0].target = (0, 0)

    # Every agent starts with the same recorded malfunction countdown
    # (values generated via the commented print loop at the bottom).
    for handle in range(10):
        assert env.agents[handle].malfunction_data['malfunction'] == 11

    for step in range(20):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            # We randomly select an action.
            action_dict[agent.handle] = RailEnvActions(2)
            if step < 10:
                # Hold everyone still for the first 10 steps.
                action_dict[agent.handle] = RailEnvActions(0)

        env.step(action_dict)

    # Agents 1..9 are still counting down the same way.
    for handle in range(1, 10):
        assert env.agents[handle].malfunction_data['malfunction'] == 2

    # for a in range(env.get_num_agents()):
    #    print("assert env.agents[{}].malfunction_data['malfunction'] == {}".format(a,
    #                                                                               env.agents[a].malfunction_data[
    #                                                                                   'malfunction']))
239
240


241
def test_initial_malfunction():
    """Agent starts in a forced malfunction; replay verifies per-step position, malfunction countdown and reward.

    Fix: removed a leftover debug ``print(env.agents[0].malfunction_data)``
    that polluted test output.
    """
    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 100,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=10),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    env.reset()

    # reset to initialize agents_static
    env.reset(False, False, True, random_seed=10)
    env.agents[0].target = (0, 5)
    set_penalties_for_replay(env)
    replay_config = ReplayConfig(
        replay=[
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                set_malfunction=3,
                malfunction=3,
                reward=env.step_penalty  # full step penalty when malfunctioning
            ),
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=2,
                reward=env.step_penalty  # full step penalty when malfunctioning
            ),
            # malfunction stops in the next step and we're still at the beginning of the cell
            # --> if we take action MOVE_FORWARD, agent should restart and move to the next cell
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=1,
                reward=env.start_penalty + env.step_penalty * 1.0
                # malfunctioning ends: starting and running at speed 1.0
            ),
            Replay(
                position=(3, 3),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.step_penalty * 1.0  # running at speed 1.0
            ),
            Replay(
                position=(3, 4),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.step_penalty * 1.0  # running at speed 1.0
            )
        ],
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=(3, 2),
        initial_direction=Grid4TransitionsEnum.EAST,
    )
    run_replay_config(env, [replay_config])
313
314
315


def test_initial_malfunction_stop_moving():
    """Malfunctioning agent obeys STOP_MOVING / DO_NOTHING; replay verifies status, position and reward.

    Fixes: removed a leftover debug ``print(...)`` of the agent's initial
    state, and corrected the copy-pasted reward comments on the final two
    steps (the agent is starting/moving there, not stopped).
    """
    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 70,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    env.reset()

    set_penalties_for_replay(env)
    replay_config = ReplayConfig(
        replay=[
            Replay(
                position=None,
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                set_malfunction=3,
                malfunction=3,
                reward=env.step_penalty,  # full step penalty when stopped
                status=RailAgentStatus.READY_TO_DEPART
            ),
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.DO_NOTHING,
                malfunction=3,
                reward=env.step_penalty,  # full step penalty when stopped
                status=RailAgentStatus.ACTIVE
            ),
            # malfunction stops in the next step and we're still at the beginning of the cell
            # --> if we take action STOP_MOVING, agent should restart without moving
            #
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.STOP_MOVING,
                malfunction=2,
                reward=env.step_penalty,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            ),
            # we have stopped and do nothing --> should stand still
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.DO_NOTHING,
                malfunction=1,
                reward=env.step_penalty,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            ),
            # we start to move forward --> should go to next cell now
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.start_penalty + env.step_penalty * 1.0,  # start penalty + step penalty for speed 1.0
                status=RailAgentStatus.ACTIVE
            ),
            Replay(
                position=(3, 3),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.step_penalty * 1.0,  # step penalty for speed 1.0
                status=RailAgentStatus.ACTIVE
            )
        ],
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=(3, 2),
        initial_direction=Grid4TransitionsEnum.EAST,
    )

    run_replay_config(env, [replay_config], activate_agents=False)
401
402


403
def test_initial_malfunction_do_nothing():
    """Agent spawning in malfunction stays put under DO_NOTHING, then departs; verified via replay."""
    random.seed(0)
    np.random.seed(0)

    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 70,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    # reset to initialize agents_static
    env.reset()
    set_penalties_for_replay(env)

    # Helper for the repeated "active agent sitting at (3, 2)" steps.
    def _active_step(action, malfunction, reward):
        return Replay(
            position=(3, 2),
            direction=Grid4TransitionsEnum.EAST,
            action=action,
            malfunction=malfunction,
            reward=reward,
            status=RailAgentStatus.ACTIVE
        )

    replay_config = ReplayConfig(
        replay=[
            Replay(
                position=None,
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                set_malfunction=3,
                malfunction=3,
                reward=env.step_penalty,  # full step penalty while malfunctioning
                status=RailAgentStatus.READY_TO_DEPART
            ),
            # full step penalty while malfunctioning
            _active_step(RailEnvActions.DO_NOTHING, 3, env.step_penalty),
            # malfunction stops in the next step and we're still at the beginning of the cell
            # --> if we take action DO_NOTHING, agent should restart without moving
            _active_step(RailEnvActions.DO_NOTHING, 2, env.step_penalty),
            # we haven't started moving yet --> stay here
            _active_step(RailEnvActions.DO_NOTHING, 1, env.step_penalty),
            # we start to move forward --> should go to next cell now
            # (start penalty + step penalty for speed 1.0)
            _active_step(RailEnvActions.MOVE_FORWARD, 0,
                         env.start_penalty + env.step_penalty * 1.0),
            Replay(
                position=(3, 3),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.step_penalty * 1.0,  # step penalty for speed 1.0
                status=RailAgentStatus.ACTIVE
            )
        ],
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=(3, 2),
        initial_direction=Grid4TransitionsEnum.EAST,
    )
    run_replay_config(env, [replay_config], activate_agents=False)
488
489
490
491
492
493
494


def test_initial_nextmalfunction_not_below_zero():
    """Regression test: next_malfunction must never go negative after a step."""
    random.seed(0)
    np.random.seed(0)

    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 70,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    # reset to initialize agents_static
    env.reset()
    agent = env.agents[0]
    env.step({})
    # next_malfunction was -1 before the bugfix
    # https://gitlab.aicrowd.com/flatland/flatland/issues/186
    assert agent.malfunction_data['next_malfunction'] >= 0, \
        "next_malfunction should be >=0, found {}".format(agent.malfunction_data['next_malfunction'])
Erik Nygren's avatar
Erik Nygren committed
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536


def tests_random_interference_from_outside():
    """Seeded env rollouts must be identical even when outside code consumes the global RNGs.

    Fixes: removed the vacuous assertions in the recording loop (they
    compared each value against the tuple just appended from it, so they
    could never fail) and the dead ``a = random.shuffle(...)`` /
    ``b = np.random.rand()`` assignments (``shuffle`` returns ``None``;
    only the RNG side effects matter, so the bare calls are kept).
    """
    # Fixed malfunction duration for this test.
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 1,
                       'min_duration': 10,
                       'max_duration': 10}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2),  # seed 12
                  number_of_agents=1,
                  random_seed=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    env.reset()
    # reset to initialize agents_static
    env.agents[0].speed_data['speed'] = 0.33
    env.agents[0].initial_position = (3, 0)
    env.agents[0].target = (3, 9)
    env.reset(False, False, False)

    # First trial: record (reward, position) per step as the reference trace.
    env_data = []
    for step in range(200):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            action_dict[agent.handle] = RailEnvActions(2)

        _, reward, _, _ = env.step(action_dict)
        env_data.append((reward[0], env.agents[0].position))

    # Second trial: same env seeds, but an external consumer drains the
    # global RNGs alongside; the env's trace must not change.
    rail, rail_map = make_simple_rail2()
    random.seed(47)
    np.random.seed(1234)
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2),  # seed 12
                  number_of_agents=1,
                  random_seed=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    env.reset()
    # reset to initialize agents_static
    env.agents[0].speed_data['speed'] = 0.33
    env.agents[0].initial_position = (3, 0)
    env.agents[0].target = (3, 9)
    env.reset(False, False, False)

    dummy_list = [1, 2, 6, 7, 8, 9, 4, 5, 4]
    for step in range(200):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            action_dict[agent.handle] = RailEnvActions(2)

            # Consume the global RNGs; this must not affect the env.
            random.shuffle(dummy_list)
            np.random.rand()

        _, reward, _, _ = env.step(action_dict)
        assert reward[0] == env_data[step][0]
        assert env.agents[0].position == env_data[step][1]