test_flatland_malfunction.py 24 KB
Newer Older
u214892's avatar
u214892 committed
1
import random
2
from typing import Dict, List
u214892's avatar
u214892 committed
3

4
import numpy as np
5
from test_utils import Replay, ReplayConfig, run_replay_config, set_penalties_for_replay
6

7
from flatland.core.env_observation_builder import ObservationBuilder
u214892's avatar
u214892 committed
8
from flatland.core.grid.grid4 import Grid4TransitionsEnum
9
from flatland.core.grid.grid4_utils import get_new_position
u214892's avatar
u214892 committed
10
from flatland.envs.agent_utils import RailAgentStatus
u214892's avatar
u214892 committed
11
from flatland.envs.rail_env import RailEnv, RailEnvActions
12
13
14
from flatland.envs.rail_generators import rail_from_grid_transition_map
from flatland.envs.schedule_generators import random_schedule_generator
from flatland.utils.simple_rail import make_simple_rail2
15
16


17
class SingleAgentNavigationObs(ObservationBuilder):
    """Observation builder emitting a 3-component one-hot vector per agent.

    The components correspond to the relative directions [Left, Forward, Right];
    the set bit marks the branch that lies on the shortest path to the agent's
    target. E.g. if taking the Left branch (when available) is the shortest
    route to the target, the observation vector is [1, 0, 0].
    """

    def __init__(self):
        super().__init__()

    def reset(self):
        # No per-episode state to re-initialise.
        pass

    def get(self, handle: int = 0) -> List[int]:
        agent = self.env.agents[handle]

        # Resolve the cell the agent (virtually) occupies, depending on status.
        if agent.status == RailAgentStatus.READY_TO_DEPART:
            virtual_cell = agent.initial_position
        elif agent.status == RailAgentStatus.ACTIVE:
            virtual_cell = agent.position
        elif agent.status == RailAgentStatus.DONE:
            virtual_cell = agent.target
        else:
            # No meaningful position for this status.
            return None

        transitions = self.env.rail.get_transitions(*virtual_cell, agent.direction)

        # Relative to the current orientation, inspect [left, forward, right].
        # If only one transition is possible, the forward branch is aligned with it.
        if np.count_nonzero(transitions) == 1:
            return [0, 1, 0]

        branch_distances = []
        for heading in [(agent.direction + offset) % 4 for offset in (-1, 0, 1)]:
            if transitions[heading]:
                next_cell = get_new_position(virtual_cell, heading)
                branch_distances.append(
                    self.env.distance_map.get()[handle, next_cell[0], next_cell[1], heading])
            else:
                branch_distances.append(np.inf)

        one_hot = [0, 0, 0]
        one_hot[np.argmin(branch_distances)] = 1
        return one_hot


def test_malfunction_process():
    """Force a malfunction every 5 steps and verify halts, count and down time."""
    # Fixed malfunction duration so the expected counters below are deterministic.
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 1000,
                       'min_duration': 3,
                       'max_duration': 3}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    # reset to initialize agents_static
    obs, info = env.reset(False, False, True, random_seed=10)

    # An initial malfunction countdown must have been assigned.
    assert env.agents[0].malfunction_data['next_malfunction'] > 0

    for agent in env.agents:
        agent.status = RailAgentStatus.ACTIVE

    halts = 0
    down_time = 0
    previous_position = env.agents[0].position

    # Move target to unreachable position in order to not interfere with test
    env.agents[0].target = (0, 0)

    for step in range(100):
        # Greedy action derived from the one-hot shortest-path observation.
        actions = {handle: np.argmax(vector) + 1 for handle, vector in obs.items()}

        if step % 5 == 0:
            # Stop the agent and force a fresh malfunction on the next step.
            env.agents[0].malfunction_data['malfunction'] = -1
            env.agents[0].malfunction_data['next_malfunction'] = 0
            halts += 1

        obs, all_rewards, done, _ = env.step(actions)

        if env.agents[0].malfunction_data['malfunction'] > 0:
            # A malfunctioning agent must stand still.
            assert previous_position == env.agents[0].position

        previous_position = env.agents[0].position
        down_time += env.agents[0].malfunction_data['malfunction']

    # Check that the appropriate number of malfunctions is achieved
    assert env.agents[0].malfunction_data['nr_malfunctions'] == 20, "Actual {}".format(
        env.agents[0].malfunction_data['nr_malfunctions'])

    # Check that 20 stops where performed
    assert halts == 20

    # Check that malfunctioning data was standing around
    assert down_time > 0
u214892's avatar
u214892 committed
133
134
135
136
137
138


def test_malfunction_process_statistically():
    """Tests that malfunctions are produced by stochastic_data!"""
    # Fixed malfunction duration so the recorded countdowns below stay valid.
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 5,
                       'min_duration': 5,
                       'max_duration': 5}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=10,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )

    # reset to initialize agents_static
    env.reset(True, True, False, random_seed=10)

    env.agents[0].target = (0, 0)

    # Expected malfunction countdown per agent and step, recorded from a
    # reference run (regenerate via the commented line in the loop below).
    expected_malfunctions = [
        [0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0],
        [0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 6, 5],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 6, 5, 4],
        [0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 6, 5, 4],
        [6, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0],
        [6, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0, 0, 6, 5, 4, 3],
        [0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5],
        [0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 6, 5, 4, 3, 2, 1, 0],
        [0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1],
        [6, 6, 6, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0],
    ]

    for step in range(20):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent_idx in range(env.get_num_agents()):
            # We randomly select an action
            action_dict[agent_idx] = RailEnvActions(np.random.randint(4))
            # For generating tests only:
            # expected_malfunctions[agent_idx].append(env.agents[agent_idx].malfunction_data['malfunction'])
            assert env.agents[agent_idx].malfunction_data['malfunction'] == expected_malfunctions[agent_idx][step]
        env.step(action_dict)
179

u214892's avatar
u214892 committed
180

181
def test_malfunction_before_entry():
    """Tests that malfunctions are produced by stochastic_data!"""
    # Fixed malfunction duration for this test.
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 1,
                       'min_duration': 10,
                       'max_duration': 10}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2),  # seed 12
                  number_of_agents=10,
                  random_seed=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    # reset to initialize agents_static
    env.reset(False, False, False, random_seed=10)
    env.agents[0].target = (0, 0)

    # Every agent starts with the same initial malfunction countdown.
    for handle in range(10):
        assert env.agents[handle].malfunction_data['malfunction'] == 11

    for step in range(20):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            # Stand still for the first 10 steps, then keep moving forward.
            action_dict[agent.handle] = RailEnvActions(0 if step < 10 else 2)

        env.step(action_dict)

    # After 20 steps each entered agent sits mid-way through a later malfunction.
    for handle in range(1, 10):
        assert env.agents[handle].malfunction_data['malfunction'] == 2

    # For regenerating the expected values above:
    # for a in range(env.get_num_agents()):
    #    print("assert env.agents[{}].malfunction_data['malfunction'] == {}".format(a,
    #                                                                               env.agents[a].malfunction_data[
    #                                                                                   'malfunction']))
238
239


240
def test_initial_malfunction():
    """Replay-check an agent that starts in a 3-step malfunction, then moves.

    Defect fixed: a leftover debug ``print(env.agents[0].malfunction_data)``
    polluted the test output; removed. Behavior of the replay is unchanged.
    """
    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 100,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=10),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    # reset to initialize agents_static
    env.reset(False, False, True, random_seed=10)
    env.agents[0].target = (0, 5)
    set_penalties_for_replay(env)
    replay_config = ReplayConfig(
        replay=[
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                set_malfunction=3,
                malfunction=3,
                reward=env.step_penalty  # full step penalty when malfunctioning
            ),
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=2,
                reward=env.step_penalty  # full step penalty when malfunctioning
            ),
            # malfunction stops in the next step and we're still at the beginning of the cell
            # --> if we take action MOVE_FORWARD, agent should restart and move to the next cell
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=1,
                reward=env.start_penalty + env.step_penalty * 1.0
                # malfunctioning ends: starting and running at speed 1.0
            ),
            Replay(
                position=(3, 3),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.step_penalty * 1.0  # running at speed 1.0
            ),
            Replay(
                position=(3, 4),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.step_penalty * 1.0  # running at speed 1.0
            )
        ],
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=(3, 2),
        initial_direction=Grid4TransitionsEnum.EAST,
    )
    run_replay_config(env, [replay_config])
310
311
312


def test_initial_malfunction_stop_moving():
    """Replay-check stop/restart handling for an agent malfunctioning before entry.

    Defect fixed: a leftover debug ``print(...)`` of the agent's initial
    position/direction/status polluted the test output; removed. The replay
    itself is unchanged.
    """
    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 70,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    env.reset()

    set_penalties_for_replay(env)
    replay_config = ReplayConfig(
        replay=[
            Replay(
                position=None,
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                set_malfunction=3,
                malfunction=3,
                reward=env.step_penalty,  # full step penalty when stopped
                status=RailAgentStatus.READY_TO_DEPART
            ),
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.DO_NOTHING,
                malfunction=3,
                reward=env.step_penalty,  # full step penalty when stopped
                status=RailAgentStatus.ACTIVE
            ),
            # malfunction stops in the next step and we're still at the beginning of the cell
            # --> if we take action STOP_MOVING, agent should restart without moving
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.STOP_MOVING,
                malfunction=2,
                reward=env.step_penalty,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            ),
            # we have stopped and do nothing --> should stand still
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.DO_NOTHING,
                malfunction=1,
                reward=env.step_penalty,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            ),
            # we start to move forward --> should go to next cell now
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.start_penalty + env.step_penalty * 1.0,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            ),
            Replay(
                position=(3, 3),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.step_penalty * 1.0,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            )
        ],
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=(3, 2),
        initial_direction=Grid4TransitionsEnum.EAST,
    )

    run_replay_config(env, [replay_config], activate_agents=False)
398
399


400
def test_initial_malfunction_do_nothing():
    """Replay-check a malfunctioning agent that only ever receives DO_NOTHING
    until the malfunction ends, then moves forward."""
    random.seed(0)
    np.random.seed(0)

    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 70,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    # reset to initialize agents_static
    env.reset()
    set_penalties_for_replay(env)

    # Expected step-by-step trace of the agent.
    replay_steps = [
        Replay(
            position=None,
            direction=Grid4TransitionsEnum.EAST,
            action=RailEnvActions.MOVE_FORWARD,
            set_malfunction=3,
            malfunction=3,
            reward=env.step_penalty,  # full step penalty while malfunctioning
            status=RailAgentStatus.READY_TO_DEPART
        ),
        Replay(
            position=(3, 2),
            direction=Grid4TransitionsEnum.EAST,
            action=RailEnvActions.DO_NOTHING,
            malfunction=3,
            reward=env.step_penalty,  # full step penalty while malfunctioning
            status=RailAgentStatus.ACTIVE
        ),
        # malfunction stops in the next step and we're still at the beginning of the cell
        # --> if we take action DO_NOTHING, agent should restart without moving
        Replay(
            position=(3, 2),
            direction=Grid4TransitionsEnum.EAST,
            action=RailEnvActions.DO_NOTHING,
            malfunction=2,
            reward=env.step_penalty,  # full step penalty while stopped
            status=RailAgentStatus.ACTIVE
        ),
        # we haven't started moving yet --> stay here
        Replay(
            position=(3, 2),
            direction=Grid4TransitionsEnum.EAST,
            action=RailEnvActions.DO_NOTHING,
            malfunction=1,
            reward=env.step_penalty,  # full step penalty while stopped
            status=RailAgentStatus.ACTIVE
        ),
        # we start to move forward --> should go to next cell now
        Replay(
            position=(3, 2),
            direction=Grid4TransitionsEnum.EAST,
            action=RailEnvActions.MOVE_FORWARD,
            malfunction=0,
            reward=env.start_penalty + env.step_penalty * 1.0,  # start penalty + step penalty for speed 1.0
            status=RailAgentStatus.ACTIVE
        ),
        Replay(
            position=(3, 3),
            direction=Grid4TransitionsEnum.EAST,
            action=RailEnvActions.MOVE_FORWARD,
            malfunction=0,
            reward=env.step_penalty * 1.0,  # step penalty for speed 1.0
            status=RailAgentStatus.ACTIVE
        )
    ]

    replay_config = ReplayConfig(
        replay=replay_steps,
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=(3, 2),
        initial_direction=Grid4TransitionsEnum.EAST,
    )
    run_replay_config(env, [replay_config], activate_agents=False)
485
486
487
488
489
490
491


def test_initial_nextmalfunction_not_below_zero():
    """Regression test: next_malfunction must never become negative.

    See https://gitlab.aicrowd.com/flatland/flatland/issues/186 — before the
    bugfix, next_malfunction was -1 after the first step.
    """
    random.seed(0)
    np.random.seed(0)

    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 70,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    # reset to initialize agents_static
    env.reset()

    agent = env.agents[0]
    env.step({})
    assert agent.malfunction_data['next_malfunction'] >= 0, \
        "next_malfunction should be >=0, found {}".format(agent.malfunction_data['next_malfunction'])
Erik Nygren's avatar
Erik Nygren committed
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533


def tests_random_interference_from_outside():
    """Tests that malfunctions are produced by stochastic_data!

    Runs the same scenario twice — the second time with an external RNG being
    exercised in parallel — and checks rewards/positions are identical.
    """
    # NOTE(review): the name starts with 'tests_' so default pytest discovery
    # ('test_*') will not collect this function — confirm whether intended.
    # Fixed malfunction duration for this test.
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 1,
                       'min_duration': 10,
                       'max_duration': 10}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2),  # seed 12
                  number_of_agents=1,
                  random_seed=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    env.reset()
    # reset to initialize agents_static
    env.agents[0].speed_data['speed'] = 0.33
    env.agents[0].initial_position = (3, 0)
    env.agents[0].target = (3, 9)
    env.reset(False, False, False)

    env_data = []
    for step in range(200):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            # We randomly select an action
            action_dict[agent.handle] = RailEnvActions(2)

        _, reward, _, _ = env.step(action_dict)
        # Append the rewards of the first trial
        env_data.append((reward[0], env.agents[0].position))
        assert reward[0] == env_data[step][0]
        assert env.agents[0].position == env_data[step][1]

    # Run the same test as above but with an external random generator running
    # Check that the reward stays the same
    rail, rail_map = make_simple_rail2()
    random.seed(47)
    np.random.seed(1234)
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2),  # seed 12
                  number_of_agents=1,
                  random_seed=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    env.reset()
    # reset to initialize agents_static
    env.agents[0].speed_data['speed'] = 0.33
    env.agents[0].initial_position = (3, 0)
    env.agents[0].target = (3, 9)
    env.reset(False, False, False)

    dummy_list = [1, 2, 6, 7, 8, 9, 4, 5, 4]
    for step in range(200):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            action_dict[agent.handle] = RailEnvActions(2)

            # Do dummy random number generations
            random.shuffle(dummy_list)
            np.random.rand()

        _, reward, _, _ = env.step(action_dict)
        assert reward[0] == env_data[step][0]
        assert env.agents[0].position == env_data[step][1]