test_flatland_malfunction.py 27.7 KB
Newer Older
u214892's avatar
u214892 committed
1
import random
2
from typing import Dict, List
u214892's avatar
u214892 committed
3

4
import numpy as np
5
from test_utils import Replay, ReplayConfig, run_replay_config, set_penalties_for_replay
6

7
from flatland.core.env_observation_builder import ObservationBuilder
u214892's avatar
u214892 committed
8
from flatland.core.grid.grid4 import Grid4TransitionsEnum
9
from flatland.core.grid.grid4_utils import get_new_position
u214892's avatar
u214892 committed
10
from flatland.envs.agent_utils import RailAgentStatus
u214892's avatar
u214892 committed
11
from flatland.envs.rail_env import RailEnv, RailEnvActions
12
13
14
from flatland.envs.rail_generators import rail_from_grid_transition_map
from flatland.envs.schedule_generators import random_schedule_generator
from flatland.utils.simple_rail import make_simple_rail2
15
16


17
class SingleAgentNavigationObs(ObservationBuilder):
    """Observation builder emitting a 3-element one-hot vector per agent.

    The components correspond to the relative branches [Left, Forward, Right];
    the component set to 1 marks the branch that lies on the shortest path to
    the agent's target. E.g. [1, 0, 0] means the Left branch is the shortest
    route.
    """

    def __init__(self):
        super().__init__()

    def reset(self):
        # No per-episode state to clear.
        pass

    def get(self, handle: int = 0) -> List[int]:
        agent = self.env.agents[handle]

        # Pick the cell the agent effectively occupies, depending on its
        # life-cycle state; any other state yields no observation.
        if agent.status == RailAgentStatus.READY_TO_DEPART:
            virtual_cell = agent.initial_position
        elif agent.status == RailAgentStatus.ACTIVE:
            virtual_cell = agent.position
        elif agent.status == RailAgentStatus.DONE:
            virtual_cell = agent.target
        else:
            return None

        transitions = self.env.rail.get_transitions(*virtual_cell, agent.direction)

        # A single available transition is always reported as "forward",
        # regardless of its absolute heading.
        if np.count_nonzero(transitions) == 1:
            return [0, 1, 0]

        # Otherwise rank the reachable neighbours [left, forward, right] by
        # their distance-map value and flag the closest one.
        branch_distances = []
        for offset in range(-1, 2):
            heading = (agent.direction + offset) % 4
            if transitions[heading]:
                neighbour = get_new_position(virtual_cell, heading)
                branch_distances.append(
                    self.env.distance_map.get()[handle, neighbour[0], neighbour[1], heading])
            else:
                branch_distances.append(np.inf)

        observation = [0, 0, 0]
        observation[np.argmin(branch_distances)] = 1
        return observation


def test_malfunction_process():
    """Force a malfunction every 5 steps and check the agent stays put while broken.

    Uses a fixed 3-step malfunction duration and a fixed reset seed so the
    expected counts (20 forced halts over 100 steps) are deterministic.
    """
    # Set fixed malfunction duration for this test
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 1000,
                       'min_duration': 3,
                       'max_duration': 3}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    # reset to initialize agents_static
    obs, info = env.reset(False, False, True, random_seed=10)

    # Check that an initial duration for malfunction was assigned
    assert env.agents[0].malfunction_data['next_malfunction'] > 0
    for agent in env.agents:
        agent.status = RailAgentStatus.ACTIVE

    agent_halts = 0
    total_down_time = 0
    agent_old_position = env.agents[0].position

    # Move target to unreachable position in order to not interfere with test
    env.agents[0].target = (0, 0)
    for step in range(100):
        actions = {}

        # Obs is a one-hot [left, forward, right]; +1 maps the argmax onto
        # the corresponding RailEnvActions movement action.
        for i in range(len(obs)):
            actions[i] = np.argmax(obs[i]) + 1

        if step % 5 == 0:
            # Stop the agent and set it to be malfunctioning
            env.agents[0].malfunction_data['malfunction'] = -1
            env.agents[0].malfunction_data['next_malfunction'] = 0
            agent_halts += 1

        obs, all_rewards, done, _ = env.step(actions)

        if env.agents[0].malfunction_data['malfunction'] > 0:
            agent_malfunctioning = True
        else:
            agent_malfunctioning = False

        if agent_malfunctioning:
            # Check that agent is not moving while malfunctioning
            assert agent_old_position == env.agents[0].position

        agent_old_position = env.agents[0].position
        total_down_time += env.agents[0].malfunction_data['malfunction']

    # Check that the appropriate number of malfunctions is achieved
    assert env.agents[0].malfunction_data['nr_malfunctions'] == 20, "Actual {}".format(
        env.agents[0].malfunction_data['nr_malfunctions'])

    # Check that 20 stops were performed
    assert agent_halts == 20

    # Check that malfunctioning data was accumulated (agent was down at some point)
    assert total_down_time > 0
u214892's avatar
u214892 committed
133
134
135
136
137
138


def test_malfunction_process_statistically():
    """Tests that malfunctions are produced by stochastic_data!

    Replays 20 steps with a fixed seed and checks each agent's per-step
    malfunction counter against a previously recorded expectation table.
    """
    # Set fixed malfunction duration for this test
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 5,
                       'min_duration': 5,
                       'max_duration': 5}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=10,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )

    # reset to initialize agents_static
    env.reset(True, True, False, random_seed=10)

    env.agents[0].target = (0, 0)
    # Next line only for test generation
    # agent_malfunction_list = [[] for i in range(20)]
    # Recorded expectation: agent_malfunction_list[step][agent] is the
    # malfunction countdown observed at that step with random_seed=10.
    agent_malfunction_list = [[0, 0, 0, 0, 5, 5, 0, 0, 0, 0], [0, 0, 0, 0, 5, 5, 0, 0, 0, 0], [0, 0, 0, 0, 4, 4, 0, 0, 0, 0],
     [0, 0, 0, 0, 3, 3, 0, 0, 0, 0], [0, 0, 0, 0, 2, 2, 0, 0, 0, 5], [0, 0, 0, 0, 1, 1, 5, 0, 0, 4],
     [0, 0, 0, 5, 0, 0, 4, 5, 0, 3], [5, 0, 0, 4, 5, 5, 3, 4, 0, 2], [4, 5, 0, 3, 4, 4, 2, 3, 5, 1],
     [3, 4, 0, 2, 3, 3, 1, 2, 4, 0], [2, 3, 5, 1, 2, 2, 0, 1, 3, 0], [1, 2, 4, 0, 1, 1, 5, 0, 2, 0],
     [0, 1, 3, 0, 0, 0, 4, 0, 1, 0], [5, 0, 2, 0, 0, 5, 3, 5, 0, 5], [4, 0, 1, 0, 0, 4, 2, 4, 0, 4],
     [3, 0, 0, 0, 0, 3, 1, 3, 5, 3], [2, 0, 0, 0, 0, 2, 0, 2, 4, 2], [1, 0, 5, 5, 5, 1, 5, 1, 3, 1],
     [0, 0, 4, 4, 4, 0, 4, 0, 2, 0], [5, 0, 3, 3, 3, 5, 3, 5, 1, 5]]

    for step in range(20):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent_idx in range(env.get_num_agents()):
            # We randomly select an action
            action_dict[agent_idx] = RailEnvActions(np.random.randint(4))
            # For generating tests only:
            # agent_malfunction_list[step].append(env.agents[agent_idx].malfunction_data['malfunction'])
            assert env.agents[agent_idx].malfunction_data['malfunction'] == agent_malfunction_list[step][agent_idx]
        env.step(action_dict)
    # For generating test only
    # print(agent_malfunction_list)
179

u214892's avatar
u214892 committed
180

181
def test_malfunction_before_entry():
    """Tests that malfunctions are working properly for agents before entering the environment!

    With fixed seeds, checks the initial next_malfunction/malfunction values
    of all 10 agents and the same values after 20 steps (the first 10 of
    which keep the agents outside the grid via DO_NOTHING).
    """
    # Set fixed malfunction duration for this test
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 5,
                       'min_duration': 10,
                       'max_duration': 10}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2),  # seed 12
                  number_of_agents=10,
                  random_seed=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    # reset to initialize agents_static
    env.reset(False, False, False, random_seed=10)

    # Test initial malfunction values for all agents
    # we want some agents to be malfunctioning already and some to be working
    # we want different next_malfunction values for the agents
    assert env.agents[0].malfunction_data['next_malfunction'] == 5
    assert env.agents[1].malfunction_data['next_malfunction'] == 6
    assert env.agents[2].malfunction_data['next_malfunction'] == 6
    assert env.agents[3].malfunction_data['next_malfunction'] == 3
    assert env.agents[4].malfunction_data['next_malfunction'] == 1
    assert env.agents[5].malfunction_data['next_malfunction'] == 1
    assert env.agents[6].malfunction_data['next_malfunction'] == 3
    assert env.agents[7].malfunction_data['next_malfunction'] == 4
    assert env.agents[8].malfunction_data['next_malfunction'] == 6
    assert env.agents[9].malfunction_data['next_malfunction'] == 0
    assert env.agents[0].malfunction_data['malfunction'] == 0
    assert env.agents[1].malfunction_data['malfunction'] == 0
    assert env.agents[2].malfunction_data['malfunction'] == 0
    assert env.agents[3].malfunction_data['malfunction'] == 0
    assert env.agents[4].malfunction_data['malfunction'] == 10
    assert env.agents[5].malfunction_data['malfunction'] == 10
    assert env.agents[6].malfunction_data['malfunction'] == 0
    assert env.agents[7].malfunction_data['malfunction'] == 0
    assert env.agents[8].malfunction_data['malfunction'] == 0
    assert env.agents[9].malfunction_data['malfunction'] == 0

    for step in range(20):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            # We randomly select an action
            action_dict[agent.handle] = RailEnvActions(2)
            # keep agents outside the environment for the first 10 steps
            if step < 10:
                action_dict[agent.handle] = RailEnvActions(0)

        env.step(action_dict)

    # We want to check that all agents are malfunctioning and that their values changed

    # Test malfunction values for all agents after 20 steps
    assert env.agents[0].malfunction_data['next_malfunction'] == 4
    assert env.agents[1].malfunction_data['next_malfunction'] == 6
    assert env.agents[2].malfunction_data['next_malfunction'] == 2
    assert env.agents[3].malfunction_data['next_malfunction'] == 2
    assert env.agents[4].malfunction_data['next_malfunction'] == 1
    assert env.agents[5].malfunction_data['next_malfunction'] == 1
    assert env.agents[6].malfunction_data['next_malfunction'] == 2
    assert env.agents[7].malfunction_data['next_malfunction'] == 1
    assert env.agents[8].malfunction_data['next_malfunction'] == 1
    assert env.agents[9].malfunction_data['next_malfunction'] == 4
    assert env.agents[0].malfunction_data['malfunction'] == 0
    assert env.agents[1].malfunction_data['malfunction'] == 8
    assert env.agents[2].malfunction_data['malfunction'] == 8
    assert env.agents[3].malfunction_data['malfunction'] == 0
    assert env.agents[4].malfunction_data['malfunction'] == 1
    assert env.agents[5].malfunction_data['malfunction'] == 1
    assert env.agents[6].malfunction_data['malfunction'] == 0
    assert env.agents[7].malfunction_data['malfunction'] == 6
    assert env.agents[8].malfunction_data['malfunction'] == 8
    assert env.agents[9].malfunction_data['malfunction'] == 2

    # Print for test generation
    # for a in range(env.get_num_agents()):
    #    print("assert env.agents[{}].malfunction_data['next_malfunction'] == {}".format(a, env.agents[a].malfunction_data['next_malfunction']))
    # for a in range(env.get_num_agents()):
    #    print("assert env.agents[{}].malfunction_data['malfunction'] == {}".format(a, env.agents[a].malfunction_data[
    #            'malfunction']))
265

266
def test_initial_malfunction():
    """Replay-check an agent that starts in a 3-step malfunction.

    The agent must stand still (full step penalty) for the malfunction
    duration, then restart with MOVE_FORWARD and advance one cell per step.
    Fix: removed a leftover debug print of malfunction_data that cluttered
    the test output.
    """
    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 100,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=10),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    # reset to initialize agents_static
    env.reset(False, False, True, random_seed=10)
    env.agents[0].target = (0, 5)
    set_penalties_for_replay(env)
    replay_config = ReplayConfig(
        replay=[
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                set_malfunction=3,
                malfunction=3,
                reward=env.step_penalty  # full step penalty when malfunctioning
            ),
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=2,
                reward=env.step_penalty  # full step penalty when malfunctioning
            ),
            # malfunction stops in the next step and we're still at the beginning of the cell
            # --> if we take action MOVE_FORWARD, agent should restart and move to the next cell
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=1,
                reward=env.start_penalty + env.step_penalty * 1.0
                # malfunctioning ends: starting and running at speed 1.0
            ),
            Replay(
                position=(3, 3),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.step_penalty * 1.0  # running at speed 1.0
            ),
            Replay(
                position=(3, 4),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.step_penalty * 1.0  # running at speed 1.0
            )
        ],
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=(3, 2),
        initial_direction=Grid4TransitionsEnum.EAST,
    )
    run_replay_config(env, [replay_config])
336
337
338


def test_initial_malfunction_stop_moving():
    """Replay-check STOP_MOVING/DO_NOTHING semantics around an initial malfunction.

    The agent malfunctions before departing, enters the grid while still
    broken, is explicitly stopped, stands still, then restarts with
    MOVE_FORWARD and advances. Fix: removed a leftover debug print of the
    agent's position/status that cluttered the test output.
    """
    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 70,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    env.reset()

    set_penalties_for_replay(env)
    replay_config = ReplayConfig(
        replay=[
            Replay(
                position=None,
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                set_malfunction=3,
                malfunction=3,
                reward=env.step_penalty,  # full step penalty when stopped
                status=RailAgentStatus.READY_TO_DEPART
            ),
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.DO_NOTHING,
                malfunction=3,
                reward=env.step_penalty,  # full step penalty when stopped
                status=RailAgentStatus.ACTIVE
            ),
            # malfunction stops in the next step and we're still at the beginning of the cell
            # --> if we take action STOP_MOVING, agent should restart without moving
            #
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.STOP_MOVING,
                malfunction=2,
                reward=env.step_penalty,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            ),
            # we have stopped and do nothing --> should stand still
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.DO_NOTHING,
                malfunction=1,
                reward=env.step_penalty,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            ),
            # we start to move forward --> should go to next cell now
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.start_penalty + env.step_penalty * 1.0,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            ),
            Replay(
                position=(3, 3),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.step_penalty * 1.0,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            )
        ],
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=(3, 2),
        initial_direction=Grid4TransitionsEnum.EAST,
    )

    run_replay_config(env, [replay_config], activate_agents=False)
424
425


426
def test_initial_malfunction_do_nothing():
    """Replay-check DO_NOTHING semantics around an initial malfunction.

    The agent malfunctions before departing, then repeatedly does nothing:
    it must stay on its entry cell (full step penalty each step) until the
    malfunction ends, after which MOVE_FORWARD makes it advance normally.
    """
    random.seed(0)
    np.random.seed(0)

    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 70,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    # reset to initialize agents_static
    env.reset()
    set_penalties_for_replay(env)
    replay_config = ReplayConfig(
        replay=[
            Replay(
                position=None,
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                set_malfunction=3,
                malfunction=3,
                reward=env.step_penalty,  # full step penalty while malfunctioning
                status=RailAgentStatus.READY_TO_DEPART
            ),
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.DO_NOTHING,
                malfunction=3,
                reward=env.step_penalty,  # full step penalty while malfunctioning
                status=RailAgentStatus.ACTIVE
            ),
            # malfunction stops in the next step and we're still at the beginning of the cell
            # --> if we take action DO_NOTHING, agent should restart without moving
            #
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.DO_NOTHING,
                malfunction=2,
                reward=env.step_penalty,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            ),
            # we haven't started moving yet --> stay here
            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.DO_NOTHING,
                malfunction=1,
                reward=env.step_penalty,  # full step penalty while stopped
                status=RailAgentStatus.ACTIVE
            ),

            Replay(
                position=(3, 2),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.start_penalty + env.step_penalty * 1.0,  # start penalty + step penalty for speed 1.0
                status=RailAgentStatus.ACTIVE
            ),  # we start to move forward --> should go to next cell now
            Replay(
                position=(3, 3),
                direction=Grid4TransitionsEnum.EAST,
                action=RailEnvActions.MOVE_FORWARD,
                malfunction=0,
                reward=env.step_penalty * 1.0,  # step penalty for speed 1.0
                status=RailAgentStatus.ACTIVE
            )
        ],
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=(3, 2),
        initial_direction=Grid4TransitionsEnum.EAST,
    )
    run_replay_config(env, [replay_config], activate_agents=False)
511
512
513
514
515
516
517


def test_initial_nextmalfunction_not_below_zero():
    """Regression test for flatland issue #186: after one step,
    next_malfunction must never go negative."""
    random.seed(0)
    np.random.seed(0)

    # Every agent is defective; malfunctions last between 2 and 5 steps.
    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 70,  # Rate of malfunction occurence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  obs_builder_object=SingleAgentNavigationObs()
                  )
    # reset to initialize agents_static
    env.reset()
    first_agent = env.agents[0]
    env.step({})
    # next_malfunction was -1 before the bugfix
    # https://gitlab.aicrowd.com/flatland/flatland/issues/186
    countdown = first_agent.malfunction_data['next_malfunction']
    assert countdown >= 0, \
        "next_malfunction should be >=0, found {}".format(first_agent.malfunction_data['next_malfunction'])
Erik Nygren's avatar
Erik Nygren committed
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559


def tests_random_interference_from_outside():
    """Tests that the env's RNG is isolated from outside random usage.

    Runs one seeded trial and records (reward, position) per step, then
    reruns the identical scenario while also drawing from the global
    `random` and `numpy.random` generators; both trials must match exactly.
    """
    # Set fixed malfunction duration for this test
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 1,
                       'min_duration': 10,
                       'max_duration': 10}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2),  # seed 12
                  number_of_agents=1,
                  random_seed=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    env.reset()
    # reset to initialize agents_static
    env.agents[0].speed_data['speed'] = 0.33
    env.agents[0].initial_position = (3, 0)
    env.agents[0].target = (3, 9)
    env.reset(False, False, False)
    env_data = []

    for step in range(200):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            # We randomly select an action
            action_dict[agent.handle] = RailEnvActions(2)

        _, reward, _, _ = env.step(action_dict)
        # Append the rewards of the first trial
        env_data.append((reward[0], env.agents[0].position))
        assert reward[0] == env_data[step][0]
        assert env.agents[0].position == env_data[step][1]
    # Run the same test as above but with an external random generator running
    # Check that the reward stays the same

    rail, rail_map = make_simple_rail2()
    random.seed(47)
    np.random.seed(1234)
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2),  # seed 12
                  number_of_agents=1,
                  random_seed=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    env.reset()
    # reset to initialize agents_static
    env.agents[0].speed_data['speed'] = 0.33
    env.agents[0].initial_position = (3, 0)
    env.agents[0].target = (3, 9)
    env.reset(False, False, False)

    # Print for test generation
    dummy_list = [1, 2, 6, 7, 8, 9, 4, 5, 4]
    for step in range(200):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            # We randomly select an action
            action_dict[agent.handle] = RailEnvActions(2)

            # Do dummy random number generations to perturb the global RNGs
            random.shuffle(dummy_list)
            np.random.rand()

        _, reward, _, _ = env.step(action_dict)
        # Second trial must reproduce the first despite outside RNG usage
        assert reward[0] == env_data[step][0]
        assert env.agents[0].position == env_data[step][1]
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671


def test_last_malfunction_step():
    """
    Test to check that agent moves when it is not malfunctioning.

    Steps a single slow agent (speed 0.33) for 20 steps and checks that its
    position_fraction changes exactly when the malfunction bookkeeping says
    it is allowed to move. Fix: removed the unused local ``env_data``.
    """

    # Set fixed malfunction duration for this test
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 5,
                       'min_duration': 4,
                       'max_duration': 4}

    rail, rail_map = make_simple_rail2()

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2),  # seed 12
                  number_of_agents=1,
                  random_seed=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )
    env.reset()
    # reset to initialize agents_static
    env.agents[0].speed_data['speed'] = 0.33
    env.agents_static[0].target = (0, 0)

    env.reset(False, False, True)
    # Force malfunction to be off at beginning and next malfunction to happen in 2 steps
    env.agents[0].malfunction_data['next_malfunction'] = 2
    env.agents[0].malfunction_data['malfunction'] = 0

    for step in range(20):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent in env.agents:
            # Go forward all the time
            action_dict[agent.handle] = RailEnvActions(2)

        # The agent may move unless it is still malfunctioning after this
        # step or a new malfunction is about to start
        agent_can_move = not (env.agents[0].malfunction_data['malfunction'] > 1
                              or env.agents[0].malfunction_data['next_malfunction'] < 1)

        # Store the position before and after the step
        pre_position = env.agents[0].speed_data['position_fraction']
        _, reward, _, _ = env.step(action_dict)
        post_position = env.agents[0].speed_data['position_fraction']

        # Assert that the agent moved while it was still allowed
        if agent_can_move:
            assert pre_position != post_position
        else:
            assert post_position == pre_position