test_flatland_malfunction.py 10 KB
Newer Older
u214892's avatar
u214892 committed
1
2
import random

3
4
import numpy as np

u214892's avatar
u214892 committed
5
6
from flatland.core.grid.grid4 import Grid4TransitionsEnum
from flatland.envs.agent_utils import EnvAgent
7
from flatland.envs.observations import TreeObsForRailEnv
u214892's avatar
u214892 committed
8
9
10
11
12
from flatland.envs.rail_env import RailEnv, RailEnvActions
from flatland.envs.rail_generators import complex_rail_generator, sparse_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator, sparse_schedule_generator
from flatland.utils.rendertools import RenderTool
from test_utils import Replay
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60


class SingleAgentNavigationObs(TreeObsForRailEnv):
    """
    Observation builder that tells an agent which branch is closest to its target.

    It extends TreeObsForRailEnv so that the distance map maintained by the parent class
    (read here as ``self.distance_map``) can be reused.

    The observation is a list of three 0/1 flags ordered [Left, Forward, Right] relative to the
    agent's current direction. The flag of the branch with the smallest distance-map entry is
    set to 1, e.g. [1, 0, 0] when going left is best.
    """

    def __init__(self):
        # No tree exploration is requested from the parent; only its distance map is read in `get`.
        super().__init__(max_depth=0)
        self.observation_space = [3]

    def reset(self):
        # Delegate to the parent (expected to refresh the distance map when the environment changed).
        super().reset()

    def get(self, handle):
        """Return the [left, forward, right] one-hot observation for the agent `handle`."""
        agent = self.env.agents[handle]
        transitions = self.env.rail.get_transitions(*agent.position, agent.direction)

        # With exactly one way out, the forward slot is used to represent it.
        if np.count_nonzero(transitions) == 1:
            return [0, 1, 0]

        # Collect one distance per relative branch: -1 = left, 0 = forward, +1 = right.
        distances = []
        for offset in (-1, 0, 1):
            heading = (agent.direction + offset) % 4
            if not transitions[heading]:
                # Branch not available: make sure it can never be picked as the minimum.
                distances.append(np.inf)
                continue
            cell = self._new_position(agent.position, heading)
            distances.append(self.distance_map[handle, cell[0], cell[1], heading])

        best = np.argmin(distances)
        return [1 if slot == best else 0 for slot in range(3)]


def test_malfunction_process():
    """Force a malfunction on agent 0 every fifth step and check the environment's bookkeeping.

    Runs 100 steps in a seeded 20x20 environment with two agents. On every step whose index is a
    multiple of 5 the malfunction data of agent 0 is overwritten to make it break down. After each
    step the test verifies that the agent did not change position while its malfunction counter
    is positive.
    """
    # Set fixed malfunction duration for this test
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 1000,
                       'min_duration': 3,
                       'max_duration': 3}
    np.random.seed(5)

    env = RailEnv(width=20,
                  height=20,
                  rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=5, max_dist=99999,
                                                        seed=0),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=2,
                  obs_builder_object=SingleAgentNavigationObs(),
                  stochastic_data=stochastic_data)

    obs = env.reset()

    # Check that an initial duration for malfunction was assigned
    assert env.agents[0].malfunction_data['next_malfunction'] > 0

    agent_halts = 0
    total_down_time = 0
    agent_old_position = env.agents[0].position
    for step in range(100):
        # The observation is one-hot over [left, forward, right]; +1 shifts the index onto an action id
        # (presumably MOVE_LEFT / MOVE_FORWARD / MOVE_RIGHT -- confirm against RailEnvActions).
        actions = {i: np.argmax(obs[i]) + 1 for i in range(len(obs))}

        if step % 5 == 0:
            # Stop the agent and set it to be malfunctioning
            env.agents[0].malfunction_data['malfunction'] = -1
            env.agents[0].malfunction_data['next_malfunction'] = 0
            agent_halts += 1

        obs, _, _, _ = env.step(actions)

        agent = env.agents[0]
        if agent.malfunction_data['malfunction'] > 0:
            # Check that agent is not moving while malfunctioning
            assert agent_old_position == agent.position

        agent_old_position = agent.position
        total_down_time += agent.malfunction_data['malfunction']

    # Check that the appropriate number of malfunctions is achieved
    assert env.agents[0].malfunction_data['nr_malfunctions'] == 21

    # Check that 20 stops were performed
    assert agent_halts == 20

    # Check that the agent actually spent time standing around while malfunctioning
    assert total_down_time > 0
u214892's avatar
u214892 committed
119
120
121
122
123
124
125
126
127
128


def test_malfunction_process_statistically():
    """Test that malfunctions are produced by stochastic_data.

    Two agents take random actions for 100 steps in a seeded environment. Before every step the
    test counts the agents whose malfunction counter is positive, and finally compares the total
    against a fixed reference value.
    """
    # Set fixed malfunction duration for this test
    stochastic_data = {'prop_malfunction': 1.,
                       'malfunction_rate': 2,
                       'min_duration': 3,
                       'max_duration': 3}
    # Both random sources are seeded so the malfunction count below can be asserted exactly.
    np.random.seed(5)
    random.seed(0)

    env = RailEnv(width=20,
                  height=20,
                  rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=5, max_dist=99999,
                                                        seed=0),
                  schedule_generator=complex_schedule_generator(),
                  number_of_agents=2,
                  obs_builder_object=SingleAgentNavigationObs(),
                  stochastic_data=stochastic_data)

    env.reset()
    nb_malfunction = 0
    for _ in range(100):
        action_dict = {}
        for agent in env.agents:
            if agent.malfunction_data['malfunction'] > 0:
                nb_malfunction += 1
            # We randomly select an action
            action_dict[agent.handle] = np.random.randint(4)

        env.step(action_dict)

    # Check that generation of malfunctions works as expected: with the seeds fixed above the
    # number of (agent, step) pairs observed in malfunction is pinned to a reference value.
    assert nb_malfunction == 149, "nb_malfunction={}".format(nb_malfunction)
u214892's avatar
u214892 committed
155
156
157
158


def test_initial_malfunction(rendering=True):
    """Replay a fixed action sequence and check agent 0's state before every step.

    The single agent is expected to stay on its start cell while its malfunction counter runs
    from 3 down to 1, and to move on the following steps. Position, direction and malfunction
    counter are compared against the recorded `Replay` entries before each `env.step`.

    :param rendering: when true, a `RenderTool` is created and the environment is rendered
        before the replay and after every step.
    """
    random.seed(0)
    np.random.seed(0)

    stochastic_data = {'prop_malfunction': 1.,  # Percentage of defective agents
                       'malfunction_rate': 70,  # Rate of malfunction occurrence
                       'min_duration': 2,  # Minimal duration of malfunction
                       'max_duration': 5  # Max duration of malfunction
                       }

    # All probability mass on speed 1 so the agent advances one cell per step.
    speed_ratio_map = {1.: 1.,  # Fast passenger train
                       1. / 2.: 0.,  # Fast freight train
                       1. / 3.: 0.,  # Slow commuter train
                       1. / 4.: 0.}  # Slow freight train

    env = RailEnv(width=25,
                  height=30,
                  rail_generator=sparse_rail_generator(num_cities=5,
                                                       # Number of cities in map (where train stations are)
                                                       num_intersections=4,
                                                       # Number of intersections (no start / target)
                                                       num_trainstations=25,  # Number of possible start/targets on map
                                                       min_node_dist=6,  # Minimal distance of nodes
                                                       node_radius=3,  # Proximity of stations to city center
                                                       num_neighb=3,
                                                       # Number of connections to other cities/intersections
                                                       seed=215545,  # Random seed
                                                       grid_mode=True,
                                                       enhance_intersection=False
                                                       ),
                  schedule_generator=sparse_schedule_generator(speed_ratio_map),
                  number_of_agents=1,
                  stochastic_data=stochastic_data,  # Malfunction data generator
                  )

    if rendering:
        renderer = RenderTool(env)
        renderer.render_env(show=True, frames=False, show_observations=False)

    # Expected agent state *before* each step, together with the action to submit in that step.
    replay_steps = [
        Replay(
            position=(28, 5),
            direction=Grid4TransitionsEnum.EAST,
            action=RailEnvActions.MOVE_FORWARD,
            malfunction=3
        ),
        Replay(
            position=(28, 5),
            direction=Grid4TransitionsEnum.EAST,
            action=RailEnvActions.MOVE_FORWARD,
            malfunction=2
        ),
        Replay(
            position=(28, 5),
            direction=Grid4TransitionsEnum.EAST,
            action=RailEnvActions.MOVE_FORWARD,
            malfunction=1
        ),
        Replay(
            position=(28, 4),
            direction=Grid4TransitionsEnum.WEST,
            action=RailEnvActions.MOVE_FORWARD,
            malfunction=0
        ),
        Replay(
            position=(27, 4),
            direction=Grid4TransitionsEnum.NORTH,
            action=RailEnvActions.MOVE_FORWARD,
            malfunction=0
        )
    ]

    # Seed value checked on the first iteration, before any `env.step` has returned real info.
    info_dict = {
        'action_required': [True]
    }

    def _assert(step_index, actual, expected, msg):
        # Equality check whose failure message carries the replay step index.
        assert actual == expected, "[{}] {}:  actual={}, expected={}".format(step_index, msg, actual, expected)

    for i, replay in enumerate(replay_steps):
        agent: EnvAgent = env.agents[0]

        _assert(i, agent.position, replay.position, 'position')
        _assert(i, agent.direction, replay.direction, 'direction')
        _assert(i, agent.malfunction_data['malfunction'], replay.malfunction, 'malfunction')

        # `== True` / `== False` are kept on purpose: switching to truthiness would change which
        # values of `action_required` are accepted.
        if replay.action is not None:
            assert info_dict['action_required'][0] == True, "[{}] expecting action_required={}".format(i, True)
            _, _, _, info_dict = env.step({0: replay.action})

        else:
            assert info_dict['action_required'][0] == False, "[{}] expecting action_required={}".format(i, False)
            _, _, _, info_dict = env.step({})

        if rendering:
            renderer.render_env(show=True, show_observations=True)