from flatland.core.grid.grid4 import Grid4TransitionsEnum
from flatland.envs.line_generators import sparse_line_generator
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv, RailEnvActions
from flatland.envs.rail_generators import rail_from_grid_transition_map
from flatland.envs.step_utils.states import TrainState
from flatland.utils.simple_rail import make_simple_rail

from test_utils import Replay, ReplayConfig, run_replay_config, set_penalties_for_replay


def test_initial_status():
    """Test the agent lifecycle READY_TO_DEPART -> MOVING -> DONE.

    The agent is kept on the grid after reaching its target
    (``remove_agents_at_target=False``), so the final assertion checks
    for ``TrainState.DONE`` with the agent still present.
    """
    rail, rail_map, optionals = make_simple_rail()
    env = RailEnv(width=rail_map.shape[1], height=rail_map.shape[0],
                  rail_generator=rail_from_grid_transition_map(rail, optionals),
                  line_generator=sparse_line_generator(), number_of_agents=1,
                  obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv()),
                  remove_agents_at_target=False)
    env.reset()

    # Generous cap so the episode never times out before the replay finishes.
    env._max_episode_steps = 1000

    # Perform DO_NOTHING actions until all trains get to READY_TO_DEPART.
    for _ in range(max(agent.earliest_departure for agent in env.agents)):
        env.step({})  # empty action dict == DO_NOTHING for all agents

    set_penalties_for_replay(env)
    # Scripted episode: one agent at speed 0.5, so each cell takes two steps
    # (one step with an action, one intermediate step with action=None).
    test_config = ReplayConfig(
        replay=[
            Replay(
                position=None,  # not entered grid yet
                direction=Grid4TransitionsEnum.EAST,
                state=TrainState.READY_TO_DEPART,
                action=RailEnvActions.DO_NOTHING,
                reward=env.step_penalty * 0.5,
            ),
            Replay(
                position=None,  # not entered grid yet before step
                direction=Grid4TransitionsEnum.EAST,
                state=TrainState.READY_TO_DEPART,
                action=RailEnvActions.MOVE_LEFT,
                reward=env.step_penalty * 0.5,  # auto-correction left to forward without penalty!
            ),
            Replay(
                position=(3, 9),
                direction=Grid4TransitionsEnum.EAST,
                state=TrainState.MOVING,
                action=RailEnvActions.MOVE_LEFT,
                reward=env.start_penalty + env.step_penalty * 0.5,  # running at speed 0.5
            ),
            Replay(
                position=(3, 9),
                direction=Grid4TransitionsEnum.EAST,
                state=TrainState.MOVING,
                action=None,
                reward=env.step_penalty * 0.5,  # running at speed 0.5
            ),
            Replay(
                position=(3, 8),
                direction=Grid4TransitionsEnum.WEST,
                state=TrainState.MOVING,
                action=RailEnvActions.MOVE_FORWARD,
                reward=env.step_penalty * 0.5,  # running at speed 0.5
            ),
            Replay(
                position=(3, 8),
                direction=Grid4TransitionsEnum.WEST,
                state=TrainState.MOVING,
                action=None,
                reward=env.step_penalty * 0.5,  # running at speed 0.5
            ),
            Replay(
                position=(3, 7),
                direction=Grid4TransitionsEnum.WEST,
                action=RailEnvActions.MOVE_FORWARD,
                reward=env.step_penalty * 0.5,  # running at speed 0.5
                state=TrainState.MOVING
            ),
            Replay(
                position=(3, 7),
                direction=Grid4TransitionsEnum.WEST,
                action=None,
                reward=env.step_penalty * 0.5,  # wrong action is corrected to forward without penalty!
                state=TrainState.MOVING
            ),
            Replay(
                position=(3, 6),
                direction=Grid4TransitionsEnum.WEST,
                action=RailEnvActions.MOVE_RIGHT,
                reward=env.step_penalty * 0.5,
                state=TrainState.MOVING
            ),
            Replay(
                position=(3, 6),
                direction=Grid4TransitionsEnum.WEST,
                action=None,
                reward=env.global_reward,  # final step into the target cell
                state=TrainState.MOVING
            ),
        ],
        initial_position=(3, 9),  # east dead-end
        initial_direction=Grid4TransitionsEnum.EAST,
        target=(3, 5),
        speed=0.5
    )

    run_replay_config(env, [test_config], activate_agents=False, skip_reward_check=True)
    assert env.agents[0].state == TrainState.DONE
u214892's avatar
u214892 committed
125

Erik Nygren's avatar
Erik Nygren committed
126

u214892's avatar
u214892 committed
127
128
def test_status_done_remove():
    """Test the agent lifecycle READY_TO_DEPART -> MOVING -> DONE.

    Same scripted episode as ``test_initial_status`` but with
    ``remove_agents_at_target=True``, i.e. the agent is removed from the
    grid once it reaches its target; its state must still end up DONE.
    """
    rail, rail_map, optionals = make_simple_rail()
    env = RailEnv(width=rail_map.shape[1], height=rail_map.shape[0],
                  rail_generator=rail_from_grid_transition_map(rail, optionals),
                  line_generator=sparse_line_generator(), number_of_agents=1,
                  obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv()),
                  remove_agents_at_target=True)
    env.reset()

    # Perform DO_NOTHING actions until all trains get to READY_TO_DEPART.
    for _ in range(max(agent.earliest_departure for agent in env.agents)):
        env.step({})  # empty action dict == DO_NOTHING for all agents

    # Generous cap so the episode never times out before the replay finishes.
    env._max_episode_steps = 1000

    set_penalties_for_replay(env)
    # Scripted episode: one agent at speed 0.5, so each cell takes two steps
    # (one step with an action, one intermediate step with action=None).
    test_config = ReplayConfig(
        replay=[
            Replay(
                position=None,  # not entered grid yet
                direction=Grid4TransitionsEnum.EAST,
                state=TrainState.READY_TO_DEPART,
                action=RailEnvActions.DO_NOTHING,
                reward=env.step_penalty * 0.5,
            ),
            Replay(
                position=None,  # not entered grid yet before step
                direction=Grid4TransitionsEnum.EAST,
                state=TrainState.READY_TO_DEPART,
                action=RailEnvActions.MOVE_LEFT,
                reward=env.step_penalty * 0.5,  # auto-correction left to forward without penalty!
            ),
            Replay(
                position=(3, 9),
                direction=Grid4TransitionsEnum.EAST,
                state=TrainState.MOVING,
                action=RailEnvActions.MOVE_FORWARD,
                reward=env.start_penalty + env.step_penalty * 0.5,  # running at speed 0.5
            ),
            Replay(
                position=(3, 9),
                direction=Grid4TransitionsEnum.EAST,
                state=TrainState.MOVING,
                action=None,
                reward=env.step_penalty * 0.5,  # running at speed 0.5
            ),
            Replay(
                position=(3, 8),
                direction=Grid4TransitionsEnum.WEST,
                state=TrainState.MOVING,
                action=RailEnvActions.MOVE_FORWARD,
                reward=env.step_penalty * 0.5,  # running at speed 0.5
            ),
            Replay(
                position=(3, 8),
                direction=Grid4TransitionsEnum.WEST,
                state=TrainState.MOVING,
                action=None,
                reward=env.step_penalty * 0.5,  # running at speed 0.5
            ),
            Replay(
                position=(3, 7),
                direction=Grid4TransitionsEnum.WEST,
                action=RailEnvActions.MOVE_RIGHT,
                reward=env.step_penalty * 0.5,  # running at speed 0.5
                state=TrainState.MOVING
            ),
            Replay(
                position=(3, 7),
                direction=Grid4TransitionsEnum.WEST,
                action=None,
                reward=env.step_penalty * 0.5,  # wrong action is corrected to forward without penalty!
                state=TrainState.MOVING
            ),
            Replay(
                position=(3, 6),
                direction=Grid4TransitionsEnum.WEST,
                action=RailEnvActions.MOVE_FORWARD,
                reward=env.step_penalty * 0.5,  # done
                state=TrainState.MOVING
            ),
            Replay(
                position=(3, 6),
                direction=Grid4TransitionsEnum.WEST,
                action=None,
                reward=env.global_reward,  # already done
                state=TrainState.MOVING
            ),
        ],
        initial_position=(3, 9),  # east dead-end
        initial_direction=Grid4TransitionsEnum.EAST,
        target=(3, 5),
        speed=0.5
    )

    run_replay_config(env, [test_config], activate_agents=False, skip_reward_check=True)
    assert env.agents[0].state == TrainState.DONE