'''
I ran experiments in an early submission. Please note that epsilon can have an
effect on the evaluation outcome:
DDDQNPolicy experiments - EPSILON impact analysis
----------------------------------------------------------------------------------------
checkpoint = "./checkpoints/201124171810-7800.pth"  # Training on AGENTS=10 with Depth=2
EPSILON = 0.000 # Sum Normalized Reward :  0.000000000000000 (primary score)
EPSILON = 0.002 # Sum Normalized Reward : 18.445875081269286 (primary score)
EPSILON = 0.005 # Sum Normalized Reward : 18.371733625865854 (primary score)
EPSILON = 0.010 # Sum Normalized Reward : 18.249244799876152 (primary score)
EPSILON = 0.020 # Sum Normalized Reward : 17.526987022691376 (primary score)
EPSILON = 0.030 # Sum Normalized Reward : 16.796885571003942 (primary score)
EPSILON = 0.040 # Sum Normalized Reward : 17.280787151431426 (primary score)
EPSILON = 0.050 # Sum Normalized Reward : 16.256945636647025 (primary score)
EPSILON = 0.100 # Sum Normalized Reward : 14.828347241759966 (primary score)
EPSILON = 0.200 # Sum Normalized Reward : 11.192330074898457 (primary score)
EPSILON = 0.300 # Sum Normalized Reward : 14.523067754608782 (primary score)
EPSILON = 0.400 # Sum Normalized Reward : 12.901508220410834 (primary score)
EPSILON = 0.500 # Sum Normalized Reward :  3.754660231871272 (primary score)
EPSILON = 1.000 # Sum Normalized Reward :  1.397180159192391 (primary score)
'''
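
# EPSILON is the epsilon-greedy exploration rate handed to policy.act(..., eps=EPSILON)
# below: the per-decision probability of taking a random action instead of the greedy
# one during evaluation. The table above suggests that, for that checkpoint, a small
# amount of randomness scored better than the fully greedy policy, while large values
# quickly degraded the score.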

import sys
import time
from argparse import Namespace
from pathlib import Path

import numpy as np
from flatland.core.env_observation_builder import DummyObservationBuilder
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.evaluators.client import FlatlandRemoteClient
from flatland.evaluators.client import TimeoutException

from reinforcement_learning.dddqn_policy import DDDQNPolicy
from reinforcement_learning.deadlockavoidance_with_decision_agent import DeadLockAvoidanceWithDecisionAgent
from reinforcement_learning.multi_decision_agent import MultiDecisionAgent
from reinforcement_learning.ppo_agent import PPOPolicy
from utils.agent_action_config import get_action_size, map_actions, set_action_size_reduced, set_action_size_full
from utils.dead_lock_avoidance_agent import DeadLockAvoidanceAgent
from utils.deadlock_check import check_if_all_blocked
from utils.fast_tree_obs import FastTreeObs
from utils.observation_utils import normalize_observation

base_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(base_dir))

####################################################
# EVALUATION PARAMETERS
set_action_size_full()

# Print per-step logs
VERBOSE = True
USE_FAST_TREEOBS = True
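
# Exactly one of the configuration blocks below is enabled (its condition set to True).
# Each block selects a policy type and a checkpoint and, where that checkpoint was trained
# with the reduced action space, calls set_action_size_reduced() to override the default
# set above with set_action_size_full().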

if False:
    # -------------------------------------------------------------------------------------------------------
    # RL solution
    # -------------------------------------------------------------------------------------------------------
    # 116591 adrian_egli
    # graded	71.305	0.633	RL	Successfully Graded ! More details about this submission can be found at:
    # http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/51
    # Fri, 22 Jan 2021 23:37:56
    set_action_size_reduced()
    load_policy = "DDDQN"
    checkpoint = "./checkpoints/210122120236-3000.pth"  # 17.011131341978228
    EPSILON = 0.0

if False:
    # -------------------------------------------------------------------------------------------------------
    # RL solution
    # -------------------------------------------------------------------------------------------------------
    # 116658 adrian_egli
    # graded	73.821	0.655	RL	Successfully Graded ! More details about this submission can be found at:
    # http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/52
    # Sat, 23 Jan 2021 07:41:35
    set_action_size_reduced()
    load_policy = "PPO"
    checkpoint = "./checkpoints/210122235754-5000.pth"  # 16.00113400887389
    EPSILON = 0.0

if True:
    # -------------------------------------------------------------------------------------------------------
    # RL solution
    # -------------------------------------------------------------------------------------------------------
    # 116659 adrian_egli
    # graded	80.579	0.715	RL	Successfully Graded ! More details about this submission can be found at:
    # http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/53
    # Sat, 23 Jan 2021 07:45:49
    set_action_size_reduced()
    load_policy = "DDDQN"
    checkpoint = "./checkpoints/210122165109-5000.pth"  # 17.993750197899438
    EPSILON = 0.0

if False:
    # -------------------------------------------------------------------------------------------------------
    # !! This is not an RL solution !!!!
    # -------------------------------------------------------------------------------------------------------
    # 116727 adrian_egli
    # graded	106.786	0.768	RL	Successfully Graded ! More details about this submission can be found at:
    # http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/54
    # Sat, 23 Jan 2021 14:31:50
    set_action_size_reduced()
    load_policy = "DeadLockAvoidance"
    checkpoint = None
    EPSILON = 0.0

# Use last action cache
USE_ACTION_CACHE = False

# Observation parameters (must match training parameters!)
observation_tree_depth = 2
observation_radius = 10
observation_max_path_depth = 30

####################################################

remote_client = FlatlandRemoteClient()

# Observation builder
predictor = ShortestPathPredictorForRailEnv(observation_max_path_depth)
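
# Two observation pipelines are supported:
#  - FastTreeObs already produces a compact, normalized feature vector, so the helpers
#    below simply pass the observation through unchanged.
#  - The standard TreeObsForRailEnv returns a tree that has to be flattened and
#    normalized with normalize_observation() before it is fed to the policy.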
if USE_FAST_TREEOBS:
    def check_is_observation_valid(observation):
        return True


    def get_normalized_observation(observation, tree_depth: int, observation_radius=0):
        return observation


    tree_observation = FastTreeObs(max_depth=observation_tree_depth)
    state_size = tree_observation.observation_dim
else:
    def check_is_observation_valid(observation):
        return observation


    def get_normalized_observation(observation, tree_depth: int, observation_radius=0):
        return normalize_observation(observation, tree_depth, observation_radius)


    tree_observation = TreeObsForRailEnv(max_depth=observation_tree_depth, predictor=predictor)
    # Calculate the state size given the depth of the tree observation and the number of features
    n_features_per_node = tree_observation.observation_dim
    n_nodes = sum([np.power(4, i) for i in range(observation_tree_depth + 1)])
    state_size = n_features_per_node * n_nodes
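    # Example: with observation_tree_depth = 2 the tree has 1 + 4 + 16 = 21 nodes, so the
    # flattened state vector has 21 * n_features_per_node entries.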

#####################################################################
# Main evaluation loop
#####################################################################
evaluation_number = 0

while True:
    evaluation_number += 1

    # We use a dummy observation and call TreeObsForRailEnv ourselves when needed.
    # This way we decide if we want to calculate the observations or not instead
    # of having them calculated every time we perform an env step.
    time_start = time.time()
    observation, info = remote_client.env_create(
        obs_builder_object=DummyObservationBuilder()
    )
    env_creation_time = time.time() - time_start

    if not observation:
        # If the remote_client returns False on an `env_create` call,
        # it means that your agent has already been evaluated on all the
        # required evaluation environments, and hence it's safe to break
        # out of the main evaluation loop.
        break

    print("Env Path : ", remote_client.current_env_path)
    print("Env Creation Time : ", env_creation_time)

    local_env = remote_client.env
    nb_agents = len(local_env.agents)
    max_nb_steps = local_env._max_episode_steps

    tree_observation.set_env(local_env)
    tree_observation.reset()

    # Create the policy. There is no GPU on the evaluation server.
    if load_policy == "DDDQN":
        policy = DDDQNPolicy(state_size, get_action_size(), Namespace(**{'use_gpu': False}), evaluation_mode=True)
    elif load_policy == "PPO":
        policy = PPOPolicy(state_size, get_action_size())
    elif load_policy == "DeadLockAvoidance":
        policy = DeadLockAvoidanceAgent(local_env, get_action_size(), enable_eps=False)
    elif load_policy == "DeadLockAvoidanceWithDecision":
        # inter_policy = PPOPolicy(state_size, get_action_size(), use_replay_buffer=False, in_parameters=train_params)
        inter_policy = DDDQNPolicy(state_size, get_action_size(), Namespace(**{'use_gpu': False}), evaluation_mode=True)
        policy = DeadLockAvoidanceWithDecisionAgent(local_env, state_size, get_action_size(), inter_policy)
    elif load_policy == "MultiDecision":
        policy = MultiDecisionAgent(state_size, get_action_size(), Namespace(**{'use_gpu': False}))
    else:
        policy = PPOPolicy(state_size, get_action_size(), use_replay_buffer=False,
                           in_parameters=Namespace(**{'use_gpu': False}))
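    # checkpoint is None for the DeadLockAvoidance heuristic, which has no weights to
    # load; the corresponding load() implementation is assumed to tolerate this.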

    policy.load(checkpoint)

    policy.reset(local_env)
    observation = tree_observation.get_many(list(range(nb_agents)))

    print("Evaluation {}: {} agents in {}x{}".format(evaluation_number, nb_agents, local_env.width, local_env.height))

    # Now we enter the inner loop where we compute the actions for each individual
    # step of this episode until the episode is `done`.
    steps = 0

    # Bookkeeping
    time_taken_by_controller = []
    time_taken_per_step = []

    # Action cache: keep track of the last observation to avoid running the same inference multiple times.
    # This only makes sense for deterministic policies.
    agent_last_obs = {}
    agent_last_action = {}
    nb_hit = 0

    policy.start_episode(train=False)
    while True:
        try:
            #####################################################################
            # Evaluation of a single episode
            #####################################################################
            steps += 1
            obs_time, agent_time, step_time = 0.0, 0.0, 0.0
            no_ops_mode = False
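            # While at least one agent can still move we query the policy for actions.
            # Once check_if_all_blocked() reports a full deadlock we skip inference and
            # only send empty action dicts (see the else branch below), which saves
            # controller time.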

            if not check_if_all_blocked(env=local_env):
                time_start = time.time()
                action_dict = {}
                policy.start_step(train=False)
                for agent_handle in range(nb_agents):
                    if info['action_required'][agent_handle]:
                        if agent_handle in agent_last_obs and np.all(
                                agent_last_obs[agent_handle] == observation[agent_handle]):
                            # cache hit
                            action = agent_last_action[agent_handle]
                            nb_hit += 1
                        else:
                            normalized_observation = get_normalized_observation(observation[agent_handle],
                                                                                observation_tree_depth,
                                                                                observation_radius=observation_radius)

                            action = policy.act(agent_handle, normalized_observation, eps=EPSILON)

                    action_dict[agent_handle] = action

                    if USE_ACTION_CACHE:
                        agent_last_obs[agent_handle] = observation[agent_handle]
                        agent_last_action[agent_handle] = action

                policy.end_step(train=False)
                agent_time = time.time() - time_start
                time_taken_by_controller.append(agent_time)

                time_start = time.time()
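                # map_actions() (from utils.agent_action_config) converts the policy's
                # action indices from the configured action space (full or reduced) back
                # into the RailEnv actions the environment expects.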
                _, all_rewards, done, info = remote_client.env_step(map_actions(action_dict))
                step_time = time.time() - time_start
                time_taken_per_step.append(step_time)

                time_start = time.time()
                observation = tree_observation.get_many(list(range(nb_agents)))
                obs_time = time.time() - time_start

            else:
                # Fully deadlocked: perform no-ops
                no_ops_mode = True

                time_start = time.time()
                _, all_rewards, done, info = remote_client.env_step({})
                step_time = time.time() - time_start
                time_taken_per_step.append(step_time)

            nb_agents_done = 0
            for i_agent, agent in enumerate(local_env.agents):
                # Count agents that are done (or done and already removed from the grid)
                if agent.status in [RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED]:
                    nb_agents_done += 1

            if VERBOSE or done['__all__']:
                print(
                    "Step {}/{}\tAgents done: {}\t Obs time {:.3f}s\t Inference time {:.5f}s\t Step time {:.3f}s\t Cache hits {}\t No-ops? {}".format(
                        str(steps).zfill(4),
                        max_nb_steps,
                        nb_agents_done,
                        obs_time,
                        agent_time,
                        step_time,
                        nb_hit,
                        no_ops_mode
                    ), end="\r")

            if done['__all__']:
                # When done['__all__'] is True, the evaluation of this particular
                # env instantiation is complete; we can break out of this loop
                # and move on to the next env evaluation.
                print()
                break

        except TimeoutException as err:
            # A timeout occurred; we won't get any reward for this episode :-(
            # Skip to next episode as further actions in this one will be ignored.
            # The whole evaluation will be stopped if there are 10 consecutive timeouts.
            print("Timeout! Will skip this episode and go to the next.", err)
            break

    policy.end_episode(train=False)

    np_time_taken_by_controller = np.array(time_taken_by_controller)
    np_time_taken_per_step = np.array(time_taken_per_step)
    print("Mean/Std of Time taken by Controller : ", np_time_taken_by_controller.mean(),
          np_time_taken_by_controller.std())
    print("Mean/Std of Time per Step : ", np_time_taken_per_step.mean(), np_time_taken_per_step.std())
    print("=" * 100)

print("Evaluation of all environments complete!")
########################################################################
# Submit your Results
#
# Please do not forget to include this call, as this triggers the
# final computation of the score statistics, video generation, etc
# and is necessary to have your submission marked as successfully evaluated
########################################################################
print(remote_client.submit())