Commit e177f0a4 authored by MasterScrat

Added torch dependency, using multiprocessing pool for inference

parent b4cdfd75
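The headline change is that per-agent inference is fanned out over a `multiprocessing` worker pool instead of running sequentially in the main process. The sketch below illustrates that pattern only; `infer_action` is stubbed out, the policy/normalization details are omitted, and the explicit `pool` argument and example values are assumptions rather than the exact code in this commit.

```python
from multiprocessing import Pool


def infer_action(norm_obs):
    # Worker function: runs inference for a single agent.
    # In the real submission this would normalize the tree observation
    # and call policy.act(...); here it is a stand-in computation.
    return int(sum(norm_obs) > 0)


def rl_controller(obs, info, number_of_agents, pool):
    # Collect only the agents that have an observation and need an action,
    # then map the per-agent inference across the worker pool.
    agent_ids = [a for a in range(number_of_agents)
                 if obs[a] and info['action_required'][a]]
    actions = pool.map(infer_action, [obs[a] for a in agent_ids])
    return dict(zip(agent_ids, actions))


if __name__ == "__main__":
    # The pool must be created under the __main__ guard so that freshly
    # spawned worker processes can import this module safely.
    pool = Pool()
    obs = {0: [1.0, -2.0], 1: [0.5, 0.5], 2: None}
    info = {'action_required': {0: True, 1: False, 2: True}}
    print(rl_controller(obs, info, number_of_agents=3, pool=pool))
    pool.close()
    pool.join()
```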
@@ -71,7 +71,7 @@ dependencies:
- crowdai-api==0.1.21
- cycler==0.10.0
- filelock==3.0.12
- flatland-rl==2.2.1
- git+git://gitlab.aicrowd.com/flatland/flatland@eval_timeouts
- future==0.17.1
- gym==0.14.0
- idna==2.8
@@ -102,6 +102,7 @@ dependencies:
- svgutils==0.3.1
- timeout-decorator==0.4.1
- toml==0.10.0
- torch==1.5.0
- tox==3.13.2
- urllib3==1.25.3
- ushlex==0.99.1
......
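For reference, the changed dependency lines above live in the pip section of the conda `environment.yml`: the pinned `flatland-rl==2.2.1` release is swapped for a Git install of the `eval_timeouts` branch, and `torch==1.5.0` is added for loading the trained DDDQN policy. A trimmed sketch of how such a file is laid out follows; the `name:` and `python` entries are placeholders, not the actual values in the repository.

```yaml
name: flatland-submission        # placeholder name
dependencies:
  - python=3.6                   # placeholder interpreter pin
  - pip:
      # install flatland from a branch instead of the pinned PyPI release
      - git+git://gitlab.aicrowd.com/flatland/flatland@eval_timeouts
      - torch==1.5.0             # needed to torch.load() the trained policy
```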
import sys
from argparse import Namespace
from multiprocessing.pool import Pool
from pathlib import Path
import numpy as np
@@ -15,135 +16,137 @@ sys.path.append(str(base_dir))
from reinforcement_learning.dddqn_policy import DDDQNPolicy
from utils.observation_utils import normalize_observation
#####################################################################
# Instantiate a Remote Client
#####################################################################
remote_client = FlatlandRemoteClient()
def infer_action(obs):
norm_obs = normalize_observation(obs, tree_depth=observation_tree_depth, observation_radius=observation_radius)
return policy.act(norm_obs, eps=0.0)
# Checkpoint to use
checkpoint = "checkpoints/multi-100.pth"
# Evaluation is faster on CPU (except if you use a really huge policy)
parameters = {
'use_gpu': False
}
def rl_controller(obs, number_of_agents):
obs_list = []
for agent in range(number_of_agents):
if obs[agent] and info['action_required'][agent]:
obs_list.append(obs[agent])
# Observation parameters
observation_tree_depth = 2
observation_radius = 10
observation_max_path_depth = 30
return dict(zip(range(number_of_agents), pool.map(infer_action, obs_list)))
# Observation builder
predictor = ShortestPathPredictorForRailEnv(observation_max_path_depth)
tree_observation = TreeObsForRailEnv(max_depth=observation_tree_depth, predictor=predictor)
num_features_per_node = tree_observation.observation_dim
tree_depth = 2
nr_nodes = 0
for i in range(tree_depth + 1):
nr_nodes += np.power(4, i)
state_size = num_features_per_node * nr_nodes
action_size = 5
if __name__ == "__main__":
#####################################################################
# Instantiate a Remote Client
#####################################################################
policy = DDDQNPolicy(state_size, action_size, Namespace(**parameters), evaluation_mode=True)
policy.qnetwork_local = torch.load(checkpoint)
remote_client = FlatlandRemoteClient()
# Checkpoint to use
checkpoint = "checkpoints/multi-100.pth"
# Controller
def rl_controller(obs, number_of_agents):
action_dict = {}
for agent in range(number_of_agents):
agent_obs = {}
if obs[agent]:
agent_obs[agent] = normalize_observation(obs[agent], tree_depth=observation_tree_depth, observation_radius=observation_radius)
action = 0
if info['action_required'][agent]:
action = policy.act(agent_obs[agent], eps=0.0)
action_dict.update({agent: action})
return action_dict
#####################################################################
# Main evaluation loop
#
# This iterates over an arbitrary number of env evaluations
#####################################################################
evaluation_number = 0
while True:
evaluation_number += 1
time_start = time.time()
observation, info = remote_client.env_create(
obs_builder_object=tree_observation
)
env_creation_time = time.time() - time_start
if not observation:
#
# If the remote_client returns False on an `env_create` call,
# then it basically means that your agent has already been
# evaluated on all the required evaluation environments,
# and hence it's safe to break out of the main evaluation loop
break
print("Evaluation Number : {}".format(evaluation_number))
local_env = remote_client.env
number_of_agents = len(local_env.agents)
# Now we enter into another infinite loop where we
# compute the actions for all the individual steps in this episode
# until the episode is `done`
time_taken_by_controller = []
time_taken_per_step = []
steps = 0
# Evaluation is faster on CPU (except if you use a really huge policy)
parameters = {
'use_gpu': False
}
# Observation parameters
observation_tree_depth = 2
observation_radius = 10
observation_max_path_depth = 30
# Observation builder
predictor = ShortestPathPredictorForRailEnv(observation_max_path_depth)
tree_observation = TreeObsForRailEnv(max_depth=observation_tree_depth, predictor=predictor)
num_features_per_node = tree_observation.observation_dim
tree_depth = 2
nr_nodes = 0
for i in range(tree_depth + 1):
nr_nodes += np.power(4, i)
state_size = num_features_per_node * nr_nodes
action_size = 5
policy = DDDQNPolicy(state_size, action_size, Namespace(**parameters), evaluation_mode=True)
policy.qnetwork_local = torch.load(checkpoint)
# Controller
pool = Pool()
#####################################################################
# Main evaluation loop
#
# This iterates over an arbitrary number of env evaluations
#####################################################################
evaluation_number = 0
while True:
#####################################################################
# Evaluation of a single episode
#####################################################################
time_start = time.time()
action = rl_controller(observation, number_of_agents)
time_taken = time.time() - time_start
print(steps, time_taken)
time_taken_by_controller.append(time_taken)
evaluation_number += 1
time_start = time.time()
observation, all_rewards, done, info = remote_client.env_step(action)
steps += 1
time_taken = time.time() - time_start
time_taken_per_step.append(time_taken)
observation, info = remote_client.env_create(
obs_builder_object=tree_observation
)
env_creation_time = time.time() - time_start
if done['__all__']:
print("Reward : ", sum(list(all_rewards.values())))
if not observation:
#
# When done['__all__'] == True, then the evaluation of this
# particular Env instantiation is complete, and we can break out
# of this loop, and move onto the next Env evaluation
# If the remote_client returns False on an `env_create` call,
# then it basically means that your agent has already been
# evaluated on all the required evaluation environments,
# and hence it's safe to break out of the main evaluation loop
break
np_time_taken_by_controller = np.array(time_taken_by_controller)
np_time_taken_per_step = np.array(time_taken_per_step)
print("=" * 100)
print("=" * 100)
print("Evaluation Number : ", evaluation_number)
print("Current Env Path : ", remote_client.current_env_path)
print("Env Creation Time : ", env_creation_time)
print("Number of Steps : ", steps)
print("Mean/Std of Time taken by Controller : ", np_time_taken_by_controller.mean(), np_time_taken_by_controller.std())
print("Mean/Std of Time per Step : ", np_time_taken_per_step.mean(), np_time_taken_per_step.std())
print("=" * 100)
print("Evaluation of all environments complete...")
########################################################################
# Submit your Results
#
# Please do not forget to include this call, as this triggers the
# final computation of the score statistics, video generation, etc
# and is necessary to have your submission marked as successfully evaluated
########################################################################
print(remote_client.submit())
print("Evaluation Number : {}".format(evaluation_number))
local_env = remote_client.env
number_of_agents = len(local_env.agents)
# Now we enter into another infinite loop where we
# compute the actions for all the individual steps in this episode
# until the episode is `done`
time_taken_by_controller = []
time_taken_per_step = []
steps = 0
while True:
#####################################################################
# Evaluation of a single episode
#####################################################################
time_start = time.time()
action = rl_controller(observation, number_of_agents)
time_taken = time.time() - time_start
print(steps, time_taken)
time_taken_by_controller.append(time_taken)
time_start = time.time()
observation, all_rewards, done, info = remote_client.env_step(action)
steps += 1
time_taken = time.time() - time_start
time_taken_per_step.append(time_taken)
if done['__all__']:
print("Reward : ", sum(list(all_rewards.values())))
#
# When done['__all__'] == True, then the evaluation of this
# particular Env instantiation is complete, and we can break out
# of this loop, and move onto the next Env evaluation
break
np_time_taken_by_controller = np.array(time_taken_by_controller)
np_time_taken_per_step = np.array(time_taken_per_step)
print("=" * 100)
print("=" * 100)
print("Evaluation Number : ", evaluation_number)
print("Current Env Path : ", remote_client.current_env_path)
print("Env Creation Time : ", env_creation_time)
print("Number of Steps : ", steps)
print("Mean/Std of Time taken by Controller : ", np_time_taken_by_controller.mean(), np_time_taken_by_controller.std())
print("Mean/Std of Time per Step : ", np_time_taken_per_step.mean(), np_time_taken_per_step.std())
print("=" * 100)
print("Evaluation of all environments complete...")
########################################################################
# Submit your Results
#
# Please do not forget to include this call, as this triggers the
# final computation of the score statistics, video generation, etc
# and is necessary to have your submission marked as successfully evaluated
########################################################################
print(remote_client.submit())
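One detail worth noting about the checkpoint loading shown above: evaluation runs with `use_gpu: False`, and `torch.load` will fail on a CPU-only machine if the checkpoint was serialized with CUDA tensors unless a `map_location` is given. The sketch below shows the usual pattern; it assumes, as the assignment to `policy.qnetwork_local` suggests, that the `.pth` file stores the whole network module rather than just a state dict.

```python
import torch

checkpoint = "checkpoints/multi-100.pth"

# map_location lets a checkpoint saved on a GPU machine be deserialized
# on the CPU-only evaluator (use_gpu is False in this script).
qnetwork = torch.load(checkpoint, map_location=torch.device("cpu"))
qnetwork.eval()  # inference mode: no dropout, frozen batch-norm statistics
```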
@@ -53,7 +53,7 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0, normalize_to_ran
return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
def _split_node_into_feature_groups(node: TreeObsForRailEnv.Node) -> (np.ndarray, np.ndarray, np.ndarray):
def _split_node_into_feature_groups(node) -> (np.ndarray, np.ndarray, np.ndarray):
data = np.zeros(6)
distance = np.zeros(1)
agent_data = np.zeros(4)
@@ -75,7 +75,7 @@ def _split_node_into_feature_groups(node: TreeObsForRailEnv.Node) -> (np.ndarray
return data, distance, agent_data
def _split_subtree_into_feature_groups(node: TreeObsForRailEnv.Node, current_tree_depth: int, max_tree_depth: int) -> (np.ndarray, np.ndarray, np.ndarray):
def _split_subtree_into_feature_groups(node, current_tree_depth: int, max_tree_depth: int) -> (np.ndarray, np.ndarray, np.ndarray):
if node == -np.inf:
remaining_depth = max_tree_depth - current_tree_depth
# reference: https://stackoverflow.com/questions/515214/total-number-of-nodes-in-a-tree-data-structure
@@ -96,7 +96,7 @@ def _split_subtree_into_feature_groups(node: TreeObsForRailEnv.Node, current_tre
return data, distance, agent_data
def split_tree_into_feature_groups(tree: TreeObsForRailEnv.Node, max_tree_depth: int) -> (np.ndarray, np.ndarray, np.ndarray):
def split_tree_into_feature_groups(tree, max_tree_depth: int) -> (np.ndarray, np.ndarray, np.ndarray):
"""
This function splits the tree into three different arrays of values
"""
@@ -111,7 +111,7 @@ def split_tree_into_feature_groups(tree: TreeObsForRailEnv.Node, max_tree_depth:
return data, distance, agent_data
def normalize_observation(observation: TreeObsForRailEnv.Node, tree_depth: int, observation_radius=0):
def normalize_observation(observation, tree_depth: int, observation_radius=0):
"""
This function normalizes the observation used by the RL algorithm
"""
......
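As a quick sanity check on the sizes hard-coded in the run script: the helpers above split every tree node into 6 data values, 1 distance value and 4 agent values, i.e. 11 features per node, and a quaternary tree of depth 2 has `4 ** 0 + 4 ** 1 + 4 ** 2 = 21` nodes, which is exactly what the `nr_nodes` loop computes. A small worked example (variable names mirror the script; the literals are derived from these helpers):

```python
# Features per tree node, matching the 6/1/4 split in observation_utils.
num_features_per_node = 6 + 1 + 4                        # 11

# A quaternary tree of depth 2 (root plus two expansions).
tree_depth = 2
nr_nodes = sum(4 ** i for i in range(tree_depth + 1))    # 1 + 4 + 16 = 21

state_size = num_features_per_node * nr_nodes            # 231 network inputs
action_size = 5                                          # Flatland rail actions
print(state_size, action_size)
```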