Commit 165d7605 authored by Erik Nygren

Merge branch 'master' of gitlab.aicrowd.com:flatland/baselines

parents 84e8aeb1 6d56e096
@@ -5,6 +5,8 @@ from ray.rllib.env.multi_agent_env import MultiAgentEnv
 from ray.rllib.utils.seed import seed as set_seed
 class RailEnvRLLibWrapper(MultiAgentEnv):
     def __init__(self, config):
@@ -18,13 +20,15 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
         self.predefined_env = False
         if config['rail_generator'] == "complex_rail_generator":
-            self.rail_generator = complex_rail_generator(nr_start_goal=config['number_of_agents'], min_dist=5,
+            self.rail_generator = complex_rail_generator(nr_start_goal=config['number_of_agents'],
+                                                         min_dist=config['min_dist'],
                                                          nr_extra=config['nr_extra'],
                                                          seed=config['seed'] * (1 + vector_index))
         elif config['rail_generator'] == "random_rail_generator":
             self.rail_generator = random_rail_generator()
         elif config['rail_generator'] == "load_env":
             self.predefined_env = True
+            self.rail_generator = random_rail_generator()
         else:
             raise (ValueError, f'Unknown rail generator: {config["rail_generator"]}')
@@ -32,12 +36,11 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
         set_seed(config['seed'] * (1 + vector_index))
         self.env = RailEnv(width=config["width"], height=config["height"],
                            number_of_agents=config["number_of_agents"],
-                           obs_builder_object=config['obs_builder'], rail_generator=self.rail_generator,
-                           prediction_builder_object=config['predictor'])
+                           obs_builder_object=config['obs_builder'], rail_generator=self.rail_generator)
         if self.predefined_env:
-            self.env.load(config['load_env_path'])
-            self.env.load_resource('torch_training.railway', config['load_env_path'])
+            # self.env.load(config['load_env_path'])
+            self.env.load_resource('torch_training.railway', 'complex_scene.pkl')
         self.width = self.env.width
         self.height = self.env.height
@@ -50,25 +53,28 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
         else:
             obs = self.env.reset()
-        predictions = self.env.predict()
-        if predictions != {}:
-            # pred_pos is a 3 dimensions array (N_Agents, T_pred, 2) containing x and y coordinates of
-            # agents at each time step
-            pred_pos = np.concatenate([[x[:, 1:3]] for x in list(predictions.values())], axis=0)
-            pred_dir = [x[:, 2] for x in list(predictions.values())]
+        # predictions = self.env.predict()
+        # if predictions != {}:
+        #     # pred_pos is a 3 dimensions array (N_Agents, T_pred, 2) containing x and y coordinates of
+        #     # agents at each time step
+        #     pred_pos = np.concatenate([[x[:, 1:3]] for x in list(predictions.values())], axis=0)
+        #     pred_dir = [x[:, 2] for x in list(predictions.values())]
         o = dict()
         for i_agent in range(len(self.env.agents)):
-            if predictions != {}:
-                pred_obs = self.get_prediction_as_observation(pred_pos, pred_dir, i_agent)
-                agent_id_one_hot = np.zeros(len(self.env.agents))
-                agent_id_one_hot[i_agent] = 1
-                o[i_agent] = [obs[i_agent], agent_id_one_hot, pred_obs]
-            else:
-                o[i_agent] = obs[i_agent]
+            data, distance, agent_data = self.env.obs_builder.split_tree(tree=np.array(obs[i_agent]),
+                                                                         num_features_per_node=8, current_depth=0)
+            # if predictions != {}:
+            #     pred_obs = self.get_prediction_as_observation(pred_pos, pred_dir, i_agent)
+            #
+            #     agent_id_one_hot = np.zeros(len(self.env.agents))
+            #     agent_id_one_hot[i_agent] = 1
+            #     o[i_agent] = [obs[i_agent], agent_id_one_hot, pred_obs]
+            # else:
+            o[i_agent] = [data, distance, agent_data]
         # needed for the renderer
         self.rail = self.env.rail
@@ -93,23 +99,25 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
         r = dict()
         o = dict()
-        predictions = self.env.predict()
-        if predictions != {}:
-            # pred_pos is a 3 dimensions array (N_Agents, T_pred, 2) containing x and y coordinates of
-            # agents at each time step
-            pred_pos = np.concatenate([[x[:, 1:3]] for x in list(predictions.values())], axis=0)
-            pred_dir = [x[:, 2] for x in list(predictions.values())]
+        # predictions = self.env.predict()
+        # if predictions != {}:
+        #     # pred_pos is a 3 dimensions array (N_Agents, T_pred, 2) containing x and y coordinates of
+        #     # agents at each time step
+        #     pred_pos = np.concatenate([[x[:, 1:3]] for x in list(predictions.values())], axis=0)
+        #     pred_dir = [x[:, 2] for x in list(predictions.values())]
        for i_agent in range(len(self.env.agents)):
            if i_agent not in self.agents_done:
-                if predictions != {}:
-                    pred_obs = self.get_prediction_as_observation(pred_pos, pred_dir, i_agent)
-                    agent_id_one_hot = np.zeros(len(self.env.agents))
-                    agent_id_one_hot[i_agent] = 1
-                    o[i_agent] = [obs[i_agent], agent_id_one_hot, pred_obs]
-                else:
-                    o[i_agent] = obs[i_agent]
+                data, distance, agent_data = self.env.obs_builder.split_tree(tree=np.array(obs[i_agent]),
+                                                                             num_features_per_node=8, current_depth=0)
+                # if predictions != {}:
+                #     pred_obs = self.get_prediction_as_observation(pred_pos, pred_dir, i_agent)
+                #     agent_id_one_hot = np.zeros(len(self.env.agents))
+                #     agent_id_one_hot[i_agent] = 1
+                #     o[i_agent] = [obs[i_agent], agent_id_one_hot, pred_obs]
+                # else:
+                o[i_agent] = [data, distance, agent_data]
                r[i_agent] = rewards[i_agent]
                d[i_agent] = dones[i_agent]
...
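The wrapper above now builds its RailEnv from a plain config dict and packs each agent's tree observation as [data, distance, agent_data]. For orientation, a minimal sketch of the keys __init__ reads after this merge; the values are illustrative, 'min_dist' is the newly required entry, and obs_builder would be a TreeObsForRailEnv instance in the experiment scripts further down:

# Illustrative config dict for RailEnvRLLibWrapper (values are placeholders).
env_config = {
    "width": 8,
    "height": 8,
    "rail_generator": "complex_rail_generator",   # or "random_rail_generator" / "load_env"
    "number_of_agents": 3,
    "min_dist": 5,            # new key read by complex_rail_generator in this merge
    "nr_extra": 1,
    "seed": 123,
    "obs_builder": None,      # a TreeObsForRailEnv instance in the real scripts
    "step_memory": 1,
}
# env = RailEnvRLLibWrapper(env_config)  # vector_index presumably arrives via RLlib's EnvContext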
@@ -23,7 +23,7 @@ def min_lt(seq, val):
     min = np.inf
     idx = len(seq) - 1
     while idx >= 0:
-        if seq[idx] > val and seq[idx] < min:
+        if seq[idx] >= val and seq[idx] < min:
             min = seq[idx]
         idx -= 1
     return min
@@ -38,7 +38,8 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1):
     :return: returnes normalized and clipped observatoin
     """
     max_obs = max(1, max_lt(obs, 1000))
-    min_obs = max(0, min_lt(obs, 0))
+    min_obs = min(max_obs, min_lt(obs, 0))
     if max_obs == min_obs:
         return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
     norm = np.abs(max_obs - min_obs)
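The two one-line changes above adjust how the observation's value range is estimated: min_lt now also accepts entries equal to val, and min_obs is capped at max_obs instead of being floored at 0. A self-contained sketch of the helpers with these fixes applied; max_lt is assumed to mirror min_lt (largest entry strictly below val), and the final return line is reconstructed since the hunk ends at norm:

import numpy as np

def max_lt(seq, val):
    """Assumed counterpart of min_lt: largest element of seq strictly below val."""
    largest = 0
    for x in seq:
        if val > x > largest:
            largest = x
    return largest

def min_lt(seq, val):
    """Smallest element of seq that is >= val (with the >= fix from this diff)."""
    smallest = np.inf
    for x in seq:
        if val <= x < smallest:
            smallest = x
    return smallest

def norm_obs_clip(obs, clip_min=-1, clip_max=1):
    """Rescale obs into [clip_min, clip_max]; entries >= 1000 act as 'unreachable' markers."""
    max_obs = max(1, max_lt(obs, 1000))
    min_obs = min(max_obs, min_lt(obs, 0))   # the patched line above
    if max_obs == min_obs:
        return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
    norm = np.abs(max_obs - min_obs)
    return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)

print(norm_obs_clip([0, 5, 10, np.inf]))  # -> [0.  0.5 1.  1. ]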
@@ -49,12 +50,22 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1):
 class CustomPreprocessor(Preprocessor):
     def _init_shape(self, obs_space, options):
+        return sum([space.shape[0] for space in obs_space]),
         # return (sum([space.shape[0] for space in obs_space]), )
-        return ((sum([space.shape[0] for space in obs_space[:2]]) + obs_space[2].shape[0] * obs_space[2].shape[1]),)
+        # return ((sum([space.shape[0] for space in obs_space[:2]]) + obs_space[2].shape[0] * obs_space[2].shape[1]),)
     def transform(self, observation):
+        print('OBSSSSSSSSSSSSSSSSSs', observation, observation.shape)
+        data = norm_obs_clip(observation[0])
+        distance = norm_obs_clip(observation[1])
+        agent_data = np.clip(observation[2], -1, 1)
+        return np.concatenate((np.concatenate((data, distance)), agent_data))
+        return norm_obs_clip(observation)
+        return np.concatenate([norm_obs_clip(observation[0]), norm_obs_clip(observation[1])])
         # if len(observation) == 111:
         #     return np.concatenate([norm_obs_clip(obs) for obs in observation])
+        # print('OBSERVATION:', observation, len(observation[0]))
         return np.concatenate([norm_obs_clip(observation[0]), observation[1], observation[
             2].flatten()]) #, norm_obs_clip(observation[1]), observation[2], observation[3].flatten()])
         #one_hot = observation[-3:]
...
+run_experiment.name = "memory_experiment_results"
+run_experiment.num_iterations = 2002
+run_experiment.save_every = 50
+run_experiment.hidden_sizes = [32, 32]
+run_experiment.map_width = {"grid_search": [8, 10, 12, 14]}
+run_experiment.map_height = 8
+run_experiment.n_agents = 3
+run_experiment.rail_generator = "complex_rail_generator"
+run_experiment.nr_extra = 1
+run_experiment.policy_folder_name = "ppo_policy_with_pred_map_size_{config[map_width]}"
+run_experiment.horizon = 50
+run_experiment.seed = 123
+#run_experiment.conv_model = {"grid_search": [True, False]}
+run_experiment.conv_model = False
+run_experiment.obs_builder = @TreeObsForRailEnv()
+TreeObsForRailEnv.max_depth = 2
+TreeObsForRailEnv.predictor = @DummyPredictorForRailEnv()
+LocalObsForRailEnv.view_radius = 5
+run_experiment.entropy_coeff = 1e-3
+run_experiment.kl_coeff = 0.2
+run_experiment.lambda_gae = 0.9
+#run_experiment.predictor = "dummy_predictor"
+run_experiment.step_memory = 1
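The block above is a gin config file: every run_experiment.<param> line binds a keyword argument of the @gin.configurable run_experiment entry points that appear later in this diff, and the @TreeObsForRailEnv() / @DummyPredictorForRailEnv() references presumably rely on gin.external_configurable registrations like the ones the training script makes for the other observation builders. A reduced sketch of how such bindings are injected; only a handful of parameters are reproduced, and parse_config stands in for the parse_config_file call used in the scripts:

import gin

@gin.configurable
def run_experiment(name, num_iterations=1, n_agents=1, map_width=8, map_height=8):
    # gin supplies every bound argument once the config has been parsed
    print(f"{name}: {num_iterations} iterations, {n_agents} agents, {map_width}x{map_height} map")

gin.parse_config("""
run_experiment.name = "memory_experiment_results"
run_experiment.num_iterations = 2002
run_experiment.n_agents = 3
run_experiment.map_height = 8
""")

run_experiment()  # map_width keeps its default here; the real config grid-searches it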
@@ -50,11 +50,11 @@ ModelCatalog.register_custom_model("conv_model", ConvModelGlobalObs)
 ray.init()#object_store_memory=150000000000, redis_max_memory=30000000000)
-CHECKPOINT_PATH = '/home/guillaume/Desktop/distMAgent/env_complexity_benchmark/' \
-                  'ppo_policy_nr_extra_10_0qxx0qy_/checkpoint_1001/checkpoint-1001'
+CHECKPOINT_PATH = '/home/guillaume/Desktop/distMAgent/experiment_agent_memory/' \
+                  'ppo_policy_hidden_size_32_entropy_coeff_0.0001_mu413rlu/checkpoint_201/checkpoint-201'
 N_EPISODES = 10
-N_STEPS_PER_EPISODE = 80
+N_STEPS_PER_EPISODE = 50
 def render_training_result(config):
@@ -65,16 +65,24 @@ def render_training_result(config):
     # Example configuration to generate a random rail
     env_config = {"width": config['map_width'],
                   "height": config['map_height'],
-                  "rail_generator": config["rail_generator"],
+                  "rail_generator": "load_env",#config["rail_generator"],
                   "nr_extra": config["nr_extra"],
                   "number_of_agents": config['n_agents'],
                   "seed": config['seed'],
-                  "obs_builder": config['obs_builder']}
+                  "obs_builder": config['obs_builder'],
+                  "predictor": config["predictor"],
+                  "step_memory": config["step_memory"]}
     # Observation space and action space definitions
     if isinstance(config["obs_builder"], TreeObsForRailEnv):
-        obs_space = gym.spaces.Box(low=-1, high=1, shape=(147,))
+        if config['predictor'] is None:
+            obs_space = gym.spaces.Tuple(
+                (gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(147,)),) * config['step_memory'])
+        else:
+            obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(147,)),
+                                          gym.spaces.Box(low=0, high=1, shape=(config['n_agents'],)),
+                                          gym.spaces.Box(low=0, high=1, shape=(20, config['n_agents'])),) * config[
+                                             'step_memory'])
         preprocessor = "tree_obs_prep"
     elif isinstance(config["obs_builder"], GlobalObsForRailEnv):
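When a predictor is configured, the Tuple space above holds, per remembered step, the 147 tree features plus an agent-id one-hot and a 20-step prediction map per agent; the custom preprocessor flattens all of it. A quick size check with the values used in this experiment (3 agents, step_memory 1), assuming the classic gym package used throughout these scripts:

import numpy as np
import gym

n_agents, step_memory = 3, 1
obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(147,)),
                              gym.spaces.Box(low=0, high=1, shape=(n_agents,)),
                              gym.spaces.Box(low=0, high=1, shape=(20, n_agents)),) * step_memory)

# Total length once every component is flattened by the preprocessor:
flat_size = sum(int(np.prod(space.shape)) for space in obs_space.spaces)
print(flat_size)  # 147 + 3 + 20 * 3 = 210 entries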
@@ -109,7 +117,7 @@ def render_training_result(config):
     else:
         raise ValueError("Undefined observation space")
-    act_space = gym.spaces.Discrete(4)
+    act_space = gym.spaces.Discrete(5)
     # Dict with the different policies to train
     policy_graphs = {
@@ -131,10 +139,11 @@ def render_training_result(config):
                                     "policies_to_train": list(policy_graphs.keys())}
     trainer_config["horizon"] = config['horizon']
     trainer_config["num_workers"] = 0
-    trainer_config["num_cpus_per_worker"] = 3
+    trainer_config["num_cpus_per_worker"] = 4
-    trainer_config["num_gpus"] = 0
+    trainer_config["num_gpus"] = 0.2
-    trainer_config["num_gpus_per_worker"] = 0
+    trainer_config["num_gpus_per_worker"] = 0.2
     trainer_config["num_cpus_for_driver"] = 1
     trainer_config["num_envs_per_worker"] = 1
     trainer_config['entropy_coeff'] = config['entropy_coeff']
@@ -145,17 +154,20 @@ def render_training_result(config):
     trainer_config['log_level'] = 'WARN'
     trainer_config['num_sgd_iter'] = 10
     trainer_config['clip_param'] = 0.2
+    trainer_config['kl_coeff'] = config['kl_coeff']
+    trainer_config['lambda'] = config['lambda_gae']
     env = RailEnvRLLibWrapper(env_config)
     trainer = Trainer(env=RailEnvRLLibWrapper, config=trainer_config)
+    print('hidden sizes:', config['hidden_sizes'])
     trainer.restore(CHECKPOINT_PATH)
     policy = trainer.get_policy(config['policy_folder_name'].format(**locals()))
-    preprocessor = CustomPreprocessor(gym.spaces.Box(low=-1, high=1, shape=(147,)))
+    preprocessor = CustomPreprocessor(obs_space)
     env_renderer = RenderTool(env, gl="PIL")
     for episode in range(N_EPISODES):
         observation = env.reset()
@@ -184,7 +196,8 @@ def render_training_result(config):
 @gin.configurable
 def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                    map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder,
-                   entropy_coeff, seed, conv_model, rail_generator, nr_extra):
+                   entropy_coeff, seed, conv_model, rail_generator, nr_extra, kl_coeff,
+                   lambda_gae, predictor, step_memory):
     render_training_result(
         config={"n_agents": n_agents,
@@ -200,13 +213,17 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                 "seed": seed,
                 "conv_model": conv_model,
                 "rail_generator": rail_generator,
-                "nr_extra": 10# nr_extra
+                "nr_extra": nr_extra,
+                "kl_coeff": kl_coeff,
+                "lambda_gae": lambda_gae,
+                "predictor": predictor,
+                "step_memory": step_memory
                 }
     )
 if __name__ == '__main__':
     gin.external_configurable(tune.grid_search)
-    dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/env_complexity_benchmark' # To Modify
+    dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/experiment_agent_memory' # To Modify
     gin.parse_config_file(dir + '/config.gin')
     run_experiment(local_dir=dir)
@@ -34,6 +34,7 @@ gin.external_configurable(LocalObsForRailEnv)
 gin.external_configurable(GlobalObsForRailEnvDirectionDependent)
 from ray.rllib.models.preprocessors import TupleFlatteningPreprocessor
+import numpy as np
 ModelCatalog.register_custom_preprocessor("tree_obs_prep", CustomPreprocessor)
 ModelCatalog.register_custom_preprocessor("global_obs_prep", TupleFlatteningPreprocessor)
@@ -44,10 +45,32 @@ ray.init() # object_store_memory=150000000000, redis_max_memory=30000000000)
 __file_dirname__ = os.path.dirname(os.path.realpath(__file__))
+def on_episode_start(info):
+    episode = info['episode']
+    map_width = info['env'].envs[0].width
+    map_height = info['env'].envs[0].height
+    episode.horizon = map_width + map_height
+# def on_episode_step(info):
+#     episode = info['episode']
+#     print('#########################', episode._agent_reward_history)
+#     # print(ds)
+def on_episode_end(info):
+    episode = info['episode']
+    score = 0
+    for k, v in episode._agent_reward_history.items():
+        score += np.sum(v)
+    score /= (len(episode._agent_reward_history) * 3 * episode.horizon)
+    episode.custom_metrics["score"] = score
 def train(config, reporter):
     print('Init Env')
     set_seed(config['seed'], config['seed'], config['seed'])
+    config['map_height'] = config['map_width']
     # Example configuration to generate a random rail
     env_config = {"width": config['map_width'],
@@ -57,19 +80,24 @@ def train(config, reporter):
                   "number_of_agents": config['n_agents'],
                   "seed": config['seed'],
                   "obs_builder": config['obs_builder'],
-                  "predictor": config["predictor"],
+                  "min_dist": config['min_dist'],
+                  # "predictor": config["predictor"],
                   "step_memory": config["step_memory"]}
     # Observation space and action space definitions
     if isinstance(config["obs_builder"], TreeObsForRailEnv):
-        if config['predictor'] is None:
-            obs_space = gym.spaces.Tuple(
-                (gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(147,)),) * config['step_memory'])
-        else:
-            obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(147,)),
-                                          gym.spaces.Box(low=0, high=1, shape=(config['n_agents'],)),
-                                          gym.spaces.Box(low=0, high=1, shape=(20, config['n_agents'])),) * config[
-                                             'step_memory'])
+        obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(168,)), ))
+        # gym.spaces.Box(low=0, high=1, shape=(config['n_agents'],)),
+        # gym.spaces.Box(low=0, high=1, shape=(20, config['n_agents'])),) * config[
+        # 'step_memory'])
+        # if config['predictor'] is None:
+        #     obs_space = gym.spaces.Tuple(
+        #         (gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(147,)),) * config['step_memory'])
+        # else:
+        #     obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(147,)),
+        #                                   gym.spaces.Box(low=0, high=1, shape=(config['n_agents'],)),
+        #                                   gym.spaces.Box(low=0, high=1, shape=(20, config['n_agents'])),) * config[
+        #                                      'step_memory'])
         preprocessor = "tree_obs_prep"
     elif isinstance(config["obs_builder"], GlobalObsForRailEnv):
@@ -124,12 +152,12 @@ def train(config, reporter):
     trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
                                     "policy_mapping_fn": policy_mapping_fn,
                                     "policies_to_train": list(policy_graphs.keys())}
-    trainer_config["horizon"] = config['horizon']
+    trainer_config["horizon"] = 1.5 * (config['map_width'] + config['map_height'])#config['horizon']
     trainer_config["num_workers"] = 0
-    trainer_config["num_cpus_per_worker"] = 4
+    trainer_config["num_cpus_per_worker"] = 7
-    trainer_config["num_gpus"] = 0.2
+    trainer_config["num_gpus"] = 0.0
-    trainer_config["num_gpus_per_worker"] = 0.2
+    trainer_config["num_gpus_per_worker"] = 0.0
     trainer_config["num_cpus_for_driver"] = 1
     trainer_config["num_envs_per_worker"] = 1
     trainer_config['entropy_coeff'] = config['entropy_coeff']
@@ -142,6 +170,10 @@ def train(config, reporter):
     trainer_config['clip_param'] = 0.2
     trainer_config['kl_coeff'] = config['kl_coeff']
     trainer_config['lambda'] = config['lambda_gae']
+    trainer_config['callbacks'] = {
+        "on_episode_start": tune.function(on_episode_start),
+        "on_episode_end": tune.function(on_episode_end)
+    }
     def logger_creator(conf):
         """Creates a Unified logger with a default logdir prefix
@@ -172,7 +204,7 @@ def train(config, reporter):
 def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                    map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder,
                    entropy_coeff, seed, conv_model, rail_generator, nr_extra, kl_coeff, lambda_gae,
-                   predictor, step_memory):
+                   step_memory, min_dist):
     tune.run(
         train,
         name=name,
@@ -193,12 +225,13 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
            "nr_extra": nr_extra,
            "kl_coeff": kl_coeff,
            "lambda_gae": lambda_gae,
-           "predictor": predictor,
+           "min_dist": min_dist,
+           # "predictor": predictor,
            "step_memory": step_memory
        },
        resources_per_trial={
-           "cpu": 5,
-           "gpu": 0.2
+           "cpu": 8,
+           "gpu": 0
        },
        verbose=2,
        local_dir=local_dir
@@ -207,8 +240,9 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
 if __name__ == '__main__':
     gin.external_configurable(tune.grid_search)
-    with path('RLLib_training.experiment_configs.experiment_agent_memory', 'config.gin') as f:
-        gin.parse_config_file(f)
+    # with path('RLLib_training.experiment_configs.n_agents_experiment', 'config.gin') as f:
+    #     gin.parse_config_file(f)
+    gin.parse_config_file('/home/guillaume/flatland/baselines/RLLib_training/experiment_configs/score_metric_test/config.gin')
-    dir = os.path.join(__file_dirname__, 'experiment_configs', 'experiment_agent_memory')
+    dir = '/home/guillaume/flatland/baselines/RLLib_training/experiment_configs/score_metric_test'
+    # dir = os.path.join(__file_dirname__, 'experiment_configs', 'experiment_agent_memory')
     run_experiment(local_dir=dir)