diff --git a/MANIFEST.in b/MANIFEST.in
index c3286295576ccdb7033587afbc434f097390a42a..13b466c2a497ec403b8c09340191ff43c6aa4b56 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -4,7 +4,6 @@ include HISTORY.rst
 include LICENSE
 include README.rst
 include requirements_torch_training.txt
-include requirements_RLLib_training.txt
diff --git a/README.md b/README.md
index cdd54b5229cd88c94618782bf242595b453413f8..d8dc09bde2ace60244a901506eb25d1790acb74a 100644
--- a/README.md
+++ b/README.md
@@ -15,9 +15,6 @@ With the above introductions you will solve tasks like these and even more...
 
-# RLLib Training
-The `RLLib_training` folder shows an example of how to train agents with algorithm from implemented in the RLLib library available at: <https://github.com/ray-project/ray/tree/master/python/ray/rllib>
-
 # Sequential Agent
 This is a very simple baseline to show you have the `complex_level_generator` generates feasible network configurations.
 If you run the `run_test.py` file you will see a simple agent that solves the level by sequentially running each agent along its shortest path.
diff --git a/RLLib_training/README.md b/RLLib_training/README.md
deleted file mode 100644
index 8bda956f226af1c7ef4c7e1237b447cf7af4327a..0000000000000000000000000000000000000000
--- a/RLLib_training/README.md
+++ /dev/null
@@ -1,78 +0,0 @@
-This repository allows to run Rail Environment multi agent training with the RLLib Library.
-
-## Installation:
-
-To run scripts of this repository, the deep learning library tensorflow should be installed, along with the following packages:
-```sh
-pip install gym ray==0.7.0 gin-config opencv-python lz4 psutil
-```
-
-To start a training with different parameters, you can create a folder containing a config.gin file (see example in `experiment_configs/config_example/config.gin`.
-
-Then, you can modify the config.gin file path at the end of the `train_experiment.py` file.
-
-The results will be stored inside the folder, and the learning curves can be visualized in
-tensorboard:
-
-```
-tensorboard --logdir=/path/to/folder_containing_config_gin_file
-```
-
-## Gin config files
-
-In each config.gin files, all the parameters of the `run_experiment` functions have to be specified.
-For example, to indicate the number of agents that have to be initialized at the beginning of each simulation, the following line should be added:
-
-```
-run_experiment.n_agents = 2
-```
-
-If several number of agents have to be explored during the experiment, one can pass the following value to the `n_agents` parameter:
-
-```
-run_experiment.n_agents = {"grid_search": [2,5]}
-```
-
-which is the way to indicate to the tune library to experiment several values for a parameter.
-
-To reference a class or an object within gin, you should first register it from the `train_experiment.py` script adding the following line:
-
-```
-gin.external_configurable(TreeObsForRailEnv)
-```
-
-and then a `TreeObsForRailEnv` object can be referenced in the `config.gin` file:
-
-```
-run_experiment.obs_builder = {"grid_search": [@TreeObsForRailEnv(), @GlobalObsForRailEnv()]}
-TreeObsForRailEnv.max_depth = 2
-```
-
-Note that `@TreeObsForRailEnv` references the class, while `@TreeObsForRailEnv()` references instantiates an object of this class.
-
-
-
-
-More documentation on how to use gin-config can be found on the github repository: https://github.com/google/gin-config
-
-## Run an example:
-To start a training on a 20X20 map, with different numbers of agents initialized at each episode, on can run the train_experiment.py script:
-```
-python RLLib_training/train_experiment.py
-```
-This will load the gin config file in the folder `experiment_configs/config_examples`.
-
-To visualize the result of a training, one can load a training checkpoint and use the policy learned.
-This is done in the `render_training_result.py` script. One has to modify the `CHECKPOINT_PATH` at the beginning of this script:
-
-```
-CHECKPOINT_PATH = os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'ppo_policy_two_obs_with_predictions_n_agents_4_map_size_20q58l5_f7',
-                               'checkpoint_101', 'checkpoint-101')
-```
-and load the corresponding gin config file:
-
-```
-gin.parse_config_file(os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'config.gin'))
-```
-
-
diff --git a/RLLib_training/RailEnvRLLibWrapper.py b/RLLib_training/RailEnvRLLibWrapper.py
deleted file mode 100644
index f82cd42d9bbd836b681ff284a82f357b2760bb0c..0000000000000000000000000000000000000000
--- a/RLLib_training/RailEnvRLLibWrapper.py
+++ /dev/null
@@ -1,135 +0,0 @@
-import numpy as np
-from ray.rllib.env.multi_agent_env import MultiAgentEnv
-from ray.rllib.utils.seed import seed as set_seed
-
-from flatland.envs.rail_env import RailEnv
-from flatland.envs.rail_generators import complex_rail_generator, random_rail_generator
-from flatland.envs.schedule_generators import complex_schedule_generator, random_schedule_generator
-
-
-class RailEnvRLLibWrapper(MultiAgentEnv):
-
-    def __init__(self, config):
-
-        super(MultiAgentEnv, self).__init__()
-
-        # Environment ID if num_envs_per_worker > 1
-        if hasattr(config, "vector_index"):
-            vector_index = config.vector_index
-        else:
-            vector_index = 1
-
-        self.predefined_env = False
-
-        if config['rail_generator'] == "complex_rail_generator":
-            self.rail_generator = complex_rail_generator(nr_start_goal=config['number_of_agents'],
-                                                         min_dist=config['min_dist'],
-                                                         nr_extra=config['nr_extra'],
-                                                         seed=config['seed'] * (1 + vector_index))
-            self.schedule_generator = complex_schedule_generator()
-
-        elif config['rail_generator'] == "random_rail_generator":
-            self.rail_generator = random_rail_generator()
-            self.schedule_generator = random_schedule_generator()
-        elif config['rail_generator'] == "load_env":
-            self.predefined_env = True
-            self.rail_generator = random_rail_generator()
-            self.schedule_generator = random_schedule_generator()
-        else:
-            raise (ValueError, f'Unknown rail generator: {config["rail_generator"]}')
-
-        set_seed(config['seed'] * (1 + vector_index))
-        self.env = RailEnv(width=config["width"], height=config["height"],
-                           number_of_agents=config["number_of_agents"],
-                           obs_builder_object=config['obs_builder'],
-                           rail_generator=self.rail_generator,
-                           schedule_generator=self.schedule_generator
-                           )
-
-        if self.predefined_env:
-            self.env.load_resource('torch_training.railway', 'complex_scene.pkl')
-
-        self.width = self.env.width
-        self.height = self.env.height
-        self.step_memory = config["step_memory"]
-
-        # needed for the renderer
-        self.rail = self.env.rail
-        self.agents = self.env.agents
-        self.agents_static = self.env.agents_static
-        self.dev_obs_dict = self.env.dev_obs_dict
-
-    def reset(self):
-        self.agents_done = []
-        if self.predefined_env:
-            obs = self.env.reset(False, False)
-        else:
-            obs = self.env.reset()
-
-        # RLLib only receives observation of agents that are not done.
-        o = dict()
-
-        for i_agent in range(len(self.env.agents)):
-            data, distance, agent_data = self.env.obs_builder.split_tree(tree=np.array(obs[i_agent]),
-                                                                         current_depth=0)
-            o[i_agent] = [data, distance, agent_data]
-
-        # needed for the renderer
-        self.rail = self.env.rail
-        self.agents = self.env.agents
-        self.agents_static = self.env.agents_static
-        self.dev_obs_dict = self.env.dev_obs_dict
-
-        # If step_memory > 1, we need to concatenate it the observations in memory, only works for
-        # step_memory = 1 or 2 for the moment
-        if self.step_memory < 2:
-            return o
-        else:
-            self.old_obs = o
-            oo = dict()
-
-            for i_agent in range(len(self.env.agents)):
-                oo[i_agent] = [o[i_agent], o[i_agent]]
-            return oo
-
-    def step(self, action_dict):
-        obs, rewards, dones, infos = self.env.step(action_dict)
-
-        d = dict()
-        r = dict()
-        o = dict()
-
-        for i_agent in range(len(self.env.agents)):
-            if i_agent not in self.agents_done:
-                data, distance, agent_data = self.env.obs_builder.split_tree(tree=np.array(obs[i_agent]),
-                                                                             current_depth=0)
-
-                o[i_agent] = [data, distance, agent_data]
-                r[i_agent] = rewards[i_agent]
-                d[i_agent] = dones[i_agent]
-
-        d['__all__'] = dones['__all__']
-
-        if self.step_memory >= 2:
-            oo = dict()
-
-            for i_agent in range(len(self.env.agents)):
-                if i_agent not in self.agents_done:
-                    oo[i_agent] = [o[i_agent], self.old_obs[i_agent]]
-
-            self.old_obs = o
-
-        for agent, done in dones.items():
-            if done and agent != '__all__':
-                self.agents_done.append(agent)
-
-        if self.step_memory < 2:
-            return o, r, d, infos
-        else:
-            return oo, r, d, infos
-
-    def get_agent_handles(self):
-        return self.env.get_agent_handles()
-
-    def get_num_agents(self):
-        return self.env.get_num_agents()
diff --git a/RLLib_training/__init__.py b/RLLib_training/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/RLLib_training/custom_preprocessors.py b/RLLib_training/custom_preprocessors.py
deleted file mode 100644
index d4c81a83f1c05317315a3f71f99565006e9311e1..0000000000000000000000000000000000000000
--- a/RLLib_training/custom_preprocessors.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import numpy as np
-from ray.rllib.models.preprocessors import Preprocessor
-from utils.observation_utils import norm_obs_clip
-
-class TreeObsPreprocessor(Preprocessor):
-    def _init_shape(self, obs_space, options):
-        print(options)
-        self.step_memory = options["custom_options"]["step_memory"]
-        return sum([space.shape[0] for space in obs_space]),
-
-    def transform(self, observation):
-
-        if self.step_memory == 2:
-            data = norm_obs_clip(observation[0][0])
-            distance = norm_obs_clip(observation[0][1])
-            agent_data = np.clip(observation[0][2], -1, 1)
-            data2 = norm_obs_clip(observation[1][0])
-            distance2 = norm_obs_clip(observation[1][1])
-            agent_data2 = np.clip(observation[1][2], -1, 1)
-        else:
-            data = norm_obs_clip(observation[0])
-            distance = norm_obs_clip(observation[1])
-            agent_data = np.clip(observation[2], -1, 1)
-
-        return np.concatenate((np.concatenate((np.concatenate((data, distance)), agent_data)), np.concatenate((np.concatenate((data2, distance2)), agent_data2))))
-
diff --git a/RLLib_training/experiment_configs/__init__.py b/RLLib_training/experiment_configs/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/RLLib_training/experiment_configs/config_example/config.gin b/RLLib_training/experiment_configs/config_example/config.gin
deleted file mode 100644
index 59d2dfb508f13cccf4b9152f24ab06d44c290450..0000000000000000000000000000000000000000
--- a/RLLib_training/experiment_configs/config_example/config.gin
+++ /dev/null
@@ -1,25 +0,0 @@
-run_experiment.name = "experiment_example"
-run_experiment.num_iterations = 1002
-run_experiment.save_every = 100
-run_experiment.hidden_sizes = [32, 32]
-
-run_experiment.map_width = 20
-run_experiment.map_height = 20
-run_experiment.n_agents = {"grid_search": [3, 4, 5, 6, 7, 8]}
-run_experiment.rail_generator = "complex_rail_generator"  # Change this to "load_env" in order to load a predefined complex scene
-run_experiment.nr_extra = 5
-run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"
-
-run_experiment.seed = 123
-
-run_experiment.conv_model = False
-
-run_experiment.obs_builder = @TreeObsForRailEnv()
-TreeObsForRailEnv.predictor = @ShortestPathPredictorForRailEnv()
-TreeObsForRailEnv.max_depth = 2
-
-run_experiment.entropy_coeff = 0.001
-run_experiment.kl_coeff = 0.2
-run_experiment.lambda_gae = 0.9
-run_experiment.step_memory = 2
-run_experiment.min_dist = 10
diff --git a/RLLib_training/render_training_result.py b/RLLib_training/render_training_result.py
deleted file mode 100644
index 1ee7cc1ce394f3b40791706871aa180ec0510b52..0000000000000000000000000000000000000000
--- a/RLLib_training/render_training_result.py
+++ /dev/null
@@ -1,169 +0,0 @@
-from RailEnvRLLibWrapper import RailEnvRLLibWrapper
-from custom_preprocessors import TreeObsPreprocessor
-import gym
-import os
-
-from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG
-from ray.rllib.agents.ppo.ppo import PPOTrainer as Trainer
-from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph as PolicyGraph
-
-from ray.rllib.models import ModelCatalog
-
-import ray
-import numpy as np
-
-import gin
-
-from flatland.envs.predictions import DummyPredictorForRailEnv, ShortestPathPredictorForRailEnv
-gin.external_configurable(DummyPredictorForRailEnv)
-gin.external_configurable(ShortestPathPredictorForRailEnv)
-
-from ray.rllib.utils.seed import seed as set_seed
-from flatland.envs.observations import TreeObsForRailEnv
-
-from flatland.utils.rendertools import RenderTool
-import time
-
-gin.external_configurable(TreeObsForRailEnv)
-
-ModelCatalog.register_custom_preprocessor("tree_obs_prep", TreeObsPreprocessor)
-ray.init()  # object_store_memory=150000000000, redis_max_memory=30000000000)
-
-__file_dirname__ = os.path.dirname(os.path.realpath(__file__))
-
-CHECKPOINT_PATH = os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'ppo_policy_two_obs_with_predictions_n_agents_4_map_size_20q58l5_f7',
-                               'checkpoint_101', 'checkpoint-101')  # To Modify
-N_EPISODES = 10
-N_STEPS_PER_EPISODE = 50
-
-
-def render_training_result(config):
-    print('Init Env')
-
-    set_seed(config['seed'], config['seed'], config['seed'])
-
-    # Example configuration to generate a random rail
-    env_config = {"width": config['map_width'],
-                  "height": config['map_height'],
-                  "rail_generator": config["rail_generator"],
-                  "nr_extra": config["nr_extra"],
-                  "number_of_agents": config['n_agents'],
-                  "seed": config['seed'],
-                  "obs_builder": config['obs_builder'],
-                  "min_dist": config['min_dist'],
-                  "step_memory": config["step_memory"]}
-
-    # Observation space and action space definitions
-    if isinstance(config["obs_builder"], TreeObsForRailEnv):
-        obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'),
-                                                     shape=(168,)),) * 2)
-        preprocessor = TreeObsPreprocessor
-
-    else:
-        raise ValueError("Undefined observation space")
-
-    act_space = gym.spaces.Discrete(5)
-
-    # Dict with the different policies to train
-    policy_graphs = {
-        "ppo_policy": (PolicyGraph, obs_space, act_space, {})
-    }
-
-    def policy_mapping_fn(agent_id):
-        return "ppo_policy"
-
-    # Trainer configuration
-    trainer_config = DEFAULT_CONFIG.copy()
-
-    trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes']}
-
-    trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
-                                    "policy_mapping_fn": policy_mapping_fn,
-                                    "policies_to_train": list(policy_graphs.keys())}
-
-    trainer_config["num_workers"] = 0
-    trainer_config["num_cpus_per_worker"] = 4
-    trainer_config["num_gpus"] = 0.2
-    trainer_config["num_gpus_per_worker"] = 0.2
-    trainer_config["num_cpus_for_driver"] = 1
-    trainer_config["num_envs_per_worker"] = 1
-    trainer_config['entropy_coeff'] = config['entropy_coeff']
-    trainer_config["env_config"] = env_config
-    trainer_config["batch_mode"] = "complete_episodes"
-    trainer_config['simple_optimizer'] = False
-    trainer_config['postprocess_inputs'] = True
-    trainer_config['log_level'] = 'WARN'
-    trainer_config['num_sgd_iter'] = 10
-    trainer_config['clip_param'] = 0.2
-    trainer_config['kl_coeff'] = config['kl_coeff']
-    trainer_config['lambda'] = config['lambda_gae']
-
-    env = RailEnvRLLibWrapper(env_config)
-
-    trainer = Trainer(env=RailEnvRLLibWrapper, config=trainer_config)
-
-    trainer.restore(CHECKPOINT_PATH)
-
-    policy = trainer.get_policy("ppo_policy")
-
-    preprocessor = preprocessor(obs_space, {"step_memory": config["step_memory"]})
-    env_renderer = RenderTool(env, gl="PILSVG")
-    for episode in range(N_EPISODES):
-
-        observation = env.reset()
-        for i in range(N_STEPS_PER_EPISODE):
-            preprocessed_obs = []
-            for obs in observation.values():
-                preprocessed_obs.append(preprocessor.transform(obs))
-            action, _, infos = policy.compute_actions(preprocessed_obs, [])
-            logits = infos['behaviour_logits']
-            actions = dict()
-
-            # We select the greedy action.
-            for j, logit in enumerate(logits):
-                actions[j] = np.argmax(logit)
-
-            # In case we prefer to sample an action stochastically according to the policy graph.
-            # for j, act in enumerate(action):
-            #     actions[j] = act
-
-            # Time to see the rendering at one step
-            time.sleep(1)
-
-            env_renderer.renderEnv(show=True, frames=True, iEpisode=episode, iStep=i,
-                                   action_dict=list(actions.values()))
-
-            observation, _, _, _ = env.step(actions)
-
-    env_renderer.close_window()
-
-
-@gin.configurable
-def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
-                   map_width, map_height, policy_folder_name, obs_builder,
-                   entropy_coeff, seed, conv_model, rail_generator, nr_extra, kl_coeff, lambda_gae,
-                   step_memory, min_dist):
-
-    render_training_result(
-        config={"n_agents": n_agents,
-                "hidden_sizes": hidden_sizes,  # Array containing the sizes of the network layers
-                "save_every": save_every,
-                "map_width": map_width,
-                "map_height": map_height,
-                'policy_folder_name': policy_folder_name,
-                "obs_builder": obs_builder,
-                "entropy_coeff": entropy_coeff,
-                "seed": seed,
-                "conv_model": conv_model,
-                "rail_generator": rail_generator,
-                "nr_extra": nr_extra,
-                "kl_coeff": kl_coeff,
-                "lambda_gae": lambda_gae,
-                "min_dist": min_dist,
-                "step_memory": step_memory
-                }
-    )
-
-
-if __name__ == '__main__':
-    gin.parse_config_file(os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'config.gin'))  # To Modify
-    run_experiment()
diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
deleted file mode 100644
index 7435a8fed728ec363321ba7a2bcf04b186513559..0000000000000000000000000000000000000000
--- a/RLLib_training/train_experiment.py
+++ /dev/null
@@ -1,210 +0,0 @@
-import os
-
-import gin
-import gym
-from flatland.envs.predictions import DummyPredictorForRailEnv, ShortestPathPredictorForRailEnv
-
-# Import PPO trainer: we can replace these imports by any other trainer from RLLib.
-from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG
-from ray.rllib.agents.ppo.ppo import PPOTrainer as Trainer
-from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph as PolicyGraph
-from ray.rllib.models import ModelCatalog
-
-gin.external_configurable(DummyPredictorForRailEnv)
-gin.external_configurable(ShortestPathPredictorForRailEnv)
-
-import ray
-
-from ray.tune.logger import UnifiedLogger
-from ray.tune.logger import pretty_print
-import os
-
-from RailEnvRLLibWrapper import RailEnvRLLibWrapper
-import tempfile
-
-from ray import tune
-
-from ray.rllib.utils.seed import seed as set_seed
-from flatland.envs.observations import TreeObsForRailEnv
-
-gin.external_configurable(TreeObsForRailEnv)
-
-import numpy as np
-from custom_preprocessors import TreeObsPreprocessor
-
-ModelCatalog.register_custom_preprocessor("tree_obs_prep", TreeObsPreprocessor)
-ray.init()  # object_store_memory=150000000000, redis_max_memory=30000000000)
-
-__file_dirname__ = os.path.dirname(os.path.realpath(__file__))
-
-
-def on_episode_start(info):
-    episode = info['episode']
-    map_width = info['env'].envs[0].width
-    map_height = info['env'].envs[0].height
-    episode.horizon = 3*(map_width + map_height)
-
-
-def on_episode_end(info):
-    episode = info['episode']
-
-    # Calculation of a custom score metric: cum of all accumulated rewards, divided by the number of agents
-    # and the number of the maximum time steps of the episode.
-    score = 0
-    for k, v in episode._agent_reward_history.items():
-        score += np.sum(v)
-    score /= (len(episode._agent_reward_history) * episode.horizon)
-
-    # Calculation of the proportion of solved episodes before the maximum time step
-    done = 0
-    if len(episode._agent_reward_history[0]) <= episode.horizon-5:
-        done = 1
-
-    episode.custom_metrics["score"] = score
-    episode.custom_metrics["proportion_episode_solved"] = done
-
-
-def train(config, reporter):
-    print('Init Env')
-
-    set_seed(config['seed'], config['seed'], config['seed'])
-
-    # Given the depth of the tree observation and the number of features per node we get the following state_size
-    num_features_per_node = config['obs_builder'].observation_dim
-    tree_depth = 2
-    nr_nodes = 0
-    for i in range(tree_depth + 1):
-        nr_nodes += np.power(4, i)
-    obs_size = num_features_per_node * nr_nodes
-
-
-    # Environment parameters
-    env_config = {"width": config['map_width'],
-                  "height": config['map_height'],
-                  "rail_generator": config["rail_generator"],
-                  "nr_extra": config["nr_extra"],
-                  "number_of_agents": config['n_agents'],
-                  "seed": config['seed'],
-                  "obs_builder": config['obs_builder'],
-                  "min_dist": config['min_dist'],
-                  "step_memory": config["step_memory"]}
-
-    # Observation space and action space definitions
-    if isinstance(config["obs_builder"], TreeObsForRailEnv):
-        obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(obs_size,)),) * 2)
-        preprocessor = "tree_obs_prep"
-    else:
-        raise ValueError("Undefined observation space")  # Only TreeObservation implemented for now.
-
-    act_space = gym.spaces.Discrete(5)
-
-    # Dict with the different policies to train. In this case, all trains follow the same policy
-    policy_graphs = {
-        "ppo_policy": (PolicyGraph, obs_space, act_space, {})
-    }
-
-    # Function that maps an agent id to the name of its respective policy.
-    def policy_mapping_fn(agent_id):
-        return "ppo_policy"
-
-    # Trainer configuration
-    trainer_config = DEFAULT_CONFIG.copy()
-    trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor,
-                               "custom_options": {"step_memory": config["step_memory"], "obs_size": obs_size}}
-
-    trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
-                                    "policy_mapping_fn": policy_mapping_fn,
-                                    "policies_to_train": list(policy_graphs.keys())}
-
-    # Maximum time steps for an episode is set to 3*map_width*map_height
-    trainer_config["horizon"] = 3 * (config['map_width'] + config['map_height'])
-
-    # Parameters for calculation parallelization
-    trainer_config["num_workers"] = 0
-    trainer_config["num_cpus_per_worker"] = 8
-    trainer_config["num_gpus"] = 0.2
-    trainer_config["num_gpus_per_worker"] = 0.2
-    trainer_config["num_cpus_for_driver"] = 1
-    trainer_config["num_envs_per_worker"] = 1
-
-    # Parameters for PPO training
-    trainer_config['entropy_coeff'] = config['entropy_coeff']
-    trainer_config["env_config"] = env_config
-    trainer_config["batch_mode"] = "complete_episodes"
-    trainer_config['simple_optimizer'] = False
-    trainer_config['log_level'] = 'WARN'
-    trainer_config['num_sgd_iter'] = 10
-    trainer_config['clip_param'] = 0.2
-    trainer_config['kl_coeff'] = config['kl_coeff']
-    trainer_config['lambda'] = config['lambda_gae']
-    trainer_config['callbacks'] = {
-        "on_episode_start": tune.function(on_episode_start),
-        "on_episode_end": tune.function(on_episode_end)
-    }
-
-
-    def logger_creator(conf):
-        """Creates a Unified logger with a default logdir prefix."""
-        logdir = config['policy_folder_name'].format(**locals())
-        logdir = tempfile.mkdtemp(
-            prefix=logdir, dir=config['local_dir'])
-        return UnifiedLogger(conf, logdir, None)
-
-    logger = logger_creator
-
-    trainer = Trainer(env=RailEnvRLLibWrapper, config=trainer_config, logger_creator=logger)
-
-    for i in range(100000 + 2):
-        print("== Iteration", i, "==")
-
-        print(pretty_print(trainer.train()))
-
-        if i % config['save_every'] == 0:
-            checkpoint = trainer.save()
-            print("checkpoint saved at", checkpoint)
-
-        reporter(num_iterations_trained=trainer._iteration)
-
-
-@gin.configurable
-def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
-                   map_width, map_height, policy_folder_name, local_dir, obs_builder,
-                   entropy_coeff, seed, conv_model, rail_generator, nr_extra, kl_coeff, lambda_gae,
-                   step_memory, min_dist):
-    tune.run(
-        train,
-        name=name,
-        stop={"num_iterations_trained": num_iterations},
-        config={"n_agents": n_agents,
-                "hidden_sizes": hidden_sizes,  # Array containing the sizes of the network layers
-                "save_every": save_every,
-                "map_width": map_width,
-                "map_height": map_height,
-                "local_dir": local_dir,
-                'policy_folder_name': policy_folder_name,
-                "obs_builder": obs_builder,
-                "entropy_coeff": entropy_coeff,
-                "seed": seed,
-                "conv_model": conv_model,
-                "rail_generator": rail_generator,
-                "nr_extra": nr_extra,
-                "kl_coeff": kl_coeff,
-                "lambda_gae": lambda_gae,
-                "min_dist": min_dist,
-                "step_memory": step_memory  # If equal to two, the current observation plus
-                # the observation of last time step will be given as input the the model.
-                },
-        resources_per_trial={
-            "cpu": 8,
-            "gpu": 0.2
-        },
-        verbose=2,
-        local_dir=local_dir
-    )
-
-
-if __name__ == '__main__':
-    folder_name = 'config_example'  # To Modify
-    gin.parse_config_file(os.path.join(__file_dirname__, 'experiment_configs', folder_name, 'config.gin'))
-    dir = os.path.join(__file_dirname__, 'experiment_configs', folder_name)
-    run_experiment(local_dir=dir)
diff --git a/setup.py b/setup.py
index 2b9b731ea02a0c9bdbea7602ea1dfa2ad6e194e2..5bc77c5188d799da8898f959c180c78f9c1496f6 100644
--- a/setup.py
+++ b/setup.py
@@ -2,8 +2,7 @@ from setuptools import setup, find_packages
 
 install_reqs = []
 dependency_links = []
-# TODO: include requirements_RLLib_training.txt
-requirements_paths = ['requirements_torch_training.txt']  # , 'requirements_RLLib_training.txt']
+requirements_paths = ['requirements_torch_training.txt']
 for requirements_path in requirements_paths:
     with open(requirements_path, 'r') as f:
         install_reqs += [