diff --git a/RLLib_training/RailEnvRLLibWrapper.py b/RLLib_training/RailEnvRLLibWrapper.py
index fbdee61432f0aa2f558f543c002e9846570dc140..ad504e271f8758f6f445917c1963b9da316b9ba3 100644
--- a/RLLib_training/RailEnvRLLibWrapper.py
+++ b/RLLib_training/RailEnvRLLibWrapper.py
@@ -15,19 +15,37 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
                  # number_of_agents=1,
                  # obs_builder_object=TreeObsForRailEnv(max_depth=2)):
         super(MultiAgentEnv, self).__init__()
+        if hasattr(config, "vector_index"):
+            vector_index = config.vector_index
+        else:
+            vector_index = 1
         self.rail_generator = config["rail_generator"](nr_start_goal=config['number_of_agents'], min_dist=5,
-                                                       nr_extra=30, seed=config['seed'] * (1+config.vector_index))
-        set_seed(config['seed'] * (1+config.vector_index))
+                                                       nr_extra=30, seed=config['seed'] * (1+vector_index))
+        set_seed(config['seed'] * (1+vector_index))
         self.env = RailEnv(width=config["width"], height=config["height"],
                            rail_generator=self.rail_generator,
                            number_of_agents=config["number_of_agents"],
                            obs_builder_object=config['obs_builder'])
+
+        self.env.load('./baselines/torch_training/railway/complex_scene.pkl')
+
+        self.width = self.env.width
+        self.height = self.env.height
+
+
     def reset(self):
         self.agents_done = []
-        obs = self.env.reset()
+        obs = self.env.reset(False, False)
         o = dict()
         # o['agents'] = obs
         # obs[0] = [obs[0], np.ones((17, 17)) * 17]
         # obs['global_obs'] = np.ones((17, 17)) * 17
+
+
+        self.rail = self.env.rail
+        self.agents = self.env.agents
+        self.agents_static = self.env.agents_static
+        self.dev_obs_dict = self.env.dev_obs_dict
+
         return obs
     def step(self, action_dict):
@@ -50,6 +68,11 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
         for agent, done in dones.items():
             if done and agent != '__all__':
                 self.agents_done.append(agent)
+
+        self.rail = self.env.rail
+        self.agents = self.env.agents
+        self.agents_static = self.env.agents_static
+        self.dev_obs_dict = self.env.dev_obs_dict
 
         #print(obs)
         #return obs, rewards, dones, infos
@@ -65,3 +88,6 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
 
     def get_agent_handles(self):
         return self.env.get_agent_handles()
+
+    def get_num_agents(self):
+        return self.env.get_num_agents()
diff --git a/RLLib_training/experiment_configs/conv_model_test/config.gin b/RLLib_training/experiment_configs/conv_model_test/config.gin
index a55633eac5159dd2cc7885f2b5e302a9fea29e9f..3c923ca647938ca28ef58e5a52d4717ed2b00dca 100644
--- a/RLLib_training/experiment_configs/conv_model_test/config.gin
+++ b/RLLib_training/experiment_configs/conv_model_test/config.gin
@@ -1,6 +1,6 @@
 run_experiment.name = "observation_benchmark_results"
 run_experiment.num_iterations = 1002
-run_experiment.save_every = 100
+run_experiment.save_every = 50
 run_experiment.hidden_sizes = [32, 32]
 
 run_experiment.map_width = 20
@@ -11,9 +11,11 @@ run_experiment.policy_folder_name = "ppo_policy_{config[obs_builder].__class__._
 run_experiment.horizon = 50
 run_experiment.seed = 123
 
-run_experiment.conv_model = {"grid_search": [True, False]}
+#run_experiment.conv_model = {"grid_search": [True, False]}
+run_experiment.conv_model = False
 
-run_experiment.obs_builder = {"grid_search": [@GlobalObsForRailEnv(), @GlobalObsForRailEnvDirectionDependent]}# [@TreeObsForRailEnv(), @GlobalObsForRailEnv() ]}
+#run_experiment.obs_builder = {"grid_search": [@GlobalObsForRailEnv(), @GlobalObsForRailEnvDirectionDependent]}# [@TreeObsForRailEnv(), @GlobalObsForRailEnv() ]}
+run_experiment.obs_builder = @TreeObsForRailEnv()
 
 TreeObsForRailEnv.max_depth = 2
 LocalObsForRailEnv.view_radius = 5
diff --git a/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin b/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin
new file mode 100644
index 0000000000000000000000000000000000000000..03aae996a12bf69717599cf734a39b6780bbfc72
--- /dev/null
+++ b/RLLib_training/experiment_configs/observation_benchmark_loaded_env/config.gin
@@ -0,0 +1,18 @@
+run_experiment.name = "observation_benchmark_loaded_env_results"
+run_experiment.num_iterations = 1002
+run_experiment.save_every = 50
+run_experiment.hidden_sizes = 32
+
+run_experiment.map_width = 20
+run_experiment.map_height = 20
+run_experiment.n_agents = 5
+run_experiment.policy_folder_name = "ppo_policy_{config[obs_builder].__class__.__name__}"#_entropy_coeff_{config[entropy_coeff]}_{config[hidden_sizes][0]}_hidden_sizes_"
+
+run_experiment.horizon = 50
+run_experiment.seed = 123
+
+run_experiment.entropy_coeff = 1e-2
+
+run_experiment.obs_builder = {"grid_search": [@LocalObsForRailEnv(), @TreeObsForRailEnv(), @GlobalObsForRailEnv(), @GlobalObsForRailEnvDirectionDependent]}
+TreeObsForRailEnv.max_depth = 2
+LocalObsForRailEnv.view_radius = 5
diff --git a/RLLib_training/render_training_result.py b/RLLib_training/render_training_result.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b9a08cfbfb57cf9b8b1486a3bf91ef575c8ce64
--- /dev/null
+++ b/RLLib_training/render_training_result.py
@@ -0,0 +1,210 @@
+from baselines.RLLib_training.RailEnvRLLibWrapper import RailEnvRLLibWrapper
+import gym
+
+
+from flatland.envs.generators import complex_rail_generator
+
+
+# Import PPO trainer: we can replace these imports by any other trainer from RLLib.
+from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG
+from ray.rllib.agents.ppo.ppo import PPOTrainer as Trainer
+# from baselines.CustomPPOTrainer import PPOTrainer as Trainer
+from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph as PolicyGraph
+# from baselines.CustomPPOPolicyGraph import CustomPPOPolicyGraph as PolicyGraph
+
+from ray.rllib.models import ModelCatalog
+from ray.tune.logger import pretty_print
+from baselines.RLLib_training.custom_preprocessors import CustomPreprocessor, ConvModelPreprocessor
+
+from baselines.RLLib_training.custom_models import ConvModelGlobalObs
+
+
+import ray
+import numpy as np
+
+from ray.tune.logger import UnifiedLogger
+import tempfile
+
+import gin
+
+from ray import tune
+
+from ray.rllib.utils.seed import seed as set_seed
+from flatland.envs.observations import TreeObsForRailEnv, GlobalObsForRailEnv,\
+    LocalObsForRailEnv, GlobalObsForRailEnvDirectionDependent
+
+from flatland.utils.rendertools import RenderTool
+import time
+
+gin.external_configurable(TreeObsForRailEnv)
+gin.external_configurable(GlobalObsForRailEnv)
+gin.external_configurable(LocalObsForRailEnv)
+gin.external_configurable(GlobalObsForRailEnvDirectionDependent)
+
+from ray.rllib.models.preprocessors import TupleFlatteningPreprocessor
+
+ModelCatalog.register_custom_preprocessor("tree_obs_prep", CustomPreprocessor)
+ModelCatalog.register_custom_preprocessor("global_obs_prep", TupleFlatteningPreprocessor)
+ModelCatalog.register_custom_preprocessor("conv_obs_prep", ConvModelPreprocessor)
+ModelCatalog.register_custom_model("conv_model", ConvModelGlobalObs)
+ray.init()#object_store_memory=150000000000, redis_max_memory=30000000000)
+
+
+CHECKPOINT_PATH = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/' \
+                  'conv_model_test/ppo_policy_TreeObsForRailEnv_5_agents_conv_model_False_ial1g3w9/checkpoint_51/checkpoint-51'
+
+N_EPISODES = 3
+N_STEPS_PER_EPISODE = 50
+
+
+def render_training_result(config):
+    print('Init Env')
+
+    set_seed(config['seed'], config['seed'], config['seed'])
+
+    transition_probability = [15,  # empty cell - Case 0
+                              5,  # Case 1 - straight
+                              5,  # Case 2 - simple switch
+                              1,  # Case 3 - diamond crossing
+                              1,  # Case 4 - single slip
+                              1,  # Case 5 - double slip
+                              1,  # Case 6 - symmetrical
+                              0,  # Case 7 - dead end
+                              1,  # Case 1b (8) - simple turn right
+                              1,  # Case 1c (9) - simple turn left
+                              1]  # Case 2b (10) - simple switch mirrored
+
+    # Example configuration to generate a random rail
+    env_config = {"width": config['map_width'],
+                  "height": config['map_height'],
+                  "rail_generator": complex_rail_generator,
+                  "number_of_agents": config['n_agents'],
+                  "seed": config['seed'],
+                  "obs_builder": config['obs_builder']}
+
+
+    # Observation space and action space definitions
+    if isinstance(config["obs_builder"], TreeObsForRailEnv):
+        obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(105,))
+        preprocessor = "tree_obs_prep"
+
+    elif isinstance(config["obs_builder"], GlobalObsForRailEnv):
+        obs_space = gym.spaces.Tuple((
+            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 16)),
+            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 8)),
+            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 2))))
+        if config['conv_model']:
+            preprocessor = "conv_obs_prep"
+        else:
+            preprocessor = "global_obs_prep"
+
+    elif isinstance(config["obs_builder"], GlobalObsForRailEnvDirectionDependent):
+        obs_space = gym.spaces.Tuple((
+            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 16)),
+            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 5)),
+            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 2))))
+        if config['conv_model']:
+            preprocessor = "conv_obs_prep"
+        else:
+            preprocessor = "global_obs_prep"
+
+    elif isinstance(config["obs_builder"], LocalObsForRailEnv):
+        view_radius = config["obs_builder"].view_radius
+        obs_space = gym.spaces.Tuple((
+            gym.spaces.Box(low=0, high=1, shape=(2 * view_radius + 1, 2 * view_radius + 1, 16)),
+            gym.spaces.Box(low=0, high=1, shape=(2 * view_radius + 1, 2 * view_radius + 1, 2)),
+            gym.spaces.Box(low=0, high=1, shape=(2 * view_radius + 1, 2 * view_radius + 1, 4)),
+            gym.spaces.Box(low=0, high=1, shape=(4,))))
+        preprocessor = "global_obs_prep"
+
+    else:
+        raise ValueError("Undefined observation space")
+
+    act_space = gym.spaces.Discrete(4)
+
+    # Dict with the different policies to train
+    policy_graphs = {
+        config['policy_folder_name'].format(**locals()): (PolicyGraph, obs_space, act_space, {})
+    }
+
+    def policy_mapping_fn(agent_id):
+        return config['policy_folder_name'].format(**locals())
+
+    # Trainer configuration
+    trainer_config = DEFAULT_CONFIG.copy()
+    if config['conv_model']:
+        trainer_config['model'] = {"custom_model": "conv_model", "custom_preprocessor": preprocessor}
+    else:
+        trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor}
+
+    trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
+                                    "policy_mapping_fn": policy_mapping_fn,
+                                    "policies_to_train": list(policy_graphs.keys())}
+    trainer_config["horizon"] = config['horizon']
+
+    trainer_config["num_workers"] = 0
+    trainer_config["num_cpus_per_worker"] = 3
trainer_config["num_gpus"] = 0 + trainer_config["num_gpus_per_worker"] = 0 + trainer_config["num_cpus_for_driver"] = 1 + trainer_config["num_envs_per_worker"] = 1 + trainer_config['entropy_coeff'] = config['entropy_coeff'] + trainer_config["env_config"] = env_config + trainer_config["batch_mode"] = "complete_episodes" + trainer_config['simple_optimizer'] = False + trainer_config['postprocess_inputs'] = True + trainer_config['log_level'] = 'WARN' + + env = RailEnvRLLibWrapper(env_config) + + trainer = Trainer(env=RailEnvRLLibWrapper, config=trainer_config) + + trainer.restore(CHECKPOINT_PATH) + + policy = trainer.get_policy(config['policy_folder_name'].format(**locals())) + + env_renderer = RenderTool(env, gl="PIL", show=True) + for episode in range(N_EPISODES): + observation = env.reset() + for i in range(N_STEPS_PER_EPISODE): + + action, _, infos = policy.compute_actions(list(observation.values()), []) + env_renderer.renderEnv(show=True, frames=True, iEpisode=episode, iStep=i, + action_dict=action) + logits = infos['behaviour_logits'] + actions = dict() + for j, logit in enumerate(logits): + actions[j] = np.argmax(logit) + + time.sleep(1) + observation, _, _, _ = env.step(action) + + env_renderer.close_window() + + +@gin.configurable +def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every, + map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder, + entropy_coeff, seed, conv_model): + + render_training_result( + config={"n_agents": n_agents, + "hidden_sizes": hidden_sizes, # Array containing the sizes of the network layers + "save_every": save_every, + "map_width": map_width, + "map_height": map_height, + "local_dir": local_dir, + "horizon": horizon, # Max number of time steps + 'policy_folder_name': policy_folder_name, + "obs_builder": obs_builder, + "entropy_coeff": entropy_coeff, + "seed": seed, + "conv_model": conv_model + }) + + +if __name__ == '__main__': + gin.external_configurable(tune.grid_search) + dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/conv_model_test' # To Modify + gin.parse_config_file(dir + '/config.gin') + run_experiment(local_dir=dir) diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py index 48407f645a07d3947053365a86398a90aa4e41d7..e7085b48819028f1ee3e9eee2f9cf8945ff0c870 100644 --- a/RLLib_training/train_experiment.py +++ b/RLLib_training/train_experiment.py @@ -52,18 +52,6 @@ def train(config, reporter): set_seed(config['seed'], config['seed'], config['seed']) - transition_probability = [15, # empty cell - Case 0 - 5, # Case 1 - straight - 5, # Case 2 - simple switch - 1, # Case 3 - diamond crossing - 1, # Case 4 - single slip - 1, # Case 5 - double slip - 1, # Case 6 - symmetrical - 0, # Case 7 - dead end - 1, # Case 1b (8) - simple turn right - 1, # Case 1c (9) - simple turn left - 1] # Case 2b (10) - simple switch mirrored - # Example configuration to generate a random rail env_config = {"width": config['map_width'], "height": config['map_height'],