diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000000000000000000000000000000000000..85154f6969c973b61ab62b43ab8e068d75d5ca24
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,15 @@
+include AUTHORS.rst
+include CONTRIBUTING.rst
+include HISTORY.rst
+include LICENSE
+include README.rst
+include requirements_torch_training.txt
+include requirements_RLLib_training.txt
+
+
+
+recursive-include tests *
+recursive-exclude * __pycache__
+recursive-exclude * *.py[co]
+
+recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif
diff --git a/RLLib_training/RailEnvRLLibWrapper.py b/RLLib_training/RailEnvRLLibWrapper.py
index 1dbdc28727c744b65c9e8591e34cb14ef7e42199..3ae98bf99a063f22eb13fbca68217fbe27b1991e 100644
--- a/RLLib_training/RailEnvRLLibWrapper.py
+++ b/RLLib_training/RailEnvRLLibWrapper.py
@@ -1,9 +1,8 @@
+import numpy as np
+from flatland.envs.generators import complex_rail_generator, random_rail_generator
 from flatland.envs.rail_env import RailEnv
 from ray.rllib.env.multi_agent_env import MultiAgentEnv
-from flatland.envs.observations import TreeObsForRailEnv
 from ray.rllib.utils.seed import seed as set_seed
-from flatland.envs.generators import complex_rail_generator, random_rail_generator
-import numpy as np
@@ -22,7 +21,8 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
         if config['rail_generator'] == "complex_rail_generator":
             self.rail_generator = complex_rail_generator(nr_start_goal=config['number_of_agents'], min_dist=5,
-                                                         nr_extra=config['nr_extra'], seed=config['seed'] * (1+vector_index))
+                                                         nr_extra=config['nr_extra'],
+                                                         seed=config['seed'] * (1 + vector_index))
         elif config['rail_generator'] == "random_rail_generator":
             self.rail_generator = random_rail_generator()
         elif config['rail_generator'] == "load_env":
@@ -30,17 +30,16 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
             self.rail_generator = random_rail_generator()
         else:
-            raise(ValueError, f'Unknown rail generator: {config["rail_generator"]}')
+            raise ValueError(f'Unknown rail generator: {config["rail_generator"]}')
 
-        set_seed(config['seed'] * (1+vector_index))
+        set_seed(config['seed'] * (1 + vector_index))
         self.env = RailEnv(width=config["width"], height=config["height"],
-                           number_of_agents=config["number_of_agents"],
-                           obs_builder_object=config['obs_builder'], rail_generator=self.rail_generator,
-                           prediction_builder_object=config['predictor'])
+                           number_of_agents=config["number_of_agents"],
+                           obs_builder_object=config['obs_builder'], rail_generator=self.rail_generator)
 
         if self.predefined_env:
-            #self.env.load(config['load_env_path'])
-            self.env.load('/home/guillaume/EPFL/Master_Thesis/flatland/baselines/torch_training/railway/complex_scene.pkl')
+            # self.env.load(config['load_env_path'])
+            self.env.load_resource('torch_training.railway', 'complex_scene.pkl')
 
         self.width = self.env.width
         self.height = self.env.height
@@ -63,7 +62,7 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
         o = dict()
         for i_agent in range(len(self.env.agents)):
-
+
             if predictions != {}:
                 pred_obs = self.get_prediction_as_observation(pred_pos, pred_dir, i_agent)
@@ -74,13 +73,13 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
                 o[i_agent] = obs[i_agent]
 
         # needed for the renderer
-        self.rail = self.env.rail
-        self.agents = self.env.agents
-        self.agents_static = self.env.agents_static
-        self.dev_obs_dict = self.env.dev_obs_dict
+        self.rail = self.env.rail
+        self.agents = self.env.agents
+        self.agents_static = self.env.agents_static
+        self.dev_obs_dict = self.env.dev_obs_dict
 
         if self.step_memory < 2:
-            return o
+            return o
         else:
             self.old_obs = o
             oo = dict()
@@ -124,9 +123,9 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
         for i_agent in range(len(self.env.agents)):
             if i_agent not in self.agents_done:
                 oo[i_agent] = [o[i_agent], self.old_obs[i_agent]]
-
+
         self.old_obs = o
-
+
         for agent, done in dones.items():
             if done and agent != '__all__':
                 self.agents_done.append(agent)
@@ -193,8 +192,8 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
                 elif collision_info[1] == 0:
                     # In this case, the other agent (agent 2) was on the same cell at t-1
                     # There is a collision if agent 2 is at t, on the cell where was agent 1 at t-1
-                    coord_agent_1_t_minus_1 = pred_pos[agent_handle, time_offset-1, 0] + \
-                                              1000 * pred_pos[agent_handle, time_offset, 1]
+                    coord_agent_1_t_minus_1 = pred_pos[agent_handle, time_offset - 1, 0] + \
+                                              1000 * pred_pos[agent_handle, time_offset, 1]
                     coord_agent_2_t = coord_other_agents[collision_info[0], 1]
                     if coord_agent_1_t_minus_1 == coord_agent_2_t:
                         pred_obs[time_offset, collision_info[0] + 1 * (collision_info[0] >= agent_handle)] = 1
@@ -203,7 +202,7 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
                     # In this case, the other agent (agent 2) will be on the same cell at t+1
                     # There is a collision if agent 2 is at t, on the cell where will be agent 1 at t+1
                     coord_agent_1_t_plus_1 = pred_pos[agent_handle, time_offset + 1, 0] + \
-                        1000 * pred_pos[agent_handle, time_offset, 1]
+                                             1000 * pred_pos[agent_handle, time_offset, 1]
                     coord_agent_2_t = coord_other_agents[collision_info[0], 1]
                     if coord_agent_1_t_plus_1 == coord_agent_2_t:
                         pred_obs[time_offset, collision_info[0] + 1 * (collision_info[0] >= agent_handle)] = 1
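The collision features above hash a predicted (row, col) position into a single integer as row + 1000 * col and compare those integers across agents and time steps. A minimal sketch of that encoding, assuming grid dimensions stay well below 1000 so the mapping cannot alias; the helper name is illustrative and not part of the wrapper:

import numpy as np


def flatten_position(pos, factor=1000):
    # Collapse a (row, col) pair into one integer; unique while row < factor.
    return pos[0] + factor * pos[1]


# Two predicted positions conflict exactly when their flattened coordinates match.
agent_1_t = np.array([3, 7])
agent_2_t = np.array([3, 7])
print(flatten_position(agent_1_t) == flatten_position(agent_2_t))  # True -> potential collision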
diff --git a/RLLib_training/__init__.py b/RLLib_training/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/RLLib_training/custom_preprocessors.py b/RLLib_training/custom_preprocessors.py
index 1d1d214cef1af84b99c719359a859712559974bc..bd45dfd2bd8620c2866ac3d079a9e82dd4c20c7e 100644
--- a/RLLib_training/custom_preprocessors.py
+++ b/RLLib_training/custom_preprocessors.py
@@ -49,13 +49,14 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1):
 
 class CustomPreprocessor(Preprocessor):
     def _init_shape(self, obs_space, options):
-        return (sum([space.shape[0] for space in obs_space]), )
-        return ((sum([space.shape[0] for space in obs_space[:2]]) + obs_space[2].shape[0]*obs_space[2].shape[1])*2,)
+        # return (sum([space.shape[0] for space in obs_space]), )
+        return ((sum([space.shape[0] for space in obs_space[:2]]) + obs_space[2].shape[0] * obs_space[2].shape[1]),)
 
     def transform(self, observation):
         # if len(observation) == 111:
-        return np.concatenate([norm_obs_clip(obs) for obs in observation])
-        #return np.concatenate([norm_obs_clip(observation[0][0]), observation[0][1], observation[0][2].flatten(), norm_obs_clip(observation[1]), observation[2], observation[3].flatten()])
+        # return np.concatenate([norm_obs_clip(obs) for obs in observation])
+        return np.concatenate([norm_obs_clip(observation[0]), observation[1],
+                               observation[2].flatten()])  # , norm_obs_clip(observation[1]), observation[2], observation[3].flatten()])
         #one_hot = observation[-3:]
         #return np.append(obs, one_hot)
         # else:
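With a predictor configured, each per-agent observation handed to CustomPreprocessor is a tuple of a tree observation, a per-agent collision-flag vector and a 20-step prediction matrix, so _init_shape above resolves to 147 + n_agents + 20 * n_agents. A quick sanity check of that arithmetic; the 147 and 3-agent figures are taken from the spaces defined in train_experiment.py further down, and the snippet is illustrative only:

import numpy as np

n_agents = 3
tree_obs = np.zeros(147)                # Box(shape=(147,)) component of the Tuple space
collision_flags = np.zeros(n_agents)    # Box(shape=(n_agents,)) component
predictions = np.zeros((20, n_agents))  # Box(shape=(20, n_agents)) component

flat_size = tree_obs.shape[0] + collision_flags.shape[0] + predictions.size
assert flat_size == 147 + n_agents + 20 * n_agents  # 210 floats for 3 agents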
diff --git a/RLLib_training/experiment_configs/__init__.py b/RLLib_training/experiment_configs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/RLLib_training/experiment_configs/experiment_agent_memory/__init__.py b/RLLib_training/experiment_configs/experiment_agent_memory/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/RLLib_training/experiment_configs/observation_benchmark_loaded_env/__init__.py b/RLLib_training/experiment_configs/observation_benchmark_loaded_env/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/RLLib_training/train.py b/RLLib_training/train.py
index ecea5365af11463aacc8c51e38f7538ca11949a9..ba5f4eab43f5173dd410bc6d9b306d90e0e21ffc 100644
--- a/RLLib_training/train.py
+++ b/RLLib_training/train.py
@@ -1,39 +1,22 @@
-from flatland.envs import rail_env
-from flatland.envs.rail_env import random_rail_generator
-from baselines.RailEnvRLLibWrapper import RailEnvRLLibWrapper
-from flatland.utils.rendertools import RenderTool
 import random
-import gym
-
-import matplotlib.pyplot as plt
-
-from flatland.envs.generators import complex_rail_generator
+import gym
+import numpy as np
+import ray
 import ray.rllib.agents.ppo.ppo as ppo
-import ray.rllib.agents.dqn.dqn as dqn
+from RailEnvRLLibWrapper import RailEnvRLLibWrapper
+from flatland.envs.generators import complex_rail_generator
 from ray.rllib.agents.ppo.ppo import PPOTrainer
-from ray.rllib.agents.dqn.dqn import DQNTrainer
 from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph
-from ray.rllib.agents.dqn.dqn_policy_graph import DQNPolicyGraph
-
-from ray.tune.registry import register_env
 from ray.rllib.models import ModelCatalog
 from ray.tune.logger import pretty_print
-from baselines.CustomPreprocessor import CustomPreprocessor
-
-
-import ray
-import numpy as np
-
-from ray.rllib.env.multi_agent_env import MultiAgentEnv
-
-# RailEnv.__bases__ = (RailEnv.__bases__[0], MultiAgentEnv)
-
+from RLLib_training.custom_preprocessors import CustomPreprocessor
 
 ModelCatalog.register_custom_preprocessor("my_prep", CustomPreprocessor)
 ray.init()
 
+
 def train(config):
     print('Init Env')
     random.seed(1)
@@ -52,28 +35,10 @@ def train(config):
                               1]  # Case 2b (10) - simple switch mirrored
 
     # Example generate a random rail
-    """
-    env = RailEnv(width=10,
-                  height=10,
-                  rail_generator=random_rail_generator(cell_type_relative_proportion=transition_probability),
-                  number_of_agents=1)
-    """
     env_config = {"width": 20,
-                  "height":20,
-                  "rail_generator":complex_rail_generator(nr_start_goal=5, min_dist=5, max_dist=99999, seed=0),
-                  "number_of_agents":5}
-    """
-    env = RailEnv(width=20,
-                  height=20,
-                  rail_generator=rail_from_list_of_saved_GridTransitionMap_generator(
-                      ['../notebooks/temp.npy']),
-                  number_of_agents=3)
-
-    """
-
-    # if config['render']:
-    #     env_renderer = RenderTool(env, gl="QT")
-    #     plt.figure(figsize=(5,5))
+                  "height": 20,
+                  "rail_generator": complex_rail_generator(nr_start_goal=5, min_dist=5, max_dist=99999, seed=0),
+                  "number_of_agents": 5}
 
     obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(105,))
     act_space = gym.spaces.Discrete(4)
@@ -94,13 +59,13 @@ def train(config):
     agent_config["horizon"] = 50
     agent_config["num_workers"] = 0
     # agent_config["sample_batch_size"]: 1000
-    #agent_config["num_cpus_per_worker"] = 40
-    #agent_config["num_gpus"] = 2.0
-    #agent_config["num_gpus_per_worker"] = 2.0
-    #agent_config["num_cpus_for_driver"] = 5
-    #agent_config["num_envs_per_worker"] = 15
+    # agent_config["num_cpus_per_worker"] = 40
+    # agent_config["num_gpus"] = 2.0
+    # agent_config["num_gpus_per_worker"] = 2.0
+    # agent_config["num_cpus_for_driver"] = 5
+    # agent_config["num_envs_per_worker"] = 15
     agent_config["env_config"] = env_config
-    #agent_config["batch_mode"] = "complete_episodes"
+    # agent_config["batch_mode"] = "complete_episodes"
 
     ppo_trainer = PPOTrainer(env=RailEnvRLLibWrapper, config=agent_config)
@@ -114,10 +79,5 @@ def train(config):
         # checkpoint = ppo_trainer.save()
         # print("checkpoint saved at", checkpoint)
 
-train({})
-
-
-
-
-
+train({})
diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
index 90ce6484513a4b3e0ea34a80cf3ccdca7c99b32c..5e4c2bc5f659564f2f9a74a29c815ceb3fe71eeb 100644
--- a/RLLib_training/train_experiment.py
+++ b/RLLib_training/train_experiment.py
@@ -1,38 +1,32 @@
-from baselines.RLLib_training.RailEnvRLLibWrapper import RailEnvRLLibWrapper
-import gym
+import os
 
 import gin
-
-from flatland.envs.generators import complex_rail_generator
-
+import gym
+from flatland.envs.predictions import DummyPredictorForRailEnv
+from importlib_resources import path
 # Import PPO trainer: we can replace these imports by any other trainer from RLLib.
 from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG
 from ray.rllib.agents.ppo.ppo import PPOTrainer as Trainer
-# from baselines.CustomPPOTrainer import PPOTrainer as Trainer
 from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph as PolicyGraph
-# from baselines.CustomPPOPolicyGraph import CustomPPOPolicyGraph as PolicyGraph
-
 from ray.rllib.models import ModelCatalog
-from ray.tune.logger import pretty_print
-from baselines.RLLib_training.custom_preprocessors import CustomPreprocessor, ConvModelPreprocessor
-from baselines.RLLib_training.custom_models import ConvModelGlobalObs
-
-from flatland.envs.predictions import DummyPredictorForRailEnv
 
 gin.external_configurable(DummyPredictorForRailEnv)
-
 import ray
-import numpy as np
 from ray.tune.logger import UnifiedLogger
+from ray.tune.logger import pretty_print
+
+from RailEnvRLLibWrapper import RailEnvRLLibWrapper
+from custom_models import ConvModelGlobalObs
+from custom_preprocessors import CustomPreprocessor, ConvModelPreprocessor
 
 import tempfile
 
 from ray import tune
 
 from ray.rllib.utils.seed import seed as set_seed
-from flatland.envs.observations import TreeObsForRailEnv, GlobalObsForRailEnv,\
-    LocalObsForRailEnv, GlobalObsForRailEnvDirectionDependent
+from flatland.envs.observations import TreeObsForRailEnv, GlobalObsForRailEnv, \
+    LocalObsForRailEnv, GlobalObsForRailEnvDirectionDependent
 
 gin.external_configurable(TreeObsForRailEnv)
 gin.external_configurable(GlobalObsForRailEnv)
@@ -45,7 +39,9 @@ ModelCatalog.register_custom_preprocessor("tree_obs_prep", CustomPreprocessor)
 ModelCatalog.register_custom_preprocessor("global_obs_prep", TupleFlatteningPreprocessor)
 ModelCatalog.register_custom_preprocessor("conv_obs_prep", ConvModelPreprocessor)
 ModelCatalog.register_custom_model("conv_model", ConvModelGlobalObs)
-ray.init()#object_store_memory=150000000000, redis_max_memory=30000000000)
+ray.init()  # object_store_memory=150000000000, redis_max_memory=30000000000)
+
+__file_dirname__ = os.path.dirname(os.path.realpath(__file__))
 
 
 def on_episode_start(info):
@@ -82,11 +78,13 @@ def train(config, reporter):
     # Observation space and action space definitions
     if isinstance(config["obs_builder"], TreeObsForRailEnv):
         if config['predictor'] is None:
-            obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(147,)), ) * config['step_memory'])
+            obs_space = gym.spaces.Tuple(
+                (gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(147,)),) * config['step_memory'])
         else:
             obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'),
                                                          high=float('inf'), shape=(147,)),
-                                          gym.spaces.Box(low=0, high=1, shape=(config['n_agents'],)),
-                                          gym.spaces.Box(low=0, high=1, shape=(20, config['n_agents'])),) *config['step_memory'])
+                                          gym.spaces.Box(low=0, high=1, shape=(config['n_agents'],)),
+                                          gym.spaces.Box(low=0, high=1, shape=(20, config['n_agents'])),
+                                          ) * config['step_memory'])
         preprocessor = "tree_obs_prep"
 
     elif isinstance(config["obs_builder"], GlobalObsForRailEnv):
@@ -121,7 +119,6 @@ def train(config, reporter):
     else:
         raise ValueError("Undefined observation space")
 
-
     act_space = gym.spaces.Discrete(5)
 
     # Dict with the different policies to train
@@ -132,7 +129,6 @@ def train(config, reporter):
     def policy_mapping_fn(agent_id):
         return config['policy_folder_name'].format(**locals())
 
-
    # Trainer configuration
     trainer_config = DEFAULT_CONFIG.copy()
     if config['conv_model']:
@@ -141,8 +137,8 @@ def train(config, reporter):
         trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor}
 
     trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
-                                    "policy_mapping_fn": policy_mapping_fn,
-                                    "policies_to_train": list(policy_graphs.keys())}
+                                    "policy_mapping_fn": policy_mapping_fn,
+                                    "policies_to_train": list(policy_graphs.keys())}
 
     trainer_config["horizon"] = config['horizon']
     trainer_config["num_workers"] = 0
@@ -192,7 +188,6 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                    map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder,
                    entropy_coeff, seed, conv_model, rail_generator, nr_extra, kl_coeff, lambda_gae,
                    predictor, step_memory):
-
     tune.run(
         train,
         name=name,
@@ -220,12 +215,15 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
             "cpu": 5,
             "gpu": 0.2
         },
+        verbose=2,
         local_dir=local_dir
     )
 
 
 if __name__ == '__main__':
     gin.external_configurable(tune.grid_search)
-    dir = '/home/guillaume/flatland/baselines/RLLib_training/experiment_configs/experiment_agent_memory'  # To Modify
-    gin.parse_config_file(dir + '/config.gin')
+    with path('RLLib_training.experiment_configs.experiment_agent_memory', 'config.gin') as f:
+        gin.parse_config_file(f)
+
+    dir = os.path.join(__file_dirname__, 'experiment_configs', 'experiment_agent_memory')
     run_experiment(local_dir=dir)
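train_experiment.py now resolves config.gin through importlib_resources instead of a hard-coded home directory. A minimal sketch of that lookup pattern, assuming the resource is shipped inside the RLLib_training.experiment_configs.experiment_agent_memory package as it is in this patch:

from importlib_resources import path

# `path` yields a filesystem path for a resource bundled with a package; for zipped
# installs it may be a temporary copy that is removed when the block exits.
with path('RLLib_training.experiment_configs.experiment_agent_memory', 'config.gin') as cfg:
    print(cfg)  # pathlib.Path accepted by gin.parse_config_file(cfg)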
diff --git a/requirements_RLLib_training.txt b/requirements_RLLib_training.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b147984edbd294842d18d1aeedf72a17e3b5536f
--- /dev/null
+++ b/requirements_RLLib_training.txt
@@ -0,0 +1,6 @@
+#ray==0.7.0
+gym ==0.12.5
+opencv-python==4.1.0.25
+#tensorflow==1.13.1
+lz4==2.1.10
+gin-config==0.1.4
\ No newline at end of file
diff --git a/requirements_torch_training.txt b/requirements_torch_training.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2bce630587233b4c771ef7a43bc3aaf7f78fbb07
--- /dev/null
+++ b/requirements_torch_training.txt
@@ -0,0 +1 @@
+torch==1.1.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..723e1a6f701150f5853a0199057bc234137c2aa2
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,53 @@
+import os
+
+from setuptools import setup, find_packages
+
+# TODO: setup does not support installation from url, move to requirements*.txt
+# TODO: @master as soon as mr is merged on flatland.
+os.system(
+    'pip install git+https://gitlab.aicrowd.com/flatland/flatland.git@57-access-resources-through-importlib_resources')
+
+install_reqs = []
+# TODO: include requirements_RLLib_training.txt
+requirements_paths = ['requirements_torch_training.txt']  # , 'requirements_RLLib_training.txt']
+for requirements_path in requirements_paths:
+    with open(requirements_path, 'r') as f:
+        install_reqs += [
+            s for s in [
+                line.strip(' \n') for line in f
+            ] if not s.startswith('#') and s != ''
+        ]
+requirements = install_reqs
+setup_requirements = install_reqs
+test_requirements = install_reqs
+
+setup(
+    author="S.P. Mohanty",
+    author_email='mohanty@aicrowd.com',
+    classifiers=[
+        'Development Status :: 2 - Pre-Alpha',
+        'Intended Audience :: Developers',
+        'Natural Language :: English',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+    ],
+    description="Multi Agent Reinforcement Learning on Trains",
+    entry_points={
+        'console_scripts': [
+            'flatland=flatland.cli:main',
+        ],
+    },
+    install_requires=requirements,
+    long_description='',
+    include_package_data=True,
+    keywords='flatland-baselines',
+    name='flatland-rl-baselines',
+    packages=find_packages('.'),
+    data_files=[],
+    setup_requires=setup_requirements,
+    test_suite='tests',
+    tests_require=test_requirements,
+    url='https://gitlab.aicrowd.com/flatland/baselines',
+    version='0.1.1',
+    zip_safe=False,
+)
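The os.system pip call in setup.py above is a stopgap flagged by the TODOs. One possible alternative, not what this patch does, is a PEP 508 direct reference in install_requires, which newer pip and setuptools can resolve themselves; the distribution name flatland-rl is an assumption here:

# Sketch only: declare the git dependency declaratively instead of shelling out to pip.
install_requires = [
    'flatland-rl @ git+https://gitlab.aicrowd.com/flatland/flatland.git'
    '@57-access-resources-through-importlib_resources',
]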
diff --git a/torch_training/Nets/__init__.py b/torch_training/Nets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/torch_training/Nets/avoid_checkpoint15000.pth b/torch_training/Nets/avoid_checkpoint15000.pth
index 14882a37a86085b137f4422b6bba75f387a2d3b5..d3081e88f97ac75641c0d94c7cf794f34d436581 100644
Binary files a/torch_training/Nets/avoid_checkpoint15000.pth and b/torch_training/Nets/avoid_checkpoint15000.pth differ
diff --git a/torch_training/dueling_double_dqn.py b/torch_training/dueling_double_dqn.py
index 3b98a3a62a5a6b9e1cd1b4732b46831d5dfee95d..6c54e4ef0aed7d833c25f3eb516d2abcb3589eee 100644
--- a/torch_training/dueling_double_dqn.py
+++ b/torch_training/dueling_double_dqn.py
@@ -8,7 +8,7 @@ import torch
 import torch.nn.functional as F
 import torch.optim as optim
 
-from model import QNetwork, QNetwork2
+from torch_training.model import QNetwork, QNetwork2
 
 BUFFER_SIZE = int(1e5)  # replay buffer size
 BATCH_SIZE = 512  # minibatch size
diff --git a/torch_training/railway/__init__.py b/torch_training/railway/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/torch_training/training_navigation.py b/torch_training/training_navigation.py
index 8c30d72010d95389dd22af06971170aa9c4b4480..96593864a31cf45864b3cf8f52c29a0d5a241ef0 100644
--- a/torch_training/training_navigation.py
+++ b/torch_training/training_navigation.py
@@ -5,6 +5,8 @@ import numpy as np
 import torch
 from dueling_double_dqn import Agent
 from flatland.envs.generators import complex_rail_generator
+from flatland.envs.observations import TreeObsForRailEnv
+from flatland.envs.predictions import DummyPredictorForRailEnv
 from flatland.envs.rail_env import RailEnv
 from flatland.utils.rendertools import RenderTool
 
@@ -43,18 +45,22 @@ env = RailEnv(width=10,
               height=20)
 env.load("./railway/complex_scene.pkl")
 """
+
 env = RailEnv(width=8,
               height=8,
-              rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=5, min_dist=5, max_dist=99999, seed=0),
-              number_of_agents=1)
+              rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=1, min_dist=4, max_dist=99999, seed=0),
+              obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=DummyPredictorForRailEnv()),
+              number_of_agents=3)
+
 env.reset(True, True)
 
 env_renderer = RenderTool(env, gl="PILSVG")
 handle = env.get_agent_handles()
 
-state_size = 147 * 2
+state_size = 168 * 2
 action_size = 5
 n_trials = 15000
+max_steps = int(1.5 * (env.height + env.width))
 eps = 1.
 eps_end = 0.005
 eps_decay = 0.9995
@@ -69,7 +75,7 @@ action_prob = [0] * action_size
 agent_obs = [None] * env.get_num_agents()
 agent_next_obs = [None] * env.get_num_agents()
 agent = Agent(state_size, action_size, "FC", 0)
-#agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint1500.pth'))
+# agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint15000.pth'))
 
 demo = False
 
@@ -95,7 +101,7 @@ def min_lt(seq, val):
     min = np.inf
     idx = len(seq) - 1
     while idx >= 0:
-        if seq[idx] > val and seq[idx] < min:
+        if seq[idx] >= val and seq[idx] < min:
             min = seq[idx]
         idx -= 1
     return min
@@ -110,7 +116,8 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1):
     :return: returnes normalized and clipped observatoin
     """
     max_obs = max(1, max_lt(obs, 1000))
-    min_obs = max(0, min_lt(obs, 0))
+    min_obs = min(max_obs, min_lt(obs, 0))
+
     if max_obs == min_obs:
         return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
     norm = np.abs(max_obs - min_obs)
@@ -127,13 +134,14 @@ for trials in range(1, n_trials + 1):
         env_renderer.set_new_rail()
     final_obs = obs.copy()
     final_obs_next = obs.copy()
-
     for a in range(env.get_num_agents()):
-        data, distance, agent_data = env.obs_builder.split_tree(tree=np.array(obs[a]), num_features_per_node=7, current_depth=0)
+        data, distance, agent_data = env.obs_builder.split_tree(tree=np.array(obs[a]), num_features_per_node=8,
+                                                                current_depth=0)
         data = norm_obs_clip(data)
         distance = norm_obs_clip(distance)
         agent_data = np.clip(agent_data, -1, 1)
         obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
+
     for i in range(2):
         time_obs.append(obs)
     # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
@@ -143,14 +151,14 @@ for trials in range(1, n_trials + 1):
     score = 0
     env_done = 0
     # Run episode
-    for step in range(100):
+    for step in range(max_steps):
         if demo:
             env_renderer.renderEnv(show=True, show_observations=False)
         # print(step)
         # Action
         for a in range(env.get_num_agents()):
             if demo:
-                eps = 1
+                eps = 0
             # action = agent.act(np.array(obs[a]), eps=eps)
             action = agent.act(agent_obs[a], eps=eps)
             action_prob[action] += 1
@@ -159,13 +167,12 @@ for trials in range(1, n_trials + 1):
         next_obs, all_rewards, done, _ = env.step(action_dict)
 
         for a in range(env.get_num_agents()):
-            data, distance, agent_data = env.obs_builder.split_tree(tree=np.array(next_obs[a]), num_features_per_node=7,
+            data, distance, agent_data = env.obs_builder.split_tree(tree=np.array(next_obs[a]), num_features_per_node=8,
                                                                     current_depth=0)
             data = norm_obs_clip(data)
             distance = norm_obs_clip(distance)
             agent_data = np.clip(agent_data, -1, 1)
             next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
-
         time_obs.append(next_obs)
 
         # Update replay buffer and train agent
@@ -177,7 +184,7 @@ for trials in range(1, n_trials + 1):
                 final_action_dict.update({a: action_dict[a]})
             if not demo and not done[a]:
                 agent.step(agent_obs[a], action_dict[a], all_rewards[a], agent_next_obs[a], done[a])
-            score += all_rewards[a]
+            score += all_rewards[a] / env.get_num_agents()
 
         agent_obs = agent_next_obs.copy()
         if done['__all__']:
@@ -189,11 +196,12 @@ for trials in range(1, n_trials + 1):
     eps = max(eps_end, eps_decay * eps)  # decrease epsilon
     done_window.append(env_done)
-    scores_window.append(score)  # save most recent score
+    scores_window.append(score / max_steps)  # save most recent score
     scores.append(np.mean(scores_window))
     dones_list.append((np.mean(done_window)))
-    print('\rTraining {} Agents.\t Episode {}\t Average Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+    print(
+        '\rTraining {} Agents.\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
         env.get_num_agents(),
         trials,
         np.mean(scores_window),
@@ -202,7 +210,7 @@ for trials in range(1, n_trials + 1):
 
     if trials % 100 == 0:
         print(
-            '\rTraining {} Agents.\t Episode {}\t Average Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+            '\rTraining {} Agents.\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
                 env.get_num_agents(),
                 trials,
                 np.mean(scores_window),
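The jump from state_size = 147 * 2 to 168 * 2 above follows from the extra feature carried per tree node: a depth-2 tree observation over four movement branches has 1 + 4 + 16 = 21 nodes, so split_tree yields 21 × 7 = 147 values before this change and 21 × 8 = 168 after, doubled because two consecutive frames are stacked. A quick check of that arithmetic; the four-branch node count is an assumption about TreeObsForRailEnv, and the helper is illustrative only:

def tree_obs_size(max_depth, features_per_node, stacked_frames=2):
    # 1 + 4 + 16 = 21 nodes for a depth-2 tree with 4 branches per node.
    n_nodes = sum(4 ** d for d in range(max_depth + 1))
    return n_nodes * features_per_node * stacked_frames


assert tree_obs_size(2, 7) == 147 * 2  # old layout (num_features_per_node=7)
assert tree_obs_size(2, 8) == 168 * 2  # layout used above (num_features_per_node=8)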
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000000000000000000000000000000000000..3c22b56780ffa59d41f64cad3f9698c3f62a204d
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,37 @@
+[tox]
+; TODO py36, flake8
+envlist = py37
+
+[travis]
+python =
+; TODO: py36
+    3.7: py37
+
+[testenv]
+whitelist_externals = sh
+                      pip
+                      python
+setenv =
+    PYTHONPATH = {toxinidir}
+passenv =
+    DISPLAY
+; HTTP_PROXY+HTTPS_PROXY required behind corporate proxies
+    HTTP_PROXY
+    HTTPS_PROXY
+deps =
+    -r{toxinidir}/requirements_torch_training.txt
+commands =
+    python torch_training/training_navigation.py
+
+[flake8]
+max-line-length = 120
+ignore = E121 E126 E123 E128 E133 E226 E241 E242 E704 W291 W293 W391 W503 W504 W505
+
+[testenv:flake8]
+basepython = python
+passenv = DISPLAY
+deps =
+    -r{toxinidir}/requirements_torch_training.txt
+commands =
+    flake8 torch_training
+