From 991ba71433e48d82d743ca8a8b0827db72d52415 Mon Sep 17 00:00:00 2001
From: u214892 <u214892@sbb.ch>
Date: Wed, 12 Jun 2019 17:46:00 +0200
Subject: [PATCH] #57 access resources for torch_training via
 importlib_resources; initial tox setup
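
Replace the hard-coded absolute paths to the pickled railway scene and the
gin experiment config with package-relative resource access, and add an
initial tox/setup.py packaging setup so torch_training can run as an
installed package.

env.load_resource() is provided by the companion flatland branch
57-access-resources-through-importlib_resources that setup.py installs; it
is assumed to resolve a data file bundled inside a package via
importlib_resources, roughly as in this sketch (the helper shown here is
illustrative; only importlib_resources.path and env.load are existing APIs):

    from importlib_resources import path

    def load_resource(self, package, resource):
        # Locate a data file shipped inside an installed package, e.g.
        # load_resource('torch_training.railway', 'complex_scene.pkl'),
        # instead of depending on an absolute checkout path.
        with path(package, resource) as resource_path:
            self.load(str(resource_path))

The new tox environment runs torch_training/training_navigation.py as a
smoke test; it can be invoked locally with "tox -e py37".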

---
 MANIFEST.in                                   | 15 ++++
 RLLib_training/RailEnvRLLibWrapper.py         |  2 +-
 RLLib_training/__init__.py                    |  0
 RLLib_training/experiment_configs/__init__.py |  0
 .../__init__.py                               |  0
 RLLib_training/train.py                       | 74 +++++--------------
 RLLib_training/train_experiment.py            | 63 +++++++---------
 requirements_RLLib_training.txt               |  6 ++
 requirements_torch_training.txt               |  1 +
 setup.py                                      | 52 +++++++++++++
 torch_training/Nets/__init__.py               |  0
 torch_training/dueling_double_dqn.py          |  2 +-
 torch_training/railway/__init__.py            |  0
 torch_training/training_navigation.py         | 35 +++++----
 tox.ini                                       | 37 ++++++++++
 15 files changed, 176 insertions(+), 111 deletions(-)
 create mode 100644 MANIFEST.in
 create mode 100644 RLLib_training/__init__.py
 create mode 100644 RLLib_training/experiment_configs/__init__.py
 create mode 100644 RLLib_training/experiment_configs/observation_benchmark_loaded_env/__init__.py
 create mode 100644 requirements_RLLib_training.txt
 create mode 100644 requirements_torch_training.txt
 create mode 100644 setup.py
 create mode 100644 torch_training/Nets/__init__.py
 create mode 100644 torch_training/railway/__init__.py
 create mode 100644 tox.ini

diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..85154f6
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,15 @@
+include AUTHORS.rst
+include CONTRIBUTING.rst
+include HISTORY.rst
+include LICENSE
+include README.rst
+include requirements_torch_training.txt
+include requirements_RLLib_training.txt
+
+
+
+recursive-include tests *
+recursive-exclude * __pycache__
+recursive-exclude * *.py[co]
+
+recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif
diff --git a/RLLib_training/RailEnvRLLibWrapper.py b/RLLib_training/RailEnvRLLibWrapper.py
index 5ab92a4..c018683 100644
--- a/RLLib_training/RailEnvRLLibWrapper.py
+++ b/RLLib_training/RailEnvRLLibWrapper.py
@@ -26,7 +26,7 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
         self.env = RailEnv(width=10, height=20,
                 number_of_agents=config["number_of_agents"], obs_builder_object=config['obs_builder'])
 
-        self.env.load('/mount/SDC/flatland/baselines/torch_training/railway/complex_scene.pkl')
+        self.env.load_resource('torch_training.railway', 'complex_scene.pkl')
 
         self.width = self.env.width
         self.height = self.env.height
diff --git a/RLLib_training/__init__.py b/RLLib_training/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/RLLib_training/experiment_configs/__init__.py b/RLLib_training/experiment_configs/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/RLLib_training/experiment_configs/observation_benchmark_loaded_env/__init__.py b/RLLib_training/experiment_configs/observation_benchmark_loaded_env/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/RLLib_training/train.py b/RLLib_training/train.py
index ecea536..5d07c8b 100644
--- a/RLLib_training/train.py
+++ b/RLLib_training/train.py
@@ -1,39 +1,22 @@
-from flatland.envs import rail_env
-from flatland.envs.rail_env import random_rail_generator
-from baselines.RailEnvRLLibWrapper import RailEnvRLLibWrapper
-from flatland.utils.rendertools import RenderTool
 import random
-import gym
-
-import matplotlib.pyplot as plt
-
-from flatland.envs.generators import complex_rail_generator
 
+import gym
+import numpy as np
+import ray
 import ray.rllib.agents.ppo.ppo as ppo
-import ray.rllib.agents.dqn.dqn as dqn
 from ray.rllib.agents.ppo.ppo import PPOTrainer
-from ray.rllib.agents.dqn.dqn import DQNTrainer
 from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph
-from ray.rllib.agents.dqn.dqn_policy_graph import DQNPolicyGraph
-
-from ray.tune.registry import register_env
 from ray.rllib.models import ModelCatalog
 from ray.tune.logger import pretty_print
-from baselines.CustomPreprocessor import CustomPreprocessor
-
-
-import ray
-import numpy as np
-
-from ray.rllib.env.multi_agent_env import MultiAgentEnv
-
-# RailEnv.__bases__ = (RailEnv.__bases__[0], MultiAgentEnv)
-
 
+from RLLib_training.custom_preprocessors import CustomPreprocessor
+from RailEnvRLLibWrapper import RailEnvRLLibWrapper
+from flatland.envs.generators import complex_rail_generator
 
 ModelCatalog.register_custom_preprocessor("my_prep", CustomPreprocessor)
 ray.init()
 
+
 def train(config):
     print('Init Env')
     random.seed(1)
@@ -52,28 +35,10 @@ def train(config):
                               1]  # Case 2b (10) - simple switch mirrored
 
     # Example generate a random rail
-    """
-    env = RailEnv(width=10,
-                  height=10,
-                  rail_generator=random_rail_generator(cell_type_relative_proportion=transition_probability),
-                  number_of_agents=1)
-    """
     env_config = {"width": 20,
-                  "height":20,
-                  "rail_generator":complex_rail_generator(nr_start_goal=5, min_dist=5, max_dist=99999, seed=0),
-                  "number_of_agents":5}
-    """
-    env = RailEnv(width=20,
-                  height=20,
-                  rail_generator=rail_from_list_of_saved_GridTransitionMap_generator(
-                          ['../notebooks/temp.npy']),
-                  number_of_agents=3)
-
-    """
-
-    # if config['render']:
-    #     env_renderer = RenderTool(env, gl="QT")
-    # plt.figure(figsize=(5,5))
+                  "height": 20,
+                  "rail_generator": complex_rail_generator(nr_start_goal=5, min_dist=5, max_dist=99999, seed=0),
+                  "number_of_agents": 5}
 
     obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(105,))
     act_space = gym.spaces.Discrete(4)
@@ -94,13 +59,13 @@ def train(config):
     agent_config["horizon"] = 50
     agent_config["num_workers"] = 0
     # agent_config["sample_batch_size"]: 1000
-    #agent_config["num_cpus_per_worker"] = 40
-    #agent_config["num_gpus"] = 2.0
-    #agent_config["num_gpus_per_worker"] = 2.0
-    #agent_config["num_cpus_for_driver"] = 5
-    #agent_config["num_envs_per_worker"] = 15
+    # agent_config["num_cpus_per_worker"] = 40
+    # agent_config["num_gpus"] = 2.0
+    # agent_config["num_gpus_per_worker"] = 2.0
+    # agent_config["num_cpus_for_driver"] = 5
+    # agent_config["num_envs_per_worker"] = 15
     agent_config["env_config"] = env_config
-    #agent_config["batch_mode"] = "complete_episodes"
+    # agent_config["batch_mode"] = "complete_episodes"
 
     ppo_trainer = PPOTrainer(env=RailEnvRLLibWrapper, config=agent_config)
 
@@ -114,10 +79,5 @@ def train(config):
         #     checkpoint = ppo_trainer.save()
         #     print("checkpoint saved at", checkpoint)
 
-train({})
-
-
-
-
-
 
+train({})
diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
index d58e9bf..26c8c5b 100644
--- a/RLLib_training/train_experiment.py
+++ b/RLLib_training/train_experiment.py
@@ -1,37 +1,26 @@
-from baselines.RLLib_training.RailEnvRLLibWrapper import RailEnvRLLibWrapper
-import gym
-
-
-from flatland.envs.generators import complex_rail_generator
-
+import os
+import tempfile
 
+import gin
+import gym
+import ray
+from importlib_resources import path
+from ray import tune
 # Import PPO trainer: we can replace these imports by any other trainer from RLLib.
 from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG
 from ray.rllib.agents.ppo.ppo import PPOTrainer as Trainer
-# from baselines.CustomPPOTrainer import PPOTrainer as Trainer
 from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph as PolicyGraph
-# from baselines.CustomPPOPolicyGraph import CustomPPOPolicyGraph as PolicyGraph
-
 from ray.rllib.models import ModelCatalog
-from ray.tune.logger import pretty_print
-from baselines.RLLib_training.custom_preprocessors import CustomPreprocessor, ConvModelPreprocessor
-
-from baselines.RLLib_training.custom_models import ConvModelGlobalObs
-
-
-import ray
-import numpy as np
-
+from ray.rllib.utils.seed import seed as set_seed
 from ray.tune.logger import UnifiedLogger
-import tempfile
-
-import gin
-
-from ray import tune
+from ray.tune.logger import pretty_print
 
-from ray.rllib.utils.seed import seed as set_seed
-from flatland.envs.observations import TreeObsForRailEnv, GlobalObsForRailEnv,\
-                                       LocalObsForRailEnv, GlobalObsForRailEnvDirectionDependent
+from RailEnvRLLibWrapper import RailEnvRLLibWrapper
+from custom_models import ConvModelGlobalObs
+from custom_preprocessors import CustomPreprocessor, ConvModelPreprocessor
+from flatland.envs.generators import complex_rail_generator
+from flatland.envs.observations import TreeObsForRailEnv, GlobalObsForRailEnv, \
+    LocalObsForRailEnv, GlobalObsForRailEnvDirectionDependent
 
 gin.external_configurable(TreeObsForRailEnv)
 gin.external_configurable(GlobalObsForRailEnv)
@@ -44,7 +33,9 @@ ModelCatalog.register_custom_preprocessor("tree_obs_prep", CustomPreprocessor)
 ModelCatalog.register_custom_preprocessor("global_obs_prep", TupleFlatteningPreprocessor)
 ModelCatalog.register_custom_preprocessor("conv_obs_prep", ConvModelPreprocessor)
 ModelCatalog.register_custom_model("conv_model", ConvModelGlobalObs)
-ray.init()#object_store_memory=150000000000, redis_max_memory=30000000000)
+ray.init()  # object_store_memory=150000000000, redis_max_memory=30000000000)
+
+__file_dirname__ = os.path.dirname(os.path.realpath(__file__))
 
 
 def train(config, reporter):
@@ -52,8 +43,8 @@ def train(config, reporter):
 
     set_seed(config['seed'], config['seed'], config['seed'])
 
-    config['map_width']= 20
-    config['map_height']= 10
+    config['map_width'] = 20
+    config['map_height'] = 10
     config['n_agents'] = 8
 
     # Example configuration to generate a random rail
@@ -101,7 +92,6 @@ def train(config, reporter):
     else:
         raise ValueError("Undefined observation space")
 
-
     act_space = gym.spaces.Discrete(4)
 
     # Dict with the different policies to train
@@ -112,7 +102,6 @@ def train(config, reporter):
     def policy_mapping_fn(agent_id):
         return config['policy_folder_name'].format(**locals())
 
-
     # Trainer configuration
     trainer_config = DEFAULT_CONFIG.copy()
     if config['conv_model']:
@@ -121,8 +110,8 @@ def train(config, reporter):
         trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor}
 
     trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
-                                  "policy_mapping_fn": policy_mapping_fn,
-                                  "policies_to_train": list(policy_graphs.keys())}
+                                    "policy_mapping_fn": policy_mapping_fn,
+                                    "policies_to_train": list(policy_graphs.keys())}
     trainer_config["horizon"] = config['horizon']
 
     trainer_config["num_workers"] = 0
@@ -167,7 +156,6 @@ def train(config, reporter):
 def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                    map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder,
                    entropy_coeff, seed, conv_model):
-
     tune.run(
         train,
         name=name,
@@ -189,12 +177,15 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
             "cpu": 2,
             "gpu": 0.0
         },
+        verbose=2,
         local_dir=local_dir
     )
 
 
 if __name__ == '__main__':
     gin.external_configurable(tune.grid_search)
-    dir = '/mount/SDC/flatland/baselines/RLLib_training/experiment_configs/observation_benchmark_loaded_env'  # To Modify
-    gin.parse_config_file(dir + '/config.gin')
+    with path('RLLib_training.experiment_configs.observation_benchmark_loaded_env', 'config.gin') as f:
+        gin.parse_config_file(f)
+
+    dir = os.path.join(__file_dirname__, 'experiment_configs', 'observation_benchmark_loaded_env')
     run_experiment(local_dir=dir)
diff --git a/requirements_RLLib_training.txt b/requirements_RLLib_training.txt
new file mode 100644
index 0000000..b147984
--- /dev/null
+++ b/requirements_RLLib_training.txt
@@ -0,0 +1,6 @@
+#ray==0.7.0
+gym==0.12.5
+opencv-python==4.1.0.25
+#tensorflow==1.13.1
+lz4==2.1.10
+gin-config==0.1.4
\ No newline at end of file
diff --git a/requirements_torch_training.txt b/requirements_torch_training.txt
new file mode 100644
index 0000000..2bce630
--- /dev/null
+++ b/requirements_torch_training.txt
@@ -0,0 +1 @@
+torch==1.1.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..20ddab1
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,52 @@
+import os
+
+from setuptools import setup, find_packages
+
+# TODO: setup does not support installation from url, move to requirements*.txt
+# TODO: @master as soon as mr is merged on flatland.
+os.system('pip install git+https://gitlab.aicrowd.com/flatland/flatland.git@57-access-resources-through-importlib_resources')
+
+install_reqs = []
+# TODO: include requirements_RLLib_training.txt
+requirements_paths = ['requirements_torch_training.txt'] #, 'requirements_RLLib_training.txt']
+for requirements_path in requirements_paths:
+    with open(requirements_path, 'r') as f:
+        install_reqs += [
+            s for s in [
+                line.strip(' \n') for line in f
+            ] if not s.startswith('#') and s != ''
+        ]
+requirements = install_reqs
+setup_requirements = install_reqs
+test_requirements = install_reqs
+
+setup(
+    author="S.P. Mohanty",
+    author_email='mohanty@aicrowd.com',
+    classifiers=[
+        'Development Status :: 2 - Pre-Alpha',
+        'Intended Audience :: Developers',
+        'Natural Language :: English',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+    ],
+    description="Multi Agent Reinforcement Learning on Trains",
+    entry_points={
+        'console_scripts': [
+            'flatland=flatland.cli:main',
+        ],
+    },
+    install_requires=requirements,
+    long_description='',
+    include_package_data=True,
+    keywords='flatland-baselines',
+    name='flatland-rl-baselines',
+    packages=find_packages('.'),
+    data_files=[],
+    setup_requires=setup_requirements,
+    test_suite='tests',
+    tests_require=test_requirements,
+    url='https://gitlab.aicrowd.com/flatland/baselines',
+    version='0.1.1',
+    zip_safe=False,
+)
diff --git a/torch_training/Nets/__init__.py b/torch_training/Nets/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/torch_training/dueling_double_dqn.py b/torch_training/dueling_double_dqn.py
index 3b98a3a..6c54e4e 100644
--- a/torch_training/dueling_double_dqn.py
+++ b/torch_training/dueling_double_dqn.py
@@ -8,7 +8,7 @@ import torch
 import torch.nn.functional as F
 import torch.optim as optim
 
-from model import QNetwork, QNetwork2
+from torch_training.model import QNetwork, QNetwork2
 
 BUFFER_SIZE = int(1e5)  # replay buffer size
 BATCH_SIZE = 512  # minibatch size
diff --git a/torch_training/railway/__init__.py b/torch_training/railway/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/torch_training/training_navigation.py b/torch_training/training_navigation.py
index 1d12b53..252ff7c 100644
--- a/torch_training/training_navigation.py
+++ b/torch_training/training_navigation.py
@@ -1,17 +1,20 @@
+import os
 import random
 from collections import deque
 
 import numpy as np
 import torch
 
-from dueling_double_dqn import Agent
 from flatland.envs.generators import complex_rail_generator
 from flatland.envs.rail_env import RailEnv
 from flatland.utils.rendertools import RenderTool
+from torch_training.dueling_double_dqn import Agent
 
 random.seed(1)
 np.random.seed(1)
 
+__file_dirname__ = os.path.dirname(os.path.realpath(__file__))
+
 # Example generate a rail given a manual specification,
 # a map of tuples (cell_type, rotation)
 transition_probability = [15,  # empty cell - Case 0
@@ -42,7 +45,7 @@ env = RailEnv(width=15,
 
 env = RailEnv(width=10,
               height=20)
-env.load("./railway/complex_scene.pkl")
+env.load_resource('torch_training.railway', "complex_scene.pkl")
 
 env = RailEnv(width=15,
               height=15,
@@ -70,7 +73,7 @@ action_prob = [0] * action_size
 agent_obs = [None] * env.get_num_agents()
 agent_next_obs = [None] * env.get_num_agents()
 agent = Agent(state_size, action_size, "FC", 0)
-#agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint15000.pth'))
+# agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint15000.pth'))
 
 demo = False
 
@@ -131,7 +134,8 @@ for trials in range(1, n_trials + 1):
     final_obs_next = obs.copy()
 
     for a in range(env.get_num_agents()):
-        data, distance, agent_data = env.obs_builder.split_tree(tree=np.array(obs[a]), num_features_per_node=7, current_depth=0)
+        data, distance, agent_data = env.obs_builder.split_tree(tree=np.array(obs[a]), num_features_per_node=7,
+                                                                current_depth=0)
         data = norm_obs_clip(data)
         distance = norm_obs_clip(distance)
 
@@ -147,14 +151,12 @@ for trials in range(1, n_trials + 1):
     # Run episode
     for step in range(360):
         if demo:
-
-            env_renderer.renderEnv(show=True,show_observations=False)
+            env_renderer.renderEnv(show=True, show_observations=False)
         # print(step)
         # Action
         for a in range(env.get_num_agents()):
             if demo:
                 eps = 1
-            # action = agent.act(np.array(obs[a]), eps=eps)
             action = agent.act(agent_obs[a], eps=eps)
             action_prob[action] += 1
             action_dict.update({a: action})
@@ -163,10 +165,10 @@ for trials in range(1, n_trials + 1):
         next_obs, all_rewards, done, _ = env.step(action_dict)
         for a in range(env.get_num_agents()):
             data, distance, agent_data = env.obs_builder.split_tree(tree=np.array(next_obs[a]), num_features_per_node=7,
-                                                        current_depth=0)
+                                                                    current_depth=0)
             data = norm_obs_clip(data)
             distance = norm_obs_clip(distance)
-            next_obs[a] = np.concatenate((np.concatenate((data, distance)),agent_data))
+            next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
 
         time_obs.append(next_obs)
 
@@ -196,12 +198,13 @@ for trials in range(1, n_trials + 1):
     scores.append(np.mean(scores_window))
     dones_list.append((np.mean(done_window)))
 
-    print('\rTraining {} Agents.\t Episode {}\t Average Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
-              env.get_num_agents(),
-              trials,
-              np.mean(scores_window),
-              100 * np.mean(done_window),
-              eps, action_prob / np.sum(action_prob)), end=" ")
+    print(
+        '\rTraining {} Agents.\t Episode {}\t Average Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+            env.get_num_agents(),
+            trials,
+            np.mean(scores_window),
+            100 * np.mean(done_window),
+            eps, action_prob / np.sum(action_prob)), end=" ")
 
     if trials % 100 == 0:
         print(
@@ -213,5 +216,5 @@ for trials in range(1, n_trials + 1):
                 eps,
                 action_prob / np.sum(action_prob)))
         torch.save(agent.qnetwork_local.state_dict(),
-                   './Nets/avoid_checkpoint' + str(trials) + '.pth')
+                   os.path.join(__file_dirname__, 'Nets', 'avoid_checkpoint' + str(trials) + '.pth'))
         action_prob = [1] * 4
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..3c22b56
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,37 @@
+[tox]
+; TODO py36, flake8
+envlist = py37
+
+[travis]
+python =
+; TODO: py36
+    3.7: py37
+
+[testenv]
+whitelist_externals = sh
+                      pip
+                      python
+setenv =
+    PYTHONPATH = {toxinidir}
+passenv =
+    DISPLAY
+; HTTP_PROXY+HTTPS_PROXY required behind corporate proxies
+    HTTP_PROXY
+    HTTPS_PROXY
+deps =
+    -r{toxinidir}/requirements_torch_training.txt
+commands =
+    python torch_training/training_navigation.py
+
+[flake8]
+max-line-length = 120
+ignore = E121 E126 E123 E128 E133 E226 E241 E242 E704 W291 W293 W391 W503 W504 W505
+
+[testenv:flake8]
+basepython = python
+passenv = DISPLAY
+deps =
+    -r{toxinidir}/requirements_torch_training.txt
+commands =
+    flake8 torch_training
+
-- 
GitLab