From ea93665fcb6943ea4999ab8bd8faf30a8c2a2049 Mon Sep 17 00:00:00 2001
From: Guillaume Mollard <guillaume@iccluster028.iccluster.epfl.ch>
Date: Fri, 14 Jun 2019 10:05:42 +0200
Subject: [PATCH] Correct score metric; make min_dist configurable

---
 RLLib_training/RailEnvRLLibWrapper.py |  3 ++-
 RLLib_training/train_experiment.py    | 20 +++++++++++---------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/RLLib_training/RailEnvRLLibWrapper.py b/RLLib_training/RailEnvRLLibWrapper.py
index e36f383..8f7e1ec 100644
--- a/RLLib_training/RailEnvRLLibWrapper.py
+++ b/RLLib_training/RailEnvRLLibWrapper.py
@@ -20,7 +20,8 @@ class RailEnvRLLibWrapper(MultiAgentEnv):
         self.predefined_env = False
 
         if config['rail_generator'] == "complex_rail_generator":
-            self.rail_generator = complex_rail_generator(nr_start_goal=config['number_of_agents'], min_dist=5,
+            self.rail_generator = complex_rail_generator(nr_start_goal=config['number_of_agents'],
+                                                         min_dist=config['min_dist'],
                                                          nr_extra=config['nr_extra'],
                                                          seed=config['seed'] * (1 + vector_index))
         elif config['rail_generator'] == "random_rail_generator":
diff --git a/RLLib_training/train_experiment.py b/RLLib_training/train_experiment.py
index b674d40..cc8debe 100644
--- a/RLLib_training/train_experiment.py
+++ b/RLLib_training/train_experiment.py
@@ -62,8 +62,8 @@ def on_episode_end(info):
     episode = info['episode']
     score = 0
     for k, v in episode._agent_reward_history.items():
-        score += np.mean(v)
-    score /= (len(episode._agent_reward_history) * 1.5 * episode.horizon)
+        score += np.sum(v)
+    score /= (len(episode._agent_reward_history) * 3 * episode.horizon)
     episode.custom_metrics["score"] = score
 
 def train(config, reporter):
@@ -80,6 +80,7 @@ def train(config, reporter):
                   "number_of_agents": config['n_agents'],
                   "seed": config['seed'],
                   "obs_builder": config['obs_builder'],
+                  "min_dist": config['min_dist'],
                   # "predictor": config["predictor"],
                   "step_memory": config["step_memory"]}
 
@@ -154,9 +155,9 @@ def train(config, reporter):
     trainer_config["horizon"] = 1.5 * (config['map_width'] + config['map_height'])#config['horizon']
 
     trainer_config["num_workers"] = 0
-    trainer_config["num_cpus_per_worker"] = 4
-    trainer_config["num_gpus"] = 0.2
-    trainer_config["num_gpus_per_worker"] = 0.2
+    trainer_config["num_cpus_per_worker"] = 7
+    trainer_config["num_gpus"] = 0.0
+    trainer_config["num_gpus_per_worker"] = 0.0
     trainer_config["num_cpus_for_driver"] = 1
     trainer_config["num_envs_per_worker"] = 1
     trainer_config['entropy_coeff'] = config['entropy_coeff']
@@ -203,7 +204,7 @@ def train(config, reporter):
 def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                    map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder,
                    entropy_coeff, seed, conv_model, rail_generator, nr_extra, kl_coeff, lambda_gae,
-                   step_memory):
+                   step_memory, min_dist):
     tune.run(
         train,
         name=name,
@@ -224,11 +225,12 @@ def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                 "nr_extra": nr_extra,
                 "kl_coeff": kl_coeff,
                 "lambda_gae": lambda_gae,
+                "min_dist": min_dist,
                 # "predictor": predictor,
                 "step_memory": step_memory
                 },
         resources_per_trial={
-            "cpu": 2,
+            "cpu": 8,
             "gpu": 0
         },
         verbose=2,
@@ -240,7 +242,7 @@ if __name__ == '__main__':
     gin.external_configurable(tune.grid_search)
     # with path('RLLib_training.experiment_configs.n_agents_experiment', 'config.gin') as f:
     #     gin.parse_config_file(f)
-    gin.parse_config_file('/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/predictions_test/config.gin')
-    dir = '/home/guillaume/EPFL/Master_Thesis/flatland/baselines/RLLib_training/experiment_configs/predictions_test'
+    gin.parse_config_file('/home/guillaume/flatland/baselines/RLLib_training/experiment_configs/score_metric_test/config.gin')
+    dir = '/home/guillaume/flatland/baselines/RLLib_training/experiment_configs/score_metric_test'
     # dir = os.path.join(__file_dirname__, 'experiment_configs', 'experiment_agent_memory')
     run_experiment(local_dir=dir)
-- 
GitLab