Showing 306 additions and 332 deletions
import random

import gym
import numpy as np
import ray
import ray.rllib.agents.ppo.ppo as ppo
from RailEnvRLLibWrapper import RailEnvRLLibWrapper
from flatland.envs.generators import complex_rail_generator
from ray.rllib.agents.ppo.ppo import PPOTrainer
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph
from ray.rllib.models import ModelCatalog
from ray.tune.logger import pretty_print

from RLLib_training.custom_preprocessors import CustomPreprocessor

ModelCatalog.register_custom_preprocessor("my_prep", CustomPreprocessor)
ray.init()


def train(config):
    print('Init Env')
    random.seed(1)
    np.random.seed(1)

    # Transition probabilities per cell type (kept for reference; not used by complex_rail_generator below)
    transition_probability = [15,  # empty cell - Case 0
                              5,   # Case 1 - straight
                              5,   # Case 2 - simple switch
                              1,   # Case 3 - diamond crossing
                              1,   # Case 4 - single slip
                              1,   # Case 5 - double slip
                              1,   # Case 6 - symmetrical
                              0,   # Case 7 - dead end
                              1,   # Case 1b (8)  - simple turn right
                              1,   # Case 1c (9)  - simple turn left
                              1]   # Case 2b (10) - simple switch mirrored

    # Example: generate a random rail
    env_config = {"width": 20,
                  "height": 20,
                  "rail_generator": complex_rail_generator(nr_start_goal=5, min_dist=5, max_dist=99999, seed=0),
                  "number_of_agents": 5}

    obs_space = gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(105,))
    act_space = gym.spaces.Discrete(4)

    # Dict with the different policies to train
    policy_graphs = {
        "ppo_policy": (PPOPolicyGraph, obs_space, act_space, {})
    }

    def policy_mapping_fn(agent_id):
        return "ppo_policy"

    agent_config = ppo.DEFAULT_CONFIG.copy()
    agent_config['model'] = {"fcnet_hiddens": [32, 32], "custom_preprocessor": "my_prep"}
    agent_config['multiagent'] = {"policy_graphs": policy_graphs,
                                  "policy_mapping_fn": policy_mapping_fn,
                                  "policies_to_train": list(policy_graphs.keys())}
    agent_config["horizon"] = 50
    agent_config["num_workers"] = 0
    # agent_config["sample_batch_size"] = 1000
    # agent_config["num_cpus_per_worker"] = 40
    # agent_config["num_gpus"] = 2.0
    # agent_config["num_gpus_per_worker"] = 2.0
    # agent_config["num_cpus_for_driver"] = 5
    # agent_config["num_envs_per_worker"] = 15
    agent_config["env_config"] = env_config
    # agent_config["batch_mode"] = "complete_episodes"

    ppo_trainer = PPOTrainer(env=RailEnvRLLibWrapper, config=agent_config)

    for i in range(100000 + 2):
        print("== Iteration", i, "==")
        print("-- PPO --")
        print(pretty_print(ppo_trainer.train()))
        # if i % config['save_every'] == 0:
        #     checkpoint = ppo_trainer.save()
        #     print("checkpoint saved at", checkpoint)


train({})
import os
import gin
import gym
from flatland.envs.predictions import DummyPredictorForRailEnv
from importlib_resources import path
# Import PPO trainer: we can replace these imports by any other trainer from RLLib.
from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG
from ray.rllib.agents.ppo.ppo import PPOTrainer as Trainer
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph as PolicyGraph
from ray.rllib.models import ModelCatalog
gin.external_configurable(DummyPredictorForRailEnv)
import ray
from ray.tune.logger import UnifiedLogger
from ray.tune.logger import pretty_print
from RailEnvRLLibWrapper import RailEnvRLLibWrapper
from custom_models import ConvModelGlobalObs
from custom_preprocessors import CustomPreprocessor, ConvModelPreprocessor
import tempfile
from ray import tune
from ray.rllib.utils.seed import seed as set_seed
from flatland.envs.observations import TreeObsForRailEnv, GlobalObsForRailEnv, \
LocalObsForRailEnv, GlobalObsForRailEnvDirectionDependent
gin.external_configurable(TreeObsForRailEnv)
gin.external_configurable(GlobalObsForRailEnv)
gin.external_configurable(LocalObsForRailEnv)
gin.external_configurable(GlobalObsForRailEnvDirectionDependent)
from ray.rllib.models.preprocessors import TupleFlatteningPreprocessor
import numpy as np
ModelCatalog.register_custom_preprocessor("tree_obs_prep", CustomPreprocessor)
ModelCatalog.register_custom_preprocessor("global_obs_prep", TupleFlatteningPreprocessor)
ModelCatalog.register_custom_preprocessor("conv_obs_prep", ConvModelPreprocessor)
ModelCatalog.register_custom_model("conv_model", ConvModelGlobalObs)
ray.init() # object_store_memory=150000000000, redis_max_memory=30000000000)
__file_dirname__ = os.path.dirname(os.path.realpath(__file__))
def on_episode_start(info):
    episode = info['episode']
    map_width = info['env'].envs[0].width
    map_height = info['env'].envs[0].height
    episode.horizon = map_width + map_height


# def on_episode_step(info):
#     episode = info['episode']
#     print('#########################', episode._agent_reward_history)


def on_episode_end(info):
    # Custom metric: total reward summed over all agents, normalized by
    # (number of agents * 3 * episode horizon).
    episode = info['episode']
    score = 0
    for k, v in episode._agent_reward_history.items():
        score += np.sum(v)
    score /= (len(episode._agent_reward_history) * 3 * episode.horizon)
    episode.custom_metrics["score"] = score
def train(config, reporter):
    print('Init Env')

    set_seed(config['seed'], config['seed'], config['seed'])
    config['map_height'] = config['map_width']

    # Example configuration to generate a random rail
    env_config = {"width": config['map_width'],
                  "height": config['map_height'],
                  "rail_generator": config["rail_generator"],
                  "nr_extra": config["nr_extra"],
                  "number_of_agents": config['n_agents'],
                  "seed": config['seed'],
                  "obs_builder": config['obs_builder'],
                  "min_dist": config['min_dist'],
                  # "predictor": config["predictor"],
                  "step_memory": config["step_memory"]}

    # Observation space and action space definitions
    if isinstance(config["obs_builder"], TreeObsForRailEnv):
        obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(168,)),))
        # gym.spaces.Box(low=0, high=1, shape=(config['n_agents'],)),
        # gym.spaces.Box(low=0, high=1, shape=(20, config['n_agents'])),) * config['step_memory'])
        # if config['predictor'] is None:
        #     obs_space = gym.spaces.Tuple(
        #         (gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(147,)),) * config['step_memory'])
        # else:
        #     obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(147,)),
        #                                   gym.spaces.Box(low=0, high=1, shape=(config['n_agents'],)),
        #                                   gym.spaces.Box(low=0, high=1, shape=(20, config['n_agents'])),) * config['step_memory'])
        preprocessor = "tree_obs_prep"

    elif isinstance(config["obs_builder"], GlobalObsForRailEnv):
        obs_space = gym.spaces.Tuple((
            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 16)),
            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 8)),
            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 2))))
        if config['conv_model']:
            preprocessor = "conv_obs_prep"
        else:
            preprocessor = "global_obs_prep"

    elif isinstance(config["obs_builder"], GlobalObsForRailEnvDirectionDependent):
        obs_space = gym.spaces.Tuple((
            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 16)),
            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 5)),
            gym.spaces.Box(low=0, high=1, shape=(config['map_height'], config['map_width'], 2))))
        if config['conv_model']:
            preprocessor = "conv_obs_prep"
        else:
            preprocessor = "global_obs_prep"

    elif isinstance(config["obs_builder"], LocalObsForRailEnv):
        view_radius = config["obs_builder"].view_radius
        obs_space = gym.spaces.Tuple((
            gym.spaces.Box(low=0, high=1, shape=(2 * view_radius + 1, 2 * view_radius + 1, 16)),
            gym.spaces.Box(low=0, high=1, shape=(2 * view_radius + 1, 2 * view_radius + 1, 2)),
            gym.spaces.Box(low=0, high=1, shape=(2 * view_radius + 1, 2 * view_radius + 1, 4)),
            gym.spaces.Box(low=0, high=1, shape=(4,))))
        preprocessor = "global_obs_prep"

    else:
        raise ValueError("Undefined observation space")

    act_space = gym.spaces.Discrete(5)

    # Dict with the different policies to train
    policy_graphs = {
        config['policy_folder_name'].format(**locals()): (PolicyGraph, obs_space, act_space, {})
    }

    def policy_mapping_fn(agent_id):
        return config['policy_folder_name'].format(**locals())

    # Trainer configuration
    trainer_config = DEFAULT_CONFIG.copy()
    if config['conv_model']:
        trainer_config['model'] = {"custom_model": "conv_model", "custom_preprocessor": preprocessor}
    else:
        trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor}

    trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
                                    "policy_mapping_fn": policy_mapping_fn,
                                    "policies_to_train": list(policy_graphs.keys())}
    trainer_config["horizon"] = 1.5 * (config['map_width'] + config['map_height'])  # config['horizon']
    trainer_config["num_workers"] = 0
    trainer_config["num_cpus_per_worker"] = 7
    trainer_config["num_gpus"] = 0.0
    trainer_config["num_gpus_per_worker"] = 0.0
    trainer_config["num_cpus_for_driver"] = 1
    trainer_config["num_envs_per_worker"] = 1
    trainer_config['entropy_coeff'] = config['entropy_coeff']
    trainer_config["env_config"] = env_config
    trainer_config["batch_mode"] = "complete_episodes"
    trainer_config['simple_optimizer'] = False
    trainer_config['postprocess_inputs'] = True
    trainer_config['log_level'] = 'WARN'
    trainer_config['num_sgd_iter'] = 10
    trainer_config['clip_param'] = 0.2
    trainer_config['kl_coeff'] = config['kl_coeff']
    trainer_config['lambda'] = config['lambda_gae']
    trainer_config['callbacks'] = {
        "on_episode_start": tune.function(on_episode_start),
        "on_episode_end": tune.function(on_episode_end)
    }

    def logger_creator(conf):
        """Creates a Unified logger with a default logdir prefix
        containing the agent name and the env id.
        """
        logdir = config['policy_folder_name'].format(**locals())
        logdir = tempfile.mkdtemp(prefix=logdir, dir=config['local_dir'])
        return UnifiedLogger(conf, logdir, None)

    logger = logger_creator

    trainer = Trainer(env=RailEnvRLLibWrapper, config=trainer_config, logger_creator=logger)

    for i in range(100000 + 2):
        print("== Iteration", i, "==")
        print(pretty_print(trainer.train()))
        if i % config['save_every'] == 0:
            checkpoint = trainer.save()
            print("checkpoint saved at", checkpoint)
        reporter(num_iterations_trained=trainer._iteration)
@gin.configurable
def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
                   map_width, map_height, horizon, policy_folder_name, local_dir, obs_builder,
                   entropy_coeff, seed, conv_model, rail_generator, nr_extra, kl_coeff, lambda_gae,
                   step_memory, min_dist):
    tune.run(
        train,
        name=name,
        stop={"num_iterations_trained": num_iterations},
        config={"n_agents": n_agents,
                "hidden_sizes": hidden_sizes,  # Array containing the sizes of the network layers
                "save_every": save_every,
                "map_width": map_width,
                "map_height": map_height,
                "local_dir": local_dir,
                "horizon": horizon,  # Max number of time steps
                'policy_folder_name': policy_folder_name,
                "obs_builder": obs_builder,
                "entropy_coeff": entropy_coeff,
                "seed": seed,
                "conv_model": conv_model,
                "rail_generator": rail_generator,
                "nr_extra": nr_extra,
                "kl_coeff": kl_coeff,
                "lambda_gae": lambda_gae,
                "min_dist": min_dist,
                # "predictor": predictor,
                "step_memory": step_memory
                },
        resources_per_trial={
            "cpu": 8,
            "gpu": 0
        },
        verbose=2,
        local_dir=local_dir
    )


if __name__ == '__main__':
    gin.external_configurable(tune.grid_search)
    # with path('RLLib_training.experiment_configs.n_agents_experiment', 'config.gin') as f:
    #     gin.parse_config_file(f)
    gin.parse_config_file('/home/guillaume/flatland/baselines/RLLib_training/experiment_configs/score_metric_test/config.gin')
    dir = '/home/guillaume/flatland/baselines/RLLib_training/experiment_configs/score_metric_test'
    # dir = os.path.join(__file_dirname__, 'experiment_configs', 'experiment_agent_memory')
    run_experiment(local_dir=dir)
{'Test_0':[20,20,20,3],
'Test_1':[10,10,3,4321],
'Test_2':[10,10,5,123],
'Test_3':[50,50,5,21],
'Test_4':[50,50,20,85],
'Test_5':[100,100,5,436],
'Test_6':[100,100,20,6487],
'Test_7':[100,100,50,567],
'Test_8':[100,10,20,3245],
'Test_9':[10,100,20,632]
}
torch==1.1.0
git+https://gitlab.aicrowd.com/flatland/flatland.git
importlib-metadata>=0.17
importlib_resources>=1.0.2
torch>=1.1.0
import time

import numpy as np

from utils.misc_utils import RandomAgent, run_test

with open('parameters.txt', 'r') as inf:
    parameters = eval(inf.read())

# Parameter initialization
features_per_node = 9
tree_depth = 3
nodes = 0
for i in range(tree_depth + 1):
    nodes += np.power(4, i)
state_size = features_per_node * nodes * 2
action_size = 5
action_dict = dict()
nr_trials_per_test = 100
test_results = []
test_times = []
test_dones = []

agent = RandomAgent(state_size, action_size)

start_time_scoring = time.time()
test_idx = 0
score_board = []
for test_nr in parameters:
    current_parameters = parameters[test_nr]
    test_score, test_dones, test_time = run_test(current_parameters, agent, test_nr=test_idx)
    print('---------')
    print(' RESULTS')
    print('---------')
    print('{} score was {:.3f} with {:.2f}% environments solved. Test took {} Seconds to complete.\n\n\n'.format(
        test_nr,
        np.mean(test_score), np.mean(test_dones) * 100, test_time))
    test_idx += 1
    score_board.append([test_score, test_dones, test_times])
# Local Submission Scoring
The files in this repo are supposed to help you score your agent's behavior locally.
**WARNING**: This is not the actual submission scoring --> results will differ from the scores you achieve here. But the scoring setup is very similar to this one.
**Beta Stage**: The scoring function here is still under development, use it with caution.
## Introduction
This repo contains a very basic setup for testing your own agent/algorithm against the Flatland scoring setup.
The repo contains 4 important files:

- `generate_tests.py` Pre-generates the test files for faster testing
- `score_tests.py` Scores your agent on the generated test files
- `show_test.py` Shows samples of the generated test files
- `parameters.txt` Parameters for generating the test files --> these differ from the ones used in the challenge submission scoring

To score your agent locally, follow the steps below.
## Parameters used for Level generation
| Test Nr. | X-Dim | Y-Dim | Nr. Agents | Random Seed |
|:---------:|:------:|:------:|:-----------:|:------------:|
| Test 0 | 10 | 10 | 1 | 3 |
| Test 1 | 10 | 10 | 3 | 3 |
| Test 2 | 10 | 10 | 5 | 3 |
| Test 3 | 50 | 10 | 10 | 3 |
| Test 4 | 20 | 50 | 10 | 3 |
| Test 5 | 20 | 20 | 15 | 3 |
| Test 6 | 50 | 50 | 10 | 3 |
| Test 7 | 50 | 50 | 40 | 3 |
| Test 8 | 100 | 100 | 10 | 3 |
| Test 9 | 100 | 100 | 50 | 3 |
These can be changed if you would like to test your agent's behavior on different settings; the sketch below shows how each row maps to an entry in `parameters.txt`.
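
Each table row corresponds to one entry in `parameters.txt`, which the scripts in this repo read as a plain Python dict literal of the form `[x_dim, y_dim, n_agents, seed]`. A minimal sketch of how the file is loaded (mirroring what `generate_tests.py` and `score_tests.py` do):

```
# parameters.txt holds a Python dict literal: test name -> [x_dim, y_dim, n_agents, seed]
with open('parameters.txt', 'r') as inf:
    parameters = eval(inf.read())

x_dim, y_dim, n_agents, seed = parameters['Test_0']  # e.g. 10, 10, 1, 3
```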
## Generate the test files
To generate the set of test files, just run `python generate_tests.py`.
This generates pickle files of the levels to test on and places them in the corresponding folders.
## Run Test
To run the tests you have to modify the `score_tests.py` file to load your agent and the necessary predictor and observation builder.
The following lines have to be replaced by your code:
```
# Load your agent
agent = YourAgent
agent.load(Your_Checkpoint)
# Load the necessary Observation Builder and Predictor
predictor = ShortestPathPredictorForRailEnv()
observation_builder = TreeObsForRailEnv(max_depth=tree_depth, predictor=predictor)
```
The agent and the observation builder, as well as an observation wrapper, can be passed to the test function like this:
```
test_score, test_dones, test_time = run_test(current_parameters, agent, observation_builder=your_observation_builder,
observation_wrapper=your_observation_wrapper,
test_nr=test_nr, nr_trials_per_test=10)
```
In order to speed up the test time you can limit the number of trials per test (`nr_trials_per_test=10`). After you have made these changes to the file you can run `python score_tests.py`, which will produce an output similar to this:
```
Running Test_0 with (x_dim,y_dim) = (10,10) and 1 Agents.
Progress: |********************| 100.0% Complete
Test_0 score was -0.380 with 100.00% environments solved. Test took 0.62 Seconds to complete.
Running Test_1 with (x_dim,y_dim) = (10,10) and 3 Agents.
Progress: |********************| 100.0% Complete
Test_1 score was -1.540 with 80.00% environments solved. Test took 2.67 Seconds to complete.
Running Test_2 with (x_dim,y_dim) = (10,10) and 5 Agents.
Progress: |********************| 100.0% Complete
Test_2 score was -2.460 with 80.00% environments solved. Test took 4.48 Seconds to complete.
Running Test_3 with (x_dim,y_dim) = (50,10) and 10 Agents.
Progress: |**__________________| 10.0% Complete
```
The score is computed by
```
score = sum(mean(all_rewards))/max_steps
```
i.e., the mean reward over all agents is taken at each time step, these means are summed over all time steps, and the result is normalized by the maximum number of allowed steps for the level size. The maximum number of allowed steps is
```
max_steps = mult_factor * (env.height+env.width)
```
where `mult_factor` is a multiplication factor that allows more time when the difficulty is too high.
The number of solved envs is the percentage of episodes that terminated with all agents done.
How these two numbers are combined into your final score will be posted on the [flatland page](https://www.aicrowd.com/organizers/sbb/challenges/flatland-challenge).
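
For illustration, the sketch below reproduces this scoring rule in plain Python. The function name, the argument layout, and the `mult_factor=1.5` default are assumptions made for the example (the RLLib training script above uses `1.5 * (width + height)` as its horizon), not the exact implementation in `utils/misc_utils.py`:

```
import numpy as np

def episode_score(all_rewards, env_height, env_width, mult_factor=1.5):
    """all_rewards: one list of per-agent rewards for every time step of the episode."""
    max_steps = int(mult_factor * (env_height + env_width))   # time budget for this level size
    step_means = [np.mean(step) for step in all_rewards]      # mean reward over agents, per step
    return np.sum(step_means) / max_steps                     # normalize by the allowed steps

# Example: a 10x10 level, two agents, three time steps -> (-1 - 0.5 + 0) / 30 = -0.05
print(episode_score([[-1, -1], [-1, 0], [0, 0]], 10, 10))
```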
## Ignore everything in this directory
*
# Except this file
!.gitignore
import time

import numpy as np

from utils.misc_utils import create_testfiles

with open('parameters.txt', 'r') as inf:
    parameters = eval(inf.read())

# Parameter initialization
features_per_node = 9
tree_depth = 3
nodes = 0
for i in range(tree_depth + 1):
    nodes += np.power(4, i)
state_size = features_per_node * nodes * 2
action_size = 5
action_dict = dict()
nr_trials_per_test = 100

test_idx = 0
for test_nr in parameters:
    current_parameters = parameters[test_nr]
    create_testfiles(current_parameters, test_nr, nr_trials_per_test=100)
{'Test_0':[10,10,1,3],
'Test_1':[10,10,3,3],
'Test_2':[10,10,5,3],
'Test_3':[50,10,10,3],
'Test_4':[20,50,10,3],
'Test_5':[20,20,15,3],
'Test_6':[50,50,10,3],
'Test_7':[50,50,40,3],
'Test_8':[100,100,10,3],
'Test_9':[100,100,50,3]
}
import time

import numpy as np
import torch
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv

from torch_training.dueling_double_dqn import Agent
from scoring.utils.misc_utils import run_test
from utils.observation_utils import normalize_observation

with open('parameters.txt', 'r') as inf:
    parameters = eval(inf.read())

# Parameter initialization
features_per_node = 9
tree_depth = 3
nodes = 0
for i in range(tree_depth + 1):
    nodes += np.power(4, i)
state_size = features_per_node * nodes
action_size = 5
action_dict = dict()
nr_trials_per_test = 100
test_results = []
test_times = []
test_dones = []
sequential_agent_test = False

# Load your agent
agent = Agent(state_size, action_size)
agent.qnetwork_local.load_state_dict(torch.load('../torch_training/Nets/avoid_checkpoint500.pth'))

# Load the necessary Observation Builder and Predictor
predictor = ShortestPathPredictorForRailEnv()
observation_builder = TreeObsForRailEnv(max_depth=tree_depth, predictor=predictor)

start_time_scoring = time.time()
score_board = []
for test_nr in parameters:
    current_parameters = parameters[test_nr]
    test_score, test_dones, test_time = run_test(current_parameters, agent, observation_builder=observation_builder,
                                                 observation_wrapper=normalize_observation,
                                                 test_nr=test_nr, nr_trials_per_test=10)
    print('{} score was {:.3f} with {:.2f}% environments solved. Test took {:.2f} Seconds to complete.\n'.format(
        test_nr,
        np.mean(test_score), np.mean(test_dones) * 100, test_time))
    score_board.append([np.mean(test_score), np.mean(test_dones) * 100, test_time])

print('---------')
print(' RESULTS')
print('---------')
test_idx = 0
for test_nr in parameters:
    print('{} score was {:.3f}\twith {:.2f}% environments solved.\tTest took {:.2f} Seconds to complete.'.format(
        test_nr, score_board[test_idx][0], score_board[test_idx][1], score_board[test_idx][2]))
    test_idx += 1
import time

import numpy as np

from utils.misc_utils import render_test

with open('parameters.txt', 'r') as inf:
    parameters = eval(inf.read())

# Parameter initialization
features_per_node = 9
tree_depth = 3
nodes = 0
for i in range(tree_depth + 1):
    nodes += np.power(4, i)
state_size = features_per_node * nodes * 2
action_size = 5
action_dict = dict()
nr_trials_per_test = 100

test_idx = 0
for test_nr in parameters:
    current_parameters = parameters[test_nr]
    render_test(current_parameters, test_nr, nr_examples=2)