
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (49)
Showing with 3815 additions and 670 deletions
*pycache*
*ppo_policy*
torch_training/Nets/
include AUTHORS.rst
include AUTHORS.md
include CONTRIBUTING.rst
include HISTORY.rst
include changelog.md
include LICENSE
include README.rst
include README.md
include requirements_torch_training.txt
include requirements_RLLib_training.txt
@@ -12,4 +11,4 @@ recursive-include tests *
recursive-exclude * __pycache__
recursive-exclude * *.py[co]
recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif
recursive-include docs *.rst *.md conf.py *.jpg *.png *.gif
## Examples of scripts to train agents in the Flatland environment.
# ⚠️ Deprecated repository
# Torch Training
This repository is deprecated! Please go to:
#### **https://gitlab.aicrowd.com/flatland/flatland-examples**
## Torch Training
The `torch_training` folder shows an example of how to train agents with a DQN implemented in PyTorch.
In the links below you will find introductions to training an agent on Flatland:
@@ -15,10 +20,7 @@ With the above introductions you will solve tasks like these and even more...
![Conflict_Avoidance](https://i.imgur.com/AvBHKaD.gif)
# RLLib Training
The `RLLib_training` folder shows an example of how to train agents with algorithms implemented in the RLLib library, available at <https://github.com/ray-project/ray/tree/master/python/ray/rllib>.
# Sequential Agent
## Sequential Agent
This is a very simple baseline to show you how the `complex_level_generator` generates feasible network configurations.
If you run the `run_test.py` file you will see a simple agent that solves the level by sequentially running each agent along its shortest path.
This is very inefficient, but it solves all the instances generated by `complex_level_generator`. However, when scored for the AIcrowd competition, this agent fails because of the time it needs to solve an episode.
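As a quick reference, the sequential baseline is launched with a single command; the `sequential_agent/` prefix below is an assumption based on where `run_test.py` sits in this repository.
```sh
# Run the sequential shortest-path baseline (path assumed relative to the repository root)
python sequential_agent/run_test.py
```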
......
This repository allows you to run multi-agent training on the Rail Environment with the RLLib library.
## Installation:
To run the scripts of this repository, the deep learning library TensorFlow should be installed, along with the following packages:
```sh
pip install gym ray==0.7.0 gin-config opencv-python lz4 psutil
```
To start a training with different parameters, you can create a folder containing a config.gin file (see the example in `experiment_configs/config_example/config.gin`).
Then, you can modify the config.gin file path at the end of the `train_experiment.py` file.
The results will be stored inside the folder, and the learning curves can be visualized in TensorBoard:
```
tensorboard --logdir=/path/to/folder_containing_config_gin_file
```
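As a sketch of the path change mentioned above, the lines below mirror the `__main__` block of `train_experiment.py` that appears further down in this diff; `my_experiment` is an illustrative folder name you would create under `experiment_configs/`.
```
# End of train_experiment.py: point gin at your own experiment folder
folder_name = 'my_experiment'  # illustrative; the folder must contain a config.gin
gin.parse_config_file(os.path.join(__file_dirname__, 'experiment_configs', folder_name, 'config.gin'))
run_experiment(local_dir=os.path.join(__file_dirname__, 'experiment_configs', folder_name))
```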
## Gin config files
In each config.gin file, all the parameters of the `run_experiment` function have to be specified.
For example, to indicate the number of agents that have to be initialized at the beginning of each simulation, the following line should be added:
```
run_experiment.n_agents = 2
```
If several numbers of agents have to be explored during the experiment, one can pass the following value to the `n_agents` parameter:
```
run_experiment.n_agents = {"grid_search": [2,5]}
```
which is the way to tell the tune library to try several values for a parameter.
To reference a class or an object within gin, you should first register it from the `train_experiment.py` script by adding the following line:
```
gin.external_configurable(TreeObsForRailEnv)
```
and then a `TreeObsForRailEnv` object can be referenced in the `config.gin` file:
```
run_experiment.obs_builder = {"grid_search": [@TreeObsForRailEnv(), @GlobalObsForRailEnv()]}
TreeObsForRailEnv.max_depth = 2
```
Note that `@TreeObsForRailEnv` references the class, while `@TreeObsForRailEnv()` instantiates an object of this class.
More documentation on how to use gin-config can be found in the GitHub repository: https://github.com/google/gin-config
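Putting the pieces above together, a minimal `config.gin` could look like the sketch below. The parameter names are the ones used by `run_experiment` in this repository, the values are only illustrative, and keep in mind that a real file has to assign every `run_experiment` parameter (map size, training coefficients, and so on); the complete example is `experiment_configs/config_example/config.gin`, reproduced further down in this diff.
```
run_experiment.name = "minimal_example"
run_experiment.n_agents = {"grid_search": [2, 5]}
run_experiment.obs_builder = @TreeObsForRailEnv()
TreeObsForRailEnv.max_depth = 2
```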
## Run an example:
To start a training on a 20x20 map, with different numbers of agents initialized at each episode, one can run the `train_experiment.py` script:
```
python RLLib_training/train_experiment.py
```
This will load the gin config file in the folder `experiment_configs/config_example`.
To visualize the result of a training, one can load a training checkpoint and use the learned policy.
This is done in the `render_training_result.py` script. One has to modify the `CHECKPOINT_PATH` at the beginning of this script:
```
CHECKPOINT_PATH = os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'ppo_policy_two_obs_with_predictions_n_agents_4_map_size_20q58l5_f7',
'checkpoint_101', 'checkpoint-101')
```
and load the corresponding gin config file:
```
gin.parse_config_file(os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'config.gin'))
```
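With the checkpoint path and the gin config file set, the rendering script is launched like the training script (assuming it is run from the repository root):
```
python RLLib_training/render_training_result.py
```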
import numpy as np
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.rllib.utils.seed import seed as set_seed
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator, random_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator, random_schedule_generator
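# RLLib multi-agent wrapper around Flatland's RailEnv: it builds the rail and schedule
# generators from the env_config dict, splits each agent's tree observation into
# (data, distance, agent_data) groups, and stops reporting agents once they are done.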
class RailEnvRLLibWrapper(MultiAgentEnv):
def __init__(self, config):
super(MultiAgentEnv, self).__init__()
# Environment ID if num_envs_per_worker > 1
if hasattr(config, "vector_index"):
vector_index = config.vector_index
else:
vector_index = 1
self.predefined_env = False
if config['rail_generator'] == "complex_rail_generator":
self.rail_generator = complex_rail_generator(nr_start_goal=config['number_of_agents'],
min_dist=config['min_dist'],
nr_extra=config['nr_extra'],
seed=config['seed'] * (1 + vector_index))
self.schedule_generator = complex_schedule_generator()
elif config['rail_generator'] == "random_rail_generator":
self.rail_generator = random_rail_generator()
self.schedule_generator = random_schedule_generator()
elif config['rail_generator'] == "load_env":
self.predefined_env = True
self.rail_generator = random_rail_generator()
self.schedule_generator = random_schedule_generator()
else:
            raise ValueError(f'Unknown rail generator: {config["rail_generator"]}')
set_seed(config['seed'] * (1 + vector_index))
self.env = RailEnv(width=config["width"], height=config["height"],
number_of_agents=config["number_of_agents"],
obs_builder_object=config['obs_builder'],
rail_generator=self.rail_generator,
schedule_generator=self.schedule_generator
)
if self.predefined_env:
self.env.load_resource('torch_training.railway', 'complex_scene.pkl')
self.width = self.env.width
self.height = self.env.height
self.step_memory = config["step_memory"]
# needed for the renderer
self.rail = self.env.rail
self.agents = self.env.agents
self.agents_static = self.env.agents_static
self.dev_obs_dict = self.env.dev_obs_dict
def reset(self):
self.agents_done = []
if self.predefined_env:
obs = self.env.reset(False, False)
else:
obs = self.env.reset()
        # RLLib only receives observations of agents that are not done.
o = dict()
for i_agent in range(len(self.env.agents)):
data, distance, agent_data = self.env.obs_builder.split_tree(tree=np.array(obs[i_agent]),
current_depth=0)
o[i_agent] = [data, distance, agent_data]
# needed for the renderer
self.rail = self.env.rail
self.agents = self.env.agents
self.agents_static = self.env.agents_static
self.dev_obs_dict = self.env.dev_obs_dict
        # If step_memory > 1, we need to concatenate the current observation with the one kept
        # in memory; this only works for step_memory = 1 or 2 for the moment.
if self.step_memory < 2:
return o
else:
self.old_obs = o
oo = dict()
for i_agent in range(len(self.env.agents)):
oo[i_agent] = [o[i_agent], o[i_agent]]
return oo
def step(self, action_dict):
obs, rewards, dones, infos = self.env.step(action_dict)
d = dict()
r = dict()
o = dict()
for i_agent in range(len(self.env.agents)):
if i_agent not in self.agents_done:
data, distance, agent_data = self.env.obs_builder.split_tree(tree=np.array(obs[i_agent]),
current_depth=0)
o[i_agent] = [data, distance, agent_data]
r[i_agent] = rewards[i_agent]
d[i_agent] = dones[i_agent]
d['__all__'] = dones['__all__']
if self.step_memory >= 2:
oo = dict()
for i_agent in range(len(self.env.agents)):
if i_agent not in self.agents_done:
oo[i_agent] = [o[i_agent], self.old_obs[i_agent]]
self.old_obs = o
for agent, done in dones.items():
if done and agent != '__all__':
self.agents_done.append(agent)
if self.step_memory < 2:
return o, r, d, infos
else:
return oo, r, d, infos
def get_agent_handles(self):
return self.env.get_agent_handles()
def get_num_agents(self):
return self.env.get_num_agents()
import numpy as np
from ray.rllib.models.preprocessors import Preprocessor
from utils.observation_utils import norm_obs_clip
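# Custom RLLib preprocessor: clips and normalizes the split tree observation and, when
# step_memory == 2, concatenates the current and previous observations into one flat vector.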
class TreeObsPreprocessor(Preprocessor):
def _init_shape(self, obs_space, options):
print(options)
self.step_memory = options["custom_options"]["step_memory"]
return sum([space.shape[0] for space in obs_space]),
    def transform(self, observation):
        # Normalize and flatten the (data, distance, agent_data) groups of the tree observation.
        if self.step_memory == 2:
            data = norm_obs_clip(observation[0][0])
            distance = norm_obs_clip(observation[0][1])
            agent_data = np.clip(observation[0][2], -1, 1)
            data2 = norm_obs_clip(observation[1][0])
            distance2 = norm_obs_clip(observation[1][1])
            agent_data2 = np.clip(observation[1][2], -1, 1)
            return np.concatenate((data, distance, agent_data, data2, distance2, agent_data2))
        else:
            data = norm_obs_clip(observation[0])
            distance = norm_obs_clip(observation[1])
            agent_data = np.clip(observation[2], -1, 1)
            return np.concatenate((data, distance, agent_data))
run_experiment.name = "experiment_example"
run_experiment.num_iterations = 1002
run_experiment.save_every = 100
run_experiment.hidden_sizes = [32, 32]
run_experiment.map_width = 20
run_experiment.map_height = 20
run_experiment.n_agents = {"grid_search": [3, 4, 5, 6, 7, 8]}
run_experiment.rail_generator = "complex_rail_generator" # Change this to "load_env" in order to load a predefined complex scene
run_experiment.nr_extra = 5
run_experiment.policy_folder_name = "ppo_policy_two_obs_with_predictions_n_agents_{config[n_agents]}_"
run_experiment.seed = 123
run_experiment.conv_model = False
run_experiment.obs_builder = @TreeObsForRailEnv()
TreeObsForRailEnv.predictor = @ShortestPathPredictorForRailEnv()
TreeObsForRailEnv.max_depth = 2
run_experiment.entropy_coeff = 0.001
run_experiment.kl_coeff = 0.2
run_experiment.lambda_gae = 0.9
run_experiment.step_memory = 2
run_experiment.min_dist = 10
from RailEnvRLLibWrapper import RailEnvRLLibWrapper
from custom_preprocessors import TreeObsPreprocessor
import gym
import os
from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG
from ray.rllib.agents.ppo.ppo import PPOTrainer as Trainer
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph as PolicyGraph
from ray.rllib.models import ModelCatalog
import ray
import numpy as np
import gin
from flatland.envs.predictions import DummyPredictorForRailEnv, ShortestPathPredictorForRailEnv
gin.external_configurable(DummyPredictorForRailEnv)
gin.external_configurable(ShortestPathPredictorForRailEnv)
from ray.rllib.utils.seed import seed as set_seed
from flatland.envs.observations import TreeObsForRailEnv
from flatland.utils.rendertools import RenderTool
import time
gin.external_configurable(TreeObsForRailEnv)
ModelCatalog.register_custom_preprocessor("tree_obs_prep", TreeObsPreprocessor)
ray.init() # object_store_memory=150000000000, redis_max_memory=30000000000)
__file_dirname__ = os.path.dirname(os.path.realpath(__file__))
CHECKPOINT_PATH = os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'ppo_policy_two_obs_with_predictions_n_agents_4_map_size_20q58l5_f7',
'checkpoint_101', 'checkpoint-101') # To Modify
N_EPISODES = 10
N_STEPS_PER_EPISODE = 50
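# Restores a trained PPO policy from CHECKPOINT_PATH and replays it in a rendered Flatland
# environment for N_EPISODES episodes of at most N_STEPS_PER_EPISODE steps each.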
def render_training_result(config):
print('Init Env')
set_seed(config['seed'], config['seed'], config['seed'])
# Example configuration to generate a random rail
env_config = {"width": config['map_width'],
"height": config['map_height'],
"rail_generator": config["rail_generator"],
"nr_extra": config["nr_extra"],
"number_of_agents": config['n_agents'],
"seed": config['seed'],
"obs_builder": config['obs_builder'],
"min_dist": config['min_dist'],
"step_memory": config["step_memory"]}
# Observation space and action space definitions
if isinstance(config["obs_builder"], TreeObsForRailEnv):
obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(168,)),) * 2)
preprocessor = TreeObsPreprocessor
else:
raise ValueError("Undefined observation space")
act_space = gym.spaces.Discrete(5)
# Dict with the different policies to train
policy_graphs = {
"ppo_policy": (PolicyGraph, obs_space, act_space, {})
}
def policy_mapping_fn(agent_id):
return "ppo_policy"
# Trainer configuration
trainer_config = DEFAULT_CONFIG.copy()
trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes']}
trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
"policy_mapping_fn": policy_mapping_fn,
"policies_to_train": list(policy_graphs.keys())}
trainer_config["num_workers"] = 0
trainer_config["num_cpus_per_worker"] = 4
trainer_config["num_gpus"] = 0.2
trainer_config["num_gpus_per_worker"] = 0.2
trainer_config["num_cpus_for_driver"] = 1
trainer_config["num_envs_per_worker"] = 1
trainer_config['entropy_coeff'] = config['entropy_coeff']
trainer_config["env_config"] = env_config
trainer_config["batch_mode"] = "complete_episodes"
trainer_config['simple_optimizer'] = False
trainer_config['postprocess_inputs'] = True
trainer_config['log_level'] = 'WARN'
trainer_config['num_sgd_iter'] = 10
trainer_config['clip_param'] = 0.2
trainer_config['kl_coeff'] = config['kl_coeff']
trainer_config['lambda'] = config['lambda_gae']
env = RailEnvRLLibWrapper(env_config)
trainer = Trainer(env=RailEnvRLLibWrapper, config=trainer_config)
trainer.restore(CHECKPOINT_PATH)
policy = trainer.get_policy("ppo_policy")
    preprocessor = preprocessor(obs_space, {"custom_options": {"step_memory": config["step_memory"]}})
env_renderer = RenderTool(env, gl="PILSVG")
for episode in range(N_EPISODES):
observation = env.reset()
for i in range(N_STEPS_PER_EPISODE):
preprocessed_obs = []
for obs in observation.values():
preprocessed_obs.append(preprocessor.transform(obs))
action, _, infos = policy.compute_actions(preprocessed_obs, [])
logits = infos['behaviour_logits']
actions = dict()
# We select the greedy action.
for j, logit in enumerate(logits):
actions[j] = np.argmax(logit)
# In case we prefer to sample an action stochastically according to the policy graph.
# for j, act in enumerate(action):
# actions[j] = act
# Time to see the rendering at one step
time.sleep(1)
env_renderer.renderEnv(show=True, frames=True, iEpisode=episode, iStep=i,
action_dict=list(actions.values()))
observation, _, _, _ = env.step(actions)
env_renderer.close_window()
@gin.configurable
def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
map_width, map_height, policy_folder_name, obs_builder,
entropy_coeff, seed, conv_model, rail_generator, nr_extra, kl_coeff, lambda_gae,
step_memory, min_dist):
render_training_result(
config={"n_agents": n_agents,
"hidden_sizes": hidden_sizes, # Array containing the sizes of the network layers
"save_every": save_every,
"map_width": map_width,
"map_height": map_height,
'policy_folder_name': policy_folder_name,
"obs_builder": obs_builder,
"entropy_coeff": entropy_coeff,
"seed": seed,
"conv_model": conv_model,
"rail_generator": rail_generator,
"nr_extra": nr_extra,
"kl_coeff": kl_coeff,
"lambda_gae": lambda_gae,
"min_dist": min_dist,
"step_memory": step_memory
}
)
if __name__ == '__main__':
gin.parse_config_file(os.path.join(__file_dirname__, 'experiment_configs', 'config_example', 'config.gin')) # To Modify
run_experiment()
import os
import gin
import gym
from flatland.envs.predictions import DummyPredictorForRailEnv, ShortestPathPredictorForRailEnv
# Import PPO trainer: we can replace these imports by any other trainer from RLLib.
from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG
from ray.rllib.agents.ppo.ppo import PPOTrainer as Trainer
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph as PolicyGraph
from ray.rllib.models import ModelCatalog
gin.external_configurable(DummyPredictorForRailEnv)
gin.external_configurable(ShortestPathPredictorForRailEnv)
import ray
from ray.tune.logger import UnifiedLogger
from ray.tune.logger import pretty_print
import os
from RailEnvRLLibWrapper import RailEnvRLLibWrapper
import tempfile
from ray import tune
from ray.rllib.utils.seed import seed as set_seed
from flatland.envs.observations import TreeObsForRailEnv
gin.external_configurable(TreeObsForRailEnv)
import numpy as np
from custom_preprocessors import TreeObsPreprocessor
ModelCatalog.register_custom_preprocessor("tree_obs_prep", TreeObsPreprocessor)
ray.init() # object_store_memory=150000000000, redis_max_memory=30000000000)
__file_dirname__ = os.path.dirname(os.path.realpath(__file__))
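# Tune callbacks: set the episode horizon from the map size at episode start, and log a
# normalized score plus a solved/unsolved flag as custom metrics at episode end.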
def on_episode_start(info):
episode = info['episode']
map_width = info['env'].envs[0].width
map_height = info['env'].envs[0].height
episode.horizon = 3*(map_width + map_height)
def on_episode_end(info):
episode = info['episode']
    # Calculation of a custom score metric: sum of all accumulated rewards, divided by the number of agents
    # and the maximum number of time steps of the episode.
score = 0
for k, v in episode._agent_reward_history.items():
score += np.sum(v)
score /= (len(episode._agent_reward_history) * episode.horizon)
# Calculation of the proportion of solved episodes before the maximum time step
done = 0
if len(episode._agent_reward_history[0]) <= episode.horizon-5:
done = 1
episode.custom_metrics["score"] = score
episode.custom_metrics["proportion_episode_solved"] = done
def train(config, reporter):
print('Init Env')
set_seed(config['seed'], config['seed'], config['seed'])
# Given the depth of the tree observation and the number of features per node we get the following state_size
num_features_per_node = config['obs_builder'].observation_dim
tree_depth = 2
nr_nodes = 0
for i in range(tree_depth + 1):
nr_nodes += np.power(4, i)
obs_size = num_features_per_node * nr_nodes
# Environment parameters
env_config = {"width": config['map_width'],
"height": config['map_height'],
"rail_generator": config["rail_generator"],
"nr_extra": config["nr_extra"],
"number_of_agents": config['n_agents'],
"seed": config['seed'],
"obs_builder": config['obs_builder'],
"min_dist": config['min_dist'],
"step_memory": config["step_memory"]}
# Observation space and action space definitions
if isinstance(config["obs_builder"], TreeObsForRailEnv):
obs_space = gym.spaces.Tuple((gym.spaces.Box(low=-float('inf'), high=float('inf'), shape=(obs_size,)),) * 2)
preprocessor = "tree_obs_prep"
else:
raise ValueError("Undefined observation space") # Only TreeObservation implemented for now.
act_space = gym.spaces.Discrete(5)
# Dict with the different policies to train. In this case, all trains follow the same policy
policy_graphs = {
"ppo_policy": (PolicyGraph, obs_space, act_space, {})
}
# Function that maps an agent id to the name of its respective policy.
def policy_mapping_fn(agent_id):
return "ppo_policy"
# Trainer configuration
trainer_config = DEFAULT_CONFIG.copy()
trainer_config['model'] = {"fcnet_hiddens": config['hidden_sizes'], "custom_preprocessor": preprocessor,
"custom_options": {"step_memory": config["step_memory"], "obs_size": obs_size}}
trainer_config['multiagent'] = {"policy_graphs": policy_graphs,
"policy_mapping_fn": policy_mapping_fn,
"policies_to_train": list(policy_graphs.keys())}
    # Maximum number of time steps for an episode is set to 3 * (map_width + map_height)
trainer_config["horizon"] = 3 * (config['map_width'] + config['map_height'])
# Parameters for calculation parallelization
trainer_config["num_workers"] = 0
trainer_config["num_cpus_per_worker"] = 8
trainer_config["num_gpus"] = 0.2
trainer_config["num_gpus_per_worker"] = 0.2
trainer_config["num_cpus_for_driver"] = 1
trainer_config["num_envs_per_worker"] = 1
# Parameters for PPO training
trainer_config['entropy_coeff'] = config['entropy_coeff']
trainer_config["env_config"] = env_config
trainer_config["batch_mode"] = "complete_episodes"
trainer_config['simple_optimizer'] = False
trainer_config['log_level'] = 'WARN'
trainer_config['num_sgd_iter'] = 10
trainer_config['clip_param'] = 0.2
trainer_config['kl_coeff'] = config['kl_coeff']
trainer_config['lambda'] = config['lambda_gae']
trainer_config['callbacks'] = {
"on_episode_start": tune.function(on_episode_start),
"on_episode_end": tune.function(on_episode_end)
}
def logger_creator(conf):
"""Creates a Unified logger with a default logdir prefix."""
logdir = config['policy_folder_name'].format(**locals())
logdir = tempfile.mkdtemp(
prefix=logdir, dir=config['local_dir'])
return UnifiedLogger(conf, logdir, None)
logger = logger_creator
trainer = Trainer(env=RailEnvRLLibWrapper, config=trainer_config, logger_creator=logger)
for i in range(100000 + 2):
print("== Iteration", i, "==")
print(pretty_print(trainer.train()))
if i % config['save_every'] == 0:
checkpoint = trainer.save()
print("checkpoint saved at", checkpoint)
reporter(num_iterations_trained=trainer._iteration)
@gin.configurable
def run_experiment(name, num_iterations, n_agents, hidden_sizes, save_every,
map_width, map_height, policy_folder_name, local_dir, obs_builder,
entropy_coeff, seed, conv_model, rail_generator, nr_extra, kl_coeff, lambda_gae,
step_memory, min_dist):
tune.run(
train,
name=name,
stop={"num_iterations_trained": num_iterations},
config={"n_agents": n_agents,
"hidden_sizes": hidden_sizes, # Array containing the sizes of the network layers
"save_every": save_every,
"map_width": map_width,
"map_height": map_height,
"local_dir": local_dir,
'policy_folder_name': policy_folder_name,
"obs_builder": obs_builder,
"entropy_coeff": entropy_coeff,
"seed": seed,
"conv_model": conv_model,
"rail_generator": rail_generator,
"nr_extra": nr_extra,
"kl_coeff": kl_coeff,
"lambda_gae": lambda_gae,
"min_dist": min_dist,
"step_memory": step_memory # If equal to two, the current observation plus
# the observation of last time step will be given as input the the model.
},
resources_per_trial={
"cpu": 8,
"gpu": 0.2
},
verbose=2,
local_dir=local_dir
)
if __name__ == '__main__':
folder_name = 'config_example' # To Modify
gin.parse_config_file(os.path.join(__file_dirname__, 'experiment_configs', folder_name, 'config.gin'))
dir = os.path.join(__file_dirname__, 'experiment_configs', folder_name)
run_experiment(local_dir=dir)
git+https://gitlab.aicrowd.com/flatland/flatland.git@42-run-baselines-in-ci
git+https://gitlab.aicrowd.com/flatland/flatland.git
importlib-metadata>=0.17
importlib_resources>=1.0.2
torch>=1.1.0
\ No newline at end of file
@@ -20,9 +20,6 @@ nr_trials_per_test = 100
test_results = []
test_times = []
test_dones = []
# Load agent
# agent = Agent(state_size, action_size, "FC", 0)
# agent.qnetwork_local.load_state_dict(torch.load('./torch_training/Nets/avoid_checkpoint1700.pth'))
agent = RandomAgent(state_size, action_size)
start_time_scoring = time.time()
test_idx = 0
......
@@ -28,8 +28,8 @@ test_dones = []
sequential_agent_test = False
# Load your agent
agent = Agent(state_size, action_size, "FC", 0)
agent.qnetwork_local.load_state_dict(torch.load('../torch_training/Nets/avoid_checkpoint60000.pth'))
agent = Agent(state_size, action_size)
agent.qnetwork_local.load_state_dict(torch.load('../torch_training/Nets/avoid_checkpoint500.pth'))
# Load the necessary Observation Builder and Predictor
predictor = ShortestPathPredictorForRailEnv()
......
@@ -2,7 +2,6 @@ import random
import time
import numpy as np
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
@@ -66,7 +65,7 @@ def run_test(parameters, agent, observation_builder=None, observation_wrapper=No
number_of_agents=1,
)
obs = env.reset()
obs, info = env.reset()
if observation_wrapper is not None:
for a in range(env.get_num_agents()):
@@ -181,7 +180,7 @@ def run_test_sequential(parameters, agent, test_nr=0, tree_depth=3):
number_of_agents=1,
)
obs = env.reset()
obs, info = env.reset()
done = env.dones
# Run episode
trial_score = 0
......
import numpy as np
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from flatland.utils.rendertools import RenderTool
from sequential_agent.simple_order_agent import OrderedAgent
np.random.seed(2)
@@ -49,7 +49,7 @@ action_dict = dict()
for trials in range(1, n_trials + 1):
# Reset environment
obs = env.reset(True, True)
obs, info = env.reset(True, True)
done = env.dones
env_renderer.reset()
frame_step = 0
......
import numpy as np
from utils.observation_utils import split_tree, min_gt
from utils.observation_utils import split_tree_into_feature_groups, min_gt
class OrderedAgent:
@@ -12,8 +12,7 @@ class OrderedAgent:
:param state: input is the observation of the agent
:return: returns an action
"""
_, distance, _ = split_tree(tree=np.array(state), num_features_per_node=11,
current_depth=0)
_, distance, _ = split_tree_into_feature_groups(state, 1)
distance = distance[1:]
min_dist = min_gt(distance, 0)
min_direction = np.where(distance == min_dist)
......
@@ -2,8 +2,7 @@ from setuptools import setup, find_packages
install_reqs = []
dependency_links = []
# TODO: include requirements_RLLib_training.txt
requirements_paths = ['requirements_torch_training.txt'] # , 'requirements_RLLib_training.txt']
requirements_paths = ['requirements_torch_training.txt']
for requirements_path in requirements_paths:
with open(requirements_path, 'r') as f:
install_reqs += [
......
import random
import numpy as np
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from utils.observation_utils import normalize_observation
def test_normalize_features():
random.seed(1)
np.random.seed(1)
max_depth = 4
for i in range(10):
tree_observer = TreeObsForRailEnv(max_depth=max_depth)
next_rand_number = random.randint(0, 100)
env = RailEnv(width=10,
height=10,
rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=8, max_dist=99999,
seed=next_rand_number),
schedule_generator=complex_schedule_generator(),
number_of_agents=1,
obs_builder_object=tree_observer)
obs, all_rewards, done, _ = env.step({0: 0})
obs_new = tree_observer.get()
# data, distance, agent_data = split_tree(tree=np.array(obs_old), num_features_per_node=11)
data_normalized = normalize_observation(obs_new, max_depth, observation_radius=10)
filename = 'testdata/test_array_{}.csv'.format(i)
data_loaded = np.loadtxt(filename, delimiter=',')
assert np.allclose(data_loaded, data_normalized)