"""
Collection of environment-specific ObservationBuilder.
"""
import pprint
from collections import deque
import numpy as np
from flatland.core.env_observation_builder import ObservationBuilder
from flatland.core.grid.grid4 import Grid4TransitionsEnum
from flatland.core.grid.grid_utils import coordinate_to_position
class TreeObsForRailEnv(ObservationBuilder):
"""
TreeObsForRailEnv object.
This object returns observation vectors for agents in the RailEnv environment.
The information is local to each agent and exploits the graph structure of the rail
network to simplify the representation of the state of the environment for each agent.
For details about the features in the tree observation see the get() function.
"""
def __init__(self, max_depth, predictor=None):
super().__init__()
self.max_depth = max_depth
self.observation_dim = 9
# Compute the size of the returned observation vector
size = 0
pow4 = 1
for i in range(self.max_depth + 1):
size += pow4
pow4 *= 4
self.observation_dim = 9
self.observation_space = [size * self.observation_dim]
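# Illustrative note: with max_depth=2 the tree has 1 + 4 + 16 = 21 nodes, so the flattened
# observation vector has 21 * 9 = 189 entries.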
self.location_has_agent = {}
self.location_has_agent_direction = {}
self.predictor = predictor
self.agents_previous_reset = None
self.tree_explored_actions = [1, 2, 3, 0]
self.tree_explored_actions_char = ['L', 'F', 'R', 'B']
self.distance_map = None
self.distance_map_computed = False
def reset(self):
agents = self.env.agents
nb_agents = len(agents)
compute_distance_map = True
if self.agents_previous_reset is not None and nb_agents == len(self.agents_previous_reset):
compute_distance_map = False
for i in range(nb_agents):
if agents[i].target != self.agents_previous_reset[i].target:
compute_distance_map = True
# Don't compute the distance map if it was loaded
if self.agents_previous_reset is None and self.distance_map is not None:
self.location_has_target = {tuple(agent.target): 1 for agent in agents}
compute_distance_map = False
if compute_distance_map:
self._compute_distance_map()
self.agents_previous_reset = agents
def _compute_distance_map(self):
agents = self.env.agents
# For testing only --> To assert whether a distance map needs to be recomputed.
self.distance_map_computed = True
nb_agents = len(agents)
self.distance_map = np.inf * np.ones(shape=(nb_agents,
self.env.height,
self.env.width,
4))
self.max_dist = np.zeros(nb_agents)
self.max_dist = [self._distance_map_walker(agent.target, i) for i, agent in enumerate(agents)]
# Update local lookup table for all agents' target locations
self.location_has_target = {tuple(agent.target): 1 for agent in agents}
def _distance_map_walker(self, position, target_nr):
"""
Utility function to compute distance maps from each cell in the rail network (and each possible
orientation within it) to each agent's target cell.
"""
# Returns max distance to target, from the farthest away node, while filling in distance_map
self.distance_map[target_nr, position[0], position[1], :] = 0
# Fill in the (up to) 4 neighboring nodes
# direction is the direction of movement, meaning that at least one possible orientation of an agent
# in cell (row, col) allows a movement in direction `direction'
nodes_queue = deque(self._get_and_update_neighbors(position, target_nr, 0, enforce_target_direction=-1))
# BFS from target `position' to all the reachable nodes in the grid
# Stop the search if the target position is re-visited, in any direction
visited = {(position[0], position[1], 0), (position[0], position[1], 1), (position[0], position[1], 2),
(position[0], position[1], 3)}
max_distance = 0
while nodes_queue:
node = nodes_queue.popleft()
node_id = (node[0], node[1], node[2])
if node_id not in visited:
visited.add(node_id)
# From the list of possible neighbors that have at least one path to the current node, only keep those
# whose new orientation in the current cell would allow a transition to direction node[2]
valid_neighbors = self._get_and_update_neighbors((node[0], node[1]), target_nr, node[3], node[2])
for n in valid_neighbors:
nodes_queue.append(n)
if len(valid_neighbors) > 0:
max_distance = max(max_distance, node[3] + 1)
return max_distance
def _get_and_update_neighbors(self, position, target_nr, current_distance, enforce_target_direction=-1):
"""
Utility function used by _distance_map_walker to perform a BFS walk over the rail, filling in the
minimum distances from each target cell.
"""
neighbors = []
possible_directions = [0, 1, 2, 3]
if enforce_target_direction >= 0:
# The agent must arrive in the current cell with orientation `enforce_target_direction'.
# This is only possible if the agent has arrived from the cell in the opposite direction!
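# Example: to enter this cell facing NORTH (0), the relevant neighbour lies to the SOUTH,
# i.e. neigh_direction = (0 + 2) % 4 = 2.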
possible_directions = [(enforce_target_direction + 2) % 4]
for neigh_direction in possible_directions:
new_cell = self._new_position(position, neigh_direction)
if new_cell[0] >= 0 and new_cell[0] < self.env.height and new_cell[1] >= 0 and new_cell[1] < self.env.width:
desired_movement_from_new_cell = (neigh_direction + 2) % 4
# Check all possible transitions in new_cell
for agent_orientation in range(4):
# Is a transition along movement `desired_movement_from_new_cell' to the current cell possible?
is_valid = self.env.rail.get_transition((new_cell[0], new_cell[1], agent_orientation),
desired_movement_from_new_cell)
if is_valid:
"""
# TODO: check that it works with deadends! -- still bugged!
movement = desired_movement_from_new_cell
if isNextCellDeadEnd:
movement = (desired_movement_from_new_cell+2) % 4
"""
new_distance = min(self.distance_map[target_nr, new_cell[0], new_cell[1], agent_orientation],
current_distance + 1)
neighbors.append((new_cell[0], new_cell[1], agent_orientation, new_distance))
self.distance_map[target_nr, new_cell[0], new_cell[1], agent_orientation] = new_distance
return neighbors
def _new_position(self, position, movement):
"""
Utility function that converts a compass movement over a 2D grid to new positions (r, c).
"""
if movement == Grid4TransitionsEnum.NORTH:
return (position[0] - 1, position[1])
elif movement == Grid4TransitionsEnum.EAST:
return (position[0], position[1] + 1)
elif movement == Grid4TransitionsEnum.SOUTH:
return (position[0] + 1, position[1])
elif movement == Grid4TransitionsEnum.WEST:
return (position[0], position[1] - 1)
def get_many(self, handles=None):
"""
Called whenever an observation has to be computed for the `env' environment, for each agent with handle
in the `handles' list.
"""
if handles is None:
handles = []
if self.predictor:
self.max_prediction_depth = 0
self.predicted_pos = {}
self.predicted_dir = {}
self.predictions = self.predictor.get(custom_args={'distance_map': self.distance_map})
if self.predictions:
for t in range(len(self.predictions[0])):
pos_list = []
dir_list = []
for a in handles:
pos_list.append(self.predictions[a][t][1:3])
dir_list.append(self.predictions[a][t][3])
self.predicted_pos.update({t: coordinate_to_position(self.env.width, pos_list)})
self.predicted_dir.update({t: dir_list})
self.max_prediction_depth = len(self.predicted_pos)
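# Note: predicted_pos[t] holds one scalar-encoded grid position per agent at prediction step t
# (see coordinate_to_position), and predicted_dir[t] holds the corresponding headings.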
observations = {}
for h in handles:
observations[h] = self.get(h)
return observations
def get(self, handle):
"""
Computes the current observation for agent `handle' in env
The observation vector is composed of 4 sequential parts, corresponding to data from the up to 4 possible
movements in a RailEnv ('up to' because only a subset of the possible transitions is allowed in RailEnv).
The possible movements are sorted relative to the current orientation of the agent, rather than NESW as for
the transitions. The order is:
[data from 'left'] + [data from 'forward'] + [data from 'right'] + [data from 'back']
Each branch data is organized as:
[root node information] +
[recursive branch data from 'left'] +
[... from 'forward'] +
[... from 'right'] +
[... from 'back']
Each node's information is composed of 9 features:
#1: if the agent's own target lies on the explored branch, the current distance from the agent in number of cells is stored.
#2: if another agent's target is detected, the distance in number of cells from the agent's current location
is stored
#3: if another agent is detected, the distance in number of cells from the current agent position is stored.
#4: possible conflict detected
tot_dist = another agent is predicted to pass through this cell at the same time as the agent; the
distance in number of cells from the current agent position is stored
0 = no other agent reserves the same cell at a similar time
#5: if a switch that cannot be used by the agent is detected, the distance is stored.
#6: this feature stores the distance in number of cells to the next branching point (the current node)
#7: minimum distance from the node to the agent's target, given the direction of the agent, if this path is chosen
#8: agents in the same direction
n = number of agents present in the same direction
(possible future use: number of other agents in the same direction in this branch)
0 = no agent present in the same direction
#9: agents in the opposite direction
n = number of agents present in the opposite direction (i.e. a conflict)
(possible future use: number of other agents in the other direction in this branch, i.e. number of conflicts)
0 = no agent present in the opposite direction
Missing/padding nodes are filled in with -inf (truncated).
Missing values in a present node are filled in with +inf (truncated).
In the root node, all values are 0 except feature #7, which holds the distance from the agent to its target.
When the explored branch reaches the agent's target, feature #7 is 0.
"""
# Update local lookup table for all agents' positions
self.location_has_agent = {tuple(agent.position): 1 for agent in self.env.agents}
self.location_has_agent_direction = {tuple(agent.position): agent.direction for agent in self.env.agents}
if handle >= len(self.env.agents):
print("ERROR: obs _get - handle ", handle, " len(agents)", len(self.env.agents))
agent = self.env.agents[handle] # TODO: handle being treated as index
possible_transitions = self.env.rail.get_transitions(*agent.position, agent.direction)
num_transitions = np.count_nonzero(possible_transitions)
# Root node - current position
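# Note: in the root node only feature #7 (the minimum distance from the current cell to the agent's
# target, looked up in the distance map) is filled in; the remaining features are 0.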
observation = [0, 0, 0, 0, 0, 0, self.distance_map[(handle, *agent.position, agent.direction)], 0, 0]
visited = set()
# Start from the current orientation, and see which transitions are available;
# organize them as [left, forward, right, back], relative to the current orientation
# If only one transition is possible, the tree is oriented with this transition as the forward branch.
orientation = agent.direction
if num_transitions == 1:
orientation = np.argmax(possible_transitions)
for branch_direction in [(orientation + i) % 4 for i in range(-1, 3)]:
if possible_transitions[branch_direction]:
new_cell = self._new_position(agent.position, branch_direction)
branch_observation, branch_visited = \
self._explore_branch(handle, new_cell, branch_direction, 1, 1)
observation = observation + branch_observation
visited = visited.union(branch_visited)
else:
# pad with negative infinity if no transition is possible along this branch
observation = observation + [-np.inf] * self._num_cells_to_fill_in(self.max_depth)
self.env.dev_obs_dict[handle] = visited
return observation
def _num_cells_to_fill_in(self, remaining_depth):
"""Computes the length of observation vector: sum_{i=0,depth-1} 2^i * observation_dim."""
num_observations = 0
pow4 = 1
for i in range(remaining_depth):
num_observations += pow4
pow4 *= 4
return num_observations * self.observation_dim
def _explore_branch(self, handle, position, direction, tot_dist, depth):
"""
Utility function to compute tree-based observations.
We walk along the branch and collect the information documented in the get() function.
If there is a branching point a new node is created and each possible branch is explored.
"""
# [Recursive branch opened]
if depth >= self.max_depth + 1:
return [], []
# Continue along direction until next switch or
# until no transitions are possible along the current direction (i.e., dead-ends)
# We treat dead-ends as nodes, instead of going back, to avoid loops
exploring = True
last_is_switch = False
last_is_dead_end = False
last_is_terminal = False # wrong cell OR cycle; either way, we don't want the agent to land here
last_is_target = False
visited = set()
agent = self.env.agents[handle]
own_target_encountered = np.inf
other_agent_encountered = np.inf
other_target_encountered = np.inf
potential_conflict = np.inf
unusable_switch = np.inf
other_agent_same_direction = 0
other_agent_opposite_direction = 0
num_steps = 1
while exploring:
# #############################
# #############################
# Modify here to compute any useful data required to build the end node's features. This code is called
# for each cell visited between the previous branching node and the next switch / target / dead-end.
if position in self.location_has_agent:
if tot_dist < other_agent_encountered:
other_agent_encountered = tot_dist
if self.location_has_agent_direction[position] == direction:
# Accumulate the number of agents on the branch travelling in the same direction
other_agent_same_direction += 1
if self.location_has_agent_direction[position] != direction:
# Accumulate the number of agents on the branch travelling in the opposite direction
other_agent_opposite_direction += 1
# Check number of possible transitions for agent and total number of transitions in cell (type)
cell_transitions = self.env.rail.get_transitions(*position, direction)
transition_bit = bin(self.env.rail.get_full_transitions(*position))
total_transitions = transition_bit.count("1")
crossing_found = False
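# Note: '1000010000100001' is the 16-bit transition map of a diamond crossing, in which each of the
# four entry directions (N, E, S, W) only allows continuing straight ahead.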
if int(transition_bit, 2) == int('1000010000100001', 2):
crossing_found = True
# Register possible future conflict
if self.predictor and num_steps < self.max_prediction_depth:
int_position = coordinate_to_position(self.env.width, [position])
if tot_dist < self.max_prediction_depth:
pre_step = max(0, tot_dist - 1)
post_step = min(self.max_prediction_depth - 1, tot_dist + 1)
# Look for conflicting paths at distance tot_dist
if int_position in np.delete(self.predicted_pos[tot_dist], handle, 0):
conflicting_agent = np.where(self.predicted_pos[tot_dist] == int_position)
for ca in conflicting_agent[0]:
if direction != self.predicted_dir[tot_dist][ca] and tot_dist < potential_conflict:
potential_conflict = tot_dist
if self.env.dones[ca] and tot_dist < potential_conflict:
potential_conflict = tot_dist
# Look for conflicting paths at distance tot_dist - 1 (pre_step)
elif int_position in np.delete(self.predicted_pos[pre_step], handle, 0):
conflicting_agent = np.where(self.predicted_pos[pre_step] == int_position)
for ca in conflicting_agent[0]:
if direction != self.predicted_dir[pre_step][ca] and tot_dist < potential_conflict:
potential_conflict = tot_dist
if self.env.dones[ca] and tot_dist < potential_conflict:
potential_conflict = tot_dist
# Look for conflicting paths at distance tot_dist + 1 (post_step)
elif int_position in np.delete(self.predicted_pos[post_step], handle, 0):
conflicting_agent = np.where(self.predicted_pos[post_step] == int_position)
for ca in conflicting_agent[0]:
if direction != self.predicted_dir[post_step][ca] and tot_dist < potential_conflict:
potential_conflict = tot_dist
if self.env.dones[ca] and tot_dist < potential_conflict:
potential_conflict = tot_dist
if position in self.location_has_target and position != agent.target:
if tot_dist < other_target_encountered:
other_target_encountered = tot_dist
if position == agent.target and tot_dist < own_target_encountered:
own_target_encountered = tot_dist
# #############################
# #############################
if (position[0], position[1], direction) in visited:
last_is_terminal = True
break
visited.add((position[0], position[1], direction))
# If the target node is encountered, pick that as node. Also, no further branching is possible.
if np.array_equal(position, self.env.agents[handle].target):
last_is_target = True
break
# Check if crossing is found --> Not an unusable switch
if crossing_found:
# Treat the crossing as a straight rail cell
total_transitions = 2
num_transitions = np.count_nonzero(cell_transitions)
exploring = False
# Detect Switches that can only be used by other agents.
if total_transitions > 2 > num_transitions and tot_dist < unusable_switch:
unusable_switch = tot_dist
if num_transitions == 1:
# Check if dead-end, or if we can go forward along direction
nbits = total_transitions
if nbits == 1:
# Dead-end!
last_is_dead_end = True
if not last_is_dead_end:
# Keep walking through the tree along `direction'
exploring = True
# convert one-hot encoding to 0,1,2,3
direction = np.argmax(cell_transitions)
position = self._new_position(position, direction)
num_steps += 1
tot_dist += 1
elif num_transitions > 0:
# Switch detected
last_is_switch = True
break
elif num_transitions == 0:
# Wrong cell type, but let's cover it and treat it as a dead-end, just in case
print("WRONG CELL TYPE detected in tree-search (0 transitions possible) at cell", position[0],
position[1], direction)
last_is_terminal = True
break
# `position' is either a terminal node or a switch
# #############################
# #############################
# Modify here to append new / different features for each visited cell!
if last_is_target:
observation = [own_target_encountered,
other_target_encountered,
other_agent_encountered,
potential_conflict,
unusable_switch,
tot_dist,
0,
other_agent_same_direction,
other_agent_opposite_direction
]
elif last_is_terminal:
observation = [own_target_encountered,
other_target_encountered,
other_agent_encountered,
potential_conflict,
unusable_switch,
np.inf,
self.distance_map[handle, position[0], position[1], direction],
other_agent_same_direction,
other_agent_opposite_direction
]
else:
observation = [own_target_encountered,
other_target_encountered,
other_agent_encountered,
potential_conflict,
unusable_switch,
tot_dist,
self.distance_map[handle, position[0], position[1], direction],
other_agent_same_direction,
other_agent_opposite_direction,
]
# #############################
# #############################
# Start from the current orientation, and see which transitions are available;
# organize them as [left, forward, right, back], relative to the current orientation
# Get the possible transitions
possible_transitions = self.env.rail.get_transitions(*position, direction)
for branch_direction in [(direction + 4 + i) % 4 for i in range(-1, 3)]:
if last_is_dead_end and self.env.rail.get_transition((*position, direction),
(branch_direction + 2) % 4):
# Swap forward and back in case of dead-end, so that an agent can learn that going forward takes
# it back
new_cell = self._new_position(position, (branch_direction + 2) % 4)
branch_observation, branch_visited = self._explore_branch(handle,
new_cell,
(branch_direction + 2) % 4,
tot_dist + 1,
depth + 1)
observation = observation + branch_observation
if len(branch_visited) != 0:
visited = visited.union(branch_visited)
elif last_is_switch and possible_transitions[branch_direction]:
new_cell = self._new_position(position, branch_direction)
branch_observation, branch_visited = self._explore_branch(handle,
new_cell,
branch_direction,
tot_dist + 1,
depth + 1)
observation = observation + branch_observation
if len(branch_visited) != 0:
visited = visited.union(branch_visited)
else:
# no exploring possible, add just cells with infinity
observation = observation + [-np.inf] * self._num_cells_to_fill_in(self.max_depth - depth)
return observation, visited
def util_print_obs_subtree(self, tree):
"""
Utility function to pretty-print tree observations returned by this object.
"""
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(self.unfold_observation_tree(tree))
def unfold_observation_tree(self, tree, current_depth=0, actions_for_display=True):
"""
Utility function to pretty-print tree observations returned by this object.
"""
if len(tree) < self.observation_dim:
return
depth = 0
tmp = len(tree) / self.observation_dim - 1
pow4 = 4
while tmp > 0:
tmp -= pow4
depth += 1
pow4 *= 4
unfolded = {}
unfolded[''] = tree[0:self.observation_dim]
child_size = (len(tree) - self.observation_dim) // 4
for child in range(4):
child_tree = tree[(self.observation_dim + child * child_size):
(self.observation_dim + (child + 1) * child_size)]
observation_tree = self.unfold_observation_tree(child_tree, current_depth=current_depth + 1)
if observation_tree is not None:
if actions_for_display:
label = self.tree_explored_actions_char[child]
else:
label = self.tree_explored_actions[child]
unfolded[label] = observation_tree
return unfolded
def _set_env(self, env):
self.env = env
if self.predictor:
self.predictor._set_env(self.env)
"""
Collection of environment-specific PredictionBuilder.
"""
import numpy as np
from flatland.core.env_prediction_builder import PredictionBuilder
from flatland.core.grid.grid4_utils import get_new_position
from flatland.envs.rail_env import RailEnvActions
class ShortestPathPredictorForRailEnv(PredictionBuilder):
"""
ShortestPathPredictorForRailEnv object.
This object returns shortest-path predictions for agents in the RailEnv environment.
The prediction acts as if no other agent is in the environment and always takes the forward action.
"""
def __init__(self, max_depth):
self.max_depth = max_depth
def get(self, custom_args=None, handle=None):
"""
Called whenever get_many in the observation builder is called.
Requires distance_map to extract the shortest path.
Parameters
----------
custom_args : dict
- distance_map : dict
handle : int, optional
Handle of the agent for which to compute the prediction.
Returns
-------
dict
Dictionary indexed by the agent handle; for each agent, an array of (max_depth + 1) x 5 elements:
- time_offset
- position axis 0
- position axis 1
- direction
- action taken to come here
The prediction at 0 is the current position, direction etc.
"""
agents = self.env.agents
if handle is not None:
agents = [self.env.agents[handle]]
assert custom_args is not None
distance_map = custom_args.get('distance_map')
assert distance_map is not None
prediction_dict = {}
for agent in agents:
_agent_initial_position = agent.position
_agent_initial_direction = agent.direction
prediction = np.zeros(shape=(self.max_depth + 1, 5))
prediction[0] = [0, *_agent_initial_position, _agent_initial_direction, 0]
visited = set()
for index in range(1, self.max_depth + 1):
# if we're at the target, stop moving...
if agent.position == agent.target:
prediction[index] = [index, *agent.target, agent.direction, RailEnvActions.STOP_MOVING]
visited.add((agent.position[0], agent.position[1], agent.direction))
continue
if not agent.moving:
prediction[index] = [index, *agent.position, agent.direction, RailEnvActions.STOP_MOVING]
visited.add((agent.position[0], agent.position[1], agent.direction))
continue
# Take shortest possible path
cell_transitions = self.env.rail.get_transitions(*agent.position, agent.direction)
new_position = None
new_direction = None
if np.sum(cell_transitions) == 1:
new_direction = np.argmax(cell_transitions)
new_position = get_new_position(agent.position, new_direction)
elif np.sum(cell_transitions) > 1:
min_dist = np.inf
no_dist_found = True
for direction in range(4):
if cell_transitions[direction] == 1:
neighbour_cell = get_new_position(agent.position, direction)
target_dist = distance_map[agent.handle, neighbour_cell[0], neighbour_cell[1], direction]
if target_dist < min_dist or no_dist_found:
min_dist = target_dist
new_direction = direction
no_dist_found = False
new_position = get_new_position(agent.position, new_direction)
else:
raise Exception("No transition possible {}".format(cell_transitions))
# update the agent's position and direction
agent.position = new_position
agent.direction = new_direction
# prediction is ready
prediction[index] = [index, *new_position, new_direction, 0]
visited.add((new_position[0], new_position[1], new_direction))
self.env.dev_pred_dict[agent.handle] = visited
prediction_dict[agent.handle] = prediction
# cleanup: restore the agent's initial position and direction
agent.position = _agent_initial_position
agent.direction = _agent_initial_direction
return prediction_dict
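# Illustrative usage sketch (assumed example values, not part of this revision): the predictor is
# typically attached to the tree observation builder, which is then passed to RailEnv, e.g.
#   obs_builder = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv(max_depth=20))
#   env = RailEnv(width=25, height=25, number_of_agents=1, obs_builder_object=obs_builder, ...)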
@@ -3,17 +3,17 @@ from collections import deque
import numpy as np
import torch
from importlib_resources import path
import torch_training.Nets
from flatland.envs.malfunction_generators import malfunction_from_params, MalfunctionParameters
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from flatland.utils.rendertools import RenderTool
from importlib_resources import path
import torch_training.Nets
from torch_training.dueling_double_dqn import Agent
from utils.observation_utils import norm_obs_clip, split_tree
from utils.observation_utils import normalize_observation
random.seed(1)
np.random.seed(1)
@@ -27,28 +27,53 @@ x_dim = env.width
y_dim = env.height
"""
x_dim = np.random.randint(8, 20)
y_dim = np.random.randint(8, 20)
n_agents = np.random.randint(3, 8)
n_goals = n_agents + np.random.randint(0, 3)
min_dist = int(0.75 * min(x_dim, y_dim))
# Parameters for the Environment
x_dim = 25
y_dim = 25
n_agents = 1
n_goals = 5
min_dist = 5
# We are training an Agent using the Tree Observation with depth 2
observation_builder = TreeObsForRailEnv(max_depth=2)
# Use the malfunction generator to break agents from time to time
stochastic_data = MalfunctionParameters(malfunction_rate=1./10000, # Rate of malfunction occurrence
min_duration=15, # Minimal duration of malfunction
max_duration=50 # Max duration of malfunction
)
# Custom observation builder
TreeObservation = TreeObsForRailEnv(max_depth=2)
# Different agent types (trains) with different speeds.
speed_ration_map = {1.: 1., # Fast passenger train
1. / 2.: 0.0, # Fast freight train
1. / 3.: 0.0, # Slow commuter train
1. / 4.: 0.0} # Slow freight train
env = RailEnv(width=x_dim,
height=y_dim,
rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
max_dist=99999,
seed=0),
schedule_generator=complex_schedule_generator(),
obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()),
number_of_agents=n_agents)
env.reset(True, True)
observation_helper = TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv())
rail_generator=sparse_rail_generator(max_num_cities=3,
# Number of cities in map (where train stations are)
seed=1, # Random seed
grid_mode=False,
max_rails_between_cities=2,
max_rails_in_city=4),
schedule_generator=sparse_schedule_generator(speed_ration_map),
number_of_agents=n_agents,
malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
obs_builder_object=TreeObservation)
env.reset(True, True)
env_renderer = RenderTool(env, gl="PILSVG", )
num_features_per_node = env.obs_builder.observation_dim
handle = env.get_agent_handles()
features_per_node = 9
state_size = features_per_node * 85 * 2
tree_depth = 2
nr_nodes = 0
for i in range(tree_depth + 1):
nr_nodes += np.power(4, i)
state_size = num_features_per_node * nr_nodes
action_size = 5
# We set the number of episodes we would like to train on
@@ -62,14 +87,13 @@ action_dict = dict()
final_action_dict = dict()
scores_window = deque(maxlen=100)
done_window = deque(maxlen=100)
time_obs = deque(maxlen=2)
scores = []
dones_list = []
action_prob = [0] * action_size
agent_obs = [None] * env.get_num_agents()
agent_next_obs = [None] * env.get_num_agents()
agent = Agent(state_size, action_size, "FC", 0)
with path(torch_training.Nets, "avoid_checkpoint49700.pth") as file_in:
agent = Agent(state_size, action_size)
with path(torch_training.Nets, "navigator_checkpoint1000.pth") as file_in:
agent.qnetwork_local.load_state_dict(torch.load(file_in))
record_images = False
@@ -78,58 +102,38 @@ frame_step = 0
for trials in range(1, n_trials + 1):
# Reset environment
obs = env.reset(True, True)
env_renderer.set_new_rail()
obs_original = obs.copy()
final_obs = obs.copy()
final_obs_next = obs.copy()
for a in range(env.get_num_agents()):
data, distance, agent_data = split_tree(tree=np.array(obs[a]), num_features_per_node=num_features_per_node,
current_depth=0)
data = norm_obs_clip(data)
distance = norm_obs_clip(distance)
agent_data = np.clip(agent_data, -1, 1)
obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
agent_data = env.agents[a]
speed = 1 # np.random.randint(1,5)
agent_data.speed_data['speed'] = 1. / speed
for i in range(2):
time_obs.append(obs)
# env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
obs, info = env.reset(True, True)
env_renderer.reset()
# Build agent specific observations
for a in range(env.get_num_agents()):
agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
agent_obs[a] = normalize_observation(obs[a], tree_depth, observation_radius=10)
# Reset score and done
score = 0
env_done = 0
# Run episode
for step in range(max_steps):
env_renderer.render_env(show=True, show_observations=False, show_predictions=True)
if record_images:
env_renderer.gl.saveImage("./Images/flatland_frame_{:04d}.bmp".format(frame_step))
frame_step += 1
# Action
for a in range(env.get_num_agents()):
# action = agent.act(np.array(obs[a]), eps=eps)
action = agent.act(agent_obs[a], eps=0)
if info['action_required'][a]:
action = agent.act(agent_obs[a], eps=0.)
else:
action = 0
action_prob[action] += 1
action_dict.update({a: action})
# Environment step
obs, all_rewards, done, _ = env.step(action_dict)
next_obs, all_rewards, done, _ = env.step(action_dict)
# print(all_rewards,action)
obs_original = next_obs.copy()
for a in range(env.get_num_agents()):
data, distance, agent_data = split_tree(tree=np.array(next_obs[a]),
num_features_per_node=num_features_per_node,
current_depth=0)
data = norm_obs_clip(data)
distance = norm_obs_clip(distance)
agent_data = np.clip(agent_data, -1, 1)
next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
time_obs.append(next_obs)
env_renderer.render_env(show=True, show_predictions=True, show_observations=False)
# Build agent specific observations and normalize
for a in range(env.get_num_agents()):
agent_next_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
agent_obs = agent_next_obs.copy()
if obs[a]:
agent_obs[a] = normalize_observation(obs[a], tree_depth, observation_radius=10)
if done['__all__']:
break
@@ -2,19 +2,25 @@ import getopt
import random
import sys
from collections import deque
# make sure the root path is in system path
from pathlib import Path
from flatland.envs.malfunction_generators import malfunction_from_params, MalfunctionParameters
base_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(base_dir))
import matplotlib.pyplot as plt
import numpy as np
import torch
from dueling_double_dqn import Agent
from torch_training.dueling_double_dqn import Agent
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from flatland.utils.rendertools import RenderTool
from utils.observation_utils import norm_obs_clip, split_tree
from utils.observation_utils import normalize_observation
from flatland.envs.observations import TreeObsForRailEnv
def main(argv):
try:
@@ -30,29 +36,44 @@ def main(argv):
np.random.seed(1)
# Parameters for the Environment
x_dim = 10
y_dim = 10
x_dim = 35
y_dim = 35
n_agents = 1
n_goals = 5
min_dist = 5
# We are training an Agent using the Tree Observation with depth 2
observation_builder = TreeObsForRailEnv(max_depth=2)
# Load the Environment
# Use the malfunction generator to break agents from time to time
stochastic_data = MalfunctionParameters(malfunction_rate=1./10000, # Rate of malfunction occurrence
min_duration=15, # Minimal duration of malfunction
max_duration=50 # Max duration of malfunction
)
# Custom observation builder
TreeObservation = TreeObsForRailEnv(max_depth=2)
# Different agent types (trains) with different speeds.
speed_ration_map = {1.: 0., # Fast passenger train
1. / 2.: 1.0, # Fast freight train
1. / 3.: 0.0, # Slow commuter train
1. / 4.: 0.0} # Slow freight train
env = RailEnv(width=x_dim,
height=y_dim,
rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
max_dist=99999,
seed=0),
schedule_generator=complex_schedule_generator(),
obs_builder_object=observation_builder,
number_of_agents=n_agents)
env.reset(True, True)
rail_generator=sparse_rail_generator(max_num_cities=3,
# Number of cities in map (where train stations are)
seed=1, # Random seed
grid_mode=False,
max_rails_between_cities=2,
max_rails_in_city=3),
schedule_generator=sparse_schedule_generator(speed_ration_map),
number_of_agents=n_agents,
malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
# Malfunction data generator
obs_builder_object=TreeObservation)
# Reset env
env.reset(True, True)
# After training we want to render the results so we also load a renderer
env_renderer = RenderTool(env, gl="PILSVG", )
# Given the depth of the tree observation and the number of features per node we get the following state_size
num_features_per_node = env.obs_builder.observation_dim
tree_depth = 2
@@ -66,7 +87,7 @@ def main(argv):
# We set the number of episodes we would like to train on
if 'n_trials' not in locals():
n_trials = 6000
n_trials = 15000
# And the max number of steps we want to take per episode
max_steps = int(3 * (env.height + env.width))
@@ -81,35 +102,28 @@ def main(argv):
final_action_dict = dict()
scores_window = deque(maxlen=100)
done_window = deque(maxlen=100)
time_obs = deque(maxlen=2)
scores = []
dones_list = []
action_prob = [0] * action_size
agent_obs = [None] * env.get_num_agents()
agent_next_obs = [None] * env.get_num_agents()
agent_obs_buffer = [None] * env.get_num_agents()
agent_action_buffer = [2] * env.get_num_agents()
cummulated_reward = np.zeros(env.get_num_agents())
update_values = False
# Now we load a Double dueling DQN agent
agent = Agent(state_size, action_size, "FC", 0)
Training = True
agent = Agent(state_size, action_size)
for trials in range(1, n_trials + 1):
# Reset environment
obs = env.reset(True, True)
if not Training:
env_renderer.set_new_rail()
# Split the observation tree into its parts and normalize the observation using the utility functions.
# Build agent specific local observation
obs, info = env.reset(True, True)
env_renderer.reset()
# Build agent specific observations
for a in range(env.get_num_agents()):
rail_data, distance_data, agent_data = split_tree(tree=np.array(obs[a]),
num_features_per_node=num_features_per_node,
current_depth=0)
rail_data = norm_obs_clip(rail_data)
distance_data = norm_obs_clip(distance_data)
agent_data = np.clip(agent_data, -1, 1)
agent_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
if obs[a]:
agent_obs[a] = normalize_observation(obs[a], tree_depth, observation_radius=10)
agent_obs_buffer[a] = agent_obs[a].copy()
# Reset score and done
score = 0
@@ -117,45 +131,36 @@ def main(argv):
# Run episode
for step in range(max_steps):
# Only render when not training
if not Training:
env_renderer.render_env(show=True, show_observations=True)
# Choose the actions
# Action
for a in range(env.get_num_agents()):
if not Training:
eps = 0
action = agent.act(agent_obs[a], eps=eps)
if info['action_required'][a]:
# If an action is required, we want to store the obs at that step as well as the action
update_values = True
action = agent.act(agent_obs[a], eps=eps)
action_prob[action] += 1
else:
update_values = False
action = 0
action_dict.update({a: action})
# Count the number of actions taken for statistics
action_prob[action] += 1
# Environment step
next_obs, all_rewards, done, _ = env.step(action_dict)
for a in range(env.get_num_agents()):
rail_data, distance_data, agent_data = split_tree(tree=np.array(next_obs[a]),
num_features_per_node=num_features_per_node,
current_depth=0)
rail_data = norm_obs_clip(rail_data)
distance_data = norm_obs_clip(distance_data)
agent_data = np.clip(agent_data, -1, 1)
agent_next_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
next_obs, all_rewards, done, info = env.step(action_dict)
# Update replay buffer and train agent
for a in range(env.get_num_agents()):
# Only update the values when we are done or when an action was taken and thus relevant information is present
if update_values or done[a]:
agent.step(agent_obs_buffer[a], agent_action_buffer[a], all_rewards[a],
agent_obs[a], done[a])
cummulated_reward[a] = 0.
# Remember and train agent
if Training:
agent.step(agent_obs[a], action_dict[a], all_rewards[a], agent_next_obs[a], done[a])
agent_obs_buffer[a] = agent_obs[a].copy()
agent_action_buffer[a] = action_dict[a]
if next_obs[a]:
agent_obs[a] = normalize_observation(next_obs[a], tree_depth, observation_radius=10)
# Update the current score
score += all_rewards[a] / env.get_num_agents()
agent_obs = agent_next_obs.copy()
# Copy observation
if done['__all__']:
env_done = 1
break
@@ -163,8 +168,12 @@ def main(argv):
# Epsilon decay
eps = max(eps_end, eps_decay * eps) # decrease epsilon
# Store the information about training progress
done_window.append(env_done)
# Collect information about training progress
tasks_finished = 0
for _idx in range(env.get_num_agents()):
if done[_idx] == 1:
tasks_finished += 1
done_window.append(tasks_finished / max(1, env.get_num_agents()))
scores_window.append(score / max_steps) # save most recent score
scores.append(np.mean(scores_window))
dones_list.append((np.mean(done_window)))
@@ -189,52 +198,6 @@ def main(argv):
'./Nets/navigator_checkpoint' + str(trials) + '.pth')
action_prob = [1] * action_size
# Render the trained agent
# Reset environment
obs = env.reset(True, True)
env_renderer.set_new_rail()
# Split the observation tree into its parts and normalize the observation using the utility functions.
# Build agent specific local observation
for a in range(env.get_num_agents()):
rail_data, distance_data, agent_data = split_tree(tree=np.array(obs[a]),
num_features_per_node=num_features_per_node,
current_depth=0)
rail_data = norm_obs_clip(rail_data)
distance_data = norm_obs_clip(distance_data)
agent_data = np.clip(agent_data, -1, 1)
agent_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
# Reset score and done
score = 0
env_done = 0
# Run episode
for step in range(max_steps):
env_renderer.render_env(show=True, show_observations=False)
# Choose the actions
for a in range(env.get_num_agents()):
eps = 0
action = agent.act(agent_obs[a], eps=eps)
action_dict.update({a: action})
# Environment step
next_obs, all_rewards, done, _ = env.step(action_dict)
for a in range(env.get_num_agents()):
rail_data, distance_data, agent_data = split_tree(tree=np.array(next_obs[a]),
num_features_per_node=num_features_per_node,
current_depth=0)
rail_data = norm_obs_clip(rail_data)
distance_data = norm_obs_clip(distance_data)
agent_data = np.clip(agent_data, -1, 1)
agent_next_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
agent_obs = agent_next_obs.copy()
if done['__all__']:
break
# Plot overall training progress at the end
plt.plot(scores)
plt.show()
......
@@ -3,13 +3,13 @@ import time
from collections import deque
import numpy as np
from line_profiler import LineProfiler
from flatland.envs.observations import GlobalObsForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from utils.observation_utils import norm_obs_clip, split_tree
from line_profiler import LineProfiler
from utils.observation_utils import norm_obs_clip, split_tree_into_feature_groups
def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='*'):
@@ -102,10 +102,9 @@ def run_test(parameters, agent, test_nr=0, tree_depth=3):
# Reset the env
lp_reset(True, True)
obs = env.reset(True, True)
obs, info = env.reset(True, True)
for a in range(env.get_num_agents()):
data, distance, agent_data = split_tree(tree=np.array(obs[a]),
current_depth=0)
data, distance, agent_data = split_tree_into_feature_groups(obs[a], tree_depth)
data = norm_obs_clip(data)
distance = norm_obs_clip(distance)
agent_data = np.clip(agent_data, -1, 1)
@@ -129,8 +128,7 @@ def run_test(parameters, agent, test_nr=0, tree_depth=3):
next_obs, all_rewards, done, _ = lp_step(action_dict)
for a in range(env.get_num_agents()):
data, distance, agent_data = split_tree(tree=np.array(next_obs[a]),
current_depth=0)
data, distance, agent_data = split_tree_into_feature_groups(next_obs[a], tree_depth)
data = norm_obs_clip(data)
distance = norm_obs_clip(distance)
agent_data = np.clip(agent_data, -1, 1)
......
import numpy as np
from flatland.envs.observations import TreeObsForRailEnv
def max_lt(seq, val):
@@ -45,61 +46,79 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0, normalize_to_ran
min_obs = 0 # min(max_obs, min_gt(obs, 0))
if normalize_to_range:
min_obs = min_gt(obs, 0)
if min_obs > max_obs:
min_obs = max_obs
if max_obs == min_obs:
return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
norm = np.abs(max_obs - min_obs)
return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
def split_tree(tree, num_features_per_node, current_depth=0):
def _split_node_into_feature_groups(node: TreeObsForRailEnv.Node) -> (np.ndarray, np.ndarray, np.ndarray):
data = np.zeros(6)
distance = np.zeros(1)
agent_data = np.zeros(4)
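# Note: each node therefore contributes 6 + 1 + 4 = 11 values once the three groups are filled in.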
data[0] = node.dist_own_target_encountered
data[1] = node.dist_other_target_encountered
data[2] = node.dist_other_agent_encountered
data[3] = node.dist_potential_conflict
data[4] = node.dist_unusable_switch
data[5] = node.dist_to_next_branch
distance[0] = node.dist_min_to_target
agent_data[0] = node.num_agents_same_direction
agent_data[1] = node.num_agents_opposite_direction
agent_data[2] = node.num_agents_malfunctioning
agent_data[3] = node.speed_min_fractional
return data, distance, agent_data
def _split_subtree_into_feature_groups(node: TreeObsForRailEnv.Node, current_tree_depth: int, max_tree_depth: int) -> (np.ndarray, np.ndarray, np.ndarray):
if node == -np.inf:
remaining_depth = max_tree_depth - current_tree_depth
# reference: https://stackoverflow.com/questions/515214/total-number-of-nodes-in-a-tree-data-structure
num_remaining_nodes = int((4**(remaining_depth+1) - 1) / (4 - 1))
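# Example: remaining_depth=1 gives (4**2 - 1) / 3 = 5 missing nodes, i.e. 30 / 5 / 20 padding values
# for the data / distance / agent_data groups respectively.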
return [-np.inf] * num_remaining_nodes*6, [-np.inf] * num_remaining_nodes, [-np.inf] * num_remaining_nodes*4
data, distance, agent_data = _split_node_into_feature_groups(node)
if not node.childs:
return data, distance, agent_data
for direction in TreeObsForRailEnv.tree_explored_actions_char:
sub_data, sub_distance, sub_agent_data = _split_subtree_into_feature_groups(node.childs[direction], current_tree_depth + 1, max_tree_depth)
data = np.concatenate((data, sub_data))
distance = np.concatenate((distance, sub_distance))
agent_data = np.concatenate((agent_data, sub_agent_data))
return data, distance, agent_data
def split_tree_into_feature_groups(tree: TreeObsForRailEnv.Node, max_tree_depth: int) -> (np.ndarray, np.ndarray, np.ndarray):
"""
Splits the tree observation into different sub-groups that need the same normalization.
This is necessary because the tree observation includes three different types of values:
1. Distance from the agent --> This is measured in cells from the current agent location
2. Distance to target --> This is measured as the distance from a cell to the agent's target
3. Binary data --> Contains information about the presence of objects --> No normalization necessary
Number 1. will depend on the depth and size of the tree search
Number 2. will depend on the size of the map and thus the maximum distance on the map
Number 3. is independent of tree depth and map size and thus must be handled differently
Therefore we split the tree into these classes for better normalization.
:param tree: Tree that needs to be split
:param max_tree_depth: Maximum depth of the tree; vital for correct splitting and padding of the tree.
:return: Returns the three different groups (distance from agent, distance to target, binary agent data).
"""
if len(tree) < num_features_per_node:
return [], [], []
depth = 0
tmp = len(tree) / num_features_per_node - 1
pow4 = 4
while tmp > 0:
tmp -= pow4
depth += 1
pow4 *= 4
child_size = (len(tree) - num_features_per_node) // 4
data, distance, agent_data = _split_node_into_feature_groups(tree)
for direction in TreeObsForRailEnv.tree_explored_actions_char:
sub_data, sub_distance, sub_agent_data = _split_subtree_into_feature_groups(tree.childs[direction], 1, max_tree_depth)
data = np.concatenate((data, sub_data))
distance = np.concatenate((distance, sub_distance))
agent_data = np.concatenate((agent_data, sub_agent_data))
return data, distance, agent_data
def normalize_observation(observation: TreeObsForRailEnv.Node, tree_depth: int, observation_radius=0):
"""
This function normalizes the observation used by the RL algorithm.
It splits the node features into the different classes of distances and binary values, so pay close
attention to this part if you modify any of the features in the tree observation.
"""
tree_data = tree[:6].tolist()
distance_data = [tree[6]]
agent_data = tree[7:num_features_per_node].tolist()
# Split each child of the current node and continue to next depth level
for children in range(4):
child_tree = tree[(num_features_per_node + children * child_size):
(num_features_per_node + (children + 1) * child_size)]
tmp_tree_data, tmp_distance_data, tmp_agent_data = split_tree(child_tree, num_features_per_node,
current_depth=current_depth + 1)
if len(tmp_tree_data) > 0:
tree_data.extend(tmp_tree_data)
distance_data.extend(tmp_distance_data)
agent_data.extend(tmp_agent_data)
return tree_data, distance_data, agent_data
def normalize_observation(observation, num_features_per_node=9, observation_radius=0):
data, distance, agent_data = split_tree(tree=np.array(observation), num_features_per_node=num_features_per_node,
current_depth=0)
data, distance, agent_data = split_tree_into_feature_groups(observation, tree_depth)
data = norm_obs_clip(data, fixed_radius=observation_radius)
distance = norm_obs_clip(distance, normalize_to_range=True)
agent_data = np.clip(agent_data, -1, 1)
......