diff --git a/examples/training_navigation.py b/examples/training_navigation.py index 975d33fb3139ebc2040b941dbe73d8aeb3b225eb..b78851c8ff9f99ac61f3e331bd77c501c408113b 100644 --- a/examples/training_navigation.py +++ b/examples/training_navigation.py @@ -63,8 +63,9 @@ for trials in range(1, n_trials + 1): action_dict.update({a: action}) # Environment step + print(trials,step) next_obs, all_rewards, done, _ = env.step(action_dict) - + print("stepped") # Update replay buffer and train agent for a in range(env.number_of_agents): diff --git a/flatland/core/env_observation_builder.py b/flatland/core/env_observation_builder.py index d7bee9301ea8b4d17ef431d4616c13c19490669f..ce0a28ae9f984249233e9086f78debd3c1cbc54e 100644 --- a/flatland/core/env_observation_builder.py +++ b/flatland/core/env_observation_builder.py @@ -272,6 +272,7 @@ class TreeObsForRailEnv(ObservationBuilder): other_agent_encountered = False other_target_encountered = False while exploring: + # ############################# # ############################# # Modify here to compute any useful data required to build the end node's features. This code is called diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py index 36040d5cf66f1adc74faaade7e45acbd711676e0..37a97ff80beff3a2d2abc40f297d456021b071f3 100644 --- a/flatland/envs/rail_env.py +++ b/flatland/envs/rail_env.py @@ -612,9 +612,7 @@ class RailEnv(Environment): direction = reverse_direction movement = reverse_direction is_deadend = True - new_position = self._new_position(pos, movement) - # Is it a legal move? 1) transition allows the movement in the # cell, 2) the new cell is not empty (case 0), 3) the cell is # free, i.e., no agent is currently in that cell @@ -668,7 +666,6 @@ class RailEnv(Environment): # Reset the step actions (in case some agent doesn't 'register_action' # on the next step) self.actions = [0]*self.number_of_agents - return self._get_observations(), self.rewards_dict, self.dones, {} def _new_position(self, position, movement):