diff --git a/examples/training_example.py b/examples/training_example.py index cfed6c92cc74c45445c436a65d15c9eb8292fe32..5391dbbe4d1709eb7727a6f6fa62612f01439ce0 100644 --- a/examples/training_example.py +++ b/examples/training_example.py @@ -14,11 +14,11 @@ np.random.seed(1) TreeObservation = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv()) LocalGridObs = LocalObsForRailEnv(view_height=10, view_width=2, center=2) -env = RailEnv(width=50, - height=50, +env = RailEnv(width=20, + height=20, rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=1, min_dist=8, max_dist=99999, seed=0), obs_builder_object=TreeObservation, - number_of_agents=5) + number_of_agents=10) env_renderer = RenderTool(env, gl="PILSVG", ) @@ -69,6 +69,9 @@ for trials in range(1, n_trials + 1): # Reset environment and get initial observations for all agents obs = env.reset() + for idx in range(env.get_num_agents()): + tmp_agent = env.agents[idx] + tmp_agent.speed_data["speed"] = 1 / (idx + 1) env_renderer.reset() # Here you can also further enhance the provided observation by means of normalization # See training navigation example in the baseline repository @@ -84,7 +87,7 @@ for trials in range(1, n_trials + 1): # Environment step which returns the observations for all agents, their corresponding # reward and whether their are done next_obs, all_rewards, done, _ = env.step(action_dict) - env_renderer.render_env(show=True, show_observations=True, show_predictions=True) + env_renderer.render_env(show=True, show_observations=True, show_predictions=False) # Update replay buffer and train agent for a in range(env.get_num_agents()): diff --git a/flatland/envs/observations.py b/flatland/envs/observations.py index 2c9a747372ae74bb2f9e286d0d1fc200260d7f01..4fd6bd655095bbf6279280cd0d4c040bfc7fd9f6 100644 --- a/flatland/envs/observations.py +++ b/flatland/envs/observations.py @@ -324,6 +324,7 @@ class TreeObsForRailEnv(ObservationBuilder): visited = set() agent = self.env.agents[handle] + time_per_cell = np.reciprocal(agent.speed_data["speed"]) own_target_encountered = np.inf other_agent_encountered = np.inf other_target_encountered = np.inf @@ -359,20 +360,23 @@ class TreeObsForRailEnv(ObservationBuilder): crossing_found = True # Register possible future conflict - if self.predictor and num_steps < self.max_prediction_depth: + predicted_time = int(tot_dist * time_per_cell) + if self.predictor and predicted_time < self.max_prediction_depth: int_position = coordinate_to_position(self.env.width, [position]) if tot_dist < self.max_prediction_depth: - pre_step = max(0, tot_dist - 1) - post_step = min(self.max_prediction_depth - 1, tot_dist + 1) + + pre_step = max(0, predicted_time - 1) + post_step = min(self.max_prediction_depth - 1, predicted_time + 1) # Look for conflicting paths at distance tot_dist - if int_position in np.delete(self.predicted_pos[tot_dist], handle, 0): - conflicting_agent = np.where(self.predicted_pos[tot_dist] == int_position) + if int_position in np.delete(self.predicted_pos[predicted_time], handle, 0): + conflicting_agent = np.where(self.predicted_pos[predicted_time] == int_position) for ca in conflicting_agent[0]: - if direction != self.predicted_dir[tot_dist][ca] and cell_transitions[self._reverse_dir( - self.predicted_dir[tot_dist][ca])] == 1 and tot_dist < potential_conflict: + if direction != self.predicted_dir[predicted_time][ca] and cell_transitions[ + self._reverse_dir( + self.predicted_dir[predicted_time][ca])] == 1 and tot_dist < potential_conflict: potential_conflict = tot_dist - if self.env.dones[ca] and tot_dist < potential_conflict: + if self.env.dones[ca] and predicted_time < potential_conflict: potential_conflict = tot_dist # Look for conflicting paths at distance num_step-1