diff --git a/flatland/envs/agent_utils.py b/flatland/envs/agent_utils.py
index ef87c96e995015f98022ea9e619276fd28ba49ad..9831f6037678ec2e0f4547870f95a7fa76513314 100644
--- a/flatland/envs/agent_utils.py
+++ b/flatland/envs/agent_utils.py
@@ -9,10 +9,12 @@ from flatland.envs.schedule_utils import Schedule
 
 
 class RailAgentStatus(IntEnum):
-    READY_TO_DEPART = 0  # not in grid yet (position is None) -> prediction as if it were at initial position
-    ACTIVE = 1  # in grid (position is not None), not done -> prediction is remaining path
-    DONE = 2  # in grid (position is not None), but done -> prediction is stay at target forever
-    DONE_REMOVED = 3  # removed from grid (position is None) -> prediction is None
+    WAITING = 0  # not in grid yet (position is None), before earliest_departure -> prediction as if it were at initial position
+    READY_TO_DEPART = 1  # not in grid yet (position is None) -> prediction as if it were at initial position
+    ACTIVE = 2  # in grid (position is not None), not done -> prediction is remaining path
+    DONE = 3  # in grid (position is not None), but done -> prediction is stay at target forever
+    DONE_REMOVED = 4  # removed from grid (position is None) -> prediction is None
+    CANCELLED = 5  # removed from grid without ever departing (position is None)
 
 
 Agent = NamedTuple('Agent', [('initial_position', Tuple[int, int]),
@@ -33,13 +35,14 @@ Agent = NamedTuple('Agent', [('initial_position', Tuple[int, int]),
 
 @attrs
 class EnvAgent:
+    # INIT FROM HERE IN from_schedule()
     initial_position = attrib(type=Tuple[int, int])
     initial_direction = attrib(type=Grid4TransitionsEnum)
     direction = attrib(type=Grid4TransitionsEnum)
     target = attrib(type=Tuple[int, int])
     moving = attrib(default=False, type=bool)
 
-    # NEW - time scheduling
+    # NEW : EnvAgent - Schedule properties
    earliest_departure = attrib(default=None, type=int)  # default None during from_schedule()
    latest_arrival = attrib(default=None, type=int)  # default None during from_schedule()
 
@@ -58,8 +61,9 @@ class EnvAgent:
                                                             'moving_before_malfunction': False})))
 
     handle = attrib(default=None)
+    # INIT TILL HERE IN from_schedule()
 
-    status = attrib(default=RailAgentStatus.READY_TO_DEPART, type=RailAgentStatus)
+    status = attrib(default=RailAgentStatus.WAITING, type=RailAgentStatus)
     position = attrib(default=None, type=Optional[Tuple[int, int]])
 
     # used in rendering
@@ -68,12 +72,17 @@ class EnvAgent:
 
     def reset(self):
        """
-        Resets the agents to their initial values of the episode
+        Resets the agent to its initial values for the episode. Called after schedule time generation.
""" self.position = None # TODO: set direction to None: https://gitlab.aicrowd.com/flatland/flatland/issues/280 self.direction = self.initial_direction - self.status = RailAgentStatus.READY_TO_DEPART + + if (self.earliest_departure == 0): + self.status = RailAgentStatus.READY_TO_DEPART + else: + self.status = RailAgentStatus.WAITING + self.old_position = None self.old_direction = None self.moving = False diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py index cef55ae732eba625a92a04fbdfc315fadda57aa7..289058834446bbdb24cd26a937f30c596f5e3a7b 100644 --- a/flatland/envs/rail_env.py +++ b/flatland/envs/rail_env.py @@ -34,7 +34,7 @@ from gym.utils import seeding # from flatland.envs.rail_generators import random_rail_generator, RailGenerator # from flatland.envs.schedule_generators import random_schedule_generator, ScheduleGenerator -# NEW +# NEW : Imports from flatland.envs.schedule_time_generators import schedule_time_generator # Adrian Egli performance fix (the fast methods brings more than 50%) @@ -379,23 +379,34 @@ class RailEnv(Environment): if optionals and 'agents_hints' in optionals: agents_hints = optionals['agents_hints'] - schedule = self.schedule_generator(self.rail, self.number_of_agents, agents_hints, self.num_resets, - self.np_random) + schedule = self.schedule_generator(self.rail, self.number_of_agents, agents_hints, + self.num_resets, self.np_random) self.agents = EnvAgent.from_schedule(schedule) # Get max number of allowed time steps from schedule generator # Look at the specific schedule generator used to see where this number comes from - self._max_episode_steps = schedule.max_episode_steps + self._max_episode_steps = schedule.max_episode_steps # NEW UPDATE THIS! + # Agent Positions Map self.agent_positions = np.zeros((self.height, self.width), dtype=int) - 1 - # Reset agents to initial - self.reset_agents() + # Reset distance map - basically initializing self.distance_map.reset(self.agents, self.rail) - # NEW - time window scheduling - schedule_time_generator(self.agents, self.distance_map, schedule, self.np_random, temp_info=optionals) + # NEW : Time Schedule Generation + # find agent speeds (needed for max_ep_steps recalculation) + if (type(self.schedule_generator.speed_ratio_map) is dict): + config_speeds = list(self.schedule_generator.speed_ratio_map.keys()) + else: + config_speeds = [1.0] + + self._max_episode_steps = schedule_time_generator(self.agents, config_speeds, self.distance_map, + self._max_episode_steps, self.np_random, temp_info=optionals) + + # Reset agents to initial states + self.reset_agents() + # WHY for agent in self.agents: # Induce malfunctions if activate_agents: @@ -584,10 +595,24 @@ class RailEnv(Environment): if have_all_agents_ended: self.dones["__all__"] = True self.rewards_dict = {i: self.global_reward for i in range(self.get_num_agents())} + if (self._max_episode_steps is not None) and (self._elapsed_steps >= self._max_episode_steps): self.dones["__all__"] = True - for i_agent in range(self.get_num_agents()): + + for i_agent, agent in enumerate(self.agents): + # NEW : STEP:REW: CANCELLED check / reward (never departed) + if (agent.status == RailAgentStatus.READY_TO_DEPART): + agent.status = RailAgentStatus.CANCELLED + # NEGATIVE REWARD? + + # NEW : STEP:REW: Departed but never reached + if (agent.status == RailAgentStatus.ACTIVE): + pass + # NEGATIVE REWARD? 
+                self.dones[i_agent] = True
+
 
         if self.record_steps:
             self.record_timestep(action_dict_)
@@ -738,6 +763,13 @@ class RailEnv(Environment):
         if agent.status in [RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED]:  # this agent has already completed...
             return
 
+        # NEW : STEP: WAITING -> WAITING or WAITING -> READY_TO_DEPART
+        if agent.status == RailAgentStatus.WAITING:
+            if self._elapsed_steps >= agent.earliest_departure:
+                agent.status = RailAgentStatus.READY_TO_DEPART
+            self.motionCheck.addAgent(i_agent, None, None)
+            return
+
         # agent gets active by a MOVE_* action and if c
         if agent.status == RailAgentStatus.READY_TO_DEPART:
@@ -848,7 +880,8 @@ class RailEnv(Environment):
     def _step_agent2_cf(self, i_agent):
         agent = self.agents[i_agent]
 
-        if agent.status in [RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED]:
+        # NEW : REW: no reward while WAITING...
+        if agent.status in [RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED, RailAgentStatus.WAITING]:
             return
 
         (move, rc_next) = self.motionCheck.check_motion(i_agent, agent.position)
@@ -889,18 +922,37 @@ class RailEnv(Environment):
                 agent.direction = new_direction
                 agent.speed_data['position_fraction'] = 0.0
 
+            # NEW : REW: check DONE before/after Latest Arrival & check if RUNNING before/after Latest Arrival
             # has the agent reached its target?
             if np.equal(agent.position, agent.target).all():
-                agent.status = RailAgentStatus.DONE
-                self.dones[i_agent] = True
-                self.active_agents.remove(i_agent)
-                agent.moving = False
-                self._remove_agent_from_scene(agent)
-            else:
-                self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
+                # arrived before Latest Arrival
+                if self._elapsed_steps <= agent.latest_arrival:
+                    agent.status = RailAgentStatus.DONE
+                    self.dones[i_agent] = True
+                    self.active_agents.remove(i_agent)
+                    agent.moving = False
+                    self._remove_agent_from_scene(agent)
+                else:  # arrived after Latest Arrival
+                    agent.status = RailAgentStatus.DONE
+                    self.dones[i_agent] = True
+                    self.active_agents.remove(i_agent)
+                    agent.moving = False
+                    self._remove_agent_from_scene(agent)
+                    # TODO: negative reward?
+
+            else:  # has not reached its target and is moving
+                # running before Latest Arrival
+                if self._elapsed_steps <= agent.latest_arrival:
+                    self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
+                else:  # running after Latest Arrival
+                    self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
+                    # TODO: negative reward per step?
         else:
-            # step penalty if not moving (stopped now or before)
-            self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
+            # stopped (!move) before Latest Arrival
+            if self._elapsed_steps <= agent.latest_arrival:
+                self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
+            else:  # stopped (!move) after Latest Arrival
+                self.rewards_dict[i_agent] += self.step_penalty * \
+                    agent.speed_data['speed']
+                # TODO: negative reward per step?
 def _set_agent_to_initial_position(self, agent: EnvAgent, new_position: IntVector2D):
        """
diff --git a/flatland/envs/rail_env_shortest_paths.py b/flatland/envs/rail_env_shortest_paths.py
index 6bfb4bb558f135388b41ee2b830f74984e62eddc..afa0dd79be256149b0800ac2763bb6820970406f 100644
--- a/flatland/envs/rail_env_shortest_paths.py
+++ b/flatland/envs/rail_env_shortest_paths.py
@@ -227,7 +227,7 @@ def get_shortest_paths(distance_map: DistanceMap, max_depth: Optional[int] = Non
     shortest_paths = dict()
 
     def _shortest_path_for_agent(agent):
-        if agent.status == RailAgentStatus.READY_TO_DEPART:
+        if agent.status in (RailAgentStatus.WAITING, RailAgentStatus.READY_TO_DEPART):
             position = agent.initial_position
         elif agent.status == RailAgentStatus.ACTIVE:
             position = agent.position
diff --git a/flatland/envs/schedule_time_generators.py b/flatland/envs/schedule_time_generators.py
index 1587ffc378b424b21cfb9e4f3660f631521e5f32..dafa0aca361037a7758ce3e3dff740b52879e8bd 100644
--- a/flatland/envs/schedule_time_generators.py
+++ b/flatland/envs/schedule_time_generators.py
@@ -30,17 +30,44 @@ from flatland.envs.distance_map import DistanceMap
 # city_positions = []
 # #### DATA COLLECTION *************************
 
-def schedule_time_generator(agents: List[EnvAgent], distance_map: DistanceMap, schedule: Schedule,
-                            np_random: RandomState = None, temp_info=None) -> None:
+def schedule_time_generator(agents: List[EnvAgent], config_speeds: List[float], distance_map: DistanceMap,
+                            max_episode_steps: int, np_random: RandomState = None, temp_info=None) -> int:
+
+    # Multipliers
+    old_max_episode_steps_multiplier = 3.0
+    new_max_episode_steps_multiplier = 1.5
+    travel_buffer_multiplier = 1.3  # must be strictly less than new_max_episode_steps_multiplier
+    end_buffer_multiplier = 0.05
+    mean_shortest_path_multiplier = 0.2
 
     from flatland.envs.rail_env_shortest_paths import get_shortest_paths
     shortest_paths = get_shortest_paths(distance_map)
+    shortest_paths_lengths = [len(v) for v in shortest_paths.values()]
+
+    # Find mean_shortest_path_time
+    agent_shortest_path_times = []
+    for agent in agents:
+        speed = agent.speed_data['speed']
+        distance = shortest_paths_lengths[agent.handle]
+        agent_shortest_path_times.append(int(np.ceil(distance / speed)))
+
+    mean_shortest_path_time = np.mean(agent_shortest_path_times)
+
+    # Decide on a suitable max_episode_steps
+    max_sp_len = max(shortest_paths_lengths)  # longest path
+    min_speed = min(config_speeds)  # slowest possible speed in config
 
-    max_episode_steps = int(schedule.max_episode_steps * 1.0) #needs to be increased due to fractional speeds taking way longer (best - be calculated here)
-    end_buffer = max_episode_steps // 20 #schedule.end_buffer
+    longest_sp_time = max_sp_len / min_speed
+    max_episode_steps_new = int(np.ceil(longest_sp_time * new_max_episode_steps_multiplier))
+
+    max_episode_steps_old = int(max_episode_steps * old_max_episode_steps_multiplier)
+
+    max_episode_steps = min(max_episode_steps_new, max_episode_steps_old)
+
+    end_buffer = int(max_episode_steps * end_buffer_multiplier)
     latest_arrival_max = max_episode_steps-end_buffer
-    travel_buffer_multiplier = 1.7
 
+    # Only needed if these are returned later; currently unused
     earliest_departures = []
     latest_arrivals = []
 
@@ -89,11 +116,11 @@ def schedule_time_generator(agents: List[EnvAgent], distance_map: DistanceMap, s
     # #### DATA COLLECTION *************************
     for agent in agents:
-        agent_speed = agent.speed_data['speed']
-        agent_shortest_path = shortest_paths[agent.handle]
-        agent_shortest_path_len = len(agent_shortest_path)
-        agent_shortest_path_time = int(np.ceil(agent_shortest_path_len / agent_speed)) # for fractional speeds 1/3 etc
-        agent_travel_time_max = min( int(np.ceil(agent_shortest_path_time * travel_buffer_multiplier)), latest_arrival_max) # min(this, latest_arrival_max), SHOULD NOT BE lesser than shortest path time
+        agent_shortest_path_time = agent_shortest_path_times[agent.handle]
+        agent_travel_time_max = min(int(np.ceil(agent_shortest_path_time * travel_buffer_multiplier
+                                                + mean_shortest_path_time * mean_shortest_path_multiplier)),
+                                    latest_arrival_max)  # capped so the departure window below stays non-negative
+
         departure_window_max = latest_arrival_max - agent_travel_time_max
         earliest_departure = np_random.randint(0, departure_window_max)
@@ -124,6 +151,9 @@ def schedule_time_generator(agents: List[EnvAgent], distance_map: DistanceMap, s
 
     # save_sp_fig()
     # #### DATA COLLECTION *************************
 
+    # Return max_episode_steps after deciding on its new value
+    return max_episode_steps
+
 # #### DATA COLLECTION *************************
 # # Histogram 1
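
Reviewer notes (not part of the patch) follow.

The status transitions introduced above are spread across three hunks of rail_env.py. Below is a minimal sketch that condenses them in one place, assuming the patched RailAgentStatus enum is importable; expected_status_transition is a hypothetical helper written for illustration, not a function in the diff:

from flatland.envs.agent_utils import RailAgentStatus  # assumes the patched enum above

def expected_status_transition(status, elapsed_steps, earliest_departure, max_episode_steps):
    # WAITING -> READY_TO_DEPART once earliest_departure is reached (see _step_agent)
    if status == RailAgentStatus.WAITING and elapsed_steps >= earliest_departure:
        return RailAgentStatus.READY_TO_DEPART
    # READY_TO_DEPART -> CANCELLED when the episode ends before the agent departed (see step)
    if status == RailAgentStatus.READY_TO_DEPART and elapsed_steps >= max_episode_steps:
        return RailAgentStatus.CANCELLED
    # ACTIVE -> DONE / DONE_REMOVED on reaching the target is handled in _step_agent2_cf
    return status

assert expected_status_transition(RailAgentStatus.WAITING, 5, 3, 100) == RailAgentStatus.READY_TO_DEPART
assert expected_status_transition(RailAgentStatus.WAITING, 1, 3, 100) == RailAgentStatus.WAITING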
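
The episode-length arithmetic in schedule_time_generator can be checked by hand. A worked example with invented inputs (three agents, shortest paths of 40/55/62 cells, configured speeds 1.0/0.5/0.25, and a schedule that originally proposed 300 steps), using the multipliers from the diff:

import numpy as np

shortest_paths_lengths = [40, 55, 62]
config_speeds = [1.0, 0.5, 0.25]
schedule_max_episode_steps = 300                                        # invented original value

longest_sp_time = max(shortest_paths_lengths) / min(config_speeds)      # 62 / 0.25 = 248.0
max_episode_steps_new = int(np.ceil(longest_sp_time * 1.5))             # 372
max_episode_steps_old = int(schedule_max_episode_steps * 3.0)           # 900
max_episode_steps = min(max_episode_steps_new, max_episode_steps_old)   # 372

end_buffer = int(max_episode_steps * 0.05)                              # 18
latest_arrival_max = max_episode_steps - end_buffer                     # 354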
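
The per-agent departure window then follows from those bounds. Continuing the same invented numbers for the speed-0.5 agent; the final latest_arrival line is an assumption, since that assignment falls outside the hunks shown above:

import numpy as np

latest_arrival_max = 354                                      # from the worked example above
agent_shortest_path_times = [40, 110, 248]                    # int(np.ceil(length / speed)) per agent
mean_shortest_path_time = np.mean(agent_shortest_path_times)  # ~132.7

agent_shortest_path_time = 110                                # the speed-0.5 agent
agent_travel_time_max = min(int(np.ceil(agent_shortest_path_time * 1.3
                                        + mean_shortest_path_time * 0.2)),  # ceil(143 + 26.5) = 170
                            latest_arrival_max)

departure_window_max = latest_arrival_max - agent_travel_time_max            # 354 - 170 = 184
earliest_departure = np.random.RandomState(0).randint(0, departure_window_max)
# Assumption (assignment not shown in the diff): latest arrival = departure + travel allowance
latest_arrival = earliest_departure + agent_travel_time_max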