diff --git a/changelog.md b/changelog.md index 8a4da1f11c78ba21da8e249250000ce68cac736e..5eba35db5832e31148577f38cf68968eb997fbd4 100644 --- a/changelog.md +++ b/changelog.md @@ -3,6 +3,10 @@ Changelog Changes since Flatland 2.0.0 -------------------------- +### Changes in malfunction behavior +- agent attribute `next_malfunction` is not used anymore, it will be removed fully in future versions. +- `break_agent()` function is introduced which induces malfunctions in agent according to Poisson process +- `_fix_agent_after_malfunction()` fixes agents after attribute `malfunction == 0` ### Changes in `Environment` - moving of member variable `distance_map_computed` to new class `DistanceMap` diff --git a/examples/introduction_flatland_2_1.py b/examples/introduction_flatland_2_1.py index b832717c76735f7b89768e31df5013af77874c33..cf6b69dc30096d27064290aede6af91cec24d5f0 100644 --- a/examples/introduction_flatland_2_1.py +++ b/examples/introduction_flatland_2_1.py @@ -1,9 +1,12 @@ +import numpy as np + # In Flatland you can use custom observation builders and predicitors # Observation builders generate the observation needed by the controller # Preditctors can be used to do short time prediction which can help in avoiding conflicts in the network from flatland.envs.observations import GlobalObsForRailEnv # First of all we import the Flatland rail environment from flatland.envs.rail_env import RailEnv +from flatland.envs.rail_env import RailEnvActions from flatland.envs.rail_generators import sparse_rail_generator from flatland.envs.schedule_generators import sparse_schedule_generator # We also include a renderer because we want to visualize what is going on in the environment @@ -25,10 +28,10 @@ from flatland.utils.rendertools import RenderTool, AgentRenderVariant # The railway infrastructure can be build using any of the provided generators in env/rail_generators.py # Here we use the sparse_rail_generator with the following parameters -width = 50 # With of map -height = 
50 # Height of map +width = 16 * 7 # Width of map +height = 9 * 7 # Height of map nr_trains = 20 # Number of trains that have an assigned task in the env -cities_in_map = 12 # Number of cities where agents can start or end +cities_in_map = 20 # Number of cities where agents can start or end seed = 14 # Random seed grid_distribution_of_cities = False # Type of city distribution, if False cities are randomly placed max_rails_between_cities = 2 # Max number of tracks allowed between cities. This is number of entry point to a city @@ -58,10 +61,9 @@ schedule_generator = sparse_schedule_generator(speed_ration_map) # We can furthermore pass stochastic data to the RailEnv constructor which will allow for stochastic malfunctions # during an episode. -stochastic_data = {'prop_malfunction': 0.3, # Percentage of defective agents - 'malfunction_rate': 30, # Rate of malfunction occurence - 'min_duration': 3, # Minimal duration of malfunction - 'max_duration': 20 # Max duration of malfunction +stochastic_data = {'malfunction_rate': 100, # Rate of malfunction occurrence of single agent + 'min_duration': 15, # Minimal duration of malfunction + 'max_duration': 50 # Max duration of malfunction } # Custom observation builder without predictor @@ -86,8 +88,8 @@ env.reset() env_renderer = RenderTool(env, gl="PILSVG", agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS_AND_BOX, show_debug=False, - screen_height=1000, # Adjust these parameters to fit your resolution - screen_width=1000) # Adjust these parameters to fit your resolution + screen_height=600, # Adjust these parameters to fit your resolution + screen_width=800) # Adjust these parameters to fit your resolution # The first thing we notice is that some agents don't have feasible paths to their target. 
@@ -108,7 +110,8 @@ class RandomAgent: :param state: input is the observation of the agent :return: returns an action """ - return 2 # np.random.choice(np.arange(self.action_size)) + return np.random.choice([RailEnvActions.MOVE_FORWARD, RailEnvActions.MOVE_RIGHT, RailEnvActions.MOVE_LEFT, + RailEnvActions.STOP_MOVING]) def step(self, memories): """ @@ -204,9 +207,8 @@ print("========================================") for agent_idx, agent in enumerate(env.agents): print( - "Agent {} will malfunction = {} at a rate of {}, the next malfunction will occur in {} step. Agent OK = {}".format( - agent_idx, agent.malfunction_data['malfunction_rate'] > 0, agent.malfunction_data['malfunction_rate'], - agent.malfunction_data['next_malfunction'], agent.malfunction_data['malfunction'] < 1)) + "Agent {} is OK = {}".format( + agent_idx, agent.malfunction_data['malfunction'] < 1)) # Now that you have seen these novel concepts that were introduced you will realize that agents don't need to take # an action at every time step as it will only change the outcome when actions are chosen at cell entry. 
@@ -242,7 +244,7 @@ score = 0 # Run episode frame_step = 0 -for step in range(100): +for step in range(500): # Chose an action for each agent in the environment for a in range(env.get_num_agents()): action = controller.act(observations[a]) @@ -254,6 +256,7 @@ for step in range(100): next_obs, all_rewards, done, _ = env.step(action_dict) env_renderer.render_env(show=True, show_observations=False, show_predictions=False) + # env_renderer.gl.save_image('./misc/Fames2/flatland_frame_{:04d}.png'.format(step)) frame_step += 1 # Update replay buffer and train agent for a in range(env.get_num_agents()): @@ -263,5 +266,4 @@ for step in range(100): observations = next_obs.copy() if done['__all__']: break - print('Episode: Steps {}\t Score = {}'.format(step, score)) diff --git a/flatland/envs/agent_utils.py b/flatland/envs/agent_utils.py index 6a0e595bbd2e47202a9fc2e78c64f438f8190684..2bb9677aab020560aeb28aad97edfa23efebe9bf 100644 --- a/flatland/envs/agent_utils.py +++ b/flatland/envs/agent_utils.py @@ -62,7 +62,8 @@ class EnvAgentStatic(object): malfunction_datas = [] for i in range(len(schedule.agent_positions)): malfunction_datas.append({'malfunction': 0, - 'malfunction_rate': schedule.agent_malfunction_rates[i] if schedule.agent_malfunction_rates is not None else 0., + 'malfunction_rate': schedule.agent_malfunction_rates[ + i] if schedule.agent_malfunction_rates is not None else 0., 'next_malfunction': 0, 'nr_malfunctions': 0}) diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py index 168157566c8ab3dfcd923cb0ceb8b8b412b13ceb..8e83688e646b6d02b2bce813522eed8c630d7fc4 100644 --- a/flatland/envs/rail_env.py +++ b/flatland/envs/rail_env.py @@ -1,6 +1,7 @@ """ Definition of the RailEnv environment. 
""" +import random # TODO: _ this is a global method --> utils or remove later from enum import IntEnum from typing import List, NamedTuple, Optional, Dict @@ -196,26 +197,20 @@ class RailEnv(Environment): # Stochastic train malfunctioning parameters if stochastic_data is not None: - prop_malfunction = stochastic_data['prop_malfunction'] mean_malfunction_rate = stochastic_data['malfunction_rate'] malfunction_min_duration = stochastic_data['min_duration'] malfunction_max_duration = stochastic_data['max_duration'] else: - prop_malfunction = 0. mean_malfunction_rate = 0. malfunction_min_duration = 0. malfunction_max_duration = 0. - # percentage of malfunctioning trains - self.proportion_malfunctioning_trains = prop_malfunction - - # Mean malfunction in number of stops + # Mean malfunction in number of time steps self.mean_malfunction_rate = mean_malfunction_rate # Uniform distribution parameters for malfunction duration self.min_number_of_steps_broken = malfunction_min_duration self.max_number_of_steps_broken = malfunction_max_duration - # Reset environment self.valid_positions = None @@ -225,6 +220,7 @@ class RailEnv(Environment): def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) + random.seed(seed) return [seed] # no more agent_handles @@ -254,6 +250,7 @@ class RailEnv(Environment): """ Reset the agents to their starting positions defined in agents_static """ self.agents = EnvAgent.list_from_static(self.agents_static) + self.active_agents = [i for i in range(len(self.agents))] @staticmethod def compute_max_episode_steps(width: int, height: int, ratio_nr_agents_to_nr_cities: float = 20.0) -> int: @@ -371,20 +368,16 @@ class RailEnv(Environment): for i_agent in range(self.get_num_agents()): self.set_agent_active(i_agent) - for i_agent, agent in enumerate(self.agents): - # A proportion of agent in the environment will receive a positive malfunction rate - if self.np_random.rand() < self.proportion_malfunctioning_trains: - 
agent.malfunction_data['malfunction_rate'] = self.mean_malfunction_rate - next_breakdown = int( - self._exp_distirbution_synced(rate=agent.malfunction_data['malfunction_rate'])) - agent.malfunction_data['next_malfunction'] = next_breakdown - agent.malfunction_data['malfunction'] = 0 - - initial_malfunction = self._agent_malfunction(i_agent) + for agent in self.agents: + # Induce malfunctions + self._break_agent(self.mean_malfunction_rate, agent) - if initial_malfunction: + if agent.malfunction_data["malfunction"] > 0: agent.speed_data['transition_action_on_cellexit'] = RailEnvActions.DO_NOTHING + # Fix agents that finished their malfunction + self._fix_agent_after_malfunction(agent) + self.num_resets += 1 self._elapsed_steps = 0 @@ -407,53 +400,47 @@ class RailEnv(Environment): observation_dict: Dict = self._get_observations() return observation_dict, info_dict - def _agent_malfunction(self, i_agent) -> bool: + def _fix_agent_after_malfunction(self, agent: EnvAgent): """ - Returns true if the agent enters into malfunction. (False, if not broken down or already broken down before). 
+ Updates agent malfunction variables and fixes broken agents + + Parameters + ---------- + agent """ - agent = self.agents[i_agent] - # Decrease counter for next event only if agent is currently not broken and agent has a malfunction rate - if agent.malfunction_data['malfunction_rate'] >= 1 and agent.malfunction_data['next_malfunction'] > 0 and \ - agent.malfunction_data['malfunction'] < 1: - agent.malfunction_data['next_malfunction'] -= 1 - - # Only agents that have a positive rate for malfunctions and are not currently broken are considered - # If counter has come to zero --> Agent has malfunction - # set next malfunction time and duration of current malfunction - if agent.malfunction_data['malfunction_rate'] >= 1 and 1 > agent.malfunction_data['malfunction'] and \ - agent.malfunction_data['next_malfunction'] < 1: - # Increase number of malfunctions - agent.malfunction_data['nr_malfunctions'] += 1 - - # Next malfunction in number of stops - next_breakdown = int( - self._exp_distirbution_synced(rate=agent.malfunction_data['malfunction_rate'])) - agent.malfunction_data['next_malfunction'] = max(next_breakdown, 1) - # Duration of current malfunction - num_broken_steps = self.np_random.randint(self.min_number_of_steps_broken, - self.max_number_of_steps_broken + 1) + 1 - agent.malfunction_data['malfunction'] = num_broken_steps - agent.malfunction_data['moving_before_malfunction'] = agent.moving - - return True - else: - # The train was broken before... 
- if agent.malfunction_data['malfunction'] > 0: + # Ignore agents that are OK + if self._is_agent_ok(agent): + return - # Last step of malfunction --> Agent starts moving again after getting fixed - if agent.malfunction_data['malfunction'] < 2: - agent.malfunction_data['malfunction'] -= 1 + # Reduce number of malfunction steps left + if agent.malfunction_data['malfunction'] > 1: + agent.malfunction_data['malfunction'] -= 1 + return - # restore moving state before malfunction without further penalty - self.agents[i_agent].moving = agent.malfunction_data['moving_before_malfunction'] + # Restart agents at the end of their malfunction + agent.malfunction_data['malfunction'] -= 1 + if 'moving_before_malfunction' in agent.malfunction_data: + agent.moving = agent.malfunction_data['moving_before_malfunction'] + return - else: - agent.malfunction_data['malfunction'] -= 1 + def _break_agent(self, rate: float, agent) -> bool: + """ + Malfunction generator that breaks agents at a given rate. - # Nothing left to do with broken agent - return True - return False + Parameters + ---------- + agent + + """ + if agent.malfunction_data['malfunction'] < 1: + if self.np_random.rand() < self._malfunction_prob(rate): + num_broken_steps = self.np_random.randint(self.min_number_of_steps_broken, + self.max_number_of_steps_broken + 1) + 1 + agent.malfunction_data['malfunction'] = num_broken_steps + agent.malfunction_data['moving_before_malfunction'] = agent.moving + agent.malfunction_data['nr_malfunctions'] += 1 + return def step(self, action_dict_: Dict[int, RailEnvActions]): """ @@ -493,10 +480,14 @@ class RailEnv(Environment): "status": {}, } have_all_agents_ended = True # boolean flag to check if all agents are done + for i_agent, agent in enumerate(self.agents): # Reset the step rewards self.rewards_dict[i_agent] = 0 + # Induce malfunction before we do a step, thus a broken agent can't move in this step + self._break_agent(self.mean_malfunction_rate, agent) + # Perform step on the agent 
self._step_agent(i_agent, action_dict_.get(i_agent)) @@ -509,6 +500,9 @@ class RailEnv(Environment): info_dict["speed"][i_agent] = agent.speed_data['speed'] info_dict["status"][i_agent] = agent.status + # Fix agents that finished their malfunction such that they can perform an action in the next step + self._fix_agent_after_malfunction(agent) + # Check for end of episode + set global reward to all rewards! if have_all_agents_ended: self.dones["__all__"] = True @@ -553,12 +547,9 @@ class RailEnv(Environment): agent.old_direction = agent.direction agent.old_position = agent.position - # is the agent malfunctioning? - malfunction = self._agent_malfunction(i_agent) - # if agent is broken, actions are ignored and agent does not move. # full step penalty in this case - if malfunction: + if agent.malfunction_data['malfunction'] > 0: self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed'] return @@ -645,6 +636,7 @@ class RailEnv(Environment): if np.equal(agent.position, agent.target).all(): agent.status = RailAgentStatus.DONE self.dones[i_agent] = True + self.active_agents.remove(i_agent) agent.moving = False self._remove_agent_from_scene(agent) else: @@ -961,7 +953,7 @@ class RailEnv(Environment): load_data = read_binary(package, resource) self.set_full_state_msg(load_data) - def _exp_distirbution_synced(self, rate): + def _exp_distirbution_synced(self, rate: float) -> float: """ Generates sample from exponential distribution We need this to guarantee synchronity between different instances with same seed. @@ -971,3 +963,28 @@ class RailEnv(Environment): u = self.np_random.rand() x = - np.log(1 - u) * rate return x + + def _malfunction_prob(self, rate: float) -> float: + """ + Probability of a single agent to break. According to Poisson process with given rate + :param rate: + :return: + """ + if rate <= 0: + return 0. 
+ else: + return 1 - np.exp(- (1 / rate)) + + def _is_agent_ok(self, agent: EnvAgent) -> bool: + """ + Check if an agent is ok, meaning it can move and is not malfunctioning + Parameters + ---------- + agent + + Returns + ------- + True if agent is ok, False otherwise + + """ + return agent.malfunction_data['malfunction'] < 1 diff --git a/flatland/envs/schedule_generators.py b/flatland/envs/schedule_generators.py index cb8b1537080f34f5851130a67e4e907b7593371a..903b58f956c69d7063bc1fe328e8dae9abf157e8 100644 --- a/flatland/envs/schedule_generators.py +++ b/flatland/envs/schedule_generators.py @@ -187,7 +187,7 @@ def random_schedule_generator(speed_ratio_map: Optional[Mapping[float, float]] = """ def generator(rail: GridTransitionMap, num_agents: int, hints: Any = None, - num_resets: int = 0) -> Schedule: + num_resets: int = 0) -> Schedule: _runtime_seed = seed + num_resets np.random.seed(_runtime_seed) @@ -204,7 +204,7 @@ def random_schedule_generator(speed_ratio_map: Optional[Mapping[float, float]] = if len(valid_positions) < num_agents: warnings.warn("schedule_generators: len(valid_positions) < num_agents") return Schedule(agent_positions=[], agent_directions=[], - agent_targets=[], agent_speeds=[], agent_malfunction_rates=None) + agent_targets=[], agent_speeds=[], agent_malfunction_rates=None) agents_position_idx = [i for i in np.random.choice(len(valid_positions), num_agents, replace=False)] agents_position = [valid_positions[agents_position_idx[i]] for i in range(num_agents)] @@ -304,12 +304,9 @@ def schedule_from_file(filename, load_from_package=None) -> ScheduleGenerator: agents_target = [a.target for a in agents_static] if len(data['agents_static'][0]) > 5: agents_speed = [a.speed_data['speed'] for a in agents_static] - agents_malfunction = [a.malfunction_data['malfunction_rate'] for a in agents_static] else: agents_speed = None - agents_malfunction = None return Schedule(agent_positions=agents_position, agent_directions=agents_direction, - 
agent_targets=agents_target, agent_speeds=agents_speed, - agent_malfunction_rates=agents_malfunction) + agent_targets=agents_target, agent_speeds=agents_speed, agent_malfunction_rates=None) return generator diff --git a/tests/test_flatland_malfunction.py b/tests/test_flatland_malfunction.py index d9fa74ed364aed87ba936f74c39b0e4ab31771c0..e4f2c4789c1e5b87ef8e08a3f6221325c10a41fd 100644 --- a/tests/test_flatland_malfunction.py +++ b/tests/test_flatland_malfunction.py @@ -66,8 +66,7 @@ class SingleAgentNavigationObs(ObservationBuilder): def test_malfunction_process(): # Set fixed malfunction duration for this test - stochastic_data = {'prop_malfunction': 1., - 'malfunction_rate': 1000, + stochastic_data = {'malfunction_rate': 1, 'min_duration': 3, 'max_duration': 3} @@ -84,11 +83,6 @@ def test_malfunction_process(): # reset to initialize agents_static obs, info = env.reset(False, False, True, random_seed=10) - # Check that a initial duration for malfunction was assigned - assert env.agents[0].malfunction_data['next_malfunction'] > 0 - for agent in env.agents: - agent.status = RailAgentStatus.ACTIVE - agent_halts = 0 total_down_time = 0 agent_old_position = env.agents[0].position @@ -101,12 +95,6 @@ def test_malfunction_process(): for i in range(len(obs)): actions[i] = np.argmax(obs[i]) + 1 - if step % 5 == 0: - # Stop the agent and set it to be malfunctioning - env.agents[0].malfunction_data['malfunction'] = -1 - env.agents[0].malfunction_data['next_malfunction'] = 0 - agent_halts += 1 - obs, all_rewards, done, _ = env.step(actions) if env.agents[0].malfunction_data['malfunction'] > 0: @@ -122,12 +110,9 @@ def test_malfunction_process(): total_down_time += env.agents[0].malfunction_data['malfunction'] # Check that the appropriate number of malfunctions is achieved - assert env.agents[0].malfunction_data['nr_malfunctions'] == 20, "Actual {}".format( + assert env.agents[0].malfunction_data['nr_malfunctions'] == 23, "Actual {}".format( 
env.agents[0].malfunction_data['nr_malfunctions']) - # Check that 20 stops where performed - assert agent_halts == 20 - # Check that malfunctioning data was standing around assert total_down_time > 0 @@ -135,8 +120,7 @@ def test_malfunction_process(): def test_malfunction_process_statistically(): """Tests hat malfunctions are produced by stochastic_data!""" # Set fixed malfunction duration for this test - stochastic_data = {'prop_malfunction': 1., - 'malfunction_rate': 5, + stochastic_data = {'malfunction_rate': 5, 'min_duration': 5, 'max_duration': 5} @@ -155,17 +139,18 @@ def test_malfunction_process_statistically(): env.reset(True, True, False, random_seed=10) env.agents[0].target = (0, 0) - - agent_malfunction_list = [[0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0], - [0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 6, 5], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 6, 5, 4], - [0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 6, 5, 4], - [6, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0], - [6, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0, 0, 6, 5, 4, 3], - [0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5], - [0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 6, 5, 4, 3, 2, 1, 0], - [0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1], - [6, 6, 6, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0]] + # Next line only for test generation + #agent_malfunction_list = [[] for i in range(10)] + agent_malfunction_list = [[0, 5, 4, 3, 2, 1, 0, 0, 0, 5, 4, 3, 2, 1, 0, 5, 4, 3, 2, 1], + [0, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 5, 4, 3, 2, 1, 0, 0], + [5, 4, 3, 2, 1, 0, 5, 4, 3, 2, 1, 0, 0, 0, 5, 4, 3, 2, 1, 0], + [0, 5, 4, 3, 2, 1, 0, 0, 0, 5, 4, 3, 2, 1, 0, 5, 4, 3, 2, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1], + [0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 5, 4, 3, 2, 1, 0, 5, 4, 3, 2], 
+ [5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [5, 4, 3, 2, 1, 0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0]] for step in range(20): action_dict: Dict[int, RailEnvActions] = {} @@ -173,16 +158,16 @@ def test_malfunction_process_statistically(): # We randomly select an action action_dict[agent_idx] = RailEnvActions(np.random.randint(4)) # For generating tests only: - # agent_malfunction_list[agent_idx].append(env.agents[agent_idx].malfunction_data['malfunction']) + #agent_malfunction_list[agent_idx].append(env.agents[agent_idx].malfunction_data['malfunction']) assert env.agents[agent_idx].malfunction_data['malfunction'] == agent_malfunction_list[agent_idx][step] env.step(action_dict) + #print(agent_malfunction_list) def test_malfunction_before_entry(): - """Tests that malfunctions are produced by stochastic_data!""" + """Tests that malfunctions are working properly for agents before entering the environment!""" # Set fixed malfunction duration for this test - stochastic_data = {'prop_malfunction': 1., - 'malfunction_rate': 1, + stochastic_data = {'malfunction_rate': 2, 'min_duration': 10, 'max_duration': 10} @@ -191,7 +176,7 @@ def test_malfunction_before_entry(): env = RailEnv(width=25, height=30, rail_generator=rail_from_grid_transition_map(rail), - schedule_generator=random_schedule_generator(seed=2), # seed 12 + schedule_generator=random_schedule_generator(seed=1), # seed 12 number_of_agents=10, random_seed=1, stochastic_data=stochastic_data, # Malfunction data generator @@ -200,46 +185,62 @@ def test_malfunction_before_entry(): env.reset(False, False, False, random_seed=10) env.agents[0].target = (0, 0) - # Print for test generation - assert env.agents[0].malfunction_data['malfunction'] == 11 - assert env.agents[1].malfunction_data['malfunction'] == 11 - assert env.agents[2].malfunction_data['malfunction'] == 11 - assert env.agents[3].malfunction_data['malfunction'] == 11 - assert env.agents[4].malfunction_data['malfunction'] == 11 - assert 
env.agents[5].malfunction_data['malfunction'] == 11 - assert env.agents[6].malfunction_data['malfunction'] == 11 - assert env.agents[7].malfunction_data['malfunction'] == 11 - assert env.agents[8].malfunction_data['malfunction'] == 11 - assert env.agents[9].malfunction_data['malfunction'] == 11 + # Test initial malfunction values for all agents + # we want some agents to be malfunctioning already and some to be working + # we want different next_malfunction values for the agents + assert env.agents[0].malfunction_data['malfunction'] == 0 + assert env.agents[1].malfunction_data['malfunction'] == 0 + assert env.agents[2].malfunction_data['malfunction'] == 10 + assert env.agents[3].malfunction_data['malfunction'] == 0 + assert env.agents[4].malfunction_data['malfunction'] == 0 + assert env.agents[5].malfunction_data['malfunction'] == 0 + assert env.agents[6].malfunction_data['malfunction'] == 0 + assert env.agents[7].malfunction_data['malfunction'] == 0 + assert env.agents[8].malfunction_data['malfunction'] == 10 + assert env.agents[9].malfunction_data['malfunction'] == 10 + + #for a in range(10): + # print("assert env.agents[{}].malfunction_data['malfunction'] == {}".format(a,env.agents[a].malfunction_data['malfunction'])) + + +def test_malfunction_values_and_behavior(): + """ + Test the malfunction counts down as desired + Returns + ------- - for step in range(20): - action_dict: Dict[int, RailEnvActions] = {} - for agent in env.agents: - # We randomly select an action - action_dict[agent.handle] = RailEnvActions(2) - if step < 10: - action_dict[agent.handle] = RailEnvActions(0) + """ + # Set fixed malfunction duration for this test + rail, rail_map = make_simple_rail2() + action_dict: Dict[int, RailEnvActions] = {} + stochastic_data = {'malfunction_rate': 0.001, + 'min_duration': 10, + 'max_duration': 10} + env = RailEnv(width=25, + height=30, + rail_generator=rail_from_grid_transition_map(rail), + schedule_generator=random_schedule_generator(seed=2), # seed 12 + 
stochastic_data=stochastic_data, + number_of_agents=1, + random_seed=1, + ) + + # reset to initialize agents_static + env.reset(False, False, activate_agents=True, random_seed=10) + + # Assertions + assert_list = [9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 10, 9, 8, 7, 6, 5] + print("[") + for time_step in range(15): + # Move in the env env.step(action_dict) - assert env.agents[1].malfunction_data['malfunction'] == 2 - assert env.agents[2].malfunction_data['malfunction'] == 2 - assert env.agents[3].malfunction_data['malfunction'] == 2 - assert env.agents[4].malfunction_data['malfunction'] == 2 - assert env.agents[5].malfunction_data['malfunction'] == 2 - assert env.agents[6].malfunction_data['malfunction'] == 2 - assert env.agents[7].malfunction_data['malfunction'] == 2 - assert env.agents[8].malfunction_data['malfunction'] == 2 - assert env.agents[9].malfunction_data['malfunction'] == 2 - - # for a in range(env.get_num_agents()): - # print("assert env.agents[{}].malfunction_data['malfunction'] == {}".format(a, - # env.agents[a].malfunction_data[ - # 'malfunction'])) + # Check that next_step decreases as expected + assert env.agents[0].malfunction_data['malfunction'] == assert_list[time_step] def test_initial_malfunction(): - stochastic_data = {'prop_malfunction': 1., # Percentage of defective agents - 'malfunction_rate': 100, # Rate of malfunction occurence + stochastic_data = {'malfunction_rate': 1000, # Rate of malfunction occurence 'min_duration': 2, # Minimal duration of malfunction 'max_duration': 5 # Max duration of malfunction } @@ -283,22 +284,22 @@ def test_initial_malfunction(): direction=Grid4TransitionsEnum.EAST, action=RailEnvActions.MOVE_FORWARD, malfunction=1, - reward=env.start_penalty + env.step_penalty * 1.0 - # malfunctioning ends: starting and running at speed 1.0 - ), + reward=env.step_penalty + + ), # malfunctioning ends: starting and running at speed 1.0 Replay( - position=(3, 3), + position=(3, 2), direction=Grid4TransitionsEnum.EAST, 
action=RailEnvActions.MOVE_FORWARD, malfunction=0, - reward=env.step_penalty * 1.0 # running at speed 1.0 + reward=env.start_penalty + env.step_penalty * 1.0 # running at speed 1.0 ), Replay( - position=(3, 4), + position=(3, 3), direction=Grid4TransitionsEnum.EAST, action=RailEnvActions.MOVE_FORWARD, malfunction=0, - reward=env.step_penalty * 1.0 # running at speed 1.0 + reward=env.step_penalty # running at speed 1.0 ) ], speed=env.agents[0].speed_data['speed'], @@ -346,7 +347,7 @@ def test_initial_malfunction_stop_moving(): position=(3, 2), direction=Grid4TransitionsEnum.EAST, action=RailEnvActions.DO_NOTHING, - malfunction=3, + malfunction=2, reward=env.step_penalty, # full step penalty when stopped status=RailAgentStatus.ACTIVE ), @@ -357,7 +358,7 @@ def test_initial_malfunction_stop_moving(): position=(3, 2), direction=Grid4TransitionsEnum.EAST, action=RailEnvActions.STOP_MOVING, - malfunction=2, + malfunction=1, reward=env.step_penalty, # full step penalty while stopped status=RailAgentStatus.ACTIVE ), @@ -366,7 +367,7 @@ def test_initial_malfunction_stop_moving(): position=(3, 2), direction=Grid4TransitionsEnum.EAST, action=RailEnvActions.DO_NOTHING, - malfunction=1, + malfunction=0, reward=env.step_penalty, # full step penalty while stopped status=RailAgentStatus.ACTIVE ), @@ -434,7 +435,7 @@ def test_initial_malfunction_do_nothing(): position=(3, 2), direction=Grid4TransitionsEnum.EAST, action=RailEnvActions.DO_NOTHING, - malfunction=3, + malfunction=2, reward=env.step_penalty, # full step penalty while malfunctioning status=RailAgentStatus.ACTIVE ), @@ -445,7 +446,7 @@ def test_initial_malfunction_do_nothing(): position=(3, 2), direction=Grid4TransitionsEnum.EAST, action=RailEnvActions.DO_NOTHING, - malfunction=2, + malfunction=1, reward=env.step_penalty, # full step penalty while stopped status=RailAgentStatus.ACTIVE ), @@ -454,7 +455,7 @@ def test_initial_malfunction_do_nothing(): position=(3, 2), direction=Grid4TransitionsEnum.EAST, 
action=RailEnvActions.DO_NOTHING, - malfunction=1, + malfunction=0, reward=env.step_penalty, # full step penalty while stopped status=RailAgentStatus.ACTIVE ), @@ -484,45 +485,14 @@ def test_initial_malfunction_do_nothing(): run_replay_config(env, [replay_config], activate_agents=False) -def test_initial_nextmalfunction_not_below_zero(): - random.seed(0) - np.random.seed(0) - - stochastic_data = {'prop_malfunction': 1., # Percentage of defective agents - 'malfunction_rate': 70, # Rate of malfunction occurence - 'min_duration': 2, # Minimal duration of malfunction - 'max_duration': 5 # Max duration of malfunction - } - - rail, rail_map = make_simple_rail2() - - env = RailEnv(width=25, - height=30, - rail_generator=rail_from_grid_transition_map(rail), - schedule_generator=random_schedule_generator(), - number_of_agents=1, - stochastic_data=stochastic_data, # Malfunction data generator - obs_builder_object=SingleAgentNavigationObs() - ) - # reset to initialize agents_static - env.reset() - agent = env.agents[0] - env.step({}) - # was next_malfunction was -1 befor the bugfix https://gitlab.aicrowd.com/flatland/flatland/issues/186 - assert agent.malfunction_data['next_malfunction'] >= 0, \ - "next_malfunction should be >=0, found {}".format(agent.malfunction_data['next_malfunction']) - - def tests_random_interference_from_outside(): """Tests that malfunctions are produced by stochastic_data!""" # Set fixed malfunction duration for this test - stochastic_data = {'prop_malfunction': 1., - 'malfunction_rate': 1, + stochastic_data = {'malfunction_rate': 1, 'min_duration': 10, 'max_duration': 10} rail, rail_map = make_simple_rail2() - env = RailEnv(width=25, height=30, rail_generator=rail_from_grid_transition_map(rail), @@ -534,9 +504,7 @@ def tests_random_interference_from_outside(): env.reset() # reset to initialize agents_static env.agents[0].speed_data['speed'] = 0.33 - env.agents[0].initial_position = (3, 0) - env.agents[0].target = (3, 9) - env.reset(False, False, 
False) + env.reset(False, False, False, random_seed=10) env_data = [] for step in range(200): @@ -567,11 +535,8 @@ def tests_random_interference_from_outside(): env.reset() # reset to initialize agents_static env.agents[0].speed_data['speed'] = 0.33 - env.agents[0].initial_position = (3, 0) - env.agents[0].target = (3, 9) - env.reset(False, False, False) + env.reset(False, False, False, random_seed=10) - # Print for test generation dummy_list = [1, 2, 6, 7, 8, 9, 4, 5, 4] for step in range(200): action_dict: Dict[int, RailEnvActions] = {} @@ -586,3 +551,57 @@ def tests_random_interference_from_outside(): _, reward, _, _ = env.step(action_dict) assert reward[0] == env_data[step][0] assert env.agents[0].position == env_data[step][1] + + +def test_last_malfunction_step(): + """ + Test to check that agent moves when it is not malfunctioning + + """ + + # Set fixed malfunction duration for this test + stochastic_data = {'malfunction_rate': 5, + 'min_duration': 4, + 'max_duration': 4} + + rail, rail_map = make_simple_rail2() + + env = RailEnv(width=25, + height=30, + rail_generator=rail_from_grid_transition_map(rail), + schedule_generator=random_schedule_generator(seed=2), # seed 12 + number_of_agents=1, + random_seed=1, + stochastic_data=stochastic_data, # Malfunction data generator + ) + env.reset() + # reset to initialize agents_static + env.agents[0].speed_data['speed'] = 1. / 3. 
+ env.agents_static[0].target = (0, 0) + + env.reset(False, False, True) + # Force malfunction to be off at beginning and next malfunction to happen in 2 steps + env.agents[0].malfunction_data['next_malfunction'] = 2 + env.agents[0].malfunction_data['malfunction'] = 0 + env_data = [] + for step in range(20): + action_dict: Dict[int, RailEnvActions] = {} + for agent in env.agents: + # Go forward all the time + action_dict[agent.handle] = RailEnvActions(2) + + if env.agents[0].malfunction_data['malfunction'] < 1: + agent_can_move = True + # Store the position before and after the step + pre_position = env.agents[0].speed_data['position_fraction'] + _, reward, _, _ = env.step(action_dict) + # Check if the agent is still allowed to move in this step + + if env.agents[0].malfunction_data['malfunction'] > 0: + agent_can_move = False + post_position = env.agents[0].speed_data['position_fraction'] + # Assert that the agent moved while it was still allowed + if agent_can_move: + assert pre_position != post_position + else: + assert post_position == pre_position diff --git a/tests/test_multi_speed.py b/tests/test_multi_speed.py index 243ea078d0e920aeaab912f81553e17a5f37b1c1..f83990cc39bf73e50719b2291006eed68d1d1360 100644 --- a/tests/test_multi_speed.py +++ b/tests/test_multi_speed.py @@ -437,79 +437,79 @@ def test_multispeed_actions_malfunction_no_blocking(): reward=env.step_penalty * 0.5 # recovered: running at speed 0.5 ), Replay( - position=(3, 7), + position=(3, 8), direction=Grid4TransitionsEnum.WEST, - action=RailEnvActions.MOVE_FORWARD, + action=None, reward=env.step_penalty * 0.5 # running at speed 0.5 ), Replay( position=(3, 7), direction=Grid4TransitionsEnum.WEST, - action=None, + action=RailEnvActions.MOVE_FORWARD, reward=env.step_penalty * 0.5 # running at speed 0.5 ), Replay( - position=(3, 6), + position=(3, 7), direction=Grid4TransitionsEnum.WEST, - action=RailEnvActions.MOVE_FORWARD, + action=None, set_malfunction=2, # recovers in two steps from now! 
malfunction=2, reward=env.step_penalty * 0.5 # step penalty for speed 0.5 when malfunctioning ), # agent recovers in this step; since we're at the beginning, we provide a different action although we're broken! Replay( - position=(3, 6), + position=(3, 7), direction=Grid4TransitionsEnum.WEST, - action=RailEnvActions.MOVE_LEFT, + action=None, malfunction=1, reward=env.step_penalty * 0.5 # running at speed 0.5 ), Replay( - position=(3, 6), + position=(3, 7), direction=Grid4TransitionsEnum.WEST, action=None, reward=env.step_penalty * 0.5 # running at speed 0.5 ), Replay( - position=(4, 6), - direction=Grid4TransitionsEnum.SOUTH, + position=(3, 6), + direction=Grid4TransitionsEnum.WEST, action=RailEnvActions.STOP_MOVING, reward=env.stop_penalty + env.step_penalty * 0.5 # stopping and step penalty for speed 0.5 ), Replay( - position=(4, 6), - direction=Grid4TransitionsEnum.SOUTH, + position=(3, 6), + direction=Grid4TransitionsEnum.WEST, action=RailEnvActions.STOP_MOVING, reward=env.step_penalty * 0.5 # step penalty for speed 0.5 while stopped ), Replay( - position=(4, 6), - direction=Grid4TransitionsEnum.SOUTH, + position=(3, 6), + direction=Grid4TransitionsEnum.WEST, action=RailEnvActions.MOVE_FORWARD, reward=env.start_penalty + env.step_penalty * 0.5 # starting and running at speed 0.5 ), Replay( - position=(4, 6), - direction=Grid4TransitionsEnum.SOUTH, + position=(3, 6), + direction=Grid4TransitionsEnum.WEST, action=None, reward=env.step_penalty * 0.5 # running at speed 0.5 ), # DO_NOTHING keeps moving! 
Replay( - position=(5, 6), - direction=Grid4TransitionsEnum.SOUTH, + position=(3, 5), + direction=Grid4TransitionsEnum.WEST, action=RailEnvActions.DO_NOTHING, reward=env.step_penalty * 0.5 # running at speed 0.5 ), Replay( - position=(5, 6), - direction=Grid4TransitionsEnum.SOUTH, + position=(3, 5), + direction=Grid4TransitionsEnum.WEST, action=None, reward=env.step_penalty * 0.5 # running at speed 0.5 ), Replay( - position=(6, 6), - direction=Grid4TransitionsEnum.SOUTH, + position=(3, 4), + direction=Grid4TransitionsEnum.WEST, action=RailEnvActions.MOVE_FORWARD, reward=env.step_penalty * 0.5 # running at speed 0.5 ), diff --git a/tests/test_utils.py b/tests/test_utils.py index 1a98c161829dedc429465b6606101fd19784cbaa..6dfc6239ed191d06c16feeca5e8d68dbd6654952 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -118,9 +118,8 @@ def run_replay_config(env: RailEnv, test_configs: List[ReplayConfig], rendering: # recognizes the agent as potentially malfuncitoning # We also set next malfunction to infitiy to avoid interference with our tests agent.malfunction_data['malfunction'] = replay.set_malfunction - agent.malfunction_data['malfunction_rate'] = max(agent.malfunction_data['malfunction_rate'], 1) - agent.malfunction_data['next_malfunction'] = np.inf agent.malfunction_data['moving_before_malfunction'] = agent.moving + agent.malfunction_data['fixed'] = False _assert(a, agent.malfunction_data['malfunction'], replay.malfunction, 'malfunction') print(step) _, rewards_dict, _, info_dict = env.step(action_dict)