diff --git a/README.rst b/README.rst index 6811219991cfb849f7d1a2288ab413a4c889c0d7..23d96aa7083814f85f62f9868da284d01912e879 100644 --- a/README.rst +++ b/README.rst @@ -16,7 +16,7 @@ Flatland :align: center Flatland is a opensource toolkit for developing and comparing Multi Agent Reinforcement Learning algorithms in little (or ridiculously large !) gridworlds. -The base environment is a two-dimensional grid in which many agents can be placed, and each agent must solve one or more navigational tasks in the grid world. More details about the environment and the problem statement can be found in the official docs `here <http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/>`_. +The base environment is a two-dimensional grid in which many agents can be placed, and each agent must solve one or more navigational tasks in the grid world. More details about the environment and the problem statement can be found in the `official docs <http://flatland-rl-docs.s3-website.eu-central-1.amazonaws.com/>`_. This library was developed by `SBB <https://www.sbb.ch/en/>`_ , `AIcrowd <https://www.aicrowd.com/>`_ and numerous contributors and AIcrowd research fellows from the AIcrowd community. 
diff --git a/examples/training_example.py b/examples/training_example.py index dd9ded92510be8ec5fa6c222b7259157db920430..313920939aabb8bc63b2198ff77d27a24d699468 100644 --- a/examples/training_example.py +++ b/examples/training_example.py @@ -80,7 +80,6 @@ for trials in range(1, n_trials + 1): # Environment step which returns the observations for all agents, their corresponding # reward and whether their are done next_obs, all_rewards, done, _ = env.step(action_dict) - # Update replay buffer and train agent for a in range(env.get_num_agents()): agent.step((obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])) diff --git a/flatland/__init__.py b/flatland/__init__.py index cccf18e6fc8ff614ce5f8b6eec433a883d474d81..feccf8656b98057e0c7486483d2a6c6281077a37 100644 --- a/flatland/__init__.py +++ b/flatland/__init__.py @@ -4,4 +4,4 @@ __author__ = """S.P. Mohanty""" __email__ = 'mohanty@aicrowd.com' -__version__ = '0.1.2' +__version__ = '0.2.0' diff --git a/flatland/envs/observations.py b/flatland/envs/observations.py index add983c032f6a91c624faa3bfc2fada5036a139b..2192e61f7b162471d2af54a0a5e100a620ad3517 100644 --- a/flatland/envs/observations.py +++ b/flatland/envs/observations.py @@ -209,8 +209,7 @@ class TreeObsForRailEnv(ObservationBuilder): #1: if own target lies on the explored branch the current distance from the agent in number of cells is stored. #2: if another agents target is detected the distance in number of cells from the agents current locaiton - is stored - + is stored #3: if another agent is detected the distance in number of cells from current agent position is stored. 
diff --git a/flatland/envs/predictions.py b/flatland/envs/predictions.py index 2605e84cc96a19e246f4015c97ee4a31d92f6acb..ca675ce209bc7e4aeb96305eb945751bb903b8ed 100644 --- a/flatland/envs/predictions.py +++ b/flatland/envs/predictions.py @@ -140,13 +140,15 @@ class ShortestPathPredictorForRailEnv(PredictionBuilder): new_position = get_new_position(agent.position, new_direction) elif np.sum(cell_transitions) > 1: min_dist = np.inf + no_dist_found = True for direction in range(4): if cell_transitions[direction] == 1: neighbour_cell = get_new_position(agent.position, direction) target_dist = distance_map[agent.handle, neighbour_cell[0], neighbour_cell[1], direction] - if target_dist < min_dist: + if target_dist < min_dist or no_dist_found: min_dist = target_dist new_direction = direction + no_dist_found = False new_position = get_new_position(agent.position, new_direction) else: raise Exception("No transition possible {}".format(cell_transitions)) diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py index 4e8832ecaa665c97f566829d520f07f07f2df381..127f51c2dcc1d6a6dd29f99398e702e09f9a02a5 100644 --- a/flatland/envs/rail_env.py +++ b/flatland/envs/rail_env.py @@ -4,6 +4,7 @@ Definition of the RailEnv environment and related level-generation functions. Generator functions are functions that take width, height and num_resets as arguments and return a GridTransitionMap object. """ +# TODO: _ this is a global method --> utils or remove later from enum import IntEnum @@ -84,6 +85,7 @@ class RailEnv(Environment): a GridTransitionMap object rail_from_manual_sp ecifications_generator(rail_spec) : generate a rail from a rail specifications array + TODO: generate_rail_from_saved_list or from list of ndarray bitmaps --- width : int The width of the rail map. Potentially in the future, a range of widths to sample from. 
@@ -107,7 +109,7 @@ class RailEnv(Environment): self.obs_builder._set_env(self) self.action_space = [1] - self.observation_space = self.obs_builder.observation_space + self.observation_space = self.obs_builder.observation_space # updated on resets? self.rewards = [0] * number_of_agents self.done = False @@ -163,8 +165,8 @@ class RailEnv(Environment): self.restart_agents() - for iAgent in range(self.get_num_agents()): - agent = self.agents[iAgent] + for i_agent in range(self.get_num_agents()): + agent = self.agents[i_agent] agent.speed_data['position_fraction'] = 0.0 self.num_resets += 1 @@ -200,7 +202,9 @@ class RailEnv(Environment): self.rewards_dict = {i: r + global_reward for i, r in self.rewards_dict.items()} return self._get_observations(), self.rewards_dict, self.dones, {} - for i_agent, agent in enumerate(self.agents): + # for i in range(len(self.agents_handles)): + for i_agent in range(self.get_num_agents()): + agent = self.agents[i_agent] agent.old_direction = agent.direction agent.old_position = agent.position if self.dones[i_agent]: # this agent has already completed... @@ -227,7 +231,7 @@ class RailEnv(Environment): self.rewards_dict[i_agent] += stop_penalty if not agent.moving and not (action == RailEnvActions.DO_NOTHING or action == RailEnvActions.STOP_MOVING): - # Only allow agent to start moving by pressing forward. 
+ # Allow agent to start with any forward or direction action agent.moving = True self.rewards_dict[i_agent] += start_penalty @@ -245,10 +249,10 @@ class RailEnv(Environment): action_selected = False if agent.speed_data['position_fraction'] == 0.: if action != RailEnvActions.DO_NOTHING and action != RailEnvActions.STOP_MOVING: - cell_isFree, new_cell_isValid, new_direction, new_position, transition_isValid = \ + cell_free, new_cell_valid, new_direction, new_position, transition_valid = \ self._check_action_on_agent(action, agent) - if all([new_cell_isValid, transition_isValid]): + if all([new_cell_valid, transition_valid]): agent.speed_data['transition_action_on_cellexit'] = action action_selected = True @@ -256,26 +260,26 @@ class RailEnv(Environment): # But, if the chosen invalid action was LEFT/RIGHT, and the agent is moving, # try to keep moving forward! if (action == RailEnvActions.MOVE_LEFT or action == RailEnvActions.MOVE_RIGHT) and agent.moving: - cell_isFree, new_cell_isValid, new_direction, new_position, transition_isValid = \ + cell_free, new_cell_valid, new_direction, new_position, transition_valid = \ self._check_action_on_agent(RailEnvActions.MOVE_FORWARD, agent) - if all([new_cell_isValid, transition_isValid]): + if all([new_cell_valid, transition_valid]): agent.speed_data['transition_action_on_cellexit'] = RailEnvActions.MOVE_FORWARD action_selected = True else: # TODO: an invalid action was chosen after entering the cell. The agent cannot move. self.rewards_dict[i_agent] += invalid_action_penalty - agent.moving = False + self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed'] self.rewards_dict[i_agent] += stop_penalty - + agent.moving = False continue else: # TODO: an invalid action was chosen after entering the cell. The agent cannot move. 
self.rewards_dict[i_agent] += invalid_action_penalty - agent.moving = False + self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed'] self.rewards_dict[i_agent] += stop_penalty - + agent.moving = False continue if agent.moving and (action_selected or agent.speed_data['position_fraction'] > 0.0): @@ -287,10 +291,10 @@ class RailEnv(Environment): # Now 'transition_action_on_cellexit' will be guaranteed to be valid; it was checked on entering # the cell - cell_isFree, new_cell_isValid, new_direction, new_position, transition_isValid = \ + cell_free, new_cell_valid, new_direction, new_position, transition_valid = \ self._check_action_on_agent(agent.speed_data['transition_action_on_cellexit'], agent) - if all([new_cell_isValid, transition_isValid, cell_isFree]): + if all([new_cell_valid, transition_valid, cell_free]): agent.position = new_position agent.direction = new_direction agent.speed_data['position_fraction'] = 0.0 @@ -310,14 +314,14 @@ class RailEnv(Environment): def _check_action_on_agent(self, action, agent): # compute number of possible transitions in the current # cell used to check for invalid actions - new_direction, transition_isValid = self.check_action(agent, action) + new_direction, transition_valid = self.check_action(agent, action) new_position = get_new_position(agent.position, new_direction) # Is it a legal move? # 1) transition allows the new_direction in the cell, # 2) the new cell is not empty (case 0), # 3) the cell is free, i.e., no agent is currently in that cell - new_cell_isValid = ( + new_cell_valid = ( np.array_equal( # Check the new position is still in the grid new_position, np.clip(new_position, [0, 0], [self.height - 1, self.width - 1])) @@ -325,19 +329,19 @@ class RailEnv(Environment): self.rail.get_full_transitions(*new_position) > 0) # If transition validity hasn't been checked yet. 
- if transition_isValid is None: - transition_isValid = self.rail.get_transition( + if transition_valid is None: + transition_valid = self.rail.get_transition( (*agent.position, agent.direction), new_direction) # Check the new position is not the same as any of the existing agent positions # (including itself, for simplicity, since it is moving) - cell_isFree = not np.any( + cell_free = not np.any( np.equal(new_position, [agent2.position for agent2 in self.agents]).all(1)) - return cell_isFree, new_cell_isValid, new_direction, new_position, transition_isValid + return cell_free, new_cell_valid, new_direction, new_position, transition_valid def check_action(self, agent, action): - transition_isValid = None + transition_valid = None possible_transitions = self.rail.get_transitions(*agent.position, agent.direction) num_transitions = np.count_nonzero(possible_transitions) @@ -345,12 +349,12 @@ class RailEnv(Environment): if action == RailEnvActions.MOVE_LEFT: new_direction = agent.direction - 1 if num_transitions <= 1: - transition_isValid = False + transition_valid = False elif action == RailEnvActions.MOVE_RIGHT: new_direction = agent.direction + 1 if num_transitions <= 1: - transition_isValid = False + transition_valid = False new_direction %= 4 @@ -360,8 +364,8 @@ class RailEnv(Environment): # new_direction will be the only valid transition # - take only available transition new_direction = np.argmax(possible_transitions) - transition_isValid = True - return new_direction, transition_isValid + transition_valid = True + return new_direction, transition_valid def _get_observations(self): self.obs_dict = self.obs_builder.get_many(list(range(self.get_num_agents()))) diff --git a/setup.cfg b/setup.cfg index dd16bd77df8727f52ed2f3102b1042799271a611..d83f09f0c50a153fce8c40c22cf7d496eb02304b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.2 +current_version = 0.2.0 commit = True tag = True diff --git a/setup.py b/setup.py index 
3cc28b34f7acf0d05958606c79beef439632035f..900dada07e587333db5459bc4a482be080f241b9 100644 --- a/setup.py +++ b/setup.py @@ -114,6 +114,6 @@ setup( test_suite='tests', tests_require=test_requirements, url='https://gitlab.aicrowd.com/flatland/flatland', - version='0.1.2', + version='0.2.0', zip_safe=False, ) diff --git a/tests/test_distance_map.py b/tests/test_distance_map.py new file mode 100644 index 0000000000000000000000000000000000000000..79f4bab164312f757ad584bd3708a7af3fb7a97e --- /dev/null +++ b/tests/test_distance_map.py @@ -0,0 +1,56 @@ +import numpy as np + +from flatland.core.grid.grid4 import Grid4Transitions +from flatland.core.transition_map import GridTransitionMap +from flatland.envs.generators import rail_from_GridTransitionMap_generator +from flatland.envs.observations import TreeObsForRailEnv +from flatland.envs.predictions import ShortestPathPredictorForRailEnv +from flatland.envs.rail_env import RailEnv + + +def test_walker(): + # _ _ _ + + cells = [int('0000000000000000', 2), # empty cell - Case 0 + int('1000000000100000', 2), # Case 1 - straight + int('1001001000100000', 2), # Case 2 - simple switch + int('1000010000100001', 2), # Case 3 - diamond crossing + int('1001011000100001', 2), # Case 4 - single slip switch + int('1100110000110011', 2), # Case 5 - double slip switch + int('0101001000000010', 2), # Case 6 - symmetrical switch + int('0010000000000000', 2)] # Case 7 - dead end + transitions = Grid4Transitions([]) + dead_end_from_south = cells[7] + dead_end_from_west = transitions.rotate_transition(dead_end_from_south, 90) + dead_end_from_east = transitions.rotate_transition(dead_end_from_south, 270) + vertical_straight = cells[1] + horizontal_straight = transitions.rotate_transition(vertical_straight, 90) + + rail_map = np.array( + [[dead_end_from_east] + [horizontal_straight] + [dead_end_from_west]], dtype=np.uint16) + rail = GridTransitionMap(width=rail_map.shape[1], + height=rail_map.shape[0], transitions=transitions) + rail.grid = 
rail_map + env = RailEnv(width=rail_map.shape[1], + height=rail_map.shape[0], + rail_generator=rail_from_GridTransitionMap_generator(rail), + number_of_agents=1, + obs_builder_object=TreeObsForRailEnv(max_depth=2, + predictor=ShortestPathPredictorForRailEnv(max_depth=10)), + ) + # reset to initialize agents_static + env.reset() + + # set initial position and direction for testing... + env.agents_static[0].position = (0, 1) + env.agents_static[0].direction = 1 + env.agents_static[0].target = (0, 0) + + # reset to set agents from agents_static + env.reset(False, False) + obs_builder: TreeObsForRailEnv = env.obs_builder + + print(obs_builder.distance_map[(0, *[0, 1], 1)]) + assert obs_builder.distance_map[(0, *[0, 1], 1)] == 3 + print(obs_builder.distance_map[(0, *[0, 2], 3)]) + assert obs_builder.distance_map[(0, *[0, 2], 1)] == 2 # does not work yet, Erik's proposal.