diff --git a/examples/simple_example_3.py b/examples/simple_example_3.py index 1661ef65a9a33f3b44a098caaf83317919722398..e015b3c88cf05a8d047f15dfaf88e8a2fd9ce789 100644 --- a/examples/simple_example_3.py +++ b/examples/simple_example_3.py @@ -2,7 +2,7 @@ import random import numpy as np -from flatland.envs.generators import random_rail_generator +from flatland.envs.generators import random_rail_generator, complex_rail_generator from flatland.envs.observations import TreeObsForRailEnv from flatland.envs.rail_env import RailEnv from flatland.utils.rendertools import RenderTool diff --git a/flatland/envs/agent_utils.py b/flatland/envs/agent_utils.py index 8e9ffb99d06176416dfbe2b65bcca09723b1a56c..aa46aecd4b69b6a13b11b63223123b16dd69e3ac 100644 --- a/flatland/envs/agent_utils.py +++ b/flatland/envs/agent_utils.py @@ -28,19 +28,34 @@ class EnvAgentStatic(object): position = attrib() direction = attrib() target = attrib() - moving = attrib() - - def __init__(self, position, direction, target, moving=False): + moving = attrib(default=False) + # speed_data: speed is added to position_fraction on each moving step, until position_fraction>=1.0, + # after which 'transition_action_on_cellexit' is executed (equivalent to executing that action in the previous + # cell if speed=1, as default) + speed_data = attrib(default=dict({'position_fraction': 0.0, 'speed': 1.0, 'transition_action_on_cellexit': 0})) + + def __init__(self, + position, + direction, + target, + moving=False, + speed_data={'position_fraction': 0.0, 'speed': 1.0, 'transition_action_on_cellexit': 0}): self.position = position self.direction = direction self.target = target self.moving = moving + self.speed_data = speed_data @classmethod - def from_lists(cls, positions, directions, targets): + def from_lists(cls, positions, directions, targets, speeds=None): """ Create a list of EnvAgentStatics from lists of positions, directions and targets """ - return list(starmap(EnvAgentStatic, zip(positions, directions, 
targets, [False] * len(positions)))) + speed_datas = [] + for i in range(len(positions)): + speed_datas.append({'position_fraction': 0.0, + 'speed': speeds[i] if speeds is not None else 1.0, + 'transition_action_on_cellexit': 0}) + return list(starmap(EnvAgentStatic, zip(positions, directions, targets, [False] * len(positions), speed_datas))) def to_list(self): @@ -54,7 +69,7 @@ class EnvAgentStatic(object): if type(lTarget) is np.ndarray: lTarget = lTarget.tolist() - return [lPos, int(self.direction), lTarget, int(self.moving)] + return [lPos, int(self.direction), lTarget, int(self.moving), self.speed_data] @attrs @@ -78,7 +93,7 @@ class EnvAgent(EnvAgentStatic): def to_list(self): return [ self.position, self.direction, self.target, self.handle, - self.old_direction, self.old_position, self.moving] + self.old_direction, self.old_position, self.moving, self.speed_data] @classmethod def from_static(cls, oStatic): diff --git a/flatland/envs/generators.py b/flatland/envs/generators.py index f644bc120d4b514f1c54e0330cfc8dc4654a4f4e..ca14667424d2c93d1466e3b7e96c2e5c1fbd41e5 100644 --- a/flatland/envs/generators.py +++ b/flatland/envs/generators.py @@ -18,7 +18,7 @@ def empty_rail_generator(): rail_array = grid_map.grid rail_array.fill(0) - return grid_map, [], [], [] + return grid_map, [], [], [], [] return generator @@ -75,8 +75,9 @@ def complex_rail_generator(nr_start_goal=1, nr_extra=100, min_dist=20, max_dist= while nr_created < nr_start_goal and created_sanity < sanity_max: all_ok = False for _ in range(sanity_max): - start = (np.random.randint(0, width), np.random.randint(0, height)) - goal = (np.random.randint(0, height), np.random.randint(0, height)) + start = (np.random.randint(0, height), np.random.randint(0, width)) + goal = (np.random.randint(0, height), np.random.randint(0, width)) + # check to make sure start,goal pos is empty? 
if rail_array[goal] != 0 or rail_array[start] != 0: continue @@ -121,8 +122,8 @@ def complex_rail_generator(nr_start_goal=1, nr_extra=100, min_dist=20, max_dist= while nr_created < nr_extra and created_sanity < sanity_max: all_ok = False for _ in range(sanity_max): - start = (np.random.randint(0, width), np.random.randint(0, height)) - goal = (np.random.randint(0, height), np.random.randint(0, height)) + start = (np.random.randint(0, height), np.random.randint(0, width)) + goal = (np.random.randint(0, height), np.random.randint(0, width)) # check to make sure start,goal pos are not empty if rail_array[goal] == 0 or rail_array[start] == 0: continue @@ -139,7 +140,7 @@ def complex_rail_generator(nr_start_goal=1, nr_extra=100, min_dist=20, max_dist= agents_target = [sg[1] for sg in start_goal[:num_agents]] agents_direction = start_dir[:num_agents] - return grid_map, agents_position, agents_direction, agents_target + return grid_map, agents_position, agents_direction, agents_target, [1.0]*len(agents_position) return generator @@ -183,7 +184,7 @@ def rail_from_manual_specifications_generator(rail_spec): rail, num_agents) - return rail, agents_position, agents_direction, agents_target + return rail, agents_position, agents_direction, agents_target, [1.0]*len(agents_position) return generator @@ -209,7 +210,7 @@ def rail_from_GridTransitionMap_generator(rail_map): rail_map, num_agents) - return rail_map, agents_position, agents_direction, agents_target + return rail_map, agents_position, agents_direction, agents_target, [1.0]*len(agents_position) return generator @@ -482,6 +483,6 @@ def random_rail_generator(cell_type_relative_proportion=[1.0] * 11): return_rail, num_agents) - return return_rail, agents_position, agents_direction, agents_target + return return_rail, agents_position, agents_direction, agents_target, [1.0]*len(agents_position) return generator diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py index 
c22e1c5120b54a170f9c59bb54c7666ca910f086..8cf6d52f383ec8f4e271eb0765d32bc0c763307a 100644 --- a/flatland/envs/rail_env.py +++ b/flatland/envs/rail_env.py @@ -73,7 +73,7 @@ class RailEnv(Environment): random_rail_generator : generate a random rail of given size rail_from_GridTransitionMap_generator(rail_map) : generate a rail from a GridTransitionMap object - rail_from_manual_specifications_generator(rail_spec) : generate a rail from + rail_from_manual_specifications_generator(rail_spec) : generate a rail from a rail specifications array TODO: generate_rail_from_saved_list or from list of ndarray bitmaps --- width : int @@ -101,7 +101,6 @@ class RailEnv(Environment): self.action_space = [1] self.observation_space = self.obs_builder.observation_space # updated on resets? - self.actions = [0] * number_of_agents self.rewards = [0] * number_of_agents self.done = False @@ -152,7 +151,7 @@ class RailEnv(Environment): self.rail = tRailAgents[0] if replace_agents: - self.agents_static = EnvAgentStatic.from_lists(*tRailAgents[1:4]) + self.agents_static = EnvAgentStatic.from_lists(*tRailAgents[1:5]) self.restart_agents() @@ -193,28 +192,26 @@ class RailEnv(Environment): for iAgent in range(self.get_num_agents()): agent = self.agents[iAgent] - if iAgent not in action_dict: # no action has been supplied for this agent - if agent.moving: - # Keep moving - # Change MOVE_FORWARD to DO_NOTHING - action_dict[iAgent] = RailEnvActions.DO_NOTHING - else: - action_dict[iAgent] = RailEnvActions.DO_NOTHING - if self.dones[iAgent]: # this agent has already completed... 
continue - action = action_dict[iAgent] - if action < 0 or action > len(RailEnvActions): - print('ERROR: illegal action=', action, - 'for agent with index=', iAgent) - return + if iAgent not in action_dict: # no action has been supplied for this agent + action_dict[iAgent] = RailEnvActions.DO_NOTHING + + if action_dict[iAgent] < 0 or action_dict[iAgent] > len(RailEnvActions): + print('ERROR: illegal action=', action_dict[iAgent], + 'for agent with index=', iAgent, + '"DO NOTHING" will be executed instead') + action_dict[iAgent] = RailEnvActions.DO_NOTHING + + action = action_dict[iAgent] if action == RailEnvActions.DO_NOTHING and agent.moving: # Keep moving action = RailEnvActions.MOVE_FORWARD - if action == RailEnvActions.STOP_MOVING and agent.moving: + if action == RailEnvActions.STOP_MOVING and agent.moving and agent.speed_data['position_fraction'] < 0.01: + # Only allow halting an agent on entering new cells. agent.moving = False self.rewards_dict[iAgent] += stop_penalty @@ -223,47 +220,78 @@ class RailEnv(Environment): agent.moving = True self.rewards_dict[iAgent] += start_penalty - if action != RailEnvActions.DO_NOTHING and action != RailEnvActions.STOP_MOVING: - cell_isFree, new_cell_isValid, new_direction, new_position, transition_isValid = \ - self._check_action_on_agent(action, agent) - if all([new_cell_isValid, transition_isValid, cell_isFree]): - agent.old_direction = agent.direction - agent.old_position = agent.position - agent.position = new_position - agent.direction = new_direction - else: - # Logic: if the chosen action is invalid, - # and it was LEFT or RIGHT, and the agent was moving, then keep moving FORWARD. 
- if (action == RailEnvActions.MOVE_LEFT or action == RailEnvActions.MOVE_RIGHT) and agent.moving: - cell_isFree, new_cell_isValid, new_direction, new_position, transition_isValid = \ - self._check_action_on_agent(RailEnvActions.MOVE_FORWARD, agent) - - if all([new_cell_isValid, transition_isValid, cell_isFree]): - agent.old_direction = agent.direction - agent.old_position = agent.position - agent.position = new_position - agent.direction = new_direction + # Now perform a movement. + # If the agent is in an initial position within a new cell (agent.speed_data['position_fraction']<eps) + # store the desired action in `transition_action_on_cellexit' (only if the desired transition is + # allowed! otherwise DO_NOTHING!) + # Then in any case (if agent.moving) and the `transition_action_on_cellexit' is valid, increment the + # position_fraction by the speed of the agent (regardless of action taken, as long as no + # STOP_MOVING, but that makes agent.moving=False) + # If the new position fraction is >= 1, reset to 0, and perform the stored + # transition_action_on_cellexit + + # If the agent can make an action + action_selected = False + if agent.speed_data['position_fraction'] < 0.01: + if action != RailEnvActions.DO_NOTHING and action != RailEnvActions.STOP_MOVING: + cell_isFree, new_cell_isValid, new_direction, new_position, transition_isValid = \ + self._check_action_on_agent(action, agent) + + if all([new_cell_isValid, transition_isValid, cell_isFree]): + agent.speed_data['transition_action_on_cellexit'] = action + action_selected = True + + else: + # But, if the chosen invalid action was LEFT/RIGHT, and the agent is moving, + # try to keep moving forward! 
+ if (action == RailEnvActions.MOVE_LEFT or action == RailEnvActions.MOVE_RIGHT) and agent.moving: + cell_isFree, new_cell_isValid, new_direction, new_position, transition_isValid = \ + self._check_action_on_agent(RailEnvActions.MOVE_FORWARD, agent) + + if all([new_cell_isValid, transition_isValid, cell_isFree]): + agent.speed_data['transition_action_on_cellexit'] = RailEnvActions.MOVE_FORWARD + action_selected = True + + else: + # TODO: an invalid action was chosen after entering the cell. The agent cannot move. + self.rewards_dict[iAgent] += invalid_action_penalty + agent.moving = False + self.rewards_dict[iAgent] += stop_penalty + continue else: - # the action was not valid, add penalty + # TODO: an invalid action was chosen after entering the cell. The agent cannot move. self.rewards_dict[iAgent] += invalid_action_penalty + agent.moving = False + self.rewards_dict[iAgent] += stop_penalty + continue - else: - # the action was not valid, add penalty - self.rewards_dict[iAgent] += invalid_action_penalty + if agent.moving and (action_selected or agent.speed_data['position_fraction'] >= 0.01): + agent.speed_data['position_fraction'] += agent.speed_data['speed'] + + if agent.speed_data['position_fraction'] >= 1.0: + agent.speed_data['position_fraction'] = 0.0 + + # Perform stored action to transition to the next cell + + # Now 'transition_action_on_cellexit' will be guaranteed to be valid; it was checked on entering + # the cell + cell_isFree, new_cell_isValid, new_direction, new_position, transition_isValid = \ + self._check_action_on_agent(agent.speed_data['transition_action_on_cellexit'], agent) + agent.old_direction = agent.direction + agent.old_position = agent.position + agent.position = new_position + agent.direction = new_direction if np.equal(agent.position, agent.target).all(): self.dones[iAgent] = True else: - self.rewards_dict[iAgent] += step_penalty + self.rewards_dict[iAgent] += step_penalty * agent.speed_data['speed'] # Check for end of episode + add 
global reward to all rewards! if np.all([np.array_equal(agent2.position, agent2.target) for agent2 in self.agents]): self.dones["__all__"] = True self.rewards_dict = [0 * r + global_reward for r in self.rewards_dict] - # Reset the step actions (in case some agent doesn't 'register_action' - # on the next step) - self.actions = [0] * self.get_num_agents() return self._get_observations(), self.rewards_dict, self.dones, {} def _check_action_on_agent(self, action, agent): @@ -271,6 +299,7 @@ class RailEnv(Environment): # cell used to check for invalid actions new_direction, transition_isValid = self.check_action(agent, action) new_position = get_new_position(agent.position, new_direction) + # Is it a legal move? # 1) transition allows the new_direction in the cell, # 2) the new cell is not empty (case 0), @@ -281,11 +310,13 @@ class RailEnv(Environment): np.clip(new_position, [0, 0], [self.height - 1, self.width - 1])) and # check the new position has some transitions (ie is not an empty cell) self.rail.get_transitions(new_position) > 0) + # If transition validity hasn't been checked yet. 
if transition_isValid is None: transition_isValid = self.rail.get_transition( (*agent.position, agent.direction), new_direction) + # Check the new position is not the same as any of the existing agent positions # (including itself, for simplicity, since it is moving) cell_isFree = not np.any( diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/test_environments.py b/tests/test_environments.py index 11f0acba2fd54df63c62047f8559897e7d222e72..aa24467dd1d548a2b68a408f300089ee8135c639 100644 --- a/tests/test_environments.py +++ b/tests/test_environments.py @@ -3,7 +3,7 @@ import numpy as np from flatland.core.transition_map import GridTransitionMap -from flatland.core.transitions import Grid4Transitions +from flatland.core.transitions import Grid4Transitions, RailEnvTransitions from flatland.envs.agent_utils import EnvAgent from flatland.envs.generators import complex_rail_generator from flatland.envs.generators import rail_from_GridTransitionMap_generator @@ -53,7 +53,7 @@ def test_rail_environment_single_agent(): # | | | # \_/\_/ - transitions = Grid4Transitions([]) + transitions = RailEnvTransitions() vertical_line = cells[1] south_symmetrical_switch = cells[6] north_symmetrical_switch = transitions.rotate_transition(south_symmetrical_switch, 180) @@ -107,6 +107,7 @@ def test_rail_environment_single_agent(): if prev_pos != pos: valid_active_actions_done += 1 + # After 6 movements on this railway network, the train should be back # to its original height on the map. assert (initial_pos[0] == agent.position[0]) @@ -121,9 +122,9 @@ def test_rail_environment_single_agent(): action = np.random.randint(4) _, _, dones, _ = rail_env.step({0: action}) - done = dones['__all__'] +test_rail_environment_single_agent() def test_dead_end(): transitions = Grid4Transitions([])