Commit 2c4e7be6 authored by u229589

implement review suggestions from Christian Eichenberger

parent 44229fbe
@@ -12,7 +12,7 @@ Changes since Flatland 2.0.0
- by default the reset method of RailEnv is not called in the constructor of RailEnv anymore. Therefore the reset method needs to be called after the creation of a RailEnv object
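For illustration, a minimal sketch of the new usage pattern (import paths and constructor arguments assume the Flatland 2.x layout; the generators and observation builder shown are placeholders, adapt them to your setup):
```python
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator

# The constructor no longer calls reset() ...
env = RailEnv(width=25, height=25,
              rail_generator=sparse_rail_generator(),          # generator defaults; adjust as needed
              schedule_generator=sparse_schedule_generator(),
              number_of_agents=5)
# ... so it has to be called explicitly before the first step.
env.reset()
```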
### Changes in schedule generation
- return value of schedule generator has changed to the named tuple Schedule
- return value of schedule generator has changed to the named tuple `Schedule`
Changes since Flatland 1.0.0
--------------------------
@@ -697,3 +697,17 @@ RailEnv.step()
self.get()
...
```
### Maximum number of allowed time steps in an episode
Whenever the schedule within RailEnv is generated, the maximum number of allowed time steps in an episode is calculated
according to the following formula:
```python
RailEnv._max_episode_steps = timedelay_factor * alpha * (env.width + env.height + ratio_nr_agents_to_nr_cities)
```
where the following default values are used: `timedelay_factor=4`, `alpha=2` and `ratio_nr_agents_to_nr_cities=20`.
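As a quick worked example (not part of the changelog itself), a hypothetical 25x25 environment with the default values yields a budget of 560 steps:
```python
# Hedged sketch: evaluate the formula above with its defaults for a 25x25 grid.
timedelay_factor, alpha, ratio_nr_agents_to_nr_cities = 4, 2, 20
width, height = 25, 25
max_episode_steps = int(timedelay_factor * alpha * (width + height + ratio_nr_agents_to_nr_cities))
print(max_episode_steps)  # 4 * 2 * (25 + 25 + 20) = 560
```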
@@ -169,7 +169,7 @@ class RailEnv(Environment):
self.obs_builder = obs_builder_object
self.obs_builder.set_env(self)
self._max_episode_steps = None
self._max_episode_steps: Optional[int] = None
self._elapsed_steps = 0
self.dones = dict.fromkeys(list(range(number_of_agents)) + ["__all__"], False)
@@ -249,6 +249,35 @@ class RailEnv(Environment):
"""
self.agents = EnvAgent.list_from_static(self.agents_static)
@staticmethod
def compute_max_episode_steps(width: int, height: int, timedelay_factor: int = 4, alpha: int = 2,
ratio_nr_agents_to_nr_cities: float = 20.0) -> int:
"""
compute_max_episode_steps(width, height, timedelay_factor=4, alpha=2, ratio_nr_agents_to_nr_cities=20.0)
Computes the maximum number of episode steps allowed.
Parameters
----------
width : int
width of environment
height : int
height of environment
timedelay_factor : int, optional
scaling factor applied in the formula (default 4)
alpha : int, optional
scaling factor applied in the formula (default 2)
ratio_nr_agents_to_nr_cities : float, optional
number_of_agents/number_of_cities (default 20.0)
Returns
-------
max_episode_steps: int
maximum number of episode steps
"""
return int(timedelay_factor * alpha * (width + height + ratio_nr_agents_to_nr_cities))
def reset(self, regen_rail=True, replace_agents=True, activate_agents=False, random_seed=None):
""" if regen_rail then regenerate the rails.
if replace_agents then regenerate the agents static.
@@ -282,7 +311,14 @@ class RailEnv(Environment):
# why do we need static agents? could we do it more elegantly?
schedule = self.schedule_generator(self.rail, self.get_num_agents(), agents_hints, self.num_resets)
self.agents_static = EnvAgentStatic.from_lists(schedule)
self._max_episode_steps = schedule.max_episode_steps
if agents_hints and 'city_orientations' in agents_hints:
ratio_nr_agents_to_nr_cities = self.get_num_agents() / len(agents_hints['city_orientations'])
self._max_episode_steps = self.compute_max_episode_steps(
width=self.width, height=self.height,
ratio_nr_agents_to_nr_cities=ratio_nr_agents_to_nr_cities)
else:
self._max_episode_steps = self.compute_max_episode_steps(width=self.width, height=self.height)
self.restart_agents()
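For reference, a hedged usage sketch of the new static method (editorial, not part of the diff; the sizes below are hypothetical), mirroring the two branches above:
```python
# Hypothetical sizes: a 30x30 grid with 10 agents spread over 4 cities.
width, height, num_agents, num_cities = 30, 30, 10, 4

# With city hints available, reset() passes the agents-to-cities ratio:
with_hints = RailEnv.compute_max_episode_steps(
    width=width, height=height,
    ratio_nr_agents_to_nr_cities=num_agents / num_cities)  # 4 * 2 * (30 + 30 + 2.5) = 500

# Without hints, the default ratio of 20.0 applies:
without_hints = RailEnv.compute_max_episode_steps(width=width, height=height)  # 4 * 2 * (30 + 30 + 20) = 640
```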
@@ -42,29 +42,6 @@ def speed_initialization_helper(nb_agents: int, speed_ratio_map: Mapping[float,
return list(map(lambda index: speeds[index], np.random.choice(nb_classes, nb_agents, p=speed_ratios)))
def compute_max_episode_steps(width: int,
height: int,
ratio_nr_agents_to_nr_cities: float = 20.0,
timedelay_factor: int = 4,
alpha: int = 2) -> int:
"""
The method computes the max number of episode steps allowed
Parameters
----------
width: width of environment
height: height of environment
ratio_nr_agents_to_nr_cities: number_of_agents/number_of_cities (default is 20)
timedelay_factor
alpha
Returns max number of episode steps
-------
"""
return int(timedelay_factor * alpha * (width + height + ratio_nr_agents_to_nr_cities))
def complex_schedule_generator(speed_ratio_map: Mapping[float, float] = None, seed: int = 1) -> ScheduleGenerator:
def generator(rail: GridTransitionMap, num_agents: int, hints: Any = None, num_resets: int = 0) -> Schedule:
@@ -82,10 +59,8 @@ def complex_schedule_generator(speed_ratio_map: Mapping[float, float] = None, se
else:
speeds = [1.0] * len(agents_position)
max_episode_steps = compute_max_episode_steps(width=rail.width, height=rail.height)
return Schedule(agent_positions=agents_position, agent_directions=agents_direction,
agent_targets=agents_target, agent_speeds=speeds, agent_malfunction_rates=None,
max_episode_steps=max_episode_steps)
agent_targets=agents_target, agent_speeds=speeds, agent_malfunction_rates=None)
return generator
@@ -151,11 +126,8 @@ def sparse_schedule_generator(speed_ratio_map: Mapping[float, float] = None, see
else:
speeds = [1.0] * len(agents_position)
max_episode_steps = compute_max_episode_steps(width=rail.width, height=rail.height,
ratio_nr_agents_to_nr_cities=num_agents/len(city_orientations))
return Schedule(agent_positions=agents_position, agent_directions=agents_direction,
agent_targets=agents_target, agent_speeds=speeds, agent_malfunction_rates=None,
max_episode_steps=max_episode_steps)
agent_targets=agents_target, agent_speeds=speeds, agent_malfunction_rates=None)
return generator
@@ -182,8 +154,6 @@ def random_schedule_generator(speed_ratio_map: Optional[Mapping[float, float]] =
np.random.seed(_runtime_seed)
max_episode_steps = compute_max_episode_steps(width=rail.width, height=rail.height)
valid_positions = []
for r in range(rail.height):
for c in range(rail.width):
@@ -191,14 +161,12 @@ def random_schedule_generator(speed_ratio_map: Optional[Mapping[float, float]] =
valid_positions.append((r, c))
if len(valid_positions) == 0:
return Schedule(agent_positions=[], agent_directions=[],
agent_targets=[], agent_speeds=[], agent_malfunction_rates=None,
max_episode_steps=max_episode_steps)
agent_targets=[], agent_speeds=[], agent_malfunction_rates=None)
if len(valid_positions) < num_agents:
warnings.warn("schedule_generators: len(valid_positions) < num_agents")
return Schedule(agent_positions=[], agent_directions=[],
agent_targets=[], agent_speeds=[], agent_malfunction_rates=None,
max_episode_steps=max_episode_steps)
agent_targets=[], agent_speeds=[], agent_malfunction_rates=None)
agents_position_idx = [i for i in np.random.choice(len(valid_positions), num_agents, replace=False)]
agents_position = [valid_positions[agents_position_idx[i]] for i in range(num_agents)]
@@ -257,8 +225,7 @@ def random_schedule_generator(speed_ratio_map: Optional[Mapping[float, float]] =
agents_speed = speed_initialization_helper(num_agents, speed_ratio_map, seed=_runtime_seed)
return Schedule(agent_positions=agents_position, agent_directions=agents_direction,
agent_targets=agents_target, agent_speeds=agents_speed, agent_malfunction_rates=None,
max_episode_steps=max_episode_steps)
agent_targets=agents_target, agent_speeds=agents_speed, agent_malfunction_rates=None)
return generator
@@ -303,10 +270,9 @@ def schedule_from_file(filename, load_from_package=None) -> ScheduleGenerator:
else:
agents_speed = None
agents_malfunction = None
max_episode_steps = compute_max_episode_steps(width=rail.width, height=rail.height)
return Schedule(agent_positions=agents_position, agent_directions=agents_direction,
agent_targets=agents_target, agent_speeds=agents_speed,
agent_malfunction_rates=agents_malfunction, max_episode_steps=max_episode_steps)
agent_malfunction_rates=agents_malfunction)
return generator
import collections
from typing import List, NamedTuple
Schedule = collections.namedtuple('Schedule', 'agent_positions '
'agent_directions '
'agent_targets '
'agent_speeds '
'agent_malfunction_rates '
'max_episode_steps')
from flatland.core.grid.grid4 import Grid4TransitionsEnum
from flatland.core.grid.grid_utils import IntVector2DArray
Schedule = NamedTuple('Schedule', [('agent_positions', IntVector2DArray),
('agent_directions', List[Grid4TransitionsEnum]),
('agent_targets', IntVector2DArray),
('agent_speeds', List[float]),
('agent_malfunction_rates', List[int])])
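For clarity (an illustrative sketch, not part of the diff): `max_episode_steps` is no longer a field of `Schedule`, so a schedule is built from the five remaining fields, for example:
```python
# Hypothetical two-agent schedule; positions and targets are (row, column) tuples.
schedule = Schedule(agent_positions=[(0, 1), (3, 4)],
                    agent_directions=[Grid4TransitionsEnum.EAST, Grid4TransitionsEnum.NORTH],
                    agent_targets=[(5, 5), (0, 0)],
                    agent_speeds=[1.0, 0.5],
                    agent_malfunction_rates=None)
```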
@@ -151,21 +151,22 @@ def test_malfunction_process_statistically():
stochastic_data=stochastic_data, # Malfunction data generator
obs_builder_object=SingleAgentNavigationObs()
)
env.reset()
# reset to initialize agents_static
env.reset(True, True, False, random_seed=10)
env.agents[0].target = (0, 0)
agent_malfunction_list = [[0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 6, 5, 4, 3, 2],
[0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1],
agent_malfunction_list = [[0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 6, 5],
[0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0, 0],
[6, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[6, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 6, 5, 4],
[0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 6, 5, 4],
[0, 0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0],
[6, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
[6, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0],
[6, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0, 0, 6, 5, 4, 3],
[0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5],
[0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 6, 5, 4, 3, 2, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 6, 5, 4, 3, 2, 1],
[6, 6, 6, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0]]
for step in range(20):
action_dict: Dict[int, RailEnvActions] = {}