Commit 44b55f20 authored by u214892

#168 #163 multispeed and penalty testing

parent d2ac83fe
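
For orientation before the diff: this commit replaces the hand-written assertion loops in the malfunction tests with a declarative replay harness (see the test_utils.py hunks at the bottom). The following is a sketch of the resulting data model, reconstructed from those hunks; the `position` attribute is not visible inside the hunk itself and is inferred from its use in the replays, so treat its exact declaration as an assumption.

from typing import List, Optional, Tuple

from attr import attrs, attrib

from flatland.core.grid.grid4 import Grid4TransitionsEnum
from flatland.envs.rail_env import RailEnvActions


@attrs
class Replay(object):
    # expected state of one agent for one env.step()
    position = attrib(type=Tuple[int, int])          # not shown in the hunk; implied by the tests
    direction = attrib(type=Grid4TransitionsEnum)
    action = attrib(type=RailEnvActions)
    malfunction = attrib(default=0, type=int)                    # expected remaining malfunction steps
    set_malfunction = attrib(default=None, type=Optional[int])   # force a malfunction before the step
    reward = attrib(default=None, type=Optional[float])          # expected reward after the step


@attrs
class ReplayConfig(object):
    # expected trajectory of one agent
    replay = attrib(type=List[Replay])
    target = attrib(type=Tuple[int, int])
    speed = attrib(type=float)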
@@ -236,7 +236,8 @@ class RailEnv(Environment):
Relies on the rail_generator returning agent_static lists (pos, dir, target)
"""
# TODO can we not put 'self.rail_generator(..)' into 'if regen_rail or self.rail is None' condition?
# TODO https://gitlab.aicrowd.com/flatland/flatland/issues/172
# can we not put 'self.rail_generator(..)' into 'if regen_rail or self.rail is None' condition?
rail, optionals = self.rail_generator(self.width, self.height, self.get_num_agents(), self.num_resets)
if optionals and 'distance_map' in optionals:
@@ -256,6 +257,9 @@ class RailEnv(Environment):
agents_hints = None
if optionals and 'agents_hints' in optionals:
agents_hints = optionals['agents_hints']
# TODO https://gitlab.aicrowd.com/flatland/flatland/issues/185
# why do we need static agents? could we do it more elegantly?
self.agents_static = EnvAgentStatic.from_lists(
*self.schedule_generator(self.rail, self.get_num_agents(), agents_hints))
self.restart_agents()
@@ -356,6 +360,8 @@ class RailEnv(Environment):
# Perform step on all agents
for i_agent in range(self.get_num_agents()):
self._step_agent(i_agent, action_dict_.get(i_agent))
# Check for end of episode + set global reward to all rewards!
@@ -407,13 +413,14 @@ class RailEnv(Environment):
# is the agent malfunctioning?
malfunction = self._agent_malfunction(i_agent)
# if agent is broken, actions are ignored and agent does not move,
# the agent is not penalized in this step!
# if the agent is broken, actions are ignored and the agent does not move;
# the full step penalty applies in this case
if malfunction:
self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
return
# Is the agent at the beginning of the cell? Then, it can take an action.
# As long as the agent is malfunctioning or stopped at the beginning of the cell, different actions may be taken!
if agent.speed_data['position_fraction'] == 0.0:
# No action has been supplied for this agent -> set DO_NOTHING as default
if action is None:
@@ -464,7 +471,6 @@ class RailEnv(Environment):
if not _action_stored:
# If the agent cannot move due to an invalid transition, we set its state to not moving
self.rewards_dict[i_agent] += self.invalid_action_penalty
self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
self.rewards_dict[i_agent] += self.stop_penalty
agent.moving = False
@@ -497,6 +503,9 @@ class RailEnv(Environment):
agent.moving = False
else:
self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
else:
# step penalty if not moving (stopped now or before)
self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
def _check_action_on_agent(self, action: RailEnvActions, agent: EnvAgent):
"""
......
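
Taken together, the changes to `_step_agent` above make the penalty scheme uniform: a malfunctioning agent ignores its action but still receives the full step penalty scaled by its speed, a stopped agent receives the same scaled step penalty, and an agent that (re)starts additionally pays the start penalty. Below is a minimal sketch of that per-step accounting, assuming the (negative) `step_penalty`/`start_penalty` values of RailEnv and leaving out the invalid-action branch; the standalone helper name and signature are illustrative, not part of the RailEnv API.

def per_step_reward(malfunctioning: bool,
                    restarting: bool,
                    speed: float,
                    step_penalty: float,
                    start_penalty: float) -> float:
    """Illustrative reward for one agent and one env.step(), mirroring the hunk above."""
    if malfunctioning:
        # actions are ignored, but the full step penalty (scaled by speed) still applies
        return step_penalty * speed
    reward = 0.0
    if restarting:
        # agent goes from stopped to moving in this step
        reward += start_penalty
    # step penalty proportional to speed, whether the agent moves or stands still
    reward += step_penalty * speed
    return reward

With speed 1.0 this reproduces the rewards asserted in the tests below: `step_penalty` while malfunctioning or stopped, and `start_penalty + step_penalty * 1.0` on the step where the agent starts moving.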
@@ -4,13 +4,11 @@ import numpy as np
from flatland.core.grid.grid4 import Grid4TransitionsEnum
from flatland.core.grid.grid4_utils import get_new_position
from flatland.envs.agent_utils import EnvAgent
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.rail_env import RailEnv, RailEnvActions
from flatland.envs.rail_generators import complex_rail_generator, sparse_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator, sparse_schedule_generator
from flatland.utils.rendertools import RenderTool
from test_utils import Replay
from test_utils import Replay, ReplayConfig, run_replay_config
class SingleAgentNavigationObs(TreeObsForRailEnv):
@@ -155,7 +153,7 @@ def test_malfunction_process_statistically():
assert nb_malfunction == 156, "nb_malfunction={}".format(nb_malfunction)
def test_initial_malfunction(rendering=True):
def test_initial_malfunction():
random.seed(0)
np.random.seed(0)
@@ -190,74 +188,55 @@ def test_initial_malfunction(rendering=True):
stochastic_data=stochastic_data, # Malfunction data generator
)
if rendering:
renderer = RenderTool(env)
renderer.render_env(show=True, frames=False, show_observations=False)
_action = dict()
replay_steps = [
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.MOVE_FORWARD,
malfunction=3
),
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.MOVE_FORWARD,
malfunction=2
),
# malfunction stops in the next step and we're still at the beginning of the cell
# --> if we take action MOVE_FORWARD, agent should restart and move to the next cell
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.MOVE_FORWARD,
malfunction=1
),
Replay(
position=(28, 4),
direction=Grid4TransitionsEnum.WEST,
action=RailEnvActions.MOVE_FORWARD,
malfunction=0
),
Replay(
position=(27, 4),
direction=Grid4TransitionsEnum.NORTH,
action=RailEnvActions.MOVE_FORWARD,
malfunction=0
)
]
info_dict = {
'action_required': [True]
}
for i, replay in enumerate(replay_steps):
def _assert(actual, expected, msg):
assert actual == expected, "[{}] {}: actual={}, expected={}".format(i, msg, actual, expected)
agent: EnvAgent = env.agents[0]
_assert(agent.position, replay.position, 'position')
_assert(agent.direction, replay.direction, 'direction')
_assert(agent.malfunction_data['malfunction'], replay.malfunction, 'malfunction')
if replay.action is not None:
assert info_dict['action_required'][0] == True, "[{}] expecting action_required={}".format(i, True)
_, _, _, info_dict = env.step({0: replay.action})
else:
assert info_dict['action_required'][0] == False, "[{}] expecting action_required={}".format(i, False)
_, _, _, info_dict = env.step({})
if rendering:
renderer.render_env(show=True, show_observations=True)
def test_initial_malfunction_stop_moving(rendering=True):
replay_config = ReplayConfig(
replay=[
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.MOVE_FORWARD,
set_malfunction=3,
malfunction=3,
reward=env.step_penalty # full step penalty when malfunctioning
),
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.MOVE_FORWARD,
malfunction=2,
reward=env.step_penalty # full step penalty when malfunctioning
),
# malfunction stops in the next step and we're still at the beginning of the cell
# --> if we take action MOVE_FORWARD, agent should restart and move to the next cell
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.MOVE_FORWARD,
malfunction=1,
reward=env.start_penalty + env.step_penalty * 1.0
# malfunctioning ends: starting and running at speed 1.0
),
Replay(
position=(28, 4),
direction=Grid4TransitionsEnum.WEST,
action=RailEnvActions.MOVE_FORWARD,
malfunction=0,
reward=env.step_penalty * 1.0 # running at speed 1.0
),
Replay(
position=(27, 4),
direction=Grid4TransitionsEnum.NORTH,
action=RailEnvActions.MOVE_FORWARD,
malfunction=0,
reward=env.step_penalty * 1.0 # running at speed 1.0
)
],
speed=env.agents[0].speed_data['speed'],
target=env.agents[0].target
)
run_replay_config(env, [replay_config])
def test_initial_malfunction_stop_moving():
random.seed(0)
np.random.seed(0)
@@ -292,80 +271,62 @@ def test_initial_malfunction_stop_moving(rendering=True):
stochastic_data=stochastic_data, # Malfunction data generator
)
if rendering:
renderer = RenderTool(env)
renderer.render_env(show=True, frames=False, show_observations=False)
_action = dict()
replay_steps = [
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.DO_NOTHING,
malfunction=3
),
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.DO_NOTHING,
malfunction=2
),
# malfunction stops in the next step and we're still at the beginning of the cell
# --> if we take action DO_NOTHING, agent should restart without moving
#
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.STOP_MOVING,
malfunction=1
),
# we have stopped and do nothing --> should stand still
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.DO_NOTHING,
malfunction=0
),
# we start to move forward --> should go to next cell now
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.MOVE_FORWARD,
malfunction=0
),
Replay(
position=(28, 4),
direction=Grid4TransitionsEnum.WEST,
action=RailEnvActions.MOVE_FORWARD,
malfunction=0
)
]
info_dict = {
'action_required': [True]
}
for i, replay in enumerate(replay_steps):
def _assert(actual, expected, msg):
assert actual == expected, "[{}] {}: actual={}, expected={}".format(i, msg, actual, expected)
agent: EnvAgent = env.agents[0]
_assert(agent.position, replay.position, 'position')
_assert(agent.direction, replay.direction, 'direction')
_assert(agent.malfunction_data['malfunction'], replay.malfunction, 'malfunction')
if replay.action is not None:
assert info_dict['action_required'][0] == True, "[{}] expecting action_required={}".format(i, True)
_, _, _, info_dict = env.step({0: replay.action})
else:
assert info_dict['action_required'][0] == False, "[{}] expecting action_required={}".format(i, False)
_, _, _, info_dict = env.step({})
if rendering:
renderer.render_env(show=True, show_observations=True)
replay_config = ReplayConfig(
replay=[
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.DO_NOTHING,
set_malfunction=3,
malfunction=3,
reward=env.step_penalty # full step penalty when stopped
),
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.DO_NOTHING,
malfunction=2,
reward=env.step_penalty # full step penalty when stopped
),
# malfunction stops in the next step and we're still at the beginning of the cell
# --> if we take action STOP_MOVING, agent should restart without moving
#
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.STOP_MOVING,
malfunction=1,
reward=env.step_penalty # full step penalty while stopped
),
# we have stopped and do nothing --> should stand still
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.DO_NOTHING,
malfunction=0,
reward=env.step_penalty # full step penalty while stopped
),
# we start to move forward --> should go to next cell now
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.MOVE_FORWARD,
malfunction=0,
reward=env.start_penalty + env.step_penalty * 1.0  # start penalty + step penalty for speed 1.0
),
Replay(
position=(28, 4),
direction=Grid4TransitionsEnum.WEST,
action=RailEnvActions.MOVE_FORWARD,
malfunction=0,
reward=env.step_penalty * 1.0  # step penalty for speed 1.0
)
],
speed=env.agents[0].speed_data['speed'],
target=env.agents[0].target
)
run_replay_config(env, [replay_config])
def test_initial_malfunction_do_nothing(rendering=True):
@@ -403,77 +364,58 @@ def test_initial_malfunction_do_nothing(rendering=True):
stochastic_data=stochastic_data, # Malfunction data generator
)
if rendering:
renderer = RenderTool(env)
renderer.render_env(show=True, frames=False, show_observations=False)
_action = dict()
replay_steps = [
Replay(
replay_config = ReplayConfig(
replay=[Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.DO_NOTHING,
malfunction=3
set_malfunction=3,
malfunction=3,
reward=env.step_penalty # full step penalty while malfunctioning
),
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.DO_NOTHING,
malfunction=2
),
# malfunction stops in the next step and we're still at the beginning of the cell
# --> if we take action DO_NOTHING, agent should restart without moving
#
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.DO_NOTHING,
malfunction=1
),
# we haven't started moving yet --> stay here
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.DO_NOTHING,
malfunction=0
),
# we start to move forward --> should go to next cell now
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.MOVE_FORWARD,
malfunction=0
),
Replay(
position=(28, 4),
direction=Grid4TransitionsEnum.WEST,
action=RailEnvActions.MOVE_FORWARD,
malfunction=0
)
]
info_dict = {
'action_required': [True]
}
for i, replay in enumerate(replay_steps):
def _assert(actual, expected, msg):
assert actual == expected, "[{}] {}: actual={}, expected={}".format(i, msg, actual, expected)
agent: EnvAgent = env.agents[0]
_assert(agent.position, replay.position, 'position')
_assert(agent.direction, replay.direction, 'direction')
_assert(agent.malfunction_data['malfunction'], replay.malfunction, 'malfunction')
if replay.action is not None:
assert info_dict['action_required'][0] == True, "[{}] expecting action_required={}".format(i, True)
_, _, _, info_dict = env.step({0: replay.action})
else:
assert info_dict['action_required'][0] == False, "[{}] expecting action_required={}".format(i, False)
_, _, _, info_dict = env.step({})
if rendering:
renderer.render_env(show=True, show_observations=True)
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.DO_NOTHING,
malfunction=2,
reward=env.step_penalty # full step penalty while malfunctioning
),
# malfunction stops in the next step and we're still at the beginning of the cell
# --> if we take action DO_NOTHING, agent should restart without moving
#
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.DO_NOTHING,
malfunction=1,
reward=env.step_penalty # full step penalty while stopped
),
# we haven't started moving yet --> stay here
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.DO_NOTHING,
malfunction=0,
reward=env.step_penalty # full step penalty while stopped
),
# we start to move forward --> should go to next cell now
Replay(
position=(28, 5),
direction=Grid4TransitionsEnum.EAST,
action=RailEnvActions.MOVE_FORWARD,
malfunction=0,
reward=env.start_penalty + env.step_penalty * 1.0 # start penalty + step penalty for speed 1.0
),
Replay(
position=(28, 4),
direction=Grid4TransitionsEnum.WEST,
action=RailEnvActions.MOVE_FORWARD,
malfunction=0,
reward=env.step_penalty * 1.0 # step penalty for speed 1.0
)
],
speed=env.agents[0].speed_data['speed'],
target=env.agents[0].target
)
run_replay_config(env, [replay_config])
This diff is collapsed.
@@ -4,7 +4,9 @@ from typing import List, Tuple, Optional
from attr import attrs, attrib
from flatland.core.grid.grid4 import Grid4TransitionsEnum
from flatland.envs.rail_env import RailEnvActions
from flatland.envs.agent_utils import EnvAgent
from flatland.envs.rail_env import RailEnvActions, RailEnv
from flatland.utils.rendertools import RenderTool
@attrs
@@ -13,7 +15,8 @@ class Replay(object):
direction = attrib(type=Grid4TransitionsEnum)
action = attrib(type=RailEnvActions)
malfunction = attrib(default=0, type=int)
penalty = attrib(default=None, type=Optional[float])
set_malfunction = attrib(default=None, type=Optional[int])
reward = attrib(default=None, type=Optional[float])
@attrs
@@ -21,3 +24,78 @@ class ReplayConfig(object):
replay = attrib(type=List[Replay])
target = attrib(type=Tuple[int, int])
speed = attrib(type=float)
def run_replay_config(env: RailEnv, test_configs: List[ReplayConfig], rendering: bool = False):
"""
Runs the replay configs and checks assertions.
*Initially*
- the position, direction, target and speed from the first replay step are used to initialize the agents
*Before each step*
- an action must only be provided if action_required was returned for the previous step (initially all True)
- the position and direction before the step are verified
- optionally, set_malfunction is applied
- the malfunction is verified
*After each step*
- the reward is verified
Parameters
----------
env
    the RailEnv to step through and verify
test_configs
    one ReplayConfig per agent
rendering
    whether to render the environment while replaying
"""
if rendering:
renderer = RenderTool(env)
renderer.render_env(show=True, frames=False, show_observations=False)
info_dict = {
'action_required': [True for _ in test_configs]
}
for step in range(len(test_configs[0].replay)):
if step == 0:
for a, test_config in enumerate(test_configs):
agent: EnvAgent = env.agents[a]
replay = test_config.replay[0]
# set the initial position
agent.position = replay.position
agent.direction = replay.direction
agent.target = test_config.target
agent.speed_data['speed'] = test_config.speed
def _assert(a, actual, expected, msg):
assert actual == expected, "[{}] agent {} {}: actual={}, expected={}".format(step, a, msg, actual,
expected)
action_dict = {}
for a, test_config in enumerate(test_configs):
agent: EnvAgent = env.agents[a]
replay = test_config.replay[step]
_assert(a, agent.position, replay.position, 'position')
_assert(a, agent.direction, replay.direction, 'direction')
if replay.action is not None:
assert info_dict['action_required'][a] == True, "[{}] agent {} expecting action_required={}".format(
step, a, True)
action_dict[a] = replay.action
else:
assert info_dict['action_required'][a] == False, "[{}] agent {} expecting action_required={}".format(
step, a, False)
if replay.set_malfunction is not None:
agent.malfunction_data['malfunction'] = replay.set_malfunction
agent.malfunction_data['moving_before_malfunction'] = agent.moving
_assert(a, agent.malfunction_data['malfunction'], replay.malfunction, 'malfunction')
_, rewards_dict, _, info_dict = env.step(action_dict)
for a, test_config in enumerate(test_configs):
replay = test_config.replay[step]
_assert(a, rewards_dict[a], replay.reward, 'reward')
if rendering:
renderer.render_env(show=True, show_observations=True)
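
For reference, a condensed usage sketch of the harness defined above, reproducing the first two steps of `test_initial_malfunction`; `env` is assumed to be a RailEnv built as in those tests (a single agent at (28, 5) facing EAST, speed 1.0, with the stochastic malfunction data generator), so treat the environment setup as an assumption.

from flatland.core.grid.grid4 import Grid4TransitionsEnum
from flatland.envs.rail_env import RailEnvActions
from test_utils import Replay, ReplayConfig, run_replay_config

replay_config = ReplayConfig(
    replay=[
        Replay(
            position=(28, 5),
            direction=Grid4TransitionsEnum.EAST,
            action=RailEnvActions.MOVE_FORWARD,
            set_malfunction=3,        # force a malfunction lasting 3 steps before stepping
            malfunction=3,            # ...and expect it to be reported
            reward=env.step_penalty   # full step penalty while malfunctioning
        ),
        Replay(
            position=(28, 5),         # still malfunctioning, so the agent has not moved
            direction=Grid4TransitionsEnum.EAST,
            action=RailEnvActions.MOVE_FORWARD,
            malfunction=2,
            reward=env.step_penalty
        ),
    ],
    speed=env.agents[0].speed_data['speed'],
    target=env.agents[0].target
)
run_replay_config(env, [replay_config])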