action_space and observation_space, issue #46

8a8a7853 · spiglerg · fa54bee7 · 8a8a7853 · 8a8a7853 · 8a8a7853
Commit 8a8a7853 authored 5 years ago by spiglerg
--- a/flatland/core/env.py
+++ b/flatland/core/env.py
@@ -9,6 +9,10 @@ class Environment:
    """
    Base interface for multi-agent environments in Flatland.
+    Derived environments should implement the following attributes:
+        action_space: tuple with the dimensions of the actions to be passed to the step method
+        observation_space: tuple with the dimensions of the observations returned by reset and step
    Agents are identified by agent ids (handles).
    Examples:
        >>> obs = env.reset()
@@ -39,6 +43,8 @@ class Environment:
    """
    def __init__(self):
+        self.action_space = ()
+        self.observation_space = ()
        pass
    def reset(self):

--- a/flatland/core/env_observation_builder.py
+++ b/flatland/core/env_observation_builder.py
@@ -12,9 +12,13 @@ case of multi-agent environments.
 class ObservationBuilder:
    """
    ObservationBuilder base class.
+    Derived objects must implement an `observation_space` attribute as a tuple with the dimensions of the returned
+    observations.
    """
    def __init__(self):
+        self.observation_space = ()
        pass
    def _set_env(self, env):

--- a/flatland/envs/observations.py
+++ b/flatland/envs/observations.py
@@ -19,6 +19,14 @@ class TreeObsForRailEnv(ObservationBuilder):
    def __init__(self, max_depth):
        self.max_depth = max_depth
+        # Compute the size of the returned observation vector
+        size = 0
+        pow4 = 1
+        for i in range(self.max_depth+1):
+            size += pow4
+            pow4 *= 4
+        self.observation_space = [size * 5]
    def reset(self):
        agents = self.env.agents
        nAgents = len(agents)
@@ -158,10 +166,6 @@ class TreeObsForRailEnv(ObservationBuilder):
        the transitions. The order is:
            [data from 'left'] + [data from 'forward'] + [data from 'right'] + [data from 'back']
        Each branch data is organized as:
            [root node information] +
            [recursive branch data from 'left'] +

--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -112,6 +112,10 @@ class RailEnv(Environment):
        self.valid_positions = None
+        self.action_space = [1]
+        self.observation_space = self.obs_builder.observation_space # updated on resets?
    # no more agent_handles
    def get_agent_handles(self):
        return range(self.get_num_agents())
@@ -160,6 +164,7 @@ class RailEnv(Environment):
        # Reset the state of the observation builder with the new environment
        self.obs_builder.reset()
+        self.observation_space = self.obs_builder.observation_space # <-- change on reset?
        # Return the new observation vectors for each agent
        return self._get_observations()