From 3c86f24419c6c902a3f9659b8d79965e698b6245 Mon Sep 17 00:00:00 2001
From: spiglerg <spiglerg@gmail.com>
Date: Wed, 3 Apr 2019 11:24:09 +0200
Subject: [PATCH] initial draft interface for env.py, derived from
 rllib.env.MultiAgentEnv

---
 flatland/core/env.py | 92 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/flatland/core/env.py b/flatland/core/env.py
index e69de29..98e5778 100644
--- a/flatland/core/env.py
+++ b/flatland/core/env.py
@@ -0,0 +1,92 @@
+"""
+The env module defines the base Environment class.
+The base Environment class is adapted from rllib.env.MultiAgentEnv
+(https://github.com/ray-project/ray).
+"""
+
+# TODO: add derived classes and environments, e.g., GridEnvironment and
+# RailEnvironment.
+
+class Environment:
+    """Base interface for multi-agent environments in Flatland.
+
+    Agents are identified by agent ids (handles).
+    Examples:
+        >>> obs = env.reset()
+        >>> print(obs)
+        {
+            "train_0": [2.4, 1.6],
+            "train_1": [3.4, -3.2],
+        }
+        >>> obs, rewards, dones, infos = env.step(
+        ...     action_dict={
+        ...         "train_0": 1, "train_1": 0})
+        >>> print(rewards)
+        {
+            "train_0": 3,
+            "train_1": -1,
+        }
+        >>> print(dones)
+        {
+            "train_0": False,    # train_0 is still running
+            "train_1": True,     # train_1 is done
+            "__all__": False,    # the env is not done
+        }
+        >>> print(infos)
+        {
+            "train_0": {},  # info for train_0
+            "train_1": {},  # info for train_1
+        }
+    """
+
+    def __init__(self):
+        pass
+
+    def reset(self):
+        """
+        Resets the env and returns observations from agents in the environment.
+
+        Returns
+        -------
+        obs : dict
+            New observations for each agent.
+        """
+        raise NotImplementedError()
+
+    def step(self, action_dict):
+        """
+        Performs an environment step with simultaneous execution of actions for
+        agents in action_dict.
+        All returned values are dicts mapping from agent_id strings to values.
+
+        Parameters
+        ----------
+        action_dict : dict
+            Dictionary of actions to execute, indexed by agent id.
+
+        Returns
+        -------
+        obs : dict
+            New observations for each ready agent.
+        rewards : dict
+            Reward values for each ready agent.
+        dones : dict
+            Done values for each ready agent. The special key "__all__"
+            (required) is used to indicate env termination.
+        infos : dict
+            Optional info values for each agent id.
+        """
+        raise NotImplementedError()
+
+    def render(self):
+        """
+        Perform rendering of the environment.
+        """
+        raise NotImplementedError()
+
+    def get_agent_handles(self):
+        """
+        Returns a list of agents' handles to be used as keys in the step()
+        function.
+        """
+        raise NotImplementedError()
-- 
GitLab