# Provenance: this module was recovered from a git patch with the
# following metadata.
#   Commit:  3c86f24419c6c902a3f9659b8d79965e698b6245
#   From:    spiglerg <spiglerg@gmail.com>
#   Date:    Wed, 3 Apr 2019 11:24:09 +0200
#   Subject: [PATCH] initial draft interface for env.py, derived from
#            rllib.env.MultiAgentEnv
#   Target:  flatland/core/env.py (1 file changed, 92 insertions)

"""
The env module defines the base Environment class.

The base Environment class is adapted from rllib.env.MultiAgentEnv
(https://github.com/ray-project/ray).
"""

# TODO: add derived classes and environments, e.g., GridEnvironment and
# RailEnvironment.


class Environment:
    """Base interface for multi-agent environments in Flatland.

    Agents are identified by agent ids (handles).

    This class only defines the interface: ``reset``, ``step``, ``render``
    and ``get_agent_handles`` all raise ``NotImplementedError`` here and are
    meant to be overridden by concrete environments.

    Examples
    --------
    In the snippets below ``env`` stands for an instance of a concrete
    subclass; the printed values are illustrative.

    >>> obs = env.reset()
    >>> print(obs)
    {
        "train_0": [2.4, 1.6],
        "train_1": [3.4, -3.2],
    }
    >>> obs, rewards, dones, infos = env.step(
    ...     action_dict={
    ...         "train_0": 1, "train_1": 0})
    >>> print(rewards)
    {
        "train_0": 3,
        "train_1": -1,
    }
    >>> print(dones)
    {
        "train_0": False,    # train_0 is still running
        "train_1": True,     # train_1 is done
        "__all__": False,    # the env is not done
    }
    >>> print(infos)
    {
        "train_0": {},  # info for train_0
        "train_1": {},  # info for train_1
    }
    """

    def __init__(self):
        """Create the environment; the base class holds no state."""
        pass

    def reset(self) -> dict:
        """
        Reset the env and return observations from agents in the environment.

        Returns
        -------
        obs : dict
            New observations for each agent.

        Raises
        ------
        NotImplementedError
            Always, in this base class; subclasses must override.
        """
        # Name the concrete class so a missing override is easy to locate.
        raise NotImplementedError(
            "{} must implement reset()".format(type(self).__name__))

    def step(self, action_dict: dict) -> tuple:
        """
        Perform an environment step with simultaneous execution of actions
        for agents in action_dict.

        Returns observations from agents in the environment. Every returned
        value is a dict keyed by agent id (handle).

        Parameters
        ----------
        action_dict : dict
            Dictionary of actions to execute, indexed by agent id.

        Returns
        -------
        obs : dict
            New observations for each ready agent.
        rewards : dict
            Reward values for each ready agent.
        dones : dict
            Done values for each ready agent. The special key "__all__"
            (required) is used to indicate env termination.
        infos : dict
            Optional info values for each agent id.

        Raises
        ------
        NotImplementedError
            Always, in this base class; subclasses must override.
        """
        raise NotImplementedError(
            "{} must implement step()".format(type(self).__name__))

    def render(self):
        """
        Perform rendering of the environment.

        Raises
        ------
        NotImplementedError
            Always, in this base class; subclasses must override.
        """
        raise NotImplementedError(
            "{} must implement render()".format(type(self).__name__))

    def get_agent_handles(self) -> list:
        """
        Return a list of agents' handles to be used as keys in the step()
        function.

        Raises
        ------
        NotImplementedError
            Always, in this base class; subclasses must override.
        """
        raise NotImplementedError(
            "{} must implement get_agent_handles()".format(
                type(self).__name__))