Compare revisions

a6c4ae6a · 48d8f126 · 48d8f126 · 48d8f126 · 48d8f126 · 48d8f126
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -33,7 +33,7 @@ sys.path.insert(0, os.path.abspath('..'))

 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode']
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx.ext.intersphinx', 'numpydoc']

 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -41,8 +41,12 @@ templates_path = ['_templates']
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 #
-# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
+# https://www.sphinx-doc.org/en/master/usage/markdown.html
+source_suffix = {
+    '.rst': 'restructuredtext',
+    '.txt': 'markdown',
+    '.md': 'markdown',
+}

 # The master toctree document.
 master_doc = 'index'
@@ -76,10 +80,10 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = 'sphinx'

-# If true, `todo` and `todoList` produce output, else they produce nothing.
+# If true, `todo` and `todoList` produce output, else they produce nothing.
 todo_include_todos = False

-# -- Options for HTML output -------------------------------------------
+# -- Options for HTML output -------------------------------------------

 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
@@ -97,12 +101,12 @@ html_theme = "sphinx_rtd_theme"
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ['_static']

-# -- Options for HTMLHelp output ---------------------------------------
+# -- Options for HTMLHelp output ---------------------------------------

 # Output file base name for HTML help builder.
 htmlhelp_basename = 'flatlanddoc'

-# -- Options for LaTeX output ------------------------------------------
+# -- Options for LaTeX output ------------------------------------------

 latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
@@ -131,7 +135,7 @@ latex_documents = [
     u'S.P. Mohanty', 'manual'),
 ]

-# -- Options for manual page output ------------------------------------
+# -- Options for manual page output ------------------------------------

 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
@@ -141,7 +145,7 @@ man_pages = [
     [author], 1)
 ]

-# -- Options for Texinfo output ----------------------------------------
+# -- Options for Texinfo output ----------------------------------------

 # Grouping the document tree into Texinfo files. List of tuples
 # (source start file, target name, title, author,
@@ -154,3 +158,6 @@ texinfo_documents = [
     'One line description of project.',
     'Miscellaneous'),
 ]
+
+# https://stackoverflow.com/questions/12206334/sphinx-autosummary-toctree-contains-reference-to-nonexisting-document-warnings
+numpydoc_show_class_members = False
--- a/docs/flatland.baselines.rst
+++ b/docs/flatland.baselines.rst
-flatland.baselines package
-==========================
-
-Submodules
----------
-
-flatland.baselines.dueling\_double\_dqn module
----------------------------------------------
-
-.. automodule:: flatland.baselines.dueling_double_dqn
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-flatland.baselines.model module
-------------------------------
-
-.. automodule:: flatland.baselines.model
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-
-Module contents
---------------
-
-.. automodule:: flatland.baselines
-    :members:
-    :undoc-members:
-    :show-inheritance:
--- a/docs/flatland.core.rst
+++ b/docs/flatland.core.rst
-flatland.core package
-=====================
-
-Submodules
----------
-
-flatland.core.env module
------------------------
-
-.. automodule:: flatland.core.env
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-flatland.core.transitions module
--------------------------------
-
-.. automodule:: flatland.core.transitions
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-
-Module contents
---------------
-
-.. automodule:: flatland.core
-    :members:
-    :undoc-members:
-    :show-inheritance:
--- a/docs/flatland.envs.rst
+++ b/docs/flatland.envs.rst
-flatland.envs package
-=====================
-
-Submodules
----------
-
-flatland.envs.rail\_env module
------------------------------
-
-.. automodule:: flatland.envs.rail_env
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-
-Module contents
---------------
-
-.. automodule:: flatland.envs
-    :members:
-    :undoc-members:
-    :show-inheritance:
--- a/docs/flatland.rst
+++ b/docs/flatland.rst
-flatland package
-================
-
-Subpackages
-----------
-
-.. toctree::
-
-    flatland.core
-    flatland.envs
-    flatland.utils
-
-Submodules
----------
-
-flatland.cli module
-------------------
-
-.. automodule:: flatland.cli
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-flatland.flatland module
------------------------
-
-.. automodule:: flatland.flatland
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-
-Module contents
---------------
-
-.. automodule:: flatland
-    :members:
-    :undoc-members:
-    :show-inheritance:
--- a/docs/flatland.utils.rst
+++ b/docs/flatland.utils.rst
-flatland.utils package
-======================
-
-Module contents
---------------
-
-.. automodule:: flatland.utils
-    :members:
-    :undoc-members:
-    :show-inheritance:
--- a/docs/gifs/complex.gif
+++ b/docs/gifs/complex.gif
--- a/docs/gifs/many_agents.gif
+++ b/docs/gifs/many_agents.gif
--- a/docs/history.rst
+++ b/docs/history.rst
-.. include:: ../HISTORY.rst
--- a/docs/index.rst
+++ b/docs/index.rst
 Welcome to flatland's documentation!
 ======================================

+.. include:: ../README.rst
+
 .. toctree::
   :maxdepth: 2
   :caption: Contents:

-   readme
-   installation
-   gettingstarted
-   modules
-   FAQ
-   contributing
-   authors
-   history
+   01_readme
+   03_tutorials_toc
+   04_specifications_toc
+   05_apidoc
+   06_contributing
+   07_changes
+   08_authors
+   09_faq_toc
+   10_interface

 Indices and tables
 ==================

--- a/docs/installation.rst
+++ b/docs/installation.rst
-.. highlight:: shell
-
-============
-Installation
-============
-
-
-Stable release
--------------
-
-To install flatland, run this command in your terminal:
-
-.. code-block:: console
-
-    $ pip install flatland-rl
-
-This is the preferred method to install flatland, as it will always install the most recent stable release.
-
-If you don't have `pip`_ installed, this `Python installation guide`_ can guide
-you through the process.
-
-.. _pip: https://pip.pypa.io
-.. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/
-
-
-From sources
------------
-
-The sources for flatland can be downloaded from the `Gitlab repo`_.
-
-You can clone the public repository:
-
-.. code-block:: console
-
-    $ git clone git@gitlab.aicrowd.com:flatland/flatland.git
-
-Once you have a copy of the source, you can install it with:
-
-.. code-block:: console
-
-    $ python setup.py install
-
-
-.. _Gitlab repo: https://gitlab.aicrowd.com/flatland/flatland
--- a/docs/interface/pettingzoo.md
+++ b/docs/interface/pettingzoo.md
+# PettingZoo
+
+> PettingZoo (https://www.pettingzoo.ml/) is a collection of multi-agent environments for reinforcement learning. We build a pettingzoo interface for flatland.
+
+## Background
+
+PettingZoo is a popular multi-agent environment library (https://arxiv.org/abs/2009.14471) that aims to be the gym standard for Multi-Agent Reinforcement Learning. The following advantages make it suitable for use with flatland:
+
+- Works with both rllib (https://docs.ray.io/en/latest/rllib.html) and stable baselines 3 (https://stable-baselines3.readthedocs.io/) using wrappers from Super Suit.
+- Clean API (https://www.pettingzoo.ml/api) with additional facilities/api for parallel, saving observation, recording using gym monitor, processing, normalising observations
+- Scikit-learn inspired api
+  e.g.
+
+```python
+act = model.predict(obs, deterministic=True)[0] 
+```
+
+- Parallel learning using literally 2 lines of code to use with stable baselines 3
+
+```python
+env = ss.pettingzoo_env_to_vec_env_v0(env)
+env = ss.concat_vec_envs_v0(env, 8, num_cpus=4, base_class='stable_baselines3')
+```
+
+- Tested and supports various multi-agent environments with many agents comparable to flatland. e.g. https://www.pettingzoo.ml/magent
+- A clean interface means we can add an experiment-tracking tool such as wandb ourselves and keep full flexibility to save the information we want
--- a/docs/interface/pettingzoo.rst
+++ b/docs/interface/pettingzoo.rst
+
+PettingZoo
+==========
+
+..
+
+   PettingZoo (https://www.pettingzoo.ml/) is a collection of multi-agent environments for reinforcement learning. We build a pettingzoo interface for flatland.
+
+
+Background
+----------
+
+PettingZoo is a popular multi-agent environment library (https://arxiv.org/abs/2009.14471) that aims to be the gym standard for Multi-Agent Reinforcement Learning. The following advantages make it suitable for use with flatland:
+
+
+* Works with both rllib (https://docs.ray.io/en/latest/rllib.html) and stable baselines 3 (https://stable-baselines3.readthedocs.io/) using wrappers from Super Suit.
+* Clean API (https://www.pettingzoo.ml/api) with additional facilities/api for parallel, saving observation, recording using gym monitor, processing, normalising observations
+* Scikit-learn inspired api
+  e.g.
+
+.. code-block:: python
+
+   act = model.predict(obs, deterministic=True)[0]
+
+
+* Parallel learning using literally 2 lines of code to use with stable baselines 3
+
+.. code-block:: python
+
+   env = ss.pettingzoo_env_to_vec_env_v0(env)
+   env = ss.concat_vec_envs_v0(env, 8, num_cpus=4, base_class='stable_baselines3')
+
+
+* Tested and supports various multi-agent environments with many agents comparable to flatland. e.g. https://www.pettingzoo.ml/magent
+* A clean interface means we can add an experiment-tracking tool such as wandb ourselves and keep full flexibility to save the information we want
--- a/docs/interface/wrappers.md
+++ b/docs/interface/wrappers.md
+# Environment Wrappers
+
+> We provide various environment wrappers to work with both the rail env and the petting zoo interface.
+
+## Background
+
+These wrappers change certain environment behavior, which can help improve reinforcement learning training.
+
+## Supported Inbuilt Wrappers
+
+We provide 2 sample wrappers: the ShortestPathAction wrapper and the SkipNoChoice wrapper. The wrappers require many env properties that are only created on environment reset. Hence, before using a wrapper, we must reset the rail env. To use the wrappers, simply pass in the reset rail env. Code samples are shown below for each wrapper.
+
+### ShortestPathAction Wrapper
+
+To use the ShortestPathAction Wrapper, simply wrap the rail env as follows
+
+```python
+rail_env.reset(random_seed=1)
+rail_env = ShortestPathActionWrapper(rail_env)
+```
+
+The shortest path action wrapper maps the existing action space into 3 actions - Shortest Path (`0`), Next Shortest Path (`1`) and Stop (`2`).  Hence, we must ensure that the predicted action is always one of these actions (0, 1 or 2). To route all agents along the shortest path, pass `0` as the action.
+
+### SkipNoChoice Wrapper
+
+To use the SkipNoChoiceWrapper, simply wrap the rail env as follows
+
+```python
+rail_env.reset(random_seed=1)
+rail_env = SkipNoChoiceCellsWrapper(rail_env, accumulate_skipped_rewards=False, discounting=0.0)
+```
--- a/docs/interface/wrappers.rst
+++ b/docs/interface/wrappers.rst
+
+Environment Wrappers
+====================
+
+..
+
+   We provide various environment wrappers to work with both the rail env and the petting zoo interface.
+
+
+Background
+----------
+
+These wrappers change certain environment behavior, which can help improve reinforcement learning training.
+
+Supported Inbuilt Wrappers
+--------------------------
+
+We provide 2 sample wrappers: the ShortestPathAction wrapper and the SkipNoChoice wrapper. The wrappers require many env properties that are only created on environment reset. Hence, before using a wrapper, we must reset the rail env. To use the wrappers, simply pass in the reset rail env. Code samples are shown below for each wrapper.
+
+ShortestPathAction Wrapper
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To use the ShortestPathAction Wrapper, simply wrap the rail env as follows
+
+.. code-block:: python
+
+   rail_env.reset(random_seed=1)
+   rail_env = ShortestPathActionWrapper(rail_env)
+
+The shortest path action wrapper maps the existing action space into 3 actions - Shortest Path (\ ``0``\ ), Next Shortest Path (\ ``1``\ ) and Stop (\ ``2``\ ).  Hence, we must ensure that the predicted action is always one of these actions (0, 1 or 2). To route all agents along the shortest path, pass ``0`` as the action.
+
+SkipNoChoice Wrapper
+^^^^^^^^^^^^^^^^^^^^
+
+To use the SkipNoChoiceWrapper, simply wrap the rail env as follows
+
+.. code-block:: python
+
+   rail_env.reset(random_seed=1)
+   rail_env = SkipNoChoiceCellsWrapper(rail_env, accumulate_skipped_rewards=False, discounting=0.0)
--- a/docs/make.bat
+++ b/docs/make.bat
-@ECHO OFF
-
-pushd %~dp0
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
-	set SPHINXBUILD=python -msphinx
-)
-set SOURCEDIR=.
-set BUILDDIR=_build
-set SPHINXPROJ=flatland
-
-if "%1" == "" goto help
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
-	echo.
-	echo.The Sphinx module was not found. Make sure you have Sphinx installed,
-	echo.then set the SPHINXBUILD environment variable to point to the full
-	echo.path of the 'sphinx-build' executable. Alternatively you may add the
-	echo.Sphinx directory to PATH.
-	echo.
-	echo.If you don't have Sphinx installed, grab it from
-	echo.http://sphinx-doc.org/
-	exit /b 1
-)
-
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
-goto end
-
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
-
-:end
-popd
--- a/docs/modules.rst
+++ b/docs/modules.rst
-flatland
-========
-
-.. toctree::
-   :maxdepth: 4
-
-   flatland
--- a/docs/specifications/core.md
+++ b/docs/specifications/core.md
+## Core Specifications
+
+### Environment Class Overview
+
+The Environment class contains all necessary functions for the interactions between the agents and the environment. The base Environment class is derived from rllib.env.MultiAgentEnv (https://github.com/ray-project/ray).
+
+The functions are specific to each realization of Flatland (e.g. Railway, Vaccination, ...).
+In particular, we retain the rllib interface in the use of the step() function, which accepts a dictionary of actions indexed by the agents' handles (returned by get_agent_handles()) and returns dictionaries of observations, rewards, dones and infos.
+
+```python
+class Environment:
+    """Base interface for multi-agent environments in Flatland.
+
+    Agents are identified by agent ids (handles).
+    Examples:
+        >>> obs, info = env.reset()
+        >>> print(obs)
+        {
+            "train_0": [2.4, 1.6],
+            "train_1": [3.4, -3.2],
+        }
+        >>> obs, rewards, dones, infos = env.step(
+            action_dict={
+                "train_0": 1, "train_1": 0})
+        >>> print(rewards)
+        {
+            "train_0": 3,
+            "train_1": -1,
+        }
+        >>> print(dones)
+        {
+            "train_0": False,    # train_0 is still running
+            "train_1": True,     # train_1 is done
+            "__all__": False,    # the env is not done
+        }
+        >>> print(infos)
+        {
+            "train_0": {},  # info for train_0
+            "train_1": {},  # info for train_1
+        }
+    """
+
+    def __init__(self):
+        pass
+
+    def reset(self):
+        """
+        Resets the env and returns observations from agents in the environment.
+
+        Returns:
+        obs : dict
+            New observations for each agent.
+        """
+        raise NotImplementedError()
+
+    def step(self, action_dict):
+        """
+        Performs an environment step with simultaneous execution of actions for
+        agents in action_dict.
+        Returns observations from agents in the environment.
+        The returns are dicts mapping from agent_id strings to values.
+
+        Parameters
+        ----------
+        action_dict : dict
+            Dictionary of actions to execute, indexed by agent id.
+
+        Returns
+        -------
+        obs : dict
+            New observations for each ready agent.
+        rewards: dict
+            Reward values for each ready agent.
+        dones : dict
+            Done values for each ready agent. The special key "__all__"
+            (required) is used to indicate env termination.
+        infos : dict
+            Optional info values for each agent id.
+        """
+        raise NotImplementedError()
+
+    def render(self):
+        """
+        Perform rendering of the environment.
+        """
+        raise NotImplementedError()
+
+    def get_agent_handles(self):
+        """
+        Returns a list of agents' handles to be used as keys in the step()
+        function.
+        """
+        raise NotImplementedError()
+
+```
--- a/docs/specifications/img/UML_flatland.png
+++ b/docs/specifications/img/UML_flatland.png
--- a/docs/specifications/intro.md
+++ b/docs/specifications/intro.md
+## Intro
+
+In a human-readable language, specifications provide
+- code base overview (hand-drawn concept)
+- key concepts (generators, envs) and how they are linked
+- links to the relevant code base
+
+![Overview](img/UML_flatland.png)
+[Diagram Source](https://confluence.sbb.ch/x/pQfsSw)
No results found