From 625734e601a037b8a41fa885557e072003060ca8 Mon Sep 17 00:00:00 2001
From: Nilabha <nilabha2007@gmail.com>
Date: Sun, 5 Sep 2021 23:08:20 +0530
Subject: [PATCH] update readme and docs for interface and wrappers

---
 docs/08_authors.rst           |  2 --
 docs/10_interface.rst         | 33 +++++++++++++++++++++++++++++
 docs/10_interface_toc.rst     |  8 +++++++
 docs/index.rst                |  1 +
 docs/interface/pettingzoo.md  | 26 +++++++++++++++++++++++
 docs/interface/pettingzoo.rst | 35 ++++++++++++++++++++++++++++++
 docs/interface/wrappers.md    | 31 +++++++++++++++++++++++++++
 docs/interface/wrappers.rst   | 40 +++++++++++++++++++++++++++++++++++
 make_docs.py                  |  2 +-
 9 files changed, 175 insertions(+), 3 deletions(-)
 create mode 100644 docs/10_interface.rst
 create mode 100644 docs/10_interface_toc.rst
 create mode 100644 docs/interface/pettingzoo.md
 create mode 100644 docs/interface/pettingzoo.rst
 create mode 100644 docs/interface/wrappers.md
 create mode 100644 docs/interface/wrappers.rst

diff --git a/docs/08_authors.rst b/docs/08_authors.rst
index c7558862..0661b54d 100644
--- a/docs/08_authors.rst
+++ b/docs/08_authors.rst
@@ -1,6 +1,4 @@
 Authors
 =======
-.. toctree::
-   :maxdepth: 2
 
 .. include:: ../AUTHORS.rst
diff --git a/docs/10_interface.rst b/docs/10_interface.rst
new file mode 100644
index 00000000..51464bc9
--- /dev/null
+++ b/docs/10_interface.rst
@@ -0,0 +1,33 @@
+Multi-Agent Interface
+=====================
+
+.. include:: interface/pettingzoo.rst
+.. include:: interface/wrappers.rst
+
+Multi-Agent Pettingzoo Usage
+============================
+
+We can use the PettingZoo interface by providing the rail env to the PettingZoo wrapper as shown in the example below.
+
+.. literalinclude:: ../tests/test_pettingzoo_interface.py
+   :language: python
+   :start-after: __sphinx_doc_begin__
+   :end-before: __sphinx_doc_end__
+
+
+Multi-Agent Interface Stable Baselines 3 Training
+=================================================
+
+.. literalinclude:: ../flatland/contrib/training/flatland_pettingzoo_stable_baselines.py
+   :language: python
+   :start-after: __sphinx_doc_begin__
+   :end-before: __sphinx_doc_end__
+
+
+Multi-Agent Interface RLlib Training
+====================================
+
+.. literalinclude:: ../flatland/contrib/training/flatland_pettingzoo_rllib.py
+   :language: python
+   :start-after: __sphinx_doc_begin__
+   :end-before: __sphinx_doc_end__
\ No newline at end of file
diff --git a/docs/10_interface_toc.rst b/docs/10_interface_toc.rst
new file mode 100644
index 00000000..88d2ad7a
--- /dev/null
+++ b/docs/10_interface_toc.rst
@@ -0,0 +1,8 @@
+Multi-Agent Interfaces
+======================
+
+
+.. toctree::
+   :maxdepth: 2
+
+   10_interface
diff --git a/docs/index.rst b/docs/index.rst
index 852ef7f3..e60b8097 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -15,6 +15,7 @@ Welcome to flatland's documentation!
    07_changes
    08_authors
    09_faq_toc
+   10_interface_toc
 
 Indices and tables
 ==================
diff --git a/docs/interface/pettingzoo.md b/docs/interface/pettingzoo.md
new file mode 100644
index 00000000..57d5ea76
--- /dev/null
+++ b/docs/interface/pettingzoo.md
@@ -0,0 +1,26 @@
+# PettingZoo
+
+> PettingZoo (https://www.pettingzoo.ml/) is a collection of multi-agent environments for reinforcement learning. We build a pettingzoo interface for flatland.
+
+## Background
+
+PettingZoo is a popular multi-agent environment library (https://arxiv.org/abs/2009.14471) that aims to be the gym standard for Multi-Agent Reinforcement Learning. We list the below advantages that make it suitable for use with flatland
+
+- Works with both rllib (https://docs.ray.io/en/latest/rllib.html) and stable baselines 3 (https://stable-baselines3.readthedocs.io/) using wrappers from Super Suit.
+- Clean API (https://www.pettingzoo.ml/api) with additional facilities/api for parallel, saving observation, recording using gym monitor, processing, normalising observations
+- Scikit-learn inspired api
+  e.g.
+
+```python
+act = model.predict(obs, deterministic=True)[0] 
+```
+
+- Parallel learning using literally 2 lines of code to use with stable baselines 3
+
+```python
+env = ss.pettingzoo_env_to_vec_env_v0(env)
+env = ss.concat_vec_envs_v0(env, 8, num_cpus=4, base_class='stable_baselines3')
+```
+
+- Tested and supports various multi-agent environments with many agents comparable to flatland. e.g. https://www.pettingzoo.ml/magent
+- Clean interface means we can custom add an experimenting tool like wandb and have full flexibility to save information we want
diff --git a/docs/interface/pettingzoo.rst b/docs/interface/pettingzoo.rst
new file mode 100644
index 00000000..35250de2
--- /dev/null
+++ b/docs/interface/pettingzoo.rst
@@ -0,0 +1,35 @@
+
+PettingZoo
+==========
+
+..
+
+   PettingZoo (https://www.pettingzoo.ml/) is a collection of multi-agent environments for reinforcement learning. We build a pettingzoo interface for flatland.
+
+
+Background
+----------
+
+PettingZoo is a popular multi-agent environment library (https://arxiv.org/abs/2009.14471) that aims to be the gym standard for Multi-Agent Reinforcement Learning. We list the below advantages that make it suitable for use with flatland
+
+
+* Works with both rllib (https://docs.ray.io/en/latest/rllib.html) and stable baselines 3 (https://stable-baselines3.readthedocs.io/) using wrappers from Super Suit.
+* Clean API (https://www.pettingzoo.ml/api) with additional facilities/api for parallel, saving observation, recording using gym monitor, processing, normalising observations
+* Scikit-learn inspired api
+  e.g.
+
+.. code-block:: python
+
+   act = model.predict(obs, deterministic=True)[0]
+
+
+* Parallel learning using literally 2 lines of code to use with stable baselines 3
+
+.. code-block:: python
+
+   env = ss.pettingzoo_env_to_vec_env_v0(env)
+   env = ss.concat_vec_envs_v0(env, 8, num_cpus=4, base_class='stable_baselines3')
+
+
+* Tested and supports various multi-agent environments with many agents comparable to flatland. e.g. https://www.pettingzoo.ml/magent
+* Clean interface means we can custom add an experimenting tool like wandb and have full flexibility to save information we want
diff --git a/docs/interface/wrappers.md b/docs/interface/wrappers.md
new file mode 100644
index 00000000..f853ee59
--- /dev/null
+++ b/docs/interface/wrappers.md
@@ -0,0 +1,31 @@
+# Environment Wrappers
+
+> We provide various environment wrappers to work with both the rail env and the petting zoo interface.
+
+## Background
+
+These wrappers change certain environment behavior, which can help improve reinforcement learning training.
+
+## Supported Inbuilt Wrappers
+
+We provide 2 sample wrappers: the ShortestPathAction wrapper and the SkipNoChoice wrapper. The wrappers require many env properties that are only created on environment reset. Hence, before using a wrapper, we must reset the rail env. To use the wrappers, simply pass the reset rail env. Code samples are shown below for each wrapper.
+
+### ShortestPathAction Wrapper
+
+To use the ShortestPathAction Wrapper, simply wrap the rail env as follows
+
+```python
+rail_env.reset(random_seed=1)
+rail_env = ShortestPathActionWrapper(rail_env)
+```
+
+The shortest path action wrapper maps the existing action space into 3 actions - Shortest Path (`0`), Next Shortest Path (`1`) and Stop (`2`).  Hence, we must ensure that the predicted action should always be one of these (0, 1 and 2) actions. To route all agents in the shortest path, pass `0` as the action.
+
+### SkipNoChoice Wrapper
+
+To use the SkipNoChoiceWrapper, simply wrap the rail env as follows
+
+```python
+rail_env.reset(random_seed=1)
+rail_env = SkipNoChoiceCellsWrapper(rail_env, accumulate_skipped_rewards=False, discounting=0.0)
+```
diff --git a/docs/interface/wrappers.rst b/docs/interface/wrappers.rst
new file mode 100644
index 00000000..3eec7342
--- /dev/null
+++ b/docs/interface/wrappers.rst
@@ -0,0 +1,40 @@
+
+Environment Wrappers
+====================
+
+..
+
+   We provide various environment wrappers to work with both the rail env and the petting zoo interface.
+
+
+Background
+----------
+
+These wrappers change certain environment behavior, which can help improve reinforcement learning training.
+
+Supported Inbuilt Wrappers
+--------------------------
+
+We provide 2 sample wrappers: the ShortestPathAction wrapper and the SkipNoChoice wrapper. The wrappers require many env properties that are only created on environment reset. Hence, before using a wrapper, we must reset the rail env. To use the wrappers, simply pass the reset rail env. Code samples are shown below for each wrapper.
+
+ShortestPathAction Wrapper
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To use the ShortestPathAction Wrapper, simply wrap the rail env as follows
+
+.. code-block:: python
+
+   rail_env.reset(random_seed=1)
+   rail_env = ShortestPathActionWrapper(rail_env)
+
+The shortest path action wrapper maps the existing action space into 3 actions - Shortest Path (\ ``0``\ ), Next Shortest Path (\ ``1``\ ) and Stop (\ ``2``\ ).  Hence, we must ensure that the predicted action should always be one of these (0, 1 and 2) actions. To route all agents in the shortest path, pass ``0`` as the action.
+
+SkipNoChoice Wrapper
+^^^^^^^^^^^^^^^^^^^^
+
+To use the SkipNoChoiceWrapper, simply wrap the rail env as follows
+
+.. code-block:: python
+
+   rail_env.reset(random_seed=1)
+   rail_env = SkipNoChoiceCellsWrapper(rail_env, accumulate_skipped_rewards=False, discounting=0.0)
diff --git a/make_docs.py b/make_docs.py
index 05b99111..adbab9da 100644
--- a/make_docs.py
+++ b/make_docs.py
@@ -24,7 +24,7 @@ for image_file in glob.glob(r'./docs/flatland*.rst'):
     remove_exists(image_file)
 remove_exists('docs/modules.rst')
 
-for md_file in glob.glob(r'./*.md') + glob.glob(r'./docs/specifications/*.md') + glob.glob(r'./docs/tutorials/*.md'):
+for md_file in glob.glob(r'./*.md') + glob.glob(r'./docs/specifications/*.md') + glob.glob(r'./docs/tutorials/*.md') + glob.glob(r'./docs/interface/*.md'):
     from m2r import parse_from_file
 
     rst_content = parse_from_file(md_file)
-- 
GitLab