From 625734e601a037b8a41fa885557e072003060ca8 Mon Sep 17 00:00:00 2001 From: Nilabha <nilabha2007@gmail.com> Date: Sun, 5 Sep 2021 23:08:20 +0530 Subject: [PATCH] update readme and docs for interface and wrappers --- docs/08_authors.rst | 2 -- docs/10_interface.rst | 33 +++++++++++++++++++++++++++++ docs/10_interface_toc.rst | 8 +++++++ docs/index.rst | 1 + docs/interface/pettingzoo.md | 26 +++++++++++++++++++++++ docs/interface/pettingzoo.rst | 35 ++++++++++++++++++++++++++++++ docs/interface/wrappers.md | 31 +++++++++++++++++++++++++++ docs/interface/wrappers.rst | 40 +++++++++++++++++++++++++++++++++++ make_docs.py | 2 +- 9 files changed, 175 insertions(+), 3 deletions(-) create mode 100644 docs/10_interface.rst create mode 100644 docs/10_interface_toc.rst create mode 100644 docs/interface/pettingzoo.md create mode 100644 docs/interface/pettingzoo.rst create mode 100644 docs/interface/wrappers.md create mode 100644 docs/interface/wrappers.rst diff --git a/docs/08_authors.rst b/docs/08_authors.rst index c7558862..0661b54d 100644 --- a/docs/08_authors.rst +++ b/docs/08_authors.rst @@ -1,6 +1,4 @@ Authors ======= -.. toctree:: - :maxdepth: 2 .. include:: ../AUTHORS.rst diff --git a/docs/10_interface.rst b/docs/10_interface.rst new file mode 100644 index 00000000..51464bc9 --- /dev/null +++ b/docs/10_interface.rst @@ -0,0 +1,33 @@ +Multi-Agent Interface +===================== + +.. include:: interface/pettingzoo.rst +.. include:: interface/wrappers.rst + +Multi-Agent PettingZoo Usage +============================ + +We can use the PettingZoo interface by providing the rail env to the PettingZoo wrapper, as shown in the example below. + +.. literalinclude:: ../tests/test_pettingzoo_interface.py + :language: python + :start-after: __sphinx_doc_begin__ + :end-before: __sphinx_doc_end__ + + +Multi-Agent Interface Stable Baselines 3 Training +================================================= + +.. 
literalinclude:: ../flatland/contrib/training/flatland_pettingzoo_stable_baselines.py + :language: python + :start-after: __sphinx_doc_begin__ + :end-before: __sphinx_doc_end__ + + +Multi-Agent Interface RLlib Training +==================================== + +.. literalinclude:: ../flatland/contrib/training/flatland_pettingzoo_rllib.py + :language: python + :start-after: __sphinx_doc_begin__ + :end-before: __sphinx_doc_end__ \ No newline at end of file diff --git a/docs/10_interface_toc.rst b/docs/10_interface_toc.rst new file mode 100644 index 00000000..88d2ad7a --- /dev/null +++ b/docs/10_interface_toc.rst @@ -0,0 +1,8 @@ +Multi-Agent Interfaces +====================== + + +.. toctree:: + :maxdepth: 2 + + 10_interface diff --git a/docs/index.rst b/docs/index.rst index 852ef7f3..e60b8097 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -15,6 +15,7 @@ Welcome to flatland's documentation! 07_changes 08_authors 09_faq_toc + 10_interface Indices and tables ================== diff --git a/docs/interface/pettingzoo.md b/docs/interface/pettingzoo.md new file mode 100644 index 00000000..57d5ea76 --- /dev/null +++ b/docs/interface/pettingzoo.md @@ -0,0 +1,26 @@ +# PettingZoo + +> PettingZoo (https://www.pettingzoo.ml/) is a collection of multi-agent environments for reinforcement learning. We provide a PettingZoo interface for flatland. + +## Background + +PettingZoo is a popular multi-agent environment library (https://arxiv.org/abs/2009.14471) that aims to be the gym standard for Multi-Agent Reinforcement Learning. The advantages listed below make it suitable for use with flatland: + +- Works with both rllib (https://docs.ray.io/en/latest/rllib.html) and stable baselines 3 (https://stable-baselines3.readthedocs.io/) using wrappers from Super Suit. +- Clean API (https://www.pettingzoo.ml/api) with additional facilities/api for parallel, saving observation, recording using gym monitor, processing, normalising observations +- Scikit-learn inspired api + e.g. 
+ +```python +act = model.predict(obs, deterministic=True)[0] +``` + +- Parallel learning using literally 2 lines of code to use with stable baselines 3 + +```python +env = ss.pettingzoo_env_to_vec_env_v0(env) +env = ss.concat_vec_envs_v0(env, 8, num_cpus=4, base_class='stable_baselines3') +``` + +- Tested with, and supports, various multi-agent environments with many agents comparable to flatland, e.g. https://www.pettingzoo.ml/magent +- The clean interface means we can add a custom experiment-tracking tool such as wandb and have full flexibility to save the information we want diff --git a/docs/interface/pettingzoo.rst b/docs/interface/pettingzoo.rst new file mode 100644 index 00000000..35250de2 --- /dev/null +++ b/docs/interface/pettingzoo.rst @@ -0,0 +1,35 @@ + +PettingZoo +========== + +.. + + PettingZoo (https://www.pettingzoo.ml/) is a collection of multi-agent environments for reinforcement learning. We provide a PettingZoo interface for flatland. + + +Background +---------- + +PettingZoo is a popular multi-agent environment library (https://arxiv.org/abs/2009.14471) that aims to be the gym standard for Multi-Agent Reinforcement Learning. The advantages listed below make it suitable for use with flatland: + + +* Works with both rllib (https://docs.ray.io/en/latest/rllib.html) and stable baselines 3 (https://stable-baselines3.readthedocs.io/) using wrappers from Super Suit. +* Clean API (https://www.pettingzoo.ml/api) with additional facilities/api for parallel, saving observation, recording using gym monitor, processing, normalising observations +* Scikit-learn inspired api + e.g. + +.. 
code-block:: python + + env = ss.pettingzoo_env_to_vec_env_v0(env) + env = ss.concat_vec_envs_v0(env, 8, num_cpus=4, base_class='stable_baselines3') + + +* Tested with, and supports, various multi-agent environments with many agents comparable to flatland, e.g. https://www.pettingzoo.ml/magent +* The clean interface means we can add a custom experiment-tracking tool such as wandb and have full flexibility to save the information we want diff --git a/docs/interface/wrappers.md b/docs/interface/wrappers.md new file mode 100644 index 00000000..f853ee59 --- /dev/null +++ b/docs/interface/wrappers.md @@ -0,0 +1,31 @@ +# Environment Wrappers + +> We provide various environment wrappers to work with both the rail env and the PettingZoo interface. + +## Background + +These wrappers change certain environment behavior, which can help improve reinforcement learning training. + +## Supported Inbuilt Wrappers + +We provide 2 sample wrappers: the ShortestPathAction wrapper and the SkipNoChoice wrapper. The wrappers require many env properties that are only created on environment reset. Hence, before using a wrapper, we must reset the rail env. To use the wrappers, simply pass the reset rail env. Code samples are shown below for each wrapper. + +### ShortestPathAction Wrapper + +To use the ShortestPathAction Wrapper, simply wrap the rail env as follows: + +```python +rail_env.reset(random_seed=1) +rail_env = ShortestPathActionWrapper(rail_env) +``` + +The shortest path action wrapper maps the existing action space into 3 actions - Shortest Path (`0`), Next Shortest Path (`1`) and Stop (`2`). Hence, we must ensure that the predicted action is always one of these (0, 1 and 2) actions. To route all agents along the shortest path, pass `0` as the action. 
+ +### SkipNoChoice Wrapper + +To use the SkipNoChoiceCellsWrapper, simply wrap the rail env as follows: + +```python +rail_env.reset(random_seed=1) +rail_env = SkipNoChoiceCellsWrapper(rail_env, accumulate_skipped_rewards=False, discounting=0.0) +``` diff --git a/docs/interface/wrappers.rst b/docs/interface/wrappers.rst new file mode 100644 index 00000000..3eec7342 --- /dev/null +++ b/docs/interface/wrappers.rst @@ -0,0 +1,40 @@ + +Environment Wrappers +==================== + +.. + + We provide various environment wrappers to work with both the rail env and the PettingZoo interface. + + +Background +---------- + +These wrappers change certain environment behavior, which can help improve reinforcement learning training. + +Supported Inbuilt Wrappers +-------------------------- + +We provide 2 sample wrappers: the ShortestPathAction wrapper and the SkipNoChoice wrapper. The wrappers require many env properties that are only created on environment reset. Hence, before using a wrapper, we must reset the rail env. To use the wrappers, simply pass the reset rail env. Code samples are shown below for each wrapper. + +ShortestPathAction Wrapper +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To use the ShortestPathAction Wrapper, simply wrap the rail env as follows: + +.. code-block:: python + + rail_env.reset(random_seed=1) + rail_env = ShortestPathActionWrapper(rail_env) + +The shortest path action wrapper maps the existing action space into 3 actions - Shortest Path (\ ``0``\ ), Next Shortest Path (\ ``1``\ ) and Stop (\ ``2``\ ). Hence, we must ensure that the predicted action is always one of these (0, 1 and 2) actions. To route all agents along the shortest path, pass ``0`` as the action. + +SkipNoChoice Wrapper +^^^^^^^^^^^^^^^^^^^^ + +To use the SkipNoChoiceCellsWrapper, simply wrap the rail env as follows: + +.. 
code-block:: python + + rail_env.reset(random_seed=1) + rail_env = SkipNoChoiceCellsWrapper(rail_env, accumulate_skipped_rewards=False, discounting=0.0) diff --git a/make_docs.py b/make_docs.py index 05b99111..adbab9da 100644 --- a/make_docs.py +++ b/make_docs.py @@ -24,7 +24,7 @@ for image_file in glob.glob(r'./docs/flatland*.rst'): remove_exists(image_file) remove_exists('docs/modules.rst') -for md_file in glob.glob(r'./*.md') + glob.glob(r'./docs/specifications/*.md') + glob.glob(r'./docs/tutorials/*.md'): +for md_file in glob.glob(r'./*.md') + glob.glob(r'./docs/specifications/*.md') + glob.glob(r'./docs/tutorials/*.md') + glob.glob(r'./docs/interface/*.md'): from m2r import parse_from_file rst_content = parse_from_file(md_file) -- GitLab