Compare revisions

770885df · 604116f6 · c168ee64 · 42a800cd · a8275161 · 662a63bb
--- a/.gitignore
+++ b/.gitignore
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-pip-wheel-metadata/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-.hypothesis/
-.pytest_cache/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-.python-version
-
-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-#Pipfile.lock
-
-# celery beat schedule file
-celerybeat-schedule
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-scratch/test-envs/
-scratch/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+scratch/test-envs/
+scratch/
+
+# Checkpoints and replay buffers
+!checkpoints/.gitkeep
+replay_buffers/*
+!replay_buffers/.gitkeep
\ No newline at end of file
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
+# Default ignored files
+/workspace.xml
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/neurips2020-flatland-starter-kit.iml" filepath="$PROJECT_DIR$/.idea/neurips2020-flatland-starter-kit.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/neurips2020-flatland-starter-kit.iml
+++ b/.idea/neurips2020-flatland-starter-kit.iml
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="pytest" />
+  </component>
+</module>
\ No newline at end of file
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
--- a/LICENSE
+++ b/LICENSE
+MIT License
+
+Copyright (c) 2020 Flatland
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README.md
+++ b/README.md
-![AIcrowd-Logo](https://raw.githubusercontent.com/AIcrowd/AIcrowd/master/app/assets/images/misc/aicrowd-horizontal.png)
-
-# Flatland Challenge Starter Kit
-
-**[Follow these instructions to submit your solutions!](http://flatland.aicrowd.com/getting-started/first-submission.html)**
-
-
-![flatland](https://i.imgur.com/0rnbSLY.gif)
-
-Main links
---
-
-* [Flatland documentation](https://flatland.aicrowd.com/)
-* [NeurIPS 2020 Challenge](https://www.aicrowd.com/challenges/neurips-2020-flatland-challenge/)
-
-Communication
---
-
-* [Discord Channel](https://discord.com/invite/hCR3CZG)
-* [Discussion Forum](https://discourse.aicrowd.com/c/neurips-2020-flatland-challenge)
-* [Issue Tracker](https://gitlab.aicrowd.com/flatland/flatland/issues/)
-
-Author
---
-
- **[Sharada Mohanty](https://twitter.com/MeMohanty)**
+🚂 This code is based on the official starter kit - NeurIPS 2020 Flatland Challenge
+---
+
+You can use for your own experiments full or reduced action space. 
+
+```python
+def map_action(action):
+    # if full action space is used -> no mapping required
+    if get_action_size() == get_flatland_full_action_size():
+        return action
+    
+    # if reduced action space is used -> the action has to be mapped to real flatland actions
+    # The reduced action space removes the DO_NOTHING action from Flatland.
+    if action == 0:
+        return RailEnvActions.MOVE_LEFT
+    if action == 1:
+        return RailEnvActions.MOVE_FORWARD
+    if action == 2:
+        return RailEnvActions.MOVE_RIGHT
+    if action == 3:
+        return RailEnvActions.STOP_MOVING
+```
+
+```python
+set_action_size_full()
+```
+or 
+```python
+set_action_size_reduced()
+```
+action space. The reduced action space just removes DO_NOTHING. 
+
+---
+The used policy is based on the FastTreeObs in the official starter kit - NeurIPS 2020 Flatland Challenge. But the
+ FastTreeObs in this repo is an extended version. 
+[fast_tree_obs.py](./utils/fast_tree_obs.py)
+
+---
+Have a look into the [run.py](./run.py) file. There you can select using PPO or DDDQN as RL agents. 
+ 
+```python
+####################################################
+# EVALUATION PARAMETERS
+set_action_size_full()
+
+# Print per-step logs
+VERBOSE = True
+USE_FAST_TREEOBS = True
+
+if False:
+    # -------------------------------------------------------------------------------------------------------
+    # RL solution
+    # -------------------------------------------------------------------------------------------------------
+    # 116591 adrian_egli
+    # graded	71.305	0.633	RL	Successfully Graded ! More details about this submission can be found at:
+    # http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/51
+    # Fri, 22 Jan 2021 23:37:56
+    set_action_size_reduced()
+    load_policy = "DDDQN"
+    checkpoint = "./checkpoints/210122120236-3000.pth"  # 17.011131341978228
+    EPSILON = 0.0
+
+if False:
+    # -------------------------------------------------------------------------------------------------------
+    # RL solution
+    # -------------------------------------------------------------------------------------------------------
+    # 116658 adrian_egli
+    # graded	73.821	0.655	RL	Successfully Graded ! More details about this submission can be found at:
+    # http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/52
+    # Sat, 23 Jan 2021 07:41:35
+    set_action_size_reduced()
+    load_policy = "PPO"
+    checkpoint = "./checkpoints/210122235754-5000.pth"  # 16.00113400887389
+    EPSILON = 0.0
+
+if True:
+    # -------------------------------------------------------------------------------------------------------
+    # RL solution
+    # -------------------------------------------------------------------------------------------------------
+    # 116659 adrian_egli
+    # graded	80.579	0.715	RL	Successfully Graded ! More details about this submission can be found at:
+    # http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/53
+    # Sat, 23 Jan 2021 07:45:49
+    set_action_size_reduced()
+    load_policy = "DDDQN"
+    checkpoint = "./checkpoints/210122165109-5000.pth"  # 17.993750197899438
+    EPSILON = 0.0
+
+if False:
+    # -------------------------------------------------------------------------------------------------------
+    # !! This is not a RL solution !!!!
+    # -------------------------------------------------------------------------------------------------------
+    # 116727 adrian_egli
+    # graded	106.786	0.768	RL	Successfully Graded ! More details about this submission can be found at:
+    # http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/54
+    # Sat, 23 Jan 2021 14:31:50
+    set_action_size_reduced()
+    load_policy = "DeadLockAvoidance"
+    checkpoint = None
+    EPSILON = 0.0
+```
+
+---
+A deadlock avoidance agent is implemented. The agent only lets the train take the shortest route. And it tries to avoid as many deadlocks as possible.
+* [dead_lock_avoidance_agent.py](./utils/dead_lock_avoidance_agent.py)
+
+
+---
+The policy interface has changed, please have a look into 
+* [policy.py](./reinforcement_learning/policy.py)
+
+---
+See the tensorboard training output to get some insights:
+```
+tensorboard --logdir ./runs_bench 
+```
+
+---
+```
+python reinforcement_learning/multi_agent_training.py --use_fast_tree_observation  --checkpoint_interval 1000 -n 5000
+ --policy DDDQN -t 2 --action_size reduced --buffer_siz 128000
+```
+
+[multi_agent_training.py](./reinforcement_learning/multi_agent_training.py)
+has new or changed parameters. Most important new or changed parameters for training. 
+ * policy :  [DDDQN, PPO, DeadLockAvoidance, DeadLockAvoidanceWithDecision, MultiDecision] : Default value
+   DeadLockAvoidance 
+ * use_fast_tree_observation : [false,true] : Default value = true  
+ * action_size: [full, reduced] : Default value = full
+``` 
+usage: multi_agent_training.py [-h] [-n N_EPISODES] [--n_agent_fixed]
+                               [-t TRAINING_ENV_CONFIG]
+                               [-e EVALUATION_ENV_CONFIG]
+                               [--n_evaluation_episodes N_EVALUATION_EPISODES]
+                               [--checkpoint_interval CHECKPOINT_INTERVAL]
+                               [--eps_start EPS_START] [--eps_end EPS_END]
+                               [--eps_decay EPS_DECAY]
+                               [--buffer_size BUFFER_SIZE]
+                               [--buffer_min_size BUFFER_MIN_SIZE]
+                               [--restore_replay_buffer RESTORE_REPLAY_BUFFER]
+                               [--save_replay_buffer SAVE_REPLAY_BUFFER]
+                               [--batch_size BATCH_SIZE] [--gamma GAMMA]
+                               [--tau TAU] [--learning_rate LEARNING_RATE]
+                               [--hidden_size HIDDEN_SIZE]
+                               [--update_every UPDATE_EVERY]
+                               [--use_gpu USE_GPU] [--num_threads NUM_THREADS]
+                               [--render] [--load_policy LOAD_POLICY]
+                               [--use_fast_tree_observation]
+                               [--max_depth MAX_DEPTH] [--policy POLICY]
+                               [--action_size ACTION_SIZE]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -n N_EPISODES, --n_episodes N_EPISODES
+                        number of episodes to run
+  --n_agent_fixed       hold the number of agent fixed
+  -t TRAINING_ENV_CONFIG, --training_env_config TRAINING_ENV_CONFIG
+                        training config id (eg 0 for Test_0)
+  -e EVALUATION_ENV_CONFIG, --evaluation_env_config EVALUATION_ENV_CONFIG
+                        evaluation config id (eg 0 for Test_0)
+  --n_evaluation_episodes N_EVALUATION_EPISODES
+                        number of evaluation episodes
+  --checkpoint_interval CHECKPOINT_INTERVAL
+                        checkpoint interval
+  --eps_start EPS_START
+                        max exploration
+  --eps_end EPS_END     min exploration
+  --eps_decay EPS_DECAY
+                        exploration decay
+  --buffer_size BUFFER_SIZE
+                        replay buffer size
+  --buffer_min_size BUFFER_MIN_SIZE
+                        min buffer size to start training
+  --restore_replay_buffer RESTORE_REPLAY_BUFFER
+                        replay buffer to restore
+  --save_replay_buffer SAVE_REPLAY_BUFFER
+                        save replay buffer at each evaluation interval
+  --batch_size BATCH_SIZE
+                        minibatch size
+  --gamma GAMMA         discount factor
+  --tau TAU             soft update of target parameters
+  --learning_rate LEARNING_RATE
+                        learning rate
+  --hidden_size HIDDEN_SIZE
+                        hidden size (2 fc layers)
+  --update_every UPDATE_EVERY
+                        how often to update the network
+  --use_gpu USE_GPU     use GPU if available
+  --num_threads NUM_THREADS
+                        number of threads PyTorch can use
+  --render              render 1 episode in 100
+  --load_policy LOAD_POLICY
+                        policy filename (reference) to load
+  --use_fast_tree_observation
+                        use FastTreeObs instead of stock TreeObs
+  --max_depth MAX_DEPTH
+                        max depth
+  --policy POLICY       policy name [DDDQN, PPO, DeadLockAvoidance,
+                        DeadLockAvoidanceWithDecision, MultiDecision]
+  --action_size ACTION_SIZE
+                        define the action size [reduced,full]
+```                        
+
+
+---
+If you have any questions write me on the official discord channel **aiAdrian**    
+(Adrian Egli - adrian.egli@gmail.com) 
+
+
+Credits
+---
+
+* Florian Laurent <florian@aicrowd.com>
+* Erik Nygren <erik.nygren@sbb.ch>
+* Adrian Egli <adrian.egli@sbb.ch>
+* Sharada Mohanty <mohanty@aicrowd.com>
+* Christian Baumberger <christian.baumberger@sbb.ch>
+* Guillaume Mollard <guillaume.mollard2@gmail.com>
+
+Main links
+---
+
+* [Flatland documentation](https://flatland.aicrowd.com/)
+* [Flatland Challenge](https://www.aicrowd.com/challenges/flatland)
+
+Communication
+---
+
+* [Discord Channel](https://discord.com/invite/hCR3CZG)
+* [Discussion Forum](https://discourse.aicrowd.com/c/neurips-2020-flatland-challenge)
+* [Issue Tracker](https://gitlab.aicrowd.com/flatland/flatland/issues/)
\ No newline at end of file
--- a/aicrowd.json
+++ b/aicrowd.json
-{
-  "challenge_id": "neurips-2020-flatland-challenge",
-  "grader_id": "neurips-2020-flatland-challenge",
-  "debug": false,
-  "tags": ["RL"]
-}
-
+{
+  "challenge_id": "neurips-2020-flatland-challenge",
+  "grader_id": "neurips-2020-flatland-challenge",
+  "debug": false,
+  "tags": ["RL"]
+}
+
--- a/apt.txt
+++ b/apt.txt
@@ -3,4 +3,4 @@ git
 vim
 ssh
 gcc
-python-cairo-dev
+build-essential
\ No newline at end of file
--- a/checkpoints/.gitkeep
+++ b/checkpoints/.gitkeep
--- a/checkpoints/210122120236-3000.pth.local
+++ b/checkpoints/210122120236-3000.pth.local
--- a/checkpoints/210122120236-3000.pth.target
+++ b/checkpoints/210122120236-3000.pth.target
--- a/checkpoints/210122165109-5000.pth.local
+++ b/checkpoints/210122165109-5000.pth.local
--- a/checkpoints/210122165109-5000.pth.target
+++ b/checkpoints/210122165109-5000.pth.target
--- a/checkpoints/210122235754-5000.pth.actor
+++ b/checkpoints/210122235754-5000.pth.actor
--- a/checkpoints/210122235754-5000.pth.optimizer
+++ b/checkpoints/210122235754-5000.pth.optimizer
--- a/checkpoints/210122235754-5000.pth.value
+++ b/checkpoints/210122235754-5000.pth.value
--- a/checkpoints/No_col_20/best_0.6672/ppo/model_checkpoint.meta
+++ b/checkpoints/No_col_20/best_0.6672/ppo/model_checkpoint.meta
No results found