"""Benchmark payload: run a fixed number of random-policy episodes on a
small complex rail environment and report score / completion statistics.

Invoked under ``benchmarker`` (see ``__main__`` guard) so the whole run is
timed; rendering is disabled for benchmarking.
"""
import random
import time
from collections import deque

import numpy as np
from benchmarker import Benchmarker

from flatland.envs.generators import complex_rail_generator
from flatland.envs.rail_env import RailEnv
from flatland.utils.rendertools import RenderTool


def max_lt(seq, val):
    """Return the greatest NON-NEGATIVE item in ``seq`` that is strictly
    smaller than ``val``.

    Negative entries are skipped (they are sentinel values in the tree
    observation).  Returns ``None`` if ``seq`` is empty or contains no
    non-negative item below ``val``.
    """
    idx = len(seq) - 1
    while idx >= 0:
        if val > seq[idx] >= 0:
            return seq[idx]
        idx -= 1
    return None


def _normalize_obs(obs):
    """Scale one agent's observation vector into [-1, 1].

    BUGFIX: the original did ``max(1, max_lt(obs, np.inf))`` which raises
    ``TypeError`` in Python 3 when ``max_lt`` returns ``None`` (empty or
    all-negative observation); fall back to 1 in that case.
    """
    largest = max_lt(obs, np.inf)
    norm = max(1, largest if largest is not None else 1)
    return np.clip(np.array(obs) / norm, -1, 1)


def main(render=True, delay=0.0):
    """Run 20 episodes of up to 100 steps with a uniform-random policy.

    Parameters
    ----------
    render : bool
        If True, draw every step with the QTSVG renderer.
    delay : float
        Extra sleep (seconds) after each rendered frame.
    """
    random.seed(1)
    np.random.seed(1)

    # Example: generate a random rail
    env = RailEnv(width=15, height=15,
                  rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=20, min_dist=12),
                  number_of_agents=5)

    if render:
        env_renderer = RenderTool(env, gl="QTSVG")

    n_trials = 20
    eps = 1.
    eps_end = 0.005
    eps_decay = 0.998
    action_dict = dict()
    scores_window = deque(maxlen=100)   # rolling mean over the last 100 episodes
    done_window = deque(maxlen=100)
    scores = []
    dones_list = []
    action_prob = [0] * 4               # histogram of the 4 discrete actions

    iFrame = 0
    tStart = time.time()
    for trials in range(1, n_trials + 1):

        # Reset environment
        obs = env.reset()
        if render:
            env_renderer.set_new_rail()

        for a in range(env.get_num_agents()):
            obs[a] = _normalize_obs(obs[a])

        score = 0
        env_done = 0

        # Run episode
        for step in range(100):
            # Uniform-random action for every agent
            for a in range(env.get_num_agents()):
                action = np.random.randint(0, 4)
                action_prob[action] += 1
                action_dict.update({a: action})

            if render:
                env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step,
                                       action_dict=action_dict)
                if delay > 0:
                    time.sleep(delay)

            iFrame += 1

            # Environment step
            next_obs, all_rewards, done, _ = env.step(action_dict)
            for a in range(env.get_num_agents()):
                next_obs[a] = _normalize_obs(next_obs[a])
                # agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
                score += all_rewards[a]

            obs = next_obs.copy()
            if done['__all__']:
                env_done = 1
                break
        # Epsilon decay
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon

        done_window.append(env_done)
        scores_window.append(score)  # save most recent score
        scores.append(np.mean(scores_window))
        dones_list.append(np.mean(done_window))

        # BUGFIX: guard the normalization against an all-zero histogram
        # (the original divided by np.sum(action_prob) unguarded and reset
        # the counter inconsistently to [1]*4 instead of [0]*4).
        prob_norm = np.asarray(action_prob) / max(1, np.sum(action_prob))

        print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
               '\tEpsilon: {:.2f} \t Action Probabilities: \t {}').format(
            env.get_num_agents(),
            trials,
            np.mean(scores_window),
            100 * np.mean(done_window),
            eps, prob_norm),
            end=" ")
        if trials % 100 == 0:
            tNow = time.time()
            rFps = iFrame / (tNow - tStart)
            print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
                   '\tEpsilon: {:.2f} fps: {:.2f} \t Action Probabilities: \t {}').format(
                env.get_num_agents(),
                trials,
                np.mean(scores_window),
                100 * np.mean(done_window),
                eps, rFps, prob_norm))
            action_prob = [0] * 4


if __name__ == "__main__":
    with Benchmarker(cycle=20, extra=1) as bench:
        @bench("Everything")
        def _(bm):
            main(render=False, delay=0)
= ls
    xargs
commands =
    ls benchmarks/*.py | xargs -n 1 python

[testenv]
whitelist_externals = xvfb-run
    sh
-- 
GitLab