"""Benchmark payload: run a fixed number of random-policy episodes on a
small complex rail environment and report score / completion statistics.

Invoked under ``benchmarker`` (see ``__main__`` guard) so the whole run is
timed; rendering is disabled for benchmarking.
"""
import random
import time
from collections import deque

import numpy as np
from benchmarker import Benchmarker

from flatland.envs.generators import complex_rail_generator
from flatland.envs.rail_env import RailEnv
from flatland.utils.rendertools import RenderTool


def max_lt(seq, val):
    """Return the greatest NON-NEGATIVE item in ``seq`` that is strictly
    smaller than ``val``.

    Negative entries are skipped (they are sentinel values in the tree
    observation).  Returns ``None`` if ``seq`` is empty or contains no
    non-negative item below ``val``.
    """
    idx = len(seq) - 1
    while idx >= 0:
        if val > seq[idx] >= 0:
            return seq[idx]
        idx -= 1
    return None


def _normalize_obs(obs):
    """Scale one agent's observation vector into [-1, 1].

    BUGFIX: the original did ``max(1, max_lt(obs, np.inf))`` which raises
    ``TypeError`` in Python 3 when ``max_lt`` returns ``None`` (empty or
    all-negative observation); fall back to 1 in that case.
    """
    largest = max_lt(obs, np.inf)
    norm = max(1, largest if largest is not None else 1)
    return np.clip(np.array(obs) / norm, -1, 1)


def main(render=True, delay=0.0):
    """Run 20 episodes of up to 100 steps with a uniform-random policy.

    Parameters
    ----------
    render : bool
        If True, draw every step with the QTSVG renderer.
    delay : float
        Extra sleep (seconds) after each rendered frame.
    """
    random.seed(1)
    np.random.seed(1)

    # Example: generate a random rail
    env = RailEnv(width=15, height=15,
                  rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=20, min_dist=12),
                  number_of_agents=5)

    if render:
        env_renderer = RenderTool(env, gl="QTSVG")

    n_trials = 20
    eps = 1.
    eps_end = 0.005
    eps_decay = 0.998
    action_dict = dict()
    scores_window = deque(maxlen=100)   # rolling mean over the last 100 episodes
    done_window = deque(maxlen=100)
    scores = []
    dones_list = []
    action_prob = [0] * 4               # histogram of the 4 discrete actions

    iFrame = 0
    tStart = time.time()
    for trials in range(1, n_trials + 1):

        # Reset environment
        obs = env.reset()
        if render:
            env_renderer.set_new_rail()

        for a in range(env.get_num_agents()):
            obs[a] = _normalize_obs(obs[a])

        score = 0
        env_done = 0

        # Run episode
        for step in range(100):
            # Uniform-random action for every agent
            for a in range(env.get_num_agents()):
                action = np.random.randint(0, 4)
                action_prob[action] += 1
                action_dict.update({a: action})

            if render:
                env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step,
                                       action_dict=action_dict)
                if delay > 0:
                    time.sleep(delay)

            iFrame += 1

            # Environment step
            next_obs, all_rewards, done, _ = env.step(action_dict)
            for a in range(env.get_num_agents()):
                next_obs[a] = _normalize_obs(next_obs[a])
                # agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
                score += all_rewards[a]

            obs = next_obs.copy()
            if done['__all__']:
                env_done = 1
                break
        # Epsilon decay
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon

        done_window.append(env_done)
        scores_window.append(score)  # save most recent score
        scores.append(np.mean(scores_window))
        dones_list.append(np.mean(done_window))

        # BUGFIX: guard the normalization against an all-zero histogram
        # (the original divided by np.sum(action_prob) unguarded and reset
        # the counter inconsistently to [1]*4 instead of [0]*4).
        prob_norm = np.asarray(action_prob) / max(1, np.sum(action_prob))

        print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
               '\tEpsilon: {:.2f} \t Action Probabilities: \t {}').format(
            env.get_num_agents(),
            trials,
            np.mean(scores_window),
            100 * np.mean(done_window),
            eps, prob_norm),
            end=" ")
        if trials % 100 == 0:
            tNow = time.time()
            rFps = iFrame / (tNow - tStart)
            print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
                   '\tEpsilon: {:.2f} fps: {:.2f} \t Action Probabilities: \t {}').format(
                env.get_num_agents(),
                trials,
                np.mean(scores_window),
                100 * np.mean(done_window),
                eps, rFps, prob_norm))
            action_prob = [0] * 4


if __name__ == "__main__":
    with Benchmarker(cycle=20, extra=1) as bench:
        @bench("Everything")
        def _(bm):
            main(render=False, delay=0)
= ls
    xargs
commands =
    ls benchmarks/*.py | xargs -n 1 python

[testenv]
whitelist_externals = xvfb-run
    sh
-- 
GitLab