diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a7f0721851686686ec8819606c4d14297ff61bc4..81372257bfb43a079f6c18d4889e850871f7fabd 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -26,7 +26,7 @@ tests:
         - apt install -y libgl1-mesa-glx xvfb
         - pip install tox
         - apt install -y graphviz xdg-utils
-        - xvfb-run -s "-screen 0 800x600x24" tox
+        - xvfb-run tox -v --recreate

 build_and_deploy_docs:
     image: "python:latest"
@@ -42,7 +42,7 @@ build_and_deploy_docs:
     script:
         - pip install -r requirements_dev.txt
         - python setup.py install
-        - make docs
+        - xvfb-run make docs
         - aws s3 cp ./docs/_build/html/ s3://${BUCKET_NAME} --recursive
     environment:
         name: ${CI_COMMIT_REF_SLUG}
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 7ae26bcc3d4e0f4a5cbbcfafd055e2c084a42345..65971d323bb11cd7449be54f22898bedfa6e4b0d 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -79,7 +79,7 @@ Ready to contribute? Here's how to set up `flatland` for local development.
 5. When you're done making changes, check that your changes pass flake8 and the
    tests, including testing other Python versions with tox::

-    $ flake8 flatland tests examples
+    $ flake8 flatland tests examples benchmarks
     $ python setup.py test or py.test
     $ tox

diff --git a/Makefile b/Makefile
index 69ad1b42fd51ef9ec9420f5473dc8acef5468572..98dcbb47a03ad7125694e5053f5e973e45b4fba4 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ clean-test: ## remove test and coverage artifacts
 	rm -fr .pytest_cache

 lint: ## check style with flake8
-	flake8 flatland tests examples
+	flake8 flatland tests examples benchmarks

 test: ## run tests quickly with the default Python
 	echo "$$DISPLAY"
@@ -61,7 +61,7 @@ test-all: ## run tests on every Python version with tox
 	tox

 coverage: ## check code coverage quickly with the default Python
-	xvfb-run -a coverage run --source flatland -m pytest
+	coverage run --source flatland -m pytest
 	coverage report -m
 	coverage html
 	$(BROWSER) htmlcov/index.html
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/benchmarks/play_model_benchmark.py b/benchmarks/play_model_benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3c087e8ab8ed33a24bd447cd3203b85eb1a28f2
--- /dev/null
+++ b/benchmarks/play_model_benchmark.py
@@ -0,0 +1,132 @@
+import random
+import time
+from collections import deque
+
+import numpy as np
+from benchmarker import Benchmarker
+
+from flatland.envs.generators import complex_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
+
+
+def main(render=True, delay=0.0):
+    random.seed(1)
+    np.random.seed(1)
+
+    # Example generate a random rail
+    env = RailEnv(width=15, height=15,
+                  rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=20, min_dist=12),
+                  number_of_agents=5)
+
+    if render:
+        env_renderer = RenderTool(env, gl="QTSVG")
+
+    n_trials = 20
+    eps = 1.
+    eps_end = 0.005
+    eps_decay = 0.998
+    action_dict = dict()
+    scores_window = deque(maxlen=100)
+    done_window = deque(maxlen=100)
+    scores = []
+    dones_list = []
+    action_prob = [0] * 4
+
+    def max_lt(seq, val):
+        """
+        Return greatest item in seq for which item < val applies.
+        None is returned if seq was empty or all items in seq were >= val.
+        """
+
+        idx = len(seq) - 1
+        while idx >= 0:
+            if seq[idx] < val and seq[idx] >= 0:
+                return seq[idx]
+            idx -= 1
+        return None
+
+    iFrame = 0
+    tStart = time.time()
+    for trials in range(1, n_trials + 1):
+
+        # Reset environment
+        obs = env.reset()
+        if render:
+            env_renderer.set_new_rail()
+
+        for a in range(env.get_num_agents()):
+            norm = max(1, max_lt(obs[a], np.inf))
+            obs[a] = np.clip(np.array(obs[a]) / norm, -1, 1)
+
+        # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
+
+        score = 0
+        env_done = 0
+
+        # Run episode
+        for step in range(100):
+            # if trials > 114:
+            # env_renderer.renderEnv(show=True)
+            # print(step)
+            # Action
+            for a in range(env.get_num_agents()):
+                action = np.random.randint(0, 4)
+                action_prob[action] += 1
+                action_dict.update({a: action})
+
+            if render:
+                env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step, action_dict=action_dict)
+                if delay > 0:
+                    time.sleep(delay)
+
+            iFrame += 1
+
+            # Environment step
+            next_obs, all_rewards, done, _ = env.step(action_dict)
+            for a in range(env.get_num_agents()):
+                norm = max(1, max_lt(next_obs[a], np.inf))
+                next_obs[a] = np.clip(np.array(next_obs[a]) / norm, -1, 1)
+            # Update replay buffer and train agent
+            for a in range(env.get_num_agents()):
+                # agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
+                score += all_rewards[a]
+
+            obs = next_obs.copy()
+            if done['__all__']:
+                env_done = 1
+                break
+        # Epsilon decay
+        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
+
+        done_window.append(env_done)
+        scores_window.append(score)  # save most recent score
+        scores.append(np.mean(scores_window))
+        dones_list.append((np.mean(done_window)))
+
+        print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
+               '\tEpsilon: {:.2f} \t Action Probabilities: \t {}').format(
+            env.get_num_agents(),
+            trials,
+            np.mean(scores_window),
+            100 * np.mean(done_window),
+            eps, action_prob / np.sum(action_prob)),
+            end=" ")
+        if trials % 100 == 0:
+            tNow = time.time()
+            rFps = iFrame / (tNow - tStart)
+            print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
+                   '\tEpsilon: {:.2f} fps: {:.2f} \t Action Probabilities: \t {}').format(
+                env.get_num_agents(),
+                trials,
+                np.mean(scores_window),
+                100 * np.mean(done_window),
+                eps, rFps, action_prob / np.sum(action_prob)))
+            action_prob = [1] * 4
+
+
+if __name__ == "__main__":
+    with Benchmarker(cycle=20, extra=1) as bench:
+        @bench("Everything")
+        def _(bm):
+            main(render=False, delay=0)
diff --git a/requirements_dev.txt b/requirements_dev.txt
index 4b288cee88f4231f330c51f70b1cd9c9f9d05389..cb11e71ae9f86dc4021665dc37172e1d66fda20b 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -7,6 +7,7 @@ tox==3.5.2
 coverage==4.5.1
 Sphinx==1.8.1
 twine==1.12.1
+benchmarker==4.0.1

 pytest==3.8.2
 pytest-runner==4.2
diff --git a/tox.ini b/tox.ini
index 6dd011aadeb2e7ba802ff692278aa763fb665f10..939334a6315c5758ca3d06d967fc7fcbbbbfaa21 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py36, py37, flake8, docs, coverage, xvfb-run, sh
+envlist = py36, py37, flake8, docs, coverage, benchmark, sh

 [travis]
 python =
@@ -13,27 +13,42 @@ ignore = E121 E126 E123 E128 E133 E226 E241 E242 E704 W291 W293 W391 W503 W504 W
 [testenv:flake8]
 basepython = python
 deps = flake8
-commands = flake8 flatland tests examples
+commands = flake8 flatland tests examples benchmarks

 [testenv:docs]
 basepython = python
 whitelist_externals = make
+passenv =
+    DISPLAY
 commands = make docs

 [testenv:coverage]
 basepython = python
 whitelist_externals = make
+passenv =
+    DISPLAY
 commands =
     pip install -U pip
     pip install -r requirements_dev.txt
     make coverage

+[testenv:benchmark]
+basepython = python
+setenv =
+    PYTHONPATH = {toxinidir}
+passenv =
+    DISPLAY
+whitelist_externals = sh
+commands =
+    sh -c 'ls benchmarks/*.py | xargs -n 1 python'
+
 [testenv]
-whitelist_externals = xvfb-run
-    sh
+whitelist_externals = sh
     pip
 setenv =
     PYTHONPATH = {toxinidir}
+passenv =
+    DISPLAY
 deps =
     -r{toxinidir}/requirements_dev.txt
 ; If you want to make tox run the tests with the same versions, create a
@@ -43,6 +58,6 @@ commands =
     pip install -U pip
     pip install -r requirements_dev.txt
     sh -c 'echo DISPLAY: $DISPLAY'
-    xvfb-run -a py.test --basetemp={envtmpdir}
+    py.test --basetemp={envtmpdir}
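
Usage note (a sketch, not part of the patch above): with benchmarker==4.0.1 from requirements_dev.txt installed, the new benchmark can be exercised either through the dedicated tox environment or by running the script directly::

    $ tox -e benchmark
    $ python benchmarks/play_model_benchmark.py

The benchmark entry point calls main(render=False, delay=0), so rendering is skipped during the measured runs.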