From 504c44d449e8b19bb59bad77e794a3e621ef25d4 Mon Sep 17 00:00:00 2001
From: u214892 <u214892@sbb.ch>
Date: Mon, 20 May 2019 10:47:37 +0200
Subject: [PATCH] #22 flake8 for examples

---
 CONTRIBUTING.rst                |  4 +--
 Makefile                        |  2 +-
 examples/play_model.py          | 62 ++++++++++++++++----------------
 examples/qt2.py                 |  6 ++--
 examples/temporary_example.py   | 16 ++++-----
 examples/training_navigation.py | 49 ++++++++++++++++----------
 6 files changed, 75 insertions(+), 64 deletions(-)

diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index f552f76..7ae26bc 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -79,7 +79,7 @@ Ready to contribute? Here's how to set up `flatland` for local development.
 5. When you're done making changes, check that your changes pass flake8 and the
    tests, including testing other Python versions with tox::
 
-    $ flake8 flatland tests
+    $ flake8 flatland tests examples
     $ python setup.py test or py.test
     $ tox
 
@@ -125,4 +125,4 @@ $ bumpversion patch # possible: major / minor / patch
 $ git push
 $ git push --tags
 
-Travis will then deploy to PyPI if tests pass. (To be configured properly by Mohanty)
\ No newline at end of file
+Travis will then deploy to PyPI if tests pass. (To be configured properly by Mohanty)
diff --git a/Makefile b/Makefile
index 691bf84..6a655bb 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ clean-test: ## remove test and coverage artifacts
 	rm -fr .pytest_cache
 
 lint: ## check style with flake8
-	flake8 flatland tests
+	flake8 flatland tests examples
 
 test: ## run tests quickly with the default Python
 	py.test
diff --git a/examples/play_model.py b/examples/play_model.py
index 62726c2..1745681 100644
--- a/examples/play_model.py
+++ b/examples/play_model.py
@@ -1,12 +1,14 @@
-from flatland.envs.rail_env import RailEnv
-from flatland.envs.generators import complex_rail_generator
-from flatland.utils.rendertools import RenderTool
-from flatland.baselines.dueling_double_dqn import Agent
-from collections import deque
-import torch
 import random
-import numpy as np
 import time
+from collections import deque
+
+import numpy as np
+import torch
+
+from flatland.baselines.dueling_double_dqn import Agent
+from flatland.envs.generators import complex_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
 
 
 class Player(object):
@@ -25,7 +27,7 @@ class Player(object):
         self.done_window = deque(maxlen=100)
         self.scores = []
         self.dones_list = []
-        self.action_prob = [0]*4
+        self.action_prob = [0] * 4
         self.agent = Agent(self.state_size, self.action_size, "FC", 0)
         # self.agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
         self.agent.qnetwork_local.load_state_dict(torch.load(
@@ -33,7 +35,7 @@ class Player(object):
 
         self.iFrame = 0
         self.tStart = time.time()
-        
+
         # Reset environment
         # self.obs = self.env.reset()
         self.env.obs_builder.reset()
@@ -51,7 +53,6 @@ class Player(object):
         env = self.env
 
         # Pass the (stored) observation to the agent network and retrieve the action
-        #for handle in env.get_agent_handles():
         for handle in env.get_agent_handles():
             action = self.agent.act(np.array(self.obs[handle]), eps=self.eps)
             self.action_prob[action] += 1
@@ -68,8 +69,8 @@ class Player(object):
 
         # Update replay buffer and train agent
         for handle in self.env.get_agent_handles():
             self.agent.step(self.obs[handle], self.action_dict[handle],
-                all_rewards[handle], next_obs[handle], done[handle],
-                train=False)
+                            all_rewards[handle], next_obs[handle], done[handle],
+                            train=False)
             self.score += all_rewards[handle]
             self.iFrame += 1
@@ -85,7 +86,7 @@ def max_lt(seq, val):
     None is returned if seq was empty or all items in seq were >= val.
     """
 
-    idx = len(seq)-1
+    idx = len(seq) - 1
     while idx >= 0:
         if seq[idx] < val and seq[idx] >= 0:
             return seq[idx]
@@ -94,7 +95,6 @@ def main(render=True, delay=0.0):
-
     random.seed(1)
     np.random.seed(1)
 
@@ -118,8 +118,9 @@ def main(render=True, delay=0.0):
     done_window = deque(maxlen=100)
     scores = []
     dones_list = []
-    action_prob = [0]*4
+    action_prob = [0] * 4
     agent = Agent(state_size, action_size, "FC", 0)
+
     # agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
 
     def max_lt(seq, val):
@@ -128,7 +129,7 @@ def main(render=True, delay=0.0):
         None is returned if seq was empty or all items in seq were >= val.
         """
 
-        idx = len(seq)-1
+        idx = len(seq) - 1
         while idx >= 0:
             if seq[idx] < val and seq[idx] >= 0:
                 return seq[idx]
@@ -141,7 +142,8 @@ def main(render=True, delay=0.0):
 
         # Reset environment
         obs = env.reset()
-        env_renderer.set_new_rail()
+        if render:
+            env_renderer.set_new_rail()
 
         for a in range(env.get_num_agents()):
             norm = max(1, max_lt(obs[a], np.inf))
@@ -165,7 +167,6 @@ def main(render=True, delay=0.0):
             if render:
                 env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step,
                                        action_dict=action_dict)
-                #time.sleep(10)
 
             if delay > 0:
                 time.sleep(delay)
@@ -181,7 +182,6 @@ def main(render=True, delay=0.0):
 
                 agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
                 score += all_rewards[a]
-
             obs = next_obs.copy()
             if done['__all__']:
                 env_done = 1
@@ -196,25 +196,25 @@ def main(render=True, delay=0.0):
 
         print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
                '\tEpsilon: {:.2f} \t Action Probabilities: \t {}').format(
-            env.get_num_agents(),
-            trials,
-            np.mean(scores_window),
-            100 * np.mean(done_window),
-            eps, action_prob/np.sum(action_prob)),
+              env.get_num_agents(),
+              trials,
+              np.mean(scores_window),
+              100 * np.mean(done_window),
+              eps, action_prob / np.sum(action_prob)),
               end=" ")
         if trials % 100 == 0:
             tNow = time.time()
             rFps = iFrame / (tNow - tStart)
             print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
                    '\tEpsilon: {:.2f} fps: {:.2f} \t Action Probabilities: \t {}').format(
-                env.get_num_agents(),
-                trials,
-                np.mean(scores_window),
-                100 * np.mean(done_window),
-                eps, rFps, action_prob / np.sum(action_prob)))
+                  env.get_num_agents(),
+                  trials,
+                  np.mean(scores_window),
+                  100 * np.mean(done_window),
+                  eps, rFps, action_prob / np.sum(action_prob)))
             torch.save(agent.qnetwork_local.state_dict(),
-                '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
-            action_prob = [1]*4
+                       '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
+            action_prob = [1] * 4
 
 
 if __name__ == "__main__":
diff --git a/examples/qt2.py b/examples/qt2.py
index 6074106..ee3ea0c 100644
--- a/examples/qt2.py
+++ b/examples/qt2.py
@@ -1,9 +1,8 @@
-
-
 import sys
+
 from PyQt5 import QtSvg
-from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QGridLayout, QWidget
 from PyQt5.QtCore import Qt, QByteArray
+from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QGridLayout, QWidget
 
 from flatland.utils import svg
 
@@ -75,4 +74,3 @@ window = MainWindow()
 window.show()
 
 app.exec_()
-
diff --git a/examples/temporary_example.py b/examples/temporary_example.py
index 1f3504f..db909e0 100644
--- a/examples/temporary_example.py
+++ b/examples/temporary_example.py
@@ -1,11 +1,10 @@
 import random
+
 import numpy as np
-import matplotlib.pyplot as plt
 
-from flatland.envs.rail_env import *
-from flatland.envs.generators import *
-from flatland.envs.observations import TreeObsForRailEnv
-from flatland.utils.rendertools import *
+from flatland.envs.generators import random_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
 
 random.seed(0)
 np.random.seed(0)
@@ -94,7 +93,7 @@ env = RailEnv(width=7,
 # print(env.obs_builder.distance_map[0, :, :, i])
 
 # Print the observation vector for agent 0
-obs, all_rewards, done, _ = env.step({0:0})
+obs, all_rewards, done, _ = env.step({0: 0})
 for i in range(env.get_num_agents()):
     env.obs_builder.util_print_obs_subtree(tree=obs[i], num_features_per_node=5)
 
@@ -113,6 +112,7 @@ for step in range(100):
     while i < len(cmds):
         if cmds[i] == 'q':
             import sys
+
             sys.exit()
         elif cmds[i] == 's':
             obs, all_rewards, done, _ = env.step(action_dict)
@@ -120,9 +120,9 @@ for step in range(100):
             print("Rewards: ", all_rewards, " [done=", done, "]")
         else:
             agent_id = int(cmds[i])
-            action = int(cmds[i+1])
+            action = int(cmds[i + 1])
             action_dict[agent_id] = action
-            i = i+1
+            i = i + 1
         i += 1
 
     env_renderer.renderEnv(show=True)
diff --git a/examples/training_navigation.py b/examples/training_navigation.py
index cabb655..85f9531 100644
--- a/examples/training_navigation.py
+++ b/examples/training_navigation.py
@@ -1,11 +1,15 @@
-from flatland.envs.rail_env import *
-from flatland.envs.generators import *
-from flatland.envs.observations import TreeObsForRailEnv
-from flatland.utils.rendertools import *
-from flatland.baselines.dueling_double_dqn import Agent
-from collections import deque
-import torch, random
+import random
 import time
+from collections import deque
+
+import numpy as np
+import torch
+
+from flatland.baselines.dueling_double_dqn import Agent
+from flatland.envs.generators import complex_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
+
 
 random.seed(1)
 np.random.seed(1)
@@ -190,25 +194,34 @@ for trials in range(1, n_trials + 1):
     dones_list.append((np.mean(done_window)))
 
     print(
-        '\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+        '\rTraining {} Agents.\t' +
+        'Episode {}\t' +
+        'Average Score: {:.0f}\t' +
+        'Dones: {:.2f}%\t' +
+        'Epsilon: {:.2f} \t ' +
+        'Action Probabilities: \t ' +
+        '{}'.format(
             env.get_num_agents(),
             trials,
-            np.mean(
-                scores_window),
-            100 * np.mean(
-                done_window),
+            np.mean(scores_window),
+            100 * np.mean(done_window),
             eps, action_prob / np.sum(action_prob)),
         end=" ")
     if trials % 100 == 0:
         print(
-            '\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+            '\rTraining {} Agents.\t' +
+            'Episode {}\t' +
+            'Average Score: {:.0f}\t' +
+            'Dones: {:.2f}%\t' +
+            'Epsilon: {:.2f} \t ' +
+            'Action Probabilities: \t ' +
+            '{}'.format(
                 env.get_num_agents(),
                 trials,
-                np.mean(
-                    scores_window),
-                100 * np.mean(
-                    done_window),
-                eps, action_prob / np.sum(action_prob)))
+                np.mean(scores_window),
+                100 * np.mean(done_window),
+                eps,
+                action_prob / np.sum(action_prob)))
         torch.save(agent.qnetwork_local.state_dict(),
                    '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
         action_prob = [1] * 4
-- 
GitLab
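For local verification, a minimal sketch of how this change might be exercised, in the style of the commands documented in CONTRIBUTING.rst (the patch file name below is hypothetical, and flake8/make are assumed to be installed)::

    $ git am 0001-22-flake8-for-examples.patch
    $ flake8 flatland tests examples
    $ make lint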