From 504c44d449e8b19bb59bad77e794a3e621ef25d4 Mon Sep 17 00:00:00 2001
From: u214892 <u214892@sbb.ch>
Date: Mon, 20 May 2019 10:47:37 +0200
Subject: [PATCH] #22 flake8 for examples

---
 CONTRIBUTING.rst                |  4 +--
 Makefile                        |  2 +-
 examples/play_model.py          | 62 ++++++++++++++++----------------
 examples/qt2.py                 |  6 ++--
 examples/temporary_example.py   | 16 ++++-----
 examples/training_navigation.py | 49 ++++++++++++++++----------
 6 files changed, 75 insertions(+), 64 deletions(-)

diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index f552f76..7ae26bc 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -79,7 +79,7 @@ Ready to contribute? Here's how to set up `flatland` for local development.
 5. When you're done making changes, check that your changes pass flake8 and the
    tests, including testing other Python versions with tox::
 
-    $ flake8 flatland tests
+    $ flake8 flatland tests examples
     $ python setup.py test or py.test
     $ tox
 
@@ -125,4 +125,4 @@ $ bumpversion patch # possible: major / minor / patch
 $ git push
 $ git push --tags
 
-Travis will then deploy to PyPI if tests pass. (To be configured properly by Mohanty)
\ No newline at end of file
+Travis will then deploy to PyPI if tests pass. (To be configured properly by Mohanty)
diff --git a/Makefile b/Makefile
index 691bf84..6a655bb 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ clean-test: ## remove test and coverage artifacts
 	rm -fr .pytest_cache
 
 lint: ## check style with flake8
-	flake8 flatland tests
+	flake8 flatland tests examples
 
 test: ## run tests quickly with the default Python
 	py.test
diff --git a/examples/play_model.py b/examples/play_model.py
index 62726c2..1745681 100644
--- a/examples/play_model.py
+++ b/examples/play_model.py
@@ -1,12 +1,14 @@
-from flatland.envs.rail_env import RailEnv
-from flatland.envs.generators import complex_rail_generator
-from flatland.utils.rendertools import RenderTool
-from flatland.baselines.dueling_double_dqn import Agent
-from collections import deque
-import torch
 import random
-import numpy as np
 import time
+from collections import deque
+
+import numpy as np
+import torch
+
+from flatland.baselines.dueling_double_dqn import Agent
+from flatland.envs.generators import complex_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
 
 
 class Player(object):
@@ -25,7 +27,7 @@ class Player(object):
         self.done_window = deque(maxlen=100)
         self.scores = []
         self.dones_list = []
-        self.action_prob = [0]*4
+        self.action_prob = [0] * 4
         self.agent = Agent(self.state_size, self.action_size, "FC", 0)
         # self.agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
         self.agent.qnetwork_local.load_state_dict(torch.load(
@@ -33,7 +35,7 @@ class Player(object):
 
         self.iFrame = 0
         self.tStart = time.time()
-        
+
         # Reset environment
         # self.obs = self.env.reset()
         self.env.obs_builder.reset()
@@ -51,7 +53,6 @@ class Player(object):
         env = self.env
 
         # Pass the (stored) observation to the agent network and retrieve the action
-        #for handle in env.get_agent_handles():
         for handle in env.get_agent_handles():
             action = self.agent.act(np.array(self.obs[handle]), eps=self.eps)
             self.action_prob[action] += 1
@@ -68,8 +69,8 @@ class Player(object):
 
         # Update replay buffer and train agent
         for handle in self.env.get_agent_handles():
             self.agent.step(self.obs[handle], self.action_dict[handle],
-                all_rewards[handle], next_obs[handle], done[handle],
-                train=False)
+                            all_rewards[handle], next_obs[handle], done[handle],
+                            train=False)
             self.score += all_rewards[handle]
             self.iFrame += 1
@@ -85,7 +86,7 @@ def max_lt(seq, val):
     None is returned if seq was empty or all items in seq were >= val.
     """
 
-    idx = len(seq)-1
+    idx = len(seq) - 1
     while idx >= 0:
         if seq[idx] < val and seq[idx] >= 0:
             return seq[idx]
@@ -94,7 +95,6 @@ def main(render=True, delay=0.0):
-
     random.seed(1)
     np.random.seed(1)
 
@@ -118,8 +118,9 @@ def main(render=True, delay=0.0):
     done_window = deque(maxlen=100)
     scores = []
     dones_list = []
-    action_prob = [0]*4
+    action_prob = [0] * 4
     agent = Agent(state_size, action_size, "FC", 0)
+
     # agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
 
     def max_lt(seq, val):
@@ -128,7 +129,7 @@ def main(render=True, delay=0.0):
         None is returned if seq was empty or all items in seq were >= val.
         """
 
-        idx = len(seq)-1
+        idx = len(seq) - 1
         while idx >= 0:
             if seq[idx] < val and seq[idx] >= 0:
                 return seq[idx]
@@ -141,7 +142,8 @@ def main(render=True, delay=0.0):
 
         # Reset environment
         obs = env.reset()
-        env_renderer.set_new_rail()
+        if render:
+            env_renderer.set_new_rail()
 
         for a in range(env.get_num_agents()):
             norm = max(1, max_lt(obs[a], np.inf))
@@ -165,7 +167,6 @@ def main(render=True, delay=0.0):
             if render:
                 env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step,
                                        action_dict=action_dict)
-                #time.sleep(10)
 
             if delay > 0:
                 time.sleep(delay)
@@ -181,7 +182,6 @@ def main(render=True, delay=0.0):
 
                 agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
                 score += all_rewards[a]
-
             obs = next_obs.copy()
             if done['__all__']:
                 env_done = 1
@@ -196,25 +196,25 @@ def main(render=True, delay=0.0):
 
         print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
                '\tEpsilon: {:.2f} \t Action Probabilities: \t {}').format(
-            env.get_num_agents(),
-            trials,
-            np.mean(scores_window),
-            100 * np.mean(done_window),
-            eps, action_prob/np.sum(action_prob)),
+              env.get_num_agents(),
+              trials,
+              np.mean(scores_window),
+              100 * np.mean(done_window),
+              eps, action_prob / np.sum(action_prob)),
               end=" ")
         if trials % 100 == 0:
             tNow = time.time()
             rFps = iFrame / (tNow - tStart)
             print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
                    '\tEpsilon: {:.2f} fps: {:.2f} \t Action Probabilities: \t {}').format(
-                env.get_num_agents(),
-                trials,
-                np.mean(scores_window),
-                100 * np.mean(done_window),
-                eps, rFps, action_prob / np.sum(action_prob)))
+                  env.get_num_agents(),
+                  trials,
+                  np.mean(scores_window),
+                  100 * np.mean(done_window),
+                  eps, rFps, action_prob / np.sum(action_prob)))
             torch.save(agent.qnetwork_local.state_dict(),
-                '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
-            action_prob = [1]*4
+                       '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
+            action_prob = [1] * 4
 
 
 if __name__ == "__main__":
diff --git a/examples/qt2.py b/examples/qt2.py
index 6074106..ee3ea0c 100644
--- a/examples/qt2.py
+++ b/examples/qt2.py
@@ -1,9 +1,8 @@
-
-
 import sys
+
 from PyQt5 import QtSvg
-from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QGridLayout, QWidget
 from PyQt5.QtCore import Qt, QByteArray
+from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QGridLayout, QWidget
 
 from flatland.utils import svg
 
@@ -75,4 +74,3 @@ window = MainWindow()
 window.show()
 
 app.exec_()
-
diff --git a/examples/temporary_example.py b/examples/temporary_example.py
index 1f3504f..db909e0 100644
--- a/examples/temporary_example.py
+++ b/examples/temporary_example.py
@@ -1,11 +1,10 @@
 import random
+
 import numpy as np
-import matplotlib.pyplot as plt
 
-from flatland.envs.rail_env import *
-from flatland.envs.generators import *
-from flatland.envs.observations import TreeObsForRailEnv
-from flatland.utils.rendertools import *
+from flatland.envs.generators import random_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
 
 random.seed(0)
 np.random.seed(0)
@@ -94,7 +93,7 @@ env = RailEnv(width=7,
 # print(env.obs_builder.distance_map[0, :, :, i])
 
 # Print the observation vector for agent 0
-obs, all_rewards, done, _ = env.step({0:0})
+obs, all_rewards, done, _ = env.step({0: 0})
 for i in range(env.get_num_agents()):
     env.obs_builder.util_print_obs_subtree(tree=obs[i], num_features_per_node=5)
 
@@ -113,6 +112,7 @@ for step in range(100):
     while i < len(cmds):
         if cmds[i] == 'q':
             import sys
+
             sys.exit()
         elif cmds[i] == 's':
             obs, all_rewards, done, _ = env.step(action_dict)
@@ -120,9 +120,9 @@ for step in range(100):
             print("Rewards: ", all_rewards, " [done=", done, "]")
         else:
             agent_id = int(cmds[i])
-            action = int(cmds[i+1])
+            action = int(cmds[i + 1])
             action_dict[agent_id] = action
-            i = i+1
+            i = i + 1
         i += 1
 
     env_renderer.renderEnv(show=True)
diff --git a/examples/training_navigation.py b/examples/training_navigation.py
index cabb655..85f9531 100644
--- a/examples/training_navigation.py
+++ b/examples/training_navigation.py
@@ -1,11 +1,15 @@
-from flatland.envs.rail_env import *
-from flatland.envs.generators import *
-from flatland.envs.observations import TreeObsForRailEnv
-from flatland.utils.rendertools import *
-from flatland.baselines.dueling_double_dqn import Agent
-from collections import deque
-import torch, random
+import random
 import time
+from collections import deque
+
+import numpy as np
+import torch
+
+from flatland.baselines.dueling_double_dqn import Agent
+from flatland.envs.generators import complex_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
+
 
 random.seed(1)
 np.random.seed(1)
@@ -190,25 +194,34 @@ for trials in range(1, n_trials + 1):
     dones_list.append((np.mean(done_window)))
 
     print(
-        '\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+        '\rTraining {} Agents.\t' +
+        'Episode {}\t' +
+        'Average Score: {:.0f}\t' +
+        'Dones: {:.2f}%\t' +
+        'Epsilon: {:.2f} \t ' +
+        'Action Probabilities: \t ' +
+        '{}'.format(
             env.get_num_agents(),
             trials,
-            np.mean(
-                scores_window),
-            100 * np.mean(
-                done_window),
+            np.mean(scores_window),
+            100 * np.mean(done_window),
             eps, action_prob / np.sum(action_prob)),
         end=" ")
     if trials % 100 == 0:
         print(
-            '\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+            '\rTraining {} Agents.\t' +
+            'Episode {}\t' +
+            'Average Score: {:.0f}\t' +
+            'Dones: {:.2f}%\t' +
+            'Epsilon: {:.2f} \t ' +
+            'Action Probabilities: \t ' +
+            '{}'.format(
                 env.get_num_agents(),
                 trials,
-                np.mean(
-                    scores_window),
-                100 * np.mean(
-                    done_window),
-                eps, action_prob / np.sum(action_prob)))
+                np.mean(scores_window),
+                100 * np.mean(done_window),
+                eps,
+                action_prob / np.sum(action_prob)))
         torch.save(agent.qnetwork_local.state_dict(),
                    '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
         action_prob = [1] * 4
-- 
GitLab
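For local verification, a minimal sketch of how this change might be exercised, in the style of the commands documented in CONTRIBUTING.rst (the patch file name below is hypothetical, and flake8/make are assumed to be installed)::

    $ git am 0001-22-flake8-for-examples.patch
    $ flake8 flatland tests examples
    $ make lint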