Commit 504c44d4 authored by u214892

#22 flake8 for examples

parent 72507e8e
@@ -79,7 +79,7 @@ Ready to contribute? Here's how to set up `flatland` for local development.
 5. When you're done making changes, check that your changes pass flake8 and the
    tests, including testing other Python versions with tox::
-    $ flake8 flatland tests
+    $ flake8 flatland tests examples
     $ python setup.py test or py.test
     $ tox
@@ -125,4 +125,4 @@ $ bumpversion patch # possible: major / minor / patch
 $ git push
 $ git push --tags
 Travis will then deploy to PyPI if tests pass. (To be configured properly by Mohanty)
\ No newline at end of file

@@ -51,7 +51,7 @@ clean-test: ## remove test and coverage artifacts
 	rm -fr .pytest_cache
 lint: ## check style with flake8
-	flake8 flatland tests
+	flake8 flatland tests examples
 test: ## run tests quickly with the default Python
 	py.test
...
-from flatland.envs.rail_env import RailEnv
-from flatland.envs.generators import complex_rail_generator
-from flatland.utils.rendertools import RenderTool
-from flatland.baselines.dueling_double_dqn import Agent
-from collections import deque
-import torch
 import random
-import numpy as np
 import time
+from collections import deque
+import numpy as np
+import torch
+from flatland.baselines.dueling_double_dqn import Agent
+from flatland.envs.generators import complex_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
 class Player(object):
@@ -25,7 +27,7 @@ class Player(object):
         self.done_window = deque(maxlen=100)
         self.scores = []
         self.dones_list = []
-        self.action_prob = [0]*4
+        self.action_prob = [0] * 4
         self.agent = Agent(self.state_size, self.action_size, "FC", 0)
         # self.agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
         self.agent.qnetwork_local.load_state_dict(torch.load(
@@ -33,7 +35,7 @@ class Player(object):
         self.iFrame = 0
         self.tStart = time.time()
         # Reset environment
         # self.obs = self.env.reset()
         self.env.obs_builder.reset()
@@ -51,7 +53,6 @@ class Player(object):
         env = self.env
         # Pass the (stored) observation to the agent network and retrieve the action
-        #for handle in env.get_agent_handles():
         for handle in env.get_agent_handles():
             action = self.agent.act(np.array(self.obs[handle]), eps=self.eps)
             self.action_prob[action] += 1
@@ -68,8 +69,8 @@ class Player(object):
         # Update replay buffer and train agent
         for handle in self.env.get_agent_handles():
             self.agent.step(self.obs[handle], self.action_dict[handle],
                             all_rewards[handle], next_obs[handle], done[handle],
                             train=False)
             self.score += all_rewards[handle]
         self.iFrame += 1
@@ -85,7 +86,7 @@ def max_lt(seq, val):
     None is returned if seq was empty or all items in seq were >= val.
     """
-    idx = len(seq)-1
+    idx = len(seq) - 1
     while idx >= 0:
         if seq[idx] < val and seq[idx] >= 0:
             return seq[idx]
@@ -94,7 +95,6 @@ def max_lt(seq, val):
 def main(render=True, delay=0.0):
     random.seed(1)
     np.random.seed(1)
@@ -118,8 +118,9 @@ def main(render=True, delay=0.0):
     done_window = deque(maxlen=100)
     scores = []
     dones_list = []
-    action_prob = [0]*4
+    action_prob = [0] * 4
     agent = Agent(state_size, action_size, "FC", 0)
     # agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
     def max_lt(seq, val):
@@ -128,7 +129,7 @@ def main(render=True, delay=0.0):
         None is returned if seq was empty or all items in seq were >= val.
         """
-        idx = len(seq)-1
+        idx = len(seq) - 1
         while idx >= 0:
             if seq[idx] < val and seq[idx] >= 0:
                 return seq[idx]
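
The hunks above show max_lt only in part. For readability, here is a hedged reconstruction of the whole helper as implied by the visible fragments; the docstring's summary line, the idx decrement, and the final return None are not shown in the diff and are assumptions:

def max_lt(seq, val):
    """Return the right-most non-negative element of seq that is below val.

    None is returned if seq was empty or all items in seq were >= val.
    """
    idx = len(seq) - 1
    while idx >= 0:
        # Skip negative entries; return the first qualifying value found from the end.
        if seq[idx] < val and seq[idx] >= 0:
            return seq[idx]
        idx -= 1  # assumed: walk backwards through the sequence
    return None  # assumed: no element qualified
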
@@ -141,7 +142,8 @@ def main(render=True, delay=0.0):
     # Reset environment
     obs = env.reset()
-    env_renderer.set_new_rail()
+    if render:
+        env_renderer.set_new_rail()
     for a in range(env.get_num_agents()):
         norm = max(1, max_lt(obs[a], np.inf))
@@ -165,7 +167,6 @@ def main(render=True, delay=0.0):
         if render:
             env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step, action_dict=action_dict)
-            #time.sleep(10)
             if delay > 0:
                 time.sleep(delay)
@@ -181,7 +182,6 @@ def main(render=True, delay=0.0):
             agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
             score += all_rewards[a]
         obs = next_obs.copy()
         if done['__all__']:
             env_done = 1
@@ -196,25 +196,25 @@ def main(render=True, delay=0.0):
         print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
                '\tEpsilon: {:.2f} \t Action Probabilities: \t {}').format(
             env.get_num_agents(),
             trials,
             np.mean(scores_window),
             100 * np.mean(done_window),
-            eps, action_prob/np.sum(action_prob)),
+            eps, action_prob / np.sum(action_prob)),
             end=" ")
         if trials % 100 == 0:
             tNow = time.time()
             rFps = iFrame / (tNow - tStart)
             print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
                    '\tEpsilon: {:.2f} fps: {:.2f} \t Action Probabilities: \t {}').format(
                 env.get_num_agents(),
                 trials,
                 np.mean(scores_window),
                 100 * np.mean(done_window),
                 eps, rFps, action_prob / np.sum(action_prob)))
             torch.save(agent.qnetwork_local.state_dict(),
                        '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
-            action_prob = [1]*4
+            action_prob = [1] * 4
 if __name__ == "__main__":
...
 import sys
 from PyQt5 import QtSvg
-from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QGridLayout, QWidget
 from PyQt5.QtCore import Qt, QByteArray
+from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QGridLayout, QWidget
 from flatland.utils import svg
@@ -75,4 +74,3 @@ window = MainWindow()
 window.show()
 app.exec_()

 import random
 import numpy as np
-import matplotlib.pyplot as plt
-from flatland.envs.rail_env import *
-from flatland.envs.generators import *
-from flatland.envs.observations import TreeObsForRailEnv
-from flatland.utils.rendertools import *
+from flatland.envs.generators import random_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
 random.seed(0)
 np.random.seed(0)
@@ -94,7 +93,7 @@ env = RailEnv(width=7,
 # print(env.obs_builder.distance_map[0, :, :, i])
 # Print the observation vector for agent 0
-obs, all_rewards, done, _ = env.step({0:0})
+obs, all_rewards, done, _ = env.step({0: 0})
 for i in range(env.get_num_agents()):
     env.obs_builder.util_print_obs_subtree(tree=obs[i], num_features_per_node=5)
@@ -113,6 +112,7 @@ for step in range(100):
     while i < len(cmds):
         if cmds[i] == 'q':
             import sys
             sys.exit()
         elif cmds[i] == 's':
             obs, all_rewards, done, _ = env.step(action_dict)
@@ -120,9 +120,9 @@ for step in range(100):
             print("Rewards: ", all_rewards, " [done=", done, "]")
         else:
             agent_id = int(cmds[i])
-            action = int(cmds[i+1])
+            action = int(cmds[i + 1])
             action_dict[agent_id] = action
-            i = i+1
+            i = i + 1
         i += 1
     env_renderer.renderEnv(show=True)

-from flatland.envs.rail_env import *
-from flatland.envs.generators import *
-from flatland.envs.observations import TreeObsForRailEnv
-from flatland.utils.rendertools import *
-from flatland.baselines.dueling_double_dqn import Agent
-from collections import deque
-import torch, random
+import random
 import time
+from collections import deque
+import numpy as np
+import torch
+from flatland.baselines.dueling_double_dqn import Agent
+from flatland.envs.generators import complex_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
 random.seed(1)
 np.random.seed(1)
@@ -190,25 +194,34 @@ for trials in range(1, n_trials + 1):
     dones_list.append((np.mean(done_window)))
     print(
-        '\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+        '\rTraining {} Agents.\t' +
+        'Episode {}\t' +
+        'Average Score: {:.0f}\t' +
+        'Dones: {:.2f}%\t' +
+        'Epsilon: {:.2f} \t ' +
+        'Action Probabilities: \t ' +
+        '{}'.format(
             env.get_num_agents(),
             trials,
-            np.mean(
-                scores_window),
-            100 * np.mean(
-                done_window),
+            np.mean(scores_window),
+            100 * np.mean(done_window),
             eps, action_prob / np.sum(action_prob)),
         end=" ")
     if trials % 100 == 0:
         print(
-            '\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+            '\rTraining {} Agents.\t' +
+            'Episode {}\t' +
+            'Average Score: {:.0f}\t' +
+            'Dones: {:.2f}%\t' +
+            'Epsilon: {:.2f} \t ' +
+            'Action Probabilities: \t ' +
+            '{}'.format(
                env.get_num_agents(),
                trials,
-               np.mean(
-                   scores_window),
-               100 * np.mean(
-                   done_window),
-               eps, action_prob / np.sum(action_prob)))
+               np.mean(scores_window),
+               100 * np.mean(done_window),
+               eps,
+               action_prob / np.sum(action_prob)))
         torch.save(agent.qnetwork_local.state_dict(),
                    '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
         action_prob = [1] * 4
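
A note on the split-up format strings in the two print calls above: because a method call binds more tightly than +, .format() here applies only to the final '{}' literal, so the earlier placeholders are printed verbatim and only the first argument (the agent count) is actually substituted. Below is a minimal sketch of one way to keep the line-length fix while formatting the whole template; it reuses the names already in scope in the hunk (env, trials, scores_window, done_window, eps, action_prob, np) and is illustrative, not the committed code.

# Adjacent string literals are concatenated by the parser, so the whole
# template reaches .format(); the parentheses make the grouping explicit.
msg = ('\rTraining {} Agents.\t'
       'Episode {}\t'
       'Average Score: {:.0f}\t'
       'Dones: {:.2f}%\t'
       'Epsilon: {:.2f} \t '
       'Action Probabilities: \t {}').format(
    env.get_num_agents(),
    trials,
    np.mean(scores_window),
    100 * np.mean(done_window),
    eps,
    action_prob / np.sum(action_prob))
print(msg, end=" ")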