Commit 504c44d4 authored by u214892

#22 flake8 for examples

parent 72507e8e
......@@ -79,7 +79,7 @@ Ready to contribute? Here's how to set up `flatland` for local development.
5. When you're done making changes, check that your changes pass flake8 and the
tests, including testing other Python versions with tox::
$ flake8 flatland tests
$ flake8 flatland tests examples
$ python setup.py test or py.test
$ tox
......@@ -125,4 +125,4 @@ $ bumpversion patch # possible: major / minor / patch
$ git push
$ git push --tags
Travis will then deploy to PyPI if tests pass. (To be configured properly by Mohanty)
\ No newline at end of file
Travis will then deploy to PyPI if tests pass. (To be configured properly by Mohanty)
......@@ -51,7 +51,7 @@ clean-test: ## remove test and coverage artifacts
rm -fr .pytest_cache
lint: ## check style with flake8
flake8 flatland tests
flake8 flatland tests examples
test: ## run tests quickly with the default Python
py.test
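
Both the contributing guide and the Makefile `lint` target now include the examples/ folder in the flake8 run. If you prefer to drive the same checks from Python (purely illustrative; this helper script is not part of the repository), a minimal sketch:

import subprocess
import sys


def run_checks():
    # Mirror the documented workflow: style check first, then the test suite.
    for cmd in (["flake8", "flatland", "tests", "examples"], ["py.test"]):
        result = subprocess.run(cmd)
        if result.returncode != 0:
            sys.exit(result.returncode)


if __name__ == "__main__":
    run_checks()
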
......
from flatland.envs.rail_env import RailEnv
from flatland.envs.generators import complex_rail_generator
from flatland.utils.rendertools import RenderTool
from flatland.baselines.dueling_double_dqn import Agent
from collections import deque
import torch
import random
import numpy as np
import time
from collections import deque
import numpy as np
import torch
from flatland.baselines.dueling_double_dqn import Agent
from flatland.envs.generators import complex_rail_generator
from flatland.envs.rail_env import RailEnv
from flatland.utils.rendertools import RenderTool
class Player(object):
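
The reshuffled import block above follows the PEP 8 grouping convention that tools such as isort produce: standard library first, then third-party packages, then first-party flatland modules, each group separated by a blank line (flake8 itself mainly flags unused or re-defined imports, F401/F811). A sketch of the convention with the same modules:

# Standard library
import random
import time
from collections import deque

# Third-party
import numpy as np
import torch

# First-party (this project)
from flatland.baselines.dueling_double_dqn import Agent
from flatland.envs.generators import complex_rail_generator
from flatland.envs.rail_env import RailEnv
from flatland.utils.rendertools import RenderTool
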
......@@ -25,7 +27,7 @@ class Player(object):
self.done_window = deque(maxlen=100)
self.scores = []
self.dones_list = []
self.action_prob = [0]*4
self.action_prob = [0] * 4
self.agent = Agent(self.state_size, self.action_size, "FC", 0)
# self.agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
self.agent.qnetwork_local.load_state_dict(torch.load(
......@@ -33,7 +35,7 @@ class Player(object):
self.iFrame = 0
self.tStart = time.time()
# Reset environment
# self.obs = self.env.reset()
self.env.obs_builder.reset()
......@@ -51,7 +53,6 @@ class Player(object):
env = self.env
# Pass the (stored) observation to the agent network and retrieve the action
#for handle in env.get_agent_handles():
for handle in env.get_agent_handles():
action = self.agent.act(np.array(self.obs[handle]), eps=self.eps)
self.action_prob[action] += 1
......@@ -68,8 +69,8 @@ class Player(object):
# Update replay buffer and train agent
for handle in self.env.get_agent_handles():
self.agent.step(self.obs[handle], self.action_dict[handle],
all_rewards[handle], next_obs[handle], done[handle],
train=False)
all_rewards[handle], next_obs[handle], done[handle],
train=False)
self.score += all_rewards[handle]
self.iFrame += 1
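
The only change in the hunk above is indentation: the wrapped agent.step(...) arguments are now aligned under the opening parenthesis, the visual-indent style pycodestyle enforces through its E12x continuation-line codes. A self-contained illustration with a stand-in function modelled on that call:

def step(obs, action, reward, next_obs, done, train=True):
    # Stand-in with the same argument shape as Agent.step in the call above.
    return None


# Continuation lines aligned with the opening parenthesis:
step([0.0, 1.0], 2,
     -1.0, [0.5, 0.5], False,
     train=False)
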
......@@ -85,7 +86,7 @@ def max_lt(seq, val):
None is returned if seq was empty or all items in seq were >= val.
"""
idx = len(seq)-1
idx = len(seq) - 1
while idx >= 0:
if seq[idx] < val and seq[idx] >= 0:
return seq[idx]
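
Several hunks in this commit only add spaces around binary operators: [0]*4 becomes [0] * 4, len(seq)-1 becomes len(seq) - 1, i = i+1 becomes i = i + 1. PEP 8 asks for a single space on each side of a binary operator; pycodestyle reports the missing space as E225 (or E226 for arithmetic operators, which flake8 usually leaves in its default ignore list, so part of this is purely stylistic). For example:

seq = [3, 1, 4, 1, 5]

action_prob = [0] * 4  # was: [0]*4
idx = len(seq) - 1     # was: len(seq)-1
idx = idx + 1          # was: idx = idx+1
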
......@@ -94,7 +95,6 @@ def max_lt(seq, val):
def main(render=True, delay=0.0):
random.seed(1)
np.random.seed(1)
......@@ -118,8 +118,9 @@ def main(render=True, delay=0.0):
done_window = deque(maxlen=100)
scores = []
dones_list = []
action_prob = [0]*4
action_prob = [0] * 4
agent = Agent(state_size, action_size, "FC", 0)
# agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
def max_lt(seq, val):
......@@ -128,7 +129,7 @@ def main(render=True, delay=0.0):
None is returned if seq was empty or all items in seq were >= val.
"""
idx = len(seq)-1
idx = len(seq) - 1
while idx >= 0:
if seq[idx] < val and seq[idx] >= 0:
return seq[idx]
......@@ -141,7 +142,8 @@ def main(render=True, delay=0.0):
# Reset environment
obs = env.reset()
env_renderer.set_new_rail()
if render:
env_renderer.set_new_rail()
for a in range(env.get_num_agents()):
norm = max(1, max_lt(obs[a], np.inf))
......@@ -165,7 +167,6 @@ def main(render=True, delay=0.0):
if render:
env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step, action_dict=action_dict)
#time.sleep(10)
if delay > 0:
time.sleep(delay)
......@@ -181,7 +182,6 @@ def main(render=True, delay=0.0):
agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
score += all_rewards[a]
obs = next_obs.copy()
if done['__all__']:
env_done = 1
......@@ -196,25 +196,25 @@ def main(render=True, delay=0.0):
print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
'\tEpsilon: {:.2f} \t Action Probabilities: \t {}').format(
env.get_num_agents(),
trials,
np.mean(scores_window),
100 * np.mean(done_window),
eps, action_prob/np.sum(action_prob)),
env.get_num_agents(),
trials,
np.mean(scores_window),
100 * np.mean(done_window),
eps, action_prob / np.sum(action_prob)),
end=" ")
if trials % 100 == 0:
tNow = time.time()
rFps = iFrame / (tNow - tStart)
print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
'\tEpsilon: {:.2f} fps: {:.2f} \t Action Probabilities: \t {}').format(
env.get_num_agents(),
trials,
np.mean(scores_window),
100 * np.mean(done_window),
eps, rFps, action_prob / np.sum(action_prob)))
env.get_num_agents(),
trials,
np.mean(scores_window),
100 * np.mean(done_window),
eps, rFps, action_prob / np.sum(action_prob)))
torch.save(agent.qnetwork_local.state_dict(),
'../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
action_prob = [1]*4
'../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
action_prob = [1] * 4
if __name__ == "__main__":
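
The training loop above checkpoints the agent with torch.save(agent.qnetwork_local.state_dict(), ...), and the Player class earlier in this file restores weights via load_state_dict(torch.load(...)). A minimal sketch of that round trip, reusing only the attribute names visible in this diff:

import torch


def save_checkpoint(agent, path):
    # Persist only the local Q-network weights, as the loop above does.
    torch.save(agent.qnetwork_local.state_dict(), path)


def load_checkpoint(agent, path):
    # Load weights back into an already constructed Agent of matching size.
    agent.qnetwork_local.load_state_dict(torch.load(path))
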
......
import sys
from PyQt5 import QtSvg
from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QGridLayout, QWidget
from PyQt5.QtCore import Qt, QByteArray
from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QGridLayout, QWidget
from flatland.utils import svg
......@@ -75,4 +74,3 @@ window = MainWindow()
window.show()
app.exec_()
import random
import numpy as np
import matplotlib.pyplot as plt
from flatland.envs.rail_env import *
from flatland.envs.generators import *
from flatland.envs.observations import TreeObsForRailEnv
from flatland.utils.rendertools import *
from flatland.envs.generators import random_rail_generator
from flatland.envs.rail_env import RailEnv
from flatland.utils.rendertools import RenderTool
random.seed(0)
np.random.seed(0)
......@@ -94,7 +93,7 @@ env = RailEnv(width=7,
# print(env.obs_builder.distance_map[0, :, :, i])
# Print the observation vector for agent 0
obs, all_rewards, done, _ = env.step({0:0})
obs, all_rewards, done, _ = env.step({0: 0})
for i in range(env.get_num_agents()):
env.obs_builder.util_print_obs_subtree(tree=obs[i], num_features_per_node=5)
......@@ -113,6 +112,7 @@ for step in range(100):
while i < len(cmds):
if cmds[i] == 'q':
import sys
sys.exit()
elif cmds[i] == 's':
obs, all_rewards, done, _ = env.step(action_dict)
......@@ -120,9 +120,9 @@ for step in range(100):
print("Rewards: ", all_rewards, " [done=", done, "]")
else:
agent_id = int(cmds[i])
action = int(cmds[i+1])
action = int(cmds[i + 1])
action_dict[agent_id] = action
i = i+1
i = i + 1
i += 1
env_renderer.renderEnv(show=True)
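
This example now imports random_rail_generator, RailEnv and RenderTool explicitly instead of through wildcard imports (the pattern pyflakes flags as F403/F405). For orientation, a compressed sketch of what the example does, using only calls that appear in this diff; constructor arguments other than width=7 are assumptions about this era of the flatland API rather than part of the commit:

from flatland.envs.generators import random_rail_generator
from flatland.envs.rail_env import RailEnv
from flatland.utils.rendertools import RenderTool

# Assumed arguments: only width=7 is shown in the diff.
env = RailEnv(width=7, height=7, rail_generator=random_rail_generator())
obs = env.reset()

# Step agent 0 with action 0 and print its observation tree, as above.
obs, all_rewards, done, _ = env.step({0: 0})
env.obs_builder.util_print_obs_subtree(tree=obs[0], num_features_per_node=5)

RenderTool(env).renderEnv(show=True)
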
from flatland.envs.rail_env import *
from flatland.envs.generators import *
from flatland.envs.observations import TreeObsForRailEnv
from flatland.utils.rendertools import *
from flatland.baselines.dueling_double_dqn import Agent
from collections import deque
import torch, random
import random
import time
from collections import deque
import numpy as np
import torch
from flatland.baselines.dueling_double_dqn import Agent
from flatland.envs.generators import complex_rail_generator
from flatland.envs.rail_env import RailEnv
from flatland.utils.rendertools import RenderTool
random.seed(1)
np.random.seed(1)
......@@ -190,25 +194,34 @@ for trials in range(1, n_trials + 1):
dones_list.append((np.mean(done_window)))
print(
'\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
'\rTraining {} Agents.\t' +
'Episode {}\t' +
'Average Score: {:.0f}\t' +
'Dones: {:.2f}%\t' +
'Epsilon: {:.2f} \t ' +
'Action Probabilities: \t ' +
'{}'.format(
env.get_num_agents(),
trials,
np.mean(
scores_window),
100 * np.mean(
done_window),
np.mean(scores_window),
100 * np.mean(done_window),
eps, action_prob / np.sum(action_prob)),
end=" ")
if trials % 100 == 0:
print(
'\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
'\rTraining {} Agents.\t' +
'Episode {}\t' +
'Average Score: {:.0f}\t' +
'Dones: {:.2f}%\t' +
'Epsilon: {:.2f} \t ' +
'Action Probabilities: \t ' +
'{}'.format(
env.get_num_agents(),
trials,
np.mean(
scores_window),
100 * np.mean(
done_window),
eps, action_prob / np.sum(action_prob)))
np.mean(scores_window),
100 * np.mean(done_window),
eps,
action_prob / np.sum(action_prob)))
torch.save(agent.qnetwork_local.state_dict(),
'../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
action_prob = [1] * 4
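
One caveat on the reformatted print calls in this last file: as rendered here the concatenated literals are not wrapped in an extra pair of parentheses, so .format() binds only to the final '{}' piece (a method call binds tighter than +) and the earlier placeholders would be printed literally. The first example's version of this message formats the whole parenthesised string instead. A sketch of the safer pattern, using implicit literal concatenation so no + is needed at all (the sample values are made up):

n_agents, trials, eps = 3, 10, 0.9
mean_score, mean_done = -42.0, 0.25

print(('\rTraining {} Agents.\t'
       'Episode {}\t'
       'Average Score: {:.0f}\t'
       'Dones: {:.2f}%\t'
       'Epsilon: {:.2f}').format(n_agents, trials, mean_score,
                                 100 * mean_done, eps),
      end=" ")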