Commit ebec6f54 authored by u214892

Merge branch 'master' of gitlab.aicrowd.com:flatland/flatland into pydeps

parents 633e38ef 72edfe4b
@@ -79,7 +79,7 @@ Ready to contribute? Here's how to set up `flatland` for local development.
5. When you're done making changes, check that your changes pass flake8 and the
tests, including testing other Python versions with tox::
$ flake8 flatland tests
$ flake8 flatland tests examples
$ python setup.py test or py.test
$ tox
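For quicker iteration before the full matrix, a single tox environment can be run on its own (the environment names below are only illustrative; the real list is defined in this project's tox.ini)::
$ tox -l          # list the configured environments
$ tox -e py36     # run a single, hypothetical py36 environment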
@@ -125,4 +125,4 @@ $ bumpversion patch # possible: major / minor / patch
$ git push
$ git push --tags
Travis will then deploy to PyPI if tests pass. (To be configured properly by Mohanty)
\ No newline at end of file
Travis will then deploy to PyPI if tests pass. (To be configured properly by Mohanty)
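Depending on the project's .bumpversion.cfg, the bump also commits and tags the new version, which is what the git push --tags step above relies on. With illustrative version numbers::
$ bumpversion patch   # e.g. 0.3.1 -> 0.3.2
$ bumpversion minor   # e.g. 0.3.1 -> 0.4.0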
@@ -51,7 +51,7 @@ clean-test: ## remove test and coverage artifacts
rm -fr .pytest_cache
lint: ## check style with flake8
flake8 flatland tests
flake8 flatland tests examples
test: ## run tests quickly with the default Python
py.test
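With the lint target updated, a typical local check (assuming GNU make and the targets shown in this Makefile) is:
$ make lint   # flake8 over flatland, tests and examples
$ make test   # py.test with the default Python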
......
from flatland.envs.rail_env import RailEnv
from flatland.envs.generators import complex_rail_generator
from flatland.utils.rendertools import RenderTool
from flatland.baselines.dueling_double_dqn import Agent
from collections import deque
import torch
import random
import numpy as np
import time
from collections import deque
import numpy as np
import torch
from flatland.baselines.dueling_double_dqn import Agent
from flatland.envs.generators import complex_rail_generator
from flatland.envs.rail_env import RailEnv
from flatland.utils.rendertools import RenderTool
class Player(object):
@@ -25,7 +27,7 @@ class Player(object):
self.done_window = deque(maxlen=100)
self.scores = []
self.dones_list = []
self.action_prob = [0]*4
self.action_prob = [0] * 4
self.agent = Agent(self.state_size, self.action_size, "FC", 0)
# self.agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
self.agent.qnetwork_local.load_state_dict(torch.load(
@@ -33,7 +35,7 @@ class Player(object):
self.iFrame = 0
self.tStart = time.time()
# Reset environment
# self.obs = self.env.reset()
self.env.obs_builder.reset()
@@ -51,7 +53,6 @@ class Player(object):
env = self.env
# Pass the (stored) observation to the agent network and retrieve the action
#for handle in env.get_agent_handles():
for handle in env.get_agent_handles():
action = self.agent.act(np.array(self.obs[handle]), eps=self.eps)
self.action_prob[action] += 1
@@ -68,8 +69,8 @@ class Player(object):
# Update replay buffer and train agent
for handle in self.env.get_agent_handles():
self.agent.step(self.obs[handle], self.action_dict[handle],
all_rewards[handle], next_obs[handle], done[handle],
train=False)
all_rewards[handle], next_obs[handle], done[handle],
train=False)
self.score += all_rewards[handle]
self.iFrame += 1
@@ -85,7 +86,7 @@ def max_lt(seq, val):
None is returned if seq was empty or all items in seq were >= val.
"""
idx = len(seq)-1
idx = len(seq) - 1
while idx >= 0:
if seq[idx] < val and seq[idx] >= 0:
return seq[idx]
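For reference, a self-contained sketch of max_lt as implied by the docstring and the truncated loop above (not the verbatim committed body):
def max_lt(seq, val):
    """Scan seq from the end and return the first element that is < val and >= 0;
    None if seq is empty or no element qualifies."""
    idx = len(seq) - 1
    while idx >= 0:
        if seq[idx] < val and seq[idx] >= 0:
            return seq[idx]
        idx -= 1
    return None
# e.g. max_lt([3, -1, 7, 12], 10) -> 7, max_lt([12, 15], 10) -> None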
@@ -94,7 +95,6 @@ def max_lt(seq, val):
def main(render=True, delay=0.0):
random.seed(1)
np.random.seed(1)
@@ -118,8 +118,9 @@ def main(render=True, delay=0.0):
done_window = deque(maxlen=100)
scores = []
dones_list = []
action_prob = [0]*4
action_prob = [0] * 4
agent = Agent(state_size, action_size, "FC", 0)
# agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
def max_lt(seq, val):
@@ -128,7 +129,7 @@ def main(render=True, delay=0.0):
None is returned if seq was empty or all items in seq were >= val.
"""
idx = len(seq)-1
idx = len(seq) - 1
while idx >= 0:
if seq[idx] < val and seq[idx] >= 0:
return seq[idx]
@@ -141,7 +142,8 @@ def main(render=True, delay=0.0):
# Reset environment
obs = env.reset()
env_renderer.set_new_rail()
if render:
env_renderer.set_new_rail()
for a in range(env.get_num_agents()):
norm = max(1, max_lt(obs[a], np.inf))
@@ -165,7 +167,6 @@ def main(render=True, delay=0.0):
if render:
env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step, action_dict=action_dict)
#time.sleep(10)
if delay > 0:
time.sleep(delay)
@@ -181,7 +182,6 @@ def main(render=True, delay=0.0):
agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
score += all_rewards[a]
obs = next_obs.copy()
if done['__all__']:
env_done = 1
@@ -196,25 +196,25 @@ def main(render=True, delay=0.0):
print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
'\tEpsilon: {:.2f} \t Action Probabilities: \t {}').format(
env.get_num_agents(),
trials,
np.mean(scores_window),
100 * np.mean(done_window),
eps, action_prob/np.sum(action_prob)),
env.get_num_agents(),
trials,
np.mean(scores_window),
100 * np.mean(done_window),
eps, action_prob / np.sum(action_prob)),
end=" ")
if trials % 100 == 0:
tNow = time.time()
rFps = iFrame / (tNow - tStart)
print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
'\tEpsilon: {:.2f} fps: {:.2f} \t Action Probabilities: \t {}').format(
env.get_num_agents(),
trials,
np.mean(scores_window),
100 * np.mean(done_window),
eps, rFps, action_prob / np.sum(action_prob)))
env.get_num_agents(),
trials,
np.mean(scores_window),
100 * np.mean(done_window),
eps, rFps, action_prob / np.sum(action_prob)))
torch.save(agent.qnetwork_local.state_dict(),
'../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
action_prob = [1]*4
'../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
action_prob = [1] * 4
if __name__ == "__main__":
......
import sys
from PyQt5 import QtSvg
from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QGridLayout, QWidget
from PyQt5.QtCore import Qt, QByteArray
from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QGridLayout, QWidget
from flatland.utils import svg
@@ -75,4 +74,3 @@ window = MainWindow()
window.show()
app.exec_()
import random
from flatland.envs.rail_env import *
from flatland.envs.generators import random_rail_generator
from flatland.envs.rail_env import RailEnv
from flatland.utils.rendertools import *
from flatland.utils.rendertools import RenderTool
random.seed(0)
np.random.seed(0)
......
from flatland.envs.rail_env import *
from flatland.envs.generators import *
from flatland.envs.observations import TreeObsForRailEnv
from flatland.utils.rendertools import *
from flatland.baselines.dueling_double_dqn import Agent
from collections import deque
import torch, random
import random
import time
from collections import deque
import numpy as np
import torch
from flatland.baselines.dueling_double_dqn import Agent
from flatland.envs.generators import complex_rail_generator
from flatland.envs.rail_env import RailEnv
from flatland.utils.rendertools import RenderTool
random.seed(1)
np.random.seed(1)
@@ -190,25 +194,34 @@ for trials in range(1, n_trials + 1):
dones_list.append((np.mean(done_window)))
print(
'\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
'\rTraining {} Agents.\t' +
'Episode {}\t' +
'Average Score: {:.0f}\t' +
'Dones: {:.2f}%\t' +
'Epsilon: {:.2f} \t ' +
'Action Probabilities: \t ' +
'{}'.format(
env.get_num_agents(),
trials,
np.mean(
scores_window),
100 * np.mean(
done_window),
np.mean(scores_window),
100 * np.mean(done_window),
eps, action_prob / np.sum(action_prob)),
end=" ")
if trials % 100 == 0:
print(
'\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
'\rTraining {} Agents.\t' +
'Episode {}\t' +
'Average Score: {:.0f}\t' +
'Dones: {:.2f}%\t' +
'Epsilon: {:.2f} \t ' +
'Action Probabilities: \t ' +
'{}'.format(
env.get_num_agents(),
trials,
np.mean(
scores_window),
100 * np.mean(
done_window),
eps, action_prob / np.sum(action_prob)))
np.mean(scores_window),
100 * np.mean(done_window),
eps,
action_prob / np.sum(action_prob)))
torch.save(agent.qnetwork_local.state_dict(),
'../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth')
action_prob = [1] * 4
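A note on the reformatted progress print in this hunk: in Python, str.format binds more tightly than +, so attaching .format(...) only to the final '{}' literal leaves the {} placeholders in the earlier concatenated pieces unformatted (extra positional arguments to str.format are silently ignored). A minimal illustration and one possible fix, not the committed code:
# .format applies only to the last literal; the first placeholder survives as-is
print('Episode {} ' + 'Score: {}'.format(3, 10.5))
# prints: Episode {} Score: 3
# Wrapping the concatenation (as the other training script does) formats everything
print(('Episode {} ' + 'Score: {}').format(3, 10.5))
# prints: Episode 3 Score: 10.5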
@@ -492,8 +492,11 @@ class GlobalObsForRailEnv(ObservationBuilder):
self.rail_obs = np.zeros((self.env.height, self.env.width, 16))
for i in range(self.rail_obs.shape[0]):
for j in range(self.rail_obs.shape[1]):
self.rail_obs[i, j] = np.array(
list(f'{self.env.rail.get_transitions((i, j)):016b}')).astype(int)
bitlist = [int(digit) for digit in bin(self.env.rail.get_transitions((i, j)))[2:]]
bitlist = [0] * (16 - len(bitlist)) + bitlist
self.rail_obs[i, j] = np.array(bitlist)
# self.rail_obs[i, j] = np.array(
# list(f'{self.env.rail.get_transitions((i, j)):016b}')).astype(int)
# self.targets = np.zeros(self.env.height, self.env.width)
# for target_pos in self.env.agents_target:
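The replacement above avoids the f-string (which requires Python >= 3.6) by padding the output of bin() by hand. An equivalent self-contained sketch, assuming the transitions value always fits in 16 bits; the helper name is illustrative, not part of the flatland API:
import numpy as np

def transitions_to_bits(value):
    # 16-bit fixed-width binary representation, most significant bit first
    bitlist = [int(digit) for digit in bin(value)[2:]]
    bitlist = [0] * (16 - len(bitlist)) + bitlist
    return np.array(bitlist)

# format() works on older Pythons where the commented-out f-string does not:
# np.array(list('{:016b}'.format(value))).astype(int)

assert (transitions_to_bits(5) == np.array([0] * 13 + [1, 0, 1])).all()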
......