diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index f552f7626cfffca9a4422cb9337c9cdc27d83867..7ae26bcc3d4e0f4a5cbbcfafd055e2c084a42345 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -79,7 +79,7 @@ Ready to contribute? Here's how to set up `flatland` for local development.
 5. When you're done making changes, check that your changes pass flake8 and the
    tests, including testing other Python versions with tox::
 
-    $ flake8 flatland tests
+    $ flake8 flatland tests examples
     $ python setup.py test or py.test
     $ tox
 
@@ -125,4 +125,4 @@
 $ bumpversion patch # possible: major / minor / patch
 $ git push
 $ git push --tags
-Travis will then deploy to PyPI if tests pass. (To be configured properly by Mohanty)
\ No newline at end of file
+Travis will then deploy to PyPI if tests pass. (To be configured properly by Mohanty)
diff --git a/Makefile b/Makefile
index b3b9cb79358ac4fe7d11928c8bb62fb5e4c53213..e9c25bbdfed174ea0ebc4570aed4949c53b31c48 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ clean-test: ## remove test and coverage artifacts
 	rm -fr .pytest_cache
 
 lint: ## check style with flake8
-	flake8 flatland tests
+	flake8 flatland tests examples
 
 test: ## run tests quickly with the default Python
 	py.test
diff --git a/examples/play_model.py b/examples/play_model.py
index 62726c24c96be0e5dae2f4840e18da452163b7ac..174568177a4a886cfe38e53125d0f73f2dae52de 100644
--- a/examples/play_model.py
+++ b/examples/play_model.py
@@ -1,12 +1,14 @@
-from flatland.envs.rail_env import RailEnv
-from flatland.envs.generators import complex_rail_generator
-from flatland.utils.rendertools import RenderTool
-from flatland.baselines.dueling_double_dqn import Agent
-from collections import deque
-import torch
 import random
-import numpy as np
 import time
+from collections import deque
+
+import numpy as np
+import torch
+
+from flatland.baselines.dueling_double_dqn import Agent
+from flatland.envs.generators import complex_rail_generator
+from flatland.envs.rail_env import RailEnv
+from flatland.utils.rendertools import RenderTool
 
 
 class Player(object):
@@ -25,7 +27,7 @@ class Player(object):
         self.done_window = deque(maxlen=100)
         self.scores = []
         self.dones_list = []
-        self.action_prob = [0]*4
+        self.action_prob = [0] * 4
         self.agent = Agent(self.state_size, self.action_size, "FC", 0)
         # self.agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth'))
         self.agent.qnetwork_local.load_state_dict(torch.load(
@@ -33,7 +35,7 @@ class Player(object):
 
         self.iFrame = 0
         self.tStart = time.time()
-        
+
         # Reset environment
         # self.obs = self.env.reset()
         self.env.obs_builder.reset()
@@ -51,7 +53,6 @@ class Player(object):
         env = self.env
 
         # Pass the (stored) observation to the agent network and retrieve the action
-        #for handle in env.get_agent_handles():
         for handle in env.get_agent_handles():
             action = self.agent.act(np.array(self.obs[handle]), eps=self.eps)
             self.action_prob[action] += 1
@@ -68,8 +69,8 @@ class Player(object):
         # Update replay buffer and train agent
         for handle in self.env.get_agent_handles():
             self.agent.step(self.obs[handle], self.action_dict[handle],
-                all_rewards[handle], next_obs[handle], done[handle],
-                train=False)
+                            all_rewards[handle], next_obs[handle], done[handle],
+                            train=False)
             self.score += all_rewards[handle]
 
         self.iFrame += 1
@@ -85,7 +86,7 @@ def max_lt(seq, val):
 
     None is returned if seq was empty or all items in seq were >= val.
""" - idx = len(seq)-1 + idx = len(seq) - 1 while idx >= 0: if seq[idx] < val and seq[idx] >= 0: return seq[idx] @@ -94,7 +95,6 @@ def max_lt(seq, val): def main(render=True, delay=0.0): - random.seed(1) np.random.seed(1) @@ -118,8 +118,9 @@ def main(render=True, delay=0.0): done_window = deque(maxlen=100) scores = [] dones_list = [] - action_prob = [0]*4 + action_prob = [0] * 4 agent = Agent(state_size, action_size, "FC", 0) + # agent.qnetwork_local.load_state_dict(torch.load('../flatland/baselines/Nets/avoid_checkpoint9900.pth')) def max_lt(seq, val): @@ -128,7 +129,7 @@ def main(render=True, delay=0.0): None is returned if seq was empty or all items in seq were >= val. """ - idx = len(seq)-1 + idx = len(seq) - 1 while idx >= 0: if seq[idx] < val and seq[idx] >= 0: return seq[idx] @@ -141,7 +142,8 @@ def main(render=True, delay=0.0): # Reset environment obs = env.reset() - env_renderer.set_new_rail() + if render: + env_renderer.set_new_rail() for a in range(env.get_num_agents()): norm = max(1, max_lt(obs[a], np.inf)) @@ -165,7 +167,6 @@ def main(render=True, delay=0.0): if render: env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step, action_dict=action_dict) - #time.sleep(10) if delay > 0: time.sleep(delay) @@ -181,7 +182,6 @@ def main(render=True, delay=0.0): agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a]) score += all_rewards[a] - obs = next_obs.copy() if done['__all__']: env_done = 1 @@ -196,25 +196,25 @@ def main(render=True, delay=0.0): print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' + '\tEpsilon: {:.2f} \t Action Probabilities: \t {}').format( - env.get_num_agents(), - trials, - np.mean(scores_window), - 100 * np.mean(done_window), - eps, action_prob/np.sum(action_prob)), + env.get_num_agents(), + trials, + np.mean(scores_window), + 100 * np.mean(done_window), + eps, action_prob / np.sum(action_prob)), end=" ") if trials % 100 == 0: tNow = time.time() rFps = iFrame / (tNow - tStart) print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' + '\tEpsilon: {:.2f} fps: {:.2f} \t Action Probabilities: \t {}').format( - env.get_num_agents(), - trials, - np.mean(scores_window), - 100 * np.mean(done_window), - eps, rFps, action_prob / np.sum(action_prob))) + env.get_num_agents(), + trials, + np.mean(scores_window), + 100 * np.mean(done_window), + eps, rFps, action_prob / np.sum(action_prob))) torch.save(agent.qnetwork_local.state_dict(), - '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth') - action_prob = [1]*4 + '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth') + action_prob = [1] * 4 if __name__ == "__main__": diff --git a/examples/qt2.py b/examples/qt2.py index 6074106523c7fbe503f79b6bc7604f055dc27b35..ee3ea0cd123a3e6d0b1fc970eced219ed8503203 100644 --- a/examples/qt2.py +++ b/examples/qt2.py @@ -1,9 +1,8 @@ - - import sys + from PyQt5 import QtSvg -from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QGridLayout, QWidget from PyQt5.QtCore import Qt, QByteArray +from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QGridLayout, QWidget from flatland.utils import svg @@ -75,4 +74,3 @@ window = MainWindow() window.show() app.exec_() - diff --git a/examples/temporary_example.py b/examples/temporary_example.py index a8194d4d6173be562d4cb7a82571d43905a9f174..f30070bbd37000ce43e036873db95274008c7268 100644 --- a/examples/temporary_example.py +++ b/examples/temporary_example.py @@ -1,7 +1,9 @@ import random -from 
flatland.envs.rail_env import * +from flatland.envs.generators import random_rail_generator +from flatland.envs.rail_env import RailEnv from flatland.utils.rendertools import * +from flatland.utils.rendertools import RenderTool random.seed(0) np.random.seed(0) diff --git a/examples/training_navigation.py b/examples/training_navigation.py index cabb655e3eb2bc2d12d908559b0102b072163052..85f9531b8820139e5559081feee4a93c4e01ac6c 100644 --- a/examples/training_navigation.py +++ b/examples/training_navigation.py @@ -1,11 +1,15 @@ -from flatland.envs.rail_env import * -from flatland.envs.generators import * -from flatland.envs.observations import TreeObsForRailEnv -from flatland.utils.rendertools import * -from flatland.baselines.dueling_double_dqn import Agent -from collections import deque -import torch, random +import random import time +from collections import deque + +import numpy as np +import torch + +from flatland.baselines.dueling_double_dqn import Agent +from flatland.envs.generators import complex_rail_generator +from flatland.envs.rail_env import RailEnv +from flatland.utils.rendertools import RenderTool + random.seed(1) np.random.seed(1) @@ -190,25 +194,34 @@ for trials in range(1, n_trials + 1): dones_list.append((np.mean(done_window))) print( - '\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format( + '\rTraining {} Agents.\t' + + 'Episode {}\t' + + 'Average Score: {:.0f}\t' + + 'Dones: {:.2f}%\t' + + 'Epsilon: {:.2f} \t ' + + 'Action Probabilities: \t ' + + '{}'.format( env.get_num_agents(), trials, - np.mean( - scores_window), - 100 * np.mean( - done_window), + np.mean(scores_window), + 100 * np.mean(done_window), eps, action_prob / np.sum(action_prob)), end=" ") if trials % 100 == 0: print( - '\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format( + '\rTraining {} Agents.\t' + + 'Episode {}\t' + + 'Average Score: {:.0f}\t' + + 'Dones: {:.2f}%\t' + + 'Epsilon: {:.2f} \t ' + + 'Action Probabilities: \t ' + + '{}'.format( env.get_num_agents(), trials, - np.mean( - scores_window), - 100 * np.mean( - done_window), - eps, action_prob / np.sum(action_prob))) + np.mean(scores_window), + 100 * np.mean(done_window), + eps, + action_prob / np.sum(action_prob))) torch.save(agent.qnetwork_local.state_dict(), '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth') action_prob = [1] * 4 diff --git a/flatland/envs/observations.py b/flatland/envs/observations.py index 651d83520ee1f492ea71ba6ddf82cfa5f9093964..fa77b58a53ef63767c2690edc782b6699f6c8459 100644 --- a/flatland/envs/observations.py +++ b/flatland/envs/observations.py @@ -492,8 +492,11 @@ class GlobalObsForRailEnv(ObservationBuilder): self.rail_obs = np.zeros((self.env.height, self.env.width, 16)) for i in range(self.rail_obs.shape[0]): for j in range(self.rail_obs.shape[1]): - self.rail_obs[i, j] = np.array( - list(f'{self.env.rail.get_transitions((i, j)):016b}')).astype(int) + bitlist = [int(digit) for digit in bin(self.env.rail.get_transitions((i, j)))[2:]] + bitlist = [0] * (16 - len(bitlist)) + bitlist + self.rail_obs[i, j] = np.array(bitlist) + # self.rail_obs[i, j] = np.array( + # list(f'{self.env.rail.get_transitions((i, j)):016b}')).astype(int) # self.targets = np.zeros(self.env.height, self.env.width) # for target_pos in self.env.agents_target:
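
Note on the flatland/envs/observations.py hunk above: the f-string bit expansion (f-strings require Python 3.6+) is replaced by an explicit bin()-and-pad construction of the 16-bit transition vector. The following standalone sketch is not part of the patch; it assumes nothing from flatland and uses illustrative helper names, and simply checks that the two expansions shown in the hunk produce the same 16-element bit array.

    import numpy as np


    def bits_via_bin(value):
        # New form from the patch: strip the '0b' prefix and left-pad to 16 bits.
        bitlist = [int(digit) for digit in bin(value)[2:]]
        bitlist = [0] * (16 - len(bitlist)) + bitlist
        return np.array(bitlist)


    def bits_via_format(value):
        # Old form, now commented out in the patch: zero-padded binary f-string.
        return np.array(list(f'{value:016b}')).astype(int)


    if __name__ == "__main__":
        # Spot-check a few 16-bit transition values, including the extremes.
        for value in (0, 1, 0b1000000000100000, 2 ** 16 - 1):
            assert (bits_via_bin(value) == bits_via_format(value)).all()
        print("both bit expansions agree")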