Commit 8e9dcca2 authored by u214892

remove verbosity from benchmark

parent c4b3b556
 import random
-import time
-from collections import deque

 import numpy as np
 from benchmarker import Benchmarker

 from flatland.envs.generators import complex_rail_generator
 from flatland.envs.rail_env import RailEnv
-from flatland.utils.rendertools import RenderTool


-def main(render=True, delay=0.0):
+def main():
     random.seed(1)
     np.random.seed(1)
@@ -19,18 +16,8 @@ def main(render=True, delay=0.0):
                   rail_generator=complex_rail_generator(nr_start_goal=5, nr_extra=20, min_dist=12),
                   number_of_agents=5)

-    if render:
-        env_renderer = RenderTool(env, gl="QTSVG")
-
     n_trials = 20
-    eps = 1.
-    eps_end = 0.005
-    eps_decay = 0.998
     action_dict = dict()
-    scores_window = deque(maxlen=100)
-    done_window = deque(maxlen=100)
-    scores = []
-    dones_list = []
     action_prob = [0] * 4

     def max_lt(seq, val):
@@ -46,82 +33,32 @@ def main(render=True, delay=0.0):
             idx -= 1
         return None

-    iFrame = 0
-    tStart = time.time()
     for trials in range(1, n_trials + 1):

         # Reset environment
         obs = env.reset()
-        if render:
-            env_renderer.set_new_rail()

         for a in range(env.get_num_agents()):
             norm = max(1, max_lt(obs[a], np.inf))
             obs[a] = np.clip(np.array(obs[a]) / norm, -1, 1)

-        # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
-        score = 0
-        env_done = 0

         # Run episode
         for step in range(100):
-            # if trials > 114:
-            #     env_renderer.renderEnv(show=True)
-            # print(step)

             # Action
             for a in range(env.get_num_agents()):
                 action = np.random.randint(0, 4)
                 action_prob[action] += 1
                 action_dict.update({a: action})

-            if render:
-                env_renderer.renderEnv(show=True, frames=True, iEpisode=trials, iStep=step, action_dict=action_dict)
-                if delay > 0:
-                    time.sleep(delay)
-            iFrame += 1

             # Environment step
             next_obs, all_rewards, done, _ = env.step(action_dict)
             for a in range(env.get_num_agents()):
                 norm = max(1, max_lt(next_obs[a], np.inf))
                 next_obs[a] = np.clip(np.array(next_obs[a]) / norm, -1, 1)

-            # Update replay buffer and train agent
-            for a in range(env.get_num_agents()):
-                # agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
-                score += all_rewards[a]

-            obs = next_obs.copy()
             if done['__all__']:
-                env_done = 1
                 break

-        # Epsilon decay
-        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
-        done_window.append(env_done)
-        scores_window.append(score)  # save most recent score
-        scores.append(np.mean(scores_window))
-        dones_list.append((np.mean(done_window)))
-        print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
-               '\tEpsilon: {:.2f} \t Action Probabilities: \t {}').format(
-            env.get_num_agents(),
-            trials,
-            np.mean(scores_window),
-            100 * np.mean(done_window),
-            eps, action_prob / np.sum(action_prob)),
-            end=" ")
         if trials % 100 == 0:
-            tNow = time.time()
-            rFps = iFrame / (tNow - tStart)
-            print(('\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%' +
-                   '\tEpsilon: {:.2f} fps: {:.2f} \t Action Probabilities: \t {}').format(
-                env.get_num_agents(),
-                trials,
-                np.mean(scores_window),
-                100 * np.mean(done_window),
-                eps, rFps, action_prob / np.sum(action_prob)))
             action_prob = [1] * 4
@@ -129,4 +66,4 @@ if __name__ == "__main__":
     with Benchmarker(cycle=20, extra=1) as bench:
         @bench("Everything")
         def _(bm):
-            main(render=False, delay=0)
+            main()
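
What remains after this commit is the plain benchmarker harness. The sketch below isolates that pattern with a placeholder workload; the Benchmarker(cycle=20, extra=1) call and the @bench decorator form are taken from the diff above, while the description of the options (cycle repeats the timed function, extra adds runs whose extreme results are discarded) is benchmarker 4.x behaviour as I understand it, not something stated in this commit:

    from benchmarker import Benchmarker

    # Minimal standalone sketch of the harness used above: each
    # @bench-decorated function is timed over `cycle` runs, with
    # `extra` additional runs whose outliers are dropped (assumed
    # benchmarker 4.x semantics; see lead-in).
    with Benchmarker(cycle=20, extra=1) as bench:
        @bench("Everything")
        def _(bm):
            sum(i * i for i in range(100000))  # placeholder workload, not the Flatland episode loop

With the renderer and the per-episode prints gone, the timed body is dominated by env.reset() and env.step(), which is what a benchmark of the environment itself should measure.
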
@@ -21,13 +21,12 @@ with open('HISTORY.rst') as history_file:
-# install pycairo
+# install pycairo on Windows
 if os.name == 'nt':
     p = platform.architecture()
     is64bit = p[0] == '64bit'
     if sys.version[0:3] == '3.5':
         if is64bit:
             url = 'https://download.lfd.uci.edu/pythonlibs/t4jqbe6o/pycairo-1.18.0-cp35-cp35m-win_amd64.whl'
         else:
             url = 'https://download.lfd.uci.edu/pythonlibs/t4jqbe6o/pycairo-1.18.0-cp35-cp35m-win32.whl'
@@ -47,7 +46,7 @@ if os.name == 'nt':
     try:
         import pycairo
     except:
-        call_cmd = "pip install " + url
+        call_cmd = "pip install --user " + url
         os.system(call_cmd)
         import site
...
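
The Windows branch above uses a try-import, install-on-failure fallback, and the commit's only change there is adding --user so the wheel lands in the per-user site-packages instead of requiring administrator rights. A condensed sketch of that pattern, with the helper name ensure_wheel being illustrative (the URL is the 64-bit one from the diff):

    import os

    def ensure_wheel(module_name, wheel_url):
        # Try the import first; only shell out to pip when the
        # module is missing. --user targets per-user site-packages,
        # mirroring the change in this commit.
        try:
            __import__(module_name)
        except ImportError:
            os.system("pip install --user " + wheel_url)

    ensure_wheel("pycairo",
                 "https://download.lfd.uci.edu/pythonlibs/t4jqbe6o/pycairo-1.18.0-cp35-cp35m-win_amd64.whl")

A subprocess call via sys.executable -m pip would be more robust than os.system here, but the sketch keeps the diff's approach for comparability.
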