import random
from collections import deque
import numpy as np
import torch
from flatland.envs.malfunction_generators import malfunction_from_params, MalfunctionParameters
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from flatland.utils.rendertools import RenderTool
from importlib_resources import path
import torch_training.Nets
from torch_training.dueling_double_dqn import Agent
from utils.observation_utils import normalize_observation
random.seed(1)
np.random.seed(1)
"""
file_name = "./railway/complex_scene.pkl"
env = RailEnv(width=10,
height=20,
rail_generator=rail_from_file(file_name),
obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()))
x_dim = env.width
y_dim = env.height
"""
# Parameters for the Environment
x_dim = 25
y_dim = 25
n_agents = 1
n_goals = 5
min_dist = 5
# We are training an Agent using the Tree Observation with depth 2
observation_builder = TreeObsForRailEnv(max_depth=2)
# Use the malfunction generator to break agents from time to time
stochastic_data = MalfunctionParameters(malfunction_rate=1. / 10000,  # Rate of malfunction occurrence
                                        min_duration=15,  # Minimal duration of malfunction
                                        max_duration=50  # Max duration of malfunction
                                        )
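# Reading the parameters (a sketch, not a formal spec): malfunction_rate = 1/10000
# means a breakdown is drawn on average once every 10000 time steps per agent,
# and each breakdown halts the affected train for between min_duration and
# max_duration steps.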
# Custom observation builder
TreeObservation = TreeObsForRailEnv(max_depth=2)
# Different agent types (trains) with different speeds.
speed_ration_map = {1.: 1.,  # Fast passenger train
                    1. / 2.: 0.0,  # Fast freight train
                    1. / 3.: 0.0,  # Slow commuter train
                    1. / 4.: 0.0}  # Slow freight train
env = RailEnv(width=x_dim,
              height=y_dim,
              rail_generator=sparse_rail_generator(max_num_cities=3,
                                                   # Number of cities in map (where train stations are)
                                                   seed=1,  # Random seed
                                                   grid_mode=False,
                                                   max_rails_between_cities=2,
                                                   max_rails_in_city=4),
              schedule_generator=sparse_schedule_generator(speed_ration_map),
              number_of_agents=n_agents,
              malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
              obs_builder_object=TreeObservation)
env.reset(True, True)
env_renderer = RenderTool(env, gl="PILSVG")
num_features_per_node = env.obs_builder.observation_dim
tree_depth = 2
nr_nodes = 0
for i in range(tree_depth + 1):
    nr_nodes += np.power(4, i)
state_size = num_features_per_node * nr_nodes
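# Sanity check (sketch): the tree has sum_{i=0..depth} 4^i nodes, a geometric
# series equal to (4 ** (tree_depth + 1) - 1) // 3. For tree_depth = 2 this is
# 1 + 4 + 16 = 21 nodes; with the 11 features per node produced by
# normalize_observation (6 data + 1 distance + 4 agent features), the network
# input is 11 * 21 = 231 values.
assert nr_nodes == (4 ** (tree_depth + 1) - 1) // 3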
action_size = 5
# We set the number of episodes we would like to train on
if 'n_trials' not in locals():
    n_trials = 60000
max_steps = int(3 * (env.height + env.width))
eps = 1.
eps_end = 0.005
eps_decay = 0.9995
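# A note on the schedule (sketch of the arithmetic): eps decays multiplicatively,
# so it reaches eps_end after about log(0.005) / log(0.9995) ~= 10,600 episodes.
# This demo script acts greedily anyway (agent.act is called with eps=0. below),
# so the schedule only matters for the training runs further down.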
action_dict = dict()
final_action_dict = dict()
scores_window = deque(maxlen=100)
done_window = deque(maxlen=100)
scores = []
dones_list = []
action_prob = [0] * action_size
agent_obs = [None] * env.get_num_agents()
agent_next_obs = [None] * env.get_num_agents()
agent = Agent(state_size, action_size)
with path(torch_training.Nets, "navigator_checkpoint1000.pth") as file_in:
    agent.qnetwork_local.load_state_dict(torch.load(file_in))
record_images = False
frame_step = 0
for trials in range(1, n_trials + 1):
    # Reset environment
    obs, info = env.reset(True, True)
    env_renderer.reset()
    # Build agent specific observations
    for a in range(env.get_num_agents()):
        agent_obs[a] = normalize_observation(obs[a], tree_depth, observation_radius=10)
    # Reset score and done
    score = 0
    env_done = 0
    # Run episode
    for step in range(max_steps):
        # Action
        for a in range(env.get_num_agents()):
            if info['action_required'][a]:
                action = agent.act(agent_obs[a], eps=0.)
            else:
                action = 0
            action_prob[action] += 1
            action_dict.update({a: action})
        # Environment step (keep the returned info so action_required stays current)
        obs, all_rewards, done, info = env.step(action_dict)
        env_renderer.render_env(show=True, show_predictions=True, show_observations=False)
        # Build agent specific observations and normalize
        for a in range(env.get_num_agents()):
            if obs[a]:
                agent_obs[a] = normalize_observation(obs[a], tree_depth, observation_radius=10)
        if done['__all__']:
            break
import getopt
import random
import sys
from collections import deque
# make sure the root path is in system path
from pathlib import Path
from flatland.envs.malfunction_generators import malfunction_from_params, MalfunctionParameters
base_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(base_dir))
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch_training.dueling_double_dqn import Agent
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator, sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from flatland.utils.rendertools import RenderTool
from utils.observation_utils import normalize_observation
random.seed(1)
np.random.seed(1)
# Example: generate a rail given a manual specification,
# a map of tuples (cell_type, rotation)
transition_probability = [15,  # empty cell - Case 0
                          5,  # Case 1 - straight
                          5,  # Case 2 - simple switch
                          1,  # Case 3 - diamond crossing
                          1,  # Case 4 - single slip
                          1,  # Case 5 - double slip
                          1,  # Case 6 - symmetrical
                          0,  # Case 7 - dead end
                          1,  # Case 1b (8)  - simple turn right
                          1,  # Case 1c (9)  - simple turn left
                          1]  # Case 2b (10) - simple switch mirrored
# Example: generate a random rail
"""
env = RailEnv(width=20,
              height=20,
              rail_generator=random_rail_generator(cell_type_relative_proportion=transition_probability),
              number_of_agents=1)
env = RailEnv(width=15,
              height=15,
              rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=10, min_dist=10, max_dist=99999, seed=0),
              number_of_agents=1)
"""
env = RailEnv(width=10,
              height=20,
              obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv()))
env.load("./railway/complex_scene.pkl")
file_load = True
"""
env = RailEnv(width=20,
height=20,
rail_generator=complex_rail_generator(nr_start_goal=20, nr_extra=5, min_dist=10, max_dist=99999, seed=0),
obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv()),
number_of_agents=15)
file_load = False
env.reset(True, True)
"""
env_renderer = RenderTool(env, gl="PILSVG",)
handle = env.get_agent_handles()
state_size = 168 * 2
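# Sketch of the arithmetic behind this constant: 168 = 8 features per node * 21
# nodes of a depth-2 tree; the factor 2 accounts for the two stacked time frames
# held in time_obs below.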
action_size = 5
n_trials = 15000
max_steps = int(3 * (env.height + env.width))
eps = 1.
eps_end = 0.005
eps_decay = 0.9995
action_dict = dict()
final_action_dict = dict()
scores_window = deque(maxlen=100)
done_window = deque(maxlen=100)
time_obs = deque(maxlen=2)
scores = []
dones_list = []
action_prob = [0] * action_size
agent_obs = [None] * env.get_num_agents()
agent_next_obs = [None] * env.get_num_agents()
agent = Agent(state_size, action_size, "FC", 0)
agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint15000.pth'))
demo = True
record_images = False
def max_lt(seq, val):
    """
    Return the greatest non-negative item in seq for which item < val applies.
    0 is returned if seq was empty or all items in seq were >= val.
    """
    max_val = 0
    idx = len(seq) - 1
    while idx >= 0:
        if seq[idx] < val and seq[idx] >= 0 and seq[idx] > max_val:
            max_val = seq[idx]
        idx -= 1
    return max_val
def min_lt(seq, val):
    """
    Return the smallest item in seq for which item >= val applies.
    np.inf is returned if seq was empty or all items in seq were < val.
    """
    min_val = np.inf
    idx = len(seq) - 1
    while idx >= 0:
        if seq[idx] >= val and seq[idx] < min_val:
            min_val = seq[idx]
        idx -= 1
    return min_val
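# Worked examples (illustrative values, not from the training run):
#   max_lt([3., 1200., 7.], 1000)  -> 7.0   (largest entry below the cut-off)
#   min_lt([3., 1200., 7.], 0)     -> 3.0   (smallest entry at or above 0)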
def norm_obs_clip(obs, clip_min=-1, clip_max=1):
    """
    Normalize the observation to the range [clip_min, clip_max] using the
    spread between the smallest and largest observed value.
    :param obs: observation that should be normalized
    :param clip_min: min value to which the observation will be clipped
    :param clip_max: max value to which the observation will be clipped
    :return: normalized and clipped observation
    """
    max_obs = max(1, max_lt(obs, 1000))
    min_obs = min(max_obs, min_lt(obs, 0))
    if max_obs == min_obs:
        return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
    norm = np.abs(max_obs - min_obs)
    if norm == 0:
        norm = 1.
    return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
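# Worked example (illustrative values): for obs = [5., 20., 1500.] the helpers
# give max_obs = 20 (largest value below 1000) and min_obs = 5, so the result is
# np.clip(([5, 20, 1500] - 5) / 15, -1, 1) -> [0., 1., 1.]; the infinity-like
# 1500 entry saturates at clip_max.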
for trials in range(1, n_trials + 1):
    # Reset environment
    if file_load:
        obs = env.reset(False, False)
    else:
        obs = env.reset(True, True)
    if demo:
        env_renderer.set_new_rail()
    final_obs = obs.copy()
    final_obs_next = obs.copy()
    for a in range(env.get_num_agents()):
        data, distance, agent_data = env.obs_builder.split_tree(tree=np.array(obs[a]), num_features_per_node=8,
                                                                current_depth=0)
        data = norm_obs_clip(data)
        distance = norm_obs_clip(distance)
        agent_data = np.clip(agent_data, -1, 1)
        obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
        agent_data = env.agents[a]
        speed = 1  # np.random.randint(1, 5)
        agent_data.speed_data['speed'] = 1. / speed
    for i in range(2):
        time_obs.append(obs)
    # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
    for a in range(env.get_num_agents()):
        agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
    score = 0
    env_done = 0
    # Run episode
    for step in range(max_steps):
        if demo:
            env_renderer.renderEnv(show=True, show_observations=False)
        if record_images:
            env_renderer.gl.saveImage("./Images/frame_{:04d}.bmp".format(step))
        # print(step)
        # Action
        for a in range(env.get_num_agents()):
            if demo:
                eps = 0
            # action = agent.act(np.array(obs[a]), eps=eps)
            action = agent.act(agent_obs[a], eps=eps)
            action_prob[action] += 1
            action_dict.update({a: action})
        # Environment step
        next_obs, all_rewards, done, _ = env.step(action_dict)
        # Build agent specific observations and normalize
        for a in range(env.get_num_agents()):
            data, distance, agent_data = env.obs_builder.split_tree(tree=np.array(next_obs[a]),
                                                                    num_features_per_node=8,
                                                                    current_depth=0)
            data = norm_obs_clip(data)
            distance = norm_obs_clip(distance)
            agent_data = np.clip(agent_data, -1, 1)
            next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
        time_obs.append(next_obs)
        # Update replay buffer and train agent
        for a in range(env.get_num_agents()):
            agent_next_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
            if done[a]:
                final_obs[a] = agent_obs[a].copy()
                final_obs_next[a] = agent_next_obs[a].copy()
                final_action_dict.update({a: action_dict[a]})
            if not demo and not done[a]:
                agent.step(agent_obs[a], action_dict[a], all_rewards[a], agent_next_obs[a], done[a])
            score += all_rewards[a] / env.get_num_agents()
        agent_obs = agent_next_obs.copy()
        if done['__all__']:
            env_done = 1
            for a in range(env.get_num_agents()):
                agent.step(final_obs[a], final_action_dict[a], all_rewards[a], final_obs_next[a], done[a])
            break
    # Epsilon decay
    eps = max(eps_end, eps_decay * eps)  # decrease epsilon
    done_window.append(env_done)
    scores_window.append(score / max_steps)  # save most recent score
    scores.append(np.mean(scores_window))
    dones_list.append((np.mean(done_window)))
    print(
        '\rTraining {} Agents.\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
            env.get_num_agents(),
            trials,
            np.mean(scores_window),
            100 * np.mean(done_window),
            eps, action_prob / np.sum(action_prob)), end=" ")
    if trials % 100 == 0:
        print(
            '\rTraining {} Agents.\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
                env.get_num_agents(),
                trials,
                np.mean(scores_window),
                100 * np.mean(done_window),
                eps, action_prob / np.sum(action_prob)))
        torch.save(agent.qnetwork_local.state_dict(),
                   './Nets/avoid_checkpoint' + str(trials) + '.pth')
        action_prob = [1] * action_size
plt.plot(scores)
plt.show()


def main(argv):
    try:
        opts, args = getopt.getopt(argv, "n:", ["n_trials="])
    except getopt.GetoptError:
        print('training_navigation.py -n <n_trials>')
        sys.exit(2)
    for opt, arg in opts:
        if opt in ('-n', '--n_trials'):
            n_trials = int(arg)
    random.seed(1)
    np.random.seed(1)
    # Parameters for the Environment
    x_dim = 35
    y_dim = 35
    n_agents = 1
    # Use the malfunction generator to break agents from time to time
    stochastic_data = MalfunctionParameters(malfunction_rate=1. / 10000,  # Rate of malfunction occurrence
                                            min_duration=15,  # Minimal duration of malfunction
                                            max_duration=50  # Max duration of malfunction
                                            )
    # Custom observation builder
    TreeObservation = TreeObsForRailEnv(max_depth=2)
    # Different agent types (trains) with different speeds.
    speed_ration_map = {1.: 0.,  # Fast passenger train
                        1. / 2.: 1.0,  # Fast freight train
                        1. / 3.: 0.0,  # Slow commuter train
                        1. / 4.: 0.0}  # Slow freight train
    env = RailEnv(width=x_dim,
                  height=y_dim,
                  rail_generator=sparse_rail_generator(max_num_cities=3,
                                                       # Number of cities in map (where train stations are)
                                                       seed=1,  # Random seed
                                                       grid_mode=False,
                                                       max_rails_between_cities=2,
                                                       max_rails_in_city=3),
                  schedule_generator=sparse_schedule_generator(speed_ration_map),
                  number_of_agents=n_agents,
                  malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
                  # Malfunction data generator
                  obs_builder_object=TreeObservation)
    # Reset env
    env.reset(True, True)
    # After training we want to render the results so we also load a renderer
    env_renderer = RenderTool(env, gl="PILSVG")
    # Given the depth of the tree observation and the number of features per node we get the following state_size
    num_features_per_node = env.obs_builder.observation_dim
    tree_depth = 2
    nr_nodes = 0
    for i in range(tree_depth + 1):
        nr_nodes += np.power(4, i)
    state_size = num_features_per_node * nr_nodes
    # The action space of flatland is 5 discrete actions
    action_size = 5
    # We set the number of episodes we would like to train on
    if 'n_trials' not in locals():
        n_trials = 15000
    # And the max number of steps we want to take per episode
    max_steps = int(3 * (env.height + env.width))
    # Define training parameters
    eps = 1.
    eps_end = 0.005
    eps_decay = 0.998
    # And some variables to keep track of the progress
    action_dict = dict()
    final_action_dict = dict()
    scores_window = deque(maxlen=100)
    done_window = deque(maxlen=100)
    scores = []
    dones_list = []
    action_prob = [0] * action_size
    agent_obs = [None] * env.get_num_agents()
    agent_next_obs = [None] * env.get_num_agents()
    agent_obs_buffer = [None] * env.get_num_agents()
    agent_action_buffer = [2] * env.get_num_agents()
    cummulated_reward = np.zeros(env.get_num_agents())
    update_values = False
    # Now we load a Double dueling DQN agent
    agent = Agent(state_size, action_size)
    for trials in range(1, n_trials + 1):
        # Reset environment
        obs, info = env.reset(True, True)
        env_renderer.reset()
        # Build agent specific observations
        for a in range(env.get_num_agents()):
            if obs[a]:
                agent_obs[a] = normalize_observation(obs[a], tree_depth, observation_radius=10)
                agent_obs_buffer[a] = agent_obs[a].copy()
        # Reset score and done
        score = 0
        env_done = 0
        # Run episode
        for step in range(max_steps):
            # Action
            for a in range(env.get_num_agents()):
                if info['action_required'][a]:
                    # If an action is required, we want to store the obs at that step as well as the action
                    update_values = True
                    action = agent.act(agent_obs[a], eps=eps)
                    action_prob[action] += 1
                else:
                    update_values = False
                    action = 0
                action_dict.update({a: action})
            # Environment step
            next_obs, all_rewards, done, info = env.step(action_dict)
            # Update replay buffer and train agent
            for a in range(env.get_num_agents()):
                # Only update the values when we are done or when an action was taken and thus relevant information is present
                if update_values or done[a]:
                    agent.step(agent_obs_buffer[a], agent_action_buffer[a], all_rewards[a],
                               agent_obs[a], done[a])
                    cummulated_reward[a] = 0.
                    agent_obs_buffer[a] = agent_obs[a].copy()
                    agent_action_buffer[a] = action_dict[a]
                if next_obs[a]:
                    agent_obs[a] = normalize_observation(next_obs[a], tree_depth, observation_radius=10)
                score += all_rewards[a] / env.get_num_agents()
            # Copy observation
            if done['__all__']:
                env_done = 1
                break
        # Epsilon decay
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
        # Collect information about training
        tasks_finished = 0
        for _idx in range(env.get_num_agents()):
            if done[_idx] == 1:
                tasks_finished += 1
        done_window.append(tasks_finished / max(1, env.get_num_agents()))
        scores_window.append(score / max_steps)  # save most recent score
        scores.append(np.mean(scores_window))
        dones_list.append((np.mean(done_window)))
        print(
            '\rTraining {} Agents on ({},{}).\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
                env.get_num_agents(), x_dim, y_dim,
                trials,
                np.mean(scores_window),
                100 * np.mean(done_window),
                eps, action_prob / np.sum(action_prob)), end=" ")
        if trials % 100 == 0:
            print(
                '\rTraining {} Agents on ({},{}).\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
                    env.get_num_agents(), x_dim, y_dim,
                    trials,
                    np.mean(scores_window),
                    100 * np.mean(done_window),
                    eps, action_prob / np.sum(action_prob)))
            torch.save(agent.qnetwork_local.state_dict(),
                       './Nets/navigator_checkpoint' + str(trials) + '.pth')
            action_prob = [1] * action_size
    # Plot overall training progress at the end
    plt.plot(scores)
    plt.show()


if __name__ == '__main__':
    main(sys.argv[1:])
setenv =
    PYTHONPATH = {toxinidir}
passenv =
    DISPLAY
    XAUTHORITY
; HTTP_PROXY+HTTPS_PROXY required behind corporate proxies
    HTTP_PROXY
    HTTPS_PROXY
deps =
    -r{toxinidir}/requirements_torch_training.txt
commands =
    python torch_training/training_navigation.py
    python torch_training/multi_agent_training.py --n_trials=10

[flake8]
max-line-length = 120
ignore = E121 E126 E123 E128 E133 E226 E241 E242 E704 W291 W293 W391 W503 W504 W

[testenv:flake8]
basepython = python
passenv =
    DISPLAY
    XAUTHORITY
; HTTP_PROXY+HTTPS_PROXY required behind corporate proxies
    HTTP_PROXY
    HTTPS_PROXY
deps =
    -r{toxinidir}/requirements_torch_training.txt
commands =
import random
import time
from collections import deque
import numpy as np
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from line_profiler import LineProfiler
from utils.observation_utils import norm_obs_clip, split_tree_into_feature_groups
def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='*'):
    """
    Call in a loop to create terminal progress bar
    @params:
        iteration - Required : current iteration (Int)
        total     - Required : total iterations (Int)
        prefix    - Optional : prefix string (Str)
        suffix    - Optional : suffix string (Str)
        decimals  - Optional : positive number of decimals in percent complete (Int)
        length    - Optional : character length of bar (Int)
        fill      - Optional : bar fill character (Str)
    """
    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
    filledLength = int(length * iteration // total)
    bar = fill * filledLength + '_' * (length - filledLength)
    print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end=" ")
    # Print New Line on Complete
    if iteration == total:
        print('')
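# Usage sketch: call once with iteration=0 before the loop and once per completed
# trial, e.g.
#   printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
#   ...
#   printProgressBar(trial + 1, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
# exactly as run_test does below; the bar rewrites itself in place via '\r'.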
class RandomAgent:

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size

    def act(self, state, eps=0):
        """
        :param state: input is the observation of the agent
        :return: returns an action
        """
        return np.random.choice(np.arange(self.action_size))

    def step(self, memories):
        """
        Step function to improve agent by adjusting policy given the observations
        :param memories: SARS tuple to be stored
        :return:
        """
        return

    def save(self, filename):
        # Store the current policy
        return

    def load(self, filename):
        # Load a policy
        return
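# RandomAgent mirrors the act/step/save/load interface of the trained DQN Agent,
# which makes it a convenient scoring baseline. A minimal sketch:
#   baseline = RandomAgent(state_size=231, action_size=5)
#   action = baseline.act(state=None)  # state is ignored; actions are uniform over the 5 options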
def run_test(parameters, agent, test_nr=0, tree_depth=3):
    # Parameter initialization
    lp = LineProfiler()
    features_per_node = 9
    start_time_scoring = time.time()
    action_dict = dict()
    nr_trials_per_test = 5
    print('Running Test {} with (x_dim,y_dim) = ({},{}) and {} Agents.'.format(test_nr, parameters[0], parameters[1],
                                                                               parameters[2]))
    # Reset all measurements
    time_obs = deque(maxlen=2)
    test_scores = []
    test_dones = []
    # Reset environment
    random.seed(parameters[3])
    np.random.seed(parameters[3])
    nr_paths = max(2, parameters[2] + int(0.5 * parameters[2]))
    min_dist = int(min([parameters[0], parameters[1]]) * 0.75)
    env = RailEnv(width=parameters[0],
                  height=parameters[1],
                  rail_generator=complex_rail_generator(nr_start_goal=nr_paths, nr_extra=5, min_dist=min_dist,
                                                        max_dist=99999,
                                                        seed=parameters[3]),
                  schedule_generator=complex_schedule_generator(),
                  # Tree observations are required by split_tree_into_feature_groups below
                  obs_builder_object=TreeObsForRailEnv(max_depth=tree_depth),
                  number_of_agents=parameters[2])
    max_steps = int(3 * (env.height + env.width))
    lp_step = lp(env.step)
    lp_reset = lp(env.reset)
    agent_obs = [None] * env.get_num_agents()
    printProgressBar(0, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
    for trial in range(nr_trials_per_test):
        # Reset the env
        lp_reset(True, True)
        obs, info = env.reset(True, True)
        for a in range(env.get_num_agents()):
            data, distance, agent_data = split_tree_into_feature_groups(obs[a], tree_depth)
            data = norm_obs_clip(data)
            distance = norm_obs_clip(distance)
            agent_data = np.clip(agent_data, -1, 1)
            obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
        for i in range(2):
            time_obs.append(obs)
        for a in range(env.get_num_agents()):
            agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
        # Run episode
        trial_score = 0
        for step in range(max_steps):
            for a in range(env.get_num_agents()):
                action = agent.act(agent_obs[a], eps=0)
                action_dict.update({a: action})
            # Environment step
            next_obs, all_rewards, done, _ = lp_step(action_dict)
            for a in range(env.get_num_agents()):
                data, distance, agent_data = split_tree_into_feature_groups(next_obs[a], tree_depth)
                data = norm_obs_clip(data)
                distance = norm_obs_clip(distance)
                agent_data = np.clip(agent_data, -1, 1)
                next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
            time_obs.append(next_obs)
            for a in range(env.get_num_agents()):
                agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
                trial_score += all_rewards[a] / env.get_num_agents()
            if done['__all__']:
                break
        test_scores.append(trial_score / max_steps)
        test_dones.append(done['__all__'])
        printProgressBar(trial + 1, nr_trials_per_test, prefix='Progress:', suffix='Complete', length=20)
    end_time_scoring = time.time()
    tot_test_time = end_time_scoring - start_time_scoring
    lp.print_stats()
    return test_scores, test_dones, tot_test_time
import numpy as np
from flatland.envs.observations import TreeObsForRailEnv
def max_lt(seq, val):
    """
    Return the greatest non-negative item in seq for which item < val applies.
    0 is returned if seq was empty or all items in seq were >= val.
    """
    max_val = 0
    idx = len(seq) - 1
    while idx >= 0:
        if seq[idx] < val and seq[idx] >= 0 and seq[idx] > max_val:
            max_val = seq[idx]
        idx -= 1
    return max_val
def min_gt(seq, val):
    """
    Return the smallest item in seq for which item >= val applies.
    np.inf is returned if seq was empty or all items in seq were < val.
    """
    min_val = np.inf
    idx = len(seq) - 1
    while idx >= 0:
        if seq[idx] >= val and seq[idx] < min_val:
            min_val = seq[idx]
        idx -= 1
    return min_val
def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0, normalize_to_range=False):
    """
    Normalize the observation to the range [clip_min, clip_max] and clip it.
    :param obs: observation that should be normalized
    :param clip_min: min value to which the observation will be clipped
    :param clip_max: max value to which the observation will be clipped
    :param fixed_radius: if > 0, divide by this fixed radius instead of the observed maximum
    :param normalize_to_range: if True, shift by the smallest non-negative value before scaling
    :return: normalized and clipped observation
    """
    if fixed_radius > 0:
        max_obs = fixed_radius
    else:
        max_obs = max(1, max_lt(obs, 1000)) + 1
    min_obs = 0  # min(max_obs, min_gt(obs, 0))
    if normalize_to_range:
        min_obs = min_gt(obs, 0)
    if min_obs > max_obs:
        min_obs = max_obs
    if max_obs == min_obs:
        return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
    norm = np.abs(max_obs - min_obs)
    return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
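# Worked example (illustrative values): with a fixed radius the values are simply
# scaled by it, e.g. norm_obs_clip([2., 5., 30.], fixed_radius=10) computes
# np.clip([2, 5, 30] / 10, -1, 1) -> [0.2, 0.5, 1.]. With normalize_to_range=True
# the smallest non-negative entry is shifted to 0 first, which is how the distance
# channel is handled in normalize_observation below.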
def _split_node_into_feature_groups(node: TreeObsForRailEnv.Node) -> (np.ndarray, np.ndarray, np.ndarray):
    data = np.zeros(6)
    distance = np.zeros(1)
    agent_data = np.zeros(4)
    data[0] = node.dist_own_target_encountered
    data[1] = node.dist_other_target_encountered
    data[2] = node.dist_other_agent_encountered
    data[3] = node.dist_potential_conflict
    data[4] = node.dist_unusable_switch
    data[5] = node.dist_to_next_branch
    distance[0] = node.dist_min_to_target
    agent_data[0] = node.num_agents_same_direction
    agent_data[1] = node.num_agents_opposite_direction
    agent_data[2] = node.num_agents_malfunctioning
    agent_data[3] = node.speed_min_fractional
    return data, distance, agent_data
def _split_subtree_into_feature_groups(node: TreeObsForRailEnv.Node, current_tree_depth: int, max_tree_depth: int) -> (np.ndarray, np.ndarray, np.ndarray):
    if node == -np.inf:
        remaining_depth = max_tree_depth - current_tree_depth
        # reference: https://stackoverflow.com/questions/515214/total-number-of-nodes-in-a-tree-data-structure
        num_remaining_nodes = int((4 ** (remaining_depth + 1) - 1) / (4 - 1))
        return [-np.inf] * num_remaining_nodes * 6, [-np.inf] * num_remaining_nodes, [-np.inf] * num_remaining_nodes * 4
    data, distance, agent_data = _split_node_into_feature_groups(node)
    if not node.childs:
        return data, distance, agent_data
    for direction in TreeObsForRailEnv.tree_explored_actions_char:
        sub_data, sub_distance, sub_agent_data = _split_subtree_into_feature_groups(node.childs[direction], current_tree_depth + 1, max_tree_depth)
        data = np.concatenate((data, sub_data))
        distance = np.concatenate((distance, sub_distance))
        agent_data = np.concatenate((agent_data, sub_agent_data))
    return data, distance, agent_data
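# Padding arithmetic (sketch): an unexplored branch at remaining_depth d stands in
# for a complete 4-ary subtree of (4 ** (d + 1) - 1) / 3 nodes. For
# remaining_depth = 1 that is 5 nodes, i.e. 30 data values, 5 distances and
# 20 agent values of -np.inf, keeping the flattened vectors at a fixed length.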
def split_tree_into_feature_groups(tree: TreeObsForRailEnv.Node, max_tree_depth: int) -> (np.ndarray, np.ndarray, np.ndarray):
    """
    This function splits the tree into three different arrays of values
    """
    data, distance, agent_data = _split_node_into_feature_groups(tree)
    for direction in TreeObsForRailEnv.tree_explored_actions_char:
        sub_data, sub_distance, sub_agent_data = _split_subtree_into_feature_groups(tree.childs[direction], 1, max_tree_depth)
        data = np.concatenate((data, sub_data))
        distance = np.concatenate((distance, sub_distance))
        agent_data = np.concatenate((agent_data, sub_agent_data))
    return data, distance, agent_data
def normalize_observation(observation: TreeObsForRailEnv.Node, tree_depth: int, observation_radius=0):
    """
    This function normalizes the observation used by the RL algorithm
    """
    data, distance, agent_data = split_tree_into_feature_groups(observation, tree_depth)
    data = norm_obs_clip(data, fixed_radius=observation_radius)
    distance = norm_obs_clip(distance, normalize_to_range=True)
    agent_data = np.clip(agent_data, -1, 1)
    normalized_obs = np.concatenate((np.concatenate((data, distance)), agent_data))
    return normalized_obs
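# End-to-end sketch: the training scripts above call
#   agent_obs[a] = normalize_observation(obs[a], tree_depth, observation_radius=10)
# for each agent handle a; with tree_depth = 2 and the 6 + 1 + 4 features per node
# collected here, this yields a flat vector of 11 * 21 = 231 values in [-1, 1],
# which matches the state_size the networks are built with.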