Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

@@ -2,18 +2,25 @@ import getopt
import random
import sys
from collections import deque
# make sure the root path is in the system path
from pathlib import Path
from flatland.envs.malfunction_generators import malfunction_from_params, MalfunctionParameters
base_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(base_dir))
import matplotlib.pyplot as plt
import numpy as np
import torch
from dueling_double_dqn import Agent
from flatland.envs.generators import complex_rail_generator
from flatland.envs.observations import TreeObsForRailEnv
from torch_training.dueling_double_dqn import Agent
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from flatland.utils.rendertools import RenderTool
from utils.observation_utils import norm_obs_clip, split_tree
from utils.observation_utils import normalize_observation
from flatland.envs.observations import TreeObsForRailEnv
def main(argv):
try:
@@ -29,28 +36,44 @@ def main(argv):
np.random.seed(1)
# Parameters for the Environment
x_dim = 10
y_dim = 10
x_dim = 35
y_dim = 35
n_agents = 1
n_goals = 5
min_dist = 5
# We are training an Agent using the Tree Observation with depth 2
observation_builder = TreeObsForRailEnv(max_depth=2)
# Load the Environment
# Use the malfunction generator to break agents from time to time
stochastic_data = MalfunctionParameters(malfunction_rate=1./10000, # Rate of malfunction occurrence
min_duration=15, # Minimal duration of malfunction
max_duration=50 # Max duration of malfunction
)
# Custom observation builder
TreeObservation = TreeObsForRailEnv(max_depth=2)
# Different agent types (trains) with different speeds.
speed_ration_map = {1.: 0., # Fast passenger train
1. / 2.: 1.0, # Fast freight train
1. / 3.: 0.0, # Slow commuter train
1. / 4.: 0.0} # Slow freight train
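# The map values are the fractions of agents assigned each speed; they are used as a probability distribution, so they should sum to 1.0.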
env = RailEnv(width=x_dim,
height=y_dim,
rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
max_dist=99999,
seed=0),
obs_builder_object=observation_builder,
number_of_agents=n_agents)
env.reset(True, True)
rail_generator=sparse_rail_generator(max_num_cities=3,
# Number of cities in map (where train stations are)
seed=1, # Random seed
grid_mode=False,
max_rails_between_cities=2,
max_rails_in_city=3),
schedule_generator=sparse_schedule_generator(speed_ration_map),
number_of_agents=n_agents,
malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
# Malfunction data generator
obs_builder_object=TreeObservation)
# Reset env
env.reset(True, True)
# After training we want to render the results so we also load a renderer
env_renderer = RenderTool(env, gl="PILSVG", )
# Given the depth of the tree observation and the number of features per node we get the following state_size
num_features_per_node = env.obs_builder.observation_dim
tree_depth = 2
@@ -64,7 +87,7 @@ def main(argv):
# We set the number of episodes we would like to train on
if 'n_trials' not in locals():
n_trials = 6000
n_trials = 15000
# And the max number of steps we want to take per episode
max_steps = int(3 * (env.height + env.width))
@@ -79,35 +102,28 @@ def main(argv):
final_action_dict = dict()
scores_window = deque(maxlen=100)
done_window = deque(maxlen=100)
time_obs = deque(maxlen=2)
scores = []
dones_list = []
action_prob = [0] * action_size
agent_obs = [None] * env.get_num_agents()
agent_next_obs = [None] * env.get_num_agents()
agent_obs_buffer = [None] * env.get_num_agents()
agent_action_buffer = [2] * env.get_num_agents()
cummulated_reward = np.zeros(env.get_num_agents())
update_values = False
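# The buffers hold the last observation and action for which each agent actually had to decide
# (action 2 = MOVE_FORWARD by default); update_values flags whether a new experience tuple
# should be stored after the next environment step.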
# Now we load a Dueling Double DQN agent
agent = Agent(state_size, action_size, "FC", 0)
Training = True
agent = Agent(state_size, action_size)
for trials in range(1, n_trials + 1):
# Reset environment
obs = env.reset(True, True)
if not Training:
env_renderer.set_new_rail()
# Split the observation tree into its parts and normalize the observation using the utility functions.
# Build agent specific local observation
obs, info = env.reset(True, True)
env_renderer.reset()
# Build agent specific observations
for a in range(env.get_num_agents()):
rail_data, distance_data, agent_data = split_tree(tree=np.array(obs[a]),
num_features_per_node=num_features_per_node,
current_depth=0)
rail_data = norm_obs_clip(rail_data)
distance_data = norm_obs_clip(distance_data)
agent_data = np.clip(agent_data, -1, 1)
agent_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
if obs[a]:
agent_obs[a] = normalize_observation(obs[a], tree_depth, observation_radius=10)
agent_obs_buffer[a] = agent_obs[a].copy()
# Reset score and done
score = 0
@@ -115,45 +131,36 @@ def main(argv):
# Run episode
for step in range(max_steps):
# Only render when not training
if not Training:
env_renderer.renderEnv(show=True, show_observations=True)
# Choose the actions
# Action
for a in range(env.get_num_agents()):
if not Training:
eps = 0
action = agent.act(agent_obs[a], eps=eps)
if info['action_required'][a]:
# If an action is required, we want to store the obs at that step as well as the action
update_values = True
action = agent.act(agent_obs[a], eps=eps)
action_prob[action] += 1
else:
update_values = False
action = 0
action_dict.update({a: action})
# Count number of actions taken for statistics
action_prob[action] += 1
# Environment step
next_obs, all_rewards, done, _ = env.step(action_dict)
for a in range(env.get_num_agents()):
rail_data, distance_data, agent_data = split_tree(tree=np.array(next_obs[a]),
num_features_per_node=num_features_per_node,
current_depth=0)
rail_data = norm_obs_clip(rail_data)
distance_data = norm_obs_clip(distance_data)
agent_data = np.clip(agent_data, -1, 1)
agent_next_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
next_obs, all_rewards, done, info = env.step(action_dict)
# Update replay buffer and train agent
for a in range(env.get_num_agents()):
# Only update the values when we are done or when an action was taken and thus relevant information is present
if update_values or done[a]:
agent.step(agent_obs_buffer[a], agent_action_buffer[a], all_rewards[a],
agent_obs[a], done[a])
cummulated_reward[a] = 0.
# Remember and train agent
if Training:
agent.step(agent_obs[a], action_dict[a], all_rewards[a], agent_next_obs[a], done[a])
agent_obs_buffer[a] = agent_obs[a].copy()
agent_action_buffer[a] = action_dict[a]
if next_obs[a]:
agent_obs[a] = normalize_observation(next_obs[a], tree_depth, observation_radius=10)
# Update the current score
score += all_rewards[a] / env.get_num_agents()
agent_obs = agent_next_obs.copy()
# Copy observation
if done['__all__']:
env_done = 1
break
@@ -161,8 +168,12 @@ def main(argv):
# Epsilon decay
eps = max(eps_end, eps_decay * eps) # decrease epsilon
# Store the information about training progress
done_window.append(env_done)
# Collect information about training
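# done_window stores the fraction of agents that reached their target in this episode.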
tasks_finished = 0
for _idx in range(env.get_num_agents()):
if done[_idx] == 1:
tasks_finished += 1
done_window.append(tasks_finished / max(1, env.get_num_agents()))
scores_window.append(score / max_steps) # save most recent score
scores.append(np.mean(scores_window))
dones_list.append((np.mean(done_window)))
@@ -187,52 +198,6 @@ def main(argv):
'./Nets/navigator_checkpoint' + str(trials) + '.pth')
action_prob = [1] * action_size
# Render the trained agent
# Reset environment
obs = env.reset(True, True)
env_renderer.set_new_rail()
# Split the observation tree into its parts and normalize the observation using the utility functions.
# Build agent specific local observation
for a in range(env.get_num_agents()):
rail_data, distance_data, agent_data = split_tree(tree=np.array(obs[a]),
num_features_per_node=num_features_per_node,
current_depth=0)
rail_data = norm_obs_clip(rail_data)
distance_data = norm_obs_clip(distance_data)
agent_data = np.clip(agent_data, -1, 1)
agent_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
# Reset score and done
score = 0
env_done = 0
# Run episode
for step in range(max_steps):
env_renderer.renderEnv(show=True, show_observations=False)
# Choose the actions
for a in range(env.get_num_agents()):
eps = 0
action = agent.act(agent_obs[a], eps=eps)
action_dict.update({a: action})
# Environment step
next_obs, all_rewards, done, _ = env.step(action_dict)
for a in range(env.get_num_agents()):
rail_data, distance_data, agent_data = split_tree(tree=np.array(next_obs[a]),
num_features_per_node=num_features_per_node,
current_depth=0)
rail_data = norm_obs_clip(rail_data)
distance_data = norm_obs_clip(distance_data)
agent_data = np.clip(agent_data, -1, 1)
agent_next_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
agent_obs = agent_next_obs.copy()
if done['__all__']:
break
# Plot overall training progress at the end
plt.plot(scores)
plt.show()
@@ -3,15 +3,16 @@ import time
from collections import deque
import numpy as np
from flatland.envs.generators import complex_rail_generator
from flatland.envs.observations import GlobalObsForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from line_profiler import LineProfiler
from utils.observation_utils import norm_obs_clip, split_tree
from utils.observation_utils import norm_obs_clip, split_tree_into_feature_groups
def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '*'):
def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='*'):
"""
Call in a loop to create terminal progress bar
@params:
@@ -31,13 +32,14 @@ def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1,
if iteration == total:
print('')
class RandomAgent:
def __init__(self, state_size, action_size):
self.state_size = state_size
self.action_size = action_size
def act(self, state, eps = 0):
def act(self, state, eps=0):
"""
:param state: input is the observation of the agent
:return: returns an action
@@ -87,6 +89,7 @@ def run_test(parameters, agent, test_nr=0, tree_depth=3):
rail_generator=complex_rail_generator(nr_start_goal=nr_paths, nr_extra=5, min_dist=min_dist,
max_dist=99999,
seed=parameters[3]),
schedule_generator=complex_schedule_generator(),
obs_builder_object=GlobalObsForRailEnv(),
number_of_agents=parameters[2])
max_steps = int(3 * (env.height + env.width))
@@ -99,10 +102,9 @@ def run_test(parameters, agent, test_nr=0, tree_depth=3):
# Reset the env
lp_reset(True, True)
obs = env.reset(True, True)
obs, info = env.reset(True, True)
for a in range(env.get_num_agents()):
data, distance, agent_data = split_tree(tree=np.array(obs[a]),
current_depth=0)
data, distance, agent_data = split_tree_into_feature_groups(obs[a], tree_depth)
data = norm_obs_clip(data)
distance = norm_obs_clip(distance)
agent_data = np.clip(agent_data, -1, 1)
@@ -126,8 +128,7 @@ def run_test(parameters, agent, test_nr=0, tree_depth=3):
next_obs, all_rewards, done, _ = lp_step(action_dict)
for a in range(env.get_num_agents()):
data, distance, agent_data = split_tree(tree=np.array(next_obs[a]),
current_depth=0)
data, distance, agent_data = split_tree_into_feature_groups(next_obs[a], tree_depth)
data = norm_obs_clip(data)
distance = norm_obs_clip(distance)
agent_data = np.clip(agent_data, -1, 1)
import numpy as np
from flatland.envs.observations import TreeObsForRailEnv
def max_lt(seq, val):
@@ -15,7 +16,7 @@ def max_lt(seq, val):
return max
def min_lt(seq, val):
def min_gt(seq, val):
"""
Return smallest item in seq for which item > val applies.
None is returned if seq was empty or all items in seq were >= val.
@@ -29,7 +30,7 @@ def min_lt(seq, val):
return min
def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0):
def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0, normalize_to_range=False):
"""
This function normalizes an observation by the spread between its min and max values (or a fixed radius) and clips the result
:param obs: Observation that should be normalized
@@ -42,58 +43,84 @@ def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0):
else:
max_obs = max(1, max_lt(obs, 1000)) + 1
min_obs = 0 # min(max_obs, min_lt(obs, 0))
min_obs = 0 # min(max_obs, min_gt(obs, 0))
if normalize_to_range:
min_obs = min_gt(obs, 0)
if min_obs > max_obs:
min_obs = max_obs
if max_obs == min_obs:
return np.clip(np.array(obs) / max_obs, clip_min, clip_max)
norm = np.abs(max_obs - min_obs)
if norm == 0:
norm = 1.
return np.clip((np.array(obs) - min_obs) / norm, clip_min, clip_max)
def split_tree(tree, num_features_per_node, current_depth=0):
def _split_node_into_feature_groups(node: TreeObsForRailEnv.Node) -> (np.ndarray, np.ndarray, np.ndarray):
data = np.zeros(6)
distance = np.zeros(1)
agent_data = np.zeros(4)
data[0] = node.dist_own_target_encountered
data[1] = node.dist_other_target_encountered
data[2] = node.dist_other_agent_encountered
data[3] = node.dist_potential_conflict
data[4] = node.dist_unusable_switch
data[5] = node.dist_to_next_branch
distance[0] = node.dist_min_to_target
agent_data[0] = node.num_agents_same_direction
agent_data[1] = node.num_agents_opposite_direction
agent_data[2] = node.num_agents_malfunctioning
agent_data[3] = node.speed_min_fractional
return data, distance, agent_data
def _split_subtree_into_feature_groups(node: TreeObsForRailEnv.Node, current_tree_depth: int, max_tree_depth: int) -> (np.ndarray, np.ndarray, np.ndarray):
if node == -np.inf:
remaining_depth = max_tree_depth - current_tree_depth
# reference: https://stackoverflow.com/questions/515214/total-number-of-nodes-in-a-tree-data-structure
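# A complete 4-ary subtree with `remaining_depth` further levels contains
# (4**(remaining_depth + 1) - 1) / (4 - 1) nodes, so that many placeholder entries are filled with -inf.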
num_remaining_nodes = int((4**(remaining_depth+1) - 1) / (4 - 1))
return [-np.inf] * num_remaining_nodes*6, [-np.inf] * num_remaining_nodes, [-np.inf] * num_remaining_nodes*4
data, distance, agent_data = _split_node_into_feature_groups(node)
if not node.childs:
return data, distance, agent_data
for direction in TreeObsForRailEnv.tree_explored_actions_char:
sub_data, sub_distance, sub_agent_data = _split_subtree_into_feature_groups(node.childs[direction], current_tree_depth + 1, max_tree_depth)
data = np.concatenate((data, sub_data))
distance = np.concatenate((distance, sub_distance))
agent_data = np.concatenate((agent_data, sub_agent_data))
return data, distance, agent_data
def split_tree_into_feature_groups(tree: TreeObsForRailEnv.Node, max_tree_depth: int) -> (np.ndarray, np.ndarray, np.ndarray):
"""
Splits the tree observation into different sub groups that need the same normalization.
This is necessary because the tree observation includes two different distances as well as binary data:
1. Distance from the agent --> This is measured in cells from the current agent location
2. Distance to target --> This is measured as the distance from a cell to the agent's target
3. Binary data --> Contains information about presence of object --> No normalization necessary
Number 1. will depend on the depth and size of the tree search
Number 2. will depend on the size of the map and thus the max distance on the map
Number 3. Is independent of tree depth and map size and thus must be handled differently
Therefore we split the tree into these classes for better normalization.
:param tree: Tree that needs to be split
:param num_features_per_node: Features per node ATTENTION! this parameter is vital to correct splitting of the tree.
:param current_depth: Keeping track of the current depth in the tree
:return: Returns the three different groups of distance and binary values.
This function splits the tree into three different arrays of values
"""
if len(tree) < num_features_per_node:
return [], [], []
depth = 0
tmp = len(tree) / num_features_per_node - 1
pow4 = 4
while tmp > 0:
tmp -= pow4
depth += 1
pow4 *= 4
child_size = (len(tree) - num_features_per_node) // 4
data, distance, agent_data = _split_node_into_feature_groups(tree)
for direction in TreeObsForRailEnv.tree_explored_actions_char:
sub_data, sub_distance, sub_agent_data = _split_subtree_into_feature_groups(tree.childs[direction], 1, max_tree_depth)
data = np.concatenate((data, sub_data))
distance = np.concatenate((distance, sub_distance))
agent_data = np.concatenate((agent_data, sub_agent_data))
return data, distance, agent_data
def normalize_observation(observation: TreeObsForRailEnv.Node, tree_depth: int, observation_radius=0):
"""
Here we split the node features into the different classes of distances and binary values.
Pay close attention to this part if you modify any of the features in the tree observation.
This function normalizes the observation used by the RL algorithm
"""
tree_data = tree[:6].tolist()
distance_data = [tree[6]]
agent_data = tree[7:num_features_per_node].tolist()
# Split each child of the current node and continue to next depth level
for children in range(4):
child_tree = tree[(num_features_per_node + children * child_size):
(num_features_per_node + (children + 1) * child_size)]
tmp_tree_data, tmp_distance_data, tmp_agent_data = split_tree(child_tree, num_features_per_node,
current_depth=current_depth + 1)
if len(tmp_tree_data) > 0:
tree_data.extend(tmp_tree_data)
distance_data.extend(tmp_distance_data)
agent_data.extend(tmp_agent_data)
return tree_data, distance_data, agent_data
data, distance, agent_data = split_tree_into_feature_groups(observation, tree_depth)
data = norm_obs_clip(data, fixed_radius=observation_radius)
distance = norm_obs_clip(distance, normalize_to_range=True)
agent_data = np.clip(agent_data, -1, 1)
normalized_obs = np.concatenate((np.concatenate((data, distance)), agent_data))
return normalized_obs
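For reference, a minimal usage sketch of the refactored observation helpers. This is an illustration only; it assumes the flatland-rl 2.x imports used above resolve and that this module is importable as utils.observation_utils.

# Minimal usage sketch: build a small sparse environment and normalize each
# agent's tree observation, mirroring what the training loop above does.
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from utils.observation_utils import normalize_observation

tree_depth = 2
env = RailEnv(width=35,
              height=35,
              rail_generator=sparse_rail_generator(max_num_cities=3,
                                                   seed=1,
                                                   grid_mode=False,
                                                   max_rails_between_cities=2,
                                                   max_rails_in_city=3),
              schedule_generator=sparse_schedule_generator(),
              number_of_agents=1,
              obs_builder_object=TreeObsForRailEnv(max_depth=tree_depth))
obs, info = env.reset(True, True)

for a in range(env.get_num_agents()):
    if obs[a]:
        # Flat numpy array (231 entries for tree_depth = 2) ready to be fed
        # to the DQN agent as its state vector.
        state = normalize_observation(obs[a], tree_depth, observation_radius=10)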