Flatland / baselines / Commits / 828db1d6

Commit 828db1d6, authored 5 years ago by Erik Nygren
simplified training example for participants
Parent: c167f87e
No related branches, tags, or merge requests found.

Changes: 1 changed file
torch_training/training_navigation.py (129 additions, 127 deletions)
@@ -7,7 +7,6 @@ import torch
 from dueling_double_dqn import Agent
 from flatland.envs.generators import complex_rail_generator
 from flatland.envs.observations import TreeObsForRailEnv
 from flatland.envs.predictions import ShortestPathPredictorForRailEnv
 from flatland.envs.rail_env import RailEnv
 from flatland.utils.rendertools import RenderTool
@@ -16,66 +15,52 @@ from utils.observation_utils import norm_obs_clip, split_tree
 random.seed(1)
 np.random.seed(1)
 
-# Example generate a rail given a manual specification,
-# a map of tuples (cell_type, rotation)
-transition_probability = [15,  # empty cell - Case 0
-                          5,   # Case 1 - straight
-                          5,   # Case 2 - simple switch
-                          1,   # Case 3 - diamond crossing
-                          1,   # Case 4 - single slip
-                          1,   # Case 5 - double slip
-                          1,   # Case 6 - symmetrical
-                          0,   # Case 7 - dead end
-                          1,   # Case 1b (8)  - simple turn right
-                          1,   # Case 1c (9)  - simple turn left
-                          1]   # Case 2b (10) - simple switch mirrored
 
 # Example generate a random rail
 """
 env = RailEnv(width=20,
               height=20,
               rail_generator=random_rail_generator(cell_type_relative_proportion=transition_probability),
               number_of_agents=1)
 env = RailEnv(width=15,
               height=15,
               rail_generator=complex_rail_generator(nr_start_goal=10, nr_extra=10, min_dist=10, max_dist=99999, seed=0),
               number_of_agents=1)
 env = RailEnv(width=10,
               height=20, obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()))
 env.load("./railway/complex_scene.pkl")
 file_load = True
 """
 
-x_dim = np.random.randint(8, 20)
-y_dim = np.random.randint(8, 20)
-n_agents = np.random.randint(3, 8)
-n_goals = n_agents + np.random.randint(0, 3)
-min_dist = int(0.75 * min(x_dim, y_dim))
+# Parameters for the Environment
+x_dim = 10
+y_dim = 10
+n_agents = 1
+n_goals = 5
+min_dist = 5
+
+# We are training an Agent using the Tree Observation with depth 2
+observation_builder = TreeObsForRailEnv(max_depth=2)
+
+# Load the Environment
 env = RailEnv(width=x_dim,
               height=y_dim,
               rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
                                                     max_dist=99999,
                                                     seed=0),
-              obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()),
+              obs_builder_object=observation_builder,
               number_of_agents=n_agents)
 env.reset(True, True)
-file_load = False
-"""
-"""
-observation_helper = TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv())
-env_renderer = RenderTool(env, gl="PILSVG",)
-handle = env.get_agent_handles()
+
+# After training we want to render the results so we also load a renderer
+env_renderer = RenderTool(env, gl="PILSVG",)
 
 # Given the depth of the tree observation and the number of features per node we get the following state_size
 features_per_node = 9
-state_size = features_per_node * 85 * 2
+tree_depth = 2
+nr_nodes = 0
+for i in range(tree_depth + 1):
+    nr_nodes += np.power(4, i)
+state_size = features_per_node * nr_nodes
 
 # The action space of flatland is 5 discrete actions
 action_size = 5
-n_trials = 30000
+
+# We set the number of episodes we would like to train on
+n_trials = 6000
+
+# And the max number of steps we want to take per episode
 max_steps = int(3 * (env.height + env.width))
 
 # Define training parameters
 eps = 1.
 eps_end = 0.005
-eps_decay = 0.9995
+eps_decay = 0.998
 
 # And some variables to keep track of the progress
 action_dict = dict()
 final_action_dict = dict()
 scores_window = deque(maxlen=100)
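For reference, a minimal sketch (not part of the commit) of the arithmetic behind the new state_size lines: the tree observation is 4-ary, so a tree of depth d has sum(4**i for i in 0..d) nodes, and the removed constant 85 * 2 corresponds to a depth-3 tree stacked over two time steps.

# Sketch only, not part of the commit: state_size arithmetic from the added lines.
features_per_node = 9
tree_depth = 2
nr_nodes = sum(4 ** i for i in range(tree_depth + 1))  # 1 + 4 + 16 = 21
state_size = features_per_node * nr_nodes              # 9 * 21 = 189

# The removed line, features_per_node * 85 * 2, matches a depth-3 tree
# (1 + 4 + 16 + 64 = 85 nodes) with two stacked time steps: 9 * 85 * 2 = 1530.
assert sum(4 ** i for i in range(3 + 1)) == 85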
@@ -86,112 +71,83 @@ dones_list = []
 action_prob = [0] * action_size
 agent_obs = [None] * env.get_num_agents()
 agent_next_obs = [None] * env.get_num_agents()
-agent = Agent(state_size, action_size, "FC", 0)
-agent.qnetwork_local.load_state_dict(torch.load('./Nets/avoid_checkpoint30000.pth'))
-demo = True
-record_images = False
+
+# Now we load a Double dueling DQN agent
+agent = Agent(state_size, action_size, "FC", 0)
+Training = True
 
 for trials in range(1, n_trials + 1):
 
-    if trials % 50 == 0 and not demo:
-        x_dim = np.random.randint(8, 20)
-        y_dim = np.random.randint(8, 20)
-        n_agents = np.random.randint(3, 8)
-        n_goals = n_agents + np.random.randint(0, 3)
-        min_dist = int(0.75 * min(x_dim, y_dim))
-        env = RailEnv(width=x_dim,
-                      height=y_dim,
-                      rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
-                                                            max_dist=99999,
-                                                            seed=0),
-                      obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()),
-                      number_of_agents=n_agents)
-        env.reset(True, True)
-        max_steps = int(3 * (env.height + env.width))
-        agent_obs = [None] * env.get_num_agents()
-        agent_next_obs = [None] * env.get_num_agents()
-
     # Reset environment
-    if file_load:
-        obs = env.reset(False, False)
-    else:
-        obs = env.reset(True, True)
-    if demo:
+    obs = env.reset(True, True)
+    if not Training:
         env_renderer.set_new_rail()
 
     obs_original = obs.copy()
     final_obs = obs.copy()
     final_obs_next = obs.copy()
 
     # Split the observation tree into its parts and normalize the observation using the utility functions.
     # Build agent specific local observation
     for a in range(env.get_num_agents()):
-        data, distance, agent_data = split_tree(tree=np.array(obs[a]), num_features_per_node=features_per_node,
-                                                current_depth=0)
-        data = norm_obs_clip(data)
-        distance = norm_obs_clip(distance)
+        rail_data, distance_data, agent_data = split_tree(tree=np.array(obs[a]), num_features_per_node=features_per_node,
+                                                          current_depth=0)
+        rail_data = norm_obs_clip(rail_data)
+        distance_data = norm_obs_clip(distance_data)
         agent_data = np.clip(agent_data, -1, 1)
-        obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
-        agent_data = env.agents[a]
-        speed = 1 #np.random.randint(1,5)
-        agent_data.speed_data['speed'] = 1. / speed
-
-    for i in range(2):
-        time_obs.append(obs)
-    # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
-    for a in range(env.get_num_agents()):
-        agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
+        agent_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
 
     # Reset score and done
     score = 0
     env_done = 0
 
     # Run episode
     for step in range(max_steps):
-        if demo:
+        # Only render when not triaing
+        if not Training:
             env_renderer.renderEnv(show=True, show_observations=True)
-            # observation_helper.util_print_obs_subtree(obs_original[0])
-            if record_images:
-                env_renderer.gl.saveImage("./Images/flatland_frame_{:04d}.bmp".format(step))
-        # print(step)
 
-        # Action
+        # Chose the actions
         for a in range(env.get_num_agents()):
-            if demo:
+            if not Training:
                 eps = 0
-            # action = agent.act(np.array(obs[a]), eps=eps)
             action = agent.act(agent_obs[a], eps=eps)
-            action_prob[action] += 1
             action_dict.update({a: action})
+            # Count number of actions takes for statistics
+            action_prob[action] += 1
 
         # Environment step
         next_obs, all_rewards, done, _ = env.step(action_dict)
-        # print(all_rewards,action)
-        obs_original = next_obs.copy()
+
         for a in range(env.get_num_agents()):
-            data, distance, agent_data = split_tree(tree=np.array(next_obs[a]), num_features_per_node=features_per_node,
-                                                    current_depth=0)
-            data = norm_obs_clip(data)
-            distance = norm_obs_clip(distance)
+            rail_data, distance_data, agent_data = split_tree(tree=np.array(next_obs[a]), num_features_per_node=features_per_node,
+                                                              current_depth=0)
+            rail_data = norm_obs_clip(rail_data)
+            distance_data = norm_obs_clip(distance_data)
             agent_data = np.clip(agent_data, -1, 1)
-            next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
-        time_obs.append(next_obs)
+            agent_next_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
 
         # Update replay buffer and train agent
         for a in range(env.get_num_agents()):
-            agent_next_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
             if done[a]:
                 final_obs[a] = agent_obs[a].copy()
                 final_obs_next[a] = agent_next_obs[a].copy()
                 final_action_dict.update({a: action_dict[a]})
-            if not demo and not done[a]:
+            # Remember and train agent
+            if Training:
                 agent.step(agent_obs[a], action_dict[a], all_rewards[a], agent_next_obs[a], done[a])
+
+            # Update the current score
             score += all_rewards[a] / env.get_num_agents()
 
         agent_obs = agent_next_obs.copy()
         if done['__all__']:
             env_done = 1
             for a in range(env.get_num_agents()):
                 agent.step(final_obs[a], final_action_dict[a], all_rewards[a], final_obs_next[a], done[a])
             break
 
     # Epsilon decay
     eps = max(eps_end, eps_decay * eps)  # decrease epsilon
 
     # Store the information about training progress
     done_window.append(env_done)
     scores_window.append(score / max_steps)  # save most recent score
     scores.append(np.mean(scores_window))
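A side note on the changed eps_decay value, as a hedged sketch (not part of the commit): with multiplicative decay per episode, epsilon reaches the floor eps_end after roughly log(eps_end)/log(eps_decay) episodes, so 0.998 anneals well within the new 6000-episode budget, while the old 0.9995 was sized for the old 30000-episode run.

# Sketch only, not part of the commit: episodes until the epsilon floor is reached.
import math

def episodes_to_floor(eps_start, eps_end, eps_decay):
    return math.ceil(math.log(eps_end / eps_start) / math.log(eps_decay))

print(episodes_to_floor(1.0, 0.005, 0.998))    # about 2647 episodes (new value, n_trials = 6000)
print(episodes_to_floor(1.0, 0.005, 0.9995))   # about 10595 episodes (old value, n_trials = 30000)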
@@ -200,22 +156,68 @@ for trials in range(1, n_trials + 1):
     print(
         '\rTraining {} Agents on ({},{}).\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
             env.get_num_agents(), x_dim, y_dim,
             trials,
             np.mean(scores_window),
             100 * np.mean(done_window),
             eps, action_prob / np.sum(action_prob)), end=" ")
 
     if trials % 100 == 0:
         print(
-            '\rTraining {} Agents.\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
-                env.get_num_agents(),
+            '\rTraining {} Agents on ({},{}).\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
+                env.get_num_agents(), x_dim, y_dim,
                 trials,
                 np.mean(scores_window),
                 100 * np.mean(done_window),
                 eps, action_prob / np.sum(action_prob)))
         torch.save(agent.qnetwork_local.state_dict(),
-                   './Nets/avoid_checkpoint' + str(trials) + '.pth')
+                   './Nets/navigator_checkpoint' + str(trials) + '.pth')
         action_prob = [1] * action_size
+
+# Render the trained agent
+
+# Reset environment
+obs = env.reset(True, True)
+env_renderer.set_new_rail()
+
+# Split the observation tree into its parts and normalize the observation using the utility functions.
+# Build agent specific local observation
+for a in range(env.get_num_agents()):
+    rail_data, distance_data, agent_data = split_tree(tree=np.array(obs[a]), num_features_per_node=features_per_node,
+                                                      current_depth=0)
+    rail_data = norm_obs_clip(rail_data)
+    distance_data = norm_obs_clip(distance_data)
+    agent_data = np.clip(agent_data, -1, 1)
+    agent_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
+
+# Reset score and done
+score = 0
+env_done = 0
+
+# Run episode
+for step in range(max_steps):
+    env_renderer.renderEnv(show=True, show_observations=False)
+
+    # Chose the actions
+    for a in range(env.get_num_agents()):
+        eps = 0
+        action = agent.act(agent_obs[a], eps=eps)
+        action_dict.update({a: action})
+
+    # Environment step
+    next_obs, all_rewards, done, _ = env.step(action_dict)
+
+    for a in range(env.get_num_agents()):
+        rail_data, distance_data, agent_data = split_tree(tree=np.array(next_obs[a]), num_features_per_node=features_per_node,
+                                                          current_depth=0)
+        rail_data = norm_obs_clip(rail_data)
+        distance_data = norm_obs_clip(distance_data)
+        agent_data = np.clip(agent_data, -1, 1)
+        agent_next_obs[a] = np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))
+
+    agent_obs = agent_next_obs.copy()
+    if done['__all__']:
+        break
 
 # Plot overall training progress at the end
 plt.plot(scores)
 plt.show()
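The split_tree / norm_obs_clip / np.clip / np.concatenate preprocessing appears four times in the new file. Purely as an illustration (not part of the commit; the helper name is hypothetical), it could be factored into one function, reusing only the calls and keyword arguments that appear in the diff:

# Illustration only, not part of the commit; 'preprocess_observation' is a hypothetical helper.
import numpy as np
from utils.observation_utils import norm_obs_clip, split_tree

def preprocess_observation(tree_obs, features_per_node=9):
    # Split the tree observation into rail, distance and agent features,
    # normalize the first two, clip the last, and flatten into one vector.
    rail_data, distance_data, agent_data = split_tree(tree=np.array(tree_obs),
                                                      num_features_per_node=features_per_node,
                                                      current_depth=0)
    rail_data = norm_obs_clip(rail_data)
    distance_data = norm_obs_clip(distance_data)
    agent_data = np.clip(agent_data, -1, 1)
    return np.concatenate((np.concatenate((rail_data, distance_data)), agent_data))

# Usage in the loops above would then read: agent_obs[a] = preprocess_observation(obs[a])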