Flatland / flatland-starter-kit

Commit d40319cf, authored Aug 23, 2021 by nilabha

update single agent and sequential agent codes to new version

parent b845ca2c
2 changed files
reinforcement_learning/sequential_agent.py
 import sys
 import PIL
 import numpy as np
 from flatland.envs.observations import TreeObsForRailEnv
 from flatland.envs.predictions import ShortestPathPredictorForRailEnv
 from flatland.envs.rail_env import RailEnv
-from flatland.envs.rail_generators import complex_rail_generator
-from flatland.envs.schedule_generators import complex_schedule_generator
+from flatland.envs.rail_generators import sparse_rail_generator
+from flatland.envs.line_generators import sparse_line_generator
 from flatland.utils.rendertools import RenderTool
 from pathlib import Path
...
...
@@ -23,36 +24,29 @@ multi_agent_training.py is a better starting point to train your own solution!
 np.random.seed(2)

-x_dim = np.random.randint(8, 20)
-y_dim = np.random.randint(8, 20)
+x_dim = np.random.randint(30, 35)
+y_dim = np.random.randint(30, 35)
 n_agents = np.random.randint(3, 8)
 n_goals = n_agents + np.random.randint(0, 3)
 min_dist = int(0.75 * min(x_dim, y_dim))

 env = RailEnv(width=x_dim,
               height=y_dim,
-              rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5,
-                                                    min_dist=min_dist, max_dist=99999, seed=0),
-              schedule_generator=complex_schedule_generator(),
+              rail_generator=sparse_rail_generator(),
+              line_generator=sparse_line_generator(),
               obs_builder_object=TreeObsForRailEnv(max_depth=1, predictor=ShortestPathPredictorForRailEnv()),
               number_of_agents=n_agents)
 env.reset(True, True)

 tree_depth = 1
 observation_helper = TreeObsForRailEnv(max_depth=tree_depth, predictor=ShortestPathPredictorForRailEnv())
 env_renderer = RenderTool(env, gl="PGL", )
 handle = env.get_agent_handles()

-n_episodes = 10
+n_episodes = 1
 max_steps = 100 * (env.height + env.width)
-record_images = False
+record_images = True
 policy = OrderedPolicy()
 action_dict = dict()
+frame_list = []

 for trials in range(1, n_episodes + 1):

     # Reset environment
     obs, info = env.reset(True, True)
     done = env.dones
...
...
@@ -61,10 +55,10 @@ for trials in range(1, n_episodes + 1):
     # Run episode
     for step in range(max_steps):
-        env_renderer.render_env(show=True, show_observations=False, show_predictions=True)
+        env_renderer.render_env(show=False, show_observations=False, show_predictions=True)

         if record_images:
-            env_renderer.gl.save_image("./Images/flatland_frame_{:04d}.bmp".format(frame_step))
+            frame_list.append(PIL.Image.fromarray(env_renderer.gl.get_image()))
             frame_step += 1

         # Action
...
...
@@ -82,4 +76,8 @@ for trials in range(1, n_episodes + 1):
         obs, all_rewards, done, _ = env.step(action_dict)

         if done['__all__']:
+            print(done)
+            if record_images:
+                frame_list[0].save(f"flatland_sequential_agent_{trials}.gif", save_all=True, append_images=frame_list[1:], duration=3, loop=0)
+                frame_list = []
             break
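The GIF-recording pattern introduced in this file can be exercised on its own. The following is a minimal, hedged sketch (not part of the commit): it only reuses calls that already appear in the diff above (sparse_rail_generator, sparse_line_generator, RenderTool, PIL.Image.fromarray, Image.save with append_images). The grid size, agent count, step count, and frame duration are illustrative, and exact keyword arguments may differ between flatland-rl versions.

```python
import PIL.Image

from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.line_generators import sparse_line_generator
from flatland.utils.rendertools import RenderTool

# Build a small environment with the new-style generators, as in the diff above.
env = RailEnv(width=30,
              height=30,
              rail_generator=sparse_rail_generator(),
              line_generator=sparse_line_generator(),
              obs_builder_object=TreeObsForRailEnv(max_depth=1,
                                                   predictor=ShortestPathPredictorForRailEnv()),
              number_of_agents=3)
env.reset(True, True)

# Render a handful of frames off-screen and collect them as PIL images.
renderer = RenderTool(env, gl="PGL")
frames = []
for _ in range(10):
    renderer.render_env(show=False, show_observations=False, show_predictions=True)
    frames.append(PIL.Image.fromarray(renderer.gl.get_image()))
    # Advance the episode by one step with the "do nothing" action (0) for every agent.
    env.step({handle: 0 for handle in env.get_agent_handles()})

# Write the collected frames as an animated GIF (Pillow's save_all/append_images API).
frames[0].save("flatland_preview.gif", save_all=True,
               append_images=frames[1:], duration=100, loop=0)
```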
reinforcement_learning/single_agent_training.py
...
...
@@ -3,6 +3,9 @@ import sys
 from argparse import ArgumentParser, Namespace
 from collections import deque
 from pathlib import Path

+import PIL
+
+from flatland.utils.rendertools import RenderTool

 base_dir = Path(__file__).resolve().parent.parent
 sys.path.append(str(base_dir))
...
...
@@ -14,7 +17,7 @@ import torch
 from flatland.envs.rail_env import RailEnv
 from flatland.envs.rail_generators import sparse_rail_generator
-from flatland.envs.schedule_generators import sparse_schedule_generator
+from flatland.envs.line_generators import sparse_line_generator
 from utils.observation_utils import normalize_observation
 from flatland.envs.observations import TreeObsForRailEnv
...
...
@@ -30,8 +33,8 @@ multi_agent_training.py is a better starting point to train your own solution!
 def train_agent(n_episodes):
     # Environment parameters
     n_agents = 1
-    x_dim = 25
-    y_dim = 25
+    x_dim = 30
+    y_dim = 30
     n_cities = 4
     max_rails_between_cities = 2
     max_rails_in_city = 3
...
...
@@ -62,9 +65,9 @@ def train_agent(n_episodes):
             seed=seed,
             grid_mode=False,
             max_rails_between_cities=max_rails_between_cities,
-            max_rails_in_city=max_rails_in_city
+            max_rail_pairs_in_city=max_rails_in_city
         ),
-        schedule_generator=sparse_schedule_generator(),
+        line_generator=sparse_line_generator(),
         number_of_agents=n_agents,
         obs_builder_object=tree_observation
     )
...
...
@@ -83,7 +86,7 @@ def train_agent(n_episodes):
     # Max number of steps per episode
     # This is the official formula used during evaluations
-    max_steps = int(4 * 2 * (env.height + env.width + (n_agents / n_cities)))
+    max_steps = int(100 * (env.height + env.width + (n_agents / n_cities)))

     action_dict = dict()
...
...
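As an aside, the two step-budget formulas above differ by more than an order of magnitude. A small illustrative calculation, using the default values set earlier in this file's diff (30x30 grid, 1 agent, 4 cities); this is not part of the commit:

```python
# Illustrative values matching the defaults in this diff.
height, width = 30, 30
n_agents, n_cities = 1, 4

old_max_steps = int(4 * 2 * (height + width + (n_agents / n_cities)))  # -> 482
new_max_steps = int(100 * (height + width + (n_agents / n_cities)))    # -> 6025

print(old_max_steps, new_max_steps)
```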
@@ -113,12 +116,19 @@ def train_agent(n_episodes):
     # Double Dueling DQN policy
     policy = DDDQNPolicy(state_size, action_size, Namespace(**training_parameters))

+    record_images = False
+    # env_renderer = RenderTool(env, gl="PGL", )
+    frame_list = []
     for episode_idx in range(n_episodes):
         score = 0

+        if episode_idx == n_episodes - 1:
+            record_images = True
+
         # Reset environment
         obs, info = env.reset(regenerate_rail=True, regenerate_schedule=True)
+        if record_images:
+            env_renderer = RenderTool(env, gl="PGL", )
+            env_renderer.reset()
+            # env_renderer.set_new_rail()

         # Build agent specific observations
         for agent in env.get_agent_handles():
...
...
@@ -127,7 +137,7 @@ def train_agent(n_episodes):
             agent_prev_obs[agent] = agent_obs[agent].copy()

         # Run episode
-        for step in range(max_steps - 1):
+        for step in range(max_steps * 3 - 1):
             for agent in env.get_agent_handles():
                 if info['action_required'][agent]:
                     # If an action is required, we want to store the obs at that step as well as the action
...
...
@@ -141,6 +151,9 @@ def train_agent(n_episodes):
             # Environment step
             next_obs, all_rewards, done, info = env.step(action_dict)

+            if record_images:
+                env_renderer.render_env(show=False, show_observations=False, show_predictions=True)
+                frame_list.append(PIL.Image.fromarray(env_renderer.gl.get_image()))

             # Update replay buffer and train agent
             for agent in range(env.get_num_agents()):
...
...
@@ -157,6 +170,14 @@ def train_agent(n_episodes):
                 score += all_rewards[agent]

             if done['__all__']:
+                if record_images:
+                    print(done)
+                    tasks_done = np.sum([int(done[idx]) for idx in env.get_agent_handles()])
+                    completed = tasks_done / max(1, env.get_num_agents())
+                    print(completed)
+                    frame_list[0].save(f"flatland_single_agent_{episode_idx}.gif", save_all=True, append_images=frame_list[1:], duration=3, loop=0)
+                    frame_list = []
+                    # env_renderer.close_window()
                 break

         # Epsilon decay
...
...
@@ -187,17 +208,54 @@ def train_agent(n_episodes):
             action_probs
         ), end=end)

+    # Run episode with trained policy
+    obs, info = env.reset(regenerate_rail=True, regenerate_schedule=True)
+    env_renderer.reset()
+    frame_list = []
+    for step in range(max_steps - 1):
+        env_renderer.render_env(show=False, show_observations=False, show_predictions=True)
+        frame_list.append(PIL.Image.fromarray(env_renderer.gl.get_image()))
+
+        for agent in env.get_agent_handles():
+            if obs[agent]:
+                agent_obs[agent] = normalize_observation(obs[agent], observation_tree_depth, observation_radius=observation_radius)
+
+            action = 0
+            if info['action_required'][agent]:
+                action = policy.act(agent_obs[agent], eps=0.0)
+            action_dict.update({agent: action})
+
+        obs, all_rewards, done, info = env.step(action_dict)
+
+        for agent in env.get_agent_handles():
+            score += all_rewards[agent]
+
+        if done['__all__']:
+            frame_list[0].save(f"flatland_single_agent.gif", save_all=True, append_images=frame_list[1:], duration=3, loop=0)
+            frame_list = []
+            break
+
+    normalized_score = score / (max_steps * env.get_num_agents())
+    print(normalized_score)
+
+    tasks_finished = sum(done[idx] for idx in env.get_agent_handles())
+    completion = tasks_finished / max(1, env.get_num_agents())
+    print(completion)
+
     # Plot overall training progress at the end
     plt.plot(scores)
-    plt.show()
+    plt.savefig('scores.png')
+    # plt.show()

     plt.plot(completion)
-    plt.show()
+    plt.savefig('completion.png')
+    # plt.show()


 if __name__ == "__main__":
     parser = ArgumentParser()
-    parser.add_argument("-n", "--n_episodes", dest="n_episodes", help="number of episodes to run", default=500, type=int)
+    parser.add_argument("-n", "--n_episodes", dest="n_episodes", help="number of episodes to run", default=200, type=int)
     args = parser.parse_args()

     train_agent(args.n_episodes)
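The two recurring patterns added to this script (record frames only on selected episodes, then write a GIF; save matplotlib plots to disk instead of showing them) can be isolated as follows. This is a self-contained, hedged sketch, not part of the commit: the frames here are synthetic noise standing in for env_renderer.gl.get_image(), and the Agg backend is an assumption for headless runs.

```python
import numpy as np
import PIL.Image
import matplotlib
matplotlib.use("Agg")  # assumption: non-interactive backend so savefig works headlessly
import matplotlib.pyplot as plt

n_episodes = 3
scores = []

for episode_idx in range(n_episodes):
    record_images = (episode_idx == n_episodes - 1)  # record only the last episode
    frame_list = []

    for step in range(20):
        if record_images:
            # Synthetic frame standing in for env_renderer.gl.get_image().
            fake_frame = (np.random.rand(60, 60, 3) * 255).astype(np.uint8)
            frame_list.append(PIL.Image.fromarray(fake_frame))

    scores.append(-float(episode_idx))  # placeholder episode score

    if record_images and frame_list:
        # Same Pillow call as in the diff: first frame saves, the rest are appended.
        frame_list[0].save(f"sketch_episode_{episode_idx}.gif", save_all=True,
                           append_images=frame_list[1:], duration=100, loop=0)

# Persist the training curve to a file, mirroring the plt.savefig() change above.
plt.plot(scores)
plt.savefig("scores.png")
```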