Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Dipam Chakraborty
neurips-2020-procgen-competition
Commits
2aeebdfe
Commit
2aeebdfe
authored
Oct 28, 2020
by
Dipam Chakraborty
Browse files
ppg aux accumulate
parent
c4d01795
Changes
5
Hide whitespace changes
Inline
Side-by-side
algorithms/custom_ppg/custom_torch_ppg.py
View file @
2aeebdfe
...
...
@@ -206,7 +206,7 @@ class CustomTorchPolicy(TorchPolicy):
apply_grad
=
(
optim_count
%
self
.
accumulate_train_batches
)
==
0
self
.
_batch_train
(
apply_grad
,
self
.
accumulate_train_batches
,
cliprange
,
vfcliprange
,
max_grad_norm
,
ent_coef
,
vf_coef
,
*
slices
)
## Distill with aux head
should_retune
=
self
.
retune_selector
.
update
(
unroll
(
obs
,
ts
),
mb_returns
)
if
should_retune
:
...
...
@@ -300,14 +300,15 @@ class CustomTorchPolicy(TorchPolicy):
ret_numpy
=
True
,
no_grad
=
True
,
to_torch
=
True
)
# Tune vf and pi heads to older predictions with (augmented?) observations
num_accumulate
=
self
.
config
[
'aux_num_accumulates'
]
num_rollouts
=
self
.
config
[
'aux_mbsize'
]
for
ep
in
range
(
retune_epochs
):
counter
=
0
for
slices
in
self
.
retune_selector
.
make_minibatches
(
replay_pi
):
for
slices
in
self
.
retune_selector
.
make_minibatches
(
replay_pi
,
num_rollouts
):
counter
+=
1
apply_grad
=
(
counter
%
2
)
==
0
apply_grad
=
(
counter
%
num_accumulate
)
==
0
self
.
tune_policy
(
slices
[
0
],
self
.
to_tensor
(
slices
[
1
]),
self
.
to_tensor
(
slices
[
2
]),
apply_grad
,
num_accumulate
=
2
)
apply_grad
,
num_accumulate
)
self
.
retunes_completed
+=
1
self
.
retune_selector
.
retune_done
()
...
...
algorithms/custom_ppg/ppg.py
View file @
2aeebdfe
...
...
@@ -98,6 +98,7 @@ DEFAULT_CONFIG = with_common_config({
"single_optimizer"
:
False
,
"max_time"
:
7200
,
"pi_phase_mixed_precision"
:
False
,
"aux_num_accumulates"
:
1
,
})
# __sphinx_doc_end__
# yapf: enable
...
...
algorithms/custom_ppg/utils.py
View file @
2aeebdfe
...
...
@@ -144,7 +144,7 @@ class RetuneSelector:
self
.
replay_index
=
0
def
make_minibatches
(
self
,
presleep_pi
,
num_rollouts
=
4
):
def
make_minibatches
(
self
,
presleep_pi
,
num_rollouts
):
if
not
self
.
flat_buffer
:
env_segs
=
list
(
itertools
.
product
(
range
(
self
.
n_pi
),
range
(
self
.
nenvs
)))
np
.
random
.
shuffle
(
env_segs
)
...
...
experiments/custom-ppg.yaml
View file @
2aeebdfe
...
...
@@ -45,12 +45,13 @@ procgen-ppo:
no_done_at_end
:
False
# Custom switches
skips: 6
n_pi: 10
skips: 0
n_pi: 16
num_retunes
:
16
retune_epochs
:
6
standardize_rewards
:
True
aux_mbsize
:
4
aux_num_accumulates
:
3
augment_buffer
:
True
scale_reward
:
1.0
reset_returns
:
False
...
...
@@ -62,7 +63,7 @@ procgen-ppo:
aux_phase_mixed_precision
:
True
single_optimizer
:
True
max_time
:
7200
pi_phase_mixed_precision: True
pi_phase_mixed_precision: False
adaptive_gamma
:
False
final_lr
:
1.0e-4
...
...
@@ -71,7 +72,7 @@ procgen-ppo:
entropy_schedule
:
False
# Memory management, if batch size overflow, batch splitting is done to handle it
max_minibatch_size: 500
max_minibatch_size: 1000
updates_per_batch
:
8
normalize_actions
:
False
...
...
@@ -88,10 +89,10 @@ procgen-ppo:
model
:
custom_model
:
impala_torch_ppg
custom_model_config
:
#
depths: [32, 64, 64]
#
nlatents: 512
depths: [64, 128, 128]
nlatents: 1024
depths: [32, 64, 64]
nlatents: 512
#
depths: [64, 128, 128]
#
nlatents: 1024
init_normed
:
True
use_layernorm
:
False
diff_framestack
:
True
...
...
experiments/custom-torch-ppo.yaml
View file @
2aeebdfe
...
...
@@ -47,14 +47,14 @@ procgen-ppo:
# Custom switches
retune_skips
:
100000
retune_replay_size: 400000
num_retunes: 14
retune_replay_size: 450000
num_retunes: 13
retune_epochs
:
3
standardize_rewards
:
True
scale_reward
:
1.0
return_reset
:
False
aux_phase_mixed_precision
:
True
max_time: 7200
max_time: 1000000
adaptive_gamma
:
False
final_lr
:
5.0e-5
...
...
@@ -63,7 +63,7 @@ procgen-ppo:
entropy_schedule
:
False
# Memory management, if batch size overflow, batch splitting is done to handle it
max_minibatch_size
:
1000
max_minibatch_size
:
2048
updates_per_batch
:
8
normalize_actions
:
False
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment