neurips-2020-procgen-competition
Commit 04d5264d authored Oct 27, 2020 by Dipam Chakraborty
pi phase mixed precision
parent 606189ba
3 changed files
algorithms/custom_ppg/custom_torch_ppg.py
...
@@ -228,6 +228,48 @@ class CustomTorchPolicy(TorchPolicy):
                          ent_coef, vf_coef,
                          obs, returns, actions, values, logp_actions_old, advs):
+        if not self.config['pi_phase_mixed_precision']:
+            loss, vf_loss = self._calc_pi_vf_loss(apply_grad, num_accumulate, cliprange, vfcliprange,
+                                                  max_grad_norm, ent_coef, vf_coef,
+                                                  obs, returns, actions, values, logp_actions_old, advs)
+            loss.backward()
+            vf_loss.backward()
+            if apply_grad:
+                self.optimizer.step()
+                self.optimizer.zero_grad()
+                if not self.config['single_optimizer']:
+                    self.value_optimizer.step()
+                    self.value_optimizer.zero_grad()
+        else:
+            with autocast():
+                loss, vf_loss = self._calc_pi_vf_loss(apply_grad, num_accumulate, cliprange, vfcliprange,
+                                                      max_grad_norm, ent_coef, vf_coef,
+                                                      obs, returns, actions, values, logp_actions_old, advs)
+            self.amp_scaler.scale(loss).backward(retain_graph=True)
+            self.amp_scaler.scale(vf_loss).backward()
+            if apply_grad:
+                self.amp_scaler.step(self.optimizer)
+                if not self.config['single_optimizer']:
+                    self.amp_scaler.step(self.value_optimizer)
+                self.amp_scaler.update()
+                self.optimizer.zero_grad()
+                if not self.config['single_optimizer']:
+                    self.value_optimizer.zero_grad()
+
+    def _calc_pi_vf_loss(self, apply_grad, num_accumulate, cliprange, vfcliprange, max_grad_norm,
+                         ent_coef, vf_coef,
+                         obs, returns, actions, values, logp_actions_old, advs):
         vpred, pi_logits = self.model.vf_pi(obs, ret_numpy=False, no_grad=False, to_torch=False)
         pd = self.make_distr(pi_logits)
         logp_actions = pd.log_prob(actions[..., None]).squeeze(1)
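For readers unfamiliar with the AMP calls used in the new else-branch above, here is a minimal, self-contained sketch of the same torch.cuda.amp pattern (autocast around the forward pass and losses, scaled backwards for two losses, scaler-driven steps for two optimizers). It is not the repository's code: the tiny policy/value heads, optimizers, and losses are placeholders, and a CUDA device is assumed.

    import torch
    import torch.nn as nn
    from torch.cuda.amp import autocast, GradScaler

    policy_head = nn.Linear(16, 4).cuda()           # placeholder policy network
    value_head = nn.Linear(16, 1).cuda()            # placeholder value network
    pi_optimizer = torch.optim.Adam(policy_head.parameters(), lr=1e-3)
    vf_optimizer = torch.optim.Adam(value_head.parameters(), lr=1e-3)
    scaler = GradScaler()

    obs = torch.randn(8, 16, device="cuda")
    with autocast():                                # forward pass and losses run in mixed precision
        pi_loss = policy_head(obs).logsumexp(-1).mean()    # stand-in for the clipped policy loss
        vf_loss = (value_head(obs) - 1.0).pow(2).mean()    # stand-in for the value loss

    scaler.scale(pi_loss).backward(retain_graph=True)   # scaled backward; retain_graph mirrors the diff
    scaler.scale(vf_loss).backward()
    scaler.step(pi_optimizer)                       # unscales gradients, skips the step on inf/NaN
    scaler.step(vf_optimizer)
    scaler.update()                                 # adapt the loss scale for the next iteration
    pi_optimizer.zero_grad()
    vf_optimizer.zero_grad()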
...
@@ -244,17 +286,8 @@ class CustomTorchPolicy(TorchPolicy):
         loss = loss / num_accumulate
         vf_loss = vf_loss / num_accumulate
-        loss.backward()
-        vf_loss.backward()
-        if apply_grad:
-            self.optimizer.step()
-            self.optimizer.zero_grad()
-            if not self.config['single_optimizer']:
-                self.value_optimizer.step()
-                self.value_optimizer.zero_grad()
+        return loss, vf_loss

     def aux_train(self):
         nbatch_train = self.mem_limited_batch_size
         retune_epochs = self.config['retune_epochs']
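The second hunk finishes the refactor: _calc_pi_vf_loss now just divides both losses by num_accumulate and returns them, while backward() and the optimizer steps live in the caller shown earlier. Dividing each micro-batch loss by the accumulation count makes the summed gradients equal the gradient of the mean loss over the window, as in this small illustrative sketch (not the repository's code):

    import torch

    w = torch.nn.Parameter(torch.tensor(1.0))
    optimizer = torch.optim.SGD([w], lr=0.1)
    num_accumulate = 4

    for i, x in enumerate((1.0, 2.0, 3.0, 4.0)):     # four stand-in micro-batches
        loss = w * x
        (loss / num_accumulate).backward()           # gradients accumulate in w.grad
        apply_grad = (i + 1) % num_accumulate == 0
        if apply_grad:                               # step once per accumulation window
            # here w.grad == (1 + 2 + 3 + 4) / 4 == 2.5, the gradient of the mean loss
            optimizer.step()
            optimizer.zero_grad()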
...
@@ -294,6 +327,7 @@ class CustomTorchPolicy(TorchPolicy):
                     self.value_optimizer.step()
             else:
                 self.optimizer.step()
         else:
             with autocast():
...
algorithms/custom_ppg/ppg.py
...
@@ -97,6 +97,7 @@ DEFAULT_CONFIG = with_common_config({
"aux_phase_mixed_precision"
:
False
,
"aux_phase_mixed_precision"
:
False
,
"single_optimizer"
:
False
,
"single_optimizer"
:
False
,
"max_time"
:
7200
,
"max_time"
:
7200
,
"pi_phase_mixed_precision"
:
False
,
})
})
# __sphinx_doc_end__
# __sphinx_doc_end__
# yapf: enable
# yapf: enable
...
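The default for the new key is False, so existing configs are unchanged unless they opt in. A hypothetical sketch (not taken from the repository) of how a flag like this typically gates GradScaler creation at policy setup:

    from torch.cuda.amp import GradScaler

    config = {"pi_phase_mixed_precision": False, "aux_phase_mixed_precision": False}

    use_amp = config["pi_phase_mixed_precision"] or config["aux_phase_mixed_precision"]
    amp_scaler = GradScaler(enabled=use_amp)     # a disabled scaler turns scale()/step() into no-ops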
experiments/custom-ppg.yaml
...
@@ -62,6 +62,7 @@ procgen-ppo:
     aux_phase_mixed_precision: True
     single_optimizer: True
     max_time: 7200
+    pi_phase_mixed_precision: True
     adaptive_gamma: False
     final_lr: 5.0e-5
...
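The experiment file flips the same flag on for training runs. A hypothetical sanity check that the setting survives YAML parsing; the "config" nesting under procgen-ppo is an assumption, not shown in the diff:

    import yaml   # PyYAML, assumed available

    with open("experiments/custom-ppg.yaml") as f:
        experiment = yaml.safe_load(f)

    ppo_config = experiment["procgen-ppo"]["config"]      # "config" key is an assumption
    assert ppo_config["pi_phase_mixed_precision"] is True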