Commit 910b63f7 authored by Chakraborty's avatar Chakraborty
Browse files
parents fada618e f07abc5e
......@@ -286,7 +286,6 @@ class CustomTorchPolicy(TorchPolicy):
for nnpi in range(self.retune_selector.n_pi):
for ne in range(self.retune_selector.nenvs):
import pdb; pdb.set_trace()
replay_vf[nnpi, :, ne], replay_pi[nnpi, :, ne] = self.model.vf_pi(self.retune_selector.exp_replay[nnpi, :, ne],
ret_numpy=True, no_grad=True, to_torch=True)
......@@ -301,7 +300,7 @@ class CustomTorchPolicy(TorchPolicy):
num_rollouts = self.config['aux_mbsize']
for ep in range(retune_epochs):
counter = 0
for slices in self.retune_selector.make_minibatches(replay_pi, returns_buffer, num_rollouts):
for slices in self.retune_selector.make_minibatches(replay_pi, new_returns, num_rollouts):
counter += 1
apply_grad = (counter % num_accumulate) == 0
self.tune_policy(slices[0], self.to_tensor(slices[1]), self.to_tensor(slices[2]),
......
......@@ -45,9 +45,9 @@ procgen-ppo:
no_done_at_end: False
# Custom switches
skips: 0
n_pi: 1
num_retunes: 14
skips: 2
n_pi: 16
num_retunes: 15
retune_epochs: 7
standardize_rewards: True
aux_mbsize: 4
......
......@@ -7,8 +7,8 @@ set -e
# export EXPERIMENT_DEFAULT="experiments/impala-baseline.yaml"
# export EXPERIMENT_DEFAULT="experiments/custom-torch-ppo.yaml"
export EXPERIMENT_DEFAULT="experiments/custom-ppg.yaml"
# export EXPERIMENT_DEFAULT="experiments/ppg-experimental.yaml"
# export EXPERIMENT_DEFAULT="experiments/custom-ppg.yaml"
export EXPERIMENT_DEFAULT="experiments/ppg-experimental.yaml"
export EXPERIMENT=${EXPERIMENT:-$EXPERIMENT_DEFAULT}
if [[ -z $AICROWD_IS_GRADING ]]; then
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment