Commit 3f56f4df authored by Dipam Chakraborty
Browse files

ppg rnorm bugfix

parent 1c6b58be
......@@ -88,7 +88,7 @@ class CustomTorchPolicy(TorchPolicy):
self.save_success = 0
self.target_timesteps = 8_000_000
self.buffer_time = 20 # TODO: Could try to do a median or mean time step check instead
self.max_time = 100000000
self.max_time = self.config['max_time']
self.maxrewep_lenbuf = deque(maxlen=100)
self.gamma = self.config['gamma']
self.adaptive_discount_tuner = AdaptiveDiscountTuner(self.gamma, momentum=0.98, eplenmult=3)
......@@ -137,7 +137,7 @@ class CustomTorchPolicy(TorchPolicy):
mb_rewards = np.zeros_like(mb_origrewards)
mb_rewards[0] = self.rewnorm.normalize(mb_origrewards[0], self.last_dones, self.config["reset_returns"])
for ii in range(1, nsteps):
mb_rewards[ii] = self.rewnorm.normalize(mb_origrewards[ii], mb_dones[ii-1])
mb_rewards[ii] = self.rewnorm.normalize(mb_origrewards[ii], mb_dones[ii-1], self.config["reset_returns"])
self.last_dones = mb_dones[-1]
else:
mb_rewards = unroll(samples['rewards'], ts)
......
......@@ -96,6 +96,7 @@ DEFAULT_CONFIG = with_common_config({
"same_lr_everywhere": False,
"aux_phase_mixed_precision": False,
"single_optimizer": False,
"max_time": 7200,
})
# __sphinx_doc_end__
# yapf: enable
......
......@@ -178,7 +178,7 @@ class RewardNormalizer(object):
self.cliprew = cliprew
self.ret = 0. # size updates after first pass
def normalize(self, rews, news, resetrew=True):
def normalize(self, rews, news, resetrew):
self.ret = self.ret * self.gamma + rews
self.ret_rms.update(self.ret)
rews = np.clip(rews / np.sqrt(self.ret_rms.var + self.epsilon), -self.cliprew, self.cliprew)
......
......@@ -47,7 +47,7 @@ procgen-ppo:
# Custom switches
skips: 0
n_pi: 16
num_retunes: 100
num_retunes: 29
retune_epochs: 6
standardize_rewards: True
aux_mbsize: 4
......@@ -61,6 +61,7 @@ procgen-ppo:
same_lr_everywhere: False
aux_phase_mixed_precision: True
single_optimizer: True
max_time: 7200
adaptive_gamma: False
final_lr: 5.0e-5
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment