Commit cb4e668d authored by Dipam Chakraborty's avatar Dipam Chakraborty
Browse files

ppg npi 16

parent c0820e84
......@@ -120,7 +120,7 @@ class CustomTorchPolicy(TorchPolicy):
if self.config['standardize_rewards']:
mb_origrewards = unroll(samples['rewards'], ts)
mb_rewards = np.zeros_like(mb_origrewards)
mb_rewards[0] = self.rewnorm.normalize(mb_origrewards[0], self.last_dones)
mb_rewards[0] = self.rewnorm.normalize(mb_origrewards[0], self.last_dones, self.config["reset_returns"])
for ii in range(1, nsteps):
mb_rewards[ii] = self.rewnorm.normalize(mb_origrewards[ii], mb_dones[ii-1])
self.last_dones = mb_dones[-1]
......
......@@ -88,6 +88,7 @@ DEFAULT_CONFIG = with_common_config({
"updates_per_batch": 8,
"aux_mbsize": 4,
"augment_buffer": False,
"reset_returns": True,
})
# __sphinx_doc_end__
# yapf: enable
......
......@@ -156,11 +156,12 @@ class RewardNormalizer(object):
self.cliprew = cliprew
self.ret = 0. # size updates after first pass
def normalize(self, rews, news, resetrew=True):
    """Normalize a vector of rewards by the running std of the discounted return.

    Args:
        rews: per-env rewards for the current step (array-like of floats).
        news: per-env done flags for the current step; truthy entries mark
            envs whose episode just ended. Values must be interpretable as
            True or False so they can be used as a boolean positional index.
        resetrew: when True (default, preserving prior behavior), the running
            return accumulator is zeroed for envs flagged done in `news`;
            when False the discounted return keeps accumulating across
            episode boundaries (controlled by the "reset_returns" config).

    Returns:
        The rewards scaled by 1/sqrt(var + epsilon) and clipped to
        [-cliprew, cliprew]. Note: the running statistics are updated with
        the *unnormalized* discounted return before scaling.
    """
    # Accumulate the discounted return, then update the running variance
    # estimate from it — the scale is derived from returns, not raw rewards.
    self.ret = self.ret * self.gamma + rews
    self.ret_rms.update(self.ret)
    rews = np.clip(rews / np.sqrt(self.ret_rms.var + self.epsilon), -self.cliprew, self.cliprew)
    if resetrew:
        # Zero the return accumulator for finished episodes; `news` entries
        # must be True or False to serve as a positional boolean index.
        self.ret[np.array(news, dtype=bool)] = 0.
    return rews
class RunningMeanStd(object):
......
......@@ -141,7 +141,8 @@ class CustomCallbacks(DefaultCallbacks):
result['return_min'] = trainer_policy.config['env_config']['return_min']
result['return_blind'] = trainer_policy.config['env_config']['return_blind']
result['return_max'] = trainer_policy.config['env_config']['return_max']
result['buffer_save_success'] = trainer_policy.save_success
# result['buffer_save_success'] = trainer_policy.save_success
result['retunes_completed'] = trainer_policy.retunes_completed
......
......@@ -47,13 +47,14 @@ procgen-ppo:
# Custom switches
skips: 0
n_pi: 16
num_retunes: 100
retune_epochs: 3
standardize_rewards: True
aux_mbsize: 4
augment_buffer: False
scale_reward: 0.6
reset_returns: True
adaptive_gamma: False
final_lr: 2.0e-4
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment