......@@ -53,12 +53,12 @@ procgen-ppo:
# Each key below is listed exactly once. Duplicate mapping keys are invalid
# YAML; loaders that tolerate them typically keep only the last occurrence, so
# the retained values are the ones from the final occurrence of each key.
standardize_rewards: True
aux_mbsize: 4
augment_buffer: False
scale_reward: 1.0
reset_returns: False
adaptive_gamma: False
final_lr: 2.0e-4
# Quoted, so this is the literal string 'None' rather than a YAML null.
# NOTE(review): presumably the consumer compares against this string to
# disable scheduling -- confirm against the code that reads lr_schedule.
lr_schedule: 'None'
final_entropy_coeff: 0.002
entropy_schedule: False
