Commit 606189ba authored by Dipam Chakraborty
Browse files

ppo max time

parent 4a9bbaea
......@@ -77,7 +77,7 @@ class CustomTorchPolicy(TorchPolicy):
self.exp_replay = np.empty((self.retune_selector.replay_size, *self.observation_space.shape), dtype=np.uint8)
self.target_timesteps = 8_000_000
self.buffer_time = 20 # TODO: Could try to do a median or mean time step check instead
self.max_time = 10000000000000 # ignore timekeeping because spot instances are messing it up
self.max_time = self.config['max_time']
self.maxrewep_lenbuf = deque(maxlen=100)
self.gamma = self.config['gamma']
self.adaptive_discount_tuner = AdaptiveDiscountTuner(self.gamma, momentum=0.98, eplenmult=3)
......
......@@ -89,6 +89,7 @@ DEFAULT_CONFIG = with_common_config({
"scale_reward": 1.0,
"return_reset": True,
"aux_phase_mixed_precision": False,
"max_time": 100000000,
})
# __sphinx_doc_end__
# yapf: enable
......
......@@ -46,14 +46,15 @@ procgen-ppo:
no_done_at_end: False
# Custom switches
retune_skips: 50000
retune_replay_size: 200000
num_retunes: 28
retune_skips: 100000
retune_replay_size: 400000
num_retunes: 14
retune_epochs: 3
standardize_rewards: True
scale_reward: 1.0
return_reset: False
aux_phase_mixed_precision: True
max_time: 7200
adaptive_gamma: False
final_lr: 5.0e-5
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment