Commit f473f516 authored by Dipam Chakraborty

ppg npi 32 softq eval 0.5

parent f2a86f1f
@@ -225,8 +225,6 @@ class CustomTorchPolicy(TorchPolicy):
                 loss.backward()
                 vf_loss.backward()
             if apply_grad:
-                if self.config['grad_clip'] is not None:
-                    nn.utils.clip_grad_norm_(self.model.parameters(), self.config['grad_clip'])
                 self.optimizer.step()
                 self.optimizer.zero_grad()
                 if not self.config['single_optimizer']:
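The two removed lines applied PyTorch's global gradient-norm clipping just before the optimizer step. A minimal, self-contained sketch of that pattern (the model, optimizer, and grad_clip value below are illustrative stand-ins, not the repo's actual objects):

import torch
import torch.nn as nn

# Illustrative stand-ins for the policy's model, optimizer, and config value.
model = nn.Linear(8, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
grad_clip = 0.5  # plays the role of self.config['grad_clip']

x = torch.randn(16, 8)
loss = model(x).pow(2).mean()
loss.backward()

# Global-norm clipping: rescale all gradients so their combined L2 norm
# does not exceed grad_clip. This is the call the commit removes.
if grad_clip is not None:
    nn.utils.clip_grad_norm_(model.parameters(), grad_clip)

optimizer.step()
optimizer.zero_grad()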
@@ -365,9 +363,10 @@ class CustomTorchPolicy(TorchPolicy):
             self.best_rew_tsteps = self.timesteps_total
         if self.timesteps_total > self.target_timesteps or (self.time_elapsed + self.buffer_time) > self.max_time:
-            if self.best_weights is not None:
-                self.set_model_weights(self.best_weights)
-            return True
+            if self.timesteps_total > 1_000_000: # Adding this hack due to maze reward deque very high in beginning
+                if self.best_weights is not None:
+                    self.set_model_weights(self.best_weights)
+                return True
         return False
...
@@ -45,10 +45,10 @@ procgen-ppo:
     no_done_at_end: False
     # Custom switches
-    skips: 2
-    n_pi: 16
-    num_retunes: 15
-    retune_epochs: 7
+    skips: 0
+    n_pi: 32
+    num_retunes: 8
+    retune_epochs: 6
     standardize_rewards: True
     aux_mbsize: 4
     aux_num_accumulates: 2
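The changed keys are PPG-style phase-scheduling knobs: n_pi is the number of policy iterations between auxiliary ("retune") phases, retune_epochs the number of training epochs per auxiliary phase, and num_retunes the number of auxiliary phases over the run. A rough sketch of that schedule, assuming the repo follows the usual Phasic Policy Gradient layout (the function names are hypothetical, not the repo's):

# Hypothetical PPG-style schedule; run_policy_phase and run_aux_phase stand in
# for the repo's actual policy-phase and auxiliary-phase update methods.
n_pi = 32          # policy iterations between auxiliary phases (was 16)
num_retunes = 8    # auxiliary ("retune") phases over the run (was 15)
retune_epochs = 6  # epochs of auxiliary training per phase (was 7)
total_iterations = 192

def run_policy_phase(i):
    print(f"iter {i}: PPO-style policy/value update")

def run_aux_phase(i, epoch):
    print(f"iter {i}: auxiliary (retune) epoch {epoch}")

retunes_done = 0
for i in range(1, total_iterations + 1):
    run_policy_phase(i)
    if i % n_pi == 0 and retunes_done < num_retunes:
        for epoch in range(retune_epochs):
            run_aux_phase(i, epoch)
        retunes_done += 1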
@@ -112,6 +112,11 @@ procgen-ppo:
     explore: True,
     exploration_config:
         type: "StochasticSampling"
+    evaluation_config:
+        exploration_config:
+            type: SoftQ
+            temperature: 0.5
     observation_filter: "NoFilter"
     synchronize_filters: True
...
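The new evaluation_config block switches evaluation rollouts from the training-time StochasticSampling exploration to RLlib's SoftQ exploration with temperature 0.5, i.e. actions are sampled from a softmax over the model's per-action outputs, sharpened by the sub-unit temperature. A minimal sketch of that sampling rule (the action scores and helper below are illustrative, not RLlib's implementation):

import numpy as np

rng = np.random.default_rng(0)

def softq_sample(values, temperature):
    # Boltzmann (softmax) sampling over per-action values, the behaviour
    # a SoftQ-style exploration with a temperature parameter gives.
    logits = np.asarray(values, dtype=np.float64) / temperature
    logits -= logits.max()                        # numerical stability
    probs = np.exp(logits) / np.exp(logits).sum()
    return int(rng.choice(len(probs), p=probs)), probs

values = [1.0, 0.5, 0.2, 0.1]                     # illustrative action scores
for t in (1.0, 0.5):                              # 0.5 sharpens the distribution
    action, probs = softq_sample(values, t)
    print(f"temperature={t}: probs={np.round(probs, 3)} -> action {action}")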