---
# Experiment `procgen-ppo`: trains the `CustomTorchPPGAgent` trainer on the
# `frame_stacked_procgen` environment.
procgen-ppo:
  env: frame_stacked_procgen
  run: CustomTorchPPGAgent
  disable_evaluation_worker: true

  # === Stop Conditions ===
  # Training stops as soon as either limit is reached.
  stop:
    timesteps_total: 8000000
    time_total_s: 7200

  # === Settings for Checkpoints ===
  checkpoint_freq: 100
  checkpoint_at_end: true
  keep_checkpoints_num: 5

  config:
    # === Settings for the Procgen Environment ===
    env_config:
      env_name: miner
      num_levels: 0
      start_level: 0
      paint_vel_info: false
      use_generated_assets: false
      center_agent: true
      use_sequential_levels: false
      distribution_mode: easy
      # NOTE(review): the four keys below are assumed to be consumed by the
      # `frame_stacked_procgen` env wrapper and therefore live in env_config;
      # confirm against the wrapper's constructor.
      frame_stack: 2
      return_min: 0
      return_blind: 1
      return_max: 10

    gamma: 0.996
    lambda: 0.95
    lr: 5.0e-4
    # Number of SGD iterations in each outer loop
    num_sgd_iter: 1
    vf_loss_coeff: 1.0
    entropy_coeff: 0.01
    clip_param: 0.2
    vf_clip_param: 0.2
    grad_clip: 0.5
    # Declared exactly once; a duplicate of this key further down the mapping
    # would silently override it in most YAML loaders.
    observation_filter: NoFilter
    vf_share_layers: true
    horizon: null
    soft_horizon: false
    no_done_at_end: false

    # Custom switches
    skips: 0
    n_pi: 16
    num_retunes: 16
    retune_epochs: 6
    standardize_rewards: true
    aux_mbsize: 4
    aux_num_accumulates: 3
    augment_buffer: true
    scale_reward: 1.0
    reset_returns: false
    flattened_buffer: true
    augment_randint_num: 3  ## Hacky name fix later
    aux_lr: 5.0e-4
    value_lr: 1.0e-3
    same_lr_everywhere: false
    aux_phase_mixed_precision: true
    single_optimizer: true
    # Same value as stop.time_total_s above; keep the two in sync.
    max_time: 7200
    pi_phase_mixed_precision: false
    adaptive_gamma: false
    final_lr: 1.0e-4
    lr_schedule: 'linear'
    final_entropy_coeff: 0.002
    entropy_schedule: false

    # Memory management, if batch size overflow, batch splitting is done to
    # handle it
    max_minibatch_size: 1000
    updates_per_batch: 8

    normalize_actions: false
    clip_rewards: null
    clip_actions: true
    preprocessor_pref: deepmind

    ignore_worker_failures: false
    log_sys_usage: true

    use_pytorch: true

    # === Settings for Model ===
    model:
      custom_model: impala_torch_ppg
      custom_model_config:
        depths: [32, 64, 64]
        nlatents: 512
        # Larger alternative, currently disabled:
        # depths: [64, 128, 128]
        # nlatents: 1024
        init_normed: true
        use_layernorm: false
        diff_framestack: true

    num_workers: 7
    num_envs_per_worker: 16

    rollout_fragment_length: 256

    # Whether to rollout "complete_episodes" or "truncate_episodes" to
    # `rollout_fragment_length` length unrolls.
    batch_mode: truncate_episodes

    num_cpus_per_worker: 1
    num_gpus_per_worker: 0.1
    num_cpus_for_driver: 1

    num_gpus: 0.3

    # Must be a real boolean: a trailing comma here (`True,`) would be parsed
    # as the string "True," rather than a bool.
    explore: true
    exploration_config:
      type: "StochasticSampling"

    synchronize_filters: true
    compress_observations: false
    timesteps_per_iteration: 0
    seed: null