procgen-ppo:
    env: frame_stacked_procgen
    run: CustomTorchPPOAgent
    disable_evaluation_worker: True

    # === Stop Conditions ===
    stop:
        timesteps_total: 8000000
        time_total_s: 7200

    # === Settings for Checkpoints ===
    checkpoint_freq: 100
    checkpoint_at_end: True
    keep_checkpoints_num: 5

    config:
        # === Settings for the Procgen Environment ===
        env_config:
            env_name: coinrun
            num_levels: 0
            start_level: 0
            paint_vel_info: False
            use_generated_assets: False
            center_agent: True
            use_sequential_levels: False
            distribution_mode: easy
            frame_stack: 2
            return_min: 0
            return_blind: 1
            return_max: 10

        gamma: 0.996
        lambda: 0.95
        lr: 5.0e-4
        # Number of SGD iterations in each outer loop
        num_sgd_iter: 3
        vf_loss_coeff: 0.5
        entropy_coeff: 0.01
        clip_param: 0.2
        vf_clip_param: 0.2
        grad_clip: 0.5
        observation_filter: NoFilter
        vf_share_layers: True
        horizon: null
        soft_horizon: False
        no_done_at_end: False

        # Custom switches
        retune_skips: 300000
        retune_replay_size: 200000
        num_retunes: 8
        retune_epochs: 3
        standardize_rewards: True
        adaptive_gamma: False
        final_lr: 2.0e-4
        lr_schedule: 'linear'
        final_entropy_coeff: 0.002
        entropy_schedule: False

        # Memory management: if a batch is too large to fit, it is split into
        # minibatches of at most max_minibatch_size for the update
        max_minibatch_size: 2048
        updates_per_batch: 8

        normalize_actions: False
        clip_rewards: null
        clip_actions: True
        preprocessor_pref: deepmind
        ignore_worker_failures: False
        log_sys_usage: True

        use_pytorch: True

        # === Settings for Model ===
        model:
            custom_model: impala_torch_custom
            custom_options:
                # depths: [64, 128, 128]
                # nlatents: 1024
                depths: [32, 64, 64]
                nlatents: 512
                init_glorot: False
                use_layernorm: True

        num_workers: 7
        num_envs_per_worker: 16
        rollout_fragment_length: 256
        # Whether to roll out "complete_episodes" or "truncate_episodes" of
        # rollout_fragment_length steps per sample batch
        batch_mode: truncate_episodes

        num_cpus_per_worker: 1
        num_gpus_per_worker: 0.1
        num_cpus_for_driver: 1
        num_gpus: 0.3

        explore: True
        exploration_config:
            type: "StochasticSampling"

        observation_filter: "NoFilter"
        synchronize_filters: True
        compress_observations: False
        timesteps_per_iteration: 0
        seed: null
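
# --- Usage sketch (assumption; not part of the original config) ---
# This file follows the Ray Tune experiment-file layout, so it is typically
# consumed by a training script that first registers the custom trainable
# (CustomTorchPPOAgent), the env creator (frame_stacked_procgen), and the
# custom model (impala_torch_custom), and then runs something along these
# lines (the filename below is illustrative):
#
#   import yaml
#   import ray
#   from ray import tune
#
#   with open("experiments/procgen-ppo.yaml") as f:
#       experiments = yaml.safe_load(f)
#   ray.init()
#   tune.run_experiments(experiments)
#
# The exact registration calls live in the repo's own train script and are
# not reproduced here.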