ppg-experimental.yaml 3.29 KB
Newer Older
Dipam Chakraborty's avatar
Dipam Chakraborty committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
procgen-ppo:
    env: frame_stacked_procgen
    run: PPGExperimental
    disable_evaluation_worker: true
    # === Stop Conditions ===
    # Training stops at whichever limit is hit first.
    stop:
        timesteps_total: 8000000
        time_total_s: 7200

    # === Settings for Checkpoints ===
    checkpoint_freq: 100
    checkpoint_at_end: true
    keep_checkpoints_num: 5

    config:
        # === Settings for the Procgen Environment ===
        env_config:
            env_name: miner
            # num_levels: 0 requests the full (unbounded) level distribution;
            # start_level is the seed offset into it.
            num_levels: 0
            start_level: 0
            paint_vel_info: false
            use_generated_assets: false
            center_agent: true
            use_sequential_levels: false
            distribution_mode: easy
            frame_stack: 2
            return_min: 0
            return_blind: 1
            return_max: 10

        # === Core PPO/PPG hyperparameters ===
        gamma: 0.996
        lambda: 0.95
        lr: 5.0e-4
        # Number of SGD iterations in each outer loop
        num_sgd_iter: 1
        vf_loss_coeff: 1.0
        entropy_coeff: 0.01
        clip_param: 0.2
        vf_clip_param: 0.2
        grad_clip: 0.5
        observation_filter: NoFilter
        vf_share_layers: true
        horizon: null
        soft_horizon: false
        no_done_at_end: false

        # Custom switches
Dipam Chakraborty's avatar
Dipam Chakraborty committed
48
49
50
51
        # PPG-style phase-scheduling knobs consumed by the custom
        # PPGExperimental trainer. NOTE(review): semantics inferred from
        # names only — confirm against the trainer implementation
        # (presumably n_pi policy iterations per auxiliary/retune phase).
        skips: 0
        n_pi: 32
        num_retunes: 8
        retune_epochs: 6
Dipam Chakraborty's avatar
Dipam Chakraborty committed
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
        standardize_rewards: true
        aux_mbsize: 4
        aux_num_accumulates: 2
        augment_buffer: true
        scale_reward: 1.0
        reset_returns: false
        flattened_buffer: true
        augment_randint_num: 3  # TODO: hacky name, rename later
        aux_lr: 5.0e-4
        value_lr: 1.0e-3
        same_lr_everywhere: true
        aux_phase_mixed_precision: true
        single_optimizer: false
        max_time: 7200
        pi_phase_mixed_precision: false

        # === Schedules ===
        adaptive_gamma: false
        final_lr: 5.0e-5
        lr_schedule: 'linear'
        final_entropy_coeff: 0.002
        entropy_schedule: false

        # Memory management, if batch size overflow, batch splitting is done to handle it
        max_minibatch_size: 1000
        updates_per_batch: 8

        normalize_actions: false
        clip_rewards: null
        clip_actions: true
        preprocessor_pref: deepmind

        ignore_worker_failures: false
        log_sys_usage: true

        use_pytorch: true

        # === Settings for Model ===
        model:
            custom_model: impala_torch_ppg
            custom_model_config:
                depths: [32, 64, 64]
                nlatents: 512
                init_normed: true
                use_layernorm: false
                diff_framestack: true

        # === Rollout worker resources ===
        num_workers: 7
        num_envs_per_worker: 16

        rollout_fragment_length: 256

        # Whether to rollout "complete_episodes" or "truncate_episodes".
        batch_mode: truncate_episodes

        num_cpus_per_worker: 1
        num_gpus_per_worker: 0.1
        num_cpus_for_driver: 1

        num_gpus: 0.3

        # NOTE: was `explore: True,` — the trailing comma made the value the
        # string "True," rather than a boolean. Fixed to a real boolean.
        explore: true
        exploration_config:
            type: "StochasticSampling"
Dipam Chakraborty's avatar
Dipam Chakraborty committed
115
116
117
118
119
            
        # Exploration override applied only during evaluation rollouts:
        # softmax sampling (SoftQ) at temperature 0.5.
        # NOTE(review): disable_evaluation_worker is set to True at the top of
        # this file, so this section may be inert — confirm before relying on it.
        evaluation_config:
            exploration_config:
                type: SoftQ
                temperature: 0.5
Dipam Chakraborty's avatar
Dipam Chakraborty committed
120
121
122
123
124
125
126

        # NOTE: a duplicate "observation_filter" key was removed here — it is
        # already set to NoFilter earlier in this config (duplicate mapping
        # keys are invalid YAML; most parsers silently keep only the last).
        synchronize_filters: true
        compress_observations: false
        timesteps_per_iteration: 0
        seed: null