# custom-ppg.yaml
procgen-ppo:
    env: frame_stacked_procgen
    run: CustomTorchPPGAgent
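    # Note (assumption): "CustomTorchPPGAgent" is presumably a custom PPG trainer
    # registered with Ray Tune elsewhere in this repo; it is not a built-in RLlib algorithm.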
    disable_evaluation_worker: True
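    # Presumably a custom flag (not a standard RLlib/Tune key) that skips creating
    # a separate evaluation worker to save resources.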
    # === Stop Conditions ===
    stop:
        timesteps_total: 8000000
        time_total_s: 7200

    # === Settings for Checkpoints ===
    checkpoint_freq: 100
    checkpoint_at_end: True
    keep_checkpoints_num: 5

    config:
        # === Settings for the Procgen Environment ===
        env_config:
            env_name: miner
            num_levels: 0
            start_level: 0
            paint_vel_info: False
            use_generated_assets: False
            center_agent: True
            use_sequential_levels: False
            distribution_mode: easy
            frame_stack: 2
            return_min: 0
            return_blind: 1
            return_max: 10
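            # Interpretation (assumption): return_min / return_blind / return_max appear to be
            # custom per-game reward bounds used by the env wrapper for normalized-return
            # reporting; they are not standard Procgen or RLlib options.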

        gamma: 0.996
        lambda: 0.95
        lr: 5.0e-4
        # Number of SGD iterations in each outer loop
        num_sgd_iter: 1
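        # A single policy epoch per batch matches the PPG paper's default (E_pi = 1).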
        vf_loss_coeff: 1.0
        entropy_coeff: 0.01
        clip_param: 0.2
        vf_clip_param: 0.2
        grad_clip: 0.5
        observation_filter: NoFilter
        vf_share_layers: True
        horizon: null
        soft_horizon: False
        no_done_at_end: False
        
        # Custom switches
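        # Interpretation of the PPG-specific keys below (assumption, these are custom to
        # this trainer): n_pi ~ policy phases between auxiliary phases (N_pi in the PPG
        # paper), retune_epochs ~ auxiliary epochs (E_aux), aux_mbsize ~ auxiliary
        # minibatch size, num_retunes ~ total number of auxiliary phases over training.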
        skips: 2
        n_pi: 14
        num_retunes: 12
        retune_epochs: 6
        standardize_rewards: True
        aux_mbsize: 4
        augment_buffer: True
        scale_reward: 1.0
        reset_returns: False
        flattened_buffer: True
        augment_randint_num: 3  ## Hacky name, fix later
        aux_lr: 5.0e-4
        value_lr: 1.0e-3
        same_lr_everywhere: False
        aux_phase_mixed_precision: True
        single_optimizer: True
        max_time: 7200
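        # Presumably a trainer-side mirror of the time_total_s stop condition above
        # (a 2-hour training budget).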
        pi_phase_mixed_precision: True
        
        adaptive_gamma: False
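        # Annealing settings (interpretation, custom keys): a 'linear' lr_schedule
        # presumably decays lr towards final_lr over training; entropy_schedule is
        # disabled here, so final_entropy_coeff likely has no effect.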
        final_lr: 5.0e-5
        lr_schedule: 'linear'
        final_entropy_coeff: 0.002
        entropy_schedule: False
        
        # Memory management: if the batch size overflows, the batch is split into smaller chunks to handle it
        max_minibatch_size: 500
        updates_per_batch: 8
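        # Rough sizing (assumption: train batch = workers x envs x fragment length):
        # 7 workers x 16 envs x 256 steps ~= 28,672 transitions per batch, which is
        # then split into minibatches of at most 500 samples.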

        normalize_actions: False
        clip_rewards: null
        clip_actions: True
        preprocessor_pref: deepmind

        ignore_worker_failures: False
        log_sys_usage: True

        use_pytorch: True

        # === Settings for Model ===
        model:
            custom_model: impala_torch_ppg
            custom_model_config:
#                 depths: [32, 64, 64]
#                 nlatents: 512
                depths: [64, 128, 128]
                nlatents: 1024
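                # Interpretation (custom model keys): depths are presumably the channel
                # counts of the IMPALA-style conv blocks and nlatents the width of the
                # final dense latent; the commented values above are a smaller variant.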
                init_normed: True
                use_layernorm: False
                diff_framestack: True

        num_workers: 7
        num_envs_per_worker: 16

        rollout_fragment_length: 256

        # Whether to rollout "complete_episodes" or "truncate_episodes"
        batch_mode: truncate_episodes

        num_cpus_per_worker: 1
        num_gpus_per_worker: 0.1
        num_cpus_for_driver: 1
        
        num_gpus: 0.3
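        # GPU budget: 7 workers x 0.1 GPU + 0.3 GPU for the driver = 1.0 GPU total,
        # so this config presumably fits on a single GPU.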

        explore: True
        exploration_config:
            type: "StochasticSampling"

        observation_filter: "NoFilter"
        synchronize_filters: True
        compress_observations: False
        timesteps_per_iteration: 0
        seed: null