# custom-torch-ppo.yaml
procgen-ppo:
    env: frame_stacked_procgen
    run: CustomTorchPPOAgent
    disable_evaluation_worker: True
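    # Note: frame_stacked_procgen and CustomTorchPPOAgent are not built-in RLlib names;
    # they are presumably registered by the experiment's Python entry point before this
    # config is handed to Tune. disable_evaluation_worker is likewise a custom flag.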
    # === Stop Conditions ===
    stop:
        timesteps_total: 8000000
        time_total_s: 7200
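        # Training stops as soon as either condition is met: 8M environment steps
        # or 7200 s (2 hours) of wall-clock time.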


    # === Settings for Checkpoints ===
    checkpoint_freq: 100
    checkpoint_at_end: True
    keep_checkpoints_num: 5
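    # A checkpoint is written every 100 training iterations, only the 5 most recent
    # are kept, and a final checkpoint is always saved when the run ends.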

    config:
        # === Settings for the Procgen Environment ===
        env_config:
            env_name: coinrun
            num_levels: 0
            start_level: 0
            paint_vel_info: False
            use_generated_assets: False
            center_agent: True
            use_sequential_levels: False
            distribution_mode: easy
            frame_stack: 2
            return_min: 0
            return_blind: 1
            return_max: 10
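            # return_min / return_blind / return_max are not standard Procgen gym options;
            # they are presumably read by the competition's environment wrapper to
            # normalize episode returns (coinrun easy scores range from 0 to 10).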

        gamma: 0.996
        lambda: 0.95
        lr: 5.0e-4
        # Number of SGD iterations in each outer loop
        num_sgd_iter: 3
        vf_loss_coeff: 0.5
        entropy_coeff: 0.01
        clip_param: 0.2
        vf_clip_param: 0.2
        grad_clip: 0.5
        observation_filter: NoFilter
        vf_share_layers: True
        horizon: null
        soft_horizon: False
        no_done_at_end: False
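        # Standard RLlib PPO settings: 3 SGD passes over each train batch, PPO clip 0.2,
        # value-loss clip 0.2, gradient clipping at 0.5, and a shared trunk for the
        # policy and value heads (vf_share_layers: True).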
        
        # Custom switches
        retune_skips: 100000
        retune_replay_size: 200000
        num_retunes: 23
        retune_epochs: 3
        standardize_rewards: True
        scale_reward: 1.0
        return_reset: False
        aux_phase_mixed_precision: True
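        # These switches are not RLlib options; presumably the custom trainer uses them
        # for a PPG-style auxiliary phase: retune_* schedules periodic value/auxiliary
        # retuning over a replay of recent observations, standardize_rewards /
        # scale_reward / return_reset control reward normalization, and
        # aux_phase_mixed_precision runs the auxiliary phase with mixed precision (AMP).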
        
        adaptive_gamma: False
        final_lr: 5.0e-5
        lr_schedule: 'linear'
        final_entropy_coeff: 0.002
        entropy_schedule: False
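        # Assumed behaviour of the custom schedules: with lr_schedule 'linear' the
        # learning rate anneals from lr (5.0e-4) to final_lr (5.0e-5) over training;
        # entropy_schedule is off, so entropy_coeff stays at 0.01 and
        # final_entropy_coeff is unused. adaptive_gamma is likewise disabled.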
        
        # Memory management: if the train batch is too large to fit, it is split into smaller minibatches
        max_minibatch_size: 1000
        updates_per_batch: 8
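        # Custom keys (not RLlib): minibatches sent to the GPU are presumably capped at
        # 1000 samples, with updates_per_batch controlling how many updates (or splits)
        # are made per sampled batch.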

        normalize_actions: False
        clip_rewards: null
        clip_actions: True
        preprocessor_pref: deepmind

        ignore_worker_failures: False
        log_sys_usage: True

        use_pytorch: True

        # === Settings for Model ===
        model:
            custom_model: impala_torch_custom
            custom_model_config: 
                depths: [32, 64, 64]
                nlatents: 512
                use_layernorm: True
                diff_framestack: True
                d2rl: False
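                # Assumed meaning of the custom model options: an IMPALA-style CNN with
                # three blocks of channel depths [32, 64, 64] feeding a 512-unit latent;
                # use_layernorm adds layer normalization, diff_framestack presumably feeds
                # frame differences from the 2-frame stack, and d2rl would add D2RL-style
                # dense skip connections (disabled here).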

        num_workers: 7
        num_envs_per_worker: 16

        rollout_fragment_length: 256
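        # Each round of sampling yields 7 workers x 16 envs x 256 steps = 28,672 transitions.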

        # Whether to roll out "complete_episodes" or "truncate_episodes"
        batch_mode: truncate_episodes

        num_cpus_per_worker: 1
        num_gpus_per_worker: 0.1
        num_cpus_for_driver: 1
        
        num_gpus: 0.3
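        # Fractional GPU allocation: 0.3 for the driver/learner plus 7 workers x 0.1
        # = 1.0, so the whole experiment packs onto a single GPU.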

        explore: True
        exploration_config:
            type: "StochasticSampling"

        observation_filter: "NoFilter"
        synchronize_filters: True
        compress_observations: False
        timesteps_per_iteration: 0
        seed: null