# Ray RLlib / Tune experiment config: custom PyTorch PPO agent on Procgen.
procgen-ppo:
    env: frame_stacked_procgen
    run: CustomTorchPPOAgent
    disable_evaluation_worker: true

    # === Stop Conditions ===
    # Training ends at whichever limit is hit first: 8M env steps or 2h wall time.
    stop:
        timesteps_total: 8000000
        time_total_s: 7200

    # === Settings for Checkpoints ===
    checkpoint_freq: 100
    checkpoint_at_end: true
    keep_checkpoints_num: 5

    config:
        # === Settings for the Procgen Environment ===
        env_config:
            env_name: coinrun
            num_levels: 0  # 0 = unlimited procedurally generated levels
            start_level: 0
            paint_vel_info: false
            use_generated_assets: false
            center_agent: true
            use_sequential_levels: false
            distribution_mode: easy
            frame_stack: 2
            # Per-game reward normalization bounds (easy mode)
            return_min: 0
            return_blind: 1
            return_max: 10

        # === PPO hyperparameters ===
        gamma: 0.996
        lambda: 0.95
        lr: 5.0e-4
        # Number of SGD iterations in each outer loop
        num_sgd_iter: 3
        vf_loss_coeff: 0.5
        entropy_coeff: 0.01
        clip_param: 0.2
        vf_clip_param: 0.2
        grad_clip: 0.5
        observation_filter: NoFilter
        vf_share_layers: true
        horizon: null
        soft_horizon: false
        no_done_at_end: false

        # Custom switches
        retune_skips: 300000
        retune_replay_size: 200000
        num_retunes: 8
        retune_epochs: 3
        standardize_rewards: true

        # Learning-rate / entropy annealing
        adaptive_gamma: false
        final_lr: 2.0e-4
        lr_schedule: 'linear'
        final_entropy_coeff: 0.002
        entropy_schedule: false

        # Memory management, if batch size overflow, batch splitting is done to handle it
        max_minibatch_size: 2048
        updates_per_batch: 8

        normalize_actions: false
        clip_rewards: null
        clip_actions: true
        preprocessor_pref: deepmind

        ignore_worker_failures: false
        log_sys_usage: true

        use_pytorch: true

        # === Settings for Model ===
        model:
            custom_model: impala_torch_custom
            custom_options:
                # Larger variant, kept for reference:
                # depths: [64, 128, 128]
                # nlatents: 1024
                depths: [32, 64, 64]
                nlatents: 512
                init_glorot: false
                use_layernorm: true

        # === Rollout worker resources ===
        num_workers: 7
        num_envs_per_worker: 16

        rollout_fragment_length: 256

        # Whether to rollout "complete_episodes" or "truncate_episodes"
        batch_mode: truncate_episodes

        num_cpus_per_worker: 1
        num_gpus_per_worker: 0.1
        num_cpus_for_driver: 1

        num_gpus: 0.3

        # === Exploration ===
        # NOTE(review): original read `explore: True,` — the trailing comma made
        # this the string "True," rather than a boolean; fixed to a real boolean.
        explore: true
        exploration_config:
            type: "StochasticSampling"

        # NOTE(review): a duplicate `observation_filter: "NoFilter"` key was
        # removed here — it is already set once above in this config mapping.
        synchronize_filters: true
        compress_observations: false
        timesteps_per_iteration: 0
        seed: null