ccppo_base.yaml 2.26 KB
Newer Older
umbra99's avatar
umbra99 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Ray Tune experiment: PPO with a centralized-critic concatenation/transformer
# model on the Flatland sparse environment (small_v0, tree observations).
flatland-random-sparse-small-tree-fc-cctransformer:
    run: CcConcatenate
    env: flatland_sparse
    stop:
        timesteps_total: 10000000  # 1e7
    checkpoint_freq: 10
    checkpoint_at_end: true
    keep_checkpoints_num: 5
    checkpoint_score_attr: episode_reward_mean
    config:
        clip_rewards: true
        clip_param: 0.1
        vf_clip_param: 500.0
        entropy_coeff: 0.01
        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        train_batch_size: 1000  # 5000
        rollout_fragment_length: 50  # 100
        sgd_minibatch_size: 100  # 500
        num_sgd_iter: 10
        num_workers: 2
        num_envs_per_worker: 5
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        vf_share_layers: true
        lr: 0.0001
        vf_loss_coeff: 0.000001
        num_gpus: 1

        env_config:
            observation: tree
            resolve_deadlocks: false
            observation_config:
                max_depth: 2
                shortest_path_max_depth: 30

            generator: sparse_rail_generator
            generator_config: small_v0

            wandb:
                project: flatland
                # explicit null: set to your W&B entity (user or team) to log runs
                entity: null
                tags: ["small_v0", "tree_obs"] # TODO should be set programmatically

        model:
            custom_model: cc_concatenate
            custom_options:
                max_num_agents: 15
                actor:
                    activation_fn: relu
                    hidden_layers:
                    - 512
                    - 512
                    - 512
                critic:
                    centralized: true
                    embedding_size: 32
                    num_heads: 4
                    d_model: 32
                    use_scale: true
                    activation_fn: relu
                    hidden_layers:
                    - 512
                    - 512
                    - 512
                embedding:
                    activation_fn: relu
                    hidden_layers:
                    - 512
                    - 512
                    - 512
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
            vf_share_layers: true  # False