# PPO on the Flatland sparse environment (small_v0) with tree observations
# and unavailable-action masking. Removed web-scrape blame residue
# (committer/avatar/gutter-number lines) that made this file invalid YAML;
# all configuration keys, values, and comments are preserved unchanged.
sparse-mask-ppo-tree-obs-small-v0:
    run: PPO
    env: flatland_sparse
    stop:
        timesteps_total: 15000000  # 1.5e7
    checkpoint_freq: 50
    checkpoint_at_end: True
    keep_checkpoints_num: 100
    checkpoint_score_attr: episode_reward_mean
    num_samples: 3
    config:
        num_workers: 13
        num_envs_per_worker: 5
        num_gpus: 0
        gamma: 0.99

        clip_rewards: False
        vf_clip_param: 500.0
        entropy_coeff: 0.01
        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        train_batch_size: 1000  # 5000
        rollout_fragment_length: 50  # 100
        sgd_minibatch_size: 100  # 500
        vf_share_layers: False

        env_config:
            observation: tree
            observation_config:
                max_depth: 2
                shortest_path_max_depth: 30

            generator: sparse_rail_generator
            generator_config: small_v0

            available_actions_obs: True
            allow_noop: False

            wandb:
                project: flatland-paper
                entity: aicrowd
                tags: ["small_v0", "tree_obs", "ppo", "mask"] # TODO should be set programmatically

        model:
            custom_model: fully_connected_model
            custom_options:
                layers: [256, 256]
                activation: relu
                layer_norm: False
                mask_unavailable_actions: True