# ppo.yaml
# Experiment "flatland-sparse-global-conv-ppo": runs PPO on the
# flatland_sparse environment with the global observation and the custom
# model "global_obs_model" (impala architecture).
flatland-sparse-global-conv-ppo:
    run: PPO
    env: flatland_sparse

    # Stopping criterion for the run.
    stop:
        timesteps_total: 10000000  # 1e7

    # Checkpoint settings; checkpoints are scored by episode_reward_mean.
    # NOTE(review): checkpoint_freq is presumably counted in training
    # iterations - confirm against the Ray Tune documentation.
    checkpoint_freq: 10
    checkpoint_at_end: true
    keep_checkpoints_num: 5
    checkpoint_score_attr: episode_reward_mean

    # Trainer settings for the algorithm named in `run`.
    config:
        # Clipping and loss-related coefficients.
        clip_rewards: true
        clip_param: 0.1
        vf_clip_param: 500.0
        entropy_coeff: 0.01

        # Batch sizing.
        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        # NOTE(review): the trailing numbers (5000 / 100 / 500) look like
        # alternative or earlier values kept for reference - confirm.
        train_batch_size: 1000  # 5000
        rollout_fragment_length: 50  # 100
        sgd_minibatch_size: 100  # 500
        num_sgd_iter: 10

        # Parallelism and sampling.
        num_workers: 7
        num_envs_per_worker: 5
        batch_mode: truncate_episodes
        observation_filter: NoFilter

        # Value-function settings.
        vf_share_layers: true
        vf_loss_coeff: 0.5

        num_gpus: 1

        # Settings handed to the environment.
        env_config:
            observation: global
            observation_config:
                max_width: 32
                max_height: 32

            generator: sparse_rail_generator
            generator_config: 32x32_v0

            # NOTE(review): presumably Weights & Biases logging metadata;
            # verify how the environment/runner consumes this section.
            wandb:
                project: flatland
                entity: masterscrat
                tags: ["32x32", "global_obs"]

        # Custom model selection and its options.
        model:
            custom_model: global_obs_model
            custom_options:
                architecture: impala
                architecture_options:
                    # NOTE(review): each pair is presumably
                    # [channels, number of residual blocks] - confirm
                    # against the global_obs_model implementation.
                    residual_layers: [[16, 2], [32, 4]]