# global_obs_ppo.yaml
#
# Experiment definition: runs PPO on the `flatland_sparse` environment with
# the `global` observation and the `global_obs_model` custom model.
flatland-sparse-global-conv-ppo:
    run: PPO
    env: flatland_sparse
    # Stopping criteria for the experiment.
    stop:
        timesteps_total: 500
    # Checkpointing options.
    checkpoint_freq: 10
    checkpoint_at_end: true
    keep_checkpoints_num: 5
    checkpoint_score_attr: episode_reward_mean
    # Settings under `config` sit alongside `run: PPO`; presumably they are
    # handed to the PPO trainer as-is — confirm against the launcher script.
    config:
        clip_rewards: true
        clip_param: 0.1
        vf_clip_param: 500.0
        entropy_coeff: 0.01
        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        # NOTE(review): the trailing "# <number>" comments on the next three
        # keys look like alternative (larger) settings — confirm before
        # relying on them.
        train_batch_size: 1000  # 5000
        rollout_fragment_length: 50  # 100
        sgd_minibatch_size: 100  # 500
        num_sgd_iter: 10
        num_workers: 2
        num_envs_per_worker: 2
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        vf_share_layers: true
        vf_loss_coeff: 0.5
        num_gpus: 0

        # Options nested under `env_config`; presumably forwarded to the
        # `flatland_sparse` environment — verify against its constructor.
        env_config:
            observation: global
            observation_config:
                max_width: 32
                max_height: 32

            generator: sparse_rail_generator
            generator_config: small_v0

            wandb:
                project: flatland
                entity: masterscrat
                # The tags currently repeat `generator_config` and the
                # observation type by hand; keep them in sync until the TODO
                # below is resolved.
                tags: ["small_v0", "global_obs"]  # TODO should be set programmatically

        model:
            custom_model: global_obs_model
            custom_options:
                architecture: impala
                architecture_options:
                    # NOTE(review): each inner pair is presumably
                    # [num_filters, num_blocks] — confirm in global_obs_model.
                    residual_layers: [[16, 2], [32, 4]]