# MARWIL.yaml -- committed by MasterScrat
flatland-random-sparse-small-tree-marwil-fc-ppo:
  # Experiment entry: runs the MARWIL trainer on the
  # flatland_random_sparse_small environment with tree observations and a
  # fully connected model.
  # NOTE(review): the experiment name ends in "ppo" although `run` is
  # MARWIL -- confirm the name is intentional before renaming anything.
  run: MARWIL
  env: flatland_random_sparse_small

  # Stopping criterion for the run.
  stop:
    timesteps_total: 100000  # alternative value noted by the author: 1e7

  # Checkpoint settings.
  # NOTE(review): checkpoints are ranked by episode_reward_mean; with the
  # evaluation section below commented out, confirm this metric is actually
  # reported for this run.
  checkpoint_freq: 10
  checkpoint_at_end: true
  keep_checkpoints_num: 5
  checkpoint_score_attr: episode_reward_mean

  config:
    # beta=0 is the imitation-learning (IL) setting. To compare IL against
    # MARWIL, swap the fixed value for the grid search kept below.
    beta: 0
    # grid_search: [0, 1]

    # Presumably the location of previously recorded experiences -- verify
    # that the path exists before launching.
    input: /tmp/flatland-out

    # Effective batch size:
    #   train_batch_size * num_agents_in_each_environment [5, 10]
    # See https://github.com/ray-project/ray/issues/4628
    train_batch_size: 1000  # alternative value noted by the author: 5000
    rollout_fragment_length: 50  # alternative value noted by the author: 100
    num_workers: 2
    num_envs_per_worker: 1
    batch_mode: truncate_episodes
    observation_filter: NoFilter
    num_gpus: 0

    # Settings passed to the environment.
    env_config:
      min_seed: 1002
      max_seed: 213783
      min_test_seed: 0
      max_test_seed: 100
      # Number of episodes after which the level is regenerated.
      reset_env_freq: 1
      observation: tree
      observation_config:
        max_depth: 2
        shortest_path_max_depth: 30
      regenerate_rail_on_reset: true
      regenerate_schedule_on_reset: true
      render: false

    # Fully connected network: two hidden layers of 256 units, ReLU.
    model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
      vf_share_layers: true  # alternative value noted by the author: false

    # Evaluation is currently disabled. Uncomment the section below to
    # enable it; its env_config repeats the training env_config above.
    #
    # evaluation_num_workers: 1
    # # Evaluate every `evaluation_interval` training iterations.
    # evaluation_interval: 25
    # # Run 1 episode each time evaluation runs.
    # evaluation_num_episodes: 1
    # # Override the env config for evaluation.
    # evaluation_config:
    #   explore: false
    #   env_config:
    #     min_seed: 1002
    #     max_seed: 213783
    #     min_test_seed: 0
    #     max_test_seed: 100
    #     # Number of episodes after which the level is regenerated.
    #     reset_env_freq: 1
    #     observation: tree
    #     observation_config:
    #       max_depth: 2
    #       shortest_path_max_depth: 30
    #     regenerate_rail_on_reset: true
    #     regenerate_schedule_on_reset: true
    #     render: false