---
# Offline-training experiment: MARWIL on the `flatland_random_sparse_small`
# environment, using tree observations and a fully connected model.
# Presumably consumed as a Ray Tune / RLlib experiment spec (see the Ray issue
# linked below) -- confirm against the launcher script.
# NOTE(review): the experiment name ends in "-ppo" although `run` is MARWIL;
# confirm whether the name is intentional.
flatland-random-sparse-small-tree-marwil-fc-ppo:
  run: MARWIL
  env: flatland_random_sparse_small

  stop:
    timesteps_total: 100000  # 1e7

  checkpoint_freq: 10
  checkpoint_at_end: true
  keep_checkpoints_num: 5
  checkpoint_score_attr: episode_reward_mean

  config:
    beta: 0  # grid_search: [0, 1]  # compare IL (beta=0) vs MARWIL
    # Location of the previously recorded experiences used as training input.
    input: /tmp/flatland-out

    # effective batch_size:
    #   train_batch_size * num_agents_in_each_environment [5, 10]
    # see https://github.com/ray-project/ray/issues/4628
    train_batch_size: 1000  # 5000
    rollout_fragment_length: 50  # 100
    num_workers: 2
    num_envs_per_worker: 1
    batch_mode: truncate_episodes
    observation_filter: NoFilter
    num_gpus: 0

    env_config:
      min_seed: 1002
      max_seed: 213783
      min_test_seed: 0
      max_test_seed: 100
      # After how many episodes the level should be regenerated:
      reset_env_freq: 1
      observation: tree
      observation_config:
        max_depth: 2
        shortest_path_max_depth: 30
      regenerate_rail_on_reset: true
      regenerate_schedule_on_reset: true
      render: false

    model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
      vf_share_layers: true  # false

    # Optional evaluation setup (disabled). Uncomment to enable.
    #
    # evaluation_num_workers: 1
    # # Run evaluation every `evaluation_interval` training iterations.
    # evaluation_interval: 25
    # # Run 1 episode each time evaluation runs.
    # evaluation_num_episodes: 1
    # # Override the env config for evaluation.
    # evaluation_config:
    #   explore: false
    #   env_config:
    #     min_seed: 1002
    #     max_seed: 213783
    #     min_test_seed: 0
    #     max_test_seed: 100
    #     # After how many episodes the level should be regenerated:
    #     reset_env_freq: 1
    #     observation: tree
    #     observation_config:
    #       max_depth: 2
    #       shortest_path_max_depth: 30
    #     regenerate_rail_on_reset: true
    #     regenerate_schedule_on_reset: true
    #     render: false