---
# Experiment definition: PPO on the `flatland_sparse` environment using tree
# observations, the `small_v0` generator config, and action masking.
# NOTE(review): the key layout matches a Ray Tune / RLlib experiment file;
# the nesting below was reconstructed on that assumption -- confirm against
# the consumer's schema.
sparse-mask-ppo-tree-obs-small-v0:
  run: PPO
  env: flatland_sparse

  stop:
    # 1.5e7
    timesteps_total: 15000000

  checkpoint_freq: 50
  checkpoint_at_end: true
  # NOTE(review): very large value; presumably means "keep every
  # checkpoint" -- confirm.
  keep_checkpoints_num: 100000000
  checkpoint_score_attr: episode_reward_mean
  num_samples: 3

  config:
    num_workers: 13
    num_envs_per_worker: 5
    num_gpus: 0
    gamma: 0.99

    clip_rewards: false
    vf_clip_param: 500.0
    entropy_coeff: 0.01

    # effective batch_size:
    #   train_batch_size * num_agents_in_each_environment [5, 10]
    # see https://github.com/ray-project/ray/issues/4628
    train_batch_size: 1000  # 5000
    rollout_fragment_length: 50  # 100
    sgd_minibatch_size: 100  # 500
    vf_share_layers: false

    env_config:
      observation: tree
      observation_config:
        max_depth: 2
        shortest_path_max_depth: 30

      generator: sparse_rail_generator
      generator_config: small_v0

      # NOTE(review): assumed to belong to env_config (environment-side
      # switches) -- confirm placement.
      available_actions_obs: true
      allow_noop: false

      # NOTE(review): assumed to be nested under env_config -- confirm.
      wandb:
        project: flatland-paper
        entity: aicrowd
        # TODO should be set programmatically
        tags: ["small_v0", "tree_obs", "ppo", "mask"]

    model:
      custom_model: fully_connected_model
      custom_options:
        layers: [256, 256]
        activation: relu
        layer_norm: false
        mask_unavailable_actions: true