---
# Experiment: PPO on the `flatland_sparse` environment using the global
# observation and the `global_obs_model` custom model (impala architecture).
#
# NOTE(review): the block nesting below was reconstructed from key semantics;
# confirm that `wandb` belongs under `env_config` and `model` under `config`.
flatland-sparse-global-conv-ppo:
  run: PPO
  env: flatland_sparse

  # Stopping criterion for the run.
  stop:
    timesteps_total: 500

  # Checkpointing: ranked by `checkpoint_score_attr`.
  checkpoint_freq: 10
  checkpoint_at_end: true
  keep_checkpoints_num: 5
  checkpoint_score_attr: episode_reward_mean

  config:
    clip_rewards: true
    clip_param: 0.1
    vf_clip_param: 500.0
    entropy_coeff: 0.01

    # effective batch_size:
    #   train_batch_size * num_agents_in_each_environment [5, 10]
    # see https://github.com/ray-project/ray/issues/4628
    train_batch_size: 1000  # 5000
    rollout_fragment_length: 50  # 100
    sgd_minibatch_size: 100  # 500
    num_sgd_iter: 10

    num_workers: 2
    num_envs_per_worker: 2
    batch_mode: truncate_episodes
    observation_filter: NoFilter
    vf_share_layers: true
    vf_loss_coeff: 0.5
    num_gpus: 0

    env_config:
      observation: global
      observation_config:
        max_width: 32
        max_height: 32

      generator: sparse_rail_generator
      generator_config: small_v0

      wandb:
        project: flatland
        entity: masterscrat
        # TODO should be set programmatically
        tags: ["small_v0", "global_obs"]

    model:
      custom_model: global_obs_model
      custom_options:
        architecture: impala
        architecture_options:
          # NOTE(review): presumably [channels, block count] per stage;
          # verify against the global_obs_model implementation.
          residual_layers: [[16, 2], [32, 4]]