Commit 2543a9c3 authored by nilabha

standardise configs

parent f8d0c32b
......@@ -3,11 +3,13 @@ apex-tree-obs-small-v0:
env: flatland_sparse
stop:
timesteps_total: 15000000 # 1.5e7
checkpoint_freq: 50
checkpoint_at_end: True
keep_checkpoints_num: 100
checkpoint_score_attr: episode_reward_mean
num_samples: 3
config:
num_workers: 15
num_workers: 13
num_envs_per_worker: 5
num_gpus: 0
......
......@@ -3,11 +3,13 @@ apex-tree-obs-small-v0-skip:
env: flatland_sparse
stop:
timesteps_total: 15000000 # 1.5e7
checkpoint_freq: 50
checkpoint_at_end: True
keep_checkpoints_num: 100
checkpoint_score_attr: episode_reward_mean
num_samples: 3
config:
num_workers: 15
num_workers: 13
num_envs_per_worker: 5
num_gpus: 0
gamma: 0.99
......
......@@ -3,11 +3,13 @@ ppo-tree-obs-small-v0:
env: flatland_sparse
stop:
timesteps_total: 15000000 # 1.5e7
checkpoint_freq: 50
checkpoint_at_end: True
keep_checkpoints_num: 100
checkpoint_score_attr: episode_reward_mean
num_samples: 3
config:
num_workers: 15
num_workers: 13
num_envs_per_worker: 5
num_gpus: 0
......
......@@ -3,11 +3,13 @@ sparse-mask-ppo-tree-obs-small-v0:
env: flatland_sparse
stop:
timesteps_total: 15000000 # 1.5e7
checkpoint_freq: 50
checkpoint_at_end: True
keep_checkpoints_num: 100
checkpoint_score_attr: episode_reward_mean
num_samples: 3
config:
num_workers: 15
num_workers: 13
num_envs_per_worker: 5
num_gpus: 0
gamma: 0.99
......
......@@ -3,11 +3,13 @@ ppo-tree-obs-small-v0-skip:
env: flatland_sparse
stop:
timesteps_total: 15000000 # 1.5e7
checkpoint_freq: 50
checkpoint_at_end: True
keep_checkpoints_num: 100
checkpoint_score_attr: episode_reward_mean
num_samples: 3
config:
num_workers: 15
num_workers: 13
num_envs_per_worker: 5
num_gpus: 0
gamma: 0.99
......
......@@ -2,13 +2,14 @@ flatland-sparse-small-tree-fc-apex:
run: APEX
env: flatland_sparse
stop:
timesteps_total: 5000000 # 5e6
checkpoint_freq: 10
timesteps_total: 15000000 # 1.5e7
checkpoint_freq: 50
checkpoint_at_end: True
keep_checkpoints_num: 5
keep_checkpoints_num: 100
checkpoint_score_attr: episode_reward_mean
num_samples: 3
config:
num_workers: 3
num_workers: 13
num_envs_per_worker: 5
num_gpus: 0
......
......@@ -3,9 +3,9 @@ flatland-sparse-small-combined-obs-tree-local-conflict-apex:
env: flatland_sparse
stop:
timesteps_total: 15000000 # 1.5e7
checkpoint_freq: 10
checkpoint_freq: 50
checkpoint_at_end: True
keep_checkpoints_num: 5
keep_checkpoints_num: 100
checkpoint_score_attr: episode_reward_mean
num_samples: 3
config:
......@@ -40,23 +40,4 @@ flatland-sparse-small-combined-obs-tree-local-conflict-apex:
model:
fcnet_activation: relu
fcnet_hiddens: [256, 256]
vf_share_layers: True
evaluation_num_workers: 2
evaluation_interval: 100
evaluation_num_episodes: 100
evaluation_config:
explore: False
env_config:
observation: combined
observation_config:
tree:
max_depth: 2
shortest_path_max_depth: 30
localConflict:
max_depth: 2
shortest_path_max_depth: 30
n_local: 5
regenerate_rail_on_reset: True
regenerate_schedule_on_reset: True
render: False
vf_share_layers: True
\ No newline at end of file
......@@ -3,9 +3,9 @@ flatland-sparse-small-density-cnn-apex:
env: flatland_sparse
stop:
timesteps_total: 15000000 # 1.5e7
checkpoint_freq: 10
checkpoint_freq: 50
checkpoint_at_end: True
keep_checkpoints_num: 5
keep_checkpoints_num: 100
checkpoint_score_attr: episode_reward_mean
num_samples: 3
config:
......@@ -38,19 +38,3 @@ flatland-sparse-small-density-cnn-apex:
architecture: impala
architecture_options:
residual_layers: [[16, 2], [32, 4]]
evaluation_num_workers: 2
evaluation_interval: 100
evaluation_num_episodes: 100
evaluation_config:
explore: False
env_config:
observation: density
observation_config:
width: 25
height: 25
max_t: 1000
encoding: exp_decay
regenerate_rail_on_reset: True
regenerate_schedule_on_reset: True
render: False
......@@ -7,11 +7,12 @@ flatland-random-sparse-small-tree-fc-apex-il-loss:
checkpoint_at_end: True
keep_checkpoints_num: 100
checkpoint_score_attr: episode_reward_mean
num_samples: 3
config:
input:
"/tmp/flatland-out": 0.75
sampler: 0.25
num_workers: 10
num_workers: 13
num_envs_per_worker: 1
num_gpus: 0
......@@ -23,7 +24,6 @@ flatland-random-sparse-small-tree-fc-apex-il-loss:
generator: sparse_rail_generator
generator_config: small_v0
seed: 0
wandb:
project: flatland
......@@ -41,21 +41,4 @@ flatland-random-sparse-small-tree-fc-apex-il-loss:
loss:
grid_search: [dqfd,ce,kl] # ce (cross entropy), kl (kl divergence)
lambda1: 1
lambda2: 1
evaluation_num_workers: 2
# Enable evaluation, once per training iteration.
evaluation_interval: 50
# Run 1 episode each time evaluation runs.
evaluation_num_episodes: 50
# Override the env config for evaluation.
evaluation_config:
explore: False
env_config:
observation: tree
observation_config:
max_depth: 2
shortest_path_max_depth: 30
generator: sparse_rail_generator
generator_config: small_v0
seed: 100
lambda2: 1
\ No newline at end of file
......@@ -2,14 +2,15 @@ flatland-random-sparse-small-tree-fc-marwil-il:
run: MARWIL
env: flatland_sparse
stop:
timesteps_total: 1000000000 # 1e9
timesteps_total: 5000000000 # 5e9
checkpoint_freq: 50
checkpoint_at_end: True
keep_checkpoints_num: 100
checkpoint_score_attr: episode_reward_mean
num_samples: 3
config:
beta:
grid_search: [0, 0.25, 0.5, 0.75, 1] # compare IL (beta=0) vs MARWIL
grid_search: [1] # compare IL (beta=0) vs MARWIL
input: /tmp/flatland-out
input_evaluation: [is, wis, simulation]
# effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
......@@ -18,8 +19,8 @@ flatland-random-sparse-small-tree-fc-marwil-il:
rollout_fragment_length: 50 # 100
batch_mode: truncate_episodes
observation_filter: NoFilter
num_workers: 10
num_envs_per_worker: 1
num_workers: 13
num_envs_per_worker: 5
num_gpus: 0
env_config:
......@@ -30,7 +31,6 @@ flatland-random-sparse-small-tree-fc-marwil-il:
generator: sparse_rail_generator
generator_config: small_v0
seed: 0
wandb:
project: flatland
......@@ -40,22 +40,4 @@ flatland-random-sparse-small-tree-fc-marwil-il:
model:
fcnet_activation: relu
fcnet_hiddens: [256, 256]
vf_share_layers: True # False
evaluation_num_workers: 2
# Enable evaluation, once per training iteration.
evaluation_interval: 50
# Run 1 episode each time evaluation runs.
evaluation_num_episodes: 50
# Override the env config for evaluation.
evaluation_config:
explore: False
env_config:
observation: tree
observation_config:
max_depth: 2
shortest_path_max_depth: 30
generator: sparse_rail_generator
generator_config: small_v0
seed: 100
vf_share_layers: True # False
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment