Commit a442df7d authored by nilabha

Added Candidates for Imitation learning runs

parent 9508203a
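# Candidate Tune/RLlib experiments for imitation learning on the Flatland
# sparse-small environment. Every run below reads offline expert experience
# from /tmp/flatland-out. As a sketch (the recording run is an assumption and
# not part of this commit), such a directory can be produced by RLlib's
# offline-data writer in the config of an earlier run:
#
#     output: /tmp/flatland-out       # write sampled experience as JSON files
#     output_max_file_size: 67108864  # 64 MiB per output file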
flatland-random-sparse-small-tree-marwil-fc-ppo:
    run: MARWIL
    env: flatland_random_sparse_small
    stop:
        timesteps_total: 10000000 # 1e7
    checkpoint_freq: 10
    checkpoint_at_end: True
    keep_checkpoints_num: 5
    checkpoint_score_attr: episode_reward_mean
    config:
        beta: 0
        # grid_search: [0, 1] # compare IL (beta=0) vs MARWIL
        input: /tmp/flatland-out
        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        train_batch_size: 1000 # 5000
        rollout_fragment_length: 50 # 100
        num_workers: 2
        num_envs_per_worker: 1
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        num_gpus: 0
        env_config:
            min_seed: 1002
            max_seed: 213783
            min_test_seed: 0
            max_test_seed: 100
            # After how many episodes the level should be regenerated:
            reset_env_freq: 1
            observation: tree
            observation_config:
                max_depth: 2
                shortest_path_max_depth: 30
            regenerate_rail_on_reset: True
            regenerate_schedule_on_reset: True
            render: False
        model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
            vf_share_layers: True # False
        # evaluation_num_workers: 1
        # # Enable evaluation, once per training iteration.
        # evaluation_interval: 25
        # # Run 1 episode each time evaluation runs.
        # evaluation_num_episodes: 1
        # # Override the env config for evaluation.
        # evaluation_config:
        #     explore: False
        #     env_config:
        #         min_seed: 1002
        #         max_seed: 213783
        #         min_test_seed: 0
        #         max_test_seed: 100
        #         # After how many episodes the level should be regenerated:
        #         reset_env_freq: 1
        #         observation: tree
        #         observation_config:
        #             max_depth: 2
        #             shortest_path_max_depth: 30
        #         regenerate_rail_on_reset: True
        #         regenerate_schedule_on_reset: True
        #         render: False
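# Same MARWIL / behavioural-cloning run as above, with off-policy estimation of
# the learned policy on the offline data enabled: `is` = ordinary importance
# sampling, `wis` = weighted importance sampling, and `simulation` additionally
# rolls the policy out in the live env to report an actual episode reward.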
flatland-random-sparse-small-tree-marwil-offline-eval-fc-ppo:
    run: MARWIL
    env: flatland_random_sparse_small
    stop:
        timesteps_total: 10000000 # 1e7
    checkpoint_freq: 10
    checkpoint_at_end: True
    keep_checkpoints_num: 5
    checkpoint_score_attr: episode_reward_mean
    config:
        beta: 0
        # grid_search: [0, 1] # compare IL (beta=0) vs MARWIL
        input: /tmp/flatland-out
        input_evaluation: [is, wis, simulation]
        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        train_batch_size: 1000 # 5000
        rollout_fragment_length: 50 # 100
        num_workers: 2
        num_envs_per_worker: 1
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        num_gpus: 0
        env_config:
            min_seed: 1002
            max_seed: 213783
            min_test_seed: 0
            max_test_seed: 100
            # After how many episodes the level should be regenerated:
            reset_env_freq: 1
            observation: tree
            observation_config:
                max_depth: 2
                shortest_path_max_depth: 30
            regenerate_rail_on_reset: True
            regenerate_schedule_on_reset: True
            render: False
        model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
            vf_share_layers: True # False
        # evaluation_num_workers: 1
        # # Enable evaluation, once per training iteration.
        # evaluation_interval: 25
        # # Run 1 episode each time evaluation runs.
        # evaluation_num_episodes: 1
        # # Override the env config for evaluation.
        # evaluation_config:
        #     explore: False
        #     env_config:
        #         min_seed: 1002
        #         max_seed: 213783
        #         min_test_seed: 0
        #         max_test_seed: 100
        #         # After how many episodes the level should be regenerated:
        #         reset_env_freq: 1
        #         observation: tree
        #         observation_config:
        #             max_depth: 2
        #             shortest_path_max_depth: 30
        #         regenerate_rail_on_reset: True
        #         regenerate_schedule_on_reset: True
        #         render: False
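# DQN trained on a 50/50 input mixture: half of each batch is read from the
# recorded expert data in /tmp/flatland-out and half comes from the live
# sampler, with exploration disabled so the sampled half follows the greedy
# policy.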
flatland-random-sparse-small-tree-dqn-mixed-imitate-fc-ppo:
    run: DQN
    env: flatland_random_sparse_small
    stop:
        timesteps_total: 10000000 # 1e7
    checkpoint_freq: 10
    checkpoint_at_end: True
    keep_checkpoints_num: 5
    checkpoint_score_attr: episode_reward_mean
    config:
        input:
            "/tmp/flatland-out": 0.5
            sampler: 0.5
        explore: False
        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        train_batch_size: 1000 # 5000
        rollout_fragment_length: 50 # 100
        num_workers: 1
        num_envs_per_worker: 1
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        num_gpus: 0
        env_config:
            min_seed: 1002
            max_seed: 213783
            min_test_seed: 0
            max_test_seed: 100
            # After how many episodes the level should be regenerated:
            reset_env_freq: 1
            observation: tree
            observation_config:
                max_depth: 2
                shortest_path_max_depth: 30
            regenerate_rail_on_reset: True
            regenerate_schedule_on_reset: True
            render: False
        model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
            vf_share_layers: True # False
        # evaluation_num_workers: 1
        # # Enable evaluation, once per training iteration.
        # evaluation_interval: 25
        # # Run 1 episode each time evaluation runs.
        # evaluation_num_episodes: 1
        # # Override the env config for evaluation.
        # evaluation_config:
        #     explore: False
        #     env_config:
        #         min_seed: 1002
        #         max_seed: 213783
        #         min_test_seed: 0
        #         max_test_seed: 100
        #         # After how many episodes the level should be regenerated:
        #         reset_env_freq: 1
        #         observation: tree
        #         observation_config:
        #             max_depth: 2
        #             shortest_path_max_depth: 30
        #         regenerate_rail_on_reset: True
        #         regenerate_schedule_on_reset: True
        #         render: False
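# Mixed-input DQN as above, additionally reporting `is` and `wis` off-policy
# estimates on the offline portion of the data.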
flatland-random-sparse-small-tree-dqn-mixed-imitate-offline-eval-fc-ppo:
    run: DQN
    env: flatland_random_sparse_small
    stop:
        timesteps_total: 10000000 # 1e7
    checkpoint_freq: 10
    checkpoint_at_end: True
    keep_checkpoints_num: 5
    checkpoint_score_attr: episode_reward_mean
    config:
        input:
            "/tmp/flatland-out": 0.5
            sampler: 0.5
        input_evaluation: [is, wis]
        explore: False
        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        train_batch_size: 1000 # 5000
        rollout_fragment_length: 50 # 100
        num_workers: 1
        num_envs_per_worker: 1
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        num_gpus: 0
        env_config:
            min_seed: 1002
            max_seed: 213783
            min_test_seed: 0
            max_test_seed: 100
            # After how many episodes the level should be regenerated:
            reset_env_freq: 1
            observation: tree
            observation_config:
                max_depth: 2
                shortest_path_max_depth: 30
            regenerate_rail_on_reset: True
            regenerate_schedule_on_reset: True
            render: False
        model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
            vf_share_layers: True # False
        # evaluation_num_workers: 1
        # # Enable evaluation, once per training iteration.
        # evaluation_interval: 25
        # # Run 1 episode each time evaluation runs.
        # evaluation_num_episodes: 1
        # # Override the env config for evaluation.
        # evaluation_config:
        #     explore: False
        #     env_config:
        #         min_seed: 1002
        #         max_seed: 213783
        #         min_test_seed: 0
        #         max_test_seed: 100
        #         # After how many episodes the level should be regenerated:
        #         reset_env_freq: 1
        #         observation: tree
        #         observation_config:
        #             max_depth: 2
        #             shortest_path_max_depth: 30
        #         regenerate_rail_on_reset: True
        #         regenerate_schedule_on_reset: True
        #         render: False
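# Pure offline DQN: all training data comes from /tmp/flatland-out, exploration
# is disabled, and input_evaluation is left empty so no off-policy estimates
# are computed.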
flatland-random-sparse-small-tree-dqn-pure-imitate-fc-ppo:
    run: DQN
    env: flatland_random_sparse_small
    stop:
        timesteps_total: 10000000 # 1e7
    checkpoint_freq: 10
    checkpoint_at_end: True
    keep_checkpoints_num: 5
    checkpoint_score_attr: episode_reward_mean
    config:
        input: /tmp/flatland-out
        input_evaluation: []
        explore: False
        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        train_batch_size: 1000 # 5000
        rollout_fragment_length: 50 # 100
        num_workers: 1
        num_envs_per_worker: 1
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        num_gpus: 0
        env_config:
            min_seed: 1002
            max_seed: 213783
            min_test_seed: 0
            max_test_seed: 100
            # After how many episodes the level should be regenerated:
            reset_env_freq: 1
            observation: tree
            observation_config:
                max_depth: 2
                shortest_path_max_depth: 30
            regenerate_rail_on_reset: True
            regenerate_schedule_on_reset: True
            render: False
        model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
            vf_share_layers: True # False
        # evaluation_num_workers: 1
        # # Enable evaluation, once per training iteration.
        # evaluation_interval: 25
        # # Run 1 episode each time evaluation runs.
        # evaluation_num_episodes: 1
        # # Override the env config for evaluation.
        # evaluation_config:
        #     explore: False
        #     env_config:
        #         min_seed: 1002
        #         max_seed: 213783
        #         min_test_seed: 0
        #         max_test_seed: 100
        #         # After how many episodes the level should be regenerated:
        #         reset_env_freq: 1
        #         observation: tree
        #         observation_config:
        #             max_depth: 2
        #             shortest_path_max_depth: 30
        #         regenerate_rail_on_reset: True
        #         regenerate_schedule_on_reset: True
        #         render: False
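# Pure offline DQN variant that replaces explore: False with SoftQ exploration
# (actions sampled from a softmax over the Q-values at temperature 1.0) and
# adds `is`/`wis` off-policy estimation.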
flatland-random-sparse-small-tree-dqn-pure-imitate-softq-fc-ppo:
    run: DQN
    env: flatland_random_sparse_small
    stop:
        timesteps_total: 10000000 # 1e7
    checkpoint_freq: 10
    checkpoint_at_end: True
    keep_checkpoints_num: 5
    checkpoint_score_attr: episode_reward_mean
    config:
        input: /tmp/flatland-out
        input_evaluation: [is, wis]
        exploration_config:
            type: SoftQ
            temperature: 1.0
        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        train_batch_size: 1000 # 5000
        rollout_fragment_length: 50 # 100
        num_workers: 1
        num_envs_per_worker: 1
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        num_gpus: 0
        env_config:
            min_seed: 1002
            max_seed: 213783
            min_test_seed: 0
            max_test_seed: 100
            # After how many episodes the level should be regenerated:
            reset_env_freq: 1
            observation: tree
            observation_config:
                max_depth: 2
                shortest_path_max_depth: 30
            regenerate_rail_on_reset: True
            regenerate_schedule_on_reset: True
            render: False
        model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
            vf_share_layers: True # False
        # evaluation_num_workers: 1
        # # Enable evaluation, once per training iteration.
        # evaluation_interval: 25
        # # Run 1 episode each time evaluation runs.
        # evaluation_num_episodes: 1
        # # Override the env config for evaluation.
        # evaluation_config:
        #     explore: False
        #     env_config:
        #         min_seed: 1002
        #         max_seed: 213783
        #         min_test_seed: 0
        #         max_test_seed: 100
        #         # After how many episodes the level should be regenerated:
        #         reset_env_freq: 1
        #         observation: tree
        #         observation_config:
        #             max_depth: 2
        #             shortest_path_max_depth: 30
        #         regenerate_rail_on_reset: True
        #         regenerate_schedule_on_reset: True
        #         render: False
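# As above, with the `simulation` estimator added, so the learned policy is
# also rolled out in the live env during training for evaluation.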
flatland-random-sparse-small-tree-dqn-pure-imitate-softq-sim-eval-fc-ppo:
    run: DQN
    env: flatland_random_sparse_small
    stop:
        timesteps_total: 10000000 # 1e7
    checkpoint_freq: 10
    checkpoint_at_end: True
    keep_checkpoints_num: 5
    checkpoint_score_attr: episode_reward_mean
    config:
        input: /tmp/flatland-out
        input_evaluation: [is, wis, simulation]
        exploration_config:
            type: SoftQ
            temperature: 1.0
        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        train_batch_size: 1000 # 5000
        rollout_fragment_length: 50 # 100
        num_workers: 1
        num_envs_per_worker: 1
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        num_gpus: 0
        env_config:
            min_seed: 1002
            max_seed: 213783
            min_test_seed: 0
            max_test_seed: 100
            # After how many episodes the level should be regenerated:
            reset_env_freq: 1
            observation: tree
            observation_config:
                max_depth: 2
                shortest_path_max_depth: 30
            regenerate_rail_on_reset: True
            regenerate_schedule_on_reset: True
            render: False
        model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
            vf_share_layers: True # False
        # evaluation_num_workers: 1
        # # Enable evaluation, once per training iteration.
        # evaluation_interval: 25
        # # Run 1 episode each time evaluation runs.
        # evaluation_num_episodes: 1
        # # Override the env config for evaluation.
        # evaluation_config:
        #     explore: False
        #     env_config:
        #         min_seed: 1002
        #         max_seed: 213783
        #         min_test_seed: 0
        #         max_test_seed: 100
        #         # After how many episodes the level should be regenerated:
        #         reset_env_freq: 1
        #         observation: tree
        #         observation_config:
        #             max_depth: 2
        #             shortest_path_max_depth: 30
        #         regenerate_rail_on_reset: True
        #         regenerate_schedule_on_reset: True
        #         render: False
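# As above, but using RLlib's built-in evaluation workers instead of the
# `simulation` estimator in the training config: one evaluation worker runs a
# single episode with explore: False every 10 training iterations on the same
# env settings.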
flatland-random-sparse-small-tree-dqn-pure-imitate-softq-online-eval-fc-ppo:
    run: DQN
    env: flatland_random_sparse_small
    stop:
        timesteps_total: 10000000 # 1e7
    checkpoint_freq: 10
    checkpoint_at_end: True
    keep_checkpoints_num: 5
    checkpoint_score_attr: episode_reward_mean
    config:
        input: /tmp/flatland-out
        input_evaluation: [is, wis]
        exploration_config:
            type: SoftQ
            temperature: 1.0
        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        train_batch_size: 1000 # 5000
        rollout_fragment_length: 50 # 100
        num_workers: 1
        num_envs_per_worker: 1
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        num_gpus: 0
        env_config:
            min_seed: 1002
            max_seed: 213783
            min_test_seed: 0
            max_test_seed: 100
            # After how many episodes the level should be regenerated:
            reset_env_freq: 1
            observation: tree
            observation_config:
                max_depth: 2
                shortest_path_max_depth: 30
            regenerate_rail_on_reset: True
            regenerate_schedule_on_reset: True
            render: False
        model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
            vf_share_layers: True # False
        evaluation_num_workers: 1
        # Enable evaluation, once every 10 training iterations.
        evaluation_interval: 10
        # Run 1 episode each time evaluation runs.
        evaluation_num_episodes: 1
        # Override the env config for evaluation.
        evaluation_config:
            explore: False
            input_evaluation: [simulation]
            env_config:
                min_seed: 1002
                max_seed: 213783
                min_test_seed: 0
                max_test_seed: 100
                # After how many episodes the level should be regenerated:
                reset_env_freq: 1
                observation: tree
                observation_config:
                    max_depth: 2
                    shortest_path_max_depth: 30
                regenerate_rail_on_reset: True
                regenerate_schedule_on_reset: True
                render: False
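# Pure offline DQN with a custom model (custom_loss_model), which is assumed to
# add a supervised imitation loss on the expert transitions from `input_files`
# to the standard DQN loss; lambda1/lambda2 presumably weight the two loss
# terms. The exact semantics of these custom_options live in the model code,
# not in this file.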
flatland-random-sparse-small-tree-dqn-pure-imitate-loss-fc-ppo:
    run: DQN
    env: flatland_random_sparse_small
    stop:
        timesteps_total: 10000000 # 1e7
    checkpoint_freq: 10
    checkpoint_at_end: True
    keep_checkpoints_num: 5
    checkpoint_score_attr: episode_reward_mean
    config:
        input: /tmp/flatland-out
        input_evaluation: [is, wis, simulation]
        exploration_config:
            type: SoftQ
            temperature: 1.0
        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        train_batch_size: 1000 # 5000
        rollout_fragment_length: 50 # 100
        num_workers: 1
        num_envs_per_worker: 1
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        num_gpus: 0
        env_config:
            min_seed: 1002
            max_seed: 213783
            min_test_seed: 0
            max_test_seed: 100
            # After how many episodes the level should be regenerated:
            reset_env_freq: 1
            observation: tree
            observation_config:
                max_depth: 2
                shortest_path_max_depth: 30
            regenerate_rail_on_reset: True
            regenerate_schedule_on_reset: True
            render: False
        model:
            custom_model: custom_loss_model
            custom_options:
                input_files: /tmp/flatland-out
                lambda1: 1
                lambda2: 1
            # fcnet_activation: relu
            # fcnet_hiddens: [256, 256]
            # vf_share_layers: True # False
        # evaluation_num_workers: 1
        # # Enable evaluation, once per training iteration.
        # evaluation_interval: 25
        # # Run 1 episode each time evaluation runs.
        # evaluation_num_episodes: 1
        # # Override the env config for evaluation.
        # evaluation_config:
        #     explore: False
        #     env_config:
        #         min_seed: 1002
        #         max_seed: 213783
        #         min_test_seed: 0
        #         max_test_seed: 100
        #         # After how many episodes the level should be regenerated:
        #         reset_env_freq: 1
        #         observation: tree
        #         observation_config:
        #             max_depth: 2
        #             shortest_path_max_depth: 30
        #         regenerate_rail_on_reset: True
        #         regenerate_schedule_on_reset: True
        #         render: False