Commit 7d742c53 authored by MasterScrat's avatar MasterScrat

Experimenting with one policy handling multiple agents

parent 9b6a5594
width: 25
height: 25
number_of_agents: 2
max_num_cities: 4
grid_mode: False
max_rails_between_cities: 2
max_rails_in_city: 3
seed: 0
regenerate_rail_on_reset: True
regenerate_schedule_on_reset: True
\ No newline at end of file
width: 25
height: 25
number_of_agents: 3
max_num_cities: 4
grid_mode: False
max_rails_between_cities: 2
max_rails_in_city: 3
seed: 0
regenerate_rail_on_reset: True
regenerate_schedule_on_reset: True
\ No newline at end of file
import logging
import gym, ray
import gym
import numpy as np
from flatland.envs.malfunction_generators import no_malfunction_generator, malfunction_from_params
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from ray.rllib.agents import ppo
from envs.flatland import get_generator_config
from envs.flatland.observations import make_obs
......@@ -80,7 +80,13 @@ class FlatlandSingle(gym.Env):
# print(step_r)
# print("="*50)
return StepOutput(obs=step_r.obs[0], reward=step_r.reward[0], done=step_r.done[0], info=step_r.info[0])
return StepOutput(
obs=[step for step in step_r.obs.values()],
reward=np.sum([r for r in step_r.reward.values()]),
done=all(step_r.done.values()),
info=step_r.info[0]
)
#return step_r
def reset(self):
    """Reset the wrapped multi-agent RailEnv.

    Returns:
        list: one observation per agent, taken from the underlying
        env's per-agent obs dict in its iteration order.
    """
    step_r = self._env.reset()
    # NOTE: no trailing comma here — `return [...],` would wrap the
    # list in a 1-tuple and break downstream consumers of reset().
    return list(step_r.obs.values())
@property
def observation_space(self) -> gym.spaces.Space:
    """Joint observation space for all agents handled by this env.

    Builds the multi-agent space by replicating the single-agent
    observation space once per agent.

    Returns:
        gym.spaces.Space: a Box with a leading agent axis, or a Tuple
        with the per-agent sub-spaces repeated per agent.

    Raises:
        ValueError: if the single-agent space is neither Box nor Tuple.
    """
    single_space = self._observation.observation_space()
    n_agents = self._config['number_of_agents']
    if isinstance(single_space, gym.spaces.Box):
        # Stack per-agent observations along a new leading axis;
        # bounds are relaxed to (-inf, inf) rather than copied.
        return gym.spaces.Box(low=-np.inf, high=np.inf, shape=(n_agents, *single_space.shape,))
    elif isinstance(single_space, gym.spaces.Tuple):
        # Tuple repetition: concatenate the sub-spaces n_agents times.
        return gym.spaces.Tuple(single_space.spaces * n_agents)
    else:
        raise ValueError("Unhandled space:", single_space.__class__)
@property
def action_space(self) -> gym.spaces.Space:
    """Joint action space: one discrete action per agent.

    Returns:
        gym.spaces.MultiDiscrete: 5 discrete choices per agent
        (presumably the Flatland rail actions — TODO confirm against
        the wrapped env).
    """
    return gym.spaces.MultiDiscrete([5] * self._config['number_of_agents'])
flatland-random-sparse-small-tree-fc-ppo:
run: PPO
env: flatland_single
stop:
timesteps_total: 10000000 # 1e7
checkpoint_freq: 10
checkpoint_at_end: True
keep_checkpoints_num: 5
checkpoint_score_attr: episode_reward_mean
config:
clip_rewards: True
clip_param: 0.1
vf_clip_param: 500.0
entropy_coeff: 0.01
# effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
# see https://github.com/ray-project/ray/issues/4628
train_batch_size: 1000 # 5000
rollout_fragment_length: 50 # 100
sgd_minibatch_size: 100 # 500
num_sgd_iter: 10
num_workers: 5
num_envs_per_worker: 5
batch_mode: truncate_episodes
observation_filter: NoFilter
vf_share_layers: True
vf_loss_coeff: 0.5
num_gpus: 1
env_config:
observation: tree
observation_config:
max_depth: 2
shortest_path_max_depth: 30
generator: sparse_rail_generator
generator_config: small_single_v0
wandb:
project: flatland
entity: masterscrat
tags: ["small_single_v0", "tree_obs"] # TODO should be set programmatically
model:
fcnet_activation: relu
fcnet_hiddens: [256, 256]
vf_share_layers: True # False
flatland-random-sparse-small-tree-fc-ppo:
run: PPO
env: flatland_single
stop:
timesteps_total: 10000000 # 1e7
checkpoint_freq: 10
checkpoint_at_end: True
keep_checkpoints_num: 5
checkpoint_score_attr: episode_reward_mean
config:
clip_rewards: True
clip_param: 0.1
vf_clip_param: 500.0
entropy_coeff: 0.01
# effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
# see https://github.com/ray-project/ray/issues/4628
train_batch_size: 1000 # 5000
rollout_fragment_length: 50 # 100
sgd_minibatch_size: 100 # 500
num_sgd_iter: 10
num_workers: 5
num_envs_per_worker: 5
batch_mode: truncate_episodes
observation_filter: NoFilter
vf_share_layers: True
vf_loss_coeff: 0.5
num_gpus: 1
env_config:
observation: tree
observation_config:
max_depth: 2
shortest_path_max_depth: 30
generator: sparse_rail_generator
generator_config: small_double_v0
wandb:
project: flatland
entity: masterscrat
tags: ["small_double_v0", "tree_obs"] # TODO should be set programmatically
model:
fcnet_activation: relu
fcnet_hiddens: [256, 256]
vf_share_layers: True # False
flatland-random-sparse-small-tree-fc-ppo:
run: PPO
env: flatland_single
stop:
timesteps_total: 10000000 # 1e7
checkpoint_freq: 10
checkpoint_at_end: True
keep_checkpoints_num: 5
checkpoint_score_attr: episode_reward_mean
config:
clip_rewards: True
clip_param: 0.1
vf_clip_param: 500.0
entropy_coeff: 0.01
# effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
# see https://github.com/ray-project/ray/issues/4628
train_batch_size: 1000 # 5000
rollout_fragment_length: 50 # 100
sgd_minibatch_size: 100 # 500
num_sgd_iter: 10
num_workers: 5
num_envs_per_worker: 5
batch_mode: truncate_episodes
observation_filter: NoFilter
vf_share_layers: True
vf_loss_coeff: 0.5
num_gpus: 1
env_config:
observation: tree
observation_config:
max_depth: 2
shortest_path_max_depth: 30
generator: sparse_rail_generator
generator_config: small_triple_v0
wandb:
project: flatland
entity: masterscrat
tags: ["small_triple_v0", "tree_obs"] # TODO should be set programmatically
model:
fcnet_activation: relu
fcnet_hiddens: [256, 256]
vf_share_layers: True # False
flatland-random-sparse-small-tree-fc-ppo:
run: PPO
env: flatland_single
stop:
timesteps_total: 10000000 # 1e7
checkpoint_freq: 10
checkpoint_at_end: True
keep_checkpoints_num: 5
checkpoint_score_attr: episode_reward_mean
config:
clip_rewards: True
clip_param: 0.1
vf_clip_param: 500.0
entropy_coeff: 0.01
# effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
# see https://github.com/ray-project/ray/issues/4628
train_batch_size: 1000 # 5000
rollout_fragment_length: 50 # 100
sgd_minibatch_size: 100 # 500
num_sgd_iter: 10
num_workers: 7
num_envs_per_worker: 5
batch_mode: truncate_episodes
observation_filter: NoFilter
vf_share_layers: True
vf_loss_coeff: 0.5
num_gpus: 0
env_config:
observation: tree
observation_config:
max_depth: 2
shortest_path_max_depth: 30
generator: sparse_rail_generator
generator_config: small_single_v0
wandb:
project: flatland
entity: masterscrat
tags: ["small_single_v0", "tree_obs"] # TODO should be set programmatically
model:
fcnet_activation: relu
fcnet_hiddens: [256, 256]
vf_share_layers: True # False
flatland-sparse-single-global-conv-ppo:
run: PPO
env: flatland_single
stop:
timesteps_total: 10000000 # 1e7
checkpoint_freq: 10
checkpoint_at_end: True
keep_checkpoints_num: 5
checkpoint_score_attr: episode_reward_mean
config:
clip_rewards: True
clip_param: 0.1
vf_clip_param: 500.0
entropy_coeff: 0.01
# effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
# see https://github.com/ray-project/ray/issues/4628
train_batch_size: 1000 # 5000
rollout_fragment_length: 50 # 100
sgd_minibatch_size: 100 # 500
num_sgd_iter: 10
num_workers: 1
num_envs_per_worker: 1
batch_mode: truncate_episodes
observation_filter: NoFilter
vf_share_layers: True
vf_loss_coeff: 0.5
num_gpus: 0
env_config:
observation: global
observation_config:
max_width: 32
max_height: 32
generator: sparse_rail_generator
generator_config: small_v0
wandb:
project: flatland
entity: masterscrat
tags: ["small_v0", "global_obs"] # TODO should be set programmatically
model:
custom_model: global_obs_model
custom_options:
architecture: impala
architecture_options:
residual_layers: [[16, 2], [32, 4]]
......@@ -18,8 +18,8 @@ flatland-random-sparse-small-tree-fc-ppo:
rollout_fragment_length: 50 # 100
sgd_minibatch_size: 100 # 500
num_sgd_iter: 10
num_workers: 7
num_envs_per_worker: 5
num_workers: 1
num_envs_per_worker: 1
batch_mode: truncate_episodes
observation_filter: NoFilter
vf_share_layers: True
......@@ -33,12 +33,12 @@ flatland-random-sparse-small-tree-fc-ppo:
shortest_path_max_depth: 30
generator: sparse_rail_generator
generator_config: small_single_v0
generator_config: small_v0
wandb:
project: flatland
entity: masterscrat
tags: ["small_single_v0", "tree_obs"] # TODO should be set programmatically
tags: ["small_v0", "tree_obs"] # TODO should be set programmatically
model:
fcnet_activation: relu
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment