Commit fbdc79e8 authored by MasterScrat's avatar MasterScrat

Added tree obs, single agent experiments

parent 8fe7ea5f
# Sparse rail generator preset: small 25x25 map with a single agent.
# Consumed by get_generator_config(); keys are read in FlatlandSingle._launch.
width: 25
height: 25
number_of_agents: 1
max_num_cities: 4
grid_mode: False
max_rails_between_cities: 2
max_rails_in_city: 3
seed: 0
regenerate_rail_on_reset: True
regenerate_schedule_on_reset: True
\ No newline at end of file
......@@ -15,8 +15,10 @@ class TreeObservation(Observation):
def __init__(self, config) -> None:
super().__init__(config)
self._builder = TreeObsForRailEnvRLLibWrapper(
TreeObsForRailEnv(max_depth=config['max_depth'],
predictor=ShortestPathPredictorForRailEnv(config['shortest_path_max_depth']))
TreeObsForRailEnv(
max_depth=config['max_depth'],
predictor=ShortestPathPredictorForRailEnv(config['shortest_path_max_depth'])
)
)
def builder(self) -> ObservationBuilder:
......@@ -105,12 +107,11 @@ def _split_node_into_feature_groups(node: TreeObsForRailEnv.Node) -> (np.ndarray
def _split_subtree_into_feature_groups(node: TreeObsForRailEnv.Node, current_tree_depth: int, max_tree_depth: int) -> (np.ndarray, np.ndarray, np.ndarray):
if node == -np.inf:
remaining_depth = max_tree_depth - current_tree_depth
# reference: https://stackoverflow.com/questions/515214/total-number-of-nodes-in-a-tree-data-structure
num_remaining_nodes = int((4**(remaining_depth+1) - 1) / (4 - 1))
return [-np.inf] * num_remaining_nodes*6, [-np.inf] * num_remaining_nodes, [-np.inf] * num_remaining_nodes*4
num_remaining_nodes = int((4 ** (remaining_depth + 1) - 1) / (4 - 1))
return [-np.inf] * num_remaining_nodes * 6, [-np.inf] * num_remaining_nodes, [-np.inf] * num_remaining_nodes * 4
data, distance, agent_data = _split_node_into_feature_groups(node)
......@@ -182,4 +183,4 @@ class TreeObsForRailEnvRLLibWrapper(ObservationBuilder):
self._builder.print_subtree(node, label, indent)
def set_env(self, env):
self._builder.set_env(env)
\ No newline at end of file
self._builder.set_env(env)
import logging
import gym, ray
from flatland.envs.malfunction_generators import no_malfunction_generator, malfunction_from_params
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from ray.rllib.agents import ppo
from envs.flatland import get_generator_config
from envs.flatland.observations import make_obs
from envs.flatland.utils.rllib_wrapper import FlatlandRllibWrapper, StepOutput
class FlatlandSingle(gym.Env):
    """Single-agent gym.Env wrapper around a sparse Flatland rail environment.

    The multi-agent FlatlandRllibWrapper is driven with a fixed agent handle
    of 0: ``step`` wraps the single action into ``{0: action}`` and unwraps
    the per-agent dicts of the result, so RLlib can treat this as a plain
    single-agent environment.
    """

    def render(self, mode='human'):
        # Rendering is intentionally a no-op for this wrapper.
        pass

    def __init__(self, env_config):
        """Build the wrapped environment.

        env_config keys:
            observation: name of the observation to build (e.g. 'tree').
            observation_config: optional dict passed to the observation.
            generator_config: name of the generator preset to load.
        """
        self._observation = make_obs(env_config['observation'], env_config.get('observation_config'))
        self._config = get_generator_config(env_config['generator_config'])

        self._env = FlatlandRllibWrapper(
            rail_env=self._launch(),
            regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
            regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset']
        )

    def _launch(self):
        """Construct and reset a RailEnv from ``self._config``.

        Returns the reset RailEnv, or None if RailEnv construction raised
        ValueError (the error is logged; the caller will then wrap None —
        NOTE(review): consider re-raising instead of returning None).
        """
        rail_generator = sparse_rail_generator(
            seed=self._config['seed'],
            max_num_cities=self._config['max_num_cities'],
            grid_mode=self._config['grid_mode'],
            max_rails_between_cities=self._config['max_rails_between_cities'],
            max_rails_in_city=self._config['max_rails_in_city']
        )

        malfunction_generator = no_malfunction_generator()
        # Bug fix: the guard previously tested the keys 'min_duration' /
        # 'max_duration' while the body read 'malfunction_min_duration' /
        # 'malfunction_max_duration', so malfunctions either never activated
        # or raised KeyError. Test exactly the keys that are read below.
        if {'malfunction_rate', 'malfunction_min_duration', 'malfunction_max_duration'} <= self._config.keys():
            stochastic_data = {
                'malfunction_rate': self._config['malfunction_rate'],
                'min_duration': self._config['malfunction_min_duration'],
                'max_duration': self._config['malfunction_max_duration']
            }
            malfunction_generator = malfunction_from_params(stochastic_data)

        speed_ratio_map = None
        if 'speed_ratio_map' in self._config:
            # YAML keys/values may arrive as strings; normalize to floats.
            speed_ratio_map = {
                float(k): float(v) for k, v in self._config['speed_ratio_map'].items()
            }
        schedule_generator = sparse_schedule_generator(speed_ratio_map)

        env = None
        try:
            env = RailEnv(
                width=self._config['width'],
                height=self._config['height'],
                rail_generator=rail_generator,
                schedule_generator=schedule_generator,
                number_of_agents=self._config['number_of_agents'],
                malfunction_generator_and_process_data=malfunction_generator,
                obs_builder_object=self._observation.builder(),
                remove_agents_at_target=False,
                random_seed=self._config['seed']
            )

            env.reset()
        except ValueError as e:
            logging.error("=" * 50)
            logging.error(f"Error while creating env: {e}")
            logging.error("=" * 50)

        return env

    def step(self, action_dict):
        # Wrap the single action under agent handle 0, then unwrap the
        # per-agent result dicts back to scalars for the gym interface.
        step_r = self._env.step({0: action_dict})
        return StepOutput(obs=step_r.obs[0], reward=step_r.reward[0], done=step_r.done[0], info=step_r.info[0])

    def reset(self):
        # The wrapped reset returns per-agent observations keyed by handle;
        # return only agent 0's observation.
        obs = self._env.reset()
        return obs[0]

    @property
    def observation_space(self) -> gym.spaces.Space:
        return self._observation.observation_space()

    @property
    def action_space(self) -> gym.spaces.Space:
        return self._env.action_space
......@@ -78,7 +78,8 @@ class FlatlandSparse(MultiAgentEnv):
number_of_agents=self._config['number_of_agents'],
malfunction_generator_and_process_data=malfunction_generator,
obs_builder_object=self._observation.builder(),
remove_agents_at_target=False
remove_agents_at_target=False,
random_seed=self._config['seed']
)
env.reset()
......
# Ray Tune experiment: PPO with a fully-connected net on tree observations,
# run against the single-agent env ('flatland_single', small_single_v0 preset).
flatland-random-sparse-small-tree-fc-ppo:
    run: PPO
    env: flatland_single
    stop:
        timesteps_total: 10000000  # 1e7
    checkpoint_freq: 10
    checkpoint_at_end: True
    keep_checkpoints_num: 5
    checkpoint_score_attr: episode_reward_mean
    config:
        clip_rewards: True
        clip_param: 0.1
        vf_clip_param: 500.0
        entropy_coeff: 0.01
        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        train_batch_size: 1000  # 5000
        rollout_fragment_length: 50  # 100
        sgd_minibatch_size: 100  # 500
        num_sgd_iter: 10
        num_workers: 7
        num_envs_per_worker: 5
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        vf_share_layers: True
        vf_loss_coeff: 0.5
        num_gpus: 0
        env_config:
            observation: tree
            observation_config:
                max_depth: 2
                shortest_path_max_depth: 30
            generator: sparse_rail_generator
            generator_config: small_single_v0
            wandb:
                project: flatland
                entity: masterscrat
                tags: ["small_single_v0", "tree_obs"] # TODO should be set programmatically
        model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
            vf_share_layers: True  # False
......@@ -18,13 +18,13 @@ flatland-sparse-global-conv-ppo:
rollout_fragment_length: 50 # 100
sgd_minibatch_size: 100 # 500
num_sgd_iter: 10
num_workers: 7
num_envs_per_worker: 5
num_workers: 2
num_envs_per_worker: 2
batch_mode: truncate_episodes
observation_filter: NoFilter
vf_share_layers: True
vf_loss_coeff: 0.5
num_gpus: 1
num_gpus: 0
env_config:
observation: global
......
# Ray Tune experiment: PPO with a fully-connected net on tree observations,
# run against the multi-agent env ('flatland_sparse', small_v0 preset).
flatland-random-sparse-small-tree-fc-ppo:
    run: PPO
    env: flatland_sparse
    stop:
        timesteps_total: 10000000  # 1e7
    checkpoint_freq: 10
    checkpoint_at_end: True
    keep_checkpoints_num: 5
    checkpoint_score_attr: episode_reward_mean
    config:
        clip_rewards: True
        clip_param: 0.1
        vf_clip_param: 500.0
        entropy_coeff: 0.01
        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        train_batch_size: 1000  # 5000
        rollout_fragment_length: 50  # 100
        sgd_minibatch_size: 100  # 500
        num_sgd_iter: 10
        num_workers: 2
        num_envs_per_worker: 5
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        vf_share_layers: True
        vf_loss_coeff: 0.5
        num_gpus: 0
        env_config:
            observation: tree
            observation_config:
                max_depth: 2
                shortest_path_max_depth: 30
            generator: sparse_rail_generator
            generator_config: small_v0
            wandb:
                project: flatland
                entity: masterscrat
                tags: ["small_v0", "tree_obs"] # TODO should be set programmatically
        model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
            vf_share_layers: True  # False
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment