Commit ea5f091b authored by umbra99's avatar umbra99 Committed by nilabha

Newest custom + ccppo_base

parent 5c0f6f8b
......@@ -23,8 +23,9 @@ flatland-random-sparse-small-tree-fc-cctransformer:
batch_mode: truncate_episodes
observation_filter: NoFilter
vf_share_layers: True
vf_loss_coeff: 0.5
num_gpus: 0
lr: 0.0001
vf_loss_coeff: 0.000001
num_gpus: 1
env_config:
observation: tree
......@@ -48,9 +49,9 @@ flatland-random-sparse-small-tree-fc-cctransformer:
actor:
activation_fn: relu
hidden_layers:
- 128
- 64
- 32
- 512
- 512
- 512
critic:
centralized: True
embedding_size: 32
......@@ -59,15 +60,15 @@ flatland-random-sparse-small-tree-fc-cctransformer:
use_scale: True
activation_fn: relu
hidden_layers:
- 128
- 64
- 32
- 512
- 512
- 512
embedding:
activation_fn: relu
hidden_layers:
- 128
- 64
- 32
- 512
- 512
- 512
fcnet_activation: relu
fcnet_hiddens: [256, 256]
vf_share_layers: True # False
# Ray Tune experiment spec: centralized-critic PPO-style trainable ("CcConcatenate")
# on the Flatland sparse environment with tree observations.
# NOTE(review): leading indentation was lost in the page scrape; the nesting below is
# reconstructed from YAML semantics and the usual Ray Tune/RLlib config layout — confirm
# against the original file in the repository.
flatland-random-sparse-small-tree-fc-cctransformer:
  run: CcConcatenate            # registered trainable (centralized-critic concatenation model)
  env: flatland_sparse
  stop:
    timesteps_total: 10000000 # 1e7
  checkpoint_freq: 10
  checkpoint_at_end: True
  keep_checkpoints_num: 5
  checkpoint_score_attr: episode_reward_mean
  config:
    clip_rewards: True
    clip_param: 0.1             # PPO surrogate clip
    vf_clip_param: 500.0
    entropy_coeff: 0.01
    # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
    # see https://github.com/ray-project/ray/issues/4628
    train_batch_size: 1000 # 5000
    rollout_fragment_length: 50 # 100
    sgd_minibatch_size: 100 # 500
    num_sgd_iter: 10
    num_workers: 2
    num_envs_per_worker: 5
    batch_mode: truncate_episodes
    observation_filter: NoFilter
    vf_share_layers: True
    lr: 0.0001
    # NOTE(review): vf_loss_coeff is set extremely small (1e-6) in this commit,
    # effectively down-weighting the value-function loss — verify this is intentional.
    vf_loss_coeff: 0.000001
    num_gpus: 1
    env_config:
      observation: tree
      resolve_deadlocks: false
      observation_config:
        max_depth: 2
        shortest_path_max_depth: 30
      generator: sparse_rail_generator
      generator_config: small_v0
      wandb:
        project: flatland
        entity:                 # intentionally empty: falls back to the default W&B entity
        tags: ["small_v0", "tree_obs"] # TODO should be set programmatically
    model:
      custom_model: cc_concatenate
      custom_options:
        max_num_agents: 15
        # Actor (policy) network: three 512-unit hidden layers (widened in this commit).
        actor:
          activation_fn: relu
          hidden_layers:
            - 512
            - 512
            - 512
        # Centralized critic with attention-style parameters (heads / d_model / scaling).
        critic:
          centralized: True
          embedding_size: 32
          num_heads: 4
          d_model: 32
          use_scale: True
          activation_fn: relu
          hidden_layers:
            - 512
            - 512
            - 512
        # Observation-embedding network shared structure with actor/critic widths.
        embedding:
          activation_fn: relu
          hidden_layers:
            - 512
            - 512
            - 512
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
      vf_share_layers: True # False
This diff is collapsed.
......@@ -71,7 +71,9 @@ class CentralizedCriticModel(ABC, TFModelV2):
class CcTransformer(CentralizedCriticModel):
"""Multi-agent model that implements a centralized VF."""
def _build_actor(self, activation_fn="relu", hidden_layers=[64, 32], **kwargs):
def _build_actor(
self, activation_fn="relu", hidden_layers=[512, 512, 512], **kwargs
):
inputs = tf.keras.layers.Input(shape=(self.obs_space_shape,), name="obs")
output = build_fullyConnected(
inputs=inputs,
......@@ -86,7 +88,7 @@ class CcTransformer(CentralizedCriticModel):
def _build_critic(
self,
activation_fn="relu",
hidden_layers=[64, 32],
hidden_layers=[512, 512, 512],
centralized=True,
embedding_size=128,
num_heads=8,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment