Commit 07075277 authored by MasterScrat's avatar MasterScrat

Wandb reporting

parent 96e56bbc
wandb/
*.iml
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
......
......@@ -3,6 +3,15 @@
The basic structure of this repository is adopted from [https://github.com/spMohanty/rl-experiments/](https://github.com/spMohanty/rl-experiments/)
## Installation
Tested with Python 3.6 and 3.7
```
conda create --name ray-env python=3.7 --yes
```
You may need to install/update bazel: https://docs.bazel.build/versions/master/install-ubuntu.html
```
pip install ray[rllib]
pip install tensorflow # or tensorflow-gpu
......@@ -12,7 +21,7 @@ pip install -r requirements.txt
## Usage
```
Training example:
python ./train.py -f experiments/flatland_random_sparse_small/global_obs/ppo.yaml
python ./train.py -f experiments/flatland_random_sparse_small/global_obs_conv_net/ppo.yaml
Test example:
python ./rollout.py /tmp/ray/checkpoint_dir/checkpoint-0 --run PPO --no-render
......
import argparse
from ray.tune.config_parser import make_parser
from ray.tune.result import DEFAULT_RESULTS_DIR
EXAMPLE_USAGE = """
Training example:
python ./train.py --run DQN --env CartPole-v0 --no-log-flatland-stats
Training with Config:
python ./train.py -f experiments/flatland_random_sparse_small/global_obs/ppo.yaml
Note that -f overrides all other trial-specific command-line options.
"""
def create_parser(parser_creator=None):
parser = make_parser(
parser_creator=parser_creator,
formatter_class=argparse.RawDescriptionHelpFormatter,
description="Train a reinforcement learning agent.",
epilog=EXAMPLE_USAGE)
# See also the base parser definition in ray/tune/config_parser.py
parser.add_argument(
"--ray-address",
default=None,
type=str,
help="Connect to an existing Ray cluster at this address instead "
"of starting a new one.")
parser.add_argument(
"--ray-num-cpus",
default=None,
type=int,
help="--num-cpus to use if starting a new cluster.")
parser.add_argument(
"--ray-num-gpus",
default=None,
type=int,
help="--num-gpus to use if starting a new cluster.")
parser.add_argument(
"--ray-num-nodes",
default=None,
type=int,
help="Emulate multiple cluster nodes for debugging.")
parser.add_argument(
"--ray-redis-max-memory",
default=None,
type=int,
help="--redis-max-memory to use if starting a new cluster.")
parser.add_argument(
"--ray-memory",
default=None,
type=int,
help="--memory to use if starting a new cluster.")
parser.add_argument(
"--ray-object-store-memory",
default=None,
type=int,
help="--object-store-memory to use if starting a new cluster.")
parser.add_argument(
"--experiment-name",
default="default",
type=str,
help="Name of the subdirectory under `local_dir` to put results in.")
parser.add_argument(
"--local-dir",
default=DEFAULT_RESULTS_DIR,
type=str,
help="Local dir to save training results to. Defaults to '{}'.".format(
DEFAULT_RESULTS_DIR))
parser.add_argument(
"--upload-dir",
default="",
type=str,
help="Optional URI to sync training results to (e.g. s3://bucket).")
parser.add_argument(
"-v", action="store_true", help="Whether to use INFO level logging.")
parser.add_argument(
"-vv", action="store_true", help="Whether to use DEBUG level logging.")
parser.add_argument(
"--resume",
action="store_true",
help="Whether to attempt to resume previous Tune experiments.")
parser.add_argument(
"--torch",
action="store_true",
help="Whether to use PyTorch (instead of tf) as the DL framework.")
parser.add_argument(
"--eager",
action="store_true",
help="Whether to attempt to enable TF eager execution.")
parser.add_argument(
"--trace",
action="store_true",
help="Whether to attempt to enable tracing for eager mode.")
parser.add_argument(
"--log-flatland-stats",
action="store_true",
default=True,
help="Whether to log additional flatland specfic metrics such as percentage complete or normalized score.")
parser.add_argument(
"--bind-all",
action="store_true",
default=False,
help="Whether to expose on network (binding on all network interfaces).")
parser.add_argument(
"--env", default=None, type=str, help="The gym environment to use.")
parser.add_argument(
"--queue-trials",
action="store_true",
help=(
"Whether to queue trials when the cluster does not currently have "
"enough resources to launch one. This should be set to True when "
"running on an autoscaling cluster to enable automatic scale-up."))
parser.add_argument(
"-f",
"--config-file",
default=None,
type=str,
help="If specified, use config options from this file. Note that this "
"overrides any trial-specific options set via flags above.")
return parser
......@@ -2,3 +2,4 @@ CairoSVG==2.4.2
flatland-rl==2.1.10
pycairo==1.19.1
pyhumps==1.3.1
gputil==1.4.0
\ No newline at end of file
#!/usr/bin/env python
import argparse
import os
from pathlib import Path
import yaml
import ray
import yaml
from ray.cluster_utils import Cluster
from ray.rllib.evaluation import MultiAgentEpisode
from ray.tune.config_parser import make_parser
from ray.tune.result import DEFAULT_RESULTS_DIR
from ray.tune.resources import resources_to_json
from ray.tune.tune import _make_scheduler, run_experiments
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.tune import tune, run_experiments
from ray.tune.resources import resources_to_json
from ray.tune.tune import _make_scheduler
from argparser import create_parser
from utils.loader import load_envs, load_models
# Custom wandb logger with hotfix to allow custom callbacks
from wandblogger import WandbLogger
# Try to import both backends for flag checking/warnings.
tf = try_import_tf()
torch, _ = try_import_torch()
EXAMPLE_USAGE = """
Training example:
python ./train.py --run DQN --env CartPole-v0 --no-log-flatland-stats
Training with Config:
python ./train.py -f experiments/flatland_random_sparse_small/global_obs/ppo.yaml
Note that -f overrides all other trial-specific command-line options.
"""
# Register all necessary assets in tune registries
load_envs(os.getcwd()) # Load envs
load_models(os.getcwd()) # Load models
......@@ -68,109 +58,9 @@ def on_episode_end(info):
episode.custom_metrics["episode_score_normalized"] = episode_score * norm_factor
episode.custom_metrics["percentage_complete"] = percentage_complete
def create_parser(parser_creator=None):
parser = make_parser(
parser_creator=parser_creator,
formatter_class=argparse.RawDescriptionHelpFormatter,
description="Train a reinforcement learning agent.",
epilog=EXAMPLE_USAGE)
# See also the base parser definition in ray/tune/config_parser.py
parser.add_argument(
"--ray-address",
default=None,
type=str,
help="Connect to an existing Ray cluster at this address instead "
"of starting a new one.")
parser.add_argument(
"--ray-num-cpus",
default=None,
type=int,
help="--num-cpus to use if starting a new cluster.")
parser.add_argument(
"--ray-num-gpus",
default=None,
type=int,
help="--num-gpus to use if starting a new cluster.")
parser.add_argument(
"--ray-num-nodes",
default=None,
type=int,
help="Emulate multiple cluster nodes for debugging.")
parser.add_argument(
"--ray-redis-max-memory",
default=None,
type=int,
help="--redis-max-memory to use if starting a new cluster.")
parser.add_argument(
"--ray-memory",
default=None,
type=int,
help="--memory to use if starting a new cluster.")
parser.add_argument(
"--ray-object-store-memory",
default=None,
type=int,
help="--object-store-memory to use if starting a new cluster.")
parser.add_argument(
"--experiment-name",
default="default",
type=str,
help="Name of the subdirectory under `local_dir` to put results in.")
parser.add_argument(
"--local-dir",
default=DEFAULT_RESULTS_DIR,
type=str,
help="Local dir to save training results to. Defaults to '{}'.".format(
DEFAULT_RESULTS_DIR))
parser.add_argument(
"--upload-dir",
default="",
type=str,
help="Optional URI to sync training results to (e.g. s3://bucket).")
parser.add_argument(
"-v", action="store_true", help="Whether to use INFO level logging.")
parser.add_argument(
"-vv", action="store_true", help="Whether to use DEBUG level logging.")
parser.add_argument(
"--resume",
action="store_true",
help="Whether to attempt to resume previous Tune experiments.")
parser.add_argument(
"--torch",
action="store_true",
help="Whether to use PyTorch (instead of tf) as the DL framework.")
parser.add_argument(
"--eager",
action="store_true",
help="Whether to attempt to enable TF eager execution.")
parser.add_argument(
"--trace",
action="store_true",
help="Whether to attempt to enable tracing for eager mode.")
parser.add_argument(
"--log-flatland-stats",
action="store_true",
default=True,
help="Whether to log additional flatland specfic metrics such as percentage complete or normalized score.")
parser.add_argument(
"--env", default=None, type=str, help="The gym environment to use.")
parser.add_argument(
"--queue-trials",
action="store_true",
help=(
"Whether to queue trials when the cluster does not currently have "
"enough resources to launch one. This should be set to True when "
"running on an autoscaling cluster to enable automatic scale-up."))
parser.add_argument(
"-f",
"--config-file",
default=None,
type=str,
help="If specified, use config options from this file. Note that this "
"overrides any trial-specific options set via flags above.")
return parser
print("=" * 50)
print(episode.custom_metrics)
print("=" * 50)
def run(args, parser):
......@@ -187,8 +77,8 @@ def run(args, parser):
"checkpoint_score_attr": args.checkpoint_score_attr,
"local_dir": args.local_dir,
"resources_per_trial": (
args.resources_per_trial and
resources_to_json(args.resources_per_trial)),
args.resources_per_trial and
resources_to_json(args.resources_per_trial)),
"stop": args.stop,
"config": dict(args.config, env=args.env),
"restore": args.restore,
......@@ -198,6 +88,7 @@ def run(args, parser):
}
verbose = 1
webui_host = "localhost"
for exp in experiments.values():
# Bazel makes it hard to find files specified in `args` (and `data`).
# Look for them here.
......@@ -227,10 +118,13 @@ def run(args, parser):
if not exp["config"].get("eager"):
raise ValueError("Must enable --eager to enable tracing.")
exp["config"]["eager_tracing"] = True
if args.bind_all:
webui_host = "0.0.0.0"
if args.log_flatland_stats:
exp['config']['callbacks'] = {
'on_episode_end': on_episode_end,
}
exp['loggers'] = [WandbLogger]
if args.ray_num_nodes:
cluster = Cluster()
......@@ -249,7 +143,9 @@ def run(args, parser):
memory=args.ray_memory,
redis_max_memory=args.ray_redis_max_memory,
num_cpus=args.ray_num_cpus,
num_gpus=args.ray_num_gpus)
num_gpus=args.ray_num_gpus,
webui_host=webui_host)
run_experiments(
experiments,
scheduler=_make_scheduler(args),
......
import numbers
import wandb
from ray import tune
# ray 0.8.1 reorganized ray.tune.util -> ray.tune.utils
try:
from ray.tune.utils import flatten_dict
except ImportError:
from ray.tune.util import flatten_dict
class WandbLogger(tune.logger.Logger):
"""Pass WandbLogger to the loggers argument of tune.run
tune.run("PG", loggers=[WandbLogger], config={
"monitor": True, "env_config": {
"wandb": {"project": "my-project-name"}}})
"""
def _init(self):
self._config = None
wandb.init(**self.config.get("env_config", {}).get("wandb", {}))
def on_result(self, result):
config = result.get("config")
if config and self._config is None:
for k in config.keys():
if k != "callbacks":
if wandb.config.get(k) is None:
wandb.config[k] = config[k]
self._config = config
tmp = result.copy()
for k in ["done", "config", "pid", "timestamp"]:
if k in tmp:
del tmp[k]
metrics = {}
for key, value in flatten_dict(tmp, delimiter="/").items():
if not isinstance(value, numbers.Number):
continue
metrics[key] = value
wandb.log(metrics)
def close(self):
wandb.join()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment