diff --git a/.gitignore b/.gitignore index 10227931cf46c80550ab54b537bfc17bc8fcb418..06080501d3fc2bcc95430846bf23cd2c36dec184 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +wandb/ +*.iml + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/README.md b/README.md index b51df8beff93b02d23c96ea3b7dfe438be7c5f3e..2b1018a786bda80317277e9da30523e5ce05cfdc 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,15 @@ The basic structure of this repository is adopted from [https://github.com/spMohanty/rl-experiments/](https://github.com/spMohanty/rl-experiments/) ## Installation + +Tested with Python 3.6 and 3.7 + +``` +conda create --name ray-env python=3.7 --yes +``` + +You may need to install/update bazel: https://docs.bazel.build/versions/master/install-ubuntu.html + ``` pip install ray[rllib] pip install tensorflow # or tensorflow-gpu @@ -12,7 +21,7 @@ pip install -r requirements.txt ## Usage ``` Training example: - python ./train.py -f experiments/flatland_random_sparse_small/global_obs/ppo.yaml + python ./train.py -f experiments/flatland_random_sparse_small/global_obs_conv_net/ppo.yaml Test example: python ./rollout.py /tmp/ray/checkpoint_dir/checkpoint-0 --run PPO --no-render diff --git a/argparser.py b/argparser.py new file mode 100644 index 0000000000000000000000000000000000000000..4a0376aa33471bfbe7cb6a6dd162c64f44b5df93 --- /dev/null +++ b/argparser.py @@ -0,0 +1,124 @@ +import argparse + +from ray.tune.config_parser import make_parser +from ray.tune.result import DEFAULT_RESULTS_DIR + +EXAMPLE_USAGE = """ +Training example: + python ./train.py --run DQN --env CartPole-v0 --no-log-flatland-stats + +Training with Config: + python ./train.py -f experiments/flatland_random_sparse_small/global_obs/ppo.yaml + + +Note that -f overrides all other trial-specific command-line options. 
def create_parser(parser_creator=None):
    """Build the command-line parser for the training script.

    Starts from the base parser returned by ``make_parser`` and registers
    the Ray cluster, result-directory, logging, framework and flatland
    options on top of it.

    Args:
        parser_creator: Forwarded unchanged to ``make_parser``.

    Returns:
        The parser with every training option registered.
    """
    parser = make_parser(
        parser_creator=parser_creator,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Train a reinforcement learning agent.",
        epilog=EXAMPLE_USAGE)

    # See also the base parser definition in ray/tune/config_parser.py
    parser.add_argument(
        "--ray-address",
        default=None,
        type=str,
        help="Connect to an existing Ray cluster at this address instead "
        "of starting a new one.")

    # Integer-valued Ray options; all of them are unset by default.
    ray_int_options = (
        ("--ray-num-cpus", "--num-cpus to use if starting a new cluster."),
        ("--ray-num-gpus", "--num-gpus to use if starting a new cluster."),
        ("--ray-num-nodes", "Emulate multiple cluster nodes for debugging."),
        ("--ray-redis-max-memory",
         "--redis-max-memory to use if starting a new cluster."),
        ("--ray-memory", "--memory to use if starting a new cluster."),
        ("--ray-object-store-memory",
         "--object-store-memory to use if starting a new cluster."),
    )
    for flag, help_text in ray_int_options:
        parser.add_argument(flag, default=None, type=int, help=help_text)

    parser.add_argument(
        "--experiment-name",
        default="default",
        type=str,
        help="Name of the subdirectory under `local_dir` to put results in.")
    parser.add_argument(
        "--local-dir",
        default=DEFAULT_RESULTS_DIR,
        type=str,
        help="Local dir to save training results to. Defaults to '{}'.".format(
            DEFAULT_RESULTS_DIR))
    parser.add_argument(
        "--upload-dir",
        default="",
        type=str,
        help="Optional URI to sync training results to (e.g. s3://bucket).")

    # Plain on/off switches that default to off.
    switches = (
        ("-v", "Whether to use INFO level logging."),
        ("-vv", "Whether to use DEBUG level logging."),
        ("--resume",
         "Whether to attempt to resume previous Tune experiments."),
        ("--torch",
         "Whether to use PyTorch (instead of tf) as the DL framework."),
        ("--eager", "Whether to attempt to enable TF eager execution."),
        ("--trace", "Whether to attempt to enable tracing for eager mode."),
    )
    for flag, help_text in switches:
        parser.add_argument(flag, action="store_true", help=help_text)

    parser.add_argument(
        "--log-flatland-stats",
        action="store_true",
        default=True,
        help="Whether to log additional flatland specfic metrics such as percentage complete or normalized score.")
    # The metrics are on by default, so the ``store_true`` flag above can
    # never turn them off. The usage text advertises this negative flag;
    # it writes False into the same ``log_flatland_stats`` destination.
    parser.add_argument(
        "--no-log-flatland-stats",
        dest="log_flatland_stats",
        action="store_false",
        help="Disable logging of the additional flatland metrics.")
    parser.add_argument(
        "--bind-all",
        action="store_true",
        default=False,
        help="Whether to expose on network (binding on all network interfaces).")
    parser.add_argument(
        "--env", default=None, type=str, help="The gym environment to use.")
    parser.add_argument(
        "--queue-trials",
        action="store_true",
        help=(
            "Whether to queue trials when the cluster does not currently have "
            "enough resources to launch one. This should be set to True when "
            "running on an autoscaling cluster to enable automatic scale-up."))
    parser.add_argument(
        "-f",
        "--config-file",
        default=None,
        type=str,
        help="If specified, use config options from this file. Note that this "
        "overrides any trial-specific options set via flags above.")
    return parser
-""" - # Register all necessary assets in tune registries load_envs(os.getcwd()) # Load envs load_models(os.getcwd()) # Load models @@ -68,109 +58,9 @@ def on_episode_end(info): episode.custom_metrics["episode_score_normalized"] = episode_score * norm_factor episode.custom_metrics["percentage_complete"] = percentage_complete - -def create_parser(parser_creator=None): - parser = make_parser( - parser_creator=parser_creator, - formatter_class=argparse.RawDescriptionHelpFormatter, - description="Train a reinforcement learning agent.", - epilog=EXAMPLE_USAGE) - - # See also the base parser definition in ray/tune/config_parser.py - parser.add_argument( - "--ray-address", - default=None, - type=str, - help="Connect to an existing Ray cluster at this address instead " - "of starting a new one.") - parser.add_argument( - "--ray-num-cpus", - default=None, - type=int, - help="--num-cpus to use if starting a new cluster.") - parser.add_argument( - "--ray-num-gpus", - default=None, - type=int, - help="--num-gpus to use if starting a new cluster.") - parser.add_argument( - "--ray-num-nodes", - default=None, - type=int, - help="Emulate multiple cluster nodes for debugging.") - parser.add_argument( - "--ray-redis-max-memory", - default=None, - type=int, - help="--redis-max-memory to use if starting a new cluster.") - parser.add_argument( - "--ray-memory", - default=None, - type=int, - help="--memory to use if starting a new cluster.") - parser.add_argument( - "--ray-object-store-memory", - default=None, - type=int, - help="--object-store-memory to use if starting a new cluster.") - parser.add_argument( - "--experiment-name", - default="default", - type=str, - help="Name of the subdirectory under `local_dir` to put results in.") - parser.add_argument( - "--local-dir", - default=DEFAULT_RESULTS_DIR, - type=str, - help="Local dir to save training results to. 
Defaults to '{}'.".format( - DEFAULT_RESULTS_DIR)) - parser.add_argument( - "--upload-dir", - default="", - type=str, - help="Optional URI to sync training results to (e.g. s3://bucket).") - parser.add_argument( - "-v", action="store_true", help="Whether to use INFO level logging.") - parser.add_argument( - "-vv", action="store_true", help="Whether to use DEBUG level logging.") - parser.add_argument( - "--resume", - action="store_true", - help="Whether to attempt to resume previous Tune experiments.") - parser.add_argument( - "--torch", - action="store_true", - help="Whether to use PyTorch (instead of tf) as the DL framework.") - parser.add_argument( - "--eager", - action="store_true", - help="Whether to attempt to enable TF eager execution.") - parser.add_argument( - "--trace", - action="store_true", - help="Whether to attempt to enable tracing for eager mode.") - parser.add_argument( - "--log-flatland-stats", - action="store_true", - default=True, - help="Whether to log additional flatland specfic metrics such as percentage complete or normalized score.") - parser.add_argument( - "--env", default=None, type=str, help="The gym environment to use.") - parser.add_argument( - "--queue-trials", - action="store_true", - help=( - "Whether to queue trials when the cluster does not currently have " - "enough resources to launch one. This should be set to True when " - "running on an autoscaling cluster to enable automatic scale-up.")) - parser.add_argument( - "-f", - "--config-file", - default=None, - type=str, - help="If specified, use config options from this file. 
class WandbLogger(tune.logger.Logger):
    """Tune logger that forwards trial results to Weights & Biases.

    Hand the class itself to the ``loggers`` argument of ``tune.run``; the
    keyword arguments for ``wandb.init`` are read from the ``wandb`` entry
    of the trial's ``env_config``:

        tune.run("PG", loggers=[WandbLogger], config={
            "monitor": True, "env_config": {
                "wandb": {"project": "my-project-name"}}})
    """

    def _init(self):
        # No trial config has been mirrored into wandb.config yet.
        self._config = None
        wandb_kwargs = self.config.get("env_config", {}).get("wandb", {})
        wandb.init(**wandb_kwargs)

    def on_result(self, result):
        trial_config = result.get("config")
        # Copy the trial config into wandb.config only once, on the first
        # result that carries a non-empty config.
        if trial_config and self._config is None:
            for name, value in trial_config.items():
                if name == "callbacks":
                    continue
                # Do not overwrite entries wandb already holds.
                if wandb.config.get(name) is None:
                    wandb.config[name] = value
            self._config = trial_config

        skipped = ("done", "config", "pid", "timestamp")
        remaining = {k: v for k, v in result.items() if k not in skipped}
        flat = flatten_dict(remaining, delimiter="/")
        # Only numeric values are sent to wandb.
        numeric = {
            key: value
            for key, value in flat.items()
            if isinstance(value, numbers.Number)
        }
        wandb.log(numeric)

    def close(self):
        wandb.join()