Unverified commit 7ef08d32, authored by Kai Chen and committed by GitHub
use mmcv.init_dist (#1851)

parent ed9d42a2
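This commit removes mmdetection's own distributed-launch helper and switches every entry point to the identical init_dist that now lives in mmcv.runner. Below is a minimal sketch of how a downstream script sets up distributed training after this change; the --launcher flag and its 'none' choice mirror the existing tools/ scripts and are an assumption here, not part of this diff.

    import argparse

    # previously: from mmdet.apis import init_dist
    from mmcv.runner import init_dist

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    args = parser.parse_args()

    if args.launcher != 'none':
        # same signature as the removed helper: init_dist(launcher, backend='nccl', **kwargs)
        init_dist(args.launcher, backend='nccl')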
.pre-commit-config.yaml
@@ -8,11 +8,11 @@ repos:
     hooks:
       - id: isort
   - repo: https://github.com/pre-commit/mirrors-yapf
-    rev: 80b9cd2f0f3b1f3456a77eff3ddbaf08f18c08ae
+    rev: v0.29.0
     hooks:
       - id: yapf
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v2.3.0
+    rev: v2.4.0
     hooks:
       - id: flake8
       - id: trailing-whitespace
mmdet/apis/__init__.py
-from .env import get_root_logger, init_dist, set_random_seed
 from .inference import (async_inference_detector, inference_detector,
                         init_detector, show_result, show_result_pyplot)
-from .train import train_detector
+from .train import get_root_logger, set_random_seed, train_detector
 
 __all__ = [
-    'async_inference_detector', 'init_dist', 'get_root_logger',
-    'set_random_seed', 'train_detector', 'init_detector', 'inference_detector',
-    'show_result', 'show_result_pyplot'
+    'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector',
+    'async_inference_detector', 'inference_detector', 'show_result',
+    'show_result_pyplot'
 ]
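With env.py gone, get_root_logger and set_random_seed are re-exported from .train, so they stay importable from mmdet.apis; only init_dist moves out to mmcv. A short sketch of the imports a training script uses after this change (the names match the updated tools/train.py further down; the seed value is an arbitrary example):

    from mmcv.runner import init_dist

    from mmdet.apis import get_root_logger, set_random_seed, train_detector

    logger = get_root_logger()
    set_random_seed(0)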
mmdet/apis/env.py (deleted)
-import logging
-import os
-import random
-import subprocess
-
-import numpy as np
-import torch
-import torch.distributed as dist
-import torch.multiprocessing as mp
-from mmcv.runner import get_dist_info
-
-
-def init_dist(launcher, backend='nccl', **kwargs):
-    if mp.get_start_method(allow_none=True) is None:
-        mp.set_start_method('spawn')
-    if launcher == 'pytorch':
-        _init_dist_pytorch(backend, **kwargs)
-    elif launcher == 'mpi':
-        _init_dist_mpi(backend, **kwargs)
-    elif launcher == 'slurm':
-        _init_dist_slurm(backend, **kwargs)
-    else:
-        raise ValueError('Invalid launcher type: {}'.format(launcher))
-
-
-def _init_dist_pytorch(backend, **kwargs):
-    # TODO: use local_rank instead of rank % num_gpus
-    rank = int(os.environ['RANK'])
-    num_gpus = torch.cuda.device_count()
-    torch.cuda.set_device(rank % num_gpus)
-    dist.init_process_group(backend=backend, **kwargs)
-
-
-def _init_dist_mpi(backend, **kwargs):
-    raise NotImplementedError
-
-
-def _init_dist_slurm(backend, port=29500, **kwargs):
-    proc_id = int(os.environ['SLURM_PROCID'])
-    ntasks = int(os.environ['SLURM_NTASKS'])
-    node_list = os.environ['SLURM_NODELIST']
-    num_gpus = torch.cuda.device_count()
-    torch.cuda.set_device(proc_id % num_gpus)
-    addr = subprocess.getoutput(
-        'scontrol show hostname {} | head -n1'.format(node_list))
-    os.environ['MASTER_PORT'] = str(port)
-    os.environ['MASTER_ADDR'] = addr
-    os.environ['WORLD_SIZE'] = str(ntasks)
-    os.environ['RANK'] = str(proc_id)
-    dist.init_process_group(backend=backend)
-
-
-def set_random_seed(seed):
-    random.seed(seed)
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
-
-
-def get_root_logger(log_level=logging.INFO):
-    logger = logging.getLogger()
-    if not logger.hasHandlers():
-        logging.basicConfig(
-            format='%(asctime)s - %(levelname)s - %(message)s',
-            level=log_level)
-    rank, _ = get_dist_info()
-    if rank != 0:
-        logger.setLevel('ERROR')
-    return logger
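The file above is deleted outright: its logging and seeding helpers move into train.py below, and its launch helpers are replaced by mmcv's copies. A minimal sketch of launching under Slurm through mmcv, assuming mmcv's init_dist accepts a port keyword and reads the same SLURM_* variables as the removed _init_dist_slurm:

    from mmcv.runner import init_dist

    # Under a Slurm allocation this resolves MASTER_ADDR from SLURM_NODELIST,
    # sets MASTER_PORT/WORLD_SIZE/RANK and pins each process to a GPU,
    # mirroring the removed _init_dist_slurm helper.
    init_dist('slurm', backend='nccl', port=29500)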
mmdet/apis/train.py
 from __future__ import division
+import logging
+import random
 import re
 from collections import OrderedDict
 
+import numpy as np
 import torch
 import torch.distributed as dist
 from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
-from mmcv.runner import DistSamplerSeedHook, Runner, obj_from_dict
+from mmcv.runner import (DistSamplerSeedHook, Runner, get_dist_info,
+                         obj_from_dict)
 
 from mmdet import datasets
 from mmdet.core import (CocoDistEvalmAPHook, CocoDistEvalRecallHook,
                         DistEvalmAPHook, DistOptimizerHook, Fp16OptimizerHook)
 from mmdet.datasets import DATASETS, build_dataloader
 from mmdet.models import RPN
-from .env import get_root_logger
+
+
+def set_random_seed(seed):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+
+
+def get_root_logger(log_level=logging.INFO):
+    logger = logging.getLogger()
+    if not logger.hasHandlers():
+        logging.basicConfig(
+            format='%(asctime)s - %(levelname)s - %(message)s',
+            level=log_level)
+    rank, _ = get_dist_info()
+    if rank != 0:
+        logger.setLevel('ERROR')
+    return logger
 
 
 def parse_losses(losses):
tools/test.py
@@ -9,9 +9,8 @@ import mmcv
 import torch
 import torch.distributed as dist
 from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
-from mmcv.runner import get_dist_info, load_checkpoint
+from mmcv.runner import get_dist_info, init_dist, load_checkpoint
 
-from mmdet.apis import init_dist
 from mmdet.core import coco_eval, results2json, wrap_fp16_model
 from mmdet.datasets import build_dataloader, build_dataset
 from mmdet.models import build_detector
tools/test_robustness.py
@@ -10,13 +10,13 @@ import numpy as np
 import torch
 import torch.distributed as dist
 from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
-from mmcv.runner import get_dist_info, load_checkpoint
+from mmcv.runner import get_dist_info, init_dist, load_checkpoint
 from pycocotools.coco import COCO
 from pycocotools.cocoeval import COCOeval
 from robustness_eval import get_results
 
 from mmdet import datasets
-from mmdet.apis import init_dist, set_random_seed
+from mmdet.apis import set_random_seed
 from mmdet.core import (eval_map, fast_eval_recall, results2json,
                         wrap_fp16_model)
 from mmdet.datasets import build_dataloader, build_dataset
tools/train.py
@@ -4,10 +4,10 @@ import os
 import torch
 from mmcv import Config
+from mmcv.runner import init_dist
 
 from mmdet import __version__
-from mmdet.apis import (get_root_logger, init_dist, set_random_seed,
-                        train_detector)
+from mmdet.apis import get_root_logger, set_random_seed, train_detector
 from mmdet.datasets import build_dataset
 from mmdet.models import build_detector