From b287273c11166f0fc11cd3318d1711550d925587 Mon Sep 17 00:00:00 2001 From: Jon Crall <erotemic@gmail.com> Date: Tue, 8 Oct 2019 12:47:06 -0400 Subject: [PATCH] Expand docstring docs (#1477) * Expand docs * Fix flake8 errors * Clarify Collect docstring * revert to the original version --- mmdet/core/post_processing/merge_augs.py | 8 ++++- mmdet/datasets/pipelines/formating.py | 29 ++++++++++++++++ mmdet/models/detectors/base.py | 14 ++++++++ mmdet/models/detectors/cascade_rcnn.py | 42 ++++++++++++++++++++++++ mmdet/models/detectors/single_stage.py | 11 +++++++ mmdet/models/detectors/two_stage.py | 39 ++++++++++++++++++++++ 6 files changed, 142 insertions(+), 1 deletion(-) diff --git a/mmdet/core/post_processing/merge_augs.py b/mmdet/core/post_processing/merge_augs.py index fcee4fb..a0214d6 100644 --- a/mmdet/core/post_processing/merge_augs.py +++ b/mmdet/core/post_processing/merge_augs.py @@ -12,7 +12,13 @@ def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): aug_proposals (list[Tensor]): proposals from different testing schemes, shape (n, 5). Note that they are not rescaled to the original image size. - img_metas (list[dict]): image info including "shape_scale" and "flip". + + img_metas (list[dict]): list of image info dict where each dict has: + 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmdet/datasets/pipelines/formating.py:Collect`. + rpn_test_cfg (dict): rpn test config. Returns: diff --git a/mmdet/datasets/pipelines/formating.py b/mmdet/datasets/pipelines/formating.py index f5357f7..83385ab 100644 --- a/mmdet/datasets/pipelines/formating.py +++ b/mmdet/datasets/pipelines/formating.py @@ -134,6 +134,35 @@ class DefaultFormatBundle(object): @PIPELINES.register_module class Collect(object): + """ + Collect data from the loader relevant to the specific task. + + This is usually the last stage of the data loader pipeline. 
Typically keys + is set to some subset of "img", "proposals", "gt_bboxes", + "gt_bboxes_ignore", "gt_labels", and/or "gt_masks". + + The "img_meta" item is always populated. The contents of the "img_meta" + dictionary depend on "meta_keys". By default this includes: + + - "img_shape": shape of the image input to the network as a tuple + (h, w, c). Note that images may be zero padded on the bottom/right + if the batch tensor is larger than this shape. + + - "scale_factor": a float indicating the preprocessing scale + + - "flip": a boolean indicating if image flip transform was used + + - "filename": path to the image file + + - "ori_shape": original shape of the image as a tuple (h, w, c) + + - "pad_shape": image shape after padding + + - "img_norm_cfg": a dict of normalization information: + - mean - per channel mean subtraction + - std - per channel std divisor + - to_rgb - bool indicating if bgr was converted to rgb + """ def __init__(self, keys, diff --git a/mmdet/models/detectors/base.py b/mmdet/models/detectors/base.py index 7650878..23cb33b 100644 --- a/mmdet/models/detectors/base.py +++ b/mmdet/models/detectors/base.py @@ -45,6 +45,20 @@ class BaseDetector(nn.Module): @abstractmethod def forward_train(self, imgs, img_metas, **kwargs): + """ + Args: + imgs (list[Tensor]): list of tensors of shape (1, C, H, W). + Typically these should be mean centered and std scaled. + + img_metas (list[dict]): list of image info dict where each dict + has: + 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmdet/datasets/pipelines/formating.py:Collect`. 
+ + **kwargs: specific to concrete implementation + """ pass @abstractmethod diff --git a/mmdet/models/detectors/cascade_rcnn.py b/mmdet/models/detectors/cascade_rcnn.py index 334581b..3e2d1af 100644 --- a/mmdet/models/detectors/cascade_rcnn.py +++ b/mmdet/models/detectors/cascade_rcnn.py @@ -157,6 +157,34 @@ class CascadeRCNN(BaseDetector, RPNTestMixin): gt_bboxes_ignore=None, gt_masks=None, proposals=None): + """ + Args: + img (Tensor): of shape (B, C, H, W) encoding input images. + Typically these should be mean centered and std scaled. + + img_meta (list[dict]): list of image info dict where each dict has: + 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmdet/datasets/pipelines/formating.py:Collect`. + + gt_bboxes (list[Tensor]): each item has the truth boxes for each + image in [tl_x, tl_y, br_x, br_y] format. + + gt_labels (list[Tensor]): class indices corresponding to each box + + gt_bboxes_ignore (None | list[Tensor]): specify which bounding + boxes can be ignored when computing the loss. + + gt_masks (None | Tensor) : true segmentation masks for each box + used if the architecture supports a segmentation task. + + proposals : override rpn proposals with custom proposals. Use when + `with_rpn` is False. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ x = self.extract_feat(img) losses = dict() @@ -270,7 +298,21 @@ class CascadeRCNN(BaseDetector, RPNTestMixin): return losses def simple_test(self, img, img_meta, proposals=None, rescale=False): + """Run inference on a single image. + + Args: + img (Tensor): must be in shape (B, C, H, W) + img_meta (list[dict]): a list with one dictionary element. + See `mmdet/datasets/pipelines/formating.py:Collect` for + details of meta dicts. 
+ proposals : if specified overrides rpn proposals + rescale (bool): if True returns boxes in original image space + + Returns: + dict: results + """ x = self.extract_feat(img) + proposal_list = self.simple_test_rpn( x, img_meta, self.test_cfg.rpn) if proposals is None else proposals diff --git a/mmdet/models/detectors/single_stage.py b/mmdet/models/detectors/single_stage.py index 9587392..b25af7b 100644 --- a/mmdet/models/detectors/single_stage.py +++ b/mmdet/models/detectors/single_stage.py @@ -8,6 +8,11 @@ from .base import BaseDetector @DETECTORS.register_module class SingleStageDetector(BaseDetector): + """Base class for single-stage detectors. + + Single-stage detectors directly and densely predict bounding boxes on the + output features of the backbone+neck. + """ def __init__(self, backbone, @@ -37,12 +42,18 @@ class SingleStageDetector(BaseDetector): self.bbox_head.init_weights() def extract_feat(self, img): + """Directly extract features from the backbone+neck + """ x = self.backbone(img) if self.with_neck: x = self.neck(x) return x def forward_dummy(self, img): + """Used for computing network flops. + + See `mmdetection/tools/get_flops.py` + """ x = self.extract_feat(img) outs = self.bbox_head(x) return outs diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/two_stage.py index 6ec1541..f35d25b 100644 --- a/mmdet/models/detectors/two_stage.py +++ b/mmdet/models/detectors/two_stage.py @@ -11,6 +11,11 @@ from .test_mixins import BBoxTestMixin, MaskTestMixin, RPNTestMixin @DETECTORS.register_module class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin, MaskTestMixin): + """Base class for two-stage detectors. + + Two-stage detectors typically consist of a region proposal network and a + task-specific regression head. 
+ """ def __init__(self, backbone, @@ -82,12 +87,18 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin, self.mask_roi_extractor.init_weights() def extract_feat(self, img): + """Directly extract features from the backbone+neck + """ x = self.backbone(img) if self.with_neck: x = self.neck(x) return x def forward_dummy(self, img): + """Used for computing network flops. + + See `mmdetection/tools/get_flops.py` + """ outs = () # backbone x = self.extract_feat(img) @@ -124,6 +135,34 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin, gt_bboxes_ignore=None, gt_masks=None, proposals=None): + """ + Args: + img (Tensor): of shape (B, C, H, W) encoding input images. + Typically these should be mean centered and std scaled. + + img_meta (list[dict]): list of image info dict where each dict has: + 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmdet/datasets/pipelines/formating.py:Collect`. + + gt_bboxes (list[Tensor]): each item has the truth boxes for each + image in [tl_x, tl_y, br_x, br_y] format. + + gt_labels (list[Tensor]): class indices corresponding to each box + + gt_bboxes_ignore (None | list[Tensor]): specify which bounding + boxes can be ignored when computing the loss. + + gt_masks (None | Tensor) : true segmentation masks for each box + used if the architecture supports a segmentation task. + + proposals : override rpn proposals with custom proposals. Use when + `with_rpn` is False. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ x = self.extract_feat(img) losses = dict() -- GitLab