Merge pull request #124 from hellock/nms

Add support for soft-nms and potential new nms methods

Merge pull request #124 from hellock/nms
Add support for soft-nms and potential new nms methods
52a34b5d · Kai Chen · GitHub · 47b096a4 · e59ed54d · 52a34b5d
Unverified commit 52a34b5d, authored 6 years ago by Kai Chen and committed by GitHub 6 years ago.
--- a/README.md
+++ b/README.md
@@ -38,6 +38,7 @@ This project is released under the [Apache 2.0 license](LICENSE).

 v0.5.3 (26/11/2018)
 - Add Cascade R-CNN and Cascade Mask R-CNN.
+- Add support for Soft-NMS in config files.

 v0.5.2 (21/10/2018)
 - Add support for custom datasets.

--- a/configs/cascade_mask_rcnn_r50_fpn_1x.py
+++ b/configs/cascade_mask_rcnn_r50_fpn_1x.py
@@ -152,7 +152,10 @@ test_cfg = dict(
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
-        score_thr=0.05, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5),
+        score_thr=0.05,
+        nms=dict(type='nms', iou_thr=0.5),
+        max_per_img=100,
+        mask_thr_binary=0.5),
    keep_all_stages=False)
 # dataset settings
 dataset_type = 'CocoDataset'

--- a/configs/cascade_rcnn_r50_fpn_1x.py
+++ b/configs/cascade_rcnn_r50_fpn_1x.py
@@ -137,7 +137,8 @@ test_cfg = dict(
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
-    rcnn=dict(score_thr=0.05, max_per_img=100, nms_thr=0.5),
+    rcnn=dict(
+        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),
    keep_all_stages=False)
 # dataset settings
 dataset_type = 'CocoDataset'

--- a/configs/fast_mask_rcnn_r50_fpn_1x.py
+++ b/configs/fast_mask_rcnn_r50_fpn_1x.py
@@ -60,7 +60,10 @@ train_cfg = dict(
        debug=False))
 test_cfg = dict(
    rcnn=dict(
-        score_thr=0.05, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5))
+        score_thr=0.05,
+        nms=dict(type='nms', iou_thr=0.5),
+        max_per_img=100,
+        mask_thr_binary=0.5))
 # dataset settings
 dataset_type = 'CocoDataset'
 data_root = 'data/coco/'

--- a/configs/fast_rcnn_r50_fpn_1x.py
+++ b/configs/fast_rcnn_r50_fpn_1x.py
@@ -46,7 +46,9 @@ train_cfg = dict(
            neg_balance_thr=0),
        pos_weight=-1,
        debug=False))
-test_cfg = dict(rcnn=dict(score_thr=0.05, max_per_img=100, nms_thr=0.5))
+test_cfg = dict(
+    rcnn=dict(
+        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100))
 # dataset settings
 dataset_type = 'CocoDataset'
 data_root = 'data/coco/'

--- a/configs/faster_rcnn_r50_fpn_1x.py
+++ b/configs/faster_rcnn_r50_fpn_1x.py
@@ -81,7 +81,11 @@ test_cfg = dict(
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
-    rcnn=dict(score_thr=0.05, max_per_img=100, nms_thr=0.5))
+    rcnn=dict(
+        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
+    # soft-nms is also supported for rcnn testing
+    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
+)
 # dataset settings
 dataset_type = 'CocoDataset'
 data_root = 'data/coco/'

--- a/configs/mask_rcnn_r50_fpn_1x.py
+++ b/configs/mask_rcnn_r50_fpn_1x.py
@@ -94,7 +94,10 @@ test_cfg = dict(
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
-        score_thr=0.05, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5))
+        score_thr=0.05,
+        nms=dict(type='nms', iou_thr=0.5),
+        max_per_img=100,
+        mask_thr_binary=0.5))
 # dataset settings
 dataset_type = 'CocoDataset'
 data_root = 'data/coco/'

--- a/mmdet/core/post_processing/bbox_nms.py
+++ b/mmdet/core/post_processing/bbox_nms.py
 import torch

-from mmdet.ops import nms
+from mmdet.ops.nms import nms_wrapper


-def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_thr, max_num=-1):
+def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1):
    """NMS for multi-class bboxes.

    Args:
@@ -21,6 +21,9 @@ def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_thr, max_num=-1):
    """
    num_classes = multi_scores.shape[1]
    bboxes, labels = [], []
+    nms_cfg_ = nms_cfg.copy()
+    nms_type = nms_cfg_.pop('type', 'nms')
+    nms_op = getattr(nms_wrapper, nms_type)
    for i in range(1, num_classes):
        cls_inds = multi_scores[:, i] > score_thr
        if not cls_inds.any():
@@ -32,11 +35,9 @@ def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_thr, max_num=-1):
            _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
        _scores = multi_scores[cls_inds, i]
        cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
-        # perform nms
-        nms_keep = nms(cls_dets, nms_thr)
-        cls_dets = cls_dets[nms_keep, :]
+        cls_dets, _ = nms_op(cls_dets, **nms_cfg_)
        cls_labels = multi_bboxes.new_full(
-            (len(nms_keep), ), i - 1, dtype=torch.long)
+            (cls_dets.shape[0], ), i - 1, dtype=torch.long)
        bboxes.append(cls_dets)
        labels.append(cls_labels)
    if bboxes:

--- a/mmdet/core/post_processing/merge_augs.py
+++ b/mmdet/core/post_processing/merge_augs.py
@@ -29,9 +29,7 @@ def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
                                              scale_factor, flip)
        recovered_proposals.append(_proposals)
    aug_proposals = torch.cat(recovered_proposals, dim=0)
-    nms_keep = nms(aug_proposals, rpn_test_cfg.nms_thr,
-                   aug_proposals.get_device())
-    merged_proposals = aug_proposals[nms_keep, :]
+    merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr)
    scores = merged_proposals[:, 4]
    _, order = scores.sort(0, descending=True)
    num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])

--- a/mmdet/models/bbox_heads/bbox_head.py
+++ b/mmdet/models/bbox_heads/bbox_head.py
@@ -100,7 +100,7 @@ class BBoxHead(nn.Module):
                       img_shape,
                       scale_factor,
                       rescale=False,
-                       nms_cfg=None):
+                       cfg=None):
        if isinstance(cls_score, list):
            cls_score = sum(cls_score) / float(len(cls_score))
        scores = F.softmax(cls_score, dim=1) if cls_score is not None else None
@@ -115,12 +115,11 @@ class BBoxHead(nn.Module):
        if rescale:
            bboxes /= scale_factor

-        if nms_cfg is None:
+        if cfg is None:
            return bboxes, scores
        else:
            det_bboxes, det_labels = multiclass_nms(
-                bboxes, scores, nms_cfg.score_thr, nms_cfg.nms_thr,
-                nms_cfg.max_per_img)
+                bboxes, scores, cfg.score_thr, cfg.nms, cfg.max_per_img)

            return det_bboxes, det_labels


--- a/mmdet/models/detectors/cascade_rcnn.py
+++ b/mmdet/models/detectors/cascade_rcnn.py
@@ -218,7 +218,7 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
                    img_shape,
                    scale_factor,
                    rescale=rescale,
-                    nms_cfg=rcnn_test_cfg)
+                    cfg=rcnn_test_cfg)
                bbox_result = bbox2result(det_bboxes, det_labels,
                                          bbox_head.num_classes)
                ms_bbox_result['stage{}'.format(i)] = bbox_result
@@ -256,7 +256,7 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
            img_shape,
            scale_factor,
            rescale=rescale,
-            nms_cfg=rcnn_test_cfg)
+            cfg=rcnn_test_cfg)
        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)
        ms_bbox_result['ensemble'] = bbox_result

--- a/mmdet/models/detectors/test_mixins.py
+++ b/mmdet/models/detectors/test_mixins.py
@@ -47,7 +47,7 @@ class BBoxTestMixin(object):
            img_shape,
            scale_factor,
            rescale=rescale,
-            nms_cfg=rcnn_test_cfg)
+            cfg=rcnn_test_cfg)
        return det_bboxes, det_labels

    def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
@@ -73,15 +73,15 @@ class BBoxTestMixin(object):
                img_shape,
                scale_factor,
                rescale=False,
-                nms_cfg=None)
+                cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)
        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
-            aug_bboxes, aug_scores, img_metas, self.test_cfg.rcnn)
+            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(
-            merged_bboxes, merged_scores, self.test_cfg.rcnn.score_thr,
-            self.test_cfg.rcnn.nms_thr, self.test_cfg.rcnn.max_per_img)
+            merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,
+            rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img)
        return det_bboxes, det_labels



--- a/mmdet/models/rpn_heads/rpn_head.py
+++ b/mmdet/models/rpn_heads/rpn_head.py
@@ -234,13 +234,13 @@ class RPNHead(nn.Module):
            proposals = proposals[valid_inds, :]
            scores = scores[valid_inds]
            proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
-            nms_keep = nms(proposals, cfg.nms_thr)[:cfg.nms_post]
-            proposals = proposals[nms_keep, :]
+            proposals, _ = nms(proposals, cfg.nms_thr)
+            proposals = proposals[:cfg.nms_post, :]
            mlvl_proposals.append(proposals)
        proposals = torch.cat(mlvl_proposals, 0)
        if cfg.nms_across_levels:
-            nms_keep = nms(proposals, cfg.nms_thr)[:cfg.max_num]
-            proposals = proposals[nms_keep, :]
+            proposals, _ = nms(proposals, cfg.nms_thr)
+            proposals = proposals[:cfg.max_num, :]
        else:
            scores = proposals[:, 4]
            _, order = scores.sort(0, descending=True)

--- a/mmdet/ops/nms/cpu_soft_nms.pyx
+++ b/mmdet/ops/nms/cpu_soft_nms.pyx
@@ -3,6 +3,7 @@
 # Copyright (c) University of Maryland, College Park
 # Licensed under The MIT License [see LICENSE for details]
 # Written by Navaneeth Bodla and Bharat Singh
+# Modified by Kai Chen
 # ----------------------------------------------------------

 import numpy as np
@@ -15,12 +16,13 @@ cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
 cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    return a if a <= b else b

+
 def cpu_soft_nms(
    np.ndarray[float, ndim=2] boxes_in,
+    float iou_thr,
+    unsigned int method=1,
    float sigma=0.5,
-    float Nt=0.3,
-    float threshold=0.001,
-    unsigned int method=0
+    float min_score=0.001,
 ):
    boxes = boxes_in.copy()
    cdef unsigned int N = boxes.shape[0]
@@ -36,11 +38,11 @@ def cpu_soft_nms(
        maxscore = boxes[i, 4]
        maxpos = i

-        tx1 = boxes[i,0]
-        ty1 = boxes[i,1]
-        tx2 = boxes[i,2]
-        ty2 = boxes[i,3]
-        ts = boxes[i,4]
+        tx1 = boxes[i, 0]
+        ty1 = boxes[i, 1]
+        tx2 = boxes[i, 2]
+        ty2 = boxes[i, 3]
+        ts = boxes[i, 4]
        ti = inds[i]

        pos = i + 1
@@ -52,26 +54,26 @@ def cpu_soft_nms(
            pos = pos + 1

        # add max box as a detection
-        boxes[i,0] = boxes[maxpos,0]
-        boxes[i,1] = boxes[maxpos,1]
-        boxes[i,2] = boxes[maxpos,2]
-        boxes[i,3] = boxes[maxpos,3]
-        boxes[i,4] = boxes[maxpos,4]
+        boxes[i, 0] = boxes[maxpos, 0]
+        boxes[i, 1] = boxes[maxpos, 1]
+        boxes[i, 2] = boxes[maxpos, 2]
+        boxes[i, 3] = boxes[maxpos, 3]
+        boxes[i, 4] = boxes[maxpos, 4]
        inds[i] = inds[maxpos]

        # swap ith box with position of max box
-        boxes[maxpos,0] = tx1
-        boxes[maxpos,1] = ty1
-        boxes[maxpos,2] = tx2
-        boxes[maxpos,3] = ty2
-        boxes[maxpos,4] = ts
+        boxes[maxpos, 0] = tx1
+        boxes[maxpos, 1] = ty1
+        boxes[maxpos, 2] = tx2
+        boxes[maxpos, 3] = ty2
+        boxes[maxpos, 4] = ts
        inds[maxpos] = ti

-        tx1 = boxes[i,0]
-        ty1 = boxes[i,1]
-        tx2 = boxes[i,2]
-        ty2 = boxes[i,3]
-        ts = boxes[i,4]
+        tx1 = boxes[i, 0]
+        ty1 = boxes[i, 1]
+        tx2 = boxes[i, 2]
+        ty2 = boxes[i, 3]
+        ts = boxes[i, 4]

        pos = i + 1
        # NMS iterations, note that N changes if detection boxes fall below
@@ -89,35 +91,35 @@ def cpu_soft_nms(
                ih = (min(ty2, y2) - max(ty1, y1) + 1)
                if ih > 0:
                    ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
-                    ov = iw * ih / ua #iou between max box and detection box
+                    ov = iw * ih / ua  # iou between max box and detection box

-                    if method == 1: # linear
-                        if ov > Nt:
+                    if method == 1:  # linear
+                        if ov > iou_thr:
                            weight = 1 - ov
                        else:
                            weight = 1
-                    elif method == 2: # gaussian
-                        weight = np.exp(-(ov * ov)/sigma)
-                    else: # original NMS
-                        if ov > Nt:
+                    elif method == 2:  # gaussian
+                        weight = np.exp(-(ov * ov) / sigma)
+                    else:  # original NMS
+                        if ov > iou_thr:
                            weight = 0
                        else:
                            weight = 1

-                    boxes[pos, 4] = weight*boxes[pos, 4]
+                    boxes[pos, 4] = weight * boxes[pos, 4]

                    # if box score falls below threshold, discard the box by
                    # swapping with last box update N
-                    if boxes[pos, 4] < threshold:
-                        boxes[pos,0] = boxes[N-1, 0]
-                        boxes[pos,1] = boxes[N-1, 1]
-                        boxes[pos,2] = boxes[N-1, 2]
-                        boxes[pos,3] = boxes[N-1, 3]
-                        boxes[pos,4] = boxes[N-1, 4]
-                        inds[pos] = inds[N-1]
+                    if boxes[pos, 4] < min_score:
+                        boxes[pos, 0] = boxes[N-1, 0]
+                        boxes[pos, 1] = boxes[N-1, 1]
+                        boxes[pos, 2] = boxes[N-1, 2]
+                        boxes[pos, 3] = boxes[N-1, 3]
+                        boxes[pos, 4] = boxes[N-1, 4]
+                        inds[pos] = inds[N - 1]
                        N = N - 1
                        pos = pos - 1

            pos = pos + 1

-    return boxes[:N], inds[:N]
\ No newline at end of file
+    return boxes[:N], inds[:N]
--- a/mmdet/ops/nms/nms_wrapper.py
+++ b/mmdet/ops/nms/nms_wrapper.py
@@ -6,43 +6,58 @@ from .cpu_nms import cpu_nms
 from .cpu_soft_nms import cpu_soft_nms


-def nms(dets, thresh, device_id=None):
+def nms(dets, iou_thr, device_id=None):
    """Dispatch to either CPU or GPU NMS implementations."""
-
-    tensor_device = None
    if isinstance(dets, torch.Tensor):
-        tensor_device = dets.device
+        is_tensor = True
        if dets.is_cuda:
            device_id = dets.get_device()
-        dets = dets.detach().cpu().numpy()
-    assert isinstance(dets, np.ndarray)
+        dets_np = dets.detach().cpu().numpy()
+    elif isinstance(dets, np.ndarray):
+        is_tensor = False
+        dets_np = dets
+    else:
+        raise TypeError(
+            'dets must be either a Tensor or numpy array, but got {}'.format(
+                type(dets)))

-    if dets.shape[0] == 0:
+    if dets_np.shape[0] == 0:
        inds = []
    else:
-        inds = (gpu_nms(dets, thresh, device_id=device_id)
-                if device_id is not None else cpu_nms(dets, thresh))
+        inds = (gpu_nms(dets_np, iou_thr, device_id=device_id)
+                if device_id is not None else cpu_nms(dets_np, iou_thr))

-    if tensor_device:
-        return torch.Tensor(inds).long().to(tensor_device)
+    if is_tensor:
+        inds = dets.new_tensor(inds, dtype=torch.long)
    else:
-        return np.array(inds, dtype=np.int)
+        inds = np.array(inds, dtype=np.int64)
+    return dets[inds, :], inds


-def soft_nms(dets, Nt=0.3, method=1, sigma=0.5, min_score=0):
+def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3):
    if isinstance(dets, torch.Tensor):
-        _dets = dets.detach().cpu().numpy()
+        is_tensor = True
+        dets_np = dets.detach().cpu().numpy()
+    elif isinstance(dets, np.ndarray):
+        is_tensor = False
+        dets_np = dets
    else:
-        _dets = dets.copy()
-    assert isinstance(_dets, np.ndarray)
+        raise TypeError(
+            'dets must be either a Tensor or numpy array, but got {}'.format(
+                type(dets)))

+    method_codes = {'linear': 1, 'gaussian': 2}
+    if method not in method_codes:
+        raise ValueError('Invalid method for SoftNMS: {}'.format(method))
    new_dets, inds = cpu_soft_nms(
-        _dets, Nt=Nt, method=method, sigma=sigma, threshold=min_score)
-
-    if isinstance(dets, torch.Tensor):
-        return dets.new_tensor(
-            inds, dtype=torch.long), dets.new_tensor(new_dets)
+        dets_np,
+        iou_thr,
+        method=method_codes[method],
+        sigma=sigma,
+        min_score=min_score)
+
+    if is_tensor:
+        return dets.new_tensor(new_dets), dets.new_tensor(
+            inds, dtype=torch.long)
    else:
-        return np.array(
-            inds, dtype=np.int), np.array(
-                new_dets, dtype=np.float32)
+        return new_dets.astype(np.float32), inds.astype(np.int64)