diff --git a/mmdet/core/mask/__init__.py b/mmdet/core/mask/__init__.py index ea6179c5c385ff5bc064686e5b50bce23938e356..b703b55d3eb92821c28ef38579fcbebeb1fa12cf 100644 --- a/mmdet/core/mask/__init__.py +++ b/mmdet/core/mask/__init__.py @@ -1,11 +1,4 @@ -from .segms import (flip_segms, polys_to_mask, mask_to_bbox, - polys_to_mask_wrt_box, polys_to_boxes, rle_mask_voting, - rle_mask_nms, rle_masks_to_boxes) from .utils import split_combined_polys from .mask_target import mask_target -__all__ = [ - 'flip_segms', 'polys_to_mask', 'mask_to_bbox', 'polys_to_mask_wrt_box', - 'polys_to_boxes', 'rle_mask_voting', 'rle_mask_nms', 'rle_masks_to_boxes', - 'split_combined_polys', 'mask_target' -] +__all__ = ['split_combined_polys', 'mask_target'] diff --git a/mmdet/core/mask/segms.py b/mmdet/core/mask/segms.py deleted file mode 100644 index 9809aae3a277b2bf49d6bba945f779a4f76a0461..0000000000000000000000000000000000000000 --- a/mmdet/core/mask/segms.py +++ /dev/null @@ -1,272 +0,0 @@ -# flake8: noqa -# This file is copied from Detectron. - -# Copyright (c) 2017-present, Facebook, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -############################################################################## -"""Functions for interacting with segmentation masks in the COCO format. -The following terms are used in this module - mask: a binary mask encoded as a 2D numpy array - segm: a segmentation mask in one of the two COCO formats (polygon or RLE) - polygon: COCO's polygon format - RLE: COCO's run length encoding format -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np -import pycocotools.mask as mask_util - - -def flip_segms(segms, height, width): - """Left/right flip each mask in a list of masks.""" - - def _flip_poly(poly, width): - flipped_poly = np.array(poly) - flipped_poly[0::2] = width - np.array(poly[0::2]) - 1 - return flipped_poly.tolist() - - def _flip_rle(rle, height, width): - if 'counts' in rle and type(rle['counts']) == list: - # Magic RLE format handling painfully discovered by looking at the - # COCO API showAnns function. - rle = mask_util.frPyObjects([rle], height, width) - mask = mask_util.decode(rle) - mask = mask[:, ::-1, :] - rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8)) - return rle - - flipped_segms = [] - for segm in segms: - if type(segm) == list: - # Polygon format - flipped_segms.append([_flip_poly(poly, width) for poly in segm]) - else: - # RLE format - assert type(segm) == dict - flipped_segms.append(_flip_rle(segm, height, width)) - return flipped_segms - - -def polys_to_mask(polygons, height, width): - """Convert from the COCO polygon segmentation format to a binary mask - encoded as a 2D array of data type numpy.float32. The polygon segmentation - is understood to be enclosed inside a height x width image. The resulting - mask is therefore of shape (height, width). - """ - rle = mask_util.frPyObjects(polygons, height, width) - mask = np.array(mask_util.decode(rle), dtype=np.float32) - # Flatten in case polygons was a list - mask = np.sum(mask, axis=2) - mask = np.array(mask > 0, dtype=np.float32) - return mask - - -def mask_to_bbox(mask): - """Compute the tight bounding box of a binary mask.""" - xs = np.where(np.sum(mask, axis=0) > 0)[0] - ys = np.where(np.sum(mask, axis=1) > 0)[0] - - if len(xs) == 0 or len(ys) == 0: - return None - - x0 = xs[0] - x1 = xs[-1] - y0 = ys[0] - y1 = ys[-1] - return np.array((x0, y0, x1, y1), dtype=np.float32) - - -def polys_to_mask_wrt_box(polygons, box, M): - """Convert from the COCO polygon segmentation format to a binary mask - encoded as a 2D array of data type numpy.float32. The polygon segmentation - is understood to be enclosed in the given box and rasterized to an M x M - mask. The resulting mask is therefore of shape (M, M). - """ - w = box[2] - box[0] - h = box[3] - box[1] - - w = np.maximum(w, 1) - h = np.maximum(h, 1) - - polygons_norm = [] - for poly in polygons: - p = np.array(poly, dtype=np.float32) - p[0::2] = (p[0::2] - box[0]) * M / w - p[1::2] = (p[1::2] - box[1]) * M / h - polygons_norm.append(p) - - rle = mask_util.frPyObjects(polygons_norm, M, M) - mask = np.array(mask_util.decode(rle), dtype=np.float32) - # Flatten in case polygons was a list - mask = np.sum(mask, axis=2) - mask = np.array(mask > 0, dtype=np.float32) - return mask - - -def polys_to_boxes(polys): - """Convert a list of polygons into an array of tight bounding boxes.""" - boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32) - for i in range(len(polys)): - poly = polys[i] - x0 = min(min(p[::2]) for p in poly) - x1 = max(max(p[::2]) for p in poly) - y0 = min(min(p[1::2]) for p in poly) - y1 = max(max(p[1::2]) for p in poly) - boxes_from_polys[i, :] = [x0, y0, x1, y1] - - return boxes_from_polys - - -def rle_mask_voting(top_masks, - all_masks, - all_dets, - iou_thresh, - binarize_thresh, - method='AVG'): - """Returns new masks (in correspondence with `top_masks`) by combining - multiple overlapping masks coming from the pool of `all_masks`. Two methods - for combining masks are supported: 'AVG' uses a weighted average of - overlapping mask pixels; 'UNION' takes the union of all mask pixels. - """ - if len(top_masks) == 0: - return - - all_not_crowd = [False] * len(all_masks) - top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd) - decoded_all_masks = [ - np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks - ] - decoded_top_masks = [ - np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks - ] - all_boxes = all_dets[:, :4].astype(np.int32) - all_scores = all_dets[:, 4] - - # Fill box support with weights - mask_shape = decoded_all_masks[0].shape - mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1])) - for k in range(len(all_masks)): - ref_box = all_boxes[k] - x_0 = max(ref_box[0], 0) - x_1 = min(ref_box[2] + 1, mask_shape[1]) - y_0 = max(ref_box[1], 0) - y_1 = min(ref_box[3] + 1, mask_shape[0]) - mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k] - mask_weights = np.maximum(mask_weights, 1e-5) - - top_segms_out = [] - for k in range(len(top_masks)): - # Corner case of empty mask - if decoded_top_masks[k].sum() == 0: - top_segms_out.append(top_masks[k]) - continue - - inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0] - # Only matches itself - if len(inds_to_vote) == 1: - top_segms_out.append(top_masks[k]) - continue - - masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote] - if method == 'AVG': - ws = mask_weights[inds_to_vote] - soft_mask = np.average(masks_to_vote, axis=0, weights=ws) - mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8) - elif method == 'UNION': - # Any pixel that's on joins the mask - soft_mask = np.sum(masks_to_vote, axis=0) - mask = np.array(soft_mask > 1e-5, dtype=np.uint8) - else: - raise NotImplementedError('Method {} is unknown'.format(method)) - rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0] - top_segms_out.append(rle) - - return top_segms_out - - -def rle_mask_nms(masks, dets, thresh, mode='IOU'): - """Performs greedy non-maximum suppression based on an overlap measurement - between masks. The type of measurement is determined by `mode` and can be - either 'IOU' (standard intersection over union) or 'IOMA' (intersection over - mininum area). - """ - if len(masks) == 0: - return [] - if len(masks) == 1: - return [0] - - if mode == 'IOU': - # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(union(m1, m2)) - all_not_crowds = [False] * len(masks) - ious = mask_util.iou(masks, masks, all_not_crowds) - elif mode == 'IOMA': - # Computes ious[m1, m2] = area(intersect(m1, m2)) / min(area(m1), area(m2)) - all_crowds = [True] * len(masks) - # ious[m1, m2] = area(intersect(m1, m2)) / area(m2) - ious = mask_util.iou(masks, masks, all_crowds) - # ... = max(area(intersect(m1, m2)) / area(m2), - # area(intersect(m2, m1)) / area(m1)) - ious = np.maximum(ious, ious.transpose()) - elif mode == 'CONTAINMENT': - # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(m2) - # Which measures how much m2 is contained inside m1 - all_crowds = [True] * len(masks) - ious = mask_util.iou(masks, masks, all_crowds) - else: - raise NotImplementedError('Mode {} is unknown'.format(mode)) - - scores = dets[:, 4] - order = np.argsort(-scores) - - keep = [] - while order.size > 0: - i = order[0] - keep.append(i) - ovr = ious[i, order[1:]] - inds_to_keep = np.where(ovr <= thresh)[0] - order = order[inds_to_keep + 1] - - return keep - - -def rle_masks_to_boxes(masks): - """Computes the bounding box of each mask in a list of RLE encoded masks.""" - if len(masks) == 0: - return [] - - decoded_masks = [ - np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks - ] - - def get_bounds(flat_mask): - inds = np.where(flat_mask > 0)[0] - return inds.min(), inds.max() - - boxes = np.zeros((len(decoded_masks), 4)) - keep = [True] * len(decoded_masks) - for i, mask in enumerate(decoded_masks): - if mask.sum() == 0: - keep[i] = False - continue - flat_mask = mask.sum(axis=0) - x0, x1 = get_bounds(flat_mask) - flat_mask = mask.sum(axis=1) - y0, y1 = get_bounds(flat_mask) - boxes[i, :] = (x0, y0, x1, y1) - - return boxes, np.where(keep)[0] diff --git a/mmdet/datasets/transforms.py b/mmdet/datasets/transforms.py index d2daad15046ba9f4b6f288ef800b0ccb30f94d97..09b4f1c6614dc2dc6434b2442f1545daffeae697 100644 --- a/mmdet/datasets/transforms.py +++ b/mmdet/datasets/transforms.py @@ -2,11 +2,7 @@ import mmcv import numpy as np import torch -from mmdet.core.mask import segms - -__all__ = [ - 'ImageTransform', 'BboxTransform', 'PolyMaskTransform', 'Numpy2Tensor' -] +__all__ = ['ImageTransform', 'BboxTransform', 'MaskTransform', 'Numpy2Tensor'] class ImageTransform(object): @@ -85,26 +81,6 @@ class BboxTransform(object): return padded_bboxes -class PolyMaskTransform(object): - """Preprocess polygons.""" - - def __init__(self): - pass - - def __call__(self, gt_mask_polys, gt_poly_lens, img_h, img_w, flip=False): - if flip: - gt_mask_polys = segms.flip_segms(gt_mask_polys, img_h, img_w) - num_polys_per_mask = np.array( - [len(mask_polys) for mask_polys in gt_mask_polys], dtype=np.int64) - gt_poly_lens = np.array(gt_poly_lens, dtype=np.int64) - gt_mask_polys = [ - np.concatenate(mask_polys).astype(np.float32) - for mask_polys in gt_mask_polys - ] - gt_mask_polys = np.concatenate(gt_mask_polys) - return gt_mask_polys, gt_poly_lens, num_polys_per_mask - - class MaskTransform(object): """Preprocess masks. @@ -119,7 +95,7 @@ class MaskTransform(object): for mask in masks ] if flip: - masks = [mask[:, ::-1] for mask in masks] + masks = [mmcv.imflip(mask) for mask in masks] padded_masks = [ mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks ]