From ff2841333f1e0f63d8bf9c8d430a62a21308db3d Mon Sep 17 00:00:00 2001 From: Lawrence <694869989@qq.com> Date: Fri, 11 Oct 2019 13:19:39 +0800 Subject: [PATCH] Remove redundant data transforms (#1522) * Delete transforms.py * Delete extra_aug.py * Update __init__.py --- mmdet/datasets/__init__.py | 5 +- mmdet/datasets/extra_aug.py | 163 ---------------------------------- mmdet/datasets/transforms.py | 166 ----------------------------------- 3 files changed, 2 insertions(+), 332 deletions(-) delete mode 100644 mmdet/datasets/extra_aug.py delete mode 100644 mmdet/datasets/transforms.py diff --git a/mmdet/datasets/__init__.py b/mmdet/datasets/__init__.py index 8de55d2..7ad926d 100644 --- a/mmdet/datasets/__init__.py +++ b/mmdet/datasets/__init__.py @@ -3,7 +3,6 @@ from .cityscapes import CityscapesDataset from .coco import CocoDataset from .custom import CustomDataset from .dataset_wrappers import ConcatDataset, RepeatDataset -from .extra_aug import ExtraAugmentation from .loader import DistributedGroupSampler, GroupSampler, build_dataloader from .registry import DATASETS from .voc import VOCDataset @@ -13,6 +12,6 @@ from .xml_style import XMLDataset __all__ = [ 'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'CityscapesDataset', 'GroupSampler', 'DistributedGroupSampler', - 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'ExtraAugmentation', - 'WIDERFaceDataset', 'DATASETS', 'build_dataset' + 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'WIDERFaceDataset', + 'DATASETS', 'build_dataset' ] diff --git a/mmdet/datasets/extra_aug.py b/mmdet/datasets/extra_aug.py deleted file mode 100644 index c1bda3e..0000000 --- a/mmdet/datasets/extra_aug.py +++ /dev/null @@ -1,163 +0,0 @@ -import mmcv -import numpy as np -from numpy import random - -from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps - - -class PhotoMetricDistortion(object): - - def __init__(self, - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18): - self.brightness_delta = brightness_delta - self.contrast_lower, self.contrast_upper = contrast_range - self.saturation_lower, self.saturation_upper = saturation_range - self.hue_delta = hue_delta - - def __call__(self, img, boxes, labels): - # random brightness - if random.randint(2): - delta = random.uniform(-self.brightness_delta, - self.brightness_delta) - img += delta - - # mode == 0 --> do random contrast first - # mode == 1 --> do random contrast last - mode = random.randint(2) - if mode == 1: - if random.randint(2): - alpha = random.uniform(self.contrast_lower, - self.contrast_upper) - img *= alpha - - # convert color from BGR to HSV - img = mmcv.bgr2hsv(img) - - # random saturation - if random.randint(2): - img[..., 1] *= random.uniform(self.saturation_lower, - self.saturation_upper) - - # random hue - if random.randint(2): - img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta) - img[..., 0][img[..., 0] > 360] -= 360 - img[..., 0][img[..., 0] < 0] += 360 - - # convert color from HSV to BGR - img = mmcv.hsv2bgr(img) - - # random contrast - if mode == 0: - if random.randint(2): - alpha = random.uniform(self.contrast_lower, - self.contrast_upper) - img *= alpha - - # randomly swap channels - if random.randint(2): - img = img[..., random.permutation(3)] - - return img, boxes, labels - - -class Expand(object): - - def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)): - if to_rgb: - self.mean = mean[::-1] - else: - self.mean = mean - self.min_ratio, self.max_ratio = ratio_range - - def __call__(self, img, boxes, labels): - if random.randint(2): - return img, boxes, labels - - h, w, c = img.shape - ratio = random.uniform(self.min_ratio, self.max_ratio) - expand_img = np.full((int(h * ratio), int(w * ratio), c), - self.mean).astype(img.dtype) - left = int(random.uniform(0, w * ratio - w)) - top = int(random.uniform(0, h * ratio - h)) - expand_img[top:top + h, left:left + w] = img - img = expand_img - boxes += np.tile((left, top), 2) - return img, boxes, labels - - -class RandomCrop(object): - - def __init__(self, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3): - # 1: return ori img - self.sample_mode = (1, *min_ious, 0) - self.min_crop_size = min_crop_size - - def __call__(self, img, boxes, labels): - h, w, c = img.shape - while True: - mode = random.choice(self.sample_mode) - if mode == 1: - return img, boxes, labels - - min_iou = mode - for i in range(50): - new_w = random.uniform(self.min_crop_size * w, w) - new_h = random.uniform(self.min_crop_size * h, h) - - # h / w in [0.5, 2] - if new_h / new_w < 0.5 or new_h / new_w > 2: - continue - - left = random.uniform(w - new_w) - top = random.uniform(h - new_h) - - patch = np.array( - (int(left), int(top), int(left + new_w), int(top + new_h))) - overlaps = bbox_overlaps( - patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1) - if overlaps.min() < min_iou: - continue - - # center of boxes should inside the crop img - center = (boxes[:, :2] + boxes[:, 2:]) / 2 - mask = (center[:, 0] > patch[0]) * ( - center[:, 1] > patch[1]) * (center[:, 0] < patch[2]) * ( - center[:, 1] < patch[3]) - if not mask.any(): - continue - boxes = boxes[mask] - labels = labels[mask] - - # adjust boxes - img = img[patch[1]:patch[3], patch[0]:patch[2]] - boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:]) - boxes[:, :2] = boxes[:, :2].clip(min=patch[:2]) - boxes -= np.tile(patch[:2], 2) - - return img, boxes, labels - - -class ExtraAugmentation(object): - - def __init__(self, - photo_metric_distortion=None, - expand=None, - random_crop=None): - self.transforms = [] - if photo_metric_distortion is not None: - self.transforms.append( - PhotoMetricDistortion(**photo_metric_distortion)) - if expand is not None: - self.transforms.append(Expand(**expand)) - if random_crop is not None: - self.transforms.append(RandomCrop(**random_crop)) - - def __call__(self, img, boxes, labels): - img = img.astype(np.float32) - for transform in self.transforms: - img, boxes, labels = transform(img, boxes, labels) - return img, boxes, labels diff --git a/mmdet/datasets/transforms.py b/mmdet/datasets/transforms.py deleted file mode 100644 index 1e13d77..0000000 --- a/mmdet/datasets/transforms.py +++ /dev/null @@ -1,166 +0,0 @@ -import mmcv -import numpy as np -import torch - -__all__ = [ - 'ImageTransform', 'BboxTransform', 'MaskTransform', 'SegMapTransform', - 'Numpy2Tensor' -] - - -class ImageTransform(object): - """Preprocess an image. - - 1. rescale the image to expected size - 2. normalize the image - 3. flip the image (if needed) - 4. pad the image (if needed) - 5. transpose to (c, h, w) - """ - - def __init__(self, - mean=(0, 0, 0), - std=(1, 1, 1), - to_rgb=True, - size_divisor=None): - self.mean = np.array(mean, dtype=np.float32) - self.std = np.array(std, dtype=np.float32) - self.to_rgb = to_rgb - self.size_divisor = size_divisor - - def __call__(self, img, scale, flip=False, keep_ratio=True): - if keep_ratio: - img, scale_factor = mmcv.imrescale(img, scale, return_scale=True) - else: - img, w_scale, h_scale = mmcv.imresize( - img, scale, return_scale=True) - scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], - dtype=np.float32) - img_shape = img.shape - img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb) - if flip: - img = mmcv.imflip(img) - if self.size_divisor is not None: - img = mmcv.impad_to_multiple(img, self.size_divisor) - pad_shape = img.shape - else: - pad_shape = img_shape - img = img.transpose(2, 0, 1) - return img, img_shape, pad_shape, scale_factor - - -def bbox_flip(bboxes, img_shape, direction='horizontal'): - """Flip bboxes horizontally or vertically. - - Args: - bboxes(ndarray): shape (..., 4*k) - img_shape(tuple): (height, width) - """ - assert bboxes.shape[-1] % 4 == 0 - flipped = bboxes.copy() - if direction == 'horizontal': - w = img_shape[1] - flipped[..., 0::4] = w - bboxes[..., 2::4] - 1 - flipped[..., 2::4] = w - bboxes[..., 0::4] - 1 - else: - h = img_shape[0] - flipped[..., 1::4] = h - bboxes[..., 3::4] - 1 - flipped[..., 3::4] = h - bboxes[..., 1::4] - 1 - return flipped - - -class BboxTransform(object): - """Preprocess gt bboxes. - - 1. rescale bboxes according to image size - 2. flip bboxes (if needed) - 3. pad the first dimension to `max_num_gts` - """ - - def __init__(self, max_num_gts=None): - self.max_num_gts = max_num_gts - - def __call__(self, bboxes, img_shape, scale_factor, flip=False): - gt_bboxes = bboxes * scale_factor - if flip: - gt_bboxes = bbox_flip(gt_bboxes, img_shape) - gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1) - gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1) - if self.max_num_gts is None: - return gt_bboxes - else: - num_gts = gt_bboxes.shape[0] - padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32) - padded_bboxes[:num_gts, :] = gt_bboxes - return padded_bboxes - - -class MaskTransform(object): - """Preprocess masks. - - 1. resize masks to expected size and stack to a single array - 2. flip the masks (if needed) - 3. pad the masks (if needed) - """ - - def __call__(self, masks, pad_shape, scale_factor, flip=False): - # aspect ratio unchanged - if isinstance(scale_factor, float): - masks = [ - mmcv.imrescale(mask, scale_factor, interpolation='nearest') - for mask in masks - ] - # aspect ratio changed - else: - w_ratio, h_ratio = scale_factor[:2] - if masks: - h, w = masks[0].shape[:2] - new_h = int(np.round(h * h_ratio)) - new_w = int(np.round(w * w_ratio)) - new_size = (new_w, new_h) - masks = [ - mmcv.imresize(mask, new_size, interpolation='nearest') - for mask in masks - ] - if flip: - masks = [mask[:, ::-1] for mask in masks] - padded_masks = [ - mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks - ] - padded_masks = np.stack(padded_masks, axis=0) - return padded_masks - - -class SegMapTransform(object): - """Preprocess semantic segmentation maps. - - 1. rescale the segmentation map to expected size - 3. flip the image (if needed) - 4. pad the image (if needed) - """ - - def __init__(self, size_divisor=None): - self.size_divisor = size_divisor - - def __call__(self, img, scale, flip=False, keep_ratio=True): - if keep_ratio: - img = mmcv.imrescale(img, scale, interpolation='nearest') - else: - img = mmcv.imresize(img, scale, interpolation='nearest') - if flip: - img = mmcv.imflip(img) - if self.size_divisor is not None: - img = mmcv.impad_to_multiple(img, self.size_divisor) - return img - - -class Numpy2Tensor(object): - - def __init__(self): - pass - - def __call__(self, *args): - if len(args) == 1: - return torch.from_numpy(args[0]) - else: - return tuple([torch.from_numpy(np.array(array)) for array in args]) -- GitLab