diff --git a/.gitignore b/.gitignore index ffbae97a51e885187c5fc0c0485e58bf6067e310..01c47d6e277dba0d7b880dff88f9695f9a8eec50 100644 --- a/.gitignore +++ b/.gitignore @@ -104,4 +104,6 @@ venv.bak/ .mypy_cache/ # cython generated cpp -mmdet/ops/nms/*.cpp \ No newline at end of file +mmdet/ops/nms/*.cpp +mmdet/version.py +data diff --git a/TDL.md b/TDL.md deleted file mode 100644 index 1679338c04733a1a23e5dc2e8ac96069c6b3c41e..0000000000000000000000000000000000000000 --- a/TDL.md +++ /dev/null @@ -1,29 +0,0 @@ -### MMCV -- [ ] Implement the attr 'get' of 'Config' -- [ ] Config bugs: None type to '{}' with addict -- [ ] Default logger should be only with gpu0 -- [ ] Unit Test: mmcv and mmcv.torchpack - - -### MMDetection - -#### Basic -- [ ] Implement training function without distributed -- [ ] Verify nccl/nccl2/gloo -- [ ] Replace UGLY code: params plug in 'args' to reach a global flow -- [ ] Replace 'print' by 'logger' - - -#### Testing -- [ ] Implement distributed testing -- [ ] Implement single gpu testing - - -#### Refactor -- [ ] Re-consider params names -- [ ] Refactor functions in 'core' -- [ ] Merge single test & aug test as one function, so as other redundancy - -#### New features -- [ ] Plug loss params into Config -- [ ] Multi-head communication diff --git a/tools/examples/r50_fpn_frcnn_1x.py b/configs/faster_rcnn_r50_fpn_1x.py similarity index 53% rename from tools/examples/r50_fpn_frcnn_1x.py rename to configs/faster_rcnn_r50_fpn_1x.py index 6814445f8e1ba10a5fad24502ac8aff535f60f21..f4803f0b045e3801d2a09b652d6869625fb589f0 100644 --- a/tools/examples/r50_fpn_frcnn_1x.py +++ b/configs/faster_rcnn_r50_fpn_1x.py @@ -1,14 +1,14 @@ # model settings model = dict( - pretrained= - '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth', + type='FasterRCNN', + pretrained='modelzoo://resnet50', backbone=dict( type='resnet', depth=50, num_stages=4, out_indices=(0, 1, 2, 3), frozen_stages=1, - style='fb'), + style='pytorch'), neck=dict( type='FPN', in_channels=[256, 512, 1024, 2048], @@ -18,15 +18,14 @@ model = dict( type='RPNHead', in_channels=256, feat_channels=256, - coarsest_stride=32, anchor_scales=[8], anchor_ratios=[0.5, 1.0, 2.0], anchor_strides=[4, 8, 16, 32, 64], target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0], use_sigmoid_cls=True), - roi_block=dict( - type='SingleLevelRoI', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), out_channels=256, featmap_strides=[4, 8, 16, 32]), @@ -40,28 +39,23 @@ model = dict( target_means=[0., 0., 0., 0.], target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=False)) -meta_params = dict( - rpn_train_cfg = dict( +# model training and testing settings +train_cfg = dict( + rpn=dict( pos_fraction=0.5, pos_balance_sampling=False, neg_pos_ub=256, allowed_border=0, + crowd_thr=1.1, anchor_batch_size=256, pos_iou_thr=0.7, neg_iou_thr=0.3, neg_balance_thr=0, - min_pos_iou=1e-3, + min_pos_iou=0.3, pos_weight=-1, smoothl1_beta=1 / 9.0, debug=False), - rpn_test_cfg = dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn_train_cfg = dict( + rcnn=dict( pos_iou_thr=0.5, neg_iou_thr=0.5, crowd_thr=1.1, @@ -71,55 +65,84 @@ meta_params = dict( pos_balance_sampling=False, neg_pos_ub=512, neg_balance_thr=0, + min_pos_iou=1.1, pos_weight=-1, - debug=False), - rcnn_test_cfg = dict(score_thr=1e-3, max_per_img=100, nms_thr=0.5) -) + debug=False)) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + 
nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict(score_thr=0.05, max_per_img=100, nms_thr=0.5)) # dataset settings -data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/' +dataset_type = 'CocoDataset' +data_root = 'data/coco/' img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - to_rgb=True) -img_per_gpu = 1 -data_workers = 2 -train_dataset = dict( - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5) -test_dataset = dict( - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32) + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5, + with_mask=False, + with_crowd=True, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_label=False, + test_mode=True)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) # learning policy -lr_policy = dict( +lr_config = dict( policy='step', warmup='linear', warmup_iters=500, - warmup_ratio=0.333, + warmup_ratio=1.0 / 3, step=[8, 11]) -max_epoch = 12 checkpoint_config = dict(interval=1) -dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') -# logging settings -log_level = 'INFO' # yapf:disable log_config = dict( interval=50, hooks=[ dict(type='TextLoggerHook'), - # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')), + # dict(type='TensorboardLoggerHook') ]) # yapf:enable -work_dir = './model/r50_fpn_frcnn_1x' +# runtime settings +total_epochs = 12 +device_ids = range(8) +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/faster_rcnn_r50_fpn_1x' load_from = None resume_from = None workflow = [('train', 1)] diff --git a/tools/examples/r50_fpn_maskrcnn_1x.py b/configs/mask_rcnn_r50_fpn_1x.py similarity index 57% rename from tools/examples/r50_fpn_maskrcnn_1x.py rename to configs/mask_rcnn_r50_fpn_1x.py index 49b32037ec5139ee64d21bc6e9c607dcd69da018..4760821e24464b2e21d5ac0b0b0418f4163e9494 100644 --- a/tools/examples/r50_fpn_maskrcnn_1x.py +++ b/configs/mask_rcnn_r50_fpn_1x.py @@ -1,14 +1,14 @@ # model settings model = dict( - pretrained= - '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth', + type='MaskRCNN', + pretrained='modelzoo://resnet50', backbone=dict( type='resnet', depth=50, num_stages=4, out_indices=(0, 1, 2, 3), frozen_stages=1, - style='fb'), + style='pytorch'), neck=dict( type='FPN', in_channels=[256, 512, 
1024, 2048], @@ -18,15 +18,14 @@ model = dict( type='RPNHead', in_channels=256, feat_channels=256, - coarsest_stride=32, anchor_scales=[8], anchor_ratios=[0.5, 1.0, 2.0], anchor_strides=[4, 8, 16, 32, 64], target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0], use_sigmoid_cls=True), - roi_block=dict( - type='SingleLevelRoI', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), out_channels=256, featmap_strides=[4, 8, 16, 32]), @@ -40,8 +39,8 @@ model = dict( target_means=[0., 0., 0., 0.], target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=False), - mask_block=dict( - type='SingleLevelRoI', + mask_roi_extractor=dict( + type='SingleRoIExtractor', roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), out_channels=256, featmap_strides=[4, 8, 16, 32]), @@ -51,28 +50,23 @@ model = dict( in_channels=256, conv_out_channels=256, num_classes=81)) -meta_params = dict( - rpn_train_cfg=dict( +# model training and testing settings +train_cfg = dict( + rpn=dict( pos_fraction=0.5, pos_balance_sampling=False, neg_pos_ub=256, allowed_border=0, + crowd_thr=1.1, anchor_batch_size=256, pos_iou_thr=0.7, neg_iou_thr=0.3, neg_balance_thr=0, - min_pos_iou=1e-3, + min_pos_iou=0.3, pos_weight=-1, smoothl1_beta=1 / 9.0, debug=False), - rpn_test_cfg=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn_train_cfg=dict( + rcnn=dict( mask_size=28, pos_iou_thr=0.5, neg_iou_thr=0.5, @@ -83,54 +77,85 @@ meta_params = dict( pos_balance_sampling=False, neg_pos_ub=512, neg_balance_thr=0, + min_pos_iou=1.1, pos_weight=-1, - debug=False), - rcnn_test_cfg=dict( - score_thr=1e-3, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5)) + debug=False)) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5)) # dataset settings -data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/' +dataset_type = 'CocoDataset' +data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -img_per_gpu = 1 -data_workers = 2 -train_dataset = dict( - with_mask=True, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5) -test_dataset = dict( - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5, + with_mask=True, + with_crowd=True, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=True, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + 
with_label=False, + test_mode=True)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) # learning policy -lr_policy = dict( +lr_config = dict( policy='step', warmup='linear', warmup_iters=500, - warmup_ratio=0.333, + warmup_ratio=1.0 / 3, step=[8, 11]) -max_epoch = 12 checkpoint_config = dict(interval=1) -dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') -# logging settings -log_level = 'INFO' # yapf:disable log_config = dict( interval=50, hooks=[ dict(type='TextLoggerHook'), - # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')), + # dict(type='TensorboardLoggerHook') ]) # yapf:enable -work_dir = './model/r50_fpn_mask_rcnn_1x' +# runtime settings +total_epochs = 12 +device_ids = range(8) +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/mask_rcnn_r50_fpn_1x' load_from = None resume_from = None workflow = [('train', 1)] diff --git a/configs/rpn_r50_fpn_1x.py b/configs/rpn_r50_fpn_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..4e45eb9e41b8b727256b2abfe974e12802b73560 --- /dev/null +++ b/configs/rpn_r50_fpn_1x.py @@ -0,0 +1,118 @@ +# model settings +model = dict( + type='RPN', + pretrained='modelzoo://resnet50', + backbone=dict( + type='resnet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_scales=[8], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + use_sigmoid_cls=True)) +# model training and testing settings +train_cfg = dict( + rpn=dict( + pos_fraction=0.5, + pos_balance_sampling=False, + neg_pos_ub=256, + allowed_border=0, + crowd_thr=1.1, + anchor_batch_size=256, + pos_iou_thr=0.7, + neg_iou_thr=0.3, + neg_balance_thr=0, + min_pos_iou=0.3, + pos_weight=-1, + smoothl1_beta=1 / 9.0, + debug=False)) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0)) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5, + with_mask=False, + with_crowd=False, + with_label=False), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_crowd=False, + with_label=False), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +# runner configs +optimizer_config = 
dict(grad_clip=dict(max_norm=35, norm_type=2)) +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/rpn_r50_fpn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/mmdet/__init__.py b/mmdet/__init__.py index 58f3ace6c03d093337c9fa417ccbe8bc267b6c69..1c4f7e8fcc54041e383b72d48860ccbdc3afc41c 100644 --- a/mmdet/__init__.py +++ b/mmdet/__init__.py @@ -1 +1,3 @@ -from .version import __version__ +from .version import __version__, short_version + +__all__ = ['__version__', 'short_version'] diff --git a/mmdet/core/__init__.py b/mmdet/core/__init__.py index 52ed690e6689abdd1dcc4af6ccb237f1d3fbdad9..645d5be29c039aeb2173525163b681675741d7ea 100644 --- a/mmdet/core/__init__.py +++ b/mmdet/core/__init__.py @@ -1,9 +1,7 @@ -from .train_engine import * -from .test_engine import * -from .rpn_ops import * -from .bbox_ops import * -from .mask_ops import * -from .losses import * -from .eval import * -from .post_processing import * -from .utils import * +from .anchor import * # noqa: F401, F403 +from .bbox import * # noqa: F401, F403 +from .mask import * # noqa: F401, F403 +from .loss import * # noqa: F401, F403 +from .evaluation import * # noqa: F401, F403 +from .post_processing import * # noqa: F401, F403 +from .utils import * # noqa: F401, F403 diff --git a/mmdet/core/anchor/__init__.py b/mmdet/core/anchor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0ff430a4be1825fbbaa3cb31d54de8790aa2fb90 --- /dev/null +++ b/mmdet/core/anchor/__init__.py @@ -0,0 +1,4 @@ +from .anchor_generator import AnchorGenerator +from .anchor_target import anchor_target + +__all__ = ['AnchorGenerator', 'anchor_target'] diff --git a/mmdet/core/rpn_ops/anchor_generator.py b/mmdet/core/anchor/anchor_generator.py similarity index 98% rename from mmdet/core/rpn_ops/anchor_generator.py rename to mmdet/core/anchor/anchor_generator.py index e7a1fa256fb6d4df69be77a341728ed194b54b7e..84600be331e52d9a64f70e2cb43696b82801bf0e 100644 --- a/mmdet/core/rpn_ops/anchor_generator.py +++ b/mmdet/core/anchor/anchor_generator.py @@ -50,15 +50,18 @@ class AnchorGenerator(object): return yy, xx def grid_anchors(self, featmap_size, stride=16, device='cuda'): + base_anchors = self.base_anchors.to(device) + feat_h, feat_w = featmap_size shift_x = torch.arange(0, feat_w, device=device) * stride shift_y = torch.arange(0, feat_h, device=device) * stride shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) + shifts = shifts.type_as(base_anchors) # first feat_w elements correspond to the first row of shifts # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get # shifted anchors (K, A, 4), reshape to (K*A, 4) - base_anchors = self.base_anchors.to(device) + all_anchors = base_anchors[None, :, :] + shifts[:, None, :] all_anchors = all_anchors.view(-1, 4) # first A rows correspond to A anchors of (0, 0) in feature map, diff --git a/mmdet/core/anchor/anchor_target.py b/mmdet/core/anchor/anchor_target.py new file mode 100644 index 0000000000000000000000000000000000000000..ad81e390e6dcb2a064862818a34ea99adbe462e0 --- /dev/null +++ b/mmdet/core/anchor/anchor_target.py @@ 
-0,0 +1,149 @@ +import torch + +from ..bbox import bbox_assign, bbox2delta, bbox_sampling +from ..utils import multi_apply + + +def anchor_target(anchor_list, valid_flag_list, gt_bboxes_list, img_metas, + target_means, target_stds, cfg): + """Compute regression and classification targets for anchors. + + Args: + anchor_list (list[list]): Multi level anchors of each image. + valid_flag_list (list[list]): Multi level valid flags of each image. + gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image. + img_metas (list[dict]): Meta info of each image. + target_means (Iterable): Mean value of regression targets. + target_stds (Iterable): Std value of regression targets. + cfg (dict): RPN train configs. + + Returns: + tuple + """ + num_imgs = len(img_metas) + assert len(anchor_list) == len(valid_flag_list) == num_imgs + + # anchor number of multi levels + num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]] + # concat all level anchors and flags to a single tensor + for i in range(num_imgs): + assert len(anchor_list[i]) == len(valid_flag_list[i]) + anchor_list[i] = torch.cat(anchor_list[i]) + valid_flag_list[i] = torch.cat(valid_flag_list[i]) + + # compute targets for each image + means_replicas = [target_means for _ in range(num_imgs)] + stds_replicas = [target_stds for _ in range(num_imgs)] + cfg_replicas = [cfg for _ in range(num_imgs)] + (all_labels, all_label_weights, all_bbox_targets, + all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply( + anchor_target_single, anchor_list, valid_flag_list, gt_bboxes_list, + img_metas, means_replicas, stds_replicas, cfg_replicas) + # no valid anchors + if any([labels is None for labels in all_labels]): + return None + # sampled anchors of all images + num_total_samples = sum([ + max(pos_inds.numel() + neg_inds.numel(), 1) + for pos_inds, neg_inds in zip(pos_inds_list, neg_inds_list) + ]) + # split targets to a list w.r.t. multiple levels + labels_list = images_to_levels(all_labels, num_level_anchors) + label_weights_list = images_to_levels(all_label_weights, num_level_anchors) + bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors) + bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors) + return (labels_list, label_weights_list, bbox_targets_list, + bbox_weights_list, num_total_samples) + + +def images_to_levels(target, num_level_anchors): + """Convert targets by image to targets by feature level. + + [target_img0, target_img1] -> [target_level0, target_level1, ...] 
+ """ + target = torch.stack(target, 0) + level_targets = [] + start = 0 + for n in num_level_anchors: + end = start + n + level_targets.append(target[:, start:end].squeeze(0)) + start = end + return level_targets + + +def anchor_target_single(flat_anchors, valid_flags, gt_bboxes, img_meta, + target_means, target_stds, cfg): + inside_flags = anchor_inside_flags(flat_anchors, valid_flags, + img_meta['img_shape'][:2], + cfg.allowed_border) + if not inside_flags.any(): + return (None, ) * 6 + # assign gt and sample anchors + anchors = flat_anchors[inside_flags, :] + assigned_gt_inds, argmax_overlaps, max_overlaps = bbox_assign( + anchors, + gt_bboxes, + pos_iou_thr=cfg.pos_iou_thr, + neg_iou_thr=cfg.neg_iou_thr, + min_pos_iou=cfg.min_pos_iou) + pos_inds, neg_inds = bbox_sampling(assigned_gt_inds, cfg.anchor_batch_size, + cfg.pos_fraction, cfg.neg_pos_ub, + cfg.pos_balance_sampling, max_overlaps, + cfg.neg_balance_thr) + + bbox_targets = torch.zeros_like(anchors) + bbox_weights = torch.zeros_like(anchors) + labels = torch.zeros_like(assigned_gt_inds) + label_weights = torch.zeros_like(assigned_gt_inds, dtype=anchors.dtype) + + if len(pos_inds) > 0: + pos_anchors = anchors[pos_inds, :] + pos_gt_bbox = gt_bboxes[assigned_gt_inds[pos_inds] - 1, :] + pos_bbox_targets = bbox2delta(pos_anchors, pos_gt_bbox, target_means, + target_stds) + bbox_targets[pos_inds, :] = pos_bbox_targets + bbox_weights[pos_inds, :] = 1.0 + labels[pos_inds] = 1 + if cfg.pos_weight <= 0: + label_weights[pos_inds] = 1.0 + else: + label_weights[pos_inds] = cfg.pos_weight + if len(neg_inds) > 0: + label_weights[neg_inds] = 1.0 + + # map up to original set of anchors + num_total_anchors = flat_anchors.size(0) + labels = unmap(labels, num_total_anchors, inside_flags) + label_weights = unmap(label_weights, num_total_anchors, inside_flags) + bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) + bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) + + return (labels, label_weights, bbox_targets, bbox_weights, pos_inds, + neg_inds) + + +def anchor_inside_flags(flat_anchors, valid_flags, img_shape, + allowed_border=0): + img_h, img_w = img_shape[:2] + if allowed_border >= 0: + inside_flags = valid_flags & \ + (flat_anchors[:, 0] >= -allowed_border) & \ + (flat_anchors[:, 1] >= -allowed_border) & \ + (flat_anchors[:, 2] < img_w + allowed_border) & \ + (flat_anchors[:, 3] < img_h + allowed_border) + else: + inside_flags = valid_flags + return inside_flags + + +def unmap(data, count, inds, fill=0): + """ Unmap a subset of item (data) back to the original set of items (of + size count) """ + if data.dim() == 1: + ret = data.new_full((count, ), fill) + ret[inds] = data + else: + new_size = (count, ) + data.size()[1:] + ret = data.new_full(new_size, fill) + ret[inds, :] = data + return ret diff --git a/mmdet/core/bbox/__init__.py b/mmdet/core/bbox/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a5c21dce52f25781e2e4e3e760a837d4d36eec5c --- /dev/null +++ b/mmdet/core/bbox/__init__.py @@ -0,0 +1,15 @@ +from .geometry import bbox_overlaps +from .sampling import (random_choice, bbox_assign, bbox_assign_wrt_overlaps, + bbox_sampling, bbox_sampling_pos, bbox_sampling_neg, + sample_bboxes) +from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, + bbox_mapping_back, bbox2roi, roi2bbox, bbox2result) +from .bbox_target import bbox_target + +__all__ = [ + 'bbox_overlaps', 'random_choice', 'bbox_assign', + 'bbox_assign_wrt_overlaps', 'bbox_sampling', 'bbox_sampling_pos', 
+ 'bbox_sampling_neg', 'sample_bboxes', 'bbox2delta', 'delta2bbox', + 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', + 'bbox2result', 'bbox_target' +] diff --git a/mmdet/core/bbox_ops/bbox_target.py b/mmdet/core/bbox/bbox_target.py similarity index 60% rename from mmdet/core/bbox_ops/bbox_target.py rename to mmdet/core/bbox/bbox_target.py index ce1f885e184a37779c7636f8c6053248e8cd3330..2e205c3850c9bc232b99826a23e79f416a3dbcfb 100644 --- a/mmdet/core/bbox_ops/bbox_target.py +++ b/mmdet/core/bbox/bbox_target.py @@ -1,8 +1,7 @@ -import mmcv import torch -from .geometry import bbox_overlaps -from .transforms import bbox_transform, bbox_transform_inv +from .transforms import bbox2delta +from ..utils import multi_apply def bbox_target(pos_proposals_list, @@ -13,33 +12,23 @@ def bbox_target(pos_proposals_list, reg_num_classes=1, target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0], - return_list=False): - img_per_gpu = len(pos_proposals_list) - all_labels = [] - all_label_weights = [] - all_bbox_targets = [] - all_bbox_weights = [] - for img_id in range(img_per_gpu): - pos_proposals = pos_proposals_list[img_id] - neg_proposals = neg_proposals_list[img_id] - pos_gt_bboxes = pos_gt_bboxes_list[img_id] - pos_gt_labels = pos_gt_labels_list[img_id] - debug_img = debug_imgs[img_id] if cfg.debug else None - labels, label_weights, bbox_targets, bbox_weights = proposal_target_single( - pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, - reg_num_classes, cfg, target_means, target_stds) - all_labels.append(labels) - all_label_weights.append(label_weights) - all_bbox_targets.append(bbox_targets) - all_bbox_weights.append(bbox_weights) + concat=True): + labels, label_weights, bbox_targets, bbox_weights = multi_apply( + proposal_target_single, + pos_proposals_list, + neg_proposals_list, + pos_gt_bboxes_list, + pos_gt_labels_list, + cfg=cfg, + reg_num_classes=reg_num_classes, + target_means=target_means, + target_stds=target_stds) - if return_list: - return all_labels, all_label_weights, all_bbox_targets, all_bbox_weights - - labels = torch.cat(all_labels, 0) - label_weights = torch.cat(all_label_weights, 0) - bbox_targets = torch.cat(all_bbox_targets, 0) - bbox_weights = torch.cat(all_bbox_weights, 0) + if concat: + labels = torch.cat(labels, 0) + label_weights = torch.cat(label_weights, 0) + bbox_targets = torch.cat(bbox_targets, 0) + bbox_weights = torch.cat(bbox_weights, 0) return labels, label_weights, bbox_targets, bbox_weights @@ -47,8 +36,8 @@ def proposal_target_single(pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, - reg_num_classes, cfg, + reg_num_classes=1, target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0]): num_pos = pos_proposals.size(0) @@ -62,8 +51,8 @@ def proposal_target_single(pos_proposals, labels[:num_pos] = pos_gt_labels pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight label_weights[:num_pos] = pos_weight - pos_bbox_targets = bbox_transform(pos_proposals, pos_gt_bboxes, - target_means, target_stds) + pos_bbox_targets = bbox2delta(pos_proposals, pos_gt_bboxes, + target_means, target_stds) bbox_targets[:num_pos, :] = pos_bbox_targets bbox_weights[:num_pos, :] = 1 if num_neg > 0: diff --git a/mmdet/core/bbox_ops/geometry.py b/mmdet/core/bbox/geometry.py similarity index 100% rename from mmdet/core/bbox_ops/geometry.py rename to mmdet/core/bbox/geometry.py diff --git a/mmdet/core/bbox_ops/sampling.py b/mmdet/core/bbox/sampling.py similarity index 61% rename from mmdet/core/bbox_ops/sampling.py rename to 
mmdet/core/bbox/sampling.py index eed820496409f1f8265f73e81bd4667e6b1558f8..976cd9507f2279b663d3f5e09ed1180da5b457c1 100644 --- a/mmdet/core/bbox_ops/sampling.py +++ b/mmdet/core/bbox/sampling.py @@ -5,6 +5,11 @@ from .geometry import bbox_overlaps def random_choice(gallery, num): + """Random select some elements from the gallery. + + It seems that Pytorch's implementation is slower than numpy so we use numpy + to randperm the indices. + """ assert len(gallery) >= num if isinstance(gallery, list): gallery = np.array(gallery) @@ -12,38 +17,42 @@ def random_choice(gallery, num): np.random.shuffle(cands) rand_inds = cands[:num] if not isinstance(gallery, np.ndarray): - rand_inds = torch.from_numpy(rand_inds).long() - if gallery.is_cuda: - rand_inds = rand_inds.cuda(gallery.get_device()) + rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) return gallery[rand_inds] def bbox_assign(proposals, gt_bboxes, - gt_crowd_bboxes=None, + gt_bboxes_ignore=None, gt_labels=None, pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=.0, crowd_thr=-1): - """Assign a corresponding gt bbox or background to each proposal/anchor - This function assign a gt bbox to every proposal, each proposals will be - assigned with -1, 0, or a positive number. -1 means don't care, 0 means - negative sample, positive number is the index (1-based) of assigned gt. - If gt_crowd_bboxes is not None, proposals which have iof(intersection over foreground) - with crowd bboxes over crowd_thr will be ignored + """Assign a corresponding gt bbox or background to each proposal/anchor. + + Each proposals will be assigned with `-1`, `0`, or a positive integer. + + - -1: don't care + - 0: negative sample, no assigned gt + - positive integer: positive sample, index (1-based) of assigned gt + + If `gt_bboxes_ignore` is specified, bboxes which have iof (intersection + over foreground) with `gt_bboxes_ignore` above `crowd_thr` will be ignored. + Args: - proposals(Tensor): proposals or RPN anchors, shape (n, 4) - gt_bboxes(Tensor): shape (k, 4) - gt_crowd_bboxes(Tensor): shape(m, 4) - gt_labels(Tensor, optional): shape (k, ) - pos_iou_thr(float): iou threshold for positive bboxes - neg_iou_thr(float or tuple): iou threshold for negative bboxes - min_pos_iou(float): minimum iou for a bbox to be considered as a positive bbox, - for RPN, it is usually set as 0, for Fast R-CNN, - it is usually set as pos_iou_thr - crowd_thr: ignore proposals which have iof(intersection over foreground) with - crowd bboxes over crowd_thr + proposals (Tensor): Proposals or RPN anchors, shape (n, 4). + gt_bboxes (Tensor): Ground truth bboxes, shape (k, 4). + gt_bboxes_ignore (Tensor, optional): shape(m, 4). + gt_labels (Tensor, optional): shape (k, ). + pos_iou_thr (float): IoU threshold for positive bboxes. + neg_iou_thr (float or tuple): IoU threshold for negative bboxes. + min_pos_iou (float): Minimum iou for a bbox to be considered as a + positive bbox. For RPN, it is usually set as 0.3, for Fast R-CNN, + it is usually set as pos_iou_thr + crowd_thr (float): IoF threshold for ignoring bboxes. Negative value + for not ignoring any bboxes. 
+ Returns: tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, ) """ @@ -54,45 +63,50 @@ def bbox_assign(proposals, raise ValueError('No gt bbox or proposals') # ignore proposals according to crowd bboxes - if (crowd_thr > 0) and (gt_crowd_bboxes is - not None) and (gt_crowd_bboxes.numel() > 0): - crowd_overlaps = bbox_overlaps(proposals, gt_crowd_bboxes, mode='iof') + if (crowd_thr > 0) and (gt_bboxes_ignore is + not None) and (gt_bboxes_ignore.numel() > 0): + crowd_overlaps = bbox_overlaps(proposals, gt_bboxes_ignore, mode='iof') crowd_max_overlaps, _ = crowd_overlaps.max(dim=1) crowd_bboxes_inds = torch.nonzero( crowd_max_overlaps > crowd_thr).long() if crowd_bboxes_inds.numel() > 0: overlaps[crowd_bboxes_inds, :] = -1 - return bbox_assign_via_overlaps(overlaps, gt_labels, pos_iou_thr, + return bbox_assign_wrt_overlaps(overlaps, gt_labels, pos_iou_thr, neg_iou_thr, min_pos_iou) -def bbox_assign_via_overlaps(overlaps, +def bbox_assign_wrt_overlaps(overlaps, gt_labels=None, pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=.0): - """Assign a corresponding gt bbox or background to each proposal/anchor - This function assign a gt bbox to every proposal, each proposals will be + """Assign a corresponding gt bbox or background to each proposal/anchor. + + This method assign a gt bbox to every proposal, each proposals will be assigned with -1, 0, or a positive number. -1 means don't care, 0 means negative sample, positive number is the index (1-based) of assigned gt. The assignment is done in following steps, the order matters: + 1. assign every anchor to -1 2. assign proposals whose iou with all gts < neg_iou_thr to 0 3. for each anchor, if the iou with its nearest gt >= pos_iou_thr, assign it to that bbox 4. for each gt bbox, assign its nearest proposals(may be more than one) to itself + Args: - overlaps(Tensor): overlaps between n proposals and k gt_bboxes, shape(n, k) - gt_labels(Tensor, optional): shape (k, ) - pos_iou_thr(float): iou threshold for positive bboxes - neg_iou_thr(float or tuple): iou threshold for negative bboxes - min_pos_iou(float): minimum iou for a bbox to be considered as a positive bbox, - for RPN, it is usually set as 0, for Fast R-CNN, - it is usually set as pos_iou_thr + overlaps (Tensor): Overlaps between n proposals and k gt_bboxes, + shape(n, k). + gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ). + pos_iou_thr (float): IoU threshold for positive bboxes. + neg_iou_thr (float or tuple): IoU threshold for negative bboxes. + min_pos_iou (float): Minimum IoU for a bbox to be considered as a + positive bbox. This argument only affects the 4th step. + Returns: - tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, ) + tuple: (assigned_gt_inds, [assigned_labels], argmax_overlaps, + max_overlaps), shape (n, ) """ num_bboxes, num_gts = overlaps.size(0), overlaps.size(1) # 1. assign -1 by default @@ -138,8 +152,9 @@ def bbox_assign_via_overlaps(overlaps, return assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps -def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True): - """Balance sampling for positive bboxes/anchors +def bbox_sampling_pos(assigned_gt_inds, num_expected, balance_sampling=True): + """Balance sampling for positive bboxes/anchors. + 1. calculate average positive num for each gt: num_per_gt 2. sample at most num_per_gt positives for each gt 3. 
random sampling from rest anchors if not enough fg @@ -180,15 +195,16 @@ def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True): return sampled_inds -def sample_negatives(assigned_gt_inds, - num_expected, - max_overlaps=None, - balance_thr=0, - hard_fraction=0.5): - """Balance sampling for negative bboxes/anchors - negative samples are split into 2 set: hard(balance_thr <= iou < neg_iou_thr) - and easy(iou < balance_thr), around equal number of bg are sampled - from each set. +def bbox_sampling_neg(assigned_gt_inds, + num_expected, + max_overlaps=None, + balance_thr=0, + hard_fraction=0.5): + """Balance sampling for negative bboxes/anchors. + + Negative samples are split into 2 set: hard (balance_thr <= iou < + neg_iou_thr) and easy(iou < balance_thr). The sampling ratio is controlled + by `hard_fraction`. """ neg_inds = torch.nonzero(assigned_gt_inds == 0) if neg_inds.numel() != 0: @@ -241,55 +257,87 @@ def bbox_sampling(assigned_gt_inds, max_overlaps=None, neg_balance_thr=0, neg_hard_fraction=0.5): + """Sample positive and negative bboxes given assigned results. + + Args: + assigned_gt_inds (Tensor): Assigned gt indices for each bbox. + num_expected (int): Expected total samples (pos and neg). + pos_fraction (float): Positive sample fraction. + neg_pos_ub (float): Negative/Positive upper bound. + pos_balance_sampling(bool): Whether to sample positive samples around + each gt bbox evenly. + max_overlaps (Tensor, optional): For each bbox, the max IoU of all gts. + Used for negative balance sampling only. + neg_balance_thr (float, optional): IoU threshold for simple/hard + negative balance sampling. + neg_hard_fraction (float, optional): Fraction of hard negative samples + for negative balance sampling. + + Returns: + tuple[Tensor]: positive bbox indices, negative bbox indices. + """ num_expected_pos = int(num_expected * pos_fraction) - pos_inds = sample_positives(assigned_gt_inds, num_expected_pos, - pos_balance_sampling) + pos_inds = bbox_sampling_pos(assigned_gt_inds, num_expected_pos, + pos_balance_sampling) + # We found that sampled indices have duplicated items occasionally. + # (mab be a bug of PyTorch) + pos_inds = pos_inds.unique() num_sampled_pos = pos_inds.numel() num_neg_max = int( neg_pos_ub * num_sampled_pos) if num_sampled_pos > 0 else int(neg_pos_ub) num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos) - neg_inds = sample_negatives(assigned_gt_inds, num_expected_neg, - max_overlaps, neg_balance_thr, - neg_hard_fraction) + neg_inds = bbox_sampling_neg(assigned_gt_inds, num_expected_neg, + max_overlaps, neg_balance_thr, + neg_hard_fraction) + neg_inds = neg_inds.unique() return pos_inds, neg_inds +def sample_bboxes(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg): + """Sample positive and negative bboxes. -def sample_proposals(proposals_list, gt_bboxes_list, gt_crowds_list, - gt_labels_list, cfg): - cfg_list = [cfg for _ in range(len(proposals_list))] - results = map(sample_proposals_single, proposals_list, gt_bboxes_list, - gt_crowds_list, gt_labels_list, cfg_list) - # list of tuple to tuple of list - return tuple(map(list, zip(*results))) + This is a simple implementation of bbox sampling given candidates and + ground truth bboxes, which includes 3 steps. + 1. Assign gt to each bbox. + 2. Add gt bboxes to the sampling pool (optional). + 3. Perform positive and negative sampling. 
-def sample_proposals_single(proposals, - gt_bboxes, - gt_crowds, - gt_labels, - cfg): - proposals = proposals[:, :4] + Args: + bboxes (Tensor): Boxes to be sampled from. + gt_bboxes (Tensor): Ground truth bboxes. + gt_bboxes_ignore (Tensor): Ignored ground truth bboxes. In MS COCO, + `crowd` bboxes are considered as ignored. + gt_labels (Tensor): Class labels of ground truth bboxes. + cfg (dict): Sampling configs. + + Returns: + tuple[Tensor]: pos_bboxes, neg_bboxes, pos_assigned_gt_inds, + pos_gt_bboxes, pos_gt_labels + """ + bboxes = bboxes[:, :4] assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \ - bbox_assign( - proposals, gt_bboxes, gt_crowds, gt_labels, cfg.pos_iou_thr, - cfg.neg_iou_thr, cfg.pos_iou_thr, cfg.crowd_thr) + bbox_assign(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, + cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.min_pos_iou, + cfg.crowd_thr) + if cfg.add_gt_as_proposals: - proposals = torch.cat([gt_bboxes, proposals], dim=0) + bboxes = torch.cat([gt_bboxes, bboxes], dim=0) gt_assign_self = torch.arange( - 1, len(gt_labels) + 1, dtype=torch.long, device=proposals.device) + 1, len(gt_labels) + 1, dtype=torch.long, device=bboxes.device) assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds]) assigned_labels = torch.cat([gt_labels, assigned_labels]) pos_inds, neg_inds = bbox_sampling( assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, cfg.neg_pos_ub, cfg.pos_balance_sampling, max_overlaps, cfg.neg_balance_thr) - pos_proposals = proposals[pos_inds] - neg_proposals = proposals[neg_inds] + + pos_bboxes = bboxes[pos_inds] + neg_bboxes = bboxes[neg_inds] pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1 pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :] pos_gt_labels = assigned_labels[pos_inds] - return (pos_inds, neg_inds, pos_proposals, neg_proposals, - pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels) + return (pos_bboxes, neg_bboxes, pos_assigned_gt_inds, pos_gt_bboxes, + pos_gt_labels) diff --git a/mmdet/core/bbox_ops/transforms.py b/mmdet/core/bbox/transforms.py similarity index 84% rename from mmdet/core/bbox_ops/transforms.py rename to mmdet/core/bbox/transforms.py index a9f1e2a45fab42652189e84f42aadc2e5f7a8994..0d8f6f44f20df5c019dc8ed9ea46c2eb6c411c66 100644 --- a/mmdet/core/bbox_ops/transforms.py +++ b/mmdet/core/bbox/transforms.py @@ -3,7 +3,7 @@ import numpy as np import torch -def bbox_transform(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]): +def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]): assert proposals.size() == gt.size() proposals = proposals.float() @@ -31,12 +31,12 @@ def bbox_transform(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]): return deltas -def bbox_transform_inv(rois, - deltas, - means=[0, 0, 0, 0], - stds=[1, 1, 1, 1], - max_shape=None, - wh_ratio_clip=16 / 1000): +def delta2bbox(rois, + deltas, + means=[0, 0, 0, 0], + stds=[1, 1, 1, 1], + max_shape=None, + wh_ratio_clip=16 / 1000): means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4) stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4) denorm_deltas = deltas * stds + means @@ -69,10 +69,14 @@ def bbox_transform_inv(rois, def bbox_flip(bboxes, img_shape): - """Flip bboxes horizontally + """Flip bboxes horizontally. + Args: - bboxes(Tensor): shape (..., 4*k) - img_shape(Tensor): image shape + bboxes(Tensor or ndarray): Shape (..., 4*k) + img_shape(tuple): Image shape. + + Returns: + Same type as `bboxes`: Flipped bboxes. 
""" if isinstance(bboxes, torch.Tensor): assert bboxes.shape[-1] % 4 == 0 @@ -84,25 +88,28 @@ def bbox_flip(bboxes, img_shape): return mmcv.bbox_flip(bboxes, img_shape) -def bbox_mapping(bboxes, img_shape, flip): +def bbox_mapping(bboxes, img_shape, scale_factor, flip): """Map bboxes from the original image scale to testing scale""" - new_bboxes = bboxes * img_shape[-1] + new_bboxes = bboxes * scale_factor if flip: new_bboxes = bbox_flip(new_bboxes, img_shape) return new_bboxes -def bbox_mapping_back(bboxes, img_shape, flip): +def bbox_mapping_back(bboxes, img_shape, scale_factor, flip): """Map bboxes from testing scale to original image scale""" new_bboxes = bbox_flip(bboxes, img_shape) if flip else bboxes - new_bboxes = new_bboxes / img_shape[-1] + new_bboxes = new_bboxes / scale_factor return new_bboxes def bbox2roi(bbox_list): """Convert a list of bboxes to roi format. + Args: - bbox_list (Tensor): a list of bboxes corresponding to a list of images + bbox_list (list[Tensor]): a list of bboxes corresponding to a batch + of images. + Returns: Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2] """ @@ -129,11 +136,13 @@ def roi2bbox(rois): def bbox2result(bboxes, labels, num_classes): - """Convert detection results to a list of numpy arrays + """Convert detection results to a list of numpy arrays. + Args: bboxes (Tensor): shape (n, 5) labels (Tensor): shape (n, ) num_classes (int): class number, including background class + Returns: list(ndarray): bbox results of each class """ diff --git a/mmdet/core/bbox_ops/__init__.py b/mmdet/core/bbox_ops/__init__.py deleted file mode 100644 index dbdbb970648bcac1ced61096b436ef9966266c1f..0000000000000000000000000000000000000000 --- a/mmdet/core/bbox_ops/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -from .geometry import bbox_overlaps -from .sampling import (random_choice, bbox_assign, bbox_assign_via_overlaps, - bbox_sampling, sample_positives, sample_negatives, - sample_proposals) -from .transforms import (bbox_transform, bbox_transform_inv, bbox_flip, - bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox, - bbox2result) -from .bbox_target import bbox_target - -__all__ = [ - 'bbox_overlaps', 'random_choice', 'bbox_assign', - 'bbox_assign_via_overlaps', 'bbox_sampling', 'sample_positives', - 'sample_negatives', 'bbox_transform', 'bbox_transform_inv', 'bbox_flip', - 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', - 'bbox_target', 'sample_proposals' -] diff --git a/mmdet/core/eval/__init__.py b/mmdet/core/evaluation/__init__.py similarity index 60% rename from mmdet/core/eval/__init__.py rename to mmdet/core/evaluation/__init__.py index fe4893a0af68ffff2633fcd702f7cf73cce93e76..026234fce3198fe410143d9e1578cc384005c0d4 100644 --- a/mmdet/core/eval/__init__.py +++ b/mmdet/core/evaluation/__init__.py @@ -1,13 +1,18 @@ from .class_names import (voc_classes, imagenet_det_classes, imagenet_vid_classes, coco_classes, dataset_aliases, get_classes) +from .coco_utils import coco_eval, fast_eval_recall, results2json +from .eval_hooks import (DistEvalHook, CocoDistEvalRecallHook, + CocoDistEvalmAPHook) from .mean_ap import average_precision, eval_map, print_map_summary from .recall import (eval_recalls, print_recall_summary, plot_num_recall, plot_iou_recall) __all__ = [ 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', - 'coco_classes', 'dataset_aliases', 'get_classes', 'average_precision', + 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', + 'fast_eval_recall', 'results2json', 'DistEvalHook', + 
'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 'plot_num_recall', 'plot_iou_recall' ] diff --git a/mmdet/core/eval/bbox_overlaps.py b/mmdet/core/evaluation/bbox_overlaps.py similarity index 100% rename from mmdet/core/eval/bbox_overlaps.py rename to mmdet/core/evaluation/bbox_overlaps.py diff --git a/mmdet/core/eval/class_names.py b/mmdet/core/evaluation/class_names.py similarity index 98% rename from mmdet/core/eval/class_names.py rename to mmdet/core/evaluation/class_names.py index b68e9135dca366e93217e0c06959bea990ffda5e..04f806315b7c6ef47419efa61e38d2f7ec3ebd2a 100644 --- a/mmdet/core/eval/class_names.py +++ b/mmdet/core/evaluation/class_names.py @@ -95,7 +95,7 @@ def get_classes(dataset): if mmcv.is_str(dataset): if dataset in alias2name: - labels = eval(alias2name[dataset] + '_labels()') + labels = eval(alias2name[dataset] + '_classes()') else: raise ValueError('Unrecognized dataset: {}'.format(dataset)) else: diff --git a/mmdet/core/evaluation/coco_utils.py b/mmdet/core/evaluation/coco_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e9fdb41649c39e83719ae7c8626d4bb8a58c2c28 --- /dev/null +++ b/mmdet/core/evaluation/coco_utils.py @@ -0,0 +1,149 @@ +import mmcv +import numpy as np +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + +from .recall import eval_recalls + + +def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)): + for res_type in result_types: + assert res_type in [ + 'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints' + ] + + if mmcv.is_str(coco): + coco = COCO(coco) + assert isinstance(coco, COCO) + + if res_type == 'proposal_fast': + ar = fast_eval_recall(result_file, coco, max_dets) + for i, num in enumerate(max_dets): + print('AR@{}\t= {:.4f}'.format(num, ar[i])) + return + + assert result_file.endswith('.json') + coco_dets = coco.loadRes(result_file) + + img_ids = coco.getImgIds() + for res_type in result_types: + iou_type = 'bbox' if res_type == 'proposal' else res_type + cocoEval = COCOeval(coco, coco_dets, iou_type) + cocoEval.params.imgIds = img_ids + if res_type == 'proposal': + cocoEval.params.useCats = 0 + cocoEval.params.maxDets = list(max_dets) + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + +def fast_eval_recall(results, + coco, + max_dets, + iou_thrs=np.arange(0.5, 0.96, 0.05)): + if mmcv.is_str(results): + assert results.endswith('.pkl') + results = mmcv.load(results) + elif not isinstance(results, list): + raise TypeError( + 'results must be a list of numpy arrays or a filename, not {}'. 
+ format(type(results))) + + gt_bboxes = [] + img_ids = coco.getImgIds() + for i in range(len(img_ids)): + ann_ids = coco.getAnnIds(imgIds=img_ids[i]) + ann_info = coco.loadAnns(ann_ids) + if len(ann_info) == 0: + gt_bboxes.append(np.zeros((0, 4))) + continue + bboxes = [] + for ann in ann_info: + if ann.get('ignore', False) or ann['iscrowd']: + continue + x1, y1, w, h = ann['bbox'] + bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1]) + bboxes = np.array(bboxes, dtype=np.float32) + if bboxes.shape[0] == 0: + bboxes = np.zeros((0, 4)) + gt_bboxes.append(bboxes) + + recalls = eval_recalls( + gt_bboxes, results, max_dets, iou_thrs, print_summary=False) + ar = recalls.mean(axis=1) + return ar + + +def xyxy2xywh(bbox): + _bbox = bbox.tolist() + return [ + _bbox[0], + _bbox[1], + _bbox[2] - _bbox[0] + 1, + _bbox[3] - _bbox[1] + 1, + ] + + +def proposal2json(dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + bboxes = results[idx] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = 1 + json_results.append(data) + return json_results + + +def det2json(dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + result = results[idx] + for label in range(len(result)): + bboxes = result[label] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = dataset.cat_ids[label] + json_results.append(data) + return json_results + + +def segm2json(dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + det, seg = results[idx] + for label in range(len(det)): + bboxes = det[label] + segms = seg[label] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = dataset.cat_ids[label] + segms[i]['counts'] = segms[i]['counts'].decode() + data['segmentation'] = segms[i] + json_results.append(data) + return json_results + + +def results2json(dataset, results, out_file): + if isinstance(results[0], list): + json_results = det2json(dataset, results) + elif isinstance(results[0], tuple): + json_results = segm2json(dataset, results) + elif isinstance(results[0], np.ndarray): + json_results = proposal2json(dataset, results) + else: + raise TypeError('invalid type of results') + mmcv.dump(json_results, out_file) diff --git a/mmdet/core/evaluation/eval_hooks.py b/mmdet/core/evaluation/eval_hooks.py new file mode 100644 index 0000000000000000000000000000000000000000..a83b80dbfe7081fa6dbfc13f818339c565076000 --- /dev/null +++ b/mmdet/core/evaluation/eval_hooks.py @@ -0,0 +1,142 @@ +import os +import os.path as osp +import shutil +import time + +import mmcv +import numpy as np +import torch +from mmcv.runner import Hook, obj_from_dict +from mmcv.parallel import scatter, collate +from pycocotools.cocoeval import COCOeval +from torch.utils.data import Dataset + +from .coco_utils import results2json, fast_eval_recall +from mmdet import datasets + + +class DistEvalHook(Hook): + + def __init__(self, dataset, interval=1): + if isinstance(dataset, Dataset): + self.dataset = dataset + elif isinstance(dataset, dict): + self.dataset = obj_from_dict(dataset, datasets, + {'test_mode': True}) + else: + raise TypeError( + 'dataset 
must be a Dataset object or a dict, not {}'.format( + type(dataset))) + self.interval = interval + self.lock_dir = None + + def _barrier(self, rank, world_size): + """Due to some issues with `torch.distributed.barrier()`, we have to + implement this ugly barrier function. + """ + if rank == 0: + for i in range(1, world_size): + tmp = osp.join(self.lock_dir, '{}.pkl'.format(i)) + while not (osp.exists(tmp)): + time.sleep(1) + for i in range(1, world_size): + tmp = osp.join(self.lock_dir, '{}.pkl'.format(i)) + os.remove(tmp) + else: + tmp = osp.join(self.lock_dir, '{}.pkl'.format(rank)) + mmcv.dump([], tmp) + while osp.exists(tmp): + time.sleep(1) + + def before_run(self, runner): + self.lock_dir = osp.join(runner.work_dir, '.lock_map_hook') + if runner.rank == 0: + if osp.exists(self.lock_dir): + shutil.rmtree(self.lock_dir) + mmcv.mkdir_or_exist(self.lock_dir) + + def after_train_epoch(self, runner): + if not self.every_n_epochs(runner, self.interval): + return + runner.model.eval() + results = [None for _ in range(len(self.dataset))] + prog_bar = mmcv.ProgressBar(len(self.dataset)) + for idx in range(runner.rank, len(self.dataset), runner.world_size): + data = self.dataset[idx] + data_gpu = scatter( + collate([data], samples_per_gpu=1), + [torch.cuda.current_device()])[0] + + # compute output + with torch.no_grad(): + result = runner.model( + **data_gpu, return_loss=False, rescale=True) + results[idx] = result + + batch_size = runner.world_size + for _ in range(batch_size): + prog_bar.update() + + if runner.rank == 0: + print('\n') + self._barrier(runner.rank, runner.world_size) + for i in range(1, runner.world_size): + tmp_file = osp.join(runner.work_dir, 'temp_{}.pkl'.format(i)) + tmp_results = mmcv.load(tmp_file) + for idx in range(i, len(results), runner.world_size): + results[idx] = tmp_results[idx] + os.remove(tmp_file) + self.evaluate(runner, results) + else: + tmp_file = osp.join(runner.work_dir, + 'temp_{}.pkl'.format(runner.rank)) + mmcv.dump(results, tmp_file) + self._barrier(runner.rank, runner.world_size) + self._barrier(runner.rank, runner.world_size) + + def evaluate(self): + raise NotImplementedError + + +class CocoDistEvalRecallHook(DistEvalHook): + + def __init__(self, + dataset, + proposal_nums=(100, 300, 1000), + iou_thrs=np.arange(0.5, 0.96, 0.05)): + super(CocoDistEvalRecallHook, self).__init__(dataset) + self.proposal_nums = np.array(proposal_nums, dtype=np.int32) + self.iou_thrs = np.array(iou_thrs, dtype=np.float32) + + def evaluate(self, runner, results): + # the official coco evaluation is too slow, here we use our own + # implementation instead, which may get slightly different results + ar = fast_eval_recall(results, self.dataset.coco, self.proposal_nums, + self.iou_thrs) + for i, num in enumerate(self.proposal_nums): + runner.log_buffer.output['AR@{}'.format(num)] = ar[i] + runner.log_buffer.ready = True + + +class CocoDistEvalmAPHook(DistEvalHook): + + def evaluate(self, runner, results): + tmp_file = osp.join(runner.work_dir, 'temp_0.json') + results2json(self.dataset, results, tmp_file) + + res_types = ['bbox', + 'segm'] if runner.model.module.with_mask else ['bbox'] + cocoGt = self.dataset.coco + cocoDt = cocoGt.loadRes(tmp_file) + imgIds = cocoGt.getImgIds() + for res_type in res_types: + iou_type = res_type + cocoEval = COCOeval(cocoGt, cocoDt, iou_type) + cocoEval.params.imgIds = imgIds + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + field = '{}_mAP'.format(res_type) + runner.log_buffer.output[field] = cocoEval.stats[0] + 
runner.log_buffer.ready = True + os.remove(tmp_file) diff --git a/mmdet/core/eval/mean_ap.py b/mmdet/core/evaluation/mean_ap.py similarity index 89% rename from mmdet/core/eval/mean_ap.py rename to mmdet/core/evaluation/mean_ap.py index 9a33f7640409993db3e11cedd587f1cd14c38aa5..5f47c1368af0e3385bc8e49cc5d35b99726ce722 100644 --- a/mmdet/core/eval/mean_ap.py +++ b/mmdet/core/evaluation/mean_ap.py @@ -9,9 +9,9 @@ def average_precision(recalls, precisions, mode='area'): """Calculate average precision (for single or multiple scales). Args: - recalls(ndarray): shape (num_scales, num_dets) or (num_dets, ) - precisions(ndarray): shape (num_scales, num_dets) or (num_dets, ) - mode(str): 'area' or '11points', 'area' means calculating the area + recalls (ndarray): shape (num_scales, num_dets) or (num_dets, ) + precisions (ndarray): shape (num_scales, num_dets) or (num_dets, ) + mode (str): 'area' or '11points', 'area' means calculating the area under precision-recall curve, '11points' means calculating the average precision of recalls at [0, 0.1, ..., 1] @@ -60,11 +60,11 @@ def tpfp_imagenet(det_bboxes, """Check if detected bboxes are true positive or false positive. Args: - det_bbox(ndarray): the detected bbox - gt_bboxes(ndarray): ground truth bboxes of this image - gt_ignore(ndarray): indicate if gts are ignored for evaluation or not - default_iou_thr(float): the iou thresholds for medium and large bboxes - area_ranges(list or None): gt bbox area ranges + det_bbox (ndarray): the detected bbox + gt_bboxes (ndarray): ground truth bboxes of this image + gt_ignore (ndarray): indicate if gts are ignored for evaluation or not + default_iou_thr (float): the iou thresholds for medium and large bboxes + area_ranges (list or None): gt bbox area ranges Returns: tuple: two arrays (tp, fp) whose elements are 0 and 1 @@ -115,10 +115,10 @@ def tpfp_imagenet(det_bboxes, max_iou = ious[i, j] matched_gt = j # there are 4 cases for a det bbox: - # 1. this det bbox matches a gt, tp = 1, fp = 0 - # 2. this det bbox matches an ignored gt, tp = 0, fp = 0 - # 3. this det bbox matches no gt and within area range, tp = 0, fp = 1 - # 4. this det bbox matches no gt but is beyond area range, tp = 0, fp = 0 + # 1. it matches a gt, tp = 1, fp = 0 + # 2. it matches an ignored gt, tp = 0, fp = 0 + # 3. it matches no gt and within area range, tp = 0, fp = 1 + # 4. it matches no gt but is beyond area range, tp = 0, fp = 0 if matched_gt >= 0: gt_covered[matched_gt] = 1 if not (gt_ignore[matched_gt] or gt_area_ignore[matched_gt]): @@ -137,10 +137,10 @@ def tpfp_default(det_bboxes, gt_bboxes, gt_ignore, iou_thr, area_ranges=None): """Check if detected bboxes are true positive or false positive. Args: - det_bbox(ndarray): the detected bbox - gt_bboxes(ndarray): ground truth bboxes of this image - gt_ignore(ndarray): indicate if gts are ignored for evaluation or not - iou_thr(float): the iou thresholds + det_bbox (ndarray): the detected bbox + gt_bboxes (ndarray): ground truth bboxes of this image + gt_ignore (ndarray): indicate if gts are ignored for evaluation or not + iou_thr (float): the iou thresholds Returns: tuple: (tp, fp), two arrays whose elements are 0 and 1 @@ -227,15 +227,16 @@ def eval_map(det_results, """Evaluate mAP of a dataset. Args: - det_results(list): a list of list, [[cls1_det, cls2_det, ...], ...] 
- gt_bboxes(list): ground truth bboxes of each image, a list of K*4 array - gt_labels(list): ground truth labels of each image, a list of K array - gt_ignore(list): gt ignore indicators of each image, a list of K array - scale_ranges(list, optional): [(min1, max1), (min2, max2), ...] - iou_thr(float): IoU threshold - dataset(None or str): dataset name, there are minor differences in + det_results (list): a list of list, [[cls1_det, cls2_det, ...], ...] + gt_bboxes (list): ground truth bboxes of each image, a list of K*4 + array. + gt_labels (list): ground truth labels of each image, a list of K array + gt_ignore (list): gt ignore indicators of each image, a list of K array + scale_ranges (list, optional): [(min1, max1), (min2, max2), ...] + iou_thr (float): IoU threshold + dataset (None or str): dataset name, there are minor differences in metrics for different datsets, e.g. "voc07", "imagenet_det", etc. - print_summary(bool): whether to print the mAP summary + print_summary (bool): whether to print the mAP summary Returns: tuple: (mAP, [dict, dict, ...]) @@ -265,7 +266,8 @@ def eval_map(det_results, area_ranges) for j in range(len(cls_dets)) ] tp, fp = tuple(zip(*tpfp)) - # calculate gt number of each scale, gts ignored or beyond scale are not counted + # calculate gt number of each scale, gts ignored or beyond scale + # are not counted num_gts = np.zeros(num_scales, dtype=int) for j, bbox in enumerate(cls_gts): if area_ranges is None: diff --git a/mmdet/core/eval/recall.py b/mmdet/core/evaluation/recall.py similarity index 100% rename from mmdet/core/eval/recall.py rename to mmdet/core/evaluation/recall.py diff --git a/mmdet/core/loss/__init__.py b/mmdet/core/loss/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..661f0d6426602b5bed7dc3367e1322374922ae1c --- /dev/null +++ b/mmdet/core/loss/__init__.py @@ -0,0 +1,11 @@ +from .losses import (weighted_nll_loss, weighted_cross_entropy, + weighted_binary_cross_entropy, sigmoid_focal_loss, + weighted_sigmoid_focal_loss, mask_cross_entropy, + smooth_l1_loss, weighted_smoothl1, accuracy) + +__all__ = [ + 'weighted_nll_loss', 'weighted_cross_entropy', + 'weighted_binary_cross_entropy', 'sigmoid_focal_loss', + 'weighted_sigmoid_focal_loss', 'mask_cross_entropy', 'smooth_l1_loss', + 'weighted_smoothl1', 'accuracy' +] diff --git a/mmdet/core/loss/losses.py b/mmdet/core/loss/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..14b49f5cb90ccc29240622a0c2a6764ae4c68520 --- /dev/null +++ b/mmdet/core/loss/losses.py @@ -0,0 +1,101 @@ +# TODO merge naive and weighted loss. +import torch +import torch.nn.functional as F + + +def weighted_nll_loss(pred, label, weight, avg_factor=None): + if avg_factor is None: + avg_factor = max(torch.sum(weight > 0).float().item(), 1.) + raw = F.nll_loss(pred, label, reduction='none') + return torch.sum(raw * weight)[None] / avg_factor + + +def weighted_cross_entropy(pred, label, weight, avg_factor=None): + if avg_factor is None: + avg_factor = max(torch.sum(weight > 0).float().item(), 1.) + raw = F.cross_entropy(pred, label, reduction='none') + return torch.sum(raw * weight)[None] / avg_factor + + +def weighted_binary_cross_entropy(pred, label, weight, avg_factor=None): + if avg_factor is None: + avg_factor = max(torch.sum(weight > 0).float().item(), 1.) 
+ return F.binary_cross_entropy_with_logits( + pred, label.float(), weight.float(), + reduction='sum')[None] / avg_factor + + +def sigmoid_focal_loss(pred, + target, + weight, + gamma=2.0, + alpha=0.25, + reduction='elementwise_mean'): + pred_sigmoid = pred.sigmoid() + pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) + weight = (alpha * target + (1 - alpha) * (1 - target)) * weight + weight = weight * pt.pow(gamma) + return F.binary_cross_entropy_with_logits( + pred, target, weight, reduction=reduction) + + +def weighted_sigmoid_focal_loss(pred, + target, + weight, + gamma=2.0, + alpha=0.25, + avg_factor=None, + num_classes=80): + if avg_factor is None: + avg_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6 + return sigmoid_focal_loss( + pred, target, weight, gamma=gamma, alpha=alpha, + reduction='sum')[None] / avg_factor + + +def mask_cross_entropy(pred, target, label): + num_rois = pred.size()[0] + inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) + pred_slice = pred[inds, label].squeeze(1) + return F.binary_cross_entropy_with_logits( + pred_slice, target, reduction='elementwise_mean')[None] + + +def smooth_l1_loss(pred, target, beta=1.0, reduction='elementwise_mean'): + assert beta > 0 + assert pred.size() == target.size() and target.numel() > 0 + diff = torch.abs(pred - target) + loss = torch.where(diff < beta, 0.5 * diff * diff / beta, + diff - 0.5 * beta) + reduction = F._Reduction.get_enum(reduction) + # none: 0, elementwise_mean:1, sum: 2 + if reduction == 0: + return loss + elif reduction == 1: + return loss.sum() / pred.numel() + elif reduction == 2: + return loss.sum() + + +def weighted_smoothl1(pred, target, weight, beta=1.0, avg_factor=None): + if avg_factor is None: + avg_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6 + loss = smooth_l1_loss(pred, target, beta, reduction='none') + return torch.sum(loss * weight)[None] / avg_factor + + +def accuracy(pred, target, topk=1): + if isinstance(topk, int): + topk = (topk, ) + return_single = True + + maxk = max(topk) + _, pred_label = pred.topk(maxk, 1, True, True) + pred_label = pred_label.t() + correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / pred.size(0))) + return res[0] if return_single else res diff --git a/mmdet/core/losses/__init__.py b/mmdet/core/losses/__init__.py deleted file mode 100644 index 3e4447ff0a6c708e9407bc47698a6281e8c81216..0000000000000000000000000000000000000000 --- a/mmdet/core/losses/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from .losses import ( - weighted_nll_loss, weighted_cross_entropy, weighted_binary_cross_entropy, - sigmoid_focal_loss, weighted_sigmoid_focal_loss, mask_cross_entropy, - weighted_mask_cross_entropy, smooth_l1_loss, weighted_smoothl1, accuracy) - -__all__ = [ - 'weighted_nll_loss', 'weighted_cross_entropy', - 'weighted_binary_cross_entropy', 'sigmoid_focal_loss', - 'weighted_sigmoid_focal_loss', 'mask_cross_entropy', - 'weighted_mask_cross_entropy', 'smooth_l1_loss', 'weighted_smoothl1', - 'accuracy' -] diff --git a/mmdet/core/losses/losses.py b/mmdet/core/losses/losses.py deleted file mode 100644 index 575c91d053650acbde927f49e0c474e5fd325e77..0000000000000000000000000000000000000000 --- a/mmdet/core/losses/losses.py +++ /dev/null @@ -1,110 +0,0 @@ -# TODO merge naive and weighted loss to one function. 
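Usage sketch for the weighted loss helpers added above in mmdet/core/loss/losses.py (illustrative only, not part of the patch; the import path follows the new mmdet/core/loss/__init__.py). Each helper computes an elementwise loss, scales it by a per-sample weight, and normalizes by avg_factor, which for the cross-entropy variants defaults to the number of entries with a positive weight.

import torch
from mmdet.core.loss import weighted_cross_entropy

pred = torch.randn(4, 81)                # class scores for 4 samples, 81 classes
label = torch.tensor([0, 3, 80, 2])
weight = torch.tensor([1., 1., 0., 1.])  # the third sample is ignored

# avg_factor defaults to the count of samples with weight > 0 (3 here), so an
# ignored sample contributes neither to the weighted sum nor to the normalizer.
loss = weighted_cross_entropy(pred, label, weight)
print(loss.shape)  # torch.Size([1]); the helpers return a 1-element tensor via [None]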
-import torch -import torch.nn.functional as F - -from ..bbox_ops import bbox_transform_inv, bbox_overlaps - - -def weighted_nll_loss(pred, label, weight, ave_factor=None): - if ave_factor is None: - ave_factor = max(torch.sum(weight > 0).float().item(), 1.) - raw = F.nll_loss(pred, label, size_average=False, reduce=False) - return torch.sum(raw * weight)[None] / ave_factor - - -def weighted_cross_entropy(pred, label, weight, ave_factor=None): - if ave_factor is None: - ave_factor = max(torch.sum(weight > 0).float().item(), 1.) - raw = F.cross_entropy(pred, label, size_average=False, reduce=False) - return torch.sum(raw * weight)[None] / ave_factor - - -def weighted_binary_cross_entropy(pred, label, weight, ave_factor=None): - if ave_factor is None: - ave_factor = max(torch.sum(weight > 0).float().item(), 1.) - return F.binary_cross_entropy_with_logits( - pred, label.float(), weight.float(), - size_average=False)[None] / ave_factor - - -def sigmoid_focal_loss(pred, - target, - weight, - gamma=2.0, - alpha=0.25, - size_average=True): - pred_sigmoid = pred.sigmoid() - pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) - weight = (alpha * target + (1 - alpha) * (1 - target)) * weight - weight = weight * pt.pow(gamma) - return F.binary_cross_entropy_with_logits( - pred, target, weight, size_average=size_average) - - -def weighted_sigmoid_focal_loss(pred, - target, - weight, - gamma=2.0, - alpha=0.25, - ave_factor=None, - num_classes=80): - if ave_factor is None: - ave_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6 - return sigmoid_focal_loss( - pred, target, weight, gamma=gamma, alpha=alpha, - size_average=False)[None] / ave_factor - - -def mask_cross_entropy(pred, target, label): - num_rois = pred.size()[0] - inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) - pred_slice = pred[inds, label].squeeze(1) - return F.binary_cross_entropy_with_logits( - pred_slice, target, size_average=True)[None] - - -def weighted_mask_cross_entropy(pred, target, weight, label): - num_rois = pred.size()[0] - num_samples = torch.sum(weight > 0).float().item() + 1e-6 - assert num_samples >= 1 - inds = torch.arange(0, num_rois).long().cuda() - pred_slice = pred[inds, label].squeeze(1) - return F.binary_cross_entropy_with_logits( - pred_slice, target, weight, size_average=False)[None] / num_samples - - -def smooth_l1_loss(pred, target, beta=1.0, size_average=True, reduce=True): - assert beta > 0 - assert pred.size() == target.size() and target.numel() > 0 - diff = torch.abs(pred - target) - loss = torch.where(diff < beta, 0.5 * diff * diff / beta, - diff - 0.5 * beta) - if size_average: - loss /= pred.numel() - if reduce: - loss = loss.sum() - return loss - - -def weighted_smoothl1(pred, target, weight, beta=1.0, ave_factor=None): - if ave_factor is None: - ave_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6 - loss = smooth_l1_loss(pred, target, beta, size_average=False, reduce=False) - return torch.sum(loss * weight)[None] / ave_factor - - -def accuracy(pred, target, topk=1): - if isinstance(topk, int): - topk = (topk, ) - return_single = True - - maxk = max(topk) - _, pred_label = pred.topk(maxk, 1, True, True) - pred_label = pred_label.t() - correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) - - res = [] - for k in topk: - correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / pred.size(0))) - return res[0] if return_single else res diff --git a/mmdet/core/mask/__init__.py 
b/mmdet/core/mask/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b703b55d3eb92821c28ef38579fcbebeb1fa12cf --- /dev/null +++ b/mmdet/core/mask/__init__.py @@ -0,0 +1,4 @@ +from .utils import split_combined_polys +from .mask_target import mask_target + +__all__ = ['split_combined_polys', 'mask_target'] diff --git a/mmdet/core/mask/mask_target.py b/mmdet/core/mask/mask_target.py new file mode 100644 index 0000000000000000000000000000000000000000..be93dfc28934052a7497b3c42aa3e9dd1b3b3fe6 --- /dev/null +++ b/mmdet/core/mask/mask_target.py @@ -0,0 +1,36 @@ +import torch +import numpy as np +import mmcv + + +def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, + cfg): + cfg_list = [cfg for _ in range(len(pos_proposals_list))] + mask_targets = map(mask_target_single, pos_proposals_list, + pos_assigned_gt_inds_list, gt_masks_list, cfg_list) + mask_targets = torch.cat(list(mask_targets)) + return mask_targets + + +def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): + mask_size = cfg.mask_size + num_pos = pos_proposals.size(0) + mask_targets = [] + if num_pos > 0: + proposals_np = pos_proposals.cpu().numpy() + pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() + for i in range(num_pos): + gt_mask = gt_masks[pos_assigned_gt_inds[i]] + bbox = proposals_np[i, :].astype(np.int32) + x1, y1, x2, y2 = bbox + w = np.maximum(x2 - x1 + 1, 1) + h = np.maximum(y2 - y1 + 1, 1) + # mask is uint8 both before and after resizing + target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], + (mask_size, mask_size)) + mask_targets.append(target) + mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( + pos_proposals.device) + else: + mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size)) + return mask_targets diff --git a/mmdet/core/mask/utils.py b/mmdet/core/mask/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a68312b179e56cb0e93e967ecfeeb602d48ca866 --- /dev/null +++ b/mmdet/core/mask/utils.py @@ -0,0 +1,30 @@ +import mmcv + + +def split_combined_polys(polys, poly_lens, polys_per_mask): + """Split the combined 1-D polys into masks. + + A mask is represented as a list of polys, and a poly is represented as + a 1-D array. In dataset, all masks are concatenated into a single 1-D + tensor. Here we need to split the tensor into original representations. 
+ + Args: + polys (list): a list (length = image num) of 1-D tensors + poly_lens (list): a list (length = image num) of poly length + polys_per_mask (list): a list (length = image num) of poly number + of each mask + + Returns: + list: a list (length = image num) of list (length = mask num) of + list (length = poly num) of numpy array + """ + mask_polys_list = [] + for img_id in range(len(polys)): + polys_single = polys[img_id] + polys_lens_single = poly_lens[img_id].tolist() + polys_per_mask_single = polys_per_mask[img_id].tolist() + + split_polys = mmcv.slice_list(polys_single, polys_lens_single) + mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) + mask_polys_list.append(mask_polys) + return mask_polys_list diff --git a/mmdet/core/mask_ops/__init__.py b/mmdet/core/mask_ops/__init__.py deleted file mode 100644 index 4669ba1f9102cbcabe20c48ea193408c1e12e4aa..0000000000000000000000000000000000000000 --- a/mmdet/core/mask_ops/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from .segms import (flip_segms, polys_to_mask, mask_to_bbox, - polys_to_mask_wrt_box, polys_to_boxes, rle_mask_voting, - rle_mask_nms, rle_masks_to_boxes) -from .utils import split_combined_gt_polys -from .mask_target import mask_target - -__all__ = [ - 'flip_segms', 'polys_to_mask', 'mask_to_bbox', 'polys_to_mask_wrt_box', - 'polys_to_boxes', 'rle_mask_voting', 'rle_mask_nms', 'rle_masks_to_boxes', - 'split_combined_gt_polys', 'mask_target' -] diff --git a/mmdet/core/mask_ops/mask_target.py b/mmdet/core/mask_ops/mask_target.py deleted file mode 100644 index 3fb65e3587473b60c4fd25b075072b9a3bb4670c..0000000000000000000000000000000000000000 --- a/mmdet/core/mask_ops/mask_target.py +++ /dev/null @@ -1,35 +0,0 @@ -import torch -import numpy as np - -from .segms import polys_to_mask_wrt_box - - -def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_polys_list, - img_meta, cfg): - cfg_list = [cfg for _ in range(len(pos_proposals_list))] - img_metas = [img_meta for _ in range(len(pos_proposals_list))] - mask_targets = map(mask_target_single, pos_proposals_list, - pos_assigned_gt_inds_list, gt_polys_list, img_metas, - cfg_list) - mask_targets = torch.cat(tuple(mask_targets), dim=0) - return mask_targets - - -def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_polys, - img_meta, cfg): - - mask_size = cfg.mask_size - num_pos = pos_proposals.size(0) - mask_targets = pos_proposals.new_zeros((num_pos, mask_size, mask_size)) - if num_pos > 0: - pos_proposals = pos_proposals.cpu().numpy() - pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() - scale_factor = img_meta['scale_factor'][0].cpu().numpy() - for i in range(num_pos): - bbox = pos_proposals[i, :] / scale_factor - polys = gt_polys[pos_assigned_gt_inds[i]] - mask = polys_to_mask_wrt_box(polys, bbox, mask_size) - mask = np.array(mask > 0, dtype=np.float32) - mask_targets[i, ...] = torch.from_numpy(mask).to( - mask_targets.device) - return mask_targets diff --git a/mmdet/core/mask_ops/segms.py b/mmdet/core/mask_ops/segms.py deleted file mode 100644 index b2ae6b69a1ff206b085799fa82527e1d17be0a4f..0000000000000000000000000000000000000000 --- a/mmdet/core/mask_ops/segms.py +++ /dev/null @@ -1,271 +0,0 @@ -# This file is copied from Detectron. - -# Copyright (c) 2017-present, Facebook, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
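A minimal sketch of the new mask_target helper on dummy CPU data (illustrative only, not part of the patch). SimpleNamespace is a hypothetical stand-in for the rcnn training config, of which only the mask_size attribute is used here; the import path follows the new mmdet/core/mask/__init__.py.

import numpy as np
import torch
from types import SimpleNamespace
from mmdet.core.mask import mask_target

proposals = torch.tensor([[10., 10., 50., 60.]])  # one positive proposal (x1, y1, x2, y2)
assigned_gt_inds = torch.tensor([0])              # matched to ground-truth mask 0
gt_mask = np.zeros((100, 100), dtype=np.uint8)    # one binary ground-truth mask
gt_mask[20:40, 15:45] = 1
cfg = SimpleNamespace(mask_size=28)               # stand-in for train_cfg.rcnn

# Arguments are per-image lists; each proposal's region is cropped from its
# assigned gt mask and resized to mask_size x mask_size.
targets = mask_target([proposals], [assigned_gt_inds], [[gt_mask]], cfg)
print(targets.shape)  # torch.Size([1, 28, 28])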
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -############################################################################## -"""Functions for interacting with segmentation masks in the COCO format. -The following terms are used in this module - mask: a binary mask encoded as a 2D numpy array - segm: a segmentation mask in one of the two COCO formats (polygon or RLE) - polygon: COCO's polygon format - RLE: COCO's run length encoding format -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np -import pycocotools.mask as mask_util - - -def flip_segms(segms, height, width): - """Left/right flip each mask in a list of masks.""" - - def _flip_poly(poly, width): - flipped_poly = np.array(poly) - flipped_poly[0::2] = width - np.array(poly[0::2]) - 1 - return flipped_poly.tolist() - - def _flip_rle(rle, height, width): - if 'counts' in rle and type(rle['counts']) == list: - # Magic RLE format handling painfully discovered by looking at the - # COCO API showAnns function. - rle = mask_util.frPyObjects([rle], height, width) - mask = mask_util.decode(rle) - mask = mask[:, ::-1, :] - rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8)) - return rle - - flipped_segms = [] - for segm in segms: - if type(segm) == list: - # Polygon format - flipped_segms.append([_flip_poly(poly, width) for poly in segm]) - else: - # RLE format - assert type(segm) == dict - flipped_segms.append(_flip_rle(segm, height, width)) - return flipped_segms - - -def polys_to_mask(polygons, height, width): - """Convert from the COCO polygon segmentation format to a binary mask - encoded as a 2D array of data type numpy.float32. The polygon segmentation - is understood to be enclosed inside a height x width image. The resulting - mask is therefore of shape (height, width). - """ - rle = mask_util.frPyObjects(polygons, height, width) - mask = np.array(mask_util.decode(rle), dtype=np.float32) - # Flatten in case polygons was a list - mask = np.sum(mask, axis=2) - mask = np.array(mask > 0, dtype=np.float32) - return mask - - -def mask_to_bbox(mask): - """Compute the tight bounding box of a binary mask.""" - xs = np.where(np.sum(mask, axis=0) > 0)[0] - ys = np.where(np.sum(mask, axis=1) > 0)[0] - - if len(xs) == 0 or len(ys) == 0: - return None - - x0 = xs[0] - x1 = xs[-1] - y0 = ys[0] - y1 = ys[-1] - return np.array((x0, y0, x1, y1), dtype=np.float32) - - -def polys_to_mask_wrt_box(polygons, box, M): - """Convert from the COCO polygon segmentation format to a binary mask - encoded as a 2D array of data type numpy.float32. The polygon segmentation - is understood to be enclosed in the given box and rasterized to an M x M - mask. The resulting mask is therefore of shape (M, M). 
- """ - w = box[2] - box[0] - h = box[3] - box[1] - - w = np.maximum(w, 1) - h = np.maximum(h, 1) - - polygons_norm = [] - for poly in polygons: - p = np.array(poly, dtype=np.float32) - p[0::2] = (p[0::2] - box[0]) * M / w - p[1::2] = (p[1::2] - box[1]) * M / h - polygons_norm.append(p) - - rle = mask_util.frPyObjects(polygons_norm, M, M) - mask = np.array(mask_util.decode(rle), dtype=np.float32) - # Flatten in case polygons was a list - mask = np.sum(mask, axis=2) - mask = np.array(mask > 0, dtype=np.float32) - return mask - - -def polys_to_boxes(polys): - """Convert a list of polygons into an array of tight bounding boxes.""" - boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32) - for i in range(len(polys)): - poly = polys[i] - x0 = min(min(p[::2]) for p in poly) - x1 = max(max(p[::2]) for p in poly) - y0 = min(min(p[1::2]) for p in poly) - y1 = max(max(p[1::2]) for p in poly) - boxes_from_polys[i, :] = [x0, y0, x1, y1] - - return boxes_from_polys - - -def rle_mask_voting(top_masks, - all_masks, - all_dets, - iou_thresh, - binarize_thresh, - method='AVG'): - """Returns new masks (in correspondence with `top_masks`) by combining - multiple overlapping masks coming from the pool of `all_masks`. Two methods - for combining masks are supported: 'AVG' uses a weighted average of - overlapping mask pixels; 'UNION' takes the union of all mask pixels. - """ - if len(top_masks) == 0: - return - - all_not_crowd = [False] * len(all_masks) - top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd) - decoded_all_masks = [ - np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks - ] - decoded_top_masks = [ - np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks - ] - all_boxes = all_dets[:, :4].astype(np.int32) - all_scores = all_dets[:, 4] - - # Fill box support with weights - mask_shape = decoded_all_masks[0].shape - mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1])) - for k in range(len(all_masks)): - ref_box = all_boxes[k] - x_0 = max(ref_box[0], 0) - x_1 = min(ref_box[2] + 1, mask_shape[1]) - y_0 = max(ref_box[1], 0) - y_1 = min(ref_box[3] + 1, mask_shape[0]) - mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k] - mask_weights = np.maximum(mask_weights, 1e-5) - - top_segms_out = [] - for k in range(len(top_masks)): - # Corner case of empty mask - if decoded_top_masks[k].sum() == 0: - top_segms_out.append(top_masks[k]) - continue - - inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0] - # Only matches itself - if len(inds_to_vote) == 1: - top_segms_out.append(top_masks[k]) - continue - - masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote] - if method == 'AVG': - ws = mask_weights[inds_to_vote] - soft_mask = np.average(masks_to_vote, axis=0, weights=ws) - mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8) - elif method == 'UNION': - # Any pixel that's on joins the mask - soft_mask = np.sum(masks_to_vote, axis=0) - mask = np.array(soft_mask > 1e-5, dtype=np.uint8) - else: - raise NotImplementedError('Method {} is unknown'.format(method)) - rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0] - top_segms_out.append(rle) - - return top_segms_out - - -def rle_mask_nms(masks, dets, thresh, mode='IOU'): - """Performs greedy non-maximum suppression based on an overlap measurement - between masks. The type of measurement is determined by `mode` and can be - either 'IOU' (standard intersection over union) or 'IOMA' (intersection over - mininum area). 
- """ - if len(masks) == 0: - return [] - if len(masks) == 1: - return [0] - - if mode == 'IOU': - # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(union(m1, m2)) - all_not_crowds = [False] * len(masks) - ious = mask_util.iou(masks, masks, all_not_crowds) - elif mode == 'IOMA': - # Computes ious[m1, m2] = area(intersect(m1, m2)) / min(area(m1), area(m2)) - all_crowds = [True] * len(masks) - # ious[m1, m2] = area(intersect(m1, m2)) / area(m2) - ious = mask_util.iou(masks, masks, all_crowds) - # ... = max(area(intersect(m1, m2)) / area(m2), - # area(intersect(m2, m1)) / area(m1)) - ious = np.maximum(ious, ious.transpose()) - elif mode == 'CONTAINMENT': - # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(m2) - # Which measures how much m2 is contained inside m1 - all_crowds = [True] * len(masks) - ious = mask_util.iou(masks, masks, all_crowds) - else: - raise NotImplementedError('Mode {} is unknown'.format(mode)) - - scores = dets[:, 4] - order = np.argsort(-scores) - - keep = [] - while order.size > 0: - i = order[0] - keep.append(i) - ovr = ious[i, order[1:]] - inds_to_keep = np.where(ovr <= thresh)[0] - order = order[inds_to_keep + 1] - - return keep - - -def rle_masks_to_boxes(masks): - """Computes the bounding box of each mask in a list of RLE encoded masks.""" - if len(masks) == 0: - return [] - - decoded_masks = [ - np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks - ] - - def get_bounds(flat_mask): - inds = np.where(flat_mask > 0)[0] - return inds.min(), inds.max() - - boxes = np.zeros((len(decoded_masks), 4)) - keep = [True] * len(decoded_masks) - for i, mask in enumerate(decoded_masks): - if mask.sum() == 0: - keep[i] = False - continue - flat_mask = mask.sum(axis=0) - x0, x1 = get_bounds(flat_mask) - flat_mask = mask.sum(axis=1) - y0, y1 = get_bounds(flat_mask) - boxes[i, :] = (x0, y0, x1, y1) - - return boxes, np.where(keep)[0] diff --git a/mmdet/core/mask_ops/utils.py b/mmdet/core/mask_ops/utils.py deleted file mode 100644 index 2802430007e7b239bcb18ba20a26c0609c62245c..0000000000000000000000000000000000000000 --- a/mmdet/core/mask_ops/utils.py +++ /dev/null @@ -1,35 +0,0 @@ -import cvbase as cvb -import numpy as np -import pycocotools.mask as mask_utils - -import mmcv - - -def split_combined_gt_polys(gt_polys, gt_poly_lens, num_polys_per_mask): - """Split the combined 1-D polys into masks. - - A mask is represented as a list of polys, and a poly is represented as - a 1-D array. In dataset, all masks are concatenated into a single 1-D - tensor. Here we need to split the tensor into original representations. 
- - Args: - gt_polys (list): a list (length = image num) of 1-D tensors - gt_poly_lens (list): a list (length = image num) of poly length - num_polys_per_mask (list): a list (length = image num) of poly number - of each mask - - Returns: - list: a list (length = image num) of list (length = mask num) of - list (length = poly num) of numpy array - """ - mask_polys_list = [] - for img_id in range(len(gt_polys)): - gt_polys_single = gt_polys[img_id].cpu().numpy() - gt_polys_lens_single = gt_poly_lens[img_id].cpu().numpy().tolist() - num_polys_per_mask_single = num_polys_per_mask[ - img_id].cpu().numpy().tolist() - - split_gt_polys = mmcv.slice_list(gt_polys_single, gt_polys_lens_single) - mask_polys = mmcv.slice_list(split_gt_polys, num_polys_per_mask_single) - mask_polys_list.append(mask_polys) - return mask_polys_list diff --git a/mmdet/core/post_processing/merge_augs.py b/mmdet/core/post_processing/merge_augs.py index 35dfce24f91b4a6260476a3f77b67471c88e4bc7..00f65b049ccf2b00a0fee73cc64ac257415425ea 100644 --- a/mmdet/core/post_processing/merge_augs.py +++ b/mmdet/core/post_processing/merge_augs.py @@ -1,9 +1,9 @@ import torch -from mmdet.ops import nms import numpy as np -from ..bbox_ops import bbox_mapping_back +from mmdet.ops import nms +from ..bbox import bbox_mapping_back def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): @@ -21,11 +21,12 @@ def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): """ recovered_proposals = [] for proposals, img_info in zip(aug_proposals, img_metas): - shape_scale = img_info['shape_scale'][0] - flip = img_info['flip'][0] + img_shape = img_info['img_shape'] + scale_factor = img_info['scale_factor'] + flip = img_info['flip'] _proposals = proposals.clone() - _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], shape_scale, - flip) + _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape, + scale_factor, flip) recovered_proposals.append(_proposals) aug_proposals = torch.cat(recovered_proposals, dim=0) nms_keep = nms(aug_proposals, rpn_test_cfg.nms_thr, @@ -53,9 +54,10 @@ def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg): """ recovered_bboxes = [] for bboxes, img_info in zip(aug_bboxes, img_metas): - shape_scale = img_info['shape_scale'][0] - flip = img_info['flip'][0] - bboxes = bbox_mapping_back(bboxes, shape_scale, flip) + img_shape = img_info[0]['img_shape'] + scale_factor = img_info[0]['scale_factor'] + flip = img_info[0]['flip'] + bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip) recovered_bboxes.append(bboxes) bboxes = torch.stack(recovered_bboxes).mean(dim=0) if aug_scores is None: @@ -73,7 +75,7 @@ def merge_aug_scores(aug_scores): return np.mean(aug_scores, axis=0) -def merge_aug_masks(aug_masks, bboxes, img_metas, rcnn_test_cfg, weights=None): +def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None): """Merge augmented mask prediction. 
Args: @@ -85,7 +87,7 @@ def merge_aug_masks(aug_masks, bboxes, img_metas, rcnn_test_cfg, weights=None): tuple: (bboxes, scores) """ recovered_masks = [ - mask if not img_info['flip'][0] else mask[..., ::-1] + mask if not img_info[0]['flip'] else mask[..., ::-1] for mask, img_info in zip(aug_masks, img_metas) ] if weights is None: diff --git a/mmdet/core/rpn_ops/__init__.py b/mmdet/core/rpn_ops/__init__.py deleted file mode 100644 index 4d5f9244dde2b244bbe42d54640e8a648277c506..0000000000000000000000000000000000000000 --- a/mmdet/core/rpn_ops/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .anchor_generator import * -from .anchor_target import * diff --git a/mmdet/core/rpn_ops/anchor_target.py b/mmdet/core/rpn_ops/anchor_target.py deleted file mode 100644 index a6bba8ed221db022fb95590c6b10a56c8b6d4553..0000000000000000000000000000000000000000 --- a/mmdet/core/rpn_ops/anchor_target.py +++ /dev/null @@ -1,159 +0,0 @@ -import torch -import numpy as np -from ..bbox_ops import (bbox_assign, bbox_transform, bbox_sampling) - - -def anchor_target(anchor_list, valid_flag_list, featmap_sizes, gt_bboxes_list, - img_shapes, target_means, target_stds, cfg): - """Compute anchor regression and classification targets - - Args: - anchor_list(list): anchors of each feature map level - featuremap_sizes(list): feature map sizes - gt_bboxes_list(list): ground truth bbox of images in a mini-batch - img_shapes(list): shape of each image in a mini-batch - cfg(dict): configs - - Returns: - tuple - """ - if len(featmap_sizes) == len(anchor_list): - all_anchors = torch.cat(anchor_list, 0) - anchor_nums = [anchors.size(0) for anchors in anchor_list] - use_isomerism_anchors = False - elif len(img_shapes) == len(anchor_list): - # using different anchors for different images - all_anchors_list = [ - torch.cat(anchor_list[img_id], 0) - for img_id in range(len(img_shapes)) - ] - anchor_nums = [anchors.size(0) for anchors in anchor_list[0]] - use_isomerism_anchors = True - else: - raise ValueError('length of anchor_list should be equal to number of ' - 'feature lvls or number of images in a batch') - all_labels = [] - all_label_weights = [] - all_bbox_targets = [] - all_bbox_weights = [] - num_total_sampled = 0 - for img_id in range(len(img_shapes)): - if isinstance(valid_flag_list[img_id], list): - valid_flags = torch.cat(valid_flag_list[img_id], 0) - else: - valid_flags = valid_flag_list[img_id] - if use_isomerism_anchors: - all_anchors = all_anchors_list[img_id] - inside_flags = anchor_inside_flags(all_anchors, valid_flags, - img_shapes[img_id][:2], - cfg.allowed_border) - if not inside_flags.any(): - return None - gt_bboxes = gt_bboxes_list[img_id] - anchor_targets = anchor_target_single(all_anchors, inside_flags, - gt_bboxes, target_means, - target_stds, cfg) - (labels, label_weights, bbox_targets, bbox_weights, pos_inds, - neg_inds) = anchor_targets - all_labels.append(labels) - all_label_weights.append(label_weights) - all_bbox_targets.append(bbox_targets) - all_bbox_weights.append(bbox_weights) - num_total_sampled += max(pos_inds.numel() + neg_inds.numel(), 1) - all_labels = torch.stack(all_labels, 0) - all_label_weights = torch.stack(all_label_weights, 0) - all_bbox_targets = torch.stack(all_bbox_targets, 0) - all_bbox_weights = torch.stack(all_bbox_weights, 0) - # split into different feature levels - labels_list = [] - label_weights_list = [] - bbox_targets_list = [] - bbox_weights_list = [] - start = 0 - for anchor_num in anchor_nums: - end = start + anchor_num - labels_list.append(all_labels[:, 
start:end].squeeze(0)) - label_weights_list.append(all_label_weights[:, start:end].squeeze(0)) - bbox_targets_list.append(all_bbox_targets[:, start:end].squeeze(0)) - bbox_weights_list.append(all_bbox_weights[:, start:end].squeeze(0)) - start = end - return (labels_list, label_weights_list, bbox_targets_list, - bbox_weights_list, num_total_sampled) - - -def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means, - target_stds, cfg): - num_total_anchors = all_anchors.size(0) - anchors = all_anchors[inside_flags, :] - assigned_gt_inds, argmax_overlaps, max_overlaps = bbox_assign( - anchors, - gt_bboxes, - pos_iou_thr=cfg.pos_iou_thr, - neg_iou_thr=cfg.neg_iou_thr, - min_pos_iou=cfg.min_pos_iou) - pos_inds, neg_inds = bbox_sampling(assigned_gt_inds, cfg.anchor_batch_size, - cfg.pos_fraction, cfg.neg_pos_ub, - cfg.pos_balance_sampling, max_overlaps, - cfg.neg_balance_thr) - - bbox_targets = torch.zeros_like(anchors) - bbox_weights = torch.zeros_like(anchors) - labels = torch.zeros_like(assigned_gt_inds) - label_weights = torch.zeros_like(assigned_gt_inds, dtype=torch.float) - - if len(pos_inds) > 0: - pos_inds = unique(pos_inds) - pos_anchors = anchors[pos_inds, :] - pos_gt_bbox = gt_bboxes[assigned_gt_inds[pos_inds] - 1, :] - pos_bbox_targets = bbox_transform(pos_anchors, pos_gt_bbox, - target_means, target_stds) - bbox_targets[pos_inds, :] = pos_bbox_targets - bbox_weights[pos_inds, :] = 1.0 - labels[pos_inds] = 1 - if cfg.pos_weight <= 0: - label_weights[pos_inds] = 1.0 - else: - label_weights[pos_inds] = cfg.pos_weight - if len(neg_inds) > 0: - neg_inds = unique(neg_inds) - label_weights[neg_inds] = 1.0 - - # map up to original set of anchors - labels = unmap(labels, num_total_anchors, inside_flags) - label_weights = unmap(label_weights, num_total_anchors, inside_flags) - bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) - bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) - - return (labels, label_weights, bbox_targets, bbox_weights, pos_inds, - neg_inds) - -def anchor_inside_flags(all_anchors, valid_flags, img_shape, allowed_border=0): - img_h, img_w = img_shape.float() - if allowed_border >= 0: - inside_flags = valid_flags & \ - (all_anchors[:, 0] >= -allowed_border) & \ - (all_anchors[:, 1] >= -allowed_border) & \ - (all_anchors[:, 2] < img_w + allowed_border) & \ - (all_anchors[:, 3] < img_h + allowed_border) - else: - inside_flags = valid_flags - return inside_flags - -def unique(tensor): - if tensor.is_cuda: - u_tensor = np.unique(tensor.cpu().numpy()) - return tensor.new_tensor(u_tensor) - else: - return torch.unique(tensor) - -def unmap(data, count, inds, fill=0): - """ Unmap a subset of item (data) back to the original set of items (of - size count) """ - if data.dim() == 1: - ret = data.new_full((count, ), fill) - ret[inds] = data - else: - new_size = (count, ) + data.size()[1:] - ret = data.new_full(new_size, fill) - ret[inds, :] = data - return ret diff --git a/mmdet/core/test_engine.py b/mmdet/core/test_engine.py deleted file mode 100644 index 4825beda640c443b5d8aab0daf5c30838be4364b..0000000000000000000000000000000000000000 --- a/mmdet/core/test_engine.py +++ /dev/null @@ -1,14 +0,0 @@ -from mmdet.datasets import collate -from mmdet.nn.parallel import scatter - -__all__ = ['_data_func'] - -def _data_func(data, gpu_id): - imgs, img_metas = tuple( - scatter(collate([data], samples_per_gpu=1), [gpu_id])[0]) - return dict( - img=imgs, - img_meta=img_metas, - return_loss=False, - return_bboxes=True, - rescale=True) diff --git 
a/mmdet/core/train_engine.py b/mmdet/core/train_engine.py deleted file mode 100644 index cc745faad87cb2a97272934902822666be55d71f..0000000000000000000000000000000000000000 --- a/mmdet/core/train_engine.py +++ /dev/null @@ -1,40 +0,0 @@ -import numpy as np -import torch -from collections import OrderedDict -from mmdet.nn.parallel import scatter - - -def parse_losses(losses): - log_vars = OrderedDict() - for loss_key, loss_value in losses.items(): - if isinstance(loss_value, dict): - for _key, _value in loss_value.items(): - if isinstance(_value, list): - _value = sum([_loss.mean() for _loss in _value]) - else: - _value = _value.mean() - log_vars[_keys] = _value - elif isinstance(loss_value, list): - log_vars[loss_key] = sum(_loss.mean() for _loss in loss_value) - else: - log_vars[loss_key] = loss_value.mean() - - loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key) - log_vars['loss'] = loss - for _key, _value in log_vars.items(): - log_vars[_key] = _value.item() - - return loss, log_vars - - -def batch_processor(model, data, train_mode, args=None): - data = scatter(data, [torch.cuda.current_device()])[0] - losses = model(**data) - loss, log_vars = parse_losses(losses) - - outputs = dict( - loss=loss / args.world_size, - log_vars=log_vars, - num_samples=len(data['img'].data)) - - return outputs diff --git a/mmdet/core/utils/__init__.py b/mmdet/core/utils/__init__.py index 2b6e79d62e60b5e1efaac985e039b36840f86397..981dab7fb0db3841a3bea05a1c96bdd91cfff4ca 100644 --- a/mmdet/core/utils/__init__.py +++ b/mmdet/core/utils/__init__.py @@ -1,3 +1,7 @@ -from .dist_utils import * -from .hooks import * -from .misc import * +from .dist_utils import init_dist, allreduce_grads, DistOptimizerHook +from .misc import tensor2imgs, unmap, multi_apply + +__all__ = [ + 'init_dist', 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', + 'unmap', 'multi_apply' +] diff --git a/mmdet/core/utils/dist_utils.py b/mmdet/core/utils/dist_utils.py index 47279c7bf8fd3b0ed66c3099f465b0130c864a23..c7748db661f4467fac0a2081350a0c06264fc593 100644 --- a/mmdet/core/utils/dist_utils.py +++ b/mmdet/core/utils/dist_utils.py @@ -1,60 +1,89 @@ import os +from collections import OrderedDict + import torch import torch.multiprocessing as mp import torch.distributed as dist -from torch.nn.utils import clip_grad -from mmcv.torchpack import Hook, OptimizerStepperHook - -__all__ = [ - 'init_dist', 'average_gradients', 'broadcast_params', - 'DistOptimizerStepperHook', 'DistSamplerSeedHook' -] +from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors, + _take_tensors) +from mmcv.runner import OptimizerHook -def init_dist(world_size, - rank, - backend='gloo', - master_ip='127.0.0.1', - port=29500): +def init_dist(launcher, backend='nccl', **kwargs): if mp.get_start_method(allow_none=True) is None: mp.set_start_method('spawn') + if launcher == 'pytorch': + _init_dist_pytorch(backend, **kwargs) + elif launcher == 'mpi': + _init_dist_mpi(backend, **kwargs) + elif launcher == 'slurm': + _init_dist_slurm(backend, **kwargs) + else: + raise ValueError('Invalid launcher type: {}'.format(launcher)) + + +def _init_dist_pytorch(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['RANK']) num_gpus = torch.cuda.device_count() torch.cuda.set_device(rank % num_gpus) - os.environ['MASTER_ADDR'] = master_ip - os.environ['MASTER_PORT'] = str(port) - if backend == 'nccl': - dist.init_process_group(backend='nccl') + dist.init_process_group(backend=backend, **kwargs) + + +def 
_init_dist_mpi(backend, **kwargs): + raise NotImplementedError + + +def _init_dist_slurm(backend, **kwargs): + raise NotImplementedError + + +def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): + if bucket_size_mb > 0: + bucket_size_bytes = bucket_size_mb * 1024 * 1024 + buckets = _take_tensors(tensors, bucket_size_bytes) else: - dist.init_process_group( - backend='gloo', rank=rank, world_size=world_size) + buckets = OrderedDict() + for tensor in tensors: + tp = tensor.type() + if tp not in buckets: + buckets[tp] = [] + buckets[tp].append(tensor) + buckets = buckets.values() + for bucket in buckets: + flat_tensors = _flatten_dense_tensors(bucket) + dist.all_reduce(flat_tensors) + flat_tensors.div_(world_size) + for tensor, synced in zip( + bucket, _unflatten_dense_tensors(flat_tensors, bucket)): + tensor.copy_(synced) -def average_gradients(model): - for param in model.parameters(): - if param.requires_grad and not (param.grad is None): - dist.all_reduce(param.grad.data) +def allreduce_grads(model, coalesce=True, bucket_size_mb=-1): + grads = [ + param.grad.data for param in model.parameters() + if param.requires_grad and param.grad is not None + ] + world_size = dist.get_world_size() + if coalesce: + _allreduce_coalesced(grads, world_size, bucket_size_mb) + else: + for tensor in grads: + dist.all_reduce(tensor.div_(world_size)) -def broadcast_params(model): - for p in model.state_dict().values(): - dist.broadcast(p, 0) +class DistOptimizerHook(OptimizerHook): -class DistOptimizerStepperHook(OptimizerStepperHook): + def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): + self.grad_clip = grad_clip + self.coalesce = coalesce + self.bucket_size_mb = bucket_size_mb def after_train_iter(self, runner): runner.optimizer.zero_grad() runner.outputs['loss'].backward() - average_gradients(runner.model) - if self.grad_clip: - clip_grad.clip_grad_norm_( - filter(lambda p: p.requires_grad, runner.model.parameters()), - max_norm=self.max_norm, - norm_type=self.norm_type) + allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb) + if self.grad_clip is not None: + self.clip_grads(runner.model.parameters()) runner.optimizer.step() - - -class DistSamplerSeedHook(Hook): - - def before_epoch(self, runner): - runner.data_loader.sampler.set_epoch(runner.epoch) diff --git a/mmdet/core/utils/hooks.py b/mmdet/core/utils/hooks.py deleted file mode 100644 index f97e1fb29061ad5f07fa06907fbec72ede7a3bf3..0000000000000000000000000000000000000000 --- a/mmdet/core/utils/hooks.py +++ /dev/null @@ -1,245 +0,0 @@ -import os -import os.path as osp -import shutil -import time - -import mmcv -import numpy as np -import torch -from mmcv.torchpack import Hook -from mmdet.datasets import collate -from mmdet.nn.parallel import scatter -from pycocotools.cocoeval import COCOeval - -from ..eval import eval_recalls - -__all__ = [ - 'EmptyCacheHook', 'DistEvalHook', 'DistEvalRecallHook', - 'CocoDistEvalmAPHook' -] - - -class EmptyCacheHook(Hook): - - def before_epoch(self, runner): - torch.cuda.empty_cache() - - def after_epoch(self, runner): - torch.cuda.empty_cache() - - -class DistEvalHook(Hook): - - def __init__(self, dataset, interval=1): - self.dataset = dataset - self.interval = interval - self.lock_dir = None - - def _barrier(self, rank, world_size): - """Due to some issues with `torch.distributed.barrier()`, we have to - implement this ugly barrier function. 
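A construction sketch for the reworked optimizer hook defined above (illustrative only, not part of the patch). It assumes the distributed process group is initialized beforehand via init_dist('pytorch'), and that grad_clip, when not None, is a dict of gradient-norm-clipping kwargs consumed by clip_grads inherited from mmcv's OptimizerHook.

from mmdet.core.utils import init_dist, DistOptimizerHook

# init_dist('pytorch') reads RANK from the environment, binds a GPU and calls
# dist.init_process_group; it must run before the hook's all-reduce is used.
optimizer_hook = DistOptimizerHook(
    grad_clip=dict(max_norm=35, norm_type=2),  # None would disable clipping
    coalesce=True,        # flatten grads per dtype (or per size bucket) before all_reduce
    bucket_size_mb=-1)    # -1 keeps a single bucket per tensor type
# The hook is then registered on an mmcv Runner together with the optimizer.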
- """ - if rank == 0: - for i in range(1, world_size): - tmp = osp.join(self.lock_dir, '{}.pkl'.format(i)) - while not (osp.exists(tmp)): - time.sleep(1) - for i in range(1, world_size): - tmp = osp.join(self.lock_dir, '{}.pkl'.format(i)) - os.remove(tmp) - else: - tmp = osp.join(self.lock_dir, '{}.pkl'.format(rank)) - mmcv.dump([], tmp) - while osp.exists(tmp): - time.sleep(1) - - def before_run(self, runner): - self.lock_dir = osp.join(runner.work_dir, '.lock_map_hook') - if runner.rank == 0: - if osp.exists(self.lock_dir): - shutil.rmtree(self.lock_dir) - mmcv.mkdir_or_exist(self.lock_dir) - - def after_train_epoch(self, runner): - if not self.every_n_epochs(runner, self.interval): - return - runner.model.eval() - results = [None for _ in range(len(self.dataset))] - prog_bar = mmcv.ProgressBar(len(self.dataset)) - for idx in range(runner.rank, len(self.dataset), runner.world_size): - data = self.dataset[idx] - device_id = torch.cuda.current_device() - imgs_data = tuple( - scatter(collate([data], samples_per_gpu=1), [device_id])[0]) - - # compute output - with torch.no_grad(): - result = runner.model( - *imgs_data, - return_loss=False, - return_bboxes=True, - rescale=True) - results[idx] = result - - batch_size = runner.world_size - for _ in range(batch_size): - prog_bar.update() - - if runner.rank == 0: - print('\n') - self._barrier(runner.rank, runner.world_size) - for i in range(1, runner.world_size): - tmp_file = osp.join(runner.work_dir, 'temp_{}.pkl'.format(i)) - tmp_results = mmcv.load(tmp_file) - for idx in range(i, len(results), runner.world_size): - results[idx] = tmp_results[idx] - os.remove(tmp_file) - self.evaluate(runner, results) - else: - tmp_file = osp.join(runner.work_dir, - 'temp_{}.pkl'.format(runner.rank)) - mmcv.dump(results, tmp_file) - self._barrier(runner.rank, runner.world_size) - self._barrier(runner.rank, runner.world_size) - - def evaluate(self): - raise NotImplementedError - - -class CocoEvalMixin(object): - - def _xyxy2xywh(self, bbox): - _bbox = bbox.tolist() - return [ - _bbox[0], - _bbox[1], - _bbox[2] - _bbox[0] + 1, - _bbox[3] - _bbox[1] + 1, - ] - - def det2json(self, dataset, results): - json_results = [] - for idx in range(len(dataset)): - img_id = dataset.img_ids[idx] - result = results[idx] - for label in range(len(result)): - bboxes = result[label] - for i in range(bboxes.shape[0]): - data = dict() - data['image_id'] = img_id - data['bbox'] = self._xyxy2xywh(bboxes[i]) - data['score'] = float(bboxes[i][4]) - data['category_id'] = dataset.cat_ids[label] - json_results.append(data) - return json_results - - def segm2json(self, dataset, results): - json_results = [] - for idx in range(len(dataset)): - img_id = dataset.img_ids[idx] - det, seg = results[idx] - for label in range(len(det)): - bboxes = det[label] - segms = seg[label] - for i in range(bboxes.shape[0]): - data = dict() - data['image_id'] = img_id - data['bbox'] = self._xyxy2xywh(bboxes[i]) - data['score'] = float(bboxes[i][4]) - data['category_id'] = dataset.cat_ids[label] - segms[i]['counts'] = segms[i]['counts'].decode() - data['segmentation'] = segms[i] - json_results.append(data) - return json_results - - def proposal2json(self, dataset, results): - json_results = [] - for idx in range(len(dataset)): - img_id = dataset.img_ids[idx] - bboxes = results[idx] - for i in range(bboxes.shape[0]): - data = dict() - data['image_id'] = img_id - data['bbox'] = self._xyxy2xywh(bboxes[i]) - data['score'] = float(bboxes[i][4]) - data['category_id'] = 1 - json_results.append(data) - return 
json_results - - def results2json(self, dataset, results, out_file): - if isinstance(results[0], list): - json_results = self.det2json(dataset, results) - elif isinstance(results[0], tuple): - json_results = self.segm2json(dataset, results) - elif isinstance(results[0], np.ndarray): - json_results = self.proposal2json(dataset, results) - else: - raise TypeError('invalid type of results') - mmcv.dump(json_results, out_file, file_format='json') - - -class DistEvalRecallHook(DistEvalHook): - - def __init__(self, - dataset, - proposal_nums=(100, 300, 1000), - iou_thrs=np.arange(0.5, 0.96, 0.05)): - super(DistEvalRecallHook, self).__init__(dataset) - self.proposal_nums = np.array(proposal_nums, dtype=np.int32) - self.iou_thrs = np.array(iou_thrs, dtype=np.float32) - - def evaluate(self, runner, results): - # official coco evaluation is too slow, here we use our own - # implementation, which may get slightly different results - gt_bboxes = [] - for i in range(len(self.dataset)): - img_id = self.dataset.img_ids[i] - ann_ids = self.dataset.coco.getAnnIds(imgIds=img_id) - ann_info = self.dataset.coco.loadAnns(ann_ids) - if len(ann_info) == 0: - gt_bboxes.append(np.zeros((0, 4))) - continue - bboxes = [] - for ann in ann_info: - if ann.get('ignore', False) or ann['iscrowd']: - continue - x1, y1, w, h = ann['bbox'] - bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1]) - bboxes = np.array(bboxes, dtype=np.float32) - if bboxes.shape[0] == 0: - bboxes = np.zeros((0, 4)) - gt_bboxes.append(bboxes) - - recalls = eval_recalls( - gt_bboxes, - results, - self.proposal_nums, - self.iou_thrs, - print_summary=False) - ar = recalls.mean(axis=1) - for i, num in enumerate(self.proposal_nums): - runner.log_buffer.output['AR@{}'.format(num)] = ar[i] - runner.log_buffer.ready = True - - -class CocoDistEvalmAPHook(DistEvalHook, CocoEvalMixin): - - def evaluate(self, runner, results): - tmp_file = osp.join(runner.work_dir, 'temp_0.json') - self.results2json(self.dataset, results, tmp_file) - - res_types = ['bbox', 'segm'] if runner.model.with_mask else ['bbox'] - cocoGt = self.dataset.coco - cocoDt = cocoGt.loadRes(tmp_file) - imgIds = cocoGt.getImgIds() - for res_type in res_types: - iou_type = res_type - cocoEval = COCOeval(cocoGt, cocoDt, iou_type) - cocoEval.params.imgIds = imgIds - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - field = '{}_mAP'.format(res_type) - runner.log_buffer.output[field] = cocoEval.stats[0] - runner.log_buffer.ready = True - os.remove(tmp_file) diff --git a/mmdet/core/utils/misc.py b/mmdet/core/utils/misc.py index 0f9c05e4577f23125fad0f0714a8f1089e82dbee..262f168e646089a535a9ad393947d57198873d93 100644 --- a/mmdet/core/utils/misc.py +++ b/mmdet/core/utils/misc.py @@ -1,36 +1,27 @@ -import subprocess +from functools import partial import mmcv import numpy as np -import torch +from six.moves import map, zip -__all__ = ['tensor2imgs', 'unique', 'unmap', 'results2json'] - -def tensor2imgs(tensor, - color_order='RGB', - color_mean=(0.485, 0.456, 0.406), - color_std=(0.229, 0.224, 0.225)): - assert color_order in ['RGB', 'BGR'] - img_per_gpu = tensor.size(0) - color_mean = np.array(color_mean, dtype=np.float32) - color_std = np.array(color_std, dtype=np.float32) +def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): + num_imgs = tensor.size(0) + mean = np.array(mean, dtype=np.float32) + std = np.array(std, dtype=np.float32) imgs = [] - for img_id in range(img_per_gpu): + for img_id in range(num_imgs): img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) - if 
color_order == 'RGB': - img = mmcv.rgb2bgr(img) - img = img * color_std + color_mean + img = mmcv.imdenormalize( + img, mean, std, to_bgr=to_rgb).astype(np.uint8) imgs.append(np.ascontiguousarray(img)) return imgs -def unique(tensor): - if tensor.is_cuda: - u_tensor = np.unique(tensor.cpu().numpy()) - return tensor.new_tensor(u_tensor) - else: - return torch.unique(tensor) +def multi_apply(func, *args, **kwargs): + pfunc = partial(func, **kwargs) if kwargs else func + map_results = map(pfunc, *args) + return tuple(map(list, zip(*map_results))) def unmap(data, count, inds, fill=0): @@ -44,75 +35,3 @@ def unmap(data, count, inds, fill=0): ret = data.new_full(new_size, fill) ret[inds, :] = data return ret - -def xyxy2xywh(bbox): - _bbox = bbox.tolist() - return [ - _bbox[0], - _bbox[1], - _bbox[2] - _bbox[0] + 1, - _bbox[3] - _bbox[1] + 1, - ] - -def det2json(dataset, results): - json_results = [] - for idx in range(len(dataset)): - img_id = dataset.img_ids[idx] - result = results[idx] - for label in range(len(result)): - bboxes = result[label] - for i in range(bboxes.shape[0]): - data = dict() - data['image_id'] = img_id - data['bbox'] = xyxy2xywh(bboxes[i]) - data['score'] = float(bboxes[i][4]) - data['category_id'] = dataset.cat_ids[label] - json_results.append(data) - return json_results - - -def segm2json(dataset, results): - json_results = [] - for idx in range(len(dataset)): - img_id = dataset.img_ids[idx] - det, seg = results[idx] - for label in range(len(det)): - bboxes = det[label] - segms = seg[label] - for i in range(bboxes.shape[0]): - data = dict() - data['image_id'] = img_id - data['bbox'] = xyxy2xywh(bboxes[i]) - data['score'] = float(bboxes[i][4]) - data['category_id'] = dataset.cat_ids[label] - segms[i]['counts'] = segms[i]['counts'].decode() - data['segmentation'] = segms[i] - json_results.append(data) - return json_results - - -def proposal2json(dataset, results): - json_results = [] - for idx in range(len(dataset)): - img_id = dataset.img_ids[idx] - bboxes = results[idx] - for i in range(bboxes.shape[0]): - data = dict() - data['image_id'] = img_id - data['bbox'] = xyxy2xywh(bboxes[i]) - data['score'] = float(bboxes[i][4]) - data['category_id'] = 1 - json_results.append(data) - return json_results - - -def results2json(dataset, results, out_file): - if isinstance(results[0], list): - json_results = det2json(dataset, results) - elif isinstance(results[0], tuple): - json_results = segm2json(dataset, results) - elif isinstance(results[0], np.ndarray): - json_results = proposal2json(dataset, results) - else: - raise TypeError('invalid type of results') - mmcv.dump(json_results, out_file) diff --git a/mmdet/datasets/__init__.py b/mmdet/datasets/__init__.py index 6045c2b0923993243a999f0008b79443126d0e26..425ea72535a144544f44ebe8b5d63dd31336a54c 100644 --- a/mmdet/datasets/__init__.py +++ b/mmdet/datasets/__init__.py @@ -1,4 +1,8 @@ from .coco import CocoDataset -from .collate import * -from .sampler import * -from .transforms import * +from .loader import GroupSampler, DistributedGroupSampler, build_dataloader +from .utils import to_tensor, random_scale, show_ann + +__all__ = [ + 'CocoDataset', 'GroupSampler', 'DistributedGroupSampler', + 'build_dataloader', 'to_tensor', 'random_scale', 'show_ann' +] diff --git a/mmdet/datasets/coco.py b/mmdet/datasets/coco.py index a7eedca6a2cbab92e069415513def5ab363dc824..3cd0a6d5ca20dbeba11f96135b570635348c74d9 100644 --- a/mmdet/datasets/coco.py +++ b/mmdet/datasets/coco.py @@ -2,75 +2,17 @@ import os.path as osp import mmcv import numpy as 
np +from mmcv.parallel import DataContainer as DC from pycocotools.coco import COCO from torch.utils.data import Dataset -from .transforms import (ImageTransform, BboxTransform, PolyMaskTransform, +from .transforms import (ImageTransform, BboxTransform, MaskTransform, Numpy2Tensor) -from .utils import show_ann, random_scale -from .utils import DataContainer as DC - - -def parse_ann_info(ann_info, cat2label, with_mask=True): - """Parse bbox and mask annotation. - - Args: - ann_info (list[dict]): Annotation info of an image. - cat2label (dict): The mapping from category ids to labels. - with_mask (bool): Whether to parse mask annotations. - - Returns: - tuple: gt_bboxes, gt_labels and gt_mask_info - """ - gt_bboxes = [] - gt_labels = [] - gt_bboxes_ignore = [] - # each mask consists of one or several polys, each poly is a list of float. - if with_mask: - gt_mask_polys = [] - gt_poly_lens = [] - for i, ann in enumerate(ann_info): - if ann.get('ignore', False): - continue - x1, y1, w, h = ann['bbox'] - if ann['area'] <= 0 or w < 1 or h < 1: - continue - bbox = [x1, y1, x1 + w - 1, y1 + h - 1] - if ann['iscrowd']: - gt_bboxes_ignore.append(bbox) - else: - gt_bboxes.append(bbox) - gt_labels.append(cat2label[ann['category_id']]) - if with_mask: - # Note polys are not resized - mask_polys = [ - p for p in ann['segmentation'] if len(p) >= 6 - ] # valid polygons have >= 3 points (6 coordinates) - poly_lens = [len(p) for p in mask_polys] - gt_mask_polys.append(mask_polys) - gt_poly_lens.extend(poly_lens) - if gt_bboxes: - gt_bboxes = np.array(gt_bboxes, dtype=np.float32) - gt_labels = np.array(gt_labels, dtype=np.int64) - else: - gt_bboxes = np.zeros((0, 4), dtype=np.float32) - gt_labels = np.array([], dtype=np.int64) - - if gt_bboxes_ignore: - gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32) - else: - gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32) - - ann = dict( - bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore) - - if with_mask: - ann['mask_polys'] = gt_mask_polys - ann['poly_lens'] = gt_poly_lens - return ann +from .utils import to_tensor, show_ann, random_scale class CocoDataset(Dataset): + def __init__(self, ann_file, img_prefix, @@ -137,7 +79,7 @@ class CocoDataset(Dataset): self.img_transform = ImageTransform( size_divisor=self.size_divisor, **self.img_norm_cfg) self.bbox_transform = BboxTransform() - self.mask_transform = PolyMaskTransform() + self.mask_transform = MaskTransform() self.numpy2tensor = Numpy2Tensor() def __len__(self): @@ -161,6 +103,70 @@ class CocoDataset(Dataset): ann_info = self.coco.loadAnns(ann_ids) return ann_info + def _parse_ann_info(self, ann_info, with_mask=True): + """Parse bbox and mask annotation. + + Args: + ann_info (list[dict]): Annotation info of an image. + with_mask (bool): Whether to parse mask annotations. + + Returns: + dict: A dict containing the following keys: bboxes, bboxes_ignore, + labels, masks, mask_polys, poly_lens. + """ + gt_bboxes = [] + gt_labels = [] + gt_bboxes_ignore = [] + # Two formats are provided. + # 1. mask: a binary map of the same size of the image. + # 2. polys: each mask consists of one or several polys, each poly is a + # list of float. 
+ if with_mask: + gt_masks = [] + gt_mask_polys = [] + gt_poly_lens = [] + for i, ann in enumerate(ann_info): + if ann.get('ignore', False): + continue + x1, y1, w, h = ann['bbox'] + if ann['area'] <= 0 or w < 1 or h < 1: + continue + bbox = [x1, y1, x1 + w - 1, y1 + h - 1] + if ann['iscrowd']: + gt_bboxes_ignore.append(bbox) + else: + gt_bboxes.append(bbox) + gt_labels.append(self.cat2label[ann['category_id']]) + if with_mask: + gt_masks.append(self.coco.annToMask(ann)) + mask_polys = [ + p for p in ann['segmentation'] if len(p) >= 6 + ] # valid polygons have >= 3 points (6 coordinates) + poly_lens = [len(p) for p in mask_polys] + gt_mask_polys.append(mask_polys) + gt_poly_lens.extend(poly_lens) + if gt_bboxes: + gt_bboxes = np.array(gt_bboxes, dtype=np.float32) + gt_labels = np.array(gt_labels, dtype=np.int64) + else: + gt_bboxes = np.zeros((0, 4), dtype=np.float32) + gt_labels = np.array([], dtype=np.int64) + + if gt_bboxes_ignore: + gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32) + else: + gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32) + + ann = dict( + bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore) + + if with_mask: + ann['masks'] = gt_masks + # poly format is not used in the current implementation + ann['mask_polys'] = gt_mask_polys + ann['poly_lens'] = gt_poly_lens + return ann + def _set_group_flag(self): """Set flag according to image aspect ratio. @@ -199,7 +205,7 @@ class CocoDataset(Dataset): idx = self._rand_another(idx) continue - ann = parse_ann_info(ann_info, self.cat2label, self.with_mask) + ann = self._parse_ann_info(ann_info, self.with_mask) gt_bboxes = ann['bboxes'] gt_labels = ann['labels'] gt_bboxes_ignore = ann['bboxes_ignore'] @@ -211,7 +217,7 @@ class CocoDataset(Dataset): # apply transforms flip = True if np.random.rand() < self.flip_ratio else False img_scale = random_scale(self.img_scales) # sample a scale - img, img_shape, scale_factor = self.img_transform( + img, img_shape, pad_shape, scale_factor = self.img_transform( img, img_scale, flip) if self.proposals is not None: proposals = self.bbox_transform(proposals, img_shape, @@ -222,32 +228,29 @@ class CocoDataset(Dataset): scale_factor, flip) if self.with_mask: - gt_mask_polys, gt_poly_lens, num_polys_per_mask = \ - self.mask_transform( - ann['mask_polys'], ann['poly_lens'], - img_info['height'], img_info['width'], flip) + gt_masks = self.mask_transform(ann['masks'], pad_shape, + scale_factor, flip) - ori_shape = (img_info['height'], img_info['width']) + ori_shape = (img_info['height'], img_info['width'], 3) img_meta = dict( - ori_shape=DC(ori_shape), - img_shape=DC(img_shape), - scale_factor=DC(scale_factor), - flip=DC(flip)) + ori_shape=ori_shape, + img_shape=img_shape, + pad_shape=pad_shape, + scale_factor=scale_factor, + flip=flip) data = dict( - img=DC(img, stack=True), - img_meta=img_meta, - gt_bboxes=DC(gt_bboxes)) + img=DC(to_tensor(img), stack=True), + img_meta=DC(img_meta, cpu_only=True), + gt_bboxes=DC(to_tensor(gt_bboxes))) if self.proposals is not None: - data['proposals'] = DC(proposals) + data['proposals'] = DC(to_tensor(proposals)) if self.with_label: - data['gt_labels'] = DC(gt_labels) + data['gt_labels'] = DC(to_tensor(gt_labels)) if self.with_crowd: - data['gt_bboxes_ignore'] = DC(gt_bboxes_ignore) + data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore)) if self.with_mask: - data['gt_mask_polys'] = DC(gt_mask_polys) - data['gt_poly_lens'] = DC(gt_poly_lens) - data['num_polys_per_mask'] = DC(num_polys_per_mask) + data['gt_masks'] = DC(gt_masks, 
cpu_only=True) return data def prepare_test_img(self, idx): @@ -258,37 +261,38 @@ class CocoDataset(Dataset): if self.proposals is not None else None) def prepare_single(img, scale, flip, proposal=None): - _img, _img_shape, _scale_factor = self.img_transform( + _img, img_shape, pad_shape, scale_factor = self.img_transform( img, scale, flip) - img, img_shape, scale_factor = self.numpy2tensor( - _img, _img_shape, _scale_factor) - ori_shape = (img_info['height'], img_info['width']) - img_meta = dict( - ori_shape=ori_shape, + _img = to_tensor(_img) + _img_meta = dict( + ori_shape=(img_info['height'], img_info['width'], 3), img_shape=img_shape, + pad_shape=pad_shape, scale_factor=scale_factor, flip=flip) if proposal is not None: - proposal = self.bbox_transform(proposal, _scale_factor, flip) - proposal = self.numpy2tensor(proposal) - return img, img_meta, proposal + _proposal = self.bbox_transform(proposal, scale_factor, flip) + _proposal = to_tensor(_proposal) + else: + _proposal = None + return _img, _img_meta, _proposal imgs = [] img_metas = [] proposals = [] for scale in self.img_scales: - img, img_meta, proposal = prepare_single(img, scale, False, - proposal) - imgs.append(img) - img_metas.append(img_meta) - proposals.append(proposal) + _img, _img_meta, _proposal = prepare_single( + img, scale, False, proposal) + imgs.append(_img) + img_metas.append(DC(_img_meta, cpu_only=True)) + proposals.append(_proposal) if self.flip_ratio > 0: - img, img_meta, prop = prepare_single(img, scale, True, - proposal) - imgs.append(img) - img_metas.append(img_meta) - proposals.append(prop) - if self.proposals is None: - return imgs, img_metas - else: - return imgs, img_metas, proposals + _img, _img_meta, _proposal = prepare_single( + img, scale, True, proposal) + imgs.append(_img) + img_metas.append(DC(_img_meta, cpu_only=True)) + proposals.append(_proposal) + data = dict(img=imgs, img_meta=img_metas) + if self.proposals is not None: + data['proposals'] = proposals + return data diff --git a/mmdet/datasets/collate.py b/mmdet/datasets/collate.py deleted file mode 100644 index 44117d6f2d01d3aaa4c06996c2d8bf657e4a1ce5..0000000000000000000000000000000000000000 --- a/mmdet/datasets/collate.py +++ /dev/null @@ -1,57 +0,0 @@ -import collections - -import torch -import torch.nn.functional as F -from torch.utils.data.dataloader import default_collate - -from .utils import DataContainer - -# https://github.com/pytorch/pytorch/issues/973 -import resource -rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) -resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) - -__all__ = ['collate'] - - -def collate(batch, samples_per_gpu=1): - - if not isinstance(batch, collections.Sequence): - raise TypeError("{} is not supported.".format(batch.dtype)) - - if isinstance(batch[0], DataContainer): - assert len(batch) % samples_per_gpu == 0 - stacked = [] - if batch[0].stack: - for i in range(0, len(batch), samples_per_gpu): - assert isinstance(batch[i].data, torch.Tensor) - # TODO: handle tensors other than 3d - assert batch[i].dim() == 3 - c, h, w = batch[0].size() - for sample in batch[i:i + samples_per_gpu]: - assert c == sample.size(0) - h = max(h, sample.size(1)) - w = max(w, sample.size(2)) - padded_samples = [ - F.pad( - sample.data, - (0, w - sample.size(2), 0, h - sample.size(1)), - value=sample.padding_value) - for sample in batch[i:i + samples_per_gpu] - ] - stacked.append(default_collate(padded_samples)) - else: - for i in range(0, len(batch), samples_per_gpu): - stacked.append( - [sample.data for sample in 
batch[i:i + samples_per_gpu]]) - return DataContainer(stacked, batch[0].stack, batch[0].padding_value) - elif isinstance(batch[0], collections.Sequence): - transposed = zip(*batch) - return [collate(samples, samples_per_gpu) for samples in transposed] - elif isinstance(batch[0], collections.Mapping): - return { - key: collate([d[key] for d in batch], samples_per_gpu) - for key in batch[0] - } - else: - return default_collate(batch) diff --git a/mmdet/datasets/data_engine.py b/mmdet/datasets/data_engine.py deleted file mode 100644 index 0c89f21878a9f2fe2b21669ecfb2cd71cc9ae073..0000000000000000000000000000000000000000 --- a/mmdet/datasets/data_engine.py +++ /dev/null @@ -1,29 +0,0 @@ -from functools import partial -import torch -from .coco import CocoDataset -from .collate import collate -from .sampler import GroupSampler, DistributedGroupSampler - - -def build_data(cfg, args): - dataset = CocoDataset(**cfg) - - if args.dist: - sampler = DistributedGroupSampler(dataset, args.img_per_gpu, - args.world_size, args.rank) - batch_size = args.img_per_gpu - num_workers = args.data_workers - else: - sampler = GroupSampler(dataset, args.img_per_gpu) - batch_size = args.world_size * args.img_per_gpu - num_workers = args.world_size * args.data_workers - - loader = torch.utils.data.DataLoader( - dataset, - batch_size=args.img_per_gpu, - sampler=sampler, - num_workers=num_workers, - collate_fn=partial(collate, samples_per_gpu=args.img_per_gpu), - pin_memory=False) - - return loader diff --git a/mmdet/datasets/loader/__init__.py b/mmdet/datasets/loader/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a3d4fdd2cbbe85b26b4c5fa4898315accbe94c0a --- /dev/null +++ b/mmdet/datasets/loader/__init__.py @@ -0,0 +1,6 @@ +from .build_loader import build_dataloader +from .sampler import GroupSampler, DistributedGroupSampler + +__all__ = [ + 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader' +] diff --git a/mmdet/datasets/loader/build_loader.py b/mmdet/datasets/loader/build_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..d3b342b32b83b629110877de649923c3610ba4bb --- /dev/null +++ b/mmdet/datasets/loader/build_loader.py @@ -0,0 +1,44 @@ +from functools import partial + +from mmcv.runner import get_dist_info +from mmcv.parallel import collate +from torch.utils.data import DataLoader + +from .sampler import GroupSampler, DistributedGroupSampler + +# https://github.com/pytorch/pytorch/issues/973 +import resource +rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) +resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) + + +def build_dataloader(dataset, + imgs_per_gpu, + workers_per_gpu, + num_gpus, + dist=True, + **kwargs): + if dist: + rank, world_size = get_dist_info() + sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size, + rank) + batch_size = imgs_per_gpu + num_workers = workers_per_gpu + else: + sampler = GroupSampler(dataset, imgs_per_gpu) + batch_size = num_gpus * imgs_per_gpu + num_workers = num_gpus * workers_per_gpu + + if not kwargs.get('shuffle', True): + sampler = None + + data_loader = DataLoader( + dataset, + batch_size=batch_size, + sampler=sampler, + num_workers=num_workers, + collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), + pin_memory=False, + **kwargs) + + return data_loader diff --git a/mmdet/datasets/sampler.py b/mmdet/datasets/loader/sampler.py similarity index 98% rename from mmdet/datasets/sampler.py rename to mmdet/datasets/loader/sampler.py index 
74089821bf17a7bdc6f1f728c0340e382adb3046..5c060cd926ea50d232d0f765b86933ca8fad0969 100644 --- a/mmdet/datasets/sampler.py +++ b/mmdet/datasets/loader/sampler.py @@ -7,8 +7,6 @@ import numpy as np from torch.distributed import get_world_size, get_rank from torch.utils.data.sampler import Sampler -__all__ = ['GroupSampler', 'DistributedGroupSampler'] - class GroupSampler(Sampler): diff --git a/mmdet/datasets/transforms.py b/mmdet/datasets/transforms.py index 1532fe074f2968b225cc030dc3f868b3c7780194..ddb2fb2c2f483326e8703a108d086a919542b212 100644 --- a/mmdet/datasets/transforms.py +++ b/mmdet/datasets/transforms.py @@ -2,15 +2,12 @@ import mmcv import numpy as np import torch -from mmdet.core.mask_ops import segms - -__all__ = [ - 'ImageTransform', 'BboxTransform', 'PolyMaskTransform', 'Numpy2Tensor' -] +__all__ = ['ImageTransform', 'BboxTransform', 'MaskTransform', 'Numpy2Tensor'] class ImageTransform(object): - """Preprocess an image + """Preprocess an image. + 1. rescale the image to expected size 2. normalize the image 3. flip the image (if needed) @@ -29,90 +26,38 @@ class ImageTransform(object): self.size_divisor = size_divisor def __call__(self, img, scale, flip=False): - img, scale_factor = mmcv.imrescale(img, scale, True) + img, scale_factor = mmcv.imrescale(img, scale, return_scale=True) img_shape = img.shape - img = mmcv.imnorm(img, self.mean, self.std, self.to_rgb) + img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb) if flip: img = mmcv.imflip(img) if self.size_divisor is not None: img = mmcv.impad_to_multiple(img, self.size_divisor) + pad_shape = img.shape + else: + pad_shape = img_shape img = img.transpose(2, 0, 1) - return img, img_shape, scale_factor - - # img, scale = cvb.resize_keep_ar(img_or_path, max_long_edge, - # max_short_edge, True) - # shape_scale = np.array(img.shape + (scale, ), dtype=np.float32) - # if flip: - # img = img[:, ::-1, :].copy() - # if self.color_order == 'RGB': - # img = cvb.bgr2rgb(img) - # img = img.astype(np.float32) - # img -= self.color_mean - # img /= self.color_std - # if self.size_divisor is None: - # padded_img = img - # else: - # pad_h = int(np.ceil( - # img.shape[0] / self.size_divisor)) * self.size_divisor - # pad_w = int(np.ceil( - # img.shape[1] / self.size_divisor)) * self.size_divisor - # padded_img = cvb.pad_img(img, (pad_h, pad_w), pad_val=0) - # padded_img = padded_img.transpose(2, 0, 1) - # return padded_img, shape_scale - - -class ImageCrop(object): - """crop image patches and resize patches into fixed size - 1. (read and) flip image (if needed) - 2. crop image patches according to given bboxes - 3. resize patches into fixed size (default 224x224) - 4. normalize the image (if needed) - 5. 
transpose to (c, h, w) (if needed) - """ + return img, img_shape, pad_shape, scale_factor - def __init__(self, - normalize=True, - transpose=True, - color_order='RGB', - color_mean=(0, 0, 0), - color_std=(1, 1, 1)): - self.normalize = normalize - self.transpose = transpose - - assert color_order in ['RGB', 'BGR'] - self.color_order = color_order - self.color_mean = np.array(color_mean, dtype=np.float32) - self.color_std = np.array(color_std, dtype=np.float32) - - def __call__(self, - img_or_path, - bboxes, - crop_size, - scale_ratio=1.0, - flip=False): - img = cvb.read_img(img_or_path) - if flip: - img = img[:, ::-1, :].copy() - crop_imgs = cvb.crop_img( - img, - bboxes[:, :4], - scale_ratio=scale_ratio, - pad_fill=self.color_mean) - processed_crop_imgs_list = [] - for i in range(len(crop_imgs)): - crop_img = crop_imgs[i] - crop_img = cvb.resize(crop_img, crop_size) - crop_img = crop_img.astype(np.float32) - crop_img -= self.color_mean - crop_img /= self.color_std - processed_crop_imgs_list.append(crop_img) - processed_crop_imgs = np.stack(processed_crop_imgs_list, axis=0) - processed_crop_imgs = processed_crop_imgs.transpose(0, 3, 1, 2) - return processed_crop_imgs + +def bbox_flip(bboxes, img_shape): + """Flip bboxes horizontally. + + Args: + bboxes(ndarray): shape (..., 4*k) + img_shape(tuple): (height, width) + """ + assert bboxes.shape[-1] % 4 == 0 + w = img_shape[1] + flipped = bboxes.copy() + flipped[..., 0::4] = w - bboxes[..., 2::4] - 1 + flipped[..., 2::4] = w - bboxes[..., 0::4] - 1 + return flipped class BboxTransform(object): - """Preprocess gt bboxes + """Preprocess gt bboxes. + 1. rescale bboxes according to image size 2. flip bboxes (if needed) 3. pad the first dimension to `max_num_gts` @@ -124,7 +69,7 @@ class BboxTransform(object): def __call__(self, bboxes, img_shape, scale_factor, flip=False): gt_bboxes = bboxes * scale_factor if flip: - gt_bboxes = mmcv.bbox_flip(gt_bboxes, img_shape) + gt_bboxes = bbox_flip(gt_bboxes, img_shape) gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1]) gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0]) if self.max_num_gts is None: @@ -136,64 +81,25 @@ class BboxTransform(object): return padded_bboxes -class PolyMaskTransform(object): - - def __init__(self): - pass - - def __call__(self, gt_mask_polys, gt_poly_lens, img_h, img_w, flip=False): - """ - Args: - gt_mask_polys(list): a list of masks, each mask is a list of polys, - each poly is a list of numbers - gt_poly_lens(list): a list of int, indicating the size of each poly - """ - if flip: - gt_mask_polys = segms.flip_segms(gt_mask_polys, img_h, img_w) - num_polys_per_mask = np.array( - [len(mask_polys) for mask_polys in gt_mask_polys], dtype=np.int64) - gt_poly_lens = np.array(gt_poly_lens, dtype=np.int64) - gt_mask_polys = [ - np.concatenate(mask_polys).astype(np.float32) - for mask_polys in gt_mask_polys - ] - gt_mask_polys = np.concatenate(gt_mask_polys) - return gt_mask_polys, gt_poly_lens, num_polys_per_mask - - class MaskTransform(object): - """Preprocess masks + """Preprocess masks. + 1. resize masks to expected size and stack to a single array 2. flip the masks (if needed) 3. 
pad the masks (if needed) """ - def __init__(self, max_num_gts, pad_size=None): - self.max_num_gts = max_num_gts - self.pad_size = pad_size - - def __call__(self, masks, img_size, flip=False): - max_long_edge = max(img_size) - max_short_edge = min(img_size) + def __call__(self, masks, pad_shape, scale_factor, flip=False): masks = [ - cvb.resize_keep_ar( - mask, - max_long_edge, - max_short_edge, - interpolation=cvb.INTER_NEAREST) for mask in masks + mmcv.imrescale(mask, scale_factor, interpolation='nearest') + for mask in masks ] - masks = np.stack(masks, axis=0) if flip: - masks = masks[:, ::-1, :] - if self.pad_size is None: - pad_h = masks.shape[1] - pad_w = masks.shape[2] - else: - pad_size = self.pad_size if self.pad_size > 0 else max_long_edge - pad_h = pad_w = pad_size - padded_masks = np.zeros( - (self.max_num_gts, pad_h, pad_w), dtype=masks.dtype) - padded_masks[:masks.shape[0], :masks.shape[1], :masks.shape[2]] = masks + masks = [mask[:, ::-1] for mask in masks] + padded_masks = [ + mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks + ] + padded_masks = np.stack(padded_masks, axis=0) return padded_masks diff --git a/mmdet/datasets/utils/misc.py b/mmdet/datasets/utils.py similarity index 64% rename from mmdet/datasets/utils/misc.py rename to mmdet/datasets/utils.py index 419c11ad08462268b9dfe6b43182a9ec4725b00c..5a248ef6890ea348ea7ad98154cc163ae1e035c5 100644 --- a/mmdet/datasets/utils/misc.py +++ b/mmdet/datasets/utils.py @@ -1,8 +1,31 @@ +from collections import Sequence + import mmcv +import torch import matplotlib.pyplot as plt import numpy as np -import pycocotools.mask as maskUtils + + +def to_tensor(data): + """Convert objects of various python types to :obj:`torch.Tensor`. + + Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, + :class:`Sequence`, :class:`int` and :class:`float`. 
+ """ + if isinstance(data, torch.Tensor): + return data + elif isinstance(data, np.ndarray): + return torch.from_numpy(data) + elif isinstance(data, Sequence) and not mmcv.is_str(data): + return torch.tensor(data) + elif isinstance(data, int): + return torch.LongTensor([data]) + elif isinstance(data, float): + return torch.FloatTensor([data]) + else: + raise TypeError('type {} cannot be converted to tensor.'.format( + type(data))) def random_scale(img_scales, mode='range'): @@ -44,19 +67,3 @@ def show_ann(coco, img, ann_info): plt.axis('off') coco.showAnns(ann_info) plt.show() - - -def draw_bbox_and_segm(img, results, dataset, score_thr=0.5): - bbox_results, segm_results = results - hi_bboxes = [] - for cls_bboxes, cls_segms in zip(bbox_results, segm_results): - if len(cls_bboxes) == 0: - hi_bboxes.append(cls_bboxes) - continue - inds = np.where(cls_bboxes[:, -1] > score_thr)[0] - hi_bboxes.append(cls_bboxes[inds, :]) - color_mask = np.random.random((1, 3)) - for i in inds: - mask = maskUtils.decode(cls_segms[i]).astype(np.bool) - img[mask] = img[mask] * 0.5 + color_mask * 0.5 - mmcv.draw_bboxes_with_label(np.ascontiguousarray(img), hi_bboxes, dataset) diff --git a/mmdet/datasets/utils/__init__.py b/mmdet/datasets/utils/__init__.py deleted file mode 100644 index de3ea43bdf4e4cc526119054954fdd1acf811c38..0000000000000000000000000000000000000000 --- a/mmdet/datasets/utils/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .data_container import DataContainer -from .misc import * diff --git a/mmdet/datasets/utils/data_container.py b/mmdet/datasets/utils/data_container.py deleted file mode 100644 index c27beab37bbd28aeb37c1231b8ff94a335702216..0000000000000000000000000000000000000000 --- a/mmdet/datasets/utils/data_container.py +++ /dev/null @@ -1,80 +0,0 @@ -import functools -from collections import Sequence - -import mmcv -import numpy as np -import torch - - -def to_tensor(data): - """Convert objects of various python types to :obj:`torch.Tensor`. - - Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, - :class:`Sequence`, :class:`int` and :class:`float`. 
- """ - if isinstance(data, np.ndarray): - return torch.from_numpy(data) - elif isinstance(data, torch.Tensor): - return data - elif isinstance(data, Sequence) and not mmcv.is_str(data): - return torch.tensor(data) - elif isinstance(data, int): - return torch.LongTensor([data]) - elif isinstance(data, float): - return torch.FloatTensor([data]) - else: - raise TypeError('type {} cannot be converted to tensor.'.format( - type(data))) - - -def assert_tensor_type(func): - - @functools.wraps(func) - def wrapper(*args, **kwargs): - if not isinstance(args[0].data, torch.Tensor): - raise AttributeError('{} has no attribute {} for type {}'.format( - args[0].__class__.__name__, func.__name__, args[0].datatype)) - return func(*args, **kwargs) - - return wrapper - - -class DataContainer(object): - - def __init__(self, data, stack=False, padding_value=0): - if isinstance(data, list): - self._data = data - else: - self._data = to_tensor(data) - self._stack = stack - self._padding_value = padding_value - - def __repr__(self): - return '{}({})'.format(self.__class__.__name__, repr(self.data)) - - @property - def data(self): - return self._data - - @property - def datatype(self): - if isinstance(self.data, torch.Tensor): - return self.data.type() - else: - return type(self.data) - - @property - def stack(self): - return self._stack - - @property - def padding_value(self): - return self._padding_value - - @assert_tensor_type - def size(self, *args, **kwargs): - return self.data.size(*args, **kwargs) - - @assert_tensor_type - def dim(self): - return self.data.dim() diff --git a/mmdet/models/__init__.py b/mmdet/models/__init__.py index 2209550509f71a71a66b2582440986eebcf3926c..aca6399e45e3e21c40d8e2470b233ac0d992888e 100644 --- a/mmdet/models/__init__.py +++ b/mmdet/models/__init__.py @@ -1 +1,9 @@ -from .detectors import Detector +from .detectors import BaseDetector, RPN, FasterRCNN, MaskRCNN +from .builder import (build_neck, build_rpn_head, build_roi_extractor, + build_bbox_head, build_mask_head, build_detector) + +__all__ = [ + 'BaseDetector', 'RPN', 'FasterRCNN', 'MaskRCNN', 'build_backbone', + 'build_neck', 'build_rpn_head', 'build_roi_extractor', 'build_bbox_head', + 'build_mask_head', 'build_detector' +] diff --git a/mmdet/models/backbones/__init__.py b/mmdet/models/backbones/__init__.py index f9e21e83d1469167d35de22c6511f6c09c260727..107507ceaf6d1a36cafe07197cefd9693a13a49b 100644 --- a/mmdet/models/backbones/__init__.py +++ b/mmdet/models/backbones/__init__.py @@ -1 +1,3 @@ from .resnet import resnet + +__all__ = ['resnet'] diff --git a/mmdet/models/backbones/resnet.py b/mmdet/models/backbones/resnet.py index f8203accd4b335886b7ebffd59517bdc8568769e..371f4f59feca466eca0040faeb1ae7de5e78800f 100644 --- a/mmdet/models/backbones/resnet.py +++ b/mmdet/models/backbones/resnet.py @@ -1,7 +1,9 @@ +import logging import math + import torch.nn as nn import torch.utils.checkpoint as cp -from torchpack import load_checkpoint +from mmcv.runner import load_checkpoint def conv3x3(in_planes, out_planes, stride=1, dilation=1): @@ -25,7 +27,7 @@ class BasicBlock(nn.Module): stride=1, dilation=1, downsample=None, - style='fb'): + style='pytorch'): super(BasicBlock, self).__init__() self.conv1 = conv3x3(inplanes, planes, stride, dilation) self.bn1 = nn.BatchNorm2d(planes) @@ -64,15 +66,16 @@ class Bottleneck(nn.Module): stride=1, dilation=1, downsample=None, - style='fb', + style='pytorch', with_cp=False): - """Bottleneck block - if style is "fb", the stride-two layer is the 3x3 conv layer, - if style is "msra", the 
stride-two layer is the first 1x1 conv layer + """Bottleneck block. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, + if it is "caffe", the stride-two layer is the first 1x1 conv layer. """ super(Bottleneck, self).__init__() - assert style in ['fb', 'msra'] - if style == 'fb': + assert style in ['pytorch', 'caffe'] + if style == 'pytorch': conv1_stride = 1 conv2_stride = stride else: @@ -139,7 +142,7 @@ def make_res_layer(block, blocks, stride=1, dilation=1, - style='fb', + style='pytorch', with_cp=False): downsample = None if stride != 1 or inplanes != planes * block.expansion: @@ -173,7 +176,12 @@ def make_res_layer(block, class ResHead(nn.Module): - def __init__(self, block, num_blocks, stride=2, dilation=1, style='fb'): + def __init__(self, + block, + num_blocks, + stride=2, + dilation=1, + style='pytorch'): self.layer4 = make_res_layer( block, 1024, @@ -196,9 +204,10 @@ class ResNet(nn.Module): dilations=(1, 1, 1, 1), out_indices=(0, 1, 2, 3), frozen_stages=-1, - style='fb', + style='pytorch', sync_bn=False, - with_cp=False): + with_cp=False, + strict_frozen=False): super(ResNet, self).__init__() if not len(layers) == len(strides) == len(dilations): raise ValueError( @@ -234,14 +243,17 @@ class ResNet(nn.Module): style=self.style, with_cp=with_cp) self.inplanes = planes * block.expansion - setattr(self, layer_name, res_layer) + self.add_module(layer_name, res_layer) self.res_layers.append(layer_name) self.feat_dim = block.expansion * 64 * 2**(len(layers) - 1) self.with_cp = with_cp + self.strict_frozen = strict_frozen + def init_weights(self, pretrained=None): if isinstance(pretrained, str): - load_checkpoint(self, pretrained, strict=False) + logger = logging.getLogger() + load_checkpoint(self, pretrained, strict=False, logger=logger) elif pretrained is None: for m in self.modules(): if isinstance(m, nn.Conv2d): @@ -275,6 +287,9 @@ class ResNet(nn.Module): for m in self.modules(): if isinstance(m, nn.BatchNorm2d): m.eval() + if self.strict_frozen: + for params in m.parameters(): + params.requires_grad = False if mode and self.frozen_stages >= 0: for param in self.conv1.parameters(): param.requires_grad = False @@ -305,9 +320,10 @@ def resnet(depth, dilations=(1, 1, 1, 1), out_indices=(2, ), frozen_stages=-1, - style='fb', + style='pytorch', sync_bn=False, - with_cp=False): + with_cp=False, + strict_frozen=False): """Constructs a ResNet model. 
Args: @@ -321,5 +337,5 @@ def resnet(depth, raise KeyError('invalid depth {} for resnet'.format(depth)) block, layers = resnet_cfg[depth] model = ResNet(block, layers[:num_stages], strides, dilations, out_indices, - frozen_stages, style, sync_bn, with_cp) + frozen_stages, style, sync_bn, with_cp, strict_frozen) return model diff --git a/mmdet/models/bbox_heads/bbox_head.py b/mmdet/models/bbox_heads/bbox_head.py index 5f6e1136eed45abe85a710170e76e04cba0e91cf..67dba03959231b5ed0f784ac97542911b56cc785 100644 --- a/mmdet/models/bbox_heads/bbox_head.py +++ b/mmdet/models/bbox_heads/bbox_head.py @@ -1,7 +1,7 @@ import torch.nn as nn import torch.nn.functional as F -from mmdet.core import (bbox_transform_inv, multiclass_nms, bbox_target, +from mmdet.core import (delta2bbox, multiclass_nms, bbox_target, weighted_cross_entropy, weighted_smoothl1, accuracy) @@ -60,7 +60,7 @@ class BBoxHead(nn.Module): return cls_score, bbox_pred def get_bbox_target(self, pos_proposals, neg_proposals, pos_gt_bboxes, - pos_gt_labels, rcnn_train_cfg): + pos_gt_labels, rcnn_train_cfg): reg_num_classes = 1 if self.reg_class_agnostic else self.num_classes cls_reg_targets = bbox_target( pos_proposals, @@ -85,7 +85,7 @@ class BBoxHead(nn.Module): bbox_pred, bbox_targets, bbox_weights, - ave_factor=bbox_targets.size(0)) + avg_factor=bbox_targets.size(0)) return losses def get_det_bboxes(self, @@ -101,15 +101,14 @@ class BBoxHead(nn.Module): scores = F.softmax(cls_score, dim=1) if cls_score is not None else None if bbox_pred is not None: - bboxes = bbox_transform_inv(rois[:, 1:], bbox_pred, - self.target_means, self.target_stds, - img_shape) + bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means, + self.target_stds, img_shape) else: bboxes = rois[:, 1:] # TODO: add clip here if rescale: - bboxes /= scale_factor.float() + bboxes /= scale_factor if nms_cfg is None: return bboxes, scores diff --git a/mmdet/models/bbox_heads/convfc_bbox_head.py b/mmdet/models/bbox_heads/convfc_bbox_head.py index 02e2a6b6d859e728a47f98fe857f1e71c2a6754a..f7bd7f80a9fc00bd3fc020ccd7d834eb45905067 100644 --- a/mmdet/models/bbox_heads/convfc_bbox_head.py +++ b/mmdet/models/bbox_heads/convfc_bbox_head.py @@ -43,17 +43,21 @@ class ConvFCRoIHead(BBoxHead): self.fc_out_channels = fc_out_channels # add shared convs and fcs - self.shared_convs, self.shared_fcs, last_layer_dim = self._add_conv_fc_branch( - self.num_shared_convs, self.num_shared_fcs, self.in_channels, True) + self.shared_convs, self.shared_fcs, last_layer_dim = \ + self._add_conv_fc_branch( + self.num_shared_convs, self.num_shared_fcs, self.in_channels, + True) self.shared_out_channels = last_layer_dim # add cls specific branch - self.cls_convs, self.cls_fcs, self.cls_last_dim = self._add_conv_fc_branch( - self.num_cls_convs, self.num_cls_fcs, self.shared_out_channels) + self.cls_convs, self.cls_fcs, self.cls_last_dim = \ + self._add_conv_fc_branch( + self.num_cls_convs, self.num_cls_fcs, self.shared_out_channels) # add reg specific branch - self.reg_convs, self.reg_fcs, self.reg_last_dim = self._add_conv_fc_branch( - self.num_reg_convs, self.num_reg_fcs, self.shared_out_channels) + self.reg_convs, self.reg_fcs, self.reg_last_dim = \ + self._add_conv_fc_branch( + self.num_reg_convs, self.num_reg_fcs, self.shared_out_channels) if self.num_shared_fcs == 0 and not self.with_avg_pool: if self.num_cls_fcs == 0: diff --git a/mmdet/models/builder.py b/mmdet/models/builder.py index c3b058507fcdc461a9d3b0271858522e4ba0f1ce..bdf0ac3d16f9aadb194f944b3f7c4dd1a741e8cd 100644 --- 
a/mmdet/models/builder.py +++ b/mmdet/models/builder.py @@ -1,27 +1,26 @@ -import mmcv -from mmcv import torchpack +from mmcv.runner import obj_from_dict from torch import nn from . import (backbones, necks, roi_extractors, rpn_heads, bbox_heads, - mask_heads) + mask_heads, detectors) __all__ = [ 'build_backbone', 'build_neck', 'build_rpn_head', 'build_roi_extractor', - 'build_bbox_head', 'build_mask_head' + 'build_bbox_head', 'build_mask_head', 'build_detector' ] -def _build_module(cfg, parrent=None): - return cfg if isinstance(cfg, nn.Module) else torchpack.obj_from_dict( - cfg, parrent) +def _build_module(cfg, parrent=None, default_args=None): + return cfg if isinstance(cfg, nn.Module) else obj_from_dict( + cfg, parrent, default_args) -def build(cfg, parrent=None): +def build(cfg, parrent=None, default_args=None): if isinstance(cfg, list): - modules = [_build_module(cfg_, parrent) for cfg_ in cfg] + modules = [_build_module(cfg_, parrent, default_args) for cfg_ in cfg] return nn.Sequential(*modules) else: - return _build_module(cfg, parrent) + return _build_module(cfg, parrent, default_args) def build_backbone(cfg): @@ -46,3 +45,7 @@ def build_bbox_head(cfg): def build_mask_head(cfg): return build(cfg, mask_heads) + + +def build_detector(cfg, train_cfg=None, test_cfg=None): + return build(cfg, detectors, dict(train_cfg=train_cfg, test_cfg=test_cfg)) diff --git a/mmdet/models/detectors/__init__.py b/mmdet/models/detectors/__init__.py index 5b690f8d77d6d8eae1adc4bf8b04d3dd3db3462a..b8914c1e5d3c834a1373b2a2e8360183a41de4da 100644 --- a/mmdet/models/detectors/__init__.py +++ b/mmdet/models/detectors/__init__.py @@ -1 +1,6 @@ -from .detector import Detector +from .base import BaseDetector +from .rpn import RPN +from .faster_rcnn import FasterRCNN +from .mask_rcnn import MaskRCNN + +__all__ = ['BaseDetector', 'RPN', 'FasterRCNN', 'MaskRCNN'] diff --git a/mmdet/models/detectors/base.py b/mmdet/models/detectors/base.py new file mode 100644 index 0000000000000000000000000000000000000000..d1b0fce1283b012072e7fb1f864313135eeac940 --- /dev/null +++ b/mmdet/models/detectors/base.py @@ -0,0 +1,119 @@ +import logging +from abc import ABCMeta, abstractmethod + +import mmcv +import numpy as np +import torch +import torch.nn as nn + +from mmdet.core import tensor2imgs, get_classes + + +class BaseDetector(nn.Module): + """Base class for detectors""" + + __metaclass__ = ABCMeta + + def __init__(self): + super(BaseDetector, self).__init__() + + @property + def with_neck(self): + return hasattr(self, 'neck') and self.neck is not None + + @property + def with_bbox(self): + return hasattr(self, 'bbox_head') and self.bbox_head is not None + + @property + def with_mask(self): + return hasattr(self, 'mask_head') and self.mask_head is not None + + @abstractmethod + def extract_feat(self, imgs): + pass + + def extract_feats(self, imgs): + if isinstance(imgs, torch.Tensor): + return self.extract_feat(imgs) + elif isinstance(imgs, list): + for img in imgs: + yield self.extract_feat(img) + + @abstractmethod + def forward_train(self, imgs, img_metas, **kwargs): + pass + + @abstractmethod + def simple_test(self, img, img_meta, **kwargs): + pass + + @abstractmethod + def aug_test(self, imgs, img_metas, **kwargs): + pass + + def init_weights(self, pretrained=None): + if pretrained is not None: + logger = logging.getLogger() + logger.info('load model from: {}'.format(pretrained)) + + def forward_test(self, imgs, img_metas, **kwargs): + for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: + if not isinstance(var, 
list): + raise TypeError('{} must be a list, but got {}'.format( + name, type(var))) + + num_augs = len(imgs) + if num_augs != len(img_metas): + raise ValueError( + 'num of augmentations ({}) != num of image meta ({})'.format( + len(imgs), len(img_metas))) + # TODO: remove the restriction of imgs_per_gpu == 1 when prepared + imgs_per_gpu = imgs[0].size(0) + assert imgs_per_gpu == 1 + + if num_augs == 1: + return self.simple_test(imgs[0], img_metas[0], **kwargs) + else: + return self.aug_test(imgs, img_metas, **kwargs) + + def forward(self, img, img_meta, return_loss=True, **kwargs): + if return_loss: + return self.forward_train(img, img_meta, **kwargs) + else: + return self.forward_test(img, img_meta, **kwargs) + + def show_result(self, + data, + result, + img_norm_cfg, + dataset='coco', + score_thr=0.3): + img_tensor = data['img'][0] + img_metas = data['img_meta'][0].data[0] + imgs = tensor2imgs(img_tensor, **img_norm_cfg) + assert len(imgs) == len(img_metas) + + if isinstance(dataset, str): + class_names = get_classes(dataset) + elif isinstance(dataset, list): + class_names = dataset + else: + raise TypeError('dataset must be a valid dataset name or a list' + ' of class names, not {}'.format(type(dataset))) + + for img, img_meta in zip(imgs, img_metas): + h, w, _ = img_meta['img_shape'] + img_show = img[:h, :w, :] + labels = [ + np.full(bbox.shape[0], i, dtype=np.int32) + for i, bbox in enumerate(result) + ] + labels = np.concatenate(labels) + bboxes = np.vstack(result) + mmcv.imshow_det_bboxes( + img_show, + bboxes, + labels, + class_names=class_names, + score_thr=score_thr) diff --git a/mmdet/models/detectors/detector.py b/mmdet/models/detectors/detector.py deleted file mode 100644 index 80b7d4438cb59612dbff8a2bf71930eb6383a144..0000000000000000000000000000000000000000 --- a/mmdet/models/detectors/detector.py +++ /dev/null @@ -1,348 +0,0 @@ -import torch -import torch.nn as nn - -from .. 
import builder -from mmdet.core import (bbox2roi, bbox_mapping, split_combined_gt_polys, - bbox2result, multiclass_nms, merge_aug_proposals, - merge_aug_bboxes, merge_aug_masks, sample_proposals) - - -class Detector(nn.Module): - def __init__(self, - backbone, - neck=None, - rpn_head=None, - roi_block=None, - bbox_head=None, - mask_block=None, - mask_head=None, - rpn_train_cfg=None, - rpn_test_cfg=None, - rcnn_train_cfg=None, - rcnn_test_cfg=None, - pretrained=None): - super(Detector, self).__init__() - self.backbone = builder.build_backbone(backbone) - - self.with_neck = True if neck is not None else False - if self.with_neck: - self.neck = builder.build_neck(neck) - - self.with_rpn = True if rpn_head is not None else False - if self.with_rpn: - self.rpn_head = builder.build_rpn_head(rpn_head) - self.rpn_train_cfg = rpn_train_cfg - self.rpn_test_cfg = rpn_test_cfg - - self.with_bbox = True if bbox_head is not None else False - if self.with_bbox: - self.bbox_roi_extractor = builder.build_roi_extractor(roi_block) - self.bbox_head = builder.build_bbox_head(bbox_head) - self.rcnn_train_cfg = rcnn_train_cfg - self.rcnn_test_cfg = rcnn_test_cfg - - self.with_mask = True if mask_head is not None else False - if self.with_mask: - self.mask_roi_extractor = builder.build_roi_extractor(mask_block) - self.mask_head = builder.build_mask_head(mask_head) - - self.init_weights(pretrained=pretrained) - - def init_weights(self, pretrained=None): - if pretrained is not None: - print('load model from: {}'.format(pretrained)) - self.backbone.init_weights(pretrained=pretrained) - if self.with_neck: - if isinstance(self.neck, nn.Sequential): - for m in self.neck: - m.init_weights() - else: - self.neck.init_weights() - if self.with_rpn: - self.rpn_head.init_weights() - if self.with_bbox: - self.bbox_roi_extractor.init_weights() - self.bbox_head.init_weights() - if self.with_mask: - self.mask_roi_extractor.init_weights() - self.mask_head.init_weights() - - def forward(self, - img, - img_meta, - gt_bboxes=None, - proposals=None, - gt_labels=None, - gt_bboxes_ignore=None, - gt_mask_polys=None, - gt_poly_lens=None, - num_polys_per_mask=None, - return_loss=True, - return_bboxes=True, - rescale=False): - assert proposals is not None or self.with_rpn, "Only one of proposals file and RPN can exist." 
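# Minimal usage sketch (illustrative only, not part of the patch hunks): it
# assumes mmcv's Config loader and the build_detector/forward interfaces shown
# above; the variable names and keyword arguments are examples, not fixed API.
# It shows how the monolithic Detector deleted below is replaced by
# config-driven construction plus BaseDetector's dispatch on `return_loss`.
from mmcv import Config
from mmdet.models import build_detector

cfg = Config.fromfile('configs/faster_rcnn_r50_fpn_1x.py')
model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
# training mode: returns a dict of losses
#   losses = model(img, img_meta, gt_bboxes=..., gt_bboxes_ignore=...,
#                  gt_labels=..., return_loss=True)
# testing mode: returns bbox (and optionally mask) results
#   results = model(img, img_meta, return_loss=False)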
- - if not return_loss: - return self.test(img, img_meta, proposals, rescale) - else: - losses = dict() - - img_shapes = img_meta['img_shape'] - x = self.backbone(img) - - if self.with_neck: - x = self.neck(x) - - if self.with_rpn: - rpn_outs = self.rpn_head(x) - rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes, - self.rpn_train_cfg) - rpn_losses = self.rpn_head.loss(*rpn_loss_inputs) - losses.update(rpn_losses) - - if self.with_bbox: - if self.with_rpn: - proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) - proposal_list = self.rpn_head.get_proposals(*proposal_inputs) - else: - proposal_list = proposals - - (pos_inds, neg_inds, pos_proposals, neg_proposals, - pos_assigned_gt_inds, - pos_gt_bboxes, pos_gt_labels) = sample_proposals( - proposal_list, gt_bboxes, gt_bboxes_ignore, gt_labels, - self.rcnn_train_cfg) - - labels, label_weights, bbox_targets, bbox_weights = \ - self.bbox_head.get_bbox_target( - pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, - self.rcnn_train_cfg) - - rois = bbox2roi([ - torch.cat([pos, neg], dim=0) - for pos, neg in zip(pos_proposals, neg_proposals) - ]) - # TODO: a more flexible way to configurate feat maps - roi_feats = self.bbox_roi_extractor( - x[:self.bbox_roi_extractor.num_inputs], rois) - cls_score, bbox_pred = self.bbox_head(roi_feats) - - loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, labels, - label_weights, bbox_targets, - bbox_weights) - losses.update(loss_bbox) - - if self.with_mask: - gt_polys = split_combined_gt_polys(gt_mask_polys, gt_poly_lens, - num_polys_per_mask) - mask_targets = self.mask_head.get_mask_target( - pos_proposals, pos_assigned_gt_inds, gt_polys, img_meta, - self.rcnn_train_cfg) - pos_rois = bbox2roi(pos_proposals) - mask_feats = self.mask_roi_extractor( - x[:self.mask_roi_extractor.num_inputs], pos_rois) - mask_pred = self.mask_head(mask_feats) - losses['loss_mask'] = self.mask_head.loss(mask_pred, mask_targets, - torch.cat(pos_gt_labels)) - return losses - - def test(self, imgs, img_metas, proposals=None, rescale=False): - """Test w/ or w/o augmentations.""" - assert isinstance(imgs, list) and isinstance(img_metas, list) - assert len(imgs) == len(img_metas) - img_per_gpu = imgs[0].size(0) - assert img_per_gpu == 1 - if len(imgs) == 1: - return self.simple_test(imgs[0], img_metas[0], proposals, rescale) - else: - return self.aug_test(imgs, img_metas, proposals, rescale) - - def simple_test_rpn(self, x, img_meta): - img_shapes = img_meta['img_shape'] - scale_factor = img_meta['scale_factor'] - rpn_outs = self.rpn_head(x) - proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) - proposal_list = self.rpn_head.get_proposals(*proposal_inputs)[0] - return proposal_list - - def simple_test_bboxes(self, x, img_meta, proposals, rescale=False): - """Test only det bboxes without augmentation.""" - rois = bbox2roi(proposals) - roi_feats = self.bbox_roi_extractor( - x[:len(self.bbox_roi_extractor.featmap_strides)], rois) - cls_score, bbox_pred = self.bbox_head(roi_feats) - # image shape of the first image in the batch (only one) - img_shape = img_meta['img_shape'][0] - scale_factor = img_meta['scale_factor'] - det_bboxes, det_labels = self.bbox_head.get_det_bboxes( - rois, - cls_score, - bbox_pred, - img_shape, - scale_factor, - rescale=rescale, - nms_cfg=self.rcnn_test_cfg) - return det_bboxes, det_labels - - def simple_test_mask(self, - x, - img_meta, - det_bboxes, - det_labels, - rescale=False): - # image shape of the first image in the batch (only one) - img_shape = img_meta['img_shape'][0] - 
scale_factor = img_meta['scale_factor'] - if det_bboxes.shape[0] == 0: - segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] - else: - # if det_bboxes is rescaled to the original image size, we need to - # rescale it back to the testing scale to obtain RoIs. - _bboxes = (det_bboxes[:, :4] * scale_factor.float() - if rescale else det_bboxes) - mask_rois = bbox2roi([_bboxes]) - mask_feats = self.mask_roi_extractor( - x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) - mask_pred = self.mask_head(mask_feats) - segm_result = self.mask_head.get_seg_masks( - mask_pred, - det_bboxes, - det_labels, - self.rcnn_test_cfg, - ori_scale=img_meta['ori_shape']) - return segm_result - - def simple_test(self, img, img_meta, proposals=None, rescale=False): - """Test without augmentation.""" - # get feature maps - x = self.backbone(img) - if self.with_neck: - x = self.neck(x) - if self.with_rpn: - proposals = self.simple_test_rpn(x, img_meta) - if self.with_bbox: - # BUG proposals shape? - det_bboxes, det_labels = self.simple_test_bboxes( - x, img_meta, [proposals], rescale=rescale) - bbox_result = bbox2result(det_bboxes, det_labels, - self.bbox_head.num_classes) - if not self.with_mask: - return bbox_result - - segm_result = self.simple_test_mask( - x, img_meta, det_bboxes, det_labels, rescale=rescale) - return bbox_result, segm_result - else: - proposals[:, :4] /= img_meta['scale_factor'].float() - return proposals.cpu().numpy() - - # TODO aug test haven't been verified - def aug_test_bboxes(self, imgs, img_metas): - """Test with augmentations for det bboxes.""" - # step 1: get RPN proposals for augmented images, apply NMS to the - # union of all proposals. - aug_proposals = [] - for img, img_meta in zip(imgs, img_metas): - x = self.backbone(img) - if self.neck is not None: - x = self.neck(x) - rpn_outs = self.rpn_head(x) - proposal_inputs = rpn_outs + (img_meta['shape_scale'], - self.rpn_test_cfg) - proposal_list = self.rpn_head.get_proposals(*proposal_inputs) - assert len(proposal_list) == 1 - aug_proposals.append(proposal_list[0]) # len(proposal_list) = 1 - # after merging, proposals will be rescaled to the original image size - merged_proposals = merge_aug_proposals(aug_proposals, img_metas, - self.rpn_test_cfg) - # step 2: Given merged proposals, predict bboxes for augmented images, - # output the union of these bboxes. 
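# A small numeric sketch of the horizontal box flip that augmented testing
# relies on when mapping proposals between flipped and original images. It
# mirrors the bbox_flip helper added in mmdet/datasets/transforms.py above;
# the sample box and image shape below are made up for illustration.
import numpy as np

def bbox_flip(bboxes, img_shape):
    # img_shape is (height, width); boxes are [x1, y1, x2, y2, ...]
    w = img_shape[1]
    flipped = bboxes.copy()
    flipped[..., 0::4] = w - bboxes[..., 2::4] - 1
    flipped[..., 2::4] = w - bboxes[..., 0::4] - 1
    return flipped

boxes = np.array([[10., 20., 50., 80.]])
print(bbox_flip(boxes, (100, 200)))  # -> [[149., 20., 189., 80.]]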
- aug_bboxes = [] - aug_scores = [] - for img, img_meta in zip(imgs, img_metas): - # only one image in the batch - img_shape = img_meta['shape_scale'][0] - flip = img_meta['flip'][0] - proposals = bbox_mapping(merged_proposals[:, :4], img_shape, flip) - rois = bbox2roi([proposals]) - # recompute feature maps to save GPU memory - x = self.backbone(img) - if self.neck is not None: - x = self.neck(x) - roi_feats = self.bbox_roi_extractor( - x[:len(self.bbox_roi_extractor.featmap_strides)], rois) - cls_score, bbox_pred = self.bbox_head(roi_feats) - bboxes, scores = self.bbox_head.get_det_bboxes( - rois, - cls_score, - bbox_pred, - img_shape, - rescale=False, - nms_cfg=None) - aug_bboxes.append(bboxes) - aug_scores.append(scores) - # after merging, bboxes will be rescaled to the original image size - merged_bboxes, merged_scores = merge_aug_bboxes( - aug_bboxes, aug_scores, img_metas, self.rcnn_test_cfg) - det_bboxes, det_labels = multiclass_nms( - merged_bboxes, merged_scores, self.rcnn_test_cfg.score_thr, - self.rcnn_test_cfg.nms_thr, self.rcnn_test_cfg.max_per_img) - return det_bboxes, det_labels - - def aug_test_mask(self, - imgs, - img_metas, - det_bboxes, - det_labels, - rescale=False): - # step 3: Given merged bboxes, predict masks for augmented images, - # scores of masks are averaged across augmented images. - if rescale: - _det_bboxes = det_bboxes - else: - _det_bboxes = det_bboxes.clone() - _det_bboxes[:, :4] *= img_metas[0]['shape_scale'][0][-1] - if det_bboxes.shape[0] == 0: - segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] - else: - aug_masks = [] - for img, img_meta in zip(imgs, img_metas): - img_shape = img_meta['shape_scale'][0] - flip = img_meta['flip'][0] - _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, flip) - mask_rois = bbox2roi([_bboxes]) - x = self.backbone(img) - if self.neck is not None: - x = self.neck(x) - mask_feats = self.mask_roi_extractor( - x[:len(self.mask_roi_extractor.featmap_strides)], - mask_rois) - mask_pred = self.mask_head(mask_feats) - # convert to numpy array to save memory - aug_masks.append(mask_pred.sigmoid().cpu().numpy()) - merged_masks = merge_aug_masks(aug_masks, img_metas, - self.rcnn_test_cfg) - segm_result = self.mask_head.get_seg_masks( - merged_masks, _det_bboxes, det_labels, - img_metas[0]['shape_scale'][0], self.rcnn_test_cfg, rescale) - return segm_result - - def aug_test(self, imgs, img_metas, rescale=False): - """Test with augmentations. - If rescale is False, then returned bboxes and masks will fit the scale - if imgs[0]. 
- """ - # aug test det bboxes - det_bboxes, det_labels = self.aug_test_bboxes(imgs, img_metas) - if rescale: - _det_bboxes = det_bboxes - else: - _det_bboxes = det_bboxes.clone() - _det_bboxes[:, :4] *= img_metas[0]['shape_scale'][0][-1] - bbox_result = bbox2result(_det_bboxes, det_labels, - self.bbox_head.num_classes) - if not self.with_mask: - return bbox_result - segm_result = self.aug_test_mask( - imgs, img_metas, det_bboxes, det_labels, rescale=rescale) - return bbox_result, segm_result diff --git a/mmdet/models/detectors/faster_rcnn.py b/mmdet/models/detectors/faster_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..dd31f60c1d819b6c7ba47a67ecb3285a46e09636 --- /dev/null +++ b/mmdet/models/detectors/faster_rcnn.py @@ -0,0 +1,23 @@ +from .two_stage import TwoStageDetector + + +class FasterRCNN(TwoStageDetector): + + def __init__(self, + backbone, + neck, + rpn_head, + bbox_roi_extractor, + bbox_head, + train_cfg, + test_cfg, + pretrained=None): + super(FasterRCNN, self).__init__( + backbone=backbone, + neck=neck, + rpn_head=rpn_head, + bbox_roi_extractor=bbox_roi_extractor, + bbox_head=bbox_head, + train_cfg=train_cfg, + test_cfg=test_cfg, + pretrained=pretrained) diff --git a/mmdet/models/detectors/mask_rcnn.py b/mmdet/models/detectors/mask_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..25a363e398f6c0d01e2f8bd53e05c9046a5275ac --- /dev/null +++ b/mmdet/models/detectors/mask_rcnn.py @@ -0,0 +1,34 @@ +from .two_stage import TwoStageDetector + + +class MaskRCNN(TwoStageDetector): + + def __init__(self, + backbone, + neck, + rpn_head, + bbox_roi_extractor, + bbox_head, + mask_roi_extractor, + mask_head, + train_cfg, + test_cfg, + pretrained=None): + super(MaskRCNN, self).__init__( + backbone=backbone, + neck=neck, + rpn_head=rpn_head, + bbox_roi_extractor=bbox_roi_extractor, + bbox_head=bbox_head, + mask_roi_extractor=mask_roi_extractor, + mask_head=mask_head, + train_cfg=train_cfg, + test_cfg=test_cfg, + pretrained=pretrained) + + def show_result(self, data, result, img_norm_cfg, **kwargs): + # TODO: show segmentation masks + assert isinstance(result, tuple) + assert len(result) == 2 # (bbox_results, segm_results) + super(MaskRCNN, self).show_result(data, result[0], img_norm_cfg, + **kwargs) diff --git a/mmdet/models/detectors/rpn.py b/mmdet/models/detectors/rpn.py new file mode 100644 index 0000000000000000000000000000000000000000..9d700fe3e3c3af357256b36f1582c6a8c7249580 --- /dev/null +++ b/mmdet/models/detectors/rpn.py @@ -0,0 +1,85 @@ +import mmcv + +from mmdet.core import tensor2imgs, bbox_mapping +from .base import BaseDetector +from .test_mixins import RPNTestMixin +from .. 
import builder + + +class RPN(BaseDetector, RPNTestMixin): + + def __init__(self, + backbone, + neck, + rpn_head, + train_cfg, + test_cfg, + pretrained=None): + super(RPN, self).__init__() + self.backbone = builder.build_backbone(backbone) + self.neck = builder.build_neck(neck) if neck is not None else None + self.rpn_head = builder.build_rpn_head(rpn_head) + self.train_cfg = train_cfg + self.test_cfg = test_cfg + self.init_weights(pretrained=pretrained) + + def init_weights(self, pretrained=None): + super(RPN, self).init_weights(pretrained) + self.backbone.init_weights(pretrained=pretrained) + if self.with_neck: + self.neck.init_weights() + self.rpn_head.init_weights() + + def extract_feat(self, img): + x = self.backbone(img) + if self.with_neck: + x = self.neck(x) + return x + + def forward_train(self, img, img_meta, gt_bboxes=None): + if self.train_cfg.rpn.get('debug', False): + self.rpn_head.debug_imgs = tensor2imgs(img) + + x = self.extract_feat(img) + rpn_outs = self.rpn_head(x) + + rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn) + losses = self.rpn_head.loss(*rpn_loss_inputs) + return losses + + def simple_test(self, img, img_meta, rescale=False): + x = self.extract_feat(img) + proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn) + if rescale: + for proposals, meta in zip(proposal_list, img_meta): + proposals[:, :4] /= meta['scale_factor'] + # TODO: remove this restriction + return proposal_list[0].cpu().numpy() + + def aug_test(self, imgs, img_metas, rescale=False): + proposal_list = self.aug_test_rpn( + self.extract_feats(imgs), img_metas, self.test_cfg.rpn) + if not rescale: + for proposals, img_meta in zip(proposal_list, img_metas[0]): + img_shape = img_meta['img_shape'] + scale_factor = img_meta['scale_factor'] + flip = img_meta['flip'] + proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, + scale_factor, flip) + # TODO: remove this restriction + return proposal_list[0].cpu().numpy() + + def show_result(self, data, result, img_norm_cfg): + """Show RPN proposals on the image. + + Although we assume batch size is 1, this method supports arbitrary + batch size. 
+ """ + img_tensor = data['img'][0] + img_metas = data['img_meta'][0].data[0] + imgs = tensor2imgs(img_tensor, **img_norm_cfg) + assert len(imgs) == len(img_metas) + for img, img_meta in zip(imgs, img_metas): + h, w, _ = img_meta['img_shape'] + img_show = img[:h, :w, :] + mmcv.imshow_bboxes(img_show, result, top_k=20) diff --git a/mmdet/models/detectors/test_mixins.py b/mmdet/models/detectors/test_mixins.py new file mode 100644 index 0000000000000000000000000000000000000000..77ba244f1a3fa107bfb6828110eaa344f4a0ba8a --- /dev/null +++ b/mmdet/models/detectors/test_mixins.py @@ -0,0 +1,140 @@ +from mmdet.core import (bbox2roi, bbox_mapping, merge_aug_proposals, + merge_aug_bboxes, merge_aug_masks, multiclass_nms) + + +class RPNTestMixin(object): + + def simple_test_rpn(self, x, img_meta, rpn_test_cfg): + rpn_outs = self.rpn_head(x) + proposal_inputs = rpn_outs + (img_meta, rpn_test_cfg) + proposal_list = self.rpn_head.get_proposals(*proposal_inputs) + return proposal_list + + def aug_test_rpn(self, feats, img_metas, rpn_test_cfg): + imgs_per_gpu = len(img_metas[0]) + aug_proposals = [[] for _ in range(imgs_per_gpu)] + for x, img_meta in zip(feats, img_metas): + proposal_list = self.simple_test_rpn(x, img_meta, rpn_test_cfg) + for i, proposals in enumerate(proposal_list): + aug_proposals[i].append(proposals) + # after merging, proposals will be rescaled to the original image size + merged_proposals = [ + merge_aug_proposals(proposals, img_meta, rpn_test_cfg) + for proposals, img_meta in zip(aug_proposals, img_metas) + ] + return merged_proposals + + +class BBoxTestMixin(object): + + def simple_test_bboxes(self, + x, + img_meta, + proposals, + rcnn_test_cfg, + rescale=False): + """Test only det bboxes without augmentation.""" + rois = bbox2roi(proposals) + roi_feats = self.bbox_roi_extractor( + x[:len(self.bbox_roi_extractor.featmap_strides)], rois) + cls_score, bbox_pred = self.bbox_head(roi_feats) + img_shape = img_meta[0]['img_shape'] + scale_factor = img_meta[0]['scale_factor'] + det_bboxes, det_labels = self.bbox_head.get_det_bboxes( + rois, + cls_score, + bbox_pred, + img_shape, + scale_factor, + rescale=rescale, + nms_cfg=rcnn_test_cfg) + return det_bboxes, det_labels + + def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg): + aug_bboxes = [] + aug_scores = [] + for x, img_meta in zip(feats, img_metas): + # only one image in the batch + img_shape = img_meta[0]['img_shape'] + scale_factor = img_meta[0]['scale_factor'] + flip = img_meta[0]['flip'] + # TODO more flexible + proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, + scale_factor, flip) + rois = bbox2roi([proposals]) + # recompute feature maps to save GPU memory + roi_feats = self.bbox_roi_extractor( + x[:len(self.bbox_roi_extractor.featmap_strides)], rois) + cls_score, bbox_pred = self.bbox_head(roi_feats) + bboxes, scores = self.bbox_head.get_det_bboxes( + rois, + cls_score, + bbox_pred, + img_shape, + scale_factor, + rescale=False, + nms_cfg=None) + aug_bboxes.append(bboxes) + aug_scores.append(scores) + # after merging, bboxes will be rescaled to the original image size + merged_bboxes, merged_scores = merge_aug_bboxes( + aug_bboxes, aug_scores, img_metas, self.test_cfg.rcnn) + det_bboxes, det_labels = multiclass_nms( + merged_bboxes, merged_scores, self.test_cfg.rcnn.score_thr, + self.test_cfg.rcnn.nms_thr, self.test_cfg.rcnn.max_per_img) + return det_bboxes, det_labels + + +class MaskTestMixin(object): + + def simple_test_mask(self, + x, + img_meta, + det_bboxes, + det_labels, + rescale=False): + 
# image shape of the first image in the batch (only one) + ori_shape = img_meta[0]['ori_shape'] + scale_factor = img_meta[0]['scale_factor'] + if det_bboxes.shape[0] == 0: + segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] + else: + # if det_bboxes is rescaled to the original image size, we need to + # rescale it back to the testing scale to obtain RoIs. + _bboxes = (det_bboxes[:, :4] * scale_factor + if rescale else det_bboxes) + mask_rois = bbox2roi([_bboxes]) + mask_feats = self.mask_roi_extractor( + x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) + mask_pred = self.mask_head(mask_feats) + segm_result = self.mask_head.get_seg_masks( + mask_pred, _bboxes, det_labels, self.test_cfg.rcnn, ori_shape, + scale_factor, rescale) + return segm_result + + def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels): + if det_bboxes.shape[0] == 0: + segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] + else: + aug_masks = [] + for x, img_meta in zip(feats, img_metas): + img_shape = img_meta[0]['img_shape'] + scale_factor = img_meta[0]['scale_factor'] + flip = img_meta[0]['flip'] + _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, + scale_factor, flip) + mask_rois = bbox2roi([_bboxes]) + mask_feats = self.mask_roi_extractor( + x[:len(self.mask_roi_extractor.featmap_strides)], + mask_rois) + mask_pred = self.mask_head(mask_feats) + # convert to numpy array to save memory + aug_masks.append(mask_pred.sigmoid().cpu().numpy()) + merged_masks = merge_aug_masks(aug_masks, img_metas, + self.test_cfg.rcnn) + + ori_shape = img_metas[0][0]['ori_shape'] + segm_result = self.mask_head.get_seg_masks( + merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn, + ori_shape) + return segm_result diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/two_stage.py new file mode 100644 index 0000000000000000000000000000000000000000..8573d83215f120ba392a2f6b45cb9b6b93ca0519 --- /dev/null +++ b/mmdet/models/detectors/two_stage.py @@ -0,0 +1,190 @@ +import torch +import torch.nn as nn + +from .base import BaseDetector +from .test_mixins import RPNTestMixin, BBoxTestMixin, MaskTestMixin +from .. 
import builder +from mmdet.core import sample_bboxes, bbox2roi, bbox2result, multi_apply + + +class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin, + MaskTestMixin): + + def __init__(self, + backbone, + neck=None, + rpn_head=None, + bbox_roi_extractor=None, + bbox_head=None, + mask_roi_extractor=None, + mask_head=None, + train_cfg=None, + test_cfg=None, + pretrained=None): + super(TwoStageDetector, self).__init__() + self.backbone = builder.build_backbone(backbone) + + if neck is not None: + self.neck = builder.build_neck(neck) + else: + raise NotImplementedError + + if rpn_head is not None: + self.rpn_head = builder.build_rpn_head(rpn_head) + + if bbox_head is not None: + self.bbox_roi_extractor = builder.build_roi_extractor( + bbox_roi_extractor) + self.bbox_head = builder.build_bbox_head(bbox_head) + + if mask_head is not None: + self.mask_roi_extractor = builder.build_roi_extractor( + mask_roi_extractor) + self.mask_head = builder.build_mask_head(mask_head) + + self.train_cfg = train_cfg + self.test_cfg = test_cfg + + self.init_weights(pretrained=pretrained) + + @property + def with_rpn(self): + return hasattr(self, 'rpn_head') and self.rpn_head is not None + + def init_weights(self, pretrained=None): + super(TwoStageDetector, self).init_weights(pretrained) + self.backbone.init_weights(pretrained=pretrained) + if self.with_neck: + if isinstance(self.neck, nn.Sequential): + for m in self.neck: + m.init_weights() + else: + self.neck.init_weights() + if self.with_rpn: + self.rpn_head.init_weights() + if self.with_bbox: + self.bbox_roi_extractor.init_weights() + self.bbox_head.init_weights() + + def extract_feat(self, img): + x = self.backbone(img) + if self.with_neck: + x = self.neck(x) + return x + + def forward_train(self, + img, + img_meta, + gt_bboxes, + gt_bboxes_ignore, + gt_labels, + gt_masks=None, + proposals=None): + losses = dict() + + x = self.extract_feat(img) + + if self.with_rpn: + rpn_outs = self.rpn_head(x) + rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, + self.train_cfg.rpn) + rpn_losses = self.rpn_head.loss(*rpn_loss_inputs) + losses.update(rpn_losses) + + proposal_inputs = rpn_outs + (img_meta, self.test_cfg.rpn) + proposal_list = self.rpn_head.get_proposals(*proposal_inputs) + else: + proposal_list = proposals + + if self.with_bbox: + (pos_proposals, neg_proposals, pos_assigned_gt_inds, pos_gt_bboxes, + pos_gt_labels) = multi_apply( + sample_bboxes, + proposal_list, + gt_bboxes, + gt_bboxes_ignore, + gt_labels, + cfg=self.train_cfg.rcnn) + (labels, label_weights, bbox_targets, + bbox_weights) = self.bbox_head.get_bbox_target( + pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, + self.train_cfg.rcnn) + + rois = bbox2roi([ + torch.cat([pos, neg], dim=0) + for pos, neg in zip(pos_proposals, neg_proposals) + ]) + # TODO: a more flexible way to configurate feat maps + roi_feats = self.bbox_roi_extractor( + x[:self.bbox_roi_extractor.num_inputs], rois) + cls_score, bbox_pred = self.bbox_head(roi_feats) + + loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, labels, + label_weights, bbox_targets, + bbox_weights) + losses.update(loss_bbox) + + if self.with_mask: + mask_targets = self.mask_head.get_mask_target( + pos_proposals, pos_assigned_gt_inds, gt_masks, + self.train_cfg.rcnn) + pos_rois = bbox2roi(pos_proposals) + mask_feats = self.mask_roi_extractor( + x[:self.mask_roi_extractor.num_inputs], pos_rois) + mask_pred = self.mask_head(mask_feats) + loss_mask = self.mask_head.loss(mask_pred, mask_targets, + torch.cat(pos_gt_labels)) + 
losses.update(loss_mask) + + return losses + + def simple_test(self, img, img_meta, proposals=None, rescale=False): + """Test without augmentation.""" + assert proposals is None, "Fast RCNN hasn't been implemented." + assert self.with_bbox, "Bbox head must be implemented." + + x = self.extract_feat(img) + + proposal_list = self.simple_test_rpn( + x, img_meta, self.test_cfg.rpn) if proposals is None else proposals + + det_bboxes, det_labels = self.simple_test_bboxes( + x, img_meta, proposal_list, self.test_cfg.rcnn, rescale=rescale) + bbox_results = bbox2result(det_bboxes, det_labels, + self.bbox_head.num_classes) + + if not self.with_mask: + return bbox_results + else: + segm_results = self.simple_test_mask( + x, img_meta, det_bboxes, det_labels, rescale=rescale) + return bbox_results, segm_results + + def aug_test(self, imgs, img_metas, rescale=False): + """Test with augmentations. + + If rescale is False, then returned bboxes and masks will fit the scale + of imgs[0]. + """ + # recompute feats to save memory + proposal_list = self.aug_test_rpn( + self.extract_feats(imgs), img_metas, self.test_cfg.rpn) + det_bboxes, det_labels = self.aug_test_bboxes( + self.extract_feats(imgs), img_metas, proposal_list, + self.test_cfg.rcnn) + + if rescale: + _det_bboxes = det_bboxes + else: + _det_bboxes = det_bboxes.clone() + _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor'] + bbox_results = bbox2result(_det_bboxes, det_labels, + self.bbox_head.num_classes) + + # det_bboxes always keep the original scale + if self.with_mask: + segm_results = self.aug_test_mask( + self.extract_feats(imgs), img_metas, det_bboxes, det_labels) + return bbox_results, segm_results + else: + return bbox_results diff --git a/mmdet/models/mask_heads/fcn_mask_head.py b/mmdet/models/mask_heads/fcn_mask_head.py index 016c05204bdc4533f7cca438666aa011f5ceb56d..ba46bea77e16115378f5b8d36626e3097943bd75 100644 --- a/mmdet/models/mask_heads/fcn_mask_head.py +++ b/mmdet/models/mask_heads/fcn_mask_head.py @@ -87,18 +87,21 @@ class FCNMaskHead(nn.Module): return mask_pred def get_mask_target(self, pos_proposals, pos_assigned_gt_inds, gt_masks, - img_meta, rcnn_train_cfg): + rcnn_train_cfg): mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds, - gt_masks, img_meta, rcnn_train_cfg) + gt_masks, rcnn_train_cfg) return mask_targets def loss(self, mask_pred, mask_targets, labels): + loss = dict() loss_mask = mask_cross_entropy(mask_pred, mask_targets, labels) - return loss_mask + loss['loss_mask'] = loss_mask + return loss def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg, - ori_scale): - """Get segmentation masks from mask_pred and bboxes + ori_shape, scale_factor, rescale): + """Get segmentation masks from mask_pred and bboxes. + Args: mask_pred (Tensor or ndarray): shape (n, #class+1, h, w). 
For single-scale testing, mask_pred is the direct output of @@ -108,40 +111,44 @@ class FCNMaskHead(nn.Module): det_labels (Tensor): shape (n, ) img_shape (Tensor): shape (3, ) rcnn_test_cfg (dict): rcnn testing config - rescale (bool): whether rescale masks to original image size + ori_shape: original image size + Returns: list[list]: encoded masks """ if isinstance(mask_pred, torch.Tensor): mask_pred = mask_pred.sigmoid().cpu().numpy() assert isinstance(mask_pred, np.ndarray) + cls_segms = [[] for _ in range(self.num_classes - 1)] bboxes = det_bboxes.cpu().numpy()[:, :4] labels = det_labels.cpu().numpy() + 1 - img_h = ori_scale[0] - img_w = ori_scale[1] + + if rescale: + img_h, img_w = ori_shape[:2] + else: + img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32) + img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32) + scale_factor = 1.0 for i in range(bboxes.shape[0]): - bbox = bboxes[i, :].astype(int) + bbox = (bboxes[i, :] / scale_factor).astype(np.int32) label = labels[i] - w = bbox[2] - bbox[0] + 1 - h = bbox[3] - bbox[1] + 1 - w = max(w, 1) - h = max(h, 1) + w = max(bbox[2] - bbox[0] + 1, 1) + h = max(bbox[3] - bbox[1] + 1, 1) if not self.class_agnostic: mask_pred_ = mask_pred[i, label, :, :] else: mask_pred_ = mask_pred[i, 0, :, :] + im_mask = np.zeros((img_h, img_w), dtype=np.uint8) - im_mask = np.zeros((img_h, img_w), dtype=np.float32) - - im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = mmcv.imresize( - mask_pred_, (w, h)) - # im_mask = cv2.resize(im_mask, (img_w, img_h)) - im_mask = np.array( - im_mask > rcnn_test_cfg.mask_thr_binary, dtype=np.uint8) + bbox_mask = mmcv.imresize(mask_pred_, (w, h)) + bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype( + np.uint8) + im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask rle = mask_util.encode( np.array(im_mask[:, :, np.newaxis], order='F'))[0] cls_segms[label - 1].append(rle) + return cls_segms diff --git a/mmdet/models/necks/fpn.py b/mmdet/models/necks/fpn.py index 8b5b49826bad94ce00379e60bbafc905b0cba9af..6a256cae3647bcafa54ee2671cb7167f75fc9f95 100644 --- a/mmdet/models/necks/fpn.py +++ b/mmdet/models/necks/fpn.py @@ -101,7 +101,7 @@ class FPN(nn.Module): # build top-down path used_backbone_levels = len(laterals) for i in range(used_backbone_levels - 1, 0, -1): - laterals[i - 1] += F.upsample( + laterals[i - 1] += F.interpolate( laterals[i], scale_factor=2, mode='nearest') # build outputs @@ -111,7 +111,8 @@ class FPN(nn.Module): ] # part 2: add extra levels if self.num_outs > len(outs): - # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN) + # use max pool to get more levels on top of outputs + # (e.g., Faster R-CNN, Mask R-CNN) if not self.add_extra_convs: for i in range(self.num_outs - used_backbone_levels): outs.append(F.max_pool2d(outs[-1], 1, stride=2)) diff --git a/mmdet/models/roi_extractors/__init__.py b/mmdet/models/roi_extractors/__init__.py index e76e689753f10e87b3f6d9482e880b902f9b747e..9161708ce13fa4f0a6bb188e82a19a163b9b7e4f 100644 --- a/mmdet/models/roi_extractors/__init__.py +++ b/mmdet/models/roi_extractors/__init__.py @@ -1,3 +1,3 @@ -from .single_level import SingleLevelRoI +from .single_level import SingleRoIExtractor -__all__ = ['SingleLevelRoI'] +__all__ = ['SingleRoIExtractor'] diff --git a/mmdet/models/roi_extractors/single_level.py b/mmdet/models/roi_extractors/single_level.py index 3e37ac83d6ffb7beab56926329f71311f7eef116..3f97a631f987104422f65110a2cb6b49e080de0e 100644 --- a/mmdet/models/roi_extractors/single_level.py +++ 
b/mmdet/models/roi_extractors/single_level.py @@ -6,16 +6,25 @@ import torch.nn as nn from mmdet import ops -class SingleLevelRoI(nn.Module): - """Extract RoI features from a single level feature map. Each RoI is - mapped to a level according to its scale.""" +class SingleRoIExtractor(nn.Module): + """Extract RoI features from a single level feature map. + + If there are mulitple input feature levels, each RoI is mapped to a level + according to its scale. + + Args: + roi_layer (dict): Specify RoI layer type and arguments. + out_channels (int): Output channels of RoI layers. + featmap_strides (int): Strides of input feature maps. + finest_scale (int): Scale threshold of mapping to level 0. + """ def __init__(self, roi_layer, out_channels, featmap_strides, finest_scale=56): - super(SingleLevelRoI, self).__init__() + super(SingleRoIExtractor, self).__init__() self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) self.out_channels = out_channels self.featmap_strides = featmap_strides @@ -23,6 +32,7 @@ class SingleLevelRoI(nn.Module): @property def num_inputs(self): + """int: Input feature map levels.""" return len(self.featmap_strides) def init_weights(self): @@ -38,12 +48,19 @@ class SingleLevelRoI(nn.Module): return roi_layers def map_roi_levels(self, rois, num_levels): - """Map rois to corresponding feature levels (0-based) by scales. + """Map rois to corresponding feature levels by scales. + + - scale < finest_scale: level 0 + - finest_scale <= scale < finest_scale * 2: level 1 + - finest_scale * 2 <= scale < finest_scale * 4: level 2 + - scale >= finest_scale * 4: level 3 - scale < finest_scale: level 0 - finest_scale <= scale < finest_scale * 2: level 1 - finest_scale * 2 <= scale < finest_scale * 4: level 2 - scale >= finest_scale * 4: level 3 + Args: + rois (Tensor): Input RoIs, shape (k, 5). + num_levels (int): Total level number. + + Returns: + Tensor: Level index (0-based) of each RoI, shape (k, ) """ scale = torch.sqrt( (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) @@ -52,10 +69,6 @@ class SingleLevelRoI(nn.Module): return target_lvls def forward(self, feats, rois): - """Extract roi features with the roi layer. If multiple feature levels - are used, then rois are mapped to corresponding levels according to - their scales. - """ if len(feats) == 1: return self.roi_layers[0](feats[0], rois) diff --git a/mmdet/models/rpn_heads/rpn_head.py b/mmdet/models/rpn_heads/rpn_head.py index 7ffd441f694b5d6c37d3042bb25088f27b002ea9..e67d7ae973f05c60c8e226009cfb4234c0894f69 100644 --- a/mmdet/models/rpn_heads/rpn_head.py +++ b/mmdet/models/rpn_heads/rpn_head.py @@ -5,20 +5,36 @@ import torch import torch.nn as nn import torch.nn.functional as F -from mmdet.core import (AnchorGenerator, anchor_target, bbox_transform_inv, - weighted_cross_entropy, weighted_smoothl1, +from mmdet.core import (AnchorGenerator, anchor_target, delta2bbox, + multi_apply, weighted_cross_entropy, weighted_smoothl1, weighted_binary_cross_entropy) from mmdet.ops import nms -from ..utils import multi_apply from ..utils import normal_init class RPNHead(nn.Module): + """Network head of RPN. + + / - rpn_cls (1x1 conv) + input - rpn_conv (3x3 conv) - + \ - rpn_reg (1x1 conv) + + Args: + in_channels (int): Number of channels in the input feature map. + feat_channels (int): Number of channels for the RPN feature map. + anchor_scales (Iterable): Anchor scales. + anchor_ratios (Iterable): Anchor aspect ratios. + anchor_strides (Iterable): Anchor strides. 
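# Worked example of the scale-to-level rule documented in
# SingleRoIExtractor.map_roi_levels above (finest_scale defaults to 56).
# The thresholds are re-implemented literally for illustration; the actual
# extractor may use an equivalent closed-form (e.g. log2-based) expression.
import torch


def map_roi_levels_sketch(rois, num_levels, finest_scale=56):
    # rois: (k, 5) in (batch_idx, x1, y1, x2, y2) format
    scale = torch.sqrt(
        (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1))
    target_lvls = torch.zeros(scale.size(0), dtype=torch.long)
    target_lvls[(scale >= finest_scale) & (scale < finest_scale * 2)] = 1
    target_lvls[(scale >= finest_scale * 2) & (scale < finest_scale * 4)] = 2
    target_lvls[scale >= finest_scale * 4] = 3
    return target_lvls.clamp(max=num_levels - 1)


rois = torch.tensor([
    [0., 0., 0., 31., 31.],    # scale 32  -> level 0
    [0., 0., 0., 63., 63.],    # scale 64  -> level 1
    [0., 0., 0., 127., 127.],  # scale 128 -> level 2
    [0., 0., 0., 299., 299.],  # scale 300 -> level 3
])
print(map_roi_levels_sketch(rois, num_levels=4))  # tensor([0, 1, 2, 3])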
+ anchor_base_sizes (Iterable): Anchor base sizes. + target_means (Iterable): Mean values of regression targets. + target_stds (Iterable): Std values of regression targets. + use_sigmoid_cls (bool): Whether to use sigmoid loss for classification. + (softmax by default) + """ def __init__(self, in_channels, - feat_channels=512, - coarsest_stride=32, + feat_channels=256, anchor_scales=[8, 16, 32], anchor_ratios=[0.5, 1.0, 2.0], anchor_strides=[4, 8, 16, 32, 64], @@ -29,7 +45,6 @@ class RPNHead(nn.Module): super(RPNHead, self).__init__() self.in_channels = in_channels self.feat_channels = feat_channels - self.coarsest_stride = coarsest_stride self.anchor_scales = anchor_scales self.anchor_ratios = anchor_ratios self.anchor_strides = anchor_strides @@ -66,63 +81,63 @@ class RPNHead(nn.Module): def forward(self, feats): return multi_apply(self.forward_single, feats) - def get_anchors(self, featmap_sizes, img_shapes): - """Get anchors given a list of feature map sizes, and get valid flags - at the same time. (Extra padding regions should be marked as invalid) + def get_anchors(self, featmap_sizes, img_metas): + """Get anchors according to feature map sizes. + + Args: + featmap_sizes (list[tuple]): Multi-level feature map sizes. + img_metas (list[dict]): Image meta info. + + Returns: + tuple: anchors of each image, valid flags of each image """ - # calculate actual image shapes - padded_img_shapes = [] - for img_shape in img_shapes: - h, w = img_shape[:2] - padded_h = int( - np.ceil(h / self.coarsest_stride) * self.coarsest_stride) - padded_w = int( - np.ceil(w / self.coarsest_stride) * self.coarsest_stride) - padded_img_shapes.append((padded_h, padded_w)) - # generate anchors for different feature levels - # len = feature levels - anchor_list = [] - # len = imgs per gpu - valid_flag_list = [[] for _ in range(len(img_shapes))] - for i in range(len(featmap_sizes)): - anchor_stride = self.anchor_strides[i] + num_imgs = len(img_metas) + num_levels = len(featmap_sizes) + + # since feature map sizes of all images are the same, we only compute + # anchors for one time + multi_level_anchors = [] + for i in range(num_levels): anchors = self.anchor_generators[i].grid_anchors( - featmap_sizes[i], anchor_stride) - anchor_list.append(anchors) - # for each image in this feature level, get valid flags - featmap_size = featmap_sizes[i] - for img_id, (h, w) in enumerate(padded_img_shapes): - valid_feat_h = min( - int(np.ceil(h / anchor_stride)), featmap_size[0]) - valid_feat_w = min( - int(np.ceil(w / anchor_stride)), featmap_size[1]) + featmap_sizes[i], self.anchor_strides[i]) + multi_level_anchors.append(anchors) + anchor_list = [multi_level_anchors for _ in range(num_imgs)] + + # for each image, we compute valid flags of multi level anchors + valid_flag_list = [] + for img_id, img_meta in enumerate(img_metas): + multi_level_flags = [] + for i in range(num_levels): + anchor_stride = self.anchor_strides[i] + feat_h, feat_w = featmap_sizes[i] + h, w, _ = img_meta['pad_shape'] + valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h) + valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w) flags = self.anchor_generators[i].valid_flags( - featmap_size, (valid_feat_h, valid_feat_w)) - valid_flag_list[img_id].append(flags) + (feat_h, feat_w), (valid_feat_h, valid_feat_w)) + multi_level_flags.append(flags) + valid_flag_list.append(multi_level_flags) + return anchor_list, valid_flag_list def loss_single(self, rpn_cls_score, rpn_bbox_pred, labels, label_weights, bbox_targets, bbox_weights, num_total_samples, 
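# Small numeric illustration of the valid-flag computation in
# RPNHead.get_anchors above: anchors are generated once per feature level,
# and for each image only the region covered by its pad_shape is marked
# valid, so anchors lying purely in batch padding can be ignored when
# assigning targets. All numbers below are made up.
import math

feat_h, feat_w = 50, 76        # feature map size at a level with stride 16
anchor_stride = 16
h, w = 768, 1100               # pad_shape of one image in the batch

valid_feat_h = min(math.ceil(h / anchor_stride), feat_h)
valid_feat_w = min(math.ceil(w / anchor_stride), feat_w)
print(valid_feat_h, valid_feat_w)  # 48 69: only the top-left 48 x 69 anchor
                                   # cells of the 50 x 76 grid are valid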
cfg): + # classification loss labels = labels.contiguous().view(-1) label_weights = label_weights.contiguous().view(-1) - bbox_targets = bbox_targets.contiguous().view(-1, 4) - bbox_weights = bbox_weights.contiguous().view(-1, 4) if self.use_sigmoid_cls: rpn_cls_score = rpn_cls_score.permute(0, 2, 3, 1).contiguous().view(-1) - loss_cls = weighted_binary_cross_entropy( - rpn_cls_score, - labels, - label_weights, - ave_factor=num_total_samples) + criterion = weighted_binary_cross_entropy else: rpn_cls_score = rpn_cls_score.permute(0, 2, 3, 1).contiguous().view(-1, 2) - loss_cls = weighted_cross_entropy( - rpn_cls_score, - labels, - label_weights, - ave_factor=num_total_samples) + criterion = weighted_cross_entropy + loss_cls = criterion( + rpn_cls_score, labels, label_weights, avg_factor=num_total_samples) + # regression loss + bbox_targets = bbox_targets.contiguous().view(-1, 4) + bbox_weights = bbox_weights.contiguous().view(-1, 4) rpn_bbox_pred = rpn_bbox_pred.permute(0, 2, 3, 1).contiguous().view( -1, 4) loss_reg = weighted_smoothl1( @@ -130,7 +145,7 @@ class RPNHead(nn.Module): bbox_targets, bbox_weights, beta=cfg.smoothl1_beta, - ave_factor=num_total_samples) + avg_factor=num_total_samples) return loss_cls, loss_reg def loss(self, rpn_cls_scores, rpn_bbox_preds, gt_bboxes, img_shapes, cfg): @@ -140,7 +155,7 @@ class RPNHead(nn.Module): anchor_list, valid_flag_list = self.get_anchors( featmap_sizes, img_shapes) cls_reg_targets = anchor_target( - anchor_list, valid_flag_list, featmap_sizes, gt_bboxes, img_shapes, + anchor_list, valid_flag_list, gt_bboxes, img_shapes, self.target_means, self.target_stds, cfg) if cls_reg_targets is None: return None @@ -158,8 +173,8 @@ class RPNHead(nn.Module): cfg=cfg) return dict(loss_rpn_cls=losses_cls, loss_rpn_reg=losses_reg) - def get_proposals(self, rpn_cls_scores, rpn_bbox_preds, img_shapes, cfg): - img_per_gpu = len(img_shapes) + def get_proposals(self, rpn_cls_scores, rpn_bbox_preds, img_meta, cfg): + num_imgs = len(img_meta) featmap_sizes = [featmap.size()[-2:] for featmap in rpn_cls_scores] mlvl_anchors = [ self.anchor_generators[idx].grid_anchors(featmap_sizes[idx], @@ -167,7 +182,7 @@ class RPNHead(nn.Module): for idx in range(len(featmap_sizes)) ] proposal_list = [] - for img_id in range(img_per_gpu): + for img_id in range(num_imgs): rpn_cls_score_list = [ rpn_cls_scores[idx][img_id].detach() for idx in range(len(rpn_cls_scores)) @@ -177,10 +192,9 @@ class RPNHead(nn.Module): for idx in range(len(rpn_bbox_preds)) ] assert len(rpn_cls_score_list) == len(rpn_bbox_pred_list) - img_shape = img_shapes[img_id] proposals = self._get_proposals_single( rpn_cls_score_list, rpn_bbox_pred_list, mlvl_anchors, - img_shape, cfg) + img_meta[img_id]['img_shape'], cfg) proposal_list.append(proposals) return proposal_list @@ -195,7 +209,7 @@ class RPNHead(nn.Module): if self.use_sigmoid_cls: rpn_cls_score = rpn_cls_score.permute(1, 2, 0).contiguous().view(-1) - rpn_cls_prob = F.sigmoid(rpn_cls_score) + rpn_cls_prob = rpn_cls_score.sigmoid() scores = rpn_cls_prob else: rpn_cls_score = rpn_cls_score.permute(1, 2, @@ -211,9 +225,8 @@ class RPNHead(nn.Module): rpn_bbox_pred = rpn_bbox_pred[order, :] anchors = anchors[order, :] scores = scores[order] - proposals = bbox_transform_inv(anchors, rpn_bbox_pred, - self.target_means, self.target_stds, - img_shape) + proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means, + self.target_stds, img_shape) w = proposals[:, 2] - proposals[:, 0] + 1 h = proposals[:, 3] - proposals[:, 1] + 1 valid_inds = 
torch.nonzero((w >= cfg.min_bbox_size) & diff --git a/mmdet/models/utils/__init__.py b/mmdet/models/utils/__init__.py index f11af964480456cce144172591bd0b94f3ed7ad7..c759ca9aba1a07d983ae3a0d0305faab910b17a5 100644 --- a/mmdet/models/utils/__init__.py +++ b/mmdet/models/utils/__init__.py @@ -1,6 +1,8 @@ from .conv_module import ConvModule from .norm import build_norm_layer -from .misc import * -from .weight_init import * +from .weight_init import xavier_init, normal_init, uniform_init, kaiming_init -__all__ = ['ConvModule', 'build_norm_layer'] +__all__ = [ + 'ConvModule', 'build_norm_layer', 'xavier_init', 'normal_init', + 'uniform_init', 'kaiming_init' +] diff --git a/mmdet/models/utils/misc.py b/mmdet/models/utils/misc.py deleted file mode 100644 index ad52b587ac126ed2cfbf5e2ed5c98356e1499c5f..0000000000000000000000000000000000000000 --- a/mmdet/models/utils/misc.py +++ /dev/null @@ -1,9 +0,0 @@ -from functools import partial - -from six.moves import map, zip - - -def multi_apply(func, *args, **kwargs): - pfunc = partial(func, **kwargs) if kwargs else func - map_results = map(pfunc, *args) - return tuple(map(list, zip(*map_results))) diff --git a/mmdet/nn/__init__.py b/mmdet/nn/__init__.py deleted file mode 100644 index 1b627f5e7b807b1c6ae321c775c8fc8d03266238..0000000000000000000000000000000000000000 --- a/mmdet/nn/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .parallel import MMDataParallel, MMDistributedDataParallel diff --git a/mmdet/nn/parallel/__init__.py b/mmdet/nn/parallel/__init__.py deleted file mode 100644 index 0ea0a58e4a53737372b7995f3f9d570cba50dddb..0000000000000000000000000000000000000000 --- a/mmdet/nn/parallel/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .data_parallel import MMDataParallel -from .distributed import MMDistributedDataParallel -from .scatter_gather import scatter, scatter_kwargs - -__all__ = [ - 'MMDataParallel', 'MMDistributedDataParallel', 'scatter', 'scatter_kwargs' -] diff --git a/mmdet/nn/parallel/_functions.py b/mmdet/nn/parallel/_functions.py deleted file mode 100644 index 75bb954dce440f7634c47d4a021360df53f3509e..0000000000000000000000000000000000000000 --- a/mmdet/nn/parallel/_functions.py +++ /dev/null @@ -1,74 +0,0 @@ -import torch -from torch.nn.parallel._functions import _get_stream - - -def scatter(input, devices, streams=None): - """Scatters tensor across multiple GPUs. 
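# multi_apply is used by the heads above; this change moves it from
# mmdet.models.utils.misc (deleted above) into mmdet.core. Its behaviour is
# easiest to see on a toy function: map over the argument lists, then regroup
# the per-call outputs into one list per output. The helper body is copied
# from the deleted misc.py.
from functools import partial

from six.moves import map, zip


def multi_apply(func, *args, **kwargs):
    pfunc = partial(func, **kwargs) if kwargs else func
    map_results = map(pfunc, *args)
    return tuple(map(list, zip(*map_results)))


def add_and_mul(a, b, scale=1):
    return a + b, a * b * scale


sums, prods = multi_apply(add_and_mul, [1, 2, 3], [4, 5, 6], scale=2)
print(sums)   # [5, 7, 9]
print(prods)  # [8, 20, 36]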
- """ - if streams is None: - streams = [None] * len(devices) - - if isinstance(input, list): - chunk_size = (len(input) - 1) // len(devices) + 1 - outputs = [ - scatter(input[i], [devices[i // chunk_size]], - [streams[i // chunk_size]]) for i in range(len(input)) - ] - return outputs - elif isinstance(input, torch.Tensor): - output = input.contiguous() - # TODO: copy to a pinned buffer first (if copying from CPU) - stream = streams[0] if output.numel() > 0 else None - with torch.cuda.device(devices[0]), torch.cuda.stream(stream): - output = output.cuda(devices[0], non_blocking=True) - return output - else: - raise Exception('Unknown type {}.'.format(type(input))) - - -def synchronize_stream(output, devices, streams): - if isinstance(output, list): - chunk_size = len(output) // len(devices) - for i in range(len(devices)): - for j in range(chunk_size): - synchronize_stream(output[i * chunk_size + j], [devices[i]], - [streams[i]]) - elif isinstance(output, torch.Tensor): - if output.numel() != 0: - with torch.cuda.device(devices[0]): - main_stream = torch.cuda.current_stream() - main_stream.wait_stream(streams[0]) - output.record_stream(main_stream) - else: - raise Exception('Unknown type {}.'.format(type(output))) - - -def get_input_device(input): - if isinstance(input, list): - for item in input: - input_device = get_input_device(item) - if input_device != -1: - return input_device - return -1 - elif isinstance(input, torch.Tensor): - return input.get_device() if input.is_cuda else -1 - else: - raise Exception('Unknown type {}.'.format(type(input))) - - -class Scatter(object): - - @staticmethod - def forward(target_gpus, input): - input_device = get_input_device(input) - streams = None - if input_device == -1: - # Perform CPU to GPU copies in a background stream - streams = [_get_stream(device) for device in target_gpus] - - outputs = scatter(input, target_gpus, streams) - # Synchronize with the copy stream - if streams is not None: - synchronize_stream(outputs, target_gpus, streams) - - return tuple(outputs) diff --git a/mmdet/nn/parallel/data_parallel.py b/mmdet/nn/parallel/data_parallel.py deleted file mode 100644 index 6735cb4afb7b512c5e9f757e962612ad1073ae12..0000000000000000000000000000000000000000 --- a/mmdet/nn/parallel/data_parallel.py +++ /dev/null @@ -1,9 +0,0 @@ -from torch.nn.parallel import DataParallel - -from .scatter_gather import scatter_kwargs - - -class MMDataParallel(DataParallel): - - def scatter(self, inputs, kwargs, device_ids): - return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) diff --git a/mmdet/nn/parallel/distributed.py b/mmdet/nn/parallel/distributed.py deleted file mode 100644 index 2809778ad93951650677a546b57190cb7659302d..0000000000000000000000000000000000000000 --- a/mmdet/nn/parallel/distributed.py +++ /dev/null @@ -1,9 +0,0 @@ -from torch.nn.parallel import DistributedDataParallel - -from .scatter_gather import scatter_kwargs - - -class MMDistributedDataParallel(DistributedDataParallel): - - def scatter(self, inputs, kwargs, device_ids): - return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) diff --git a/mmdet/nn/parallel/scatter_gather.py b/mmdet/nn/parallel/scatter_gather.py deleted file mode 100644 index 47f794e8916956f9e8c494e50aff7e5b870889e7..0000000000000000000000000000000000000000 --- a/mmdet/nn/parallel/scatter_gather.py +++ /dev/null @@ -1,48 +0,0 @@ -import torch -from ._functions import Scatter -from torch.nn.parallel._functions import Scatter as OrigScatter -from mmdet.datasets.utils import DataContainer - - -def 
scatter(inputs, target_gpus, dim=0): - """Scatter inputs to target gpus. - - The only difference from original :func:`scatter` is to add support for - :type:`~mmdet.DataContainer`. - """ - - def scatter_map(obj): - if isinstance(obj, torch.Tensor): - return OrigScatter.apply(target_gpus, None, dim, obj) - if isinstance(obj, DataContainer) and isinstance(obj.data, list): - return Scatter.forward(target_gpus, obj.data) - if isinstance(obj, tuple) and len(obj) > 0: - return list(zip(*map(scatter_map, obj))) - if isinstance(obj, list) and len(obj) > 0: - return list(map(list, zip(*map(scatter_map, obj)))) - if isinstance(obj, dict) and len(obj) > 0: - return list(map(type(obj), zip(*map(scatter_map, obj.items())))) - return [obj for targets in target_gpus] - - # After scatter_map is called, a scatter_map cell will exist. This cell - # has a reference to the actual function scatter_map, which has references - # to a closure that has a reference to the scatter_map cell (because the - # fn is recursive). To avoid this reference cycle, we set the function to - # None, clearing the cell - try: - return scatter_map(inputs) - finally: - scatter_map = None - - -def scatter_kwargs(inputs, kwargs, target_gpus, dim=0): - """Scatter with support for kwargs dictionary""" - inputs = scatter(inputs, target_gpus, dim) if inputs else [] - kwargs = scatter(kwargs, target_gpus, dim) if kwargs else [] - if len(inputs) < len(kwargs): - inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) - elif len(kwargs) < len(inputs): - kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) - inputs = tuple(inputs) - kwargs = tuple(kwargs) - return inputs, kwargs diff --git a/mmdet/ops/__init__.py b/mmdet/ops/__init__.py index 52e5808016cb94e63a7501cef7b1292805eb3491..5b63224c3476ad189445fe2f6ee2b7182aee661a 100644 --- a/mmdet/ops/__init__.py +++ b/mmdet/ops/__init__.py @@ -1,3 +1,5 @@ from .nms import nms, soft_nms from .roi_align import RoIAlign, roi_align from .roi_pool import RoIPool, roi_pool + +__all__ = ['nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool'] diff --git a/mmdet/ops/nms/__init__.py b/mmdet/ops/nms/__init__.py index 1cf8569b97b3a568458428776b1dbd6737882389..c4407041ad733d51eca3006b8aefa82e02bbfcde 100644 --- a/mmdet/ops/nms/__init__.py +++ b/mmdet/ops/nms/__init__.py @@ -1 +1,3 @@ from .nms_wrapper import nms, soft_nms + +__all__ = ['nms', 'soft_nms'] diff --git a/mmdet/ops/roi_align/__init__.py b/mmdet/ops/roi_align/__init__.py index ae27e21d6c78e9ffd8d13e8c71017ef6f365fb5e..4cb037904a24e613c4b15305cdf8ded6c0072a1b 100644 --- a/mmdet/ops/roi_align/__init__.py +++ b/mmdet/ops/roi_align/__init__.py @@ -1,2 +1,4 @@ from .functions.roi_align import roi_align from .modules.roi_align import RoIAlign + +__all__ = ['roi_align', 'RoIAlign'] diff --git a/mmdet/ops/roi_align/gradcheck.py b/mmdet/ops/roi_align/gradcheck.py index e2c51e64bb7b5eba9da3087d83cfa1083f965bbc..394cd69c5064e097becf12752755ee510045193b 100644 --- a/mmdet/ops/roi_align/gradcheck.py +++ b/mmdet/ops/roi_align/gradcheck.py @@ -5,7 +5,7 @@ from torch.autograd import gradcheck import os.path as osp import sys sys.path.append(osp.abspath(osp.join(__file__, '../../'))) -from roi_align import RoIAlign +from roi_align import RoIAlign # noqa: E402 feat_size = 15 spatial_scale = 1.0 / 8 diff --git a/mmdet/ops/roi_align/src/roi_align_cuda.cpp b/mmdet/ops/roi_align/src/roi_align_cuda.cpp index e4c28c142268d4caf3ff2800dcfe9b24e8e99c66..8551bc5188800e46baf4cf64c6076520fed38581 100644 --- a/mmdet/ops/roi_align/src/roi_align_cuda.cpp 
+++ b/mmdet/ops/roi_align/src/roi_align_cuda.cpp @@ -17,9 +17,9 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, const int pooled_height, const int pooled_width, at::Tensor bottom_grad); -#define CHECK_CUDA(x) AT_ASSERT(x.type().is_cuda(), #x " must be a CUDAtensor ") +#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") #define CHECK_CONTIGUOUS(x) \ - AT_ASSERT(x.is_contiguous(), #x " must be contiguous ") + AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") #define CHECK_INPUT(x) \ CHECK_CUDA(x); \ CHECK_CONTIGUOUS(x) diff --git a/mmdet/ops/roi_align/src/roi_align_kernel.cu b/mmdet/ops/roi_align/src/roi_align_kernel.cu index 31be093c038872ff0b48c79157e5048d25a416cf..341d858de52a0999f7d9598ddb3c2f52d529bf17 100644 --- a/mmdet/ops/roi_align/src/roi_align_kernel.cu +++ b/mmdet/ops/roi_align/src/roi_align_kernel.cu @@ -1,14 +1,10 @@ #include <ATen/ATen.h> +#include <THC/THCAtomics.cuh> -#include <cuda.h> -#include <cuda_runtime.h> +using namespace at; // temporal fix for pytorch<=0.4.1 (see #9848) -#include <math.h> -#include <stdio.h> -#include <vector> - -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ i += blockDim.x * gridDim.x) #define THREADS_PER_BLOCK 1024 @@ -28,10 +24,8 @@ __device__ scalar_t bilinear_interpolate(const scalar_t *bottom_data, return 0; } - if (y <= 0) - y = 0; - if (x <= 0) - x = 0; + if (y <= 0) y = 0; + if (x <= 0) x = 0; int y_low = (int)y; int x_low = (int)x; @@ -69,12 +63,13 @@ __device__ scalar_t bilinear_interpolate(const scalar_t *bottom_data, } template <typename scalar_t> -__global__ void -ROIAlignForward(const int nthreads, const scalar_t *bottom_data, - const scalar_t *bottom_rois, const scalar_t spatial_scale, - const int sample_num, const int channels, const int height, - const int width, const int pooled_height, - const int pooled_width, scalar_t *top_data) { +__global__ void ROIAlignForward(const int nthreads, const scalar_t *bottom_data, + const scalar_t *bottom_rois, + const scalar_t spatial_scale, + const int sample_num, const int channels, + const int height, const int width, + const int pooled_height, const int pooled_width, + scalar_t *top_data) { CUDA_1D_KERNEL_LOOP(index, nthreads) { // (n, c, ph, pw) is an element in the aligned output int pw = index % pooled_width; @@ -101,7 +96,7 @@ ROIAlignForward(const int nthreads, const scalar_t *bottom_data, int sample_num_h = (sample_num > 0) ? sample_num - : ceil(roi_height / pooled_height); // e.g., = 2 + : ceil(roi_height / pooled_height); // e.g., = 2 int sample_num_w = (sample_num > 0) ? 
sample_num : ceil(roi_width / pooled_width); @@ -137,17 +132,17 @@ int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, const int pooled_height, const int pooled_width, at::Tensor output) { const int output_size = num_rois * pooled_height * pooled_width * channels; - AT_DISPATCH_FLOATING_TYPES( + AT_DISPATCH_FLOATING_TYPES_AND_HALF( features.type(), "ROIAlignLaucherForward", ([&] { const scalar_t *bottom_data = features.data<scalar_t>(); const scalar_t *rois_data = rois.data<scalar_t>(); scalar_t *top_data = output.data<scalar_t>(); - ROIAlignForward< - scalar_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>( - output_size, bottom_data, rois_data, scalar_t(spatial_scale), - sample_num, channels, height, width, pooled_height, pooled_width, - top_data); + ROIAlignForward<scalar_t> + <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>( + output_size, bottom_data, rois_data, scalar_t(spatial_scale), + sample_num, channels, height, width, pooled_height, + pooled_width, top_data); })); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err) { @@ -159,11 +154,12 @@ int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, } template <typename scalar_t> -__device__ void -bilinear_interpolate_gradient(const int height, const int width, scalar_t y, - scalar_t x, scalar_t &w1, scalar_t &w2, - scalar_t &w3, scalar_t &w4, int &x_low, - int &x_high, int &y_low, int &y_high) { +__device__ void bilinear_interpolate_gradient(const int height, const int width, + scalar_t y, scalar_t x, + scalar_t &w1, scalar_t &w2, + scalar_t &w3, scalar_t &w4, + int &x_low, int &x_high, + int &y_low, int &y_high) { // deal with cases that inverse elements are out of feature map boundary if (y < -1.0 || y > height || x < -1.0 || x > width) { w1 = w2 = w3 = w4 = 0.; @@ -171,10 +167,8 @@ bilinear_interpolate_gradient(const int height, const int width, scalar_t y, return; } - if (y <= 0) - y = 0; - if (x <= 0) - x = 0; + if (y <= 0) y = 0; + if (x <= 0) x = 0; y_low = (int)y; x_low = (int)x; @@ -204,12 +198,11 @@ bilinear_interpolate_gradient(const int height, const int width, scalar_t y, } template <typename scalar_t> -__global__ void -ROIAlignBackward(const int nthreads, const scalar_t *top_diff, - const scalar_t *bottom_rois, const scalar_t spatial_scale, - const int sample_num, const int channels, const int height, - const int width, const int pooled_height, - const int pooled_width, scalar_t *bottom_diff) { +__global__ void ROIAlignBackward( + const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois, + const scalar_t spatial_scale, const int sample_num, const int channels, + const int height, const int width, const int pooled_height, + const int pooled_width, scalar_t *bottom_diff) { CUDA_1D_KERNEL_LOOP(index, nthreads) { // (n, c, ph, pw) is an element in the aligned output int pw = index % pooled_width; @@ -239,7 +232,7 @@ ROIAlignBackward(const int nthreads, const scalar_t *top_diff, int sample_num_h = (sample_num > 0) ? sample_num - : ceil(roi_height / pooled_height); // e.g., = 2 + : ceil(roi_height / pooled_height); // e.g., = 2 int sample_num_w = (sample_num > 0) ? 
sample_num : ceil(roi_width / pooled_width); @@ -279,13 +272,6 @@ ROIAlignBackward(const int nthreads, const scalar_t *top_diff, } } -template <> -__global__ void ROIAlignBackward<double>( - const int nthreads, const double *top_diff, const double *bottom_rois, - const double spatial_scale, const int sample_num, const int channels, - const int height, const int width, const int pooled_height, - const int pooled_width, double *bottom_diff) {} - int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale, const int sample_num, const int channels, const int height, @@ -294,6 +280,7 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, at::Tensor bottom_grad) { const int output_size = num_rois * pooled_height * pooled_width * channels; + // TODO: use AT_DISPATCH_FLOATING_TYPES_AND_HALF when atomicAdd is resolved AT_DISPATCH_FLOATING_TYPES( top_grad.type(), "ROIAlignLaucherBackward", ([&] { const scalar_t *top_diff = top_grad.data<scalar_t>(); @@ -304,10 +291,11 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, exit(-1); } - ROIAlignBackward< - scalar_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>( - output_size, top_diff, rois_data, spatial_scale, sample_num, - channels, height, width, pooled_height, pooled_width, bottom_diff); + ROIAlignBackward<scalar_t> + <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>( + output_size, top_diff, rois_data, spatial_scale, sample_num, + channels, height, width, pooled_height, pooled_width, + bottom_diff); })); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err) { diff --git a/mmdet/ops/roi_pool/__init__.py b/mmdet/ops/roi_pool/__init__.py index 9c8506d319d3c9c2300860a6c0d64259e43e7916..eb2c57eabd6fa002c970c1f8d199d80d0a9b689c 100644 --- a/mmdet/ops/roi_pool/__init__.py +++ b/mmdet/ops/roi_pool/__init__.py @@ -1,2 +1,4 @@ from .functions.roi_pool import roi_pool from .modules.roi_pool import RoIPool + +__all__ = ['roi_pool', 'RoIPool'] diff --git a/mmdet/ops/roi_pool/gradcheck.py b/mmdet/ops/roi_pool/gradcheck.py index dfc08b2e138855e913a2ac1f3c365a570aba661d..c39616086a240cf57cf115d4264eb32b9cc9f7c7 100644 --- a/mmdet/ops/roi_pool/gradcheck.py +++ b/mmdet/ops/roi_pool/gradcheck.py @@ -4,7 +4,7 @@ from torch.autograd import gradcheck import os.path as osp import sys sys.path.append(osp.abspath(osp.join(__file__, '../../'))) -from roi_pooling import RoIPool +from roi_pool import RoIPool # noqa: E402 feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], diff --git a/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp b/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp index 799c151d192911f03e446ea9c1ad7bb18fa3b1d1..b05e870600fa80ea4b236bd85c03122ed1f49aba 100644 --- a/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp +++ b/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp @@ -16,9 +16,9 @@ int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, const int num_rois, const int pooled_h, const int pooled_w, at::Tensor bottom_grad); -#define CHECK_CUDA(x) AT_ASSERT(x.type().is_cuda(), #x " must be a CUDAtensor ") +#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") #define CHECK_CONTIGUOUS(x) \ - AT_ASSERT(x.is_contiguous(), #x " must be contiguous ") + AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") #define CHECK_INPUT(x) \ CHECK_CUDA(x); \ CHECK_CONTIGUOUS(x) diff --git a/mmdet/ops/roi_pool/src/roi_pool_kernel.cu b/mmdet/ops/roi_pool/src/roi_pool_kernel.cu index 
c94a9cd78503c19995db88dd71f2b1ce5a36d629..d2cefa662f9ff9c961a261cef621f7f1d0e561fc 100644 --- a/mmdet/ops/roi_pool/src/roi_pool_kernel.cu +++ b/mmdet/ops/roi_pool/src/roi_pool_kernel.cu @@ -1,14 +1,10 @@ #include <ATen/ATen.h> +#include <THC/THCAtomics.cuh> -#include <cuda.h> -#include <cuda_runtime.h> +using namespace at; // temporal fix for pytorch<=0.4.1 (see #9848) -#include <math.h> -#include <stdio.h> -#include <vector> - -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ i += blockDim.x * gridDim.x) #define THREADS_PER_BLOCK 1024 @@ -44,8 +40,7 @@ __global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data, // force malformed rois to be 1x1 scalar_t roi_w = roi_x2 - roi_x1; scalar_t roi_h = roi_y2 - roi_y1; - if (roi_w <= 0 || roi_h <= 0) - continue; + if (roi_w <= 0 || roi_h <= 0) continue; scalar_t bin_size_w = roi_w / static_cast<scalar_t>(pooled_w); scalar_t bin_size_h = roi_h / static_cast<scalar_t>(pooled_h); @@ -68,7 +63,8 @@ __global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data, bottom_data += (roi_batch_ind * channels + c) * height * width; // Define an empty pooling region to be zero - scalar_t max_val = is_empty ? 0 : bottom_data[bin_y1 * width + bin_x1] - 1; + scalar_t max_val = is_empty ? static_cast<scalar_t>(0) + : bottom_data[bin_y1 * width + bin_x1] - 1; for (int h = bin_y1; h < bin_y2; ++h) { for (int w = bin_x1; w < bin_x2; ++w) { @@ -80,8 +76,7 @@ __global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data, } } top_data[index] = max_val; - if (argmax_data != NULL) - argmax_data[index] = max_idx; + if (argmax_data != NULL) argmax_data[index] = max_idx; } } @@ -92,17 +87,18 @@ int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, at::Tensor output, at::Tensor argmax) { const int output_size = num_rois * channels * pooled_h * pooled_w; - AT_DISPATCH_FLOATING_TYPES( + AT_DISPATCH_FLOATING_TYPES_AND_HALF( features.type(), "ROIPoolLaucherForward", ([&] { const scalar_t *bottom_data = features.data<scalar_t>(); const scalar_t *rois_data = rois.data<scalar_t>(); scalar_t *top_data = output.data<scalar_t>(); int *argmax_data = argmax.data<int>(); - ROIPoolForward< - scalar_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>( - output_size, bottom_data, rois_data, scalar_t(spatial_scale), - channels, height, width, pooled_h, pooled_w, top_data, argmax_data); + ROIPoolForward<scalar_t> + <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>( + output_size, bottom_data, rois_data, scalar_t(spatial_scale), + channels, height, width, pooled_h, pooled_w, top_data, + argmax_data); })); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err) { @@ -135,28 +131,6 @@ __global__ void ROIPoolBackward(const int nthreads, const scalar_t *top_diff, } } -template <> -__global__ void -ROIPoolBackward<double>(const int nthreads, const double *top_diff, - const double *rois, const int *argmax_data, - const double spatial_scale, const int channels, - const int height, const int width, const int pooled_h, - const int pooled_w, double *bottom_diff) { - // CUDA_1D_KERNEL_LOOP(index, nthreads) { - // int pw = index % pooled_w; - // int ph = (index / pooled_w) % pooled_h; - // int c = (index / pooled_w / pooled_h) % channels; - // int n = index / pooled_w / pooled_h / channels; - - // int roi_batch_ind = rois[n * 5]; - // int bottom_index = argmax_data[(n * channels + 
c) * pooled_h * pooled_w + - // ph * pooled_w + pw]; - - // *(bottom_diff + (roi_batch_ind * channels + c) * height * width + - // bottom_index) +=top_diff[index]; - // } -} - int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, const at::Tensor argmax, const float spatial_scale, const int batch_size, const int channels, @@ -165,6 +139,7 @@ int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, const int pooled_w, at::Tensor bottom_grad) { const int output_size = num_rois * pooled_h * pooled_w * channels; + // TODO: use AT_DISPATCH_FLOATING_TYPES_AND_HALF when atomicAdd is resolved AT_DISPATCH_FLOATING_TYPES( top_grad.type(), "ROIPoolLaucherBackward", ([&] { const scalar_t *top_diff = top_grad.data<scalar_t>(); @@ -177,11 +152,11 @@ int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, exit(-1); } - ROIPoolBackward< - scalar_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>( - output_size, top_diff, rois_data, argmax_data, - scalar_t(spatial_scale), channels, height, width, pooled_h, - pooled_w, bottom_diff); + ROIPoolBackward<scalar_t> + <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>( + output_size, top_diff, rois_data, argmax_data, + scalar_t(spatial_scale), channels, height, width, pooled_h, + pooled_w, bottom_diff); })); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err) { diff --git a/mmdet/version.py b/mmdet/version.py deleted file mode 100644 index 2b8877c505752cd3aaa805b09b88791d3ca0c9bb..0000000000000000000000000000000000000000 --- a/mmdet/version.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = '0.5.0' diff --git a/setup.py b/setup.py index 8ed19bd5a810692f308f99617f20fe2e07e86f5a..7cb44e538e3ce611a00135a588ebe37a486e3388 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,6 @@ +import os +import subprocess +import time from setuptools import find_packages, setup @@ -7,34 +10,102 @@ def readme(): return content +MAJOR = 0 +MINOR = 5 +PATCH = 0 +SUFFIX = '' +SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX) + +version_file = 'mmdet/version.py' + + +def get_git_hash(): + + def _minimal_ext_cmd(cmd): + # construct minimal environment + env = {} + for k in ['SYSTEMROOT', 'PATH', 'HOME']: + v = os.environ.get(k) + if v is not None: + env[k] = v + # LANGUAGE is used on win32 + env['LANGUAGE'] = 'C' + env['LANG'] = 'C' + env['LC_ALL'] = 'C' + out = subprocess.Popen( + cmd, stdout=subprocess.PIPE, env=env).communicate()[0] + return out + + try: + out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) + sha = out.strip().decode('ascii') + except OSError: + sha = 'unknown' + + return sha + + +def get_hash(): + if os.path.exists('.git'): + sha = get_git_hash()[:7] + elif os.path.exists(version_file): + try: + from mmdet.version import __version__ + sha = __version__.split('+')[-1] + except ImportError: + raise ImportError('Unable to get git version') + else: + sha = 'unknown' + + return sha + + +def write_version_py(): + content = """# GENERATED VERSION FILE +# TIME: {} + +__version__ = '{}' +short_version = '{}' +""" + sha = get_hash() + VERSION = SHORT_VERSION + '+' + sha + + with open(version_file, 'w') as f: + f.write(content.format(time.asctime(), VERSION, SHORT_VERSION)) + + def get_version(): - version_file = 'mmcv/version.py' with open(version_file, 'r') as f: exec(compile(f.read(), version_file, 'exec')) return locals()['__version__'] -setup( - name='mmdet', - version=get_version(), - description='Open MMLab Detection Toolbox', - long_description=readme(), - keywords='computer vision, object 
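# Illustration of what the new write_version_py in setup.py above generates.
# With SHORT_VERSION '0.5.0' and an abbreviated git SHA of, say, 'abc1234'
# (placeholder value), the generated mmdet/version.py reads:
#
#   # GENERATED VERSION FILE
#   # TIME: <time.asctime() at build time>
#
#   __version__ = '0.5.0+abc1234'
#   short_version = '0.5.0'
#
# get_version() then exec()s this generated file, so the installed package
# reports the short version plus the commit it was built from.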
detection', - packages=find_packages(), - classifiers=[ - 'Development Status :: 4 - Beta', - 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Topic :: Utilities', - ], - license='GPLv3', - setup_requires=['pytest-runner'], - tests_require=['pytest'], - install_requires=['numpy', 'matplotlib', 'six', 'terminaltables'], - zip_safe=False) +if __name__ == '__main__': + write_version_py() + setup( + name='mmdet', + version=get_version(), + description='Open MMLab Detection Toolbox', + long_description=readme(), + keywords='computer vision, object detection', + url='https://github.com/open-mmlab/mmdetection', + packages=find_packages(), + package_data={'mmdet.ops': ['*/*.so']}, + classifiers=[ + 'Development Status :: 4 - Beta', + 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + ], + license='GPLv3', + setup_requires=['pytest-runner'], + tests_require=['pytest'], + install_requires=[ + 'numpy', 'matplotlib', 'six', 'terminaltables', 'pycocotools' + ], + zip_safe=False) diff --git a/tools/coco_eval.py b/tools/coco_eval.py new file mode 100644 index 0000000000000000000000000000000000000000..65e114ca280578cd41848a631e419d70819a662f --- /dev/null +++ b/tools/coco_eval.py @@ -0,0 +1,28 @@ +from argparse import ArgumentParser + +from mmdet.core import coco_eval + + +def main(): + parser = ArgumentParser(description='COCO Evaluation') + parser.add_argument('result', help='result file path') + parser.add_argument('--ann', help='annotation file path') + parser.add_argument( + '--types', + type=str, + nargs='+', + choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'], + default=['bbox'], + help='result types') + parser.add_argument( + '--max-dets', + type=int, + nargs='+', + default=[100, 300, 1000], + help='proposal numbers, only used for recall evaluation') + args = parser.parse_args() + coco_eval(args.result, args.types, args.ann, args.max_dets) + + +if __name__ == '__main__': + main() diff --git a/tools/dist_train.sh b/tools/dist_train.sh new file mode 100755 index 0000000000000000000000000000000000000000..fa68297226b874596a54b9c819f03584008093e6 --- /dev/null +++ b/tools/dist_train.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +PYTHON=${PYTHON:-"python"} + +$PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train.py $1 --launcher pytorch ${@:3} diff --git a/tools/eval.py b/tools/eval.py deleted file mode 100644 index 20cc571e94b2fcf228f2d0782cf8a8b16dd3688b..0000000000000000000000000000000000000000 --- a/tools/eval.py +++ /dev/null @@ -1,265 +0,0 @@ -from argparse import ArgumentParser -from multiprocessing import Pool -import matplotlib.pyplot as plt -import numpy as np -import copy -import os - -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval - - -def generate_area_range(splitRng=32, stop_size=128): - areaRng = [[0**2, 1e5**2], [0**2, 32**2], [32**2, 96**2], [96**2, 1e5**2]] - start = 0 - while start < stop_size: - end = 
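# Example invocations of the two new tools added above (paths are
# placeholders):
#
#   ./tools/dist_train.sh configs/faster_rcnn_r50_fpn_1x.py 8
#       launches tools/train.py on 8 GPUs via torch.distributed.launch;
#       any extra arguments after the GPU count are forwarded to train.py
#
#   python tools/coco_eval.py results.pkl \
#       --ann data/coco/annotations/instances_val2017.json \
#       --types bbox proposal
#       evaluates a dumped result file against COCO annotations; --max-dets
#       sets the proposal numbers used for recall evaluation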
start + splitRng - areaRng.append([start * start, end * end]) - start = end - areaRng.append([start * start, 1e5**2]) - return areaRng - - -def print_summarize(iouThr=None, - iouThrs=None, - precision=None, - recall=None, - areaRng_id=4, - areaRngs=None, - maxDets_id=2, - maxDets=None): - assert (precision is not None) or (recall is not None) - iStr = ' {:<18} {} @[ IoU={:<9} | size={:>5}-{:>5} | maxDets={:>3d} ] = {:0.3f}' - titleStr = 'Average Precision' if precision is not None else 'Average Recall' - typeStr = '(AP)' if precision is not None else '(AR)' - iouStr = '{:0.2f}:{:0.2f}'.format(iouThrs[0], iouThrs[-1]) \ - if iouThr is None else '{:0.2f}'.format(iouThr) - - aind = [areaRng_id] - mind = [maxDets_id] - if precision is not None: - # dimension of precision: [TxRxKxAxM] - s = precision - # IoU - if iouThr is not None: - t = np.where(iouThr == iouThrs)[0] - s = s[t] - s = s[:, :, :, aind, mind] - else: - # dimension of recall: [TxKxAxM] - s = recall - if iouThr is not None: - t = np.where(iouThr == iouThrs)[0] - s = s[t] - s = s[:, :, aind, mind] - if len(s[s > -1]) == 0: - mean_s = -1 - else: - mean_s = np.mean(s[s > -1]) - print( - iStr.format( - titleStr, typeStr, iouStr, np.sqrt(areaRngs[areaRng_id][0]), - np.sqrt(areaRngs[areaRng_id][1]) - if np.sqrt(areaRngs[areaRng_id][1]) < 999 else 'max', - maxDets[maxDets_id], mean_s)) - - -def eval_results(res_file, ann_file, res_types, splitRng): - for res_type in res_types: - assert res_type in ['proposal', 'bbox', 'segm', 'keypoints'] - - areaRng = generate_area_range(splitRng) - cocoGt = COCO(ann_file) - cocoDt = cocoGt.loadRes(res_file) - imgIds = cocoGt.getImgIds() - for res_type in res_types: - iou_type = 'bbox' if res_type == 'proposal' else res_type - cocoEval = COCOeval(cocoGt, cocoDt, iou_type) - cocoEval.params.imgIds = imgIds - if res_type == 'proposal': - cocoEval.params.useCats = 0 - cocoEval.params.maxDets = [100, 300, 1000] - cocoEval.params.areaRng = areaRng - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - ps = cocoEval.eval['precision'] - rc = cocoEval.eval['recall'] - for i in range(len(areaRng)): - print_summarize(None, cocoEval.params.iouThrs, ps, None, i, - areaRng, 2, cocoEval.params.maxDets) - - -def makeplot(rs, ps, outDir, class_name): - cs = np.vstack([ - np.ones((2, 3)), - np.array([.31, .51, .74]), - np.array([.75, .31, .30]), - np.array([.36, .90, .38]), - np.array([.50, .39, .64]), - np.array([1, .6, 0]) - ]) - areaNames = ['all', 'small', 'medium', 'large'] - types = ['C75', 'C50', 'Loc', 'Sim', 'Oth', 'BG', 'FN'] - for i in range(len(areaNames)): - area_ps = ps[..., i, 0] - figure_tile = class_name + '-' + areaNames[i] - aps = [ps_.mean() for ps_ in area_ps] - ps_curve = [ - ps_.mean(axis=1) if ps_.ndim > 1 else ps_ for ps_ in area_ps - ] - ps_curve.insert(0, np.zeros(ps_curve[0].shape)) - fig = plt.figure() - ax = plt.subplot(111) - for k in range(len(types)): - ax.plot(rs, ps_curve[k + 1], color=[0, 0, 0], linewidth=0.5) - ax.fill_between( - rs, - ps_curve[k], - ps_curve[k + 1], - color=cs[k], - label=str('[{:.3f}'.format(aps[k]) + ']' + types[k])) - plt.xlabel('recall') - plt.ylabel('precision') - plt.xlim(0, 1.) - plt.ylim(0, 1.) 
- plt.title(figure_tile) - plt.legend() - # plt.show() - fig.savefig(outDir + '/{}.png'.format(figure_tile)) - plt.close(fig) - - -def analyze_individual_category(k, cocoDt, cocoGt, catId, iou_type): - nm = cocoGt.loadCats(catId)[0] - print('--------------analyzing {}-{}---------------'.format( - k + 1, nm['name'])) - ps_ = {} - dt = copy.deepcopy(cocoDt) - nm = cocoGt.loadCats(catId)[0] - imgIds = cocoGt.getImgIds() - dt_anns = dt.dataset['annotations'] - select_dt_anns = [] - for ann in dt_anns: - if ann['category_id'] == catId: - select_dt_anns.append(ann) - dt.dataset['annotations'] = select_dt_anns - dt.createIndex() - # compute precision but ignore superclass confusion - gt = copy.deepcopy(cocoGt) - child_catIds = gt.getCatIds(supNms=[nm['supercategory']]) - for idx, ann in enumerate(gt.dataset['annotations']): - if (ann['category_id'] in child_catIds - and ann['category_id'] != catId): - gt.dataset['annotations'][idx]['ignore'] = 1 - gt.dataset['annotations'][idx]['iscrowd'] = 1 - gt.dataset['annotations'][idx]['category_id'] = catId - cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type) - cocoEval.params.imgIds = imgIds - cocoEval.params.maxDets = [100] - cocoEval.params.iouThrs = [.1] - cocoEval.params.useCats = 1 - cocoEval.evaluate() - cocoEval.accumulate() - ps_supercategory = cocoEval.eval['precision'][0, :, k, :, :] - ps_['ps_supercategory'] = ps_supercategory - # compute precision but ignore any class confusion - gt = copy.deepcopy(cocoGt) - for idx, ann in enumerate(gt.dataset['annotations']): - if ann['category_id'] != catId: - gt.dataset['annotations'][idx]['ignore'] = 1 - gt.dataset['annotations'][idx]['iscrowd'] = 1 - gt.dataset['annotations'][idx]['category_id'] = catId - cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type) - cocoEval.params.imgIds = imgIds - cocoEval.params.maxDets = [100] - cocoEval.params.iouThrs = [.1] - cocoEval.params.useCats = 1 - cocoEval.evaluate() - cocoEval.accumulate() - ps_allcategory = cocoEval.eval['precision'][0, :, k, :, :] - ps_['ps_allcategory'] = ps_allcategory - return k, ps_ - - -def analyze_results(res_file, ann_file, res_types, out_dir): - for res_type in res_types: - assert res_type in ['bbox', 'segm'] - - directory = os.path.dirname(out_dir + '/') - if not os.path.exists(directory): - print('-------------create {}-----------------'.format(out_dir)) - os.makedirs(directory) - - cocoGt = COCO(ann_file) - cocoDt = cocoGt.loadRes(res_file) - imgIds = cocoGt.getImgIds() - for res_type in res_types: - iou_type = res_type - cocoEval = COCOeval( - copy.deepcopy(cocoGt), copy.deepcopy(cocoDt), iou_type) - cocoEval.params.imgIds = imgIds - cocoEval.params.iouThrs = [.75, .5, .1] - cocoEval.params.maxDets = [100] - cocoEval.evaluate() - cocoEval.accumulate() - ps = cocoEval.eval['precision'] - ps = np.vstack([ps, np.zeros((4, *ps.shape[1:]))]) - catIds = cocoGt.getCatIds() - recThrs = cocoEval.params.recThrs - with Pool(processes=48) as pool: - args = [(k, cocoDt, cocoGt, catId, iou_type) - for k, catId in enumerate(catIds)] - analyze_results = pool.starmap(analyze_individual_category, args) - for k, catId in enumerate(catIds): - nm = cocoGt.loadCats(catId)[0] - print('--------------saving {}-{}---------------'.format( - k + 1, nm['name'])) - analyze_result = analyze_results[k] - assert k == analyze_result[0] - ps_supercategory = analyze_result[1]['ps_supercategory'] - ps_allcategory = analyze_result[1]['ps_allcategory'] - # compute precision but ignore superclass confusion - ps[3, :, k, :, :] = ps_supercategory - # compute precision but 
ignore any class confusion - ps[4, :, k, :, :] = ps_allcategory - # fill in background and false negative errors and plot - ps[ps == -1] = 0 - ps[5, :, k, :, :] = (ps[4, :, k, :, :] > 0) - ps[6, :, k, :, :] = 1.0 - makeplot(recThrs, ps[:, :, k], out_dir, nm['name']) - makeplot(recThrs, ps, out_dir, 'all') - - -def main(): - parser = ArgumentParser(description='COCO Evaluation') - parser.add_argument('result', help='result file path') - parser.add_argument( - '--ann', - default='/mnt/SSD/dataset/coco/annotations/instances_minival2017.json', - help='annotation file path') - parser.add_argument( - '--types', type=str, nargs='+', default=['bbox'], help='result types') - parser.add_argument( - '--analyze', action='store_true', help='whether to analyze results') - parser.add_argument( - '--out_dir', - type=str, - default=None, - help='dir to save analyze result images') - parser.add_argument( - '--splitRng', - type=int, - default=32, - help='range to split area in evaluation') - args = parser.parse_args() - if not args.analyze: - eval_results(args.result, args.ann, args.types, splitRng=args.splitRng) - else: - assert args.out_dir is not None - analyze_results( - args.result, args.ann, args.types, out_dir=args.out_dir) - - -if __name__ == '__main__': - main() diff --git a/tools/examples/r50_fpn_rpn_1x.py b/tools/examples/r50_fpn_rpn_1x.py deleted file mode 100644 index 45c0a1a6c4649a18346251c8e81f5480f29da30f..0000000000000000000000000000000000000000 --- a/tools/examples/r50_fpn_rpn_1x.py +++ /dev/null @@ -1,95 +0,0 @@ -# model settings -model = dict( - pretrained= - '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth', - backbone=dict( - type='resnet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - style='fb'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - coarsest_stride=32, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - use_sigmoid_cls=True)) -meta_params = dict( - rpn_train_cfg=dict( - pos_fraction=0.5, - pos_balance_sampling=False, - neg_pos_ub=256, - allowed_border=0, - anchor_batch_size=256, - pos_iou_thr=0.7, - neg_iou_thr=0.3, - neg_balance_thr=0, - min_pos_iou=1e-3, - pos_weight=-1, - smoothl1_beta=1 / 9.0, - debug=False), - rpn_test_cfg=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0)) -# dataset settings -data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -img_per_gpu = 1 -data_workers = 2 -train_dataset = dict( - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5) -test_dataset = dict( - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - test_mode=True) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2) -# learning policy -lr_policy = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=0.333, - step=[8, 11]) -max_epoch = 12 -checkpoint_config = 
dict(interval=1) -dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') -# logging settings -log_level = 'INFO' -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')), - ]) -# yapf:enable -work_dir = './model/r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/tools/test.py b/tools/test.py index 2d062489100f3fc6a579ec811ff0391573f48454..3b1ce2d2e04859fdcce4c977556be89298d1953d 100644 --- a/tools/test.py +++ b/tools/test.py @@ -1,64 +1,92 @@ -import os.path as osp -import sys -sys.path.append(osp.abspath(osp.join(__file__, '../../'))) -sys.path.append('/mnt/lustre/pangjiangmiao/sensenet_folder/mmcv') import argparse -import numpy as np import torch - import mmcv -from mmcv import Config -from mmcv.torchpack import load_checkpoint, parallel_test -from mmdet.core import _data_func, results2json -from mmdet.datasets import CocoDataset -from mmdet.datasets.data_engine import build_data -from mmdet.models import Detector +from mmcv.runner import load_checkpoint, parallel_test, obj_from_dict +from mmcv.parallel import scatter, MMDataParallel + +from mmdet import datasets +from mmdet.core import results2json, coco_eval +from mmdet.datasets import collate, build_dataloader +from mmdet.models import build_detector, detectors + + +def single_test(model, data_loader, show=False): + model.eval() + results = [] + prog_bar = mmcv.ProgressBar(len(data_loader.dataset)) + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(**data, return_loss=False, rescale=not show) + results.append(result) + + if show: + model.module.show_result(data, result, + data_loader.dataset.img_norm_cfg) + + batch_size = data['img'][0].size(0) + for _ in range(batch_size): + prog_bar.update() + return results + + +def _data_func(data, device_id): + data = scatter(collate([data], samples_per_gpu=1), [device_id])[0] + return dict(**data, return_loss=False, rescale=True) def parse_args(): parser = argparse.ArgumentParser(description='MMDet test detector') parser.add_argument('config', help='test config file path') parser.add_argument('checkpoint', help='checkpoint file') - parser.add_argument('--world_size', default=1, type=int) + parser.add_argument('--gpus', default=1, type=int) parser.add_argument('--out', help='output result file') parser.add_argument( - '--out_json', action='store_true', help='get json output file') + '--eval', + type=str, + nargs='+', + choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'], + help='eval types') + parser.add_argument('--show', action='store_true', help='show results') args = parser.parse_args() return args -args = parse_args() +def main(): + args = parse_args() + cfg = mmcv.Config.fromfile(args.config) + cfg.model.pretrained = None + cfg.data.test.test_mode = True -def main(): - cfg = Config.fromfile(args.config) - cfg.model['pretrained'] = None - # TODO this img_per_gpu - cfg.img_per_gpu == 1 - - if args.world_size == 1: - # TODO verify this part - args.dist = False - args.img_per_gpu = cfg.img_per_gpu - args.data_workers = cfg.data_workers - model = Detector(**cfg.model, **meta_params) + dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True)) + if args.gpus == 1: + model = build_detector( + cfg.model, train_cfg=None, test_cfg=cfg.test_cfg) load_checkpoint(model, args.checkpoint) - test_loader = build_data(cfg.test_dataset, args) - model = torch.nn.DataParallel(model, device_ids=0) - # 
TODO write single_test - outputs = single_test(test_loader, model) + model = MMDataParallel(model, device_ids=[0]) + + data_loader = build_dataloader( + dataset, + imgs_per_gpu=1, + workers_per_gpu=cfg.data.workers_per_gpu, + num_gpus=1, + dist=False, + shuffle=False) + outputs = single_test(model, data_loader, args.show) else: - test_dataset = CocoDataset(**cfg.test_dataset) - model = dict(cfg.model, **cfg.meta_params) - outputs = parallel_test(Detector, model, - args.checkpoint, test_dataset, _data_func, - range(args.world_size)) + model_args = cfg.model.copy() + model_args.update(train_cfg=None, test_cfg=cfg.test_cfg) + model_type = getattr(detectors, model_args.pop('type')) + outputs = parallel_test(model_type, model_args, args.checkpoint, + dataset, _data_func, range(args.gpus)) if args.out: - mmcv.dump(outputs, args.out, protocol=4) - if args.out_json: - results2json(test_dataset, outputs, args.out + '.json') + mmcv.dump(outputs, args.out) + if args.eval: + json_file = args.out + '.json' + results2json(dataset, outputs, json_file) + coco_eval(json_file, args.eval, dataset.coco) if __name__ == '__main__': diff --git a/tools/train.py b/tools/train.py index 0cb2450acf511715c716594e37b0968876aad683..237ec2b21f58bdbda27339844bfdf0501700b8ca 100644 --- a/tools/train.py +++ b/tools/train.py @@ -1,85 +1,157 @@ from __future__ import division + import argparse -import sys -import os.path as osp -sys.path.append(osp.abspath(osp.join(__file__, '../../'))) -sys.path.append('/mnt/lustre/pangjiangmiao/sensenet_folder/mmcv') +import logging +import random +from collections import OrderedDict +import numpy as np import torch -import torch.multiprocessing as mp from mmcv import Config -from mmcv.torchpack import Runner -from mmdet.core import (batch_processor, init_dist, broadcast_params, - DistOptimizerStepperHook, DistSamplerSeedHook) -from mmdet.datasets.data_engine import build_data -from mmdet.models import Detector -from mmdet.nn.parallel import MMDataParallel +from mmcv.runner import Runner, obj_from_dict, DistSamplerSeedHook +from mmcv.parallel import MMDataParallel, MMDistributedDataParallel + +from mmdet import datasets, __version__ +from mmdet.core import (init_dist, DistOptimizerHook, CocoDistEvalRecallHook, + CocoDistEvalmAPHook) +from mmdet.datasets import build_dataloader +from mmdet.models import build_detector, RPN + + +def parse_losses(losses): + log_vars = OrderedDict() + for loss_name, loss_value in losses.items(): + if isinstance(loss_value, torch.Tensor): + log_vars[loss_name] = loss_value.mean() + elif isinstance(loss_value, list): + log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) + else: + raise TypeError( + '{} is not a tensor or list of tensors'.format(loss_name)) + + loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key) + + log_vars['loss'] = loss + for name in log_vars: + log_vars[name] = log_vars[name].item() + + return loss, log_vars + + +def batch_processor(model, data, train_mode): + losses = model(**data) + loss, log_vars = parse_losses(losses) + + outputs = dict( + loss=loss, log_vars=log_vars, num_samples=len(data['img'].data)) + + return outputs + + +def get_logger(log_level): + logging.basicConfig( + format='%(asctime)s - %(levelname)s - %(message)s', level=log_level) + logger = logging.getLogger() + return logger + + +def set_random_seed(seed): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) def parse_args(): - parser = argparse.ArgumentParser(description='MMDet train val 
detector') + parser = argparse.ArgumentParser(description='Train a detector') parser.add_argument('config', help='train config file path') - parser.add_argument('--validate', action='store_true', help='validate') + parser.add_argument('--work_dir', help='the dir to save logs and models') + parser.add_argument( + '--validate', + action='store_true', + help='whether to add a validate phase') parser.add_argument( - '--dist', action='store_true', help='distributed training or not') - parser.add_argument('--world_size', default=1, type=int) - parser.add_argument('--rank', default=0, type=int) + '--gpus', type=int, default=1, help='number of gpus to use') + parser.add_argument('--seed', type=int, help='random seed') + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + parser.add_argument('--local_rank', type=int, default=0) args = parser.parse_args() return args -args = parse_args() - - def main(): - # Enable distributed training or not - if args.dist: - print('Enable distributed training.') - mp.set_start_method("spawn", force=True) - init_dist( - args.world_size, - args.rank, - **cfg.dist_params) - else: - print('Disabled distributed training.') + args = parse_args() - # Fetch config information cfg = Config.fromfile(args.config) - # TODO more flexible - args.img_per_gpu = cfg.img_per_gpu - args.data_workers = cfg.data_workers + if args.work_dir is not None: + cfg.work_dir = args.work_dir + cfg.gpus = args.gpus + # save mmdet version in checkpoint as meta data + cfg.checkpoint_config.meta = dict( + mmdet_version=__version__, config=cfg.text) + + logger = get_logger(cfg.log_level) - # prepare training loader - train_loader = [build_data(cfg.train_dataset, args)] - if args.validate: - val_loader = build_data(cfg.val_dataset, args) - train_loader.append(val_loader) + # set random seed if specified + if args.seed is not None: + logger.info('Set random seed to {}'.format(args.seed)) + set_random_seed(args.seed) + + # init distributed environment if necessary + if args.launcher == 'none': + dist = False + logger.info('Non-distributed training.') + else: + dist = True + init_dist(args.launcher, **cfg.dist_params) + if torch.distributed.get_rank() != 0: + logger.setLevel('ERROR') + logger.info('Distributed training.') + + # prepare data loaders + train_dataset = obj_from_dict(cfg.data.train, datasets) + data_loaders = [ + build_dataloader(train_dataset, cfg.data.imgs_per_gpu, + cfg.data.workers_per_gpu, cfg.gpus, dist) + ] # build model - model = Detector(**cfg.model, **cfg.meta_params) - if args.dist: - model = model.cuda() - broadcast_params(model) + model = build_detector( + cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) + if dist: + model = MMDistributedDataParallel(model.cuda()) else: - device_ids = args.rank % torch.cuda.device_count() - model = MMDataParallel(model, device_ids=device_ids).cuda() + model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda() - # register hooks + # build runner runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, cfg.log_level) - optimizer_stepper = DistOptimizerStepperHook( - **cfg.grad_clip_config) if args.dist else cfg.grad_clip_config - runner.register_training_hooks(cfg.lr_policy, optimizer_stepper, + + # register hooks + optimizer_config = DistOptimizerHook( + **cfg.optimizer_config) if dist else cfg.optimizer_config + runner.register_training_hooks(cfg.lr_config, optimizer_config, cfg.checkpoint_config, cfg.log_config) - if args.dist: + if dist: 
runner.register_hook(DistSamplerSeedHook()) + # register eval hooks + if args.validate: + if isinstance(model.module, RPN): + runner.register_hook(CocoDistEvalRecallHook(cfg.data.val)) + elif cfg.data.val.type == 'CocoDataset': + runner.register_hook(CocoDistEvalmAPHook(cfg.data.val)) + if cfg.resume_from: runner.resume(cfg.resume_from) elif cfg.load_from: runner.load_checkpoint(cfg.load_from) - runner.run(train_loader, cfg.workflow, cfg.max_epoch, args=args) + runner.run(data_loaders, cfg.workflow, cfg.total_epochs) -if __name__ == "__main__": +if __name__ == '__main__': main()
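Note on the refactored training entry point introduced above: the new tools/train.py replaces the old monolithic Detector flow with a small contract between the runner and the model. The model's forward() returns a dict of named losses (tensors, or lists of tensors such as one per feature level), parse_losses() reduces them to a single total plus scalar log variables, and batch_processor() hands the runner dict(loss=..., log_vars=..., num_samples=...). Below is a minimal, self-contained sketch of that contract: parse_losses mirrors the function added in this patch, while the fake loss dict and the __main__ harness are purely illustrative and assume nothing beyond PyTorch.

from collections import OrderedDict

import torch


def parse_losses(losses):
    """Reduce a dict of per-branch losses to (total_loss, scalar log_vars)."""
    log_vars = OrderedDict()
    for loss_name, loss_value in losses.items():
        if isinstance(loss_value, torch.Tensor):
            log_vars[loss_name] = loss_value.mean()
        elif isinstance(loss_value, list):
            # e.g. one loss tensor per feature level
            log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value)
        else:
            raise TypeError(
                '{} is not a tensor or list of tensors'.format(loss_name))

    # only keys containing 'loss' contribute to the backpropagated total;
    # everything else (e.g. accuracies) is logged but not optimized
    loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key)
    log_vars['loss'] = loss
    for name in log_vars:
        log_vars[name] = log_vars[name].item()
    return loss, log_vars


if __name__ == '__main__':
    # illustrative stand-in for what a detector's forward() might return
    fake_losses = dict(
        loss_rpn_cls=[torch.rand(()) for _ in range(5)],
        loss_rpn_reg=[torch.rand(()) for _ in range(5)],
        loss_cls=torch.rand(()),
        loss_reg=torch.rand(()),
        acc=torch.tensor(92.0))
    total_loss, log_vars = parse_losses(fake_losses)
    print('total loss: {:.4f}'.format(total_loss.item()))
    print('log_vars:', log_vars)

With this contract the runner never needs to know the detector internals. A typical (assumed) invocation of the updated scripts would be "python tools/train.py <config> --gpus 8 --validate" for training and "python tools/test.py <config> <checkpoint> --gpus 8 --out results.pkl --eval bbox" for evaluation, matching the argument names introduced in this diff; the exact config and checkpoint paths are placeholders.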