diff --git a/TDL.md b/TDL.md new file mode 100644 index 0000000000000000000000000000000000000000..1679338c04733a1a23e5dc2e8ac96069c6b3c41e --- /dev/null +++ b/TDL.md @@ -0,0 +1,29 @@ +### MMCV +- [ ] Implement the 'get' attr of 'Config' +- [ ] Fix Config bug: addict converts None values to '{}' +- [ ] Default logger should log on gpu0 only +- [ ] Unit Test: mmcv and mmcv.torchpack + + +### MMDetection + +#### Basic +- [ ] Implement a non-distributed training function +- [ ] Verify nccl/nccl2/gloo +- [ ] Replace ugly code that plugs params into 'args' to reach a global flow +- [ ] Replace 'print' with 'logger' + + +#### Testing +- [ ] Implement distributed testing +- [ ] Implement single-GPU testing + + +#### Refactor +- [ ] Reconsider parameter names +- [ ] Refactor functions in 'core' +- [ ] Merge single test & aug test into one function, and remove similar redundancy elsewhere + +#### New features +- [ ] Plug loss params into Config +- [ ] Multi-head communication diff --git a/mmdet/core/__init__.py b/mmdet/core/__init__.py index 7992d8deb3ba0f6586c1bef0705f33a41a78d917..52ed690e6689abdd1dcc4af6ccb237f1d3fbdad9 100644 --- a/mmdet/core/__init__.py +++ b/mmdet/core/__init__.py @@ -1,6 +1,9 @@ -from .anchor_generator import * +from .train_engine import * +from .test_engine import * +from .rpn_ops import * from .bbox_ops import * from .mask_ops import * +from .losses import * from .eval import * -from .nn import * -from .targets import * +from .post_processing import * +from .utils import * diff --git a/mmdet/core/bbox_ops/__init__.py b/mmdet/core/bbox_ops/__init__.py index 4bf9aeb74a5db787f687bacf0147ae1e2b1054bf..dbdbb970648bcac1ced61096b436ef9966266c1f 100644 --- a/mmdet/core/bbox_ops/__init__.py +++ b/mmdet/core/bbox_ops/__init__.py @@ -1,12 +1,16 @@ from .geometry import bbox_overlaps from .sampling import (random_choice, bbox_assign, bbox_assign_via_overlaps, - bbox_sampling, sample_positives, sample_negatives) + bbox_sampling, sample_positives, sample_negatives, + sample_proposals) from .transforms import (bbox_transform, bbox_transform_inv, bbox_flip, - bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox) + bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox, + bbox2result) +from .bbox_target import bbox_target __all__ = [ 'bbox_overlaps', 'random_choice', 'bbox_assign', 'bbox_assign_via_overlaps', 'bbox_sampling', 'sample_positives', 'sample_negatives', 'bbox_transform', 'bbox_transform_inv', 'bbox_flip', - 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox' + 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', + 'bbox_target', 'sample_proposals' ] diff --git a/mmdet/core/bbox_ops/bbox_target.py b/mmdet/core/bbox_ops/bbox_target.py new file mode 100644 index 0000000000000000000000000000000000000000..ce1f885e184a37779c7636f8c6053248e8cd3330 --- /dev/null +++ b/mmdet/core/bbox_ops/bbox_target.py @@ -0,0 +1,87 @@ +import mmcv +import torch + +from .geometry import bbox_overlaps +from .transforms import bbox_transform, bbox_transform_inv + + +def bbox_target(pos_proposals_list, + neg_proposals_list, + pos_gt_bboxes_list, + pos_gt_labels_list, + cfg, + reg_num_classes=1, + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + return_list=False): + img_per_gpu = len(pos_proposals_list) + all_labels = [] + all_label_weights = [] + all_bbox_targets = [] + all_bbox_weights = [] + for img_id in range(img_per_gpu): + pos_proposals = pos_proposals_list[img_id] + neg_proposals = neg_proposals_list[img_id] + pos_gt_bboxes = pos_gt_bboxes_list[img_id] + pos_gt_labels = 
pos_gt_labels_list[img_id] + labels, label_weights, bbox_targets, bbox_weights = proposal_target_single( + pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, + reg_num_classes, cfg, target_means, target_stds) + all_labels.append(labels) + all_label_weights.append(label_weights) + all_bbox_targets.append(bbox_targets) + all_bbox_weights.append(bbox_weights) + + if return_list: + return all_labels, all_label_weights, all_bbox_targets, all_bbox_weights + + labels = torch.cat(all_labels, 0) + label_weights = torch.cat(all_label_weights, 0) + bbox_targets = torch.cat(all_bbox_targets, 0) + bbox_weights = torch.cat(all_bbox_weights, 0) + return labels, label_weights, bbox_targets, bbox_weights + + +def proposal_target_single(pos_proposals, + neg_proposals, + pos_gt_bboxes, + pos_gt_labels, + reg_num_classes, + cfg, + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]): + num_pos = pos_proposals.size(0) + num_neg = neg_proposals.size(0) + num_samples = num_pos + num_neg + labels = pos_proposals.new_zeros(num_samples, dtype=torch.long) + label_weights = pos_proposals.new_zeros(num_samples) + bbox_targets = pos_proposals.new_zeros(num_samples, 4) + bbox_weights = pos_proposals.new_zeros(num_samples, 4) + if num_pos > 0: + labels[:num_pos] = pos_gt_labels + pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight + label_weights[:num_pos] = pos_weight + pos_bbox_targets = bbox_transform(pos_proposals, pos_gt_bboxes, + target_means, target_stds) + bbox_targets[:num_pos, :] = pos_bbox_targets + bbox_weights[:num_pos, :] = 1 + if num_neg > 0: + label_weights[-num_neg:] = 1.0 + if reg_num_classes > 1: + bbox_targets, bbox_weights = expand_target(bbox_targets, bbox_weights, + labels, reg_num_classes) + + return labels, label_weights, bbox_targets, bbox_weights + + +def expand_target(bbox_targets, bbox_weights, labels, num_classes): + bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0), + 4 * num_classes)) + bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0), + 4 * num_classes)) + for i in torch.nonzero(labels > 0).squeeze(-1): + start, end = labels[i] * 4, (labels[i] + 1) * 4 + bbox_targets_expand[i, start:end] = bbox_targets[i, :] + bbox_weights_expand[i, start:end] = bbox_weights[i, :] + return bbox_targets_expand, bbox_weights_expand diff --git a/mmdet/core/bbox_ops/sampling.py b/mmdet/core/bbox_ops/sampling.py index 9825e3bd15ec87dc6bc9c31be4b2f11422fcda13..eed820496409f1f8265f73e81bd4667e6b1558f8 100644 --- a/mmdet/core/bbox_ops/sampling.py +++ b/mmdet/core/bbox_ops/sampling.py @@ -42,7 +42,7 @@ def bbox_assign(proposals, min_pos_iou(float): minimum iou for a bbox to be considered as a positive bbox, for RPN, it is usually set as 0, for Fast R-CNN, it is usually set as pos_iou_thr - crowd_thr: ignore proposals which have iof(intersection over foreground) with + crowd_thr: ignore proposals which have iof (intersection over foreground) with crowd bboxes over crowd_thr Returns: tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, ) @@ -253,3 +253,43 @@ def bbox_sampling(assigned_gt_inds, max_overlaps, neg_balance_thr, neg_hard_fraction) return pos_inds, neg_inds + + + +def sample_proposals(proposals_list, gt_bboxes_list, gt_crowds_list, + gt_labels_list, cfg): + cfg_list = [cfg for _ in range(len(proposals_list))] + results = map(sample_proposals_single, proposals_list, gt_bboxes_list, + gt_crowds_list, gt_labels_list, cfg_list) + # list of tuple to tuple of list + return 
tuple(map(list, zip(*results))) + + +def sample_proposals_single(proposals, + gt_bboxes, + gt_crowds, + gt_labels, + cfg): + proposals = proposals[:, :4] + assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \ + bbox_assign( + proposals, gt_bboxes, gt_crowds, gt_labels, cfg.pos_iou_thr, + cfg.neg_iou_thr, cfg.pos_iou_thr, cfg.crowd_thr) + if cfg.add_gt_as_proposals: + proposals = torch.cat([gt_bboxes, proposals], dim=0) + gt_assign_self = torch.arange( + 1, len(gt_labels) + 1, dtype=torch.long, device=proposals.device) + assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds]) + assigned_labels = torch.cat([gt_labels, assigned_labels]) + + pos_inds, neg_inds = bbox_sampling( + assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, cfg.neg_pos_ub, + cfg.pos_balance_sampling, max_overlaps, cfg.neg_balance_thr) + pos_proposals = proposals[pos_inds] + neg_proposals = proposals[neg_inds] + pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1 + pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :] + pos_gt_labels = assigned_labels[pos_inds] + + return (pos_inds, neg_inds, pos_proposals, neg_proposals, + pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels) diff --git a/mmdet/core/bbox_ops/transforms.py b/mmdet/core/bbox_ops/transforms.py index 6f83a1dc56efdc214fe96c60b9a587a1cb81602b..a9f1e2a45fab42652189e84f42aadc2e5f7a8994 100644 --- a/mmdet/core/bbox_ops/transforms.py +++ b/mmdet/core/bbox_ops/transforms.py @@ -126,3 +126,22 @@ def roi2bbox(rois): bbox = rois[inds, 1:] bbox_list.append(bbox) return bbox_list + + +def bbox2result(bboxes, labels, num_classes): + """Convert detection results to a list of numpy arrays + Args: + bboxes (Tensor): shape (n, 5) + labels (Tensor): shape (n, ) + num_classes (int): class number, including background class + Returns: + list(ndarray): bbox results of each class + """ + if bboxes.shape[0] == 0: + return [ + np.zeros((0, 5), dtype=np.float32) for i in range(num_classes - 1) + ] + else: + bboxes = bboxes.cpu().numpy() + labels = labels.cpu().numpy() + return [bboxes[labels == i, :] for i in range(num_classes - 1)] diff --git a/mmdet/core/losses/__init__.py b/mmdet/core/losses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3e4447ff0a6c708e9407bc47698a6281e8c81216 --- /dev/null +++ b/mmdet/core/losses/__init__.py @@ -0,0 +1,12 @@ +from .losses import ( + weighted_nll_loss, weighted_cross_entropy, weighted_binary_cross_entropy, + sigmoid_focal_loss, weighted_sigmoid_focal_loss, mask_cross_entropy, + weighted_mask_cross_entropy, smooth_l1_loss, weighted_smoothl1, accuracy) + +__all__ = [ + 'weighted_nll_loss', 'weighted_cross_entropy', + 'weighted_binary_cross_entropy', 'sigmoid_focal_loss', + 'weighted_sigmoid_focal_loss', 'mask_cross_entropy', + 'weighted_mask_cross_entropy', 'smooth_l1_loss', 'weighted_smoothl1', + 'accuracy' +] diff --git a/mmdet/core/losses/losses.py b/mmdet/core/losses/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..575c91d053650acbde927f49e0c474e5fd325e77 --- /dev/null +++ b/mmdet/core/losses/losses.py @@ -0,0 +1,110 @@ +# TODO merge naive and weighted loss to one function. +import torch +import torch.nn.functional as F + +from ..bbox_ops import bbox_transform_inv, bbox_overlaps + + +def weighted_nll_loss(pred, label, weight, ave_factor=None): + if ave_factor is None: + ave_factor = max(torch.sum(weight > 0).float().item(), 1.) 
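+ # ave_factor defaults to the count of nonzero-weight samples, clamped to at least 1 to avoid division by zero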
+ raw = F.nll_loss(pred, label, size_average=False, reduce=False) + return torch.sum(raw * weight)[None] / ave_factor + + +def weighted_cross_entropy(pred, label, weight, ave_factor=None): + if ave_factor is None: + ave_factor = max(torch.sum(weight > 0).float().item(), 1.) + raw = F.cross_entropy(pred, label, size_average=False, reduce=False) + return torch.sum(raw * weight)[None] / ave_factor + + +def weighted_binary_cross_entropy(pred, label, weight, ave_factor=None): + if ave_factor is None: + ave_factor = max(torch.sum(weight > 0).float().item(), 1.) + return F.binary_cross_entropy_with_logits( + pred, label.float(), weight.float(), + size_average=False)[None] / ave_factor + + +def sigmoid_focal_loss(pred, + target, + weight, + gamma=2.0, + alpha=0.25, + size_average=True): + pred_sigmoid = pred.sigmoid() + pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) + weight = (alpha * target + (1 - alpha) * (1 - target)) * weight + weight = weight * pt.pow(gamma) + return F.binary_cross_entropy_with_logits( + pred, target, weight, size_average=size_average) + + +def weighted_sigmoid_focal_loss(pred, + target, + weight, + gamma=2.0, + alpha=0.25, + ave_factor=None, + num_classes=80): + if ave_factor is None: + ave_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6 + return sigmoid_focal_loss( + pred, target, weight, gamma=gamma, alpha=alpha, + size_average=False)[None] / ave_factor + + +def mask_cross_entropy(pred, target, label): + num_rois = pred.size()[0] + inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) + pred_slice = pred[inds, label].squeeze(1) + return F.binary_cross_entropy_with_logits( + pred_slice, target, size_average=True)[None] + + +def weighted_mask_cross_entropy(pred, target, weight, label): + num_rois = pred.size()[0] + num_samples = torch.sum(weight > 0).float().item() + 1e-6 + assert num_samples >= 1 + inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) + pred_slice = pred[inds, label].squeeze(1) + return F.binary_cross_entropy_with_logits( + pred_slice, target, weight, size_average=False)[None] / num_samples + + +def smooth_l1_loss(pred, target, beta=1.0, size_average=True, reduce=True): + assert beta > 0 + assert pred.size() == target.size() and target.numel() > 0 + diff = torch.abs(pred - target) + loss = torch.where(diff < beta, 0.5 * diff * diff / beta, + diff - 0.5 * beta) + if size_average: + loss /= pred.numel() + if reduce: + loss = loss.sum() + return loss + + +def weighted_smoothl1(pred, target, weight, beta=1.0, ave_factor=None): + if ave_factor is None: + ave_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6 + loss = smooth_l1_loss(pred, target, beta, size_average=False, reduce=False) + return torch.sum(loss * weight)[None] / ave_factor + + +def accuracy(pred, target, topk=1): + if isinstance(topk, int): + topk = (topk, ) + return_single = True + else: + return_single = False + + maxk = max(topk) + _, pred_label = pred.topk(maxk, 1, True, True) + pred_label = pred_label.t() + correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / pred.size(0))) + return res[0] if return_single else res diff --git a/mmdet/core/mask_ops/__init__.py b/mmdet/core/mask_ops/__init__.py index 25850cdc62ae69271f3788288d960b86ef179452..4669ba1f9102cbcabe20c48ea193408c1e12e4aa 100644 --- a/mmdet/core/mask_ops/__init__.py +++ b/mmdet/core/mask_ops/__init__.py @@ -2,9 +2,10 @@ from .segms import (flip_segms, polys_to_mask, 
mask_to_bbox, polys_to_mask_wrt_box, polys_to_boxes, rle_mask_voting, rle_mask_nms, rle_masks_to_boxes) from .utils import split_combined_gt_polys +from .mask_target import mask_target __all__ = [ 'flip_segms', 'polys_to_mask', 'mask_to_bbox', 'polys_to_mask_wrt_box', 'polys_to_boxes', 'rle_mask_voting', 'rle_mask_nms', 'rle_masks_to_boxes', - 'split_combined_gt_polys' + 'split_combined_gt_polys', 'mask_target' ] diff --git a/mmdet/core/mask_ops/mask_target.py b/mmdet/core/mask_ops/mask_target.py new file mode 100644 index 0000000000000000000000000000000000000000..3fb65e3587473b60c4fd25b075072b9a3bb4670c --- /dev/null +++ b/mmdet/core/mask_ops/mask_target.py @@ -0,0 +1,35 @@ +import torch +import numpy as np + +from .segms import polys_to_mask_wrt_box + + +def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_polys_list, + img_meta, cfg): + cfg_list = [cfg for _ in range(len(pos_proposals_list))] + img_metas = [img_meta for _ in range(len(pos_proposals_list))] + mask_targets = map(mask_target_single, pos_proposals_list, + pos_assigned_gt_inds_list, gt_polys_list, img_metas, + cfg_list) + mask_targets = torch.cat(tuple(mask_targets), dim=0) + return mask_targets + + +def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_polys, + img_meta, cfg): + + mask_size = cfg.mask_size + num_pos = pos_proposals.size(0) + mask_targets = pos_proposals.new_zeros((num_pos, mask_size, mask_size)) + if num_pos > 0: + pos_proposals = pos_proposals.cpu().numpy() + pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() + scale_factor = img_meta['scale_factor'][0].cpu().numpy() + for i in range(num_pos): + bbox = pos_proposals[i, :] / scale_factor + polys = gt_polys[pos_assigned_gt_inds[i]] + mask = polys_to_mask_wrt_box(polys, bbox, mask_size) + mask = np.array(mask > 0, dtype=np.float32) + mask_targets[i, ...] 
= torch.from_numpy(mask).to( + mask_targets.device) + return mask_targets diff --git a/mmdet/core/post_processing/merge_augs.py b/mmdet/core/post_processing/merge_augs.py index 5d56e481e5aee2ce113cea7adcb11ebe0aaede5b..35dfce24f91b4a6260476a3f77b67471c88e4bc7 100644 --- a/mmdet/core/post_processing/merge_augs.py +++ b/mmdet/core/post_processing/merge_augs.py @@ -1,6 +1,6 @@ import torch -from mmcv.ops import nms +from mmdet.ops import nms import numpy as np from ..bbox_ops import bbox_mapping_back diff --git a/mmdet/core/rpn_ops/__init__.py b/mmdet/core/rpn_ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4d5f9244dde2b244bbe42d54640e8a648277c506 --- /dev/null +++ b/mmdet/core/rpn_ops/__init__.py @@ -0,0 +1,2 @@ +from .anchor_generator import * +from .anchor_target import * diff --git a/mmdet/core/anchor_generator.py b/mmdet/core/rpn_ops/anchor_generator.py similarity index 100% rename from mmdet/core/anchor_generator.py rename to mmdet/core/rpn_ops/anchor_generator.py diff --git a/mmdet/core/rpn_ops/anchor_target.py b/mmdet/core/rpn_ops/anchor_target.py new file mode 100644 index 0000000000000000000000000000000000000000..a6bba8ed221db022fb95590c6b10a56c8b6d4553 --- /dev/null +++ b/mmdet/core/rpn_ops/anchor_target.py @@ -0,0 +1,159 @@ +import torch +import numpy as np +from ..bbox_ops import (bbox_assign, bbox_transform, bbox_sampling) + + +def anchor_target(anchor_list, valid_flag_list, featmap_sizes, gt_bboxes_list, + img_shapes, target_means, target_stds, cfg): + """Compute anchor regression and classification targets + + Args: + anchor_list(list): anchors of each feature map level + valid_flag_list(list): valid flags of the anchors in each feature map + featmap_sizes(list): feature map sizes + gt_bboxes_list(list): ground truth bboxes of images in a mini-batch + img_shapes(list): shape of each image in a mini-batch + target_means(Iterable): mean values used to normalize targets + target_stds(Iterable): std values used to normalize targets + cfg(dict): configs + + Returns: + tuple + """ + if len(featmap_sizes) == len(anchor_list): + all_anchors = torch.cat(anchor_list, 0) + anchor_nums = [anchors.size(0) for anchors in anchor_list] + use_isomerism_anchors = False + elif len(img_shapes) == len(anchor_list): + # using different anchors for different images + all_anchors_list = [ + torch.cat(anchor_list[img_id], 0) + for img_id in range(len(img_shapes)) + ] + anchor_nums = [anchors.size(0) for anchors in anchor_list[0]] + use_isomerism_anchors = True + else: + raise ValueError('length of anchor_list should be equal to number of ' + 'feature levels or number of images in a batch') + all_labels = [] + all_label_weights = [] + all_bbox_targets = [] + all_bbox_weights = [] + num_total_sampled = 0 + for img_id in range(len(img_shapes)): + if isinstance(valid_flag_list[img_id], list): + valid_flags = torch.cat(valid_flag_list[img_id], 0) + else: + valid_flags = valid_flag_list[img_id] + if use_isomerism_anchors: + all_anchors = all_anchors_list[img_id] + inside_flags = anchor_inside_flags(all_anchors, valid_flags, + img_shapes[img_id][:2], + cfg.allowed_border) + if not inside_flags.any(): + return None + gt_bboxes = gt_bboxes_list[img_id] + anchor_targets = anchor_target_single(all_anchors, inside_flags, + gt_bboxes, target_means, + target_stds, cfg) + (labels, label_weights, bbox_targets, bbox_weights, pos_inds, + neg_inds) = anchor_targets + all_labels.append(labels) + all_label_weights.append(label_weights) + all_bbox_targets.append(bbox_targets) + all_bbox_weights.append(bbox_weights) + num_total_sampled += max(pos_inds.numel() + neg_inds.numel(), 1) + all_labels = torch.stack(all_labels, 0) + all_label_weights = torch.stack(all_label_weights, 0) + 
all_bbox_targets = torch.stack(all_bbox_targets, 0) + all_bbox_weights = torch.stack(all_bbox_weights, 0) + # split into different feature levels + labels_list = [] + label_weights_list = [] + bbox_targets_list = [] + bbox_weights_list = [] + start = 0 + for anchor_num in anchor_nums: + end = start + anchor_num + labels_list.append(all_labels[:, start:end].squeeze(0)) + label_weights_list.append(all_label_weights[:, start:end].squeeze(0)) + bbox_targets_list.append(all_bbox_targets[:, start:end].squeeze(0)) + bbox_weights_list.append(all_bbox_weights[:, start:end].squeeze(0)) + start = end + return (labels_list, label_weights_list, bbox_targets_list, + bbox_weights_list, num_total_sampled) + + +def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means, + target_stds, cfg): + num_total_anchors = all_anchors.size(0) + anchors = all_anchors[inside_flags, :] + assigned_gt_inds, argmax_overlaps, max_overlaps = bbox_assign( + anchors, + gt_bboxes, + pos_iou_thr=cfg.pos_iou_thr, + neg_iou_thr=cfg.neg_iou_thr, + min_pos_iou=cfg.min_pos_iou) + pos_inds, neg_inds = bbox_sampling(assigned_gt_inds, cfg.anchor_batch_size, + cfg.pos_fraction, cfg.neg_pos_ub, + cfg.pos_balance_sampling, max_overlaps, + cfg.neg_balance_thr) + + bbox_targets = torch.zeros_like(anchors) + bbox_weights = torch.zeros_like(anchors) + labels = torch.zeros_like(assigned_gt_inds) + label_weights = torch.zeros_like(assigned_gt_inds, dtype=torch.float) + + if len(pos_inds) > 0: + pos_inds = unique(pos_inds) + pos_anchors = anchors[pos_inds, :] + pos_gt_bbox = gt_bboxes[assigned_gt_inds[pos_inds] - 1, :] + pos_bbox_targets = bbox_transform(pos_anchors, pos_gt_bbox, + target_means, target_stds) + bbox_targets[pos_inds, :] = pos_bbox_targets + bbox_weights[pos_inds, :] = 1.0 + labels[pos_inds] = 1 + if cfg.pos_weight <= 0: + label_weights[pos_inds] = 1.0 + else: + label_weights[pos_inds] = cfg.pos_weight + if len(neg_inds) > 0: + neg_inds = unique(neg_inds) + label_weights[neg_inds] = 1.0 + + # map up to original set of anchors + labels = unmap(labels, num_total_anchors, inside_flags) + label_weights = unmap(label_weights, num_total_anchors, inside_flags) + bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) + bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) + + return (labels, label_weights, bbox_targets, bbox_weights, pos_inds, + neg_inds) + +def anchor_inside_flags(all_anchors, valid_flags, img_shape, allowed_border=0): + img_h, img_w = img_shape.float() + if allowed_border >= 0: + inside_flags = valid_flags & \ + (all_anchors[:, 0] >= -allowed_border) & \ + (all_anchors[:, 1] >= -allowed_border) & \ + (all_anchors[:, 2] < img_w + allowed_border) & \ + (all_anchors[:, 3] < img_h + allowed_border) + else: + inside_flags = valid_flags + return inside_flags + +def unique(tensor): + if tensor.is_cuda: + u_tensor = np.unique(tensor.cpu().numpy()) + return tensor.new_tensor(u_tensor) + else: + return torch.unique(tensor) + +def unmap(data, count, inds, fill=0): + """ Unmap a subset of item (data) back to the original set of items (of + size count) """ + if data.dim() == 1: + ret = data.new_full((count, ), fill) + ret[inds] = data + else: + new_size = (count, ) + data.size()[1:] + ret = data.new_full(new_size, fill) + ret[inds, :] = data + return ret diff --git a/mmdet/core/targets/__init__.py b/mmdet/core/targets/__init__.py deleted file mode 100644 index b3b2567efff687ba503b8a37d9f096597a0c8780..0000000000000000000000000000000000000000 --- a/mmdet/core/targets/__init__.py +++ 
/dev/null @@ -1,5 +0,0 @@ -from .anchor_target import anchor_target -from .bbox_target import bbox_target -from .mask_target import mask_target - -__all__ = ['anchor_target', 'bbox_target', 'mask_target'] diff --git a/mmdet/core/targets/anchor_target.py b/mmdet/core/targets/anchor_target.py deleted file mode 100644 index ec2389f90885da0c92f0598dc6d45f59c0ab6dac..0000000000000000000000000000000000000000 --- a/mmdet/core/targets/anchor_target.py +++ /dev/null @@ -1,2 +0,0 @@ -def anchor_target(): - pass diff --git a/mmdet/core/targets/bbox_target.py b/mmdet/core/targets/bbox_target.py deleted file mode 100644 index 49642c2298735b163b98ad832a3a6a9ee9941c45..0000000000000000000000000000000000000000 --- a/mmdet/core/targets/bbox_target.py +++ /dev/null @@ -1,2 +0,0 @@ -def bbox_target(): - pass diff --git a/mmdet/core/targets/mask_target.py b/mmdet/core/targets/mask_target.py deleted file mode 100644 index 4c330e13b81e8cb27e35a8705e2e89b00792ddaa..0000000000000000000000000000000000000000 --- a/mmdet/core/targets/mask_target.py +++ /dev/null @@ -1,2 +0,0 @@ -def mask_target(): - pass diff --git a/mmdet/core/test_engine.py b/mmdet/core/test_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..4825beda640c443b5d8aab0daf5c30838be4364b --- /dev/null +++ b/mmdet/core/test_engine.py @@ -0,0 +1,14 @@ +from mmdet.datasets import collate +from mmdet.nn.parallel import scatter + +__all__ = ['_data_func'] + +def _data_func(data, gpu_id): + imgs, img_metas = tuple( + scatter(collate([data], samples_per_gpu=1), [gpu_id])[0]) + return dict( + img=imgs, + img_meta=img_metas, + return_loss=False, + return_bboxes=True, + rescale=True) diff --git a/mmdet/core/train_engine.py b/mmdet/core/train_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..cc745faad87cb2a97272934902822666be55d71f --- /dev/null +++ b/mmdet/core/train_engine.py @@ -0,0 +1,40 @@ +import numpy as np +import torch +from collections import OrderedDict +from mmdet.nn.parallel import scatter + + +def parse_losses(losses): + log_vars = OrderedDict() + for loss_key, loss_value in losses.items(): + if isinstance(loss_value, dict): + for _key, _value in loss_value.items(): + if isinstance(_value, list): + _value = sum([_loss.mean() for _loss in _value]) + else: + _value = _value.mean() + log_vars[_key] = _value + elif isinstance(loss_value, list): + log_vars[loss_key] = sum(_loss.mean() for _loss in loss_value) + else: + log_vars[loss_key] = loss_value.mean() + + loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key) + log_vars['loss'] = loss + for _key, _value in log_vars.items(): + log_vars[_key] = _value.item() + + return loss, log_vars + + +def batch_processor(model, data, train_mode, args=None): + data = scatter(data, [torch.cuda.current_device()])[0] + losses = model(**data) + loss, log_vars = parse_losses(losses) + + outputs = dict( + loss=loss / args.world_size, + log_vars=log_vars, + num_samples=len(data['img'].data)) + + return outputs diff --git a/mmdet/core/utils/__init__.py b/mmdet/core/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2b6e79d62e60b5e1efaac985e039b36840f86397 --- /dev/null +++ b/mmdet/core/utils/__init__.py @@ -0,0 +1,3 @@ +from .dist_utils import * +from .hooks import * +from .misc import * diff --git a/mmdet/core/utils/dist_utils.py b/mmdet/core/utils/dist_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..47279c7bf8fd3b0ed66c3099f465b0130c864a23 --- /dev/null +++ 
b/mmdet/core/utils/dist_utils.py @@ -0,0 +1,60 @@ +import os +import torch +import torch.multiprocessing as mp +import torch.distributed as dist +from torch.nn.utils import clip_grad +from mmcv.torchpack import Hook, OptimizerStepperHook + +__all__ = [ + 'init_dist', 'average_gradients', 'broadcast_params', + 'DistOptimizerStepperHook', 'DistSamplerSeedHook' +] + + +def init_dist(world_size, + rank, + backend='gloo', + master_ip='127.0.0.1', + port=29500): + if mp.get_start_method(allow_none=True) is None: + mp.set_start_method('spawn') + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(rank % num_gpus) + os.environ['MASTER_ADDR'] = master_ip + os.environ['MASTER_PORT'] = str(port) + if backend == 'nccl': + dist.init_process_group(backend='nccl') + else: + dist.init_process_group( + backend='gloo', rank=rank, world_size=world_size) + + +def average_gradients(model): + for param in model.parameters(): + if param.requires_grad and not (param.grad is None): + dist.all_reduce(param.grad.data) + + +def broadcast_params(model): + for p in model.state_dict().values(): + dist.broadcast(p, 0) + + +class DistOptimizerStepperHook(OptimizerStepperHook): + + def after_train_iter(self, runner): + runner.optimizer.zero_grad() + runner.outputs['loss'].backward() + average_gradients(runner.model) + if self.grad_clip: + clip_grad.clip_grad_norm_( + filter(lambda p: p.requires_grad, runner.model.parameters()), + max_norm=self.max_norm, + norm_type=self.norm_type) + runner.optimizer.step() + + +class DistSamplerSeedHook(Hook): + + def before_epoch(self, runner): + runner.data_loader.sampler.set_epoch(runner.epoch) diff --git a/mmdet/core/hooks.py b/mmdet/core/utils/hooks.py similarity index 96% rename from mmdet/core/hooks.py rename to mmdet/core/utils/hooks.py index 3347639d51ac19d5072bcb0a2e76c7747d686c77..f97e1fb29061ad5f07fa06907fbec72ede7a3bf3 100644 --- a/mmdet/core/hooks.py +++ b/mmdet/core/utils/hooks.py @@ -7,10 +7,16 @@ import mmcv import numpy as np import torch from mmcv.torchpack import Hook -from mmdet import collate, scatter +from mmdet.datasets import collate +from mmdet.nn.parallel import scatter from pycocotools.cocoeval import COCOeval -from .eval import eval_recalls +from ..eval import eval_recalls + +__all__ = [ + 'EmptyCacheHook', 'DistEvalHook', 'DistEvalRecallHook', + 'CocoDistEvalmAPHook' +] class EmptyCacheHook(Hook): @@ -237,10 +243,3 @@ class CocoDistEvalmAPHook(DistEvalHook, CocoEvalMixin): runner.log_buffer.output[field] = cocoEval.stats[0] runner.log_buffer.ready = True os.remove(tmp_file) - - -class CocoDistCascadeEvalmAPHook(CocoDistEvalmAPHook): - - def evaluate(self, runner, results): - results = [res[-1] for res in results] - super(CocoDistCascadeEvalmAPHook, self).evaluate(runner, results) diff --git a/mmdet/core/utils/misc.py b/mmdet/core/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..0f9c05e4577f23125fad0f0714a8f1089e82dbee --- /dev/null +++ b/mmdet/core/utils/misc.py @@ -0,0 +1,118 @@ +import subprocess + +import mmcv +import numpy as np +import torch + +__all__ = ['tensor2imgs', 'unique', 'unmap', 'results2json'] + + +def tensor2imgs(tensor, + color_order='RGB', + color_mean=(0.485, 0.456, 0.406), + color_std=(0.229, 0.224, 0.225)): + assert color_order in ['RGB', 'BGR'] + img_per_gpu = tensor.size(0) + color_mean = np.array(color_mean, dtype=np.float32) + color_std = np.array(color_std, dtype=np.float32) + imgs = [] + for img_id in range(img_per_gpu): + img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) + if 
color_order == 'RGB': + # denormalize with the stats in the tensor's own channel order first, + # then convert RGB to BGR for OpenCV-style visualization + img = img * color_std + color_mean + img = mmcv.rgb2bgr(img) + else: + img = img * color_std + color_mean + imgs.append(np.ascontiguousarray(img)) + return imgs + + +def unique(tensor): + if tensor.is_cuda: + u_tensor = np.unique(tensor.cpu().numpy()) + return tensor.new_tensor(u_tensor) + else: + return torch.unique(tensor) + + +def unmap(data, count, inds, fill=0): + """ Unmap a subset of item (data) back to the original set of items (of + size count) """ + if data.dim() == 1: + ret = data.new_full((count, ), fill) + ret[inds] = data + else: + new_size = (count, ) + data.size()[1:] + ret = data.new_full(new_size, fill) + ret[inds, :] = data + return ret + +def xyxy2xywh(bbox): + _bbox = bbox.tolist() + return [ + _bbox[0], + _bbox[1], + _bbox[2] - _bbox[0] + 1, + _bbox[3] - _bbox[1] + 1, + ] + +def det2json(dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + result = results[idx] + for label in range(len(result)): + bboxes = result[label] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = dataset.cat_ids[label] + json_results.append(data) + return json_results + + +def segm2json(dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + det, seg = results[idx] + for label in range(len(det)): + bboxes = det[label] + segms = seg[label] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = dataset.cat_ids[label] + segms[i]['counts'] = segms[i]['counts'].decode() + data['segmentation'] = segms[i] + json_results.append(data) + return json_results + + +def proposal2json(dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + bboxes = results[idx] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = 1 + json_results.append(data) + return json_results + + +def results2json(dataset, results, out_file): + if isinstance(results[0], list): + json_results = det2json(dataset, results) + elif isinstance(results[0], tuple): + json_results = segm2json(dataset, results) + elif isinstance(results[0], np.ndarray): + json_results = proposal2json(dataset, results) + else: + raise TypeError('invalid type of results') + mmcv.dump(json_results, out_file) diff --git a/mmdet/datasets/coco.py b/mmdet/datasets/coco.py index e0705e79b6168c2ccf45610af3609013082ddb48..a7eedca6a2cbab92e069415513def5ab363dc824 100644 --- a/mmdet/datasets/coco.py +++ b/mmdet/datasets/coco.py @@ -71,7 +71,6 @@ def parse_ann_info(ann_info, cat2label, with_mask=True): class CocoDataset(Dataset): - def __init__(self, ann_file, img_prefix, @@ -253,31 +252,38 @@ class CocoDataset(Dataset): def prepare_test_img(self, idx): """Prepare an image for testing (multi-scale and flipping)""" - img_info = self._load_info(idx, with_ann=False) - img_file = osp.join(self.prefix, img_info['file_name']) + img_info = self.img_infos[idx] + img = mmcv.imread(osp.join(self.img_prefix, img_info['file_name'])) proposal = (self.proposals[idx][:, :4] if self.proposals is not None else None) - def prepare_single(img_file, scale, flip, proposal=None): - img_np, shape_scale_np = self.img_transform(img_file, scale, flip) - img, shape_scale = self.numpy2tensor(img_np, 
shape_scale_np) - img_meta = dict(shape_scale=shape_scale, flip=flip) + def prepare_single(img, scale, flip, proposal=None): + _img, _img_shape, _scale_factor = self.img_transform( + img, scale, flip) + img, img_shape, scale_factor = self.numpy2tensor( + _img, _img_shape, _scale_factor) + ori_shape = (img_info['height'], img_info['width']) + img_meta = dict( + ori_shape=ori_shape, + img_shape=img_shape, + scale_factor=scale_factor, + flip=flip) if proposal is not None: - proposal = self.bbox_transform(proposal, shape_scale_np, flip) + proposal = self.bbox_transform(proposal, _scale_factor, flip) proposal = self.numpy2tensor(proposal) return img, img_meta, proposal imgs = [] img_metas = [] proposals = [] - for scale in self.img_scale: - img, img_meta, proposal = prepare_single(img_file, scale, False, + for scale in self.img_scales: + img, img_meta, proposal = prepare_single(img, scale, False, proposal) imgs.append(img) img_metas.append(img_meta) proposals.append(proposal) if self.flip_ratio > 0: - img, img_meta, prop = prepare_single(img_file, scale, True, + img, img_meta, prop = prepare_single(img, scale, True, proposal) imgs.append(img) img_metas.append(img_meta) diff --git a/mmdet/datasets/data_engine.py b/mmdet/datasets/data_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..0c89f21878a9f2fe2b21669ecfb2cd71cc9ae073 --- /dev/null +++ b/mmdet/datasets/data_engine.py @@ -0,0 +1,29 @@ +from functools import partial +import torch +from .coco import CocoDataset +from .collate import collate +from .sampler import GroupSampler, DistributedGroupSampler + + +def build_data(cfg, args): + dataset = CocoDataset(**cfg) + + if args.dist: + sampler = DistributedGroupSampler(dataset, args.img_per_gpu, + args.world_size, args.rank) + batch_size = args.img_per_gpu + num_workers = args.data_workers + else: + sampler = GroupSampler(dataset, args.img_per_gpu) + batch_size = args.world_size * args.img_per_gpu + num_workers = args.world_size * args.data_workers + + loader = torch.utils.data.DataLoader( + dataset, + batch_size=batch_size, + sampler=sampler, + num_workers=num_workers, + collate_fn=partial(collate, samples_per_gpu=args.img_per_gpu), + pin_memory=False) + + return loader diff --git a/mmdet/datasets/transforms.py b/mmdet/datasets/transforms.py index 81f3a627d0d20a5890ea9c5f597e814ea373b9e5..1532fe074f2968b225cc030dc3f868b3c7780194 100644 --- a/mmdet/datasets/transforms.py +++ b/mmdet/datasets/transforms.py @@ -1,9 +1,8 @@ import mmcv -# import cvbase as cvb import numpy as np import torch -from mmdet.core import segms +from mmdet.core.mask_ops import segms __all__ = [ 'ImageTransform', 'BboxTransform', 'PolyMaskTransform', 'Numpy2Tensor' ] @@ -64,7 +63,7 @@ class ImageTransform(object): class ImageCrop(object): """crop image patches and resize patches into fixed size - 1. (read and) flip image (if needed) + 1. (read and) flip image (if needed) 2. crop image patches according to given bboxes 3. resize patches into fixed size (default 224x224) 4. 
normalize the image (if needed) @@ -126,6 +125,8 @@ class BboxTransform(object): gt_bboxes = bboxes * scale_factor if flip: gt_bboxes = mmcv.bbox_flip(gt_bboxes, img_shape) + gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1]) + gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0]) if self.max_num_gts is None: return gt_bboxes else: @@ -205,4 +206,4 @@ class Numpy2Tensor(object): if len(args) == 1: return torch.from_numpy(args[0]) else: - return tuple([torch.from_numpy(array) for array in args]) + return tuple([torch.from_numpy(np.array(array)) for array in args]) diff --git a/mmdet/models/__init__.py b/mmdet/models/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2209550509f71a71a66b2582440986eebcf3926c 100644 --- a/mmdet/models/__init__.py +++ b/mmdet/models/__init__.py @@ -0,0 +1 @@ +from .detectors import Detector diff --git a/mmdet/models/bbox_heads/__init__.py b/mmdet/models/bbox_heads/__init__.py index e6709af6176d5d574bf7f4a5bdf8e67691787536..49a863594290ce0b0e748ffc45c6d4a4381e2140 100644 --- a/mmdet/models/bbox_heads/__init__.py +++ b/mmdet/models/bbox_heads/__init__.py @@ -1,3 +1,4 @@ from .bbox_head import BBoxHead +from .convfc_bbox_head import ConvFCRoIHead, SharedFCRoIHead -__all__ = ['BBoxHead'] +__all__ = ['BBoxHead', 'ConvFCRoIHead', 'SharedFCRoIHead'] diff --git a/mmdet/models/bbox_heads/bbox_head.py b/mmdet/models/bbox_heads/bbox_head.py index 9f0c188a459286ee5c0e5ab71f8305da0d1ab761..5f6e1136eed45abe85a710170e76e04cba0e91cf 100644 --- a/mmdet/models/bbox_heads/bbox_head.py +++ b/mmdet/models/bbox_heads/bbox_head.py @@ -1,7 +1,7 @@ import torch.nn as nn import torch.nn.functional as F -from mmdet.core import (bbox_transform_inv, bbox_target, multiclass_nms, +from mmdet.core import (bbox_transform_inv, multiclass_nms, bbox_target, weighted_cross_entropy, weighted_smoothl1, accuracy) @@ -10,7 +10,6 @@ class BBoxHead(nn.Module): regression respectively""" def __init__(self, - exclude_mal_box=True, with_avg_pool=False, with_cls=True, with_reg=True, @@ -31,7 +30,6 @@ class BBoxHead(nn.Module): self.target_means = target_means self.target_stds = target_stds self.reg_class_agnostic = reg_class_agnostic - self.exclude_mal_box = exclude_mal_box in_channels = self.in_channels if self.with_avg_pool: @@ -61,7 +59,7 @@ class BBoxHead(nn.Module): bbox_pred = self.fc_reg(x) if self.with_reg else None return cls_score, bbox_pred - def bbox_target(self, pos_proposals, neg_proposals, pos_gt_bboxes, + def get_bbox_target(self, pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, rcnn_train_cfg): reg_num_classes = 1 if self.reg_class_agnostic else self.num_classes cls_reg_targets = bbox_target( @@ -69,11 +67,10 @@ class BBoxHead(nn.Module): neg_proposals, pos_gt_bboxes, pos_gt_labels, - self.target_means, - self.target_stds, rcnn_train_cfg, reg_num_classes, - debug_imgs=self.debug_imgs) + target_means=self.target_means, + target_stds=self.target_stds) return cls_reg_targets def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets, @@ -96,6 +93,7 @@ class BBoxHead(nn.Module): cls_score, bbox_pred, img_shape, + scale_factor, rescale=False, nms_cfg=None): if isinstance(cls_score, list): @@ -111,7 +109,7 @@ class BBoxHead(nn.Module): # TODO: add clip here if rescale: - bboxes /= img_shape[-1] + bboxes /= scale_factor.float() if nms_cfg is None: return bboxes, scores diff --git a/mmdet/models/bbox_heads/convfc_bbox_head.py b/mmdet/models/bbox_heads/convfc_bbox_head.py new file mode 100644 index 
0000000000000000000000000000000000000000..02e2a6b6d859e728a47f98fe857f1e71c2a6754a --- /dev/null +++ b/mmdet/models/bbox_heads/convfc_bbox_head.py @@ -0,0 +1,174 @@ +import torch.nn as nn + +from .bbox_head import BBoxHead +from ..utils import ConvModule + + +class ConvFCRoIHead(BBoxHead): + """More general bbox head, with shared conv and fc layers and two optional + separated branches. + + /-> cls convs -> cls fcs -> cls + shared convs -> shared fcs + \-> reg convs -> reg fcs -> reg + """ + + def __init__(self, + num_shared_convs=0, + num_shared_fcs=0, + num_cls_convs=0, + num_cls_fcs=0, + num_reg_convs=0, + num_reg_fcs=0, + conv_out_channels=256, + fc_out_channels=1024, + *args, + **kwargs): + super(ConvFCRoIHead, self).__init__(*args, **kwargs) + assert (num_shared_convs + num_shared_fcs + num_cls_convs + num_cls_fcs + + num_reg_convs + num_reg_fcs > 0) + if num_cls_convs > 0 or num_reg_convs > 0: + assert num_shared_fcs == 0 + if not self.with_cls: + assert num_cls_convs == 0 and num_cls_fcs == 0 + if not self.with_reg: + assert num_reg_convs == 0 and num_reg_fcs == 0 + self.num_shared_convs = num_shared_convs + self.num_shared_fcs = num_shared_fcs + self.num_cls_convs = num_cls_convs + self.num_cls_fcs = num_cls_fcs + self.num_reg_convs = num_reg_convs + self.num_reg_fcs = num_reg_fcs + self.conv_out_channels = conv_out_channels + self.fc_out_channels = fc_out_channels + + # add shared convs and fcs + self.shared_convs, self.shared_fcs, last_layer_dim = self._add_conv_fc_branch( + self.num_shared_convs, self.num_shared_fcs, self.in_channels, True) + self.shared_out_channels = last_layer_dim + + # add cls specific branch + self.cls_convs, self.cls_fcs, self.cls_last_dim = self._add_conv_fc_branch( + self.num_cls_convs, self.num_cls_fcs, self.shared_out_channels) + + # add reg specific branch + self.reg_convs, self.reg_fcs, self.reg_last_dim = self._add_conv_fc_branch( + self.num_reg_convs, self.num_reg_fcs, self.shared_out_channels) + + if self.num_shared_fcs == 0 and not self.with_avg_pool: + if self.num_cls_fcs == 0: + self.cls_last_dim *= (self.roi_feat_size * self.roi_feat_size) + if self.num_reg_fcs == 0: + self.reg_last_dim *= (self.roi_feat_size * self.roi_feat_size) + + self.relu = nn.ReLU(inplace=True) + # reconstruct fc_cls and fc_reg since input channels are changed + if self.with_cls: + self.fc_cls = nn.Linear(self.cls_last_dim, self.num_classes) + if self.with_reg: + out_dim_reg = (4 if self.reg_class_agnostic else + 4 * self.num_classes) + self.fc_reg = nn.Linear(self.reg_last_dim, out_dim_reg) + + def _add_conv_fc_branch(self, + num_branch_convs, + num_branch_fcs, + in_channels, + is_shared=False): + """Add shared or separable branch + + convs -> avg pool (optional) -> fcs + """ + last_layer_dim = in_channels + # add branch specific conv layers + branch_convs = nn.ModuleList() + if num_branch_convs > 0: + for i in range(num_branch_convs): + conv_in_channels = (last_layer_dim + if i == 0 else self.conv_out_channels) + branch_convs.append( + ConvModule( + conv_in_channels, + self.conv_out_channels, + 3, + padding=1, + normalize=self.normalize, + bias=self.with_bias)) + last_layer_dim = self.conv_out_channels + # add branch specific fc layers + branch_fcs = nn.ModuleList() + if num_branch_fcs > 0: + # for shared branch, only consider self.with_avg_pool + # for separated branches, also consider self.num_shared_fcs + if (is_shared + or self.num_shared_fcs == 0) and not self.with_avg_pool: + last_layer_dim *= (self.roi_feat_size * self.roi_feat_size) + for i in 
range(num_branch_fcs): + fc_in_channels = (last_layer_dim + if i == 0 else self.fc_out_channels) + branch_fcs.append( + nn.Linear(fc_in_channels, self.fc_out_channels)) + last_layer_dim = self.fc_out_channels + return branch_convs, branch_fcs, last_layer_dim + + def init_weights(self): + super(ConvFCRoIHead, self).init_weights() + for module_list in [self.shared_fcs, self.cls_fcs, self.reg_fcs]: + for m in module_list.modules(): + if isinstance(m, nn.Linear): + nn.init.xavier_uniform_(m.weight) + nn.init.constant_(m.bias, 0) + + def forward(self, x): + # shared part + if self.num_shared_convs > 0: + for conv in self.shared_convs: + x = conv(x) + + if self.num_shared_fcs > 0: + if self.with_avg_pool: + x = self.avg_pool(x) + x = x.view(x.size(0), -1) + for fc in self.shared_fcs: + x = self.relu(fc(x)) + # separate branches + x_cls = x + x_reg = x + + for conv in self.cls_convs: + x_cls = conv(x_cls) + if x_cls.dim() > 2: + if self.with_avg_pool: + x_cls = self.avg_pool(x_cls) + x_cls = x_cls.view(x_cls.size(0), -1) + for fc in self.cls_fcs: + x_cls = self.relu(fc(x_cls)) + + for conv in self.reg_convs: + x_reg = conv(x_reg) + if x_reg.dim() > 2: + if self.with_avg_pool: + x_reg = self.avg_pool(x_reg) + x_reg = x_reg.view(x_reg.size(0), -1) + for fc in self.reg_fcs: + x_reg = self.relu(fc(x_reg)) + + cls_score = self.fc_cls(x_cls) if self.with_cls else None + bbox_pred = self.fc_reg(x_reg) if self.with_reg else None + return cls_score, bbox_pred + + +class SharedFCRoIHead(ConvFCRoIHead): + + def __init__(self, num_fcs=2, fc_out_channels=1024, *args, **kwargs): + assert num_fcs >= 1 + super(SharedFCRoIHead, self).__init__( + num_shared_convs=0, + num_shared_fcs=num_fcs, + num_cls_convs=0, + num_cls_fcs=0, + num_reg_convs=0, + num_reg_fcs=0, + fc_out_channels=fc_out_channels, + *args, + **kwargs) diff --git a/mmdet/models/builder.py b/mmdet/models/builder.py index f109d851397a5106c33d173eda8986ee1c0f8b06..c3b058507fcdc461a9d3b0271858522e4ba0f1ce 100644 --- a/mmdet/models/builder.py +++ b/mmdet/models/builder.py @@ -1,4 +1,5 @@ import mmcv +from mmcv import torchpack from torch import nn from . import (backbones, necks, roi_extractors, rpn_heads, bbox_heads, @@ -11,7 +12,7 @@ __all__ = [ def _build_module(cfg, parrent=None): - return cfg if isinstance(cfg, nn.Module) else mmcv.obj_from_dict( + return cfg if isinstance(cfg, nn.Module) else torchpack.obj_from_dict( cfg, parrent) diff --git a/mmdet/models/detectors/__init__.py b/mmdet/models/detectors/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..5b690f8d77d6d8eae1adc4bf8b04d3dd3db3462a 100644 --- a/mmdet/models/detectors/__init__.py +++ b/mmdet/models/detectors/__init__.py @@ -0,0 +1 @@ +from .detector import Detector diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/detector.py similarity index 60% rename from mmdet/models/detectors/two_stage.py rename to mmdet/models/detectors/detector.py index 0c057d606fba6c322733490591d5352a42b426a5..80b7d4438cb59612dbff8a2bf71930eb6383a144 100644 --- a/mmdet/models/detectors/two_stage.py +++ b/mmdet/models/detectors/detector.py @@ -2,137 +2,141 @@ import torch import torch.nn as nn from .. 
import builder -from mmdet.core.utils import tensor2imgs from mmdet.core import (bbox2roi, bbox_mapping, split_combined_gt_polys, - bbox_sampling, multiclass_nms, merge_aug_proposals, - merge_aug_bboxes, merge_aug_masks, bbox2result) + bbox2result, multiclass_nms, merge_aug_proposals, + merge_aug_bboxes, merge_aug_masks, sample_proposals) -class TwoStageDetector(nn.Module): - +class Detector(nn.Module): def __init__(self, backbone, - neck, - rpn_head, - roi_block, - bbox_head, - rpn_train_cfg, - rpn_test_cfg, - rcnn_train_cfg, - rcnn_test_cfg, + neck=None, + rpn_head=None, + roi_block=None, + bbox_head=None, mask_block=None, mask_head=None, + rpn_train_cfg=None, + rpn_test_cfg=None, + rcnn_train_cfg=None, + rcnn_test_cfg=None, pretrained=None): - super(TwoStageDetector, self).__init__() + super(Detector, self).__init__() self.backbone = builder.build_backbone(backbone) - self.neck = builder.build_neck(neck) if neck is not None else None - self.rpn_head = builder.build_rpn_head(rpn_head) - self.bbox_roi_extractor = builder.build_roi_block(roi_block) - self.bbox_head = builder.build_bbox_head(bbox_head) - self.mask_roi_extractor = builder.build_roi_block(mask_block) if ( - mask_block is not None) else None - self.mask_head = builder.build_mask_head(mask_head) if ( - mask_head is not None) else None - self.with_mask = False if self.mask_head is None else True - self.rpn_train_cfg = rpn_train_cfg - self.rpn_test_cfg = rpn_test_cfg - self.rcnn_train_cfg = rcnn_train_cfg - self.rcnn_test_cfg = rcnn_test_cfg + self.with_neck = True if neck is not None else False + if self.with_neck: + self.neck = builder.build_neck(neck) + + self.with_rpn = True if rpn_head is not None else False + if self.with_rpn: + self.rpn_head = builder.build_rpn_head(rpn_head) + self.rpn_train_cfg = rpn_train_cfg + self.rpn_test_cfg = rpn_test_cfg + + self.with_bbox = True if bbox_head is not None else False + if self.with_bbox: + self.bbox_roi_extractor = builder.build_roi_extractor(roi_block) + self.bbox_head = builder.build_bbox_head(bbox_head) + self.rcnn_train_cfg = rcnn_train_cfg + self.rcnn_test_cfg = rcnn_test_cfg + + self.with_mask = True if mask_head is not None else False + if self.with_mask: + self.mask_roi_extractor = builder.build_roi_extractor(mask_block) + self.mask_head = builder.build_mask_head(mask_head) + self.init_weights(pretrained=pretrained) def init_weights(self, pretrained=None): if pretrained is not None: print('load model from: {}'.format(pretrained)) self.backbone.init_weights(pretrained=pretrained) - if self.neck is not None: + if self.with_neck: if isinstance(self.neck, nn.Sequential): for m in self.neck: m.init_weights() else: self.neck.init_weights() - self.rpn_head.init_weights() - self.bbox_roi_extractor.init_weights() - self.bbox_head.init_weights() - if self.mask_roi_extractor is not None: + if self.with_rpn: + self.rpn_head.init_weights() + if self.with_bbox: + self.bbox_roi_extractor.init_weights() + self.bbox_head.init_weights() + if self.with_mask: self.mask_roi_extractor.init_weights() - if self.mask_head is not None: self.mask_head.init_weights() def forward(self, img, img_meta, gt_bboxes=None, + proposals=None, gt_labels=None, - gt_ignore=None, - gt_polys=None, + gt_bboxes_ignore=None, + gt_mask_polys=None, gt_poly_lens=None, num_polys_per_mask=None, return_loss=True, - return_bboxes=False, + return_bboxes=True, rescale=False): - if not return_loss: - return self.test(img, img_meta, rescale) + assert proposals is not None or self.with_rpn, "Only one of proposals file and RPN can 
exist." - if not self.with_mask: - assert (gt_polys is None and gt_poly_lens is None - and num_polys_per_mask is None) + if not return_loss: + return self.test(img, img_meta, proposals, rescale) else: - assert (gt_polys is not None and gt_poly_lens is not None - and num_polys_per_mask is not None) - gt_polys = split_combined_gt_polys(gt_polys, gt_poly_lens, - num_polys_per_mask) - - if self.rpn_train_cfg.get('debug', False): - self.rpn_head.debug_imgs = tensor2imgs(img) - if self.rcnn_train_cfg.get('debug', False): - self.bbox_head.debug_imgs = tensor2imgs(img) - if self.mask_head is not None: - self.mask_head.debug_imgs = tensor2imgs(img) - - img_shapes = img_meta['shape_scale'] + losses = dict() + img_shapes = img_meta['img_shape'] x = self.backbone(img) - if self.neck is not None: + + if self.with_neck: x = self.neck(x) - rpn_outs = self.rpn_head(x) - proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) - proposal_list = self.rpn_head.get_proposals(*proposal_inputs) + if self.with_rpn: + rpn_outs = self.rpn_head(x) + rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes, + self.rpn_train_cfg) + rpn_losses = self.rpn_head.loss(*rpn_loss_inputs) + losses.update(rpn_losses) - (pos_inds, neg_inds, pos_proposals, neg_proposals, - pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels) = bbox_sampling( - proposal_list, gt_bboxes, gt_ignore, gt_labels, - self.rcnn_train_cfg) + if self.with_bbox: + if self.with_rpn: + proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) + proposal_list = self.rpn_head.get_proposals(*proposal_inputs) + else: + proposal_list = proposals - labels, label_weights, bbox_targets, bbox_weights = \ - self.bbox_head.proposal_target( - pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, - self.rcnn_train_cfg) + (pos_inds, neg_inds, pos_proposals, neg_proposals, + pos_assigned_gt_inds, + pos_gt_bboxes, pos_gt_labels) = sample_proposals( + proposal_list, gt_bboxes, gt_bboxes_ignore, gt_labels, + self.rcnn_train_cfg) - rois = bbox2roi([ - torch.cat([pos, neg], dim=0) - for pos, neg in zip(pos_proposals, neg_proposals) - ]) - # TODO: a more flexible way to configurate feat maps - roi_feats = self.bbox_roi_extractor( - x[:self.bbox_roi_extractor.num_inputs], rois) - cls_score, bbox_pred = self.bbox_head(roi_feats) + labels, label_weights, bbox_targets, bbox_weights = \ + self.bbox_head.get_bbox_target( + pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, + self.rcnn_train_cfg) - losses = dict() - rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes, - self.rpn_train_cfg) - rpn_losses = self.rpn_head.loss(*rpn_loss_inputs) - losses.update(rpn_losses) + rois = bbox2roi([ + torch.cat([pos, neg], dim=0) + for pos, neg in zip(pos_proposals, neg_proposals) + ]) + # TODO: a more flexible way to configurate feat maps + roi_feats = self.bbox_roi_extractor( + x[:self.bbox_roi_extractor.num_inputs], rois) + cls_score, bbox_pred = self.bbox_head(roi_feats) - loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, labels, - label_weights, bbox_targets, - bbox_weights) - losses.update(loss_bbox) + loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, labels, + label_weights, bbox_targets, + bbox_weights) + losses.update(loss_bbox) if self.with_mask: - mask_targets = self.mask_head.mask_target( - pos_proposals, pos_assigned_gt_inds, gt_polys, img_shapes, + gt_polys = split_combined_gt_polys(gt_mask_polys, gt_poly_lens, + num_polys_per_mask) + mask_targets = self.mask_head.get_mask_target( + pos_proposals, pos_assigned_gt_inds, gt_polys, img_meta, self.rcnn_train_cfg) 
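+ # mask prediction and its loss are computed on the positive RoIs only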
pos_rois = bbox2roi(pos_proposals) mask_feats = self.mask_roi_extractor( @@ -142,36 +146,40 @@ class TwoStageDetector(nn.Module): torch.cat(pos_gt_labels)) return losses - def test(self, imgs, img_metas, rescale=False): + def test(self, imgs, img_metas, proposals=None, rescale=False): """Test w/ or w/o augmentations.""" assert isinstance(imgs, list) and isinstance(img_metas, list) assert len(imgs) == len(img_metas) img_per_gpu = imgs[0].size(0) assert img_per_gpu == 1 if len(imgs) == 1: - return self.simple_test(imgs[0], img_metas[0], rescale) + return self.simple_test(imgs[0], img_metas[0], proposals, rescale) else: - return self.aug_test(imgs, img_metas, rescale) - - def simple_test_bboxes(self, x, img_meta, rescale=False): - """Test only det bboxes without augmentation.""" + return self.aug_test(imgs, img_metas, proposals, rescale) - img_shapes = img_meta['shape_scale'] + def simple_test_rpn(self, x, img_meta): + img_shapes = img_meta['img_shape'] + scale_factor = img_meta['scale_factor'] rpn_outs = self.rpn_head(x) proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) - proposal_list = self.rpn_head.get_proposals(*proposal_inputs) + proposal_list = self.rpn_head.get_proposals(*proposal_inputs)[0] + return proposal_list - rois = bbox2roi(proposal_list) + def simple_test_bboxes(self, x, img_meta, proposals, rescale=False): + """Test only det bboxes without augmentation.""" + rois = bbox2roi(proposals) roi_feats = self.bbox_roi_extractor( x[:len(self.bbox_roi_extractor.featmap_strides)], rois) cls_score, bbox_pred = self.bbox_head(roi_feats) # image shape of the first image in the batch (only one) - img_shape = img_shapes[0] + img_shape = img_meta['img_shape'][0] + scale_factor = img_meta['scale_factor'] det_bboxes, det_labels = self.bbox_head.get_det_bboxes( rois, cls_score, bbox_pred, img_shape, + scale_factor, rescale=rescale, nms_cfg=self.rcnn_test_cfg) return det_bboxes, det_labels @@ -183,41 +191,52 @@ class TwoStageDetector(nn.Module): det_labels, rescale=False): # image shape of the first image in the batch (only one) - img_shape = img_meta['shape_scale'][0] + img_shape = img_meta['img_shape'][0] + scale_factor = img_meta['scale_factor'] if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] else: # if det_bboxes is rescaled to the original image size, we need to # rescale it back to the testing scale to obtain RoIs. 
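# (with rescale=True the boxes are in original-image coordinates, hence the multiplication by scale_factor below)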
- _bboxes = (det_bboxes[:, :4] * img_shape[-1] + _bboxes = (det_bboxes[:, :4] * scale_factor.float() if rescale else det_bboxes) mask_rois = bbox2roi([_bboxes]) mask_feats = self.mask_roi_extractor( x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) mask_pred = self.mask_head(mask_feats) segm_result = self.mask_head.get_seg_masks( - mask_pred, det_bboxes, det_labels, img_shape, - self.rcnn_test_cfg, rescale) + mask_pred, + det_bboxes, + det_labels, + self.rcnn_test_cfg, + ori_scale=img_meta['ori_shape']) return segm_result - def simple_test(self, img, img_meta, rescale=False): + def simple_test(self, img, img_meta, proposals=None, rescale=False): """Test without augmentation.""" # get feature maps x = self.backbone(img) - if self.neck is not None: + if self.with_neck: x = self.neck(x) - det_bboxes, det_labels = self.simple_test_bboxes( - x, img_meta, rescale=rescale) - bbox_result = bbox2result(det_bboxes, det_labels, - self.bbox_head.num_classes) - if not self.with_mask: - return bbox_result - - segm_result = self.simple_test_mask( - x, img_meta, det_bboxes, det_labels, rescale=rescale) + if self.with_rpn: + proposals = self.simple_test_rpn(x, img_meta) + if self.with_bbox: + # BUG proposals shape? + det_bboxes, det_labels = self.simple_test_bboxes( + x, img_meta, [proposals], rescale=rescale) + bbox_result = bbox2result(det_bboxes, det_labels, + self.bbox_head.num_classes) + if not self.with_mask: + return bbox_result - return bbox_result, segm_result + segm_result = self.simple_test_mask( + x, img_meta, det_bboxes, det_labels, rescale=rescale) + return bbox_result, segm_result + else: + proposals[:, :4] /= img_meta['scale_factor'].float() + return proposals.cpu().numpy() + # TODO aug test haven't been verified def aug_test_bboxes(self, imgs, img_metas): """Test with augmentations for det bboxes.""" # step 1: get RPN proposals for augmented images, apply NMS to the diff --git a/mmdet/models/detectors/rpn.py b/mmdet/models/detectors/rpn.py deleted file mode 100644 index 6d80c9d9b10a12c07155f11ab00b24542f805cc6..0000000000000000000000000000000000000000 --- a/mmdet/models/detectors/rpn.py +++ /dev/null @@ -1,100 +0,0 @@ -import torch.nn as nn - -from mmdet.core import tensor2imgs, merge_aug_proposals, bbox_mapping -from .. 
import builder - - -class RPN(nn.Module): - - def __init__(self, - backbone, - neck, - rpn_head, - rpn_train_cfg, - rpn_test_cfg, - pretrained=None): - super(RPN, self).__init__() - self.backbone = builder.build_backbone(backbone) - self.neck = builder.build_neck(neck) if neck is not None else None - self.rpn_head = builder.build_rpn_head(rpn_head) - self.rpn_train_cfg = rpn_train_cfg - self.rpn_test_cfg = rpn_test_cfg - self.init_weights(pretrained=pretrained) - - def init_weights(self, pretrained=None): - if pretrained is not None: - print('load model from: {}'.format(pretrained)) - self.backbone.init_weights(pretrained=pretrained) - if self.neck is not None: - self.neck.init_weights() - self.rpn_head.init_weights() - - def forward(self, - img, - img_meta, - gt_bboxes=None, - return_loss=True, - return_bboxes=False, - rescale=False): - if not return_loss: - return self.test(img, img_meta, rescale) - - img_shapes = img_meta['shape_scale'] - - if self.rpn_train_cfg.get('debug', False): - self.rpn_head.debug_imgs = tensor2imgs(img) - - x = self.backbone(img) - if self.neck is not None: - x = self.neck(x) - rpn_outs = self.rpn_head(x) - - rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes, - self.rpn_train_cfg) - losses = self.rpn_head.loss(*rpn_loss_inputs) - return losses - - def test(self, imgs, img_metas, rescale=False): - """Test w/ or w/o augmentations.""" - assert isinstance(imgs, list) and isinstance(img_metas, list) - assert len(imgs) == len(img_metas) - img_per_gpu = imgs[0].size(0) - assert img_per_gpu == 1 - if len(imgs) == 1: - return self.simple_test(imgs[0], img_metas[0], rescale) - else: - return self.aug_test(imgs, img_metas, rescale) - - def simple_test(self, img, img_meta, rescale=False): - img_shapes = img_meta['shape_scale'] - # get feature maps - x = self.backbone(img) - if self.neck is not None: - x = self.neck(x) - rpn_outs = self.rpn_head(x) - proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) - proposals = self.rpn_head.get_proposals(*proposal_inputs)[0] - if rescale: - proposals[:, :4] /= img_shapes[0][-1] - return proposals.cpu().numpy() - - def aug_test(self, imgs, img_metas, rescale=False): - aug_proposals = [] - for img, img_meta in zip(imgs, img_metas): - x = self.backbone(img) - if self.neck is not None: - x = self.neck(x) - rpn_outs = self.rpn_head(x) - proposal_inputs = rpn_outs + (img_meta['shape_scale'], - self.rpn_test_cfg) - proposal_list = self.rpn_head.get_proposals(*proposal_inputs) - assert len(proposal_list) == 1 - aug_proposals.append(proposal_list[0]) # len(proposal_list) = 1 - merged_proposals = merge_aug_proposals(aug_proposals, img_metas, - self.rpn_test_cfg) - if not rescale: - img_shape = img_metas[0]['shape_scale'][0] - flip = img_metas[0]['flip'][0] - merged_proposals[:, :4] = bbox_mapping(merged_proposals[:, :4], - img_shape, flip) - return merged_proposals.cpu().numpy() diff --git a/mmdet/models/mask_heads/fcn_mask_head.py b/mmdet/models/mask_heads/fcn_mask_head.py index 28865a68f006a4cd04753a1eb6caeda9ce3fc284..016c05204bdc4533f7cca438666aa011f5ceb56d 100644 --- a/mmdet/models/mask_heads/fcn_mask_head.py +++ b/mmdet/models/mask_heads/fcn_mask_head.py @@ -3,10 +3,9 @@ import numpy as np import pycocotools.mask as mask_util import torch import torch.nn as nn -import torch.utils.checkpoint as cp -from ..common import ConvModule -from mmdet.core import mask_target, mask_cross_entropy +from ..utils import ConvModule +from mmdet.core import mask_cross_entropy, mask_target class FCNMaskHead(nn.Module): @@ -21,7 +20,6 @@ class 
FCNMaskHead(nn.Module): upsample_ratio=2, num_classes=81, class_agnostic=False, - with_cp=False, normalize=None): super(FCNMaskHead, self).__init__() if upsample_method not in [None, 'deconv', 'nearest', 'bilinear']: @@ -39,7 +37,6 @@ class FCNMaskHead(nn.Module): self.class_agnostic = class_agnostic self.normalize = normalize self.with_bias = normalize is None - self.with_cp = with_cp self.convs = nn.ModuleList() for i in range(self.num_convs): @@ -79,25 +76,9 @@ class FCNMaskHead(nn.Module): m.weight, mode='fan_out', nonlinearity='relu') nn.init.constant_(m.bias, 0) - def convs_forward(self, x): - - def m_lvl_convs_forward(x): - for conv in self.convs[1:-1]: - x = conv(x) - return x - - if self.num_convs > 0: - x = self.convs[0](x) - if self.num_convs > 1: - if self.with_cp and x.requires_grad: - x = cp.checkpoint(m_lvl_convs_forward, x) - else: - x = m_lvl_convs_forward(x) - x = self.convs[-1](x) - return x - def forward(self, x): - x = self.convs_forward(x) + for conv in self.convs: + x = conv(x) if self.upsample is not None: x = self.upsample(x) if self.upsample_method == 'deconv': @@ -105,24 +86,18 @@ class FCNMaskHead(nn.Module): mask_pred = self.conv_logits(x) return mask_pred - def mask_target(self, pos_proposals, pos_assigned_gt_inds, gt_masks, - img_shapes, rcnn_train_cfg): + def get_mask_target(self, pos_proposals, pos_assigned_gt_inds, gt_masks, + img_meta, rcnn_train_cfg): mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds, - gt_masks, img_shapes, rcnn_train_cfg) + gt_masks, img_meta, rcnn_train_cfg) return mask_targets def loss(self, mask_pred, mask_targets, labels): loss_mask = mask_cross_entropy(mask_pred, mask_targets, labels) return loss_mask - def get_seg_masks(self, - mask_pred, - det_bboxes, - det_labels, - img_shape, - rcnn_test_cfg, - ori_scale, - rescale=True): + def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg, + ori_scale): """Get segmentation masks from mask_pred and bboxes Args: mask_pred (Tensor or ndarray): shape (n, #class+1, h, w). 
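Since `get_seg_masks` no longer takes `img_shape` or `rescale`, boxes are assumed to arrive in original-image coordinates and `ori_scale` supplies the output canvas size. A minimal sketch of the per-instance paste step the hunk below performs (the helper name is hypothetical; `mask_thr_binary` comes from the rcnn test cfg):

```python
import numpy as np
import mmcv

def paste_mask(mask_pred_, bbox, img_h, img_w, mask_thr_binary=0.5):
    # mask_pred_: (h0, w0) float mask for a single instance and class.
    # bbox: [x1, y1, x2, y2] in original-image pixel coordinates.
    # Clamp the box so the paste stays inside the canvas.
    x1, y1 = max(int(bbox[0]), 0), max(int(bbox[1]), 0)
    w = min(int(bbox[2]) + 1, img_w) - x1
    h = min(int(bbox[3]) + 1, img_h) - y1
    im_mask = np.zeros((img_h, img_w), dtype=np.float32)
    # mmcv.imresize takes a (w, h) size; paste the resized mask in place.
    im_mask[y1:y1 + h, x1:x1 + w] = mmcv.imresize(mask_pred_, (w, h))
    return (im_mask > mask_thr_binary).astype(np.uint8)
```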
@@ -143,14 +118,11 @@ class FCNMaskHead(nn.Module): cls_segms = [[] for _ in range(self.num_classes - 1)] bboxes = det_bboxes.cpu().numpy()[:, :4] labels = det_labels.cpu().numpy() + 1 - scale_factor = img_shape[-1] if rescale else 1.0 - img_h = ori_scale['height'] if rescale else np.round( - ori_scale['height'].item() * img_shape[-1].item()).astype(np.int32) - img_w = ori_scale['width'] if rescale else np.round( - ori_scale['width'].item() * img_shape[-1].item()).astype(np.int32) + img_h = ori_scale[0] + img_w = ori_scale[1] for i in range(bboxes.shape[0]): - bbox = (bboxes[i, :] / float(scale_factor)).astype(int) + bbox = bboxes[i, :].astype(int) label = labels[i] w = bbox[2] - bbox[0] + 1 h = bbox[3] - bbox[1] + 1 @@ -164,7 +136,7 @@ class FCNMaskHead(nn.Module): im_mask = np.zeros((img_h, img_w), dtype=np.float32) - im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = mmcv.resize( + im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = mmcv.imresize( mask_pred_, (w, h)) # im_mask = cv2.resize(im_mask, (img_w, img_h)) im_mask = np.array( diff --git a/mmdet/models/necks/fpn.py b/mmdet/models/necks/fpn.py index c4734e18621bec4cdb8e33052935c6d7f3a495e2..8b5b49826bad94ce00379e60bbafc905b0cba9af 100644 --- a/mmdet/models/necks/fpn.py +++ b/mmdet/models/necks/fpn.py @@ -1,7 +1,7 @@ import torch.nn as nn import torch.nn.functional as F -from ..common import ConvModule -from ..weight_init import xavier_init +from ..utils import ConvModule +from ..utils import xavier_init class FPN(nn.Module): diff --git a/mmdet/models/rpn_heads/rpn_head.py b/mmdet/models/rpn_heads/rpn_head.py index f2fce9ebe7aa5c820139fa0188e2f6a25322ed66..7ffd441f694b5d6c37d3042bb25088f27b002ea9 100644 --- a/mmdet/models/rpn_heads/rpn_head.py +++ b/mmdet/models/rpn_heads/rpn_head.py @@ -9,8 +9,8 @@ from mmdet.core import (AnchorGenerator, anchor_target, bbox_transform_inv, weighted_cross_entropy, weighted_smoothl1, weighted_binary_cross_entropy) from mmdet.ops import nms -from ..misc import multi_apply -from ..weight_init import normal_init +from ..utils import multi_apply +from ..utils import normal_init class RPNHead(nn.Module): diff --git a/mmdet/models/common/__init__.py b/mmdet/models/utils/__init__.py similarity index 71% rename from mmdet/models/common/__init__.py rename to mmdet/models/utils/__init__.py index 1a611c251065f2addc6c069d61c7e1f18fbd7da2..f11af964480456cce144172591bd0b94f3ed7ad7 100644 --- a/mmdet/models/common/__init__.py +++ b/mmdet/models/utils/__init__.py @@ -1,4 +1,6 @@ from .conv_module import ConvModule from .norm import build_norm_layer +from .misc import * +from .weight_init import * __all__ = ['ConvModule', 'build_norm_layer'] diff --git a/mmdet/models/common/conv_module.py b/mmdet/models/utils/conv_module.py similarity index 100% rename from mmdet/models/common/conv_module.py rename to mmdet/models/utils/conv_module.py diff --git a/mmdet/models/misc.py b/mmdet/models/utils/misc.py similarity index 100% rename from mmdet/models/misc.py rename to mmdet/models/utils/misc.py diff --git a/mmdet/models/common/norm.py b/mmdet/models/utils/norm.py similarity index 100% rename from mmdet/models/common/norm.py rename to mmdet/models/utils/norm.py diff --git a/mmdet/models/weight_init.py b/mmdet/models/utils/weight_init.py similarity index 100% rename from mmdet/models/weight_init.py rename to mmdet/models/utils/weight_init.py diff --git a/mmdet/nn/parallel/scatter_gather.py b/mmdet/nn/parallel/scatter_gather.py index 82511fd1db12774e1df1468e93353f2a963ed962..47f794e8916956f9e8c494e50aff7e5b870889e7 100644 --- 
a/mmdet/nn/parallel/scatter_gather.py +++ b/mmdet/nn/parallel/scatter_gather.py @@ -1,7 +1,7 @@ import torch from ._functions import Scatter from torch.nn.parallel._functions import Scatter as OrigScatter -from detkit.datasets.utils import DataContainer +from mmdet.datasets.utils import DataContainer def scatter(inputs, target_gpus, dim=0): diff --git a/tools/eval.py b/tools/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..20cc571e94b2fcf228f2d0782cf8a8b16dd3688b --- /dev/null +++ b/tools/eval.py @@ -0,0 +1,265 @@ +from argparse import ArgumentParser +from multiprocessing import Pool +import matplotlib.pyplot as plt +import numpy as np +import copy +import os + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + + +def generate_area_range(splitRng=32, stop_size=128): + areaRng = [[0**2, 1e5**2], [0**2, 32**2], [32**2, 96**2], [96**2, 1e5**2]] + start = 0 + while start < stop_size: + end = start + splitRng + areaRng.append([start * start, end * end]) + start = end + areaRng.append([start * start, 1e5**2]) + return areaRng + + +def print_summarize(iouThr=None, + iouThrs=None, + precision=None, + recall=None, + areaRng_id=4, + areaRngs=None, + maxDets_id=2, + maxDets=None): + assert (precision is not None) or (recall is not None) + iStr = ' {:<18} {} @[ IoU={:<9} | size={:>5}-{:>5} | maxDets={:>3d} ] = {:0.3f}' + titleStr = 'Average Precision' if precision is not None else 'Average Recall' + typeStr = '(AP)' if precision is not None else '(AR)' + iouStr = '{:0.2f}:{:0.2f}'.format(iouThrs[0], iouThrs[-1]) \ + if iouThr is None else '{:0.2f}'.format(iouThr) + + aind = [areaRng_id] + mind = [maxDets_id] + if precision is not None: + # dimension of precision: [TxRxKxAxM] + s = precision + # IoU + if iouThr is not None: + t = np.where(iouThr == iouThrs)[0] + s = s[t] + s = s[:, :, :, aind, mind] + else: + # dimension of recall: [TxKxAxM] + s = recall + if iouThr is not None: + t = np.where(iouThr == iouThrs)[0] + s = s[t] + s = s[:, :, aind, mind] + if len(s[s > -1]) == 0: + mean_s = -1 + else: + mean_s = np.mean(s[s > -1]) + print( + iStr.format( + titleStr, typeStr, iouStr, np.sqrt(areaRngs[areaRng_id][0]), + np.sqrt(areaRngs[areaRng_id][1]) + if np.sqrt(areaRngs[areaRng_id][1]) < 999 else 'max', + maxDets[maxDets_id], mean_s)) + + +def eval_results(res_file, ann_file, res_types, splitRng): + for res_type in res_types: + assert res_type in ['proposal', 'bbox', 'segm', 'keypoints'] + + areaRng = generate_area_range(splitRng) + cocoGt = COCO(ann_file) + cocoDt = cocoGt.loadRes(res_file) + imgIds = cocoGt.getImgIds() + for res_type in res_types: + iou_type = 'bbox' if res_type == 'proposal' else res_type + cocoEval = COCOeval(cocoGt, cocoDt, iou_type) + cocoEval.params.imgIds = imgIds + if res_type == 'proposal': + cocoEval.params.useCats = 0 + cocoEval.params.maxDets = [100, 300, 1000] + cocoEval.params.areaRng = areaRng + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + ps = cocoEval.eval['precision'] + rc = cocoEval.eval['recall'] + for i in range(len(areaRng)): + print_summarize(None, cocoEval.params.iouThrs, ps, None, i, + areaRng, 2, cocoEval.params.maxDets) + + +def makeplot(rs, ps, outDir, class_name): + cs = np.vstack([ + np.ones((2, 3)), + np.array([.31, .51, .74]), + np.array([.75, .31, .30]), + np.array([.36, .90, .38]), + np.array([.50, .39, .64]), + np.array([1, .6, 0]) + ]) + areaNames = ['all', 'small', 'medium', 'large'] + types = ['C75', 'C50', 'Loc', 'Sim', 'Oth', 'BG', 'FN'] + for i in 
range(len(areaNames)): + area_ps = ps[..., i, 0] + figure_tile = class_name + '-' + areaNames[i] + aps = [ps_.mean() for ps_ in area_ps] + ps_curve = [ + ps_.mean(axis=1) if ps_.ndim > 1 else ps_ for ps_ in area_ps + ] + ps_curve.insert(0, np.zeros(ps_curve[0].shape)) + fig = plt.figure() + ax = plt.subplot(111) + for k in range(len(types)): + ax.plot(rs, ps_curve[k + 1], color=[0, 0, 0], linewidth=0.5) + ax.fill_between( + rs, + ps_curve[k], + ps_curve[k + 1], + color=cs[k], + label=str('[{:.3f}'.format(aps[k]) + ']' + types[k])) + plt.xlabel('recall') + plt.ylabel('precision') + plt.xlim(0, 1.) + plt.ylim(0, 1.) + plt.title(figure_tile) + plt.legend() + # plt.show() + fig.savefig(outDir + '/{}.png'.format(figure_tile)) + plt.close(fig) + + +def analyze_individual_category(k, cocoDt, cocoGt, catId, iou_type): + nm = cocoGt.loadCats(catId)[0] + print('--------------analyzing {}-{}---------------'.format( + k + 1, nm['name'])) + ps_ = {} + dt = copy.deepcopy(cocoDt) + nm = cocoGt.loadCats(catId)[0] + imgIds = cocoGt.getImgIds() + dt_anns = dt.dataset['annotations'] + select_dt_anns = [] + for ann in dt_anns: + if ann['category_id'] == catId: + select_dt_anns.append(ann) + dt.dataset['annotations'] = select_dt_anns + dt.createIndex() + # compute precision but ignore superclass confusion + gt = copy.deepcopy(cocoGt) + child_catIds = gt.getCatIds(supNms=[nm['supercategory']]) + for idx, ann in enumerate(gt.dataset['annotations']): + if (ann['category_id'] in child_catIds + and ann['category_id'] != catId): + gt.dataset['annotations'][idx]['ignore'] = 1 + gt.dataset['annotations'][idx]['iscrowd'] = 1 + gt.dataset['annotations'][idx]['category_id'] = catId + cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type) + cocoEval.params.imgIds = imgIds + cocoEval.params.maxDets = [100] + cocoEval.params.iouThrs = [.1] + cocoEval.params.useCats = 1 + cocoEval.evaluate() + cocoEval.accumulate() + ps_supercategory = cocoEval.eval['precision'][0, :, k, :, :] + ps_['ps_supercategory'] = ps_supercategory + # compute precision but ignore any class confusion + gt = copy.deepcopy(cocoGt) + for idx, ann in enumerate(gt.dataset['annotations']): + if ann['category_id'] != catId: + gt.dataset['annotations'][idx]['ignore'] = 1 + gt.dataset['annotations'][idx]['iscrowd'] = 1 + gt.dataset['annotations'][idx]['category_id'] = catId + cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type) + cocoEval.params.imgIds = imgIds + cocoEval.params.maxDets = [100] + cocoEval.params.iouThrs = [.1] + cocoEval.params.useCats = 1 + cocoEval.evaluate() + cocoEval.accumulate() + ps_allcategory = cocoEval.eval['precision'][0, :, k, :, :] + ps_['ps_allcategory'] = ps_allcategory + return k, ps_ + + +def analyze_results(res_file, ann_file, res_types, out_dir): + for res_type in res_types: + assert res_type in ['bbox', 'segm'] + + directory = os.path.dirname(out_dir + '/') + if not os.path.exists(directory): + print('-------------create {}-----------------'.format(out_dir)) + os.makedirs(directory) + + cocoGt = COCO(ann_file) + cocoDt = cocoGt.loadRes(res_file) + imgIds = cocoGt.getImgIds() + for res_type in res_types: + iou_type = res_type + cocoEval = COCOeval( + copy.deepcopy(cocoGt), copy.deepcopy(cocoDt), iou_type) + cocoEval.params.imgIds = imgIds + cocoEval.params.iouThrs = [.75, .5, .1] + cocoEval.params.maxDets = [100] + cocoEval.evaluate() + cocoEval.accumulate() + ps = cocoEval.eval['precision'] + ps = np.vstack([ps, np.zeros((4, *ps.shape[1:]))]) + catIds = cocoGt.getCatIds() + recThrs = cocoEval.params.recThrs + with 
Pool(processes=48) as pool: + args = [(k, cocoDt, cocoGt, catId, iou_type) + for k, catId in enumerate(catIds)] + analyze_results = pool.starmap(analyze_individual_category, args) + for k, catId in enumerate(catIds): + nm = cocoGt.loadCats(catId)[0] + print('--------------saving {}-{}---------------'.format( + k + 1, nm['name'])) + analyze_result = analyze_results[k] + assert k == analyze_result[0] + ps_supercategory = analyze_result[1]['ps_supercategory'] + ps_allcategory = analyze_result[1]['ps_allcategory'] + # compute precision but ignore superclass confusion + ps[3, :, k, :, :] = ps_supercategory + # compute precision but ignore any class confusion + ps[4, :, k, :, :] = ps_allcategory + # fill in background and false negative errors and plot + ps[ps == -1] = 0 + ps[5, :, k, :, :] = (ps[4, :, k, :, :] > 0) + ps[6, :, k, :, :] = 1.0 + makeplot(recThrs, ps[:, :, k], out_dir, nm['name']) + makeplot(recThrs, ps, out_dir, 'all') + + +def main(): + parser = ArgumentParser(description='COCO Evaluation') + parser.add_argument('result', help='result file path') + parser.add_argument( + '--ann', + default='/mnt/SSD/dataset/coco/annotations/instances_minival2017.json', + help='annotation file path') + parser.add_argument( + '--types', type=str, nargs='+', default=['bbox'], help='result types') + parser.add_argument( + '--analyze', action='store_true', help='whether to analyze results') + parser.add_argument( + '--out_dir', + type=str, + default=None, + help='dir to save analyze result images') + parser.add_argument( + '--splitRng', + type=int, + default=32, + help='range to split area in evaluation') + args = parser.parse_args() + if not args.analyze: + eval_results(args.result, args.ann, args.types, splitRng=args.splitRng) + else: + assert args.out_dir is not None + analyze_results( + args.result, args.ann, args.types, out_dir=args.out_dir) + + +if __name__ == '__main__': + main() diff --git a/tools/examples/r50_fpn_frcnn_1x.py b/tools/examples/r50_fpn_frcnn_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..6814445f8e1ba10a5fad24502ac8aff535f60f21 --- /dev/null +++ b/tools/examples/r50_fpn_frcnn_1x.py @@ -0,0 +1,125 @@ +# model settings +model = dict( + pretrained= + '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth', + backbone=dict( + type='resnet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='fb'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + coarsest_stride=32, + anchor_scales=[8], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + use_sigmoid_cls=True), + roi_block=dict( + type='SingleLevelRoI', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='SharedFCRoIHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False)) +meta_params = dict( + rpn_train_cfg = dict( + pos_fraction=0.5, + pos_balance_sampling=False, + neg_pos_ub=256, + allowed_border=0, + anchor_batch_size=256, + pos_iou_thr=0.7, + neg_iou_thr=0.3, + neg_balance_thr=0, + min_pos_iou=1e-3, + pos_weight=-1, + smoothl1_beta=1 / 9.0, + debug=False), + rpn_test_cfg = dict( + nms_across_levels=False, + nms_pre=2000, 
+ nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0), + rcnn_train_cfg = dict( + pos_iou_thr=0.5, + neg_iou_thr=0.5, + crowd_thr=1.1, + roi_batch_size=512, + add_gt_as_proposals=True, + pos_fraction=0.25, + pos_balance_sampling=False, + neg_pos_ub=512, + neg_balance_thr=0, + pos_weight=-1, + debug=False), + rcnn_test_cfg = dict(score_thr=1e-3, max_per_img=100, nms_thr=0.5) +) +# dataset settings +data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True) +img_per_gpu = 1 +data_workers = 2 +train_dataset = dict( + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5) +test_dataset = dict( + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2) +# learning policy +lr_policy = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.333, + step=[8, 11]) +max_epoch = 12 +checkpoint_config = dict(interval=1) +dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') +# logging settings +log_level = 'INFO' +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')), + ]) +# yapf:enable +work_dir = './model/r50_fpn_frcnn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/tools/examples/r50_fpn_maskrcnn_1x.py b/tools/examples/r50_fpn_maskrcnn_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..49b32037ec5139ee64d21bc6e9c607dcd69da018 --- /dev/null +++ b/tools/examples/r50_fpn_maskrcnn_1x.py @@ -0,0 +1,136 @@ +# model settings +model = dict( + pretrained= + '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth', + backbone=dict( + type='resnet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='fb'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + coarsest_stride=32, + anchor_scales=[8], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + use_sigmoid_cls=True), + roi_block=dict( + type='SingleLevelRoI', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='SharedFCRoIHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False), + mask_block=dict( + type='SingleLevelRoI', + roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=81)) +meta_params = dict( + rpn_train_cfg=dict( + pos_fraction=0.5, + pos_balance_sampling=False, + neg_pos_ub=256, + allowed_border=0, + anchor_batch_size=256, + pos_iou_thr=0.7, + neg_iou_thr=0.3, + neg_balance_thr=0, + 
min_pos_iou=1e-3, + pos_weight=-1, + smoothl1_beta=1 / 9.0, + debug=False), + rpn_test_cfg=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0), + rcnn_train_cfg=dict( + mask_size=28, + pos_iou_thr=0.5, + neg_iou_thr=0.5, + crowd_thr=1.1, + roi_batch_size=512, + add_gt_as_proposals=True, + pos_fraction=0.25, + pos_balance_sampling=False, + neg_pos_ub=512, + neg_balance_thr=0, + pos_weight=-1, + debug=False), + rcnn_test_cfg=dict( + score_thr=1e-3, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5)) +# dataset settings +data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_per_gpu = 1 +data_workers = 2 +train_dataset = dict( + with_mask=True, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5) +test_dataset = dict( + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2) +# learning policy +lr_policy = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.333, + step=[8, 11]) +max_epoch = 12 +checkpoint_config = dict(interval=1) +dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') +# logging settings +log_level = 'INFO' +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')), + ]) +# yapf:enable +work_dir = './model/r50_fpn_mask_rcnn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/tools/examples/r50_fpn_rpn_1x.py b/tools/examples/r50_fpn_rpn_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..45c0a1a6c4649a18346251c8e81f5480f29da30f --- /dev/null +++ b/tools/examples/r50_fpn_rpn_1x.py @@ -0,0 +1,95 @@ +# model settings +model = dict( + pretrained= + '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth', + backbone=dict( + type='resnet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='fb'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + coarsest_stride=32, + anchor_scales=[8], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + use_sigmoid_cls=True)) +meta_params = dict( + rpn_train_cfg=dict( + pos_fraction=0.5, + pos_balance_sampling=False, + neg_pos_ub=256, + allowed_border=0, + anchor_batch_size=256, + pos_iou_thr=0.7, + neg_iou_thr=0.3, + neg_balance_thr=0, + min_pos_iou=1e-3, + pos_weight=-1, + smoothl1_beta=1 / 9.0, + debug=False), + rpn_test_cfg=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0)) +# dataset settings +data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_per_gpu = 1 +data_workers = 2 +train_dataset = dict( + ann_file=data_root + 'annotations/instances_train2017.json', + 
img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5) +test_dataset = dict( + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + test_mode=True) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2) +# learning policy +lr_policy = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.333, + step=[8, 11]) +max_epoch = 12 +checkpoint_config = dict(interval=1) +dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1') +# logging settings +log_level = 'INFO' +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')), + ]) +# yapf:enable +work_dir = './model/r50_fpn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/tools/test.py b/tools/test.py new file mode 100644 index 0000000000000000000000000000000000000000..2d062489100f3fc6a579ec811ff0391573f48454 --- /dev/null +++ b/tools/test.py @@ -0,0 +1,65 @@ +import os.path as osp +import sys +sys.path.append(osp.abspath(osp.join(__file__, '../../'))) +sys.path.append('/mnt/lustre/pangjiangmiao/sensenet_folder/mmcv') +import argparse + +import numpy as np +import torch + +import mmcv +from mmcv import Config +from mmcv.torchpack import load_checkpoint, parallel_test +from mmdet.core import _data_func, results2json +from mmdet.datasets import CocoDataset +from mmdet.datasets.data_engine import build_data +from mmdet.models import Detector + + +def parse_args(): + parser = argparse.ArgumentParser(description='MMDet test detector') + parser.add_argument('config', help='test config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument('--world_size', default=1, type=int) + parser.add_argument('--out', help='output result file') + parser.add_argument( + '--out_json', action='store_true', help='get json output file') + args = parser.parse_args() + return args + + +args = parse_args() + + +def main(): + cfg = Config.fromfile(args.config) + cfg.model['pretrained'] = None + # TODO support img_per_gpu > 1 + assert cfg.img_per_gpu == 1 + test_dataset = CocoDataset(**cfg.test_dataset) + + if args.world_size == 1: + # TODO verify this part + args.dist = False + args.img_per_gpu = cfg.img_per_gpu + args.data_workers = cfg.data_workers + model = Detector(**cfg.model, **cfg.meta_params) + load_checkpoint(model, args.checkpoint) + test_loader = build_data(cfg.test_dataset, args) + model = torch.nn.DataParallel(model, device_ids=[0]) + # TODO write single_test (a sketch follows at the end of this diff) + outputs = single_test(test_loader, model) + else: + model = dict(cfg.model, **cfg.meta_params) + outputs = parallel_test(Detector, model, + args.checkpoint, test_dataset, _data_func, + range(args.world_size)) + + if args.out: + mmcv.dump(outputs, args.out, protocol=4) + if args.out_json: + results2json(test_dataset, outputs, args.out + '.json') + + +if __name__ == '__main__': + main() diff --git a/tools/train.py b/tools/train.py new file mode 100644 index 0000000000000000000000000000000000000000..0cb2450acf511715c716594e37b0968876aad683 --- /dev/null +++ b/tools/train.py @@ -0,0 +1,85 @@ +from __future__ import division +import argparse +import sys +import os.path as osp +sys.path.append(osp.abspath(osp.join(__file__, '../../')))
+sys.path.append('/mnt/lustre/pangjiangmiao/sensenet_folder/mmcv') + +import torch +import torch.multiprocessing as mp +from mmcv import Config +from mmcv.torchpack import Runner +from mmdet.core import (batch_processor, init_dist, broadcast_params, + DistOptimizerStepperHook, DistSamplerSeedHook) +from mmdet.datasets.data_engine import build_data +from mmdet.models import Detector +from mmdet.nn.parallel import MMDataParallel + + +def parse_args(): + parser = argparse.ArgumentParser(description='MMDet train val detector') + parser.add_argument('config', help='train config file path') + parser.add_argument('--validate', action='store_true', help='validate during training') + parser.add_argument( + '--dist', action='store_true', help='distributed training or not') + parser.add_argument('--world_size', default=1, type=int) + parser.add_argument('--rank', default=0, type=int) + args = parser.parse_args() + + return args + + +args = parse_args() + + +def main(): + # Fetch config information first: init_dist below reads cfg.dist_params + cfg = Config.fromfile(args.config) + # TODO more flexible + args.img_per_gpu = cfg.img_per_gpu + args.data_workers = cfg.data_workers + + # Enable distributed training or not + if args.dist: + print('Enabling distributed training.') + mp.set_start_method("spawn", force=True) + init_dist( + args.world_size, + args.rank, + **cfg.dist_params) + else: + print('Distributed training is disabled.') + + # prepare training loader + train_loader = [build_data(cfg.train_dataset, args)] + if args.validate: + val_loader = build_data(cfg.val_dataset, args) + train_loader.append(val_loader) + + # build model + model = Detector(**cfg.model, **cfg.meta_params) + if args.dist: + model = model.cuda() + broadcast_params(model) + else: + device_ids = [args.rank % torch.cuda.device_count()] + model = MMDataParallel(model, device_ids=device_ids).cuda() + + # register hooks + runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, + cfg.log_level) + optimizer_stepper = DistOptimizerStepperHook( + **cfg.grad_clip_config) if args.dist else cfg.grad_clip_config + runner.register_training_hooks(cfg.lr_policy, optimizer_stepper, + cfg.checkpoint_config, cfg.log_config) + if args.dist: + runner.register_hook(DistSamplerSeedHook()) + if cfg.resume_from: + runner.resume(cfg.resume_from) + elif cfg.load_from: + runner.load_checkpoint(cfg.load_from) + runner.run(train_loader, cfg.workflow, cfg.max_epoch, args=args) + + +if __name__ == "__main__": + main()
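tools/test.py above calls a `single_test` helper that its TODO leaves unwritten. A minimal sketch, assuming the loader yields keyword batches the wrapped detector accepts and that, as in the RPN detector's forward above, passing `return_loss=False` dispatches to `test()`:

```python
import torch
import mmcv

def single_test(data_loader, model):
    # Non-distributed, single-GPU inference over the whole dataset.
    model.eval()
    results = []
    prog_bar = mmcv.ProgressBar(len(data_loader.dataset))
    for data in data_loader:
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)
        results.append(result)
        prog_bar.update()
    return results
```

With this in place, `python tools/test.py <config> <checkpoint> --out results.pkl` would exercise the single-GPU path end to end.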