diff --git a/TDL.md b/TDL.md
new file mode 100644
index 0000000000000000000000000000000000000000..1679338c04733a1a23e5dc2e8ac96069c6b3c41e
--- /dev/null
+++ b/TDL.md
@@ -0,0 +1,29 @@
+### MMCV
+- [ ] Implement the 'get' attribute of 'Config'
+- [ ] Fix Config bug: addict converts 'None' values to '{}'
+- [ ] Default logger should only be enabled on GPU 0
+- [ ] Unit tests: mmcv and mmcv.torchpack
+
+
+### MMDetection
+
+#### Basic
+- [ ] Implement non-distributed training
+- [ ] Verify the nccl/nccl2/gloo backends
+- [ ] Replace ugly code that plugs params into 'args' to make them globally accessible
+- [ ] Replace 'print' with 'logger'
+
+
+#### Testing
+- [ ] Implement distributed testing
+- [ ] Implement single-GPU testing
+
+
+#### Refactor
+- [ ] Reconsider parameter names
+- [ ] Refactor functions in 'core'
+- [ ] Merge single-scale test & aug test into one function; do the same for other redundant code
+
+#### New features
+- [ ] Plug loss params into Config
+- [ ] Multi-head communication
diff --git a/mmdet/core/__init__.py b/mmdet/core/__init__.py
index 7992d8deb3ba0f6586c1bef0705f33a41a78d917..52ed690e6689abdd1dcc4af6ccb237f1d3fbdad9 100644
--- a/mmdet/core/__init__.py
+++ b/mmdet/core/__init__.py
@@ -1,6 +1,9 @@
-from .anchor_generator import *
+from .train_engine import *
+from .test_engine import *
+from .rpn_ops import *
 from .bbox_ops import *
 from .mask_ops import *
+from .losses import *
 from .eval import *
-from .nn import *
-from .targets import *
+from .post_processing import *
+from .utils import *
diff --git a/mmdet/core/bbox_ops/__init__.py b/mmdet/core/bbox_ops/__init__.py
index 4bf9aeb74a5db787f687bacf0147ae1e2b1054bf..dbdbb970648bcac1ced61096b436ef9966266c1f 100644
--- a/mmdet/core/bbox_ops/__init__.py
+++ b/mmdet/core/bbox_ops/__init__.py
@@ -1,12 +1,16 @@
 from .geometry import bbox_overlaps
 from .sampling import (random_choice, bbox_assign, bbox_assign_via_overlaps,
-                       bbox_sampling, sample_positives, sample_negatives)
+                       bbox_sampling, sample_positives, sample_negatives,
+                       sample_proposals)
 from .transforms import (bbox_transform, bbox_transform_inv, bbox_flip,
-                         bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox)
+                         bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox,
+                         bbox2result)
+from .bbox_target import bbox_target
 
 __all__ = [
     'bbox_overlaps', 'random_choice', 'bbox_assign',
     'bbox_assign_via_overlaps', 'bbox_sampling', 'sample_positives',
     'sample_negatives', 'bbox_transform', 'bbox_transform_inv', 'bbox_flip',
-    'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox'
+    'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result',
+    'bbox_target', 'sample_proposals'
 ]
diff --git a/mmdet/core/bbox_ops/bbox_target.py b/mmdet/core/bbox_ops/bbox_target.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce1f885e184a37779c7636f8c6053248e8cd3330
--- /dev/null
+++ b/mmdet/core/bbox_ops/bbox_target.py
@@ -0,0 +1,87 @@
+import torch
+
+from .transforms import bbox_transform
+
+
+def bbox_target(pos_proposals_list,
+                neg_proposals_list,
+                pos_gt_bboxes_list,
+                pos_gt_labels_list,
+                cfg,
+                reg_num_classes=1,
+                target_means=[.0, .0, .0, .0],
+                target_stds=[1.0, 1.0, 1.0, 1.0],
+                return_list=False):
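+    """Compute classification and regression targets of sampled proposals
+    for each image; the per-image targets are concatenated over the
+    mini-batch unless return_list is True.
+    """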
+    img_per_gpu = len(pos_proposals_list)
+    all_labels = []
+    all_label_weights = []
+    all_bbox_targets = []
+    all_bbox_weights = []
+    for img_id in range(img_per_gpu):
+        pos_proposals = pos_proposals_list[img_id]
+        neg_proposals = neg_proposals_list[img_id]
+        pos_gt_bboxes = pos_gt_bboxes_list[img_id]
+        pos_gt_labels = pos_gt_labels_list[img_id]
+        labels, label_weights, bbox_targets, bbox_weights = \
+            proposal_target_single(pos_proposals, neg_proposals,
+                                   pos_gt_bboxes, pos_gt_labels,
+                                   reg_num_classes, cfg, target_means,
+                                   target_stds)
+        all_labels.append(labels)
+        all_label_weights.append(label_weights)
+        all_bbox_targets.append(bbox_targets)
+        all_bbox_weights.append(bbox_weights)
+
+    if return_list:
+        return all_labels, all_label_weights, all_bbox_targets, all_bbox_weights
+
+    labels = torch.cat(all_labels, 0)
+    label_weights = torch.cat(all_label_weights, 0)
+    bbox_targets = torch.cat(all_bbox_targets, 0)
+    bbox_weights = torch.cat(all_bbox_weights, 0)
+    return labels, label_weights, bbox_targets, bbox_weights
+
+
+def proposal_target_single(pos_proposals,
+                           neg_proposals,
+                           pos_gt_bboxes,
+                           pos_gt_labels,
+                           reg_num_classes,
+                           cfg,
+                           target_means=[.0, .0, .0, .0],
+                           target_stds=[1.0, 1.0, 1.0, 1.0]):
+    num_pos = pos_proposals.size(0)
+    num_neg = neg_proposals.size(0)
+    num_samples = num_pos + num_neg
+    labels = pos_proposals.new_zeros(num_samples, dtype=torch.long)
+    label_weights = pos_proposals.new_zeros(num_samples)
+    bbox_targets = pos_proposals.new_zeros(num_samples, 4)
+    bbox_weights = pos_proposals.new_zeros(num_samples, 4)
+    if num_pos > 0:
+        labels[:num_pos] = pos_gt_labels
+        pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight
+        label_weights[:num_pos] = pos_weight
+        pos_bbox_targets = bbox_transform(pos_proposals, pos_gt_bboxes,
+                                          target_means, target_stds)
+        bbox_targets[:num_pos, :] = pos_bbox_targets
+        bbox_weights[:num_pos, :] = 1
+    if num_neg > 0:
+        label_weights[-num_neg:] = 1.0
+    if reg_num_classes > 1:
+        bbox_targets, bbox_weights = expand_target(bbox_targets, bbox_weights,
+                                                   labels, reg_num_classes)
+
+    return labels, label_weights, bbox_targets, bbox_weights
+
+
+def expand_target(bbox_targets, bbox_weights, labels, num_classes):
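+    """Expand class-agnostic (N, 4) targets to a per-class (N, 4 * C)
+    layout, so that only the columns of each sample's ground-truth class
+    receive targets and non-zero weights.
+    """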
+    bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0),
+                                                  4 * num_classes))
+    bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0),
+                                                  4 * num_classes))
+    for i in torch.nonzero(labels > 0).squeeze(-1):
+        start, end = labels[i] * 4, (labels[i] + 1) * 4
+        bbox_targets_expand[i, start:end] = bbox_targets[i, :]
+        bbox_weights_expand[i, start:end] = bbox_weights[i, :]
+    return bbox_targets_expand, bbox_weights_expand
diff --git a/mmdet/core/bbox_ops/sampling.py b/mmdet/core/bbox_ops/sampling.py
index 9825e3bd15ec87dc6bc9c31be4b2f11422fcda13..eed820496409f1f8265f73e81bd4667e6b1558f8 100644
--- a/mmdet/core/bbox_ops/sampling.py
+++ b/mmdet/core/bbox_ops/sampling.py
@@ -42,7 +42,7 @@ def bbox_assign(proposals,
         min_pos_iou(float): minimum iou for a bbox to be considered as a positive bbox,
                             for RPN, it is usually set as 0, for Fast R-CNN,
                             it is usually set as pos_iou_thr
-        crowd_thr: ignore proposals which have iof(intersection over foreground) with 
+        crowd_thr: ignore proposals which have iof(intersection over foreground) with
         crowd bboxes over crowd_thr
     Returns:
         tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, )
@@ -253,3 +253,43 @@ def bbox_sampling(assigned_gt_inds,
                                 max_overlaps, neg_balance_thr,
                                 neg_hard_fraction)
     return pos_inds, neg_inds
+
+
+def sample_proposals(proposals_list, gt_bboxes_list, gt_crowds_list,
+                     gt_labels_list, cfg):
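+    """Sample positive/negative proposals for every image in a mini-batch
+    by mapping sample_proposals_single over the per-image inputs; returns
+    a tuple of lists, each list holding one entry per image.
+    """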
+    cfg_list = [cfg for _ in range(len(proposals_list))]
+    results = map(sample_proposals_single, proposals_list, gt_bboxes_list,
+                  gt_crowds_list, gt_labels_list, cfg_list)
+    # list of tuple to tuple of list
+    return tuple(map(list, zip(*results)))
+
+
+def sample_proposals_single(proposals,
+                            gt_bboxes,
+                            gt_crowds,
+                            gt_labels,
+                            cfg):
+    proposals = proposals[:, :4]
+    assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \
+        bbox_assign(
+            proposals, gt_bboxes, gt_crowds, gt_labels, cfg.pos_iou_thr,
+            cfg.neg_iou_thr, cfg.pos_iou_thr, cfg.crowd_thr)
+    if cfg.add_gt_as_proposals:
+        proposals = torch.cat([gt_bboxes, proposals], dim=0)
+        gt_assign_self = torch.arange(
+            1, len(gt_labels) + 1, dtype=torch.long, device=proposals.device)
+        assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds])
+        assigned_labels = torch.cat([gt_labels, assigned_labels])
+
+    pos_inds, neg_inds = bbox_sampling(
+        assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, cfg.neg_pos_ub,
+        cfg.pos_balance_sampling, max_overlaps, cfg.neg_balance_thr)
+    pos_proposals = proposals[pos_inds]
+    neg_proposals = proposals[neg_inds]
+    pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1
+    pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
+    pos_gt_labels = assigned_labels[pos_inds]
+
+    return (pos_inds, neg_inds, pos_proposals, neg_proposals,
+            pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels)
diff --git a/mmdet/core/bbox_ops/transforms.py b/mmdet/core/bbox_ops/transforms.py
index 6f83a1dc56efdc214fe96c60b9a587a1cb81602b..a9f1e2a45fab42652189e84f42aadc2e5f7a8994 100644
--- a/mmdet/core/bbox_ops/transforms.py
+++ b/mmdet/core/bbox_ops/transforms.py
@@ -126,3 +126,22 @@ def roi2bbox(rois):
         bbox = rois[inds, 1:]
         bbox_list.append(bbox)
     return bbox_list
+
+
+def bbox2result(bboxes, labels, num_classes):
+    """Convert detection results to a list of numpy arrays
+    Args:
+        bboxes (Tensor): shape (n, 5)
+        labels (Tensor): shape (n, )
+        num_classes (int): class number, including background class
+    Returns:
+        list(ndarray): bbox results of each class
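+
+    Example (illustrative tensors only):
+        >>> det_bboxes = torch.rand(100, 5)
+        >>> det_labels = torch.randint(0, 80, (100, ))
+        >>> results = bbox2result(det_bboxes, det_labels, num_classes=81)
+        >>> len(results)  # one (k, 5) array per foreground class
+        80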
+    """
+    if bboxes.shape[0] == 0:
+        return [
+            np.zeros((0, 5), dtype=np.float32) for i in range(num_classes - 1)
+        ]
+    else:
+        bboxes = bboxes.cpu().numpy()
+        labels = labels.cpu().numpy()
+        return [bboxes[labels == i, :] for i in range(num_classes - 1)]
diff --git a/mmdet/core/losses/__init__.py b/mmdet/core/losses/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e4447ff0a6c708e9407bc47698a6281e8c81216
--- /dev/null
+++ b/mmdet/core/losses/__init__.py
@@ -0,0 +1,12 @@
+from .losses import (
+    weighted_nll_loss, weighted_cross_entropy, weighted_binary_cross_entropy,
+    sigmoid_focal_loss, weighted_sigmoid_focal_loss, mask_cross_entropy,
+    weighted_mask_cross_entropy, smooth_l1_loss, weighted_smoothl1, accuracy)
+
+__all__ = [
+    'weighted_nll_loss', 'weighted_cross_entropy',
+    'weighted_binary_cross_entropy', 'sigmoid_focal_loss',
+    'weighted_sigmoid_focal_loss', 'mask_cross_entropy',
+    'weighted_mask_cross_entropy', 'smooth_l1_loss', 'weighted_smoothl1',
+    'accuracy'
+]
diff --git a/mmdet/core/losses/losses.py b/mmdet/core/losses/losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..575c91d053650acbde927f49e0c474e5fd325e77
--- /dev/null
+++ b/mmdet/core/losses/losses.py
@@ -0,0 +1,110 @@
+# TODO merge naive and weighted loss to one function.
+import torch
+import torch.nn.functional as F
+
+
+def weighted_nll_loss(pred, label, weight, ave_factor=None):
+    if ave_factor is None:
+        ave_factor = max(torch.sum(weight > 0).float().item(), 1.)
+    raw = F.nll_loss(pred, label, size_average=False, reduce=False)
+    return torch.sum(raw * weight)[None] / ave_factor
+
+
+def weighted_cross_entropy(pred, label, weight, ave_factor=None):
+    if ave_factor is None:
+        ave_factor = max(torch.sum(weight > 0).float().item(), 1.)
+    raw = F.cross_entropy(pred, label, size_average=False, reduce=False)
+    return torch.sum(raw * weight)[None] / ave_factor
+
+
+def weighted_binary_cross_entropy(pred, label, weight, ave_factor=None):
+    if ave_factor is None:
+        ave_factor = max(torch.sum(weight > 0).float().item(), 1.)
+    return F.binary_cross_entropy_with_logits(
+        pred, label.float(), weight.float(),
+        size_average=False)[None] / ave_factor
+
+
+def sigmoid_focal_loss(pred,
+                       target,
+                       weight,
+                       gamma=2.0,
+                       alpha=0.25,
+                       size_average=True):
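+    # Focal loss (Lin et al., arXiv:1708.02002) as weighted BCE: 'pt' below
+    # is (1 - p_t) in the paper's notation, so the per-element weight works
+    # out to alpha_t * (1 - p_t)^gamma.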
+    pred_sigmoid = pred.sigmoid()
+    pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)
+    weight = (alpha * target + (1 - alpha) * (1 - target)) * weight
+    weight = weight * pt.pow(gamma)
+    return F.binary_cross_entropy_with_logits(
+        pred, target, weight, size_average=size_average)
+
+
+def weighted_sigmoid_focal_loss(pred,
+                                target,
+                                weight,
+                                gamma=2.0,
+                                alpha=0.25,
+                                ave_factor=None,
+                                num_classes=80):
+    if ave_factor is None:
+        ave_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6
+    return sigmoid_focal_loss(
+        pred, target, weight, gamma=gamma, alpha=alpha,
+        size_average=False)[None] / ave_factor
+
+
+def mask_cross_entropy(pred, target, label):
+    num_rois = pred.size()[0]
+    inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
+    pred_slice = pred[inds, label].squeeze(1)
+    return F.binary_cross_entropy_with_logits(
+        pred_slice, target, size_average=True)[None]
+
+
+def weighted_mask_cross_entropy(pred, target, weight, label):
+    num_rois = pred.size()[0]
+    num_samples = torch.sum(weight > 0).float().item() + 1e-6
+    assert num_samples >= 1
+    inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
+    pred_slice = pred[inds, label].squeeze(1)
+    return F.binary_cross_entropy_with_logits(
+        pred_slice, target, weight, size_average=False)[None] / num_samples
+
+
+def smooth_l1_loss(pred, target, beta=1.0, size_average=True, reduce=True):
+    assert beta > 0
+    assert pred.size() == target.size() and target.numel() > 0
+    diff = torch.abs(pred - target)
+    loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
+                       diff - 0.5 * beta)
+    if size_average:
+        loss /= pred.numel()
+    if reduce:
+        loss = loss.sum()
+    return loss
+
+
+def weighted_smoothl1(pred, target, weight, beta=1.0, ave_factor=None):
+    if ave_factor is None:
+        ave_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6
+    loss = smooth_l1_loss(pred, target, beta, size_average=False, reduce=False)
+    return torch.sum(loss * weight)[None] / ave_factor
+
+
+def accuracy(pred, target, topk=1):
+    if isinstance(topk, int):
+        topk = (topk, )
+        return_single = True
+    else:
+        return_single = False
+
+    maxk = max(topk)
+    _, pred_label = pred.topk(maxk, 1, True, True)
+    pred_label = pred_label.t()
+    correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))
+
+    res = []
+    for k in topk:
+        correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
+        res.append(correct_k.mul_(100.0 / pred.size(0)))
+    return res[0] if return_single else res
diff --git a/mmdet/core/mask_ops/__init__.py b/mmdet/core/mask_ops/__init__.py
index 25850cdc62ae69271f3788288d960b86ef179452..4669ba1f9102cbcabe20c48ea193408c1e12e4aa 100644
--- a/mmdet/core/mask_ops/__init__.py
+++ b/mmdet/core/mask_ops/__init__.py
@@ -2,9 +2,10 @@ from .segms import (flip_segms, polys_to_mask, mask_to_bbox,
                     polys_to_mask_wrt_box, polys_to_boxes, rle_mask_voting,
                     rle_mask_nms, rle_masks_to_boxes)
 from .utils import split_combined_gt_polys
+from .mask_target import mask_target
 
 __all__ = [
     'flip_segms', 'polys_to_mask', 'mask_to_bbox', 'polys_to_mask_wrt_box',
     'polys_to_boxes', 'rle_mask_voting', 'rle_mask_nms', 'rle_masks_to_boxes',
-    'split_combined_gt_polys'
+    'split_combined_gt_polys', 'mask_target'
 ]
diff --git a/mmdet/core/mask_ops/mask_target.py b/mmdet/core/mask_ops/mask_target.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fb65e3587473b60c4fd25b075072b9a3bb4670c
--- /dev/null
+++ b/mmdet/core/mask_ops/mask_target.py
@@ -0,0 +1,35 @@
+import torch
+import numpy as np
+
+from .segms import polys_to_mask_wrt_box
+
+
+def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_polys_list,
+                img_meta, cfg):
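+    """Compute mask targets for the positive proposals of each image and
+    concatenate them into a single (num_pos, mask_size, mask_size) tensor.
+    """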
+    cfg_list = [cfg for _ in range(len(pos_proposals_list))]
+    img_metas = [img_meta for _ in range(len(pos_proposals_list))]
+    mask_targets = map(mask_target_single, pos_proposals_list,
+                       pos_assigned_gt_inds_list, gt_polys_list, img_metas,
+                       cfg_list)
+    mask_targets = torch.cat(tuple(mask_targets), dim=0)
+    return mask_targets
+
+
+def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_polys,
+                       img_meta, cfg):
+    mask_size = cfg.mask_size
+    num_pos = pos_proposals.size(0)
+    mask_targets = pos_proposals.new_zeros((num_pos, mask_size, mask_size))
+    if num_pos > 0:
+        pos_proposals = pos_proposals.cpu().numpy()
+        pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
+        scale_factor = img_meta['scale_factor'][0].cpu().numpy()
+        for i in range(num_pos):
+            bbox = pos_proposals[i, :] / scale_factor
+            polys = gt_polys[pos_assigned_gt_inds[i]]
+            mask = polys_to_mask_wrt_box(polys, bbox, mask_size)
+            mask = np.array(mask > 0, dtype=np.float32)
+            mask_targets[i, ...] = torch.from_numpy(mask).to(
+                mask_targets.device)
+    return mask_targets
diff --git a/mmdet/core/post_processing/merge_augs.py b/mmdet/core/post_processing/merge_augs.py
index 5d56e481e5aee2ce113cea7adcb11ebe0aaede5b..35dfce24f91b4a6260476a3f77b67471c88e4bc7 100644
--- a/mmdet/core/post_processing/merge_augs.py
+++ b/mmdet/core/post_processing/merge_augs.py
@@ -1,6 +1,6 @@
 import torch
 
-from mmcv.ops import nms
+from mmdet.ops import nms
 import numpy as np
 
 from ..bbox_ops import bbox_mapping_back
diff --git a/mmdet/core/rpn_ops/__init__.py b/mmdet/core/rpn_ops/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d5f9244dde2b244bbe42d54640e8a648277c506
--- /dev/null
+++ b/mmdet/core/rpn_ops/__init__.py
@@ -0,0 +1,2 @@
+from .anchor_generator import *
+from .anchor_target import *
diff --git a/mmdet/core/anchor_generator.py b/mmdet/core/rpn_ops/anchor_generator.py
similarity index 100%
rename from mmdet/core/anchor_generator.py
rename to mmdet/core/rpn_ops/anchor_generator.py
diff --git a/mmdet/core/rpn_ops/anchor_target.py b/mmdet/core/rpn_ops/anchor_target.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6bba8ed221db022fb95590c6b10a56c8b6d4553
--- /dev/null
+++ b/mmdet/core/rpn_ops/anchor_target.py
@@ -0,0 +1,159 @@
+import torch
+import numpy as np
+from ..bbox_ops import (bbox_assign, bbox_transform, bbox_sampling)
+
+
+def anchor_target(anchor_list, valid_flag_list, featmap_sizes, gt_bboxes_list,
+                  img_shapes, target_means, target_stds, cfg):
+    """Compute anchor regression and classification targets
+
+    Args:
+        anchor_list(list): anchors of each feature map level
+        valid_flag_list(list): valid flags of the anchors of each image
+        featmap_sizes(list): feature map sizes
+        gt_bboxes_list(list): ground truth bboxes of images in a mini-batch
+        img_shapes(list): shape of each image in a mini-batch
+        target_means(list): mean values used to normalize bbox targets
+        target_stds(list): std values used to normalize bbox targets
+        cfg(dict): configs
+
+    Returns:
+        tuple
+    """
+    if len(featmap_sizes) == len(anchor_list):
+        all_anchors = torch.cat(anchor_list, 0)
+        anchor_nums = [anchors.size(0) for anchors in anchor_list]
+        use_isomerism_anchors = False
+    elif len(img_shapes) == len(anchor_list):
+        # using different anchors for different images
+        all_anchors_list = [
+            torch.cat(anchor_list[img_id], 0)
+            for img_id in range(len(img_shapes))
+        ]
+        anchor_nums = [anchors.size(0) for anchors in anchor_list[0]]
+        use_isomerism_anchors = True
+    else:
+        raise ValueError('length of anchor_list should be equal to number of '
+                         'feature lvls or number of images in a batch')
+    all_labels = []
+    all_label_weights = []
+    all_bbox_targets = []
+    all_bbox_weights = []
+    num_total_sampled = 0
+    for img_id in range(len(img_shapes)):
+        if isinstance(valid_flag_list[img_id], list):
+            valid_flags = torch.cat(valid_flag_list[img_id], 0)
+        else:
+            valid_flags = valid_flag_list[img_id]
+        if use_isomerism_anchors:
+            all_anchors = all_anchors_list[img_id]
+        inside_flags = anchor_inside_flags(all_anchors, valid_flags,
+                                           img_shapes[img_id][:2],
+                                           cfg.allowed_border)
+        if not inside_flags.any():
+            return None
+        gt_bboxes = gt_bboxes_list[img_id]
+        anchor_targets = anchor_target_single(all_anchors, inside_flags,
+                                              gt_bboxes, target_means,
+                                              target_stds, cfg)
+        (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
+         neg_inds) = anchor_targets
+        all_labels.append(labels)
+        all_label_weights.append(label_weights)
+        all_bbox_targets.append(bbox_targets)
+        all_bbox_weights.append(bbox_weights)
+        num_total_sampled += max(pos_inds.numel() + neg_inds.numel(), 1)
+    all_labels = torch.stack(all_labels, 0)
+    all_label_weights = torch.stack(all_label_weights, 0)
+    all_bbox_targets = torch.stack(all_bbox_targets, 0)
+    all_bbox_weights = torch.stack(all_bbox_weights, 0)
+    # split into different feature levels
+    labels_list = []
+    label_weights_list = []
+    bbox_targets_list = []
+    bbox_weights_list = []
+    start = 0
+    for anchor_num in anchor_nums:
+        end = start + anchor_num
+        labels_list.append(all_labels[:, start:end].squeeze(0))
+        label_weights_list.append(all_label_weights[:, start:end].squeeze(0))
+        bbox_targets_list.append(all_bbox_targets[:, start:end].squeeze(0))
+        bbox_weights_list.append(all_bbox_weights[:, start:end].squeeze(0))
+        start = end
+    return (labels_list, label_weights_list, bbox_targets_list,
+            bbox_weights_list, num_total_sampled)
+
+
+def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means,
+                         target_stds, cfg):
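+    """Compute targets for the anchors of a single image: keep only anchors
+    inside the image (plus allowed_border), assign gts and sample pos/neg
+    anchors, then unmap the targets back to the full anchor set.
+    """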
+    num_total_anchors = all_anchors.size(0)
+    anchors = all_anchors[inside_flags, :]
+    assigned_gt_inds, argmax_overlaps, max_overlaps = bbox_assign(
+        anchors,
+        gt_bboxes,
+        pos_iou_thr=cfg.pos_iou_thr,
+        neg_iou_thr=cfg.neg_iou_thr,
+        min_pos_iou=cfg.min_pos_iou)
+    pos_inds, neg_inds = bbox_sampling(assigned_gt_inds, cfg.anchor_batch_size,
+                                       cfg.pos_fraction, cfg.neg_pos_ub,
+                                       cfg.pos_balance_sampling, max_overlaps,
+                                       cfg.neg_balance_thr)
+
+    bbox_targets = torch.zeros_like(anchors)
+    bbox_weights = torch.zeros_like(anchors)
+    labels = torch.zeros_like(assigned_gt_inds)
+    label_weights = torch.zeros_like(assigned_gt_inds, dtype=torch.float)
+
+    if len(pos_inds) > 0:
+        pos_inds = unique(pos_inds)
+        pos_anchors = anchors[pos_inds, :]
+        pos_gt_bbox = gt_bboxes[assigned_gt_inds[pos_inds] - 1, :]
+        pos_bbox_targets = bbox_transform(pos_anchors, pos_gt_bbox,
+                                          target_means, target_stds)
+        bbox_targets[pos_inds, :] = pos_bbox_targets
+        bbox_weights[pos_inds, :] = 1.0
+        labels[pos_inds] = 1
+        if cfg.pos_weight <= 0:
+            label_weights[pos_inds] = 1.0
+        else:
+            label_weights[pos_inds] = cfg.pos_weight
+    if len(neg_inds) > 0:
+        neg_inds = unique(neg_inds)
+        label_weights[neg_inds] = 1.0
+
+    # map up to original set of anchors
+    labels = unmap(labels, num_total_anchors, inside_flags)
+    label_weights = unmap(label_weights, num_total_anchors, inside_flags)
+    bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
+    bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)
+
+    return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
+            neg_inds)
+
+
+def anchor_inside_flags(all_anchors, valid_flags, img_shape, allowed_border=0):
+    img_h, img_w = img_shape.float()
+    if allowed_border >= 0:
+        inside_flags = valid_flags & \
+            (all_anchors[:, 0] >= -allowed_border) & \
+            (all_anchors[:, 1] >= -allowed_border) & \
+            (all_anchors[:, 2] < img_w + allowed_border) & \
+            (all_anchors[:, 3] < img_h + allowed_border)
+    else:
+        inside_flags = valid_flags
+    return inside_flags
+
+
+def unique(tensor):
+    if tensor.is_cuda:
+        u_tensor = np.unique(tensor.cpu().numpy())
+        return tensor.new_tensor(u_tensor)
+    else:
+        return torch.unique(tensor)
+
+
+def unmap(data, count, inds, fill=0):
+    """ Unmap a subset of item (data) back to the original set of items (of
+    size count) """
+    if data.dim() == 1:
+        ret = data.new_full((count, ), fill)
+        ret[inds] = data
+    else:
+        new_size = (count, ) + data.size()[1:]
+        ret = data.new_full(new_size, fill)
+        ret[inds, :] = data
+    return ret
diff --git a/mmdet/core/targets/__init__.py b/mmdet/core/targets/__init__.py
deleted file mode 100644
index b3b2567efff687ba503b8a37d9f096597a0c8780..0000000000000000000000000000000000000000
--- a/mmdet/core/targets/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from .anchor_target import anchor_target
-from .bbox_target import bbox_target
-from .mask_target import mask_target
-
-__all__ = ['anchor_target', 'bbox_target', 'mask_target']
diff --git a/mmdet/core/targets/anchor_target.py b/mmdet/core/targets/anchor_target.py
deleted file mode 100644
index ec2389f90885da0c92f0598dc6d45f59c0ab6dac..0000000000000000000000000000000000000000
--- a/mmdet/core/targets/anchor_target.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def anchor_target():
-    pass
diff --git a/mmdet/core/targets/bbox_target.py b/mmdet/core/targets/bbox_target.py
deleted file mode 100644
index 49642c2298735b163b98ad832a3a6a9ee9941c45..0000000000000000000000000000000000000000
--- a/mmdet/core/targets/bbox_target.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def bbox_target():
-    pass
diff --git a/mmdet/core/targets/mask_target.py b/mmdet/core/targets/mask_target.py
deleted file mode 100644
index 4c330e13b81e8cb27e35a8705e2e89b00792ddaa..0000000000000000000000000000000000000000
--- a/mmdet/core/targets/mask_target.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def mask_target():
-    pass
diff --git a/mmdet/core/test_engine.py b/mmdet/core/test_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..4825beda640c443b5d8aab0daf5c30838be4364b
--- /dev/null
+++ b/mmdet/core/test_engine.py
@@ -0,0 +1,14 @@
+from mmdet.datasets import collate
+from mmdet.nn.parallel import scatter
+
+__all__ = ['_data_func']
+
+
+def _data_func(data, gpu_id):
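+    """Collate a single test sample, scatter it to the given GPU, and build
+    the kwargs for a test-mode forward pass (bbox results rescaled to the
+    original image size, no loss).
+    """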
+    imgs, img_metas = tuple(
+        scatter(collate([data], samples_per_gpu=1), [gpu_id])[0])
+    return dict(
+        img=imgs,
+        img_meta=img_metas,
+        return_loss=False,
+        return_bboxes=True,
+        rescale=True)
diff --git a/mmdet/core/train_engine.py b/mmdet/core/train_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc745faad87cb2a97272934902822666be55d71f
--- /dev/null
+++ b/mmdet/core/train_engine.py
@@ -0,0 +1,40 @@
+from collections import OrderedDict
+
+import torch
+
+from mmdet.nn.parallel import scatter
+
+
+def parse_losses(losses):
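+    """Reduce raw loss tensors (possibly dicts or lists of tensors) to a
+    total loss plus an OrderedDict of scalar log values; every entry whose
+    key contains 'loss' is summed into the total.
+    """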
+    log_vars = OrderedDict()
+    for loss_key, loss_value in losses.items():
+        if isinstance(loss_value, dict):
+            for _key, _value in loss_value.items():
+                if isinstance(_value, list):
+                    _value = sum([_loss.mean() for _loss in _value])
+                else:
+                    _value = _value.mean()
+                log_vars[_key] = _value
+        elif isinstance(loss_value, list):
+            log_vars[loss_key] = sum(_loss.mean() for _loss in loss_value)
+        else:
+            log_vars[loss_key] = loss_value.mean()
+
+    loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key)
+    log_vars['loss'] = loss
+    for _key, _value in log_vars.items():
+        log_vars[_key] = _value.item()
+
+    return loss, log_vars
+
+
+def batch_processor(model, data, train_mode, args=None):
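+    """Run a single training iteration: scatter the batch to the current
+    GPU, forward the model, and parse the losses. The total loss is divided
+    by world_size because average_gradients() all-reduces (sums) gradients
+    across ranks.
+    """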
+    data = scatter(data, [torch.cuda.current_device()])[0]
+    losses = model(**data)
+    loss, log_vars = parse_losses(losses)
+
+    outputs = dict(
+        loss=loss / args.world_size,
+        log_vars=log_vars,
+        num_samples=len(data['img'].data))
+
+    return outputs
diff --git a/mmdet/core/utils/__init__.py b/mmdet/core/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b6e79d62e60b5e1efaac985e039b36840f86397
--- /dev/null
+++ b/mmdet/core/utils/__init__.py
@@ -0,0 +1,3 @@
+from .dist_utils import *
+from .hooks import *
+from .misc import *
diff --git a/mmdet/core/utils/dist_utils.py b/mmdet/core/utils/dist_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..47279c7bf8fd3b0ed66c3099f465b0130c864a23
--- /dev/null
+++ b/mmdet/core/utils/dist_utils.py
@@ -0,0 +1,60 @@
+import os
+import torch
+import torch.multiprocessing as mp
+import torch.distributed as dist
+from torch.nn.utils import clip_grad
+from mmcv.torchpack import Hook, OptimizerStepperHook
+
+__all__ = [
+    'init_dist', 'average_gradients', 'broadcast_params',
+    'DistOptimizerStepperHook', 'DistSamplerSeedHook'
+]
+
+
+def init_dist(world_size,
+              rank,
+              backend='gloo',
+              master_ip='127.0.0.1',
+              port=29500):
+    if mp.get_start_method(allow_none=True) is None:
+        mp.set_start_method('spawn')
+    num_gpus = torch.cuda.device_count()
+    torch.cuda.set_device(rank % num_gpus)
+    os.environ['MASTER_ADDR'] = master_ip
+    os.environ['MASTER_PORT'] = str(port)
+    if backend == 'nccl':
+        dist.init_process_group(backend='nccl')
+    else:
+        dist.init_process_group(
+            backend='gloo', rank=rank, world_size=world_size)
+
+
+def average_gradients(model):
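+    """All-reduce (sum) the gradients of all trainable parameters."""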
+    for param in model.parameters():
+        if param.requires_grad and not (param.grad is None):
+            dist.all_reduce(param.grad.data)
+
+
+def broadcast_params(model):
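+    """Broadcast all parameters and buffers of rank 0 to the other ranks."""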
+    for p in model.state_dict().values():
+        dist.broadcast(p, 0)
+
+
+class DistOptimizerStepperHook(OptimizerStepperHook):
+
+    def after_train_iter(self, runner):
+        runner.optimizer.zero_grad()
+        runner.outputs['loss'].backward()
+        average_gradients(runner.model)
+        if self.grad_clip:
+            clip_grad.clip_grad_norm_(
+                filter(lambda p: p.requires_grad, runner.model.parameters()),
+                max_norm=self.max_norm,
+                norm_type=self.norm_type)
+        runner.optimizer.step()
+
+
+class DistSamplerSeedHook(Hook):
+
+    def before_epoch(self, runner):
+        runner.data_loader.sampler.set_epoch(runner.epoch)
diff --git a/mmdet/core/hooks.py b/mmdet/core/utils/hooks.py
similarity index 96%
rename from mmdet/core/hooks.py
rename to mmdet/core/utils/hooks.py
index 3347639d51ac19d5072bcb0a2e76c7747d686c77..f97e1fb29061ad5f07fa06907fbec72ede7a3bf3 100644
--- a/mmdet/core/hooks.py
+++ b/mmdet/core/utils/hooks.py
@@ -7,10 +7,16 @@ import mmcv
 import numpy as np
 import torch
 from mmcv.torchpack import Hook
-from mmdet import collate, scatter
+from mmdet.datasets import collate
+from mmdet.nn.parallel import scatter
 from pycocotools.cocoeval import COCOeval
 
-from .eval import eval_recalls
+from ..eval import eval_recalls
+
+__all__ = [
+    'EmptyCacheHook', 'DistEvalHook', 'DistEvalRecallHook',
+    'CocoDistEvalmAPHook'
+]
 
 
 class EmptyCacheHook(Hook):
@@ -237,10 +243,3 @@ class CocoDistEvalmAPHook(DistEvalHook, CocoEvalMixin):
             runner.log_buffer.output[field] = cocoEval.stats[0]
         runner.log_buffer.ready = True
         os.remove(tmp_file)
-
-
-class CocoDistCascadeEvalmAPHook(CocoDistEvalmAPHook):
-
-    def evaluate(self, runner, results):
-        results = [res[-1] for res in results]
-        super(CocoDistCascadeEvalmAPHook, self).evaluate(runner, results)
diff --git a/mmdet/core/utils/misc.py b/mmdet/core/utils/misc.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f9c05e4577f23125fad0f0714a8f1089e82dbee
--- /dev/null
+++ b/mmdet/core/utils/misc.py
@@ -0,0 +1,118 @@
+import subprocess
+
+import mmcv
+import numpy as np
+import torch
+
+__all__ = ['tensor2imgs', 'unique', 'unmap', 'results2json']
+
+
+def tensor2imgs(tensor,
+                color_order='RGB',
+                color_mean=(0.485, 0.456, 0.406),
+                color_std=(0.229, 0.224, 0.225)):
+    assert color_order in ['RGB', 'BGR']
+    img_per_gpu = tensor.size(0)
+    color_mean = np.array(color_mean, dtype=np.float32)
+    color_std = np.array(color_std, dtype=np.float32)
+    imgs = []
+    for img_id in range(img_per_gpu):
+        img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
+        if color_order == 'RGB':
+            img = mmcv.rgb2bgr(img)
+        img = img * color_std + color_mean
+        imgs.append(np.ascontiguousarray(img))
+    return imgs
+
+
+def unique(tensor):
+    if tensor.is_cuda:
+        u_tensor = np.unique(tensor.cpu().numpy())
+        return tensor.new_tensor(u_tensor)
+    else:
+        return torch.unique(tensor)
+
+
+def unmap(data, count, inds, fill=0):
+    """ Unmap a subset of item (data) back to the original set of items (of
+    size count) """
+    if data.dim() == 1:
+        ret = data.new_full((count, ), fill)
+        ret[inds] = data
+    else:
+        new_size = (count, ) + data.size()[1:]
+        ret = data.new_full(new_size, fill)
+        ret[inds, :] = data
+    return ret
+
+
+def xyxy2xywh(bbox):
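+    """Convert a [x1, y1, x2, y2] box to COCO-style [x, y, w, h]; widths
+    and heights here follow the inclusive (+1) pixel convention.
+    """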
+    _bbox = bbox.tolist()
+    return [
+        _bbox[0],
+        _bbox[1],
+        _bbox[2] - _bbox[0] + 1,
+        _bbox[3] - _bbox[1] + 1,
+    ]
+
+
+def det2json(dataset, results):
+    json_results = []
+    for idx in range(len(dataset)):
+        img_id = dataset.img_ids[idx]
+        result = results[idx]
+        for label in range(len(result)):
+            bboxes = result[label]
+            for i in range(bboxes.shape[0]):
+                data = dict()
+                data['image_id'] = img_id
+                data['bbox'] = xyxy2xywh(bboxes[i])
+                data['score'] = float(bboxes[i][4])
+                data['category_id'] = dataset.cat_ids[label]
+                json_results.append(data)
+    return json_results
+
+
+def segm2json(dataset, results):
+    json_results = []
+    for idx in range(len(dataset)):
+        img_id = dataset.img_ids[idx]
+        det, seg = results[idx]
+        for label in range(len(det)):
+            bboxes = det[label]
+            segms = seg[label]
+            for i in range(bboxes.shape[0]):
+                data = dict()
+                data['image_id'] = img_id
+                data['bbox'] = xyxy2xywh(bboxes[i])
+                data['score'] = float(bboxes[i][4])
+                data['category_id'] = dataset.cat_ids[label]
+                segms[i]['counts'] = segms[i]['counts'].decode()
+                data['segmentation'] = segms[i]
+                json_results.append(data)
+    return json_results
+
+
+def proposal2json(dataset, results):
+    json_results = []
+    for idx in range(len(dataset)):
+        img_id = dataset.img_ids[idx]
+        bboxes = results[idx]
+        for i in range(bboxes.shape[0]):
+            data = dict()
+            data['image_id'] = img_id
+            data['bbox'] = xyxy2xywh(bboxes[i])
+            data['score'] = float(bboxes[i][4])
+            data['category_id'] = 1
+            json_results.append(data)
+    return json_results
+
+
+def results2json(dataset, results, out_file):
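+    """Dump results to a COCO-style json file, dispatching on the per-image
+    result type: list -> detection, tuple -> (detection, segmentation),
+    ndarray -> proposals.
+    """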
+    if isinstance(results[0], list):
+        json_results = det2json(dataset, results)
+    elif isinstance(results[0], tuple):
+        json_results = segm2json(dataset, results)
+    elif isinstance(results[0], np.ndarray):
+        json_results = proposal2json(dataset, results)
+    else:
+        raise TypeError('invalid type of results')
+    mmcv.dump(json_results, out_file)
diff --git a/mmdet/datasets/coco.py b/mmdet/datasets/coco.py
index e0705e79b6168c2ccf45610af3609013082ddb48..a7eedca6a2cbab92e069415513def5ab363dc824 100644
--- a/mmdet/datasets/coco.py
+++ b/mmdet/datasets/coco.py
@@ -71,7 +71,6 @@ def parse_ann_info(ann_info, cat2label, with_mask=True):
 
 
 class CocoDataset(Dataset):
-
     def __init__(self,
                  ann_file,
                  img_prefix,
@@ -253,31 +252,38 @@ class CocoDataset(Dataset):
 
     def prepare_test_img(self, idx):
         """Prepare an image for testing (multi-scale and flipping)"""
-        img_info = self._load_info(idx, with_ann=False)
-        img_file = osp.join(self.prefix, img_info['file_name'])
+        img_info = self.img_infos[idx]
+        img = mmcv.imread(osp.join(self.img_prefix, img_info['file_name']))
         proposal = (self.proposals[idx][:, :4]
                     if self.proposals is not None else None)
 
-        def prepare_single(img_file, scale, flip, proposal=None):
-            img_np, shape_scale_np = self.img_transform(img_file, scale, flip)
-            img, shape_scale = self.numpy2tensor(img_np, shape_scale_np)
-            img_meta = dict(shape_scale=shape_scale, flip=flip)
+        def prepare_single(img, scale, flip, proposal=None):
+            _img, _img_shape, _scale_factor = self.img_transform(
+                img, scale, flip)
+            img, img_shape, scale_factor = self.numpy2tensor(
+                _img, _img_shape, _scale_factor)
+            ori_shape = (img_info['height'], img_info['width'])
+            img_meta = dict(
+                ori_shape=ori_shape,
+                img_shape=img_shape,
+                scale_factor=scale_factor,
+                flip=flip)
             if proposal is not None:
-                proposal = self.bbox_transform(proposal, shape_scale_np, flip)
+                proposal = self.bbox_transform(proposal, _scale_factor, flip)
                 proposal = self.numpy2tensor(proposal)
             return img, img_meta, proposal
 
         imgs = []
         img_metas = []
         proposals = []
-        for scale in self.img_scale:
-            img, img_meta, proposal = prepare_single(img_file, scale, False,
-                                                     proposal)
-            imgs.append(img)
-            img_metas.append(img_meta)
-            proposals.append(proposal)
+        for scale in self.img_scales:
+            _img, _img_meta, _proposal = prepare_single(
+                img, scale, False, proposal)
+            imgs.append(_img)
+            img_metas.append(_img_meta)
+            proposals.append(_proposal)
             if self.flip_ratio > 0:
-                img, img_meta, prop = prepare_single(img_file, scale, True,
-                                                     proposal)
-                imgs.append(img)
-                img_metas.append(img_meta)
+                _img, _img_meta, _proposal = prepare_single(
+                    img, scale, True, proposal)
+                imgs.append(_img)
+                img_metas.append(_img_meta)
diff --git a/mmdet/datasets/data_engine.py b/mmdet/datasets/data_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c89f21878a9f2fe2b21669ecfb2cd71cc9ae073
--- /dev/null
+++ b/mmdet/datasets/data_engine.py
@@ -0,0 +1,29 @@
+from functools import partial
+import torch
+from .coco import CocoDataset
+from .collate import collate
+from .sampler import GroupSampler, DistributedGroupSampler
+
+
+def build_data(cfg, args):
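+    """Build a CocoDataset and wrap it in a DataLoader, using a
+    DistributedGroupSampler when args.dist is set and a GroupSampler
+    otherwise.
+    """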
+    dataset = CocoDataset(**cfg)
+
+    if args.dist:
+        sampler = DistributedGroupSampler(dataset, args.img_per_gpu,
+                                          args.world_size, args.rank)
+        batch_size = args.img_per_gpu
+        num_workers = args.data_workers
+    else:
+        sampler = GroupSampler(dataset, args.img_per_gpu)
+        batch_size = args.world_size * args.img_per_gpu
+        num_workers = args.world_size * args.data_workers
+
+    loader = torch.utils.data.DataLoader(
+        dataset,
+        batch_size=batch_size,
+        sampler=sampler,
+        num_workers=num_workers,
+        collate_fn=partial(collate, samples_per_gpu=args.img_per_gpu),
+        pin_memory=False)
+
+    return loader
diff --git a/mmdet/datasets/transforms.py b/mmdet/datasets/transforms.py
index 81f3a627d0d20a5890ea9c5f597e814ea373b9e5..1532fe074f2968b225cc030dc3f868b3c7780194 100644
--- a/mmdet/datasets/transforms.py
+++ b/mmdet/datasets/transforms.py
@@ -1,9 +1,8 @@
 import mmcv
-# import cvbase as cvb
 import numpy as np
 import torch
 
-from mmdet.core import segms
+from mmdet.core.mask_ops import segms
 
 __all__ = [
     'ImageTransform', 'BboxTransform', 'PolyMaskTransform', 'Numpy2Tensor'
@@ -64,7 +63,7 @@ class ImageTransform(object):
 
 class ImageCrop(object):
     """crop image patches and resize patches into fixed size
-    1. (read and) flip image (if needed) 
+    1. (read and) flip image (if needed)
     2. crop image patches according to given bboxes
     3. resize patches into fixed size (default 224x224)
     4. normalize the image (if needed)
@@ -126,6 +125,8 @@ class BboxTransform(object):
         gt_bboxes = bboxes * scale_factor
         if flip:
             gt_bboxes = mmcv.bbox_flip(gt_bboxes, img_shape)
+        gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1])
+        gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0])
         if self.max_num_gts is None:
             return gt_bboxes
         else:
@@ -205,4 +206,4 @@ class Numpy2Tensor(object):
         if len(args) == 1:
             return torch.from_numpy(args[0])
         else:
-            return tuple([torch.from_numpy(array) for array in args])
+            return tuple([torch.from_numpy(np.array(array)) for array in args])
diff --git a/mmdet/models/__init__.py b/mmdet/models/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2209550509f71a71a66b2582440986eebcf3926c 100644
--- a/mmdet/models/__init__.py
+++ b/mmdet/models/__init__.py
@@ -0,0 +1 @@
+from .detectors import Detector
diff --git a/mmdet/models/bbox_heads/__init__.py b/mmdet/models/bbox_heads/__init__.py
index e6709af6176d5d574bf7f4a5bdf8e67691787536..49a863594290ce0b0e748ffc45c6d4a4381e2140 100644
--- a/mmdet/models/bbox_heads/__init__.py
+++ b/mmdet/models/bbox_heads/__init__.py
@@ -1,3 +1,4 @@
 from .bbox_head import BBoxHead
+from .convfc_bbox_head import ConvFCRoIHead, SharedFCRoIHead
 
-__all__ = ['BBoxHead']
+__all__ = ['BBoxHead', 'ConvFCRoIHead', 'SharedFCRoIHead']
diff --git a/mmdet/models/bbox_heads/bbox_head.py b/mmdet/models/bbox_heads/bbox_head.py
index 9f0c188a459286ee5c0e5ab71f8305da0d1ab761..5f6e1136eed45abe85a710170e76e04cba0e91cf 100644
--- a/mmdet/models/bbox_heads/bbox_head.py
+++ b/mmdet/models/bbox_heads/bbox_head.py
@@ -1,7 +1,7 @@
 import torch.nn as nn
 import torch.nn.functional as F
 
-from mmdet.core import (bbox_transform_inv, bbox_target, multiclass_nms,
+from mmdet.core import (bbox_transform_inv, multiclass_nms, bbox_target,
                         weighted_cross_entropy, weighted_smoothl1, accuracy)
 
 
@@ -10,7 +10,6 @@ class BBoxHead(nn.Module):
     regression respectively"""
 
     def __init__(self,
-                 exclude_mal_box=True,
                  with_avg_pool=False,
                  with_cls=True,
                  with_reg=True,
@@ -31,7 +30,6 @@ class BBoxHead(nn.Module):
         self.target_means = target_means
         self.target_stds = target_stds
         self.reg_class_agnostic = reg_class_agnostic
-        self.exclude_mal_box = exclude_mal_box
 
         in_channels = self.in_channels
         if self.with_avg_pool:
@@ -61,7 +59,7 @@ class BBoxHead(nn.Module):
         bbox_pred = self.fc_reg(x) if self.with_reg else None
         return cls_score, bbox_pred
 
-    def bbox_target(self, pos_proposals, neg_proposals, pos_gt_bboxes,
+    def get_bbox_target(self, pos_proposals, neg_proposals, pos_gt_bboxes,
-                    pos_gt_labels, rcnn_train_cfg):
+                        pos_gt_labels, rcnn_train_cfg):
         reg_num_classes = 1 if self.reg_class_agnostic else self.num_classes
         cls_reg_targets = bbox_target(
@@ -69,11 +67,10 @@ class BBoxHead(nn.Module):
             neg_proposals,
             pos_gt_bboxes,
             pos_gt_labels,
-            self.target_means,
-            self.target_stds,
             rcnn_train_cfg,
             reg_num_classes,
-            debug_imgs=self.debug_imgs)
+            target_means=self.target_means,
+            target_stds=self.target_stds)
         return cls_reg_targets
 
     def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets,
@@ -96,6 +93,7 @@ class BBoxHead(nn.Module):
                        cls_score,
                        bbox_pred,
                        img_shape,
+                       scale_factor,
                        rescale=False,
                        nms_cfg=None):
         if isinstance(cls_score, list):
@@ -111,7 +109,7 @@ class BBoxHead(nn.Module):
             # TODO: add clip here
 
         if rescale:
-            bboxes /= img_shape[-1]
+            bboxes /= scale_factor.float()
 
         if nms_cfg is None:
             return bboxes, scores
diff --git a/mmdet/models/bbox_heads/convfc_bbox_head.py b/mmdet/models/bbox_heads/convfc_bbox_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..02e2a6b6d859e728a47f98fe857f1e71c2a6754a
--- /dev/null
+++ b/mmdet/models/bbox_heads/convfc_bbox_head.py
@@ -0,0 +1,174 @@
+import torch.nn as nn
+
+from .bbox_head import BBoxHead
+from ..utils import ConvModule
+
+
+class ConvFCRoIHead(BBoxHead):
+    """More general bbox head, with shared conv and fc layers and two optional
+    separated branches.
+
+                                /-> cls convs -> cls fcs -> cls
+    shared convs -> shared fcs
+                                \-> reg convs -> reg fcs -> reg
+    """
+
+    def __init__(self,
+                 num_shared_convs=0,
+                 num_shared_fcs=0,
+                 num_cls_convs=0,
+                 num_cls_fcs=0,
+                 num_reg_convs=0,
+                 num_reg_fcs=0,
+                 conv_out_channels=256,
+                 fc_out_channels=1024,
+                 *args,
+                 **kwargs):
+        super(ConvFCRoIHead, self).__init__(*args, **kwargs)
+        assert (num_shared_convs + num_shared_fcs + num_cls_convs + num_cls_fcs
+                + num_reg_convs + num_reg_fcs > 0)
+        if num_cls_convs > 0 or num_reg_convs > 0:
+            assert num_shared_fcs == 0
+        if not self.with_cls:
+            assert num_cls_convs == 0 and num_cls_fcs == 0
+        if not self.with_reg:
+            assert num_reg_convs == 0 and num_reg_fcs == 0
+        self.num_shared_convs = num_shared_convs
+        self.num_shared_fcs = num_shared_fcs
+        self.num_cls_convs = num_cls_convs
+        self.num_cls_fcs = num_cls_fcs
+        self.num_reg_convs = num_reg_convs
+        self.num_reg_fcs = num_reg_fcs
+        self.conv_out_channels = conv_out_channels
+        self.fc_out_channels = fc_out_channels
+
+        # add shared convs and fcs
+        self.shared_convs, self.shared_fcs, last_layer_dim = self._add_conv_fc_branch(
+            self.num_shared_convs, self.num_shared_fcs, self.in_channels, True)
+        self.shared_out_channels = last_layer_dim
+
+        # add cls specific branch
+        self.cls_convs, self.cls_fcs, self.cls_last_dim = self._add_conv_fc_branch(
+            self.num_cls_convs, self.num_cls_fcs, self.shared_out_channels)
+
+        # add reg specific branch
+        self.reg_convs, self.reg_fcs, self.reg_last_dim = self._add_conv_fc_branch(
+            self.num_reg_convs, self.num_reg_fcs, self.shared_out_channels)
+
+        if self.num_shared_fcs == 0 and not self.with_avg_pool:
+            if self.num_cls_fcs == 0:
+                self.cls_last_dim *= (self.roi_feat_size * self.roi_feat_size)
+            if self.num_reg_fcs == 0:
+                self.reg_last_dim *= (self.roi_feat_size * self.roi_feat_size)
+
+        self.relu = nn.ReLU(inplace=True)
+        # reconstruct fc_cls and fc_reg since input channels are changed
+        if self.with_cls:
+            self.fc_cls = nn.Linear(self.cls_last_dim, self.num_classes)
+        if self.with_reg:
+            out_dim_reg = (4 if self.reg_class_agnostic else
+                           4 * self.num_classes)
+            self.fc_reg = nn.Linear(self.reg_last_dim, out_dim_reg)
+
+    def _add_conv_fc_branch(self,
+                            num_branch_convs,
+                            num_branch_fcs,
+                            in_channels,
+                            is_shared=False):
+        """Add shared or separable branch
+
+        convs -> avg pool (optional) -> fcs
+        """
+        last_layer_dim = in_channels
+        # add branch specific conv layers
+        branch_convs = nn.ModuleList()
+        if num_branch_convs > 0:
+            for i in range(num_branch_convs):
+                conv_in_channels = (last_layer_dim
+                                    if i == 0 else self.conv_out_channels)
+                branch_convs.append(
+                    ConvModule(
+                        conv_in_channels,
+                        self.conv_out_channels,
+                        3,
+                        padding=1,
+                        normalize=self.normalize,
+                        bias=self.with_bias))
+            last_layer_dim = self.conv_out_channels
+        # add branch specific fc layers
+        branch_fcs = nn.ModuleList()
+        if num_branch_fcs > 0:
+            # for shared branch, only consider self.with_avg_pool
+            # for separated branches, also consider self.num_shared_fcs
+            if (is_shared
+                    or self.num_shared_fcs == 0) and not self.with_avg_pool:
+                last_layer_dim *= (self.roi_feat_size * self.roi_feat_size)
+            for i in range(num_branch_fcs):
+                fc_in_channels = (last_layer_dim
+                                  if i == 0 else self.fc_out_channels)
+                branch_fcs.append(
+                    nn.Linear(fc_in_channels, self.fc_out_channels))
+            last_layer_dim = self.fc_out_channels
+        return branch_convs, branch_fcs, last_layer_dim
+
+    def init_weights(self):
+        super(ConvFCRoIHead, self).init_weights()
+        for module_list in [self.shared_fcs, self.cls_fcs, self.reg_fcs]:
+            for m in module_list.modules():
+                if isinstance(m, nn.Linear):
+                    nn.init.xavier_uniform_(m.weight)
+                    nn.init.constant_(m.bias, 0)
+
+    def forward(self, x):
+        # shared part
+        if self.num_shared_convs > 0:
+            for conv in self.shared_convs:
+                x = conv(x)
+
+        if self.num_shared_fcs > 0:
+            if self.with_avg_pool:
+                x = self.avg_pool(x)
+            x = x.view(x.size(0), -1)
+            for fc in self.shared_fcs:
+                x = self.relu(fc(x))
+        # separate branches
+        x_cls = x
+        x_reg = x
+
+        for conv in self.cls_convs:
+            x_cls = conv(x_cls)
+        if x_cls.dim() > 2:
+            if self.with_avg_pool:
+                x_cls = self.avg_pool(x_cls)
+            x_cls = x_cls.view(x_cls.size(0), -1)
+        for fc in self.cls_fcs:
+            x_cls = self.relu(fc(x_cls))
+
+        for conv in self.reg_convs:
+            x_reg = conv(x_reg)
+        if x_reg.dim() > 2:
+            if self.with_avg_pool:
+                x_reg = self.avg_pool(x_reg)
+            x_reg = x_reg.view(x_reg.size(0), -1)
+        for fc in self.reg_fcs:
+            x_reg = self.relu(fc(x_reg))
+
+        cls_score = self.fc_cls(x_cls) if self.with_cls else None
+        bbox_pred = self.fc_reg(x_reg) if self.with_reg else None
+        return cls_score, bbox_pred
+
+
+class SharedFCRoIHead(ConvFCRoIHead):
+
+    def __init__(self, num_fcs=2, fc_out_channels=1024, *args, **kwargs):
+        assert num_fcs >= 1
+        super(SharedFCRoIHead, self).__init__(
+            num_shared_convs=0,
+            num_shared_fcs=num_fcs,
+            num_cls_convs=0,
+            num_cls_fcs=0,
+            num_reg_convs=0,
+            num_reg_fcs=0,
+            fc_out_channels=fc_out_channels,
+            *args,
+            **kwargs)
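+
+
+# A minimal instantiation sketch (values are illustrative; `in_channels`,
+# `roi_feat_size` and `num_classes` are assumed to be BBoxHead kwargs):
+#     bbox_head = SharedFCRoIHead(
+#         num_fcs=2, in_channels=256, roi_feat_size=7, num_classes=81)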
diff --git a/mmdet/models/builder.py b/mmdet/models/builder.py
index f109d851397a5106c33d173eda8986ee1c0f8b06..c3b058507fcdc461a9d3b0271858522e4ba0f1ce 100644
--- a/mmdet/models/builder.py
+++ b/mmdet/models/builder.py
@@ -1,4 +1,5 @@
 import mmcv
+from mmcv import torchpack
 from torch import nn
 
 from . import (backbones, necks, roi_extractors, rpn_heads, bbox_heads,
@@ -11,7 +12,7 @@ __all__ = [
 
 
-def _build_module(cfg, parrent=None):
-    return cfg if isinstance(cfg, nn.Module) else mmcv.obj_from_dict(
-        cfg, parrent)
+def _build_module(cfg, parent=None):
+    return cfg if isinstance(cfg, nn.Module) else torchpack.obj_from_dict(
+        cfg, parent)
 
 
diff --git a/mmdet/models/detectors/__init__.py b/mmdet/models/detectors/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..5b690f8d77d6d8eae1adc4bf8b04d3dd3db3462a 100644
--- a/mmdet/models/detectors/__init__.py
+++ b/mmdet/models/detectors/__init__.py
@@ -0,0 +1 @@
+from .detector import Detector
diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/detector.py
similarity index 60%
rename from mmdet/models/detectors/two_stage.py
rename to mmdet/models/detectors/detector.py
index 0c057d606fba6c322733490591d5352a42b426a5..80b7d4438cb59612dbff8a2bf71930eb6383a144 100644
--- a/mmdet/models/detectors/two_stage.py
+++ b/mmdet/models/detectors/detector.py
@@ -2,137 +2,141 @@ import torch
 import torch.nn as nn
 
 from .. import builder
-from mmdet.core.utils import tensor2imgs
 from mmdet.core import (bbox2roi, bbox_mapping, split_combined_gt_polys,
-                        bbox_sampling, multiclass_nms, merge_aug_proposals,
-                        merge_aug_bboxes, merge_aug_masks, bbox2result)
+                        bbox2result, multiclass_nms, merge_aug_proposals,
+                        merge_aug_bboxes, merge_aug_masks, sample_proposals)
 
 
-class TwoStageDetector(nn.Module):
-
+class Detector(nn.Module):
     def __init__(self,
                  backbone,
-                 neck,
-                 rpn_head,
-                 roi_block,
-                 bbox_head,
-                 rpn_train_cfg,
-                 rpn_test_cfg,
-                 rcnn_train_cfg,
-                 rcnn_test_cfg,
+                 neck=None,
+                 rpn_head=None,
+                 roi_block=None,
+                 bbox_head=None,
                  mask_block=None,
                  mask_head=None,
+                 rpn_train_cfg=None,
+                 rpn_test_cfg=None,
+                 rcnn_train_cfg=None,
+                 rcnn_test_cfg=None,
                  pretrained=None):
-        super(TwoStageDetector, self).__init__()
+        super(Detector, self).__init__()
         self.backbone = builder.build_backbone(backbone)
-        self.neck = builder.build_neck(neck) if neck is not None else None
-        self.rpn_head = builder.build_rpn_head(rpn_head)
-        self.bbox_roi_extractor = builder.build_roi_block(roi_block)
-        self.bbox_head = builder.build_bbox_head(bbox_head)
-        self.mask_roi_extractor = builder.build_roi_block(mask_block) if (
-            mask_block is not None) else None
-        self.mask_head = builder.build_mask_head(mask_head) if (
-            mask_head is not None) else None
-        self.with_mask = False if self.mask_head is None else True
 
-        self.rpn_train_cfg = rpn_train_cfg
-        self.rpn_test_cfg = rpn_test_cfg
-        self.rcnn_train_cfg = rcnn_train_cfg
-        self.rcnn_test_cfg = rcnn_test_cfg
+        self.with_neck = neck is not None
+        if self.with_neck:
+            self.neck = builder.build_neck(neck)
+
+        self.with_rpn = rpn_head is not None
+        if self.with_rpn:
+            self.rpn_head = builder.build_rpn_head(rpn_head)
+            self.rpn_train_cfg = rpn_train_cfg
+            self.rpn_test_cfg = rpn_test_cfg
+
+        self.with_bbox = bbox_head is not None
+        if self.with_bbox:
+            self.bbox_roi_extractor = builder.build_roi_extractor(roi_block)
+            self.bbox_head = builder.build_bbox_head(bbox_head)
+            self.rcnn_train_cfg = rcnn_train_cfg
+            self.rcnn_test_cfg = rcnn_test_cfg
+
+        self.with_mask = mask_head is not None
+        if self.with_mask:
+            self.mask_roi_extractor = builder.build_roi_extractor(mask_block)
+            self.mask_head = builder.build_mask_head(mask_head)
+
         self.init_weights(pretrained=pretrained)
 
     def init_weights(self, pretrained=None):
         if pretrained is not None:
             print('load model from: {}'.format(pretrained))
         self.backbone.init_weights(pretrained=pretrained)
-        if self.neck is not None:
+        if self.with_neck:
             if isinstance(self.neck, nn.Sequential):
                 for m in self.neck:
                     m.init_weights()
             else:
                 self.neck.init_weights()
-        self.rpn_head.init_weights()
-        self.bbox_roi_extractor.init_weights()
-        self.bbox_head.init_weights()
-        if self.mask_roi_extractor is not None:
+        if self.with_rpn:
+            self.rpn_head.init_weights()
+        if self.with_bbox:
+            self.bbox_roi_extractor.init_weights()
+            self.bbox_head.init_weights()
+        if self.with_mask:
             self.mask_roi_extractor.init_weights()
-        if self.mask_head is not None:
             self.mask_head.init_weights()
 
     def forward(self,
                 img,
                 img_meta,
                 gt_bboxes=None,
+                proposals=None,
                 gt_labels=None,
-                gt_ignore=None,
-                gt_polys=None,
+                gt_bboxes_ignore=None,
+                gt_mask_polys=None,
                 gt_poly_lens=None,
                 num_polys_per_mask=None,
                 return_loss=True,
-                return_bboxes=False,
+                return_bboxes=True,
                 rescale=False):
-        if not return_loss:
-            return self.test(img, img_meta, rescale)
+        assert self.with_rpn or proposals is not None, \
+            'Either an RPN head or precomputed proposals are required.'
 
-        if not self.with_mask:
-            assert (gt_polys is None and gt_poly_lens is None
-                    and num_polys_per_mask is None)
+        if not return_loss:
+            return self.test(img, img_meta, proposals, rescale)
         else:
-            assert (gt_polys is not None and gt_poly_lens is not None
-                    and num_polys_per_mask is not None)
-            gt_polys = split_combined_gt_polys(gt_polys, gt_poly_lens,
-                                               num_polys_per_mask)
-
-        if self.rpn_train_cfg.get('debug', False):
-            self.rpn_head.debug_imgs = tensor2imgs(img)
-        if self.rcnn_train_cfg.get('debug', False):
-            self.bbox_head.debug_imgs = tensor2imgs(img)
-            if self.mask_head is not None:
-                self.mask_head.debug_imgs = tensor2imgs(img)
-
-        img_shapes = img_meta['shape_scale']
+            losses = dict()
 
+        img_shapes = img_meta['img_shape']
         x = self.backbone(img)
-        if self.neck is not None:
+
+        if self.with_neck:
             x = self.neck(x)
 
-        rpn_outs = self.rpn_head(x)
-        proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg)
-        proposal_list = self.rpn_head.get_proposals(*proposal_inputs)
+        if self.with_rpn:
+            rpn_outs = self.rpn_head(x)
+            rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes,
+                                          self.rpn_train_cfg)
+            rpn_losses = self.rpn_head.loss(*rpn_loss_inputs)
+            losses.update(rpn_losses)
 
-        (pos_inds, neg_inds, pos_proposals, neg_proposals,
-         pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels) = bbox_sampling(
-             proposal_list, gt_bboxes, gt_ignore, gt_labels,
-             self.rcnn_train_cfg)
+        if self.with_bbox:
+            if self.with_rpn:
+                proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg)
+                proposal_list = self.rpn_head.get_proposals(*proposal_inputs)
+            else:
+                proposal_list = proposals
 
-        labels, label_weights, bbox_targets, bbox_weights = \
-            self.bbox_head.proposal_target(
-                pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels,
-                self.rcnn_train_cfg)
+            (pos_inds, neg_inds, pos_proposals, neg_proposals,
+             pos_assigned_gt_inds,
+             pos_gt_bboxes, pos_gt_labels) = sample_proposals(
+                 proposal_list, gt_bboxes, gt_bboxes_ignore, gt_labels,
+                 self.rcnn_train_cfg)
 
-        rois = bbox2roi([
-            torch.cat([pos, neg], dim=0)
-            for pos, neg in zip(pos_proposals, neg_proposals)
-        ])
-        # TODO: a more flexible way to configurate feat maps
-        roi_feats = self.bbox_roi_extractor(
-            x[:self.bbox_roi_extractor.num_inputs], rois)
-        cls_score, bbox_pred = self.bbox_head(roi_feats)
+            labels, label_weights, bbox_targets, bbox_weights = \
+                self.bbox_head.get_bbox_target(
+                    pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels,
+                    self.rcnn_train_cfg)
 
-        losses = dict()
-        rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes,
-                                      self.rpn_train_cfg)
-        rpn_losses = self.rpn_head.loss(*rpn_loss_inputs)
-        losses.update(rpn_losses)
+            rois = bbox2roi([
+                torch.cat([pos, neg], dim=0)
+                for pos, neg in zip(pos_proposals, neg_proposals)
+            ])
+            # TODO: a more flexible way to configure feat maps
+            roi_feats = self.bbox_roi_extractor(
+                x[:self.bbox_roi_extractor.num_inputs], rois)
+            cls_score, bbox_pred = self.bbox_head(roi_feats)
 
-        loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, labels,
-                                        label_weights, bbox_targets,
-                                        bbox_weights)
-        losses.update(loss_bbox)
+            loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, labels,
+                                            label_weights, bbox_targets,
+                                            bbox_weights)
+            losses.update(loss_bbox)
 
         if self.with_mask:
-            mask_targets = self.mask_head.mask_target(
-                pos_proposals, pos_assigned_gt_inds, gt_polys, img_shapes,
+            gt_polys = split_combined_gt_polys(gt_mask_polys, gt_poly_lens,
+                                               num_polys_per_mask)
+            mask_targets = self.mask_head.get_mask_target(
+                pos_proposals, pos_assigned_gt_inds, gt_polys, img_meta,
                 self.rcnn_train_cfg)
             pos_rois = bbox2roi(pos_proposals)
             mask_feats = self.mask_roi_extractor(
@@ -142,36 +146,40 @@ class TwoStageDetector(nn.Module):
                                                       torch.cat(pos_gt_labels))
         return losses
 
-    def test(self, imgs, img_metas, rescale=False):
+    def test(self, imgs, img_metas, proposals=None, rescale=False):
         """Test w/ or w/o augmentations."""
         assert isinstance(imgs, list) and isinstance(img_metas, list)
         assert len(imgs) == len(img_metas)
         img_per_gpu = imgs[0].size(0)
         assert img_per_gpu == 1
         if len(imgs) == 1:
-            return self.simple_test(imgs[0], img_metas[0], rescale)
+            return self.simple_test(imgs[0], img_metas[0], proposals, rescale)
         else:
-            return self.aug_test(imgs, img_metas, rescale)
-
-    def simple_test_bboxes(self, x, img_meta, rescale=False):
-        """Test only det bboxes without augmentation."""
+            return self.aug_test(imgs, img_metas, proposals, rescale)
 
-        img_shapes = img_meta['shape_scale']
+    def simple_test_rpn(self, x, img_meta):
+        img_shapes = img_meta['img_shape']
         rpn_outs = self.rpn_head(x)
         proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg)
-        proposal_list = self.rpn_head.get_proposals(*proposal_inputs)
+        proposal_list = self.rpn_head.get_proposals(*proposal_inputs)[0]
+        return proposal_list
 
-        rois = bbox2roi(proposal_list)
+    def simple_test_bboxes(self, x, img_meta, proposals, rescale=False):
+        """Test only det bboxes without augmentation."""
+        rois = bbox2roi(proposals)
         roi_feats = self.bbox_roi_extractor(
             x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
         cls_score, bbox_pred = self.bbox_head(roi_feats)
         # image shape of the first image in the batch (only one)
-        img_shape = img_shapes[0]
+        img_shape = img_meta['img_shape'][0]
+        scale_factor = img_meta['scale_factor']
         det_bboxes, det_labels = self.bbox_head.get_det_bboxes(
             rois,
             cls_score,
             bbox_pred,
             img_shape,
+            scale_factor,
             rescale=rescale,
             nms_cfg=self.rcnn_test_cfg)
         return det_bboxes, det_labels
@@ -183,41 +191,52 @@ class TwoStageDetector(nn.Module):
                          det_labels,
                          rescale=False):
         # image shape of the first image in the batch (only one)
-        img_shape = img_meta['shape_scale'][0]
+        img_shape = img_meta['img_shape'][0]
+        scale_factor = img_meta['scale_factor']
         if det_bboxes.shape[0] == 0:
             segm_result = [[] for _ in range(self.mask_head.num_classes - 1)]
         else:
             # if det_bboxes is rescaled to the original image size, we need to
             # rescale it back to the testing scale to obtain RoIs.
-            _bboxes = (det_bboxes[:, :4] * img_shape[-1]
+            _bboxes = (det_bboxes[:, :4] * scale_factor.float()
                        if rescale else det_bboxes)
             mask_rois = bbox2roi([_bboxes])
             mask_feats = self.mask_roi_extractor(
                 x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois)
             mask_pred = self.mask_head(mask_feats)
             segm_result = self.mask_head.get_seg_masks(
-                mask_pred, det_bboxes, det_labels, img_shape,
-                self.rcnn_test_cfg, rescale)
+                mask_pred,
+                det_bboxes,
+                det_labels,
+                self.rcnn_test_cfg,
+                ori_scale=img_meta['ori_shape'])
         return segm_result
 
-    def simple_test(self, img, img_meta, rescale=False):
+    def simple_test(self, img, img_meta, proposals=None, rescale=False):
         """Test without augmentation."""
         # get feature maps
         x = self.backbone(img)
-        if self.neck is not None:
+        if self.with_neck:
             x = self.neck(x)
-        det_bboxes, det_labels = self.simple_test_bboxes(
-            x, img_meta, rescale=rescale)
-        bbox_result = bbox2result(det_bboxes, det_labels,
-                                  self.bbox_head.num_classes)
-        if not self.with_mask:
-            return bbox_result
-
-        segm_result = self.simple_test_mask(
-            x, img_meta, det_bboxes, det_labels, rescale=rescale)
+        if self.with_rpn:
+            proposals = self.simple_test_rpn(x, img_meta)
+        if self.with_bbox:
+            # FIXME: verify the proposals format expected by simple_test_bboxes
+            det_bboxes, det_labels = self.simple_test_bboxes(
+                x, img_meta, [proposals], rescale=rescale)
+            bbox_result = bbox2result(det_bboxes, det_labels,
+                                      self.bbox_head.num_classes)
+            if not self.with_mask:
+                return bbox_result
 
-        return bbox_result, segm_result
+            segm_result = self.simple_test_mask(
+                x, img_meta, det_bboxes, det_labels, rescale=rescale)
+            return bbox_result, segm_result
+        else:
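+            # map proposals back to the original image scale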
+            proposals[:, :4] /= img_meta['scale_factor'].float()
+            return proposals.cpu().numpy()
 
+    # TODO: aug test has not been verified yet
     def aug_test_bboxes(self, imgs, img_metas):
         """Test with augmentations for det bboxes."""
         # step 1: get RPN proposals for augmented images, apply NMS to the
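The rename from `TwoStageDetector` to `Detector` reflects that every component is now optional. A condensed, illustrative sketch of the single-image test flow implemented above (names follow the methods in this file; this is a reading aid, not the actual code path):

```python
def simple_test_flow(detector, img, img_meta, proposals=None):
    # mirrors Detector.simple_test: each stage is guarded by a with_* flag
    x = detector.backbone(img)
    if detector.with_neck:
        x = detector.neck(x)
    if detector.with_rpn:
        proposals = detector.simple_test_rpn(x, img_meta)
    if not detector.with_bbox:
        # proposal-only model: return rescaled proposals as an array
        proposals[:, :4] /= img_meta['scale_factor'].float()
        return proposals.cpu().numpy()
    return detector.simple_test_bboxes(x, img_meta, [proposals])
```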
diff --git a/mmdet/models/detectors/rpn.py b/mmdet/models/detectors/rpn.py
deleted file mode 100644
index 6d80c9d9b10a12c07155f11ab00b24542f805cc6..0000000000000000000000000000000000000000
--- a/mmdet/models/detectors/rpn.py
+++ /dev/null
@@ -1,100 +0,0 @@
-import torch.nn as nn
-
-from mmdet.core import tensor2imgs, merge_aug_proposals, bbox_mapping
-from .. import builder
-
-
-class RPN(nn.Module):
-
-    def __init__(self,
-                 backbone,
-                 neck,
-                 rpn_head,
-                 rpn_train_cfg,
-                 rpn_test_cfg,
-                 pretrained=None):
-        super(RPN, self).__init__()
-        self.backbone = builder.build_backbone(backbone)
-        self.neck = builder.build_neck(neck) if neck is not None else None
-        self.rpn_head = builder.build_rpn_head(rpn_head)
-        self.rpn_train_cfg = rpn_train_cfg
-        self.rpn_test_cfg = rpn_test_cfg
-        self.init_weights(pretrained=pretrained)
-
-    def init_weights(self, pretrained=None):
-        if pretrained is not None:
-            print('load model from: {}'.format(pretrained))
-        self.backbone.init_weights(pretrained=pretrained)
-        if self.neck is not None:
-            self.neck.init_weights()
-        self.rpn_head.init_weights()
-
-    def forward(self,
-                img,
-                img_meta,
-                gt_bboxes=None,
-                return_loss=True,
-                return_bboxes=False,
-                rescale=False):
-        if not return_loss:
-            return self.test(img, img_meta, rescale)
-
-        img_shapes = img_meta['shape_scale']
-
-        if self.rpn_train_cfg.get('debug', False):
-            self.rpn_head.debug_imgs = tensor2imgs(img)
-
-        x = self.backbone(img)
-        if self.neck is not None:
-            x = self.neck(x)
-        rpn_outs = self.rpn_head(x)
-
-        rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes,
-                                      self.rpn_train_cfg)
-        losses = self.rpn_head.loss(*rpn_loss_inputs)
-        return losses
-
-    def test(self, imgs, img_metas, rescale=False):
-        """Test w/ or w/o augmentations."""
-        assert isinstance(imgs, list) and isinstance(img_metas, list)
-        assert len(imgs) == len(img_metas)
-        img_per_gpu = imgs[0].size(0)
-        assert img_per_gpu == 1
-        if len(imgs) == 1:
-            return self.simple_test(imgs[0], img_metas[0], rescale)
-        else:
-            return self.aug_test(imgs, img_metas, rescale)
-
-    def simple_test(self, img, img_meta, rescale=False):
-        img_shapes = img_meta['shape_scale']
-        # get feature maps
-        x = self.backbone(img)
-        if self.neck is not None:
-            x = self.neck(x)
-        rpn_outs = self.rpn_head(x)
-        proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg)
-        proposals = self.rpn_head.get_proposals(*proposal_inputs)[0]
-        if rescale:
-            proposals[:, :4] /= img_shapes[0][-1]
-        return proposals.cpu().numpy()
-
-    def aug_test(self, imgs, img_metas, rescale=False):
-        aug_proposals = []
-        for img, img_meta in zip(imgs, img_metas):
-            x = self.backbone(img)
-            if self.neck is not None:
-                x = self.neck(x)
-            rpn_outs = self.rpn_head(x)
-            proposal_inputs = rpn_outs + (img_meta['shape_scale'],
-                                          self.rpn_test_cfg)
-            proposal_list = self.rpn_head.get_proposals(*proposal_inputs)
-            assert len(proposal_list) == 1
-            aug_proposals.append(proposal_list[0])  # len(proposal_list) = 1
-        merged_proposals = merge_aug_proposals(aug_proposals, img_metas,
-                                               self.rpn_test_cfg)
-        if not rescale:
-            img_shape = img_metas[0]['shape_scale'][0]
-            flip = img_metas[0]['flip'][0]
-            merged_proposals[:, :4] = bbox_mapping(merged_proposals[:, :4],
-                                                   img_shape, flip)
-        return merged_proposals.cpu().numpy()
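Removing the standalone `RPN` class works because the unified `Detector` covers the proposal-only use case: a config without bbox/mask components (such as tools/examples/r50_fpn_rpn_1x.py added below) leaves `with_bbox` and `with_mask` False, and `simple_test` then returns rescaled proposals, matching the deleted `RPN.simple_test`. A hedged usage sketch:

```python
from mmcv import Config
from mmdet.models import Detector

cfg = Config.fromfile('tools/examples/r50_fpn_rpn_1x.py')
model = Detector(**cfg.model, **cfg.meta_params)
assert model.with_rpn and not model.with_bbox and not model.with_mask
```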
diff --git a/mmdet/models/mask_heads/fcn_mask_head.py b/mmdet/models/mask_heads/fcn_mask_head.py
index 28865a68f006a4cd04753a1eb6caeda9ce3fc284..016c05204bdc4533f7cca438666aa011f5ceb56d 100644
--- a/mmdet/models/mask_heads/fcn_mask_head.py
+++ b/mmdet/models/mask_heads/fcn_mask_head.py
@@ -3,10 +3,9 @@ import numpy as np
 import pycocotools.mask as mask_util
 import torch
 import torch.nn as nn
-import torch.utils.checkpoint as cp
 
-from ..common import ConvModule
-from mmdet.core import mask_target, mask_cross_entropy
+from ..utils import ConvModule
+from mmdet.core import mask_cross_entropy, mask_target
 
 
 class FCNMaskHead(nn.Module):
@@ -21,7 +20,6 @@ class FCNMaskHead(nn.Module):
                  upsample_ratio=2,
                  num_classes=81,
                  class_agnostic=False,
-                 with_cp=False,
                  normalize=None):
         super(FCNMaskHead, self).__init__()
         if upsample_method not in [None, 'deconv', 'nearest', 'bilinear']:
@@ -39,7 +37,6 @@ class FCNMaskHead(nn.Module):
         self.class_agnostic = class_agnostic
         self.normalize = normalize
         self.with_bias = normalize is None
-        self.with_cp = with_cp
 
         self.convs = nn.ModuleList()
         for i in range(self.num_convs):
@@ -79,25 +76,9 @@ class FCNMaskHead(nn.Module):
                 m.weight, mode='fan_out', nonlinearity='relu')
             nn.init.constant_(m.bias, 0)
 
-    def convs_forward(self, x):
-
-        def m_lvl_convs_forward(x):
-            for conv in self.convs[1:-1]:
-                x = conv(x)
-            return x
-
-        if self.num_convs > 0:
-            x = self.convs[0](x)
-            if self.num_convs > 1:
-                if self.with_cp and x.requires_grad:
-                    x = cp.checkpoint(m_lvl_convs_forward, x)
-                else:
-                    x = m_lvl_convs_forward(x)
-                x = self.convs[-1](x)
-        return x
-
     def forward(self, x):
-        x = self.convs_forward(x)
+        for conv in self.convs:
+            x = conv(x)
         if self.upsample is not None:
             x = self.upsample(x)
             if self.upsample_method == 'deconv':
@@ -105,24 +86,18 @@ class FCNMaskHead(nn.Module):
         mask_pred = self.conv_logits(x)
         return mask_pred
 
-    def mask_target(self, pos_proposals, pos_assigned_gt_inds, gt_masks,
-                    img_shapes, rcnn_train_cfg):
+    def get_mask_target(self, pos_proposals, pos_assigned_gt_inds, gt_masks,
+                        img_meta, rcnn_train_cfg):
         mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds,
-                                   gt_masks, img_shapes, rcnn_train_cfg)
+                                   gt_masks, img_meta, rcnn_train_cfg)
         return mask_targets
 
     def loss(self, mask_pred, mask_targets, labels):
         loss_mask = mask_cross_entropy(mask_pred, mask_targets, labels)
         return loss_mask
 
-    def get_seg_masks(self,
-                      mask_pred,
-                      det_bboxes,
-                      det_labels,
-                      img_shape,
-                      rcnn_test_cfg,
-                      ori_scale,
-                      rescale=True):
+    def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,
+                      ori_scale):
         """Get segmentation masks from mask_pred and bboxes
         Args:
             mask_pred (Tensor or ndarray): shape (n, #class+1, h, w).
@@ -143,14 +118,11 @@ class FCNMaskHead(nn.Module):
         cls_segms = [[] for _ in range(self.num_classes - 1)]
         bboxes = det_bboxes.cpu().numpy()[:, :4]
         labels = det_labels.cpu().numpy() + 1
-        scale_factor = img_shape[-1] if rescale else 1.0
-        img_h = ori_scale['height'] if rescale else np.round(
-            ori_scale['height'].item() * img_shape[-1].item()).astype(np.int32)
-        img_w = ori_scale['width'] if rescale else np.round(
-            ori_scale['width'].item() * img_shape[-1].item()).astype(np.int32)
+        img_h = ori_scale[0]
+        img_w = ori_scale[1]
 
         for i in range(bboxes.shape[0]):
-            bbox = (bboxes[i, :] / float(scale_factor)).astype(int)
+            bbox = bboxes[i, :].astype(int)
             label = labels[i]
             w = bbox[2] - bbox[0] + 1
             h = bbox[3] - bbox[1] + 1
@@ -164,7 +136,7 @@ class FCNMaskHead(nn.Module):
 
             im_mask = np.zeros((img_h, img_w), dtype=np.float32)
 
-            im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = mmcv.resize(
+            im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = mmcv.imresize(
                 mask_pred_, (w, h))
             # im_mask = cv2.resize(im_mask, (img_w, img_h))
             im_mask = np.array(
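`get_seg_masks` now works directly in original-image coordinates: `det_bboxes` arrive already rescaled and `ori_scale` carries the original (height, width). A minimal sketch of the per-instance pasting step under those assumptions (`thr` plays the role of `mask_thr_binary` in `rcnn_test_cfg`):

```python
import numpy as np
import mmcv


def paste_mask(mask_pred_, bbox, img_h, img_w, thr=0.5):
    """Resize one class mask to its box and paste it onto a full canvas."""
    bbox = bbox.astype(int)
    w = max(bbox[2] - bbox[0] + 1, 1)
    h = max(bbox[3] - bbox[1] + 1, 1)
    im_mask = np.zeros((img_h, img_w), dtype=np.float32)
    im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = mmcv.imresize(
        mask_pred_, (w, h))
    return np.array(im_mask > thr, dtype=np.uint8)
```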
diff --git a/mmdet/models/necks/fpn.py b/mmdet/models/necks/fpn.py
index c4734e18621bec4cdb8e33052935c6d7f3a495e2..8b5b49826bad94ce00379e60bbafc905b0cba9af 100644
--- a/mmdet/models/necks/fpn.py
+++ b/mmdet/models/necks/fpn.py
@@ -1,7 +1,7 @@
 import torch.nn as nn
 import torch.nn.functional as F
-from ..common import ConvModule
-from ..weight_init import xavier_init
+from ..utils import ConvModule
+from ..utils import xavier_init
 
 
 class FPN(nn.Module):
diff --git a/mmdet/models/rpn_heads/rpn_head.py b/mmdet/models/rpn_heads/rpn_head.py
index f2fce9ebe7aa5c820139fa0188e2f6a25322ed66..7ffd441f694b5d6c37d3042bb25088f27b002ea9 100644
--- a/mmdet/models/rpn_heads/rpn_head.py
+++ b/mmdet/models/rpn_heads/rpn_head.py
@@ -9,8 +9,8 @@ from mmdet.core import (AnchorGenerator, anchor_target, bbox_transform_inv,
                         weighted_cross_entropy, weighted_smoothl1,
                         weighted_binary_cross_entropy)
 from mmdet.ops import nms
-from ..misc import multi_apply
-from ..weight_init import normal_init
+from ..utils import multi_apply
+from ..utils import normal_init
 
 
 class RPNHead(nn.Module):
diff --git a/mmdet/models/common/__init__.py b/mmdet/models/utils/__init__.py
similarity index 71%
rename from mmdet/models/common/__init__.py
rename to mmdet/models/utils/__init__.py
index 1a611c251065f2addc6c069d61c7e1f18fbd7da2..f11af964480456cce144172591bd0b94f3ed7ad7 100644
--- a/mmdet/models/common/__init__.py
+++ b/mmdet/models/utils/__init__.py
@@ -1,4 +1,6 @@
 from .conv_module import ConvModule
 from .norm import build_norm_layer
+from .misc import *
+from .weight_init import *
 
 __all__ = ['ConvModule', 'build_norm_layer']
diff --git a/mmdet/models/common/conv_module.py b/mmdet/models/utils/conv_module.py
similarity index 100%
rename from mmdet/models/common/conv_module.py
rename to mmdet/models/utils/conv_module.py
diff --git a/mmdet/models/misc.py b/mmdet/models/utils/misc.py
similarity index 100%
rename from mmdet/models/misc.py
rename to mmdet/models/utils/misc.py
diff --git a/mmdet/models/common/norm.py b/mmdet/models/utils/norm.py
similarity index 100%
rename from mmdet/models/common/norm.py
rename to mmdet/models/utils/norm.py
diff --git a/mmdet/models/weight_init.py b/mmdet/models/utils/weight_init.py
similarity index 100%
rename from mmdet/models/weight_init.py
rename to mmdet/models/utils/weight_init.py
diff --git a/mmdet/nn/parallel/scatter_gather.py b/mmdet/nn/parallel/scatter_gather.py
index 82511fd1db12774e1df1468e93353f2a963ed962..47f794e8916956f9e8c494e50aff7e5b870889e7 100644
--- a/mmdet/nn/parallel/scatter_gather.py
+++ b/mmdet/nn/parallel/scatter_gather.py
@@ -1,7 +1,7 @@
 import torch
 from ._functions import Scatter
 from torch.nn.parallel._functions import Scatter as OrigScatter
-from detkit.datasets.utils import DataContainer
+from mmdet.datasets.utils import DataContainer
 
 
 def scatter(inputs, target_gpus, dim=0):
diff --git a/tools/eval.py b/tools/eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..20cc571e94b2fcf228f2d0782cf8a8b16dd3688b
--- /dev/null
+++ b/tools/eval.py
@@ -0,0 +1,265 @@
+from argparse import ArgumentParser
+from multiprocessing import Pool
+import matplotlib.pyplot as plt
+import numpy as np
+import copy
+import os
+
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+
+
+def generate_area_range(splitRng=32, stop_size=128):
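+    # Standard COCO ranges (all/small/medium/large) followed by finer bands
+    # of width `splitRng` px: the defaults append [0, 32^2], [32^2, 64^2],
+    # [64^2, 96^2], [96^2, 128^2] and a final [128^2, 1e5^2] band.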
+    areaRng = [[0**2, 1e5**2], [0**2, 32**2], [32**2, 96**2], [96**2, 1e5**2]]
+    start = 0
+    while start < stop_size:
+        end = start + splitRng
+        areaRng.append([start * start, end * end])
+        start = end
+    areaRng.append([start * start, 1e5**2])
+    return areaRng
+
+
+def print_summarize(iouThr=None,
+                    iouThrs=None,
+                    precision=None,
+                    recall=None,
+                    areaRng_id=4,
+                    areaRngs=None,
+                    maxDets_id=2,
+                    maxDets=None):
+    assert (precision is not None) or (recall is not None)
+    iStr = ' {:<18} {} @[ IoU={:<9} | size={:>5}-{:>5} | maxDets={:>3d} ] = {:0.3f}'
+    titleStr = 'Average Precision' if precision is not None else 'Average Recall'
+    typeStr = '(AP)' if precision is not None else '(AR)'
+    iouStr = '{:0.2f}:{:0.2f}'.format(iouThrs[0], iouThrs[-1]) \
+        if iouThr is None else '{:0.2f}'.format(iouThr)
+
+    aind = [areaRng_id]
+    mind = [maxDets_id]
+    if precision is not None:
+        # dimension of precision: [TxRxKxAxM]
+        s = precision
+        # IoU
+        if iouThr is not None:
+            t = np.where(iouThr == iouThrs)[0]
+            s = s[t]
+        s = s[:, :, :, aind, mind]
+    else:
+        # dimension of recall: [TxKxAxM]
+        s = recall
+        if iouThr is not None:
+            t = np.where(iouThr == iouThrs)[0]
+            s = s[t]
+        s = s[:, :, aind, mind]
+    if len(s[s > -1]) == 0:
+        mean_s = -1
+    else:
+        mean_s = np.mean(s[s > -1])
+    print(
+        iStr.format(
+            titleStr, typeStr, iouStr, np.sqrt(areaRngs[areaRng_id][0]),
+            np.sqrt(areaRngs[areaRng_id][1])
+            if np.sqrt(areaRngs[areaRng_id][1]) < 999 else 'max',
+            maxDets[maxDets_id], mean_s))
+
+
+def eval_results(res_file, ann_file, res_types, splitRng):
+    for res_type in res_types:
+        assert res_type in ['proposal', 'bbox', 'segm', 'keypoints']
+
+    areaRng = generate_area_range(splitRng)
+    cocoGt = COCO(ann_file)
+    cocoDt = cocoGt.loadRes(res_file)
+    imgIds = cocoGt.getImgIds()
+    for res_type in res_types:
+        iou_type = 'bbox' if res_type == 'proposal' else res_type
+        cocoEval = COCOeval(cocoGt, cocoDt, iou_type)
+        cocoEval.params.imgIds = imgIds
+        if res_type == 'proposal':
+            cocoEval.params.useCats = 0
+            cocoEval.params.maxDets = [100, 300, 1000]
+        cocoEval.params.areaRng = areaRng
+        cocoEval.evaluate()
+        cocoEval.accumulate()
+        cocoEval.summarize()
+        ps = cocoEval.eval['precision']
+        rc = cocoEval.eval['recall']
+        for i in range(len(areaRng)):
+            print_summarize(None, cocoEval.params.iouThrs, ps, None, i,
+                            areaRng, 2, cocoEval.params.maxDets)
+
+
+def makeplot(rs, ps, outDir, class_name):
+    cs = np.vstack([
+        np.ones((2, 3)),
+        np.array([.31, .51, .74]),
+        np.array([.75, .31, .30]),
+        np.array([.36, .90, .38]),
+        np.array([.50, .39, .64]),
+        np.array([1, .6, 0])
+    ])
+    areaNames = ['all', 'small', 'medium', 'large']
+    types = ['C75', 'C50', 'Loc', 'Sim', 'Oth', 'BG', 'FN']
+    for i in range(len(areaNames)):
+        area_ps = ps[..., i, 0]
+        figure_title = class_name + '-' + areaNames[i]
+        aps = [ps_.mean() for ps_ in area_ps]
+        ps_curve = [
+            ps_.mean(axis=1) if ps_.ndim > 1 else ps_ for ps_ in area_ps
+        ]
+        ps_curve.insert(0, np.zeros(ps_curve[0].shape))
+        fig = plt.figure()
+        ax = plt.subplot(111)
+        for k in range(len(types)):
+            ax.plot(rs, ps_curve[k + 1], color=[0, 0, 0], linewidth=0.5)
+            ax.fill_between(
+                rs,
+                ps_curve[k],
+                ps_curve[k + 1],
+                color=cs[k],
+                label='[{:.3f}]{}'.format(aps[k], types[k]))
+        plt.xlabel('recall')
+        plt.ylabel('precision')
+        plt.xlim(0, 1.)
+        plt.ylim(0, 1.)
+        plt.title(figure_title)
+        plt.legend()
+        # plt.show()
+        fig.savefig(outDir + '/{}.png'.format(figure_title))
+        plt.close(fig)
+
+
+def analyze_individual_category(k, cocoDt, cocoGt, catId, iou_type):
+    nm = cocoGt.loadCats(catId)[0]
+    print('--------------analyzing {}-{}---------------'.format(
+        k + 1, nm['name']))
+    ps_ = {}
+    dt = copy.deepcopy(cocoDt)
+    nm = cocoGt.loadCats(catId)[0]
+    imgIds = cocoGt.getImgIds()
+    dt_anns = dt.dataset['annotations']
+    select_dt_anns = []
+    for ann in dt_anns:
+        if ann['category_id'] == catId:
+            select_dt_anns.append(ann)
+    dt.dataset['annotations'] = select_dt_anns
+    dt.createIndex()
+    # compute precision but ignore superclass confusion
+    gt = copy.deepcopy(cocoGt)
+    child_catIds = gt.getCatIds(supNms=[nm['supercategory']])
+    for idx, ann in enumerate(gt.dataset['annotations']):
+        if (ann['category_id'] in child_catIds
+                and ann['category_id'] != catId):
+            gt.dataset['annotations'][idx]['ignore'] = 1
+            gt.dataset['annotations'][idx]['iscrowd'] = 1
+            gt.dataset['annotations'][idx]['category_id'] = catId
+    cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type)
+    cocoEval.params.imgIds = imgIds
+    cocoEval.params.maxDets = [100]
+    cocoEval.params.iouThrs = [.1]
+    cocoEval.params.useCats = 1
+    cocoEval.evaluate()
+    cocoEval.accumulate()
+    ps_supercategory = cocoEval.eval['precision'][0, :, k, :, :]
+    ps_['ps_supercategory'] = ps_supercategory
+    # compute precision but ignore any class confusion
+    gt = copy.deepcopy(cocoGt)
+    for idx, ann in enumerate(gt.dataset['annotations']):
+        if ann['category_id'] != catId:
+            gt.dataset['annotations'][idx]['ignore'] = 1
+            gt.dataset['annotations'][idx]['iscrowd'] = 1
+            gt.dataset['annotations'][idx]['category_id'] = catId
+    cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type)
+    cocoEval.params.imgIds = imgIds
+    cocoEval.params.maxDets = [100]
+    cocoEval.params.iouThrs = [.1]
+    cocoEval.params.useCats = 1
+    cocoEval.evaluate()
+    cocoEval.accumulate()
+    ps_allcategory = cocoEval.eval['precision'][0, :, k, :, :]
+    ps_['ps_allcategory'] = ps_allcategory
+    return k, ps_
+
+
+def analyze_results(res_file, ann_file, res_types, out_dir):
+    for res_type in res_types:
+        assert res_type in ['bbox', 'segm']
+
+    directory = os.path.dirname(out_dir + '/')
+    if not os.path.exists(directory):
+        print('-------------create {}-----------------'.format(out_dir))
+        os.makedirs(directory)
+
+    cocoGt = COCO(ann_file)
+    cocoDt = cocoGt.loadRes(res_file)
+    imgIds = cocoGt.getImgIds()
+    for res_type in res_types:
+        iou_type = res_type
+        cocoEval = COCOeval(
+            copy.deepcopy(cocoGt), copy.deepcopy(cocoDt), iou_type)
+        cocoEval.params.imgIds = imgIds
+        cocoEval.params.iouThrs = [.75, .5, .1]
+        cocoEval.params.maxDets = [100]
+        cocoEval.evaluate()
+        cocoEval.accumulate()
+        ps = cocoEval.eval['precision']
+        ps = np.vstack([ps, np.zeros((4, *ps.shape[1:]))])
+        catIds = cocoGt.getCatIds()
+        recThrs = cocoEval.params.recThrs
+        with Pool(processes=48) as pool:
+            args = [(k, cocoDt, cocoGt, catId, iou_type)
+                    for k, catId in enumerate(catIds)]
+            per_cat_results = pool.starmap(analyze_individual_category, args)
+        for k, catId in enumerate(catIds):
+            nm = cocoGt.loadCats(catId)[0]
+            print('--------------saving {}-{}---------------'.format(
+                k + 1, nm['name']))
+            analyze_result = per_cat_results[k]
+            assert k == analyze_result[0]
+            ps_supercategory = analyze_result[1]['ps_supercategory']
+            ps_allcategory = analyze_result[1]['ps_allcategory']
+            # compute precision but ignore superclass confusion
+            ps[3, :, k, :, :] = ps_supercategory
+            # compute precision but ignore any class confusion
+            ps[4, :, k, :, :] = ps_allcategory
+            # fill in background and false negative errors and plot
+            ps[ps == -1] = 0
+            ps[5, :, k, :, :] = (ps[4, :, k, :, :] > 0)
+            ps[6, :, k, :, :] = 1.0
+            makeplot(recThrs, ps[:, :, k], out_dir, nm['name'])
+        makeplot(recThrs, ps, out_dir, 'all')
+
+
+def main():
+    parser = ArgumentParser(description='COCO Evaluation')
+    parser.add_argument('result', help='result file path')
+    parser.add_argument(
+        '--ann',
+        default='/mnt/SSD/dataset/coco/annotations/instances_minival2017.json',
+        help='annotation file path')
+    parser.add_argument(
+        '--types', type=str, nargs='+', default=['bbox'], help='result types')
+    parser.add_argument(
+        '--analyze', action='store_true', help='whether to analyze results')
+    parser.add_argument(
+        '--out_dir',
+        type=str,
+        default=None,
+        help='dir to save analyze result images')
+    parser.add_argument(
+        '--splitRng',
+        type=int,
+        default=32,
+        help='range to split area in evaluation')
+    args = parser.parse_args()
+    if not args.analyze:
+        eval_results(args.result, args.ann, args.types, splitRng=args.splitRng)
+    else:
+        assert args.out_dir is not None
+        analyze_results(
+            args.result, args.ann, args.types, out_dir=args.out_dir)
+
+
+if __name__ == '__main__':
+    main()
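The evaluation entry points can also be driven programmatically. A hedged example of the non-analyze path, assuming a COCO-format result file and that tools/ is importable (paths are placeholders):

```python
from tools.eval import eval_results

# equivalent to: python tools/eval.py results.bbox.json --types bbox
eval_results(
    res_file='results.bbox.json',
    ann_file='annotations/instances_minival2017.json',
    res_types=['bbox'],
    splitRng=32)
```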
diff --git a/tools/examples/r50_fpn_frcnn_1x.py b/tools/examples/r50_fpn_frcnn_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..6814445f8e1ba10a5fad24502ac8aff535f60f21
--- /dev/null
+++ b/tools/examples/r50_fpn_frcnn_1x.py
@@ -0,0 +1,125 @@
+# model settings
+model = dict(
+    pretrained=
+    '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth',
+    backbone=dict(
+        type='resnet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        style='fb'),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        num_outs=5),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=256,
+        feat_channels=256,
+        coarsest_stride=32,
+        anchor_scales=[8],
+        anchor_ratios=[0.5, 1.0, 2.0],
+        anchor_strides=[4, 8, 16, 32, 64],
+        target_means=[.0, .0, .0, .0],
+        target_stds=[1.0, 1.0, 1.0, 1.0],
+        use_sigmoid_cls=True),
+    roi_block=dict(
+        type='SingleLevelRoI',
+        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
+        out_channels=256,
+        featmap_strides=[4, 8, 16, 32]),
+    bbox_head=dict(
+        type='SharedFCRoIHead',
+        num_fcs=2,
+        in_channels=256,
+        fc_out_channels=1024,
+        roi_feat_size=7,
+        num_classes=81,
+        target_means=[0., 0., 0., 0.],
+        target_stds=[0.1, 0.1, 0.2, 0.2],
+        reg_class_agnostic=False))
+meta_params = dict(
+    rpn_train_cfg=dict(
+        pos_fraction=0.5,
+        pos_balance_sampling=False,
+        neg_pos_ub=256,
+        allowed_border=0,
+        anchor_batch_size=256,
+        pos_iou_thr=0.7,
+        neg_iou_thr=0.3,
+        neg_balance_thr=0,
+        min_pos_iou=1e-3,
+        pos_weight=-1,
+        smoothl1_beta=1 / 9.0,
+        debug=False),
+    rpn_test_cfg=dict(
+        nms_across_levels=False,
+        nms_pre=2000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn_train_cfg=dict(
+        pos_iou_thr=0.5,
+        neg_iou_thr=0.5,
+        crowd_thr=1.1,
+        roi_batch_size=512,
+        add_gt_as_proposals=True,
+        pos_fraction=0.25,
+        pos_balance_sampling=False,
+        neg_pos_ub=512,
+        neg_balance_thr=0,
+        pos_weight=-1,
+        debug=False),
+    rcnn_test_cfg=dict(score_thr=1e-3, max_per_img=100, nms_thr=0.5)
+)
+# dataset settings
+data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53],
+    std=[58.395, 57.12, 57.375],
+    to_rgb=True)
+img_per_gpu = 1
+data_workers = 2
+train_dataset = dict(
+    ann_file=data_root + 'annotations/instances_train2017.json',
+    img_prefix=data_root + 'train2017/',
+    img_scale=(1333, 800),
+    img_norm_cfg=img_norm_cfg,
+    size_divisor=32,
+    flip_ratio=0.5)
+test_dataset = dict(
+    ann_file=data_root + 'annotations/instances_val2017.json',
+    img_prefix=data_root + 'val2017/',
+    img_scale=(1333, 800),
+    img_norm_cfg=img_norm_cfg,
+    size_divisor=32)
+# optimizer
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2)
+# learning policy
+lr_policy = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=0.333,
+    step=[8, 11])
+max_epoch = 12
+checkpoint_config = dict(interval=1)
+dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1')
+# logging settings
+log_level = 'INFO'
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')),
+    ])
+# yapf:enable
+work_dir = './model/r50_fpn_frcnn_1x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
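For reference, the `lr_policy` above describes roughly the schedule sketched below, assuming the conventional semantics of a 'step' policy with linear warmup (an illustration, not the hook implementation):

```python
def lr_at(epoch, cur_iter, base_lr=0.02, warmup_iters=500,
          warmup_ratio=0.333, steps=(8, 11), gamma=0.1):
    # linear warmup from warmup_ratio * base_lr to base_lr over the first
    # 500 iterations, then divide the lr by 10 entering epochs 8 and 11
    if epoch == 0 and cur_iter < warmup_iters:
        k = cur_iter / warmup_iters
        return base_lr * (warmup_ratio + (1 - warmup_ratio) * k)
    return base_lr * gamma ** sum(epoch >= s for s in steps)
```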
diff --git a/tools/examples/r50_fpn_maskrcnn_1x.py b/tools/examples/r50_fpn_maskrcnn_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..49b32037ec5139ee64d21bc6e9c607dcd69da018
--- /dev/null
+++ b/tools/examples/r50_fpn_maskrcnn_1x.py
@@ -0,0 +1,136 @@
+# model settings
+model = dict(
+    pretrained=
+    '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth',
+    backbone=dict(
+        type='resnet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        style='fb'),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        num_outs=5),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=256,
+        feat_channels=256,
+        coarsest_stride=32,
+        anchor_scales=[8],
+        anchor_ratios=[0.5, 1.0, 2.0],
+        anchor_strides=[4, 8, 16, 32, 64],
+        target_means=[.0, .0, .0, .0],
+        target_stds=[1.0, 1.0, 1.0, 1.0],
+        use_sigmoid_cls=True),
+    roi_block=dict(
+        type='SingleLevelRoI',
+        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
+        out_channels=256,
+        featmap_strides=[4, 8, 16, 32]),
+    bbox_head=dict(
+        type='SharedFCRoIHead',
+        num_fcs=2,
+        in_channels=256,
+        fc_out_channels=1024,
+        roi_feat_size=7,
+        num_classes=81,
+        target_means=[0., 0., 0., 0.],
+        target_stds=[0.1, 0.1, 0.2, 0.2],
+        reg_class_agnostic=False),
+    mask_block=dict(
+        type='SingleLevelRoI',
+        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
+        out_channels=256,
+        featmap_strides=[4, 8, 16, 32]),
+    mask_head=dict(
+        type='FCNMaskHead',
+        num_convs=4,
+        in_channels=256,
+        conv_out_channels=256,
+        num_classes=81))
+meta_params = dict(
+    rpn_train_cfg=dict(
+        pos_fraction=0.5,
+        pos_balance_sampling=False,
+        neg_pos_ub=256,
+        allowed_border=0,
+        anchor_batch_size=256,
+        pos_iou_thr=0.7,
+        neg_iou_thr=0.3,
+        neg_balance_thr=0,
+        min_pos_iou=1e-3,
+        pos_weight=-1,
+        smoothl1_beta=1 / 9.0,
+        debug=False),
+    rpn_test_cfg=dict(
+        nms_across_levels=False,
+        nms_pre=2000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn_train_cfg=dict(
+        mask_size=28,
+        pos_iou_thr=0.5,
+        neg_iou_thr=0.5,
+        crowd_thr=1.1,
+        roi_batch_size=512,
+        add_gt_as_proposals=True,
+        pos_fraction=0.25,
+        pos_balance_sampling=False,
+        neg_pos_ub=512,
+        neg_balance_thr=0,
+        pos_weight=-1,
+        debug=False),
+    rcnn_test_cfg=dict(
+        score_thr=1e-3, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5))
+# dataset settings
+data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+img_per_gpu = 1
+data_workers = 2
+train_dataset = dict(
+    with_mask=True,
+    ann_file=data_root + 'annotations/instances_train2017.json',
+    img_prefix=data_root + 'train2017/',
+    img_scale=(1333, 800),
+    img_norm_cfg=img_norm_cfg,
+    size_divisor=32,
+    flip_ratio=0.5)
+test_dataset = dict(
+    ann_file=data_root + 'annotations/instances_val2017.json',
+    img_prefix=data_root + 'val2017/',
+    img_scale=(1333, 800),
+    img_norm_cfg=img_norm_cfg,
+    size_divisor=32)
+# optimizer
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2)
+# learning policy
+lr_policy = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=0.333,
+    step=[8, 11])
+max_epoch = 12
+checkpoint_config = dict(interval=1)
+dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1')
+# logging settings
+log_level = 'INFO'
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')),
+    ])
+# yapf:enable
+work_dir = './model/r50_fpn_mask_rcnn_1x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/tools/examples/r50_fpn_rpn_1x.py b/tools/examples/r50_fpn_rpn_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..45c0a1a6c4649a18346251c8e81f5480f29da30f
--- /dev/null
+++ b/tools/examples/r50_fpn_rpn_1x.py
@@ -0,0 +1,95 @@
+# model settings
+model = dict(
+    pretrained=
+    '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth',
+    backbone=dict(
+        type='resnet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        style='fb'),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        num_outs=5),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=256,
+        feat_channels=256,
+        coarsest_stride=32,
+        anchor_scales=[8],
+        anchor_ratios=[0.5, 1.0, 2.0],
+        anchor_strides=[4, 8, 16, 32, 64],
+        target_means=[.0, .0, .0, .0],
+        target_stds=[1.0, 1.0, 1.0, 1.0],
+        use_sigmoid_cls=True))
+meta_params = dict(
+    rpn_train_cfg=dict(
+        pos_fraction=0.5,
+        pos_balance_sampling=False,
+        neg_pos_ub=256,
+        allowed_border=0,
+        anchor_batch_size=256,
+        pos_iou_thr=0.7,
+        neg_iou_thr=0.3,
+        neg_balance_thr=0,
+        min_pos_iou=1e-3,
+        pos_weight=-1,
+        smoothl1_beta=1 / 9.0,
+        debug=False),
+    rpn_test_cfg=dict(
+        nms_across_levels=False,
+        nms_pre=2000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0))
+# dataset settings
+data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+img_per_gpu = 1
+data_workers = 2
+train_dataset = dict(
+    ann_file=data_root + 'annotations/instances_train2017.json',
+    img_prefix=data_root + 'train2017/',
+    img_scale=(1333, 800),
+    img_norm_cfg=img_norm_cfg,
+    size_divisor=32,
+    flip_ratio=0.5)
+test_dataset = dict(
+    ann_file=data_root + 'annotations/instances_val2017.json',
+    img_prefix=data_root + 'val2017/',
+    img_scale=(1333, 800),
+    img_norm_cfg=img_norm_cfg,
+    size_divisor=32,
+    test_mode=True)
+# optimizer
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2)
+# learning policy
+lr_policy = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=0.333,
+    step=[8, 11])
+max_epoch = 12
+checkpoint_config = dict(interval=1)
+dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1')
+# logging settings
+log_level = 'INFO'
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')),
+    ])
+# yapf:enable
+work_dir = './model/r50_fpn_1x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/tools/test.py b/tools/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..2d062489100f3fc6a579ec811ff0391573f48454
--- /dev/null
+++ b/tools/test.py
@@ -0,0 +1,65 @@
+import os.path as osp
+import sys
+sys.path.append(osp.abspath(osp.join(__file__, '../../')))
+sys.path.append('/mnt/lustre/pangjiangmiao/sensenet_folder/mmcv')
+import argparse
+
+import numpy as np
+import torch
+
+import mmcv
+from mmcv import Config
+from mmcv.torchpack import load_checkpoint, parallel_test
+from mmdet.core import _data_func, results2json
+from mmdet.datasets import CocoDataset
+from mmdet.datasets.data_engine import build_data
+from mmdet.models import Detector
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='MMDet test detector')
+    parser.add_argument('config', help='test config file path')
+    parser.add_argument('checkpoint', help='checkpoint file')
+    parser.add_argument('--world_size', default=1, type=int)
+    parser.add_argument('--out', help='output result file')
+    parser.add_argument(
+        '--out_json', action='store_true', help='get json output file')
+    args = parser.parse_args()
+    return args
+
+
+args = parse_args()
+
+
+def main():
+    cfg = Config.fromfile(args.config)
+    cfg.model['pretrained'] = None
+    # TODO: decide how img_per_gpu should be configured at test time
+    cfg.img_per_gpu = 1
+
+    test_dataset = CocoDataset(**cfg.test_dataset)
+    if args.world_size == 1:
+        # TODO verify this part
+        args.dist = False
+        args.img_per_gpu = cfg.img_per_gpu
+        args.data_workers = cfg.data_workers
+        model = Detector(**cfg.model, **cfg.meta_params)
+        load_checkpoint(model, args.checkpoint)
+        test_loader = build_data(cfg.test_dataset, args)
+        model = torch.nn.DataParallel(model, device_ids=[0])
+        # TODO write single_test
+        outputs = single_test(test_loader, model)
+    else:
+        model = dict(cfg.model, **cfg.meta_params)
+        outputs = parallel_test(Detector, model,
+                                args.checkpoint, test_dataset, _data_func,
+                                range(args.world_size))
+
+    if args.out:
+        mmcv.dump(outputs, args.out, protocol=4)
+        if args.out_json:
+            results2json(test_dataset, outputs, args.out + '.json')
+
+
+if __name__ == '__main__':
+    main()
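Assuming checkpoints saved by tools/train.py, a multi-GPU evaluation would look like `python tools/test.py tools/examples/r50_fpn_frcnn_1x.py <checkpoint> --world_size 8 --out results.pkl --out_json`, where `<checkpoint>` is one of the per-epoch files written under `work_dir`.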
diff --git a/tools/train.py b/tools/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..0cb2450acf511715c716594e37b0968876aad683
--- /dev/null
+++ b/tools/train.py
@@ -0,0 +1,85 @@
+from __future__ import division
+import argparse
+import sys
+import os.path as osp
+sys.path.append(osp.abspath(osp.join(__file__, '../../')))
+sys.path.append('/mnt/lustre/pangjiangmiao/sensenet_folder/mmcv')
+
+import torch
+import torch.multiprocessing as mp
+from mmcv import Config
+from mmcv.torchpack import Runner
+from mmdet.core import (batch_processor, init_dist, broadcast_params,
+                        DistOptimizerStepperHook, DistSamplerSeedHook)
+from mmdet.datasets.data_engine import build_data
+from mmdet.models import Detector
+from mmdet.nn.parallel import MMDataParallel
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='MMDet train val detector')
+    parser.add_argument('config', help='train config file path')
+    parser.add_argument('--validate', action='store_true', help='validate')
+    parser.add_argument(
+        '--dist', action='store_true', help='distributed training or not')
+    parser.add_argument('--world_size', default=1, type=int)
+    parser.add_argument('--rank', default=0, type=int)
+    args = parser.parse_args()
+
+    return args
+
+
+args = parse_args()
+
+
+def main():
+    # Fetch config information first; init_dist below needs cfg.dist_params
+    cfg = Config.fromfile(args.config)
+
+    # Enable distributed training or not
+    if args.dist:
+        print('Enabling distributed training.')
+        mp.set_start_method('spawn', force=True)
+        init_dist(
+            args.world_size,
+            args.rank,
+            **cfg.dist_params)
+    else:
+        print('Distributed training is disabled.')
+
+    # TODO more flexible
+    args.img_per_gpu = cfg.img_per_gpu
+    args.data_workers = cfg.data_workers
+
+    # prepare training loader
+    train_loader = [build_data(cfg.train_dataset, args)]
+    if args.validate:
+        val_loader = build_data(cfg.val_dataset, args)
+        train_loader.append(val_loader)
+
+    # build model
+    model = Detector(**cfg.model, **cfg.meta_params)
+    if args.dist:
+        model = model.cuda()
+        broadcast_params(model)
+    else:
+        device_ids = [args.rank % torch.cuda.device_count()]
+        model = MMDataParallel(model, device_ids=device_ids).cuda()
+
+    # register hooks
+    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
+                    cfg.log_level)
+    optimizer_stepper = DistOptimizerStepperHook(
+        **cfg.grad_clip_config) if args.dist else cfg.grad_clip_config
+    runner.register_training_hooks(cfg.lr_policy, optimizer_stepper,
+                                   cfg.checkpoint_config, cfg.log_config)
+    if args.dist:
+        runner.register_hook(DistSamplerSeedHook())
+    if cfg.resume_from:
+        runner.resume(cfg.resume_from)
+    elif cfg.load_from:
+        runner.load_checkpoint(cfg.load_from)
+    runner.run(train_loader, cfg.workflow, cfg.max_epoch, args=args)
+
+
+if __name__ == "__main__":
+    main()
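With the defaults (`--dist` off, world_size 1, rank 0), single-machine training reduces to `python tools/train.py tools/examples/r50_fpn_frcnn_1x.py`; for distributed training, launch one process per GPU, each with `--dist --world_size N` and its own `--rank`.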