diff --git a/configs/cascade_mask_rcnn_r50_c4_1x.py b/configs/cascade_mask_rcnn_r50_c4_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..653e2212896cedf1ed9cbf8055d7879b309d4b8a
--- /dev/null
+++ b/configs/cascade_mask_rcnn_r50_c4_1x.py
@@ -0,0 +1,228 @@
+# model settings
+model = dict(
+    type='CascadeRCNN',
+    num_stages=3,
+    pretrained='open-mmlab://resnet50_caffe',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=3,
+        strides=(1, 2, 2),
+        dilations=(1, 1, 1),
+        out_indices=(2, ),
+        frozen_stages=1,
+        normalize=dict(type='BN', frozen=True),
+        norm_eval=True,
+        style='caffe'),
+    shared_head=dict(
+        type='ResLayer',
+        depth=50,
+        stage=3,
+        stride=2,
+        dilation=1,
+        style='caffe',
+        normalize=dict(type='BN', frozen=True),
+        norm_eval=True),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=1024,
+        feat_channels=1024,
+        anchor_scales=[2, 4, 8, 16, 32],
+        anchor_ratios=[0.5, 1.0, 2.0],
+        anchor_strides=[16],
+        target_means=[.0, .0, .0, .0],
+        target_stds=[1.0, 1.0, 1.0, 1.0],
+        use_sigmoid_cls=True),
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
+        out_channels=1024,
+        featmap_strides=[16]),
+    bbox_head=[
+        dict(
+            type='BBoxHead',
+            with_avg_pool=True,
+            roi_feat_size=7,
+            in_channels=2048,
+            num_classes=81,
+            target_means=[0., 0., 0., 0.],
+            target_stds=[0.1, 0.1, 0.2, 0.2],
+            reg_class_agnostic=True),
+        dict(
+            type='BBoxHead',
+            with_avg_pool=True,
+            roi_feat_size=7,
+            in_channels=2048,
+            num_classes=81,
+            target_means=[0., 0., 0., 0.],
+            target_stds=[0.05, 0.05, 0.1, 0.1],
+            reg_class_agnostic=True),
+        dict(
+            type='BBoxHead',
+            with_avg_pool=True,
+            roi_feat_size=7,
+            in_channels=2048,
+            num_classes=81,
+            target_means=[0., 0., 0., 0.],
+            target_stds=[0.033, 0.033, 0.067, 0.067],
+            reg_class_agnostic=True)
+    ],
+    mask_roi_extractor=None,
+    mask_head=dict(
+        type='FCNMaskHead',
+        num_convs=0,
+        in_channels=2048,
+        conv_out_channels=256,
+        num_classes=81))
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.7,
+            neg_iou_thr=0.3,
+            min_pos_iou=0.3,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=256,
+            pos_fraction=0.5,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=False),
+        allowed_border=0,
+        pos_weight=-1,
+        smoothl1_beta=1 / 9.0,
+        debug=False),
+    rcnn=[
+        dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.5,
+                neg_iou_thr=0.5,
+                min_pos_iou=0.5,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            mask_size=14,
+            pos_weight=-1,
+            debug=False),
+        dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.6,
+                neg_iou_thr=0.6,
+                min_pos_iou=0.6,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            mask_size=14,
+            pos_weight=-1,
+            debug=False),
+        dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.7,
+                neg_iou_thr=0.7,
+                min_pos_iou=0.7,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            mask_size=14,
+            pos_weight=-1,
+            debug=False)
+    ],
+    stage_loss_weights=[1, 0.5, 0.25])
+test_cfg = dict(
+    rpn=dict(
+        nms_across_levels=False,
+        nms_pre=12000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn=dict(
+        score_thr=0.05,
+        nms=dict(type='nms', iou_thr=0.5),
+        max_per_img=100,
+        mask_thr_binary=0.5),
+    keep_all_stages=False)
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
+data = dict(
+    imgs_per_gpu=1,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=True,
+        with_crowd=True,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=True,
+        with_crowd=True,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,  # GT masks are never needed at test time
+        with_label=False,
+        test_mode=True))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[8, 11])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+total_epochs = 12
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/cascade_mask_rcnn_r50_c4_1x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/configs/cascade_rcnn_r50_c4_1x.py b/configs/cascade_rcnn_r50_c4_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..e53a476fb92115e6911b526fdfaf8739d1b9819f
--- /dev/null
+++ b/configs/cascade_rcnn_r50_c4_1x.py
@@ -0,0 +1,218 @@
+# model settings
+model = dict(
+    type='CascadeRCNN',
+    num_stages=3,
+    pretrained='open-mmlab://resnet50_caffe',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=3,
+        strides=(1, 2, 2),
+        dilations=(1, 1, 1),
+        out_indices=(2, ),
+        frozen_stages=1,
+        normalize=dict(type='BN', frozen=True),
+        norm_eval=True,
+        style='caffe'),
+    shared_head=dict(
+        type='ResLayer',
+        depth=50,
+        stage=3,
+        stride=2,
+        dilation=1,
+        style='caffe',
+        normalize=dict(type='BN', frozen=True),
+        norm_eval=True),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=1024,
+        feat_channels=1024,
+        anchor_scales=[2, 4, 8, 16, 32],
+        anchor_ratios=[0.5, 1.0, 2.0],
+        anchor_strides=[16],
+        target_means=[.0, .0, .0, .0],
+        target_stds=[1.0, 1.0, 1.0, 1.0],
+        use_sigmoid_cls=True),
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
+        out_channels=1024,
+        featmap_strides=[16]),
+    bbox_head=[
+        dict(
+            type='BBoxHead',
+            with_avg_pool=True,
+            roi_feat_size=7,
+            in_channels=2048,
+            num_classes=81,
+            target_means=[0., 0., 0., 0.],
+            target_stds=[0.1, 0.1, 0.2, 0.2],
+            reg_class_agnostic=True),
+        dict(
+            type='BBoxHead',
+            with_avg_pool=True,
+            roi_feat_size=7,
+            in_channels=2048,
+            num_classes=81,
+            target_means=[0., 0., 0., 0.],
+            target_stds=[0.05, 0.05, 0.1, 0.1],
+            reg_class_agnostic=True),
+        dict(
+            type='BBoxHead',
+            with_avg_pool=True,
+            roi_feat_size=7,
+            in_channels=2048,
+            num_classes=81,
+            target_means=[0., 0., 0., 0.],
+            target_stds=[0.033, 0.033, 0.067, 0.067],
+            reg_class_agnostic=True)
+    ])
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.7,
+            neg_iou_thr=0.3,
+            min_pos_iou=0.3,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=256,
+            pos_fraction=0.5,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=False),
+        allowed_border=0,
+        pos_weight=-1,
+        smoothl1_beta=1 / 9.0,
+        debug=False),
+    rcnn=[
+        dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.5,
+                neg_iou_thr=0.5,
+                min_pos_iou=0.5,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            mask_size=14,  # NOTE(review): unused — this config has no mask head
+            pos_weight=-1,
+            debug=False),
+        dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.6,
+                neg_iou_thr=0.6,
+                min_pos_iou=0.6,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            mask_size=14,  # NOTE(review): unused — this config has no mask head
+            pos_weight=-1,
+            debug=False),
+        dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.7,
+                neg_iou_thr=0.7,
+                min_pos_iou=0.7,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            mask_size=14,  # NOTE(review): unused — this config has no mask head
+            pos_weight=-1,
+            debug=False)
+    ],
+    stage_loss_weights=[1, 0.5, 0.25])
+test_cfg = dict(
+    rpn=dict(
+        nms_across_levels=False,
+        nms_pre=12000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn=dict(
+        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),
+    keep_all_stages=False)
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
+data = dict(
+    imgs_per_gpu=1,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=False,  # bbox-only model: skip loading GT masks
+        with_crowd=True,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,  # bbox-only model: skip loading GT masks
+        with_crowd=True,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_label=False,
+        test_mode=True))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[8, 11])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+total_epochs = 12
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/cascade_rcnn_r50_c4_1x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/configs/fast_rcnn_r50_c4_1x.py b/configs/fast_rcnn_r50_c4_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d98c5c516abf19eb3424e6268a8d5ce1193ad4e
--- /dev/null
+++ b/configs/fast_rcnn_r50_c4_1x.py
@@ -0,0 +1,129 @@
+# model settings
+model = dict(
+    type='FastRCNN',
+    pretrained='open-mmlab://resnet50_caffe',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=3,
+        strides=(1, 2, 2),
+        dilations=(1, 1, 1),
+        out_indices=(2, ),
+        frozen_stages=1,
+        normalize=dict(type='BN', frozen=True),
+        norm_eval=True,
+        style='caffe'),
+    shared_head=dict(
+        type='ResLayer',
+        depth=50,
+        stage=3,
+        stride=2,
+        dilation=1,
+        style='caffe',
+        normalize=dict(type='BN', frozen=True),
+        norm_eval=True),
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
+        out_channels=1024,
+        featmap_strides=[16]),
+    bbox_head=dict(
+        type='BBoxHead',
+        with_avg_pool=True,
+        roi_feat_size=7,
+        in_channels=2048,
+        num_classes=81,
+        target_means=[0., 0., 0., 0.],
+        target_stds=[0.1, 0.1, 0.2, 0.2],
+        reg_class_agnostic=False))
+# model training and testing settings
+train_cfg = dict(
+    rcnn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.5,
+            neg_iou_thr=0.5,
+            min_pos_iou=0.5,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=512,
+            pos_fraction=0.25,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=True),
+        pos_weight=-1,
+        debug=False))
+test_cfg = dict(
+    rcnn=dict(
+        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100))
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
+data = dict(
+    imgs_per_gpu=1,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        proposal_file=data_root + 'proposals/rpn_r50_c4_1x_train2017.pkl',
+        flip_ratio=0.5,
+        with_mask=False,
+        with_crowd=True,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        proposal_file=data_root + 'proposals/rpn_r50_c4_1x_val2017.pkl',
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=True,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        proposal_file=data_root + 'proposals/rpn_r50_c4_1x_val2017.pkl',
+        flip_ratio=0,
+        with_mask=False,
+        with_label=False,
+        test_mode=True))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[8, 11])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+total_epochs = 12
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/fast_rcnn_r50_c4_1x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/configs/faster_rcnn_r50_c4_1x.py b/configs/faster_rcnn_r50_c4_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..c38e294534bf70356a5757b83f3dd9f6f996c4ce
--- /dev/null
+++ b/configs/faster_rcnn_r50_c4_1x.py
@@ -0,0 +1,160 @@
+# model settings
+model = dict(
+    type='FasterRCNN',
+    pretrained='open-mmlab://resnet50_caffe',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=3,
+        strides=(1, 2, 2),
+        dilations=(1, 1, 1),
+        out_indices=(2, ),
+        frozen_stages=1,
+        normalize=dict(type='BN', frozen=True),
+        norm_eval=True,
+        style='caffe'),
+    shared_head=dict(
+        type='ResLayer',
+        depth=50,
+        stage=3,
+        stride=2,
+        dilation=1,
+        style='caffe',
+        normalize=dict(type='BN', frozen=True),
+        norm_eval=True),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=1024,
+        feat_channels=1024,
+        anchor_scales=[2, 4, 8, 16, 32],
+        anchor_ratios=[0.5, 1.0, 2.0],
+        anchor_strides=[16],
+        target_means=[.0, .0, .0, .0],
+        target_stds=[1.0, 1.0, 1.0, 1.0],
+        use_sigmoid_cls=True),
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
+        out_channels=1024,
+        featmap_strides=[16]),
+    bbox_head=dict(
+        type='BBoxHead',
+        with_avg_pool=True,
+        roi_feat_size=7,
+        in_channels=2048,
+        num_classes=81,
+        target_means=[0., 0., 0., 0.],
+        target_stds=[0.1, 0.1, 0.2, 0.2],
+        reg_class_agnostic=False))
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.7,
+            neg_iou_thr=0.3,
+            min_pos_iou=0.3,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=256,
+            pos_fraction=0.5,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=False),
+        allowed_border=0,
+        pos_weight=-1,
+        smoothl1_beta=1 / 9.0,
+        debug=False),
+    rcnn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.5,
+            neg_iou_thr=0.5,
+            min_pos_iou=0.5,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=512,
+            pos_fraction=0.25,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=True),
+        pos_weight=-1,
+        debug=False))
+test_cfg = dict(
+    rpn=dict(
+        nms_across_levels=False,
+        nms_pre=12000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn=dict(
+        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100))
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
+data = dict(
+    imgs_per_gpu=1,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=False,
+        with_crowd=True,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=True,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_label=False,
+        test_mode=True))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[8, 11])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+total_epochs = 12
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/faster_rcnn_r50_c4_1x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/configs/mask_rcnn_r50_c4_1x.py b/configs/mask_rcnn_r50_c4_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..6fe21420d3a05926ab2b4b00780d7df6997b7efd
--- /dev/null
+++ b/configs/mask_rcnn_r50_c4_1x.py
@@ -0,0 +1,171 @@
+# model settings
+model = dict(
+    type='MaskRCNN',
+    pretrained='open-mmlab://resnet50_caffe',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=3,
+        strides=(1, 2, 2),
+        dilations=(1, 1, 1),
+        out_indices=(2, ),
+        frozen_stages=1,
+        normalize=dict(type='BN', frozen=True),
+        norm_eval=True,
+        style='caffe'),
+    shared_head=dict(
+        type='ResLayer',
+        depth=50,
+        stage=3,
+        stride=2,
+        dilation=1,
+        style='caffe',
+        normalize=dict(type='BN', frozen=True),
+        norm_eval=True),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=1024,
+        feat_channels=1024,
+        anchor_scales=[2, 4, 8, 16, 32],
+        anchor_ratios=[0.5, 1.0, 2.0],
+        anchor_strides=[16],
+        target_means=[.0, .0, .0, .0],
+        target_stds=[1.0, 1.0, 1.0, 1.0],
+        use_sigmoid_cls=True),
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
+        out_channels=1024,
+        featmap_strides=[16]),
+    bbox_head=dict(
+        type='BBoxHead',
+        with_avg_pool=True,
+        roi_feat_size=7,
+        in_channels=2048,
+        num_classes=81,
+        target_means=[0., 0., 0., 0.],
+        target_stds=[0.1, 0.1, 0.2, 0.2],
+        reg_class_agnostic=False),
+    mask_roi_extractor=None,
+    mask_head=dict(
+        type='FCNMaskHead',
+        num_convs=0,
+        in_channels=2048,
+        conv_out_channels=256,
+        num_classes=81))
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.7,
+            neg_iou_thr=0.3,
+            min_pos_iou=0.3,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=256,
+            pos_fraction=0.5,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=False),
+        allowed_border=0,
+        pos_weight=-1,
+        smoothl1_beta=1 / 9.0,
+        debug=False),
+    rcnn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.5,
+            neg_iou_thr=0.5,
+            min_pos_iou=0.5,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=512,
+            pos_fraction=0.25,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=True),
+        mask_size=14,
+        pos_weight=-1,
+        debug=False))
+test_cfg = dict(
+    rpn=dict(
+        nms_across_levels=False,
+        nms_pre=12000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn=dict(
+        score_thr=0.05,
+        nms=dict(type='nms', iou_thr=0.5),
+        max_per_img=100,
+        mask_thr_binary=0.5))
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
+data = dict(
+    imgs_per_gpu=1,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=True,
+        with_crowd=True,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=True,
+        with_crowd=True,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_label=False,
+        test_mode=True))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[8, 11])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+total_epochs = 12
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/mask_rcnn_r50_c4_1x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/configs/rpn_r50_c4_1x.py b/configs/rpn_r50_c4_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..228d54a54e54e2c9a23f2ae0275f2182763df8c3
--- /dev/null
+++ b/configs/rpn_r50_c4_1x.py
@@ -0,0 +1,121 @@
+# model settings
+model = dict(
+    type='RPN',
+    pretrained='open-mmlab://resnet50_caffe',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=3,
+        strides=(1, 2, 2),
+        dilations=(1, 1, 1),
+        out_indices=(2, ),
+        frozen_stages=1,
+        normalize=dict(type='BN', frozen=True),
+        norm_eval=True,
+        style='caffe'),
+    neck=None,
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=1024,
+        feat_channels=1024,
+        anchor_scales=[2, 4, 8, 16, 32],
+        anchor_ratios=[0.5, 1.0, 2.0],
+        anchor_strides=[16],
+        target_means=[.0, .0, .0, .0],
+        target_stds=[1.0, 1.0, 1.0, 1.0],
+        use_sigmoid_cls=True))
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.7,
+            neg_iou_thr=0.3,
+            min_pos_iou=0.3,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=256,
+            pos_fraction=0.5,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=False),
+        allowed_border=0,
+        pos_weight=-1,
+        smoothl1_beta=1 / 9.0,
+        debug=False))
+test_cfg = dict(
+    rpn=dict(
+        nms_across_levels=False,
+        nms_pre=12000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0))
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
+data = dict(
+    imgs_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=False,
+        with_crowd=False,
+        with_label=False),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=False,
+        with_label=False),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_label=False,
+        test_mode=True))
+# optimizer
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+# runner configs
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[8, 11])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+total_epochs = 12
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/rpn_r50_c4_1x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/mmdet/models/__init__.py b/mmdet/models/__init__.py
index 772585cffaa9a30363a2aa6d09af13a6e57f90f0..7925e6a3717410de4c9e87ca63a88bc44bdf51c7 100644
--- a/mmdet/models/__init__.py
+++ b/mmdet/models/__init__.py
@@ -2,15 +2,17 @@ from .backbones import *  # noqa: F401,F403
 from .necks import *  # noqa: F401,F403
 from .roi_extractors import *  # noqa: F401,F403
 from .anchor_heads import *  # noqa: F401,F403
+from .shared_heads import *  # noqa: F401,F403
 from .bbox_heads import *  # noqa: F401,F403
 from .mask_heads import *  # noqa: F401,F403
 from .detectors import *  # noqa: F401,F403
-from .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS
+from .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS,
+                       DETECTORS)
 from .builder import (build_backbone, build_neck, build_roi_extractor,
-                      build_head, build_detector)
+                      build_shared_head, build_head, build_detector)
 
 __all__ = [
-    'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'HEADS', 'DETECTORS',
-    'build_backbone', 'build_neck', 'build_roi_extractor', 'build_head',
-    'build_detector'
+    'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS',
+    'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor',
+    'build_shared_head', 'build_head', 'build_detector'
 ]
diff --git a/mmdet/models/backbones/__init__.py b/mmdet/models/backbones/__init__.py
index 63da2b5fc0170e0d3b3251c7e50a7c7f4dc55bd5..c91a92ecc5f25e8dba991f5e2a174e42e0510206 100644
--- a/mmdet/models/backbones/__init__.py
+++ b/mmdet/models/backbones/__init__.py
@@ -1,5 +1,5 @@
-from .resnet import ResNet
+from .resnet import ResNet, make_res_layer
 from .resnext import ResNeXt
 from .ssd_vgg import SSDVGG
 
-__all__ = ['ResNet', 'ResNeXt', 'SSDVGG']
+__all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG']
diff --git a/mmdet/models/backbones/resnet.py b/mmdet/models/backbones/resnet.py
index d249716c84734903c499dcd0471faefb2a99533e..1965d6efef4c3f68dd21c262514fcf4d897c49d7 100644
--- a/mmdet/models/backbones/resnet.py
+++ b/mmdet/models/backbones/resnet.py
@@ -440,10 +440,7 @@ class ResNet(nn.Module):
             x = res_layer(x)
             if i in self.out_indices:
                 outs.append(x)
-        if len(outs) == 1:
-            return outs[0]
-        else:
-            return tuple(outs)
+        return tuple(outs)
 
     def train(self, mode=True):
         super(ResNet, self).train(mode)
diff --git a/mmdet/models/builder.py b/mmdet/models/builder.py
index a1a9d2cfe90b2ab5993c0812235ec7138282e862..3de186ca64509b93a814ebec420c70b41cfd8c90 100644
--- a/mmdet/models/builder.py
+++ b/mmdet/models/builder.py
@@ -1,7 +1,8 @@
 import mmcv
 from torch import nn
 
-from .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS
+from .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS,
+                       DETECTORS)
 
 
 def _build_module(cfg, registry, default_args):
@@ -43,6 +44,10 @@ def build_roi_extractor(cfg):
     return build(cfg, ROI_EXTRACTORS)
 
 
+def build_shared_head(cfg):
+    return build(cfg, SHARED_HEADS)
+
+
 def build_head(cfg):
     return build(cfg, HEADS)
 
diff --git a/mmdet/models/detectors/base.py b/mmdet/models/detectors/base.py
index e23784ea483e0f7ac30ed22c8c76949dc40424ff..8e2bbde38b9f6687ea1b7ed7b6ffdc6bf1f23c1d 100644
--- a/mmdet/models/detectors/base.py
+++ b/mmdet/models/detectors/base.py
@@ -21,6 +21,10 @@ class BaseDetector(nn.Module):
     def with_neck(self):
         return hasattr(self, 'neck') and self.neck is not None
 
+    @property
+    def with_shared_head(self):
+        return hasattr(self, 'shared_head') and self.shared_head is not None
+
     @property
     def with_bbox(self):
         return hasattr(self, 'bbox_head') and self.bbox_head is not None
diff --git a/mmdet/models/detectors/cascade_rcnn.py b/mmdet/models/detectors/cascade_rcnn.py
index d466f633d27aeb52dcc8b78b5c68939f3691253e..b20274b30e6c042620f6b0469264ed67405e7143 100644
--- a/mmdet/models/detectors/cascade_rcnn.py
+++ b/mmdet/models/detectors/cascade_rcnn.py
@@ -18,6 +18,7 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
                  num_stages,
                  backbone,
                  neck=None,
+                 shared_head=None,
                  rpn_head=None,
                  bbox_roi_extractor=None,
                  bbox_head=None,
@@ -35,12 +36,13 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
 
         if neck is not None:
             self.neck = builder.build_neck(neck)
-        else:
-            raise NotImplementedError
 
         if rpn_head is not None:
             self.rpn_head = builder.build_head(rpn_head)
 
+        if shared_head is not None:
+            self.shared_head = builder.build_shared_head(shared_head)
+
         if bbox_head is not None:
             self.bbox_roi_extractor = nn.ModuleList()
             self.bbox_head = nn.ModuleList()
@@ -57,19 +59,26 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
                 self.bbox_head.append(builder.build_head(head))
 
         if mask_head is not None:
-            self.mask_roi_extractor = nn.ModuleList()
             self.mask_head = nn.ModuleList()
-            if not isinstance(mask_roi_extractor, list):
-                mask_roi_extractor = [
-                    mask_roi_extractor for _ in range(num_stages)
-                ]
             if not isinstance(mask_head, list):
                 mask_head = [mask_head for _ in range(num_stages)]
-            assert len(mask_roi_extractor) == len(mask_head) == self.num_stages
-            for roi_extractor, head in zip(mask_roi_extractor, mask_head):
-                self.mask_roi_extractor.append(
-                    builder.build_roi_extractor(roi_extractor))
+            assert len(mask_head) == self.num_stages
+            for head in mask_head:
                 self.mask_head.append(builder.build_head(head))
+            if mask_roi_extractor is not None:
+                self.share_roi_extractor = False
+                self.mask_roi_extractor = nn.ModuleList()
+                if not isinstance(mask_roi_extractor, list):
+                    mask_roi_extractor = [
+                        mask_roi_extractor for _ in range(num_stages)
+                    ]
+                assert len(mask_roi_extractor) == self.num_stages
+                for roi_extractor in mask_roi_extractor:
+                    self.mask_roi_extractor.append(
+                        builder.build_roi_extractor(roi_extractor))
+            else:
+                self.share_roi_extractor = True
+                self.mask_roi_extractor = self.bbox_roi_extractor
 
         self.train_cfg = train_cfg
         self.test_cfg = test_cfg
@@ -91,12 +100,15 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
                 self.neck.init_weights()
         if self.with_rpn:
             self.rpn_head.init_weights()
+        if self.with_shared_head:
+            self.shared_head.init_weights(pretrained=pretrained)
         for i in range(self.num_stages):
             if self.with_bbox:
                 self.bbox_roi_extractor[i].init_weights()
                 self.bbox_head[i].init_weights()
             if self.with_mask:
-                self.mask_roi_extractor[i].init_weights()
+                if not self.share_roi_extractor:
+                    self.mask_roi_extractor[i].init_weights()
                 self.mask_head[i].init_weights()
 
     def extract_feat(self, img):
@@ -164,23 +176,45 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
             rois = bbox2roi([res.bboxes for res in sampling_results])
             bbox_feats = bbox_roi_extractor(x[:bbox_roi_extractor.num_inputs],
                                             rois)
+            if self.with_shared_head:
+                bbox_feats = self.shared_head(bbox_feats)
             cls_score, bbox_pred = bbox_head(bbox_feats)
 
             bbox_targets = bbox_head.get_target(sampling_results, gt_bboxes,
                                                 gt_labels, rcnn_train_cfg)
             loss_bbox = bbox_head.loss(cls_score, bbox_pred, *bbox_targets)
             for name, value in loss_bbox.items():
-                losses['s{}.{}'.format(i, name)] = (value * lw if
-                                                    'loss' in name else value)
+                losses['s{}.{}'.format(
+                    i, name)] = (value * lw if 'loss' in name else value)
 
             # mask head forward and loss
             if self.with_mask:
-                mask_roi_extractor = self.mask_roi_extractor[i]
+                if not self.share_roi_extractor:
+                    mask_roi_extractor = self.mask_roi_extractor[i]
+                    pos_rois = bbox2roi(
+                        [res.pos_bboxes for res in sampling_results])
+                    mask_feats = mask_roi_extractor(
+                        x[:mask_roi_extractor.num_inputs], pos_rois)
+                    if self.with_shared_head:
+                        mask_feats = self.shared_head(mask_feats)
+                else:
+                    # reuse positive bbox feats
+                    pos_inds = []
+                    device = bbox_feats.device
+                    for res in sampling_results:
+                        pos_inds.append(
+                            torch.ones(
+                                res.pos_bboxes.shape[0],
+                                device=device,
+                                dtype=torch.uint8))
+                        pos_inds.append(
+                            torch.zeros(
+                                res.neg_bboxes.shape[0],
+                                device=device,
+                                dtype=torch.uint8))
+                    pos_inds = torch.cat(pos_inds)
+                    mask_feats = bbox_feats[pos_inds]
                 mask_head = self.mask_head[i]
-                pos_rois = bbox2roi(
-                    [res.pos_bboxes for res in sampling_results])
-                mask_feats = mask_roi_extractor(
-                    x[:mask_roi_extractor.num_inputs], pos_rois)
                 mask_pred = mask_head(mask_feats)
                 mask_targets = mask_head.get_target(sampling_results, gt_masks,
                                                     rcnn_train_cfg)
@@ -188,9 +222,8 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
                     [res.pos_gt_labels for res in sampling_results])
                 loss_mask = mask_head.loss(mask_pred, mask_targets, pos_labels)
                 for name, value in loss_mask.items():
-                    losses['s{}.{}'.format(i, name)] = (value * lw
-                                                        if 'loss' in name else
-                                                        value)
+                    losses['s{}.{}'.format(
+                        i, name)] = (value * lw if 'loss' in name else value)
 
             # refine bboxes
             if i < self.num_stages - 1:
@@ -224,6 +257,9 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
 
             bbox_feats = bbox_roi_extractor(
                 x[:len(bbox_roi_extractor.featmap_strides)], rois)
+            if self.with_shared_head:
+                bbox_feats = self.shared_head(bbox_feats)
+
             cls_score, bbox_pred = bbox_head(bbox_feats)
             ms_scores.append(cls_score)
 
@@ -254,6 +290,8 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
                         mask_feats = mask_roi_extractor(
                             x[:len(mask_roi_extractor.featmap_strides)],
                             mask_rois)
+                        if self.with_shared_head:
+                            mask_feats = self.shared_head(mask_feats)
                         mask_pred = mask_head(mask_feats)
                         segm_result = mask_head.get_seg_masks(
                             mask_pred, _bboxes, det_labels, rcnn_test_cfg,
@@ -292,6 +330,8 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
                     mask_roi_extractor = self.mask_roi_extractor[i]
                     mask_feats = mask_roi_extractor(
                         x[:len(mask_roi_extractor.featmap_strides)], mask_rois)
+                    if self.with_shared_head:
+                        mask_feats = self.shared_head(mask_feats)
                     mask_pred = self.mask_head[i](mask_feats)
                     aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                 merged_masks = merge_aug_masks(aug_masks,
diff --git a/mmdet/models/detectors/fast_rcnn.py b/mmdet/models/detectors/fast_rcnn.py
index 3ad7167f9fe18cfc5e26ea0e73ddd10aa9e13471..64c03916498cc4ce8cb28d2f4ce026f88dd55037 100644
--- a/mmdet/models/detectors/fast_rcnn.py
+++ b/mmdet/models/detectors/fast_rcnn.py
@@ -7,17 +7,19 @@ class FastRCNN(TwoStageDetector):
 
     def __init__(self,
                  backbone,
-                 neck,
                  bbox_roi_extractor,
                  bbox_head,
                  train_cfg,
                  test_cfg,
+                 neck=None,
+                 shared_head=None,
                  mask_roi_extractor=None,
                  mask_head=None,
                  pretrained=None):
         super(FastRCNN, self).__init__(
             backbone=backbone,
             neck=neck,
+            shared_head=shared_head,
             bbox_roi_extractor=bbox_roi_extractor,
             bbox_head=bbox_head,
             train_cfg=train_cfg,
diff --git a/mmdet/models/detectors/faster_rcnn.py b/mmdet/models/detectors/faster_rcnn.py
index 39f50cfcb81a6b2573311032e5b5a4d60143e5e1..b4e961b128b5ba1969aeb0618150acfb67a7d9c7 100644
--- a/mmdet/models/detectors/faster_rcnn.py
+++ b/mmdet/models/detectors/faster_rcnn.py
@@ -7,16 +7,18 @@ class FasterRCNN(TwoStageDetector):
 
     def __init__(self,
                  backbone,
-                 neck,
                  rpn_head,
                  bbox_roi_extractor,
                  bbox_head,
                  train_cfg,
                  test_cfg,
+                 neck=None,
+                 shared_head=None,
                  pretrained=None):
         super(FasterRCNN, self).__init__(
             backbone=backbone,
             neck=neck,
+            shared_head=shared_head,
             rpn_head=rpn_head,
             bbox_roi_extractor=bbox_roi_extractor,
             bbox_head=bbox_head,
diff --git a/mmdet/models/detectors/mask_rcnn.py b/mmdet/models/detectors/mask_rcnn.py
index 3cf44def7c1da075cd6e729ad2d3bca54211ac15..003e87f9817e1eeff8c5860395270ec7ebf8a0eb 100644
--- a/mmdet/models/detectors/mask_rcnn.py
+++ b/mmdet/models/detectors/mask_rcnn.py
@@ -7,7 +7,6 @@ class MaskRCNN(TwoStageDetector):
 
     def __init__(self,
                  backbone,
-                 neck,
                  rpn_head,
                  bbox_roi_extractor,
                  bbox_head,
@@ -15,10 +14,13 @@ class MaskRCNN(TwoStageDetector):
                  mask_head,
                  train_cfg,
                  test_cfg,
+                 neck=None,
+                 shared_head=None,
                  pretrained=None):
         super(MaskRCNN, self).__init__(
             backbone=backbone,
             neck=neck,
+            shared_head=shared_head,
             rpn_head=rpn_head,
             bbox_roi_extractor=bbox_roi_extractor,
             bbox_head=bbox_head,
diff --git a/mmdet/models/detectors/test_mixins.py b/mmdet/models/detectors/test_mixins.py
index 01d557d7b990707570bcffbdcfacd0e68a9a822f..709ee39ac2446bf78971960cb853fee00d599573 100644
--- a/mmdet/models/detectors/test_mixins.py
+++ b/mmdet/models/detectors/test_mixins.py
@@ -37,6 +37,8 @@ class BBoxTestMixin(object):
         rois = bbox2roi(proposals)
         roi_feats = self.bbox_roi_extractor(
             x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
+        if self.with_shared_head:
+            roi_feats = self.shared_head(roi_feats)
         cls_score, bbox_pred = self.bbox_head(roi_feats)
         img_shape = img_meta[0]['img_shape']
         scale_factor = img_meta[0]['scale_factor']
@@ -65,6 +67,8 @@ class BBoxTestMixin(object):
             # recompute feature maps to save GPU memory
             roi_feats = self.bbox_roi_extractor(
                 x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
+            if self.with_shared_head:
+                roi_feats = self.shared_head(roi_feats)
             cls_score, bbox_pred = self.bbox_head(roi_feats)
             bboxes, scores = self.bbox_head.get_det_bboxes(
                 rois,
@@ -106,6 +110,8 @@ class MaskTestMixin(object):
             mask_rois = bbox2roi([_bboxes])
             mask_feats = self.mask_roi_extractor(
                 x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois)
+            if self.with_shared_head:
+                mask_feats = self.shared_head(mask_feats)
             mask_pred = self.mask_head(mask_feats)
             segm_result = self.mask_head.get_seg_masks(
                 mask_pred, _bboxes, det_labels, self.test_cfg.rcnn, ori_shape,
@@ -127,6 +133,8 @@ class MaskTestMixin(object):
                 mask_feats = self.mask_roi_extractor(
                     x[:len(self.mask_roi_extractor.featmap_strides)],
                     mask_rois)
+                if self.with_shared_head:
+                    mask_feats = self.shared_head(mask_feats)
                 mask_pred = self.mask_head(mask_feats)
                 # convert to numpy array to save memory
                 aug_masks.append(mask_pred.sigmoid().cpu().numpy())
diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/two_stage.py
index 3dca618acfb6070a1798ef03141ab94423b52deb..0e05b16ace317596d405380f95b2a1c01c79a4bb 100644
--- a/mmdet/models/detectors/two_stage.py
+++ b/mmdet/models/detectors/two_stage.py
@@ -15,6 +15,7 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
     def __init__(self,
                  backbone,
                  neck=None,
+                 shared_head=None,
                  rpn_head=None,
                  bbox_roi_extractor=None,
                  bbox_head=None,
@@ -28,8 +29,9 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
 
         if neck is not None:
             self.neck = builder.build_neck(neck)
-        else:
-            raise NotImplementedError
+
+        if shared_head is not None:
+            self.shared_head = builder.build_shared_head(shared_head)
 
         if rpn_head is not None:
             self.rpn_head = builder.build_head(rpn_head)
@@ -40,8 +42,13 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
             self.bbox_head = builder.build_head(bbox_head)
 
         if mask_head is not None:
-            self.mask_roi_extractor = builder.build_roi_extractor(
-                mask_roi_extractor)
+            if mask_roi_extractor is not None:
+                self.mask_roi_extractor = builder.build_roi_extractor(
+                    mask_roi_extractor)
+                self.share_roi_extractor = False
+            else:
+                self.share_roi_extractor = True
+                self.mask_roi_extractor = self.bbox_roi_extractor
             self.mask_head = builder.build_head(mask_head)
 
         self.train_cfg = train_cfg
@@ -62,14 +69,17 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
                     m.init_weights()
             else:
                 self.neck.init_weights()
+        if self.with_shared_head:
+            self.shared_head.init_weights(pretrained=pretrained)
         if self.with_rpn:
             self.rpn_head.init_weights()
         if self.with_bbox:
             self.bbox_roi_extractor.init_weights()
             self.bbox_head.init_weights()
         if self.with_mask:
-            self.mask_roi_extractor.init_weights()
             self.mask_head.init_weights()
+            if not self.share_roi_extractor:
+                self.mask_roi_extractor.init_weights()
 
     def extract_feat(self, img):
         x = self.backbone(img)
@@ -130,6 +140,8 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
             # TODO: a more flexible way to decide which feature maps to use
             bbox_feats = self.bbox_roi_extractor(
                 x[:self.bbox_roi_extractor.num_inputs], rois)
+            if self.with_shared_head:
+                bbox_feats = self.shared_head(bbox_feats)
             cls_score, bbox_pred = self.bbox_head(bbox_feats)
 
             bbox_targets = self.bbox_head.get_target(
@@ -140,9 +152,29 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
 
         # mask head forward and loss
         if self.with_mask:
-            pos_rois = bbox2roi([res.pos_bboxes for res in sampling_results])
-            mask_feats = self.mask_roi_extractor(
-                x[:self.mask_roi_extractor.num_inputs], pos_rois)
+            if not self.share_roi_extractor:
+                pos_rois = bbox2roi(
+                    [res.pos_bboxes for res in sampling_results])
+                mask_feats = self.mask_roi_extractor(
+                    x[:self.mask_roi_extractor.num_inputs], pos_rois)
+                if self.with_shared_head:
+                    mask_feats = self.shared_head(mask_feats)
+            else:
+                pos_inds = []
+                device = bbox_feats.device
+                for res in sampling_results:
+                    pos_inds.append(
+                        torch.ones(
+                            res.pos_bboxes.shape[0],
+                            device=device,
+                            dtype=torch.uint8))
+                    pos_inds.append(
+                        torch.zeros(
+                            res.neg_bboxes.shape[0],
+                            device=device,
+                            dtype=torch.uint8))
+                pos_inds = torch.cat(pos_inds)
+                mask_feats = bbox_feats[pos_inds]
             mask_pred = self.mask_head(mask_feats)
 
             mask_targets = self.mask_head.get_target(
diff --git a/mmdet/models/mask_heads/fcn_mask_head.py b/mmdet/models/mask_heads/fcn_mask_head.py
index 6578e8cf4e8e94d860170b4409b1c2b9a5e3a19c..e1889f01d4daa25f33c0c93477d62d77e39afd66 100644
--- a/mmdet/models/mask_heads/fcn_mask_head.py
+++ b/mmdet/models/mask_heads/fcn_mask_head.py
@@ -53,11 +53,13 @@ class FCNMaskHead(nn.Module):
                     padding=padding,
                     normalize=normalize,
                     bias=self.with_bias))
+        upsample_in_channels = (self.conv_out_channels
+                                if self.num_convs > 0 else in_channels)
         if self.upsample_method is None:
             self.upsample = None
         elif self.upsample_method == 'deconv':
             self.upsample = nn.ConvTranspose2d(
-                self.conv_out_channels,
+                upsample_in_channels,
                 self.conv_out_channels,
                 self.upsample_ratio,
                 stride=self.upsample_ratio)
@@ -66,7 +68,10 @@ class FCNMaskHead(nn.Module):
                 scale_factor=self.upsample_ratio, mode=self.upsample_method)
 
         out_channels = 1 if self.class_agnostic else self.num_classes
-        self.conv_logits = nn.Conv2d(self.conv_out_channels, out_channels, 1)
+        logits_in_channel = (self.conv_out_channels
+                             if self.upsample_method == 'deconv' else
+                             upsample_in_channels)
+        self.conv_logits = nn.Conv2d(logits_in_channel, out_channels, 1)
         self.relu = nn.ReLU(inplace=True)
         self.debug_imgs = None
 
diff --git a/mmdet/models/registry.py b/mmdet/models/registry.py
index bab395d60597f264e86284da275a382074921861..fc14bc11b6c6ab468e9791c0303b9606c7eed3ec 100644
--- a/mmdet/models/registry.py
+++ b/mmdet/models/registry.py
@@ -22,9 +22,8 @@ class Registry(object):
             module (:obj:`nn.Module`): Module to be registered.
         """
         if not issubclass(module_class, nn.Module):
-            raise TypeError(
-                'module must be a child of nn.Module, but got {}'.format(
-                    module_class))
+            raise TypeError('module must be a child of nn.Module, but got {}'.
+                            format(module_class))
         module_name = module_class.__name__
         if module_name in self._module_dict:
             raise KeyError('{} is already registered in {}'.format(
@@ -39,5 +38,6 @@ class Registry(object):
 BACKBONES = Registry('backbone')
 NECKS = Registry('neck')
 ROI_EXTRACTORS = Registry('roi_extractor')
+SHARED_HEADS = Registry('shared_head')
 HEADS = Registry('head')
 DETECTORS = Registry('detector')
diff --git a/mmdet/models/shared_heads/__init__.py b/mmdet/models/shared_heads/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bbe70145b8bf7c304370f725f5afa8db98666679
--- /dev/null
+++ b/mmdet/models/shared_heads/__init__.py
@@ -0,0 +1,3 @@
+from .res_layer import ResLayer
+
+__all__ = ['ResLayer']
diff --git a/mmdet/models/shared_heads/res_layer.py b/mmdet/models/shared_heads/res_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..f42487eeb0ede2637f1ecfc5e29341c5bb3816fc
--- /dev/null
+++ b/mmdet/models/shared_heads/res_layer.py
@@ -0,0 +1,69 @@
+import logging
+
+import torch.nn as nn
+from mmcv.cnn import constant_init, kaiming_init
+from mmcv.runner import load_checkpoint
+
+from ..backbones import ResNet, make_res_layer
+from ..registry import SHARED_HEADS
+
+
+@SHARED_HEADS.register_module
+class ResLayer(nn.Module):
+
+    def __init__(self,
+                 depth,
+                 stage=3,
+                 stride=2,
+                 dilation=1,
+                 style='pytorch',
+                 normalize=dict(type='BN', frozen=False),
+                 norm_eval=True,
+                 with_cp=False,
+                 dcn=None):
+        super(ResLayer, self).__init__()
+        self.norm_eval = norm_eval
+        self.normalize = normalize
+        self.stage = stage
+        block, stage_blocks = ResNet.arch_settings[depth]
+        stage_block = stage_blocks[stage]
+        planes = 64 * 2**stage
+        inplanes = 64 * 2**(stage - 1) * block.expansion
+
+        res_layer = make_res_layer(
+            block,
+            inplanes,
+            planes,
+            stage_block,
+            stride=stride,
+            dilation=dilation,
+            style=style,
+            with_cp=with_cp,
+            normalize=self.normalize,
+            dcn=dcn)
+        self.add_module('layer{}'.format(stage + 1), res_layer)
+
+    def init_weights(self, pretrained=None):
+        if isinstance(pretrained, str):
+            logger = logging.getLogger()
+            load_checkpoint(self, pretrained, strict=False, logger=logger)
+        elif pretrained is None:
+            for m in self.modules():
+                if isinstance(m, nn.Conv2d):
+                    kaiming_init(m)
+                elif isinstance(m, nn.BatchNorm2d):
+                    constant_init(m, 1)
+        else:
+            raise TypeError('pretrained must be a str or None')
+
+    def forward(self, x):
+        res_layer = getattr(self, 'layer{}'.format(self.stage + 1))
+        out = res_layer(x)
+        return out
+
+    def train(self, mode=True):
+        super(ResLayer, self).train(mode)
+        if self.norm_eval:
+            for m in self.modules():
+                if isinstance(m, nn.BatchNorm2d):
+                    m.eval()