diff --git a/MODEL_ZOO.md b/MODEL_ZOO.md
index ea25a021ce708a4eb6b2fc45e8df2cd842c9efd9..7526d8309fdf50ee5dba6dec870c4568dfa46eb1 100644
--- a/MODEL_ZOO.md
+++ b/MODEL_ZOO.md
@@ -41,35 +41,50 @@ We released RPN, Faster R-CNN and Mask R-CNN models in the first version. More m
 | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | AR1000 | Download |
 |:--------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
 | R-50-FPN | caffe | 1x | 4.5 | 0.379 | 14.4 | 58.2 | - |
-| R-50-FPN | pytorch | 1x | 4.8 | 0.407 | 14.5 | 57.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_fpn_1x_20181010-4a9c0712.pth) \| [result](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/results/rpn_r50_fpn_1x_20181010_results.pkl.json) |
-| R-50-FPN | pytorch | 2x | 4.8 | 0.407 | 14.5 | 57.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_fpn_2x_20181010-88a4a471.pth) \| [result](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/results/rpn_r50_fpn_2x_20181010_results.pkl.json) |
+| R-50-FPN | pytorch | 1x | 4.8 | 0.407 | 14.5 | 57.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_fpn_1x_20181010-4a9c0712.pth) |
+| R-50-FPN | pytorch | 2x | 4.8 | 0.407 | 14.5 | 57.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_fpn_2x_20181010-88a4a471.pth) |
+| R-101-FPN | caffe | 1x | 7.4 | 0.513 | 11.1 | 59.4 | - |
+| R-101-FPN | pytorch | 1x | 8.0 | 0.552 | 11.1 | 58.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r101_fpn_1x_20181129-f50da4bd.pth) |
+| R-101-FPN | pytorch | 2x | 8.0 | 0.552 | 11.1 | 59.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r101_fpn_2x_20181129-e42c6c9a.pth) |

 ### Faster R-CNN

 | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
 |:--------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
 | R-50-FPN | caffe | 1x | 4.9 | 0.525 | 10.0 | 36.7 | - |
-| R-50-FPN | pytorch | 1x | 5.1 | 0.554 | 9.9 | 36.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth) \| [result](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/results/faster_rcnn_r50_fpn_1x_20181010_results.pkl.json) |
-| R-50-FPN | pytorch | 2x | 5.1 | 0.554 | 9.9 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_fpn_2x_20181010-443129e1.pth) \| [result](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/results/faster_rcnn_r50_fpn_2x_20181010_results.pkl.json) |
+| R-50-FPN | pytorch | 1x | 5.1 | 0.554 | 9.9 | 36.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth) |
+| R-50-FPN | pytorch | 2x | 5.1 | 0.554 | 9.9 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_fpn_2x_20181010-443129e1.pth) |
+| R-101-FPN | caffe | 1x | 7.4 | 0.663 | 8.4 | 38.8 | - |
+| R-101-FPN | pytorch | 1x | 8.0 | 0.698 | 8.3 | 38.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r101_fpn_1x_20181129-d1468807.pth) |
+| R-101-FPN | pytorch | 2x | 8.0 | 0.698 | 8.3 | 39.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r101_fpn_2x_20181129-73e7ade7.pth) |

 ### Mask R-CNN

 | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
 |:--------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
 | R-50-FPN | caffe | 1x | 5.9 | 0.658 | 7.7 | 37.5 | 34.4 | - |
-| R-50-FPN | pytorch | 1x | 5.8 | 0.690 | 7.7 | 37.3 | 34.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth) \| [result](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/results/mask_rcnn_r50_fpn_1x_20181010_results.pkl.json) |
-| R-50-FPN | pytorch | 2x | 5.8 | 0.690 | 7.7 | 38.6 | 35.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_2x_20181010-41d35c05.pth) \| [result](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/results/mask_rcnn_r50_fpn_2x_20181010_results.pkl.json) |
+| R-50-FPN | pytorch | 1x | 5.8 | 0.690 | 7.7 | 37.3 | 34.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth) |
+| R-50-FPN | pytorch | 2x | 5.8 | 0.690 | 7.7 | 38.6 | 35.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_2x_20181010-41d35c05.pth) |
+| R-101-FPN | caffe | 1x | 8.8 | 0.791 | 7.0 | 39.9 | 36.1 | - |
+| R-101-FPN | pytorch | 1x | 9.1 | 0.825 | 6.7 | 39.4 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_1x_20181129-34ad1961.pth) |
+| R-101-FPN | pytorch | 2x | 9.1 | 0.825 | 6.7 | 40.4 | 36.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_2x_20181129-a254bdfc.pth) |

 ### Fast R-CNN (with pre-computed proposals)

 | Backbone | Style | Type | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
 |:--------:|:-------:|:------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
-| R-50-FPN | caffe | Faster | 1x | 3.5 | 0.35 | 14.6 | 36.6 | - | - |
-| R-50-FPN | pytorch | Faster | 1x | 4.0 | 0.38 | 14.5 | 35.8 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_fpn_1x_20181010-08160859.pth) \| [result](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/results/fast_rcnn_r50_fpn_1x_20181010_results.pkl.json) |
-| R-50-FPN | pytorch | Faster | 2x | 4.0 | 0.38 | 14.5 | 37.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_fpn_2x_20181010-d263ada5.pth) \| [result](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/results/fast_rcnn_r50_fpn_2x_20181010_results.pkl.json) |
-| R-50-FPN | caffe | Mask | 1x | 5.4 | 0.47 | 10.7 | 37.3 | 34.5 | - |
-| R-50-FPN | pytorch | Mask | 1x | 5.3 | 0.50 | 10.6 | 36.8 | 34.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_fpn_1x_20181010-e030a38f.pth) \| [result](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/results/fast_mask_rcnn_r50_fpn_1x_20181010_results.pkl.json) |
-| R-50-FPN | pytorch | Mask | 2x | 5.3 | 0.50 | 10.6 | 37.9 | 34.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_fpn_2x_20181010-5048cb03.pth) \| [result](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/results/fast_mask_rcnn_r50_fpn_2x_20181010_results.pkl.json) |
+| R-50-FPN | caffe | Faster | 1x | 3.5 | 0.348 | 14.6 | 36.6 | - | - |
+| R-50-FPN | pytorch | Faster | 1x | 4.0 | 0.375 | 14.5 | 35.8 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_fpn_1x_20181010-08160859.pth) |
+| R-50-FPN | pytorch | Faster | 2x | 4.0 | 0.375 | 14.5 | 37.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_fpn_2x_20181010-d263ada5.pth) |
+| R-101-FPN| caffe | Faster | 1x | 7.1 | 0.484 | 11.9 | 38.4 | - | - |
+| R-101-FPN| pytorch | Faster | 1x | 7.6 | 0.540 | 11.8 | 38.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r101_fpn_1x_20181129-ffaa2eb0.pth) |
+| R-101-FPN| pytorch | Faster | 2x | 7.6 | 0.540 | 11.8 | 38.8 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r101_fpn_2x_20181129-9dba92ce.pth) |
+| R-50-FPN | caffe | Mask | 1x | 5.4 | 0.473 | 10.7 | 37.3 | 34.5 | - |
+| R-50-FPN | pytorch | Mask | 1x | 5.3 | 0.504 | 10.6 | 36.8 | 34.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_fpn_1x_20181010-e030a38f.pth) |
+| R-50-FPN | pytorch | Mask | 2x | 5.3 | 0.504 | 10.6 | 37.9 | 34.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_fpn_2x_20181010-5048cb03.pth) |
+| R-101-FPN| caffe | Mask | 1x | 8.6 | 0.607 | 9.5 | 39.4 | 36.1 | - |
+| R-101-FPN| pytorch | Mask | 1x | 9.0 | 0.656 | 9.3 | 38.9 | 35.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r101_fpn_1x_20181129-2273fa9b.pth) |
+| R-101-FPN| pytorch | Mask | 2x | 9.0 | 0.656 | 9.3 | 39.9 | 36.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r101_fpn_2x_20181129-bf63ec5e.pth) |

 ### RetinaNet
@@ -78,6 +93,9 @@ We released RPN, Faster R-CNN and Mask R-CNN models in the first version. More m
 | R-50-FPN | caffe | 1x | 6.7 | 0.468 | 9.4 | 35.8 | - |
 | R-50-FPN | pytorch | 1x | 6.9 | 0.496 | 9.1 | 35.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r50_fpn_1x_20181125-3d3c2142.pth) |
 | R-50-FPN | pytorch | 2x | 6.9 | 0.496 | 9.1 | 36.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r50_fpn_2x_20181125-e0dbec97.pth) |
+| R-101-FPN | caffe | 1x | 9.2 | 0.614 | 8.2 | 37.8 | - |
+| R-101-FPN | pytorch | 1x | 9.6 | 0.643 | 8.1 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r101_fpn_1x_20181129-f738a02f.pth) |
+| R-101-FPN | pytorch | 2x | 9.6 | 0.643 | 8.1 | 38.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r101_fpn_2x_20181129-f654534b.pth) |

 ### Cascade R-CNN
@@ -86,6 +104,9 @@ We released RPN, Faster R-CNN and Mask R-CNN models in the first version. More m
 | R-50-FPN | caffe | 1x | 5.0 | 0.592 | 8.1 | 40.3 | - |
 | R-50-FPN | pytorch | 1x | 5.5 | 0.622 | 8.0 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r50_fpn_1x_20181123-b1987c4a.pth) |
 | R-50-FPN | pytorch | 20e | 5.5 | 0.622 | 8.0 | 41.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r50_fpn_20e_20181123-db483a09.pth) |
+| R-101-FPN | caffe | 1x | 8.5 | 0.731 | 7.0 | 42.2 | - |
+| R-101-FPN | pytorch | 1x | 8.7 | 0.766 | 6.9 | 42.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r101_fpn_1x_20181129-d64ebac7.pth) |
+| R-101-FPN | pytorch | 20e | 8.7 | 0.766 | 6.9 | 42.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r101_fpn_20e_20181129-b46dcede.pth) |

 ### Cascade Mask R-CNN
@@ -94,6 +115,9 @@ We released RPN, Faster R-CNN and Mask R-CNN models in the first version. More m
 | R-50-FPN | caffe | 1x | 7.5 | 0.880 | 5.8 | 41.0 | 35.6 | - |
 | R-50-FPN | pytorch | 1x | 7.6 | 0.910 | 5.7 | 41.3 | 35.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_fpn_1x_20181123-88b170c9.pth) |
 | R-50-FPN | pytorch | 20e | 7.6 | 0.910 | 5.7 | 42.4 | 36.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_fpn_20e_20181123-6e0c9713.pth) |
+| R-101-FPN | caffe | 1x | 10.5 | 1.024 | 5.3 | 43.1 | 37.3 | - |
+| R-101-FPN | pytorch | 1x | 10.9 | 1.055 | 5.2 | 42.7 | 37.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r101_fpn_1x_20181129-64f00602.pth) |
+| R-101-FPN | pytorch | 20e | 10.9 | 1.055 | 5.2 | 43.4 | 37.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r101_fpn_20e_20181129-cb85151d.pth) |

 ## Comparison with Detectron
diff --git a/configs/cascade_mask_rcnn_r101_fpn_1x.py b/configs/cascade_mask_rcnn_r101_fpn_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..2cb289e096638bcbee8f42626bb5cfc025713fb6
--- /dev/null
+++ b/configs/cascade_mask_rcnn_r101_fpn_1x.py
@@ -0,0 +1,227 @@
+# model settings
+model = dict(
+    type='CascadeRCNN',
+    num_stages=3,
+    pretrained='modelzoo://resnet101',
+    backbone=dict(
+        type='ResNet',
+        depth=101,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        style='pytorch'),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        num_outs=5),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=256,
+        feat_channels=256,
+        anchor_scales=[8],
+        anchor_ratios=[0.5, 1.0, 2.0],
+        anchor_strides=[4, 8, 16, 32, 64],
+        target_means=[.0, .0, .0, .0],
+        target_stds=[1.0, 1.0, 1.0, 1.0],
+        use_sigmoid_cls=True),
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
+        out_channels=256,
+        featmap_strides=[4, 8, 16, 32]),
+    bbox_head=[
+        dict(
+            type='SharedFCBBoxHead',
+            num_fcs=2,
+            in_channels=256,
+            fc_out_channels=1024,
+            roi_feat_size=7,
+            num_classes=81,
+            target_means=[0., 0., 0., 0.],
+            target_stds=[0.1, 0.1, 0.2, 0.2],
+            reg_class_agnostic=True),
+        dict(
+            type='SharedFCBBoxHead',
+            num_fcs=2,
+            in_channels=256,
+            fc_out_channels=1024,
+            roi_feat_size=7,
+            num_classes=81,
+            target_means=[0., 0., 0., 0.],
+            target_stds=[0.05, 0.05, 0.1, 0.1],
+            reg_class_agnostic=True),
+        dict(
+            type='SharedFCBBoxHead',
+            num_fcs=2,
+            in_channels=256,
+            fc_out_channels=1024,
+
roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067], + reg_class_agnostic=True) + ], + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=81)) +# model training and testing settings +train_cfg = dict( + rpn=dict( + assigner=dict( + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False, + pos_balance_sampling=False, + neg_balance_thr=0), + allowed_border=0, + pos_weight=-1, + smoothl1_beta=1 / 9.0, + debug=False), + rcnn=[ + dict( + assigner=dict( + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1), + sampler=dict( + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + pos_balance_sampling=False, + neg_balance_thr=0), + mask_size=28, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + pos_iou_thr=0.6, + neg_iou_thr=0.6, + min_pos_iou=0.6, + ignore_iof_thr=-1), + sampler=dict( + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + pos_balance_sampling=False, + neg_balance_thr=0), + mask_size=28, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.7, + ignore_iof_thr=-1), + sampler=dict( + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + pos_balance_sampling=False, + neg_balance_thr=0), + mask_size=28, + pos_weight=-1, + debug=False) + ], + stage_loss_weights=[1, 0.5, 0.25]) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_thr=0.5), + max_per_img=100, + mask_thr_binary=0.5), + keep_all_stages=False) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5, + with_mask=True, + with_crowd=True, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=True, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=True, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # 
dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/cascade_mask_rcnn_r101_fpn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/cascade_rcnn_r101_fpn_1x.py b/configs/cascade_rcnn_r101_fpn_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..e30370bbe85cb6460c2e0be9c3edb04bde1a7fb2 --- /dev/null +++ b/configs/cascade_rcnn_r101_fpn_1x.py @@ -0,0 +1,210 @@ +# model settings +model = dict( + type='CascadeRCNN', + num_stages=3, + pretrained='modelzoo://resnet101', + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_scales=[8], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + use_sigmoid_cls=True), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=[ + dict( + type='SharedFCBBoxHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=True), + dict( + type='SharedFCBBoxHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1], + reg_class_agnostic=True), + dict( + type='SharedFCBBoxHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067], + reg_class_agnostic=True) + ]) +# model training and testing settings +train_cfg = dict( + rpn=dict( + assigner=dict( + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False, + pos_balance_sampling=False, + neg_balance_thr=0), + allowed_border=0, + pos_weight=-1, + smoothl1_beta=1 / 9.0, + debug=False), + rcnn=[ + dict( + assigner=dict( + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1), + sampler=dict( + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + pos_balance_sampling=False, + neg_balance_thr=0), + pos_weight=-1, + debug=False), + dict( + assigner=dict( + pos_iou_thr=0.6, + neg_iou_thr=0.6, + min_pos_iou=0.6, + ignore_iof_thr=-1), + sampler=dict( + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + pos_balance_sampling=False, + neg_balance_thr=0), + pos_weight=-1, + debug=False), + dict( + assigner=dict( + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.7, + ignore_iof_thr=-1), + sampler=dict( + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + pos_balance_sampling=False, + neg_balance_thr=0), + pos_weight=-1, + debug=False) + ], + stage_loss_weights=[1, 0.5, 0.25]) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100), + keep_all_stages=False) +# 
dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5, + with_mask=False, + with_crowd=True, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/cascade_rcnn_r101_fpn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/fast_mask_rcnn_r101_fpn_1x.py b/configs/fast_mask_rcnn_r101_fpn_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..342d775ec62b542a0e5fed90cd62d65e3d05c2d3 --- /dev/null +++ b/configs/fast_mask_rcnn_r101_fpn_1x.py @@ -0,0 +1,137 @@ +# model settings +model = dict( + type='FastRCNN', + pretrained='modelzoo://resnet101', + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='SharedFCBBoxHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False), + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=81)) +# model training and testing settings +train_cfg = dict( + rcnn=dict( + assigner=dict( + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1), + sampler=dict( + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + pos_balance_sampling=False, + neg_balance_thr=0), + mask_size=28, + pos_weight=-1, + debug=False)) +test_cfg = dict( + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_thr=0.5), + max_per_img=100, + mask_thr_binary=0.5)) +# dataset 
settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', + flip_ratio=0.5, + with_mask=True, + with_crowd=True, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', + size_divisor=32, + flip_ratio=0, + with_mask=True, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/fast_mask_rcnn_r101_fpn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/fast_rcnn_r101_fpn_1x.py b/configs/fast_rcnn_r101_fpn_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..66b7e8c942a2dc49bb0432ae362c921273c0dac3 --- /dev/null +++ b/configs/fast_rcnn_r101_fpn_1x.py @@ -0,0 +1,122 @@ +# model settings +model = dict( + type='FastRCNN', + pretrained='modelzoo://resnet101', + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='SharedFCBBoxHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False)) +# model training and testing settings +train_cfg = dict( + rcnn=dict( + assigner=dict( + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1), + sampler=dict( + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + pos_balance_sampling=False, + neg_balance_thr=0), + pos_weight=-1, + debug=False)) +test_cfg = dict( + rcnn=dict( + score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], 
to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', + flip_ratio=0.5, + with_mask=False, + with_crowd=True, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/fast_rcnn_r101_fpn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/faster_rcnn_r101_fpn_1x.py b/configs/faster_rcnn_r101_fpn_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..49813d180781aaa4a5c822fbc5a5889ff5fb708f --- /dev/null +++ b/configs/faster_rcnn_r101_fpn_1x.py @@ -0,0 +1,156 @@ +# model settings +model = dict( + type='FasterRCNN', + pretrained='modelzoo://resnet101', + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_scales=[8], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + use_sigmoid_cls=True), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='SharedFCBBoxHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False)) +# model training and testing settings +train_cfg = dict( + rpn=dict( + assigner=dict( + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False, + pos_balance_sampling=False, + neg_balance_thr=0), + allowed_border=0, + pos_weight=-1, + smoothl1_beta=1 / 9.0, + debug=False), + rcnn=dict( + assigner=dict( + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1), 
+ sampler=dict( + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + pos_balance_sampling=False, + neg_balance_thr=0), + pos_weight=-1, + debug=False)) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) + # soft-nms is also supported for rcnn testing + # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) +) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5, + with_mask=False, + with_crowd=True, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/faster_rcnn_r101_fpn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/mask_rcnn_r101_fpn_1x.py b/configs/mask_rcnn_r101_fpn_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..65a93e28e6d18ed94c8135b1b525e3c18bb7c981 --- /dev/null +++ b/configs/mask_rcnn_r101_fpn_1x.py @@ -0,0 +1,168 @@ +# model settings +model = dict( + type='MaskRCNN', + pretrained='modelzoo://resnet101', + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_scales=[8], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + use_sigmoid_cls=True), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='SharedFCBBoxHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False), + mask_roi_extractor=dict( + 
type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=81)) +# model training and testing settings +train_cfg = dict( + rpn=dict( + assigner=dict( + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False, + pos_balance_sampling=False, + neg_balance_thr=0), + allowed_border=0, + pos_weight=-1, + smoothl1_beta=1 / 9.0, + debug=False), + rcnn=dict( + assigner=dict( + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1), + sampler=dict( + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + pos_balance_sampling=False, + neg_balance_thr=0), + mask_size=28, + pos_weight=-1, + debug=False)) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_thr=0.5), + max_per_img=100, + mask_thr_binary=0.5)) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5, + with_mask=True, + with_crowd=True, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=True, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/mask_rcnn_r101_fpn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/retinanet_r101_fpn_1x.py b/configs/retinanet_r101_fpn_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..fd4dd9a6d65d8cb700749a37b23c290570c1af17 --- /dev/null +++ b/configs/retinanet_r101_fpn_1x.py @@ -0,0 +1,116 @@ +# model settings +model = dict( + type='RetinaNet', + pretrained='modelzoo://resnet101', + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + 
add_extra_convs=True, + num_outs=5), + bbox_head=dict( + type='RetinaHead', + num_classes=81, + in_channels=256, + stacked_convs=4, + feat_channels=256, + octave_base_scale=4, + scales_per_octave=3, + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[8, 16, 32, 64, 128], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0])) +# training and testing settings +train_cfg = dict( + assigner=dict( + pos_iou_thr=0.5, neg_iou_thr=0.4, min_pos_iou=0, ignore_iof_thr=-1), + smoothl1_beta=0.11, + gamma=2.0, + alpha=0.25, + allowed_border=-1, + pos_weight=-1, + debug=False) +test_cfg = dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_thr=0.5), + max_per_img=100) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5, + with_mask=False, + with_crowd=False, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_crowd=False, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_crowd=False, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +device_ids = range(8) +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/retinanet_r101_fpn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/rpn_r101_fpn_1x.py b/configs/rpn_r101_fpn_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..90f260d60f81e0fce371156b252f59936cb1f6a0 --- /dev/null +++ b/configs/rpn_r101_fpn_1x.py @@ -0,0 +1,121 @@ +# model settings +model = dict( + type='RPN', + pretrained='modelzoo://resnet101', + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_scales=[8], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + use_sigmoid_cls=True)) +# model training and testing settings +train_cfg = dict( + rpn=dict( + assigner=dict( + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + num=256, + pos_fraction=0.5, + 
neg_pos_ub=-1, + add_gt_as_proposals=False, + pos_balance_sampling=False, + neg_balance_thr=0), + allowed_border=0, + pos_weight=-1, + smoothl1_beta=1 / 9.0, + debug=False)) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0)) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5, + with_mask=False, + with_crowd=False, + with_label=False), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_crowd=False, + with_label=False), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +# runner configs +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/rpn_r101_fpn_1x' +load_from = None +resume_from = None +workflow = [('train', 1)]
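Each new R-101 config in this patch mirrors its R-50 counterpart, changing only the backbone `depth=101`, the `pretrained='modelzoo://resnet101'` weights, and the `work_dir`. As a minimal sketch of how one of these configs pairs with a checkpoint from the MODEL_ZOO tables above, model construction might look like the following; it assumes the `mmcv.Config.fromfile`, `mmdet.models.build_detector`, and `mmcv.runner.load_checkpoint` entry points of this release, plus a hypothetical local `checkpoints/` directory, and is not part of the patch itself:

```python
# Minimal sketch (not part of this patch): build an R-101 detector from one of
# the new configs and load the matching model-zoo checkpoint.
# Assumptions: Config.fromfile / build_detector / load_checkpoint behave as in
# this release's tools/train.py and tools/test.py; the checkpoint path below is
# a hypothetical local copy of the file linked in MODEL_ZOO.md.
import mmcv
from mmcv.runner import load_checkpoint
from mmdet.models import build_detector

cfg = mmcv.Config.fromfile('configs/faster_rcnn_r101_fpn_1x.py')
# train_cfg is not needed for inference; test_cfg carries the NMS/score thresholds.
model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
load_checkpoint(model, 'checkpoints/faster_rcnn_r101_fpn_1x_20181129-d1468807.pth')
model.eval()
```

Training and evaluation go through the usual `tools/train.py` / `tools/dist_train.sh` and `tools/test.py` entry points; nothing in the runtime settings of these configs differs from the R-50 versions apart from the backbone and the work directory.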