diff --git a/configs/cascade_mask_rcnn_r101_fpn_1x.py b/configs/cascade_mask_rcnn_r101_fpn_1x.py index 1b63d2b2703f83f94b1a3cb30d86ca228582b6ce..0ad9c88207b061ed6be72a35fb4687081fc72934 100644 --- a/configs/cascade_mask_rcnn_r101_fpn_1x.py +++ b/configs/cascade_mask_rcnn_r101_fpn_1x.py @@ -44,13 +44,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -62,13 +57,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -80,13 +70,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ], mask_roi_extractor=dict( type='SingleRoIExtractor', @@ -196,6 +181,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', 
**img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -203,35 +213,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/cascade_mask_rcnn_r50_caffe_c4_1x.py b/configs/cascade_mask_rcnn_r50_caffe_c4_1x.py index cdf5ff03650848bcd19199cdb57ec26421a264db..dd5f356b4df35256b4ae9e0a1911907667cb4633 100644 --- a/configs/cascade_mask_rcnn_r50_caffe_c4_1x.py +++ b/configs/cascade_mask_rcnn_r50_caffe_c4_1x.py @@ -52,13 +52,8 @@ model = 
dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='BBoxHead', with_avg_pool=True, @@ -69,13 +64,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='BBoxHead', with_avg_pool=True, @@ -86,13 +76,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ], mask_roi_extractor=None, mask_head=dict( @@ -198,42 +183,49 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + 
dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( - imgs_per_gpu=1, + imgs_per_gpu=2, workers_per_gpu=2, train=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/cascade_mask_rcnn_r50_fpn_1x.py b/configs/cascade_mask_rcnn_r50_fpn_1x.py index 39450b812b4f8d8420c3161c03b506626d044174..c9f007ed653b76b09d9e680143a5fa9bb5261af4 100644 --- a/configs/cascade_mask_rcnn_r50_fpn_1x.py +++ b/configs/cascade_mask_rcnn_r50_fpn_1x.py @@ -44,13 +44,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 
loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -62,13 +57,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -80,13 +70,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ], mask_roi_extractor=dict( type='SingleRoIExtractor', @@ -196,6 +181,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -203,35 +213,17 @@ data = dict( type=dataset_type, 
ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/cascade_mask_rcnn_x101_32x4d_fpn_1x.py b/configs/cascade_mask_rcnn_x101_32x4d_fpn_1x.py index e8a31d0549e54d6d5ff7f390ac8b2bf4c5897a5b..3167be4c340c5feddf09fce9984a09542ae9370d 100644 --- a/configs/cascade_mask_rcnn_x101_32x4d_fpn_1x.py +++ b/configs/cascade_mask_rcnn_x101_32x4d_fpn_1x.py @@ -46,13 +46,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -64,13 +59,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - 
loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -82,13 +72,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ], mask_roi_extractor=dict( type='SingleRoIExtractor', @@ -198,6 +183,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -205,35 +215,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( 
type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/cascade_mask_rcnn_x101_64x4d_fpn_1x.py b/configs/cascade_mask_rcnn_x101_64x4d_fpn_1x.py index 1661a105201c77c128623b8579db3aa3181a189b..0c5434ead4d6df865180cf4edb0b45c73b54cb5f 100644 --- a/configs/cascade_mask_rcnn_x101_64x4d_fpn_1x.py +++ b/configs/cascade_mask_rcnn_x101_64x4d_fpn_1x.py @@ -46,13 +46,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -64,13 +59,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -82,13 +72,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, 
loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ], mask_roi_extractor=dict( type='SingleRoIExtractor', @@ -198,6 +183,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -205,35 +215,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( 
type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/cascade_rcnn_r101_fpn_1x.py b/configs/cascade_rcnn_r101_fpn_1x.py index a09131404a30b0f59a09002c3de179660d35100c..a790c2bd55bf9e0b7205876d6805b909de75f6a3 100644 --- a/configs/cascade_rcnn_r101_fpn_1x.py +++ b/configs/cascade_rcnn_r101_fpn_1x.py @@ -44,13 +44,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -62,13 +57,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -80,13 +70,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ]) # model training and testing settings train_cfg = dict( @@ 
-177,6 +162,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -184,35 +194,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/cascade_rcnn_r50_caffe_c4_1x.py b/configs/cascade_rcnn_r50_caffe_c4_1x.py index 978724b9d428be44bea6f8a92a002ad706757d94..0dd10abb00b993e8e51c51d92780ae914f05dd08 100644 --- a/configs/cascade_rcnn_r50_caffe_c4_1x.py +++ b/configs/cascade_rcnn_r50_caffe_c4_1x.py @@ -52,13 +52,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='BBoxHead', with_avg_pool=True, @@ -69,13 +64,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='BBoxHead', with_avg_pool=True, @@ -86,13 +76,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), ]) # model training and testing settings train_cfg = dict( @@ -186,42 +171,49 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + 
dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( - imgs_per_gpu=1, + imgs_per_gpu=2, workers_per_gpu=2, train=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/cascade_rcnn_r50_fpn_1x.py b/configs/cascade_rcnn_r50_fpn_1x.py index 045f0eb28d9ac53c2f20ae3fe665897bdbfcbd5e..96269fab0051dcf6ca591b305f18d1bf7ed2e090 100644 --- a/configs/cascade_rcnn_r50_fpn_1x.py +++ b/configs/cascade_rcnn_r50_fpn_1x.py @@ -44,13 +44,8 
@@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -62,13 +57,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -80,13 +70,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ]) # model training and testing settings train_cfg = dict( @@ -177,6 +162,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', 
keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -184,35 +194,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/cascade_rcnn_x101_32x4d_fpn_1x.py b/configs/cascade_rcnn_x101_32x4d_fpn_1x.py index 7ce7aab875fbd08a6eb700aeb786ecb8bce50456..6de3d37d7177423e7347a7c62aa2c5d1b80e8b36 100644 --- a/configs/cascade_rcnn_x101_32x4d_fpn_1x.py +++ b/configs/cascade_rcnn_x101_32x4d_fpn_1x.py @@ -46,13 +46,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 
loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -64,13 +59,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -82,13 +72,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ]) # model training and testing settings train_cfg = dict( @@ -179,6 +164,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -186,35 +196,17 @@ data = dict( type=dataset_type, ann_file=data_root + 
'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/cascade_rcnn_x101_64x4d_fpn_1x.py b/configs/cascade_rcnn_x101_64x4d_fpn_1x.py index 401dfac8ca17dc1f08eb92fc7d561d623374e36a..d6e9d1f6f12dc0c8583d1d997703c2a94b6ae369 100644 --- a/configs/cascade_rcnn_x101_64x4d_fpn_1x.py +++ b/configs/cascade_rcnn_x101_64x4d_fpn_1x.py @@ -46,13 +46,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -64,13 +59,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', 
use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -82,13 +72,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ]) # model training and testing settings train_cfg = dict( @@ -179,6 +164,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -186,35 +196,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', 
img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py b/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py index 0ccacd201fffe1343adca24b87270dfa228186cd..2f2c03556275ea88f6230ff5dc52474c5ce75127 100644 --- a/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py +++ b/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py @@ -102,49 +102,56 @@ dataset_type = 'CityscapesDataset' data_root = 'data/cityscapes/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( 
imgs_per_gpu=1, workers_per_gpu=2, train=dict( - type='RepeatDataset', # to avoid reloading datasets frequently + type='RepeatDataset', times=8, dataset=dict( type=dataset_type, ann_file=data_root + 'annotations/instancesonly_filtered_gtFine_train.json', img_prefix=data_root + 'train/', - img_scale=[(2048, 800), (2048, 1024)], - img_norm_cfg=img_norm_cfg, - multiscale_mode='range', - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True)), + pipeline=train_pipeline)), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instancesonly_filtered_gtFine_val.json', img_prefix=data_root + 'val/', - img_scale=(2048, 1024), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instancesonly_filtered_gtFine_val.json', img_prefix=data_root + 'val/', - img_scale=(2048, 1024), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer # lr is set for a batch size of 8 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py b/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py index 85f32f7e809a76ea72690cd4763bcc9ababd2f91..84082506674c5f16f1c6a040095e89abf34e4e69 100644 --- a/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py +++ b/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py @@ -116,49 +116,56 @@ dataset_type = 'CityscapesDataset' data_root = 'data/cityscapes/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), + dict(type='RandomFlip', 
flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=1, workers_per_gpu=2, train=dict( - type='RepeatDataset', # to avoid reloading datasets frequently + type='RepeatDataset', times=8, dataset=dict( type=dataset_type, ann_file=data_root + 'annotations/instancesonly_filtered_gtFine_train.json', img_prefix=data_root + 'train/', - img_scale=[(2048, 800), (2048, 1024)], - img_norm_cfg=img_norm_cfg, - multiscale_mode='range', - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True)), + pipeline=train_pipeline)), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instancesonly_filtered_gtFine_val.json', img_prefix=data_root + 'val/', - img_scale=(2048, 1024), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instancesonly_filtered_gtFine_val.json', img_prefix=data_root + 'val/', - img_scale=(2048, 1024), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer # lr is set for a batch size of 8 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py b/configs/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py index 
2865922a86e930d275f23e43f112f386667d5221..27476d3a9a0434f5a458e8f15eb7cb833fa742cf 100644 --- a/configs/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py +++ b/configs/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py @@ -184,6 +184,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -191,35 +216,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 
800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py b/configs/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py index d3ea852f391b45315a12965b3e787b0a14ea5f29..9f9f10cf0ad7318ca7edebd9994a90ed43ff424f 100644 --- a/configs/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py +++ b/configs/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py @@ -11,9 +11,7 @@ model = dict( frozen_stages=1, style='pytorch', dcn=dict( - modulated=False, - deformable_groups=1, - fallback_on_stride=False), + modulated=False, deformable_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True)), neck=dict( type='FPN', @@ -49,13 +47,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -67,13 +60,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -85,13 +73,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - 
loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ]) # model training and testing settings train_cfg = dict( @@ -182,6 +165,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -189,35 +197,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - 
size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py b/configs/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py index 0f028991acf18aaaed5ff9bf07b0efbec310e97e..11c7dd35e6aee4503de4e4bacb8013434dad8c6f 100644 --- a/configs/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py +++ b/configs/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py @@ -105,6 +105,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -112,35 +137,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', 
img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py b/configs/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py index 31aa1981700a23080098be8e0e31849a88f79862..9156b0d8ba6f509c593b6ce4613c2566cb958a93 100644 --- a/configs/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py +++ b/configs/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py @@ -110,6 +110,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ 
-117,35 +142,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/dcn/faster_rcnn_dpool_r50_fpn_1x.py b/configs/dcn/faster_rcnn_dpool_r50_fpn_1x.py index ec3872205fc1dcfcc15b0a4074f36afdd8516cb4..bddcce40cd2cee9f9f9a7ebee34e464cb69f3e01 100644 --- a/configs/dcn/faster_rcnn_dpool_r50_fpn_1x.py +++ b/configs/dcn/faster_rcnn_dpool_r50_fpn_1x.py @@ -108,6 +108,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + 
type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -115,35 +140,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py b/configs/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py index 1aca07ed27ee6b55d08207ecbf4879a284001646..484b4aff10782debbace72a218bf538f9785fefd 100644 --- a/configs/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py +++ b/configs/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py @@ -105,6 +105,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + 
dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -112,35 +137,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/dcn/faster_rcnn_mdpool_r50_fpn_1x.py b/configs/dcn/faster_rcnn_mdpool_r50_fpn_1x.py index 
bb8d3c4d26d2ca5303ddc50698ec254c72122f27..fba0b330bfeefd55f3824658dc2f8b777e98edb3 100644 --- a/configs/dcn/faster_rcnn_mdpool_r50_fpn_1x.py +++ b/configs/dcn/faster_rcnn_mdpool_r50_fpn_1x.py @@ -108,6 +108,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -115,35 +140,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - 
size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py b/configs/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py index 6f910aa22f4ab03d669816287ec19d01db1e729e..c3de699a1c4aff0551830aa4e034edddeb15fd84 100644 --- a/configs/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py +++ b/configs/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py @@ -10,9 +10,7 @@ model = dict( frozen_stages=1, style='pytorch', dcn=dict( - modulated=False, - deformable_groups=1, - fallback_on_stride=False), + modulated=False, deformable_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True)), neck=dict( type='FPN', @@ -121,6 +119,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -128,35 +151,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', 
img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/double_heads/dh_faster_rcnn_r50_fpn_1x.py b/configs/double_heads/dh_faster_rcnn_r50_fpn_1x.py index cef46570279d5835ddad67b2569ecc5ec8ccfad4..708f67265b8f124f8df67cca06c93d4e7f311237 100644 --- a/configs/double_heads/dh_faster_rcnn_r50_fpn_1x.py +++ b/configs/double_heads/dh_faster_rcnn_r50_fpn_1x.py @@ -105,6 +105,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + 
dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -112,35 +137,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x.py index 4adba5d485737d840fa9edb58799587020dcd20c..db20d8d2c979f91b25dfe82398df852d716ea3cc 100644 --- a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x.py +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x.py @@ -106,6 +106,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + 
dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -113,35 +138,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x.py 
index f8e1f83a15937d59d7fe5cbb114e6caaa8663878..d264b9bf4ad1f18c533ea7fe35b4ef27117a95e8 100644 --- a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x.py +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x.py @@ -109,6 +109,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -116,35 +141,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - 
img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x.py index 4a43cced746ae66dd35915a3544d7d830b0365cf..f3eace271f576641de1fb35e7e6ebaf90a363e39 100644 --- a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x.py +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x.py @@ -106,6 +106,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -113,35 +138,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - 
with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x.py index 0a3cf0e27af94614366ca6fadc22319b70965976..f39edee846eaeb5e886f06b88141da7d96e17ed0 100644 --- a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x.py +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x.py @@ -109,6 +109,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + 
dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -116,35 +141,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/fast_mask_rcnn_r101_fpn_1x.py b/configs/fast_mask_rcnn_r101_fpn_1x.py index 0a9fc45e3a3d7a2e25ada472bf5f8e6320d859e2..576d488a1ca761836c7a651faacd34903d398e02 100644 --- a/configs/fast_mask_rcnn_r101_fpn_1x.py +++ b/configs/fast_mask_rcnn_r101_fpn_1x.py @@ -74,45 +74,56 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=2000), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', 
flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'proposals', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, train=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, 
momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/fast_mask_rcnn_r50_caffe_c4_1x.py b/configs/fast_mask_rcnn_r50_caffe_c4_1x.py index 1aa97e054a6a2d57b033031605535762afa18db8..cca87107861308fbeb0ade85eb1f9ee206083add 100644 --- a/configs/fast_mask_rcnn_r50_caffe_c4_1x.py +++ b/configs/fast_mask_rcnn_r50_caffe_c4_1x.py @@ -73,45 +73,56 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=2000), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'proposals', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] data = dict( - imgs_per_gpu=1, + imgs_per_gpu=2, workers_per_gpu=2, train=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, proposal_file=data_root + 'proposals/rpn_r50_c4_1x_train2017.pkl', - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), val=dict( 
type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, proposal_file=data_root + 'proposals/rpn_r50_c4_1x_val2017.pkl', - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, proposal_file=data_root + 'proposals/rpn_r50_c4_1x_val2017.pkl', - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/fast_mask_rcnn_r50_fpn_1x.py b/configs/fast_mask_rcnn_r50_fpn_1x.py index 81a46e64acb6faf0132205f526fc1c11d26f4db2..c324356d5a2ffff2e0b3d3884b50478569cf85b9 100644 --- a/configs/fast_mask_rcnn_r50_fpn_1x.py +++ b/configs/fast_mask_rcnn_r50_fpn_1x.py @@ -74,45 +74,56 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=2000), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'proposals', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + 
type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, train=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/fast_rcnn_r101_fpn_1x.py b/configs/fast_rcnn_r101_fpn_1x.py index 51cca7a18230e04f8245faa2285460469906ad46..06d88126a1d6b0c9834654da6b571fffbebce3a7 100644 --- a/configs/fast_rcnn_r101_fpn_1x.py +++ 
b/configs/fast_rcnn_r101_fpn_1x.py @@ -30,11 +30,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=False, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) # model training and testing settings train_cfg = dict( rcnn=dict( @@ -60,45 +57,54 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=2000), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, train=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'train2017/', + 
pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/fast_rcnn_r50_caffe_c4_1x.py b/configs/fast_rcnn_r50_caffe_c4_1x.py index 5d78bafb7b018d5ac001fb41f5d4cacd2c18837c..d1c6f3ab56438876f4a5f79c1f6dde8b8d9de62e 100644 --- a/configs/fast_rcnn_r50_caffe_c4_1x.py +++ b/configs/fast_rcnn_r50_caffe_c4_1x.py @@ -38,11 +38,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=False, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) # model training and testing settings train_cfg = dict( rcnn=dict( @@ -68,45 +65,54 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=2000), + 
dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] data = dict( - imgs_per_gpu=1, + imgs_per_gpu=2, workers_per_gpu=2, train=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, proposal_file=data_root + 'proposals/rpn_r50_c4_1x_train2017.pkl', - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, proposal_file=data_root + 'proposals/rpn_r50_c4_1x_val2017.pkl', - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, proposal_file=data_root + 'proposals/rpn_r50_c4_1x_val2017.pkl', - flip_ratio=0, - with_mask=False, - with_label=False, - 
test_mode=True)) + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/fast_rcnn_r50_fpn_1x.py b/configs/fast_rcnn_r50_fpn_1x.py index 8d58453df0fec69751732683742364b156e71e75..b22dede05d9a1fba1ba433985c20ea192f4c9ca0 100644 --- a/configs/fast_rcnn_r50_fpn_1x.py +++ b/configs/fast_rcnn_r50_fpn_1x.py @@ -30,11 +30,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=False, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) # model training and testing settings train_cfg = dict( rcnn=dict( @@ -60,45 +57,54 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=2000), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] 
data = dict( imgs_per_gpu=2, workers_per_gpu=2, train=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/faster_rcnn_ohem_r50_fpn_1x.py b/configs/faster_rcnn_ohem_r50_fpn_1x.py index 9e65c106b3df627f9b89325ffe5c426569ab46c5..e2dbc8df9918818b19164c157ca2142f110a8375 100644 --- a/configs/faster_rcnn_ohem_r50_fpn_1x.py +++ b/configs/faster_rcnn_ohem_r50_fpn_1x.py @@ -102,6 +102,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', 
img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -109,35 +134,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/faster_rcnn_r101_fpn_1x.py b/configs/faster_rcnn_r101_fpn_1x.py index f77028ec1f7880e05cbd6a26f758129a913b7290..e0ad7066f48d4e521bbce5b00d3f6d0a1841336c 100644 --- a/configs/faster_rcnn_r101_fpn_1x.py +++ 
b/configs/faster_rcnn_r101_fpn_1x.py @@ -102,6 +102,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -109,35 +134,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, 
momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/faster_rcnn_r50_caffe_c4_1x.py b/configs/faster_rcnn_r50_caffe_c4_1x.py index ebbcc4e0cb780ce0ecc30686f80e87bf87a514a1..ddd8113420a87555c496a564b3547253fee5f8ef 100644 --- a/configs/faster_rcnn_r50_caffe_c4_1x.py +++ b/configs/faster_rcnn_r50_caffe_c4_1x.py @@ -107,42 +107,49 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( - imgs_per_gpu=1, + imgs_per_gpu=2, workers_per_gpu=2, train=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( 
type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/faster_rcnn_r50_fpn_1x.py b/configs/faster_rcnn_r50_fpn_1x.py index a6049cd6012a4ad086982c703b7530205e3e3955..0c5e243ac2aeb8b222ccf45e73aa38853721f122 100644 --- a/configs/faster_rcnn_r50_fpn_1x.py +++ b/configs/faster_rcnn_r50_fpn_1x.py @@ -102,6 +102,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -109,35 +134,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + 
pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/faster_rcnn_x101_32x4d_fpn_1x.py b/configs/faster_rcnn_x101_32x4d_fpn_1x.py index 50b479d0e0575ef9267874def8e62e09345bfd6b..8f3c6f706f4dcd134587990670ab1fc99a7a2862 100644 --- a/configs/faster_rcnn_x101_32x4d_fpn_1x.py +++ b/configs/faster_rcnn_x101_32x4d_fpn_1x.py @@ -104,6 +104,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] 
data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -111,35 +136,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/faster_rcnn_x101_64x4d_fpn_1x.py b/configs/faster_rcnn_x101_64x4d_fpn_1x.py index 4397d2defd16524dee102ba3d479f7bdcf94b7c2..90ad0ac3a998e949abe0ec27b80dea3269f4a09a 100644 --- a/configs/faster_rcnn_x101_64x4d_fpn_1x.py +++ b/configs/faster_rcnn_x101_64x4d_fpn_1x.py @@ -104,6 +104,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + 
dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -111,35 +136,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py b/configs/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py index ac21fada1667ec4c8dc7153798ec8d01f02b1dc3..0356a5730e428e5c6a58c0c120a7e0a05fad9b54 100644 --- a/configs/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py +++ b/configs/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py @@ -57,6 +57,35 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 
1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=4, workers_per_gpu=4, @@ -64,37 +93,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=[(1333, 640), (1333, 800)], - multiscale_mode='value', - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict( type='SGD', diff --git a/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py 
b/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py index d932bcfe2aabfa0c0808f2fe867b2981ae2677b4..c6f725a75ddbaa406e63b4be54440b1e9cbcb78b 100644 --- a/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py +++ b/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py @@ -58,6 +58,35 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -65,37 +94,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=[(1333, 640), (1333, 800)], - multiscale_mode='value', - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=test_pipeline), test=dict( 
type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict( type='SGD', diff --git a/configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py b/configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py index 6243c3645909bfe377525f4e4417538eae80f0dd..2c3248ae1be28534e1b8ad7eeea4a386b9358aad 100644 --- a/configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py +++ b/configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py @@ -57,6 +57,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=4, workers_per_gpu=4, @@ -64,36 +89,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, 
ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict( type='SGD', diff --git a/configs/fp16/faster_rcnn_r50_fpn_fp16_1x.py b/configs/fp16/faster_rcnn_r50_fpn_fp16_1x.py index 161a9756cfd060aebe7882a5df5b9168ee2687f5..20ff20b4b966e738c783a5afe1bc8d2f6d25e061 100644 --- a/configs/fp16/faster_rcnn_r50_fpn_fp16_1x.py +++ b/configs/fp16/faster_rcnn_r50_fpn_fp16_1x.py @@ -105,6 +105,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -112,35 +137,17 @@ data = dict( type=dataset_type, ann_file=data_root + 
'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/fp16/mask_rcnn_r50_fpn_fp16_1x.py b/configs/fp16/mask_rcnn_r50_fpn_fp16_1x.py index 31b32f080777c9f2e16d1ad299bf5acd60a4166f..23cbcf5267a44be55815ac680a5f06b48ef98d3e 100644 --- a/configs/fp16/mask_rcnn_r50_fpn_fp16_1x.py +++ b/configs/fp16/mask_rcnn_r50_fpn_fp16_1x.py @@ -119,6 +119,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + 
flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -126,35 +151,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/fp16/retinanet_r50_fpn_fp16_1x.py b/configs/fp16/retinanet_r50_fpn_fp16_1x.py index 87432f7ccbe3d3d882c2fb884a6bf6f8b5387dc3..a0577e770dd145f59042b005451471f313181970 100644 --- a/configs/fp16/retinanet_r50_fpn_fp16_1x.py +++ b/configs/fp16/retinanet_r50_fpn_fp16_1x.py @@ -60,6 +60,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), 
keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -67,36 +92,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py b/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py index 480e40337d5709fad197c00d3b3eaa68b6cb52ce..bc949074c831057dbde277b5b992d44daaceb824 100644 --- 
a/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py +++ b/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py @@ -9,9 +9,7 @@ model = dict( out_indices=(0, 1, 2, 3), frozen_stages=1, style='pytorch', - gcb=dict( - ratio=1./16., - ), + gcb=dict(ratio=1. / 16., ), stage_with_gcb=(False, True, True, True)), neck=dict( type='FPN', @@ -120,6 +118,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -127,35 +150,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, 
ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py b/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py index 349a52acca448dfdc494ed0d76c7397608c5cf16..ad06c9b84a8d308c16709bcc6aac510949cefbc1 100644 --- a/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py +++ b/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py @@ -11,9 +11,7 @@ model = dict( out_indices=(0, 1, 2, 3), frozen_stages=1, style='pytorch', - gcb=dict( - ratio=1./16., - ), + gcb=dict(ratio=1. / 16., ), stage_with_gcb=(False, True, True, True), norm_eval=False, norm_cfg=norm_cfg), @@ -124,6 +122,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( 
imgs_per_gpu=2, workers_per_gpu=2, @@ -131,35 +154,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py b/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py index 08b26055a4320bd321dcc37860ed8422b54bdba2..5568b2f7987f79460b817f52b2298463268dfc64 100644 --- a/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py +++ b/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py @@ -9,9 +9,7 @@ model = dict( out_indices=(0, 1, 2, 3), frozen_stages=1, style='pytorch', - gcb=dict( - ratio=1./4., - ), + gcb=dict(ratio=1. 
/ 4., ), stage_with_gcb=(False, True, True, True)), neck=dict( type='FPN', @@ -120,6 +118,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -127,35 +150,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + 
pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py b/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py index b8b55647eac65565834922bf4dee6c55ac7baff4..6a32126ec785724516a9a9d99b9f69ecc8ba6dfa 100644 --- a/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py +++ b/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py @@ -11,9 +11,7 @@ model = dict( out_indices=(0, 1, 2, 3), frozen_stages=1, style='pytorch', - gcb=dict( - ratio=1./4., - ), + gcb=dict(ratio=1. / 4., ), stage_with_gcb=(False, True, True, True), norm_eval=False, norm_cfg=norm_cfg), @@ -124,6 +122,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -131,35 +154,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - 
size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/gcnet/mask_rcnn_r50_fpn_sbn_1x.py b/configs/gcnet/mask_rcnn_r50_fpn_sbn_1x.py index b261934b66dea19335cc938f601fd3792b179fc3..819c78facd935fb62d0a01c92e8076598cf3933e 100644 --- a/configs/gcnet/mask_rcnn_r50_fpn_sbn_1x.py +++ b/configs/gcnet/mask_rcnn_r50_fpn_sbn_1x.py @@ -120,6 +120,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + 
dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -127,35 +152,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/ghm/retinanet_ghm_r50_fpn_1x.py b/configs/ghm/retinanet_ghm_r50_fpn_1x.py index eddf36846e1f505113ea44dbeb8f1bbea28cdae9..ea90fb7d78af86548f94cc76d5bfa5dd72c7083e 100644 --- a/configs/ghm/retinanet_ghm_r50_fpn_1x.py +++ b/configs/ghm/retinanet_ghm_r50_fpn_1x.py @@ -35,11 +35,7 @@ model = dict( use_sigmoid=True, loss_weight=1.0), loss_bbox=dict( - type='GHMR', - mu=0.02, - bins=10, - momentum=0.7, - loss_weight=10.0))) + type='GHMR', mu=0.02, bins=10, momentum=0.7, loss_weight=10.0))) # training and testing settings train_cfg = dict( assigner=dict( @@ -62,6 +58,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 
+train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -69,36 +90,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws_1x.py 
b/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws_1x.py index 0eef1e9d55c1ccf20235c48449d8307ddd480eed..eef21dbf7e482eaee7a6b2a82d92dd2adb5e2ab9 100644 --- a/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws_1x.py +++ b/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws_1x.py @@ -109,6 +109,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -116,35 +141,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - 
img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e.py b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e.py index da39db1b4e41b9af0227ab1f3c2f4a7d6f0da85c..8856c84ec27c03f29bef219bca7812b6e3be7d88 100644 --- a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e.py +++ b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e.py @@ -128,6 +128,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -135,35 +160,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), 
val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_2x.py b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_2x.py index c023ec3aae1f2e7f2ccb5b2a3d6b963b652fcb69..ce0348ff8ff0f7bf086e227ec81728623f46b5c4 100644 --- a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_2x.py +++ b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_2x.py @@ -128,6 +128,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', 
keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -135,35 +160,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py index 271ed6e4c8f7bcbcee26e0ec32581de8437773f7..b250590d94af082702ddfb4284f4d11e3700eb57 100644 --- a/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py +++ b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py @@ -130,6 +130,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + 
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -137,35 +162,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/gn/mask_rcnn_r101_fpn_gn_2x.py b/configs/gn/mask_rcnn_r101_fpn_gn_2x.py index ae09042d07032d6cc4977cf2e0f55ab17b1a7a23..ae1aeb89d3e6bd70e1ef5ef50a32182911480763 100644 --- a/configs/gn/mask_rcnn_r101_fpn_gn_2x.py +++ b/configs/gn/mask_rcnn_r101_fpn_gn_2x.py @@ -124,6 +124,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 
1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -131,35 +156,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/gn/mask_rcnn_r50_fpn_gn_2x.py 
b/configs/gn/mask_rcnn_r50_fpn_gn_2x.py index bbeaa78f6dbc34798a8d47392aeaa9738b1ad538..43ecf9ea9f1d1f48a22316b0fbcaba5fb103ba7c 100644 --- a/configs/gn/mask_rcnn_r50_fpn_gn_2x.py +++ b/configs/gn/mask_rcnn_r50_fpn_gn_2x.py @@ -124,6 +124,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -131,35 +156,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - 
img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/gn/mask_rcnn_r50_fpn_gn_contrib_2x.py b/configs/gn/mask_rcnn_r50_fpn_gn_contrib_2x.py index 87db0f5447374caca51977b9ddee017d43728e68..ee4ffafdc0a80f1a2de4f6a8b4e24adbf217fe90 100644 --- a/configs/gn/mask_rcnn_r50_fpn_gn_contrib_2x.py +++ b/configs/gn/mask_rcnn_r50_fpn_gn_contrib_2x.py @@ -124,6 +124,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -131,35 +156,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( 
type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py b/configs/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py index cbe345179e9321f066ff248842e18bbde4962045..5a2c071f70d224cf112c6f7846257ecb441f3630 100644 --- a/configs/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py +++ b/configs/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py @@ -113,6 +113,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] 
data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -120,35 +145,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=None) diff --git a/configs/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x.py b/configs/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x.py index b08f8095c7b3bf048c0c4955ed327eef67123787..989065ec7bb1f4e090a891bced85aaa6eb7544db 100644 --- a/configs/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x.py +++ b/configs/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x.py @@ -115,6 +115,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 
'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -122,35 +147,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=None) diff --git a/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x.py b/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x.py index 269967df0ad43c25b4cec783c8db86860d4da7bb..98fabd29ccb9042ba85f18db379d883a09e04f83 100644 --- a/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x.py +++ b/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x.py @@ -59,48 +59,54 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) 
+train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=300), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, train=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - num_max_proposals=300, proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_train2017.pkl', - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - num_max_proposals=300, proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl', - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - 
img_norm_cfg=img_norm_cfg, - size_divisor=32, - num_max_proposals=300, proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl', - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x.py b/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x.py index f78e1c1812ebf8843490aaeb05db68bcf464fda4..791368cd750f30a9cabab5cb542a5f370bbad073 100644 --- a/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x.py +++ b/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x.py @@ -127,6 +127,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -134,35 +159,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, 
- with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x.py b/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x.py index 61e7b99e667822bf5fd9be46152febd907d220fb..b0f2cb3b9bfc306fc05f58bf9077b42a2080cb7d 100644 --- a/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x.py +++ b/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x.py @@ -127,6 +127,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + 
dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -134,35 +159,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x.py b/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x.py index ae6a18a9d452709ac3d700ed1c956e66fdc82828..175b964f080954c0ac7b252be49c9864111ee668 100644 --- a/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x.py +++ b/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x.py @@ -84,6 +84,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + 
dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -91,36 +116,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x.py b/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x.py index 32f2bd620c044b61fe186e04cd9184478bf312c3..7bc348409a4ce65fbe196e2bc1a74fbe27e24517 100644 --- a/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x.py 
+++ b/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x.py @@ -84,6 +84,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -91,36 +116,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # 
optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_rpn_r101_caffe_rpn_1x.py b/configs/guided_anchoring/ga_rpn_r101_caffe_rpn_1x.py index c3d3b65654c08451708fb882f5b259bbca6b2802..8d81f326d8253e34e6e41514615555ed9f56459b 100644 --- a/configs/guided_anchoring/ga_rpn_r101_caffe_rpn_1x.py +++ b/configs/guided_anchoring/ga_rpn_r101_caffe_rpn_1x.py @@ -85,6 +85,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_label=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -92,35 +117,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - 
flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) # runner configs diff --git a/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x.py b/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x.py index a4b6b6d624c663ce1b662e87440e5d9fa002c668..9d6b7cebbb6dec4069d949a5ccc01acee3ca33b0 100644 --- a/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x.py +++ b/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x.py @@ -85,6 +85,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_label=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -92,35 +117,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - 
img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) # runner configs diff --git a/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x.py b/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x.py index 9eb1a69cc648388b0723dc1c33cb4380d3c4bc0f..1c5b7a0f89bdc9aab8b0e8cc690a6d7abb7dc26c 100644 --- a/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x.py +++ b/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x.py @@ -85,6 +85,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_label=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', 
**img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -92,35 +117,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) # runner configs diff --git a/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e.py b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e.py index 06e0c169ac56de0cead56c9570b41374250f8e7c..17fe9455bb2c8735db2d5aa24f9aae05e4cd1b4b 100644 --- a/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e.py +++ b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e.py @@ -30,10 +30,7 @@ model = dict( block='BASIC', num_blocks=(4, 4, 4, 4), num_channels=(32, 64, 128, 256)))), - neck=dict( - type='HRFPN', - in_channels=[32, 64, 128, 256], - out_channels=256), + neck=dict(type='HRFPN', in_channels=[32, 64, 128, 256], out_channels=256), rpn_head=dict( type='RPNHead', in_channels=256, @@ -63,13 +60,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - 
type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -81,13 +73,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -99,13 +86,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ], mask_roi_extractor=dict( type='SingleRoIExtractor', @@ -215,6 +197,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + 
dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -222,35 +229,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e.py b/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e.py index 512c652451940c2366b71d31b5ec547e58dbb2c6..65eedd15a575aa803d21cd083b3fdf42dee239f0 100644 --- a/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e.py +++ b/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e.py @@ -30,10 +30,7 @@ model = dict( block='BASIC', num_blocks=(4, 4, 4, 4), num_channels=(32, 64, 128, 256)))), - neck=dict( - type='HRFPN', - in_channels=[32, 64, 128, 256], - out_channels=256), + neck=dict(type='HRFPN', in_channels=[32, 64, 128, 256], out_channels=256), rpn_head=dict( type='RPNHead', in_channels=256, @@ -48,10 +45,7 @@ model = dict( loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 
9.0, loss_weight=1.0)), bbox_roi_extractor=dict( type='SingleRoIExtractor', - roi_layer=dict( - type='RoIAlign', - out_size=7, - sample_num=2), + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=[ @@ -66,13 +60,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -84,13 +73,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -102,13 +86,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), ]) # model training and testing settings train_cfg = dict( @@ -192,17 +171,38 @@ test_cfg = dict( nms_thr=0.7, min_bbox_size=0), rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100), + score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100), keep_all_stages=False) # dataset settings dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - to_rgb=True) + 
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -210,35 +210,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git 
a/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x.py b/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x.py index ceada23d245e1c98cf5ca1c39f63aa4b8ff503bd..f3a298f5faf5030803bca552a45b392676104ad5 100644 --- a/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x.py +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x.py @@ -119,44 +119,51 @@ test_cfg = dict( # dataset settings dataset_type = 'CocoDataset' data_root = 'data/coco/' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], to_rgb=True) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, - workers_per_gpu=4, + workers_per_gpu=2, train=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - 
with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x.py b/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x.py index 41dfade169f663b26a608d98b592cee87daac6a5..72ad914557aaadd8e5c588f9f0c25ed1652eb256 100644 --- a/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x.py +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x.py @@ -119,8 +119,33 @@ test_cfg = dict( # dataset settings dataset_type = 'CocoDataset' data_root = 'data/coco/' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], to_rgb=True) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -128,35 +153,17 @@ data = dict( type=dataset_type, ann_file=data_root + 
'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x.py b/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x.py index 72d6e5729fd87f86fe7d8bbcb6163e707ec2e5da..62147900daab716b57ea5134e49c65109c04fa83 100644 --- a/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x.py +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x.py @@ -119,44 +119,51 @@ test_cfg = dict( # dataset settings dataset_type = 'CocoDataset' data_root = 'data/coco/' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], to_rgb=True) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] 
+test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, - workers_per_gpu=4, + workers_per_gpu=2, train=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/hrnet/fcos_hrnetv2p_w32_gn_1x_4gpu.py b/configs/hrnet/fcos_hrnetv2p_w32_gn_1x_4gpu.py index d7a68177ad9fc6cc2ce3d9303df390182dd26184..f91df1fb08775b5a972bd49d3e3d20194d089557 100644 --- a/configs/hrnet/fcos_hrnetv2p_w32_gn_1x_4gpu.py +++ b/configs/hrnet/fcos_hrnetv2p_w32_gn_1x_4gpu.py @@ -9,8 +9,8 @@ model = dict( num_modules=1, num_branches=1, block='BOTTLENECK', - num_blocks=(4,), - num_channels=(64,)), + num_blocks=(4, ), + num_channels=(64, )), stage2=dict( 
num_modules=1, num_branches=2, @@ -73,6 +73,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=4, workers_per_gpu=4, @@ -80,36 +105,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict( 
type='SGD', diff --git a/configs/hrnet/htc_hrnetv2p_w32_20e.py b/configs/hrnet/htc_hrnetv2p_w32_20e.py index 3de5b4fb81bbcd97b2c0fc386d6ac207c0ab7661..b1f9ff567f7e2ecb94b89ee930585f41ddc96f37 100644 --- a/configs/hrnet/htc_hrnetv2p_w32_20e.py +++ b/configs/hrnet/htc_hrnetv2p_w32_20e.py @@ -32,10 +32,7 @@ model = dict( block='BASIC', num_blocks=(4, 4, 4, 4), num_channels=(32, 64, 128, 256)))), - neck=dict( - type='HRFPN', - in_channels=[32, 64, 128, 256], - out_channels=256), + neck=dict(type='HRFPN', in_channels=[32, 64, 128, 256], out_channels=256), rpn_head=dict( type='RPNHead', in_channels=256, @@ -65,13 +62,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -83,13 +75,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -101,13 +88,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ], mask_roi_extractor=dict( type='SingleRoIExtractor', @@ -232,6 +214,35 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( 
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegResizeFlipPadRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -239,38 +250,18 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, seg_prefix=data_root + 'stuffthingmaps/train2017/', - seg_scale_factor=1 / 8, - with_mask=True, - with_crowd=True, - with_label=True, - with_semantic_seg=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - 
with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x.py b/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x.py index e8dcfe4dab7df5586ba5e4167a83fa4e41c20c89..153ce55e54e5329a4cd7fed284d8463e249cc239 100644 --- a/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x.py +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x.py @@ -9,8 +9,8 @@ model = dict( num_modules=1, num_branches=1, block='BOTTLENECK', - num_blocks=(4,), - num_channels=(64,)), + num_blocks=(4, ), + num_channels=(64, )), stage2=dict( num_modules=1, num_branches=2, @@ -29,10 +29,7 @@ model = dict( block='BASIC', num_blocks=(4, 4, 4, 4), num_channels=(18, 36, 72, 144)))), - neck=dict( - type='HRFPN', - in_channels=[18, 36, 72, 144], - out_channels=256), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256), rpn_head=dict( type='RPNHead', in_channels=256, @@ -133,8 +130,33 @@ test_cfg = dict( # dataset settings dataset_type = 'CocoDataset' data_root = 'data/coco/' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], to_rgb=True) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', 
**img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -142,35 +164,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer # if you use 8 GPUs for training, please change lr to 0.02 optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x.py b/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x.py index 3abf2b2c918deb4cadfce9591ff65d13f481810d..a334ca3fcc30d076cb15c6b5f3dfc5c61d124b21 100644 --- a/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x.py +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x.py @@ -9,8 +9,8 @@ model = dict( num_modules=1, num_branches=1, block='BOTTLENECK', - num_blocks=(4,), - num_channels=(64,)), + num_blocks=(4, ), + num_channels=(64, )), stage2=dict( num_modules=1, num_branches=2, @@ -29,10 +29,7 @@ model = dict( block='BASIC', num_blocks=(4, 4, 4, 4), num_channels=(32, 64, 128, 256)))), - neck=dict( - type='HRFPN', - in_channels=[32, 64, 128, 256], - out_channels=256), + neck=dict(type='HRFPN', in_channels=[32, 
64, 128, 256], out_channels=256), rpn_head=dict( type='RPNHead', in_channels=256, @@ -132,8 +129,33 @@ test_cfg = dict( # dataset settings dataset_type = 'CocoDataset' data_root = 'data/coco/' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], to_rgb=True) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -141,35 +163,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 
800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py b/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py index 4c9581906390864a3bfef5ce87fd279ff35d2532..f06904c531d6c1972a27c5f59bd0329610603914 100644 --- a/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py +++ b/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py @@ -54,13 +54,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -72,13 +67,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -90,13 +80,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ], mask_roi_extractor=dict( type='SingleRoIExtractor', @@ -221,6 
+206,39 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict( + type='Resize', + img_scale=[(1600, 400), (1600, 1400)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegResizeFlipPadRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=1, workers_per_gpu=1, @@ -228,39 +246,18 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=[(1600, 400), (1600, 1400)], - multiscale_mode='range', - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, seg_prefix=data_root + 'stuffthingmaps/train2017/', - seg_scale_factor=1 / 8, - with_mask=True, - with_crowd=True, - with_label=True, - with_semantic_seg=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 
'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/htc/htc_r101_fpn_20e.py b/configs/htc/htc_r101_fpn_20e.py index de6711d6caf757314173783d7c986ebf41bbb81e..36584a3db62f1573b83ad6dd2fc16c47914ef2da 100644 --- a/configs/htc/htc_r101_fpn_20e.py +++ b/configs/htc/htc_r101_fpn_20e.py @@ -46,13 +46,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -64,13 +59,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -82,13 +72,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ], mask_roi_extractor=dict( type='SingleRoIExtractor', @@ -213,6 +198,35 @@ dataset_type = 'CocoDataset' data_root = 
'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegResizeFlipPadRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -220,38 +234,18 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, seg_prefix=data_root + 'stuffthingmaps/train2017/', - seg_scale_factor=1 / 8, - with_mask=True, - with_crowd=True, - with_label=True, - with_semantic_seg=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - 
flip_ratio=0, - with_mask=True, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/htc/htc_r50_fpn_1x.py b/configs/htc/htc_r50_fpn_1x.py index 02da445b353fde27308e228092edf4d073ec34f3..d77d60c7c9da85248cd271379ec268ff36e08ba4 100644 --- a/configs/htc/htc_r50_fpn_1x.py +++ b/configs/htc/htc_r50_fpn_1x.py @@ -46,13 +46,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -64,13 +59,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -82,13 +72,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ], mask_roi_extractor=dict( type='SingleRoIExtractor', @@ -213,6 +198,35 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + 
dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegResizeFlipPadRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -220,38 +234,18 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, seg_prefix=data_root + 'stuffthingmaps/train2017/', - seg_scale_factor=1 / 8, - with_mask=True, - with_crowd=True, - with_label=True, - with_semantic_seg=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = 
dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/htc/htc_r50_fpn_20e.py b/configs/htc/htc_r50_fpn_20e.py index 4410a5506c447a0d8bd9cebd3b5dd137c2cd773a..9bc49afa39ce638488ae7bbe5b99042edc3b5abf 100644 --- a/configs/htc/htc_r50_fpn_20e.py +++ b/configs/htc/htc_r50_fpn_20e.py @@ -46,13 +46,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -64,13 +59,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -82,13 +72,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ], mask_roi_extractor=dict( type='SingleRoIExtractor', @@ -213,6 +198,35 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict(type='Resize', 
img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegResizeFlipPadRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -220,38 +234,18 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, seg_prefix=data_root + 'stuffthingmaps/train2017/', - seg_scale_factor=1 / 8, - with_mask=True, - with_crowd=True, - with_label=True, - with_semantic_seg=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git 
a/configs/htc/htc_without_semantic_r50_fpn_1x.py b/configs/htc/htc_without_semantic_r50_fpn_1x.py index 8adbe366a52fe4b91eb4f2ea90a18fb4fa7fbc15..2a4b7771274c952547f8dbc42f2878a1872a6bd8 100644 --- a/configs/htc/htc_without_semantic_r50_fpn_1x.py +++ b/configs/htc/htc_without_semantic_r50_fpn_1x.py @@ -46,13 +46,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -64,13 +59,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -82,13 +72,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ], mask_roi_extractor=dict( type='SingleRoIExtractor', @@ -198,6 +183,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + 
dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -205,35 +215,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/htc/htc_x101_32x4d_fpn_20e_16gpu.py b/configs/htc/htc_x101_32x4d_fpn_20e_16gpu.py index 2846c572d6ac36c44043156e526b3e807250c44d..830a655813ac58f3812ca7ef6f99d2c7a4ecc614 100644 --- a/configs/htc/htc_x101_32x4d_fpn_20e_16gpu.py +++ 
b/configs/htc/htc_x101_32x4d_fpn_20e_16gpu.py @@ -215,6 +215,35 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegResizeFlipPadRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=1, workers_per_gpu=1, @@ -222,38 +251,18 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, seg_prefix=data_root + 'stuffthingmaps/train2017/', - seg_scale_factor=1 / 8, - with_mask=True, - with_crowd=True, - with_label=True, - with_semantic_seg=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', 
img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/htc/htc_x101_64x4d_fpn_20e_16gpu.py b/configs/htc/htc_x101_64x4d_fpn_20e_16gpu.py index 095a81b549c878d41507178e5a68566c9a31d80f..6c5dada91b2976d66b5b0f581ef0a83cbf31739b 100644 --- a/configs/htc/htc_x101_64x4d_fpn_20e_16gpu.py +++ b/configs/htc/htc_x101_64x4d_fpn_20e_16gpu.py @@ -48,13 +48,8 @@ model = dict( target_stds=[0.1, 0.1, 0.2, 0.2], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -66,13 +61,8 @@ model = dict( target_stds=[0.05, 0.05, 0.1, 0.1], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)), + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='SharedFCBBoxHead', num_fcs=2, @@ -84,13 +74,8 @@ model = dict( target_stds=[0.033, 0.033, 0.067, 0.067], reg_class_agnostic=True, loss_cls=dict( - type='CrossEntropyLoss', - use_sigmoid=False, - loss_weight=1.0), - loss_bbox=dict( - type='SmoothL1Loss', - beta=1.0, - loss_weight=1.0)) + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ], mask_roi_extractor=dict( type='SingleRoIExtractor', @@ -215,6 +200,35 @@ dataset_type = 'CocoDataset' 
data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegResizeFlipPadRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=1, workers_per_gpu=1, @@ -222,38 +236,18 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, seg_prefix=data_root + 'stuffthingmaps/train2017/', - seg_scale_factor=1 / 8, - with_mask=True, - with_crowd=True, - with_label=True, - with_semantic_seg=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - 
size_divisor=32, - flip_ratio=0, - with_mask=True, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x.py b/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x.py index 365c0145b4c5b6de0e14dd4850bbe50d96371111..5855f2395c0272ef29699d1fa45426f0f50ca1cf 100644 --- a/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x.py +++ b/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x.py @@ -75,46 +75,55 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=2000), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] data = dict( imgs_per_gpu=2, - workers_per_gpu=0, + workers_per_gpu=2, train=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, proposal_file=data_root + 
'libra_proposals/rpn_r50_fpn_1x_train2017.pkl', - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, proposal_file=data_root + 'libra_proposals/rpn_r50_fpn_1x_val2017.pkl', - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, proposal_file=data_root + 'libra_proposals/rpn_r50_fpn_1x_val2017.pkl', - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x.py b/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x.py index 38af55dcaca794a497aa00e492c12b8b34dd2bc2..fec7052161e6423cff6cc8472c4d243c2eee11d4 100644 --- a/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x.py +++ b/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x.py @@ -120,6 +120,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + 
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -127,35 +152,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x.py b/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x.py index 2ab33dce9ed3206c028e0a1030578f84ebde923c..8e1284a6e597b079a73f2fb8e4055bb4e3bc6a37 100644 --- a/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x.py +++ b/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x.py @@ -120,6 +120,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( 
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -127,35 +152,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git 
a/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x.py b/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x.py index 26172a486263d29c2b664e54a0d6bd81e99c5829..fbd50c0425d258c6fce94726214847da162cbf8b 100644 --- a/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x.py +++ b/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x.py @@ -122,6 +122,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -129,35 +154,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, 
ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/libra_rcnn/libra_retinanet_r50_fpn_1x.py b/configs/libra_rcnn/libra_retinanet_r50_fpn_1x.py index 1e0cab929591f0cea539ebcf5f99aa2a8a4b2f49..ab36054481b7d6f3c4b88c8de1f203b5dd966c90 100644 --- a/configs/libra_rcnn/libra_retinanet_r50_fpn_1x.py +++ b/configs/libra_rcnn/libra_retinanet_r50_fpn_1x.py @@ -74,6 +74,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -81,36 +106,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - 
with_crowd=False, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/mask_rcnn_r101_fpn_1x.py b/configs/mask_rcnn_r101_fpn_1x.py index 2840be7b392679e3884eb7c7ad72662efe03a257..280808cf92f4a922810b0e298f48951c4e2e60a7 100644 --- a/configs/mask_rcnn_r101_fpn_1x.py +++ b/configs/mask_rcnn_r101_fpn_1x.py @@ -116,6 +116,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', 
keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -123,35 +148,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/mask_rcnn_r50_caffe_c4_1x.py b/configs/mask_rcnn_r50_caffe_c4_1x.py index f901c51904f5aff605e6b5ce47af18a974dde38a..15fdafb0e95c4ef8c311ed7ce8625e8f61fbb559 100644 --- a/configs/mask_rcnn_r50_caffe_c4_1x.py +++ b/configs/mask_rcnn_r50_caffe_c4_1x.py @@ -120,42 +120,49 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 
'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( - imgs_per_gpu=1, + imgs_per_gpu=2, workers_per_gpu=2, train=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/mask_rcnn_r50_fpn_1x.py b/configs/mask_rcnn_r50_fpn_1x.py index 59a84c86a6079c843274b41f099d933f2706cb36..04f6d22ef1ef1df218450faba7d8f07422708c01 100644 --- a/configs/mask_rcnn_r50_fpn_1x.py +++ b/configs/mask_rcnn_r50_fpn_1x.py @@ -116,6 +116,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + 
dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -123,35 +148,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/mask_rcnn_x101_32x4d_fpn_1x.py b/configs/mask_rcnn_x101_32x4d_fpn_1x.py index 
051801b36f4894e63e922671d3148f2976612fdd..74d6823f6beba6807c8ce2dc35c0c7984d85957b 100644 --- a/configs/mask_rcnn_x101_32x4d_fpn_1x.py +++ b/configs/mask_rcnn_x101_32x4d_fpn_1x.py @@ -118,6 +118,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -125,35 +150,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - 
size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/mask_rcnn_x101_64x4d_fpn_1x.py b/configs/mask_rcnn_x101_64x4d_fpn_1x.py index 434bf6922608c927717a5454fb406e4bc3640bfb..18e4244c8c77d2e452e93e6ae13f004a5d629dbf 100644 --- a/configs/mask_rcnn_x101_64x4d_fpn_1x.py +++ b/configs/mask_rcnn_x101_64x4d_fpn_1x.py @@ -118,6 +118,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -125,35 +150,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root 
+ 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x.py b/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x.py index 682d308a937675bc2a03c59982ab853bd7c39b15..0a523f62333383b30e28db21cc5feca367d87f7a 100644 --- a/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x.py +++ b/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x.py @@ -127,6 +127,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -134,35 +159,17 @@ data = dict( 
type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x.py b/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x.py index 16ebde50bfcc25b79447dc894352a8d48638d40b..6f2a7916ecf4140631576d91e3e645a2d1a92950 100644 --- a/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x.py +++ b/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x.py @@ -127,6 +127,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + 
type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -134,35 +159,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x.py b/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x.py index a6b720332cf33263295dcfeeae0d85b793e5166d..009b0ad3a8257b401d3dd5a3b0299fd05481cda4 100644 --- a/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x.py +++ b/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x.py @@ -128,6 +128,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', 
with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -135,35 +160,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py index 
07e72ffdfda661f7406386f43f63c03abe5f180c..b4b533a2b300efce356d7e6542f52ed0b997f8de 100644 --- a/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py +++ b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py @@ -102,11 +102,36 @@ dataset_type = 'VOCDataset' data_root = 'data/VOCdevkit/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1000, 600), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, train=dict( - type='RepeatDataset', # to avoid reloading datasets frequently + type='RepeatDataset', times=3, dataset=dict( type=dataset_type, @@ -115,35 +140,17 @@ data = dict( data_root + 'VOC2012/ImageSets/Main/trainval.txt' ], img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], - img_scale=(1000, 600), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True)), + pipeline=train_pipeline)), val=dict( type=dataset_type, ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', img_prefix=data_root + 'VOC2007/', - img_scale=(1000, 600), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, 
ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', img_prefix=data_root + 'VOC2007/', - img_scale=(1000, 600), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/pascal_voc/ssd300_voc.py b/configs/pascal_voc/ssd300_voc.py index 551ecda3cb6ecc658aa9107d5af991977dc8b4af..2a5756d5fe0cc366d531a662391309c28d2c963d 100644 --- a/configs/pascal_voc/ssd300_voc.py +++ b/configs/pascal_voc/ssd300_voc.py @@ -23,6 +23,7 @@ model = dict( anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]), target_means=(.0, .0, .0, .0), target_stds=(0.1, 0.1, 0.2, 0.2))) +# model training and testing settings cudnn_benchmark = True train_cfg = dict( assigner=dict( @@ -42,14 +43,50 @@ test_cfg = dict( min_bbox_size=0, score_thr=0.02, max_per_img=200) -# model training and testing settings # dataset settings dataset_type = 'VOCDataset' data_root = 'data/VOCdevkit/' img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(300, 300), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + 
type='MultiScaleFlipAug', + img_scale=(300, 300), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( - imgs_per_gpu=4, - workers_per_gpu=2, + imgs_per_gpu=8, + workers_per_gpu=3, train=dict( type='RepeatDataset', times=10, @@ -60,51 +97,17 @@ data = dict( data_root + 'VOC2012/ImageSets/Main/trainval.txt' ], img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], - img_scale=(300, 300), - img_norm_cfg=img_norm_cfg, - size_divisor=None, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True, - test_mode=False, - extra_aug=dict( - photo_metric_distortion=dict( - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18), - expand=dict( - mean=img_norm_cfg['mean'], - to_rgb=img_norm_cfg['to_rgb'], - ratio_range=(1, 4)), - random_crop=dict( - min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)), - resize_keep_ratio=False)), + pipeline=train_pipeline)), val=dict( type=dataset_type, ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', img_prefix=data_root + 'VOC2007/', - img_scale=(300, 300), - img_norm_cfg=img_norm_cfg, - size_divisor=None, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True, - resize_keep_ratio=False), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', img_prefix=data_root + 'VOC2007/', - img_scale=(300, 300), - img_norm_cfg=img_norm_cfg, - size_divisor=None, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True, - resize_keep_ratio=False)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4) optimizer_config = dict() diff --git a/configs/pascal_voc/ssd512_voc.py b/configs/pascal_voc/ssd512_voc.py index 
f01404adfa2825b2e938ec6dc3c584b281868075..4fa7df658a755e67c74c3c454bf2f90e4423a9b5 100644 --- a/configs/pascal_voc/ssd512_voc.py +++ b/configs/pascal_voc/ssd512_voc.py @@ -23,6 +23,7 @@ model = dict( anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]), target_means=(.0, .0, .0, .0), target_stds=(0.1, 0.1, 0.2, 0.2))) +# model training and testing settings cudnn_benchmark = True train_cfg = dict( assigner=dict( @@ -42,14 +43,50 @@ test_cfg = dict( min_bbox_size=0, score_thr=0.02, max_per_img=200) -# model training and testing settings # dataset settings dataset_type = 'VOCDataset' data_root = 'data/VOCdevkit/' img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(512, 512), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( - imgs_per_gpu=4, - workers_per_gpu=2, + imgs_per_gpu=8, + workers_per_gpu=3, train=dict( type='RepeatDataset', times=10, @@ -60,51 +97,17 @@ data = dict( data_root + 'VOC2012/ImageSets/Main/trainval.txt' ], img_prefix=[data_root + 'VOC2007/', 
data_root + 'VOC2012/'], - img_scale=(512, 512), - img_norm_cfg=img_norm_cfg, - size_divisor=None, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True, - test_mode=False, - extra_aug=dict( - photo_metric_distortion=dict( - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18), - expand=dict( - mean=img_norm_cfg['mean'], - to_rgb=img_norm_cfg['to_rgb'], - ratio_range=(1, 4)), - random_crop=dict( - min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)), - resize_keep_ratio=False)), + pipeline=train_pipeline)), val=dict( type=dataset_type, ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', img_prefix=data_root + 'VOC2007/', - img_scale=(512, 512), - img_norm_cfg=img_norm_cfg, - size_divisor=None, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True, - resize_keep_ratio=False), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', img_prefix=data_root + 'VOC2007/', - img_scale=(512, 512), - img_norm_cfg=img_norm_cfg, - size_divisor=None, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True, - resize_keep_ratio=False)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4) optimizer_config = dict() diff --git a/configs/retinanet_r101_fpn_1x.py b/configs/retinanet_r101_fpn_1x.py index fb68f933d1245966f55fb73a8852dc33f733da10..837207c85d4316879816eef1c7ed2bbf7ff268e1 100644 --- a/configs/retinanet_r101_fpn_1x.py +++ b/configs/retinanet_r101_fpn_1x.py @@ -57,6 +57,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', 
**img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -64,36 +89,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/retinanet_r50_fpn_1x.py b/configs/retinanet_r50_fpn_1x.py index 2e8246823c7a457ec177aa132fbbb6c1668b0c20..8255d5a33902962079c5bfad48facb3db832fa63 100644 --- a/configs/retinanet_r50_fpn_1x.py +++ b/configs/retinanet_r50_fpn_1x.py @@ -57,6 +57,31 @@ dataset_type = 'CocoDataset' data_root = 
'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -64,36 +89,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = 
dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/retinanet_x101_32x4d_fpn_1x.py b/configs/retinanet_x101_32x4d_fpn_1x.py index 1b0aaaa5b5ca93e5d580b546160a3ef8dc7cadb5..f31555d9dd38aeda87987d29715c8bda522d5c5e 100644 --- a/configs/retinanet_x101_32x4d_fpn_1x.py +++ b/configs/retinanet_x101_32x4d_fpn_1x.py @@ -59,6 +59,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -66,36 +91,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 
'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/retinanet_x101_64x4d_fpn_1x.py b/configs/retinanet_x101_64x4d_fpn_1x.py index f5631f70f3bf87ec9fd21b04c8e318f7856b480a..47c87d6b95d0da60f2258cdfb19f5482df662fe4 100644 --- a/configs/retinanet_x101_64x4d_fpn_1x.py +++ b/configs/retinanet_x101_64x4d_fpn_1x.py @@ -59,6 +59,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -66,36 +91,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True), + 
pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/rpn_r101_fpn_1x.py b/configs/rpn_r101_fpn_1x.py index f1eecd2116cf2851204ecfd52ebed934e1e7796e..bcda8c19c748a5798bd8b0d36ea7b2a7d926a403 100644 --- a/configs/rpn_r101_fpn_1x.py +++ b/configs/rpn_r101_fpn_1x.py @@ -57,6 +57,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_label=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, 
workers_per_gpu=2, @@ -64,35 +89,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) # runner configs diff --git a/configs/rpn_r50_caffe_c4_1x.py b/configs/rpn_r50_caffe_c4_1x.py index caf0108be344c8a0a69b84222a456a3e0af795f0..3d7d15b589bda64527ee5705424720352f8b7496 100644 --- a/configs/rpn_r50_caffe_c4_1x.py +++ b/configs/rpn_r50_caffe_c4_1x.py @@ -57,6 +57,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_label=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + 
dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -64,35 +89,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) # runner configs diff --git a/configs/rpn_r50_fpn_1x.py b/configs/rpn_r50_fpn_1x.py index 96e71e08ea2b4bc6444f7aa20c51de67c0007928..b8928257591ab7f4503bf5fb1ca105ef60e15f0d 100644 --- a/configs/rpn_r50_fpn_1x.py +++ b/configs/rpn_r50_fpn_1x.py @@ -57,6 +57,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_label=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', 
size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -64,35 +89,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) # runner configs diff --git a/configs/rpn_x101_32x4d_fpn_1x.py b/configs/rpn_x101_32x4d_fpn_1x.py index 2a3faef55293a970b3b857d815f9c5394e55865b..709b5de56bb5fd759c1e1104ae2105ac949ffd45 100644 --- a/configs/rpn_x101_32x4d_fpn_1x.py +++ b/configs/rpn_x101_32x4d_fpn_1x.py @@ -59,6 +59,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 
+train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_label=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -66,35 +91,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) # runner configs diff --git a/configs/rpn_x101_64x4d_fpn_1x.py b/configs/rpn_x101_64x4d_fpn_1x.py index 
182a0b39530b579cbd0c681aaad5072dce9e3a4a..b2946d10aee77b16b3d1067ab068606cceb1c137 100644 --- a/configs/rpn_x101_64x4d_fpn_1x.py +++ b/configs/rpn_x101_64x4d_fpn_1x.py @@ -59,6 +59,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_label=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -66,35 +91,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=False, - with_label=False), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, 
- with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) # runner configs diff --git a/configs/scratch/scratch_faster_rcnn_r50_fpn_gn_6x.py b/configs/scratch/scratch_faster_rcnn_r50_fpn_gn_6x.py index cbb0e232acab00aad5bfad724ad9cb12e006ee7e..5621d077d3ae7cdeef181e060483321c7580e085 100644 --- a/configs/scratch/scratch_faster_rcnn_r50_fpn_gn_6x.py +++ b/configs/scratch/scratch_faster_rcnn_r50_fpn_gn_6x.py @@ -107,6 +107,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -114,35 +139,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - 
img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict( type='SGD', diff --git a/configs/scratch/scratch_mask_rcnn_r50_fpn_gn_6x.py b/configs/scratch/scratch_mask_rcnn_r50_fpn_gn_6x.py index 97a7ef2a7ce173f0c44a59a2474f96cc89118f72..321619d9825801d181fd2a1b47fe10cefb4aaa95 100644 --- a/configs/scratch/scratch_mask_rcnn_r50_fpn_gn_6x.py +++ b/configs/scratch/scratch_mask_rcnn_r50_fpn_gn_6x.py @@ -124,6 +124,31 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=2, workers_per_gpu=2, @@ -131,35 +156,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(1333, 
800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0.5, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=train_pipeline), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(1333, 800), - img_norm_cfg=img_norm_cfg, - size_divisor=32, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True)) + pipeline=test_pipeline)) # optimizer optimizer = dict( type='SGD', diff --git a/configs/ssd300_coco.py b/configs/ssd300_coco.py index e48a6e69be192a5ed806b9170931936cc524f712..f34d52e1b2d2a39723bf183c6a140ca2f808df2f 100644 --- a/configs/ssd300_coco.py +++ b/configs/ssd300_coco.py @@ -47,6 +47,43 @@ test_cfg = dict( dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(300, 300), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(300, 
300), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=8, workers_per_gpu=3, @@ -57,51 +94,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(300, 300), - img_norm_cfg=img_norm_cfg, - size_divisor=None, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True, - test_mode=False, - extra_aug=dict( - photo_metric_distortion=dict( - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18), - expand=dict( - mean=img_norm_cfg['mean'], - to_rgb=img_norm_cfg['to_rgb'], - ratio_range=(1, 4)), - random_crop=dict( - min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)), - resize_keep_ratio=False)), + pipeline=train_pipeline)), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(300, 300), - img_norm_cfg=img_norm_cfg, - size_divisor=None, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True, - resize_keep_ratio=False), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(300, 300), - img_norm_cfg=img_norm_cfg, - size_divisor=None, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True, - resize_keep_ratio=False)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) optimizer_config = dict() diff --git a/configs/ssd512_coco.py b/configs/ssd512_coco.py index 58242631b2474ae4c1b3be6e7875e676e6c698ef..67fe0309c6bd3b05bf151011685dbd40ca732b91 100644 --- a/configs/ssd512_coco.py +++ b/configs/ssd512_coco.py @@ -47,6 +47,43 @@ test_cfg = dict( dataset_type = 
'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(512, 512), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=8, workers_per_gpu=3, @@ -57,51 +94,17 @@ data = dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', - img_scale=(512, 512), - img_norm_cfg=img_norm_cfg, - size_divisor=None, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True, - test_mode=False, - extra_aug=dict( - photo_metric_distortion=dict( - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18), - expand=dict( - mean=img_norm_cfg['mean'], - to_rgb=img_norm_cfg['to_rgb'], - ratio_range=(1, 4)), - random_crop=dict( - min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)), - resize_keep_ratio=False)), + pipeline=train_pipeline)), val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', - img_scale=(512, 512), - img_norm_cfg=img_norm_cfg, - size_divisor=None, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True, - resize_keep_ratio=False), + pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', - img_scale=(512, 512), - img_norm_cfg=img_norm_cfg, - size_divisor=None, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True, - resize_keep_ratio=False)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) optimizer_config = dict() diff --git a/configs/wider_face/ssd300_wider_face.py b/configs/wider_face/ssd300_wider_face.py index 53cafc1ef2ea269e3a98208de3e6fefc03a94837..6a4184e1d874aaf1d40eb2410bc5cbd76947cede 100644 --- a/configs/wider_face/ssd300_wider_face.py +++ b/configs/wider_face/ssd300_wider_face.py @@ -23,6 +23,7 @@ model = dict( anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]), target_means=(.0, .0, .0, .0), target_stds=(0.1, 0.1, 0.2, 0.2))) +# model training and testing settings cudnn_benchmark = True train_cfg = dict( assigner=dict( @@ -42,11 +43,47 @@ test_cfg = dict( min_bbox_size=0, score_thr=0.02, max_per_img=200) -# model training and testing settings # dataset settings dataset_type = 'WIDERFaceDataset' data_root = 'data/WIDERFace/' img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(300, 300), 
keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(300, 300), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] data = dict( imgs_per_gpu=60, workers_per_gpu=2, @@ -55,57 +92,20 @@ data = dict( times=2, dataset=dict( type=dataset_type, - ann_file=[ - data_root + 'train.txt', - ], - img_prefix=[data_root + 'WIDER_train/'], - img_scale=(300, 300), - min_size=17, # throw away very small faces to improve training, - # because 300x300 is too low resolution to detect them - img_norm_cfg=img_norm_cfg, - size_divisor=None, - flip_ratio=0.5, - with_mask=False, - with_crowd=False, - with_label=True, - test_mode=False, - extra_aug=dict( - photo_metric_distortion=dict( - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18), - expand=dict( - mean=img_norm_cfg['mean'], - to_rgb=img_norm_cfg['to_rgb'], - ratio_range=(1, 4)), - random_crop=dict( - min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)), - resize_keep_ratio=False)), + ann_file=data_root + 'train.txt', + img_prefix=data_root + 'WIDER_train/', + min_size=17, + pipeline=train_pipeline)), val=dict( type=dataset_type, - ann_file=data_root + '/val.txt', + ann_file=data_root + 'val.txt', img_prefix=data_root + 'WIDER_val/', - img_scale=(300, 300), - img_norm_cfg=img_norm_cfg, - size_divisor=None, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True, - resize_keep_ratio=False), + pipeline=test_pipeline), test=dict( type=dataset_type, - ann_file=data_root + '/val.txt', + ann_file=data_root + 'val.txt', img_prefix=data_root + 'WIDER_val/', - img_scale=(300, 
300), - img_norm_cfg=img_norm_cfg, - size_divisor=None, - flip_ratio=0, - with_mask=False, - with_label=False, - test_mode=True, - resize_keep_ratio=False)) + pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4) optimizer_config = dict() @@ -122,7 +122,7 @@ log_config = dict( interval=1, hooks=[ dict(type='TextLoggerHook'), - dict(type='TensorboardLoggerHook') + # dict(type='TensorboardLoggerHook') ]) # yapf:enable # runtime settings diff --git a/demo/data_pipeline.png b/demo/data_pipeline.png new file mode 100644 index 0000000000000000000000000000000000000000..6ac3fee2bc62753681b2e42a9705dd7eefbee443 Binary files /dev/null and b/demo/data_pipeline.png differ diff --git a/docs/DATA_PIPELINE.md b/docs/DATA_PIPELINE.md new file mode 100644 index 0000000000000000000000000000000000000000..413463abd8e0a45163506aa4ea37a703ec81fcee --- /dev/null +++ b/docs/DATA_PIPELINE.md @@ -0,0 +1,115 @@ +## Data preparation pipeline + +The data preparation pipeline and the dataset are decoupled. Usually a dataset +defines how to process the annotations and a data pipeline defines all the steps to prepare a data dict. +A pipeline consists of a sequence of operations. Each operation takes a dict as input and also outputs a dict for the next transform. + +We present a classical pipeline in the following figure. The blue blocks are pipeline operations. With the pipeline going on, each operator can add new keys (marked as green) to the result dict or update the existing keys (marked as orange). + + +The operations are categorized into data loading, pre-processing, formatting and test-time augmentation. + +Here is a pipeline example for Faster R-CNN. 
+```python +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +``` + +For each operation, we list the related dict fields that are added/updated/removed. + +### Data loading + +`LoadImageFromFile` +- add: img, img_shape, ori_shape + +`LoadAnnotations` +- add: gt_bboxes, gt_bboxes_ignore, gt_labels, gt_masks, gt_semantic_seg, bbox_fields, mask_fields + +`LoadProposals` +- add: proposals + +### Pre-processing + +`Resize` +- add: scale, scale_idx, pad_shape, scale_factor, keep_ratio +- update: img, img_shape, *bbox_fields, *mask_fields + +`RandomFlip` +- add: flip +- update: img, *bbox_fields, *mask_fields + +`Pad` +- add: pad_fixed_size, pad_size_divisor +- update: img, pad_shape, *mask_fields + +`RandomCrop` +- update: img, pad_shape, gt_bboxes, gt_labels, gt_masks, *bbox_fields + +`Normalize` +- add: img_norm_cfg +- update: img + +`SegResizeFlipPadRescale` +- update: gt_semantic_seg + +`PhotoMetricDistortion` +- update: img + +`Expand` +- update: img, gt_bboxes + +`MinIoURandomCrop` +- update: img, gt_bboxes, gt_labels + +`Corrupt` +- update: img + +### Formatting + +`ToTensor` +- update: specified by `keys`. + +`ImageToTensor` +- update: specified by `keys`. 
+ +`Transpose` +- update: specified by `keys`. + +`ToDataContainer` +- update: specified by `fields`. + +`DefaultFormatBundle` +- update: img, proposals, gt_bboxes, gt_bboxes_ignore, gt_labels, gt_masks, gt_semantic_seg + +`Collect` +- add: img_meta (the keys of img_meta are specified by `meta_keys`) +- remove: all other keys except for those specified by `keys` + +### Test time augmentation + +`MultiScaleFlipAug` \ No newline at end of file diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md index b603e19e35ffe4d997de37c7bdca00568526e636..5977c7131b5bad829c7d51ee959004d8024d5240 100644 --- a/docs/GETTING_STARTED.md +++ b/docs/GETTING_STARTED.md @@ -89,12 +89,10 @@ model = init_detector(config_file, checkpoint_file, device='cuda:0') # test a single image and show the results img = 'test.jpg' # or img = mmcv.imread(img), which will only load it once result = inference_detector(model, img) +# visualize the results in a new window show_result(img, result, model.CLASSES) - -# test a list of images and write the results to image files -imgs = ['test1.jpg', 'test2.jpg'] -for i, result in enumerate(inference_detector(model, imgs)): - show_result(imgs[i], result, model.CLASSES, out_file='result_{}.jpg'.format(i)) +# or save the visualization results to image files +show_result(img, result, model.CLASSES, out_file='result.jpg') # test a video and show the results video = mmcv.VideoReader('video.mp4') diff --git a/mmdet/apis/inference.py b/mmdet/apis/inference.py index 85bfd2bfbdcfb613864d2f99fae7ff393ddab1e3..67b2b241404251dc966a9e9322b3ee6975c35d4c 100644 --- a/mmdet/apis/inference.py +++ b/mmdet/apis/inference.py @@ -5,11 +5,11 @@ import mmcv import numpy as np import pycocotools.mask as maskUtils import torch +from mmcv.parallel import collate, scatter from mmcv.runner import load_checkpoint from mmdet.core import get_classes -from mmdet.datasets import to_tensor -from mmdet.datasets.transforms import ImageTransform +from mmdet.datasets.pipelines import Compose 
from mmdet.models import build_detector @@ -46,7 +46,16 @@ def init_detector(config, checkpoint=None, device='cuda:0'): return model -def inference_detector(model, imgs): +class LoadImage(object): + + def __call__(self, results): + img = mmcv.imread(results['img']) + results['img'] = img + results['ori_shape'] = img.shape + return results + + +def inference_detector(model, img): """Inference image(s) with the detector. Args: @@ -59,45 +68,19 @@ def inference_detector(model, imgs): detection results directly. """ cfg = model.cfg - img_transform = ImageTransform( - size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg) - device = next(model.parameters()).device # model device - if not isinstance(imgs, list): - return _inference_single(model, imgs, img_transform, device) - else: - return _inference_generator(model, imgs, img_transform, device) - - -def _prepare_data(img, img_transform, cfg, device): - ori_shape = img.shape - img, img_shape, pad_shape, scale_factor = img_transform( - img, - scale=cfg.data.test.img_scale, - keep_ratio=cfg.data.test.get('resize_keep_ratio', True)) - img = to_tensor(img).to(device).unsqueeze(0) - img_meta = [ - dict( - ori_shape=ori_shape, - img_shape=img_shape, - pad_shape=pad_shape, - scale_factor=scale_factor, - flip=False) - ] - return dict(img=[img], img_meta=[img_meta]) - - -def _inference_single(model, img, img_transform, device): - img = mmcv.imread(img) - data = _prepare_data(img, img_transform, model.cfg, device) + # build the data pipeline + test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:] + test_pipeline = Compose(test_pipeline) + # prepare data + data = dict(img=img) + data = test_pipeline(data) + data = scatter(collate([data], samples_per_gpu=1), [device])[0] + # forward the model with torch.no_grad(): result = model(return_loss=False, rescale=True, **data) - return result - -def _inference_generator(model, imgs, img_transform, device): - for img in imgs: - yield _inference_single(model, img, img_transform, 
device) + return result # TODO: merge this method with the one in BaseDetector diff --git a/mmdet/core/evaluation/eval_hooks.py b/mmdet/core/evaluation/eval_hooks.py index c37f7bd0291474d1411cc4e5f0799bf6959385f5..6cf87ffe40b6d7fc13e6278807e6a6e8a6af7b27 100644 --- a/mmdet/core/evaluation/eval_hooks.py +++ b/mmdet/core/evaluation/eval_hooks.py @@ -78,12 +78,12 @@ class DistEvalmAPHook(DistEvalHook): def evaluate(self, runner, results): gt_bboxes = [] gt_labels = [] - gt_ignore = [] if self.dataset.with_crowd else None + gt_ignore = [] for i in range(len(self.dataset)): ann = self.dataset.get_ann_info(i) bboxes = ann['bboxes'] labels = ann['labels'] - if gt_ignore is not None: + if 'bboxes_ignore' in ann: ignore = np.concatenate([ np.zeros(bboxes.shape[0], dtype=np.bool), np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool) @@ -93,6 +93,8 @@ class DistEvalmAPHook(DistEvalHook): labels = np.concatenate([labels, ann['labels_ignore']]) gt_bboxes.append(bboxes) gt_labels.append(labels) + if not gt_ignore: + gt_ignore = None # If the dataset is VOC2007, then use 11 points mAP evaluation. 
if hasattr(self.dataset, 'year') and self.dataset.year == 2007: ds_name = 'voc07' diff --git a/mmdet/datasets/__init__.py b/mmdet/datasets/__init__.py index 11b7569ac2e67af993c6faa939b75f4aa64136bc..8de55d26b02760378c3f1ffd63201ba69c0d6785 100644 --- a/mmdet/datasets/__init__.py +++ b/mmdet/datasets/__init__.py @@ -6,7 +6,6 @@ from .dataset_wrappers import ConcatDataset, RepeatDataset from .extra_aug import ExtraAugmentation from .loader import DistributedGroupSampler, GroupSampler, build_dataloader from .registry import DATASETS -from .utils import random_scale, show_ann, to_tensor from .voc import VOCDataset from .wider_face import WIDERFaceDataset from .xml_style import XMLDataset @@ -14,7 +13,6 @@ from .xml_style import XMLDataset __all__ = [ 'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'CityscapesDataset', 'GroupSampler', 'DistributedGroupSampler', - 'build_dataloader', 'to_tensor', 'random_scale', 'show_ann', - 'ConcatDataset', 'RepeatDataset', 'ExtraAugmentation', 'WIDERFaceDataset', - 'DATASETS', 'build_dataset' + 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'ExtraAugmentation', + 'WIDERFaceDataset', 'DATASETS', 'build_dataset' ] diff --git a/mmdet/datasets/coco.py b/mmdet/datasets/coco.py index 46ef7099a8ff39cb3d4dd2cc9e410af955922cf7..23c9120e4917e9714b73068bb4e19abcc94d9c3a 100644 --- a/mmdet/datasets/coco.py +++ b/mmdet/datasets/coco.py @@ -42,7 +42,7 @@ class CocoDataset(CustomDataset): img_id = self.img_infos[idx]['id'] ann_ids = self.coco.getAnnIds(imgIds=[img_id]) ann_info = self.coco.loadAnns(ann_ids) - return self._parse_ann_info(ann_info, self.with_mask) + return self._parse_ann_info(self.img_infos[idx], ann_info) def _filter_imgs(self, min_size=32): """Filter images too small or without ground truths.""" @@ -55,7 +55,7 @@ class CocoDataset(CustomDataset): valid_inds.append(i) return valid_inds - def _parse_ann_info(self, ann_info, with_mask=True): + def _parse_ann_info(self, img_info, ann_info): """Parse bbox and mask 
annotation. Args: @@ -64,19 +64,14 @@ class CocoDataset(CustomDataset): Returns: dict: A dict containing the following keys: bboxes, bboxes_ignore, - labels, masks, mask_polys, poly_lens. + labels, masks, seg_map. "masks" are raw annotations and not + decoded into binary masks. """ gt_bboxes = [] gt_labels = [] gt_bboxes_ignore = [] - # Two formats are provided. - # 1. mask: a binary map of the same size of the image. - # 2. polys: each mask consists of one or several polys, each poly is a - # list of float. - if with_mask: - gt_masks = [] - gt_mask_polys = [] - gt_poly_lens = [] + gt_masks_ann = [] + for i, ann in enumerate(ann_info): if ann.get('ignore', False): continue @@ -84,19 +79,13 @@ class CocoDataset(CustomDataset): if ann['area'] <= 0 or w < 1 or h < 1: continue bbox = [x1, y1, x1 + w - 1, y1 + h - 1] - if ann['iscrowd']: + if ann.get('iscrowd', False): gt_bboxes_ignore.append(bbox) else: gt_bboxes.append(bbox) gt_labels.append(self.cat2label[ann['category_id']]) - if with_mask: - gt_masks.append(self.coco.annToMask(ann)) - mask_polys = [ - p for p in ann['segmentation'] if len(p) >= 6 - ] # valid polygons have >= 3 points (6 coordinates) - poly_lens = [len(p) for p in mask_polys] - gt_mask_polys.append(mask_polys) - gt_poly_lens.extend(poly_lens) + gt_masks_ann.append(ann['segmentation']) + if gt_bboxes: gt_bboxes = np.array(gt_bboxes, dtype=np.float32) gt_labels = np.array(gt_labels, dtype=np.int64) @@ -109,12 +98,13 @@ class CocoDataset(CustomDataset): else: gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32) + seg_map = img_info['filename'].replace('jpg', 'png') + ann = dict( - bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore) + bboxes=gt_bboxes, + labels=gt_labels, + bboxes_ignore=gt_bboxes_ignore, + masks=gt_masks_ann, + seg_map=seg_map) - if with_mask: - ann['masks'] = gt_masks - # poly format is not used in the current implementation - ann['mask_polys'] = gt_mask_polys - ann['poly_lens'] = gt_poly_lens return ann diff --git 
a/mmdet/datasets/custom.py b/mmdet/datasets/custom.py index aed2bf92749677043412c8b36d7eaaded89f71da..d596943f5dca668f99a61aecce535b2c944a915e 100644 --- a/mmdet/datasets/custom.py +++ b/mmdet/datasets/custom.py @@ -1,17 +1,11 @@ import os.path as osp -import warnings import mmcv import numpy as np -from imagecorruptions import corrupt -from mmcv.parallel import DataContainer as DC from torch.utils.data import Dataset -from .extra_aug import ExtraAugmentation +from .pipelines import Compose from .registry import DATASETS -from .transforms import (BboxTransform, ImageTransform, MaskTransform, - Numpy2Tensor, SegMapTransform) -from .utils import random_scale, to_tensor @DATASETS.register_module @@ -27,7 +21,7 @@ class CustomDataset(Dataset): 'ann': { 'bboxes': <np.ndarray> (n, 4), 'labels': <np.ndarray> (n, ), - 'bboxes_ignore': <np.ndarray> (k, 4), + 'bboxes_ignore': <np.ndarray> (k, 4), (optional field) 'labels_ignore': <np.ndarray> (k, 4) (optional field) } }, @@ -41,33 +35,35 @@ class CustomDataset(Dataset): def __init__(self, ann_file, - img_prefix, - img_scale, - img_norm_cfg, - multiscale_mode='value', - size_divisor=None, - proposal_file=None, - num_max_proposals=1000, - flip_ratio=0, - with_mask=True, - with_crowd=True, - with_label=True, - with_semantic_seg=False, + pipeline, + data_root=None, + img_prefix=None, seg_prefix=None, - seg_scale_factor=1, - extra_aug=None, - resize_keep_ratio=True, - corruption=None, - corruption_severity=1, - skip_img_without_anno=True, + proposal_file=None, test_mode=False): - # prefix of images path + self.ann_file = ann_file + self.data_root = data_root self.img_prefix = img_prefix + self.seg_prefix = seg_prefix + self.proposal_file = proposal_file + self.test_mode = test_mode + # join paths if data_root is specified + if self.data_root is not None: + if not osp.isabs(self.ann_file): + self.ann_file = osp.join(self.data_root, self.ann_file) + if not (self.img_prefix is None or osp.isabs(self.img_prefix)): + self.img_prefix = 
osp.join(self.data_root, self.img_prefix) + if not (self.seg_prefix is None or osp.isabs(self.seg_prefix)): + self.seg_prefix = osp.join(self.data_root, self.seg_prefix) + if not (self.proposal_file is None + or osp.isabs(self.proposal_file)): + self.proposal_file = osp.join(self.data_root, + self.proposal_file) # load annotations (and proposals) - self.img_infos = self.load_annotations(ann_file) - if proposal_file is not None: - self.proposals = self.load_proposals(proposal_file) + self.img_infos = self.load_annotations(self.ann_file) + if self.proposal_file is not None: + self.proposals = self.load_proposals(self.proposal_file) else: self.proposals = None # filter images with no annotation during training @@ -76,67 +72,11 @@ class CustomDataset(Dataset): self.img_infos = [self.img_infos[i] for i in valid_inds] if self.proposals is not None: self.proposals = [self.proposals[i] for i in valid_inds] - - # (long_edge, short_edge) or [(long1, short1), (long2, short2), ...] - self.img_scales = img_scale if isinstance(img_scale, - list) else [img_scale] - assert mmcv.is_list_of(self.img_scales, tuple) - # normalization configs - self.img_norm_cfg = img_norm_cfg - - # multi-scale mode (only applicable for multi-scale training) - self.multiscale_mode = multiscale_mode - assert multiscale_mode in ['value', 'range'] - - # max proposals per image - self.num_max_proposals = num_max_proposals - # flip ratio - self.flip_ratio = flip_ratio - assert flip_ratio >= 0 and flip_ratio <= 1 - # padding border to ensure the image size can be divided by - # size_divisor (used for FPN) - self.size_divisor = size_divisor - - # with mask or not (reserved field, takes no effect) - self.with_mask = with_mask - # some datasets provide bbox annotations as ignore/crowd/difficult, - # if `with_crowd` is True, then these info is returned. 
- self.with_crowd = with_crowd - # with label is False for RPN - self.with_label = with_label - # with semantic segmentation (stuff) annotation or not - self.with_seg = with_semantic_seg - # prefix of semantic segmentation map path - self.seg_prefix = seg_prefix - # rescale factor for segmentation maps - self.seg_scale_factor = seg_scale_factor - # in test mode or not - self.test_mode = test_mode - # set group flag for the sampler if not self.test_mode: self._set_group_flag() - # transforms - self.img_transform = ImageTransform( - size_divisor=self.size_divisor, **self.img_norm_cfg) - self.bbox_transform = BboxTransform() - self.mask_transform = MaskTransform() - self.seg_transform = SegMapTransform(self.size_divisor) - self.numpy2tensor = Numpy2Tensor() - - # if use extra augmentation - if extra_aug is not None: - self.extra_aug = ExtraAugmentation(**extra_aug) - else: - self.extra_aug = None - - # image rescale if keep ratio - self.resize_keep_ratio = resize_keep_ratio - self.skip_img_without_anno = skip_img_without_anno - - # corruptions - self.corruption = corruption - self.corruption_severity = corruption_severity + # processing pipeline + self.pipeline = Compose(pipeline) def __len__(self): return len(self.img_infos) @@ -150,6 +90,13 @@ class CustomDataset(Dataset): def get_ann_info(self, idx): return self.img_infos[idx]['ann'] + def pre_pipeline(self, results): + results['img_prefix'] = self.img_prefix + results['seg_prefix'] = self.seg_prefix + results['proposal_file'] = self.proposal_file + results['bbox_fields'] = [] + results['mask_fields'] = [] + def _filter_imgs(self, min_size=32): """Filter images too small.""" valid_inds = [] @@ -186,164 +133,17 @@ class CustomDataset(Dataset): def prepare_train_img(self, idx): img_info = self.img_infos[idx] - # load image - img = mmcv.imread(osp.join(self.img_prefix, img_info['filename'])) - # corruption - if self.corruption is not None: - img = corrupt( - img, - severity=self.corruption_severity, - 
corruption_name=self.corruption) - # load proposals if necessary - if self.proposals is not None: - proposals = self.proposals[idx][:self.num_max_proposals] - # TODO: Handle empty proposals properly. Currently images with - # no proposals are just ignored, but they can be used for - # training in concept. - if len(proposals) == 0: - return None - if not (proposals.shape[1] == 4 or proposals.shape[1] == 5): - raise AssertionError( - 'proposals should have shapes (n, 4) or (n, 5), ' - 'but found {}'.format(proposals.shape)) - if proposals.shape[1] == 5: - scores = proposals[:, 4, None] - proposals = proposals[:, :4] - else: - scores = None - - ann = self.get_ann_info(idx) - gt_bboxes = ann['bboxes'] - gt_labels = ann['labels'] - if self.with_crowd: - gt_bboxes_ignore = ann['bboxes_ignore'] - - # skip the image if there is no valid gt bbox - if len(gt_bboxes) == 0 and self.skip_img_without_anno: - warnings.warn('Skip the image "%s" that has no valid gt bbox' % - osp.join(self.img_prefix, img_info['filename'])) - return None - - # extra augmentation - if self.extra_aug is not None: - img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes, - gt_labels) - - # apply transforms - flip = True if np.random.rand() < self.flip_ratio else False - # randomly sample a scale - img_scale = random_scale(self.img_scales, self.multiscale_mode) - img, img_shape, pad_shape, scale_factor = self.img_transform( - img, img_scale, flip, keep_ratio=self.resize_keep_ratio) - img = img.copy() - if self.with_seg: - gt_seg = mmcv.imread( - osp.join(self.seg_prefix, - img_info['filename'].replace('jpg', 'png')), - flag='unchanged') - gt_seg = self.seg_transform(gt_seg.squeeze(), img_scale, flip) - gt_seg = mmcv.imrescale( - gt_seg, self.seg_scale_factor, interpolation='nearest') - gt_seg = gt_seg[None, ...] 
+ ann_info = self.get_ann_info(idx) + results = dict(img_info=img_info, ann_info=ann_info) if self.proposals is not None: - proposals = self.bbox_transform(proposals, img_shape, scale_factor, - flip) - proposals = np.hstack([proposals, scores - ]) if scores is not None else proposals - gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor, - flip) - if self.with_crowd: - gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape, - scale_factor, flip) - if self.with_mask: - gt_masks = self.mask_transform(ann['masks'], pad_shape, - scale_factor, flip) - - ori_shape = (img_info['height'], img_info['width'], 3) - img_meta = dict( - ori_shape=ori_shape, - img_shape=img_shape, - pad_shape=pad_shape, - scale_factor=scale_factor, - flip=flip) - - data = dict( - img=DC(to_tensor(img), stack=True), - img_meta=DC(img_meta, cpu_only=True), - gt_bboxes=DC(to_tensor(gt_bboxes))) - if self.proposals is not None: - data['proposals'] = DC(to_tensor(proposals)) - if self.with_label: - data['gt_labels'] = DC(to_tensor(gt_labels)) - if self.with_crowd: - data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore)) - if self.with_mask: - data['gt_masks'] = DC(gt_masks, cpu_only=True) - if self.with_seg: - data['gt_semantic_seg'] = DC(to_tensor(gt_seg), stack=True) - return data + results['proposals'] = self.proposals[idx] + self.pre_pipeline(results) + return self.pipeline(results) def prepare_test_img(self, idx): - """Prepare an image for testing (multi-scale and flipping)""" img_info = self.img_infos[idx] - img = mmcv.imread(osp.join(self.img_prefix, img_info['filename'])) - # corruption - if self.corruption is not None: - img = corrupt( - img, - severity=self.corruption_severity, - corruption_name=self.corruption) - # load proposals if necessary - if self.proposals is not None: - proposal = self.proposals[idx][:self.num_max_proposals] - if not (proposal.shape[1] == 4 or proposal.shape[1] == 5): - raise AssertionError( - 'proposals should have shapes (n, 4) or (n, 
5), ' - 'but found {}'.format(proposal.shape)) - else: - proposal = None - - def prepare_single(img, scale, flip, proposal=None): - _img, img_shape, pad_shape, scale_factor = self.img_transform( - img, scale, flip, keep_ratio=self.resize_keep_ratio) - _img = to_tensor(_img) - _img_meta = dict( - ori_shape=(img_info['height'], img_info['width'], 3), - img_shape=img_shape, - pad_shape=pad_shape, - scale_factor=scale_factor, - flip=flip) - if proposal is not None: - if proposal.shape[1] == 5: - score = proposal[:, 4, None] - proposal = proposal[:, :4] - else: - score = None - _proposal = self.bbox_transform(proposal, img_shape, - scale_factor, flip) - _proposal = np.hstack([_proposal, score - ]) if score is not None else _proposal - _proposal = to_tensor(_proposal) - else: - _proposal = None - return _img, _img_meta, _proposal - - imgs = [] - img_metas = [] - proposals = [] - for scale in self.img_scales: - _img, _img_meta, _proposal = prepare_single( - img, scale, False, proposal) - imgs.append(_img) - img_metas.append(DC(_img_meta, cpu_only=True)) - proposals.append(_proposal) - if self.flip_ratio > 0: - _img, _img_meta, _proposal = prepare_single( - img, scale, True, proposal) - imgs.append(_img) - img_metas.append(DC(_img_meta, cpu_only=True)) - proposals.append(_proposal) - data = dict(img=imgs, img_meta=img_metas) + results = dict(img_info=img_info) if self.proposals is not None: - data['proposals'] = proposals - return data + results['proposals'] = self.proposals[idx] + self.pre_pipeline(results) + return self.pipeline(results) diff --git a/mmdet/datasets/pipelines/__init__.py b/mmdet/datasets/pipelines/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dd5919e0517fbfdb93fb163a75011f331158f3e1 --- /dev/null +++ b/mmdet/datasets/pipelines/__init__.py @@ -0,0 +1,16 @@ +from .compose import Compose +from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor, + Transpose, to_tensor) +from .loading import LoadAnnotations, 
LoadImageFromFile, LoadProposals +from .test_aug import MultiScaleFlipAug +from .transforms import (Expand, MinIoURandomCrop, Normalize, Pad, + PhotoMetricDistortion, RandomCrop, RandomFlip, Resize, + SegResizeFlipPadRescale) + +__all__ = [ + 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', + 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile', + 'LoadProposals', 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', + 'RandomCrop', 'Normalize', 'SegResizeFlipPadRescale', 'MinIoURandomCrop', + 'Expand', 'PhotoMetricDistortion' +] diff --git a/mmdet/datasets/pipelines/compose.py b/mmdet/datasets/pipelines/compose.py new file mode 100644 index 0000000000000000000000000000000000000000..f160eed97e383845bcc4822dfbb102f01eecfefe --- /dev/null +++ b/mmdet/datasets/pipelines/compose.py @@ -0,0 +1,35 @@ +import collections + +from mmdet.utils import build_from_cfg +from ..registry import PIPELINES + + +@PIPELINES.register_module +class Compose(object): + + def __init__(self, transforms): + assert isinstance(transforms, collections.abc.Sequence) + self.transforms = [] + for transform in transforms: + if isinstance(transform, dict): + transform = build_from_cfg(transform, PIPELINES) + self.transforms.append(transform) + elif callable(transform): + self.transforms.append(transform) + else: + raise TypeError('transform must be callable or a dict') + + def __call__(self, data): + for t in self.transforms: + data = t(data) + if data is None: + return None + return data + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + for t in self.transforms: + format_string += '\n' + format_string += ' {0}'.format(t) + format_string += '\n)' + return format_string diff --git a/mmdet/datasets/pipelines/formating.py b/mmdet/datasets/pipelines/formating.py new file mode 100644 index 0000000000000000000000000000000000000000..f5357f742f9531262bb4382857715f3b26a79d6a --- /dev/null +++ b/mmdet/datasets/pipelines/formating.py @@ -0,0 +1,157 @@ 
+from collections.abc import Sequence + +import mmcv +import numpy as np +import torch +from mmcv.parallel import DataContainer as DC + +from ..registry import PIPELINES + + +def to_tensor(data): + """Convert objects of various python types to :obj:`torch.Tensor`. + + Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, + :class:`Sequence`, :class:`int` and :class:`float`. + """ + if isinstance(data, torch.Tensor): + return data + elif isinstance(data, np.ndarray): + return torch.from_numpy(data) + elif isinstance(data, Sequence) and not mmcv.is_str(data): + return torch.tensor(data) + elif isinstance(data, int): + return torch.LongTensor([data]) + elif isinstance(data, float): + return torch.FloatTensor([data]) + else: + raise TypeError('type {} cannot be converted to tensor.'.format( + type(data))) + + +@PIPELINES.register_module +class ToTensor(object): + + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + for key in self.keys: + results[key] = to_tensor(results[key]) + return results + + def __repr__(self): + return self.__class__.__name__ + '(keys={})'.format(self.keys) + + +@PIPELINES.register_module +class ImageToTensor(object): + + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + for key in self.keys: + results[key] = to_tensor(results[key].transpose(2, 0, 1)) + return results + + def __repr__(self): + return self.__class__.__name__ + '(keys={})'.format(self.keys) + + +@PIPELINES.register_module +class Transpose(object): + + def __init__(self, keys, order): + self.keys = keys + self.order = order + + def __call__(self, results): + for key in self.keys: + results[key] = results[key].transpose(self.order) + return results + + def __repr__(self): + return self.__class__.__name__ + '(keys={}, order={})'.format( + self.keys, self.order) + + +@PIPELINES.register_module +class ToDataContainer(object): + + def __init__(self, + fields=(dict(key='img', stack=True), dict(key='gt_bboxes'), + 
dict(key='gt_labels'))): + self.fields = fields + + def __call__(self, results): + for field in self.fields: + field = field.copy() + key = field.pop('key') + results[key] = DC(results[key], **field) + return results + + def __repr__(self): + return self.__class__.__name__ + '(fields={})'.format(self.fields) + + +@PIPELINES.register_module +class DefaultFormatBundle(object): + """Default formatting bundle. + + It simplifies the pipeline of formatting common fields, including "img", + "proposals", "gt_bboxes", "gt_labels", "gt_masks" and "gt_semantic_seg". + These fields are formatted as follows. + + - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) + - proposals: (1)to tensor, (2)to DataContainer + - gt_bboxes: (1)to tensor, (2)to DataContainer + - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer + - gt_labels: (1)to tensor, (2)to DataContainer + - gt_masks: (1)to tensor, (2)to DataContainer (cpu_only=True) + - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor, + (3)to DataContainer (stack=True) + """ + + def __call__(self, results): + if 'img' in results: + img = np.ascontiguousarray(results['img'].transpose(2, 0, 1)) + results['img'] = DC(to_tensor(img), stack=True) + for key in ['proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels']: + if key not in results: + continue + results[key] = DC(to_tensor(results[key])) + if 'gt_masks' in results: + results['gt_masks'] = DC(results['gt_masks'], cpu_only=True) + if 'gt_semantic_seg' in results: + results['gt_semantic_seg'] = DC( + to_tensor(results['gt_semantic_seg'][None, ...]), stack=True) + return results + + def __repr__(self): + return self.__class__.__name__ + + +@PIPELINES.register_module +class Collect(object): + + def __init__(self, + keys, + meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', + 'scale_factor', 'flip', 'img_norm_cfg')): + self.keys = keys + self.meta_keys = meta_keys + + def __call__(self, results): + data = {} + img_meta = {} + for key in self.meta_keys: + 
img_meta[key] = results[key] + data['img_meta'] = DC(img_meta, cpu_only=True) + for key in self.keys: + data[key] = results[key] + return data + + def __repr__(self): + return self.__class__.__name__ + '(keys={}, meta_keys={})'.format( + self.keys, self.meta_keys) diff --git a/mmdet/datasets/pipelines/loading.py b/mmdet/datasets/pipelines/loading.py new file mode 100644 index 0000000000000000000000000000000000000000..cb5ce387e0d6eaa762a6b3eb72671dc05a804b63 --- /dev/null +++ b/mmdet/datasets/pipelines/loading.py @@ -0,0 +1,145 @@ +import os.path as osp +import warnings + +import mmcv +import numpy as np +import pycocotools.mask as maskUtils + +from ..registry import PIPELINES + + +@PIPELINES.register_module +class LoadImageFromFile(object): + + def __init__(self, to_float32=False): + self.to_float32 = to_float32 + + def __call__(self, results): + filename = osp.join(results['img_prefix'], + results['img_info']['filename']) + img = mmcv.imread(filename) + if self.to_float32: + img = img.astype(np.float32) + results['filename'] = filename + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + return results + + def __repr__(self): + return self.__class__.__name__ + '(to_float32={})'.format( + self.to_float32) + + +@PIPELINES.register_module +class LoadAnnotations(object): + + def __init__(self, + with_bbox=True, + with_label=True, + with_mask=False, + with_seg=False, + poly2mask=True, + skip_img_without_anno=True): + self.with_bbox = with_bbox + self.with_label = with_label + self.with_mask = with_mask + self.with_seg = with_seg + self.poly2mask = poly2mask + self.skip_img_without_anno = skip_img_without_anno + + def _load_bboxes(self, results): + ann_info = results['ann_info'] + results['gt_bboxes'] = ann_info['bboxes'] + if len(results['gt_bboxes']) == 0 and self.skip_img_without_anno: + file_path = osp.join(results['img_prefix'], + results['img_info']['filename']) + warnings.warn( + 'Skip the image "{}" that has no valid gt 
bbox'.format( + file_path)) + return None + results['gt_bboxes_ignore'] = ann_info.get('bboxes_ignore', None) + results['bbox_fields'].extend(['gt_bboxes', 'gt_bboxes_ignore']) + return results + + def _load_labels(self, results): + results['gt_labels'] = results['ann_info']['labels'] + return results + + def _poly2mask(self, mask_ann, img_h, img_w): + if isinstance(mask_ann, list): + # polygon -- a single object might consist of multiple parts + # we merge all parts into one mask rle code + rles = maskUtils.frPyObjects(mask_ann, img_h, img_w) + rle = maskUtils.merge(rles) + elif isinstance(mask_ann['counts'], list): + # uncompressed RLE + rle = maskUtils.frPyObjects(mask_ann, img_h, img_w) + else: + # rle + rle = mask_ann + mask = maskUtils.decode(rle) + return mask + + def _load_masks(self, results): + h, w = results['img_info']['height'], results['img_info']['width'] + gt_masks = results['ann_info']['masks'] + if self.poly2mask: + gt_masks = [self._poly2mask(mask, h, w) for mask in gt_masks] + results['gt_masks'] = gt_masks + results['mask_fields'].append('gt_masks') + return results + + def _load_semantic_seg(self, results): + results['gt_semantic_seg'] = mmcv.imread( + osp.join(results['seg_prefix'], results['ann_info']['seg_map']), + flag='unchanged').squeeze() + return results + + def __call__(self, results): + if self.with_bbox: + results = self._load_bboxes(results) + if results is None: + return None + if self.with_label: + results = self._load_labels(results) + if self.with_mask: + results = self._load_masks(results) + if self.with_seg: + results = self._load_semantic_seg(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += ('(with_bbox={}, with_label={}, with_mask={},' + ' with_seg={})').format(self.with_bbox, self.with_label, + self.with_mask, self.with_seg) + return repr_str + + +@PIPELINES.register_module +class LoadProposals(object): + + def __init__(self, num_max_proposals=None): + 
self.num_max_proposals = num_max_proposals + + def __call__(self, results): + proposals = results['proposals'] + if proposals.shape[1] not in (4, 5): + raise AssertionError( + 'proposals should have shapes (n, 4) or (n, 5), ' + 'but found {}'.format(proposals.shape)) + proposals = proposals[:, :4] + + if self.num_max_proposals is not None: + proposals = proposals[:self.num_max_proposals] + + if len(proposals) == 0: + proposals = np.array([0, 0, 0, 0], dtype=np.float32) + results['proposals'] = proposals + results['bbox_fields'].append('proposals') + return results + + def __repr__(self): + return self.__class__.__name__ + '(num_max_proposals={})'.format( + self.num_max_proposals) diff --git a/mmdet/datasets/pipelines/test_aug.py b/mmdet/datasets/pipelines/test_aug.py new file mode 100644 index 0000000000000000000000000000000000000000..b5d21807529bdda7f5a3f575cfd245c580cf7592 --- /dev/null +++ b/mmdet/datasets/pipelines/test_aug.py @@ -0,0 +1,38 @@ +import mmcv + +from ..registry import PIPELINES +from .compose import Compose + + +@PIPELINES.register_module +class MultiScaleFlipAug(object): + + def __init__(self, transforms, img_scale, flip=False): + self.transforms = Compose(transforms) + self.img_scale = img_scale if isinstance(img_scale, + list) else [img_scale] + assert mmcv.is_list_of(self.img_scale, tuple) + self.flip = flip + + def __call__(self, results): + aug_data = [] + flip_aug = [False, True] if self.flip else [False] + for scale in self.img_scale: + for flip in flip_aug: + _results = results.copy() + _results['scale'] = scale + _results['flip'] = flip + data = self.transforms(_results) + aug_data.append(data) + # list of dict to dict of list + aug_data_dict = {key: [] for key in aug_data[0]} + for data in aug_data: + for key, val in data.items(): + aug_data_dict[key].append(val) + return aug_data_dict + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += '(transforms={}, img_scale={}, flip={})'.format( + self.transforms, 
self.img_scale, self.flip) + return repr_str diff --git a/mmdet/datasets/pipelines/transforms.py b/mmdet/datasets/pipelines/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..60ee42de69cd85845ea9ac8731a8d12bda7db715 --- /dev/null +++ b/mmdet/datasets/pipelines/transforms.py @@ -0,0 +1,634 @@ +import mmcv +import numpy as np +from imagecorruptions import corrupt +from numpy import random + +from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps +from ..registry import PIPELINES + + +@PIPELINES.register_module +class Resize(object): + """Resize images & bbox & mask. + + This transform resizes the input image to some scale. Bboxes and masks are + then resized with the same scale factor. If the input dict contains the key + "scale", then the scale in the input dict is used, otherwise the specified + scale in the init method is used. + + `img_scale` can either be a tuple (single-scale) or a list of tuple + (multi-scale). There are 3 multiscale modes: + - `ratio_range` is not None: randomly sample a ratio from the ratio range + and multiply it with the image scale. + - `ratio_range` is None and `multiscale_mode` == "range": randomly sample a + scale from the a range. + - `ratio_range` is None and `multiscale_mode` == "value": randomly sample a + scale from multiple scales. + + Args: + img_scale (tuple or list[tuple]): Images scales for resizing. + multiscale_mode (str): Either "range" or "value". + ratio_range (tuple[float]): (min_ratio, max_ratio) + keep_ratio (bool): Whether to keep the aspect ratio when resizing the + image. 
+ """ + + def __init__(self, + img_scale=None, + multiscale_mode='range', + ratio_range=None, + keep_ratio=True): + if img_scale is None: + self.img_scale = None + else: + if isinstance(img_scale, list): + self.img_scale = img_scale + else: + self.img_scale = [img_scale] + assert mmcv.is_list_of(self.img_scale, tuple) + + if ratio_range is not None: + # mode 1: given a scale and a range of image ratio + assert len(self.img_scale) == 1 + else: + # mode 2: given multiple scales or a range of scales + assert multiscale_mode in ['value', 'range'] + + self.multiscale_mode = multiscale_mode + self.ratio_range = ratio_range + self.keep_ratio = keep_ratio + + @staticmethod + def random_select(img_scales): + assert mmcv.is_list_of(img_scales, tuple) + scale_idx = np.random.randint(len(img_scales)) + img_scale = img_scales[scale_idx] + return img_scale, scale_idx + + @staticmethod + def random_sample(img_scales): + assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2 + img_scale_long = [max(s) for s in img_scales] + img_scale_short = [min(s) for s in img_scales] + long_edge = np.random.randint( + min(img_scale_long), + max(img_scale_long) + 1) + short_edge = np.random.randint( + min(img_scale_short), + max(img_scale_short) + 1) + img_scale = (long_edge, short_edge) + return img_scale, None + + @staticmethod + def random_sample_ratio(img_scale, ratio_range): + assert isinstance(img_scale, tuple) and len(img_scale) == 2 + min_ratio, max_ratio = ratio_range + assert min_ratio <= max_ratio + ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio + scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio) + return scale, None + + def _random_scale(self, results): + if self.ratio_range is not None: + scale, scale_idx = self.random_sample_ratio( + self.img_scale[0], self.ratio_range) + elif len(self.img_scale) == 1: + scale, scale_idx = self.img_scale[0], 0 + elif self.multiscale_mode == 'range': + scale, scale_idx = 
self.random_sample(self.img_scale) + elif self.multiscale_mode == 'value': + scale, scale_idx = self.random_select(self.img_scale) + else: + raise NotImplementedError + + results['scale'] = scale + results['scale_idx'] = scale_idx + + def _resize_img(self, results): + if self.keep_ratio: + img, scale_factor = mmcv.imrescale( + results['img'], results['scale'], return_scale=True) + else: + img, w_scale, h_scale = mmcv.imresize( + results['img'], results['scale'], return_scale=True) + scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], + dtype=np.float32) + results['img'] = img + results['img_shape'] = img.shape + results['pad_shape'] = img.shape # in case that there is no padding + results['scale_factor'] = scale_factor + results['keep_ratio'] = self.keep_ratio + + def _resize_bboxes(self, results): + img_shape = results['img_shape'] + for key in results.get('bbox_fields', []): + bboxes = results[key] * results['scale_factor'] + bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1] - 1) + bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0] - 1) + results[key] = bboxes + + def _resize_masks(self, results): + for key in results.get('mask_fields', []): + if results[key] is None: + continue + if self.keep_ratio: + masks = [ + mmcv.imrescale( + mask, results['scale_factor'], interpolation='nearest') + for mask in results[key] + ] + else: + mask_size = (results['img_shape'][1], results['img_shape'][0]) + masks = [ + mmcv.imresize(mask, mask_size, interpolation='nearest') + for mask in results[key] + ] + results[key] = masks + + def __call__(self, results): + if 'scale' not in results: + self._random_scale(results) + self._resize_img(results) + self._resize_bboxes(results) + self._resize_masks(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += ('(img_scale={}, multiscale_mode={}, ratio_range={}, ' + 'keep_ratio={})').format(self.img_scale, + self.multiscale_mode, + self.ratio_range, + self.keep_ratio) 
+ return repr_str + + +@PIPELINES.register_module +class RandomFlip(object): + """Flip the image & bbox & mask. + + If the input dict contains the key "flip", then the flag will be used, + otherwise it will be randomly decided by a ratio specified in the init + method. + + Args: + flip_ratio (float, optional): The flipping probability. + """ + + def __init__(self, flip_ratio=None): + self.flip_ratio = flip_ratio + if flip_ratio is not None: + assert flip_ratio >= 0 and flip_ratio <= 1 + + def bbox_flip(self, bboxes, img_shape): + """Flip bboxes horizontally. + + Args: + bboxes(ndarray): shape (..., 4*k) + img_shape(tuple): (height, width) + """ + assert bboxes.shape[-1] % 4 == 0 + w = img_shape[1] + flipped = bboxes.copy() + flipped[..., 0::4] = w - bboxes[..., 2::4] - 1 + flipped[..., 2::4] = w - bboxes[..., 0::4] - 1 + return flipped + + def __call__(self, results): + if 'flip' not in results: + flip = True if np.random.rand() < self.flip_ratio else False + results['flip'] = flip + if results['flip']: + # flip image + results['img'] = mmcv.imflip(results['img']) + # flip bboxes + for key in results.get('bbox_fields', []): + results[key] = self.bbox_flip(results[key], + results['img_shape']) + # flip masks + for key in results.get('mask_fields', []): + results[key] = [mask[:, ::-1] for mask in results[key]] + return results + + def __repr__(self): + return self.__class__.__name__ + '(flip_ratio={})'.format( + self.flip_ratio) + + +@PIPELINES.register_module +class Pad(object): + """Pad the image & mask. + + There are two padding modes: (1) pad to a fixed size and (2) pad to the + minimum size that is divisible by some number. + + Args: + size (tuple, optional): Fixed padding size. + size_divisor (int, optional): The divisor of padded size. + pad_val (float, optional): Padding value, 0 by default. 
+ """ + + def __init__(self, size=None, size_divisor=None, pad_val=0): + self.size = size + self.size_divisor = size_divisor + self.pad_val = pad_val + # only one of size and size_divisor should be valid + assert size is not None or size_divisor is not None + assert size is None or size_divisor is None + + def _pad_img(self, results): + if self.size is not None: + padded_img = mmcv.impad(results['img'], self.size) + elif self.size_divisor is not None: + padded_img = mmcv.impad_to_multiple( + results['img'], self.size_divisor, pad_val=self.pad_val) + results['img'] = padded_img + results['pad_shape'] = padded_img.shape + results['pad_fixed_size'] = self.size + results['pad_size_divisor'] = self.size_divisor + + def _pad_masks(self, results): + pad_shape = results['pad_shape'][:2] + for key in results.get('mask_fields', []): + padded_masks = [ + mmcv.impad(mask, pad_shape, pad_val=self.pad_val) + for mask in results[key] + ] + results[key] = np.stack(padded_masks, axis=0) + + def __call__(self, results): + self._pad_img(results) + self._pad_masks(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += '(size={}, size_divisor={}, pad_val={})'.format( + self.size, self.size_divisor, self.pad_val) + return repr_str + + +@PIPELINES.register_module +class Normalize(object): + """Normalize the image. + + Args: + mean (sequence): Mean values of 3 channels. + std (sequence): Std values of 3 channels. + to_rgb (bool): Whether to convert the image from BGR to RGB, + default is true. 
+ """ + + def __init__(self, mean, std, to_rgb=True): + self.mean = np.array(mean, dtype=np.float32) + self.std = np.array(std, dtype=np.float32) + self.to_rgb = to_rgb + + def __call__(self, results): + results['img'] = mmcv.imnormalize(results['img'], self.mean, self.std, + self.to_rgb) + results['img_norm_cfg'] = dict( + mean=self.mean, std=self.std, to_rgb=self.to_rgb) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += '(mean={}, std={}, to_rgb={})'.format( + self.mean, self.std, self.to_rgb) + return repr_str + + +@PIPELINES.register_module +class RandomCrop(object): + """Random crop the image & bboxes. + + Args: + crop_size (tuple): Expected size after cropping, (h, w). + """ + + def __init__(self, crop_size): + self.crop_size = crop_size + + def __call__(self, results): + img = results['img'] + margin_h = max(img.shape[0] - self.crop_size[0], 0) + margin_w = max(img.shape[1] - self.crop_size[1], 0) + offset_h = np.random.randint(0, margin_h + 1) + offset_w = np.random.randint(0, margin_w + 1) + crop_y1, crop_y2 = offset_h, offset_h + self.crop_size[0] + crop_x1, crop_x2 = offset_w, offset_w + self.crop_size[1] + + # crop the image + img = img[crop_y1:crop_y2, crop_x1:crop_x2, :] + img_shape = img.shape + results['img'] = img + results['img_shape'] = img_shape + + # crop bboxes accordingly and clip to the image boundary + for key in results.get('bbox_fields', []): + bbox_offset = np.array([offset_w, offset_h, offset_w, offset_h], + dtype=np.float32) + bboxes = results[key] - bbox_offset + bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1] - 1) + bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0] - 1) + results[key] = bboxes + + # filter out the gt bboxes that are completely cropped + if 'gt_bboxes' in results: + gt_bboxes = results['gt_bboxes'] + valid_inds = (gt_bboxes[:, 2] > gt_bboxes[:, 0]) & ( + gt_bboxes[:, 3] > gt_bboxes[:, 1]) + # if no gt bbox remains after cropping, just skip this image + if 
not np.any(valid_inds): + return None + results['gt_bboxes'] = gt_bboxes[valid_inds, :] + if 'gt_labels' in results: + results['gt_labels'] = results['gt_labels'][valid_inds] + + # filter and crop the masks + if 'gt_masks' in results: + valid_gt_masks = [] + for i in valid_inds: + gt_mask = results['gt_masks'][i][crop_y1:crop_y2, crop_x1: + crop_x2] + valid_gt_masks.append(gt_mask) + results['gt_masks'] = valid_gt_masks + + return results + + def __repr__(self): + return self.__class__.__name__ + '(crop_size={})'.format( + self.crop_size) + + +@PIPELINES.register_module +class SegResizeFlipPadRescale(object): + """A sequential transforms to semantic segmentation maps. + + The same pipeline as input images is applied to the semantic segmentation + map, and finally rescale it by some scale factor. The transforms include: + 1. resize + 2. flip + 3. pad + 4. rescale (so that the final size can be different from the image size) + + Args: + scale_factor (float): The scale factor of the final output. + """ + + def __init__(self, scale_factor=1): + self.scale_factor = scale_factor + + def __call__(self, results): + if results['keep_ratio']: + gt_seg = mmcv.imrescale( + results['gt_semantic_seg'], + results['scale'], + interpolation='nearest') + else: + gt_seg = mmcv.imresize( + results['gt_semantic_seg'], + results['scale'], + interpolation='nearest') + if results['flip']: + gt_seg = mmcv.imflip(gt_seg) + if gt_seg.shape != results['pad_shape']: + gt_seg = mmcv.impad(gt_seg, results['pad_shape'][:2]) + if self.scale_factor != 1: + gt_seg = mmcv.imrescale( + gt_seg, self.scale_factor, interpolation='nearest') + results['gt_semantic_seg'] = gt_seg + return results + + def __repr__(self): + return self.__class__.__name__ + '(scale_factor={})'.format( + self.scale_factor) + + +@PIPELINES.register_module +class PhotoMetricDistortion(object): + """Apply photometric distortion to image sequentially, every transformation + is applied with a probability of 0.5. 
The position of random contrast is in + second or second to last. + + 1. random brightness + 2. random contrast (mode 0) + 3. convert color from BGR to HSV + 4. random saturation + 5. random hue + 6. convert color from HSV to BGR + 7. random contrast (mode 1) + 8. randomly swap channels + + Args: + brightness_delta (int): delta of brightness. + contrast_range (tuple): range of contrast. + saturation_range (tuple): range of saturation. + hue_delta (int): delta of hue. + """ + + def __init__(self, + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18): + self.brightness_delta = brightness_delta + self.contrast_lower, self.contrast_upper = contrast_range + self.saturation_lower, self.saturation_upper = saturation_range + self.hue_delta = hue_delta + + def __call__(self, results): + img = results['img'] + # random brightness + if random.randint(2): + delta = random.uniform(-self.brightness_delta, + self.brightness_delta) + img += delta + + # mode == 0 --> do random contrast first + # mode == 1 --> do random contrast last + mode = random.randint(2) + if mode == 1: + if random.randint(2): + alpha = random.uniform(self.contrast_lower, + self.contrast_upper) + img *= alpha + + # convert color from BGR to HSV + img = mmcv.bgr2hsv(img) + + # random saturation + if random.randint(2): + img[..., 1] *= random.uniform(self.saturation_lower, + self.saturation_upper) + + # random hue + if random.randint(2): + img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta) + img[..., 0][img[..., 0] > 360] -= 360 + img[..., 0][img[..., 0] < 0] += 360 + + # convert color from HSV to BGR + img = mmcv.hsv2bgr(img) + + # random contrast + if mode == 0: + if random.randint(2): + alpha = random.uniform(self.contrast_lower, + self.contrast_upper) + img *= alpha + + # randomly swap channels + if random.randint(2): + img = img[..., random.permutation(3)] + + results['img'] = img + return results + + def __repr__(self): + repr_str = 
self.__class__.__name__ + repr_str += ('(brightness_delta={}, contrast_range={}, ' + 'saturation_range={}, hue_delta={})').format( + self.brightness_delta, self.contrast_range, + self.saturation_range, self.hue_delta) + return repr_str + + +@PIPELINES.register_module +class Expand(object): + """Random expand the image & bboxes. + + Randomly place the original image on a canvas of 'ratio' x original image + size filled with mean values. The ratio is in the range of ratio_range. + + Args: + mean (tuple): mean value of dataset. + to_rgb (bool): if need to convert the order of mean to align with RGB. + ratio_range (tuple): range of expand ratio. + """ + + def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)): + if to_rgb: + self.mean = mean[::-1] + else: + self.mean = mean + self.min_ratio, self.max_ratio = ratio_range + + def __call__(self, results): + if random.randint(2): + return results + + img, boxes = [results[k] for k in ('img', 'gt_bboxes')] + + h, w, c = img.shape + ratio = random.uniform(self.min_ratio, self.max_ratio) + expand_img = np.full((int(h * ratio), int(w * ratio), c), + self.mean).astype(img.dtype) + left = int(random.uniform(0, w * ratio - w)) + top = int(random.uniform(0, h * ratio - h)) + expand_img[top:top + h, left:left + w] = img + boxes += np.tile((left, top), 2) + + results['img'] = expand_img + results['gt_bboxes'] = boxes + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += '(mean={}, to_rgb={}, ratio_range={})'.format( + self.mean, self.to_rgb, self.ratio_range) + return repr_str + + +@PIPELINES.register_module +class MinIoURandomCrop(object): + """Random crop the image & bboxes, the cropped patches have minimum IoU + requirement with original image & bboxes, the IoU threshold is randomly + selected from min_ious. + + Args: + min_ious (tuple): minimum IoU threshold + crop_size (tuple): Expected size after cropping, (h, w). 
+ """ + + def __init__(self, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3): + # 1: return ori img + self.sample_mode = (1, *min_ious, 0) + self.min_crop_size = min_crop_size + + def __call__(self, results): + img, boxes, labels = [ + results[k] for k in ('img', 'gt_bboxes', 'gt_labels') + ] + h, w, c = img.shape + while True: + mode = random.choice(self.sample_mode) + if mode == 1: + return results + + min_iou = mode + for i in range(50): + new_w = random.uniform(self.min_crop_size * w, w) + new_h = random.uniform(self.min_crop_size * h, h) + + # h / w in [0.5, 2] + if new_h / new_w < 0.5 or new_h / new_w > 2: + continue + + left = random.uniform(w - new_w) + top = random.uniform(h - new_h) + + patch = np.array( + (int(left), int(top), int(left + new_w), int(top + new_h))) + overlaps = bbox_overlaps( + patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1) + if overlaps.min() < min_iou: + continue + + # center of boxes should inside the crop img + center = (boxes[:, :2] + boxes[:, 2:]) / 2 + mask = (center[:, 0] > patch[0]) * ( + center[:, 1] > patch[1]) * (center[:, 0] < patch[2]) * ( + center[:, 1] < patch[3]) + if not mask.any(): + continue + boxes = boxes[mask] + labels = labels[mask] + + # adjust boxes + img = img[patch[1]:patch[3], patch[0]:patch[2]] + boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:]) + boxes[:, :2] = boxes[:, :2].clip(min=patch[:2]) + boxes -= np.tile(patch[:2], 2) + + results['img'] = img + results['gt_bboxes'] = boxes + results['gt_labels'] = labels + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += '(min_ious={}, min_crop_size={})'.format( + self.min_ious, self.min_crop_size) + return repr_str + + +@PIPELINES.register_module +class Corrupt(object): + + def __init__(self, corruption, severity=1): + self.corruption = corruption + self.severity = severity + + def __call__(self, results): + results['img'] = corrupt( + results['img'].astype(np.uint8), + corruption_name=self.corruption, + 
severity=self.severity) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += '(corruption={}, severity={})'.format( + self.corruption, self.severity) + return repr_str diff --git a/mmdet/datasets/registry.py b/mmdet/datasets/registry.py index e7266240be5cd15fd505be335e12a0fb7d67dd31..974a4fbb769fe70770fa7d41766f9ce041244195 100644 --- a/mmdet/datasets/registry.py +++ b/mmdet/datasets/registry.py @@ -1,3 +1,4 @@ from mmdet.utils import Registry DATASETS = Registry('dataset') +PIPELINES = Registry('pipeline') diff --git a/mmdet/datasets/utils.py b/mmdet/datasets/utils.py deleted file mode 100644 index 9f4f46caec7d2411da44eda8f41ae48fdf881e00..0000000000000000000000000000000000000000 --- a/mmdet/datasets/utils.py +++ /dev/null @@ -1,68 +0,0 @@ -from collections import Sequence - -import matplotlib.pyplot as plt -import mmcv -import numpy as np -import torch - - -def to_tensor(data): - """Convert objects of various python types to :obj:`torch.Tensor`. - - Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, - :class:`Sequence`, :class:`int` and :class:`float`. - """ - if isinstance(data, torch.Tensor): - return data - elif isinstance(data, np.ndarray): - return torch.from_numpy(data) - elif isinstance(data, Sequence) and not mmcv.is_str(data): - return torch.tensor(data) - elif isinstance(data, int): - return torch.LongTensor([data]) - elif isinstance(data, float): - return torch.FloatTensor([data]) - else: - raise TypeError('type {} cannot be converted to tensor.'.format( - type(data))) - - -def random_scale(img_scales, mode='range'): - """Randomly select a scale from a list of scales or scale ranges. - - Args: - img_scales (list[tuple]): Image scale or scale range. - mode (str): "range" or "value". - - Returns: - tuple: Sampled image scale. 
- """ - num_scales = len(img_scales) - if num_scales == 1: # fixed scale is specified - img_scale = img_scales[0] - elif num_scales == 2: # randomly sample a scale - if mode == 'range': - img_scale_long = [max(s) for s in img_scales] - img_scale_short = [min(s) for s in img_scales] - long_edge = np.random.randint( - min(img_scale_long), - max(img_scale_long) + 1) - short_edge = np.random.randint( - min(img_scale_short), - max(img_scale_short) + 1) - img_scale = (long_edge, short_edge) - elif mode == 'value': - img_scale = img_scales[np.random.randint(num_scales)] - else: - if mode != 'value': - raise ValueError( - 'Only "value" mode supports more than 2 image scales') - img_scale = img_scales[np.random.randint(num_scales)] - return img_scale - - -def show_ann(coco, img, ann_info): - plt.imshow(mmcv.bgr2rgb(img)) - plt.axis('off') - coco.showAnns(ann_info) - plt.show() diff --git a/mmdet/models/detectors/base.py b/mmdet/models/detectors/base.py index 038dd109c08a454d4505616a3c7c8eb9ffe0580e..7650878b6d63204d6974b0a46c23f079d0bd5a4b 100644 --- a/mmdet/models/detectors/base.py +++ b/mmdet/models/detectors/base.py @@ -87,12 +87,7 @@ class BaseDetector(nn.Module): else: return self.forward_test(img, img_meta, **kwargs) - def show_result(self, - data, - result, - img_norm_cfg, - dataset=None, - score_thr=0.3): + def show_result(self, data, result, dataset=None, score_thr=0.3): if isinstance(result, tuple): bbox_result, segm_result = result else: @@ -100,7 +95,7 @@ class BaseDetector(nn.Module): img_tensor = data['img'][0] img_metas = data['img_meta'][0].data[0] - imgs = tensor2imgs(img_tensor, **img_norm_cfg) + imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) assert len(imgs) == len(img_metas) if dataset is None: diff --git a/mmdet/models/detectors/cascade_rcnn.py b/mmdet/models/detectors/cascade_rcnn.py index bd878eb40c0bcfd126cf8e3a62a6ef8a1bf86cf5..4333b811bdeee15984f752804ffeeae77ffb38b3 100644 --- a/mmdet/models/detectors/cascade_rcnn.py +++ 
b/mmdet/models/detectors/cascade_rcnn.py @@ -402,7 +402,7 @@ class CascadeRCNN(BaseDetector, RPNTestMixin): def aug_test(self, img, img_meta, proposals=None, rescale=False): raise NotImplementedError - def show_result(self, data, result, img_norm_cfg, **kwargs): + def show_result(self, data, result, **kwargs): if self.with_mask: ms_bbox_result, ms_segm_result = result if isinstance(ms_bbox_result, dict): @@ -411,5 +411,4 @@ class CascadeRCNN(BaseDetector, RPNTestMixin): else: if isinstance(result, dict): result = result['ensemble'] - super(CascadeRCNN, self).show_result(data, result, img_norm_cfg, - **kwargs) + super(CascadeRCNN, self).show_result(data, result, **kwargs) diff --git a/mmdet/models/detectors/rpn.py b/mmdet/models/detectors/rpn.py index c9de290fbfa46b3daeb8a062ed0f582b67dc147a..fafee4fc21f58ada5a02d9e91a1685b0dadb2474 100644 --- a/mmdet/models/detectors/rpn.py +++ b/mmdet/models/detectors/rpn.py @@ -81,7 +81,7 @@ class RPN(BaseDetector, RPNTestMixin): # TODO: remove this restriction return proposal_list[0].cpu().numpy() - def show_result(self, data, result, img_norm_cfg, dataset=None, top_k=20): + def show_result(self, data, result, dataset=None, top_k=20): """Show RPN proposals on the image. 
Although we assume batch size is 1, this method supports arbitrary @@ -89,7 +89,7 @@ class RPN(BaseDetector, RPNTestMixin): """ img_tensor = data['img'][0] img_metas = data['img_meta'][0].data[0] - imgs = tensor2imgs(img_tensor, **img_norm_cfg) + imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) assert len(imgs) == len(img_metas) for img, img_meta in zip(imgs, img_metas): h, w, _ = img_meta['img_shape'] diff --git a/mmdet/utils/registry.py b/mmdet/utils/registry.py index e39552a844bf5f37c19b95750c38a612a33bc15e..a1cc87dcfdb5d954ec6fa496b4ffc52ac14dbbfa 100644 --- a/mmdet/utils/registry.py +++ b/mmdet/utils/registry.py @@ -61,14 +61,16 @@ def build_from_cfg(cfg, registry, default_args=None): args = cfg.copy() obj_type = args.pop('type') if mmcv.is_str(obj_type): - obj_type = registry.get(obj_type) - if obj_type is None: + obj_cls = registry.get(obj_type) + if obj_cls is None: raise KeyError('{} is not in the {} registry'.format( obj_type, registry.name)) - elif not inspect.isclass(obj_type): + elif inspect.isclass(obj_type): + obj_cls = obj_type + else: raise TypeError('type must be a str or valid type, but got {}'.format( type(obj_type))) if default_args is not None: for name, value in default_args.items(): args.setdefault(name, value) - return obj_type(**args) + return obj_cls(**args) diff --git a/tools/test.py b/tools/test.py index c0fdec700ca63b3f68f78187ad1f0ec4d3021db2..e3ff48734e35cffaef17f575c68ef5bc07ac5b3b 100644 --- a/tools/test.py +++ b/tools/test.py @@ -27,7 +27,7 @@ def single_gpu_test(model, data_loader, show=False): results.append(result) if show: - model.module.show_result(data, result, dataset.img_norm_cfg) + model.module.show_result(data, result) batch_size = data['img'][0].size(0) for _ in range(batch_size): diff --git a/tools/test_robustness.py b/tools/test_robustness.py index 584654b618bef768dfe15204caedaecb68b2c9ea..e2632151777b32a83aedf198c3db4796b57e65ca 100644 --- a/tools/test_robustness.py +++ b/tools/test_robustness.py @@ 
-1,4 +1,5 @@ import argparse +import copy import os import os.path as osp import shutil @@ -350,13 +351,15 @@ def main(): continue # assign corruption and severity - if corruption_severity == 0: - # evaluate without corruptions for severity = 0 - cfg.data.test['corruption'] = None - cfg.data.test['corruption_severity'] = 0 - else: - cfg.data.test['corruption'] = corruption - cfg.data.test['corruption_severity'] = corruption_severity + if corruption_severity > 0: + test_data_cfg = copy.deepcopy(cfg.data.test) + corruption_trans = dict( + type='Corrupt', + corruption=corruption, + severity=corruption_severity) + # TODO: hard coded "1", we assume that the first step is + # loading images, which needs to be fixed in the future + test_data_cfg['pipeline'].insert(1, corruption_trans) # print info print('\nTesting {} at severity {}'.format(corruption,