From d568f7bfcfae242cdb2ee03bd130e8ce7d2fc27d Mon Sep 17 00:00:00 2001
From: Kai Chen <chenkaidev@gmail.com>
Date: Wed, 12 Dec 2018 14:24:42 +0800
Subject: [PATCH] add voc training configs and eval script

---
 .../faster_rcnn_r50_fpn_1x_voc0712.py         | 157 ++++++++++++++++++
 tools/voc_eval.py                             |  83 +++++++++
 2 files changed, 240 insertions(+)
 create mode 100644 configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py
 create mode 100644 tools/voc_eval.py

diff --git a/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py
new file mode 100644
index 0000000..cae0a06
--- /dev/null
+++ b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py
@@ -0,0 +1,157 @@
+# model settings
+model = dict(
+    type='FasterRCNN',
+    pretrained='modelzoo://resnet50',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        style='pytorch'),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        num_outs=5),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=256,
+        feat_channels=256,
+        anchor_scales=[8],
+        anchor_ratios=[0.5, 1.0, 2.0],
+        anchor_strides=[4, 8, 16, 32, 64],
+        target_means=[.0, .0, .0, .0],
+        target_stds=[1.0, 1.0, 1.0, 1.0],
+        use_sigmoid_cls=True),
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
+        out_channels=256,
+        featmap_strides=[4, 8, 16, 32]),
+    bbox_head=dict(
+        type='SharedFCBBoxHead',
+        num_fcs=2,
+        in_channels=256,
+        fc_out_channels=1024,
+        roi_feat_size=7,
+        num_classes=21,
+        target_means=[0., 0., 0., 0.],
+        target_stds=[0.1, 0.1, 0.2, 0.2],
+        reg_class_agnostic=False))
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.7,
+            neg_iou_thr=0.3,
+            min_pos_iou=0.3,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=256,
+            pos_fraction=0.5,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=False),
+        allowed_border=0,
+        pos_weight=-1,
+        smoothl1_beta=1 / 9.0,
+        debug=False),
+    rcnn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.5,
+            neg_iou_thr=0.5,
+            min_pos_iou=0.5,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=512,
+            pos_fraction=0.25,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=True),
+        pos_weight=-1,
+        debug=False))
+test_cfg = dict(
+    rpn=dict(
+        nms_across_levels=False,
+        nms_pre=2000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn=dict(
+        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
+    # soft-nms is also supported for rcnn testing
+    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
+)
+# dataset settings
+dataset_type = 'VOCDataset'
+data_root = 'data/VOCdevkit/'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+data = dict(
+    imgs_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type='RepeatDataset',  # to avoid reloading datasets frequently
+        times=3,
+        dataset=dict(
+            type=dataset_type,
+            ann_file=[
+                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
+                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
+            ],
+            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
+            img_scale=(1000, 600),
+            img_norm_cfg=img_norm_cfg,
+            size_divisor=32,
+            flip_ratio=0.5,
+            with_mask=False,
+            with_crowd=True,
+            with_label=True)),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
+        img_prefix=data_root + 'VOC2007/',
+        img_scale=(1000, 600),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=True,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
+        img_prefix=data_root + 'VOC2007/',
+        img_scale=(1000, 600),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_label=False,
+        test_mode=True))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(policy='step', step=[3])  # actual epoch = 3 * 3 = 9
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+total_epochs = 4  # actual epoch = 4 * 3 = 12
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/faster_rcnn_r50_fpn_1x_voc0712'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/tools/voc_eval.py b/tools/voc_eval.py
new file mode 100644
index 0000000..478ec3c
--- /dev/null
+++ b/tools/voc_eval.py
@@ -0,0 +1,83 @@
+from argparse import ArgumentParser
+
+import mmcv
+import numpy as np
+
+from mmdet import datasets
+from mmdet.core import eval_map
+
+
+def voc_eval(result_file, dataset, iou_thr=0.5):
+    """Evaluate dumped detection results against dataset annotations.
+
+    Args:
+        result_file (str): Path of the detection results, read with
+            ``mmcv.load``.
+        dataset: Dataset object providing ``__len__``, ``get_ann_info(idx)``
+            and ``CLASSES``, and optionally a ``year`` attribute.
+        iou_thr (float): IoU threshold forwarded to ``eval_map``.
+    """
+    det_results = mmcv.load(result_file)
+    gt_bboxes = []
+    gt_labels = []
+    gt_ignore = []
+    has_ignore = False
+    for i in range(len(dataset)):
+        ann = dataset.get_ann_info(i)
+        bboxes = ann['bboxes']
+        labels = ann['labels']
+        # Builtin ``bool`` replaces the ``np.bool`` alias, which newer NumPy
+        # releases no longer provide.
+        ignore = np.zeros(bboxes.shape[0], dtype=bool)
+        if 'bboxes_ignore' in ann:
+            has_ignore = True
+            # Ignored boxes are stacked after the regular ones and flagged
+            # True in the per-image mask.
+            ignore = np.concatenate([
+                ignore,
+                np.ones(ann['bboxes_ignore'].shape[0], dtype=bool)
+            ])
+            bboxes = np.vstack([bboxes, ann['bboxes_ignore']])
+            labels = np.concatenate([labels, ann['labels_ignore']])
+        # Exactly one mask per image keeps gt_ignore aligned with gt_bboxes.
+        gt_ignore.append(ignore)
+        gt_bboxes.append(bboxes)
+        gt_labels.append(labels)
+    if not has_ignore:
+        # No annotation carried ignore info: pass None, not a list of masks.
+        gt_ignore = None
+    # A dataset whose ``year`` attribute equals 2007 is evaluated under the
+    # 'voc07' name; any other dataset hands over its own CLASSES.
+    if hasattr(dataset, 'year') and dataset.year == 2007:
+        dataset_name = 'voc07'
+    else:
+        dataset_name = dataset.CLASSES
+    eval_map(
+        det_results,
+        gt_bboxes,
+        gt_labels,
+        gt_ignore=gt_ignore,
+        scale_ranges=None,
+        iou_thr=iou_thr,
+        dataset=dataset_name,
+        print_summary=True)
+
+
+def main():
+    """Parse command-line arguments and run the VOC evaluation."""
+    parser = ArgumentParser(description='VOC Evaluation')
+    parser.add_argument('result', help='result file path')
+    parser.add_argument('config', help='config file path')
+    parser.add_argument(
+        '--iou-thr',
+        type=float,
+        default=0.5,
+        help='IoU threshold for evaluation')
+    args = parser.parse_args()
+    cfg = mmcv.Config.fromfile(args.config)
+    test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets)
+    voc_eval(args.result, test_dataset, args.iou_thr)
+
+
+if __name__ == '__main__':
+    main()
-- 
GitLab