From 7ae953332a061d0f82528048a798e4e0db663761 Mon Sep 17 00:00:00 2001 From: nikhil_rayaprolu <nikhil684@gmail.com> Date: Sun, 9 Feb 2020 15:31:56 +0530 Subject: [PATCH] changes for aicrowd --- configs/htc_r50.py | 317 ++++++++++++++++++++++ mmdet/datasets/custom.py | 5 +- mmdet/models/losses/cross_entropy_loss.py | 3 + tools/analyze_logs.py | 29 +- tools/dist_train.sh | 2 +- 5 files changed, 339 insertions(+), 17 deletions(-) create mode 100644 configs/htc_r50.py diff --git a/configs/htc_r50.py b/configs/htc_r50.py new file mode 100644 index 0000000..f065170 --- /dev/null +++ b/configs/htc_r50.py @@ -0,0 +1,317 @@ +# model settings +model = dict( + type='HybridTaskCascade', + num_stages=3, + pretrained='torchvision://resnet50', + interleaved=True, + mask_info_flow=True, + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_scales=[8], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=[ + dict( + type='SharedFCBBoxHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=62, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + dict( + type='SharedFCBBoxHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=62, + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1], + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + dict( + type='SharedFCBBoxHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=62, + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067], + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) + ], + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='HTCMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=62, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), + ) +# model training and testing settings +train_cfg = dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + 
nms_thr=0.7, + min_bbox_size=0), + rcnn=[ + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.6, + neg_iou_thr=0.6, + min_pos_iou=0.6, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.7, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False) + ], + stage_loss_weights=[1, 0.5, 0.25]) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=1000, + nms_post=1000, + max_num=1000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict( + score_thr=0.001, + nms=dict(type='soft_nms', iou_thr=0.3), + max_per_img=10, + mask_thr_binary=0.5), + keep_all_stages=False) +# dataset settings +dataset_type = 'CocoDataset' +data_root = '/ssd_scratch/cvit/mmdetection/data/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +albu_train_transforms = [ + dict( + type='ShiftScaleRotate', + shift_limit=0.0625, + scale_limit=0.0, + rotate_limit=0, + interpolation=1, + p=0.5), + dict( + type='RandomBrightnessContrast', + brightness_limit=[0.1, 0.3], + contrast_limit=[0.1, 0.3], + p=0.2), + dict( + type='OneOf', + transforms=[ + dict( + type='RGBShift', + r_shift_limit=10, + g_shift_limit=10, + b_shift_limit=10, + p=1.0), + dict( + type='HueSaturationValue', + hue_shift_limit=20, + sat_shift_limit=30, + val_shift_limit=20, + p=1.0) + ], + p=0.1), + dict(type='JpegCompression', quality_lower=85, quality_upper=95, p=0.2), + dict(type='ChannelShuffle', p=0.1), + dict( + type='OneOf', + transforms=[ + dict(type='Blur', blur_limit=3, p=1.0), + dict(type='MedianBlur', blur_limit=3, p=1.0) + ], + p=0.1), +] +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(800, 800), keep_ratio=True), + dict(type='Pad', size_divisor=32), + dict( + type='Albu', + transforms=albu_train_transforms, + bbox_params=dict( + type='BboxParams', + format='pascal_voc', + label_fields=['gt_labels'], + min_visibility=0.0, + filter_lost_elements=True), + keymap={ + 'img': 'image', + 'gt_masks': 'masks', + 'gt_bboxes': 'bboxes' + }, + update_pad_shape=False, + skip_img_without_anno=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'], + meta_keys=('filename', 'ori_shape', 'img_shape', 'img_norm_cfg', + 'pad_shape', 'scale_factor')) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(800, 800), + flip=True, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + imgs_per_gpu=4, + workers_per_gpu=0, + 
train=dict( + type=dataset_type, + ann_file=data_root + 'train/annotations.json', + img_prefix=data_root + 'train/images', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'val/annotations.json', + img_prefix=data_root + 'val/images', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'val/annotations.json', + img_prefix=data_root + 'val/images', + pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[17, 24]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 30 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/htc_r50_fpn' +load_from = '/ssd_scratch/cvit/mmdetection/htc_r50_fpn_20e_20190408-c03b7015.pth' +resume_from = None +workflow = [('train', 1)] \ No newline at end of file diff --git a/mmdet/datasets/custom.py b/mmdet/datasets/custom.py index 935b39d..5bdfeea 100644 --- a/mmdet/datasets/custom.py +++ b/mmdet/datasets/custom.py @@ -141,7 +141,10 @@ class CustomDataset(Dataset): if self.proposals is not None: results['proposals'] = self.proposals[idx] self.pre_pipeline(results) - return self.pipeline(results) + try: + return self.pipeline(results) + except: + return None def prepare_test_img(self, idx): img_info = self.img_infos[idx] diff --git a/mmdet/models/losses/cross_entropy_loss.py b/mmdet/models/losses/cross_entropy_loss.py index dd9d477..53c9da3 100644 --- a/mmdet/models/losses/cross_entropy_loss.py +++ b/mmdet/models/losses/cross_entropy_loss.py @@ -8,6 +8,9 @@ from .utils import weight_reduce_loss def cross_entropy(pred, label, weight=None, reduction='mean', avg_factor=None): # element-wise losses +# print(pred.shape, label.shape) +# torch.set_printoptions(profile="full") +# print(label) loss = F.cross_entropy(pred, label, reduction='none') # apply weights and do the reduction diff --git a/tools/analyze_logs.py b/tools/analyze_logs.py index 2810c98..0d54e64 100644 --- a/tools/analyze_logs.py +++ b/tools/analyze_logs.py @@ -32,28 +32,27 @@ def cal_train_time(log_dicts, args): def plot_curve(log_dicts, args): - if args.backend is not None: - plt.switch_backend(args.backend) - sns.set_style(args.style) + if args['backend'] is not None: + plt.switch_backend(args['backend']) # if legend is None, use {filename}_{key} as legend - legend = args.legend + legend = args['legend'] if legend is None: legend = [] - for json_log in args.json_logs: - for metric in args.keys: + for json_log in args['json_logs']: + for metric in args['keys']: legend.append('{}_{}'.format(json_log, metric)) - assert len(legend) == (len(args.json_logs) * len(args.keys)) - metrics = args.keys + assert len(legend) == (len(args['json_logs']) * len(args['keys'])) + metrics = args['keys'] num_metrics = len(metrics) for i, log_dict in enumerate(log_dicts): epochs = list(log_dict.keys()) for j, metric in enumerate(metrics): print('plot curve of {}, metric is {}'.format( - args.json_logs[i], metric)) + args['json_logs'][i], metric)) if metric not in log_dict[epochs[0]]: raise KeyError('{} does not contain metric {}'.format( - args.json_logs[i], metric)) + args['json_logs'][i], metric)) if 'mAP' in metric: 
xs = np.arange(1, max(epochs) + 1) @@ -81,13 +80,13 @@ def plot_curve(log_dicts, args): plt.plot( xs, ys, label=legend[i * num_metrics + j], linewidth=0.5) plt.legend() - if args.title is not None: - plt.title(args.title) - if args.out is None: + if args['title'] is not None: + plt.title(args['title']) + if args['out'] is None: plt.show() else: - print('save curve to: {}'.format(args.out)) - plt.savefig(args.out) + print('save curve to: {}'.format(args['out'])) + plt.savefig(args['out']) plt.cla() diff --git a/tools/dist_train.sh b/tools/dist_train.sh index 3b3b580..4e6911d 100755 --- a/tools/dist_train.sh +++ b/tools/dist_train.sh @@ -6,5 +6,5 @@ CONFIG=$1 GPUS=$2 PORT=${PORT:-29500} -$PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ +$PYTHON -W ignore::UserWarning -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} -- GitLab
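
A note on the new configs/htc_r50.py: the data_root, the load_from checkpoint path and the 62-way bbox/mask heads are specific to the AIcrowd food-recognition setup. In mmdetection 1.x the heads count the background class, so num_classes=62 corresponds to 61 foreground categories. Below is a minimal, hypothetical sanity check for those hard-coded values; the paths are copied from the config and pycocotools is assumed to be installed.

    # Hypothetical sanity check for the values hard-coded in configs/htc_r50.py.
    # Adjust data_root to wherever the AIcrowd data actually lives.
    from pycocotools.coco import COCO

    data_root = '/ssd_scratch/cvit/mmdetection/data/'   # value taken from the config
    coco = COCO(data_root + 'train/annotations.json')   # parse the COCO-style annotations
    num_foreground = len(coco.getCatIds())
    print('foreground categories:', num_foreground)
    # mmdetection 1.x heads include a background class, hence the +1.
    assert num_foreground + 1 == 62, 'num_classes in htc_r50.py should be categories + 1'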
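
The try/except added to CustomDataset.prepare_train_img relies on a None return value making the dataset re-sample another index instead of aborting the epoch. For context only (this is a paraphrase of the surrounding mmdetection 1.x code, not part of the patch), __getitem__ behaves roughly as follows:

    # Paraphrase of CustomDataset.__getitem__ in mmdetection 1.x, shown for context.
    # A None from prepare_train_img, e.g. an image whose augmentation pipeline raised,
    # triggers a retry with a randomly chosen replacement index.
    def __getitem__(self, idx):
        if self.test_mode:
            return self.prepare_test_img(idx)
        while True:
            data = self.prepare_train_img(idx)
            if data is None:
                idx = self._rand_another(idx)
                continue
            return data

Since the bare except: also catches KeyboardInterrupt and can hide genuine pipeline bugs, narrowing it to except Exception: and logging the failing index would keep the skip-on-failure behaviour while staying debuggable.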
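
The plot_curve changes in tools/analyze_logs.py swap attribute access on an argparse Namespace for plain dict indexing (and drop the seaborn style call), which makes the function easy to drive from another script or a notebook. A usage sketch under those assumptions follows; the log path and metric keys are examples, and load_json_logs is assumed to be the JSON-parsing helper the script's own main() already uses.

    # Hypothetical driver for the dict-based plot_curve; run from the repository root.
    import sys
    sys.path.insert(0, 'tools')                 # make tools/analyze_logs.py importable
    from analyze_logs import load_json_logs, plot_curve

    args = dict(
        backend=None,                           # matplotlib backend; None keeps the default
        legend=None,                            # None -> '{json_log}_{metric}' labels are generated
        json_logs=['work_dirs/htc_r50_fpn/latest.log.json'],   # example log file
        keys=['loss', 'loss_mask'],             # metrics to plot
        title='HTC R50 training curves',
        out='curves.png',                       # None would call plt.show() instead of saving
    )

    plot_curve(load_json_logs(args['json_logs']), args)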