From 7ae953332a061d0f82528048a798e4e0db663761 Mon Sep 17 00:00:00 2001
From: nikhil_rayaprolu <nikhil684@gmail.com>
Date: Sun, 9 Feb 2020 15:31:56 +0530
Subject: [PATCH] Changes for AIcrowd: add HTC R-50 config and training tweaks

---
 configs/htc_r50.py                        | 317 ++++++++++++++++++++++
 mmdet/datasets/custom.py                  |   5 +-
 mmdet/models/losses/cross_entropy_loss.py |   3 +
 tools/analyze_logs.py                     |  29 +-
 tools/dist_train.sh                       |   2 +-
 5 files changed, 339 insertions(+), 17 deletions(-)
 create mode 100644 configs/htc_r50.py

diff --git a/configs/htc_r50.py b/configs/htc_r50.py
new file mode 100644
index 0000000..f065170
--- /dev/null
+++ b/configs/htc_r50.py
@@ -0,0 +1,317 @@
+# model settings
+model = dict(
+    type='HybridTaskCascade',
+    num_stages=3,
+    pretrained='torchvision://resnet50',
+    interleaved=True,
+    mask_info_flow=True,
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        style='pytorch'),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        num_outs=5),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=256,
+        feat_channels=256,
+        anchor_scales=[8],
+        anchor_ratios=[0.5, 1.0, 2.0],
+        anchor_strides=[4, 8, 16, 32, 64],
+        target_means=[.0, .0, .0, .0],
+        target_stds=[1.0, 1.0, 1.0, 1.0],
+        loss_cls=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
+        out_channels=256,
+        featmap_strides=[4, 8, 16, 32]),
+    bbox_head=[
+        dict(
+            type='SharedFCBBoxHead',
+            num_fcs=2,
+            in_channels=256,
+            fc_out_channels=1024,
+            roi_feat_size=7,
+            num_classes=62,
+            target_means=[0., 0., 0., 0.],
+            target_stds=[0.1, 0.1, 0.2, 0.2],
+            reg_class_agnostic=True,
+            loss_cls=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
+        dict(
+            type='SharedFCBBoxHead',
+            num_fcs=2,
+            in_channels=256,
+            fc_out_channels=1024,
+            roi_feat_size=7,
+            num_classes=62,
+            target_means=[0., 0., 0., 0.],
+            target_stds=[0.05, 0.05, 0.1, 0.1],
+            reg_class_agnostic=True,
+            loss_cls=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
+        dict(
+            type='SharedFCBBoxHead',
+            num_fcs=2,
+            in_channels=256,
+            fc_out_channels=1024,
+            roi_feat_size=7,
+            num_classes=62,
+            target_means=[0., 0., 0., 0.],
+            target_stds=[0.033, 0.033, 0.067, 0.067],
+            reg_class_agnostic=True,
+            loss_cls=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
+    ],
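+    # The three bbox heads above form the cascade: target_stds tighten per stage
+    # (0.1 -> 0.05 -> 0.033) so later stages regress finer box offsets.
+    # num_classes=62 presumably means the challenge's 61 food categories plus one
+    # background class (mmdetection 1.x counts background in num_classes).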
+    mask_roi_extractor=dict(
+        type='SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
+        out_channels=256,
+        featmap_strides=[4, 8, 16, 32]),
+    mask_head=dict(
+        type='HTCMaskHead',
+        num_convs=4,
+        in_channels=256,
+        conv_out_channels=256,
+        num_classes=62,
+        loss_mask=dict(
+            type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)))
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.7,
+            neg_iou_thr=0.3,
+            min_pos_iou=0.3,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=256,
+            pos_fraction=0.5,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=False),
+        allowed_border=0,
+        pos_weight=-1,
+        debug=False),
+    rpn_proposal=dict(
+        nms_across_levels=False,
+        nms_pre=2000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn=[
+        dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.5,
+                neg_iou_thr=0.5,
+                min_pos_iou=0.5,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            mask_size=28,
+            pos_weight=-1,
+            debug=False),
+        dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.6,
+                neg_iou_thr=0.6,
+                min_pos_iou=0.6,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            mask_size=28,
+            pos_weight=-1,
+            debug=False),
+        dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.7,
+                neg_iou_thr=0.7,
+                min_pos_iou=0.7,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            mask_size=28,
+            pos_weight=-1,
+            debug=False)
+    ],
+    stage_loss_weights=[1, 0.5, 0.25])
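+# The three rcnn stages use progressively stricter IoU thresholds (0.5 / 0.6 / 0.7),
+# the usual Cascade/HTC schedule; stage_loss_weights down-weights the later stages.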
+test_cfg = dict(
+    rpn=dict(
+        nms_across_levels=False,
+        nms_pre=1000,
+        nms_post=1000,
+        max_num=1000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn=dict(
+        score_thr=0.001,
+        nms=dict(type='soft_nms', iou_thr=0.3),
+        max_per_img=10,
+        mask_thr_binary=0.5),
+    keep_all_stages=False)
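+# soft_nms with score_thr=0.001 keeps many low-score candidates; max_per_img=10 caps
+# detections per image (a challenge-specific choice; mmdetection examples typically use 100).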
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = '/ssd_scratch/cvit/mmdetection/data/'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+albu_train_transforms = [
+    dict(
+        type='ShiftScaleRotate',
+        shift_limit=0.0625,
+        scale_limit=0.0,
+        rotate_limit=0,
+        interpolation=1,
+        p=0.5),
+    dict(
+        type='RandomBrightnessContrast',
+        brightness_limit=[0.1, 0.3],
+        contrast_limit=[0.1, 0.3],
+        p=0.2),
+    dict(
+        type='OneOf',
+        transforms=[
+            dict(
+                type='RGBShift',
+                r_shift_limit=10,
+                g_shift_limit=10,
+                b_shift_limit=10,
+                p=1.0),
+            dict(
+                type='HueSaturationValue',
+                hue_shift_limit=20,
+                sat_shift_limit=30,
+                val_shift_limit=20,
+                p=1.0)
+        ],
+        p=0.1),
+    dict(type='JpegCompression', quality_lower=85, quality_upper=95, p=0.2),
+    dict(type='ChannelShuffle', p=0.1),
+    dict(
+        type='OneOf',
+        transforms=[
+            dict(type='Blur', blur_limit=3, p=1.0),
+            dict(type='MedianBlur', blur_limit=3, p=1.0)
+        ],
+        p=0.1),
+]
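+# The Albumentations transforms above are applied through the 'Albu' wrapper in
+# train_pipeline below; each transform fires with its own probability p.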
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='LoadAnnotations', with_bbox=True, with_mask=True),
+    dict(type='Resize', img_scale=(800, 800), keep_ratio=True),
+    dict(type='Pad', size_divisor=32),
+    dict(
+        type='Albu',
+        transforms=albu_train_transforms,
+        bbox_params=dict(
+            type='BboxParams',
+            format='pascal_voc',
+            label_fields=['gt_labels'],
+            min_visibility=0.0,
+            filter_lost_elements=True),
+        keymap={
+            'img': 'image',
+            'gt_masks': 'masks',
+            'gt_bboxes': 'bboxes'
+        },
+        update_pad_shape=False,
+        skip_img_without_anno=True),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(
+        type='Collect',
+        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'],
+        meta_keys=('filename', 'ori_shape', 'img_shape', 'img_norm_cfg',
+                   'pad_shape', 'scale_factor'))
+]
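+# bbox_params uses format='pascal_voc' because mmdetection keeps gt_bboxes as absolute
+# [x1, y1, x2, y2] boxes, which is the convention Albumentations calls pascal_voc.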
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(800, 800),
+        flip=True,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip', flip_ratio=0.5),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    imgs_per_gpu=4,
+    workers_per_gpu=0,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'train/annotations.json',
+        img_prefix=data_root + 'train/images',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'val/annotations.json',
+        img_prefix=data_root + 'val/images',
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'val/annotations.json',
+        img_prefix=data_root + 'val/images',
+        pipeline=test_pipeline))
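+# imgs_per_gpu=4 gives an effective batch of 4 x num_gpus; lr=0.02 below matches
+# mmdetection's linear-scaling convention for a total batch of 16 (e.g. 4 GPUs here).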
+# optimizer
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[17, 24])
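+# The lr warms up linearly over the first 500 iterations, then drops by 10x
+# (mmcv's default gamma of 0.1) at epochs 17 and 24 of the 30-epoch schedule.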
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+total_epochs = 30
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/htc_r50_fpn'
+load_from = '/ssd_scratch/cvit/mmdetection/htc_r50_fpn_20e_20190408-c03b7015.pth'
+resume_from = None
+workflow = [('train', 1)]
\ No newline at end of file
diff --git a/mmdet/datasets/custom.py b/mmdet/datasets/custom.py
index 935b39d..5bdfeea 100644
--- a/mmdet/datasets/custom.py
+++ b/mmdet/datasets/custom.py
@@ -141,7 +141,10 @@ class CustomDataset(Dataset):
         if self.proposals is not None:
             results['proposals'] = self.proposals[idx]
         self.pre_pipeline(results)
-        return self.pipeline(results)
+        try:
+            return self.pipeline(results)
+        except Exception:
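+            # Swallowing pipeline failures (e.g. Albu filtering out every box) and
+            # returning None lets CustomDataset.__getitem__ re-sample another index.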
+            return None
 
     def prepare_test_img(self, idx):
         img_info = self.img_infos[idx]
diff --git a/mmdet/models/losses/cross_entropy_loss.py b/mmdet/models/losses/cross_entropy_loss.py
index dd9d477..53c9da3 100644
--- a/mmdet/models/losses/cross_entropy_loss.py
+++ b/mmdet/models/losses/cross_entropy_loss.py
@@ -8,6 +8,9 @@ from .utils import weight_reduce_loss
 
 def cross_entropy(pred, label, weight=None, reduction='mean', avg_factor=None):
     # element-wise losses
+#     print(pred.shape, label.shape)
+#     torch.set_printoptions(profile="full")
+#     print(label)
     loss = F.cross_entropy(pred, label, reduction='none')
 
     # apply weights and do the reduction
diff --git a/tools/analyze_logs.py b/tools/analyze_logs.py
index 2810c98..0d54e64 100644
--- a/tools/analyze_logs.py
+++ b/tools/analyze_logs.py
@@ -32,28 +32,27 @@ def cal_train_time(log_dicts, args):
 
 
 def plot_curve(log_dicts, args):
-    if args.backend is not None:
-        plt.switch_backend(args.backend)
-    sns.set_style(args.style)
+    if args['backend'] is not None:
+        plt.switch_backend(args['backend'])
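+    # plot_curve now indexes args as a dict (presumably so it can be called directly
+    # with a plain dict, e.g. from a notebook, instead of an argparse Namespace)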
     # if legend is None, use {filename}_{key} as legend
-    legend = args.legend
+    legend = args['legend']
     if legend is None:
         legend = []
-        for json_log in args.json_logs:
-            for metric in args.keys:
+        for json_log in args['json_logs']:
+            for metric in args['keys']:
                 legend.append('{}_{}'.format(json_log, metric))
-    assert len(legend) == (len(args.json_logs) * len(args.keys))
-    metrics = args.keys
+    assert len(legend) == (len(args['json_logs']) * len(args['keys']))
+    metrics = args['keys']
 
     num_metrics = len(metrics)
     for i, log_dict in enumerate(log_dicts):
         epochs = list(log_dict.keys())
         for j, metric in enumerate(metrics):
             print('plot curve of {}, metric is {}'.format(
-                args.json_logs[i], metric))
+                args['json_logs'][i], metric))
             if metric not in log_dict[epochs[0]]:
                 raise KeyError('{} does not contain metric {}'.format(
-                    args.json_logs[i], metric))
+                    args['json_logs'][i], metric))
 
             if 'mAP' in metric:
                 xs = np.arange(1, max(epochs) + 1)
@@ -81,13 +80,13 @@ def plot_curve(log_dicts, args):
                 plt.plot(
                     xs, ys, label=legend[i * num_metrics + j], linewidth=0.5)
             plt.legend()
-        if args.title is not None:
-            plt.title(args.title)
-    if args.out is None:
+        if args['title'] is not None:
+            plt.title(args['title'])
+    if args['out'] is None:
         plt.show()
     else:
-        print('save curve to: {}'.format(args.out))
-        plt.savefig(args.out)
+        print('save curve to: {}'.format(args['out']))
+        plt.savefig(args['out'])
         plt.cla()
 
 
diff --git a/tools/dist_train.sh b/tools/dist_train.sh
index 3b3b580..4e6911d 100755
--- a/tools/dist_train.sh
+++ b/tools/dist_train.sh
@@ -6,5 +6,5 @@ CONFIG=$1
 GPUS=$2
 PORT=${PORT:-29500}
 
-$PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
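+# -W ignore::UserWarning asks Python to suppress UserWarning messages from the launcher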
+$PYTHON -W ignore::UserWarning -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
     $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
-- 
GitLab