Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • Archana_Badagi/food-round2
  • eric_a_scuccimarra/food-round2
  • joel_joseph/food-round2
  • darthgera123/food-round2
  • reshmarameshbabu/food-round2
  • nikhil_rayaprolu/food-round2
6 results
Show changes
Showing
with 849 additions and 445 deletions
...@@ -109,6 +109,31 @@ dataset_type = 'CocoDataset' ...@@ -109,6 +109,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -116,35 +141,17 @@ data = dict( ...@@ -116,35 +141,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=True,
with_label=True),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=True,
with_label=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
......
...@@ -128,6 +128,31 @@ dataset_type = 'CocoDataset' ...@@ -128,6 +128,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -135,35 +160,17 @@ data = dict( ...@@ -135,35 +160,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
......
...@@ -128,6 +128,31 @@ dataset_type = 'CocoDataset' ...@@ -128,6 +128,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -135,35 +160,17 @@ data = dict( ...@@ -135,35 +160,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
......
...@@ -130,6 +130,31 @@ dataset_type = 'CocoDataset' ...@@ -130,6 +130,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -137,35 +162,17 @@ data = dict( ...@@ -137,35 +162,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
......
...@@ -124,6 +124,31 @@ dataset_type = 'CocoDataset' ...@@ -124,6 +124,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -131,35 +156,17 @@ data = dict( ...@@ -131,35 +156,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
......
...@@ -124,6 +124,31 @@ dataset_type = 'CocoDataset' ...@@ -124,6 +124,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -131,35 +156,17 @@ data = dict( ...@@ -131,35 +156,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
......
...@@ -124,6 +124,31 @@ dataset_type = 'CocoDataset' ...@@ -124,6 +124,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -131,35 +156,17 @@ data = dict( ...@@ -131,35 +156,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
......
# model settings # model settings
model = dict( model = dict(
type='GridRCNN', type='GridRCNN',
pretrained='modelzoo://resnet50', pretrained='torchvision://resnet50',
backbone=dict( backbone=dict(
type='ResNet', type='ResNet',
depth=50, depth=50,
...@@ -113,6 +113,31 @@ dataset_type = 'CocoDataset' ...@@ -113,6 +113,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -120,35 +145,17 @@ data = dict( ...@@ -120,35 +145,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None) optimizer_config = dict(grad_clip=None)
......
...@@ -115,6 +115,31 @@ dataset_type = 'CocoDataset' ...@@ -115,6 +115,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -122,35 +147,17 @@ data = dict( ...@@ -122,35 +147,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None) optimizer_config = dict(grad_clip=None)
......
...@@ -59,48 +59,54 @@ dataset_type = 'CocoDataset' ...@@ -59,48 +59,54 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadProposals', num_max_proposals=300),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadProposals', num_max_proposals=None),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img', 'proposals']),
])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
train=dict( train=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
num_max_proposals=300,
proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_train2017.pkl', proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_train2017.pkl',
flip_ratio=0.5, img_prefix=data_root + 'train2017/',
with_mask=False, pipeline=train_pipeline),
with_crowd=True,
with_label=True),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
num_max_proposals=300,
proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl', proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl',
flip_ratio=0, img_prefix=data_root + 'val2017/',
with_mask=False, pipeline=test_pipeline),
with_crowd=True,
with_label=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
num_max_proposals=300,
proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl', proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl',
flip_ratio=0, img_prefix=data_root + 'val2017/',
with_mask=False, pipeline=test_pipeline))
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
......
...@@ -127,6 +127,31 @@ dataset_type = 'CocoDataset' ...@@ -127,6 +127,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -134,35 +159,17 @@ data = dict( ...@@ -134,35 +159,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=True,
with_label=True),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=True,
with_label=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
......
...@@ -127,6 +127,31 @@ dataset_type = 'CocoDataset' ...@@ -127,6 +127,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training data pipeline: load image + bbox/label annotations, resize to a
# max of (1333, 800) keeping aspect ratio, random horizontal flip, normalize
# with img_norm_cfg, pad to a multiple of 32, then pack tensors for the model.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Inference pipeline: single scale (1333, 800), no flip augmentation.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -134,35 +159,17 @@ data = dict( ...@@ -134,35 +159,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=True,
with_label=True),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=True,
with_label=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
......
...@@ -84,6 +84,31 @@ dataset_type = 'CocoDataset' ...@@ -84,6 +84,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
# Training data pipeline: load image + bbox/label annotations, resize to a
# max of (1333, 800) keeping aspect ratio, random horizontal flip, normalize
# with img_norm_cfg, pad to a multiple of 32, then pack tensors for the model.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Inference pipeline: single scale (1333, 800), no flip augmentation.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -91,36 +116,17 @@ data = dict( ...@@ -91,36 +116,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=False,
with_label=True),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=False,
with_label=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
...@@ -142,7 +148,6 @@ log_config = dict( ...@@ -142,7 +148,6 @@ log_config = dict(
# yapf:enable # yapf:enable
# runtime settings # runtime settings
total_epochs = 12 total_epochs = 12
device_ids = range(8)
dist_params = dict(backend='nccl') dist_params = dict(backend='nccl')
log_level = 'INFO' log_level = 'INFO'
work_dir = './work_dirs/ga_retinanet_r50_caffe_fpn_1x' work_dir = './work_dirs/ga_retinanet_r50_caffe_fpn_1x'
......
...@@ -84,6 +84,31 @@ dataset_type = 'CocoDataset' ...@@ -84,6 +84,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training data pipeline: load image + bbox/label annotations, resize to a
# max of (1333, 800) keeping aspect ratio, random horizontal flip, normalize
# with img_norm_cfg, pad to a multiple of 32, then pack tensors for the model.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Inference pipeline: single scale (1333, 800), no flip augmentation.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -91,36 +116,17 @@ data = dict( ...@@ -91,36 +116,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=False,
with_label=True),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=False,
with_label=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
...@@ -142,7 +148,6 @@ log_config = dict( ...@@ -142,7 +148,6 @@ log_config = dict(
# yapf:enable # yapf:enable
# runtime settings # runtime settings
total_epochs = 12 total_epochs = 12
device_ids = range(8)
dist_params = dict(backend='nccl') dist_params = dict(backend='nccl')
log_level = 'INFO' log_level = 'INFO'
work_dir = './work_dirs/ga_retinanet_x101_32x4d_fpn_1x' work_dir = './work_dirs/ga_retinanet_x101_32x4d_fpn_1x'
......
...@@ -85,6 +85,31 @@ dataset_type = 'CocoDataset' ...@@ -85,6 +85,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
# Training data pipeline. Note with_label=False and Collect without
# 'gt_labels': only bounding boxes are loaded — class labels are not used
# by this model (class-agnostic, RPN-style training).
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_label=False),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes']),
]
# Inference pipeline: single scale (1333, 800), no flip augmentation.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -92,35 +117,17 @@ data = dict( ...@@ -92,35 +117,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=False,
with_label=False),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=False,
with_label=False),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
# runner configs # runner configs
......
...@@ -85,6 +85,31 @@ dataset_type = 'CocoDataset' ...@@ -85,6 +85,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
# Training data pipeline. Note with_label=False and Collect without
# 'gt_labels': only bounding boxes are loaded — class labels are not used
# by this model (class-agnostic, RPN-style training).
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_label=False),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes']),
]
# Inference pipeline: single scale (1333, 800), no flip augmentation.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -92,35 +117,17 @@ data = dict( ...@@ -92,35 +117,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=False,
with_label=False),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=False,
with_label=False),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
# runner configs # runner configs
......
...@@ -85,6 +85,31 @@ dataset_type = 'CocoDataset' ...@@ -85,6 +85,31 @@ dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training data pipeline. Note with_label=False and Collect without
# 'gt_labels': only bounding boxes are loaded — class labels are not used
# by this model (class-agnostic, RPN-style training).
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_label=False),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes']),
]
# Inference pipeline: single scale (1333, 800), no flip augmentation.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -92,35 +117,17 @@ data = dict( ...@@ -92,35 +117,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=False,
with_label=False),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=False,
with_label=False),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
# runner configs # runner configs
......
...@@ -22,33 +22,74 @@ ...@@ -22,33 +22,74 @@
## Results and Models ## Results and Models
Faster R-CNN
| Backbone|#Params|GFLOPs|Lr sched|mAP|Download| ### Faster R-CNN
| :--:|:--:|:--:|:--:|:--:|:--:|
| HRNetV2-W18 |26.2M|159.1| 1x | 36.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w18_fpn_1x_20190522-e368c387.pth)|
| HRNetV2-W18 |26.2M|159.1| 20-23-24e | 38.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w18_fpn_20_23_24e_20190522-ed3c0293.pth)|
| HRNetV2-W32 |45.0M|245.3| 1x | 39.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w32_fpn_1x_20190522-d22f1fef.pth)|
| HRNetV2-W32 |45.0M|245.3| 20-23-24e | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w32_fpn_20_23_24e_20190522-2d67a5eb.pth)|
| HRNetV2-W40 |60.5M|314.9| 1x | 40.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w40_fpn_1x_20190522-30502318.pth)|
| HRNetV2-W40 |60.5M|314.9| 20-23-24e | 41.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w40_fpn_20_23_24e_20190522-050a7c7f.pth)|
| Backbone | Style | Lr schd | box AP | Download |
| :-------------: | :-----: | :-----: | :----: | :-----------------: |
| HRNetV2p-W18 | pytorch | 1x | 36.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/faster_rcnn_hrnetv2p_w18_1x_20190522-e368c387.pth) |
| HRNetV2p-W18 | pytorch | 2x | 38.3 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/faster_rcnn_hrnetv2p_w18_2x_20190810-9c8615d5.pth) |
| HRNetV2p-W32 | pytorch | 1x | 39.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/faster_rcnn_hrnetv2p_w32_1x_20190522-d22f1fef.pth) |
| HRNetV2p-W32 | pytorch | 2x | 40.6 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/faster_rcnn_hrnetv2p_w32_2x_20190810-24e8912a.pth) |
| HRNetV2p-W48 | pytorch | 1x | 40.9 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/faster_rcnn_hrnetv2p_w48_1x_20190820-5c6d0903.pth) |
| HRNetV2p-W48 | pytorch | 2x | 41.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/faster_rcnn_hrnetv2p_w48_2x_20190820-79fb8bfc.pth) |
Mask R-CNN ### Mask R-CNN
|Backbone|Lr sched|mask mAP|box mAP|Download| | Backbone | Style | Lr schd | box AP | mask AP | Download |
|:--:|:--:|:--:|:--:|:--:| | :-------------: | :-----: | :-----: | :----: | :----: | :-----------------: |
| HRNetV2-W18 | 1x | 34.2 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w18_fpn_1x_20190522-c8ad459f.pth)| | HRNetV2p-W18 | pytorch | 1x | 37.3 | 34.2 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/mask_rcnn_hrnetv2p_w18_1x_20190522-c8ad459f.pth) |
| HRNetV2-W18 | 20-23-24e | 35.7 | 39.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w18_fpn_20_23_24e_20190522-5c11b7f2.pth)| | HRNetV2p-W18 | pytorch | 2x | 39.2 | 35.7 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/mask_rcnn_hrnetv2p_w18_2x_20190810-1e4747eb.pth) |
| HRNetV2-W32 | 1x | 36.8 | 40.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w32_fpn_1x_20190522-374aaa00.pth)| | HRNetV2p-W32 | pytorch | 1x | 40.7 | 36.8 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/mask_rcnn_hrnetv2p_w32_1x_20190522-374aaa00.pth) |
| HRNetV2-W32 | 20-23-24e | 37.6 | 42.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w32_fpn_20_23_24e_20190522-4dd02a79.pth)| | HRNetV2p-W32 | pytorch | 2x | 41.7 | 37.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/mask_rcnn_hrnetv2p_w32_2x_20190810-773eca75.pth) |
| HRNetV2p-W48 | pytorch | 1x | 42.4 | 38.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/mask_rcnn_hrnetv2p_w48_1x_20190820-0923d1ad.pth) |
| HRNetV2p-W48 | pytorch | 2x | 42.9 | 38.3 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/mask_rcnn_hrnetv2p_w48_2x_20190820-70df51b2.pth) |
### Cascade R-CNN
| Backbone | Style | Lr schd | box AP | Download |
| :-------------: | :-----: | :-----: | :----: | :-----------------: |
| HRNetV2p-W18 | pytorch | 20e | 41.2 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/cascade_rcnn_hrnetv2p_w18_20e_20190810-132012d0.pth)|
| HRNetV2p-W32 | pytorch | 20e | 43.7 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/cascade_rcnn_hrnetv2p_w32_20e_20190522-55bec4ee.pth)|
| HRNetV2p-W48 | pytorch | 20e | 44.6 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/cascade_rcnn_hrnetv2p_w48_20e_20190810-f40ed8e1.pth)|
### Cascade Mask R-CNN
| Backbone | Style | Lr schd | box AP | mask AP | Download |
| :-------------: | :-----: | :-----: | :----: | :----: | :-----------------: |
| HRNetV2p-W18 | pytorch | 20e | 41.9 | 36.4 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_20190810-054fb7bf.pth) |
| HRNetV2p-W32 | pytorch | 20e | 44.5 | 38.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_20190810-76f61cd0.pth) |
| HRNetV2p-W48 | pytorch | 20e | 46.0 | 39.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/cascade_mask_rcnn_hrnetv2p_w48_20e_20190810-d04a1415.pth) |
### Hybrid Task Cascade (HTC)
| Backbone | Style | Lr schd | box AP | mask AP | Download |
| :-------------: | :-----: | :-----: | :----: | :----: | :-----------------: |
| HRNetV2p-W18 | pytorch | 20e | 43.1 | 37.9 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/htc_hrnetv2p_w18_20e_20190810-d70072af.pth) |
| HRNetV2p-W32 | pytorch | 20e | 45.3 | 39.6 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/htc_hrnetv2p_w32_20e_20190810-82f9ef5a.pth) |
| HRNetV2p-W48 | pytorch | 20e | 46.8 | 40.7 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/htc_hrnetv2p_w48_20e_20190810-f6d2c3fd.pth) |
| HRNetV2p-W48 | pytorch | 28e | 47.0 | 41.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/htc_hrnetv2p_w48_28e_20190810-a4274b38.pth) |
| X-101-64x4d-FPN | pytorch | 28e | 46.8 | 40.7 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/htc_x101_64x4d_28e_20190810-d7c19dc0.pth) |
### FCOS
| Backbone | Style | GN | MS train | Lr schd | box AP | Download |
|:---------:|:-------:|:-------:|:--------:|:-------:|:------:|:--------:|
|HRNetV2p-W18| pytorch | Y | N | 1x | 35.2 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/fcos_hrnetv2p_w18_1x_20190810-87a17998.pth) |
|HRNetV2p-W18| pytorch | Y | N | 2x | 38.2 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/fcos_hrnetv2p_w18_2x_20190810-dfd60a7b.pth) |
|HRNetV2p-W32| pytorch | Y | N | 1x | 37.7 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/fcos_hrnetv2p_w32_1x_20190810-62014622.pth) |
|HRNetV2p-W32| pytorch | Y | N | 2x | 40.3 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/fcos_hrnetv2p_w32_2x_20190810-8e987ec1.pth) |
|HRNetV2p-W18| pytorch | Y | Y | 2x | 38.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/fcos_hrnetv2p_w18_mstrain_2x_20190810-eb846b2c.pth) |
|HRNetV2p-W32| pytorch | Y | Y | 2x | 41.4 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/fcos_hrnetv2p_w32_mstrain_2x_20190810-96127bf8.pth) |
|HRNetV2p-W48| pytorch | Y | Y | 2x | 42.9 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/hrnet/fcos_hrnetv2p_w48_mstrain_2x_20190810-f7dc8801.pth) |
Cascade R-CNN
|Backbone|Lr sched|mAP|Download|
|:--:|:--:|:--:|:--:|
| HRNetV2-W32 | 20e | 43.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/cascade_rcnn_hrnetv2_w32_fpn_20e_20190522-55bec4ee.pth)|
**Note:**
- The `28e` schedule in HTC indicates decreasing the lr at 24 and 27 epochs, with a total of 28 epochs.
- HRNetV2 ImageNet pretrained models are in [HRNets for Image Classification](https://github.com/HRNet/HRNet-Image-Classification).
# model settings
# Cascade (Mask) R-CNN with an HRNetV2-W32 backbone and an HRFPN neck.
model = dict(
    type='CascadeRCNN',
    num_stages=3,  # three cascaded bbox heads with increasing IoU thresholds
    pretrained='open-mmlab://msra/hrnetv2_w32',
    backbone=dict(
        type='HRNet',
        # HRNet keeps parallel multi-resolution branches; each stageN adds a
        # branch. num_channels are the per-branch widths (W32 variant).
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(32, 64)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(32, 64, 128)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(32, 64, 128, 256)))),
    # HRFPN fuses the four HRNet resolution branches into 256-channel levels.
    neck=dict(type='HRFPN', in_channels=[32, 64, 128, 256], out_channels=256),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    # One bbox head per cascade stage; target_stds shrink at later stages so
    # regression targets get progressively tighter.
    bbox_head=[
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,  # 80 COCO classes + background
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2],
            reg_class_agnostic=True,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.05, 0.05, 0.1, 0.1],
            reg_class_agnostic=True,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.033, 0.033, 0.067, 0.067],
            reg_class_agnostic=True,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
    ],
    # Mask branch (shared across cascade stages).
    mask_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    mask_head=dict(
        type='FCNMaskHead',
        num_convs=4,
        in_channels=256,
        conv_out_channels=256,
        num_classes=81,
        loss_mask=dict(
            type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        # Anchors with IoU >= 0.7 vs GT are positive, <= 0.3 negative.
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,  # -1: use the default positive-sample weight
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    # One R-CNN config per cascade stage; the IoU threshold for positives
    # increases 0.5 -> 0.6 -> 0.7 so each stage refines harder examples.
    rcnn=[
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.6,
                neg_iou_thr=0.6,
                min_pos_iou=0.6,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.7,
                min_pos_iou=0.7,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False)
    ],
    # Later cascade stages contribute less to the total loss.
    stage_loss_weights=[1, 0.5, 0.25])
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05,
        nms=dict(type='nms', iou_thr=0.5),
        max_per_img=100,
        mask_thr_binary=0.5))
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# ImageNet RGB mean/std; to_rgb converts from OpenCV's BGR ordering.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: loads boxes, labels and instance masks (with_mask=True)
# for the mask branch, then resize/flip/normalize/pad and pack tensors.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
# Inference pipeline: single scale (1333, 800), no flip augmentation.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Dataloader configuration: COCO 2017 train for training, val for both
# validation and testing.
data = dict(
    imgs_per_gpu=2,  # batch size per GPU
    workers_per_gpu=2,  # dataloader worker processes per GPU
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline))
# optimizer
# lr=0.02 is the reference rate for 8 GPUs x 2 imgs/GPU -- TODO confirm
# against the repo's linear scaling note if the GPU count differs.
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[16, 19])  # decay LR at epochs 16 and 19 (the "20e" schedule)
checkpoint_config = dict(interval=1)  # save a checkpoint every epoch
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 20
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/cascade_mask_rcnn_hrnetv2p_w32_20e'
load_from = None
resume_from = None
workflow = [('train', 1)]
...@@ -30,10 +30,7 @@ model = dict( ...@@ -30,10 +30,7 @@ model = dict(
block='BASIC', block='BASIC',
num_blocks=(4, 4, 4, 4), num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256)))), num_channels=(32, 64, 128, 256)))),
neck=dict( neck=dict(type='HRFPN', in_channels=[32, 64, 128, 256], out_channels=256),
type='HRFPN',
in_channels=[32, 64, 128, 256],
out_channels=256),
rpn_head=dict( rpn_head=dict(
type='RPNHead', type='RPNHead',
in_channels=256, in_channels=256,
...@@ -48,10 +45,7 @@ model = dict( ...@@ -48,10 +45,7 @@ model = dict(
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
bbox_roi_extractor=dict( bbox_roi_extractor=dict(
type='SingleRoIExtractor', type='SingleRoIExtractor',
roi_layer=dict( roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
type='RoIAlign',
out_size=7,
sample_num=2),
out_channels=256, out_channels=256,
featmap_strides=[4, 8, 16, 32]), featmap_strides=[4, 8, 16, 32]),
bbox_head=[ bbox_head=[
...@@ -66,13 +60,8 @@ model = dict( ...@@ -66,13 +60,8 @@ model = dict(
target_stds=[0.1, 0.1, 0.2, 0.2], target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=True, reg_class_agnostic=True,
loss_cls=dict( loss_cls=dict(
type='CrossEntropyLoss', type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
use_sigmoid=False, loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0,
loss_weight=1.0)),
dict( dict(
type='SharedFCBBoxHead', type='SharedFCBBoxHead',
num_fcs=2, num_fcs=2,
...@@ -84,13 +73,8 @@ model = dict( ...@@ -84,13 +73,8 @@ model = dict(
target_stds=[0.05, 0.05, 0.1, 0.1], target_stds=[0.05, 0.05, 0.1, 0.1],
reg_class_agnostic=True, reg_class_agnostic=True,
loss_cls=dict( loss_cls=dict(
type='CrossEntropyLoss', type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
use_sigmoid=False, loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0,
loss_weight=1.0)),
dict( dict(
type='SharedFCBBoxHead', type='SharedFCBBoxHead',
num_fcs=2, num_fcs=2,
...@@ -102,13 +86,8 @@ model = dict( ...@@ -102,13 +86,8 @@ model = dict(
target_stds=[0.033, 0.033, 0.067, 0.067], target_stds=[0.033, 0.033, 0.067, 0.067],
reg_class_agnostic=True, reg_class_agnostic=True,
loss_cls=dict( loss_cls=dict(
type='CrossEntropyLoss', type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
use_sigmoid=False, loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0,
loss_weight=1.0)),
]) ])
# model training and testing settings # model training and testing settings
train_cfg = dict( train_cfg = dict(
...@@ -192,17 +171,37 @@ test_cfg = dict( ...@@ -192,17 +171,37 @@ test_cfg = dict(
nms_thr=0.7, nms_thr=0.7,
min_bbox_size=0), min_bbox_size=0),
rcnn=dict( rcnn=dict(
score_thr=0.05, score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100))
nms=dict(type='nms', iou_thr=0.5),
max_per_img=100),
keep_all_stages=False)
# dataset settings # dataset settings
dataset_type = 'CocoDataset' dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
std=[58.395, 57.12, 57.375], train_pipeline = [
to_rgb=True) dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
...@@ -210,35 +209,17 @@ data = dict( ...@@ -210,35 +209,17 @@ data = dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json', ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/', img_prefix=data_root + 'train2017/',
img_scale=(1333, 800), pipeline=train_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=True,
with_label=True),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=True,
with_label=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json', ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/', img_prefix=data_root + 'val2017/',
img_scale=(1333, 800), pipeline=test_pipeline))
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
......