Commit fc0172b4 authored by Vladislav Sovrasov, committed by Kai Chen

Add support of the wider face dataset (#765)

* Add support of the wider face dataset

* Update readme

* Prevent ignoring boxes in test mode

* Rename wider to wider_face

* Rename configs for wider face
## WIDER Face Dataset
To use the WIDER Face dataset, download it and extract it to the `data/WIDERFace` folder. Annotations in VOC format
can be found in this [repo](https://github.com/sovrasov/wider-face-pascal-voc-annotations.git).
Move the annotation files from the `WIDER_train_annotations` and `WIDER_val_annotations` folders
to the `Annotations` folders inside the corresponding `WIDER_train` and `WIDER_val` directories.
Also copy the annotation lists `train.txt` and `val.txt` from `WIDER_train_annotations` and `WIDER_val_annotations` to `data/WIDERFace`
(a small helper sketch for these steps follows the directory listing).
The directory structure should look like this:
```
mmdetection
├── mmdet
├── tools
├── configs
├── data
│   ├── WIDERFace
│   │   ├── WIDER_train
│   │   │   ├── 0--Parade
│   │   │   ├── ...
│   │   │   ├── Annotations
│   │   ├── WIDER_val
│   │   │   ├── 0--Parade
│   │   │   ├── ...
│   │   │   ├── Annotations
│   │   ├── val.txt
│   │   ├── train.txt
```
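A small helper sketch for the copy steps above. It assumes the annotation archives were extracted as `WIDER_train_annotations` and `WIDER_val_annotations` directly under `data/WIDERFace`, and that each list file sits at the top of its annotations folder; neither layout is prescribed by the commit, so adjust the paths to your setup.

```python
# Illustrative helper, not part of the repository.
import shutil
from pathlib import Path

root = Path('data/WIDERFace')
for split in ('train', 'val'):
    src = root / 'WIDER_{}_annotations'.format(split)  # assumed extraction path
    dst = root / 'WIDER_{}'.format(split) / 'Annotations'
    shutil.copytree(src, dst)                          # per-image XML files
    shutil.copy(src / '{}.txt'.format(split), root)    # image-id list
```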
After that you can train SSD300 on WIDER Face by launching training with the `ssd300_wider_face.py` config shown below, or
create your own config based on it.
```python
# model settings
input_size = 300
model = dict(
    type='SingleStageDetector',
    pretrained='open-mmlab://vgg16_caffe',
    backbone=dict(
        type='SSDVGG',
        input_size=input_size,
        depth=16,
        with_last_pool=False,
        ceil_mode=True,
        out_indices=(3, 4),
        out_feature_indices=(22, 34),
        l2_norm_scale=20),
    neck=None,
    bbox_head=dict(
        type='SSDHead',
        input_size=input_size,
        in_channels=(512, 1024, 512, 256, 256, 256),
        num_classes=2,
        anchor_strides=(8, 16, 32, 64, 100, 300),
        basesize_ratio_range=(0.15, 0.9),
        anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
        target_means=(.0, .0, .0, .0),
        target_stds=(0.1, 0.1, 0.2, 0.2)))
cudnn_benchmark = True
# model training and testing settings
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        min_pos_iou=0.,
        ignore_iof_thr=-1,
        gt_max_assign_all=False),
    smoothl1_beta=1.,
    allowed_border=-1,
    pos_weight=-1,
    neg_pos_ratio=3,
    debug=False)
test_cfg = dict(
    nms=dict(type='nms', iou_thr=0.45),
    min_bbox_size=0,
    score_thr=0.02,
    max_per_img=200)
# dataset settings
dataset_type = 'WIDERFaceDataset'
data_root = 'data/WIDERFace/'
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
data = dict(
    imgs_per_gpu=60,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',
        times=2,
        dataset=dict(
            type=dataset_type,
            ann_file=[
                data_root + 'train.txt',
            ],
            img_prefix=[data_root + 'WIDER_train/'],
            img_scale=(300, 300),
            # throw away very small faces to improve training, because
            # 300x300 is too low a resolution to detect them
            min_size=17,
            img_norm_cfg=img_norm_cfg,
            size_divisor=None,
            flip_ratio=0.5,
            with_mask=False,
            with_crowd=False,
            with_label=True,
            test_mode=False,
            extra_aug=dict(
                photo_metric_distortion=dict(
                    brightness_delta=32,
                    contrast_range=(0.5, 1.5),
                    saturation_range=(0.5, 1.5),
                    hue_delta=18),
                expand=dict(
                    mean=img_norm_cfg['mean'],
                    to_rgb=img_norm_cfg['to_rgb'],
                    ratio_range=(1, 4)),
                random_crop=dict(
                    min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)),
            resize_keep_ratio=False)),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val.txt',
        img_prefix=data_root + 'WIDER_val/',
        img_scale=(300, 300),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True,
        resize_keep_ratio=False),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'val.txt',
        img_prefix=data_root + 'WIDER_val/',
        img_scale=(300, 300),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True,
        resize_keep_ratio=False))
# optimizer
optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)
optimizer_config = dict()
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 3,
    step=[16, 20])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=1,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/ssd300_wider'
load_from = None
resume_from = None
workflow = [('train', 1)]
```
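Before launching a full run, a quick sanity check can confirm that the config and the new dataset class are wired together. This is an illustrative sketch, not part of the commit; `Config` comes from mmcv and `get_dataset` is exported by `mmdet.datasets` (see the `__init__.py` change below), while the config path is an assumption about where the file above is saved.

```python
# Standalone sanity check (illustrative only, not code from this commit).
from mmcv import Config
from mmdet.datasets import get_dataset

# Assumed location of the config shown above; adjust to your checkout.
cfg = Config.fromfile('configs/ssd300_wider_face.py')

# Build the validation split: expect CLASSES == ('face',) and one entry
# per image id listed in data/WIDERFace/val.txt.
val_dataset = get_dataset(cfg.data.val)
print(len(val_dataset), val_dataset.CLASSES)
```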
In the class-name registry (`class_names.py`), a `wider_face_classes()` helper is added and the new dataset's aliases are registered in `dataset_aliases`:

```python
import mmcv


def wider_face_classes():
    return ['face']


def voc_classes():
    return [
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
        ...
    ]

...

dataset_aliases = {
    'voc': ['voc', 'pascal_voc', 'voc07', 'voc12'],
    'imagenet_det': ['det', 'imagenet_det', 'ilsvrc_det'],
    'imagenet_vid': ['vid', 'imagenet_vid', 'ilsvrc_vid'],
    'coco': ['coco', 'mscoco', 'ms_coco'],
    'wider_face': ['WIDERFaceDataset', 'wider_face', 'WIDERFace']
}
```
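Any of these aliases can then be resolved through `get_classes`, which lives next to `dataset_aliases`. A minimal usage sketch, assuming the helper is re-exported from `mmdet.core` as the other evaluation utilities are:

```python
# Illustrative only: every registered alias resolves to the same class list.
from mmdet.core import get_classes

print(get_classes('wider_face'))        # ['face']
print(get_classes('WIDERFaceDataset'))  # ['face']
```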
The datasets package `__init__.py` imports and exports the new class:

```python
from .custom import CustomDataset
from .xml_style import XMLDataset
from .coco import CocoDataset
from .voc import VOCDataset
from .wider_face import WIDERFaceDataset
from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
from .utils import to_tensor, random_scale, show_ann, get_dataset
from .concat_dataset import ConcatDataset
...

__all__ = [
    'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'GroupSampler',
    'DistributedGroupSampler', 'build_dataloader', 'to_tensor', 'random_scale',
    'show_ann', 'get_dataset', 'ConcatDataset', 'RepeatDataset',
    'ExtraAugmentation', 'WIDERFaceDataset'
]
```
The reader itself subclasses `XMLDataset` and composes each image path from the `folder` element of its annotation, so images stay grouped by event category (e.g. `0--Parade`):

```python
import os.path as osp
import xml.etree.ElementTree as ET

import mmcv

from .xml_style import XMLDataset


class WIDERFaceDataset(XMLDataset):
    """
    Reader for the WIDER Face dataset in PASCAL VOC format.
    Conversion scripts can be found in
    https://github.com/sovrasov/wider-face-pascal-voc-annotations
    """
    CLASSES = ('face',)

    def __init__(self, **kwargs):
        super(WIDERFaceDataset, self).__init__(**kwargs)

    def load_annotations(self, ann_file):
        img_infos = []
        img_ids = mmcv.list_from_file(ann_file)
        for img_id in img_ids:
            filename = '{}.jpg'.format(img_id)
            xml_path = osp.join(self.img_prefix, 'Annotations',
                                '{}.xml'.format(img_id))
            tree = ET.parse(xml_path)
            root = tree.getroot()
            size = root.find('size')
            width = int(size.find('width').text)
            height = int(size.find('height').text)
            folder = root.find('folder').text
            img_infos.append(
                dict(
                    id=img_id,
                    filename=osp.join(folder, filename),
                    width=width,
                    height=height))
        return img_infos
```
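To make the path logic above concrete, here is a self-contained sketch with a made-up annotation in the converted VOC format; it mirrors the dict that `load_annotations` builds for each image, and none of the values come from a real dataset file.

```python
# Illustration only: a hand-written annotation and the per-image info dict
# that load_annotations would derive from it.
import os.path as osp
import xml.etree.ElementTree as ET

ann = """
<annotation>
  <folder>0--Parade</folder>
  <filename>0_Parade_marchingband_1_465.jpg</filename>
  <size><width>1024</width><height>768</height><depth>3</depth></size>
  <object>
    <name>face</name>
    <difficult>0</difficult>
    <bndbox><xmin>345</xmin><ymin>211</ymin><xmax>389</xmax><ymax>261</ymax></bndbox>
  </object>
</annotation>
"""

img_id = '0_Parade_marchingband_1_465'
root = ET.fromstring(ann)
size = root.find('size')
folder = root.find('folder').text
info = dict(
    id=img_id,
    filename=osp.join(folder, '{}.jpg'.format(img_id)),
    width=int(size.find('width').text),
    height=int(size.find('height').text))
print(info['filename'])  # 0--Parade/0_Parade_marchingband_1_465.jpg
```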
Finally, `XMLDataset` accepts an optional `min_size`. During training (the assertion forbids it in test mode), ground-truth boxes whose width or height falls below `min_size` are moved to the ignore lists; the `min_size=17` entry in the config above relies on this:

```python
class XMLDataset(CustomDataset):

    def __init__(self, min_size=None, **kwargs):
        super(XMLDataset, self).__init__(**kwargs)
        self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)}
        self.min_size = min_size

    def load_annotations(self, ann_file):
        img_infos = []
        ...
```

Further down in the same class, where per-object boxes are parsed:

```python
            bbox = [
                int(bnd_box.find('xmin').text),
                int(bnd_box.find('ymin').text),
                int(bnd_box.find('xmax').text),
                int(bnd_box.find('ymax').text)
            ]
            ignore = False
            if self.min_size:
                assert not self.test_mode
                w = bbox[2] - bbox[0]
                h = bbox[3] - bbox[1]
                if w < self.min_size or h < self.min_size:
                    ignore = True
            if difficult or ignore:
                bboxes_ignore.append(bbox)
                labels_ignore.append(label)
            else:
                ...
```
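As a quick check of the rule with the value used in the config (`min_size=17`): a 12x40 px box is ignored because one side is below the threshold, while an 18x18 px box is kept. A standalone sketch of just that predicate, not code from the commit:

```python
# Re-implements only the size test from the snippet above, for illustration.
def too_small(bbox, min_size=17):
    w = bbox[2] - bbox[0]
    h = bbox[3] - bbox[1]
    return w < min_size or h < min_size

print(too_small([100, 100, 112, 140]))  # True  -> moved to bboxes_ignore
print(too_small([100, 100, 118, 118]))  # False -> kept as a regular box
```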