Commit bfbd2223 authored by Kai Chen

add custom datasets

parent 4990aae6
# mmdet/datasets/__init__.py
from .custom import CustomDataset
from .coco import CocoDataset
from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
from .utils import to_tensor, random_scale, show_ann

__all__ = [
    'CustomDataset', 'CocoDataset', 'GroupSampler', 'DistributedGroupSampler',
    'build_dataloader', 'to_tensor', 'random_scale', 'show_ann'
]
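For context, a minimal usage sketch of the exports above (not part of the commit). The COCO paths are placeholders, the img_norm_cfg keys (mean/std/to_rgb) are assumed to be what ImageTransform accepts, and the build_dataloader argument names are assumptions about the loader API:

# Hypothetical usage sketch; see the caveats above.
from mmdet.datasets import CocoDataset, build_dataloader

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
dataset = CocoDataset(
    ann_file='data/coco/annotations/instances_train2017.json',  # placeholder
    img_prefix='data/coco/train2017/',                          # placeholder
    img_scale=(1333, 800),  # (long_edge, short_edge)
    img_norm_cfg=img_norm_cfg,
    size_divisor=32,        # pad so the feature strides divide the image size
    flip_ratio=0.5)
# Assumed signature: (dataset, imgs_per_gpu, workers_per_gpu, ...).
data_loader = build_dataloader(dataset, 2, 2, num_gpus=1, dist=False)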
# mmdet/datasets/coco.py
import numpy as np
from pycocotools.coco import COCO

from .custom import CustomDataset


class CocoDataset(CustomDataset):

    def load_annotations(self, ann_file):
        self.coco = COCO(ann_file)
        # get the mapping from original category ids to contiguous labels
        self.cat_ids = self.coco.getCatIds()
        self.cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.cat_ids)
        }
        self.img_ids = self.coco.getImgIds()
        img_infos = []
        for i in self.img_ids:
            info = self.coco.loadImgs([i])[0]
            info['filename'] = info['file_name']
            img_infos.append(info)
        return img_infos

    def get_ann_info(self, idx):
        img_id = self.img_infos[idx]['id']
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        ann_info = self.coco.loadAnns(ann_ids)
        return self._parse_ann_info(ann_info)

    def _filter_imgs(self, min_size=32):
        """Filter images that are too small or have no ground truths."""
        valid_inds = []
        ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values())
        for i, img_info in enumerate(self.img_infos):
            if self.img_ids[i] not in ids_with_ann:
                continue
            if min(img_info['width'], img_info['height']) >= min_size:
                valid_inds.append(i)
        return valid_inds

    def _parse_ann_info(self, ann_info, with_mask=True):
        """Parse bbox and mask annotation."""
        # ... (method body collapsed in the diff view; only the tail below
        # was shown as context) ...
        ann['mask_polys'] = gt_mask_polys
        ann['poly_lens'] = gt_poly_lens
        return ann
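With this refactor, a new dataset format only needs to override the loading hooks that CocoDataset overrides above; augmentation, aspect-ratio grouping, and batching are inherited from CustomDataset. A hypothetical sketch (the class name, file format, and field layout are illustrative, not part of the commit):

# Hypothetical subclass: annotations stored as plain text, one image per
# line in the assumed format
# 'filename width height x1 y1 x2 y2 label [x1 y1 x2 y2 label ...]'.
import numpy as np

from .custom import CustomDataset


class TxtDataset(CustomDataset):  # illustrative name

    def load_annotations(self, ann_file):
        img_infos = []
        for line in open(ann_file):
            fields = line.split()
            boxes = np.array(fields[3:], dtype=np.float32).reshape(-1, 5)
            img_infos.append(
                dict(
                    filename=fields[0],
                    width=int(fields[1]),
                    height=int(fields[2]),
                    ann=dict(
                        bboxes=boxes[:, :4],
                        labels=boxes[:, 4].astype(np.int64),
                        bboxes_ignore=np.zeros((0, 4), dtype=np.float32))))
        return img_infos

    # get_ann_info() can keep the base-class default, which reads
    # img_infos[idx]['ann'], exactly the layout built above.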
# mmdet/datasets/custom.py
import os.path as osp

import mmcv
import numpy as np
from mmcv.parallel import DataContainer as DC
from torch.utils.data import Dataset

from .transforms import (ImageTransform, BboxTransform, MaskTransform,
                         Numpy2Tensor)
from .utils import to_tensor, random_scale


class CustomDataset(Dataset):
    """Custom dataset for detection.

    Annotation format:
        [
            {
                'filename': 'a.jpg',
                'width': 1280,
                'height': 720,
                'ann': {
                    'bboxes': <np.ndarray> (n, 4),
                    'labels': <np.ndarray> (n, ),
                    'bboxes_ignore': <np.ndarray> (k, 4),
                    'labels_ignore': <np.ndarray> (k, ) (optional field)
                }
            },
            ...
        ]

    The `ann` field is optional for testing.
    """
    def __init__(self,
                 ann_file,
                 img_prefix,
                 img_scale,
                 img_norm_cfg,
                 size_divisor=None,
                 proposal_file=None,
                 num_max_proposals=1000,
                 flip_ratio=0,
                 with_mask=True,
                 with_crowd=True,
                 with_label=True,
                 test_mode=False):
        # load annotations (and proposals)
        self.img_infos = self.load_annotations(ann_file)
        if proposal_file is not None:
            self.proposals = self.load_proposals(proposal_file)
        else:
            self.proposals = None
        # filter images with no annotation during training
        if not test_mode:
            valid_inds = self._filter_imgs()
            self.img_infos = [self.img_infos[i] for i in valid_inds]
            if self.proposals is not None:
                self.proposals = [self.proposals[i] for i in valid_inds]
        # prefix of image paths
        self.img_prefix = img_prefix
        # (long_edge, short_edge) or [(long1, short1), (long2, short2), ...]
        self.img_scales = img_scale if isinstance(img_scale,
                                                  list) else [img_scale]
        assert mmcv.is_list_of(self.img_scales, tuple)
        # normalization configs
        self.img_norm_cfg = img_norm_cfg
        # max number of proposals per image
        self.num_max_proposals = num_max_proposals
        # flip ratio
        self.flip_ratio = flip_ratio
        assert 0 <= flip_ratio <= 1
        # pad the image border so that its size is divisible by
        # size_divisor (used for FPN)
        self.size_divisor = size_divisor
        # with mask or not (reserved field, takes no effect)
        self.with_mask = with_mask
        # some datasets mark bboxes as ignore/crowd/difficult;
        # if `with_crowd` is True, this info is returned as well
        self.with_crowd = with_crowd
        # `with_label` is False for RPN
        self.with_label = with_label
        # in test mode or not
        self.test_mode = test_mode
        # set group flag for the sampler
        if not self.test_mode:
            self._set_group_flag()
        # transforms
        self.img_transform = ImageTransform(
            size_divisor=self.size_divisor, **self.img_norm_cfg)
        self.bbox_transform = BboxTransform()
        self.mask_transform = MaskTransform()
        self.numpy2tensor = Numpy2Tensor()
    def __len__(self):
        return len(self.img_infos)

    def load_annotations(self, ann_file):
        return mmcv.load(ann_file)

    def load_proposals(self, proposal_file):
        return mmcv.load(proposal_file)

    def get_ann_info(self, idx):
        return self.img_infos[idx]['ann']

    def _filter_imgs(self, min_size=32):
        """Filter images that are too small."""
        valid_inds = []
        for i, img_info in enumerate(self.img_infos):
            if min(img_info['width'], img_info['height']) >= min_size:
                valid_inds.append(i)
        return valid_inds

    def _set_group_flag(self):
        """Set flag according to image aspect ratio.

        Images with an aspect ratio greater than 1 are assigned to group 1,
        others to group 0.
        """
        self.flag = np.zeros(len(self), dtype=np.uint8)
        for i in range(len(self)):
            img_info = self.img_infos[i]
            if img_info['width'] / img_info['height'] > 1:
                self.flag[i] = 1

    def _rand_another(self, idx):
        pool = np.where(self.flag == self.flag[idx])[0]
        return np.random.choice(pool)
    def __getitem__(self, idx):
        if self.test_mode:
            return self.prepare_test_img(idx)
        while True:
            data = self.prepare_train_img(idx)
            if data is None:
                idx = self._rand_another(idx)
                continue
            return data

    def prepare_train_img(self, idx):
        img_info = self.img_infos[idx]
        # load image
        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
        # load proposals if necessary
        if self.proposals is not None:
            proposals = self.proposals[idx][:self.num_max_proposals]
            # TODO: Handle empty proposals properly. Currently images with
            # no proposals are just ignored, but they can be used for
            # training in concept.
            if len(proposals) == 0:
                return None
            if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shapes (n, 4) or (n, 5), '
                    'but found {}'.format(proposals.shape))
            if proposals.shape[1] == 5:
                scores = proposals[:, 4, None]
                proposals = proposals[:, :4]
            else:
                scores = None

        ann = self.get_ann_info(idx)
        gt_bboxes = ann['bboxes']
        gt_labels = ann['labels']
        if self.with_crowd:
            gt_bboxes_ignore = ann['bboxes_ignore']

        # skip the image if there is no valid gt bbox
        if len(gt_bboxes) == 0:
            return None

        # apply transforms
        flip = True if np.random.rand() < self.flip_ratio else False
        img_scale = random_scale(self.img_scales)  # sample a scale
        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, img_scale, flip)

        if self.proposals is not None:
            proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                            flip)
            proposals = np.hstack(
                [proposals, scores]) if scores is not None else proposals
        gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
                                        flip)
        if self.with_crowd:
            gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                                   scale_factor, flip)
        if self.with_mask:
            gt_masks = self.mask_transform(ann['masks'], pad_shape,
                                           scale_factor, flip)

        ori_shape = (img_info['height'], img_info['width'], 3)
        img_meta = dict(
            ori_shape=ori_shape,
            img_shape=img_shape,
            pad_shape=pad_shape,
            scale_factor=scale_factor,
            flip=flip)

        data = dict(
            img=DC(to_tensor(img), stack=True),
            img_meta=DC(img_meta, cpu_only=True),
            gt_bboxes=DC(to_tensor(gt_bboxes)))
        if self.proposals is not None:
            data['proposals'] = DC(to_tensor(proposals))
        if self.with_label:
            data['gt_labels'] = DC(to_tensor(gt_labels))
        if self.with_crowd:
            data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
        if self.with_mask:
            data['gt_masks'] = DC(gt_masks, cpu_only=True)
        return data
    def prepare_test_img(self, idx):
        """Prepare an image for testing (multi-scale and flipping)."""
        img_info = self.img_infos[idx]
        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
        if self.proposals is not None:
            proposal = self.proposals[idx][:self.num_max_proposals]
            if not (proposal.shape[1] == 4 or proposal.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shapes (n, 4) or (n, 5), '
                    'but found {}'.format(proposal.shape))
        else:
            proposal = None

        def prepare_single(img, scale, flip, proposal=None):
            _img, img_shape, pad_shape, scale_factor = self.img_transform(
                img, scale, flip)
            _img = to_tensor(_img)
            _img_meta = dict(
                ori_shape=(img_info['height'], img_info['width'], 3),
                img_shape=img_shape,
                pad_shape=pad_shape,
                scale_factor=scale_factor,
                flip=flip)
            if proposal is not None:
                if proposal.shape[1] == 5:
                    score = proposal[:, 4, None]
                    proposal = proposal[:, :4]
                else:
                    score = None
                _proposal = self.bbox_transform(proposal, img_shape,
                                                scale_factor, flip)
                _proposal = np.hstack(
                    [_proposal, score]) if score is not None else _proposal
                _proposal = to_tensor(_proposal)
            else:
                _proposal = None
            return _img, _img_meta, _proposal

        imgs = []
        img_metas = []
        proposals = []
        for scale in self.img_scales:
            _img, _img_meta, _proposal = prepare_single(
                img, scale, False, proposal)
            imgs.append(_img)
            img_metas.append(DC(_img_meta, cpu_only=True))
            proposals.append(_proposal)
            if self.flip_ratio > 0:
                _img, _img_meta, _proposal = prepare_single(
                    img, scale, True, proposal)
                imgs.append(_img)
                img_metas.append(DC(_img_meta, cpu_only=True))
                proposals.append(_proposal)
        data = dict(img=imgs, img_meta=img_metas)
        if self.proposals is not None:
            data['proposals'] = proposals
        return data
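To make the annotation format in CustomDataset's docstring concrete, here is a sketch (not part of the commit) that writes a one-image annotation file with mmcv and constructs the dataset from it; all paths and values are placeholders:

# Example: building an annotation file in CustomDataset's documented
# format, then constructing the dataset. Paths and values are placeholders.
import mmcv
import numpy as np

from mmdet.datasets import CustomDataset

annotations = [
    dict(
        filename='a.jpg',  # placeholder, resolved against img_prefix
        width=1280,
        height=720,
        ann=dict(
            bboxes=np.array([[10., 20., 110., 220.]], dtype=np.float32),
            labels=np.array([1], dtype=np.int64),
            bboxes_ignore=np.zeros((0, 4), dtype=np.float32)))
]
mmcv.dump(annotations, 'my_train.pkl')  # load_annotations() uses mmcv.load

dataset = CustomDataset(
    ann_file='my_train.pkl',
    img_prefix='images/',  # placeholder
    img_scale=(1333, 800),
    img_norm_cfg=dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True),
    with_mask=False)  # no 'masks' key in the minimal annotation above

Note with_mask=False: prepare_train_img indexes ann['masks'] whenever with_mask is set, and the minimal annotation above carries none. During training, _set_group_flag() buckets images by aspect ratio so that GroupSampler can batch landscape and portrait images separately.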
# tools/convert_datasets/pascal_voc.py
import argparse
import os.path as osp
import xml.etree.ElementTree as ET

import mmcv
import numpy as np

from mmdet.core import voc_classes

label_ids = {name: i + 1 for i, name in enumerate(voc_classes())}
def parse_xml(args):
    xml_path, img_path = args
    tree = ET.parse(xml_path)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    bboxes = []
    labels = []
    bboxes_ignore = []
    labels_ignore = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        label = label_ids[name]
        difficult = int(obj.find('difficult').text)
        bnd_box = obj.find('bndbox')
        bbox = [
            int(bnd_box.find('xmin').text),
            int(bnd_box.find('ymin').text),
            int(bnd_box.find('xmax').text),
            int(bnd_box.find('ymax').text)
        ]
        if difficult:
            bboxes_ignore.append(bbox)
            labels_ignore.append(label)
        else:
            bboxes.append(bbox)
            labels.append(label)
    if not bboxes:
        bboxes = np.zeros((0, 4))
        labels = np.zeros((0, ))
    else:
        # convert 1-based VOC pixel indices to 0-based coordinates
        bboxes = np.array(bboxes, ndmin=2) - 1
        labels = np.array(labels)
    if not bboxes_ignore:
        bboxes_ignore = np.zeros((0, 4))
        labels_ignore = np.zeros((0, ))
    else:
        bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1
        labels_ignore = np.array(labels_ignore)
    annotation = {
        'filename': img_path,
        'width': w,
        'height': h,
        'ann': {
            'bboxes': bboxes.astype(np.float32),
            'labels': labels.astype(np.int64),
            'bboxes_ignore': bboxes_ignore.astype(np.float32),
            'labels_ignore': labels_ignore.astype(np.int64)
        }
    }
    return annotation
def cvt_annotations(devkit_path, years, split, out_file):
    if not isinstance(years, list):
        years = [years]
    annotations = []
    for year in years:
        filelist = osp.join(devkit_path, 'VOC{}/ImageSets/Main/{}.txt'.format(
            year, split))
        if not osp.isfile(filelist):
            print('filelist does not exist: {}, skip voc{} {}'.format(
                filelist, year, split))
            return
        img_names = mmcv.list_from_file(filelist)
        xml_paths = [
            osp.join(devkit_path, 'VOC{}/Annotations/{}.xml'.format(
                year, img_name)) for img_name in img_names
        ]
        img_paths = [
            'VOC{}/JPEGImages/{}.jpg'.format(year, img_name)
            for img_name in img_names
        ]
        part_annotations = mmcv.track_progress(parse_xml,
                                               list(zip(xml_paths, img_paths)))
        annotations.extend(part_annotations)
    mmcv.dump(annotations, out_file)
    return annotations


def parse_args():
    parser = argparse.ArgumentParser(
        description='Convert PASCAL VOC annotations to mmdetection format')
    parser.add_argument('devkit_path', help='pascal voc devkit path')
    parser.add_argument('-o', '--out-dir', help='output path')
    args = parser.parse_args()
    return args
def main():
    args = parse_args()
    devkit_path = args.devkit_path
    out_dir = args.out_dir if args.out_dir else devkit_path
    mmcv.mkdir_or_exist(out_dir)

    years = []
    if osp.isdir(osp.join(devkit_path, 'VOC2007')):
        years.append('2007')
    if osp.isdir(osp.join(devkit_path, 'VOC2012')):
        years.append('2012')
    if '2007' in years and '2012' in years:
        years.append(['2007', '2012'])
    if not years:
        raise IOError('The devkit path {} contains neither "VOC2007" nor '
                      '"VOC2012" subfolder'.format(devkit_path))
    for year in years:
        if year == '2007':
            prefix = 'voc07'
        elif year == '2012':
            prefix = 'voc12'
        elif year == ['2007', '2012']:
            prefix = 'voc0712'
        for split in ['train', 'val', 'trainval']:
            dataset_name = prefix + '_' + split
            print('processing {} ...'.format(dataset_name))
            cvt_annotations(devkit_path, year, split,
                            osp.join(out_dir, dataset_name + '.pkl'))
        if not isinstance(year, list):
            dataset_name = prefix + '_test'
            print('processing {} ...'.format(dataset_name))
            cvt_annotations(devkit_path, year, 'test',
                            osp.join(out_dir, dataset_name + '.pkl'))
    print('Done!')


if __name__ == '__main__':
    main()
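A typical invocation (the devkit path is a placeholder) is
python tools/convert_datasets/pascal_voc.py VOCdevkit -o data/voc,
which writes voc07_train.pkl, voc07_val.pkl, voc07_trainval.pkl, and voc07_test.pkl (plus voc12_* and voc0712_* variants when VOC2012 is also present). The output can be inspected directly; a short sketch, with the path assumed from the default naming in main() above:

# Sketch: inspecting one converted annotation file.
import mmcv

annotations = mmcv.load('data/voc/voc07_trainval.pkl')  # assumed path
print(len(annotations))
first = annotations[0]
print(first['filename'], first['ann']['bboxes'].shape)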