diff --git a/README.md b/README.md
index db83b3a680518bb0040bc843652ee7407e1ac440..79f2316859e84a3cb0e86bc0d55e1466cdeccfc7 100644
--- a/README.md
+++ b/README.md
@@ -92,6 +92,7 @@ Results and models are available in the [Model zoo](MODEL_ZOO.md).
 | SSD                | ✗        | ✗        | ✗        | ✓        |
 | RetinaNet          | ✓        | ✓        | ☐        | ✗        |
 | Hybrid Task Cascade| ✓        | ✓        | ☐        | ✗        |
+| FCOS               | ✓        | ✓        | ☐        | ✗        |
 
 Other features
 - [x] DCNv2
diff --git a/configs/fcos/README.md b/configs/fcos/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..3e31b22af978c21216638ee4dd37288887e78c2c
--- /dev/null
+++ b/configs/fcos/README.md
@@ -0,0 +1,25 @@
+# FCOS: Fully Convolutional One-Stage Object Detection
+
+## Introduction
+
+```
+@article{tian2019fcos,
+  title={FCOS: Fully Convolutional One-Stage Object Detection},
+  author={Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong},
+  journal={arXiv preprint arXiv:1904.01355},
+  year={2019}
+}
+```
+
+## Results and Models
+
+| Backbone  | Style   | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
+|:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
+| R-50-FPN  | caffe   | 1x      | 6.9      | 0.396               | 13.6           | 36.7   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_fpn_1x-9f253a93.pth) |
+| R-50-FPN  | caffe   | 2x      | -        | -                   | -              | 38.7   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_fpn_2x-f7329d80.pth) |
+| R-101-FPN | caffe   | 1x      | 10.4     | 0.558               | 11.6           | 39.1   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_fpn_1x-e4889733.pth) |
+| R-101-FPN | caffe   | 2x      | -        | -                   | -              | 40.8   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_fpn_2x-42e6f62d.pth) |
+| X-101-64x4d-FPN | caffe   | 2x      | 9.7      | 0.892               | 7.0            | 42.8   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_x101_64x4d_fpn_2x-a36c0872.pth) |
+
+**Notes:**
+- To be consistent with the authors' implementation, we use 4 GPUs with 4 images/GPU for the R-50 and R-101 models, and 8 GPUs with 2 images/GPU for the X-101 models (see the example command below).
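+
+For reference, a 4-GPU run with the settings above would presumably be launched with mmdetection's standard distributed training script (the exact entry point is an assumption and may differ in your checkout):
+
+```shell
+# train FCOS R-50-FPN 1x on 4 GPUs (4 images/GPU, as noted above)
+./tools/dist_train.sh configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py 4
+```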
diff --git a/configs/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py b/configs/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..41297fc0aa4dd0939839d7c9e6798d36f07d4b11
--- /dev/null
+++ b/configs/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py
@@ -0,0 +1,124 @@
+# model settings
+model = dict(
+    type='FCOS',
+    pretrained='open-mmlab://resnet101_caffe',
+    backbone=dict(
+        type='ResNet',
+        depth=101,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=False),
+        style='caffe'),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        start_level=1,
+        add_extra_convs=True,
+        extra_convs_on_inputs=False,  # use P5
+        num_outs=5,
+        relu_before_extra_convs=True),
+    bbox_head=dict(
+        type='FCOSHead',
+        num_classes=81,
+        in_channels=256,
+        stacked_convs=4,
+        feat_channels=256,
+        strides=[8, 16, 32, 64, 128]))
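+# note: FCOSHead applies GN (num_groups=32) in its conv towers by default,
+# hence the "gn" in the config name even though no norm_cfg is set here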
+# training and testing settings
+train_cfg = dict(
+    assigner=dict(
+        type='MaxIoUAssigner',
+        pos_iou_thr=0.5,
+        neg_iou_thr=0.4,
+        min_pos_iou=0,
+        ignore_iof_thr=-1),
+    smoothl1_beta=0.11,
+    gamma=2.0,
+    alpha=0.25,
+    allowed_border=-1,
+    pos_weight=-1,
+    debug=False)
+test_cfg = dict(
+    nms_pre=1000,
+    min_bbox_size=0,
+    score_thr=0.05,
+    nms=dict(type='nms', iou_thr=0.5),
+    max_per_img=100)
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
+data = dict(
+    imgs_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=[(1333, 640), (1333, 800)],
+        multiscale_mode='value',
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=False,
+        with_crowd=False,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=False,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=False,
+        with_label=False,
+        test_mode=True))
+# optimizer
+optimizer = dict(
+    type='SGD',
+    lr=0.01,
+    momentum=0.9,
+    weight_decay=0.0001,
+    paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.))
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='constant',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[16, 22])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+total_epochs = 24
+device_ids = range(4)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py b/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f9352c2c9df4514f5b43074ef5d956b2d5a309c
--- /dev/null
+++ b/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py
@@ -0,0 +1,125 @@
+# model settings
+model = dict(
+    type='FCOS',
+    pretrained='open-mmlab://resnext101_64x4d',
+    backbone=dict(
+        type='ResNeXt',
+        depth=101,
+        groups=64,
+        base_width=4,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        style='pytorch'),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        start_level=1,
+        add_extra_convs=True,
+        extra_convs_on_inputs=False,  # use P5
+        num_outs=5,
+        relu_before_extra_convs=True),
+    bbox_head=dict(
+        type='FCOSHead',
+        num_classes=81,
+        in_channels=256,
+        stacked_convs=4,
+        feat_channels=256,
+        strides=[8, 16, 32, 64, 128]))
+# training and testing settings
+train_cfg = dict(
+    assigner=dict(
+        type='MaxIoUAssigner',
+        pos_iou_thr=0.5,
+        neg_iou_thr=0.4,
+        min_pos_iou=0,
+        ignore_iof_thr=-1),
+    smoothl1_beta=0.11,
+    gamma=2.0,
+    alpha=0.25,
+    allowed_border=-1,
+    pos_weight=-1,
+    debug=False)
+test_cfg = dict(
+    nms_pre=1000,
+    min_bbox_size=0,
+    score_thr=0.05,
+    nms=dict(type='nms', iou_thr=0.5),
+    max_per_img=100)
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+data = dict(
+    imgs_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=[(1333, 640), (1333, 800)],
+        multiscale_mode='value',
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=False,
+        with_crowd=False,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=False,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=False,
+        with_label=False,
+        test_mode=True))
+# optimizer
+optimizer = dict(
+    type='SGD',
+    lr=0.01,
+    momentum=0.9,
+    weight_decay=0.0001,
+    paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.))
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='constant',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[16, 22])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+total_epochs = 24
+device_ids = range(8)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py b/configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd63ccfb2296d7d077aa8a35548f382eba71a560
--- /dev/null
+++ b/configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py
@@ -0,0 +1,123 @@
+# model settings
+model = dict(
+    type='FCOS',
+    pretrained='open-mmlab://resnet50_caffe',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=False),
+        style='caffe'),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        start_level=1,
+        add_extra_convs=True,
+        extra_convs_on_inputs=False,  # use P5
+        num_outs=5,
+        relu_before_extra_convs=True),
+    bbox_head=dict(
+        type='FCOSHead',
+        num_classes=81,
+        in_channels=256,
+        stacked_convs=4,
+        feat_channels=256,
+        strides=[8, 16, 32, 64, 128]))
+# training and testing settings
+train_cfg = dict(
+    assigner=dict(
+        type='MaxIoUAssigner',
+        pos_iou_thr=0.5,
+        neg_iou_thr=0.4,
+        min_pos_iou=0,
+        ignore_iof_thr=-1),
+    smoothl1_beta=0.11,
+    gamma=2.0,
+    alpha=0.25,
+    allowed_border=-1,
+    pos_weight=-1,
+    debug=False)
+test_cfg = dict(
+    nms_pre=1000,
+    min_bbox_size=0,
+    score_thr=0.05,
+    nms=dict(type='nms', iou_thr=0.5),
+    max_per_img=100)
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
+data = dict(
+    imgs_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=False,
+        with_crowd=False,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=False,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=False,
+        with_label=False,
+        test_mode=True))
+# optimizer
+optimizer = dict(
+    type='SGD',
+    lr=0.01,
+    momentum=0.9,
+    weight_decay=0.0001,
+    paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.))
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='constant',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[8, 11])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+total_epochs = 12
+device_ids = range(4)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/fcos_r50_caffe_fpn_gn_1x_4gpu'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/mmdet/apis/train.py b/mmdet/apis/train.py
index 34b632624ca5e20cccd5394f967ca1cc1c1ad5d2..6732664b3b049ee10f4337d6bda946b947c4e789 100644
--- a/mmdet/apis/train.py
+++ b/mmdet/apis/train.py
@@ -39,9 +39,8 @@ def batch_processor(model, data, train_mode):
     losses = model(**data)
     loss, log_vars = parse_losses(losses)
 
-    outputs = dict(loss=loss,
-                   log_vars=log_vars,
-                   num_samples=len(data['img'].data))
+    outputs = dict(
+        loss=loss, log_vars=log_vars, num_samples=len(data['img'].data))
 
     return outputs
 
@@ -135,10 +134,11 @@ def build_optimizer(model, optimizer_cfg):
 def _dist_train(model, dataset, cfg, validate=False):
     # prepare data loaders
     data_loaders = [
-        build_dataloader(dataset,
-                         cfg.data.imgs_per_gpu,
-                         cfg.data.workers_per_gpu,
-                         dist=True)
+        build_dataloader(
+            dataset,
+            cfg.data.imgs_per_gpu,
+            cfg.data.workers_per_gpu,
+            dist=True)
     ]
     # put model on gpus
     model = MMDistributedDataParallel(model.cuda())
@@ -174,11 +174,12 @@ def _dist_train(model, dataset, cfg, validate=False):
 def _non_dist_train(model, dataset, cfg, validate=False):
     # prepare data loaders
     data_loaders = [
-        build_dataloader(dataset,
-                         cfg.data.imgs_per_gpu,
-                         cfg.data.workers_per_gpu,
-                         cfg.gpus,
-                         dist=False)
+        build_dataloader(
+            dataset,
+            cfg.data.imgs_per_gpu,
+            cfg.data.workers_per_gpu,
+            cfg.gpus,
+            dist=False)
     ]
     # put model on gpus
     model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
diff --git a/mmdet/core/bbox/__init__.py b/mmdet/core/bbox/__init__.py
index 496bd7a26ad0d4278059e07510fffa945d8cd116..bcf6efda4772a44c1e63b45e443c87c19da20cd1 100644
--- a/mmdet/core/bbox/__init__.py
+++ b/mmdet/core/bbox/__init__.py
@@ -5,7 +5,8 @@ from .samplers import (BaseSampler, PseudoSampler, RandomSampler,
                        CombinedSampler, SamplingResult)
 from .assign_sampling import build_assigner, build_sampler, assign_and_sample
 from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping,
-                         bbox_mapping_back, bbox2roi, roi2bbox, bbox2result)
+                         bbox_mapping_back, bbox2roi, roi2bbox, bbox2result,
+                         distance2bbox)
 from .bbox_target import bbox_target
 
 __all__ = [
@@ -14,5 +15,6 @@ __all__ = [
     'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
     'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample',
     'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping',
-    'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 'bbox_target'
+    'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result',
+    'distance2bbox', 'bbox_target'
 ]
diff --git a/mmdet/core/bbox/transforms.py b/mmdet/core/bbox/transforms.py
index 0d8f6f44f20df5c019dc8ed9ea46c2eb6c411c66..580b9bdfb24d43e80600ad44a70cde6b8ccc58e3 100644
--- a/mmdet/core/bbox/transforms.py
+++ b/mmdet/core/bbox/transforms.py
@@ -154,3 +154,27 @@ def bbox2result(bboxes, labels, num_classes):
         bboxes = bboxes.cpu().numpy()
         labels = labels.cpu().numpy()
         return [bboxes[labels == i, :] for i in range(num_classes - 1)]
+
+
+def distance2bbox(points, distance, max_shape=None):
+    """Decode distance prediction to bounding box.
+
+    Args:
+        points (Tensor): Shape (n, 2), [x, y].
+        distance (Tensor): Distance from the given point to 4
+            boundaries (left, top, right, bottom).
+        max_shape (tuple): Shape of the image, used to clamp the decoded
+            boxes to the image boundary.
+
+    Returns:
+        Tensor: Decoded bboxes.
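+
+    Example (hypothetical values as a quick sanity check):
+        >>> points = torch.Tensor([[10., 10.]])
+        >>> distance = torch.Tensor([[2., 3., 4., 5.]])
+        >>> distance2bbox(points, distance)  # -> [[8., 7., 14., 15.]]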
+    """
+    x1 = points[:, 0] - distance[:, 0]
+    y1 = points[:, 1] - distance[:, 1]
+    x2 = points[:, 0] + distance[:, 2]
+    y2 = points[:, 1] + distance[:, 3]
+    if max_shape is not None:
+        x1 = x1.clamp(min=0, max=max_shape[1] - 1)
+        y1 = y1.clamp(min=0, max=max_shape[0] - 1)
+        x2 = x2.clamp(min=0, max=max_shape[1] - 1)
+        y2 = y2.clamp(min=0, max=max_shape[0] - 1)
+    return torch.stack([x1, y1, x2, y2], -1)
diff --git a/mmdet/core/loss/__init__.py b/mmdet/core/loss/__init__.py
index 477906e5449cdda8d22c948626593c3232a5b0da..888051875f16073c1c46282ae4e08b7487fb2501 100644
--- a/mmdet/core/loss/__init__.py
+++ b/mmdet/core/loss/__init__.py
@@ -1,11 +1,12 @@
 from .losses import (
     weighted_nll_loss, weighted_cross_entropy, weighted_binary_cross_entropy,
     sigmoid_focal_loss, py_sigmoid_focal_loss, weighted_sigmoid_focal_loss,
-    mask_cross_entropy, smooth_l1_loss, weighted_smoothl1, accuracy)
+    mask_cross_entropy, smooth_l1_loss, weighted_smoothl1, accuracy, iou_loss)
 
 __all__ = [
     'weighted_nll_loss', 'weighted_cross_entropy',
     'weighted_binary_cross_entropy', 'sigmoid_focal_loss',
     'py_sigmoid_focal_loss', 'weighted_sigmoid_focal_loss',
-    'mask_cross_entropy', 'smooth_l1_loss', 'weighted_smoothl1', 'accuracy'
+    'mask_cross_entropy', 'smooth_l1_loss', 'weighted_smoothl1', 'accuracy',
+    'iou_loss'
 ]
diff --git a/mmdet/core/loss/losses.py b/mmdet/core/loss/losses.py
index 1c5bf70051b16d86c3b0027f3277e9167931ca02..e541ec47b43ea5ca7f13c106820b5d64b87bc259 100644
--- a/mmdet/core/loss/losses.py
+++ b/mmdet/core/loss/losses.py
@@ -2,6 +2,7 @@
 import torch
 import torch.nn.functional as F
 
+from ..bbox import bbox_overlaps
 from ...ops import sigmoid_focal_loss
 
 
@@ -127,3 +128,16 @@ def _expand_binary_labels(labels, label_weights, label_channels):
     bin_label_weights = label_weights.view(-1, 1).expand(
         label_weights.size(0), label_channels)
     return bin_labels, bin_label_weights
+
+
+def iou_loss(pred_bboxes, target_bboxes, reduction='mean'):
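+    # IoU loss (as in UnitBox, arXiv:1608.01471): -log(IoU) of each aligned
+    # (pred, target) box pair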
+    ious = bbox_overlaps(pred_bboxes, target_bboxes, is_aligned=True)
+    loss = -ious.log()
+
+    reduction_enum = F._Reduction.get_enum(reduction)
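+    # torch's internal reduction enum: 0 = none, 1 = mean, 2 = sum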
+    if reduction_enum == 0:
+        return loss
+    elif reduction_enum == 1:
+        return loss.mean()
+    elif reduction_enum == 2:
+        return loss.sum()
diff --git a/mmdet/core/post_processing/bbox_nms.py b/mmdet/core/post_processing/bbox_nms.py
index 1f7c6f17e36af180c7dd78a48bf431a4ad85e226..01beecd43abc7641d43773b192f988a55f4295d9 100644
--- a/mmdet/core/post_processing/bbox_nms.py
+++ b/mmdet/core/post_processing/bbox_nms.py
@@ -3,7 +3,12 @@ import torch
 from mmdet.ops.nms import nms_wrapper
 
 
-def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1):
+def multiclass_nms(multi_bboxes,
+                   multi_scores,
+                   score_thr,
+                   nms_cfg,
+                   max_num=-1,
+                   score_factors=None):
     """NMS for multi-class bboxes.
 
     Args:
@@ -14,6 +19,8 @@ def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1):
         nms_thr (float): NMS IoU threshold
         max_num (int): if there are more than max_num bboxes after NMS,
             only top max_num will be kept.
+        score_factors (Tensor): Factors by which the scores are multiplied
+            before applying NMS.
 
     Returns:
         tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels
@@ -34,10 +41,13 @@ def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1):
         else:
             _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
         _scores = multi_scores[cls_inds, i]
+        if score_factors is not None:
+            _scores *= score_factors[cls_inds]
         cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
         cls_dets, _ = nms_op(cls_dets, **nms_cfg_)
-        cls_labels = multi_bboxes.new_full(
-            (cls_dets.shape[0], ), i - 1, dtype=torch.long)
+        cls_labels = multi_bboxes.new_full((cls_dets.shape[0], ),
+                                           i - 1,
+                                           dtype=torch.long)
         bboxes.append(cls_dets)
         labels.append(cls_labels)
     if bboxes:
diff --git a/mmdet/models/anchor_heads/__init__.py b/mmdet/models/anchor_heads/__init__.py
index 25f12087e06126d4169dac9f085776ad55224b2b..86877a2425f56f49a00c680f6d271992ee89dfd3 100644
--- a/mmdet/models/anchor_heads/__init__.py
+++ b/mmdet/models/anchor_heads/__init__.py
@@ -1,6 +1,7 @@
 from .anchor_head import AnchorHead
-from .rpn_head import RPNHead
+from .fcos_head import FCOSHead
 from .retina_head import RetinaHead
+from .rpn_head import RPNHead
 from .ssd_head import SSDHead
 
-__all__ = ['AnchorHead', 'RPNHead', 'RetinaHead', 'SSDHead']
+__all__ = ['AnchorHead', 'RPNHead', 'RetinaHead', 'SSDHead', 'FCOSHead']
diff --git a/mmdet/models/anchor_heads/fcos_head.py b/mmdet/models/anchor_heads/fcos_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..979177cb86ccfaa3f07b926d0cac5908323a3678
--- /dev/null
+++ b/mmdet/models/anchor_heads/fcos_head.py
@@ -0,0 +1,371 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import normal_init
+
+from mmdet.core import (sigmoid_focal_loss, iou_loss, multi_apply,
+                        multiclass_nms, distance2bbox)
+from ..registry import HEADS
+from ..utils import bias_init_with_prob, Scale, ConvModule
+
+INF = 1e8
+
+
+@HEADS.register_module
+class FCOSHead(nn.Module):
+
+    def __init__(self,
+                 num_classes,
+                 in_channels,
+                 feat_channels=256,
+                 stacked_convs=4,
+                 strides=(4, 8, 16, 32, 64),
+                 regress_ranges=((-1, 64), (64, 128), (128, 256), (256, 512),
+                                 (512, INF)),
+                 conv_cfg=None,
+                 norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)):
+        super(FCOSHead, self).__init__()
+
+        self.num_classes = num_classes
+        self.cls_out_channels = num_classes - 1
+        self.in_channels = in_channels
+        self.feat_channels = feat_channels
+        self.stacked_convs = stacked_convs
+        self.strides = strides
+        self.regress_ranges = regress_ranges
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+
+        self._init_layers()
+
+    def _init_layers(self):
+        self.cls_convs = nn.ModuleList()
+        self.reg_convs = nn.ModuleList()
+        for i in range(self.stacked_convs):
+            chn = self.in_channels if i == 0 else self.feat_channels
+            self.cls_convs.append(
+                ConvModule(
+                    chn,
+                    self.feat_channels,
+                    3,
+                    stride=1,
+                    padding=1,
+                    conv_cfg=self.conv_cfg,
+                    norm_cfg=self.norm_cfg,
+                    bias=self.norm_cfg is None))
+            self.reg_convs.append(
+                ConvModule(
+                    chn,
+                    self.feat_channels,
+                    3,
+                    stride=1,
+                    padding=1,
+                    conv_cfg=self.conv_cfg,
+                    norm_cfg=self.norm_cfg,
+                    bias=self.norm_cfg is None))
+        self.fcos_cls = nn.Conv2d(
+            self.feat_channels, self.cls_out_channels, 3, padding=1)
+        self.fcos_reg = nn.Conv2d(self.feat_channels, 4, 3, padding=1)
+        self.fcos_centerness = nn.Conv2d(self.feat_channels, 1, 3, padding=1)
+
+        self.scales = nn.ModuleList([Scale(1.0) for _ in self.strides])
+
+    def init_weights(self):
+        for m in self.cls_convs:
+            normal_init(m.conv, std=0.01)
+        for m in self.reg_convs:
+            normal_init(m.conv, std=0.01)
+        bias_cls = bias_init_with_prob(0.01)
+        normal_init(self.fcos_cls, std=0.01, bias=bias_cls)
+        normal_init(self.fcos_reg, std=0.01)
+        normal_init(self.fcos_centerness, std=0.01)
+
+    def forward(self, feats):
+        return multi_apply(self.forward_single, feats, self.scales)
+
+    def forward_single(self, x, scale):
+        cls_feat = x
+        reg_feat = x
+
+        for cls_layer in self.cls_convs:
+            cls_feat = cls_layer(cls_feat)
+        cls_score = self.fcos_cls(cls_feat)
+        centerness = self.fcos_centerness(cls_feat)
+
+        for reg_layer in self.reg_convs:
+            reg_feat = reg_layer(reg_feat)
+        # scale the bbox_pred of each level
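+        # exp() maps the unbounded conv output to a positive distance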
+        bbox_pred = scale(self.fcos_reg(reg_feat)).exp()
+        return cls_score, bbox_pred, centerness
+
+    def loss(self,
+             cls_scores,
+             bbox_preds,
+             centernesses,
+             gt_bboxes,
+             gt_labels,
+             img_metas,
+             cfg,
+             gt_bboxes_ignore=None):
+        assert len(cls_scores) == len(bbox_preds) == len(centernesses)
+        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
+        all_level_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,
+                                           bbox_preds[0].device)
+        labels, bbox_targets = self.fcos_target(all_level_points, gt_bboxes,
+                                                gt_labels)
+
+        num_imgs = cls_scores[0].size(0)
+        # flatten cls_scores, bbox_preds and centerness
+        flatten_cls_scores = [
+            cls_score.permute(0, 2, 3, 1).reshape(-1, self.cls_out_channels)
+            for cls_score in cls_scores
+        ]
+        flatten_bbox_preds = [
+            bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
+            for bbox_pred in bbox_preds
+        ]
+        flatten_centerness = [
+            centerness.permute(0, 2, 3, 1).reshape(-1)
+            for centerness in centernesses
+        ]
+        flatten_cls_scores = torch.cat(flatten_cls_scores)
+        flatten_bbox_preds = torch.cat(flatten_bbox_preds)
+        flatten_centerness = torch.cat(flatten_centerness)
+        flatten_labels = torch.cat(labels)
+        flatten_bbox_targets = torch.cat(bbox_targets)
+        # repeat points to align with bbox_preds
+        flatten_points = torch.cat(
+            [points.repeat(num_imgs, 1) for points in all_level_points])
+
+        pos_inds = flatten_labels.nonzero().reshape(-1)
+        num_pos = len(pos_inds)
+        loss_cls = sigmoid_focal_loss(
+            flatten_cls_scores, flatten_labels, cfg.gamma, cfg.alpha,
+            'none').sum()[None] / (num_pos + num_imgs)  # avoid num_pos being 0
+
+        pos_bbox_preds = flatten_bbox_preds[pos_inds]
+        pos_bbox_targets = flatten_bbox_targets[pos_inds]
+        pos_centerness = flatten_centerness[pos_inds]
+        pos_centerness_targets = self.centerness_target(pos_bbox_targets)
+
+        if num_pos > 0:
+            pos_points = flatten_points[pos_inds]
+            pos_decoded_bbox_preds = distance2bbox(pos_points, pos_bbox_preds)
+            pos_decoded_target_preds = distance2bbox(pos_points,
+                                                     pos_bbox_targets)
+            # centerness weighted iou loss
+            loss_reg = ((iou_loss(
+                pos_decoded_bbox_preds,
+                pos_decoded_target_preds,
+                reduction='none') * pos_centerness_targets).sum() /
+                        pos_centerness_targets.sum())[None]
+            loss_centerness = F.binary_cross_entropy_with_logits(
+                pos_centerness, pos_centerness_targets, reduction='mean')[None]
+        else:
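+            # no positive samples: produce zero-valued losses that still
+            # depend on the predictions, keeping the computation graph intact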
+            loss_reg = pos_bbox_preds.sum()[None]
+            loss_centerness = pos_centerness.sum()[None]
+
+        return dict(
+            loss_cls=loss_cls,
+            loss_reg=loss_reg,
+            loss_centerness=loss_centerness)
+
+    def get_bboxes(self,
+                   cls_scores,
+                   bbox_preds,
+                   centernesses,
+                   img_metas,
+                   cfg,
+                   rescale=None):
+        assert len(cls_scores) == len(bbox_preds)
+        num_levels = len(cls_scores)
+
+        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
+        mlvl_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,
+                                      bbox_preds[0].device)
+        result_list = []
+        for img_id in range(len(img_metas)):
+            cls_score_list = [
+                cls_scores[i][img_id].detach() for i in range(num_levels)
+            ]
+            bbox_pred_list = [
+                bbox_preds[i][img_id].detach() for i in range(num_levels)
+            ]
+            centerness_pred_list = [
+                centernesses[i][img_id].detach() for i in range(num_levels)
+            ]
+            img_shape = img_metas[img_id]['img_shape']
+            scale_factor = img_metas[img_id]['scale_factor']
+            det_bboxes = self.get_bboxes_single(
+                cls_score_list, bbox_pred_list, centerness_pred_list,
+                mlvl_points, img_shape, scale_factor, cfg, rescale)
+            result_list.append(det_bboxes)
+        return result_list
+
+    def get_bboxes_single(self,
+                          cls_scores,
+                          bbox_preds,
+                          centernesses,
+                          mlvl_points,
+                          img_shape,
+                          scale_factor,
+                          cfg,
+                          rescale=False):
+        assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
+        mlvl_bboxes = []
+        mlvl_scores = []
+        mlvl_centerness = []
+        for cls_score, bbox_pred, centerness, points in zip(
+                cls_scores, bbox_preds, centernesses, mlvl_points):
+            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
+            scores = cls_score.permute(1, 2, 0).reshape(
+                -1, self.cls_out_channels).sigmoid()
+            centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()
+
+            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
+            nms_pre = cfg.get('nms_pre', -1)
+            if nms_pre > 0 and scores.shape[0] > nms_pre:
+                max_scores, _ = (scores * centerness[:, None]).max(dim=1)
+                _, topk_inds = max_scores.topk(nms_pre)
+                points = points[topk_inds, :]
+                bbox_pred = bbox_pred[topk_inds, :]
+                scores = scores[topk_inds, :]
+                centerness = centerness[topk_inds]
+            bboxes = distance2bbox(points, bbox_pred, max_shape=img_shape)
+            mlvl_bboxes.append(bboxes)
+            mlvl_scores.append(scores)
+            mlvl_centerness.append(centerness)
+        mlvl_bboxes = torch.cat(mlvl_bboxes)
+        if rescale:
+            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
+        mlvl_scores = torch.cat(mlvl_scores)
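+        # FCOSHead predicts foreground classes only; prepend a zero column
+        # for the background class, which multiclass_nms expects at index 0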
+        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
+        mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
+        mlvl_centerness = torch.cat(mlvl_centerness)
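+        # multiply classification scores by centerness before NMS to
+        # down-weight low-quality boxes far from object centers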
+        det_bboxes, det_labels = multiclass_nms(
+            mlvl_bboxes,
+            mlvl_scores,
+            cfg.score_thr,
+            cfg.nms,
+            cfg.max_per_img,
+            score_factors=mlvl_centerness)
+        return det_bboxes, det_labels
+
+    def get_points(self, featmap_sizes, dtype, device):
+        """Get points according to feature map sizes.
+
+        Args:
+            featmap_sizes (list[tuple]): Multi-level feature map sizes.
+            dtype (torch.dtype): Type of points.
+            device (torch.device): Device of points.
+
+        Returns:
+            tuple: points of each image.
+        """
+        mlvl_points = []
+        for i in range(len(featmap_sizes)):
+            mlvl_points.append(
+                self.get_points_single(featmap_sizes[i], self.strides[i],
+                                       dtype, device))
+        return mlvl_points
+
+    def get_points_single(self, featmap_size, stride, dtype, device):
+        h, w = featmap_size
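+        # map each feature map cell to the image plane, taking the cell
+        # center: coordinate = stride * index + stride // 2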
+        x_range = torch.arange(
+            0, w * stride, stride, dtype=dtype, device=device)
+        y_range = torch.arange(
+            0, h * stride, stride, dtype=dtype, device=device)
+        y, x = torch.meshgrid(y_range, x_range)
+        points = torch.stack(
+            (x.reshape(-1), y.reshape(-1)), dim=-1) + stride // 2
+        return points
+
+    def fcos_target(self, points, gt_bboxes_list, gt_labels_list):
+        assert len(points) == len(self.regress_ranges)
+        num_levels = len(points)
+        # expand regress ranges to align with points
+        expanded_regress_ranges = [
+            points[i].new_tensor(self.regress_ranges[i])[None].expand_as(
+                points[i]) for i in range(num_levels)
+        ]
+        # concat all levels points and regress ranges
+        concat_regress_ranges = torch.cat(expanded_regress_ranges, dim=0)
+        concat_points = torch.cat(points, dim=0)
+        # get labels and bbox_targets of each image
+        labels_list, bbox_targets_list = multi_apply(
+            self.fcos_target_single,
+            gt_bboxes_list,
+            gt_labels_list,
+            points=concat_points,
+            regress_ranges=concat_regress_ranges)
+
+        # split into per-image, per-level lists
+        num_points = [center.size(0) for center in points]
+        labels_list = [labels.split(num_points, 0) for labels in labels_list]
+        bbox_targets_list = [
+            bbox_targets.split(num_points, 0)
+            for bbox_targets in bbox_targets_list
+        ]
+
+        # for each level, concat the targets of all images
+        concat_lvl_labels = []
+        concat_lvl_bbox_targets = []
+        for i in range(num_levels):
+            concat_lvl_labels.append(
+                torch.cat([labels[i] for labels in labels_list]))
+            concat_lvl_bbox_targets.append(
+                torch.cat(
+                    [bbox_targets[i] for bbox_targets in bbox_targets_list]))
+        return concat_lvl_labels, concat_lvl_bbox_targets
+
+    def fcos_target_single(self, gt_bboxes, gt_labels, points, regress_ranges):
+        num_points = points.size(0)
+        num_gts = gt_labels.size(0)
+
+        areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1) * (
+            gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1)
+        # TODO: figure out why these two are different
+        # areas = areas[None].expand(num_points, num_gts)
+        areas = areas[None].repeat(num_points, 1)
+        regress_ranges = regress_ranges[:, None, :].expand(
+            num_points, num_gts, 2)
+        gt_bboxes = gt_bboxes[None].expand(num_points, num_gts, 4)
+        xs, ys = points[:, 0], points[:, 1]
+        xs = xs[:, None].expand(num_points, num_gts)
+        ys = ys[:, None].expand(num_points, num_gts)
+
+        left = xs - gt_bboxes[..., 0]
+        right = gt_bboxes[..., 2] - xs
+        top = ys - gt_bboxes[..., 1]
+        bottom = gt_bboxes[..., 3] - ys
+        bbox_targets = torch.stack((left, top, right, bottom), -1)
+
+        # condition1: inside a gt bbox
+        inside_gt_bbox_mask = bbox_targets.min(-1)[0] > 0
+
+        # condition2: limit the regression range for each location
+        max_regress_distance = bbox_targets.max(-1)[0]
+        inside_regress_range = (
+            max_regress_distance >= regress_ranges[..., 0]) & (
+                max_regress_distance <= regress_ranges[..., 1])
+
+        # if a location is still matched to more than one object,
+        # we choose the one with the minimal area
+        areas[inside_gt_bbox_mask == 0] = INF
+        areas[inside_regress_range == 0] = INF
+        min_area, min_area_inds = areas.min(dim=1)
+
+        labels = gt_labels[min_area_inds]
+        labels[min_area == INF] = 0
+        bbox_targets = bbox_targets[range(num_points), min_area_inds]
+
+        return labels, bbox_targets
+
+    def centerness_target(self, pos_bbox_targets):
+        # only calculate centerness targets for positive samples, otherwise
+        # the ratios below may produce nan
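+        # centerness = sqrt(min(l, r) / max(l, r) * min(t, b) / max(t, b)),
+        # which is 1 at the box center and decays towards the box borders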
+        left_right = pos_bbox_targets[:, [0, 2]]
+        top_bottom = pos_bbox_targets[:, [1, 3]]
+        centerness_targets = (
+            left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) * (
+                top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0])
+        return torch.sqrt(centerness_targets)
diff --git a/mmdet/models/detectors/__init__.py b/mmdet/models/detectors/__init__.py
index d3653ec903cdc4b615678ba1391da3a1ae7755a6..824833d8823d924c75ebe68c3ffca4c80270a981 100644
--- a/mmdet/models/detectors/__init__.py
+++ b/mmdet/models/detectors/__init__.py
@@ -8,9 +8,10 @@ from .mask_rcnn import MaskRCNN
 from .cascade_rcnn import CascadeRCNN
 from .htc import HybridTaskCascade
 from .retinanet import RetinaNet
+from .fcos import FCOS
 
 __all__ = [
     'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN',
     'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade',
-    'RetinaNet'
+    'RetinaNet', 'FCOS'
 ]
diff --git a/mmdet/models/detectors/fcos.py b/mmdet/models/detectors/fcos.py
new file mode 100644
index 0000000000000000000000000000000000000000..4c3dce1b164c750f58f9fb2fc0e836a1dcc0e8d1
--- /dev/null
+++ b/mmdet/models/detectors/fcos.py
@@ -0,0 +1,16 @@
+from .single_stage import SingleStageDetector
+from ..registry import DETECTORS
+
+
+@DETECTORS.register_module
+class FCOS(SingleStageDetector):
+
+    def __init__(self,
+                 backbone,
+                 neck,
+                 bbox_head,
+                 train_cfg=None,
+                 test_cfg=None,
+                 pretrained=None):
+        super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg,
+                                   test_cfg, pretrained)
diff --git a/mmdet/models/necks/fpn.py b/mmdet/models/necks/fpn.py
index 7b33b69de7de8432c7b1e40ec737b1cdc63a28e8..6b8c862a223144566a748b8fd584a64e9274b85d 100644
--- a/mmdet/models/necks/fpn.py
+++ b/mmdet/models/necks/fpn.py
@@ -2,8 +2,8 @@ import torch.nn as nn
 import torch.nn.functional as F
 from mmcv.cnn import xavier_init
 
-from ..utils import ConvModule
 from ..registry import NECKS
+from ..utils import ConvModule
 
 
 @NECKS.register_module
@@ -17,6 +17,7 @@ class FPN(nn.Module):
                  end_level=-1,
                  add_extra_convs=False,
                  extra_convs_on_inputs=True,
+                 relu_before_extra_convs=False,
                  conv_cfg=None,
                  norm_cfg=None,
                  activation=None):
@@ -27,6 +28,7 @@ class FPN(nn.Module):
         self.num_ins = len(in_channels)
         self.num_outs = num_outs
         self.activation = activation
+        self.relu_before_extra_convs = relu_before_extra_convs
 
         if end_level == -1:
             self.backbone_end_level = self.num_ins
@@ -127,6 +129,8 @@ class FPN(nn.Module):
                 else:
                     outs.append(self.fpn_convs[used_backbone_levels](outs[-1]))
                 for i in range(used_backbone_levels + 1, self.num_outs):
-                    # BUG: we should add relu before each extra conv
-                    outs.append(self.fpn_convs[i](outs[-1]))
+                    if self.relu_before_extra_convs:
+                        outs.append(self.fpn_convs[i](F.relu(outs[-1])))
+                    else:
+                        outs.append(self.fpn_convs[i](outs[-1]))
         return tuple(outs)
diff --git a/mmdet/models/utils/__init__.py b/mmdet/models/utils/__init__.py
index c517b07aef65c74b9f2853cf96c388aac1531d79..f9215c5ade9757b812697d85704188d5a593e0d9 100644
--- a/mmdet/models/utils/__init__.py
+++ b/mmdet/models/utils/__init__.py
@@ -1,11 +1,12 @@
 from .conv_ws import conv_ws_2d, ConvWS2d
 from .conv_module import build_conv_layer, ConvModule
 from .norm import build_norm_layer
+from .scale import Scale
 from .weight_init import (xavier_init, normal_init, uniform_init, kaiming_init,
                           bias_init_with_prob)
 
 __all__ = [
     'conv_ws_2d', 'ConvWS2d', 'build_conv_layer', 'ConvModule',
     'build_norm_layer', 'xavier_init', 'normal_init', 'uniform_init',
-    'kaiming_init', 'bias_init_with_prob'
+    'kaiming_init', 'bias_init_with_prob', 'Scale'
 ]
diff --git a/mmdet/models/utils/scale.py b/mmdet/models/utils/scale.py
new file mode 100644
index 0000000000000000000000000000000000000000..68c37cd4e99c50341869f8fc1efa85f94f016adb
--- /dev/null
+++ b/mmdet/models/utils/scale.py
@@ -0,0 +1,12 @@
+import torch
+import torch.nn as nn
+
+
+class Scale(nn.Module):
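+    """A learnable scale parameter.
+
+    FCOS multiplies the regression output of each FPN level by its own
+    Scale, so the head shared across levels can adapt to per-level ranges.
+    """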
+
+    def __init__(self, scale=1.0):
+        super(Scale, self).__init__()
+        self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float))
+
+    def forward(self, x):
+        return x * self.scale