diff --git a/mmdet/core/mask/mask_target.py b/mmdet/core/mask/mask_target.py
index 0acbc0033e1c43069f21ffdbdbbbaaf49f48c199..423f5ef9539bc9b0303c7467cb973c9f820550e7 100644
--- a/mmdet/core/mask/mask_target.py
+++ b/mmdet/core/mask/mask_target.py
@@ -1,6 +1,7 @@
 import mmcv
 import numpy as np
 import torch
+from torch.nn.modules.utils import _pair
 
 
 def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
@@ -13,7 +14,7 @@ def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
 
 
 def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
-    mask_size = cfg.mask_size
+    mask_size = _pair(cfg.mask_size)
     num_pos = pos_proposals.size(0)
     mask_targets = []
     if num_pos > 0:
@@ -26,11 +27,12 @@ def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
             w = np.maximum(x2 - x1 + 1, 1)
             h = np.maximum(y2 - y1 + 1, 1)
             # mask is uint8 both before and after resizing
+            # mask_size (h, w) to (w, h)
             target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],
-                                   (mask_size, mask_size))
+                                   mask_size[::-1])
             mask_targets.append(target)
         mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
             pos_proposals.device)
     else:
-        mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size))
+        mask_targets = pos_proposals.new_zeros((0, ) + mask_size)
     return mask_targets
diff --git a/mmdet/models/bbox_heads/bbox_head.py b/mmdet/models/bbox_heads/bbox_head.py
index 1a155f9ee2acd459518cf4597ac77447550a3791..5c54828c393d55f97215385093b1311e45e01840 100644
--- a/mmdet/models/bbox_heads/bbox_head.py
+++ b/mmdet/models/bbox_heads/bbox_head.py
@@ -1,6 +1,7 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
+from torch.nn.modules.utils import _pair
 
 from mmdet.core import (auto_fp16, bbox_target, delta2bbox, force_fp32,
                         multiclass_nms)
@@ -35,7 +36,8 @@ class BBoxHead(nn.Module):
         self.with_avg_pool = with_avg_pool
         self.with_cls = with_cls
         self.with_reg = with_reg
-        self.roi_feat_size = roi_feat_size
+        self.roi_feat_size = _pair(roi_feat_size)
+        self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
         self.in_channels = in_channels
         self.num_classes = num_classes
         self.target_means = target_means
@@ -48,9 +50,9 @@ class BBoxHead(nn.Module):
 
         in_channels = self.in_channels
         if self.with_avg_pool:
-            self.avg_pool = nn.AvgPool2d(roi_feat_size)
+            self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
         else:
-            in_channels *= (self.roi_feat_size * self.roi_feat_size)
+            in_channels *= self.roi_feat_area
         if self.with_cls:
             self.fc_cls = nn.Linear(in_channels, num_classes)
         if self.with_reg:
diff --git a/mmdet/models/bbox_heads/convfc_bbox_head.py b/mmdet/models/bbox_heads/convfc_bbox_head.py
index 801cf8f15f2f47e9f67d9f79d663e9a97a437318..f6659b32d85ba89d305dc2202a035cfa5a130736 100644
--- a/mmdet/models/bbox_heads/convfc_bbox_head.py
+++ b/mmdet/models/bbox_heads/convfc_bbox_head.py
@@ -67,9 +67,9 @@ class ConvFCBBoxHead(BBoxHead):
 
         if self.num_shared_fcs == 0 and not self.with_avg_pool:
             if self.num_cls_fcs == 0:
-                self.cls_last_dim *= (self.roi_feat_size * self.roi_feat_size)
+                self.cls_last_dim *= self.roi_feat_area
             if self.num_reg_fcs == 0:
-                self.reg_last_dim *= (self.roi_feat_size * self.roi_feat_size)
+                self.reg_last_dim *= self.roi_feat_area
 
         self.relu = nn.ReLU(inplace=True)
         # reconstruct fc_cls and fc_reg since input channels are changed
@@ -112,7 +112,7 @@ class ConvFCBBoxHead(BBoxHead):
             # for separated branches, also consider self.num_shared_fcs
             if (is_shared
                     or self.num_shared_fcs == 0) and not self.with_avg_pool:
-                last_layer_dim *= (self.roi_feat_size * self.roi_feat_size)
+                last_layer_dim *= self.roi_feat_area
             for i in range(num_branch_fcs):
                 fc_in_channels = (
                     last_layer_dim if i == 0 else self.fc_out_channels)
diff --git a/mmdet/models/bbox_heads/double_bbox_head.py b/mmdet/models/bbox_heads/double_bbox_head.py
index c02039bee9ed2b5bd08b78249481d41c0f4de605..a2934a0bfe2d3da88d795e963876e1102d87b487 100644
--- a/mmdet/models/bbox_heads/double_bbox_head.py
+++ b/mmdet/models/bbox_heads/double_bbox_head.py
@@ -134,8 +134,8 @@ class DoubleConvFCBBoxHead(BBoxHead):
         branch_fcs = nn.ModuleList()
         for i in range(self.num_fcs):
             fc_in_channels = (
-                self.in_channels * self.roi_feat_size *
-                self.roi_feat_size if i == 0 else self.fc_out_channels)
+                self.in_channels *
+                self.roi_feat_area if i == 0 else self.fc_out_channels)
             branch_fcs.append(nn.Linear(fc_in_channels, self.fc_out_channels))
         return branch_fcs
 
diff --git a/mmdet/models/mask_heads/fcn_mask_head.py b/mmdet/models/mask_heads/fcn_mask_head.py
index 96ebd11a4e8dde654ef655ae4f7ecc75d59bd23a..f25aa172855bc8c0f8f29de60c6ea711714108de 100644
--- a/mmdet/models/mask_heads/fcn_mask_head.py
+++ b/mmdet/models/mask_heads/fcn_mask_head.py
@@ -3,6 +3,7 @@ import numpy as np
 import pycocotools.mask as mask_util
 import torch
 import torch.nn as nn
+from torch.nn.modules.utils import _pair
 
 from mmdet.core import auto_fp16, force_fp32, mask_target
 from ..builder import build_loss
@@ -33,7 +34,8 @@ class FCNMaskHead(nn.Module):
                 'Invalid upsample method {}, accepted methods '
                 'are "deconv", "nearest", "bilinear"'.format(upsample_method))
         self.num_convs = num_convs
-        self.roi_feat_size = roi_feat_size  # WARN: not used and reserved
+        # WARN: roi_feat_size is reserved and not used
+        self.roi_feat_size = _pair(roi_feat_size)
         self.in_channels = in_channels
         self.conv_kernel_size = conv_kernel_size
         self.conv_out_channels = conv_out_channels
diff --git a/mmdet/models/mask_heads/grid_head.py b/mmdet/models/mask_heads/grid_head.py
index 39e69b33a62d9486587c45802815885b707a7eaf..72065309b6129b6a01f21c3eb8e681bdc91d649b 100644
--- a/mmdet/models/mask_heads/grid_head.py
+++ b/mmdet/models/mask_heads/grid_head.py
@@ -46,6 +46,8 @@ class GridHead(nn.Module):
             raise ValueError('grid_points must be a square number')
 
         # the predicted heatmap is half of whole_map_size
+        if not isinstance(self.roi_feat_size, int):
+            raise ValueError('Only square RoIs are supporeted in Grid R-CNN')
         self.whole_map_size = self.roi_feat_size * 4
 
         # compute point-wise sub-regions
diff --git a/mmdet/models/mask_heads/maskiou_head.py b/mmdet/models/mask_heads/maskiou_head.py
index 704978fddb0892dbae6ca51776b026310596107f..3c923680313215acac2ce19458c5510b60427e45 100644
--- a/mmdet/models/mask_heads/maskiou_head.py
+++ b/mmdet/models/mask_heads/maskiou_head.py
@@ -2,6 +2,7 @@ import numpy as np
 import torch
 import torch.nn as nn
 from mmcv.cnn import kaiming_init, normal_init
+from torch.nn.modules.utils import _pair
 
 from mmdet.core import force_fp32
 from ..builder import build_loss
@@ -47,10 +48,13 @@ class MaskIoUHead(nn.Module):
                     stride=stride,
                     padding=1))
 
+        roi_feat_size = _pair(roi_feat_size)
+        pooled_area = (roi_feat_size[0] // 2) * (roi_feat_size[1] // 2)
         self.fcs = nn.ModuleList()
         for i in range(num_fcs):
-            in_channels = self.conv_out_channels * (
-                roi_feat_size // 2)**2 if i == 0 else self.fc_out_channels
+            in_channels = (
+                self.conv_out_channels *
+                pooled_area if i == 0 else self.fc_out_channels)
             self.fcs.append(nn.Linear(in_channels, self.fc_out_channels))
 
         self.fc_mask_iou = nn.Linear(self.fc_out_channels, self.num_classes)
diff --git a/mmdet/models/roi_extractors/single_level.py b/mmdet/models/roi_extractors/single_level.py
index 503818ef368179cb0258645941725c3e52c9d4ac..a2ed5c56365c42340dbbe20de50a8160611cdd84 100644
--- a/mmdet/models/roi_extractors/single_level.py
+++ b/mmdet/models/roi_extractors/single_level.py
@@ -94,8 +94,8 @@ class SingleRoIExtractor(nn.Module):
         out_size = self.roi_layers[0].out_size
         num_levels = len(feats)
         target_lvls = self.map_roi_levels(rois, num_levels)
-        roi_feats = feats[0].new_zeros(rois.size()[0], self.out_channels,
-                                       out_size, out_size)
+        roi_feats = feats[0].new_zeros(
+            rois.size(0), self.out_channels, *out_size)
         if roi_scale_factor is not None:
             rois = self.roi_rescale(rois, roi_scale_factor)
         for i in range(num_levels):
diff --git a/mmdet/ops/roi_align/roi_align.py b/mmdet/ops/roi_align/roi_align.py
index a1fd3641213300050faf81d8033c7f40124266a7..a4cf24459a94854fc302ead679a94e8de4eca261 100644
--- a/mmdet/ops/roi_align/roi_align.py
+++ b/mmdet/ops/roi_align/roi_align.py
@@ -65,7 +65,7 @@ class RoIAlign(nn.Module):
                  use_torchvision=False):
         super(RoIAlign, self).__init__()
 
-        self.out_size = out_size
+        self.out_size = _pair(out_size)
         self.spatial_scale = float(spatial_scale)
         self.sample_num = int(sample_num)
         self.use_torchvision = use_torchvision
@@ -73,7 +73,7 @@ class RoIAlign(nn.Module):
     def forward(self, features, rois):
         if self.use_torchvision:
             from torchvision.ops import roi_align as tv_roi_align
-            return tv_roi_align(features, rois, _pair(self.out_size),
+            return tv_roi_align(features, rois, self.out_size,
                                 self.spatial_scale, self.sample_num)
         else:
             return roi_align(features, rois, self.out_size, self.spatial_scale,
diff --git a/mmdet/ops/roi_pool/roi_pool.py b/mmdet/ops/roi_pool/roi_pool.py
index 981e81d4e2afc7dc82ff714ccd3d285a8e6e2b22..26d900f780ecb0194df899ed7319b1555a2b2a84 100644
--- a/mmdet/ops/roi_pool/roi_pool.py
+++ b/mmdet/ops/roi_pool/roi_pool.py
@@ -55,14 +55,14 @@ class RoIPool(nn.Module):
     def __init__(self, out_size, spatial_scale, use_torchvision=False):
         super(RoIPool, self).__init__()
 
-        self.out_size = out_size
+        self.out_size = _pair(out_size)
         self.spatial_scale = float(spatial_scale)
         self.use_torchvision = use_torchvision
 
     def forward(self, features, rois):
         if self.use_torchvision:
             from torchvision.ops import roi_pool as tv_roi_pool
-            return tv_roi_pool(features, rois, _pair(self.out_size),
+            return tv_roi_pool(features, rois, self.out_size,
                                self.spatial_scale)
         else:
             return roi_pool(features, rois, self.out_size, self.spatial_scale)