diff --git a/configs/cascade_mask_rcnn_r50_c4_1x.py b/configs/cascade_mask_rcnn_r50_c4_1x.py
index 653e2212896cedf1ed9cbf8055d7879b309d4b8a..cd50d75565f004d66442a628464bacf00268f0a9 100644
--- a/configs/cascade_mask_rcnn_r50_c4_1x.py
+++ b/configs/cascade_mask_rcnn_r50_c4_1x.py
@@ -11,7 +11,7 @@ model = dict(
         dilations=(1, 1, 1),
         out_indices=(2, ),
         frozen_stages=1,
-        normalize=dict(type='BN', frozen=True),
+        normalize=dict(type='BN', requires_grad=False),
         norm_eval=True,
         style='caffe'),
     shared_head=dict(
@@ -21,7 +21,7 @@ model = dict(
         stride=2,
         dilation=1,
         style='caffe',
-        normalize=dict(type='BN', frozen=True),
+        normalize=dict(type='BN', requires_grad=False),
         norm_eval=True),
     rpn_head=dict(
         type='RPNHead',
diff --git a/configs/cascade_rcnn_r50_c4_1x.py b/configs/cascade_rcnn_r50_c4_1x.py
index e53a476fb92115e6911b526fdfaf8739d1b9819f..adc3818a887a4a6308d284b4f2e44bc15537fe53 100644
--- a/configs/cascade_rcnn_r50_c4_1x.py
+++ b/configs/cascade_rcnn_r50_c4_1x.py
@@ -11,7 +11,7 @@ model = dict(
         dilations=(1, 1, 1),
         out_indices=(2, ),
         frozen_stages=1,
-        normalize=dict(type='BN', frozen=True),
+        normalize=dict(type='BN', requires_grad=False),
         norm_eval=True,
         style='caffe'),
     shared_head=dict(
@@ -21,7 +21,7 @@ model = dict(
         stride=2,
         dilation=1,
         style='caffe',
-        normalize=dict(type='BN', frozen=True),
+        normalize=dict(type='BN', requires_grad=False),
         norm_eval=True),
     rpn_head=dict(
         type='RPNHead',
diff --git a/configs/fast_rcnn_r50_c4_1x.py b/configs/fast_rcnn_r50_c4_1x.py
index 3d98c5c516abf19eb3424e6268a8d5ce1193ad4e..052e5780cf8e9b632adba3034054a052dec3c30a 100644
--- a/configs/fast_rcnn_r50_c4_1x.py
+++ b/configs/fast_rcnn_r50_c4_1x.py
@@ -10,7 +10,7 @@ model = dict(
         dilations=(1, 1, 1),
         out_indices=(2, ),
         frozen_stages=1,
-        normalize=dict(type='BN', frozen=True),
+        normalize=dict(type='BN', requires_grad=False),
         norm_eval=True,
         style='caffe'),
     shared_head=dict(
@@ -20,7 +20,7 @@ model = dict(
         stride=2,
         dilation=1,
         style='caffe',
-        normalize=dict(type='BN', frozen=True),
+        normalize=dict(type='BN', requires_grad=False),
         norm_eval=True),
     bbox_roi_extractor=dict(
         type='SingleRoIExtractor',
diff --git a/configs/faster_rcnn_r50_c4_1x.py b/configs/faster_rcnn_r50_c4_1x.py
index c38e294534bf70356a5757b83f3dd9f6f996c4ce..900b2e8cfa09e5e8ad4556868560fbc4b054ecdf 100644
--- a/configs/faster_rcnn_r50_c4_1x.py
+++ b/configs/faster_rcnn_r50_c4_1x.py
@@ -10,7 +10,7 @@ model = dict(
         dilations=(1, 1, 1),
         out_indices=(2, ),
         frozen_stages=1,
-        normalize=dict(type='BN', frozen=True),
+        normalize=dict(type='BN', requires_grad=False),
         norm_eval=True,
         style='caffe'),
     shared_head=dict(
@@ -20,7 +20,7 @@ model = dict(
         stride=2,
         dilation=1,
         style='caffe',
-        normalize=dict(type='BN', frozen=True),
+        normalize=dict(type='BN', requires_grad=False),
         norm_eval=True),
     rpn_head=dict(
         type='RPNHead',
diff --git a/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws_1x.py b/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws_1x.py
index d494043964fb2bfe980c96a9a9ebee05e20451f6..83fc821ad3bdf65860dd0705252d6654276d7be7 100644
--- a/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws_1x.py
+++ b/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws_1x.py
@@ -1,6 +1,6 @@
 # model settings
 conv_cfg = dict(type='ConvWS')
-normalize = dict(type='GN', num_groups=32, frozen=False)
+normalize = dict(type='GN', num_groups=32, requires_grad=True)
 model = dict(
     type='FasterRCNN',
     pretrained='open-mmlab://jhu/resnet50_gn_ws',
diff --git a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e.py b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e.py
index 2d98767dff15f4e958a850c76568bafc82289023..ed71cc1a95fb81651fc63cc7b74d4f6dedf8ec1e 100644
--- a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e.py
+++ b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e.py
@@ -1,6 +1,6 @@
 # model settings
 conv_cfg = dict(type='ConvWS')
-normalize = dict(type='GN', num_groups=32, frozen=False)
+normalize = dict(type='GN', num_groups=32, requires_grad=True)
 model = dict(
     type='MaskRCNN',
     pretrained='open-mmlab://jhu/resnet50_gn_ws',
diff --git a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_2x.py b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_2x.py
index c28c6ed2310185fa34845df2c4b68b4b0484025c..799e2af83cf38daeafe7ef92d712beb17ec959c8 100644
--- a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_2x.py
+++ b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_2x.py
@@ -1,6 +1,6 @@
 # model settings
 conv_cfg = dict(type='ConvWS')
-normalize = dict(type='GN', num_groups=32, frozen=False)
+normalize = dict(type='GN', num_groups=32, requires_grad=True)
 model = dict(
     type='MaskRCNN',
     pretrained='open-mmlab://jhu/resnet50_gn_ws',
diff --git a/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py
index 8fdeaa07134b14079107663c54759e8e13349f8c..e0b2aa3154464662e06db87a9ab4fca38c507fde 100644
--- a/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py
+++ b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py
@@ -1,6 +1,6 @@
 # model settings
 conv_cfg = dict(type='ConvWS')
-normalize = dict(type='GN', num_groups=32, frozen=False)
+normalize = dict(type='GN', num_groups=32, requires_grad=True)
 model = dict(
     type='MaskRCNN',
     pretrained='open-mmlab://jhu/resnext101_32x4d_gn_ws',
diff --git a/configs/gn/mask_rcnn_r101_fpn_gn_2x.py b/configs/gn/mask_rcnn_r101_fpn_gn_2x.py
index d15d1c0a2f19c06f06bc95149b67c87d159325f7..bc08720f16d1ccb70d8a79f1a9fb9ad37cbc4207 100644
--- a/configs/gn/mask_rcnn_r101_fpn_gn_2x.py
+++ b/configs/gn/mask_rcnn_r101_fpn_gn_2x.py
@@ -1,5 +1,5 @@
 # model settings
-normalize = dict(type='GN', num_groups=32, frozen=False)
+normalize = dict(type='GN', num_groups=32, requires_grad=True)
 
 model = dict(
     type='MaskRCNN',
diff --git a/configs/gn/mask_rcnn_r50_fpn_gn_2x.py b/configs/gn/mask_rcnn_r50_fpn_gn_2x.py
index da07ce8d90e5f6e0849a58da82ce58b1b9c011c0..d19633ffaec434d6032962a556a8f7f0a146c0d0 100644
--- a/configs/gn/mask_rcnn_r50_fpn_gn_2x.py
+++ b/configs/gn/mask_rcnn_r50_fpn_gn_2x.py
@@ -1,5 +1,5 @@
 # model settings
-normalize = dict(type='GN', num_groups=32, frozen=False)
+normalize = dict(type='GN', num_groups=32, requires_grad=True)
 
 model = dict(
     type='MaskRCNN',
diff --git a/configs/gn/mask_rcnn_r50_fpn_gn_contrib_2x.py b/configs/gn/mask_rcnn_r50_fpn_gn_contrib_2x.py
index bffb778bf763b0cfb68930ccf7e97ccac54aeb5c..eb3fa2be84c4ca7928ce626189e8a82ecd12b4e2 100644
--- a/configs/gn/mask_rcnn_r50_fpn_gn_contrib_2x.py
+++ b/configs/gn/mask_rcnn_r50_fpn_gn_contrib_2x.py
@@ -1,5 +1,5 @@
 # model settings
-normalize = dict(type='GN', num_groups=32, frozen=False)
+normalize = dict(type='GN', num_groups=32, requires_grad=True)
 
 model = dict(
     type='MaskRCNN',
diff --git a/configs/mask_rcnn_r50_c4_1x.py b/configs/mask_rcnn_r50_c4_1x.py
index 6fe21420d3a05926ab2b4b00780d7df6997b7efd..4bdf36c27b530b9697158b0d994e9912c1c75629 100644
--- a/configs/mask_rcnn_r50_c4_1x.py
+++ b/configs/mask_rcnn_r50_c4_1x.py
@@ -10,7 +10,7 @@ model = dict(
         dilations=(1, 1, 1),
         out_indices=(2, ),
         frozen_stages=1,
-        normalize=dict(type='BN', frozen=True),
+        normalize=dict(type='BN', requires_grad=False),
         norm_eval=True,
         style='caffe'),
     shared_head=dict(
@@ -20,7 +20,7 @@ model = dict(
         stride=2,
         dilation=1,
         style='caffe',
-        normalize=dict(type='BN', frozen=True),
+        normalize=dict(type='BN', requires_grad=False),
         norm_eval=True),
     rpn_head=dict(
         type='RPNHead',
diff --git a/configs/rpn_r50_c4_1x.py b/configs/rpn_r50_c4_1x.py
index 228d54a54e54e2c9a23f2ae0275f2182763df8c3..bc3d2a8f1f64b90f211bde8952ce7e631c4bf55e 100644
--- a/configs/rpn_r50_c4_1x.py
+++ b/configs/rpn_r50_c4_1x.py
@@ -10,7 +10,7 @@ model = dict(
         dilations=(1, 1, 1),
         out_indices=(2, ),
         frozen_stages=1,
-        normalize=dict(type='BN', frozen=True),
+        normalize=dict(type='BN', requires_grad=False),
         norm_eval=True,
         style='caffe'),
     neck=None,
diff --git a/mmdet/models/backbones/resnet.py b/mmdet/models/backbones/resnet.py
index 2654030defb5f15add6a3c8596e3f54492cda4b5..c9daeef68c81213c2809d0c985e024a2953f2cc3 100644
--- a/mmdet/models/backbones/resnet.py
+++ b/mmdet/models/backbones/resnet.py
@@ -307,8 +307,8 @@ class ResNet(nn.Module):
         style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
             layer is the 3x3 conv layer, otherwise the stride-two layer is
             the first 1x1 conv layer.
-        frozen_stages (int): Stages to be frozen (all param fixed). -1 means
-            not freezing any parameters.
+        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+            -1 means not freezing any parameters.
         normalize (dict): dictionary to construct and config norm layer.
         norm_eval (bool): Whether to set norm layers to eval mode, namely,
             freeze running stats (mean and var). Note: Effect on Batch Norm
@@ -336,7 +336,7 @@ class ResNet(nn.Module):
                  style='pytorch',
                  frozen_stages=-1,
                  conv_cfg=None,
-                 normalize=dict(type='BN', frozen=False),
+                 normalize=dict(type='BN', requires_grad=True),
                  norm_eval=True,
                  dcn=None,
                  stage_with_dcn=(False, False, False, False),
diff --git a/mmdet/models/shared_heads/res_layer.py b/mmdet/models/shared_heads/res_layer.py
index f42487eeb0ede2637f1ecfc5e29341c5bb3816fc..ea306e54559c42d768693357caddd649d9fa1980 100644
--- a/mmdet/models/shared_heads/res_layer.py
+++ b/mmdet/models/shared_heads/res_layer.py
@@ -17,7 +17,7 @@ class ResLayer(nn.Module):
                  stride=2,
                  dilation=1,
                  style='pytorch',
-                 normalize=dict(type='BN', frozen=False),
+                 normalize=dict(type='BN', requires_grad=True),
                  norm_eval=True,
                  with_cp=False,
                  dcn=None):
diff --git a/mmdet/models/utils/norm.py b/mmdet/models/utils/norm.py
index 35f0fe7ce53752a906f3decc4728efc11c6308ba..8658f6bafcd2caa148752a135dcc5309f3f3f2c5 100644
--- a/mmdet/models/utils/norm.py
+++ b/mmdet/models/utils/norm.py
@@ -17,15 +17,13 @@ def build_norm_layer(cfg, num_features, postfix=''):
         cfg (dict): cfg should contain:
             type (str): identify norm layer type.
             layer args: args needed to instantiate a norm layer.
-            frozen (bool): [optional] whether stop gradient updates
-                of norm layer, it is helpful to set frozen mode
-                in backbone's norms.
-        num_features (int): number of channels from input
-        postfix (int, str): appended into norm abbreation to
+            requires_grad (bool): [optional] whether params need gradients.
+        num_features (int): number of channels from input.
+        postfix (int, str): appended into norm abbreviation to
             create named layer.
 
     Returns:
-        name (str): abbreation + postfix
+        name (str): abbreviation + postfix
         layer (nn.Module): created norm layer
     """
     assert isinstance(cfg, dict) and 'type' in cfg
@@ -42,7 +40,7 @@ def build_norm_layer(cfg, num_features, postfix=''):
     assert isinstance(postfix, (int, str))
     name = abbr + str(postfix)
 
-    frozen = cfg_.pop('frozen', False)
+    requires_grad = cfg_.pop('requires_grad', True)
     cfg_.setdefault('eps', 1e-5)
     if layer_type != 'GN':
         layer = norm_layer(num_features, **cfg_)
@@ -50,8 +48,7 @@ def build_norm_layer(cfg, num_features, postfix=''):
         assert 'num_groups' in cfg_
         layer = norm_layer(num_channels=num_features, **cfg_)
 
-    if frozen:
-        for param in layer.parameters():
-            param.requires_grad = False
+    for param in layer.parameters():
+        param.requires_grad = requires_grad
 
     return name, layer