diff --git a/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu b/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
index 80b73bc3e00163d78ff714f04b6502eefa3d9e3b..fd56016306baeeb935cdaa7e15ddc962e330390c 100644
--- a/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
+++ b/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
@@ -73,10 +73,24 @@ using namespace at;
        i += blockDim.x * gridDim.x)
 
 const int CUDA_NUM_THREADS = 1024;
+// Upper bound on the block count returned by GET_BLOCKS.
+// NOTE(review): 65535 matches the grid-dimension limit documented for older
+// CUDA devices -- confirm it is the intended cap for the targeted GPUs.
+const int kMaxGridNum = 65535;
 
+// Returns the number of blocks to launch for N work items with
+// CUDA_NUM_THREADS threads per block: ceil(N / CUDA_NUM_THREADS), clamped to
+// kMaxGridNum. Returns 0 when N <= 0.
+// NOTE(review): once the result is clamped, one thread per item no longer
+// holds; kernels presumably cover the remainder through the loop macro above,
+// which advances by blockDim.x * gridDim.x -- confirm for every launch site.
 inline int GET_BLOCKS(const int N)
 {
-  return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
+  if (N <= 0)
+    return 0;
+  // (N - 1) / T + 1 equals ceil(N / T) for N > 0 and, unlike
+  // (N + T - 1) / T, cannot overflow int when N is close to INT_MAX.
+  return std::min(kMaxGridNum, (N - 1) / CUDA_NUM_THREADS + 1);
 }
 
 template <typename scalar_t>