From f92f64c2e4301208c91c2d3bfd5151f3584e45d9 Mon Sep 17 00:00:00 2001
From: Kai Chen <chenkaidev@gmail.com>
Date: Wed, 7 Aug 2019 15:03:52 +0800
Subject: [PATCH] use int64_t instead of long in cuda kernels (#1131)

---
 .../masked_conv/src/masked_conv2d_kernel.cu   | 23 +++++++++----------
 .../src/sigmoid_focal_loss_cuda.cu            | 14 ++++++-----
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/mmdet/ops/masked_conv/src/masked_conv2d_kernel.cu b/mmdet/ops/masked_conv/src/masked_conv2d_kernel.cu
index a0a949d..2312d12 100644
--- a/mmdet/ops/masked_conv/src/masked_conv2d_kernel.cu
+++ b/mmdet/ops/masked_conv/src/masked_conv2d_kernel.cu
@@ -18,9 +18,9 @@ __global__ void MaskedIm2colForward(const int n, const scalar_t *data_im,
                                     const int height, const int width,
                                     const int kernel_h, const int kernel_w,
                                     const int pad_h, const int pad_w,
-                                    const long *mask_h_idx,
-                                    const long *mask_w_idx, const int mask_cnt,
-                                    scalar_t *data_col) {
+                                    const int64_t *mask_h_idx,
+                                    const int64_t *mask_w_idx,
+                                    const int mask_cnt, scalar_t *data_col) {
   // mask_cnt * channels
   CUDA_1D_KERNEL_LOOP(index, n) {
     const int m_index = index % mask_cnt;
@@ -59,8 +59,8 @@ int MaskedIm2colForwardLaucher(const at::Tensor bottom_data, const int height,
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       bottom_data.scalar_type(), "MaskedIm2colLaucherForward", ([&] {
         const scalar_t *bottom_data_ = bottom_data.data<scalar_t>();
-        const long *mask_h_idx_ = mask_h_idx.data<long>();
-        const long *mask_w_idx_ = mask_w_idx.data<long>();
+        const int64_t *mask_h_idx_ = mask_h_idx.data<int64_t>();
+        const int64_t *mask_w_idx_ = mask_w_idx.data<int64_t>();
         scalar_t *top_data_ = top_data.data<scalar_t>();
         MaskedIm2colForward<scalar_t>
             <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
@@ -74,16 +74,15 @@ int MaskedIm2colForwardLaucher(const at::Tensor bottom_data, const int height,
 template <typename scalar_t>
 __global__ void MaskedCol2imForward(const int n, const scalar_t *data_col,
                                     const int height, const int width,
-                                    const int channels, const long *mask_h_idx,
-                                    const long *mask_w_idx, const int mask_cnt,
-                                    scalar_t *data_im) {
+                                    const int channels,
+                                    const int64_t *mask_h_idx,
+                                    const int64_t *mask_w_idx,
+                                    const int mask_cnt, scalar_t *data_im) {
   CUDA_1D_KERNEL_LOOP(index, n) {
     const int m_index = index % mask_cnt;
     const int h_im = mask_h_idx[m_index];
     const int w_im = mask_w_idx[m_index];
     const int c_im = index / mask_cnt;
-    // int kernel_extent_w = (kernel_w - 1) + 1;
-    // int kernel_extent_h = (kernel_h - 1) + 1;
     // compute the start and end of the output
     data_im[(c_im * height + h_im) * width + w_im] = data_col[index];
   }
@@ -99,8 +98,8 @@ int MaskedCol2imForwardLaucher(const at::Tensor bottom_data, const int height,
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       bottom_data.scalar_type(), "MaskedCol2imLaucherForward", ([&] {
         const scalar_t *bottom_data_ = bottom_data.data<scalar_t>();
-        const long *mask_h_idx_ = mask_h_idx.data<long>();
-        const long *mask_w_idx_ = mask_w_idx.data<long>();
+        const int64_t *mask_h_idx_ = mask_h_idx.data<int64_t>();
+        const int64_t *mask_w_idx_ = mask_w_idx.data<int64_t>();
         scalar_t *top_data_ = top_data.data<scalar_t>();
 
         MaskedCol2imForward<scalar_t>
diff --git a/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_cuda.cu b/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_cuda.cu
index c8db6df..6a9104c 100644
--- a/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_cuda.cu
+++ b/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_cuda.cu
@@ -23,7 +23,7 @@
 template <typename scalar_t>
 __global__ void SigmoidFocalLossForward(const int nthreads,
                                         const scalar_t *logits,
-                                        const long *targets,
+                                        const int64_t *targets,
                                         const int num_classes,
                                         const float gamma, const float alpha,
                                         const int num, scalar_t *losses) {
@@ -60,7 +60,7 @@ __global__ void SigmoidFocalLossForward(const int nthreads,
 
 template <typename scalar_t>
 __global__ void SigmoidFocalLossBackward(
-    const int nthreads, const scalar_t *logits, const long *targets,
+    const int nthreads, const scalar_t *logits, const int64_t *targets,
     const scalar_t *d_losses, const int num_classes, const float gamma,
     const float alpha, const int num, scalar_t *d_logits) {
   CUDA_1D_KERNEL_LOOP(i, nthreads) {
@@ -109,7 +109,8 @@ at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
   auto losses = at::empty({num_samples, logits.size(1)}, logits.options());
   auto losses_size = num_samples * logits.size(1);
 
-  dim3 grid(std::min(THCCeilDiv((long)losses_size, 512L), 4096L));
+  dim3 grid(
+      std::min(THCCeilDiv((int64_t)losses_size, (int64_t)512), (int64_t)4096));
   dim3 block(512);
 
   if (losses.numel() == 0) {
@@ -121,7 +122,7 @@ at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
       logits.scalar_type(), "SigmoidFocalLoss_forward", [&] {
         SigmoidFocalLossForward<scalar_t><<<grid, block>>>(
             losses_size, logits.contiguous().data<scalar_t>(),
-            targets.contiguous().data<long>(), num_classes, gamma, alpha,
+            targets.contiguous().data<int64_t>(), num_classes, gamma, alpha,
             num_samples, losses.data<scalar_t>());
       });
   THCudaCheck(cudaGetLastError());
@@ -147,7 +148,8 @@ at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
   auto d_logits = at::zeros({num_samples, num_classes}, logits.options());
   auto d_logits_size = num_samples * logits.size(1);
 
-  dim3 grid(std::min(THCCeilDiv((long)d_logits_size, 512L), 4096L));
+  dim3 grid(std::min(THCCeilDiv((int64_t)d_logits_size, (int64_t)512),
+                     (int64_t)4096));
   dim3 block(512);
 
   if (d_logits.numel() == 0) {
@@ -159,7 +161,7 @@ at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
      logits.scalar_type(), "SigmoidFocalLoss_backward", [&] {
        SigmoidFocalLossBackward<scalar_t><<<grid, block>>>(
            d_logits_size, logits.contiguous().data<scalar_t>(),
-           targets.contiguous().data<long>(),
+           targets.contiguous().data<int64_t>(),
            d_losses.contiguous().data<scalar_t>(), num_classes, gamma, alpha,
            num_samples, d_logits.data<scalar_t>());
      });
--
GitLab
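
Note (not part of the patch above): a minimal sketch of the portability issue this change addresses. On LP64 Linux "long" happens to be 64-bit, but on LLP64 platforms such as 64-bit Windows it is 32-bit, while the index/target tensors passed to these kernels are PyTorch Long tensors whose elements are always 64-bit; spelling the element type as int64_t keeps the host-side data<int64_t>() pointers and the device-side kernel parameters in agreement on every platform. The kernel and variable names below are hypothetical, chosen only for illustration.

// sizeof_long_demo.cu -- build with: nvcc sizeof_long_demo.cu -o sizeof_long_demo
#include <cstdint>
#include <cstdio>

// A toy gather kernel: 64-bit indices keep the same width on Linux and
// Windows builds, matching the storage of a PyTorch Long (int64) tensor.
__global__ void gather_rows(const int64_t *indices, const float *src,
                            float *dst, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    dst[i] = src[indices[i]];
  }
}

int main() {
  // Prints 4 vs 8 with MSVC (LLP64) and 8 vs 8 on typical 64-bit Linux (LP64);
  // relying on "long" would silently change the kernel argument layout.
  std::printf("sizeof(long)=%zu sizeof(int64_t)=%zu\n",
              sizeof(long), sizeof(int64_t));
  return 0;
}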