Commit f92f64c2 authored by Kai Chen, committed by GitHub

use int64_t instead of long in cuda kernels (#1131)

parent bc94b212
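Background note (not part of the commit message): PyTorch index tensors created as torch.long always store 64-bit elements, and at::kLong maps to int64_t. The C++ long type, however, is only 32 bits wide on LLP64 platforms such as 64-bit Windows, so tensor.data<long>() can request an element type that does not match what the tensor actually holds. A minimal host-side sketch of the distinction, using a hypothetical helper check_index_tensor:

// Sketch only, not part of the commit; illustrates why int64_t is the
// portable element type for tensors created with torch::kLong.
#include <cstdint>
#include <ATen/ATen.h>

void check_index_tensor(const at::Tensor &mask_h_idx) {
  // int64_t is 8 bytes everywhere; long is 8 bytes on LP64 (Linux/macOS)
  // but only 4 bytes on LLP64 (MSVC/Windows).
  static_assert(sizeof(int64_t) == 8, "int64_t is always 64-bit");
  // Matches at::kLong on every platform, unlike data<long>().
  const int64_t *idx = mask_h_idx.data<int64_t>();
  (void)idx;
}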
@@ -18,9 +18,9 @@ __global__ void MaskedIm2colForward(const int n, const scalar_t *data_im,
                                     const int height, const int width,
                                     const int kernel_h, const int kernel_w,
                                     const int pad_h, const int pad_w,
-                                    const long *mask_h_idx,
-                                    const long *mask_w_idx, const int mask_cnt,
-                                    scalar_t *data_col) {
+                                    const int64_t *mask_h_idx,
+                                    const int64_t *mask_w_idx,
+                                    const int mask_cnt, scalar_t *data_col) {
   // mask_cnt * channels
   CUDA_1D_KERNEL_LOOP(index, n) {
     const int m_index = index % mask_cnt;
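For readers unfamiliar with the macro: CUDA_1D_KERNEL_LOOP is the usual grid-stride loop helper, so each thread walks the work items starting at its global thread id and stepping by the total thread count. A sketch of the conventional definition (the exact macro lives in the repository's CUDA helper header and may differ slightly):

// Conventional grid-stride loop macro (sketch, assumed definition).
#define CUDA_1D_KERNEL_LOOP(i, n)                                \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n);   \
       i += blockDim.x * gridDim.x)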
@@ -59,8 +59,8 @@ int MaskedIm2colForwardLaucher(const at::Tensor bottom_data, const int height,
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       bottom_data.scalar_type(), "MaskedIm2colLaucherForward", ([&] {
         const scalar_t *bottom_data_ = bottom_data.data<scalar_t>();
-        const long *mask_h_idx_ = mask_h_idx.data<long>();
-        const long *mask_w_idx_ = mask_w_idx.data<long>();
+        const int64_t *mask_h_idx_ = mask_h_idx.data<int64_t>();
+        const int64_t *mask_w_idx_ = mask_w_idx.data<int64_t>();
         scalar_t *top_data_ = top_data.data<scalar_t>();
         MaskedIm2colForward<scalar_t>
             <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
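The launch configuration above relies on GET_BLOCKS and THREADS_PER_BLOCK from the same helper header. A hedged sketch of the typical pattern (the constants 1024 and 65000 below are illustrative assumptions, not taken from this diff):

// Illustrative launch-config helper (sketch, assumed definition).
constexpr int THREADS_PER_BLOCK = 1024;

inline int GET_BLOCKS(const int N) {
  // Enough blocks to cover N work items, capped to respect grid-size limits.
  const int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
  const int max_block_num = 65000;
  return optimal_block_num < max_block_num ? optimal_block_num : max_block_num;
}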
@@ -74,16 +74,15 @@ int MaskedIm2colForwardLaucher(const at::Tensor bottom_data, const int height,
 template <typename scalar_t>
 __global__ void MaskedCol2imForward(const int n, const scalar_t *data_col,
                                     const int height, const int width,
-                                    const int channels, const long *mask_h_idx,
-                                    const long *mask_w_idx, const int mask_cnt,
-                                    scalar_t *data_im) {
+                                    const int channels,
+                                    const int64_t *mask_h_idx,
+                                    const int64_t *mask_w_idx,
+                                    const int mask_cnt, scalar_t *data_im) {
   CUDA_1D_KERNEL_LOOP(index, n) {
     const int m_index = index % mask_cnt;
     const int h_im = mask_h_idx[m_index];
     const int w_im = mask_w_idx[m_index];
     const int c_im = index / mask_cnt;
-    // int kernel_extent_w = (kernel_w - 1) + 1;
-    // int kernel_extent_h = (kernel_h - 1) + 1;
     // compute the start and end of the output
     data_im[(c_im * height + h_im) * width + w_im] = data_col[index];
   }
@@ -99,8 +98,8 @@ int MaskedCol2imForwardLaucher(const at::Tensor bottom_data, const int height,
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       bottom_data.scalar_type(), "MaskedCol2imLaucherForward", ([&] {
         const scalar_t *bottom_data_ = bottom_data.data<scalar_t>();
-        const long *mask_h_idx_ = mask_h_idx.data<long>();
-        const long *mask_w_idx_ = mask_w_idx.data<long>();
+        const int64_t *mask_h_idx_ = mask_h_idx.data<int64_t>();
+        const int64_t *mask_w_idx_ = mask_w_idx.data<int64_t>();
         scalar_t *top_data_ = top_data.data<scalar_t>();
         MaskedCol2imForward<scalar_t>
@@ -23,7 +23,7 @@
 template <typename scalar_t>
 __global__ void SigmoidFocalLossForward(const int nthreads,
                                         const scalar_t *logits,
-                                        const long *targets,
+                                        const int64_t *targets,
                                         const int num_classes,
                                         const float gamma, const float alpha,
                                         const int num, scalar_t *losses) {
@@ -60,7 +60,7 @@ __global__ void SigmoidFocalLossForward(const int nthreads,
 
 template <typename scalar_t>
 __global__ void SigmoidFocalLossBackward(
-    const int nthreads, const scalar_t *logits, const long *targets,
+    const int nthreads, const scalar_t *logits, const int64_t *targets,
     const scalar_t *d_losses, const int num_classes, const float gamma,
     const float alpha, const int num, scalar_t *d_logits) {
   CUDA_1D_KERNEL_LOOP(i, nthreads) {
@@ -109,7 +109,8 @@ at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
   auto losses = at::empty({num_samples, logits.size(1)}, logits.options());
   auto losses_size = num_samples * logits.size(1);
 
-  dim3 grid(std::min(THCCeilDiv((long)losses_size, 512L), 4096L));
+  dim3 grid(
+      std::min(THCCeilDiv((int64_t)losses_size, (int64_t)512), (int64_t)4096));
   dim3 block(512);
 
   if (losses.numel() == 0) {
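Why the integer literals change along with the cast: THCCeilDiv is assumed to be a function template whose two arguments must deduce to the same type, and std::min has the same requirement. Once the operand is cast to int64_t, the 512L and 4096L literals (type long) would cause a deduction conflict wherever long and int64_t are different types (e.g. MSVC, where int64_t is long long), hence the explicit (int64_t) casts. A standalone sketch of the equivalent computation, using hypothetical ceil_div and grid_size helpers:

// Sketch of the grid-size computation after the change (not the repo's code).
#include <algorithm>
#include <cstdint>

template <typename T>
T ceil_div(T a, T b) {
  return (a + b - 1) / b;  // round-up integer division, as THCCeilDiv does
}

int64_t grid_size(int64_t work_items) {
  // 512 threads per block, at most 4096 blocks; keeping everything int64_t
  // makes template deduction consistent on LP64 and LLP64 platforms alike.
  return std::min(ceil_div<int64_t>(work_items, 512),
                  static_cast<int64_t>(4096));
}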
@@ -121,7 +122,7 @@ at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
       logits.scalar_type(), "SigmoidFocalLoss_forward", [&] {
         SigmoidFocalLossForward<scalar_t><<<grid, block>>>(
             losses_size, logits.contiguous().data<scalar_t>(),
-            targets.contiguous().data<long>(), num_classes, gamma, alpha,
+            targets.contiguous().data<int64_t>(), num_classes, gamma, alpha,
             num_samples, losses.data<scalar_t>());
       });
   THCudaCheck(cudaGetLastError());
@@ -147,7 +148,8 @@ at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
   auto d_logits = at::zeros({num_samples, num_classes}, logits.options());
   auto d_logits_size = num_samples * logits.size(1);
 
-  dim3 grid(std::min(THCCeilDiv((long)d_logits_size, 512L), 4096L));
+  dim3 grid(std::min(THCCeilDiv((int64_t)d_logits_size, (int64_t)512),
+                     (int64_t)4096));
   dim3 block(512);
 
   if (d_logits.numel() == 0) {
@@ -159,7 +161,7 @@ at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
       logits.scalar_type(), "SigmoidFocalLoss_backward", [&] {
         SigmoidFocalLossBackward<scalar_t><<<grid, block>>>(
             d_logits_size, logits.contiguous().data<scalar_t>(),
-            targets.contiguous().data<long>(),
+            targets.contiguous().data<int64_t>(),
             d_losses.contiguous().data<scalar_t>(), num_classes, gamma, alpha,
             num_samples, d_logits.data<scalar_t>());
       });