Skip to content
Snippets Groups Projects
Commit 4eb0affa authored by Emanuele De Rubeis's avatar Emanuele De Rubeis
Browse files

Kaiser-Bessel kernel on GPU

parent 732d1d44
Branches
Tags RICKv2.0
No related merge requests found
...@@ -114,7 +114,13 @@ __global__ void convolve_g( ...@@ -114,7 +114,13 @@ __global__ void convolve_g(
int grid_size_x, int grid_size_x,
int grid_size_y, int grid_size_y,
double* grid, double* grid,
double std22) #if defined(GAUSS_HI_PRECISION)
double std22
#else
double std22,
double* convkernel
#endif
)
...@@ -296,11 +302,11 @@ void wstack( ...@@ -296,11 +302,11 @@ void wstack(
mmm=cudaMalloc(&vis_img_g,Nvis*sizeof(float)); mmm=cudaMalloc(&vis_img_g,Nvis*sizeof(float));
mmm=cudaMalloc(&weight_g,(Nvis/freq_per_chan)*sizeof(float)); mmm=cudaMalloc(&weight_g,(Nvis/freq_per_chan)*sizeof(float));
//mmm=cudaMalloc(&grid_g,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double)); //mmm=cudaMalloc(&grid_g,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double));
/*
#if !defined(GAUSS_HI_PRECISION) #if !defined(GAUSS_HI_PRECISION)
mmm=cudaMalloc(&convkernel_g,increaseprecision*w_support*sizeof(double)); mmm=cudaMalloc(&convkernel_g,increaseprecision*w_support*sizeof(double));
#endif #endif
*/
if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMalloc ERROR %d !!!\n", mmm);} if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMalloc ERROR %d !!!\n", mmm);}
//mmm=cudaMemset(grid_g,0.0,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double)); //mmm=cudaMemset(grid_g,0.0,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double));
if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemset ERROR %d !!!\n", mmm);} if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemset ERROR %d !!!\n", mmm);}
...@@ -313,14 +319,15 @@ void wstack( ...@@ -313,14 +319,15 @@ void wstack(
mmm=cudaMemcpyAsync(vis_img_g, vis_img, Nvis*sizeof(float), cudaMemcpyHostToDevice, stream_stacking); mmm=cudaMemcpyAsync(vis_img_g, vis_img, Nvis*sizeof(float), cudaMemcpyHostToDevice, stream_stacking);
mmm=cudaMemcpyAsync(weight_g, weight, (Nvis/freq_per_chan)*sizeof(float), cudaMemcpyHostToDevice, stream_stacking); mmm=cudaMemcpyAsync(weight_g, weight, (Nvis/freq_per_chan)*sizeof(float), cudaMemcpyHostToDevice, stream_stacking);
/*
#if !defined(GAUSS_HI_PRECISION) #if !defined(GAUSS_HI_PRECISION)
mmm=cudaMemcpyAsync(convkernel_g, convkernel, increaseprecision*w_support*sizeof(double), cudaMemcpyHostToDevice, stream_stacking); mmm=cudaMemcpyAsync(convkernel_g, convkernel, increaseprecision*w_support*sizeof(double), cudaMemcpyHostToDevice, stream_stacking);
#endif #endif
*/
if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemcpyAsync ERROR %d !!!\n", mmm);} if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemcpyAsync ERROR %d !!!\n", mmm);}
// Call main GPU Kernel // Call main GPU Kernel
#if defined(GAUSS_HI_PRECISION)
convolve_g <<<Nbl,Nth,0,stream_stacking>>> ( convolve_g <<<Nbl,Nth,0,stream_stacking>>> (
num_w_planes, num_w_planes,
num_points, num_points,
...@@ -340,6 +347,29 @@ void wstack( ...@@ -340,6 +347,29 @@ void wstack(
grid, grid,
std22 std22
); );
#else
convolve_g <<<Nbl,Nth,0,stream_stacking>>> (
num_w_planes,
num_points,
freq_per_chan,
polarizations,
uu_g,
vv_g,
ww_g,
vis_real_g,
vis_img_g,
weight_g,
dx,
dw,
KernelLen,
grid_size_x,
grid_size_y,
grid,
std22,
convkernel_g
);
#endif
mmm=cudaStreamSynchronize(stream_stacking); mmm=cudaStreamSynchronize(stream_stacking);
//Record the event //Record the event
...@@ -360,11 +390,11 @@ void wstack( ...@@ -360,11 +390,11 @@ void wstack(
mmm=cudaFree(vis_img_g); mmm=cudaFree(vis_img_g);
mmm=cudaFree(weight_g); mmm=cudaFree(weight_g);
//mmm=cudaFree(grid_g); //mmm=cudaFree(grid_g);
/*
#if !defined(GAUSS_HI_PRECISION) #if !defined(GAUSS_HI_PRECISION)
mmm=cudaFree(convkernel_g); mmm=cudaFree(convkernel_g);
#endif #endif
*/
// Switch between CUDA and GPU versions // Switch between CUDA and GPU versions
# else # else
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment