Skip to content
Snippets Groups Projects
Commit eacb3f9e authored by Claudio Gheller
Browse files

bug fix in the CUDA implementation. nbucket defined to reduce the number of blocks

parent 9d47e94e
No related branches found
No related tags found
No related merge requests found
...@@ -19,20 +19,25 @@ __global__ void phase_g(int xaxis, ...@@ -19,20 +19,25 @@ __global__ void phase_g(int xaxis,
double dwnorm, double dwnorm,
int xaxistot, int xaxistot,
int yaxistot, int yaxistot,
double resolution) double resolution,
int nbucket)
{ {
long gid = blockIdx.x*blockDim.x + threadIdx.x; long gid = blockIdx.x*blockDim.x + threadIdx.x;
double add_term_real; double add_term_real;
double add_term_img; double add_term_img;
double wterm; double wterm;
long arraysize = xaxis*yaxis*num_w_planes; long arraysize = (long)((xaxis*yaxis*num_w_planes)/nbucket + 1);
if(gid < arraysize) if(gid < arraysize)
{ {
int iw = (int)(gid/(xaxis*yaxis)); long gid_aux = nbucket*gid;
int iv = (int)((gid%(xaxis*yaxis))/xaxis); for(int iaux=0; iaux<nbucket; iaux++)
int iu = (iv%yaxis); {
long index = 2*gid; int iw = gid_aux/(xaxis*yaxis);
int ivaux = gid_aux%(xaxis*yaxis);
int iv = ivaux/xaxis;
int iu = ivaux%xaxis;
long index = 2*gid_aux;
long img_index = iu+iv*xaxis; long img_index = iu+iv*xaxis;
wterm = wmin + iw*dw; wterm = wmin + iw*dw;
...@@ -73,7 +78,8 @@ __global__ void phase_g(int xaxis, ...@@ -73,7 +78,8 @@ __global__ void phase_g(int xaxis,
atomicAdd(&(image_real[img_index]),gridss[index]); atomicAdd(&(image_real[img_index]),gridss[index]);
atomicAdd(&(image_imag[img_index]),gridss[index+1]); atomicAdd(&(image_imag[img_index]),gridss[index+1]);
#endif // end of PHASE_ON #endif // end of PHASE_ON
gid_aux++;
}
} }
} }
...@@ -89,8 +95,9 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in ...@@ -89,8 +95,9 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
#ifdef __CUDACC__ #ifdef __CUDACC__
int nbucket = 32;
int Nth = NTHREADS; int Nth = NTHREADS;
long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth) + 1; long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth/nbucket) + 1;
if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl); printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl);
...@@ -120,7 +127,8 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in ...@@ -120,7 +127,8 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
dwnorm, dwnorm,
xaxistot, xaxistot,
yaxistot, yaxistot,
resolution); resolution,
nbucket);
mmm = cudaMemcpy(image_real, image_real_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost); mmm = cudaMemcpy(image_real, image_real_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost);
mmm = cudaMemcpy(image_imag, image_imag_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost); mmm = cudaMemcpy(image_imag, image_imag_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost);
......
...@@ -94,8 +94,8 @@ int main(int argc, char * argv[]) ...@@ -94,8 +94,8 @@ int main(int argc, char * argv[])
double resolution; double resolution;
// MESH SIZE // MESH SIZE
int grid_size_x = 256; int grid_size_x = 2048;
int grid_size_y = 256; int grid_size_y = 2048;
int local_grid_size_x;// = 8; int local_grid_size_x;// = 8;
int local_grid_size_y;// = 8; int local_grid_size_y;// = 8;
int xaxis; int xaxis;
...@@ -153,9 +153,9 @@ int main(int argc, char * argv[]) ...@@ -153,9 +153,9 @@ int main(int argc, char * argv[])
// INPUT FILES (only the first ndatasets entries are used) // INPUT FILES (only the first ndatasets entries are used)
int ndatasets = 1; int ndatasets = 1;
strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/"); //strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/");
//strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/"); //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/");
//strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
//strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/"); //strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/");
strcpy(datapath,datapath_multi[0]); strcpy(datapath,datapath_multi[0]);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment