Skip to content
Snippets Groups Projects
Commit eacb3f9e authored by Claudio Gheller's avatar Claudio Gheller
Browse files

bug fix in the CUDA implementation. nbucket defined to reduce the number of blocks

parent 9d47e94e
No related branches found
No related tags found
No related merge requests found
......@@ -19,20 +19,25 @@ __global__ void phase_g(int xaxis,
double dwnorm,
int xaxistot,
int yaxistot,
double resolution)
double resolution,
int nbucket)
{
long gid = blockIdx.x*blockDim.x + threadIdx.x;
double add_term_real;
double add_term_img;
double wterm;
long arraysize = xaxis*yaxis*num_w_planes;
long arraysize = (long)((xaxis*yaxis*num_w_planes)/nbucket + 1);
if(gid < arraysize)
{
int iw = (int)(gid/(xaxis*yaxis));
int iv = (int)((gid%(xaxis*yaxis))/xaxis);
int iu = (iv%yaxis);
long index = 2*gid;
long gid_aux = nbucket*gid;
for(int iaux=0; iaux<nbucket; iaux++)
{
int iw = gid_aux/(xaxis*yaxis);
int ivaux = gid_aux%(xaxis*yaxis);
int iv = ivaux/xaxis;
int iu = ivaux%xaxis;
long index = 2*gid_aux;
long img_index = iu+iv*xaxis;
wterm = wmin + iw*dw;
......@@ -73,7 +78,8 @@ __global__ void phase_g(int xaxis,
atomicAdd(&(image_real[img_index]),gridss[index]);
atomicAdd(&(image_imag[img_index]),gridss[index+1]);
#endif // end of PHASE_ON
gid_aux++;
}
}
}
......@@ -89,8 +95,9 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
#ifdef __CUDACC__
int nbucket = 32;
int Nth = NTHREADS;
long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth) + 1;
long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth/nbucket) + 1;
if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl);
......@@ -120,7 +127,8 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
dwnorm,
xaxistot,
yaxistot,
resolution);
resolution,
nbucket);
mmm = cudaMemcpy(image_real, image_real_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost);
mmm = cudaMemcpy(image_imag, image_imag_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost);
......
......@@ -94,8 +94,8 @@ int main(int argc, char * argv[])
double resolution;
// MESH SIZE
int grid_size_x = 256;
int grid_size_y = 256;
int grid_size_x = 2048;
int grid_size_y = 2048;
int local_grid_size_x;// = 8;
int local_grid_size_y;// = 8;
int xaxis;
......@@ -153,9 +153,9 @@ int main(int argc, char * argv[])
// INPUT FILES (only the first ndatasets entries are used)
int ndatasets = 1;
strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/");
//strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/");
//strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/");
//strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
//strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/");
strcpy(datapath,datapath_multi[0]);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment