diff --git a/phase_correction.cu b/phase_correction.cu index b5814718a480f7375b0e5d1b71e3a6b4b69403b7..d43e4cfa4f5639c7a12f9e6bc0b1ae922f1cbb27 100644 --- a/phase_correction.cu +++ b/phase_correction.cu @@ -19,20 +19,25 @@ __global__ void phase_g(int xaxis, double dwnorm, int xaxistot, int yaxistot, - double resolution) + double resolution, + int nbucket) { long gid = blockIdx.x*blockDim.x + threadIdx.x; double add_term_real; double add_term_img; double wterm; - long arraysize = xaxis*yaxis*num_w_planes; + long arraysize = (long)((xaxis*yaxis*num_w_planes)/nbucket + 1); if(gid < arraysize) { - int iw = (int)(gid/(xaxis*yaxis)); - int iv = (int)((gid%(xaxis*yaxis))/xaxis); - int iu = (iv%yaxis); - long index = 2*gid; + long gid_aux = nbucket*gid; + for(int iaux=0; iaux<nbucket; iaux++) + { + int iw = gid_aux/(xaxis*yaxis); + int ivaux = gid_aux%(xaxis*yaxis); + int iv = ivaux/xaxis; + int iu = ivaux%xaxis; + long index = 2*gid_aux; long img_index = iu+iv*xaxis; wterm = wmin + iw*dw; @@ -73,7 +78,8 @@ __global__ void phase_g(int xaxis, atomicAdd(&(image_real[img_index]),gridss[index]); atomicAdd(&(image_imag[img_index]),gridss[index+1]); #endif // end of PHASE_ON - + gid_aux++; + } } } @@ -89,8 +95,9 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in #ifdef __CUDACC__ + int nbucket = 32; int Nth = NTHREADS; - long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth) + 1; + long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth/nbucket) + 1; if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl); @@ -120,7 +127,8 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in dwnorm, xaxistot, yaxistot, - resolution); + resolution, + nbucket); mmm = cudaMemcpy(image_real, image_real_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost); mmm = cudaMemcpy(image_imag, image_imag_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost); diff --git a/w-stacking-fftw.c b/w-stacking-fftw.c index 07f25becd5e607419fcb767ca57d4b4c9e97f714..15807158c84387b53e8bef5f40926f6604a0b01c 100644 --- a/w-stacking-fftw.c +++ b/w-stacking-fftw.c @@ -94,8 +94,8 @@ int main(int argc, char * argv[]) double resolution; // MESH SIZE - int grid_size_x = 256; - int grid_size_y = 256; + int grid_size_x = 2048; + int grid_size_y = 2048; int local_grid_size_x;// = 8; int local_grid_size_y;// = 8; int xaxis; @@ -153,9 +153,9 @@ int main(int argc, char * argv[]) // INPUT FILES (only the first ndatasets entries are used) int ndatasets = 1; - strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/"); + //strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/"); //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/"); - //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); + strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); //strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/"); strcpy(datapath,datapath_multi[0]);