diff --git a/Makefile b/Makefile index 9d6921cc0895d8139818e537a00c10cdf7d05f2e..165575e64fbf47d15af899c8ddd38c9bb67d845f 100644 --- a/Makefile +++ b/Makefile @@ -33,22 +33,18 @@ OPT += -DNVIDIA # write the final image OPT += -DWRITE_IMAGE # perform w-stacking phase correction -<<<<<<< HEAD OPT += -DPHASE_ON # GPU support for FFT using cuFFTMP OPT += -DCUDA_FFT -# Support CFITSIO -======= -#OPT += -DPHASE_ON # Support CFITSIO !!! Remember to add the path to the CFITSIO library to LD_LIBRARY_PATH ->>>>>>> main #OPT += -DFITSIO # Perform true parallel images writing #OPT += -DPARALLELIO # Normalize uvw in case it is not done in the binMS -OPT += -DNORMALIZE_UVW +#OPT += -DNORMALIZE_UVW # Gridding kernel: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL -OPT += -DGAUSS_HI_PRECISION +OPT += -DGAUSS +#OPT += -DVERBOSE ifeq (FITSIO,$(findstring FITSIO,$(OPT))) LIBS += -L$(FITSIO_LIB) -lcfitsio @@ -100,3 +96,5 @@ clean: rm *.o rm w-stacking.c rm phase_correction.c +# module load spectrum_mpi/10.3.1--binary && module load fftw && module load cuda/11.3 && module load profile/candidate && module load hpc-sdk/2022--binary && LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/m100_work/IscrC_CD-DLS/cfitsio-3.49 \ +# && LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/cineca/prod/opt/compilers/hpc-sdk/2022/binary/Linux_ppc64le/2022/math_libs/11.8/lib64 && export LD_LIBRARY_PATH diff --git a/phase_correction.cu b/phase_correction.cu index 98bd6cc96939e214942e86cf947af32332841e40..1be47b2bffc7e39b164169c66f8b5f5ecabf2860 100644 --- a/phase_correction.cu +++ b/phase_correction.cu @@ -98,7 +98,7 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in // WARNING: nbucket MUST be chosen such that xaxis*yaxis*num_w_planes is a multiple of nbucket int nbucket = 1; int Nth = NTHREADS; - long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth/nbucket) + 1; + long Nbl = (long)((num_w_planes*xaxis*yaxis)/nbucket/Nth) + 1; if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl); diff --git a/w-stacking-fftw.c b/w-stacking-fftw.c index 70024c21be588c92273cdc3d47c17f689db920e3..3d274a6fcbe830e5e3a921dcba94dbb0ca958bb6 100644 --- a/w-stacking-fftw.c +++ b/w-stacking-fftw.c @@ -106,8 +106,8 @@ int main(int argc, char * argv[]) double resolution; // Mesh related parameters: global size - int grid_size_x = 4096; - int grid_size_y = 4096; + int grid_size_x = 8192; + int grid_size_y = 8192; // Split Mesh size (auto-calculated) int local_grid_size_x; int local_grid_size_y; @@ -115,7 +115,7 @@ int main(int argc, char * argv[]) int yaxis; // Number of planes in the w direction - int num_w_planes = 1; + int num_w_planes = 2; // Size of the convoutional kernel support int w_support = 7; @@ -131,23 +131,13 @@ int main(int argc, char * argv[]) // Initialize FITS image parameters -<<<<<<< HEAD #ifdef FITSIO fitsfile *fptreal; fitsfile *fptrimg; int status; - char testfitsreal[FILENAMELENGTH] = "parallel_np8_real.fits"; - char testfitsimag[FILENAMELENGTH] = "parallel_np8_img.fits"; -======= - #ifdef FITSIO - fitsfile *fptreal; - fitsfile *fptrimg; - int status; - char testfitsreal[FILENAMELENGTH] = "parallel_np2_real.fits"; - char testfitsimag[FILENAMELENGTH] = "parallel_np2_img.fits"; - #endif ->>>>>>> main + char testfitsreal[FILENAMELENGTH] = "parallel_real.fits"; + char testfitsimag[FILENAMELENGTH] = "parallel_img.fits"; long naxis = 2; long naxes[2] = { grid_size_x, grid_size_y }; @@ -241,7 +231,9 @@ if(rank == 0){ // INPUT FILES (only the first ndatasets entries are used) int ndatasets = 1; - strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/SKA3/ZW2_IFRQ_0-1-5.binMS/"); +// strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/SKA3/ZW2_IFRQ_0-1-5.binMS/"); + strcpy(datapath_multi[0],"/m100_scratch/userexternal/ederubei/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); + strcpy(datapath,datapath_multi[0]); // Read metadata @@ -266,7 +258,7 @@ if(rank == 0){ // WATCH THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! int nsub = 1000; //int nsub = 10; - if(rank == 0)printf("Subtracting last %d measurements\n",nsub); + printf("Subtracting last %d measurements\n",nsub); Nmeasures = Nmeasures-nsub; Nvis = Nmeasures*freq_per_chan*polarisations; @@ -345,8 +337,8 @@ if(rank == 0){ double maxu = -1e20; double maxv = -1e20; double maxw = -1e20; - - for (long inorm=0; inorm<Nmeasures; inorm++) + + for (long inorm=0; inorm<Nmeasures; inorm++) { minu = MIN(minu,uu[inorm]); minv = MIN(minv,vv[inorm]); @@ -504,7 +496,7 @@ if(rank == 0){ for (int ifiles=0; ifiles<ndatasets; ifiles++) { strcpy(filename,datapath_multi[ifiles]); - if(rank == 0)printf("Processing %s, %d of %d\n",filename,ifiles+1,ndatasets); + printf("Processing %s, %d of %d\n",filename,ifiles+1,ndatasets); // Read metadata strcpy(filename,datapath); @@ -528,7 +520,7 @@ if(rank == 0){ resolution = 1.0/MAX(abs(uvmin),abs(uvmax)); // calculate the resolution in arcsec double resolution_asec = (3600.0*180.0)/MAX(abs(uvmin),abs(uvmax))/PI; - if(rank == 0)printf("RESOLUTION = %f rad, %f arcsec\n", resolution, resolution_asec); + printf("RESOLUTION = %f rad, %f arcsec\n", resolution, resolution_asec); strcpy(filename,datapath); strcat(filename,weightsfile); diff --git a/w-stacking.cu b/w-stacking.cu index cbad05c30e47fdb45ff8c86e424f2f00967c5522..fa7ad81409e48ff9d639f83225f116e5dce4cd53 100644 --- a/w-stacking.cu +++ b/w-stacking.cu @@ -59,7 +59,7 @@ void makeKaiserBesselKernel(double * kernel, double overSamplingFactor, int withSinc) { int n = increaseprecision*KernelLen, mid = n / 2; - double * sincKernel = malloc((mid + 1)*sizeof(*sincKernel)); + double * sincKernel = (double*)malloc((mid + 1) * sizeof(*sincKernel)); const double filterRatio = 1.0 / overSamplingFactor; sincKernel[0] = filterRatio; for (int i = 1; i != mid + 1; i++) { @@ -209,7 +209,7 @@ void wstack( double std = 1.0; double std22 = 1.0/(2.0*std*std); double norm = std22/PI; - double * convkernel = malloc(increaseprecision*w_support*sizeof(*convkernel)); + double * convkernel = (double*)malloc(increaseprecision*w_support*sizeof(*convkernel)); double overSamplingFactor = 1.0; int withSinc = 0; double alpha = 8.6; @@ -220,7 +220,7 @@ void wstack( makeKaiserBesselKernel(convkernel, w_support, increaseprecision, alpha, overSamplingFactor, withSinc); #endif - + // Loop over visibilities. // Switch between CUDA and GPU versions #ifdef __CUDACC__ @@ -249,7 +249,10 @@ void wstack( mmm=cudaMalloc(&vis_img_g,Nvis*sizeof(float)); mmm=cudaMalloc(&weight_g,(Nvis/freq_per_chan)*sizeof(float)); mmm=cudaMalloc(&grid_g,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double)); + if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMalloc ERROR %d !!!\n", mmm);} mmm=cudaMemset(grid_g,0.0,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double)); + if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemset ERROR %d !!!\n", mmm);} + mmm=cudaMemcpy(uu_g, uu, num_points*sizeof(double), cudaMemcpyHostToDevice); mmm=cudaMemcpy(vv_g, vv, num_points*sizeof(double), cudaMemcpyHostToDevice); @@ -257,6 +260,7 @@ void wstack( mmm=cudaMemcpy(vis_real_g, vis_real, Nvis*sizeof(float), cudaMemcpyHostToDevice); mmm=cudaMemcpy(vis_img_g, vis_img, Nvis*sizeof(float), cudaMemcpyHostToDevice); mmm=cudaMemcpy(weight_g, weight, (Nvis/freq_per_chan)*sizeof(float), cudaMemcpyHostToDevice); + if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemcpy ERROR %d !!!\n", mmm);} // Call main GPU Kernel convolve_g <<<Nbl,Nth>>> ( @@ -348,7 +352,7 @@ void wstack( int jKer = (int)(increaseprecision * (fabs(u_dist+(double)KernelLen))); int kKer = (int)(increaseprecision * (fabs(v_dist+(double)KernelLen))); - #ifdef GAUSS_HI_PRECISION + #ifdef GAUSS_HI_PRECISION double conv_weight = gauss_kernel_norm(norm,std22,u_dist,v_dist); #endif #ifdef GAUSS @@ -359,7 +363,7 @@ void wstack( #endif #ifdef KAISERBESSEL double conv_weight = convkernel[jKer]*convkernel[kKer]; - #endif + #endif // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0;