diff --git a/.gitignore b/.gitignore index 2f2b2deae59f09c00e894b90488f0ee3ecb200a2..8edc32e8a54a246064c56e86b61e7ea0eccb1724 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ w-stackingfftw w-stackingCfftw_serial w-stackingfftw_serial inverse-imaging +.* diff --git a/Build/Makefile.M100 b/Build/Makefile.M100 index 737d24294f64606a97860290efd98ece981a1d9c..a1dde127926069cda6b36834d3241ffc7790f0dc 100644 --- a/Build/Makefile.M100 +++ b/Build/Makefile.M100 @@ -13,7 +13,7 @@ NVCC = nvcc NVFLAGS = -arch=sm_70 -Xcompiler -mno-float128 -std=c++11 NVLIB = -L/cineca/prod/opt/compilers/cuda/10.1/none/lib64/ -lcudart -lcuda -OMP= -fopenmp +OMP= #-fopenmp CFLAGS += -I. $(FFTW_INCL) $(GSL_INCL) $(MPI_INCL) diff --git a/Makefile b/Makefile index 0a8527d5b6f6a56e44f6bc79615afb8ce15f0575..b8257839d7d2c666afbdf455f35cc389c11c3437 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ endif # perform one-side communication (suggested) instead of reduce (only if MPI is active) OPT += -DONE_SIDE # write the full 3D cube of gridded visibilities and its FFT transform -#OPT += -DWRITE_DATA +OPT += -DWRITE_DATA # write the final image OPT += -DWRITE_IMAGE # perform w-stacking phase correction @@ -71,8 +71,8 @@ serial_cuda: $(CXX) $(OPTIMIZE) $(OPT) -o w-stackingfftw_serial w-stacking-fftw.o w-stacking.o phase_correction.o $(CFLAGS) $(NVLIB) -lm mpi: $(COBJ) - $(MPICC) $(OPTIMIZE) -o w-stackingCfftw $^ $(CFLAGS) $(LIBS) - $(MPICC) $(OPTIMIZE) -o inverse-imaging inverse-imaging.c w-stacking.c $(CFLAGS) $(LIBS) + $(MPICC) $(OPTIMIZE) $(OPT) -o w-stackingCfftw $^ $(CFLAGS) $(LIBS) + $(MPICC) $(OPTIMIZE) $(OPT) -o inverse-imaging inverse-imaging.c w-stacking.c $(CFLAGS) $(LIBS) mpi_cuda: $(NVCC) $(NVFLAGS) -c w-stacking.cu phase_correction.cu $(NVLIB) diff --git a/inverse-imaging.c b/inverse-imaging.c index 2a10345487ef06c191eee3ea001f38f355ed3fde..a3dfbbeaebb15f3b9c0c0050a56e2013c1087674 100644 --- a/inverse-imaging.c +++ b/inverse-imaging.c @@ -201,6 +201,7 @@ if(rank == 0){ // LOCAL grid size xaxis = local_grid_size_x; yaxis = local_grid_size_y; + double shift = (double)(dx*yaxis); clock_gettime(CLOCK_MONOTONIC, &begin); start = clock(); @@ -232,6 +233,12 @@ if(rank == 0){ fclose(pFile); + // calculate the resolution in radians + resolution = 1.0/MAX(abs(uvmin),abs(uvmax)); + // calculate the resolution in arcsec + double resolution_asec = (3600.0*180.0)/MAX(abs(uvmin),abs(uvmax))/PI; + printf("RESOLUTION = %f rad, %f arcsec\n", resolution, resolution_asec); + // WATCH THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! int nsub = 1000; //int nsub = 10; @@ -253,8 +260,8 @@ if(rank == 0){ long nm_pe = (long)(Nmeasures/size); long remaining = Nmeasures%size; - long startrow = rank*nm_pe; - if (rank == size-1)nm_pe = nm_pe+remaining; + long startrow = rank*nm_pe; + if (rank == size-1)nm_pe = nm_pe+remaining; long Nmeasures_tot = Nmeasures; Nmeasures = nm_pe; @@ -280,7 +287,7 @@ if(rank == 0){ // reading baselines - if(rank == 0)printf("READING DATA\n"); + if(rank == 0)printf("READING DATA\n"); // Read data strcpy(filename,datapath); strcat(filename,ufile); @@ -295,7 +302,7 @@ if(rank == 0){ strcat(filename,vfile); //printf("Reading %s\n",filename); - pFile = fopen (filename,"rb"); + pFile = fopen (filename,"rb"); fseek (pFile,startrow*sizeof(double),SEEK_SET); fread(vv,Nmeasures*sizeof(double),1,pFile); fclose(pFile); @@ -332,15 +339,163 @@ if(rank == 0){ pFilereal = fopen (filename,"rb"); long global_index = rank*(xaxis*yaxis)*sizeof(double); fseek(pFilereal, global_index, SEEK_SET); - fwrite(image_real, xaxis*yaxis, sizeof(double), pFilereal); + fread(image_real, xaxis*yaxis, sizeof(double), pFilereal); fclose(pFilereal); // image read + // We read binary images, however we can easily extend to fits files - if(rank == 0)printf("FFT TRANSFORMING (from Real to Complex Fourier space)\n"); + if(rank == 0)printf("FFT TRANSFORMING (from Real to Complex Fourier space)\n"); + fftw_plan plan; + ptrdiff_t alloc_local, local_n0, local_0_start; + // FFTW double input variable whose size is = local_n0 x 2(Nx/2+1) + double *fftwimage; + // FFTW complex output variable whose size is = local_n0 x Nx/2 + 1 + fftw_complex *fftwgrid; + + alloc_local = fftw_mpi_local_size_2d(grid_size_y, grid_size_x, MPI_COMM_WORLD, &local_n0, &local_0_start); + fftwimage = fftw_alloc_real(2 * alloc_local); + fftwgrid = fftw_alloc_complex(alloc_local); + + // Create plan for r2c DFT + //plan = fftw_mpi_plan_dft_r2c_2d(grid_size_y, grid_size_x, fftwimage, fftwgrid, MPI_COMM_WORLD, FFTW_FORWARD, FFTW_ESTIMATE); + plan = fftw_mpi_plan_dft_r2c_2d(grid_size_y, grid_size_x, fftwimage, fftwgrid, MPI_COMM_WORLD, FFTW_ESTIMATE); + + // Initialize FFTW arrays + long fftwindex = 0; + long fftwindex2D = 0; + for (long iv = 0; iv < local_n0; iv++) + for (long iu = 0; iu < xaxis; iu++) + { + fftwindex2D = iu + iv*xaxis; + fftwimage[fftwindex2D] = image_real[fftwindex2D]; + fftwgrid[fftwindex2D][0] = 0.0; + fftwgrid[fftwindex2D][1] = 0.0; + } + + // Perform the FFT + fftw_execute(plan); + // Finalize the FFT + fftw_destroy_plan(plan); + + // Create the grid array + double * grid; + long size_of_grid = 2*num_w_planes*xaxis*yaxis; + grid = (double*) calloc(size_of_grid,sizeof(double)); + for (int iw=0; iw 1) + { + for (int iw=0; iw= 0.0) {histo_send[binphi+1]++; boundary[iphi] = binphi+1; Push(§orhead[binphi+1],iphi);}; + Push(§orhead[binphi],iphi); + if(updist < w_supporth && updist >= 0.0) {histo_send[binphi+1]++; boundary[iphi] = binphi+1; Push(§orhead[binphi+1],iphi);}; if(downdist < w_supporth && binphi > 0 && downdist >= 0.0) {histo_send[binphi-1]++; boundary[iphi] = binphi-1; Push(§orhead[binphi-1],iphi);}; } + // Linked lists and communication histograms created + #ifdef PIPPO struct sectorlist * current; long iiii = 0; @@ -393,39 +550,9 @@ if(rank == 0){ #endif #ifdef VERBOSE - for (int iii=0; iiiindex != -1) - { + while (current->index != -1) + { long ilocal = current->index; //double vvh = vv[ilocal]; //int binphi = (int)(vvh*nsectors); @@ -540,36 +613,35 @@ if(rank == 0){ vvs[icount] = vv[ilocal]-isector*shift; wws[icount] = ww[ilocal]; for (long ipol=0; ipol1e10 || visimgs[inu]<-1e10)printf("%f %f %ld %ld %d %ld %ld\n",visreals[inu],visimgs[inu],inu,Nvissec,rank,ilocal*polarisations*freq_per_chan+ifreq,Nvis); - inu++; - } - icount++; - //} - current = current->next; } + for (long ifreq=0; ifreq1e10 || visimgs[inu]<-1e10)printf("%f %f %ld %ld %d %ld %ld\n",visreals[inu],visimgs[inu],inu,Nvissec,rank,ilocal*polarisations*freq_per_chan+ifreq,Nvis); + inu++; + } + icount++; + current = current->next; + } - clock_gettime(CLOCK_MONOTONIC, &finishk); - endk = clock(); - compose_time += ((double) (endk - startk)) / CLOCKS_PER_SEC; - compose_time1 += (finishk.tv_sec - begink.tv_sec); - compose_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; + clock_gettime(CLOCK_MONOTONIC, &finishk); + endk = clock(); + compose_time += ((double) (endk - startk)) / CLOCKS_PER_SEC; + compose_time1 += (finishk.tv_sec - begink.tv_sec); + compose_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; #ifndef USE_MPI - double uumin = 1e20; - double vvmin = 1e20; - double uumax = -1e20; - double vvmax = -1e20; + double uumin = 1e20; + double vvmin = 1e20; + double uumax = -1e20; + double vvmax = -1e20; - for (long ipart=0; ipart %f\n",gridss[iii]); - clock_gettime(CLOCK_MONOTONIC, &finishk); - endk = clock(); - kernel_time += ((double) (endk - startk)) / CLOCKS_PER_SEC; - kernel_time1 += (finishk.tv_sec - begink.tv_sec); - kernel_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; - #ifdef VERBOSE - printf("Processed sector %ld\n",isector); - #endif - clock_gettime(CLOCK_MONOTONIC, &begink); - startk = clock(); - - //for (long iii=0; iii<2*xaxis*yaxis*num_w_planes; iii++)printf("--> %f\n",gridss[iii]); - - #ifndef USE_MPI - long stride = isector*2*xaxis*yaxis*num_w_planes; - for (long iii=0; iii<2*xaxis*yaxis*num_w_planes; iii++)gridtot[stride+iii] = gridss[iii]; - #endif + #ifndef USE_MPI + long stride = isector*2*xaxis*yaxis*num_w_planes; + for (long iii=0; iii<2*xaxis*yaxis*num_w_planes; iii++)gridtot[stride+iii] = gridss[iii]; + #endif - // Write grid in the corresponding remote slab - #ifdef USE_MPI - int target_rank = (int)isector; - //int target_rank = (int)(size-isector-1); - #ifdef ONE_SIDE - printf("One Side communication active\n"); - MPI_Win_lock(MPI_LOCK_SHARED,target_rank,0,slabwin); - MPI_Accumulate(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,MPI_SUM,slabwin); - MPI_Win_unlock(target_rank,slabwin); - //MPI_Put(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,slabwin); - #else - MPI_Reduce(gridss,grid,size_of_grid,MPI_DOUBLE,MPI_SUM,target_rank,MPI_COMM_WORLD); - #endif //ONE_SIDE - #endif //USE_MPI - - clock_gettime(CLOCK_MONOTONIC, &finishk); - endk = clock(); - reduce_time += ((double) (endk - startk)) / CLOCKS_PER_SEC; - reduce_time1 += (finishk.tv_sec - begink.tv_sec); - reduce_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; - // Go to next sector - for (long inull=0; inull<2*num_w_planes*xaxis*yaxis; inull++)gridss[inull] = 0.0; - - // Deallocate all sector arrays - free(uus); - free(vvs); - free(wws); - free(weightss); - free(visreals); - free(visimgs); + // Write grid in the corresponding remote slab + #ifdef USE_MPI + int target_rank = (int)isector; + //int target_rank = (int)(size-isector-1); + #ifdef ONE_SIDE + printf("One Side communication active\n"); + MPI_Win_lock(MPI_LOCK_SHARED,target_rank,0,slabwin); + MPI_Accumulate(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,MPI_SUM,slabwin); + MPI_Win_unlock(target_rank,slabwin); + //MPI_Put(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,slabwin); + #else + MPI_Reduce(gridss,grid,size_of_grid,MPI_DOUBLE,MPI_SUM,target_rank,MPI_COMM_WORLD); + #endif //ONE_SIDE + #endif //USE_MPI + + clock_gettime(CLOCK_MONOTONIC, &finishk); + endk = clock(); + reduce_time += ((double) (endk - startk)) / CLOCKS_PER_SEC; + reduce_time1 += (finishk.tv_sec - begink.tv_sec); + reduce_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; + // Go to next sector + for (long inull=0; inull<2*num_w_planes*xaxis*yaxis; inull++)gridss[inull] = 0.0; + + // Deallocate all sector arrays + free(uus); + free(vvs); + free(wws); + free(weightss); + free(visreals); + free(visimgs); // End of loop over sectors } // End of loop over input files @@ -676,40 +746,6 @@ if(rank == 0){ MPI_Win_fence(0,slabwin); #endif - // Swap left and right parts APPARENTLY NOT NECESSARY - /* - for (long kswap=0; kswap 1) - { - for (int iw=0; iw n0) - // and perform the FFT per w plane - alloc_local = fftw_mpi_local_size_2d(grid_size_y, grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start); - fftwgrid = fftw_alloc_complex(alloc_local); - plan = fftw_mpi_plan_dft_2d(grid_size_y, grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE); - - long fftwindex = 0; - long fftwindex2D = 0; - for (int iw=0; iw 1) - { - for (int iw=0; iw