#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef USE_MPI
#include <mpi.h>
#ifdef USE_FFTW
#include <fftw3-mpi.h>
#endif
#endif
#include <math.h>
#include <time.h>
#include "w-stacking.h"

#define PI 3.14159265359
#define NUM_OF_SECTORS -1
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
#define NOVERBOSE
#define NFILES 10

// Linked List set-up
struct sectorlist {
  long index;
  struct sectorlist * next;
};

void Push(struct sectorlist** headRef, long data) {
  struct sectorlist* newNode = malloc(sizeof(struct sectorlist));
  newNode->index = data;
  newNode->next = *headRef;
  *headRef = newNode;
}
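/*
 * Illustrative sketch (not called anywhere): how the per-sector lists
 * built in main() are meant to be used. It assumes the same sentinel
 * convention adopted below, i.e. each list is terminated by a node
 * whose index is -1. Push() prepends, so entries come out in reverse
 * order of insertion.
 */
static void sectorlist_demo(void)
{
  struct sectorlist sentinel = { -1, NULL };
  struct sectorlist * head = &sentinel;
  Push(&head, 42);   // list: 42 -> sentinel
  Push(&head, 7);    // list: 7 -> 42 -> sentinel
  for (struct sectorlist * p = head; p->index != -1; p = p->next)
    printf("measurement index %ld\n", p->index);
  while (head != &sentinel)   // release the heap nodes
  {
    struct sectorlist * next = head->next;
    free(head);
    head = next;
  }
}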
// Main Code
int main(int argc, char * argv[])
{
  int rank;
  int size;
  FILE * pFile;
  FILE * pFile1;
  FILE * pFilereal;
  FILE * pFileimg;
  char filename[1000];
  //char datapath[900] = "/m100_scratch/userexternal/cgheller/gridding/old/data/gauss2_t201806301100_SBL180.binMS/";
  //char datapath[900] = "/m100_scratch/userexternal/cgheller/gridding/newgauss2noconj_t201806301100_SBL180.binMS/";
  //char datapath[900] = "/m100_scratch/userexternal/cgheller/gridding/gauss1_t201806301100_SBL180.binMS/";
  //char datapath[900] = "/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/";
  //char datapath[900] = "/m100_scratch/userexternal/cgheller/gridding/gauss4_t201806301100_SBL180.binMS/";
  //char datapath[900] = "/m100_scratch/userexternal/cgheller/gridding/hba-8hrs_t201806301100_SBH255i-test.binMS/";
  //
  //char datapath[900] = "/m100_scratch/userexternal/cgheller/gridding/hba-8hrs_gauss4new.binMS/";
  //char datapath[900] = "/m100_scratch/userexternal/cgheller/Lofar/Observations/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/";
  char datapath[900];
  char datapath_multi[NFILES][900];
  char ufile[30] = "ucoord.bin";
  char vfile[30] = "vcoord.bin";
  char wfile[30] = "wcoord.bin";
  char weightsfile[30] = "weights.bin";
  char visrealfile[30] = "visibilities_real.bin";
  char visimgfile[30] = "visibilities_img.bin";
  char metafile[30] = "meta.txt";
  char outfile[30] = "grid.txt";
  char outfile1[30] = "coords.txt";
  char outfile2[30] = "grid_real.bin";
  char outfile3[30] = "grid_img.bin";
  char fftfile[30] = "fft.txt";
  char fftfile2[30] = "fft_real.bin";
  char fftfile3[30] = "fft_img.bin";
  char logfile[30] = "run.log";
  char extension[30] = ".txt";
  char srank[4];

  double * uu;
  double * vv;
  double * ww;
  float * weights;
  float * visreal;
  float * visimg;

  long Nmeasures;
  long Nvis;
  long Nweights;
  long freq_per_chan;
  long polarisations;
  long Ntimes;
  double dt;
  double thours;
  long baselines;
  double uvmin;
  double uvmax;
  double wmin;
  double wmax;

  int grid_size_x = 2048;
  int grid_size_y = 2048;
  int local_grid_size_x;// = 100;
  int local_grid_size_y;// = 100;
  int xaxis;
  int yaxis;
  int num_w_planes = 10;
  // DAV: the corresponding KernelLen is calculated within the wstack function.
  // It can anyway be hardcoded for optimization
  int w_support = 7;
  int num_threads;// = 4;
  double dx = 1.0/(double)grid_size_x;
  double dw = 1.0/(double)num_w_planes;
  double w_supporth = (double)((w_support-1)/2)*dx;

  clock_t start, end, start0, startk, endk;
  // the accumulating timers must start from zero
  double setup_time, process_time, mpi_time, fftw_time, tot_time;
  double kernel_time = 0.0, reduce_time = 0.0, compose_time = 0.0;
  double setup_time1, process_time1, mpi_time1, fftw_time1, tot_time1;
  double kernel_time1 = 0.0, reduce_time1 = 0.0, compose_time1 = 0.0;
  double writetime, writetime1;
  struct timespec begin, finish, begin0, begink, finishk;
  double elapsed;
  long nsectors;

  clock_gettime(CLOCK_MONOTONIC, &begin0);
  start0 = clock();

  // Set the number of OpenMP threads (default to 1 if no argument is given)
  num_threads = (argc > 1) ? atoi(argv[1]) : 1;

  // Initialize the MPI environment
#ifdef USE_MPI
  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  if(rank == 0)printf("Running with %d MPI tasks\n",size);
#ifdef USE_FFTW
  fftw_mpi_init();
#endif
#else
  rank = 0;
  size = 1;
#endif
  if(rank == 0)printf("Running with %d threads\n",num_threads);

  // set the local size of the image
  local_grid_size_x = grid_size_x;
  nsectors = NUM_OF_SECTORS;
  if (nsectors < 0) nsectors = size;
  local_grid_size_y = grid_size_y/nsectors;
  //nsectors = size;

  // LOCAL grid size
  xaxis = local_grid_size_x;
  yaxis = local_grid_size_y;

  clock_gettime(CLOCK_MONOTONIC, &begin);
  start = clock();

  // CLAAAAAA
  int ndatasets = 1;
  strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
  strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/");
  strcpy(datapath,datapath_multi[0]);

  // Read metadata
  strcpy(filename,datapath);
  strcat(filename,metafile);
  pFile = fopen (filename,"r");
  fscanf(pFile,"%ld",&Nmeasures);
  fscanf(pFile,"%ld",&Nvis);
  fscanf(pFile,"%ld",&freq_per_chan);
  fscanf(pFile,"%ld",&polarisations);
  fscanf(pFile,"%ld",&Ntimes);
  fscanf(pFile,"%lf",&dt);
  fscanf(pFile,"%lf",&thours);
  fscanf(pFile,"%ld",&baselines);
  fscanf(pFile,"%lf",&uvmin);
  fscanf(pFile,"%lf",&uvmax);
  fscanf(pFile,"%lf",&wmin);
  fscanf(pFile,"%lf",&wmax);
  fclose(pFile);

  // WATCH THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  int nsub = 1000;
  //int nsub = 10;
  printf("Subtracting last %d measurements\n",nsub);
  Nmeasures = Nmeasures-nsub;
  Nvis = Nmeasures*freq_per_chan*polarisations;

  // calculate the coordinates of the center
  double uvshift = uvmin/(uvmax-uvmin);
  //printf("UVSHIFT %f %f %f %f %f\n",uvmin, uvmax, wmin, wmax, uvshift);

  if (rank == 0)
  {
    printf("N. measurements %ld\n",Nmeasures);
    printf("N. visibilities %ld\n",Nvis);
  }

  // Set temporary local size of points
  long nm_pe = (long)(Nmeasures/size);
  long remaining = Nmeasures%size;
  long startrow = rank*nm_pe;
  if (rank == size-1)nm_pe = nm_pe+remaining;
  long Nmeasures_tot = Nmeasures;
  Nmeasures = nm_pe;
  long Nvis_tot = Nvis;
  Nvis = Nmeasures*freq_per_chan*polarisations;
  Nweights = Nmeasures*polarisations;

#ifdef VERBOSE
  printf("N. measurements on %d %ld\n",rank,Nmeasures);
  printf("N. visibilities on %d %ld\n",rank,Nvis);
#endif
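  /*
   * Worked example of the block decomposition above (hypothetical
   * numbers): with Nmeasures = 1001 and size = 4, nm_pe = 250 and
   * remaining = 1, so ranks 0-2 each read 250 measurements starting at
   * startrow = rank*250, while the last rank reads 251. The local Nvis
   * and Nweights are then rescaled from the local Nmeasures.
   */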
  // DAV: all these arrays can be allocated statically for the sake of optimization.
  // However be careful that if MPI is used
  // all the sizes are rescaled by the number of MPI tasks

  // Allocate arrays
  uu = (double*) calloc(Nmeasures,sizeof(double));
  vv = (double*) calloc(Nmeasures,sizeof(double));
  ww = (double*) calloc(Nmeasures,sizeof(double));
  weights = (float*) calloc(Nweights,sizeof(float));
  visreal = (float*) calloc(Nvis,sizeof(float));
  visimg = (float*) calloc(Nvis,sizeof(float));

  if(rank == 0)printf("READING DATA\n");
  // Read data
  strcpy(filename,datapath);
  strcat(filename,ufile);
  //printf("Reading %s\n",filename);
  pFile = fopen (filename,"rb");
  fseek (pFile,startrow*sizeof(double),SEEK_SET);
  fread(uu,Nmeasures*sizeof(double),1,pFile);
  fclose(pFile);

  strcpy(filename,datapath);
  strcat(filename,vfile);
  //printf("Reading %s\n",filename);
  pFile = fopen (filename,"rb");
  fseek (pFile,startrow*sizeof(double),SEEK_SET);
  fread(vv,Nmeasures*sizeof(double),1,pFile);
  fclose(pFile);

  strcpy(filename,datapath);
  strcat(filename,wfile);
  //printf("Reading %s\n",filename);
  pFile = fopen (filename,"rb");
  fseek (pFile,startrow*sizeof(double),SEEK_SET);
  fread(ww,Nmeasures*sizeof(double),1,pFile);
  fclose(pFile);

#ifdef USE_MPI
  MPI_Barrier(MPI_COMM_WORLD);
#endif
  clock_gettime(CLOCK_MONOTONIC, &finish);
  end = clock();
  setup_time = ((double) (end - start)) / CLOCKS_PER_SEC;
  setup_time1 = (finish.tv_sec - begin.tv_sec);
  setup_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;

  if(rank == 0)printf("GRIDDING DATA\n");

  // Create histograms and linked lists
  clock_gettime(CLOCK_MONOTONIC, &begin);
  start = clock();

  //CLAAA
  // Initialize linked list
  struct sectorlist ** sectorhead;
  sectorhead = (struct sectorlist **) malloc((nsectors+1) * sizeof(struct sectorlist *));
  for (int isec=0; isec<=nsectors; isec++)
  {
    sectorhead[isec] = malloc(sizeof(struct sectorlist));
    sectorhead[isec]->index = -1;
    sectorhead[isec]->next = NULL;
  }

  long * histo_send = (long*) calloc(nsectors+1,sizeof(long));
  int * boundary = (int*) calloc(Nmeasures,sizeof(int));
  double uuh,vvh;
  for (long iphi = 0; iphi < Nmeasures; iphi++)
  {
    boundary[iphi] = -1;
    uuh = uu[iphi];
    vvh = vv[iphi];
    int binphi = (int)(vvh*nsectors);
    // check if the point also influences neighbouring slabs
    double updist = (double)((binphi+1)*yaxis)*dx - vvh;
    double downdist = vvh - (double)(binphi*yaxis)*dx;
    //
    histo_send[binphi]++;
    Push(&sectorhead[binphi],iphi);
    if(updist < w_supporth && updist >= 0.0) {histo_send[binphi+1]++; boundary[iphi] = binphi+1; Push(&sectorhead[binphi+1],iphi);};
    if(downdist < w_supporth && binphi > 0 && downdist >= 0.0) {histo_send[binphi-1]++; boundary[iphi] = binphi-1; Push(&sectorhead[binphi-1],iphi);};
  }

#ifdef PIPPO
  struct sectorlist * current;
  long iiii = 0;
  for (int j=0; j<nsectors; j++)
  {
    current = sectorhead[j];
    while (current->index != -1)
    {
      printf("%d %d %ld %ld %ld\n",rank,j,iiii,histo_send[j],current->index);
      current = current->next;
      iiii++;
    }
  }
#endif

#ifdef VERBOSE
  for (int iii=0; iii<nsectors+1; iii++)printf("HISTO %d %d %ld\n",rank,iii,histo_send[iii]);
#endif

  // Create the local grid slabs and the global destination slab
  long size_of_grid = 2*num_w_planes*xaxis*yaxis;
  double * gridss = (double*) calloc(size_of_grid,sizeof(double));
  double * grid = (double*) calloc(size_of_grid,sizeof(double));
#ifndef USE_MPI
  double * gridtot = (double*) calloc(2*grid_size_x*grid_size_y*num_w_planes,sizeof(double));
#endif
  double shift = (double)(yaxis*dx);

  // Open the MPI memory window for the slab
#ifdef USE_MPI
  MPI_Win slabwin;
  MPI_Win_create(grid, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &slabwin);
  MPI_Win_fence(0,slabwin);
#endif

  // Loop over input files
  for (int ifiles=0; ifiles<ndatasets; ifiles++)
  {
    strcpy(datapath,datapath_multi[ifiles]);
    if(rank == 0)printf("Processing %s, %d of %d\n",datapath,ifiles+1,ndatasets);

    // Read weights
    strcpy(filename,datapath);
    strcat(filename,weightsfile);
    pFile = fopen (filename,"rb");
    fseek (pFile,startrow*polarisations*sizeof(float),SEEK_SET);
    fread(weights,Nweights*sizeof(float),1,pFile);
    fclose(pFile);

    // Read visibilities (real and imaginary parts)
    strcpy(filename,datapath);
    strcat(filename,visrealfile);
    pFile = fopen (filename,"rb");
    fseek (pFile,startrow*polarisations*freq_per_chan*sizeof(float),SEEK_SET);
    fread(visreal,Nvis*sizeof(float),1,pFile);
    fclose(pFile);

    strcpy(filename,datapath);
    strcat(filename,visimgfile);
    pFile = fopen (filename,"rb");
    fseek (pFile,startrow*polarisations*freq_per_chan*sizeof(float),SEEK_SET);
    fread(visimg,Nvis*sizeof(float),1,pFile);
    fclose(pFile);

    // Loop over sectors
    for (long isector = 0; isector < nsectors; isector++)
    {
      clock_gettime(CLOCK_MONOTONIC, &begink);
      startk = clock();

      // Allocate the arrays hosting the data of the current sector
      long Nsec = histo_send[isector];
      long Nweightss = Nsec*polarisations;
      long Nvissec = Nweightss*freq_per_chan;
      double * uus = (double*) malloc(Nsec*sizeof(double));
      double * vvs = (double*) malloc(Nsec*sizeof(double));
      double * wws = (double*) malloc(Nsec*sizeof(double));
      float * weightss = (float*) malloc(Nweightss*sizeof(float));
      float * visreals = (float*) malloc(Nvissec*sizeof(float));
      float * visimgs = (float*) malloc(Nvissec*sizeof(float));

      // Select the data for the current sector walking the corresponding linked list
      long icount = 0;
      long ip = 0;
      long inu = 0;
      struct sectorlist * current = sectorhead[isector];
      while (current->index != -1)
      {
        long ilocal = current->index;
        //double vvh = vv[ilocal];
        //int binphi = (int)(vvh*nsectors);
        //if (binphi == isector || boundary[ilocal] == isector)
        //{
        uus[icount] = uu[ilocal];
        vvs[icount] = vv[ilocal]-isector*shift;
        wws[icount] = ww[ilocal];
        for (long ipol=0; ipol<polarisations; ipol++)
        {
          weightss[ip] = weights[ilocal*polarisations+ipol];
          ip++;
        }
        for (long ifreq=0; ifreq<polarisations*freq_per_chan; ifreq++)
        {
          visreals[inu] = visreal[ilocal*polarisations*freq_per_chan+ifreq];
          visimgs[inu] = visimg[ilocal*polarisations*freq_per_chan+ifreq];
          if(visimgs[inu]>1e10 || visimgs[inu]<-1e10)printf("%f %f %ld %ld %d %ld %ld\n",visreals[inu],visimgs[inu],inu,Nvissec,rank,ilocal*polarisations*freq_per_chan+ifreq,Nvis);
          inu++;
        }
        icount++;
        //}
        current = current->next;
      }

      clock_gettime(CLOCK_MONOTONIC, &finishk);
      endk = clock();
      compose_time += ((double) (endk - startk)) / CLOCKS_PER_SEC;
      compose_time1 += (finishk.tv_sec - begink.tv_sec);
      compose_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;
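      /*
       * Note on the two timer families used throughout: clock() measures
       * CPU time accumulated by the whole process (all OpenMP threads
       * summed), feeding compose_time, kernel_time and reduce_time; the
       * clock_gettime(CLOCK_MONOTONIC, ...) pairs measure elapsed
       * wall-clock time and feed the *_time1 counterparts printed with a
       * leading "P" in the final report.
       */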
#ifndef USE_MPI
      double uumin = 1e20;
      double vvmin = 1e20;
      double uumax = -1e20;
      double vvmax = -1e20;
      for (long ipart=0; ipart<Nsec; ipart++)
      {
        uumin = MIN(uumin,uus[ipart]);
        uumax = MAX(uumax,uus[ipart]);
        vvmin = MIN(vvmin,vvs[ipart]);
        vvmax = MAX(vvmax,vvs[ipart]);
      }
      printf("UU, VV, min, max = %f %f %f %f\n",uumin,uumax,vvmin,vvmax);
#endif

      // Make the convolution on the grid, calling the wstack kernel from w-stacking.h
#ifdef VERBOSE
      printf("Processing sector %ld\n",isector);
#endif
      clock_gettime(CLOCK_MONOTONIC, &begink);
      startk = clock();
      wstack(num_w_planes,
             Nsec,
             freq_per_chan,
             polarisations,
             uus,
             vvs,
             wws,
             visreals,
             visimgs,
             weightss,
             dx,
             dw,
             w_support,
             xaxis,
             yaxis,
             gridss,
             num_threads);
      clock_gettime(CLOCK_MONOTONIC, &finishk);
      endk = clock();
      kernel_time += ((double) (endk - startk)) / CLOCKS_PER_SEC;
      kernel_time1 += (finishk.tv_sec - begink.tv_sec);
      kernel_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;
#ifdef VERBOSE
      printf("Processed sector %ld\n",isector);
      for (long iii=0; iii<size_of_grid; iii++)printf("--> %f\n",gridss[iii]);
#endif

#ifndef USE_MPI
      long stride = isector*2*xaxis*yaxis*num_w_planes;
      for (long iii=0; iii<2*xaxis*yaxis*num_w_planes; iii++)gridtot[stride+iii] = gridss[iii];
#endif

      // Write grid in the corresponding remote slab
      clock_gettime(CLOCK_MONOTONIC, &begink);
      startk = clock();
#ifdef USE_MPI
      int target_rank = (int)isector;
      //int target_rank = (int)(size-isector-1);
#ifdef ONE_SIDE
      printf("One Side communication active\n");
      MPI_Win_lock(MPI_LOCK_SHARED,target_rank,0,slabwin);
      MPI_Accumulate(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,MPI_SUM,slabwin);
      MPI_Win_unlock(target_rank,slabwin);
      //MPI_Put(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,slabwin);
#else
      MPI_Reduce(gridss,grid,size_of_grid,MPI_DOUBLE,MPI_SUM,target_rank,MPI_COMM_WORLD);
#endif //ONE_SIDE
#endif //USE_MPI
      clock_gettime(CLOCK_MONOTONIC, &finishk);
      endk = clock();
      reduce_time += ((double) (endk - startk)) / CLOCKS_PER_SEC;
      reduce_time1 += (finishk.tv_sec - begink.tv_sec);
      reduce_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;

      // Go to the next sector
      for (long inull=0; inull<2*num_w_planes*xaxis*yaxis; inull++)gridss[inull] = 0.0;

      // Deallocate all sector arrays
      free(uus);
      free(vvs);
      free(wws);
      free(weightss);
      free(visreals);
      free(visimgs);
    // End of loop over sectors
    }
  // End of loop over input files
  }

  // Finalize MPI communication
#ifdef USE_MPI
  MPI_Win_fence(0,slabwin);
#endif

  // Swap left and right parts APPARENTLY NOT NECESSARY
  /*
  for (long kswap=0; kswap<...; kswap++) { ... }
  */

  // End of gridding: accumulate the process time
  clock_gettime(CLOCK_MONOTONIC, &finish);
  end = clock();
  process_time = ((double) (end - start)) / CLOCKS_PER_SEC;
  process_time1 = (finish.tv_sec - begin.tv_sec);
  process_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;

  // Write the gridded data (grid_real.bin, grid_img.bin)
  if (size > 1)
  {
    for (int iw=0; iw<num_w_planes; iw++)
    {
      // ...
    }
  }

#ifdef USE_FFTW
  // FFT transform the data using the distributed FFTW
  if(rank == 0)printf("PERFORMING FFT\n");
  clock_gettime(CLOCK_MONOTONIC, &begin);
  start = clock();
  fftw_plan plan;
  fftw_complex *fftwgrid;
  ptrdiff_t alloc_local, local_n0, local_0_start;

  // x is the contiguous direction, while y is distributed
  // across the MPI tasks (it maps to n0)
  // and perform the FFT per w plane
  alloc_local = fftw_mpi_local_size_2d(grid_size_y, grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start);
  fftwgrid = fftw_alloc_complex(alloc_local);
  plan = fftw_mpi_plan_dft_2d(grid_size_y, grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE);

  long fftwindex = 0;
  long fftwindex2D = 0;
  for (int iw=0; iw<num_w_planes; iw++)
  {
    // select the w-plane to transform
    for (long iv=0; iv<yaxis; iv++)
      for (long iu=0; iu<xaxis; iu++)
      {
        fftwindex2D = iu + iv*xaxis;
        fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
        fftwgrid[fftwindex2D][0] = grid[fftwindex];
        fftwgrid[fftwindex2D][1] = grid[fftwindex+1];
      }
    // transform the current w-plane in place
    fftw_execute(plan);
    // copy the transformed plane back
    for (long iv=0; iv<yaxis; iv++)
      for (long iu=0; iu<xaxis; iu++)
      {
        fftwindex2D = iu + iv*xaxis;
        fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
        gridss[fftwindex] = fftwgrid[fftwindex2D][0];
        gridss[fftwindex+1] = fftwgrid[fftwindex2D][1];
      }
  }
  fftw_destroy_plan(plan);
  fftw_free(fftwgrid);

#ifdef USE_MPI
  MPI_Barrier(MPI_COMM_WORLD);
#endif
  clock_gettime(CLOCK_MONOTONIC, &finish);
  end = clock();
  fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC;
  fftw_time1 = (finish.tv_sec - begin.tv_sec);
  fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;

  // Write the FFT-transformed data (fft_real.bin, fft_img.bin)
  if (size > 1)
  {
    for (int iw=0; iw<num_w_planes; iw++)
    {
      // ...
    }
  }
#endif //USE_FFTW

  // Measure the total time and print the timing report
  clock_gettime(CLOCK_MONOTONIC, &finish);
  end = clock();
  tot_time = ((double) (end - start0)) / CLOCKS_PER_SEC;
  tot_time1 = (finish.tv_sec - begin0.tv_sec);
  tot_time1 += (finish.tv_nsec - begin0.tv_nsec) / 1000000000.0;

  if (rank == 0)
  {
    printf("Setup time: %f sec\n",setup_time);
    printf("Process time: %f sec\n",process_time);
    printf("Kernel time = %f, Array Composition time %f, Reduce time: %f sec\n",kernel_time,compose_time,reduce_time);
#ifdef USE_FFTW
    printf("FFTW time: %f sec\n",fftw_time);
#endif
    printf("TOT time: %f sec\n",tot_time);
    if (num_threads > 1)
    {
      printf("PSetup time: %f sec\n",setup_time1);
      printf("PProcess time: %f sec\n",process_time1);
      printf("PKernel time = %f, PArray Composition time %f, PReduce time: %f sec\n",kernel_time1,compose_time1,reduce_time1);
#ifdef USE_FFTW
      printf("PFFTW time: %f sec\n",fftw_time1);
#endif
      printf("PTOT time: %f sec\n",tot_time1);
    }
  }

  // Close the MPI environment
#ifdef USE_MPI
  MPI_Win_fence(0,slabwin);
  MPI_Win_free(&slabwin);
  MPI_Finalize();
#endif
}
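/*
 * Build-time switches used above: USE_MPI enables the MPI code paths,
 * USE_FFTW the distributed FFT (fftw3-mpi), ONE_SIDE the one-sided
 * MPI_Accumulate reduction instead of MPI_Reduce, and VERBOSE/PIPPO
 * extra diagnostics. A hypothetical build and run line (file and
 * binary names are assumptions, not part of this source):
 *
 *   mpicc -O3 -fopenmp -DUSE_MPI -DUSE_FFTW main.c w-stacking.c \
 *         -o gridder -lfftw3_mpi -lfftw3 -lm
 *   mpirun -np 4 ./gridder 8    (argv[1] = number of OpenMP threads)
 */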