From 9aed31901a0c13cc9430936cee0a61ea11043a10 Mon Sep 17 00:00:00 2001 From: Emanuele De Rubeis <emanuele.derubeis2@unibo.it> Date: Sun, 29 Jan 2023 09:02:37 +0000 Subject: [PATCH] Delete w-stacking-fftw-cfitsio.c --- w-stacking-fftw-cfitsio.c | 1089 ------------------------------------- 1 file changed, 1089 deletions(-) delete mode 100644 w-stacking-fftw-cfitsio.c diff --git a/w-stacking-fftw-cfitsio.c b/w-stacking-fftw-cfitsio.c deleted file mode 100644 index ebba5b3..0000000 --- a/w-stacking-fftw-cfitsio.c +++ /dev/null @@ -1,1089 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#ifdef FITSIO -#include "/m100/home/userexternal/ederubei/cfitsio-3.49/fitsio.h" -#endif -#ifdef USE_MPI -#include <mpi.h> -#ifdef USE_FFTW -#include <fftw3-mpi.h> -#endif -#endif -#include <omp.h> -#include <math.h> -#include <time.h> -#include <unistd.h> -#ifdef ACCOMP -#include "w-stacking_omp.h" -#else -#include "w-stacking.h" -#endif -#define PI 3.14159265359 -#define NUM_OF_SECTORS -1 -#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) -#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y)) -#define NOVERBOSE -#define NFILES 100 -#define FILENAMELENGTH 30 - -// Linked List set-up -struct sectorlist { - long index; - struct sectorlist * next; -}; - -void Push(struct sectorlist** headRef, long data) { - struct sectorlist* newNode = malloc(sizeof(struct sectorlist)); - newNode->index = data; - newNode->next = *headRef; - *headRef = newNode; -} - -// Main Code -int main(int argc, char * argv[]) -{ - // Main MPI parameters - int rank; - int size; - - - // Define main filenames - FILE * pFile; - FILE * pFile1; - FILE * pFilereal; - FILE * pFileimg; - // Global filename to be composed - char filename[1000]; - - // MS paths - char datapath[900]; - char datapath_multi[NFILES][900]; - char ufile[FILENAMELENGTH] = "ucoord.bin"; - char vfile[FILENAMELENGTH] = "vcoord.bin"; - char wfile[FILENAMELENGTH] = "wcoord.bin"; - char weightsfile[FILENAMELENGTH] = "weights.bin"; - char visrealfile[FILENAMELENGTH] = "visibilities_real.bin"; - char visimgfile[FILENAMELENGTH] = "visibilities_img.bin"; - char metafile[FILENAMELENGTH] = "meta.txt"; - - // Mesh related files - char outfile[FILENAMELENGTH] = "grid.txt"; - char outfile1[FILENAMELENGTH] = "coords.txt"; - char outfile2[FILENAMELENGTH] = "grid_real.bin"; - char outfile3[FILENAMELENGTH] = "grid_img.bin"; - char fftfile[FILENAMELENGTH] = "fft.txt"; - char fftfile2[FILENAMELENGTH] = "fft_real.bin"; - char fftfile3[FILENAMELENGTH] = "fft_img.bin"; - char logfile[FILENAMELENGTH] = "run.log"; - char extension[FILENAMELENGTH] = ".txt"; - char srank[4]; - char timingfile[FILENAMELENGTH] = "timings.dat"; - - // Visibilities related variables - double * uu; - double * vv; - double * ww; - float * weights; - float * visreal; - float * visimg; - - long Nmeasures,Nmeasures0; - long Nvis,Nvis0; - long Nweights,Nweights0; - long freq_per_chan,freq_per_chan0; - long polarisations,polarisations0; - long Ntimes,Ntimes0; - double dt,dt0; - double thours,thours0; - long baselines,baselines0; - double uvmin,uvmin0; - double uvmax,uvmax0; - double wmin,wmin0; - double wmax,wmax0; - double resolution; - - // Mesh related parameters: global size - int grid_size_x = 2048; - int grid_size_y = 2048; - // Split Mesh size (auto-calculated) - int local_grid_size_x; - int local_grid_size_y; - int xaxis; - int yaxis; - - // Number of planes in the w direction - int num_w_planes = 8; - - // Size of the convoutional kernel support - int w_support = 7; - - // Number of OpenMP threads (input parameter) - int num_threads; - - // Resolution - double dx = 0.5/(double)grid_size_x; - double dw = 0.5/(double)num_w_planes; - // Half support size - double w_supporth = (double)((w_support-1)/2)*dx; - - - // Initialize FITS image parameters - fitsfile *fptreal; - fitsfile *fptrimg; - int status; - long nelements; -// long fpixel, lpixel; - char testfitsreal[FILENAMELENGTH] = "parallel_np2_real.fits"; - char testfitsimag[FILENAMELENGTH] = "parallel_np2_img.fits"; - - long naxis = 2; - long naxes[2] = { grid_size_x, grid_size_y }; - - nelements = naxes[0] * naxes[1]; - - - - - // Internal profiling parameters - clock_t start, end, start0, startk, endk; - double setup_time, process_time, mpi_time, fftw_time, tot_time, kernel_time, reduce_time, compose_time, phase_time; - double setup_time1, process_time1, mpi_time1, fftw_time1, tot_time1, kernel_time1, reduce_time1, compose_time1, phase_time1; - double writetime, writetime1; - struct timespec begin, finish, begin0, begink, finishk; - double elapsed; - - // Number of sectors in which the mesh is divided along the v direction - // IF nsectors < 0, nsectors = NUMBER OF MPI RANKS - long nsectors = NUM_OF_SECTORS; - - - /* Get number of threads, exit if not given */ - if (argc == 1) { - fprintf(stderr, "Usage: %s number_of_OMP_Threads \n", argv[0]); - exit(1); - } - // Set the number of OpenMP threads - num_threads = atoi(argv[1]); - - // Number of threads <= 0 not acceptable - if (num_threads <= 0) { - fprintf(stderr, "Wrong parameter: %s\n\n", argv[1]); - fprintf(stderr, "Usage: %s number_of_OMP_Threads \n", argv[0]); - exit(1); - } - - // Internal profiling - clock_gettime(CLOCK_MONOTONIC, &begin0); - start0 = clock(); - -#ifdef USE_MPI - // Intialize MPI environment - MPI_Init(&argc,&argv); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if(rank == 0)printf("Running with %d MPI tasks\n",size); - #ifdef USE_FFTW - // Initialize FFTW environment - fftw_mpi_init(); - #endif -#else - // If MPI is not used the two parameters are set for consistency - rank = 0; - size = 1; -#endif - - if(rank == 0)printf("Running with %d threads\n",num_threads); - -#ifdef ACCOMP -if(rank == 0){ - if (0 == omp_get_num_devices()) { - printf("No accelerator found ... exit\n"); - exit(255); - } - printf("Number of available GPUs %d\n", omp_get_num_devices()); - #ifdef NVIDIA - prtAccelInfo(); - #endif - } -#endif - - // set the local size of the image - local_grid_size_x = grid_size_x; - if (nsectors < 0) nsectors = size; - local_grid_size_y = grid_size_y/nsectors; - //nsectors = size; - - // LOCAL grid size - xaxis = local_grid_size_x; - yaxis = local_grid_size_y; - clock_gettime(CLOCK_MONOTONIC, &begin); - start = clock(); - - // INPUT FILES (only the first ndatasets entries are used) - int ndatasets = 1; - strcpy(datapath_multi[0],"/m100_scratch/userexternal/ederubei/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); - //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/"); - //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); - //strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/"); - - strcpy(datapath,datapath_multi[0]); - // Read metadata - strcpy(filename,datapath); - strcat(filename,metafile); - pFile = fopen (filename,"r"); - fscanf(pFile,"%ld",&Nmeasures); - fscanf(pFile,"%ld",&Nvis); - fscanf(pFile,"%ld",&freq_per_chan); - fscanf(pFile,"%ld",&polarisations); - fscanf(pFile,"%ld",&Ntimes); - fscanf(pFile,"%lf",&dt); - fscanf(pFile,"%lf",&thours); - fscanf(pFile,"%ld",&baselines); - fscanf(pFile,"%lf",&uvmin); - fscanf(pFile,"%lf",&uvmax); - fscanf(pFile,"%lf",&wmin); - fscanf(pFile,"%lf",&wmax); - fclose(pFile); - - - // WATCH THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - int nsub = 1000; - //int nsub = 10; - printf("Subtracting last %d measurements\n",nsub); - Nmeasures = Nmeasures-nsub; - Nvis = Nmeasures*freq_per_chan*polarisations; - - // calculate the coordinates of the center - double uvshift = uvmin/(uvmax-uvmin); - //printf("UVSHIFT %f %f %f %f %f\n",uvmin, uvmax, wmin, wmax, uvshift); - - if (rank == 0) - { - printf("N. measurements %ld\n",Nmeasures); - printf("N. visibilities %ld\n",Nvis); - } - - // Set temporary local size of points - long nm_pe = (long)(Nmeasures/size); - long remaining = Nmeasures%size; - - long startrow = rank*nm_pe; - if (rank == size-1)nm_pe = nm_pe+remaining; - - long Nmeasures_tot = Nmeasures; - Nmeasures = nm_pe; - long Nvis_tot = Nvis; - Nvis = Nmeasures*freq_per_chan*polarisations; - Nweights = Nmeasures*polarisations; - -#ifdef VERBOSE - printf("N. measurements on %d %ld\n",rank,Nmeasures); - printf("N. visibilities on %d %ld\n",rank,Nvis); -#endif - - - // DAV: all these arrays can be allocatate statically for the sake of optimization. However be careful that if MPI is used - // all the sizes are rescaled by the number of MPI tasks - // Allocate arrays - uu = (double*) calloc(Nmeasures,sizeof(double)); - vv = (double*) calloc(Nmeasures,sizeof(double)); - ww = (double*) calloc(Nmeasures,sizeof(double)); - weights = (float*) calloc(Nweights,sizeof(float)); - visreal = (float*) calloc(Nvis,sizeof(float)); - visimg = (float*) calloc(Nvis,sizeof(float)); - - if(rank == 0)printf("READING DATA\n"); - // Read data - strcpy(filename,datapath); - strcat(filename,ufile); - //printf("Reading %s\n",filename); - - pFile = fopen (filename,"rb"); - fseek (pFile,startrow*sizeof(double),SEEK_SET); - fread(uu,Nmeasures*sizeof(double),1,pFile); - fclose(pFile); - - strcpy(filename,datapath); - strcat(filename,vfile); - //printf("Reading %s\n",filename); - - pFile = fopen (filename,"rb"); - fseek (pFile,startrow*sizeof(double),SEEK_SET); - fread(vv,Nmeasures*sizeof(double),1,pFile); - fclose(pFile); - - strcpy(filename,datapath); - strcat(filename,wfile); - //printf("Reading %s\n",filename); - - pFile = fopen (filename,"rb"); - fseek (pFile,startrow*sizeof(double),SEEK_SET); - fread(ww,Nmeasures*sizeof(double),1,pFile); - fclose(pFile); - -#ifdef USE_MPI - MPI_Barrier(MPI_COMM_WORLD); -#endif - - clock_gettime(CLOCK_MONOTONIC, &finish); - end = clock(); - setup_time = ((double) (end - start)) / CLOCKS_PER_SEC; - setup_time1 = (finish.tv_sec - begin.tv_sec); - setup_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; - - - if(rank == 0)printf("GRIDDING DATA\n"); - - // Create histograms and linked lists - clock_gettime(CLOCK_MONOTONIC, &begin); - start = clock(); - - // Initialize linked list - struct sectorlist ** sectorhead; - sectorhead = (struct sectorlist **) malloc((nsectors+1) * sizeof(struct sectorlist)); - for (int isec=0; isec<=nsectors; isec++) - { - sectorhead[isec] = malloc(sizeof(struct sectorlist)); - sectorhead[isec]->index = -1; - sectorhead[isec]->next = NULL; - } - - - long * histo_send = (long*) calloc(nsectors+1,sizeof(long)); - int * boundary = (int*) calloc(Nmeasures,sizeof(int)); - double uuh,vvh; - for (long iphi = 0; iphi < Nmeasures; iphi++) - { - boundary[iphi] = -1; - uuh = uu[iphi]; - vvh = vv[iphi]; - int binphi = (int)(vvh*nsectors); - // check if the point influence also neighboring slabs - double updist = (double)((binphi+1)*yaxis)*dx - vvh; - double downdist = vvh - (double)(binphi*yaxis)*dx; - // - histo_send[binphi]++; - Push(§orhead[binphi],iphi); - if(updist < w_supporth && updist >= 0.0) {histo_send[binphi+1]++; boundary[iphi] = binphi+1; Push(§orhead[binphi+1],iphi);}; - if(downdist < w_supporth && binphi > 0 && downdist >= 0.0) {histo_send[binphi-1]++; boundary[iphi] = binphi-1; Push(§orhead[binphi-1],iphi);}; - } - -#ifdef PIPPO - struct sectorlist * current; - long iiii = 0; - for (int j=0; j<nsectors; j++) - { - current = sectorhead[j]; - iiii = 0; - while (current->index != -1) - { - printf("%d %d %ld %ld %ld\n",rank,j,iiii,histo_send[j],current->index); - current = current->next; - iiii++; - } - } -#endif - -#ifdef VERBOSE - for (int iii=0; iii<nsectors+1; iii++)printf("HISTO %d %d %ld\n",rank, iii, histo_send[iii]); -#endif - -// Create sector grid - double * gridss; - double * gridss_w; - double * gridss_real; - double * gridss_img; - double * grid; - long size_of_grid; - size_of_grid = 2*num_w_planes*xaxis*yaxis; - gridss = (double*) calloc(size_of_grid,sizeof(double)); - gridss_w = (double*) calloc(size_of_grid,sizeof(double)); - gridss_real = (double*) calloc(size_of_grid/2,sizeof(double)); - gridss_img = (double*) calloc(size_of_grid/2,sizeof(double)); - // Create destination slab - grid = (double*) calloc(size_of_grid,sizeof(double)); - // Create temporary global grid -#ifndef USE_MPI - double * gridtot = (double*) calloc(2*grid_size_x*grid_size_y*num_w_planes,sizeof(double)); -#endif - double shift = (double)(dx*yaxis); - // Open the MPI Memory Window for the slab -#ifdef USE_MPI - MPI_Win slabwin; - MPI_Win_create(grid, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &slabwin); - MPI_Win_fence(0,slabwin); -#endif -#ifndef USE_MPI - pFile1 = fopen (outfile1,"w"); -#endif - - - // loop over files - // - kernel_time = 0.0; - kernel_time1 = 0.0; - reduce_time = 0.0; - reduce_time1 = 0.0; - compose_time = 0.0; - compose_time1 = 0.0; - - // MAIN LOOP OVER FILES - // - for (int ifiles=0; ifiles<ndatasets; ifiles++) - { - strcpy(filename,datapath_multi[ifiles]); - printf("Processing %s, %d of %d\n",filename,ifiles+1,ndatasets); - - // Read metadata - strcpy(filename,datapath); - strcat(filename,metafile); - pFile = fopen (filename,"r"); - fscanf(pFile,"%ld",&Nmeasures0); - fscanf(pFile,"%ld",&Nvis0); - fscanf(pFile,"%ld",&freq_per_chan0); - fscanf(pFile,"%ld",&polarisations0); - fscanf(pFile,"%ld",&Ntimes0); - fscanf(pFile,"%lf",&dt0); - fscanf(pFile,"%lf",&thours0); - fscanf(pFile,"%ld",&baselines0); - fscanf(pFile,"%lf",&uvmin); - fscanf(pFile,"%lf",&uvmax); - fscanf(pFile,"%lf",&wmin0); - fscanf(pFile,"%lf",&wmax0); - fclose(pFile); - - // calculate the resolution in radians - resolution = 1.0/MAX(abs(uvmin),abs(uvmax)); - // calculate the resolution in arcsec - double resolution_asec = (3600.0*180.0)/MAX(abs(uvmin),abs(uvmax))/PI; - printf("RESOLUTION = %f rad, %f arcsec\n", resolution, resolution_asec); - - strcpy(filename,datapath); - strcat(filename,weightsfile); - pFile = fopen (filename,"rb"); - fseek (pFile,startrow*polarisations*sizeof(float),SEEK_SET); - fread(weights,(Nweights)*sizeof(float),1,pFile); - fclose(pFile); - - strcpy(filename,datapath); - strcat(filename,visrealfile); - - pFile = fopen (filename,"rb"); - fseek (pFile,startrow*freq_per_chan*polarisations*sizeof(float),SEEK_SET); - fread(visreal,Nvis*sizeof(float),1,pFile); - fclose(pFile); - strcpy(filename,datapath); - strcat(filename,visimgfile); -#ifdef VERBOSE - printf("Reading %s\n",filename); -#endif - pFile = fopen (filename,"rb"); - fseek (pFile,startrow*freq_per_chan*polarisations*sizeof(float),SEEK_SET); - fread(visimg,Nvis*sizeof(float),1,pFile); - fclose(pFile); - -#ifdef USE_MPI - MPI_Barrier(MPI_COMM_WORLD); -#endif - // Declare temporary arrays for the masking - double * uus; - double * vvs; - double * wws; - float * visreals; - float * visimgs; - float * weightss; - long isector; - - for (long isector_count=0; isector_count<nsectors; isector_count++) - { - clock_gettime(CLOCK_MONOTONIC, &begink); - startk = clock(); - // define local destination sector - //isector = (isector_count+rank)%size; - isector = isector_count; - // allocate sector arrays - long Nsec = histo_send[isector]; - uus = (double*) malloc(Nsec*sizeof(double)); - vvs = (double*) malloc(Nsec*sizeof(double)); - wws = (double*) malloc(Nsec*sizeof(double)); - long Nweightss = Nsec*polarisations; - long Nvissec = Nweightss*freq_per_chan; - weightss = (float*) malloc(Nweightss*sizeof(float)); - visreals = (float*) malloc(Nvissec*sizeof(float)); - visimgs = (float*) malloc(Nvissec*sizeof(float)); - - // select data for this sector - long icount = 0; - long ip = 0; - long inu = 0; -//CLAAAA - struct sectorlist * current; - current = sectorhead[isector]; - - - - while (current->index != -1) - { - long ilocal = current->index; - //double vvh = vv[ilocal]; - //int binphi = (int)(vvh*nsectors); - //if (binphi == isector || boundary[ilocal] == isector) { - uus[icount] = uu[ilocal]; - vvs[icount] = vv[ilocal]-isector*shift; - wws[icount] = ww[ilocal]; - for (long ipol=0; ipol<polarisations; ipol++) - { - weightss[ip] = weights[ilocal*polarisations+ipol]; - ip++; - } - for (long ifreq=0; ifreq<polarisations*freq_per_chan; ifreq++) - { - visreals[inu] = visreal[ilocal*polarisations*freq_per_chan+ifreq]; - visimgs[inu] = visimg[ilocal*polarisations*freq_per_chan+ifreq]; - //if(visimgs[inu]>1e10 || visimgs[inu]<-1e10)printf("%f %f %ld %ld %d %ld %ld\n",visreals[inu],visimgs[inu],inu,Nvissec,rank,ilocal*polarisations*freq_per_chan+ifreq,Nvis); - inu++; - } - icount++; - //} - current = current->next; - } - - clock_gettime(CLOCK_MONOTONIC, &finishk); - endk = clock(); - compose_time += ((double) (endk - startk)) / CLOCKS_PER_SEC; - compose_time1 += (finishk.tv_sec - begink.tv_sec); - compose_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; - - #ifndef USE_MPI - double uumin = 1e20; - double vvmin = 1e20; - double uumax = -1e20; - double vvmax = -1e20; - - for (long ipart=0; ipart<Nsec; ipart++) - { - uumin = MIN(uumin,uus[ipart]); - uumax = MAX(uumax,uus[ipart]); - vvmin = MIN(vvmin,vvs[ipart]); - vvmax = MAX(vvmax,vvs[ipart]); - - - if(ipart%10 == 0)fprintf (pFile, "%ld %f %f %f\n",isector,uus[ipart],vvs[ipart]+isector*shift,wws[ipart]); - } - - printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax); - #endif - - // Make convolution on the grid - #ifdef VERBOSE - printf("Processing sector %ld\n",isector); - #endif - clock_gettime(CLOCK_MONOTONIC, &begink); - startk = clock(); - - wstack(num_w_planes, - Nsec, - freq_per_chan, - polarisations, - uus, - vvs, - wws, - visreals, - visimgs, - weightss, - dx, - dw, - w_support, - xaxis, - yaxis, - gridss, - num_threads); - - -/* int z =0 ; -#pragma omp target map(to:test_i_gpu) map(from:z) -{ - int x; // only accessible from accelerator - x = 2; - z = x + test_i_gpu; -}*/ - - - - clock_gettime(CLOCK_MONOTONIC, &finishk); - endk = clock(); - kernel_time += ((double) (endk - startk)) / CLOCKS_PER_SEC; - kernel_time1 += (finishk.tv_sec - begink.tv_sec); - kernel_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; - #ifdef VERBOSE - printf("Processed sector %ld\n",isector); - #endif - clock_gettime(CLOCK_MONOTONIC, &begink); - startk = clock(); - - //for (long iii=0; iii<2*xaxis*yaxis*num_w_planes; iii++)printf("--> %f\n",gridss[iii]); - - #ifndef USE_MPI - long stride = isector*2*xaxis*yaxis*num_w_planes; - for (long iii=0; iii<2*xaxis*yaxis*num_w_planes; iii++)gridtot[stride+iii] = gridss[iii]; - #endif - - // Write grid in the corresponding remote slab - #ifdef USE_MPI - int target_rank = (int)isector; - //int target_rank = (int)(size-isector-1); - #ifdef ONE_SIDE - printf("One Side communication active\n"); - MPI_Win_lock(MPI_LOCK_SHARED,target_rank,0,slabwin); - MPI_Accumulate(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,MPI_SUM,slabwin); - MPI_Win_unlock(target_rank,slabwin); - //MPI_Put(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,slabwin); - #else - MPI_Reduce(gridss,grid,size_of_grid,MPI_DOUBLE,MPI_SUM,target_rank,MPI_COMM_WORLD); - #endif //ONE_SIDE - #endif //USE_MPI - - clock_gettime(CLOCK_MONOTONIC, &finishk); - endk = clock(); - reduce_time += ((double) (endk - startk)) / CLOCKS_PER_SEC; - reduce_time1 += (finishk.tv_sec - begink.tv_sec); - reduce_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; - // Go to next sector - for (long inull=0; inull<2*num_w_planes*xaxis*yaxis; inull++)gridss[inull] = 0.0; - - // Deallocate all sector arrays - free(uus); - free(vvs); - free(wws); - free(weightss); - free(visreals); - free(visimgs); - // End of loop over sectors - } - // End of loop over input files - } - - // Finalize MPI communication - #ifdef USE_MPI - MPI_Win_fence(0,slabwin); - #endif - - // Swap left and right parts APPARENTLY NOT NECESSARY - /* - for (long kswap=0; kswap<num_w_planes; kswap++) - for (long jswap=0; jswap<yaxis; jswap++) - for (long iswap=0; iswap<xaxis; iswap++) - { - long index_origin = 2*(iswap + jswap*xaxis + kswap*yaxis*xaxis); - gridss[index_origin] = grid[index_origin]; - gridss[index_origin+1] = grid[index_origin+1]; - } - for (long kswap=0; kswap<num_w_planes; kswap++) - for (long jswap=0; jswap<yaxis; jswap++) - for (long iswap=0; iswap<xaxis/2; iswap++) - { - long index_origin = 2*(iswap + jswap*xaxis + kswap*yaxis*xaxis); - long index_destination = 2*(iswap+xaxis/2 + jswap*xaxis + kswap*yaxis*xaxis); - grid[index_destination] = gridss[index_origin]; - grid[index_destination+1] = gridss[index_origin+1]; - } - - for (long kswap=0; kswap<num_w_planes; kswap++) - for (long jswap=0; jswap<yaxis; jswap++) - for (long iswap=xaxis/2; iswap<xaxis; iswap++) - { - long index_origin = 2*(iswap + jswap*xaxis + kswap*yaxis*xaxis); - long index_destination = 2*(iswap-xaxis/2 + jswap*xaxis + kswap*yaxis*xaxis); - grid[index_destination] = gridss[index_origin]; - grid[index_destination+1] = gridss[index_origin+1]; - } - */ - - #ifndef USE_MPI - fclose(pFile1); - #endif - #ifdef USE_MPI - MPI_Barrier(MPI_COMM_WORLD); - #endif - - end = clock(); - clock_gettime(CLOCK_MONOTONIC, &finish); - process_time = ((double) (end - start)) / CLOCKS_PER_SEC; - process_time1 = (finish.tv_sec - begin.tv_sec); - process_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; - clock_gettime(CLOCK_MONOTONIC, &begin); - - -#ifdef WRITE_DATA - // Write results - if (rank == 0) - { - printf("WRITING GRIDDED DATA\n"); - pFilereal = fopen (outfile2,"wb"); - pFileimg = fopen (outfile3,"wb"); - #ifdef USE_MPI - for (int isector=0; isector<nsectors; isector++) - { - MPI_Win_lock(MPI_LOCK_SHARED,isector,0,slabwin); - MPI_Get(gridss,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,slabwin); - MPI_Win_unlock(isector,slabwin); - for (long i=0; i<size_of_grid/2; i++) - { - gridss_real[i] = gridss[2*i]; - gridss_img[i] = gridss[2*i+1]; - } - if (num_w_planes > 1) - { - for (int iw=0; iw<num_w_planes; iw++) - for (int iv=0; iv<yaxis; iv++) - for (int iu=0; iu<xaxis; iu++) - { - long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double); - long index = iu + iv*xaxis + iw*xaxis*yaxis; - fseek(pFilereal, global_index, SEEK_SET); - fwrite(&gridss_real[index], 1, sizeof(double), pFilereal); - } - for (int iw=0; iw<num_w_planes; iw++) - for (int iv=0; iv<yaxis; iv++) - for (int iu=0; iu<xaxis; iu++) - { - long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double); - long index = iu + iv*xaxis + iw*xaxis*yaxis; - fseek(pFileimg, global_index, SEEK_SET); - fwrite(&gridss_img[index], 1, sizeof(double), pFileimg); - //double v_norm = sqrt(gridss[index]*gridss[index]+gridss[index+1]*gridss[index+1]); - //fprintf (pFile, "%d %d %d %f %f %f\n", iu,isector*yaxis+iv,iw,gridss[index],gridss[index+1],v_norm); - } - - } else { - for (int iw=0; iw<num_w_planes; iw++) - { - long global_index = (xaxis*isector*yaxis + iw*grid_size_x*grid_size_y)*sizeof(double); - long index = iw*xaxis*yaxis; - fseek(pFilereal, global_index, SEEK_SET); - fwrite(&gridss_real[index], xaxis*yaxis, sizeof(double), pFilereal); - fseek(pFileimg, global_index, SEEK_SET); - fwrite(&gridss_img[index], xaxis*yaxis, sizeof(double), pFileimg); - } - } - } - #else - for (int iw=0; iw<num_w_planes; iw++) - for (int iv=0; iv<grid_size_y; iv++) - for (int iu=0; iu<grid_size_x; iu++) - { - long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y); - fwrite(&gridtot[index], 1, sizeof(double), pFilereal); - fwrite(&gridtot[index+1], 1, sizeof(double), pFileimg); - //double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]); - //fprintf (pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm); - } - #endif - fclose(pFilereal); - fclose(pFileimg); - } - - #ifdef USE_MPI - MPI_Win_fence(0,slabwin); - #endif -#endif //WRITE_DATA - - -#ifdef USE_FFTW - // FFT transform the data (using distributed FFTW) - - if(rank == 0)printf("PERFORMING FFT\n"); - clock_gettime(CLOCK_MONOTONIC, &begin); - start = clock(); - fftw_plan plan; - fftw_complex *fftwgrid; - ptrdiff_t alloc_local, local_n0, local_0_start; - double norm = 1.0/(double)(grid_size_x*grid_size_y); - - // map the 1D array of complex visibilities to a 2D array required by FFTW (complex[*][2]) - // x is the direction of contiguous data and maps to the second parameter - // y is the parallelized direction and corresponds to the first parameter (--> n0) - // and perform the FFT per w plane - alloc_local = fftw_mpi_local_size_2d(grid_size_y, grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start); - fftwgrid = fftw_alloc_complex(alloc_local); - plan = fftw_mpi_plan_dft_2d(grid_size_y, grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE); - - long fftwindex = 0; - long fftwindex2D = 0; - for (int iw=0; iw<num_w_planes; iw++) - { - //printf("FFTing plan %d\n",iw); - // select the w-plane to transform - for (int iv=0; iv<yaxis; iv++) - { - for (int iu=0; iu<xaxis; iu++) - { - fftwindex2D = iu + iv*xaxis; - fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); - fftwgrid[fftwindex2D][0] = grid[fftwindex]; - fftwgrid[fftwindex2D][1] = grid[fftwindex+1]; - } - } - - // do the transform for each w-plane - fftw_execute(plan); - - // save the transformed w-plane - for (int iv=0; iv<yaxis; iv++) - { - for (int iu=0; iu<xaxis; iu++) - { - fftwindex2D = iu + iv*xaxis; - fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); - gridss[fftwindex] = norm*fftwgrid[fftwindex2D][0]; - gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D][1]; - } - } - - } - - fftw_destroy_plan(plan); - - #ifdef USE_MPI - MPI_Win_fence(0,slabwin); - MPI_Barrier(MPI_COMM_WORLD); - #endif - - end = clock(); - clock_gettime(CLOCK_MONOTONIC, &finish); - fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC; - fftw_time1 = (finish.tv_sec - begin.tv_sec); - fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; - clock_gettime(CLOCK_MONOTONIC, &begin); - -#ifdef WRITE_DATA - // Write results - #ifdef USE_MPI - MPI_Win writewin; - MPI_Win_create(gridss, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin); - MPI_Win_fence(0,writewin); - #endif - if (rank == 0) - { - printf("WRITING FFT TRANSFORMED DATA\n"); - pFilereal = fopen (fftfile2,"wb"); - pFileimg = fopen (fftfile3,"wb"); - #ifdef USE_MPI - for (int isector=0; isector<nsectors; isector++) - { - MPI_Win_lock(MPI_LOCK_SHARED,isector,0,writewin); - MPI_Get(gridss_w,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,writewin); - MPI_Win_unlock(isector,writewin); - for (long i=0; i<size_of_grid/2; i++) - { - gridss_real[i] = gridss_w[2*i]; - gridss_img[i] = gridss_w[2*i+1]; - } - if (num_w_planes > 1) - { - for (int iw=0; iw<num_w_planes; iw++) - for (int iv=0; iv<yaxis; iv++) - for (int iu=0; iu<xaxis; iu++) - { - long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double); - long index = iu + iv*xaxis + iw*xaxis*yaxis; - fseek(pFilereal, global_index, SEEK_SET); - fwrite(&gridss_real[index], 1, sizeof(double), pFilereal); - } - for (int iw=0; iw<num_w_planes; iw++) - for (int iv=0; iv<yaxis; iv++) - for (int iu=0; iu<xaxis; iu++) - { - long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double); - long index = iu + iv*xaxis + iw*xaxis*yaxis; - fseek(pFileimg, global_index, SEEK_SET); - fwrite(&gridss_img[index], 1, sizeof(double), pFileimg); - } - } else { - fwrite(gridss_real, size_of_grid/2, sizeof(double), pFilereal); - fwrite(gridss_img, size_of_grid/2, sizeof(double), pFileimg); - } - - - } - #else - /* - for (int iw=0; iw<num_w_planes; iw++) - for (int iv=0; iv<grid_size_y; iv++) - for (int iu=0; iu<grid_size_x; iu++) - { - int isector = 0; - long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y); - double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]); - fprintf (pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm); - } - */ - #endif - fclose(pFilereal); - fclose(pFileimg); - } - - #ifdef USE_MPI - //MPI_Win_fence(0,writewin); - MPI_Win_fence(0,writewin); - MPI_Win_free(&writewin); - MPI_Barrier(MPI_COMM_WORLD); - #endif -#endif //WRITE_DATA - - fftw_free(fftwgrid); - - // Phase correction - clock_gettime(CLOCK_MONOTONIC, &begin); - start = clock(); - if(rank == 0)printf("PHASE CORRECTION\n"); - double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double)); - double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double)); - - phase_correction(gridss,image_real,image_imag,xaxis,yaxis,num_w_planes,grid_size_x,grid_size_y,resolution,wmin,wmax,num_threads); - - end = clock(); - clock_gettime(CLOCK_MONOTONIC, &finish); - phase_time = ((double) (end - start)) / CLOCKS_PER_SEC; - phase_time1 = (finish.tv_sec - begin.tv_sec); - phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; -#ifdef WRITE_IMAGE - - if(rank == 0) - { - #ifdef FITSIO - printf("REMOVING RESIDUAL FITS FILE\n"); - remove(testfitsreal); - remove(testfitsimag); - - - printf("FITS CREATING\n"); - status = 0; - - fits_create_file(&fptrimg, testfitsimag, &status); - fits_create_img(fptrimg, DOUBLE_IMG, naxis, naxes, &status); -// fits_write_img(fptrimg, TDOUBLE, fpixel, nelements, image_imag, &status); - fits_close_file(fptrimg, &status); - - status = 0; - - fits_create_file(&fptreal, testfitsreal, &status); - fits_create_img(fptreal, DOUBLE_IMG, naxis, naxes, &status); -// fits_write_img(fptreal, TDOUBLE, fpixel, nelements, image_real, &status); - fits_close_file(fptreal, &status); - #endif - - pFilereal = fopen (fftfile2, "wb"); - pFileimg = fopen (fftfile3, "wb"); - fclose(pFilereal); - fclose(pFileimg); - } - #ifdef USE_MPI - MPI_Barrier(MPI_COMM_WORLD); - #endif - if(rank == 0)printf("WRITING IMAGE\n"); - for (int isector=0; isector<size; isector++) - { - #ifdef USE_MPI - MPI_Barrier(MPI_COMM_WORLD); - #endif - if(isector == rank) - //if(rank == 0) - { - #ifdef FITSIO - - printf("%d writing\n",isector); - long * fpixel = (long *) malloc(sizeof(long)*naxis); - long * lpixel = (long *) malloc(sizeof(long)*naxis); - - - fpixel[0] = 1; - fpixel[1] = isector*yaxis+1; - lpixel[0] = xaxis; - lpixel[1] = (isector+1)*yaxis; - //printf("fpixel %d, %d\n", fpixel[0], fpixel[1]); - //printf("lpixel %d, %d\n", lpixel[0], lpixel[1]); - status = 0; - fits_open_image(&fptreal, testfitsreal, READWRITE, &status); - fits_write_subset(fptreal, TDOUBLE, fpixel, lpixel, image_real, &status); - fits_close_file(fptreal, &status); - - status = 0; - fits_open_image(&fptrimg, testfitsimag, READWRITE, &status); - fits_write_subset(fptrimg, TDOUBLE, fpixel, lpixel, image_imag, &status); - fits_close_file(fptrimg, &status); - - #endif - - pFilereal = fopen (fftfile2,"ab"); - pFileimg = fopen (fftfile3,"ab"); - - long global_index = isector*(xaxis*yaxis)*sizeof(double); - - fseek(pFilereal, global_index, SEEK_SET); - fwrite(image_real, xaxis*yaxis, sizeof(double), pFilereal); - fseek(pFileimg, global_index, SEEK_SET); - fwrite(image_imag, xaxis*yaxis, sizeof(double), pFileimg); - - fclose(pFilereal); - fclose(pFileimg); - } - } - #ifdef USE_MPI - MPI_Barrier(MPI_COMM_WORLD); - #endif - -#endif //WRITE_IMAGE - - -#endif //USE_FFTW - - end = clock(); - clock_gettime(CLOCK_MONOTONIC, &finish); - tot_time = ((double) (end - start0)) / CLOCKS_PER_SEC; - tot_time1 = (finish.tv_sec - begin0.tv_sec); - tot_time1 += (finish.tv_nsec - begin0.tv_nsec) / 1000000000.0; - - if (rank == 0) - { - printf("Setup time: %f sec\n",setup_time); - printf("Process time: %f sec\n",process_time); - printf("Kernel time = %f, Array Composition time %f, Reduce time: %f sec\n",kernel_time,compose_time,reduce_time); -#ifdef USE_FFTW - printf("FFTW time: %f sec\n",fftw_time); - printf("Phase time: %f sec\n",phase_time); -#endif - printf("TOT time: %f sec\n",tot_time); - if(num_threads > 1) - { - printf("PSetup time: %f sec\n",setup_time1); - printf("PProcess time: %f sec\n",process_time1); - printf("PKernel time = %f, PArray Composition time %f, PReduce time: %f sec\n",kernel_time1,compose_time1,reduce_time1); -#ifdef USE_FFTW - printf("PFFTW time: %f sec\n",fftw_time1); - printf("PPhase time: %f sec\n",phase_time1); -#endif - printf("PTOT time: %f sec\n",tot_time1); - } - } - - if (rank == 0) - { - pFile = fopen (timingfile,"w"); - if (num_threads == 1) - { - fprintf(pFile, "%f %f %f %f %f %f %f\n",setup_time,kernel_time,compose_time,reduce_time,fftw_time,phase_time,tot_time); - } else { - fprintf(pFile, "%f %f %f %f %f %f %f\n",setup_time1,kernel_time1,compose_time1,reduce_time1,fftw_time1,phase_time1,tot_time1); - } - fclose(pFile); - } - - // Close MPI environment - #ifdef USE_MPI - MPI_Win_fence(0,slabwin); - MPI_Win_free(&slabwin); - MPI_Finalize(); - #endif -} -- GitLab