From 9aed31901a0c13cc9430936cee0a61ea11043a10 Mon Sep 17 00:00:00 2001
From: Emanuele De Rubeis <emanuele.derubeis2@unibo.it>
Date: Sun, 29 Jan 2023 09:02:37 +0000
Subject: [PATCH] Delete w-stacking-fftw-cfitsio.c

---
 w-stacking-fftw-cfitsio.c | 1089 -------------------------------------
 1 file changed, 1089 deletions(-)
 delete mode 100644 w-stacking-fftw-cfitsio.c

diff --git a/w-stacking-fftw-cfitsio.c b/w-stacking-fftw-cfitsio.c
deleted file mode 100644
index ebba5b3..0000000
--- a/w-stacking-fftw-cfitsio.c
+++ /dev/null
@@ -1,1089 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifdef FITSIO
-#include "/m100/home/userexternal/ederubei/cfitsio-3.49/fitsio.h"
-#endif
-#ifdef USE_MPI
-#include <mpi.h>
-#ifdef USE_FFTW
-#include <fftw3-mpi.h>
-#endif
-#endif
-#include <omp.h>
-#include <math.h>
-#include <time.h>
-#include <unistd.h>
-#ifdef ACCOMP
-#include "w-stacking_omp.h"
-#else
-#include "w-stacking.h"
-#endif
-#define PI 3.14159265359
-#define NUM_OF_SECTORS -1
-#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
-#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
-#define NOVERBOSE
-#define NFILES 100
-#define FILENAMELENGTH 30
-
-// Linked List set-up
-struct sectorlist {
-     long index;
-     struct sectorlist * next;
-};
-
-void Push(struct sectorlist** headRef, long data) {
-     struct sectorlist* newNode = malloc(sizeof(struct sectorlist));
-     newNode->index = data;
-     newNode->next = *headRef;
-     *headRef = newNode;
-}
-
-// Main Code
-int main(int argc, char * argv[])
-{
-        // Main MPI parameters
-	int rank;
-	int size;
-
-
-        // Define main filenames
-	FILE * pFile;
-	FILE * pFile1;
-	FILE * pFilereal;
-	FILE * pFileimg;
-	// Global filename to be composed
-	char filename[1000];
-
-        // MS paths
-        char datapath[900];
-        char datapath_multi[NFILES][900];
-        char ufile[FILENAMELENGTH] = "ucoord.bin";
-        char vfile[FILENAMELENGTH] = "vcoord.bin";
-        char wfile[FILENAMELENGTH] = "wcoord.bin";
-        char weightsfile[FILENAMELENGTH] = "weights.bin";
-        char visrealfile[FILENAMELENGTH] = "visibilities_real.bin";
-        char visimgfile[FILENAMELENGTH] = "visibilities_img.bin";
-        char metafile[FILENAMELENGTH] = "meta.txt";
-
-        // Mesh related files 
-        char outfile[FILENAMELENGTH] = "grid.txt";
-        char outfile1[FILENAMELENGTH] = "coords.txt";
-        char outfile2[FILENAMELENGTH] = "grid_real.bin";
-        char outfile3[FILENAMELENGTH] = "grid_img.bin";
-        char fftfile[FILENAMELENGTH] = "fft.txt";
-        char fftfile2[FILENAMELENGTH] = "fft_real.bin";
-        char fftfile3[FILENAMELENGTH] = "fft_img.bin";
-        char logfile[FILENAMELENGTH] = "run.log";
-        char extension[FILENAMELENGTH] = ".txt";
-        char srank[4];
-        char timingfile[FILENAMELENGTH] = "timings.dat";
-
-	// Visibilities related variables
-	double * uu;
-	double * vv;
-	double * ww;
-	float * weights;
-	float * visreal;
-	float * visimg;
-
-	long Nmeasures,Nmeasures0;
-	long Nvis,Nvis0;
-	long Nweights,Nweights0;
-	long freq_per_chan,freq_per_chan0;
-	long polarisations,polarisations0;
-        long Ntimes,Ntimes0;
-	double dt,dt0;
-	double thours,thours0;
-	long baselines,baselines0;
-	double uvmin,uvmin0;
-	double uvmax,uvmax0;
-	double wmin,wmin0;
-	double wmax,wmax0;
-	double resolution;
-
-        // Mesh related parameters: global size
-	int grid_size_x = 2048;
-	int grid_size_y = 2048;
-	// Split Mesh size (auto-calculated)
-	int local_grid_size_x;
-	int local_grid_size_y;
-	int xaxis;
-	int yaxis;
-
-	// Number of planes in the w direction
-	int num_w_planes = 8;
-
-	// Size of the convoutional kernel support
-	int w_support = 7;
-
-	// Number of OpenMP threads (input parameter)
-	int num_threads;
-
-        // Resolution 
-	double dx = 0.5/(double)grid_size_x;
-	double dw = 0.5/(double)num_w_planes;
-	// Half support size
-	double w_supporth = (double)((w_support-1)/2)*dx;
-
-
-	// Initialize FITS image parameters
-	fitsfile *fptreal;
-	fitsfile *fptrimg;
-	int status;
-	long nelements;
-//	long fpixel, lpixel;
-	char testfitsreal[FILENAMELENGTH] = "parallel_np2_real.fits";
-	char testfitsimag[FILENAMELENGTH] = "parallel_np2_img.fits";
-
-	long naxis = 2;
-	long naxes[2] = { grid_size_x, grid_size_y };
-
-	nelements = naxes[0] * naxes[1];
-
-
-
-
-	// Internal profiling parameters 
-	clock_t start, end, start0, startk, endk;
-	double setup_time, process_time, mpi_time, fftw_time, tot_time, kernel_time, reduce_time, compose_time, phase_time;
-	double setup_time1, process_time1, mpi_time1, fftw_time1, tot_time1, kernel_time1, reduce_time1, compose_time1, phase_time1;
-	double writetime, writetime1;
-	struct timespec begin, finish, begin0, begink, finishk;
-	double elapsed;
-
-        // Number of sectors in which the mesh is divided along the v direction
-	// IF nsectors < 0, nsectors = NUMBER OF MPI RANKS
-	long nsectors = NUM_OF_SECTORS;
-
-
-        /* Get number of threads, exit if not given */
-        if (argc == 1) {
-          fprintf(stderr, "Usage: %s number_of_OMP_Threads \n", argv[0]);
-          exit(1);
-        }
-        // Set the number of OpenMP threads
-        num_threads = atoi(argv[1]);
-
-        // Number of threads <= 0 not acceptable
-        if (num_threads <= 0) {
-           fprintf(stderr, "Wrong parameter: %s\n\n", argv[1]);
-           fprintf(stderr, "Usage: %s number_of_OMP_Threads \n", argv[0]);
-           exit(1);
-        }
-
-	// Internal profiling
-	clock_gettime(CLOCK_MONOTONIC, &begin0);
-	start0 = clock();
-
-#ifdef USE_MPI
-	// Intialize MPI environment
-	MPI_Init(&argc,&argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
-	if(rank == 0)printf("Running with %d MPI tasks\n",size);
-  #ifdef USE_FFTW
-	// Initialize FFTW environment
-	fftw_mpi_init();
-  #endif
-#else
-	// If MPI is not used the two parameters are set for consistency
-	rank = 0;
-	size = 1;
-#endif
-
-  if(rank == 0)printf("Running with %d threads\n",num_threads);
-
-#ifdef ACCOMP
-if(rank == 0){
-  if (0 == omp_get_num_devices()) {
-      printf("No accelerator found ... exit\n");
-      exit(255);
-   }
-   printf("Number of available GPUs %d\n", omp_get_num_devices());
-   #ifdef NVIDIA
-      prtAccelInfo();
-   #endif
- }
-#endif
-
-	// set the local size of the image
-	local_grid_size_x = grid_size_x;
-	if (nsectors < 0) nsectors = size;
-	local_grid_size_y = grid_size_y/nsectors;
-	//nsectors = size;
-
-	// LOCAL grid size
-	xaxis = local_grid_size_x;
-	yaxis = local_grid_size_y;
-	clock_gettime(CLOCK_MONOTONIC, &begin);
-	start = clock();
-
-        // INPUT FILES (only the first ndatasets entries are used)
-	int ndatasets = 1;
-        strcpy(datapath_multi[0],"/m100_scratch/userexternal/ederubei/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
-        //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/");
-        //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
-        //strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/");
-
-	strcpy(datapath,datapath_multi[0]);
-	// Read metadata
-	strcpy(filename,datapath);
-	strcat(filename,metafile);
-	pFile = fopen (filename,"r");
-        fscanf(pFile,"%ld",&Nmeasures);
-        fscanf(pFile,"%ld",&Nvis);
-        fscanf(pFile,"%ld",&freq_per_chan);
-        fscanf(pFile,"%ld",&polarisations);
-        fscanf(pFile,"%ld",&Ntimes);
-        fscanf(pFile,"%lf",&dt);
-        fscanf(pFile,"%lf",&thours);
-        fscanf(pFile,"%ld",&baselines);
-        fscanf(pFile,"%lf",&uvmin);
-        fscanf(pFile,"%lf",&uvmax);
-        fscanf(pFile,"%lf",&wmin);
-        fscanf(pFile,"%lf",&wmax);
-	fclose(pFile);
-
-
-	// WATCH THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-	int nsub = 1000;
-	//int nsub = 10;
-	printf("Subtracting last %d measurements\n",nsub);
-  Nmeasures = Nmeasures-nsub;
-	Nvis = Nmeasures*freq_per_chan*polarisations;
-
-  // calculate the coordinates of the center
-	double uvshift = uvmin/(uvmax-uvmin);
-	//printf("UVSHIFT %f %f %f %f %f\n",uvmin, uvmax, wmin, wmax, uvshift);
-
-	if (rank == 0)
-	{
-	   printf("N. measurements %ld\n",Nmeasures);
-	   printf("N. visibilities %ld\n",Nvis);
-	}
-
-  // Set temporary local size of points
-	long nm_pe = (long)(Nmeasures/size);
-	long remaining = Nmeasures%size;
-
-  long startrow = rank*nm_pe;
-  if (rank == size-1)nm_pe = nm_pe+remaining;
-
-	long Nmeasures_tot = Nmeasures;
-	Nmeasures = nm_pe;
-	long Nvis_tot = Nvis;
-	Nvis = Nmeasures*freq_per_chan*polarisations;
-	Nweights = Nmeasures*polarisations;
-
-#ifdef VERBOSE
-	printf("N. measurements on %d %ld\n",rank,Nmeasures);
-	printf("N. visibilities on %d %ld\n",rank,Nvis);
-#endif
-
-
-	// DAV: all these arrays can be allocatate statically for the sake of optimization. However be careful that if MPI is used
-	// all the sizes are rescaled by the number of MPI tasks
-	// Allocate arrays
-	uu = (double*) calloc(Nmeasures,sizeof(double));
-	vv = (double*) calloc(Nmeasures,sizeof(double));
-	ww = (double*) calloc(Nmeasures,sizeof(double));
-	weights = (float*) calloc(Nweights,sizeof(float));
-	visreal = (float*) calloc(Nvis,sizeof(float));
-	visimg = (float*) calloc(Nvis,sizeof(float));
-
-  if(rank == 0)printf("READING DATA\n");
-	// Read data
-	strcpy(filename,datapath);
-	strcat(filename,ufile);
-	//printf("Reading %s\n",filename);
-
-	pFile = fopen (filename,"rb");
-	fseek (pFile,startrow*sizeof(double),SEEK_SET);
-	fread(uu,Nmeasures*sizeof(double),1,pFile);
-	fclose(pFile);
-
-	strcpy(filename,datapath);
-	strcat(filename,vfile);
-	//printf("Reading %s\n",filename);
-
-  pFile = fopen (filename,"rb");
-	fseek (pFile,startrow*sizeof(double),SEEK_SET);
-	fread(vv,Nmeasures*sizeof(double),1,pFile);
-	fclose(pFile);
-
-	strcpy(filename,datapath);
-	strcat(filename,wfile);
-	//printf("Reading %s\n",filename);
-
-	pFile = fopen (filename,"rb");
-	fseek (pFile,startrow*sizeof(double),SEEK_SET);
-	fread(ww,Nmeasures*sizeof(double),1,pFile);
-	fclose(pFile);
-
-#ifdef USE_MPI
-	MPI_Barrier(MPI_COMM_WORLD);
-#endif
-
-	clock_gettime(CLOCK_MONOTONIC, &finish);
-	end = clock();
-	setup_time = ((double) (end - start)) / CLOCKS_PER_SEC;
-	setup_time1 = (finish.tv_sec - begin.tv_sec);
-	setup_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
-
-
-  if(rank == 0)printf("GRIDDING DATA\n");
-
-	// Create histograms and linked lists
-  clock_gettime(CLOCK_MONOTONIC, &begin);
-  start = clock();
-
-	// Initialize linked list
-	struct sectorlist ** sectorhead;
-	sectorhead = (struct sectorlist **) malloc((nsectors+1) * sizeof(struct sectorlist));
-	for (int isec=0; isec<=nsectors; isec++)
-	{
-		sectorhead[isec] = malloc(sizeof(struct sectorlist));
-		sectorhead[isec]->index = -1;
-		sectorhead[isec]->next = NULL;
-	}
-
-
-	long * histo_send = (long*) calloc(nsectors+1,sizeof(long));
-	int * boundary = (int*) calloc(Nmeasures,sizeof(int));
-	double uuh,vvh;
-	for (long iphi = 0; iphi < Nmeasures; iphi++)
-	{
-     boundary[iphi] = -1;
-	   uuh = uu[iphi];
-	   vvh = vv[iphi];
-	   int binphi = (int)(vvh*nsectors);
-	   // check if the point influence also neighboring slabs
-	   double updist = (double)((binphi+1)*yaxis)*dx - vvh;
-	   double downdist = vvh - (double)(binphi*yaxis)*dx;
-	   //
-	   histo_send[binphi]++;
-     Push(&sectorhead[binphi],iphi);
-     if(updist < w_supporth && updist >= 0.0) {histo_send[binphi+1]++; boundary[iphi] = binphi+1; Push(&sectorhead[binphi+1],iphi);};
-	   if(downdist < w_supporth && binphi > 0 && downdist >= 0.0) {histo_send[binphi-1]++; boundary[iphi] = binphi-1; Push(&sectorhead[binphi-1],iphi);};
-	}
-
-#ifdef PIPPO
-	struct sectorlist * current;
-	long iiii = 0;
-	for (int j=0; j<nsectors; j++)
-	{
-		current = sectorhead[j];
-		iiii = 0;
-		while (current->index != -1)
-		{
-			printf("%d %d %ld %ld %ld\n",rank,j,iiii,histo_send[j],current->index);
-			current = current->next;
-			iiii++;
-		}
-	}
-#endif
-
-#ifdef VERBOSE
-  for (int iii=0; iii<nsectors+1; iii++)printf("HISTO %d %d %ld\n",rank, iii, histo_send[iii]);
-#endif
-
-// Create sector grid
-  double * gridss;
-  double * gridss_w;
-  double * gridss_real;
-  double * gridss_img;
-  double * grid;
-  long size_of_grid;
-  size_of_grid = 2*num_w_planes*xaxis*yaxis;
-  gridss = (double*) calloc(size_of_grid,sizeof(double));
-  gridss_w = (double*) calloc(size_of_grid,sizeof(double));
-  gridss_real = (double*) calloc(size_of_grid/2,sizeof(double));
-  gridss_img = (double*) calloc(size_of_grid/2,sizeof(double));
-  // Create destination slab
-  grid = (double*) calloc(size_of_grid,sizeof(double));
-  // Create temporary global grid
-#ifndef USE_MPI
-  double * gridtot = (double*) calloc(2*grid_size_x*grid_size_y*num_w_planes,sizeof(double));
-#endif
-  double shift = (double)(dx*yaxis);
-  // Open the MPI Memory Window for the slab
-#ifdef USE_MPI
-  MPI_Win slabwin;
-  MPI_Win_create(grid, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &slabwin);
-  MPI_Win_fence(0,slabwin);
-#endif
-#ifndef USE_MPI
-  pFile1 = fopen (outfile1,"w");
-#endif
-
-
-	// loop over files
-	//
-	kernel_time = 0.0;
-	kernel_time1 = 0.0;
-	reduce_time = 0.0;
-	reduce_time1 = 0.0;
-	compose_time = 0.0;
-	compose_time1 = 0.0;
-
-	// MAIN LOOP OVER FILES
-	//
-	for (int ifiles=0; ifiles<ndatasets; ifiles++)
-	{
-	strcpy(filename,datapath_multi[ifiles]);
-        printf("Processing %s, %d of %d\n",filename,ifiles+1,ndatasets);
-
-        // Read metadata
-        strcpy(filename,datapath);
-        strcat(filename,metafile);
-        pFile = fopen (filename,"r");
-        fscanf(pFile,"%ld",&Nmeasures0);
-        fscanf(pFile,"%ld",&Nvis0);
-        fscanf(pFile,"%ld",&freq_per_chan0);
-        fscanf(pFile,"%ld",&polarisations0);
-        fscanf(pFile,"%ld",&Ntimes0);
-        fscanf(pFile,"%lf",&dt0);
-        fscanf(pFile,"%lf",&thours0);
-        fscanf(pFile,"%ld",&baselines0);
-        fscanf(pFile,"%lf",&uvmin);
-        fscanf(pFile,"%lf",&uvmax);
-        fscanf(pFile,"%lf",&wmin0);
-        fscanf(pFile,"%lf",&wmax0);
-        fclose(pFile);
-
-        // calculate the resolution in radians
-        resolution = 1.0/MAX(abs(uvmin),abs(uvmax));
-        // calculate the resolution in arcsec
-        double resolution_asec = (3600.0*180.0)/MAX(abs(uvmin),abs(uvmax))/PI;
-        printf("RESOLUTION = %f rad, %f arcsec\n", resolution, resolution_asec);
-
-        strcpy(filename,datapath);
-        strcat(filename,weightsfile);
-        pFile = fopen (filename,"rb");
-        fseek (pFile,startrow*polarisations*sizeof(float),SEEK_SET);
-        fread(weights,(Nweights)*sizeof(float),1,pFile);
-        fclose(pFile);
-
-        strcpy(filename,datapath);
-        strcat(filename,visrealfile);
-
-  pFile = fopen (filename,"rb");
-  fseek (pFile,startrow*freq_per_chan*polarisations*sizeof(float),SEEK_SET);
-  fread(visreal,Nvis*sizeof(float),1,pFile);
-  fclose(pFile);
-  strcpy(filename,datapath);
-  strcat(filename,visimgfile);
-#ifdef VERBOSE
-  printf("Reading %s\n",filename);
-#endif
-  pFile = fopen (filename,"rb");
-  fseek (pFile,startrow*freq_per_chan*polarisations*sizeof(float),SEEK_SET);
-  fread(visimg,Nvis*sizeof(float),1,pFile);
-  fclose(pFile);
-
-#ifdef USE_MPI
-  MPI_Barrier(MPI_COMM_WORLD);
-#endif
-  // Declare temporary arrays for the masking
-  double * uus;
-  double * vvs;
-  double * wws;
-  float * visreals;
-  float * visimgs;
-  float * weightss;
-	long isector;
-
-  for (long isector_count=0; isector_count<nsectors; isector_count++)
-      {
-        clock_gettime(CLOCK_MONOTONIC, &begink);
-        startk = clock();
-        // define local destination sector
-        //isector = (isector_count+rank)%size;
-        isector = isector_count;
-	  // allocate sector arrays
-        long Nsec = histo_send[isector];
-	      uus = (double*) malloc(Nsec*sizeof(double));
-	      vvs = (double*) malloc(Nsec*sizeof(double));
-	      wws = (double*) malloc(Nsec*sizeof(double));
-	      long Nweightss = Nsec*polarisations;
-	      long Nvissec = Nweightss*freq_per_chan;
-	      weightss = (float*) malloc(Nweightss*sizeof(float));
-	      visreals = (float*) malloc(Nvissec*sizeof(float));
-	      visimgs = (float*) malloc(Nvissec*sizeof(float));
-
-	  // select data for this sector
-        long icount = 0;
-	      long ip = 0;
-	      long inu = 0;
-//CLAAAA
-	       struct sectorlist * current;
-	       current = sectorhead[isector];
-
-
-
-	       while (current->index != -1)
-          {
-             long ilocal = current->index;
-      	     //double vvh = vv[ilocal];
-             //int binphi = (int)(vvh*nsectors);
-	           //if (binphi == isector || boundary[ilocal] == isector) {
-	           uus[icount] = uu[ilocal];
-	           vvs[icount] = vv[ilocal]-isector*shift;
-	           wws[icount] = ww[ilocal];
-             for (long ipol=0; ipol<polarisations; ipol++)
-                {
-		              weightss[ip] = weights[ilocal*polarisations+ipol];
-                  ip++;
-                }
-             for (long ifreq=0; ifreq<polarisations*freq_per_chan; ifreq++)
-                {
-	                 visreals[inu] = visreal[ilocal*polarisations*freq_per_chan+ifreq];
-	                 visimgs[inu] = visimg[ilocal*polarisations*freq_per_chan+ifreq];
-		               //if(visimgs[inu]>1e10 || visimgs[inu]<-1e10)printf("%f %f %ld %ld %d %ld %ld\n",visreals[inu],visimgs[inu],inu,Nvissec,rank,ilocal*polarisations*freq_per_chan+ifreq,Nvis);
-	                 inu++;
-	              }
-	           icount++;
-	     //}
-	            current = current->next;
-             }
-
-	  clock_gettime(CLOCK_MONOTONIC, &finishk);
-	  endk = clock();
-	  compose_time += ((double) (endk - startk)) / CLOCKS_PER_SEC;
-	  compose_time1 += (finishk.tv_sec - begink.tv_sec);
-	  compose_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;
-
-    #ifndef USE_MPI
-	  double uumin = 1e20;
-	  double vvmin = 1e20;
-	  double uumax = -1e20;
-	  double vvmax = -1e20;
-
-          for (long ipart=0; ipart<Nsec; ipart++)
-          {
-	       uumin = MIN(uumin,uus[ipart]);
-	       uumax = MAX(uumax,uus[ipart]);
-	       vvmin = MIN(vvmin,vvs[ipart]);
-	       vvmax = MAX(vvmax,vvs[ipart]);
-
-
-               if(ipart%10 == 0)fprintf (pFile, "%ld %f %f %f\n",isector,uus[ipart],vvs[ipart]+isector*shift,wws[ipart]);
-          }
-
-	  printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax);
-          #endif
-
-          // Make convolution on the grid
-          #ifdef VERBOSE
-	  printf("Processing sector %ld\n",isector);
-	  #endif
-          clock_gettime(CLOCK_MONOTONIC, &begink);
-          startk = clock();
-
-          wstack(num_w_planes,
-               Nsec,
-               freq_per_chan,
-               polarisations,
-               uus,
-               vvs,
-               wws,
-               visreals,
-               visimgs,
-               weightss,
-               dx,
-               dw,
-               w_support,
-	             xaxis,
-	             yaxis,
-               gridss,
-               num_threads);
-
-
-/* int z =0 ;
-#pragma omp target map(to:test_i_gpu) map(from:z)
-{
-  int x; // only accessible from accelerator
-  x = 2;
-  z = x + test_i_gpu;
-}*/
-
-
-
-	  clock_gettime(CLOCK_MONOTONIC, &finishk);
-	  endk = clock();
-	  kernel_time += ((double) (endk - startk)) / CLOCKS_PER_SEC;
-	  kernel_time1 += (finishk.tv_sec - begink.tv_sec);
-	  kernel_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;
-          #ifdef VERBOSE
-	  printf("Processed sector %ld\n",isector);
-          #endif
-          clock_gettime(CLOCK_MONOTONIC, &begink);
-          startk = clock();
-
-          //for (long iii=0; iii<2*xaxis*yaxis*num_w_planes; iii++)printf("--> %f\n",gridss[iii]);
-
-          #ifndef USE_MPI
-	  long stride = isector*2*xaxis*yaxis*num_w_planes;
-          for (long iii=0; iii<2*xaxis*yaxis*num_w_planes; iii++)gridtot[stride+iii] = gridss[iii];
-	  #endif
-
-	  // Write grid in the corresponding remote slab
-          #ifdef USE_MPI
-	  int target_rank = (int)isector;
-	  //int target_rank = (int)(size-isector-1);
-	  #ifdef ONE_SIDE
-	  printf("One Side communication active\n");
-	  MPI_Win_lock(MPI_LOCK_SHARED,target_rank,0,slabwin);
-	  MPI_Accumulate(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,MPI_SUM,slabwin);
-	  MPI_Win_unlock(target_rank,slabwin);
-	  //MPI_Put(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,slabwin);
-          #else
-          MPI_Reduce(gridss,grid,size_of_grid,MPI_DOUBLE,MPI_SUM,target_rank,MPI_COMM_WORLD);
-          #endif //ONE_SIDE
-          #endif //USE_MPI
-
-	  clock_gettime(CLOCK_MONOTONIC, &finishk);
-	  endk = clock();
-	  reduce_time += ((double) (endk - startk)) / CLOCKS_PER_SEC;
-	  reduce_time1 += (finishk.tv_sec - begink.tv_sec);
-	  reduce_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;
-          // Go to next sector
-	  for (long inull=0; inull<2*num_w_planes*xaxis*yaxis; inull++)gridss[inull] = 0.0;
-
-	  // Deallocate all sector arrays
-	  free(uus);
-	  free(vvs);
-	  free(wws);
-	  free(weightss);
-	  free(visreals);
-	  free(visimgs);
-        // End of loop over sectors
-        }
-	// End of loop over input files
-	}
-
-	// Finalize MPI communication
-        #ifdef USE_MPI
-	MPI_Win_fence(0,slabwin);
-	#endif
-
-        // Swap left and right parts APPARENTLY NOT NECESSARY
-	/*
-	for (long kswap=0; kswap<num_w_planes; kswap++)
-	for (long jswap=0; jswap<yaxis; jswap++)
-	for (long iswap=0; iswap<xaxis; iswap++)
-	{
-           long index_origin = 2*(iswap + jswap*xaxis + kswap*yaxis*xaxis);
-	   gridss[index_origin] = grid[index_origin];
-	   gridss[index_origin+1] = grid[index_origin+1];
-	}
-	for (long kswap=0; kswap<num_w_planes; kswap++)
-	for (long jswap=0; jswap<yaxis; jswap++)
-	for (long iswap=0; iswap<xaxis/2; iswap++)
-	{
-           long index_origin = 2*(iswap + jswap*xaxis + kswap*yaxis*xaxis);
-	   long index_destination = 2*(iswap+xaxis/2 + jswap*xaxis + kswap*yaxis*xaxis);
-	   grid[index_destination] = gridss[index_origin];
-	   grid[index_destination+1] = gridss[index_origin+1];
-	}
-
-	for (long kswap=0; kswap<num_w_planes; kswap++)
-	for (long jswap=0; jswap<yaxis; jswap++)
-	for (long iswap=xaxis/2; iswap<xaxis; iswap++)
-	{
-           long index_origin = 2*(iswap + jswap*xaxis + kswap*yaxis*xaxis);
-	   long index_destination = 2*(iswap-xaxis/2 + jswap*xaxis + kswap*yaxis*xaxis);
-	   grid[index_destination] = gridss[index_origin];
-	   grid[index_destination+1] = gridss[index_origin+1];
-	}
-	*/
-
-        #ifndef USE_MPI
-        fclose(pFile1);
-        #endif
-        #ifdef USE_MPI
-	MPI_Barrier(MPI_COMM_WORLD);
-	#endif
-
-        end = clock();
-        clock_gettime(CLOCK_MONOTONIC, &finish);
-        process_time = ((double) (end - start)) / CLOCKS_PER_SEC;
-        process_time1 = (finish.tv_sec - begin.tv_sec);
-        process_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
-        clock_gettime(CLOCK_MONOTONIC, &begin);
-
-
-#ifdef WRITE_DATA
-	// Write results
-	if (rank == 0)
-	{
-          printf("WRITING GRIDDED DATA\n");
-          pFilereal = fopen (outfile2,"wb");
-          pFileimg = fopen (outfile3,"wb");
-	  #ifdef USE_MPI
-	  for (int isector=0; isector<nsectors; isector++)
-          {
-	      MPI_Win_lock(MPI_LOCK_SHARED,isector,0,slabwin);
-	      MPI_Get(gridss,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,slabwin);
-	      MPI_Win_unlock(isector,slabwin);
-	      for (long i=0; i<size_of_grid/2; i++)
-	      {
-		      gridss_real[i] = gridss[2*i];
-		      gridss_img[i] = gridss[2*i+1];
-	      }
-	      if (num_w_planes > 1)
-	      {
-                for (int iw=0; iw<num_w_planes; iw++)
-                for (int iv=0; iv<yaxis; iv++)
-                for (int iu=0; iu<xaxis; iu++)
-                {
-			  long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double);
-                          long index = iu + iv*xaxis + iw*xaxis*yaxis;
-			  fseek(pFilereal, global_index, SEEK_SET);
-			  fwrite(&gridss_real[index], 1, sizeof(double), pFilereal);
-                }
-                for (int iw=0; iw<num_w_planes; iw++)
-                for (int iv=0; iv<yaxis; iv++)
-                for (int iu=0; iu<xaxis; iu++)
-                {
-                          long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double);
-                          long index = iu + iv*xaxis + iw*xaxis*yaxis;
-                          fseek(pFileimg, global_index, SEEK_SET);
-                          fwrite(&gridss_img[index], 1, sizeof(double), pFileimg);
-                          //double v_norm = sqrt(gridss[index]*gridss[index]+gridss[index+1]*gridss[index+1]);
-                          //fprintf (pFile, "%d %d %d %f %f %f\n", iu,isector*yaxis+iv,iw,gridss[index],gridss[index+1],v_norm);
-                }
-
-	      } else {
-		for (int iw=0; iw<num_w_planes; iw++)
-		{
-			  long global_index = (xaxis*isector*yaxis + iw*grid_size_x*grid_size_y)*sizeof(double);
-                          long index = iw*xaxis*yaxis;
-			  fseek(pFilereal, global_index, SEEK_SET);
-                          fwrite(&gridss_real[index], xaxis*yaxis, sizeof(double), pFilereal);
-                          fseek(pFileimg, global_index, SEEK_SET);
-                          fwrite(&gridss_img[index], xaxis*yaxis, sizeof(double), pFileimg);
-		}
-	      }
-          }
-	  #else
-          for (int iw=0; iw<num_w_planes; iw++)
-            for (int iv=0; iv<grid_size_y; iv++)
-               for (int iu=0; iu<grid_size_x; iu++)
-               {
-                          long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y);
-		          fwrite(&gridtot[index], 1, sizeof(double), pFilereal);
-		          fwrite(&gridtot[index+1], 1, sizeof(double), pFileimg);
-                          //double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]);
-                          //fprintf (pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm);
-               }
-          #endif
-          fclose(pFilereal);
-          fclose(pFileimg);
-	}
-
-        #ifdef USE_MPI
-        MPI_Win_fence(0,slabwin);
-        #endif
-#endif //WRITE_DATA
-
-
-#ifdef USE_FFTW
-	// FFT transform the data (using distributed FFTW)
-
-	if(rank == 0)printf("PERFORMING FFT\n");
-        clock_gettime(CLOCK_MONOTONIC, &begin);
-        start = clock();
-        fftw_plan plan;
-        fftw_complex *fftwgrid;
-	ptrdiff_t alloc_local, local_n0, local_0_start;
-	double norm = 1.0/(double)(grid_size_x*grid_size_y);
-
-        // map the 1D array of complex visibilities to a 2D array required by FFTW (complex[*][2])
-	// x is the direction of contiguous data and maps to the second parameter
-	// y is the parallelized direction and corresponds to the first parameter (--> n0)
-	// and perform the FFT per w plane
-	alloc_local = fftw_mpi_local_size_2d(grid_size_y, grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start);
-	fftwgrid = fftw_alloc_complex(alloc_local);
-	plan = fftw_mpi_plan_dft_2d(grid_size_y, grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE);
-
-	long fftwindex = 0;
-	long fftwindex2D = 0;
-	for (int iw=0; iw<num_w_planes; iw++)
-        {
-            //printf("FFTing plan %d\n",iw);
-            // select the w-plane to transform
-            for (int iv=0; iv<yaxis; iv++)
-            {
-               for (int iu=0; iu<xaxis; iu++)
-               {
-		   fftwindex2D = iu + iv*xaxis;
-		   fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
-                   fftwgrid[fftwindex2D][0] = grid[fftwindex];
-                   fftwgrid[fftwindex2D][1] = grid[fftwindex+1];
-	       }
-	    }
-
-            // do the transform for each w-plane
-	    fftw_execute(plan);
-
-	    // save the transformed w-plane
-            for (int iv=0; iv<yaxis; iv++)
-            {
-               for (int iu=0; iu<xaxis; iu++)
-               {
-		   fftwindex2D = iu + iv*xaxis;
-		   fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
-                   gridss[fftwindex] = norm*fftwgrid[fftwindex2D][0];
-                   gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D][1];
-               }
-            }
-
-	}
-
-        fftw_destroy_plan(plan);
-
-        #ifdef USE_MPI
-        MPI_Win_fence(0,slabwin);
-	MPI_Barrier(MPI_COMM_WORLD);
-        #endif
-
-        end = clock();
-        clock_gettime(CLOCK_MONOTONIC, &finish);
-        fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC;
-        fftw_time1 = (finish.tv_sec - begin.tv_sec);
-        fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
-        clock_gettime(CLOCK_MONOTONIC, &begin);
-
-#ifdef WRITE_DATA
-        // Write results
-        #ifdef USE_MPI
-	MPI_Win writewin;
-        MPI_Win_create(gridss, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin);
-	MPI_Win_fence(0,writewin);
-        #endif
-        if (rank == 0)
-        {
-          printf("WRITING FFT TRANSFORMED DATA\n");
-          pFilereal = fopen (fftfile2,"wb");
-          pFileimg = fopen (fftfile3,"wb");
-          #ifdef USE_MPI
-          for (int isector=0; isector<nsectors; isector++)
-          {
-              MPI_Win_lock(MPI_LOCK_SHARED,isector,0,writewin);
-              MPI_Get(gridss_w,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,writewin);
-              MPI_Win_unlock(isector,writewin);
-	      for (long i=0; i<size_of_grid/2; i++)
-	      {
-		      gridss_real[i] = gridss_w[2*i];
-		      gridss_img[i] = gridss_w[2*i+1];
-	      }
-	      if (num_w_planes > 1)
-	      {
-                for (int iw=0; iw<num_w_planes; iw++)
-                for (int iv=0; iv<yaxis; iv++)
-                for (int iu=0; iu<xaxis; iu++)
-                {
-			  long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double);
-                          long index = iu + iv*xaxis + iw*xaxis*yaxis;
-			  fseek(pFilereal, global_index, SEEK_SET);
-			  fwrite(&gridss_real[index], 1, sizeof(double), pFilereal);
-                }
-                for (int iw=0; iw<num_w_planes; iw++)
-                for (int iv=0; iv<yaxis; iv++)
-                for (int iu=0; iu<xaxis; iu++)
-                {
-                          long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double);
-                          long index = iu + iv*xaxis + iw*xaxis*yaxis;
-                          fseek(pFileimg, global_index, SEEK_SET);
-                          fwrite(&gridss_img[index], 1, sizeof(double), pFileimg);
-                }
-              } else {
-                          fwrite(gridss_real, size_of_grid/2, sizeof(double), pFilereal);
-                          fwrite(gridss_img, size_of_grid/2, sizeof(double), pFileimg);
-              }
-
-
-          }
-          #else
-          /*
-	  for (int iw=0; iw<num_w_planes; iw++)
-            for (int iv=0; iv<grid_size_y; iv++)
-               for (int iu=0; iu<grid_size_x; iu++)
-               {
-                          int isector = 0;
-                          long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y);
-                          double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]);
-                          fprintf (pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm);
-               }
-	  */
-          #endif
-          fclose(pFilereal);
-          fclose(pFileimg);
-        }
-
-        #ifdef USE_MPI
-        //MPI_Win_fence(0,writewin);
-        MPI_Win_fence(0,writewin);
-	MPI_Win_free(&writewin);
-	MPI_Barrier(MPI_COMM_WORLD);
-        #endif
-#endif //WRITE_DATA
-
-	fftw_free(fftwgrid);
-
-	// Phase correction
-        clock_gettime(CLOCK_MONOTONIC, &begin);
-        start = clock();
-	if(rank == 0)printf("PHASE CORRECTION\n");
-        double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double));
-        double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double));
-
-        phase_correction(gridss,image_real,image_imag,xaxis,yaxis,num_w_planes,grid_size_x,grid_size_y,resolution,wmin,wmax,num_threads);
-
-        end = clock();
-        clock_gettime(CLOCK_MONOTONIC, &finish);
-        phase_time = ((double) (end - start)) / CLOCKS_PER_SEC;
-        phase_time1 = (finish.tv_sec - begin.tv_sec);
-        phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
-#ifdef WRITE_IMAGE
-
-        if(rank == 0)
-        {
-            #ifdef FITSIO
-            printf("REMOVING RESIDUAL FITS FILE\n");
-            remove(testfitsreal);
-            remove(testfitsimag);
-
-
-	    printf("FITS CREATING\n");
-            status = 0;
-
-            fits_create_file(&fptrimg, testfitsimag, &status);
-            fits_create_img(fptrimg, DOUBLE_IMG, naxis, naxes, &status);
-//          fits_write_img(fptrimg, TDOUBLE, fpixel, nelements, image_imag, &status);
-            fits_close_file(fptrimg, &status);
-
-	    status = 0;
-
-            fits_create_file(&fptreal, testfitsreal, &status);
-	    fits_create_img(fptreal, DOUBLE_IMG, naxis, naxes, &status);
-//	    fits_write_img(fptreal, TDOUBLE, fpixel, nelements, image_real, &status);
-	    fits_close_file(fptreal, &status);
-	    #endif
-
-            pFilereal = fopen (fftfile2, "wb");
-	    pFileimg = fopen (fftfile3, "wb");
-	    fclose(pFilereal);
-	    fclose(pFileimg);
-        }
-	#ifdef USE_MPI
-	MPI_Barrier(MPI_COMM_WORLD);
-        #endif
-        if(rank == 0)printf("WRITING IMAGE\n");
-	for (int isector=0; isector<size; isector++)
-	{
-	    #ifdef USE_MPI
-	    MPI_Barrier(MPI_COMM_WORLD);
-            #endif
-	    if(isector == rank)
-            //if(rank == 0)
-	    {
-               #ifdef FITSIO
-
-	       printf("%d writing\n",isector);
-               long * fpixel = (long *) malloc(sizeof(long)*naxis);
-               long * lpixel = (long *) malloc(sizeof(long)*naxis);
-
-
-               fpixel[0] = 1;
-               fpixel[1] = isector*yaxis+1;
-               lpixel[0] = xaxis;
-               lpixel[1] = (isector+1)*yaxis;
-               //printf("fpixel %d, %d\n", fpixel[0], fpixel[1]);
-	       //printf("lpixel %d, %d\n", lpixel[0], lpixel[1]);
-               status = 0;
-	       fits_open_image(&fptreal, testfitsreal, READWRITE, &status);
-               fits_write_subset(fptreal, TDOUBLE, fpixel, lpixel, image_real, &status);
-	       fits_close_file(fptreal, &status);
-
-               status = 0;
-               fits_open_image(&fptrimg, testfitsimag, READWRITE, &status);
-               fits_write_subset(fptrimg, TDOUBLE, fpixel, lpixel, image_imag, &status);
-               fits_close_file(fptrimg, &status);
-
-               #endif
-
-               pFilereal = fopen (fftfile2,"ab");
-               pFileimg = fopen (fftfile3,"ab");
-
-	       long global_index = isector*(xaxis*yaxis)*sizeof(double);
-
-               fseek(pFilereal, global_index, SEEK_SET);
-               fwrite(image_real, xaxis*yaxis, sizeof(double), pFilereal);
-               fseek(pFileimg, global_index, SEEK_SET);
-               fwrite(image_imag, xaxis*yaxis, sizeof(double), pFileimg);
-
-               fclose(pFilereal);
-               fclose(pFileimg);
-	    }
-	}
-	#ifdef USE_MPI
-	MPI_Barrier(MPI_COMM_WORLD);
-        #endif
-
-#endif //WRITE_IMAGE
-
-
-#endif //USE_FFTW
-
-	end = clock();
-	clock_gettime(CLOCK_MONOTONIC, &finish);
-	tot_time = ((double) (end - start0)) / CLOCKS_PER_SEC;
-	tot_time1 = (finish.tv_sec - begin0.tv_sec);
-	tot_time1 += (finish.tv_nsec - begin0.tv_nsec) / 1000000000.0;
-
-        if (rank == 0)
-        {
-          printf("Setup time:    %f sec\n",setup_time);
-          printf("Process time:  %f sec\n",process_time);
-          printf("Kernel time = %f, Array Composition time %f, Reduce time: %f sec\n",kernel_time,compose_time,reduce_time);
-#ifdef USE_FFTW
-          printf("FFTW time:     %f sec\n",fftw_time);
-          printf("Phase time:    %f sec\n",phase_time);
-#endif
-          printf("TOT time:      %f sec\n",tot_time);
-	  if(num_threads > 1)
-          {
-            printf("PSetup time:   %f sec\n",setup_time1);
-            printf("PProcess time: %f sec\n",process_time1);
-            printf("PKernel time = %f, PArray Composition time %f, PReduce time: %f sec\n",kernel_time1,compose_time1,reduce_time1);
-#ifdef USE_FFTW
-            printf("PFFTW time:    %f sec\n",fftw_time1);
-            printf("PPhase time:   %f sec\n",phase_time1);
-#endif
-            printf("PTOT time:     %f sec\n",tot_time1);
-	  }
-        }
-
-	if (rank == 0)
-	{
-	 pFile = fopen (timingfile,"w");
-	 if (num_threads == 1)
-         {
-	   fprintf(pFile, "%f %f %f %f %f %f %f\n",setup_time,kernel_time,compose_time,reduce_time,fftw_time,phase_time,tot_time);
-	 } else {
-	   fprintf(pFile, "%f %f %f %f %f %f %f\n",setup_time1,kernel_time1,compose_time1,reduce_time1,fftw_time1,phase_time1,tot_time1);
-	 }  
-	 fclose(pFile);
-	} 
-
-	// Close MPI environment
-	#ifdef USE_MPI
-        MPI_Win_fence(0,slabwin);
-	MPI_Win_free(&slabwin);
-	MPI_Finalize();
-	#endif
-}
-- 
GitLab