From 99ac8a60106b5aa2cba62b79ac0e32fe37418e9a Mon Sep 17 00:00:00 2001
From: Claudio Gheller <cgheller@login02.m100.cineca.it>
Date: Fri, 1 Oct 2021 11:28:44 +0200
Subject: [PATCH] first implementation of phase correction, added test data and
 matlab script to visualize results

---
 Makefile            | 18 ++++++++++++-----
 phase_correction.cu | 47 ++++++++++++++++++++++++++++++++++++++++-----
 w-stacking-fftw.c   | 27 ++++++++++++++++++--------
 w-stacking.cu       |  2 --
 w-stacking.h        |  4 ++++
 5 files changed, 78 insertions(+), 20 deletions(-)

diff --git a/Makefile b/Makefile
index 73d9ee4..59a3bc3 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,16 @@
+# comment/uncomment the various options depending hoe you want to build the program
+# create MPI code
 OPT += -DUSE_MPI
+# use FFTW (it can be switched on ONLY if MPI is active)
 OPT += -DUSE_FFTW
+# perform one-side communication (suggested) instead of reduce (only if MPI is active)
 OPT += -DONE_SIDE
-OPT += -DNOWRITE_DATA
+# write the full 3D cube of gridded visibilities and its FFT transform
+#OPT += -DWRITE_DATA
+# write the final image
 OPT += -DWRITE_IMAGE
+# perform w-stacking phase correction
+# OPT += PHASE_ON
 
 CC = gcc
 CXX = g++
@@ -20,13 +28,13 @@ NVCC = nvcc
 NVFLAGS = -arch=sm_70 -Xcompiler -mno-float128 -std=c++11
 NVLIB = -L/cineca/prod/opt/compilers/cuda/10.1/none/lib64/ -lcudart -lcuda
 
-DEPS = w-stacking.h
+DEPS = w-stacking.h w-stacking-fftw.c w-stacking.cu phase_correction.cu
 COBJ = w-stacking.o w-stacking-fftw.o phase_correction.o
 
-w-stacking.c:
+w-stacking.c: w-stacking.cu
 	cp w-stacking.cu w-stacking.c
 
-phase_correction.c:
+phase_correction.c: phase_correction.cu
 	cp phase_correction.cu phase_correction.c
 
 %.o: %.c $(DEPS)
@@ -40,7 +48,7 @@ serial_cuda:
 	$(CC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c
 	$(CXX) $(CFLAGS) $(OPT) -o w-stackingfftw_serial w-stacking-fftw.o w-stacking.o phase_correction.o $(NVLIB) -lm
 
-mpi: $(COBJ)
+mpi: $(COBJ) 
 	$(MPICC) $(OMP) -o w-stackingCfftw $(CFLAGS) $^ $(LIBS)
 
 mpi_cuda:
diff --git a/phase_correction.cu b/phase_correction.cu
index 50c8c56..9d1cc7e 100644
--- a/phase_correction.cu
+++ b/phase_correction.cu
@@ -5,24 +5,61 @@
 #include <math.h>
 #include <stdlib.h>
 #include <stdio.h>
-#define NWORKERS -1    //100
-#define NTHREADS 32
 
 #ifdef __CUDACC__
 #endif
 
-void phase_correction(double* gridss, double* image_real, double* image_imag, int xaxis, int yaxis, int num_w_planes)
+void phase_correction(double* gridss, double* image_real, double* image_imag, int xaxis, int yaxis, int num_w_planes, int xaxistot, int yaxistot)
 {
+	double dnum_w_planes = (double)num_w_planes;
+	double dxaxistot = (double)xaxistot;
+	double dyaxistot = (double)yaxistot;
 
 	for (int iw=0; iw<num_w_planes; iw++)
 	{
-	    for (int iv=0; iw<yaxis; iv++)
+            double wterm = (double)iw/dnum_w_planes;
+
+	    for (int iv=0; iv<yaxis; iv++)
             for (int iu=0; iu<xaxis; iu++)
             {
+
 		    long index = 2*(iu+iv*xaxis+xaxis*yaxis*iw);
 		    long img_index = iu+iv*xaxis;
+#ifdef PHASE_ON
+                    double xcoord = (double)(iu-xaxistot/2);
+                    if(xcoord < 0.0)xcoord = (double)(iu+xaxistot/2);
+                    double ycoord = (double)(iv-yaxistot/2);
+                    if(ycoord < 0.0)ycoord = (double)(iv+yaxistot/2);
+		    xcoord = xcoord/dxaxistot;
+		    ycoord = ycoord/dyaxistot;
+
+		    double efact;
+		    double preal, pimag;
+		    double radius2 = (xcoord*xcoord+ycoord*ycoord);
+		    if(xcoord <= 1.0)
+		    {
+			    preal = cos(2.0*PI*wterm*(sqrt(1-radius2)-1.0));
+			    pimag = sin(2.0*PI*wterm*(sqrt(1-radius2)-1.0));
+		    } else {
+			    preal = cos(-2.0*PI*wterm*(sqrt(radius2-1.0)-1));
+			    pimag = 0.0;
+		    } 
+
+
+		    double p,q,r,s;
+		    p = gridss[index];
+		    q = gridss[index+1];
+		    r = preal;
+		    s = pimag;
+
+		    //printf("%d %d %d %ld %ld\n",iu,iv,iw,index,img_index);
+		    image_real[img_index] += p*r-q*s;
+		    image_imag[img_index] += p*s+q*r;
+#else
 		    image_real[img_index] += gridss[index];
-		    image_imag[img_index] += gridss[index];
+		    image_imag[img_index] += gridss[index+1];
+#endif
+
             }  
 	}
 
diff --git a/w-stacking-fftw.c b/w-stacking-fftw.c
index a1d0b90..4d8130f 100644
--- a/w-stacking-fftw.c
+++ b/w-stacking-fftw.c
@@ -92,14 +92,14 @@ int main(int argc, char * argv[])
 	double wmin;
 	double wmax;
 
-
+        // MESH SIZE
 	int grid_size_x = 2048;
 	int grid_size_y = 2048;
 	int local_grid_size_x;// = 100;
 	int local_grid_size_y;// = 100;
 	int xaxis;
 	int yaxis;
-	int num_w_planes = 10;
+	int num_w_planes = 1;
 	// DAV: the corresponding KernelLen is calculated within the wstack function. It can be anyway hardcoded for optimization
 	int w_support = 7;
 	int num_threads;// = 4;
@@ -108,8 +108,8 @@ int main(int argc, char * argv[])
 	double w_supporth = (double)((w_support-1)/2)*dx;
 
 	clock_t start, end, start0, startk, endk;
-	double setup_time, process_time, mpi_time, fftw_time, tot_time, kernel_time, reduce_time, compose_time;
-	double setup_time1, process_time1, mpi_time1, fftw_time1, tot_time1, kernel_time1, reduce_time1, compose_time1;
+	double setup_time, process_time, mpi_time, fftw_time, tot_time, kernel_time, reduce_time, compose_time, phase_time;
+	double setup_time1, process_time1, mpi_time1, fftw_time1, tot_time1, kernel_time1, reduce_time1, compose_time1, phase_time1;
 	double writetime, writetime1;
 
 	struct timespec begin, finish, begin0, begink, finishk;
@@ -150,10 +150,12 @@ int main(int argc, char * argv[])
 	clock_gettime(CLOCK_MONOTONIC, &begin);
 	start = clock();
 
-	// CLAAAAAA
+	// INPUT FILES (only the first ndatasets entries are used)
 	int ndatasets = 1;
-        strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
-        strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/");
+        strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/");
+        //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/");
+        //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
+        //strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/");
 
 	strcpy(datapath,datapath_multi[0]);
 	// Read metadata
@@ -822,11 +824,18 @@ int main(int argc, char * argv[])
 	fftw_free(fftwgrid);
 
 	// Phase correction
+        clock_gettime(CLOCK_MONOTONIC, &begin);
+        start = clock();
 	if(rank == 0)printf("PHASE CORRECTION\n");
         double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double));	
         double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double));	
-        phase_correction(gridss,image_real,image_imag,xaxis,yaxis,num_w_planes);
+        phase_correction(gridss,image_real,image_imag,xaxis,yaxis,num_w_planes,grid_size_x,grid_size_y);
 
+        end = clock();
+        clock_gettime(CLOCK_MONOTONIC, &finish);
+        phase_time = ((double) (end - start)) / CLOCKS_PER_SEC;
+        phase_time1 = (finish.tv_sec - begin.tv_sec);
+        phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
 #ifdef WRITE_IMAGE
 
         if(rank == 0)
@@ -884,6 +893,7 @@ int main(int argc, char * argv[])
           printf("Kernel time = %f, Array Composition time %f, Reduce time: %f sec\n",kernel_time,compose_time,reduce_time);
 #ifdef USE_FFTW
           printf("FFTW time:     %f sec\n",fftw_time);
+          printf("Phase time:    %f sec\n",phase_time);
 #endif
           printf("TOT time:      %f sec\n",tot_time);
 	  if(num_threads > 1)
@@ -893,6 +903,7 @@ int main(int argc, char * argv[])
             printf("PKernel time = %f, PArray Composition time %f, PReduce time: %f sec\n",kernel_time1,compose_time1,reduce_time1);
 #ifdef USE_FFTW
             printf("PFFTW time:    %f sec\n",fftw_time1);
+            printf("PPhase time:   %f sec\n",phase_time1);
 #endif
             printf("PTOT time:     %f sec\n",tot_time1);
 	  }
diff --git a/w-stacking.cu b/w-stacking.cu
index 9d91d76..9c76f82 100644
--- a/w-stacking.cu
+++ b/w-stacking.cu
@@ -5,8 +5,6 @@
 #include <math.h>
 #include <stdlib.h>
 #include <stdio.h>
-#define NWORKERS -1    //100
-#define NTHREADS 32
 
 #ifdef __CUDACC__
 double __device__
diff --git a/w-stacking.h b/w-stacking.h
index 1855d81..5c675e4 100644
--- a/w-stacking.h
+++ b/w-stacking.h
@@ -1,6 +1,8 @@
 #ifndef W_PROJECT_H_
 #define W_PROJECT_H_
 
+#define NWORKERS -1    //100
+#define NTHREADS 32
 #define PI 3.14159265359
 #define REAL_TYPE double
 #ifdef __CUDACC__
@@ -39,6 +41,8 @@ void phase_correction(
      double*,
      int,
      int,
+     int,
+     int,
      int);
 
 #endif 
-- 
GitLab