diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..797747aa16a2fb9000f5268a91f10e289e07f077
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+*.o
+phase_correction.c
+w-stacking.c
+w-stackingCfftw
+w-stackingfftw
+w-stackingCfftw_serial
+w-stackingfftw_serial
diff --git a/Makefile b/Makefile
index f109a98c7a1a1cbe5d406752091814495af52068..d8f15527e1cc177f76d5b0f5baa06383295a64ca 100644
--- a/Makefile
+++ b/Makefile
@@ -1,12 +1,16 @@
 OPT += -DUSE_MPI
 OPT += -DUSE_FFTW
 OPT += -DONE_SIDE
-OPT += -DWRITE_DATA
+OPT += -DNOWRITE_DATA
+OPT += -DWRITE_IMAGE
 
 CC = gcc
 CXX = g++
 MPICC = mpicc
-MPICXX =mpiCC 
+MPICXX = mpiCC 
+
+#OMP = -fopenmp 
+OMP = 
 
 CFLAGS += -O3 -mcpu=native
 CFLAGS += -I.
@@ -17,31 +21,35 @@ NVFLAGS = -arch=sm_70 -c w-stacking.cu -Xcompiler -mno-float128 -std=c++11
 NVLIB = -L/cineca/prod/opt/compilers/cuda/10.1/none/lib64/ -lcudart -lcuda
 
 DEPS = w-stacking.h
-COBJ = w-stacking.o w-stacking-fftw.o
+COBJ = w-stacking.o w-stacking-fftw.o phase_correction.o
 
 w-stacking.c:
 	cp w-stacking.cu w-stacking.c
 
+phase_correction.c:
+	cp phase_correction.cu phase_correction.c
+
 %.o: %.c $(DEPS)
-	$(CC) -c -o $@ $< $(CFLAGS) $(OPT)
+	$(CC) $(OMP) -c -o $@ $< $(CFLAGS) $(OPT)
 
 serial: $(COBJ)
-	$(CC) -o w-stackingCfftw_serial $(CFLAGS) $^ -lm
+	$(CC) $(OMP) -o w-stackingCfftw_serial $(CFLAGS) $^ -lm
 
 serial_cuda:
 	$(NVCC) $(NVFLAGS) -c w-stacking.cu $(NVLIB)
-	$(CC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c
-	$(CXX) $(CFLAGS) $(OPT) -o w-stackingfftw_serial w-stacking-fftw.o w-stacking.o $(NVLIB) -lm
+	$(CC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c phase_correction.c
+	$(CXX) $(CFLAGS) $(OPT) -o w-stackingfftw_serial w-stacking-fftw.o w-stacking.o phase_correction.o $(NVLIB) -lm
 
 mpi: $(COBJ)
-	$(MPICC) -o w-stackingCfftw $(CFLAGS) $^ $(LIBS)
+	$(MPICC) $(OMP) -o w-stackingCfftw $(CFLAGS) $^ $(LIBS)
 
 mpi_cuda:
 	$(NVCC) $(NVFLAGS) -c w-stacking.cu $(NVLIB)
-	$(MPICC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c
-	$(MPICXX) $(CFLAGS) $(OPT) -o w-stackingfftw w-stacking-fftw.o w-stacking.o $(NVLIB) $(LIBS) -lm
+	$(MPICC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c phase_correction.c
+	$(MPICXX) $(CFLAGS) $(OPT) -o w-stackingfftw w-stacking-fftw.o w-stacking.o phase_correction.o $(NVLIB) $(LIBS) -lm
 
 clean:
 	rm *.o
 	rm w-stacking.c
+	rm phase_correction.c
 
diff --git a/phase_correction.cu b/phase_correction.cu
new file mode 100644
index 0000000000000000000000000000000000000000..50c8c564fb87f37306836a429446326bf196071b
--- /dev/null
+++ b/phase_correction.cu
@@ -0,0 +1,30 @@
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+#include "w-stacking.h"
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#define NWORKERS -1    //100
+#define NTHREADS 32
+
+#ifdef __CUDACC__
+#endif
+
+void phase_correction(double* gridss, double* image_real, double* image_imag, int xaxis, int yaxis, int num_w_planes)
+{
+
+	for (int iw=0; iw<num_w_planes; iw++)
+	{
+	    for (int iv=0; iw<yaxis; iv++)
+            for (int iu=0; iu<xaxis; iu++)
+            {
+		    long index = 2*(iu+iv*xaxis+xaxis*yaxis*iw);
+		    long img_index = iu+iv*xaxis;
+		    image_real[img_index] += gridss[index];
+		    image_imag[img_index] += gridss[index];
+            }  
+	}
+
+
+}
diff --git a/w-stacking-fftw.c b/w-stacking-fftw.c
index 4088543ba021d22d28c5540bd99b5f0b2a9de958..a1d0b9084448167228391959e3b3dbb855ea8748 100644
--- a/w-stacking-fftw.c
+++ b/w-stacking-fftw.c
@@ -99,7 +99,7 @@ int main(int argc, char * argv[])
 	int local_grid_size_y;// = 100;
 	int xaxis;
 	int yaxis;
-	int num_w_planes = 1;
+	int num_w_planes = 10;
 	// DAV: the corresponding KernelLen is calculated within the wstack function. It can be anyway hardcoded for optimization
 	int w_support = 7;
 	int num_threads;// = 4;
@@ -151,7 +151,7 @@ int main(int argc, char * argv[])
 	start = clock();
 
 	// CLAAAAAA
-	int ndatasets = 2;
+	int ndatasets = 1;
         strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
         strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/");
 
@@ -821,6 +821,54 @@ int main(int argc, char * argv[])
  
 	fftw_free(fftwgrid);
 
+	// Phase correction
+	if(rank == 0)printf("PHASE CORRECTION\n");
+        double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double));	
+        double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double));	
+        phase_correction(gridss,image_real,image_imag,xaxis,yaxis,num_w_planes);
+
+#ifdef WRITE_IMAGE
+
+        if(rank == 0)
+        {
+            pFilereal = fopen (fftfile2,"wb");
+            pFileimg = fopen (fftfile3,"wb");
+            fclose(pFilereal);
+            fclose(pFileimg);
+        }
+	#ifdef USE_MPI
+	MPI_Barrier(MPI_COMM_WORLD);
+        #endif
+        if(rank == 0)printf("WRITING IMAGE\n");
+	for (int isector=0; isector<size; isector++)
+	{
+	    #ifdef USE_MPI
+	    MPI_Barrier(MPI_COMM_WORLD);
+            #endif
+	    if(isector == rank)
+            {
+	       printf("%d writing\n",isector);
+               pFilereal = fopen (fftfile2,"ab");
+               pFileimg = fopen (fftfile3,"ab");
+
+	       long global_index = isector*(xaxis*yaxis)*sizeof(double);
+
+               fseek(pFilereal, global_index, SEEK_SET);
+               fwrite(image_real, xaxis*yaxis, sizeof(double), pFilereal);
+               fseek(pFileimg, global_index, SEEK_SET);
+               fwrite(image_imag, xaxis*yaxis, sizeof(double), pFileimg);
+
+               fclose(pFilereal);
+               fclose(pFileimg);
+	    }
+	}
+	#ifdef USE_MPI
+	MPI_Barrier(MPI_COMM_WORLD);
+        #endif
+
+#endif //WRITE_IMAGE
+
+
 #endif //USE_FFTW
 
 	end = clock();
diff --git a/w-stacking.h b/w-stacking.h
index 4b8bdc1a10c3212509537eea9e455b39d839e241..1855d81f2aa1e75b4da0fc04476fa9e335bbbf34 100644
--- a/w-stacking.h
+++ b/w-stacking.h
@@ -24,9 +24,21 @@ void wstack(
      int,
      double*,
      int);
+
 #ifdef __CUDACC__
 extern "C"
 #endif
 int test(int nnn);
 
+#ifdef __CUDACC__
+extern "C"
+#endif
+void phase_correction(
+     double*,
+     double*,
+     double*,
+     int,
+     int,
+     int);
+
 #endif