diff --git a/phase_correction.cu b/phase_correction.cu
index d43e4cfa4f5639c7a12f9e6bc0b1ae922f1cbb27..95b2933df54ff129a26549b2928ed84715a02c05 100644
--- a/phase_correction.cu
+++ b/phase_correction.cu
@@ -87,7 +87,7 @@ __global__ void phase_g(int xaxis,
 #endif
 
 void phase_correction(double* gridss, double* image_real, double* image_imag, int xaxis, int yaxis, int num_w_planes, int xaxistot, int yaxistot,
-		      double resolution, double wmin, double wmax)
+		      double resolution, double wmin, double wmax, int num_threads)
 {
         double dw = (wmax-wmin)/(double)num_w_planes;
 	double wterm = wmin+0.5*dw;
@@ -135,6 +135,11 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
 
 #else
 
+#ifdef _OPENMP
+    omp_set_num_threads(num_threads);
+#endif
+
+        #pragma omp parallel for collapse(3) private(wterm)
 	for (int iw=0; iw<num_w_planes; iw++)
 	{
 	    for (int iv=0; iv<yaxis; iv++)
@@ -143,6 +148,7 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
 
 		long index = 2*(iu+iv*xaxis+xaxis*yaxis*iw);
 		long img_index = iu+iv*xaxis;
+		wterm = wmin + iw*dw;
 #ifdef PHASE_ON
                 if (num_w_planes > 1)
 		{
@@ -166,19 +172,24 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
 		    s = pimag;
 
 		    //printf("%d %d %d %ld %ld\n",iu,iv,iw,index,img_index);
+		    #pragma omp atomic
 		    image_real[img_index] += (p*r-q*s)*dwnorm*sqrt(1-radius2);
+		    #pragma omp atomic
 		    image_imag[img_index] += (p*s+q*r)*dwnorm*sqrt(1-radius2);
 	        } else {
+		    #pragma omp atomic
 		    image_real[img_index] += gridss[index];
+		    #pragma omp atomic
 		    image_imag[img_index] += gridss[index+1];
 		}
 #else
+		#pragma omp atomic
   	        image_real[img_index] += gridss[index];
+		#pragma omp atomic
 		image_imag[img_index] += gridss[index+1];
 #endif // end of PHASE_ON
 
             }  
-	    wterm += dw;
 	}
 
 #endif // end of __CUDACC__
diff --git a/w-stacking-fftw.c b/w-stacking-fftw.c
index 15807158c84387b53e8bef5f40926f6604a0b01c..5c6833d42208b37a512942e8b080f574370b86e5 100644
--- a/w-stacking-fftw.c
+++ b/w-stacking-fftw.c
@@ -860,7 +860,7 @@ int main(int argc, char * argv[])
         double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double));	
         double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double));	
 
-        phase_correction(gridss,image_real,image_imag,xaxis,yaxis,num_w_planes,grid_size_x,grid_size_y,resolution,wmin,wmax);
+        phase_correction(gridss,image_real,image_imag,xaxis,yaxis,num_w_planes,grid_size_x,grid_size_y,resolution,wmin,wmax,num_threads);
 
         end = clock();
         clock_gettime(CLOCK_MONOTONIC, &finish);
diff --git a/w-stacking.h b/w-stacking.h
index 822b92c4f55e2f7469cfff9cf18fea78103e172d..35e1495ea90864c6dbc0120b3ba063f28afef583 100644
--- a/w-stacking.h
+++ b/w-stacking.h
@@ -46,5 +46,6 @@ void phase_correction(
      int,
      double,
      double,
-     double);
+     double,
+     int);
 #endif