diff --git a/w-stacking.cu b/w-stacking.cu
index c79fb0e82501c74fcb767efb71d3c2043ec5e5e9..a6b7f49fc943b97f1ab93a48213b13805b95b3fe 100755
--- a/w-stacking.cu
+++ b/w-stacking.cu
@@ -114,7 +114,13 @@ __global__ void convolve_g(
 			   int grid_size_x,
 			   int grid_size_y,
 			   double* grid,
-			   double std22)
+			  #if defined(GAUSS_HI_PRECISION)
+			   double std22
+			  #else
+			   double std22,
+			   double* convkernel
+			  #endif
+			   )
 			   
 
 
@@ -296,11 +302,11 @@ void wstack(
     mmm=cudaMalloc(&vis_img_g,Nvis*sizeof(float));
     mmm=cudaMalloc(&weight_g,(Nvis/freq_per_chan)*sizeof(float));
     //mmm=cudaMalloc(&grid_g,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double));
-    /*
+
    #if !defined(GAUSS_HI_PRECISION)
     mmm=cudaMalloc(&convkernel_g,increaseprecision*w_support*sizeof(double));
    #endif
-    */
+
     if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMalloc ERROR %d !!!\n", mmm);}
     //mmm=cudaMemset(grid_g,0.0,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double));
     if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemset ERROR %d !!!\n", mmm);}
@@ -313,14 +319,15 @@ void wstack(
     mmm=cudaMemcpyAsync(vis_img_g, vis_img, Nvis*sizeof(float), cudaMemcpyHostToDevice, stream_stacking);
     mmm=cudaMemcpyAsync(weight_g, weight, (Nvis/freq_per_chan)*sizeof(float), cudaMemcpyHostToDevice, stream_stacking);
 
-    /*
+
    #if !defined(GAUSS_HI_PRECISION)
     mmm=cudaMemcpyAsync(convkernel_g, convkernel, increaseprecision*w_support*sizeof(double), cudaMemcpyHostToDevice, stream_stacking);
    #endif
-    */
+
     if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemcpyAsync ERROR %d !!!\n", mmm);}
     
     // Call main GPU Kernel
+   #if defined(GAUSS_HI_PRECISION)
     convolve_g <<<Nbl,Nth,0,stream_stacking>>> (
 	       num_w_planes,
                num_points,
@@ -340,7 +347,30 @@ void wstack(
                grid,
 	       std22
 						);
-
+   #else
+    convolve_g <<<Nbl,Nth,0,stream_stacking>>> (
+	       num_w_planes,
+               num_points,
+               freq_per_chan,
+               polarizations,
+               uu_g,
+               vv_g,
+               ww_g,
+               vis_real_g,
+               vis_img_g,
+               weight_g,
+               dx,
+               dw,
+               KernelLen,
+               grid_size_x,
+               grid_size_y,
+               grid,
+	       std22,
+	       convkernel_g
+						);
+   #endif
+    
+    
     mmm=cudaStreamSynchronize(stream_stacking);
     //Record the event
     //mmm=cudaEventRecord(event_kernel,stream_stacking);
@@ -360,11 +390,11 @@ void wstack(
     mmm=cudaFree(vis_img_g);
     mmm=cudaFree(weight_g);
     //mmm=cudaFree(grid_g);
-    /*
+    
    #if !defined(GAUSS_HI_PRECISION)
     mmm=cudaFree(convkernel_g);
    #endif
-    */
+    
 // Switch between CUDA and GPU versions
 # else