diff --git a/Makefile b/Makefile
index ce9be41c5f9661b32a7724c6f0d6c70b5b3e2d76..fe95390a79883cd90cab2eb22c67e6398a65b6a1 100644
--- a/Makefile
+++ b/Makefile
@@ -32,16 +32,17 @@ endif
 # perform one-side communication (suggested) instead of reduce (only if MPI is active)
 OPT += -DONE_SIDE
 # write the full 3D cube of gridded visibilities and its FFT transform
-OPT += -DWRITE_DATA
+#OPT += -DWRITE_DATA
 # write the final image
-OPT += -DWRITE_IMAGE
+#OPT += -DWRITE_IMAGE
 # perform w-stacking phase correction
 OPT += -DPHASE_ON
 # perform ring reduce
 OPT += -DRING
 #perform binomial reduce
 #OPT += -DBINOMIAL
-
+#perform debugging
+#OPT += -DDEBUG
 
 
 DEPS = w-stacking.h main.c w-stacking.cu phase_correction.cu allvars.h init.c gridding.c fourier_transform.c result.c reduce.c numa.h
diff --git a/allvars.c b/allvars.c
index f74e55f246391b4942a8619e5195b37b3a51f460..2307e383cca771dfd4d7a0d46e9f32487b6335ec 100644
--- a/allvars.c
+++ b/allvars.c
@@ -47,3 +47,5 @@ int    **cwins = NULL;
 int max_level = 0;
 double *end_4, *end_reduce;
 int dsize_4, iter=0;  
+struct timing_r timing;
+struct timingmpi_r timingmpi;
diff --git a/allvars.h b/allvars.h
index e8fef2682dc5d3d7d8127ccec5a727978ca4069a..598878999b1a9648b174d013dc5daf78e20aad45 100644
--- a/allvars.h
+++ b/allvars.h
@@ -271,4 +271,6 @@ extern double **swins;
 extern int    **cwins;
 extern int max_level;
 extern double *end_4, *end_reduce;
-extern int dsize_4, iter; 
+extern int dsize_4, iter;
+extern struct timing_r { double rtime, ttotal, treduce, tspin, tspin_in, tmovmemory, tsum;} timing ; /* declares struct timing_r and the shared object `timing`; the object itself is defined in allvars.c */
+extern struct timingmpi_r{ double tmpi, tmpi_reduce, tmpi_reduce_wait, tmpi_setup;} timingmpi ; /* declares struct timingmpi_r and the shared object `timingmpi`; the object itself is defined in allvars.c */
diff --git a/reduce.c b/reduce.c
index 7c866abcbca855efef24bd2cbda195f92af89416..a773557f3bda07f832644bdeb967cdedb2bc0cff 100644
--- a/reduce.c
+++ b/reduce.c
@@ -16,8 +16,8 @@ double check_host_value ;
 double check_global_value ;
 #endif
 
-struct { double rtime, ttotal, treduce, tspin, tspin_in, tmovmemory, tsum;} timing = {0};
-struct { double tmpi, tmpi_reduce, tmpi_reduce_wait, tmpi_setup;} timingmpi = {0};
+// The timing and timingmpi accumulators are no longer file-local: they are
+// defined in allvars.c and declared extern (with their struct types) in allvars.h.
 
 
 int_t summations = 0;
diff --git a/result.c b/result.c
index 34fca0917b83cac17ebcd54f617d13c6d27be55c..d982dfbd6d20dd53f7cbf6337edbf4ae6473f427 100644
--- a/result.c
+++ b/result.c
@@ -16,6 +16,8 @@ void write_result()
      #if defined(ONE_SIDE)
       //printf("%14s time : %f sec\n", "Reduce sh", wt_timing.reduce_sh);
       printf("%14s time : %f sec\n", "Reduce ring", wt_timing.reduce_ring);
+      printf("%14s time : %f sec\n", "Shared mem reduce ring", timing.treduce);
+      printf("%14s time : %f sec\n", "Shmem reduce multi host", timing.treduce+timingmpi.tmpi_reduce);
       //printf("%14s time : %f sec\n", "Mmove", wt_timing.mmove);
       //printf("%14s time : %f sec\n", "ReduceMPI", wt_timing.reduce_mpi);
      #endif