diff --git a/Makefile b/Makefile index 55d1f33d4ef5e43584a47494cd48e5f12e55f801..75e3f48f3063f1dc7085ea214cbb7f9b69f9e54f 100755 --- a/Makefile +++ b/Makefile @@ -42,10 +42,10 @@ FFTWLIBS = OPT += -DUSE_FFTW # use omp-ized version of fftw routines -#OPT += -DHYBRID_FFTW +OPT += -DHYBRID_FFTW # switch on the OpenMP parallelization -#OPT += -DUSE_OMP +OPT += -DUSE_OMP # ======================================================== @@ -92,7 +92,7 @@ OPT += -DGAUSS_HI_PRECISION #OPT += -DCUFFTMP # FULL NVIDIA GPU SUPPORT - Recommended for full NVIDIA GPU code execution -OPT += -DFULL_NVIDIA +#OPT += -DFULL_NVIDIA ifeq (FULL_NVIDIA,$(findstring FULL_NVIDIA,$(OPT))) OPT += -DCUDACC -DNCCL_REDUCE -DCUFFTMP endif diff --git a/cd b/cd deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/gridding_cpu.c b/gridding_cpu.c index 40278537b04c8f7c99953cc09172dfeb399c68c1..ab8ea3716edba298ce80dc99ee718c34f6fad2a7 100755 --- a/gridding_cpu.c +++ b/gridding_cpu.c @@ -67,12 +67,12 @@ void gridding_data() } // closes reduce_method == REDUCE_RING - - timing_wt.kernel = 0.0; - timing_wt.reduce = 0.0; - timing_wt.reduce_mpi = 0.0; - timing_wt.reduce_sh = 0.0; - timing_wt.compose = 0.0; + //CLAAAA + //timing_wt.kernel = 0.0; + //timing_wt.reduce = 0.0; + //timing_wt.reduce_mpi = 0.0; + //timing_wt.reduce_sh = 0.0; + //timing_wt.compose = 0.0; // calculate the resolution in radians resolution = 1.0/MAX(fabs(metaData.uvmin),fabs(metaData.uvmax)); @@ -142,6 +142,7 @@ void gridding_data() double uumax = -1e20; double vvmax = -1e20; + /* #pragma omp parallel reduction( min: uumin, vvmin) reduction( max: uumax, vvmax) num_threads(param.num_threads) { double my_uumin = 1e20; @@ -165,7 +166,7 @@ void gridding_data() } //printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax); - + */ timing_wt.compose += CPU_TIME_wt - start; diff --git a/gridding_nccl.cu b/gridding_nccl.cu index 36b1d2cd4322b12c77efc4d59ac90ee144b4440f..92fa1f900b13aeb2a8ac69467c890c403d5c6cec 100755 --- a/gridding_nccl.cu +++ b/gridding_nccl.cu @@ -52,11 +52,12 @@ void gridding_data(){ double shift = (double)(dx*yaxis); - timing_wt.kernel = 0.0; - timing_wt.reduce = 0.0; - timing_wt.reduce_mpi = 0.0; - timing_wt.reduce_sh = 0.0; - timing_wt.compose = 0.0; + // CLAAAA + //timing_wt.kernel = 0.0; + //timing_wt.reduce = 0.0; + //timing_wt.reduce_mpi = 0.0; + //timing_wt.reduce_sh = 0.0; + //timing_wt.compose = 0.0; // calculate the resolution in radians resolution = 1.0/MAX(fabs(metaData.uvmin),fabs(metaData.uvmax)); diff --git a/main.c b/main.c index 5f5bd5d590e7d72dfc9c3ffaa272eeabcaf6111a..6b0ee85ba721104f59d36c798ef9f5d9a3e74e3d 100755 --- a/main.c +++ b/main.c @@ -33,6 +33,13 @@ int main(int argc, char * argv[]) { +//CLAAAA + timing_wt.kernel = 0.0; + timing_wt.reduce = 0.0; + timing_wt.reduce_mpi = 0.0; + timing_wt.reduce_sh = 0.0; + timing_wt.compose = 0.0; + if(argc > 1) { strcpy(in.paramfile, argv[1]); diff --git a/timing.h b/timing.h index eb2570b2bdd410f00f794da8d399846745158df4..71e778ee8f66d1409b765e98878370264e7ddcc6 100755 --- a/timing.h +++ b/timing.h @@ -77,7 +77,6 @@ typedef struct { extern timing_t timing_wt; // wall-clock process timing, at Task 0 - extern double start_tot; extern double reduce_shmem_time; extern double reduce_mpi_time;