From f8d864ea1add70ece44f83b1c864363282ce7bec Mon Sep 17 00:00:00 2001 From: Claudio Gheller Date: Wed, 15 May 2024 10:39:42 +0200 Subject: [PATCH] added some comments --- Makefile | 6 +++--- cd | 0 gridding_cpu.c | 15 ++++++++------- gridding_nccl.cu | 11 ++++++----- main.c | 7 +++++++ timing.h | 1 - 6 files changed, 24 insertions(+), 16 deletions(-) delete mode 100644 cd diff --git a/Makefile b/Makefile index 55d1f33..75e3f48 100755 --- a/Makefile +++ b/Makefile @@ -42,10 +42,10 @@ FFTWLIBS = OPT += -DUSE_FFTW # use omp-ized version of fftw routines -#OPT += -DHYBRID_FFTW +OPT += -DHYBRID_FFTW # switch on the OpenMP parallelization -#OPT += -DUSE_OMP +OPT += -DUSE_OMP # ======================================================== @@ -92,7 +92,7 @@ OPT += -DGAUSS_HI_PRECISION #OPT += -DCUFFTMP # FULL NVIDIA GPU SUPPORT - Recommended for full NVIDIA GPU code execution -OPT += -DFULL_NVIDIA +#OPT += -DFULL_NVIDIA ifeq (FULL_NVIDIA,$(findstring FULL_NVIDIA,$(OPT))) OPT += -DCUDACC -DNCCL_REDUCE -DCUFFTMP endif diff --git a/cd b/cd deleted file mode 100644 index e69de29..0000000 diff --git a/gridding_cpu.c b/gridding_cpu.c index 4027853..ab8ea37 100755 --- a/gridding_cpu.c +++ b/gridding_cpu.c @@ -67,12 +67,12 @@ void gridding_data() } // closes reduce_method == REDUCE_RING - - timing_wt.kernel = 0.0; - timing_wt.reduce = 0.0; - timing_wt.reduce_mpi = 0.0; - timing_wt.reduce_sh = 0.0; - timing_wt.compose = 0.0; + //CLAAAA + //timing_wt.kernel = 0.0; + //timing_wt.reduce = 0.0; + //timing_wt.reduce_mpi = 0.0; + //timing_wt.reduce_sh = 0.0; + //timing_wt.compose = 0.0; // calculate the resolution in radians resolution = 1.0/MAX(fabs(metaData.uvmin),fabs(metaData.uvmax)); @@ -142,6 +142,7 @@ void gridding_data() double uumax = -1e20; double vvmax = -1e20; + /* #pragma omp parallel reduction( min: uumin, vvmin) reduction( max: uumax, vvmax) num_threads(param.num_threads) { double my_uumin = 1e20; @@ -165,7 +166,7 @@ void gridding_data() } //printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax); - + */ timing_wt.compose += CPU_TIME_wt - start; diff --git a/gridding_nccl.cu b/gridding_nccl.cu index 36b1d2c..92fa1f9 100755 --- a/gridding_nccl.cu +++ b/gridding_nccl.cu @@ -52,11 +52,12 @@ void gridding_data(){ double shift = (double)(dx*yaxis); - timing_wt.kernel = 0.0; - timing_wt.reduce = 0.0; - timing_wt.reduce_mpi = 0.0; - timing_wt.reduce_sh = 0.0; - timing_wt.compose = 0.0; + // CLAAAA + //timing_wt.kernel = 0.0; + //timing_wt.reduce = 0.0; + //timing_wt.reduce_mpi = 0.0; + //timing_wt.reduce_sh = 0.0; + //timing_wt.compose = 0.0; // calculate the resolution in radians resolution = 1.0/MAX(fabs(metaData.uvmin),fabs(metaData.uvmax)); diff --git a/main.c b/main.c index 5f5bd5d..6b0ee85 100755 --- a/main.c +++ b/main.c @@ -33,6 +33,13 @@ int main(int argc, char * argv[]) { +//CLAAAA + timing_wt.kernel = 0.0; + timing_wt.reduce = 0.0; + timing_wt.reduce_mpi = 0.0; + timing_wt.reduce_sh = 0.0; + timing_wt.compose = 0.0; + if(argc > 1) { strcpy(in.paramfile, argv[1]); diff --git a/timing.h b/timing.h index eb2570b..71e778e 100755 --- a/timing.h +++ b/timing.h @@ -77,7 +77,6 @@ typedef struct { extern timing_t timing_wt; // wall-clock process timing, at Task 0 - extern double start_tot; extern double reduce_shmem_time; extern double reduce_mpi_time; -- GitLab