From aed8d315b4cc6e5eeb6e5a7006fd65d13dbd8d56 Mon Sep 17 00:00:00 2001
From: Claudio Gheller <cgheller@login01.leonardo.local>
Date: Fri, 24 May 2024 08:54:39 +0200
Subject: [PATCH] toward SPIE paper

---
 gridding.c       |  6 ++--
 gridding_cpu.c   |  2 +-
 gridding_nccl.cu | 76 +++++++++++++++++++++++++++++++++---------------
 init.c           | 16 +++++++++-
 main.c           |  3 +-
 proto.h          |  8 ++---
 6 files changed, 77 insertions(+), 34 deletions(-)

diff --git a/gridding.c b/gridding.c
index 10cf812..2560b2e 100755
--- a/gridding.c
+++ b/gridding.c
@@ -6,11 +6,11 @@
 
 void free_array       ( uint *, uint **, int );
 void initialize_array ( void );
-void gridding_data    ( void );
+void gridding_data    ( int );
 
 
 
-void gridding()
+void gridding(int ifiles)
 {
 
   if(rank == 0)
@@ -90,7 +90,7 @@ void gridding()
   timing_wt.init += CPU_TIME_wt - start;
   
   //Sector and Gridding data
-  gridding_data();
+  gridding_data(ifiles);
   
   timing_wt.gridding += CPU_TIME_wt - start;
   
diff --git a/gridding_cpu.c b/gridding_cpu.c
index ab8ea37..6179474 100755
--- a/gridding_cpu.c
+++ b/gridding_cpu.c
@@ -14,7 +14,7 @@ int reduce_ring (int);
 
 //   .....................................................................
 //
-void gridding_data()
+void gridding_data(int ifiles)
 //
 // actually performs the gridding of the data
 //
diff --git a/gridding_nccl.cu b/gridding_nccl.cu
index 92fa1f9..26797be 100755
--- a/gridding_nccl.cu
+++ b/gridding_nccl.cu
@@ -48,7 +48,7 @@ static void getHostName(char* hostname, int maxlen) {
 
 
 
-void gridding_data(){
+void gridding_data(int ifiles){
 
   double shift = (double)(dx*yaxis);
 
@@ -98,11 +98,14 @@ void gridding_data(){
 
   long long unsigned size_finta = (long long unsigned)(2*(long long unsigned)param.num_w_planes*(long long unsigned)xaxis*(long long unsigned)yaxis); 
   
-  nnn = cudaMalloc(&grid_gpu, (size_t)(size_finta*sizeof(double)));
-  if (nnn != cudaSuccess) {printf("!!! gridding_nccl.cu cudaMalloc &grid_gpu ERROR %d !!!\n", nnn);}
+  if (ifiles == 0)
+  {
+    nnn = cudaMalloc(&grid_gpu, (size_t)(size_finta*sizeof(double)));
+    if (nnn != cudaSuccess) {printf("!!! gridding_nccl.cu cudaMalloc &grid_gpu ERROR %d !!!\n", nnn);}
 
-  nnn = cudaMalloc(&gridss_gpu, (size_t)(size_finta*sizeof(double)));
-  if (nnn != cudaSuccess) {printf("!!! gridding_nccl.cu cudaMalloc &gridss_gpu ERROR %d !!!\n", nnn);}
+    nnn = cudaMalloc(&gridss_gpu, (size_t)(size_finta*sizeof(double)));
+    if (nnn != cudaSuccess) {printf("!!! gridding_nccl.cu cudaMalloc &gridss_gpu ERROR %d !!!\n", nnn);}
+  }
   
   nnn = cudaStreamCreate(&stream_reduce);
   if (nnn != cudaSuccess) {printf("!!! gridding_nccl.cu cudaStreamCreate &stream_reduce ERROR %d !!!\n", nnn);}
@@ -206,25 +209,49 @@ void gridding_data(){
 	    
      //We have to call different GPUs per MPI task!!! [GL]
 #ifdef CUDACC
-      wstack((long long unsigned)param.num_w_planes,
-             Nsec,
-             metaData.freq_per_chan,
-             metaData.polarisations,
-             uus,
-             vvs,
-             wws,
-             visreals,
-             visimgs,
-             weightss,
-             dx,
-             dw,
-             param.w_support,
-             (long long unsigned)xaxis,
-             (long long unsigned)yaxis,
-             gridss_gpu,
-             param.num_threads,
-             rank,
-             stream_stacking);
+      if (size > 1) {
+        wstack((long long unsigned)param.num_w_planes,
+               Nsec,
+               metaData.freq_per_chan,
+               metaData.polarisations,
+               uus,
+               vvs,
+               wws,
+               visreals,
+               visimgs,
+               weightss,
+               dx,
+               dw,
+               param.w_support,
+               (long long unsigned)xaxis,
+               (long long unsigned)yaxis,
+               gridss_gpu,
+               param.num_threads,
+               rank,
+               stream_stacking);
+      }
+      else
+        {
+          wstack((long long unsigned)param.num_w_planes,
+               Nsec,
+               metaData.freq_per_chan,
+               metaData.polarisations,
+               uus,
+               vvs,
+               wws,
+               visreals,
+               visimgs,
+               weightss,
+               dx,
+               dw,
+               param.w_support,
+               (long long unsigned)xaxis,
+               (long long unsigned)yaxis,
+               grid_gpu,
+               param.num_threads,
+               rank,
+               stream_stacking);
+        }
 #else
       wstack(param.num_w_planes,
 	     Nsec,
@@ -296,6 +323,7 @@ void gridding_data(){
   cudaFree(gridss_gpu);
 #endif
 
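+  // NOTE(review): gridss_gpu is freed above on every call but is cudaMalloc'ed only when ifiles == 0 -- confirm it is not reused for later datasets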
   cudaStreamDestroy(stream_reduce);
   cudaStreamDestroy(stream_stacking);
   
diff --git a/init.c b/init.c
index e8c9ffb..bcc0c40 100755
--- a/init.c
+++ b/init.c
@@ -55,7 +55,8 @@ void init(int index)
  
    MPI_Barrier(MPI_COMM_WORLD);
    
-   timing_wt.setup = CPU_TIME_wt - begin;
+   if(index == 0) timing_wt.setup = 0.0;
+   timing_wt.setup += CPU_TIME_wt - begin;
 
    return;
 }
@@ -113,9 +114,12 @@ void op_filename() {
    	strcat(buf, outparam.extension);
    	strcpy(out.extension, buf);
 
+	strcpy(out.timingfile, outparam.timingfile);
+	/* Disabled: this used to build the timing file name as num_buf followed by outparam.timingfile
    	strcpy(buf, num_buf);
    	strcat(buf, outparam.timingfile);
    	strcpy(out.timingfile, buf);
+	*/
     }
 
   /* Communicating the relevent parameters to the other process */
@@ -364,6 +368,16 @@ void allocate_memory(int index) {
      // all the sizes are rescaled by the number of MPI tasks
      //  Allocate arrays
      
+     if (data.uu != NULL) {
+	     printf("Freeing input data\n");
+	     free(data.uu);
+	     free(data.vv);
+	     free(data.ww);
+	     free(data.weights);
+	     free(data.visreal);
+	     free(data.visimg);
+     }
+
      data.uu = (double*) calloc(metaData.Nmeasures,sizeof(double));
      data.vv = (double*) calloc(metaData.Nmeasures,sizeof(double));
      data.ww = (double*) calloc(metaData.Nmeasures,sizeof(double));
diff --git a/main.c b/main.c
index 6b0ee85..c51b25b 100755
--- a/main.c
+++ b/main.c
@@ -143,11 +143,12 @@ int main(int argc, char * argv[])
       if(rank == 0)
 	printf( "\nDataset %d\n", ifiles);
      
+      // Per-dataset pass: the dataset index is forwarded to both init() and gridding()
       /*INIT function */
       init(ifiles);
 
       /* GRIDDING function */
-      gridding();
+      gridding(ifiles);
     }
 
     /* WRITE_GRIDDED_DATA function */
diff --git a/proto.h b/proto.h
index 6824314..d5df193 100755
--- a/proto.h
+++ b/proto.h
@@ -24,16 +24,16 @@ void shutdown_wstacking( int, char *, char *, int);
 
 #ifdef __cplusplus
 extern "C" {
-  void gridding          (void);
-  void gridding_data     (void);
+  void gridding          (int);
+  void gridding_data     (int);
   void write_gridded_data(void);
 }
 
 #else
 /*  gridding.c */
 
-void gridding          (void);
-void gridding_data     (void);
+void gridding          (int);
+void gridding_data     (int);
 void write_gridded_data(void);
 #endif
 
-- 
GitLab