From aed8d315b4cc6e5eeb6e5a7006fd65d13dbd8d56 Mon Sep 17 00:00:00 2001
From: Claudio Gheller <cgheller@login01.leonardo.local>
Date: Fri, 24 May 2024 08:54:39 +0200
Subject: [PATCH] toward SPIE paper

---
 gridding.c       |  6 ++--
 gridding_cpu.c   |  2 +-
 gridding_nccl.cu | 76 +++++++++++++++++++++++++++++++++---------------
 init.c           | 16 +++++++++-
 main.c           |  3 +-
 proto.h          |  8 ++---
 6 files changed, 77 insertions(+), 34 deletions(-)

diff --git a/gridding.c b/gridding.c
index 10cf812..2560b2e 100755
--- a/gridding.c
+++ b/gridding.c
@@ -6,11 +6,11 @@
 
 void free_array ( uint *, uint **, int );
 void initialize_array ( void );
-void gridding_data ( void );
+void gridding_data ( int );
 
 
-void gridding()
+void gridding(int ifiles)
 {
 
   if(rank == 0)
 
@@ -90,7 +90,7 @@ void gridding()
   timing_wt.init += CPU_TIME_wt - start;
 
   //Sector and Gridding data
-  gridding_data();
+  gridding_data(ifiles);
 
   timing_wt.gridding += CPU_TIME_wt - start;
 
diff --git a/gridding_cpu.c b/gridding_cpu.c
index ab8ea37..6179474 100755
--- a/gridding_cpu.c
+++ b/gridding_cpu.c
@@ -14,7 +14,7 @@ int reduce_ring (int);
 // ..................................................................... //
 
 
-void gridding_data()
+void gridding_data(int ifiles)
 //
 // actually performs the gridding of the data
 //
diff --git a/gridding_nccl.cu b/gridding_nccl.cu
index 92fa1f9..26797be 100755
--- a/gridding_nccl.cu
+++ b/gridding_nccl.cu
@@ -48,7 +48,7 @@ static void getHostName(char* hostname, int maxlen) {
 
 
 
-void gridding_data(){
+void gridding_data(int ifiles){
 
   double shift = (double)(dx*yaxis);
 
@@ -98,11 +98,14 @@ void gridding_data(){
 
   long long unsigned size_finta = (long long unsigned)(2*(long long unsigned)param.num_w_planes*(long long unsigned)xaxis*(long long unsigned)yaxis);
 
-  nnn = cudaMalloc(&grid_gpu, (size_t)(size_finta*sizeof(double)));
-  if (nnn != cudaSuccess) {printf("!!! gridding_nccl.cu cudaMalloc &grid_gpu ERROR %d !!!\n", nnn);}
+  if (ifiles == 0)
+  {
+    nnn = cudaMalloc(&grid_gpu, (size_t)(size_finta*sizeof(double)));
+    if (nnn != cudaSuccess) {printf("!!! gridding_nccl.cu cudaMalloc &grid_gpu ERROR %d !!!\n", nnn);}
 
-  nnn = cudaMalloc(&gridss_gpu, (size_t)(size_finta*sizeof(double)));
-  if (nnn != cudaSuccess) {printf("!!! gridding_nccl.cu cudaMalloc &gridss_gpu ERROR %d !!!\n", nnn);}
+    nnn = cudaMalloc(&gridss_gpu, (size_t)(size_finta*sizeof(double)));
+    if (nnn != cudaSuccess) {printf("!!! gridding_nccl.cu cudaMalloc &gridss_gpu ERROR %d !!!\n", nnn);}
+  }
 
   nnn = cudaStreamCreate(&stream_reduce);
   if (nnn != cudaSuccess) {printf("!!! gridding_nccl.cu cudaStreamCreate &stream_reduce ERROR %d !!!\n", nnn);}
@@ -206,25 +209,49 @@ void gridding_data(){
 
           //We have to call different GPUs per MPI task!!!
 #ifdef __CUDACC__
-          wstack((long long unsigned)param.num_w_planes,
-                 Nsec,
-                 metaData.freq_per_chan,
-                 metaData.polarisations,
-                 uus,
-                 vvs,
-                 wws,
-                 visreals,
-                 visimgs,
-                 weightss,
-                 dx,
-                 dw,
-                 param.w_support,
-                 (long long unsigned)xaxis,
-                 (long long unsigned)yaxis,
-                 gridss_gpu,
-                 param.num_threads,
-                 rank,
-                 stream_stacking);
+          if (size > 1) {
+            wstack((long long unsigned)param.num_w_planes,
+                   Nsec,
+                   metaData.freq_per_chan,
+                   metaData.polarisations,
+                   uus,
+                   vvs,
+                   wws,
+                   visreals,
+                   visimgs,
+                   weightss,
+                   dx,
+                   dw,
+                   param.w_support,
+                   (long long unsigned)xaxis,
+                   (long long unsigned)yaxis,
+                   gridss_gpu,
+                   param.num_threads,
+                   rank,
+                   stream_stacking);
+          }
+          else
+          {
+            wstack((long long unsigned)param.num_w_planes,
+                   Nsec,
+                   metaData.freq_per_chan,
+                   metaData.polarisations,
+                   uus,
+                   vvs,
+                   wws,
+                   visreals,
+                   visimgs,
+                   weightss,
+                   dx,
+                   dw,
+                   param.w_support,
+                   (long long unsigned)xaxis,
+                   (long long unsigned)yaxis,
+                   grid_gpu,
+                   param.num_threads,
+                   rank,
+                   stream_stacking);
+          }
 #else
           wstack(param.num_w_planes,
                  Nsec,
@@ -296,6 +323,7 @@ void gridding_data(){
   cudaFree(gridss_gpu);
 #endif
 
+  cudaStreamDestroy(stream_reduce);
   cudaStreamDestroy(stream_stacking);
 
 
diff --git a/init.c b/init.c
index e8c9ffb..bcc0c40 100755
--- a/init.c
+++ b/init.c
@@ -55,7 +55,8 @@ void init(int index)
 
   MPI_Barrier(MPI_COMM_WORLD);
 
-  timing_wt.setup = CPU_TIME_wt - begin;
+  if(index == 0) timing_wt.setup = 0.0;
+  timing_wt.setup += CPU_TIME_wt - begin;
 
   return;
 }
@@ -113,9 +114,12 @@ void op_filename() {
   strcat(buf, outparam.extension);
   strcpy(out.extension, buf);
 
+  strcpy(out.timingfile, outparam.timingfile);
+  /*
   strcpy(buf, num_buf);
   strcat(buf, outparam.timingfile);
   strcpy(out.timingfile, buf);
+  */
 }
 
 /* Communicating the relevent parameters to the other process */
@@ -364,6 +368,16 @@ void allocate_memory(int index) {
   // all the sizes are rescaled by the number of MPI tasks
   // Allocate arrays
 
+  if (data.uu != NULL) {
+    printf("Freeing input data\n");
+    free(data.uu);
+    free(data.vv);
+    free(data.ww);
+    free(data.weights);
+    free(data.visreal);
+    free(data.visimg);
+  }
+
   data.uu = (double*) calloc(metaData.Nmeasures,sizeof(double));
   data.vv = (double*) calloc(metaData.Nmeasures,sizeof(double));
   data.ww = (double*) calloc(metaData.Nmeasures,sizeof(double));
diff --git a/main.c b/main.c
index 6b0ee85..c51b25b 100755
--- a/main.c
+++ b/main.c
@@ -143,11 +143,12 @@ int main(int argc, char * argv[])
       if(rank == 0)
         printf( "\nDataset %d\n", ifiles);
 
+      // CLAAAAAAA
       /*INIT function */
       init(ifiles);
 
       /* GRIDDING function */
-      gridding();
+      gridding(ifiles);
 
     }
   /* WRITE_GRIDDED_DATA function */
diff --git a/proto.h b/proto.h
index 6824314..d5df193 100755
--- a/proto.h
+++ b/proto.h
@@ -24,16 +24,16 @@ void shutdown_wstacking( int, char *, char *, int);
 
 #ifdef __cplusplus
 extern "C" {
-  void gridding (void);
-  void gridding_data (void);
+  void gridding (int);
+  void gridding_data (int);
   void write_gridded_data(void);
 }
 #else
 
 /* gridding.c */
-void gridding (void);
-void gridding_data (void);
+void gridding (int);
+void gridding_data (int);
 void write_gridded_data(void);
 
 #endif
-- 
GitLab
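
Note on the allocation change in gridding_nccl.cu: grid_gpu and gridss_gpu are now allocated only while processing the first dataset (ifiles == 0) and reused for every later one, saving a cudaMalloc/cudaFree cycle per input file; the reuse is safe because size_finta depends only on param.num_w_planes, xaxis and yaxis, none of which change between datasets. A minimal sketch of the pattern, with hypothetical names (process_dataset, grid_elems) standing in for the project's own:

    #include <stdio.h>
    #include <cuda_runtime.h>

    static double *grid_gpu = NULL;   /* device grid, shared by all datasets */

    /* Allocate the device grid once, on the first dataset, and reuse it for
       the rest; it is freed once, after the last dataset has been gridded. */
    void process_dataset(int ifile, size_t grid_elems)
    {
        if (ifile == 0) {
            cudaError_t err = cudaMalloc((void **)&grid_gpu,
                                         grid_elems * sizeof(double));
            if (err != cudaSuccess)
                fprintf(stderr, "cudaMalloc: %s\n", cudaGetErrorString(err));
        }
        /* ... grid the current dataset into grid_gpu ... */
    }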
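
Note on the new size > 1 branch: with a single MPI task there is no NCCL reduction to feed, so wstack() writes straight into grid_gpu instead of the gridss_gpu staging buffer. Since the two branches differ only in that one output argument, the same choice could be expressed by selecting a pointer up front, avoiding the duplicated 19-argument call. A hypothetical, simplified sketch (do_wstack() is a placeholder with a reduced signature, not the real wstack()):

    #include <stddef.h>

    void do_wstack(size_t n_vis, double *output_grid);  /* placeholder */

    void stack_sector(size_t n_vis, double *grid_gpu, double *gridss_gpu,
                      int size)
    {
        /* one task: write the final grid directly; several tasks: stage
           into the buffer that feeds the reduction */
        double *target = (size > 1) ? gridss_gpu : grid_gpu;
        do_wstack(n_vis, target);
    }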
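
Note on the timing change in init(): timing_wt.setup is zeroed on the first dataset and accumulated on each pass, so the reported setup time now covers all input files rather than only the last call; the companion change in op_filename() drops the per-dataset prefix on the timing file name, so every pass reports to the same file. The reset-then-accumulate idiom in isolation, with record_setup and elapsed as hypothetical stand-ins for the patch's CPU_TIME_wt - begin measurement:

    static double setup_time;

    /* Zero the accumulator on the first pass, then add on every pass. */
    void record_setup(int index, double elapsed)
    {
        if (index == 0) setup_time = 0.0;
        setup_time += elapsed;
    }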
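
Note on allocate_memory() in init.c: since the main loop now calls init() once per dataset, the arrays read for the previous dataset must be freed before the new calloc() calls, or each file would leak its predecessor's memory. The guard relies on the pointers being NULL before the first pass (static/global zero initialization). The same pattern in isolation, with hypothetical names (allocate_input, uu, nmeasures):

    #include <stdio.h>
    #include <stdlib.h>

    static double *uu = NULL;   /* NULL until the first dataset is loaded */

    /* Free the previous dataset's array, if any, then allocate a
       zero-filled one (calloc) for the next dataset, as the patch does. */
    void allocate_input(size_t nmeasures)
    {
        if (uu != NULL) {
            printf("Freeing input data\n");
            free(uu);
        }
        uu = (double *) calloc(nmeasures, sizeof(double));
    }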