From f1efc03aed04e732ad99ff178842bf4fdb0bf5d6 Mon Sep 17 00:00:00 2001 From: "giovanni.lacopo" Date: Fri, 8 Sep 2023 15:21:32 +0200 Subject: [PATCH] cufftMP working with CUDA and OpenMP --- cuda_fft.cu | 188 ++++++++++++++++++++++++++++------------------------ 1 file changed, 101 insertions(+), 87 deletions(-) diff --git a/cuda_fft.cu b/cuda_fft.cu index 96530e0..0d88ea8 100644 --- a/cuda_fft.cu +++ b/cuda_fft.cu @@ -5,144 +5,158 @@ #include #include #include "cuComplex.h" -#include "w-stacking.h" +#include "proto.h" +#include "errcodes.h" #include -#if defined(CUFFTMP) && !defined(USE_FFTW) +#if defined(CUFFTMP) && defined(USE_FFTW) void cuda_fft( - int num_w_planes, - int grid_size_x, - int grid_size_y, - int xaxis, - int yaxis, - double * grid, - double * gridss, - MPI_Comm comm) + int num_w_planes, + int grid_size_x, + int grid_size_y, + int xaxis, + int yaxis, + double * grid, + double * gridss, + int rank, + MPI_Comm comm) { -#ifdef __CUDACC__ - cudaError_t mmm; - cufftResult_t status; + #if !defined __CUDACC__ + int ndevices; + cudaGetDeviceCount(&ndevices); + cudaSetDevice(rank % ndevices); - cufftDoubleComplex *fftwgrid; - fftwgrid = (cufftDoubleComplex*) malloc(sizeof(cufftDoubleComplex)*2*num_w_planes*yaxis*grid_size_x); + if ( rank == 0 ) { + if (0 == ndevices) { + + shutdown_wstacking(NO_ACCELERATORS_FOUND, "No accelerators found", __FILE__, __LINE__ ); + } + } + #endif + + cudaError_t mmm; + cufftResult_t status; + cufftDoubleComplex *fftwgrid; + fftwgrid = (cufftDoubleComplex*) malloc(sizeof(cufftDoubleComplex)*2*num_w_planes*yaxis*grid_size_x); - // Plan creation - cufftHandle plan; - status = cufftCreate(&plan); - if (status != CUFFT_SUCCESS) {printf("!!! cufftCreate ERROR %d !!!\n", status);} + // Plan creation - cudaStream_t stream{}; - cudaStreamCreate(&stream); + cufftHandle plan; + status = cufftCreate(&plan); + if (status != CUFFT_SUCCESS) {printf("!!! cufftCreate ERROR %d !!!\n", status);} + cudaStream_t stream{}; + cudaStreamCreate(&stream); - status = cufftMpAttachComm(plan, CUFFT_COMM_MPI, &comm); - if (status != CUFFT_SUCCESS) {printf("!!! cufftMpAttachComm ERROR %d !!!\n", status);} - status = cufftSetStream(plan, stream); - if (status != CUFFT_SUCCESS) {printf("!!! cufftSetStream ERROR %d !!!\n", status);} + status = cufftMpAttachComm(plan, CUFFT_COMM_MPI, &comm); + if (status != CUFFT_SUCCESS) {printf("!!! cufftMpAttachComm ERROR %d !!!\n", status);} - size_t workspace; - status = cufftMakePlan2d(plan, grid_size_x, grid_size_y, CUFFT_Z2Z, &workspace); - if (status != CUFFT_SUCCESS) {printf("!!! cufftMakePlan2d ERROR %d !!!\n", status);} - cudaDeviceSynchronize(); + status = cufftSetStream(plan, stream); + if (status != CUFFT_SUCCESS) {printf("!!! cufftSetStream ERROR %d !!!\n", status);} + size_t workspace; + status = cufftMakePlan2d(plan, grid_size_x, grid_size_y, CUFFT_Z2Z, &workspace); + if (status != CUFFT_SUCCESS) {printf("!!! cufftMakePlan2d ERROR %d !!!\n", status);} + cudaDeviceSynchronize(); - long fftwindex = 0; - long fftwindex2D = 0; - double norm = 1.0/(double)(grid_size_x*grid_size_y); + uint fftwindex = 0; + uint fftwindex2D = 0; + double norm = 1.0/(double)(grid_size_x*grid_size_y); - // Grid composition - for (int iw=0; iw