Skip to content
Snippets Groups Projects
Commit f1efc03a authored by Giovanni Lacopo
Browse files

cufftMP working with CUDA and OpenMP

parent ea94cf82
Branches
Tags
No related merge requests found
......@@ -5,10 +5,11 @@
#include <cuda_runtime.h>
#include <complex.h>
#include "cuComplex.h"
#include "w-stacking.h"
#include "proto.h"
#include "errcodes.h"
#include <time.h>
#if defined(CUFFTMP) && !defined(USE_FFTW)
#if defined(CUFFTMP) && defined(USE_FFTW)
void cuda_fft(
int num_w_planes,
......@@ -18,9 +19,22 @@ void cuda_fft(
int yaxis,
double * grid,
double * gridss,
int rank,
MPI_Comm comm)
{
#ifdef __CUDACC__
#if !defined __CUDACC__
int ndevices;
cudaGetDeviceCount(&ndevices);
cudaSetDevice(rank % ndevices);
if ( rank == 0 ) {
if (0 == ndevices) {
shutdown_wstacking(NO_ACCELERATORS_FOUND, "No accelerators found", __FILE__, __LINE__ );
}
}
#endif
cudaError_t mmm;
cufftResult_t status;
......@@ -53,8 +67,8 @@ void cuda_fft(
long fftwindex = 0;
long fftwindex2D = 0;
uint fftwindex = 0;
uint fftwindex2D = 0;
double norm = 1.0/(double)(grid_size_x*grid_size_y);
......@@ -64,7 +78,7 @@ void cuda_fft(
for (int iw=0; iw<num_w_planes; iw++)
{
printf("select the %d w-plane to transform\n", iw);
//printf("select the %d w-plane to transform\n", iw);
for (int iv=0; iv<yaxis; iv++)
{
for (int iu=0; iu<xaxis; iu++)
......@@ -143,6 +157,6 @@ void cuda_fft(
cudaStreamDestroy(stream);
cudaDeviceSynchronize();
#endif // __CUDACC__
}
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment