diff --git a/fourier_transform_new.cpp b/fourier_transform_new.cpp deleted file mode 100755 index 98ef3bca86077b61977200d742c479f5f6b06af3..0000000000000000000000000000000000000000 --- a/fourier_transform_new.cpp +++ /dev/null @@ -1,315 +0,0 @@ -#include <stdio.h> -#include "allvars.h" -#include "proto.h" -#include <vector> -#include <complex> -#include <iostream> - -#ifdef CUFFTMP -#include <cufftMp.h> -#include <cuda_runtime.h> -#endif - -void fftw_data(){ - - #ifdef USE_FFTW - // FFT transform the data (using distributed FFTW) - if(rank == 0)printf("PERFORMING FFT\n"); - clock_gettime(CLOCK_MONOTONIC, &begin); - start = clock(); - - #ifndef CUFFTMP - fftw_plan plan; - fftw_complex *fftwgrid; - ptrdiff_t alloc_local, local_n0, local_0_start; - double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y); - - // map the 1D array of complex visibilities to a 2D array required by FFTW (complex[*][2]) - // x is the direction of contiguous data and maps to the second parameter - // y is the parallelized direction and corresponds to the first parameter (--> n0) - // and perform the FFT per w plane - alloc_local = fftw_mpi_local_size_2d(param.grid_size_y, param.grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start); - fftwgrid = fftw_alloc_complex(alloc_local); - plan = fftw_mpi_plan_dft_2d(param.grid_size_y, param.grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE); - - long fftwindex = 0; - long fftwindex2D = 0; - for (int iw=0; iw<param.num_w_planes; iw++) - { - //printf("FFTing plan %d\n",iw); - //select the w-plane to transform - for (int iv=0; iv<yaxis; iv++) - { - for (int iu=0; iu<xaxis; iu++) - { - fftwindex2D = iu + iv*xaxis; - fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); - fftwgrid[fftwindex2D][0] = grid[fftwindex]; - fftwgrid[fftwindex2D][1] = grid[fftwindex+1]; - } - } - - // do the transform for each w-plane - fftw_execute(plan); - - // save the transformed w-plane - for (int iv=0; iv<yaxis; iv++) - { - for (int iu=0; iu<xaxis; iu++) - { - fftwindex2D = iu + iv*xaxis; - fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); - gridss[fftwindex] = norm*fftwgrid[fftwindex2D][0]; - gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D][1]; - } - } - - } - - fftw_destroy_plan(plan); - fftw_free(fftwgrid); - - //Implementation of cufftMp - #else - - //Select the default device per each MPI task - int ndevices; - cudaGetDeviceCount(&ndevices); - cudaSetDevice(rank % ndevices); - //long my_start = rank*( param.grid_size_y/size ); - //int my_ystart = rank*yaxis; - //int my_yend = (rank+1)*yaxis; - double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y); - - - //Choose the communicator and allocate the vector - MPI_Comm comm = MPI_COMM_WORLD; - size_t Ny = param.grid_size_y; - size_t Nx = param.grid_size_x; - std::vector<std::complex<double>> fftwgrid( (Ny/size) * Nx ); - - cufftHandle plan = 0; - cudaStream_t stream = nullptr; - cudaStreamCreate(&stream); - - //Create the plan for the FFT - cufftCreate(&plan); - - size_t worksize; - //Domain decomposition along the y-axis - cufftMpAttachComm(plan, CUFFT_COMM_MPI, &comm); - - cufftSetStream(plan, stream); - cufftMakePlan2d(plan, Ny, Nx, CUFFT_Z2Z, &worksize); - - long fftwindex = 0; - long fftwindex2D = 0; - - for (int iw=0; iw<param.num_w_planes; iw++) - { - //printf("FFTing plan %d\n",iw); - //select the w-plane to transform - - for (int iv=0; iv<yaxis; iv++) - { - for (int iu=0; iu<xaxis; iu++) - { - fftwindex2D = iu + iv*xaxis; - fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); - - fftwgrid[fftwindex2D] = {grid[fftwindex], grid[fftwindex+1]}; - - } - } - - // do the transform for each w-plane - //printf("Task %d: ffting the %d plane\n", rank, iw); - - cudaLibXtDesc *desc, *desc_mean; - cufftXtMalloc(plan, &desc, CUFFT_XT_FORMAT_INPLACE); - cufftXtMalloc(plan, &desc_mean, CUFFT_XT_FORMAT_INPLACE); - - cufftXtMemcpy(plan, desc, fftwgrid.data(), CUFFT_COPY_HOST_TO_DEVICE); - - cufftXtExecDescriptor(plan, desc, desc, CUFFT_INVERSE); - - cudaStreamSynchronize(stream); - cufftXtMemcpy (plan, desc_mean, desc, CUFFT_COPY_DEVICE_TO_DEVICE); - cufftXtMemcpy(plan, fftwgrid.data(), desc_mean, CUFFT_COPY_DEVICE_TO_HOST); - - cufftXtFree(desc); - cufftXtFree(desc_mean); - - for (int iv=0; iv<yaxis; iv++) - { - for (int iu=0; iu<xaxis; iu++) - { - fftwindex2D = iu + iv*xaxis; - fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); - gridss[fftwindex] = norm*fftwgrid[fftwindex2D].real(); - gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D].imag(); - } - } - } - - cufftDestroy(plan); - - #endif //close ifndef ACCOMP - - - #ifdef ONE_SIDE - MPI_Win_fence(0,slabwin); - MPI_Barrier(MPI_COMM_WORLD); - #else - MPI_Barrier(MPI_COMM_WORLD); - #endif - - end = clock(); - clock_gettime(CLOCK_MONOTONIC, &finish); - timing.fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC; - timing.fftw_time1 = (finish.tv_sec - begin.tv_sec); - timing.fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; - clock_gettime(CLOCK_MONOTONIC, &begin); - - #endif - -} - -void write_fftw_data(){ - - #ifdef USE_FFTW - #ifdef WRITE_DATA - // Write results - #ifdef USE_MPI - MPI_Win writewin; - MPI_Win_create(gridss, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin); - MPI_Win_fence(0,writewin); - #endif - if (rank == 0) - { - printf("WRITING FFT TRANSFORMED DATA\n"); - file.pFilereal = fopen (out.fftfile2,"wb"); - file.pFileimg = fopen (out.fftfile3,"wb"); - #ifdef USE_MPI - for (int isector=0; isector<nsectors; isector++) - { - MPI_Win_lock(MPI_LOCK_SHARED,isector,0,writewin); - MPI_Get(gridss_w,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,writewin); - MPI_Win_unlock(isector,writewin); - for (long i=0; i<size_of_grid/2; i++) - { - gridss_real[i] = gridss_w[2*i]; - gridss_img[i] = gridss_w[2*i+1]; - } - if (param.num_w_planes > 1) - { - for (int iw=0; iw<param.num_w_planes; iw++) - for (int iv=0; iv<yaxis; iv++) - for (int iu=0; iu<xaxis; iu++) - { - long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double); - long index = iu + iv*xaxis + iw*xaxis*yaxis; - fseek(file.pFilereal, global_index, SEEK_SET); - fwrite(&gridss_real[index], 1, sizeof(double), file.pFilereal); - } - for (int iw=0; iw<param.num_w_planes; iw++) - for (int iv=0; iv<yaxis; iv++) - for (int iu=0; iu<xaxis; iu++) - { - long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double); - long index = iu + iv*xaxis + iw*xaxis*yaxis; - fseek(file.pFileimg, global_index, SEEK_SET); - fwrite(&gridss_img[index], 1, sizeof(double), file.pFileimg); - } - } - else - { - fwrite(gridss_real, size_of_grid/2, sizeof(double), file.pFilereal); - fwrite(gridss_img, size_of_grid/2, sizeof(double), file.pFileimg); - } - - } - #else - /* - for (int iw=0; iw<param.num_w_planes; iw++) - for (int iv=0; iv<grid_size_y; iv++) - for (int iu=0; iu<grid_size_x; iu++) - { - int isector = 0; - long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y); - double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]); - fprintf (file.pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm); - } - */ - #endif - - fclose(file.pFilereal); - fclose(file.pFileimg); - } - #ifdef USE_MPI - MPI_Win_fence(0,writewin); - MPI_Win_free(&writewin); - MPI_Barrier(MPI_COMM_WORLD); - #endif - #endif //WRITE_DATA - - - // Phase correction - - clock_gettime(CLOCK_MONOTONIC, &begin); - start = clock(); - if(rank == 0)printf("PHASE CORRECTION\n"); - double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double)); - double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double)); - - phase_correction(gridss,image_real,image_imag,xaxis,yaxis,param.num_w_planes,param.grid_size_x,param.grid_size_y,resolution,metaData.wmin,metaData.wmax,param.num_threads,rank); - - end = clock(); - clock_gettime(CLOCK_MONOTONIC, &finish); - timing.phase_time = ((double) (end - start)) / CLOCKS_PER_SEC; - timing.phase_time1 = (finish.tv_sec - begin.tv_sec); - timing.phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; - - #ifdef WRITE_IMAGE - - if(rank == 0) - { - file.pFilereal = fopen (out.fftfile2,"wb"); - file.pFileimg = fopen (out.fftfile3,"wb"); - fclose(file.pFilereal); - fclose(file.pFileimg); - } - #ifdef USE_MPI - MPI_Barrier(MPI_COMM_WORLD); - #endif - if(rank == 0)printf("WRITING IMAGE\n"); - for (int isector=0; isector<size; isector++) - { - #ifdef USE_MPI - MPI_Barrier(MPI_COMM_WORLD); - #endif - if(isector == rank) - { - printf("%d writing\n",isector); - file.pFilereal = fopen (out.fftfile2,"ab"); - file.pFileimg = fopen (out.fftfile3,"ab"); - - long global_index = isector*(xaxis*yaxis)*sizeof(double); - - fseek(file.pFilereal, global_index, SEEK_SET); - fwrite(image_real, xaxis*yaxis, sizeof(double), file.pFilereal); - fseek(file.pFileimg, global_index, SEEK_SET); - fwrite(image_imag, xaxis*yaxis, sizeof(double), file.pFileimg); - - fclose(file.pFilereal); - fclose(file.pFileimg); - } - } - #ifdef USE_MPI - MPI_Barrier(MPI_COMM_WORLD); - #endif - - #endif //WRITE_IMAGE - - #endif //FFTW -} diff --git a/fourier_transform_test.cpp b/fourier_transform_test.cpp deleted file mode 100755 index a53eb81ce5fdce0ad40e6ae84eaeee4edd914c76..0000000000000000000000000000000000000000 --- a/fourier_transform_test.cpp +++ /dev/null @@ -1,313 +0,0 @@ -#include <stdio.h> -#include "allvars.h" -#include "proto.h" -#include <vector> -#include <complex> -#include <iostream> - -#ifdef CUFFTMP -#include <cufftMp.h> -#include <cuda_runtime.h> -#endif - -void fftw_data(){ - - #ifdef USE_FFTW - // FFT transform the data (using distributed FFTW) - if(rank == 0)printf("PERFORMING FFT\n"); - clock_gettime(CLOCK_MONOTONIC, &begin); - start = clock(); - - #ifndef CUFFTMP - fftw_plan plan; - fftw_complex *fftwgrid; - ptrdiff_t alloc_local, local_n0, local_0_start; - double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y); - - // map the 1D array of complex visibilities to a 2D array required by FFTW (complex[*][2]) - // x is the direction of contiguous data and maps to the second parameter - // y is the parallelized direction and corresponds to the first parameter (--> n0) - // and perform the FFT per w plane - alloc_local = fftw_mpi_local_size_2d(param.grid_size_y, param.grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start); - fftwgrid = fftw_alloc_complex(alloc_local); - plan = fftw_mpi_plan_dft_2d(param.grid_size_y, param.grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE); - - long fftwindex = 0; - long fftwindex2D = 0; - for (int iw=0; iw<param.num_w_planes; iw++) - { - //printf("FFTing plan %d\n",iw); - //select the w-plane to transform - for (int iv=0; iv<yaxis; iv++) - { - for (int iu=0; iu<xaxis; iu++) - { - fftwindex2D = iu + iv*xaxis; - fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); - fftwgrid[fftwindex2D][0] = grid[fftwindex]; - fftwgrid[fftwindex2D][1] = grid[fftwindex+1]; - } - } - - // do the transform for each w-plane - fftw_execute(plan); - - // save the transformed w-plane - for (int iv=0; iv<yaxis; iv++) - { - for (int iu=0; iu<xaxis; iu++) - { - fftwindex2D = iu + iv*xaxis; - fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); - gridss[fftwindex] = norm*fftwgrid[fftwindex2D][0]; - gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D][1]; - } - } - - } - - fftw_destroy_plan(plan); - fftw_free(fftwgrid); - - //Implementation of cufftMp - #else - - //Select the default device per each MPI task - int ndevices; - cudaGetDeviceCount(&ndevices); - cudaSetDevice(rank % ndevices); - //long my_start = rank*( param.grid_size_y/size ); - //int my_ystart = rank*yaxis; - //int my_yend = (rank+1)*yaxis; - double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y); - - - //Choose the communicator and allocate the vector - MPI_Comm comm = MPI_COMM_WORLD; - size_t Ny = param.grid_size_y; - size_t Nx = param.grid_size_x; - std::vector<std::complex<double>> fftwgrid( (Ny/size) * Nx ); - - cufftHandle plan = 0; - cudaStream_t stream = nullptr; - cudaStreamCreate(&stream); - - //Create the plan for the FFT - cufftCreate(&plan); - - size_t worksize; - //Domain decomposition along the y-axis - cufftMpAttachComm(plan, CUFFT_COMM_MPI, &comm); - - cufftSetStream(plan, stream); - cufftMakePlan2d(plan, Ny, Nx, CUFFT_Z2Z, &worksize); - - long fftwindex = 0; - long fftwindex2D = 0; - - for (int iw=0; iw<param.num_w_planes; iw++) - { - //printf("FFTing plan %d\n",iw); - //select the w-plane to transform - - for (int iv=0; iv<yaxis; iv++) - { - for (int iu=0; iu<xaxis; iu++) - { - fftwindex2D = iu + iv*xaxis; - fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); - - fftwgrid[fftwindex2D] = {grid[fftwindex], grid[fftwindex+1]}; - - } - } - - // do the transform for each w-plane - //printf("Task %d: ffting the %d plane\n", rank, iw); - - cudaLibXtDesc *desc; - cufftXtMalloc(plan, &desc, CUFFT_XT_FORMAT_INPLACE); - - cufftXtMemcpy(plan, desc, fftwgrid.data(), CUFFT_COPY_HOST_TO_DEVICE); - - cufftXtExecDescriptor(plan, desc, desc, CUFFT_INVERSE); - - cudaStreamSynchronize(stream); - cufftXtMemcpy (plan, desc, desc, CUFFT_COPY_DEVICE_TO_DEVICE); - cufftXtMemcpy(plan, fftwgrid.data(), desc, CUFFT_COPY_DEVICE_TO_HOST); - - cufftXtFree(desc); - - for (int iv=0; iv<yaxis; iv++) - { - for (int iu=0; iu<xaxis; iu++) - { - fftwindex2D = iu + iv*xaxis; - fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); - gridss[fftwindex] = norm*fftwgrid[fftwindex2D].real(); - gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D].imag(); - } - } - } - - cufftDestroy(plan); - - #endif //close ifndef ACCOMP - - - #ifdef ONE_SIDE - MPI_Win_fence(0,slabwin); - MPI_Barrier(MPI_COMM_WORLD); - #else - MPI_Barrier(MPI_COMM_WORLD); - #endif - - end = clock(); - clock_gettime(CLOCK_MONOTONIC, &finish); - timing.fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC; - timing.fftw_time1 = (finish.tv_sec - begin.tv_sec); - timing.fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; - clock_gettime(CLOCK_MONOTONIC, &begin); - - #endif - -} - -void write_fftw_data(){ - - #ifdef USE_FFTW - #ifdef WRITE_DATA - // Write results - #ifdef USE_MPI - MPI_Win writewin; - MPI_Win_create(gridss, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin); - MPI_Win_fence(0,writewin); - #endif - if (rank == 0) - { - printf("WRITING FFT TRANSFORMED DATA\n"); - file.pFilereal = fopen (out.fftfile2,"wb"); - file.pFileimg = fopen (out.fftfile3,"wb"); - #ifdef USE_MPI - for (int isector=0; isector<nsectors; isector++) - { - MPI_Win_lock(MPI_LOCK_SHARED,isector,0,writewin); - MPI_Get(gridss_w,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,writewin); - MPI_Win_unlock(isector,writewin); - for (long i=0; i<size_of_grid/2; i++) - { - gridss_real[i] = gridss_w[2*i]; - gridss_img[i] = gridss_w[2*i+1]; - } - if (param.num_w_planes > 1) - { - for (int iw=0; iw<param.num_w_planes; iw++) - for (int iv=0; iv<yaxis; iv++) - for (int iu=0; iu<xaxis; iu++) - { - long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double); - long index = iu + iv*xaxis + iw*xaxis*yaxis; - fseek(file.pFilereal, global_index, SEEK_SET); - fwrite(&gridss_real[index], 1, sizeof(double), file.pFilereal); - } - for (int iw=0; iw<param.num_w_planes; iw++) - for (int iv=0; iv<yaxis; iv++) - for (int iu=0; iu<xaxis; iu++) - { - long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double); - long index = iu + iv*xaxis + iw*xaxis*yaxis; - fseek(file.pFileimg, global_index, SEEK_SET); - fwrite(&gridss_img[index], 1, sizeof(double), file.pFileimg); - } - } - else - { - fwrite(gridss_real, size_of_grid/2, sizeof(double), file.pFilereal); - fwrite(gridss_img, size_of_grid/2, sizeof(double), file.pFileimg); - } - - } - #else - /* - for (int iw=0; iw<param.num_w_planes; iw++) - for (int iv=0; iv<grid_size_y; iv++) - for (int iu=0; iu<grid_size_x; iu++) - { - int isector = 0; - long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y); - double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]); - fprintf (file.pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm); - } - */ - #endif - - fclose(file.pFilereal); - fclose(file.pFileimg); - } - #ifdef USE_MPI - MPI_Win_fence(0,writewin); - MPI_Win_free(&writewin); - MPI_Barrier(MPI_COMM_WORLD); - #endif - #endif //WRITE_DATA - - - // Phase correction - - clock_gettime(CLOCK_MONOTONIC, &begin); - start = clock(); - if(rank == 0)printf("PHASE CORRECTION\n"); - double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double)); - double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double)); - - phase_correction(gridss,image_real,image_imag,xaxis,yaxis,param.num_w_planes,param.grid_size_x,param.grid_size_y,resolution,metaData.wmin,metaData.wmax,param.num_threads,rank); - - end = clock(); - clock_gettime(CLOCK_MONOTONIC, &finish); - timing.phase_time = ((double) (end - start)) / CLOCKS_PER_SEC; - timing.phase_time1 = (finish.tv_sec - begin.tv_sec); - timing.phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; - - #ifdef WRITE_IMAGE - - if(rank == 0) - { - file.pFilereal = fopen (out.fftfile2,"wb"); - file.pFileimg = fopen (out.fftfile3,"wb"); - fclose(file.pFilereal); - fclose(file.pFileimg); - } - #ifdef USE_MPI - MPI_Barrier(MPI_COMM_WORLD); - #endif - if(rank == 0)printf("WRITING IMAGE\n"); - for (int isector=0; isector<size; isector++) - { - #ifdef USE_MPI - MPI_Barrier(MPI_COMM_WORLD); - #endif - if(isector == rank) - { - printf("%d writing\n",isector); - file.pFilereal = fopen (out.fftfile2,"ab"); - file.pFileimg = fopen (out.fftfile3,"ab"); - - long global_index = isector*(xaxis*yaxis)*sizeof(double); - - fseek(file.pFilereal, global_index, SEEK_SET); - fwrite(image_real, xaxis*yaxis, sizeof(double), file.pFilereal); - fseek(file.pFileimg, global_index, SEEK_SET); - fwrite(image_imag, xaxis*yaxis, sizeof(double), file.pFileimg); - - fclose(file.pFilereal); - fclose(file.pFileimg); - } - } - #ifdef USE_MPI - MPI_Barrier(MPI_COMM_WORLD); - #endif - - #endif //WRITE_IMAGE - - #endif //FFTW -}