Skip to content
Snippets Groups Projects
Commit f7773175 authored by Emanuele De Rubeis's avatar Emanuele De Rubeis
Browse files

Useless 3D FFT test removed

parent 76ca4e00
No related branches found
No related tags found
No related merge requests found
#include <stdio.h>
#include "allvars.h"
#include "proto.h"
#include <vector>
#include <complex>
#include <iostream>
#ifdef CUFFTMP
#include <cufftMp.h>
#include <cuda_runtime.h>
#endif
void fftw_data(){
#ifdef USE_FFTW
// FFT transform the data (using distributed FFTW)
if(rank == 0)printf("PERFORMING FFT\n");
clock_gettime(CLOCK_MONOTONIC, &begin);
start = clock();
#ifndef CUFFTMP
fftw_plan plan;
fftw_complex *fftwgrid;
ptrdiff_t alloc_local, local_n0, local_0_start;
double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y);
// map the 1D array of complex visibilities to a 2D array required by FFTW (complex[*][2])
// x is the direction of contiguous data and maps to the second parameter
// y is the parallelized direction and corresponds to the first parameter (--> n0)
// and perform the FFT per w plane
alloc_local = fftw_mpi_local_size_2d(param.grid_size_y, param.grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start);
fftwgrid = fftw_alloc_complex(alloc_local);
plan = fftw_mpi_plan_dft_2d(param.grid_size_y, param.grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE);
long fftwindex = 0;
long fftwindex2D = 0;
for (int iw=0; iw<param.num_w_planes; iw++)
{
//printf("FFTing plan %d\n",iw);
//select the w-plane to transform
for (int iv=0; iv<yaxis; iv++)
{
for (int iu=0; iu<xaxis; iu++)
{
fftwindex2D = iu + iv*xaxis;
fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
fftwgrid[fftwindex2D][0] = grid[fftwindex];
fftwgrid[fftwindex2D][1] = grid[fftwindex+1];
}
}
// do the transform for each w-plane
fftw_execute(plan);
// save the transformed w-plane
for (int iv=0; iv<yaxis; iv++)
{
for (int iu=0; iu<xaxis; iu++)
{
fftwindex2D = iu + iv*xaxis;
fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
gridss[fftwindex] = norm*fftwgrid[fftwindex2D][0];
gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D][1];
}
}
}
fftw_destroy_plan(plan);
fftw_free(fftwgrid);
//Implementation of cufftMp
#else
//Select the default device per each MPI task
int ndevices;
cudaGetDeviceCount(&ndevices);
cudaSetDevice(rank % ndevices);
//long my_start = rank*( param.grid_size_y/size );
//int my_ystart = rank*yaxis;
//int my_yend = (rank+1)*yaxis;
double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y);
//Choose the communicator and allocate the vector
MPI_Comm comm = MPI_COMM_WORLD;
size_t Ny = param.grid_size_y;
size_t Nx = param.grid_size_x;
std::vector<std::complex<double>> fftwgrid( (Ny/size) * Nx );
cufftHandle plan = 0;
cudaStream_t stream = nullptr;
cudaStreamCreate(&stream);
//Create the plan for the FFT
cufftCreate(&plan);
size_t worksize;
//Domain decomposition along the y-axis
cufftMpAttachComm(plan, CUFFT_COMM_MPI, &comm);
cufftSetStream(plan, stream);
cufftMakePlan2d(plan, Ny, Nx, CUFFT_Z2Z, &worksize);
long fftwindex = 0;
long fftwindex2D = 0;
for (int iw=0; iw<param.num_w_planes; iw++)
{
//printf("FFTing plan %d\n",iw);
//select the w-plane to transform
for (int iv=0; iv<yaxis; iv++)
{
for (int iu=0; iu<xaxis; iu++)
{
fftwindex2D = iu + iv*xaxis;
fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
fftwgrid[fftwindex2D] = {grid[fftwindex], grid[fftwindex+1]};
}
}
// do the transform for each w-plane
//printf("Task %d: ffting the %d plane\n", rank, iw);
cudaLibXtDesc *desc, *desc_mean;
cufftXtMalloc(plan, &desc, CUFFT_XT_FORMAT_INPLACE);
cufftXtMalloc(plan, &desc_mean, CUFFT_XT_FORMAT_INPLACE);
cufftXtMemcpy(plan, desc, fftwgrid.data(), CUFFT_COPY_HOST_TO_DEVICE);
cufftXtExecDescriptor(plan, desc, desc, CUFFT_INVERSE);
cudaStreamSynchronize(stream);
cufftXtMemcpy (plan, desc_mean, desc, CUFFT_COPY_DEVICE_TO_DEVICE);
cufftXtMemcpy(plan, fftwgrid.data(), desc_mean, CUFFT_COPY_DEVICE_TO_HOST);
cufftXtFree(desc);
cufftXtFree(desc_mean);
for (int iv=0; iv<yaxis; iv++)
{
for (int iu=0; iu<xaxis; iu++)
{
fftwindex2D = iu + iv*xaxis;
fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
gridss[fftwindex] = norm*fftwgrid[fftwindex2D].real();
gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D].imag();
}
}
}
cufftDestroy(plan);
#endif //close ifndef ACCOMP
#ifdef ONE_SIDE
MPI_Win_fence(0,slabwin);
MPI_Barrier(MPI_COMM_WORLD);
#else
MPI_Barrier(MPI_COMM_WORLD);
#endif
end = clock();
clock_gettime(CLOCK_MONOTONIC, &finish);
timing.fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC;
timing.fftw_time1 = (finish.tv_sec - begin.tv_sec);
timing.fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
clock_gettime(CLOCK_MONOTONIC, &begin);
#endif
}
void write_fftw_data(){
#ifdef USE_FFTW
#ifdef WRITE_DATA
// Write results
#ifdef USE_MPI
MPI_Win writewin;
MPI_Win_create(gridss, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin);
MPI_Win_fence(0,writewin);
#endif
if (rank == 0)
{
printf("WRITING FFT TRANSFORMED DATA\n");
file.pFilereal = fopen (out.fftfile2,"wb");
file.pFileimg = fopen (out.fftfile3,"wb");
#ifdef USE_MPI
for (int isector=0; isector<nsectors; isector++)
{
MPI_Win_lock(MPI_LOCK_SHARED,isector,0,writewin);
MPI_Get(gridss_w,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,writewin);
MPI_Win_unlock(isector,writewin);
for (long i=0; i<size_of_grid/2; i++)
{
gridss_real[i] = gridss_w[2*i];
gridss_img[i] = gridss_w[2*i+1];
}
if (param.num_w_planes > 1)
{
for (int iw=0; iw<param.num_w_planes; iw++)
for (int iv=0; iv<yaxis; iv++)
for (int iu=0; iu<xaxis; iu++)
{
long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double);
long index = iu + iv*xaxis + iw*xaxis*yaxis;
fseek(file.pFilereal, global_index, SEEK_SET);
fwrite(&gridss_real[index], 1, sizeof(double), file.pFilereal);
}
for (int iw=0; iw<param.num_w_planes; iw++)
for (int iv=0; iv<yaxis; iv++)
for (int iu=0; iu<xaxis; iu++)
{
long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double);
long index = iu + iv*xaxis + iw*xaxis*yaxis;
fseek(file.pFileimg, global_index, SEEK_SET);
fwrite(&gridss_img[index], 1, sizeof(double), file.pFileimg);
}
}
else
{
fwrite(gridss_real, size_of_grid/2, sizeof(double), file.pFilereal);
fwrite(gridss_img, size_of_grid/2, sizeof(double), file.pFileimg);
}
}
#else
/*
for (int iw=0; iw<param.num_w_planes; iw++)
for (int iv=0; iv<grid_size_y; iv++)
for (int iu=0; iu<grid_size_x; iu++)
{
int isector = 0;
long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y);
double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]);
fprintf (file.pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm);
}
*/
#endif
fclose(file.pFilereal);
fclose(file.pFileimg);
}
#ifdef USE_MPI
MPI_Win_fence(0,writewin);
MPI_Win_free(&writewin);
MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif //WRITE_DATA
// Phase correction
clock_gettime(CLOCK_MONOTONIC, &begin);
start = clock();
if(rank == 0)printf("PHASE CORRECTION\n");
double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double));
double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double));
phase_correction(gridss,image_real,image_imag,xaxis,yaxis,param.num_w_planes,param.grid_size_x,param.grid_size_y,resolution,metaData.wmin,metaData.wmax,param.num_threads,rank);
end = clock();
clock_gettime(CLOCK_MONOTONIC, &finish);
timing.phase_time = ((double) (end - start)) / CLOCKS_PER_SEC;
timing.phase_time1 = (finish.tv_sec - begin.tv_sec);
timing.phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
#ifdef WRITE_IMAGE
if(rank == 0)
{
file.pFilereal = fopen (out.fftfile2,"wb");
file.pFileimg = fopen (out.fftfile3,"wb");
fclose(file.pFilereal);
fclose(file.pFileimg);
}
#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
if(rank == 0)printf("WRITING IMAGE\n");
for (int isector=0; isector<size; isector++)
{
#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
if(isector == rank)
{
printf("%d writing\n",isector);
file.pFilereal = fopen (out.fftfile2,"ab");
file.pFileimg = fopen (out.fftfile3,"ab");
long global_index = isector*(xaxis*yaxis)*sizeof(double);
fseek(file.pFilereal, global_index, SEEK_SET);
fwrite(image_real, xaxis*yaxis, sizeof(double), file.pFilereal);
fseek(file.pFileimg, global_index, SEEK_SET);
fwrite(image_imag, xaxis*yaxis, sizeof(double), file.pFileimg);
fclose(file.pFilereal);
fclose(file.pFileimg);
}
}
#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif //WRITE_IMAGE
#endif //FFTW
}
#include <stdio.h>
#include "allvars.h"
#include "proto.h"
#include <vector>
#include <complex>
#include <iostream>
#ifdef CUFFTMP
#include <cufftMp.h>
#include <cuda_runtime.h>
#endif
void fftw_data(){
#ifdef USE_FFTW
// FFT transform the data (using distributed FFTW)
if(rank == 0)printf("PERFORMING FFT\n");
clock_gettime(CLOCK_MONOTONIC, &begin);
start = clock();
#ifndef CUFFTMP
fftw_plan plan;
fftw_complex *fftwgrid;
ptrdiff_t alloc_local, local_n0, local_0_start;
double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y);
// map the 1D array of complex visibilities to a 2D array required by FFTW (complex[*][2])
// x is the direction of contiguous data and maps to the second parameter
// y is the parallelized direction and corresponds to the first parameter (--> n0)
// and perform the FFT per w plane
alloc_local = fftw_mpi_local_size_2d(param.grid_size_y, param.grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start);
fftwgrid = fftw_alloc_complex(alloc_local);
plan = fftw_mpi_plan_dft_2d(param.grid_size_y, param.grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE);
long fftwindex = 0;
long fftwindex2D = 0;
for (int iw=0; iw<param.num_w_planes; iw++)
{
//printf("FFTing plan %d\n",iw);
//select the w-plane to transform
for (int iv=0; iv<yaxis; iv++)
{
for (int iu=0; iu<xaxis; iu++)
{
fftwindex2D = iu + iv*xaxis;
fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
fftwgrid[fftwindex2D][0] = grid[fftwindex];
fftwgrid[fftwindex2D][1] = grid[fftwindex+1];
}
}
// do the transform for each w-plane
fftw_execute(plan);
// save the transformed w-plane
for (int iv=0; iv<yaxis; iv++)
{
for (int iu=0; iu<xaxis; iu++)
{
fftwindex2D = iu + iv*xaxis;
fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
gridss[fftwindex] = norm*fftwgrid[fftwindex2D][0];
gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D][1];
}
}
}
fftw_destroy_plan(plan);
fftw_free(fftwgrid);
//Implementation of cufftMp
#else
//Select the default device per each MPI task
int ndevices;
cudaGetDeviceCount(&ndevices);
cudaSetDevice(rank % ndevices);
//long my_start = rank*( param.grid_size_y/size );
//int my_ystart = rank*yaxis;
//int my_yend = (rank+1)*yaxis;
double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y);
//Choose the communicator and allocate the vector
MPI_Comm comm = MPI_COMM_WORLD;
size_t Ny = param.grid_size_y;
size_t Nx = param.grid_size_x;
std::vector<std::complex<double>> fftwgrid( (Ny/size) * Nx );
cufftHandle plan = 0;
cudaStream_t stream = nullptr;
cudaStreamCreate(&stream);
//Create the plan for the FFT
cufftCreate(&plan);
size_t worksize;
//Domain decomposition along the y-axis
cufftMpAttachComm(plan, CUFFT_COMM_MPI, &comm);
cufftSetStream(plan, stream);
cufftMakePlan2d(plan, Ny, Nx, CUFFT_Z2Z, &worksize);
long fftwindex = 0;
long fftwindex2D = 0;
for (int iw=0; iw<param.num_w_planes; iw++)
{
//printf("FFTing plan %d\n",iw);
//select the w-plane to transform
for (int iv=0; iv<yaxis; iv++)
{
for (int iu=0; iu<xaxis; iu++)
{
fftwindex2D = iu + iv*xaxis;
fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
fftwgrid[fftwindex2D] = {grid[fftwindex], grid[fftwindex+1]};
}
}
// do the transform for each w-plane
//printf("Task %d: ffting the %d plane\n", rank, iw);
cudaLibXtDesc *desc;
cufftXtMalloc(plan, &desc, CUFFT_XT_FORMAT_INPLACE);
cufftXtMemcpy(plan, desc, fftwgrid.data(), CUFFT_COPY_HOST_TO_DEVICE);
cufftXtExecDescriptor(plan, desc, desc, CUFFT_INVERSE);
cudaStreamSynchronize(stream);
cufftXtMemcpy (plan, desc, desc, CUFFT_COPY_DEVICE_TO_DEVICE);
cufftXtMemcpy(plan, fftwgrid.data(), desc, CUFFT_COPY_DEVICE_TO_HOST);
cufftXtFree(desc);
for (int iv=0; iv<yaxis; iv++)
{
for (int iu=0; iu<xaxis; iu++)
{
fftwindex2D = iu + iv*xaxis;
fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
gridss[fftwindex] = norm*fftwgrid[fftwindex2D].real();
gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D].imag();
}
}
}
cufftDestroy(plan);
#endif //close ifndef ACCOMP
#ifdef ONE_SIDE
MPI_Win_fence(0,slabwin);
MPI_Barrier(MPI_COMM_WORLD);
#else
MPI_Barrier(MPI_COMM_WORLD);
#endif
end = clock();
clock_gettime(CLOCK_MONOTONIC, &finish);
timing.fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC;
timing.fftw_time1 = (finish.tv_sec - begin.tv_sec);
timing.fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
clock_gettime(CLOCK_MONOTONIC, &begin);
#endif
}
void write_fftw_data(){
#ifdef USE_FFTW
#ifdef WRITE_DATA
// Write results
#ifdef USE_MPI
MPI_Win writewin;
MPI_Win_create(gridss, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin);
MPI_Win_fence(0,writewin);
#endif
if (rank == 0)
{
printf("WRITING FFT TRANSFORMED DATA\n");
file.pFilereal = fopen (out.fftfile2,"wb");
file.pFileimg = fopen (out.fftfile3,"wb");
#ifdef USE_MPI
for (int isector=0; isector<nsectors; isector++)
{
MPI_Win_lock(MPI_LOCK_SHARED,isector,0,writewin);
MPI_Get(gridss_w,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,writewin);
MPI_Win_unlock(isector,writewin);
for (long i=0; i<size_of_grid/2; i++)
{
gridss_real[i] = gridss_w[2*i];
gridss_img[i] = gridss_w[2*i+1];
}
if (param.num_w_planes > 1)
{
for (int iw=0; iw<param.num_w_planes; iw++)
for (int iv=0; iv<yaxis; iv++)
for (int iu=0; iu<xaxis; iu++)
{
long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double);
long index = iu + iv*xaxis + iw*xaxis*yaxis;
fseek(file.pFilereal, global_index, SEEK_SET);
fwrite(&gridss_real[index], 1, sizeof(double), file.pFilereal);
}
for (int iw=0; iw<param.num_w_planes; iw++)
for (int iv=0; iv<yaxis; iv++)
for (int iu=0; iu<xaxis; iu++)
{
long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double);
long index = iu + iv*xaxis + iw*xaxis*yaxis;
fseek(file.pFileimg, global_index, SEEK_SET);
fwrite(&gridss_img[index], 1, sizeof(double), file.pFileimg);
}
}
else
{
fwrite(gridss_real, size_of_grid/2, sizeof(double), file.pFilereal);
fwrite(gridss_img, size_of_grid/2, sizeof(double), file.pFileimg);
}
}
#else
/*
for (int iw=0; iw<param.num_w_planes; iw++)
for (int iv=0; iv<grid_size_y; iv++)
for (int iu=0; iu<grid_size_x; iu++)
{
int isector = 0;
long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y);
double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]);
fprintf (file.pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm);
}
*/
#endif
fclose(file.pFilereal);
fclose(file.pFileimg);
}
#ifdef USE_MPI
MPI_Win_fence(0,writewin);
MPI_Win_free(&writewin);
MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif //WRITE_DATA
// Phase correction
clock_gettime(CLOCK_MONOTONIC, &begin);
start = clock();
if(rank == 0)printf("PHASE CORRECTION\n");
double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double));
double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double));
phase_correction(gridss,image_real,image_imag,xaxis,yaxis,param.num_w_planes,param.grid_size_x,param.grid_size_y,resolution,metaData.wmin,metaData.wmax,param.num_threads,rank);
end = clock();
clock_gettime(CLOCK_MONOTONIC, &finish);
timing.phase_time = ((double) (end - start)) / CLOCKS_PER_SEC;
timing.phase_time1 = (finish.tv_sec - begin.tv_sec);
timing.phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
#ifdef WRITE_IMAGE
if(rank == 0)
{
file.pFilereal = fopen (out.fftfile2,"wb");
file.pFileimg = fopen (out.fftfile3,"wb");
fclose(file.pFilereal);
fclose(file.pFileimg);
}
#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
if(rank == 0)printf("WRITING IMAGE\n");
for (int isector=0; isector<size; isector++)
{
#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
if(isector == rank)
{
printf("%d writing\n",isector);
file.pFilereal = fopen (out.fftfile2,"ab");
file.pFileimg = fopen (out.fftfile3,"ab");
long global_index = isector*(xaxis*yaxis)*sizeof(double);
fseek(file.pFilereal, global_index, SEEK_SET);
fwrite(image_real, xaxis*yaxis, sizeof(double), file.pFilereal);
fseek(file.pFileimg, global_index, SEEK_SET);
fwrite(image_imag, xaxis*yaxis, sizeof(double), file.pFileimg);
fclose(file.pFilereal);
fclose(file.pFileimg);
}
}
#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif //WRITE_IMAGE
#endif //FFTW
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment