// w-stacking-fftw.c  (author: Claudio Gheller)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef USE_MPI
#include <mpi.h>
#ifdef USE_FFTW
#include <fftw3-mpi.h>
#endif
#endif
#include <math.h>
#include <time.h>
#include "w-stacking.h"
#define PI 3.14159265359
#define NUM_OF_SECTORS -1
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
#define NOVERBOSE
#define NFILES 10
// Linked List set-up
struct sectorlist {
long index;
struct sectorlist * next;
};
void Push(struct sectorlist** headRef, long data) {
struct sectorlist* newNode = malloc(sizeof(struct sectorlist));
newNode->index = data;
newNode->next = *headRef;
*headRef = newNode;
}
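/* Illustrative sketch (not part of the build): the per-sector lists built with Push
   end in a sentinel node whose index is -1, so traversals stop at current->index == -1
   (see the gridding loop in main). A hypothetical helper to release a list could look
   like this; the name FreeList is an assumption, the original code never frees the lists. */
#if 0
void FreeList(struct sectorlist * head) {
while (head != NULL) {
struct sectorlist * next = head->next;
free(head);
head = next;
}
}
#endif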
// Main Code
int main(int argc, char * argv[])
{
int rank;
int size;
FILE * pFile;
FILE * pFile1;
FILE * pFilereal;
FILE * pFileimg;
char filename[1000];
//char datapath[900] = "/m100_scratch/userexternal/cgheller/gridding/old/data/gauss2_t201806301100_SBL180.binMS/";
//char datapath[900] = "/m100_scratch/userexternal/cgheller/gridding/newgauss2noconj_t201806301100_SBL180.binMS/";
//char datapath[900] = "/m100_scratch/userexternal/cgheller/gridding/gauss1_t201806301100_SBL180.binMS/";
//char datapath[900] = "/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/";
//char datapath[900] = "/m100_scratch/userexternal/cgheller/gridding/gauss4_t201806301100_SBL180.binMS/";
//char datapath[900] = "/m100_scratch/userexternal/cgheller/gridding/hba-8hrs_t201806301100_SBH255i-test.binMS/";
//
//char datapath[900] = "/m100_scratch/userexternal/cgheller/gridding/hba-8hrs_gauss4new.binMS/";
//char datapath[900] = "/m100_scratch/userexternal/cgheller/Lofar/Observations/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/";
char datapath[900];
char datapath_multi[NFILES][900];
char ufile[30] = "ucoord.bin";
char vfile[30] = "vcoord.bin";
char wfile[30] = "wcoord.bin";
char weightsfile[30] = "weights.bin";
char visrealfile[30] = "visibilities_real.bin";
char visimgfile[30] = "visibilities_img.bin";
char metafile[30] = "meta.txt";
char outfile[30] = "grid.txt";
char outfile1[30] = "coords.txt";
char outfile2[30] = "grid_real.bin";
char outfile3[30] = "grid_img.bin";
char fftfile[30] = "fft.txt";
char fftfile2[30] = "fft_real.bin";
char fftfile3[30] = "fft_img.bin";
char logfile[30] = "run.log";
char extension[30] = ".txt";
char srank[4];
double * uu;
double * vv;
double * ww;
float * weights;
float * visreal;
float * visimg;
long Nmeasures;
long Nvis;
long Nweights;
long freq_per_chan;
long polarisations;
long Ntimes;
double dt;
double thours;
long baselines;
double uvmin;
double uvmax;
double wmin;
double wmax;
// MESH SIZE
int grid_size_x = 2048;
int grid_size_y = 2048;
int local_grid_size_x;// = 100;
int local_grid_size_y;// = 100;
int xaxis;
int yaxis;
int num_w_planes = 1;
// DAV: the corresponding KernelLen is calculated within the wstack function. It can anyway be hardcoded for optimization
int w_support = 7;
int num_threads;// = 4;
double dx = 1.0/(double)grid_size_x;
double dw = 1.0/(double)num_w_planes;
double w_supporth = (double)((w_support-1)/2)*dx;
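// Illustrative arithmetic: dx is the cell size in normalized uv units, so with
// grid_size_x = 2048 and w_support = 7, w_supporth = ((7-1)/2)*dx = 3/2048 ~ 0.0015,
// i.e. half the kernel support expressed in the same units as the uu/vv coordinates.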
clock_t start, end, start0, startk, endk;
double setup_time, process_time, mpi_time, fftw_time, tot_time, kernel_time, reduce_time, compose_time, phase_time;
double setup_time1, process_time1, mpi_time1, fftw_time1, tot_time1, kernel_time1, reduce_time1, compose_time1, phase_time1;
double writetime, writetime1;
struct timespec begin, finish, begin0, begink, finishk;
double elapsed;
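// Two sets of timers are kept throughout: clock() accumulates CPU time (the *_time
// variables), while clock_gettime(CLOCK_MONOTONIC) measures wall-clock time (the
// *_time1 variables, reported as the "P..." timings at the end of the run).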
long nsectors;
clock_gettime(CLOCK_MONOTONIC, &begin0);
start0 = clock();
// Set the number of OpenMP threads (first command-line argument; default to 1 if absent)
num_threads = (argc > 1) ? atoi(argv[1]) : 1;
// Initialize MPI environment
#ifdef USE_MPI
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if(rank == 0)printf("Running with %d MPI tasks\n",size);
#ifdef USE_FFTW
fftw_mpi_init();
#endif
#else
rank = 0;
size = 1;
#endif
if(rank == 0)printf("Running with %d threads\n",num_threads);
// set the local size of the image
local_grid_size_x = grid_size_x;
nsectors = NUM_OF_SECTORS;
if (nsectors < 0) nsectors = size;
local_grid_size_y = grid_size_y/nsectors;
//nsectors = size;
// LOCAL grid size
xaxis = local_grid_size_x;
yaxis = local_grid_size_y;
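// Illustrative example of the slab decomposition: with the default NUM_OF_SECTORS = -1
// the number of sectors equals the number of MPI tasks, so for grid_size_y = 2048 and
// 4 tasks each rank owns a slab of yaxis = 2048/4 = 512 rows (and the full xaxis = 2048
// columns). grid_size_y is assumed to be divisible by the number of sectors.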
clock_gettime(CLOCK_MONOTONIC, &begin);
start = clock();
// INPUT FILES (only the first ndatasets entries are used)
int ndatasets = 1;
strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/");
//strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/");
//strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
//strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/");
strcpy(datapath,datapath_multi[0]);
// Read metadata
strcpy(filename,datapath);
strcat(filename,metafile);
pFile = fopen (filename,"r");
fscanf(pFile,"%ld",&Nmeasures);
fscanf(pFile,"%ld",&Nvis);
fscanf(pFile,"%ld",&freq_per_chan);
fscanf(pFile,"%ld",&polarisations);
fscanf(pFile,"%ld",&Ntimes);
fscanf(pFile,"%lf",&dt);
fscanf(pFile,"%lf",&thours);
fscanf(pFile,"%ld",&baselines);
fscanf(pFile,"%lf",&uvmin);
fscanf(pFile,"%lf",&uvmax);
fscanf(pFile,"%lf",&wmin);
fscanf(pFile,"%lf",&wmax);
fclose(pFile);
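// For reference, the metadata file is read as whitespace-separated values in this
// fixed order: Nmeasures, Nvis, freq_per_chan, polarisations, Ntimes, dt, thours,
// baselines, uvmin, uvmax, wmin, wmax (see the fscanf sequence above).
/* Illustrative, more defensive variant of the read above (not part of the build;
   the error message text is an assumption): */
#if 0
pFile = fopen (filename,"r");
if (pFile == NULL || fscanf(pFile,"%ld",&Nmeasures) != 1)
{
fprintf(stderr,"Error reading metadata from %s\n",filename);
exit(1);
}
/* ...and similarly for the remaining fields... */
fclose(pFile);
#endif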
// WATCH THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
int nsub = 1000;
//int nsub = 10;
printf("Subtracting last %d measurements\n",nsub);
Nmeasures = Nmeasures-nsub;
Nvis = Nmeasures*freq_per_chan*polarisations;
// calculate the coordinates of the center
double uvshift = uvmin/(uvmax-uvmin);
//printf("UVSHIFT %f %f %f %f %f\n",uvmin, uvmax, wmin, wmax, uvshift);
if (rank == 0)
{
printf("N. measurements %ld\n",Nmeasures);
printf("N. visibilities %ld\n",Nvis);
}
// Set temporary local size of points
long nm_pe = (long)(Nmeasures/size);
long remaining = Nmeasures%size;
long startrow = rank*nm_pe;
if (rank == size-1)nm_pe = nm_pe+remaining;
long Nmeasures_tot = Nmeasures;
Nmeasures = nm_pe;
long Nvis_tot = Nvis;
Nvis = Nmeasures*freq_per_chan*polarisations;
Nweights = Nmeasures*polarisations;
#ifdef VERBOSE
printf("N. measurements on %d %ld\n",rank,Nmeasures);
printf("N. visibilities on %d %ld\n",rank,Nvis);
#endif
// DAV: all these arrays could be allocated statically for the sake of optimization. However, be careful that if MPI is used
// all the sizes are rescaled by the number of MPI tasks
// Allocate arrays
uu = (double*) calloc(Nmeasures,sizeof(double));
vv = (double*) calloc(Nmeasures,sizeof(double));
ww = (double*) calloc(Nmeasures,sizeof(double));
weights = (float*) calloc(Nweights,sizeof(float));
visreal = (float*) calloc(Nvis,sizeof(float));
visimg = (float*) calloc(Nvis,sizeof(float));
if(rank == 0)printf("READING DATA\n");
// Read data
strcpy(filename,datapath);
strcat(filename,ufile);
//printf("Reading %s\n",filename);
pFile = fopen (filename,"rb");
fseek (pFile,startrow*sizeof(double),SEEK_SET);
fread(uu,Nmeasures*sizeof(double),1,pFile);
fclose(pFile);
strcpy(filename,datapath);
strcat(filename,vfile);
//printf("Reading %s\n",filename);
pFile = fopen (filename,"rb");
fseek (pFile,startrow*sizeof(double),SEEK_SET);
fread(vv,Nmeasures*sizeof(double),1,pFile);
fclose(pFile);
strcpy(filename,datapath);
strcat(filename,wfile);
//printf("Reading %s\n",filename);
pFile = fopen (filename,"rb");
fseek (pFile,startrow*sizeof(double),SEEK_SET);
fread(ww,Nmeasures*sizeof(double),1,pFile);
fclose(pFile);
#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
clock_gettime(CLOCK_MONOTONIC, &finish);
end = clock();
setup_time = ((double) (end - start)) / CLOCKS_PER_SEC;
setup_time1 = (finish.tv_sec - begin.tv_sec);
setup_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
if(rank == 0)printf("GRIDDING DATA\n");
// Create histograms and linked lists
clock_gettime(CLOCK_MONOTONIC, &begin);
start = clock();
//CLAAA
// Initialize linked list
struct sectorlist ** sectorhead;
sectorhead = (struct sectorlist **) malloc((nsectors+1) * sizeof(struct sectorlist *));
for (int isec=0; isec<=nsectors; isec++)
{
sectorhead[isec] = malloc(sizeof(struct sectorlist));
sectorhead[isec]->index = -1;
sectorhead[isec]->next = NULL;
}
long * histo_send = (long*) calloc(nsectors+1,sizeof(long));
int * boundary = (int*) calloc(Nmeasures,sizeof(int));
double uuh,vvh;
for (long iphi = 0; iphi < Nmeasures; iphi++)
{
boundary[iphi] = -1;
uuh = uu[iphi];
vvh = vv[iphi];
int binphi = (int)(vvh*nsectors);
// check whether the point also influences neighboring slabs
double updist = (double)((binphi+1)*yaxis)*dx - vvh;
double downdist = vvh - (double)(binphi*yaxis)*dx;
//
histo_send[binphi]++;
Push(&sectorhead[binphi],iphi);
if(updist < w_supporth && updist >= 0.0) {histo_send[binphi+1]++; boundary[iphi] = binphi+1; Push(&sectorhead[binphi+1],iphi);};
if(downdist < w_supporth && binphi > 0 && downdist >= 0.0) {histo_send[binphi-1]++; boundary[iphi] = binphi-1; Push(&sectorhead[binphi-1],iphi);};
}
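// What the loop above does, in short: each visibility is assigned to the slab
// binphi = (int)(vv*nsectors); if its v coordinate lies within w_supporth of a slab
// edge it is also pushed onto the neighboring sector's list (and counted in histo_send),
// so that the convolution kernel, which extends w_supporth on each side, does not lose
// contributions across slab boundaries. For example, with nsectors = 4 a point at
// vv = 0.2505 falls in sector 1 but, being only ~0.0005 above the 0.25 boundary
// (less than w_supporth), is replicated into sector 0 as well.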
#ifdef PIPPO
struct sectorlist * current;
long iiii = 0;
for (int j=0; j<nsectors; j++)
{
current = sectorhead[j];
iiii = 0;
while (current->index != -1)
{
printf("%d %d %ld %ld %ld\n",rank,j,iiii,histo_send[j],current->index);
current = current->next;
iiii++;
}
}
#endif
#ifdef VERBOSE
for (int iii=0; iii<nsectors+1; iii++)printf("HISTO %d %d %ld\n",rank, iii, histo_send[iii]);
#endif
// Create sector grid
double * gridss;
double * gridss_w;
double * gridss_real;
double * gridss_img;
double * grid;
long size_of_grid;
size_of_grid = 2*num_w_planes*xaxis*yaxis;
gridss = (double*) calloc(size_of_grid,sizeof(double));
gridss_w = (double*) calloc(size_of_grid,sizeof(double));
gridss_real = (double*) calloc(size_of_grid/2,sizeof(double));
gridss_img = (double*) calloc(size_of_grid/2,sizeof(double));
// Create destination slab
grid = (double*) calloc(size_of_grid,sizeof(double));
// Create temporary global grid
#ifndef USE_MPI
double * gridtot = (double*) calloc(2*grid_size_x*grid_size_y*num_w_planes,sizeof(double));
#endif
double shift = (double)(dx*yaxis);
// Open the MPI Memory Window for the slab
#ifdef USE_MPI
MPI_Win slabwin;
MPI_Win_create(grid, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &slabwin);
MPI_Win_fence(0,slabwin);
#endif
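// The window exposes each rank's destination slab ("grid") to the other ranks. In the
// sector loop below, the per-sector buffer gridss is combined into the owning rank's
// slab either with one-sided MPI_Accumulate (when ONE_SIDE is defined) or with a
// collective MPI_Reduce targeting that rank; both perform an element-wise MPI_SUM over
// the size_of_grid doubles.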
#ifndef USE_MPI
pFile1 = fopen (outfile1,"w");
#endif
// loop over files
//
kernel_time = 0.0;
kernel_time1 = 0.0;
reduce_time = 0.0;
reduce_time1 = 0.0;
compose_time = 0.0;
compose_time1 = 0.0;
for (int ifiles=0; ifiles<ndatasets; ifiles++)
{
strcpy(filename,datapath_multi[ifiles]);
printf("Processing %s, %d of %d\n",filename,ifiles+1,ndatasets);
strcat(filename,weightsfile);
pFile = fopen (filename,"rb");
fseek (pFile,startrow*polarisations*sizeof(float),SEEK_SET);
fread(weights,(Nweights)*sizeof(float),1,pFile);
fclose(pFile);
strcpy(filename,datapath_multi[ifiles]);
strcat(filename,visrealfile);
//printf("Reading %s\n",filename);
pFile = fopen (filename,"rb");
fseek (pFile,startrow*freq_per_chan*polarisations*sizeof(float),SEEK_SET);
fread(visreal,Nvis*sizeof(float),1,pFile);
fclose(pFile);
strcpy(filename,datapath_multi[ifiles]);
strcat(filename,visimgfile);
//printf("Reading %s\n",filename);
pFile = fopen (filename,"rb");
fseek (pFile,startrow*freq_per_chan*polarisations*sizeof(float),SEEK_SET);
fread(visimg,Nvis*sizeof(float),1,pFile);
fclose(pFile);
#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
// Declare temporary arrays for the masking
double * uus;
double * vvs;
double * wws;
float * visreals;
float * visimgs;
float * weightss;
long isector;
for (long isector_count=0; isector_count<nsectors; isector_count++)
{
clock_gettime(CLOCK_MONOTONIC, &begink);
startk = clock();
// define local destination sector
//isector = (isector_count+rank)%size;
isector = isector_count;
// allocate sector arrays
long Nsec = histo_send[isector];
uus = (double*) malloc(Nsec*sizeof(double));
vvs = (double*) malloc(Nsec*sizeof(double));
wws = (double*) malloc(Nsec*sizeof(double));
long Nweightss = Nsec*polarisations;
long Nvissec = Nweightss*freq_per_chan;
weightss = (float*) malloc(Nweightss*sizeof(float));
visreals = (float*) malloc(Nvissec*sizeof(float));
visimgs = (float*) malloc(Nvissec*sizeof(float));
// select data for this sector
long icount = 0;
long ip = 0;
long inu = 0;
//CLAAAA
struct sectorlist * current;
current = sectorhead[isector];
while (current->index != -1)
{
long ilocal = current->index;
//double vvh = vv[ilocal];
//int binphi = (int)(vvh*nsectors);
//if (binphi == isector || boundary[ilocal] == isector)
//{
uus[icount] = uu[ilocal];
vvs[icount] = vv[ilocal]-isector*shift;
wws[icount] = ww[ilocal];
for (long ipol=0; ipol<polarisations; ipol++)
{
weightss[ip] = weights[ilocal*polarisations+ipol];
ip++;
}
for (long ifreq=0; ifreq<polarisations*freq_per_chan; ifreq++)
{
visreals[inu] = visreal[ilocal*polarisations*freq_per_chan+ifreq];
visimgs[inu] = visimg[ilocal*polarisations*freq_per_chan+ifreq];
//if(visimgs[inu]>1e10 || visimgs[inu]<-1e10)printf("%f %f %ld %ld %d %ld %ld\n",visreals[inu],visimgs[inu],inu,Nvissec,rank,ilocal*polarisations*freq_per_chan+ifreq,Nvis);
inu++;
}
icount++;
//}
current = current->next;
}
clock_gettime(CLOCK_MONOTONIC, &finishk);
endk = clock();
compose_time += ((double) (endk - startk)) / CLOCKS_PER_SEC;
compose_time1 += (finishk.tv_sec - begink.tv_sec);
compose_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;
#ifndef USE_MPI
double uumin = 1e20;
double vvmin = 1e20;
double uumax = -1e20;
double vvmax = -1e20;
for (long ipart=0; ipart<Nsec; ipart++)
{
uumin = MIN(uumin,uus[ipart]);
uumax = MAX(uumax,uus[ipart]);
vvmin = MIN(vvmin,vvs[ipart]);
vvmax = MAX(vvmax,vvs[ipart]);
if(ipart%10 == 0)fprintf (pFile1, "%ld %f %f %f\n",isector,uus[ipart],vvs[ipart]+isector*shift,wws[ipart]);
}
printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax);
#endif
// Convolve the data of this sector onto the grid
#ifdef VERBOSE
printf("Processing sector %ld\n",isector);
#endif
clock_gettime(CLOCK_MONOTONIC, &begink);
startk = clock();
wstack(num_w_planes,
Nsec,
freq_per_chan,
polarisations,
uus,
vvs,
wws,
visreals,
visimgs,
weightss,
dx,
dw,
w_support,
xaxis,
yaxis,
gridss,
num_threads);
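// wstack (declared in w-stacking.h) grids the selected visibilities onto the local
// sector buffer: gridss holds num_w_planes planes of xaxis*yaxis cells with interleaved
// real/imaginary parts (hence the factor 2 in size_of_grid), and the kernel half-width
// is derived from w_support inside the function (see the note where w_support is set).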
clock_gettime(CLOCK_MONOTONIC, &finishk);
endk = clock();
kernel_time += ((double) (endk - startk)) / CLOCKS_PER_SEC;
kernel_time1 += (finishk.tv_sec - begink.tv_sec);
kernel_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;
#ifdef VERBOSE
printf("Processed sector %ld\n",isector);
#endif
clock_gettime(CLOCK_MONOTONIC, &begink);
startk = clock();
//for (long iii=0; iii<2*xaxis*yaxis*num_w_planes; iii++)printf("--> %f\n",gridss[iii]);
#ifndef USE_MPI
long stride = isector*2*xaxis*yaxis*num_w_planes;
for (long iii=0; iii<2*xaxis*yaxis*num_w_planes; iii++)gridtot[stride+iii] = gridss[iii];
#endif
// Write grid in the corresponding remote slab
#ifdef USE_MPI
int target_rank = (int)isector;
//int target_rank = (int)(size-isector-1);
#ifdef ONE_SIDE
printf("One Side communication active\n");
MPI_Win_lock(MPI_LOCK_SHARED,target_rank,0,slabwin);
MPI_Accumulate(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,MPI_SUM,slabwin);
MPI_Win_unlock(target_rank,slabwin);
//MPI_Put(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,slabwin);
#else
MPI_Reduce(gridss,grid,size_of_grid,MPI_DOUBLE,MPI_SUM,target_rank,MPI_COMM_WORLD);
#endif //ONE_SIDE
#endif //USE_MPI
clock_gettime(CLOCK_MONOTONIC, &finishk);
endk = clock();
reduce_time += ((double) (endk - startk)) / CLOCKS_PER_SEC;
reduce_time1 += (finishk.tv_sec - begink.tv_sec);
reduce_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;
// Go to next sector
for (long inull=0; inull<2*num_w_planes*xaxis*yaxis; inull++)gridss[inull] = 0.0;
// Deallocate all sector arrays
free(uus);
free(vvs);
free(wws);
free(weightss);
free(visreals);
free(visimgs);
// End of loop over sectors
}
// End of loop over input files
}
// Finalize MPI communication
#ifdef USE_MPI
MPI_Win_fence(0,slabwin);
#endif
// Swap left and right parts APPARENTLY NOT NECESSARY
/*
for (long kswap=0; kswap<num_w_planes; kswap++)
for (long jswap=0; jswap<yaxis; jswap++)
for (long iswap=0; iswap<xaxis; iswap++)
{
long index_origin = 2*(iswap + jswap*xaxis + kswap*yaxis*xaxis);
gridss[index_origin] = grid[index_origin];
gridss[index_origin+1] = grid[index_origin+1];
}
for (long kswap=0; kswap<num_w_planes; kswap++)
for (long jswap=0; jswap<yaxis; jswap++)
for (long iswap=0; iswap<xaxis/2; iswap++)
{
long index_origin = 2*(iswap + jswap*xaxis + kswap*yaxis*xaxis);
long index_destination = 2*(iswap+xaxis/2 + jswap*xaxis + kswap*yaxis*xaxis);
grid[index_destination] = gridss[index_origin];
grid[index_destination+1] = gridss[index_origin+1];
}
for (long kswap=0; kswap<num_w_planes; kswap++)
for (long jswap=0; jswap<yaxis; jswap++)
for (long iswap=xaxis/2; iswap<xaxis; iswap++)
{
long index_origin = 2*(iswap + jswap*xaxis + kswap*yaxis*xaxis);
long index_destination = 2*(iswap-xaxis/2 + jswap*xaxis + kswap*yaxis*xaxis);
grid[index_destination] = gridss[index_origin];
grid[index_destination+1] = gridss[index_origin+1];
}
*/
#ifndef USE_MPI
fclose(pFile1);
#endif
#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
end = clock();
clock_gettime(CLOCK_MONOTONIC, &finish);
process_time = ((double) (end - start)) / CLOCKS_PER_SEC;
process_time1 = (finish.tv_sec - begin.tv_sec);
process_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
clock_gettime(CLOCK_MONOTONIC, &begin);
#ifdef WRITE_DATA
// Write results
if (rank == 0)
{
printf("WRITING GRIDDED DATA\n");
pFilereal = fopen (outfile2,"wb");
pFileimg = fopen (outfile3,"wb");
#ifdef USE_MPI
for (int isector=0; isector<nsectors; isector++)
{
MPI_Win_lock(MPI_LOCK_SHARED,isector,0,slabwin);
MPI_Get(gridss,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,slabwin);
MPI_Win_unlock(isector,slabwin);
for (long i=0; i<size_of_grid/2; i++)
{
gridss_real[i] = gridss[2*i];
gridss_img[i] = gridss[2*i+1];
}
if (num_w_planes > 1)
{
for (int iw=0; iw<num_w_planes; iw++)
for (int iv=0; iv<yaxis; iv++)
for (int iu=0; iu<xaxis; iu++)
{
long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double);
long index = iu + iv*xaxis + iw*xaxis*yaxis;
fseek(pFilereal, global_index, SEEK_SET);
fwrite(&gridss_real[index], 1, sizeof(double), pFilereal);
}
for (int iw=0; iw<num_w_planes; iw++)
for (int iv=0; iv<yaxis; iv++)
for (int iu=0; iu<xaxis; iu++)
{
long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double);
long index = iu + iv*xaxis + iw*xaxis*yaxis;
fseek(pFileimg, global_index, SEEK_SET);
fwrite(&gridss_img[index], 1, sizeof(double), pFileimg);
//double v_norm = sqrt(gridss[index]*gridss[index]+gridss[index+1]*gridss[index+1]);
//fprintf (pFile, "%d %d %d %f %f %f\n", iu,isector*yaxis+iv,iw,gridss[index],gridss[index+1],v_norm);
}
} else {
for (int iw=0; iw<num_w_planes; iw++)
{
long global_index = (xaxis*isector*yaxis + iw*grid_size_x*grid_size_y)*sizeof(double);
long index = iw*xaxis*yaxis;
fseek(pFilereal, global_index, SEEK_SET);
fwrite(&gridss_real[index], xaxis*yaxis, sizeof(double), pFilereal);
fseek(pFileimg, global_index, SEEK_SET);
fwrite(&gridss_img[index], xaxis*yaxis, sizeof(double), pFileimg);
}
}
}
#else
for (int iw=0; iw<num_w_planes; iw++)
for (int iv=0; iv<grid_size_y; iv++)
for (int iu=0; iu<grid_size_x; iu++)
{
long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y);
fwrite(&gridtot[index], 1, sizeof(double), pFilereal);
fwrite(&gridtot[index+1], 1, sizeof(double), pFileimg);
//double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]);
//fprintf (pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm);
}
#endif
fclose(pFilereal);
fclose(pFileimg);
}
#ifdef USE_MPI
MPI_Win_fence(0,slabwin);
#endif
#endif //WRITE_DATA
#ifdef USE_FFTW
// FFT transform the data (using distributed FFTW)
if(rank == 0)printf("PERFORMING FFT\n");
clock_gettime(CLOCK_MONOTONIC, &begin);
start = clock();
fftw_plan plan;
fftw_complex *fftwgrid;
ptrdiff_t alloc_local, local_n0, local_0_start;
double norm = 1.0/(double)(grid_size_x*grid_size_y);
// map the 1D array of complex visibilities to a 2D array required by FFTW (complex[*][2])
// x is the direction of contiguous data and maps to the second parameter
// y is the parallelized direction and corresponds to the first parameter (--> n0)
// and perform the FFT per w plane
alloc_local = fftw_mpi_local_size_2d(grid_size_y, grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start);
fftwgrid = fftw_alloc_complex(alloc_local);
plan = fftw_mpi_plan_dft_2d(grid_size_y, grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE);
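// fftw_mpi_local_size_2d splits the first (y) dimension across ranks: this rank owns
// local_n0 rows starting at row local_0_start. The copy loops below implicitly assume
// that FFTW's row split matches the slab decomposition used for gridding, i.e.
// local_n0 == yaxis == grid_size_y/size, which holds when grid_size_y is divisible by
// the number of MPI tasks.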
long fftwindex = 0;
long fftwindex2D = 0;
for (int iw=0; iw<num_w_planes; iw++)
{
//printf("FFTing plan %d\n",iw);
// select the w-plane to transform
for (int iv=0; iv<yaxis; iv++)
{
for (int iu=0; iu<xaxis; iu++)
{
fftwindex2D = iu + iv*xaxis;
fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
fftwgrid[fftwindex2D][0] = grid[fftwindex];
fftwgrid[fftwindex2D][1] = grid[fftwindex+1];
}
}
// do the transform for each w-plane
fftw_execute(plan);
// save the transformed w-plane
for (int iv=0; iv<yaxis; iv++)
{
for (int iu=0; iu<xaxis; iu++)
{
fftwindex2D = iu + iv*xaxis;
fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
gridss[fftwindex] = norm*fftwgrid[fftwindex2D][0];
gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D][1];
}
}
}
fftw_destroy_plan(plan);
#ifdef USE_MPI
MPI_Win_fence(0,slabwin);
MPI_Barrier(MPI_COMM_WORLD);
#endif
end = clock();
clock_gettime(CLOCK_MONOTONIC, &finish);
fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC;
fftw_time1 = (finish.tv_sec - begin.tv_sec);
fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
clock_gettime(CLOCK_MONOTONIC, &begin);
#ifdef WRITE_DATA
// Write results
#ifdef USE_MPI
MPI_Win writewin;
MPI_Win_create(gridss, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin);
MPI_Win_fence(0,writewin);
#endif
if (rank == 0)
{
printf("WRITING FFT TRANSFORMED DATA\n");
pFilereal = fopen (fftfile2,"wb");
pFileimg = fopen (fftfile3,"wb");
#ifdef USE_MPI
for (int isector=0; isector<nsectors; isector++)
{
MPI_Win_lock(MPI_LOCK_SHARED,isector,0,writewin);
MPI_Get(gridss_w,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,writewin);
MPI_Win_unlock(isector,writewin);
for (long i=0; i<size_of_grid/2; i++)
{
gridss_real[i] = gridss_w[2*i];
gridss_img[i] = gridss_w[2*i+1];
}
if (num_w_planes > 1)
{
for (int iw=0; iw<num_w_planes; iw++)
for (int iv=0; iv<yaxis; iv++)
for (int iu=0; iu<xaxis; iu++)
{
long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double);
long index = iu + iv*xaxis + iw*xaxis*yaxis;
fseek(pFilereal, global_index, SEEK_SET);
fwrite(&gridss_real[index], 1, sizeof(double), pFilereal);
}
for (int iw=0; iw<num_w_planes; iw++)
for (int iv=0; iv<yaxis; iv++)
for (int iu=0; iu<xaxis; iu++)
{
long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double);
long index = iu + iv*xaxis + iw*xaxis*yaxis;
fseek(pFileimg, global_index, SEEK_SET);
fwrite(&gridss_img[index], 1, sizeof(double), pFileimg);
}
} else {
fwrite(gridss_real, size_of_grid/2, sizeof(double), pFilereal);
fwrite(gridss_img, size_of_grid/2, sizeof(double), pFileimg);
}
}
#else
/*
for (int iw=0; iw<num_w_planes; iw++)
for (int iv=0; iv<grid_size_y; iv++)
for (int iu=0; iu<grid_size_x; iu++)
{
int isector = 0;
long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y);
double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]);
fprintf (pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm);
}
*/
#endif
fclose(pFilereal);
fclose(pFileimg);
}
#ifdef USE_MPI
//MPI_Win_fence(0,writewin);
MPI_Win_fence(0,writewin);
MPI_Win_free(&writewin);
MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif //WRITE_DATA
fftw_free(fftwgrid);
// Phase correction
clock_gettime(CLOCK_MONOTONIC, &begin);
start = clock();
if(rank == 0)printf("PHASE CORRECTION\n");
double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double));
double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double));
phase_correction(gridss,image_real,image_imag,xaxis,yaxis,num_w_planes,grid_size_x,grid_size_y);
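// Judging from its arguments, phase_correction applies the w-dependent phase term and
// collapses the num_w_planes planes of the transformed grid into a single xaxis*yaxis
// image per rank (image_real, image_imag); these local slabs are written out rank by
// rank below when WRITE_IMAGE is defined.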
end = clock();
clock_gettime(CLOCK_MONOTONIC, &finish);
phase_time = ((double) (end - start)) / CLOCKS_PER_SEC;
phase_time1 = (finish.tv_sec - begin.tv_sec);
phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
#ifdef WRITE_IMAGE
if(rank == 0)
{
pFilereal = fopen (fftfile2,"wb");
pFileimg = fopen (fftfile3,"wb");
fclose(pFilereal);
fclose(pFileimg);
}
#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
if(rank == 0)printf("WRITING IMAGE\n");
for (int isector=0; isector<size; isector++)
{
#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
if(isector == rank)
{
printf("%d writing\n",isector);
pFilereal = fopen (fftfile2,"ab");
pFileimg = fopen (fftfile3,"ab");
long global_index = isector*(xaxis*yaxis)*sizeof(double);
fseek(pFilereal, global_index, SEEK_SET);
fwrite(image_real, xaxis*yaxis, sizeof(double), pFilereal);
fseek(pFileimg, global_index, SEEK_SET);
fwrite(image_imag, xaxis*yaxis, sizeof(double), pFileimg);
fclose(pFilereal);
fclose(pFileimg);
}
}
#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif //WRITE_IMAGE
#endif //USE_FFTW
end = clock();
clock_gettime(CLOCK_MONOTONIC, &finish);
tot_time = ((double) (end - start0)) / CLOCKS_PER_SEC;
tot_time1 = (finish.tv_sec - begin0.tv_sec);
tot_time1 += (finish.tv_nsec - begin0.tv_nsec) / 1000000000.0;
if (rank == 0)
{
printf("Setup time: %f sec\n",setup_time);
printf("Process time: %f sec\n",process_time);
printf("Kernel time = %f, Array Composition time %f, Reduce time: %f sec\n",kernel_time,compose_time,reduce_time);
#ifdef USE_FFTW
printf("FFTW time: %f sec\n",fftw_time);
printf("Phase time: %f sec\n",phase_time);
#endif
printf("TOT time: %f sec\n",tot_time);
if(num_threads > 1)
{
printf("PSetup time: %f sec\n",setup_time1);
printf("PProcess time: %f sec\n",process_time1);
printf("PKernel time = %f, PArray Composition time %f, PReduce time: %f sec\n",kernel_time1,compose_time1,reduce_time1);
#ifdef USE_FFTW
printf("PFFTW time: %f sec\n",fftw_time1);
printf("PPhase time: %f sec\n",phase_time1);
#endif
printf("PTOT time: %f sec\n",tot_time1);
}
}
// Close MPI environment
#ifdef USE_MPI
MPI_Win_fence(0,slabwin);
MPI_Win_free(&slabwin);
MPI_Finalize();
#endif
}