Skip to content
Snippets Groups Projects
Commit 9d47e94e authored by Claudio Gheller's avatar Claudio Gheller
Browse files

first CUDA implementation of phase_correction. Still buggy

parent 96c6e08a
No related branches found
No related tags found
No related merge requests found
...@@ -10,7 +10,7 @@ OPT += -DONE_SIDE ...@@ -10,7 +10,7 @@ OPT += -DONE_SIDE
# write the final image # write the final image
OPT += -DWRITE_IMAGE OPT += -DWRITE_IMAGE
# perform w-stacking phase correction # perform w-stacking phase correction
OPT += -DPHASE_ON #OPT += -DPHASE_ON
CC = gcc CC = gcc
CXX = g++ CXX = g++
...@@ -46,7 +46,7 @@ serial: $(COBJ) ...@@ -46,7 +46,7 @@ serial: $(COBJ)
$(CC) $(OMP) -o w-stackingCfftw_serial $(CFLAGS) $^ -lm $(CC) $(OMP) -o w-stackingCfftw_serial $(CFLAGS) $^ -lm
serial_cuda: serial_cuda:
$(NVCC) $(NVFLAGS) -c w-stacking.cu phase_correction.cu $(NVLIB) $(NVCC) $(OPT) $(NVFLAGS) -c w-stacking.cu phase_correction.cu $(NVLIB)
$(CC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c $(CC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c
$(CXX) $(CFLAGS) $(OPT) -o w-stackingfftw_serial w-stacking-fftw.o w-stacking.o phase_correction.o $(NVLIB) -lm $(CXX) $(CFLAGS) $(OPT) -o w-stackingfftw_serial w-stacking-fftw.o w-stacking.o phase_correction.o $(NVLIB) -lm
...@@ -54,7 +54,7 @@ mpi: $(COBJ) ...@@ -54,7 +54,7 @@ mpi: $(COBJ)
$(CC) $(OMP) -o w-stackingCfftw $(CFLAGS) $^ $(LIBS) $(CC) $(OMP) -o w-stackingCfftw $(CFLAGS) $^ $(LIBS)
mpi_cuda: mpi_cuda:
$(NVCC) $(NVFLAGS) -c w-stacking.cu phase_correction.cu $(NVLIB) $(NVCC) $(NVFLAGS) $(OPT) -c w-stacking.cu phase_correction.cu $(NVLIB)
$(CC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c $(CC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c
$(CXX) $(CFLAGS) $(OPT) -o w-stackingfftw w-stacking-fftw.o w-stacking.o phase_correction.o $(NVLIB) $(LIBS) -lm $(CXX) $(CFLAGS) $(OPT) -o w-stackingfftw w-stacking-fftw.o w-stacking.o phase_correction.o $(NVLIB) $(LIBS) -lm
......
...@@ -7,19 +7,126 @@ ...@@ -7,19 +7,126 @@
#include <stdio.h> #include <stdio.h>
#ifdef __CUDACC__ #ifdef __CUDACC__
__global__ void phase_g(int xaxis,
int yaxis,
int num_w_planes,
double * gridss,
double * image_real,
double * image_imag,
double wmin,
double dw,
double dwnorm,
int xaxistot,
int yaxistot,
double resolution)
{
long gid = blockIdx.x*blockDim.x + threadIdx.x;
double add_term_real;
double add_term_img;
double wterm;
long arraysize = xaxis*yaxis*num_w_planes;
if(gid < arraysize)
{
int iw = (int)(gid/(xaxis*yaxis));
int iv = (int)((gid%(xaxis*yaxis))/xaxis);
int iu = (iv%yaxis);
long index = 2*gid;
long img_index = iu+iv*xaxis;
wterm = wmin + iw*dw;
#ifdef PHASE_ON
if (num_w_planes > 1)
{
double xcoord = (double)(iu-xaxistot/2);
if(xcoord < 0.0)xcoord = (double)(iu+xaxistot/2);
xcoord = sin(xcoord*resolution);
double ycoord = (double)(iv-yaxistot/2);
if(ycoord < 0.0)ycoord = (double)(iv+yaxistot/2);
ycoord = sin(ycoord*resolution);
double preal, pimag;
double radius2 = (xcoord*xcoord+ycoord*ycoord);
preal = cos(2.0*PI*wterm*(sqrt(1-radius2)-1.0));
pimag = sin(2.0*PI*wterm*(sqrt(1-radius2)-1.0));
double p,q,r,s;
p = gridss[index];
q = gridss[index+1];
r = preal;
s = pimag;
//printf("%d %d %d %ld %ld\n",iu,iv,iw,index,img_index);
add_term_real = (p*r-q*s)*dwnorm*sqrt(1-radius2);
add_term_img = (p*s+q*r)*dwnorm*sqrt(1-radius2);
atomicAdd(&(image_real[img_index]),add_term_real);
atomicAdd(&(image_imag[img_index]),add_term_img);
} else {
atomicAdd(&(image_real[img_index]),gridss[index]);
atomicAdd(&(image_imag[img_index]),gridss[index+1]);
}
#else
atomicAdd(&(image_real[img_index]),gridss[index]);
atomicAdd(&(image_imag[img_index]),gridss[index+1]);
#endif // end of PHASE_ON
}
}
#endif #endif
void phase_correction(double* gridss, double* image_real, double* image_imag, int xaxis, int yaxis, int num_w_planes, int xaxistot, int yaxistot, void phase_correction(double* gridss, double* image_real, double* image_imag, int xaxis, int yaxis, int num_w_planes, int xaxistot, int yaxistot,
double resolution, double wmin, double wmax) double resolution, double wmin, double wmax)
{ {
double dnum_w_planes = (double)num_w_planes;
double dxaxistot = (double)xaxistot;
double dyaxistot = (double)yaxistot;
double diagonal;
double dw = (wmax-wmin)/(double)num_w_planes; double dw = (wmax-wmin)/(double)num_w_planes;
double wterm = wmin+0.5*dw; double wterm = wmin+0.5*dw;
double dwnorm = dw/(wmax-wmin); double dwnorm = dw/(wmax-wmin);
#ifdef __CUDACC__
int Nth = NTHREADS;
long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth) + 1;
if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl);
cudaError_t mmm;
double * image_real_g;
double * image_imag_g;
double * gridss_g;
mmm=cudaMalloc(&gridss_g, 2*num_w_planes*xaxis*yaxis*sizeof(double));
mmm=cudaMalloc(&image_real_g, xaxis*yaxis*sizeof(double));
mmm=cudaMalloc(&image_imag_g, xaxis*yaxis*sizeof(double));
mmm=cudaMemcpy(gridss_g, gridss, 2*num_w_planes*xaxis*yaxis*sizeof(double), cudaMemcpyHostToDevice);
mmm=cudaMemset(image_real_g, 0.0, xaxis*yaxis*sizeof(double));
mmm=cudaMemset(image_imag_g, 0.0, xaxis*yaxis*sizeof(double));
// call the phase correction kernel
phase_g <<<Nbl,Nth>>> (xaxis,
yaxis,
num_w_planes,
gridss_g,
image_real_g,
image_imag_g,
wmin,
dw,
dwnorm,
xaxistot,
yaxistot,
resolution);
mmm = cudaMemcpy(image_real, image_real_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost);
mmm = cudaMemcpy(image_imag, image_imag_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost);
#else
for (int iw=0; iw<num_w_planes; iw++) for (int iw=0; iw<num_w_planes; iw++)
{ {
for (int iv=0; iv<yaxis; iv++) for (int iv=0; iv<yaxis; iv++)
...@@ -38,7 +145,6 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in ...@@ -38,7 +145,6 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
if(ycoord < 0.0)ycoord = (double)(iv+yaxistot/2); if(ycoord < 0.0)ycoord = (double)(iv+yaxistot/2);
ycoord = sin(ycoord*resolution); ycoord = sin(ycoord*resolution);
double efact;
double preal, pimag; double preal, pimag;
double radius2 = (xcoord*xcoord+ycoord*ycoord); double radius2 = (xcoord*xcoord+ycoord*ycoord);
...@@ -61,12 +167,13 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in ...@@ -61,12 +167,13 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
#else #else
image_real[img_index] += gridss[index]; image_real[img_index] += gridss[index];
image_imag[img_index] += gridss[index+1]; image_imag[img_index] += gridss[index+1];
#endif #endif // end of PHASE_ON
} }
wterm += dw; wterm += dw;
} }
#endif // end of __CUDACC__
} }
...@@ -78,24 +78,24 @@ int main(int argc, char * argv[]) ...@@ -78,24 +78,24 @@ int main(int argc, char * argv[])
float * visreal; float * visreal;
float * visimg; float * visimg;
long Nmeasures; long Nmeasures,Nmeasures0;
long Nvis; long Nvis,Nvis0;
long Nweights; long Nweights,Nweights0;
long freq_per_chan; long freq_per_chan,freq_per_chan0;
long polarisations; long polarisations,polarisations0;
long Ntimes; long Ntimes,Ntimes0;
double dt; double dt,dt0;
double thours; double thours,thours0;
long baselines; long baselines,baselines0;
double uvmin; double uvmin,uvmin0;
double uvmax; double uvmax,uvmax0;
double wmin; double wmin,wmin0;
double wmax; double wmax,wmax0;
double resolution; double resolution;
// MESH SIZE // MESH SIZE
int grid_size_x = 2048; int grid_size_x = 256;
int grid_size_y = 2048; int grid_size_y = 256;
int local_grid_size_x;// = 8; int local_grid_size_x;// = 8;
int local_grid_size_y;// = 8; int local_grid_size_y;// = 8;
int xaxis; int xaxis;
...@@ -372,18 +372,18 @@ int main(int argc, char * argv[]) ...@@ -372,18 +372,18 @@ int main(int argc, char * argv[])
strcpy(filename,datapath); strcpy(filename,datapath);
strcat(filename,metafile); strcat(filename,metafile);
pFile = fopen (filename,"r"); pFile = fopen (filename,"r");
fscanf(pFile,"%ld",&Nmeasures); fscanf(pFile,"%ld",&Nmeasures0);
fscanf(pFile,"%ld",&Nvis); fscanf(pFile,"%ld",&Nvis0);
fscanf(pFile,"%ld",&freq_per_chan); fscanf(pFile,"%ld",&freq_per_chan0);
fscanf(pFile,"%ld",&polarisations); fscanf(pFile,"%ld",&polarisations0);
fscanf(pFile,"%ld",&Ntimes); fscanf(pFile,"%ld",&Ntimes0);
fscanf(pFile,"%lf",&dt); fscanf(pFile,"%lf",&dt0);
fscanf(pFile,"%lf",&thours); fscanf(pFile,"%lf",&thours0);
fscanf(pFile,"%ld",&baselines); fscanf(pFile,"%ld",&baselines0);
fscanf(pFile,"%lf",&uvmin); fscanf(pFile,"%lf",&uvmin);
fscanf(pFile,"%lf",&uvmax); fscanf(pFile,"%lf",&uvmax);
fscanf(pFile,"%lf",&wmin); fscanf(pFile,"%lf",&wmin0);
fscanf(pFile,"%lf",&wmax); fscanf(pFile,"%lf",&wmax0);
fclose(pFile); fclose(pFile);
// calculate the resolution in radians // calculate the resolution in radians
......
...@@ -147,7 +147,7 @@ void wstack( ...@@ -147,7 +147,7 @@ void wstack(
#ifdef __CUDACC__ #ifdef __CUDACC__
// Define the CUDA set up // Define the CUDA set up
int Nth = NTHREADS; int Nth = NTHREADS;
int Nbl = num_points/Nth + 1; long Nbl = (long)(num_points/Nth) + 1;
if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
long Nvis = num_points*freq_per_chan*polarizations; long Nvis = num_points*freq_per_chan*polarizations;
printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl); printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment