diff --git a/w-stacking_omp.c b/w-stacking_omp.c new file mode 100644 index 0000000000000000000000000000000000000000..5403f2d8b620045d5767d934a3702a189462dfa3 --- /dev/null +++ b/w-stacking_omp.c @@ -0,0 +1,150 @@ +#include <omp.h> +#include "w-stacking_omp.h" +#include <math.h> +#include <stdlib.h> +#include <stdio.h> + +#ifdef ACCOMP +#pragma omp declare target +#endif +double gauss_kernel_norm(double norm, double std22, double u_dist, double v_dist) +{ + double conv_weight; + conv_weight = norm * exp(-((u_dist*u_dist)+(v_dist*v_dist))*std22); + return conv_weight; +} +#ifdef ACCOMP +#pragma omp end declare target +#endif + +#ifdef ACCOMP +#pragma omp declare target +#endif +void wstack( + int num_w_planes, + long num_points, + long freq_per_chan, + long polarizations, + double* uu, + double* vv, + double* ww, + float* vis_real, + float* vis_img, + float* weight, + double dx, + double dw, + int w_support, + int grid_size_x, + int grid_size_y, + double* grid, + int num_threads) +{ + long i; + long index; + long visindex; + + // initialize the convolution kernel + // gaussian: + int KernelLen = (w_support-1)/2; + double std = 1.0; + double std22 = 1.0/(2.0*std*std); + double norm = std22/PI; + + // Loop over visibilities. +#ifdef _OPENMP + omp_set_num_threads(num_threads); +#endif + +#ifdef ACCOMP + long Nvis = num_points*freq_per_chan*polarizations; + // #pragma omp target data map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) + #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom: grid[0:2*num_w_planes*grid_size_x*grid_size_y]) +#else + #pragma omp parallel for private(visindex) +#endif + for (i = 0; i < num_points; i++) + { +#ifdef _OPENMP + //int tid; + //tid = omp_get_thread_num(); + //printf("%d\n",tid); +#endif + + visindex = i*freq_per_chan*polarizations; + + double sum = 0.0; + int j, k; + //if (i%1000 == 0)printf("%ld\n",i); + + /* Convert UV coordinates to grid coordinates. */ + double pos_u = uu[i] / dx; + double pos_v = vv[i] / dx; + double ww_i = ww[i] / dw; + int grid_w = (int)ww_i; + int grid_u = (int)pos_u; + int grid_v = (int)pos_v; + + // check the boundaries + long jmin = (grid_u > KernelLen - 1) ? grid_u - KernelLen : 0; + long jmax = (grid_u < grid_size_x - KernelLen) ? grid_u + KernelLen : grid_size_x - 1; + long kmin = (grid_v > KernelLen - 1) ? grid_v - KernelLen : 0; + long kmax = (grid_v < grid_size_y - KernelLen) ? grid_v + KernelLen : grid_size_y - 1; + //printf("%d, %ld, %ld, %d, %ld, %ld\n",grid_u,jmin,jmax,grid_v,kmin,kmax); + + + // Convolve this point onto the grid. + for (k = kmin; k <= kmax; k++) + { + + double v_dist = (double)k+0.5 - pos_v; + + for (j = jmin; j <= jmax; j++) + { + double u_dist = (double)j+0.5 - pos_u; + long iKer = 2 * (j + k*grid_size_x + grid_w*grid_size_x*grid_size_y); + + + double conv_weight = gauss_kernel_norm(norm,std22,u_dist,v_dist); + // Loops over frequencies and polarizations + double add_term_real = 0.0; + double add_term_img = 0.0; + long ifine = visindex; + // DAV: the following two loops are performend by each thread separately: no problems of race conditions + for (long ifreq=0; ifreq<freq_per_chan; ifreq++) + { + long iweight = visindex/freq_per_chan; + for (long ipol=0; ipol<polarizations; ipol++) + { + if (!isnan(vis_real[ifine])) + { + //printf("%f %ld\n",weight[iweight],iweight); + add_term_real += weight[iweight] * vis_real[ifine] * conv_weight; + add_term_img += weight[iweight] * vis_img[ifine] * conv_weight; + //if(vis_img[ifine]>1e10 || vis_img[ifine]<-1e10)printf("%f %f %f %f %ld %ld\n",vis_real[ifine],vis_img[ifine],weight[iweight],conv_weight,ifine,num_points*freq_per_chan*polarizations); + } + ifine++; + iweight++; + } + } + // DAV: this is the critical call in terms of correctness of the results and of performance + #pragma omp atomic + grid[iKer] += add_term_real; + #pragma omp atomic + grid[iKer+1] += add_term_img; + } + } + + } + //for (int i=0; i<100000; i++)printf("%f\n",grid[i]); +} +#ifdef ACCOMP +#pragma omp end declare target +#endif + +int test(int nnn) +{ + int mmm; + + mmm = nnn+1; + return mmm; +} diff --git a/w-stacking_omp.h b/w-stacking_omp.h new file mode 100644 index 0000000000000000000000000000000000000000..d07e2bffd8479f67d345381dec4a3658cb976db9 --- /dev/null +++ b/w-stacking_omp.h @@ -0,0 +1,45 @@ +#ifndef W_PROJECT_H_ +#define W_PROJECT_H_ +#endif + +#define NWORKERS -1 //100 +#define NTHREADS 32 +#define PI 3.14159265359 +#define REAL_TYPE double + +void wstack( + int, + long, + long, + long, + double*, + double*, + double*, + float*, + float*, + float*, + double, + double, + int, + int, + int, + double*, + int); + +int test(int nnn); + +double gauss_kernel_norm( + double norm, + double std22, + double u_dist, + double v_dist); + +void phase_correction( + double*, + double*, + double*, + int, + int, + int, + int, + int);