From 7205d63dcca9d2196b6640b1b755077ae5f30589 Mon Sep 17 00:00:00 2001 From: Claudio Gheller <cgheller@login01.m100.cineca.it> Date: Fri, 17 Mar 2023 12:11:58 +0100 Subject: [PATCH] Precalculated Gaussian Kernel added --- w-stacking-fftw.c | 4 ++-- w-stacking.cu | 25 +++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/w-stacking-fftw.c b/w-stacking-fftw.c index f13b3c1..ef56aa4 100644 --- a/w-stacking-fftw.c +++ b/w-stacking-fftw.c @@ -643,8 +643,8 @@ if(rank == 0){ dx, dw, w_support, - xaxis, - yaxis, + xaxis, + yaxis, gridss, num_threads); diff --git a/w-stacking.cu b/w-stacking.cu index d5e8e8c..8728fda 100644 --- a/w-stacking.cu +++ b/w-stacking.cu @@ -22,6 +22,22 @@ gauss_kernel_norm(double norm, double std22, double u_dist, double v_dist) return conv_weight; } +void makeGaussKernel(double * kernel, + int KernelLen, + double std22) +{ + + double norm = std22/PI; + int n = KernelLen, mid = n / 2; + for (int i = 0; i != mid + 1; i++) { + double term = (double)i / mid; + kernel[mid + i] = sqrt(norm) * exp(-(term*term)*std22); + } + + for (int i = 0; i != mid; i++) kernel[i] = kernel[n - 1 - i]; + +} + // Kaiser-Bessel Kernel: it is adapted from WSClean double bessel0(double x, double precision) { // Calculate I_0 = SUM of m 0 -> inf [ (x/2)^(2m) ] @@ -188,7 +204,9 @@ void wstack( double std = 1.0; double std22 = 1.0/(2.0*std*std); double norm = std22/PI; - + double * convkernel = malloc(w_support*sizeof(*convkernel)); + makeGaussKernel(convkernel,w_support,std22); + // Loop over visibilities. // Switch between CUDA and GPU versions #ifdef __CUDACC__ @@ -309,11 +327,14 @@ void wstack( for (j = jmin; j <= jmax; j++) { + int jKer = (int)j-pos_u+KernelLen; + int kKer = (int)k-pos_v+KernelLen; double u_dist = (double)j+0.5 - pos_u; long iKer = 2 * (j + k*grid_size_x + grid_w*grid_size_x*grid_size_y); - double conv_weight = gauss_kernel_norm(norm,std22,u_dist,v_dist); + //double conv_weight = gauss_kernel_norm(norm,std22,u_dist,v_dist); + double conv_weight = convkernel[jKer]*convkernel[kKer]; // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0; -- GitLab