Skip to content
Snippets Groups Projects
Commit 3835bd76 authored by Giovanni Lacopo's avatar Giovanni Lacopo
Browse files

Checking GPU scalability

parent 889e3c95
No related branches found
No related tags found
No related merge requests found
......@@ -190,14 +190,15 @@ void gridding_data(){
printf("Processing sector %ld\n",isector);
#endif
start = CPU_TIME_wt;
double *stacking_target_array;
if ( size > 1 )
stacking_target_array = gridss;
else
stacking_target_array = grid;
start = CPU_TIME_wt;
//We have to call different GPUs per MPI task!!! [GL]
wstack(param.num_w_planes,
Nsec,
......
......@@ -234,13 +234,15 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
}
#else
omp_set_default_device(rank % omp_get_num_devices());
#if !defined(__clang__)
#pragma omp target teams distribute parallel for collapse(2) simd private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis]) device(rank % omp_get_num_devices())
#pragma omp target teams distribute parallel for collapse(2) simd private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis])
#else
#pragma omp target teams distribute parallel for collapse(2) private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis]) device(rank % omp_get_num_devices())
#pragma omp target teams distribute parallel for collapse(2) private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis])
#endif
for (int iw=0; iw<num_w_planes; iw++)
......
......@@ -5,12 +5,9 @@
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include "errcodes.h"
#ifdef __CUDACC__
#include "allvars_nccl.h"
#else
#include "allvars.h"
#endif
#include "proto.h"
......@@ -209,7 +206,7 @@ void wstack(
int rank)
{
uint i;
uint index;
//uint index;
uint visindex;
// initialize the convolution kernel
......@@ -220,13 +217,14 @@ void wstack(
double std22 = 1.0/(2.0*std*std);
double norm = std22/PI;
double * convkernel = (double*)malloc(increaseprecision*w_support*sizeof(*convkernel));
double overSamplingFactor = 1.0;
int withSinc = 0;
double alpha = 8.6;
#ifdef GAUSS
makeGaussKernel(convkernel,w_support,increaseprecision,std22);
#endif
#ifdef KAISERBESSEL
double overSamplingFactor = 1.0;
int withSinc = 0;
double alpha = 8.6;
makeKaiserBesselKernel(convkernel, w_support, increaseprecision, alpha, overSamplingFactor, withSinc);
#endif
......@@ -328,9 +326,9 @@ void wstack(
#endif
#ifdef ACCOMP
omp_set_default_device(rank % omp_get_num_devices());
uint Nvis = num_points*freq_per_chan*polarizations;
// #pragma omp target data map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan])
// #pragma omp target teams distribute parallel for map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom: grid[0:2*num_w_planes*grid_size_x*grid_size_y])
#pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom: grid[0:2*num_w_planes*grid_size_x*grid_size_y])
#else
#pragma omp parallel for private(visindex)
#endif
......@@ -422,6 +420,8 @@ void wstack(
}
}
#pragma omp target exit data map(delete:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan],grid[0:2*num_w_planes*grid_size_x*grid_size_y])
// End switch between CUDA and CPU versions
#endif
//for (int i=0; i<100000; i++)printf("%f\n",grid[i]);
......
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment