Skip to content
Snippets Groups Projects
Commit 404e0296 authored by Giovanni Lacopo
Browse files

Choose whether performing stacking on CPUs or GPUs

parent ac418ec4
No related merge requests found
......@@ -59,9 +59,9 @@ OPT += -DPHASE_ON
#OPT += -DNORMALIZE_UVW
# Gridding kernel: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL
#OPT += -DGAUSS_HI_PRECISION
OPT += -DGAUSS_HI_PRECISION
OPT += -DGAUSS
#OPT += -DGAUSS
#OPT += -DKAISERBESSEL
......@@ -77,7 +77,10 @@ OPT += -DGAUSS
#OPT += -DCUDACC
# use GPU acceleration via OMP
#OPT += -DACCOMP
OPT += -DACCOMP
# perform stacking on GPUs
#OPT += -DGPU_STACKING
# use NVIDIA GPU to perform the reduce
#OPT += -DNCCL_REDUCE
......@@ -135,8 +138,8 @@ OBJ_ACC_CUDA = phase_correction.o w-stacking.o
# ----- define which files will be compiled by NVC with OMP offloading for either Nvidia or AMD
#
DEPS_ACC_OMP = w-stacking_omp.h phase_correction.c w-stacking_omp.c
OBJ_ACC_OMP = phase_correction.o w-stacking_omp.o
DEPS_ACC_OMP = w-stacking.h phase_correction.c w-stacking.c
OBJ_ACC_OMP = phase_correction.o w-stacking.o
# ----- define what files will be compiled by NVC with OMP offloading when the stacking reduce is
......@@ -174,7 +177,7 @@ endif
ifeq (USE_FFTW,$(findstring USE_FFTW,$(OPT)))
CFLAGS += $(FFTW_MPI_INC)
ifeq (HIBRYD_FFTW,$(findstring HYBRID_FFTW,$(OPT)))
ifeq (HYBRID_FFTW,$(findstring HYBRID_FFTW,$(OPT)))
FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_omp -lfftw3_mpi -lfftw3 -lm
else
FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_mpi -lfftw3 -lm
......
......@@ -325,7 +325,7 @@ void wstack(
omp_set_num_threads(num_threads);
#endif
#ifdef ACCOMP
#if defined(ACCOMP) && (GPU_STACKING)
omp_set_default_device(rank % omp_get_num_devices());
uint Nvis = num_points*freq_per_chan*polarizations;
#pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
......@@ -420,7 +420,7 @@ void wstack(
}
}
#ifdef ACCOMP
#if defined(ACCOMP) && (GPU_STACKING)
#pragma omp target exit data map(delete:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan], grid[0:2*num_w_planes*grid_size_x*grid_size_y])
#endif
// End switch between CUDA and CPU versions
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment