Skip to content
Snippets Groups Projects
Commit 52ebffe4 authored by Giovanni Lacopo
Browse files

AMD LUMI updates

parent 4eb0affa
Branches
Tags
No related merge requests found
...@@ -20,8 +20,8 @@ endif ...@@ -20,8 +20,8 @@ endif
LINKER=$(MPICC) LINKER=$(MPICC)
FFTW_MPI_INC = FFTW_MPI_INC = -I/opt/cray/pe/fftw/3.3.10.5/x86_rome/include
FFTW_MPI_LIB = FFTW_MPI_LIB = -L/opt/cray/pe/fftw/3.3.10.5/x86_rome/lib
CFLAGS += -I./ CFLAGS += -I./
...@@ -63,11 +63,11 @@ OPT += -DPHASE_ON ...@@ -63,11 +63,11 @@ OPT += -DPHASE_ON
# SELECT THE GRIDDING KERNEL: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL # SELECT THE GRIDDING KERNEL: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL
OPT += -DGAUSS_HI_PRECISION #OPT += -DGAUSS_HI_PRECISION
#OPT += -DGAUSS #OPT += -DGAUSS
#OPT += -DKAISERBESSEL OPT += -DKAISERBESSEL
# ======================================================== # ========================================================
...@@ -92,7 +92,7 @@ OPT += -DGAUSS_HI_PRECISION ...@@ -92,7 +92,7 @@ OPT += -DGAUSS_HI_PRECISION
#OPT += -DCUFFTMP #OPT += -DCUFFTMP
# FULL NVIDIA GPU SUPPORT - Recommended for full NVIDIA GPU code execution # FULL NVIDIA GPU SUPPORT - Recommended for full NVIDIA GPU code execution
OPT += -DFULL_NVIDIA #OPT += -DFULL_NVIDIA
ifeq (FULL_NVIDIA,$(findstring FULL_NVIDIA,$(OPT))) ifeq (FULL_NVIDIA,$(findstring FULL_NVIDIA,$(OPT)))
OPT += -DCUDACC -DNCCL_REDUCE -DCUFFTMP OPT += -DCUDACC -DNCCL_REDUCE -DCUFFTMP
endif endif
...@@ -109,7 +109,7 @@ endif ...@@ -109,7 +109,7 @@ endif
#OPT += -DRCCL_REDUCE #OPT += -DRCCL_REDUCE
# FULL AMD GPU SUPPORT - Recommended for full AMD GPU code execution # FULL AMD GPU SUPPORT - Recommended for full AMD GPU code execution
#OPT += -DFULL_AMD OPT += -DFULL_AMD
ifeq (FULL_AMD,$(findstring FULL_AMD,$(OPT))) ifeq (FULL_AMD,$(findstring FULL_AMD,$(OPT)))
OPT += -DHIPCC -DRCCL_REDUCE -D__HIP_PLATFORM_AMD__ OPT += -DHIPCC -DRCCL_REDUCE -D__HIP_PLATFORM_AMD__
endif endif
...@@ -384,9 +384,9 @@ ifeq (RCCL_REDUCE,$(findstring RCCL_REDUCE,$(OPT))) ...@@ -384,9 +384,9 @@ ifeq (RCCL_REDUCE,$(findstring RCCL_REDUCE,$(OPT)))
EXEC_EXT := $(EXEC_EXT)_acc-reduce EXEC_EXT := $(EXEC_EXT)_acc-reduce
LINKER=$(MPIC++) LINKER=$(MPIC++)
FLAGS=$(OPTIMIZE_AMD) $(CFLAGS) FLAGS=$(OPTIMIZE_AMD) $(CFLAGS)
LIBS=$(AMDLIB) LIBS=$(AMDLIB_3)
$(OBJ_RCCL_REDUCE): $(DEPS_RCCL_REDUCE) $(OBJ_RCCL_REDUCE): $(DEPS_RCCL_REDUCE)
$(MPIC++) $(FLAGS) $(OPT) -c $^ $(CFLAGS) $(LIBS) $(HIPCC) $(FLAGS) $(OPT) -c $^ $(CFLAGS) $(LIBS)
OBJ += $(OBJ_RCCL_REDUCE) OBJ += $(OBJ_RCCL_REDUCE)
endif endif
......
...@@ -92,7 +92,7 @@ void gridding_data(){ ...@@ -92,7 +92,7 @@ void gridding_data(){
if (rank == 0) ncclGetUniqueId(&id); if (rank == 0) ncclGetUniqueId(&id);
MPI_Bcast((void *)&id, sizeof(id), MPI_BYTE, 0, MPI_COMM_WORLD); MPI_Bcast((void *)&id, sizeof(id), MPI_BYTE, 0, MPI_COMM_WORLD);
hipSetDevice(local_rank); int h = hipSetDevice(local_rank);
int n = hipMalloc(&grid_gpu, 2*param.num_w_planes*xaxis*yaxis * sizeof(double)); int n = hipMalloc(&grid_gpu, 2*param.num_w_planes*xaxis*yaxis * sizeof(double));
n = hipMalloc(&gridss_gpu, 2*param.num_w_planes*xaxis*yaxis * sizeof(double)); n = hipMalloc(&gridss_gpu, 2*param.num_w_planes*xaxis*yaxis * sizeof(double));
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#include <stdio.h> #include <stdio.h>
#ifdef __HIPCC__ #ifdef __HIPCC__
#include "allvars_nccl.hip.hpp" #include "allvars_rccl.hip.hpp"
#endif #endif
#include "proto.h" #include "proto.h"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment