diff --git a/Makefile b/Makefile index 6c927cf832feaf1f51aeb578f146f4b1d7e73b80..f8ed68f892a75d61fc1863dedd609ab2f8d9ddcc 100755 --- a/Makefile +++ b/Makefile @@ -20,8 +20,8 @@ endif LINKER=$(MPICC) -FFTW_MPI_INC = -FFTW_MPI_LIB = +FFTW_MPI_INC = -I/opt/cray/pe/fftw/3.3.10.5/x86_rome/include +FFTW_MPI_LIB = -L/opt/cray/pe/fftw/3.3.10.5/x86_rome/lib CFLAGS += -I./ @@ -63,11 +63,11 @@ OPT += -DPHASE_ON # SELECT THE GRIDDING KERNEL: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL -OPT += -DGAUSS_HI_PRECISION +#OPT += -DGAUSS_HI_PRECISION #OPT += -DGAUSS -#OPT += -DKAISERBESSEL +OPT += -DKAISERBESSEL # ======================================================== @@ -92,7 +92,7 @@ OPT += -DGAUSS_HI_PRECISION #OPT += -DCUFFTMP # FULL NVIDIA GPU SUPPORT - Recommended for full NVIDIA GPU code execution -OPT += -DFULL_NVIDIA +#OPT += -DFULL_NVIDIA ifeq (FULL_NVIDIA,$(findstring FULL_NVIDIA,$(OPT))) OPT += -DCUDACC -DNCCL_REDUCE -DCUFFTMP endif @@ -109,7 +109,7 @@ endif #OPT += -DRCCL_REDUCE # FULL AMD GPU SUPPORT - Recommended for full AMD GPU code execution -#OPT += -DFULL_AMD +OPT += -DFULL_AMD ifeq (FULL_AMD,$(findstring FULL_AMD,$(OPT))) OPT += -DHIPCC -DRCCL_REDUCE -D__HIP_PLATFORM_AMD__ endif @@ -384,9 +384,9 @@ ifeq (RCCL_REDUCE,$(findstring RCCL_REDUCE,$(OPT))) EXEC_EXT := $(EXEC_EXT)_acc-reduce LINKER=$(MPIC++) FLAGS=$(OPTIMIZE_AMD) $(CFLAGS) -LIBS=$(AMDLIB) +LIBS=$(AMDLIB_3) $(OBJ_RCCL_REDUCE): $(DEPS_RCCL_REDUCE) - $(MPIC++) $(FLAGS) $(OPT) -c $^ $(CFLAGS) $(LIBS) + $(HIPCC) $(FLAGS) $(OPT) -c $^ $(CFLAGS) $(LIBS) OBJ += $(OBJ_RCCL_REDUCE) endif diff --git a/gridding_rccl.hip.cpp b/gridding_rccl.hip.cpp index dba42eeec2580d387322856e21392998c25d7791..e54fa4d6d0a28f83d4c42716d50036466541eedb 100755 --- a/gridding_rccl.hip.cpp +++ b/gridding_rccl.hip.cpp @@ -92,7 +92,7 @@ void gridding_data(){ if (rank == 0) ncclGetUniqueId(&id); MPI_Bcast((void *)&id, sizeof(id), MPI_BYTE, 0, MPI_COMM_WORLD); - hipSetDevice(local_rank); + int h = hipSetDevice(local_rank); int n = hipMalloc(&grid_gpu, 2*param.num_w_planes*xaxis*yaxis * sizeof(double)); n = hipMalloc(&gridss_gpu, 2*param.num_w_planes*xaxis*yaxis * sizeof(double)); diff --git a/w-stacking.hip.cpp b/w-stacking.hip.cpp index 37db97cd94eaa78afc9133602cd93f7f287fd326..5c376f3c7d8a6566d84593bbeafe7dd0162edfe9 100755 --- a/w-stacking.hip.cpp +++ b/w-stacking.hip.cpp @@ -7,7 +7,7 @@ #include #ifdef __HIPCC__ -#include "allvars_nccl.hip.hpp" +#include "allvars_rccl.hip.hpp" #endif #include "proto.h"