diff --git a/jacobi/gpu/openacc/Makefile b/jacobi/gpu/openacc/Makefile deleted file mode 100644 index 2a7fd8b407d50f92f26af8f404c312bda712a1fd..0000000000000000000000000000000000000000 --- a/jacobi/gpu/openacc/Makefile +++ /dev/null @@ -1,63 +0,0 @@ -####################################################################### -# Author: David Goz (david.goz@inaf.it) # -# June 2024 # -####################################################################### -# -# To see all the compilation options -# $ make info -####################################################################### - -# make.def defines how the application is compiled -include make.def -####################################################################### - -.PHONY: info serial valgrind_memcheck valgrind_callgrind valgrind_cachegrind debug clean - -info: - @echo ' ' - @echo '-----------------------------------------------------------------------------------------------' - @echo '$$ make serial ---> compile the serial application ' - @echo '$$ make debug ---> debug the serial application ' - @echo '$$ make valgrind_memcheck ---> run the serial application using Valgrind under Memcheck ' - @echo '$$ make valgrind_callgrind ---> run the serial application using Valgrind under Callgrind ' - @echo '$$ make valgrind_cachegrind ---> run the serial application using Valgrind under Cachegrind ' - @echo '$$ make clean ---> clean up all ' - @echo '$$ make info ---> get make info ' - @echo '-----------------------------------------------------------------------------------------------' - @echo ' ' - -serial: $(PROG) - -debug: $(PROG_DEBUG) - @echo 'OOOooo... debugging ...oooOOO' - gdb --args ./$< - @echo 'OOOooo... debugging ... oooOOO' - -valgrind_memcheck: $(PROG_MEMCHECK) - @echo 'oooOOO... valgrind_memcheck ...OOOooo' - valgrind --tool=memcheck -s --leak-check=full --show-leak-kinds=all --track-origins=yes --read-var-info=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 10 10 - @echo 'oooOOO... 
valgrind_memcheck ...OOOooo' - -valgrind_callgrind: $(PROG_CALLGRIND) - @echo 'oooOOO... valgrind_callgrind ...OOOooo' - valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128 - @echo ' ' - @echo 'To generate a function-by-function summary from the profile data file:' - @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' - @echo '(kcachegrind is required in order to visualize the output using the GUI)' - -valgrind_cachegrind: $(PROG_CACHEGRIND) - @echo 'oooOOO... valgrind_cachegrind ...OOOooo' - valgrind --tool=cachegrind --cache-sim=yes --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128 - @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' - @echo '(kcachegrind is required in order to visualize the output using the GUI)' - @echo 'oooOOO... valgrind_cachegrind ...OOOooo' - -clean: - rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ - rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) - rm -f valgrind_*.txt - rm -f cachegrind.out.* - rm -f callgrind.* - rm -f *bin - rm -f jacobi_serial_opt_* diff --git a/jacobi/gpu/openacc/make.def b/jacobi/gpu/openacc/make.def deleted file mode 100644 index d434ce25f2b9217543c5b55bf5f1bf03bef58a1c..0000000000000000000000000000000000000000 --- a/jacobi/gpu/openacc/make.def +++ /dev/null @@ -1,45 +0,0 @@ -CC = gcc -CFLAGS = -Wall -Wextra -march=native -LIBS = -lm - -SYSTYPE = $(strip $(shell uname -n)) - -PROG = jacobi_serial_opt_$(SYSTYPE) -PROG_DEBUG = $(PROG)_DEBUG -PROG_MEMCHECK = $(PROG)_MEMCHECK -PROG_CALLGRIND = $(PROG)_CALLGRIND -PROG_CACHEGRIND = $(PROG)_CACHEGRIND - -HEADERS = $(wildcard ./include/*.h) -SOURCES = $(wildcard ./src/*.c) -DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile - -$(PROG): $(DEPENDENCIES) - $(CC) $(CFLAGS) -O3 -I./include $(SOURCES) -o $@ $(LIBS) - @echo ' ' - @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' - @echo ' ' - -$(PROG_DEBUG): $(DEPENDENCIES) - $(CC) 
$(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include $(SOURCES) -o $@ $(LIBS) - @echo ' ' - @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' - @echo ' ' - -$(PROG_MEMCHECK): $(DEPENDENCIES) - $(CC) $(CFLAGS) -Og -I./include $(SOURCES) -o $@ $(LIBS) - @echo ' ' - @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' - @echo ' ' - -$(PROG_CALLGRIND): $(DEPENDENCIES) - $(CC) $(CFLAGS) -g -O3 -I./include $(SOURCES) -o $@ $(LIBS) - @echo ' ' - @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' - @echo ' ' - -$(PROG_CACHEGRIND): $(DEPENDENCIES) - $(CC) $(CFLAGS) -g -O3 -I./include $(SOURCES) -o $@ $(LIBS) - @echo ' ' - @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' - @echo ' ' diff --git a/jacobi_solutions/gpu/openacc/Makefile b/jacobi_solutions/gpu/openacc/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f1b8f73a0a66f0bb3eec55944665c7e5c0319436 --- /dev/null +++ b/jacobi_solutions/gpu/openacc/Makefile @@ -0,0 +1,34 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +####################################################################### + +.PHONY: info openacc serial clean + +info: + @echo ' ' + @echo '-----------------------------------------------------------------------------------------------' + @echo '$$ make openacc ---> compile the OpenACC application ' + @echo '$$ make serial ---> compile the serial application ' + @echo '$$ make info ---> get make info ' + @echo '-----------------------------------------------------------------------------------------------' + @echo ' ' + +openacc: $(PROG) + +serial: 
$(PROG_SERIAL) + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_SERIAL) + rm -f *bin + rm -f jacobi_OpenACC* + rm -rf jacobi_serial* diff --git a/jacobi/gpu/openacc/include/allvars.h b/jacobi_solutions/gpu/openacc/include/allvars.h similarity index 100% rename from jacobi/gpu/openacc/include/allvars.h rename to jacobi_solutions/gpu/openacc/include/allvars.h diff --git a/jacobi/gpu/openacc/include/tools.h b/jacobi_solutions/gpu/openacc/include/tools.h similarity index 100% rename from jacobi/gpu/openacc/include/tools.h rename to jacobi_solutions/gpu/openacc/include/tools.h diff --git a/jacobi_solutions/gpu/openacc/make.def b/jacobi_solutions/gpu/openacc/make.def new file mode 100644 index 0000000000000000000000000000000000000000..67a0712729e8d28ca0df6530f95725a30ead4533 --- /dev/null +++ b/jacobi_solutions/gpu/openacc/make.def @@ -0,0 +1,26 @@ +CC = gcc +CC_OPENACC = pgcc +CFLAGS_OPENACC = -acc -fast -Minfo=accel -ta=tesla:cc80 -ta=tesla:maxregcount:32 -ta=tesla:ptxinfo -g +CFLAGS = -Wall -Wextra -march=native -mtune=native +LIBS = -lm + +SYSTYPE = $(strip $(shell uname -n)) + +PROG = jacobi_OpenACC_$(SYSTYPE) +PROG_SERIAL = jacobi_serial_$(SYSTYPE) + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile make.def + +$(PROG): $(DEPENDENCIES) + $(CC_OPENACC) $(CFLAGS_OPENACC) -O3 -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_SERIAL): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_SERIAL) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi_solutions/gpu/openacc/script/leonardo.sbatch b/jacobi_solutions/gpu/openacc/script/leonardo.sbatch new file mode 100644 index 0000000000000000000000000000000000000000..92623408dbd9e7f1c926977037a8dda73aca2a8a --- /dev/null +++ 
b/jacobi_solutions/gpu/openacc/script/leonardo.sbatch @@ -0,0 +1,59 @@ +#!/bin/bash + +#SBATCH --job-name=Jacobi-OpenACC +#SBATCH --account=IscrC_SCGPCT +#SBATCH --partition=boost_usr_prod +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=1 +#SBATCH --gres=gpu:1 +#SBATCH --output=Jacobi-OpenACC-%j.out +#SBATCH --error=Jacobi-OpenACC-%j.err +#SBATCH --time=0:10:00 + +# module purge +# module load scorep/8.1--openmpi--4.1.4--nvhpc--23.1-cuda-11.8 +# module load openmpi/4.1.6--nvhpc--23.11 nvhpc/23.11 +module load nvhpc/23.11 + +SCOREP= #"scorep --user" +CC="nvcc" +OPT= +#OPT="-g -mp=gpu -gpu=ccnative,debug,lineinfo -target=gpu -Minfo=all -v" +#LIB="-L/leonardo/prod/opt/libraries/openmpi/4.1.6/nvhpc--24.3/lib/ -lmpi -lm" +LIB="-lm" + +COMPUTE_SANITIZER="compute-sanitizer --generate-coredump " + +### script path +SCRIPT_PATH=/leonardo/home/userexternal/dgoz0000/test/openmp-gpu/script + +################## source list (C CPP) +# on Leonardo compile the CPP code +SOURCE_CODE=( CPP ) + +################################################################################# +################################################################################# +if [ ! -d ${SCRIPT_PATH} ] +then + printf "\n\t SCRIPT_PATH invalid... aborting..." + exit 1 +fi + +printf "\n\t SLURM:" +printf "\n\t\t NODES : ${SLURM_NNODES} (${SLURM_NODELIST})" +printf "\n\t\t NTASKS : ${SLURM_NTASKS}" +printf "\n\t\t CPU-PER-TASK: ${SLURM_CPUS_PER_TASK}" +printf "\n\t\t GPUs : ${SLURM_JOB_GPUS}" +printf "\n\n" + +# working directory +WORKDIR=${PWD} + +# compile the sources +source ${SCRIPT_PATH}/compile_OpenMP-GPU.sh + +# run the executable getting the profile using score-p +source ${SCRIPT_PATH}/run_OpenMP-GPU.sh + +printf "\n\t End of game!!! 
\n" diff --git a/jacobi/gpu/openacc/src/jacobi_2D_OpenACC.c b/jacobi_solutions/gpu/openacc/src/jacobi_2D_OpenACC.c similarity index 98% rename from jacobi/gpu/openacc/src/jacobi_2D_OpenACC.c rename to jacobi_solutions/gpu/openacc/src/jacobi_2D_OpenACC.c index 43541125f8a47aa8d741d3d9f9a45a6888b2d92d..dc1ccc1f9ab7ed47e4e6fff9babc4edcfa2bf74a 100644 --- a/jacobi/gpu/openacc/src/jacobi_2D_OpenACC.c +++ b/jacobi_solutions/gpu/openacc/src/jacobi_2D_OpenACC.c @@ -214,7 +214,7 @@ void JacobiAlgorithm(MyData **const restrict Phi, { double gpu_err=0.0; -#pragma acc parallel loop reduction(+: err) +#pragma acc parallel loop reduction(+: gpu_err) for (int j=jbeg ; j<=jend ; j++) { for (int i=ibeg ; i<=iend ; i++) @@ -226,7 +226,7 @@ void JacobiAlgorithm(MyData **const restrict Phi, /* avoid fabs from math library */ const MyData diff = (Phi[j][i] - Phi0[j][i]); - *error += ((diff > 0) ? diff : -diff); + gpu_err += ((diff > 0) ? diff : -diff); } /* loop over columns */ } /* loop over rows */ diff --git a/jacobi/gpu/openacc/src/tools.c b/jacobi_solutions/gpu/openacc/src/tools.c similarity index 96% rename from jacobi/gpu/openacc/src/tools.c rename to jacobi_solutions/gpu/openacc/src/tools.c index 5e1f75a00f79bb5f8d32a94334a4f36e21730be9..22d6c582bfd361a79f45124de43bd474bb2ced46 100644 --- a/jacobi/gpu/openacc/src/tools.c +++ b/jacobi_solutions/gpu/openacc/src/tools.c @@ -16,7 +16,7 @@ MyData **Allocate_2DdblArray(const int nx, const int ny) * nx rows and ny columns *********************************************************************** */ { - MyData **buf = malloc(nx * sizeof(MyData *)); + MyData **buf = (MyData **)malloc(nx * sizeof(MyData *)); assert(buf != NULL); buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData));