Skip to content
Snippets Groups Projects
Commit ccd06a9a authored by David Goz's avatar David Goz
Browse files

jacobi_solution/gpu/openacc added

parent e0a23993
No related branches found
No related tags found
No related merge requests found
# ---------------------------------------------------------------------
# Build definitions for the serial Jacobi solver.
#
# Declares the toolchain variables and one link rule per build flavour.
# Each executable is produced in a single compiler invocation from every
# ./src/*.c file and is rebuilt whenever a source, a header or the
# Makefile itself changes.
# ---------------------------------------------------------------------

# Toolchain and flags shared by every flavour.
CC := gcc
CFLAGS := -Wall -Wextra -march=native
LIBS := -lm

# Node name reported by `uname -n`; appended to every executable name.
# Simple assignment (:=) so the shell command runs once, at parse time.
SYSTYPE := $(strip $(shell uname -n))

# Executable names, one per build flavour.
PROG := jacobi_serial_opt_$(SYSTYPE)
PROG_DEBUG := $(PROG)_DEBUG
PROG_MEMCHECK := $(PROG)_MEMCHECK
PROG_CALLGRIND := $(PROG)_CALLGRIND
PROG_CACHEGRIND := $(PROG)_CACHEGRIND

# Inputs: the globs are expanded once, when this file is read.
HEADERS := $(wildcard ./include/*.h)
SOURCES := $(wildcard ./src/*.c)
DEPENDENCIES := $(SOURCES) $(HEADERS) Makefile

# Optimised build (-O3).
$(PROG): $(DEPENDENCIES)
	$(CC) $(CFLAGS) -O3 -I./include $(SOURCES) -o $@ $(LIBS)
	@echo ' '
	@echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine'
	@echo ' '

# Debugger build: -Og with full gdb debug info and frame pointers kept.
$(PROG_DEBUG): $(DEPENDENCIES)
	$(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include $(SOURCES) -o $@ $(LIBS)
	@echo ' '
	@echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine'
	@echo ' '

# Memcheck build: -Og, no extra debug flags.
$(PROG_MEMCHECK): $(DEPENDENCIES)
	$(CC) $(CFLAGS) -Og -I./include $(SOURCES) -o $@ $(LIBS)
	@echo ' '
	@echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine'
	@echo ' '

# Callgrind build: optimised code (-O3) with debug symbols (-g).
$(PROG_CALLGRIND): $(DEPENDENCIES)
	$(CC) $(CFLAGS) -g -O3 -I./include $(SOURCES) -o $@ $(LIBS)
	@echo ' '
	@echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine'
	@echo ' '

# Cachegrind build: same flags as the Callgrind build.
$(PROG_CACHEGRIND): $(DEPENDENCIES)
	$(CC) $(CFLAGS) -g -O3 -I./include $(SOURCES) -o $@ $(LIBS)
	@echo ' '
	@echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine'
	@echo ' '
...@@ -11,53 +11,24 @@ ...@@ -11,53 +11,24 @@
include make.def include make.def
####################################################################### #######################################################################
.PHONY: info serial valgrind_memcheck valgrind_callgrind valgrind_cachegrind debug clean .PHONY: info openacc serial clean
info: info:
@echo ' ' @echo ' '
@echo '-----------------------------------------------------------------------------------------------' @echo '-----------------------------------------------------------------------------------------------'
@echo '$$ make openacc ---> compile the OpenACC application '
@echo '$$ make serial ---> compile the serial application ' @echo '$$ make serial ---> compile the serial application '
@echo '$$ make debug ---> debug the serial application '
@echo '$$ make valgrind_memcheck ---> run the serial application using Valgrind under Memcheck '
@echo '$$ make valgrind_callgrind ---> run the serial application using Valgrind under Callgrind '
@echo '$$ make valgrind_cachegrind ---> run the serial application using Valgrind under Cachegrind '
@echo '$$ make clean ---> clean up all '
@echo '$$ make info ---> get make info ' @echo '$$ make info ---> get make info '
@echo '-----------------------------------------------------------------------------------------------' @echo '-----------------------------------------------------------------------------------------------'
@echo ' ' @echo ' '
serial: $(PROG) openacc: $(PROG)
debug: $(PROG_DEBUG) serial: $(PROG_SERIAL)
@echo 'OOOooo... debugging ...oooOOO'
gdb --args ./$<
@echo 'OOOooo... debugging ... oooOOO'
valgrind_memcheck: $(PROG_MEMCHECK)
@echo 'oooOOO... valgrind_memcheck ...OOOooo'
valgrind --tool=memcheck -s --leak-check=full --show-leak-kinds=all --track-origins=yes --read-var-info=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 10 10
@echo 'oooOOO... valgrind_memcheck ...OOOooo'
valgrind_callgrind: $(PROG_CALLGRIND)
@echo 'oooOOO... valgrind_callgrind ...OOOooo'
valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128
@echo ' '
@echo 'To generate a function-by-function summary from the profile data file:'
@echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less'
@echo '(kcachegrind is required in order to visualize the output using the GUI)'
valgrind_cachegrind: $(PROG_CACHEGRIND)
@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
valgrind --tool=cachegrind --cache-sim=yes --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128
@echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less'
@echo '(kcachegrind is required in order to visualize the output using the GUI)'
@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
clean: clean:
rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~
rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) rm -f $(PROG) $(PROG_SERIAL)
rm -f valgrind_*.txt
rm -f cachegrind.out.*
rm -f callgrind.*
rm -f *bin rm -f *bin
rm -f jacobi_serial_opt_* rm -f jacobi_OpenACC*
rm -rf jacobi_serial*
# ---------------------------------------------------------------------
# Build definitions for the Jacobi solver: one OpenACC executable and
# one serial executable, both compiled in a single step from every
# ./src/*.c file and rebuilt whenever a source, a header or the
# Makefile itself changes.
# ---------------------------------------------------------------------

# Host compiler and flags used for the serial build.
CC := gcc
CFLAGS := -Wall -Wextra -march=native -mtune=native

# Accelerator compiler and flags used for the OpenACC build.
# NOTE(review): the -ta=tesla:* sub-options are compiler specific; check
# them against the documentation of the installed compiler release.
CC_OPENACC := pgcc
CFLAGS_OPENACC := -acc -fast -Minfo=accel -ta=tesla:cc80 -ta=tesla:maxregcount:32 -ta=tesla:ptxinfo -g

LIBS := -lm

# Node name reported by `uname -n`; appended to every executable name.
# Simple assignment (:=) so the shell command runs once, at parse time.
SYSTYPE := $(strip $(shell uname -n))

# Executable names.
PROG := jacobi_OpenACC_$(SYSTYPE)
PROG_SERIAL := jacobi_serial_$(SYSTYPE)

# Inputs: the globs are expanded once, when this file is read.
HEADERS := $(wildcard ./include/*.h)
SOURCES := $(wildcard ./src/*.c)
DEPENDENCIES := $(SOURCES) $(HEADERS) Makefile

# OpenACC build.
$(PROG): $(DEPENDENCIES)
	$(CC_OPENACC) $(CFLAGS_OPENACC) -O3 -I./include $(SOURCES) -o $@ $(LIBS)
	@echo ' '
	@echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine'
	@echo ' '

# Serial build with the host compiler.
$(PROG_SERIAL): $(DEPENDENCIES)
	$(CC) $(CFLAGS) -O3 -I./include $(SOURCES) -o $@ $(LIBS)
	@echo ' '
	@echo 'Program' $(PROG_SERIAL) 'compiled for' $(SYSTYPE) 'machine'
	@echo ' '
#!/bin/bash
#SBATCH --job-name=Jacobi-OpenACC
#SBATCH --account=IscrC_SCGPCT
#SBATCH --partition=boost_usr_prod
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:1
#SBATCH --output=Jacobi-OpenACC-%j.out
#SBATCH --error=Jacobi-OpenACC-%j.err
#SBATCH --time=0:10:00

# Slurm batch job: loads the compiler module, sets the build variables,
# prints the allocation summary, then sources the compile and run helper
# scripts found under SCRIPT_PATH.
# The variables defined below are not used in this file; presumably the
# sourced helper scripts read them -- TODO confirm.

# module purge
# module load scorep/8.1--openmpi--4.1.4--nvhpc--23.1-cuda-11.8
# module load openmpi/4.1.6--nvhpc--23.11 nvhpc/23.11
module load nvhpc/23.11

# Shell assignments must not have blanks around '=': `CC = "nvcc"` would
# run a command called CC and leave the variable unset.
SCOREP="" #"scorep --user"
# NOTE(review): the job is named Jacobi-OpenACC but the compiler is nvcc
# and the helper scripts are the OpenMP-GPU ones -- confirm this is intended.
CC="nvcc"
OPT=""
#OPT="-g -mp=gpu -gpu=ccnative,debug,lineinfo -target=gpu -Minfo=all -v"
#LIB="-L/leonardo/prod/opt/libraries/openmpi/4.1.6/nvhpc--24.3/lib/ -lmpi -lm"
LIB="-lm"

COMPUTE_SANITIZER="compute-sanitizer --generate-coredump "

### script path
SCRIPT_PATH=/leonardo/home/userexternal/dgoz0000/test/openmp-gpu/script

################## source list (C CPP)
# on Leonardo compile the CPP code
SOURCE_CODE=( CPP )

#################################################################################
#################################################################################

if [ ! -d "${SCRIPT_PATH}" ]
then
    printf "\n\t SCRIPT_PATH invalid... aborting..."
    # Non-zero status so the scheduler records the job as failed.
    exit 1
fi

# Allocation summary. Values are passed as printf arguments so that a '%'
# or backslash inside them is never interpreted as part of the format.
printf "\n\t SLURM:"
printf "\n\t\t NODES : %s (%s)" "${SLURM_NNODES}" "${SLURM_NODELIST}"
printf "\n\t\t NTASKS : %s" "${SLURM_NTASKS}"
printf "\n\t\t CPU-PER-TASK: %s" "${SLURM_CPUS_PER_TASK}"
printf "\n\t\t GPUs : %s" "${SLURM_JOB_GPUS}"
printf "\n\n"

# working directory
WORKDIR="${PWD}"

# compile the sources
source "${SCRIPT_PATH}/compile_OpenMP-GPU.sh"

# run the executable getting the profile using score-p
source "${SCRIPT_PATH}/run_OpenMP-GPU.sh"

printf "\n\t End of game!!! \n"
...@@ -214,7 +214,7 @@ void JacobiAlgorithm(MyData **const restrict Phi, ...@@ -214,7 +214,7 @@ void JacobiAlgorithm(MyData **const restrict Phi,
{ {
double gpu_err=0.0; double gpu_err=0.0;
#pragma acc parallel loop reduction(+: err) #pragma acc parallel loop reduction(+: gpu_err)
for (int j=jbeg ; j<=jend ; j++) for (int j=jbeg ; j<=jend ; j++)
{ {
for (int i=ibeg ; i<=iend ; i++) for (int i=ibeg ; i<=iend ; i++)
...@@ -226,7 +226,7 @@ void JacobiAlgorithm(MyData **const restrict Phi, ...@@ -226,7 +226,7 @@ void JacobiAlgorithm(MyData **const restrict Phi,
/* avoid fabs from math library */ /* avoid fabs from math library */
const MyData diff = (Phi[j][i] - Phi0[j][i]); const MyData diff = (Phi[j][i] - Phi0[j][i]);
*error += ((diff > 0) ? diff : -diff); gpu_err += ((diff > 0) ? diff : -diff);
} /* loop over columns */ } /* loop over columns */
} /* loop over rows */ } /* loop over rows */
......
...@@ -16,7 +16,7 @@ MyData **Allocate_2DdblArray(const int nx, const int ny) ...@@ -16,7 +16,7 @@ MyData **Allocate_2DdblArray(const int nx, const int ny)
* nx rows and ny columns * nx rows and ny columns
*********************************************************************** */ *********************************************************************** */
{ {
MyData **buf = malloc(nx * sizeof(MyData *)); MyData **buf = (MyData **)malloc(nx * sizeof(MyData *));
assert(buf != NULL); assert(buf != NULL);
buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData)); buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData));
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment