diff --git a/jacobi/gpu/openacc/Makefile b/jacobi/gpu/openacc/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..2a7fd8b407d50f92f26af8f404c312bda712a1fd --- /dev/null +++ b/jacobi/gpu/openacc/Makefile @@ -0,0 +1,63 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +####################################################################### + +.PHONY: info serial valgrind_memcheck valgrind_callgrind valgrind_cachegrind debug clean + +info: + @echo ' ' + @echo '-----------------------------------------------------------------------------------------------' + @echo '$$ make serial ---> compile the serial application ' + @echo '$$ make debug ---> debug the serial application ' + @echo '$$ make valgrind_memcheck ---> run the serial application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the serial application using Valgrind under Callgrind ' + @echo '$$ make valgrind_cachegrind ---> run the serial application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '-----------------------------------------------------------------------------------------------' + @echo ' ' + +serial: $(PROG) + +debug: $(PROG_DEBUG) + @echo 'OOOooo... debugging ...oooOOO' + gdb --args ./$< + @echo 'OOOooo... debugging ... oooOOO' + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + valgrind --tool=memcheck -s --leak-check=full --show-leak-kinds=all --track-origins=yes --read-var-info=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 10 10 + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... valgrind_callgrind ...OOOooo' + valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + valgrind --tool=cachegrind --cache-sim=yes --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin + rm -f jacobi_serial_opt_* diff --git a/jacobi/gpu/openacc/include/allvars.h b/jacobi/gpu/openacc/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..468de4a19bebf2c0d5c59cd24ad3aab94b8df0f6 --- /dev/null +++ b/jacobi/gpu/openacc/include/allvars.h @@ -0,0 +1,9 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +typedef double MyData; diff --git a/jacobi/gpu/openacc/include/tools.h b/jacobi/gpu/openacc/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154 --- /dev/null +++ b/jacobi/gpu/openacc/include/tools.h @@ -0,0 +1,18 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); diff --git a/jacobi/gpu/openacc/make.def b/jacobi/gpu/openacc/make.def new file mode 100644 index 0000000000000000000000000000000000000000..d434ce25f2b9217543c5b55bf5f1bf03bef58a1c --- /dev/null +++ b/jacobi/gpu/openacc/make.def @@ -0,0 +1,45 @@ +CC = gcc +CFLAGS = -Wall -Wextra -march=native +LIBS = -lm + +SYSTYPE = $(strip $(shell uname -n)) + +PROG = jacobi_serial_opt_$(SYSTYPE) +PROG_DEBUG = $(PROG)_DEBUG +PROG_MEMCHECK = $(PROG)_MEMCHECK +PROG_CALLGRIND = $(PROG)_CALLGRIND +PROG_CACHEGRIND = $(PROG)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/gpu/openacc/src/jacobi_2D_OpenACC.c b/jacobi/gpu/openacc/src/jacobi_2D_OpenACC.c new file mode 100644 index 0000000000000000000000000000000000000000..43541125f8a47aa8d741d3d9f9a45a6888b2d92d --- /dev/null +++ b/jacobi/gpu/openacc/src/jacobi_2D_OpenACC.c @@ -0,0 +1,269 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + const int, + const int); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict, + const MyData *const restrict, const int, const int, const int, const int); + +void WriteSolution(MyData **const phi, const int nx, const int ny); + +int main(int argc, char **argv) +{ + if (argc <= 2) + { + printf("\n\t Usage: <executable> <x_grid_size> <y_grid_size> \n\n"); + exit(EXIT_FAILURE); + } + + /* global X and Y grid size */ + const int NX_GLOB = (int) strtol(argv[1], NULL, 10); + const int NY_GLOB = (int) strtol(argv[2], NULL, 10); + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int iend = ibeg + NX_GLOB - 1; + const int nx = iend - ibeg + 1; + const int nx_tot = nx + 2 * NGHOST; + + const int jbeg = NGHOST; + const int jend = jbeg + NY_GLOB - 1; + const int ny = jend - jbeg + 1; + const int ny_tot = ny + 2 * NGHOST; + + printf("\n\t Grid indices:"); + printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot); + printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot); + + /* -------------------------------------------------------- + 2. Generate grid, allocate memory + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + MyData *x = xg; /* Global and local grids are the same */ + MyData *y = yg; /* for serial version of the code */ + + /* grids memory allocation */ + MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot); + MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot); + + /* -------------------------------------------------------- + 3. Initialize solution array to 0 + -------------------------------------------------------- */ + + for (int j=jbeg ; j<=jend ; j++) + for (int i=ibeg ; i<=iend ; i++) + phi0[j][i] = 0.0; + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = seconds(); + + /* -- 4a. Set boundary conditions first -- */ + + BoundaryConditions(phi0, x, y, nx, ny); + BoundaryConditions(phi, x, y, nx, ny); + + + MyData err = 1.0; + /* iterations */ + int k = 0; + + /* - allocate phi0 and phi on GPU */ + /* - copy phi0 from host to gpu */ + #pragma acc data copy(phi0[0:nx_tot][0:ny_tot]), create(phi[0:nx_tot][0:ny_tot]) + while (1) + { + /* -- 4b. Jacobi's method and residual (interior points) -- */ + /* core algorithm */ + + err = 0.0; + JacobiAlgorithm(phi, phi0, &err, delta, + ibeg, iend, jbeg, jend); + + printf ("\n\t Iteration = %d - err = %lg\n",k, err); + + /* increase the counter of loop iterations */ + k++; + + /* check convergence */ + if (err <= TOL) + break; + + /* swap the pointers */ + MyData **tmp = phi; + phi = phi0; + phi0 = tmp; + } + + WriteSolution(phi, nx, ny); + + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", seconds() - time_start); + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + if (yg) + free(yg); + + if (xg) + free(xg); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int iend = ibeg + nx - 1; + + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + int i,j; + + /* -- Left -- */ + i = ibeg - 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (1.0 - y[j]); + + /* -- Right -- */ + i = jend + 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (y[j] * y[j]); + + /* -- Bottom -- */ + j = jbeg - 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = (1.0 - x[i]); + + /* -- Top -- */ + j = jend + 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = x[i]; + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData *const restrict delta, + const int ibeg, + const int iend, + const int jbeg, + const int jend) +{ + double gpu_err=0.0; + +#pragma acc parallel loop reduction(+: err) + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j ][i-1] + + Phi0[j ][i+1] + + Phi0[j-1][i ] + + Phi0[j+1][i ]); + + /* avoid fabs from math library */ + const MyData diff = (Phi[j][i] - Phi0[j][i]); + *error += ((diff > 0) ? diff : -diff); + } /* loop over columns */ + } /* loop over rows */ + + gpu_err *= (delta[X] * delta[Y]); + + *error = gpu_err; + + return; +} + +/* ********************************************************************* */ + +/* ********************************************************************* */ +void WriteSolution (MyData **const phi, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + static int nfile = 0; /* File counter */ + + char fname[32]; + sprintf(fname,"jacobi2D_serial_opt_%02d.bin", nfile); + + FILE *fp; + printf ("> Writing %s\n",fname); + fp = fopen(fname, "wb"); + + for (int j=jbeg ; j<=jend ; j++) + { + fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp); + } + + nfile++; + fclose(fp); +} diff --git a/jacobi/gpu/openacc/src/tools.c b/jacobi/gpu/openacc/src/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..5e1f75a00f79bb5f8d32a94334a4f36e21730be9 --- /dev/null +++ b/jacobi/gpu/openacc/src/tools.c @@ -0,0 +1,58 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate memory for a double precision array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = malloc(nx * sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timespec ts; + return (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), + (double)ts.tv_sec + + (double)ts.tv_nsec * 1e-9); +} diff --git a/jacobi_solutions/openmp/miscellaneous/custom_parallel_loop.c b/jacobi_solutions/openmp/miscellaneous/custom_parallel_loop.c new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391