diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..c18d596b8fd9d43974ccc340974174d7b0d22cbc --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +# put here the files which are not going to be committed +*.txt +*~ diff --git a/jacobi/mpi/SendRecv/Makefile b/jacobi/mpi/SendRecv/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..4d38d38eabe9aa2bade6928c4395d23debcbd4e0 --- /dev/null +++ b/jacobi/mpi/SendRecv/Makefile @@ -0,0 +1,57 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +include make_mpi_path +####################################################################### + +.PHONY: info mpi valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean + +info: + @echo ' ' + @echo '-----------------------------------------------------------------------------------------' + @echo '$$ make ---> compile the mpi application ' + @echo '$$ make valgrind_memcheck ---> run the mpi application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the mpi application using Valgrind under Callgrind ' + @echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '-----------------------------------------------------------------------------------------' + @echo ' ' + +mpi: $(PROG) + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + mpirun -n 2 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 5 5 + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... valgrind_callgrind ...OOOooo' + mpirun -n 2 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + mpirun -n 2 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... 
valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin diff --git a/jacobi/mpi/SendRecv/include/allvars.h b/jacobi/mpi/SendRecv/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..798e4e6a2f2a740b3ab5a1394ac85eef8da1255c --- /dev/null +++ b/jacobi/mpi/SendRecv/include/allvars.h @@ -0,0 +1,20 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +#include <mpi.h> + +#define MASTER 0 + +#if defined(SINGLE_PRECISION) +typedef float MyData; +#define MPI_MyDatatype MPI_FLOAT_PRECISION +#else +/* double precision is assumed by default */ +typedef double MyData; +#define MPI_MyDatatype MPI_DOUBLE_PRECISION +#endif /* SINGLE_PRECISION */ diff --git a/jacobi/mpi/SendRecv/include/tools.h b/jacobi/mpi/SendRecv/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154 --- /dev/null +++ b/jacobi/mpi/SendRecv/include/tools.h @@ -0,0 +1,18 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); diff --git a/jacobi/mpi/SendRecv/make.def b/jacobi/mpi/SendRecv/make.def new file mode 100644 index 0000000000000000000000000000000000000000..21f903d0773e74a9bb7cac39f1e98e4687617678 --- /dev/null +++ b/jacobi/mpi/SendRecv/make.def @@ -0,0 +1,45 @@ +CC ?= gcc +CFLAGS ?= -Wall -Wextra -march=native +LIBS ?= -lm -lmpi + +SYSTYPE = $(strip $(shell uname -n)) + +PROG ?= jacobi_mpi_SendRecv_$(SYSTYPE) +PROG_DEBUG = $(PROG)_DEBUG +PROG_MEMCHECK = $(PROG)_MEMCHECK +PROG_CALLGRIND = $(PROG)_CALLGRIND +PROG_CACHEGRIND = $(PROG)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/mpi/SendRecv/make_mpi_path b/jacobi/mpi/SendRecv/make_mpi_path new file mode 100644 index 0000000000000000000000000000000000000000..f2c3de9e0d1796034a518159f4c53a40e76919fc --- 
/dev/null +++ b/jacobi/mpi/SendRecv/make_mpi_path @@ -0,0 +1,9 @@ +# set the MPI install path + +MPI_INSTALL_PATH = /home/gozzilla/software/openmpi/openmpi-5.0.3 + + + +MPI = $(MPI_INSTALL_PATH) +MPI_INC = $(MPI)/include +MPI_LIB = $(MPI)/lib \ No newline at end of file diff --git a/jacobi/mpi/SendRecv/src/jacobi_2D_mpi_sendrecv.c b/jacobi/mpi/SendRecv/src/jacobi_2D_mpi_sendrecv.c new file mode 100644 index 0000000000000000000000000000000000000000..9c57bafb03ac4b795d242921ea13cb62fa36c956 --- /dev/null +++ b/jacobi/mpi/SendRecv/src/jacobi_2D_mpi_sendrecv.c @@ -0,0 +1,505 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +/* #define DEBUG */ + +typedef struct MyRows +{ + int start; + int end; +} myDomain; + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + const int, + const int); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict, + const MyData *const restrict, const int, const int, const int, const int); + +void get_domains(MyData **const buffer, + myDomain *const domain, + const int grid_dim, + const int rank, + const int nranks, + const MPI_Comm comm); + +void mpi_exchange_sendrecv_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d); + +void WriteSolution(MyData **const phi, const int nx, const int ny); + +void copy_grids(MyData **const restrict A, + MyData **const restrict B, + const int xbeg, + const int xend, + const int ybeg, + const int yend); + +int main(int argc, char **argv) +{ + int rank, Nranks; + const MPI_Comm comm = MPI_COMM_WORLD; + MPI_Init(&argc, &argv); + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &Nranks); + + if ((argc <= 1) && !rank) + { + printf("\n\t Usage: <executable> <grid_size> \n\n"); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* global X and Y grid size (square matrix supposed) */ + const int NX_GLOB = (int) strtol(argv[1], NULL, 10); + const int NY_GLOB = NX_GLOB; + + /******************************************************************************************************/ + /* 1D MPI-decomposition: + the physical domain is sliced into slabs along the vertical direction, + while the grids are replicated across the MPI processes. + This approach is not the most efficient in terms of memory usage, + because the arrays are replicated across MPI process instead of to be distributed */ + + /* get the reminder, i.e. take into account uneven + decomposition of points among the processes */ + const int rem = (NX_GLOB + 2) % Nranks; + + /* get the amount of data for each MPI process: + - chunk is supposed to be >= 1 */ + const int chunk = (NX_GLOB + 2 - rem) / Nranks; + if (chunk < 1) + { + printf("\n\t chunk < 1 ... 
aborting ...[rank: %d]\n", rank); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* get the slab dimension along vertical direction: */ + int incr, offset; + if (rank < rem) + { + incr = chunk + 1; + offset = 0; + } + else + { + incr = chunk; + offset = rem; + } + + myDomain domain; + domain.start = ((rank * incr) + offset); + domain.end = (domain.start + incr) - 1; + /* boundaries */ + domain.start = ((domain.start == 0) ? NGHOST : domain.start); + domain.end = ((domain.end == NX_GLOB + 1) ? NX_GLOB : domain.end); + + /* rank of the top process */ + const int nbrtop = (((rank + 1) >= Nranks) ? MPI_PROC_NULL : (rank + 1)); + /* rank of the bottom process */ + const int nbrbottom = (((rank - 1) < 0) ? MPI_PROC_NULL : (rank - 1)); + + /******************************************************************************************************/ + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int iend = ibeg + NX_GLOB - 1; + const int nx = iend - ibeg + 1; + const int nx_tot = nx + 2 * NGHOST; + + const int jbeg = NGHOST; + const int jend = jbeg + NY_GLOB - 1; + const int ny = jend - jbeg + 1; + const int ny_tot = ny + 2 * NGHOST; + + if (rank == MASTER) + { + printf("\n\t Grid indices:"); + printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot); + printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot); + } + +#if defined(DEBUG) + for (int task=0 ; task<Nranks ; task++) + { + MPI_Barrier(comm); + + if (task == rank) + { + printf("\n\t rank: %d", rank); + printf("\n\t\t domain.start: %d - domain.end: %d", domain.start, domain.end); + printf("\n\t\t nbrtop: %d - nbrbottom: %d \n", nbrtop, nbrbottom); + fflush(stdout); + } + } + + MPI_Barrier(comm); +#endif /* DEBUG */ + + /* -------------------------------------------------------- + 2. Generate grid, allocate memory + Not optimized because the grids are (unnecessarily) + replicated across MPI processes + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + MyData *x = xg; /* Global and local grids are the same */ + MyData *y = yg; /* for serial version of the code */ + + /* grids memory allocation */ + MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot); + MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot); + + /* -------------------------------------------------------- + 3. Initialize solution array to 0 + -------------------------------------------------------- */ + + for (int j=jbeg ; j<=jend ; j++) + for (int i=ibeg ; i<=iend ; i++) + { + phi0[j][i] = 0.0; + phi[j][i] = 0.0; + } + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = MPI_Wtime(); + + /* -- 4a. 
Set boundary conditions first -- */ + BoundaryConditions(phi0, x, y, nx, ny); + BoundaryConditions(phi, x, y, nx, ny); + + MyData err = 1.0; + /* iterations */ + int k = 0; + while (1) + { + /* -- 4c. Jacobi's method and residual (interior points) -- */ + /* core algorithm */ + + err = 0.0; + JacobiAlgorithm(phi, phi0, &err, delta, + ibeg, iend, domain.start, domain.end); + + if (!rank) + printf("\n\t Iteration = %d - err = %lg\n",k, err); + + /* increase the counter of loop iterations */ + k++; + + /* get the total error */ + MyData toterr; + /* combines values from all processes and distributes the result back to all processes */ + MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm); + + /* check convergence */ + if (toterr <= TOL) + { + /* master task gathers all the domains */ + get_domains(phi, &domain, NY_GLOB, rank, Nranks, comm); + + break; + } + + /* -- 4b. MPI communications */ + mpi_exchange_sendrecv_1d(phi, NX_GLOB, + nbrtop, nbrbottom, + domain.start, domain.end, comm); + + /* swap the pointers */ + MyData **tmp = phi; + phi = phi0; + phi0 = tmp; + } + + /* master rank writes the solution */ + if (!rank) + { + WriteSolution(phi, nx, ny); + + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start); + } + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + if (yg) + free(yg); + + if (xg) + free(xg); + + MPI_Finalize(); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int iend = ibeg + nx - 1; + + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + int i,j; + + /* -- Left -- */ + i = ibeg - 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (1.0 - y[j]); + + /* -- Right -- */ + i = jend + 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (y[j] * y[j]); + + /* -- Bottom -- */ + j = jbeg - 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = (1.0 - x[i]); + + /* -- Top -- */ + j = jend + 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = x[i]; + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData *const restrict delta, + const int ibeg, + const int iend, + const int jbeg, + const int jend) +{ + *error = 0.0; + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] + + Phi0[j-1][i] + Phi0[j+1][i]); + + *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]); + } /* loop over columns */ + } /* loop over rows */ + + return; +} + +/* ********************************************************************* */ +void mpi_exchange_sendrecv_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d) +{ + /* The function is called by each MPI rank */ + /* - nbrtop is the MPI process with rank + 1 */ + /* - nbrbottom is the MPI process with rank - 1 */ + + MPI_Sendrecv(&buffer[end][1], n, MPI_MyDatatype, nbrtop, 0, + &buffer[start-1][1], n, MPI_MyDatatype, nbrbottom, 0, + comm1d, MPI_STATUS_IGNORE); + + 
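+  /* First stage (above): each rank sends its top interior row (end) to nbrtop     */
+  /* and receives its bottom ghost row (start-1) from nbrbottom.                   */
+  /* Second stage (below): the roles are reversed, sending row start to nbrbottom  */
+  /* and filling the top ghost row (end+1) from nbrtop.                            */
+  /* Ranks at the global boundaries have MPI_PROC_NULL neighbours, so the          */
+  /* corresponding halves of the exchange complete immediately as no-ops.          */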
MPI_Sendrecv(&buffer[start][1], n, MPI_MyDatatype, nbrbottom, 1, + &buffer[end+1][1], n, MPI_MyDatatype, nbrtop, 1, + comm1d, MPI_STATUS_IGNORE); + + return; +} + +/* ********************************************************************* */ + +void get_domains(MyData **const buffer, + myDomain *const domain, + const int grid_dim, + const int rank, + const int nranks, + const MPI_Comm comm) +{ + /* Master process gathers all the domains */ + + /***************************** get the domain boundaries from each process *************************************/ + myDomain *boundaries = NULL; + if (rank == MASTER) + { + boundaries = (myDomain *)malloc(nranks * sizeof(*boundaries)); + if (boundaries == NULL) + { + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + boundaries[MASTER].start = domain->start; + boundaries[MASTER].end = domain->end; + } + + for (int task=0 ; task<nranks ; task++) + { + if (rank == MASTER) + { + if (task) + { + MPI_Status status; + MPI_Recv((void *)&boundaries[task], sizeof(myDomain), MPI_BYTE, task, 0, comm, &status); + if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) + { + free(boundaries); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + } +#if defined(DEBUG) + printf("\n\t Diplacements[%d].start = %d - Diplacements[%d].end = %d", + task, boundaries[task].start, task, boundaries[task].end); +#endif /* DEBUG */ + } /* MASTER */ + else if (task == rank) + { + MPI_Send((void *)domain, sizeof(myDomain), MPI_BYTE, MASTER, 0, comm); + } + } /* loop over nranks */ + +#if defined(DEBUG) + if (rank == MASTER) + { + printf("\n"); + fflush(stdout); + } +#endif /* DEBUG */ + + /**************************************************************************************************/ + + /***************************************** get the domain from each process *************************/ + + for (int task=0 ; task<nranks ; task++) + { + if (rank == MASTER) + { + if (task) + { + MPI_Status status; + /* number of grid points to receive (including ghost points) */ + const int nrows = (boundaries[task].end - boundaries[task].start + 1); + const int elements = (nrows * (grid_dim + 2)); + MPI_Recv((void *)&buffer[boundaries[task].start][0], elements, MPI_MyDatatype, task, 0, comm, &status); + if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) + { + free(boundaries); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + } + } /* MASTER */ + else if (task == rank) + { + const int nrows = (domain->end - domain->start + 1); + const int elements = (nrows * (grid_dim + 2)); + MPI_Send((void *)&buffer[domain->start][0], elements, MPI_MyDatatype, MASTER, 0, comm); + } + } /* loop over nranks */ + + /***************************************************************************************************/ + + if (rank == MASTER) + free(boundaries); + + return; +} + +/* ********************************************************************* */ +void WriteSolution(MyData **const phi, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + static int nfile = 0; /* File counter */ + + char fname[32]; + sprintf(fname,"jacobi2D_mpi_sendrecv_%02d.bin", nfile); + + FILE *fp; + printf ("> Writing %s\n",fname); + fp = fopen(fname, "wb"); + + /* discard boundaies */ + for (int j=jbeg ; j<=jend ; j++) + { + fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp); + } + + nfile++; + fclose(fp); +} diff --git 
a/jacobi/mpi/SendRecv/src/tools.c b/jacobi/mpi/SendRecv/src/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce --- /dev/null +++ b/jacobi/mpi/SendRecv/src/tools.c @@ -0,0 +1,59 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate memory for a double precision array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = malloc(nx * sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timeval tmp; + gettimeofday(&tmp, (struct timezone *)0); + double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; + + return sec; +} diff --git a/jacobi/mpi/Send_Recv_blocking/Makefile b/jacobi/mpi/Send_Recv_blocking/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..4d38d38eabe9aa2bade6928c4395d23debcbd4e0 --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/Makefile @@ -0,0 +1,57 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +include make_mpi_path +####################################################################### + +.PHONY: info mpi valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean + +info: + @echo ' ' + @echo '-----------------------------------------------------------------------------------------' + @echo '$$ make ---> compile the mpi application ' + @echo '$$ make valgrind_memcheck ---> run the mpi application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the mpi application using Valgrind under Callgrind ' + @echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '-----------------------------------------------------------------------------------------' + @echo ' ' + +mpi: $(PROG) + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... 
valgrind_memcheck ...OOOooo' + mpirun -n 2 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 5 5 + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... valgrind_callgrind ...OOOooo' + mpirun -n 2 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + mpirun -n 2 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin diff --git a/jacobi/mpi/Send_Recv_blocking/include/allvars.h b/jacobi/mpi/Send_Recv_blocking/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..798e4e6a2f2a740b3ab5a1394ac85eef8da1255c --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/include/allvars.h @@ -0,0 +1,20 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +#include <mpi.h> + +#define MASTER 0 + +#if defined(SINGLE_PRECISION) +typedef float MyData; +#define MPI_MyDatatype MPI_FLOAT_PRECISION +#else +/* double precision is assumed by default */ +typedef double MyData; +#define MPI_MyDatatype MPI_DOUBLE_PRECISION +#endif /* SINGLE_PRECISION */ diff --git a/jacobi/mpi/Send_Recv_blocking/include/tools.h b/jacobi/mpi/Send_Recv_blocking/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154 --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/include/tools.h @@ -0,0 +1,18 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); diff --git a/jacobi/mpi/Send_Recv_blocking/make.def b/jacobi/mpi/Send_Recv_blocking/make.def new file mode 100644 index 0000000000000000000000000000000000000000..27810e9c27a0cb14d1f42186b83e983f3beea667 --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/make.def @@ -0,0 +1,45 @@ +CC ?= gcc +CFLAGS ?= -Wall -Wextra -march=native +LIBS ?= -lm -lmpi + +SYSTYPE = $(strip $(shell uname -n)) + +PROG ?= jacobi_mpi_Send_Recv_blocking_$(SYSTYPE) +PROG_DEBUG = $(PROG)_DEBUG +PROG_MEMCHECK = $(PROG)_MEMCHECK +PROG_CALLGRIND = $(PROG)_CALLGRIND +PROG_CACHEGRIND = $(PROG)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 
'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/mpi/Send_Recv_blocking/make_mpi_path b/jacobi/mpi/Send_Recv_blocking/make_mpi_path new file mode 100644 index 0000000000000000000000000000000000000000..f2c3de9e0d1796034a518159f4c53a40e76919fc --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/make_mpi_path @@ -0,0 +1,9 @@ +# set the MPI install path + +MPI_INSTALL_PATH = /home/gozzilla/software/openmpi/openmpi-5.0.3 + + + +MPI = $(MPI_INSTALL_PATH) +MPI_INC = $(MPI)/include +MPI_LIB = $(MPI)/lib \ No newline at end of file diff --git a/jacobi/mpi/Send_Recv_blocking/src/jacobi_2D_mpi_send_recv_blocking.c b/jacobi/mpi/Send_Recv_blocking/src/jacobi_2D_mpi_send_recv_blocking.c new file mode 100644 index 0000000000000000000000000000000000000000..936e6636aacf3925d610c11012c1122c13a476c4 --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/src/jacobi_2D_mpi_send_recv_blocking.c @@ -0,0 +1,515 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. 
Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +/* #define DEBUG */ + +typedef struct MyRows +{ + int start; + int end; +} myDomain; + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + const int, + const int); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict, + const MyData *const restrict, const int, const int, const int, const int); + +void get_domains(MyData **const buffer, + myDomain *const domain, + const int grid_dim, + const int rank, + const int nranks, + const MPI_Comm comm); + +void mpi_exchange_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d); + +void WriteSolution(MyData **const phi, const int nx, const int ny); + +void copy_grids(MyData **const restrict A, + MyData **const restrict B, + const int xbeg, + const int xend, + const int ybeg, + const int yend); + +int main(int argc, char **argv) +{ + int rank, Nranks; + const MPI_Comm comm = MPI_COMM_WORLD; + MPI_Init(&argc, &argv); + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &Nranks); + + if ((argc <= 1) && !rank) + { + printf("\n\t Usage: <executable> <grid_size> \n\n"); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* global X and Y grid size (square matrix supposed) */ + const int NX_GLOB = (int) strtol(argv[1], NULL, 10); + const int NY_GLOB = NX_GLOB; + + /******************************************************************************************************/ + /* 1D MPI-decomposition: + the physical domain is sliced into slabs along the vertical direction, + while the grids are replicated across the MPI processes. + This approach is not the most efficient in terms of memory usage, + because the arrays are replicated across MPI process instead of to be distributed */ + + /* get the reminder, i.e. take into account uneven + decomposition of points among the processes */ + const int rem = (NX_GLOB + 2) % Nranks; + + /* get the amount of data for each MPI process: + - chunk is supposed to be >= 1 */ + const int chunk = (NX_GLOB + 2 - rem) / Nranks; + if (chunk < 1) + { + printf("\n\t chunk < 1 ... aborting ...[rank: %d]\n", rank); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* get the slab dimension along vertical direction: */ + int incr, offset; + if (rank < rem) + { + incr = chunk + 1; + offset = 0; + } + else + { + incr = chunk; + offset = rem; + } + + myDomain domain; + domain.start = ((rank * incr) + offset); + domain.end = (domain.start + incr) - 1; + /* boundaries */ + domain.start = ((domain.start == 0) ? NGHOST : domain.start); + domain.end = ((domain.end == NX_GLOB + 1) ? NX_GLOB : domain.end); + + /* rank of the top process */ + const int nbrtop = (((rank + 1) >= Nranks) ? MPI_PROC_NULL : (rank + 1)); + /* rank of the bottom process */ + const int nbrbottom = (((rank - 1) < 0) ? 
MPI_PROC_NULL : (rank - 1)); + + /******************************************************************************************************/ + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int iend = ibeg + NX_GLOB - 1; + const int nx = iend - ibeg + 1; + const int nx_tot = nx + 2 * NGHOST; + + const int jbeg = NGHOST; + const int jend = jbeg + NY_GLOB - 1; + const int ny = jend - jbeg + 1; + const int ny_tot = ny + 2 * NGHOST; + + if (rank == MASTER) + { + printf("\n\t Grid indices:"); + printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot); + printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot); + } + +#if defined(DEBUG) + for (int task=0 ; task<Nranks ; task++) + { + MPI_Barrier(comm); + + if (task == rank) + { + printf("\n\t rank: %d", rank); + printf("\n\t\t domain.start: %d - domain.end: %d", domain.start, domain.end); + printf("\n\t\t nbrtop: %d - nbrbottom: %d \n", nbrtop, nbrbottom); + fflush(stdout); + } + } + + MPI_Barrier(comm); +#endif /* DEBUG */ + + /* -------------------------------------------------------- + 2. Generate grid, allocate memory + Not optimized because the grids are (unnecessarily) + replicated across MPI processes + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + MyData *x = xg; /* Global and local grids are the same */ + MyData *y = yg; /* for serial version of the code */ + + /* grids memory allocation */ + MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot); + MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot); + + /* -------------------------------------------------------- + 3. Initialize solution array to 0 + -------------------------------------------------------- */ + + for (int j=jbeg ; j<=jend ; j++) + for (int i=ibeg ; i<=iend ; i++) + { + phi0[j][i] = 0.0; + phi[j][i] = 0.0; + } + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = MPI_Wtime(); + + /* -- 4a. Set boundary conditions first -- */ + BoundaryConditions(phi0, x, y, nx, ny); + BoundaryConditions(phi, x, y, nx, ny); + + MyData err = 1.0; + /* iterations */ + int k = 0; + while (1) + { + /* -- 4c. 
Jacobi's method and residual (interior points) -- */ + /* core algorithm */ + + err = 0.0; + JacobiAlgorithm(phi, phi0, &err, delta, + ibeg, iend, domain.start, domain.end); + + if (!rank) + printf("\n\t Iteration = %d - err = %lg\n",k, err); + + /* increase the counter of loop iterations */ + k++; + + /* get the total error */ + MyData toterr; + /* combines values from all processes and distributes the result back to all processes */ + MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm); + + /* check convergence */ + if (toterr <= TOL) + { + /* master task gathers all the domains */ + get_domains(phi, &domain, NY_GLOB, rank, Nranks, comm); + + break; + } + + /* -- 4b. MPI communications */ + mpi_exchange_1d(phi, NX_GLOB, + nbrtop, nbrbottom, + domain.start, domain.end, comm); + + /* swap the pointers */ + MyData **tmp = phi; + phi = phi0; + phi0 = tmp; + } + + /* master rank writes the solution */ + if (!rank) + { + WriteSolution(phi, nx, ny); + + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start); + } + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + if (yg) + free(yg); + + if (xg) + free(xg); + + MPI_Finalize(); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int iend = ibeg + nx - 1; + + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + int i,j; + + /* -- Left -- */ + i = ibeg - 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (1.0 - y[j]); + + /* -- Right -- */ + i = jend + 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (y[j] * y[j]); + + /* -- Bottom -- */ + j = jbeg - 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = (1.0 - x[i]); + + /* -- Top -- */ + j = jend + 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = x[i]; + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData *const restrict delta, + const int ibeg, + const int iend, + const int jbeg, + const int jend) +{ + *error = 0.0; + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] + + Phi0[j-1][i] + Phi0[j+1][i]); + + *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]); + } /* loop over columns */ + } /* loop over rows */ + + return; +} + +/* ********************************************************************* */ +void mpi_exchange_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d) +{ + /* The function is called by each MPI rank + - nbrtop is the MPI process with rank + 1 + - nbrbottom is the MPI process with rank - 1 */ + + /***************** First communication stage *******************/ + /* Perform a blocking send to the top (rank+1) process */ + MPI_Send(&buffer[end][1], n, MPI_MyDatatype, nbrtop, 0, comm1d); + + /* Perform a blocking receive from the bottom (rank-1) process */ + MPI_Recv(&buffer[start-1][1], n, MPI_MyDatatype, + nbrbottom, 0, comm1d, MPI_STATUS_IGNORE); + 
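+  /* Note: every rank posts the blocking MPI_Send to nbrtop before the matching    */
+  /* MPI_Recv from nbrbottom. This cannot deadlock because the chain terminates    */
+  /* at the last rank, whose nbrtop is MPI_PROC_NULL and whose send therefore      */
+  /* returns immediately; for messages above the eager threshold, however, the     */
+  /* exchange may serialize across ranks, which motivates the MPI_Sendrecv and     */
+  /* nonblocking variants included alongside this version.                         */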
/***************************************************************/ + + /**************** Second communication stage *******************/ + /* - Perform a blocking send to the bottom (rank-1) process */ + MPI_Send(&buffer[start][1], n, MPI_MyDatatype, nbrbottom, 1, comm1d); + + /* Perform a blocking receive from the top (rank+1) process */ + MPI_Recv(&buffer[end+1][1], n, MPI_MyDatatype, + nbrtop, 1, comm1d, MPI_STATUS_IGNORE); + /***************************************************************/ + + return; +} + +/* ********************************************************************* */ + +void get_domains(MyData **const buffer, + myDomain *const domain, + const int grid_dim, + const int rank, + const int nranks, + const MPI_Comm comm) +{ + /* Master process gathers all the domains */ + + /***************************** get the domain boundaries from each process *************************************/ + myDomain *boundaries = NULL; + if (rank == MASTER) + { + boundaries = (myDomain *)malloc(nranks * sizeof(*boundaries)); + if (boundaries == NULL) + { + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + boundaries[MASTER].start = domain->start; + boundaries[MASTER].end = domain->end; + } + + for (int task=0 ; task<nranks ; task++) + { + if (rank == MASTER) + { + if (task) + { + MPI_Status status; + MPI_Recv((void *)&boundaries[task], sizeof(myDomain), MPI_BYTE, task, 0, comm, &status); + if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) + { + free(boundaries); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + } +#if defined(DEBUG) + printf("\n\t Diplacements[%d].start = %d - Diplacements[%d].end = %d", + task, boundaries[task].start, task, boundaries[task].end); +#endif /* DEBUG */ + } /* MASTER */ + else if (task == rank) + { + MPI_Send((void *)domain, sizeof(myDomain), MPI_BYTE, MASTER, 0, comm); + } + } /* loop over nranks */ + +#if defined(DEBUG) + if (rank == MASTER) + { + printf("\n"); + fflush(stdout); + } +#endif /* DEBUG */ + + /**************************************************************************************************/ + + /***************************************** get the domain from each process *************************/ + + for (int task=0 ; task<nranks ; task++) + { + if (rank == MASTER) + { + if (task) + { + MPI_Status status; + /* number of grid points to receive (including ghost points) */ + const int nrows = (boundaries[task].end - boundaries[task].start + 1); + const int elements = (nrows * (grid_dim + 2)); + MPI_Recv((void *)&buffer[boundaries[task].start][0], elements, MPI_MyDatatype, task, 0, comm, &status); + if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) + { + free(boundaries); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + } + } /* MASTER */ + else if (task == rank) + { + const int nrows = (domain->end - domain->start + 1); + const int elements = (nrows * (grid_dim + 2)); + MPI_Send((void *)&buffer[domain->start][0], elements, MPI_MyDatatype, MASTER, 0, comm); + } + } /* loop over nranks */ + + /***************************************************************************************************/ + + if (rank == MASTER) + free(boundaries); + + return; +} + +/* ********************************************************************* */ +void WriteSolution(MyData **const phi, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + static int 
nfile = 0; /* File counter */ + + char fname[32]; + sprintf(fname,"jacobi2D_mpi_send_recv_blocking_%02d.bin", nfile); + + FILE *fp; + printf ("> Writing %s\n",fname); + fp = fopen(fname, "wb"); + + /* discard boundaies */ + for (int j=jbeg ; j<=jend ; j++) + { + fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp); + } + + nfile++; + fclose(fp); +} diff --git a/jacobi/mpi/Send_Recv_blocking/src/tools.c b/jacobi/mpi/Send_Recv_blocking/src/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/src/tools.c @@ -0,0 +1,59 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate memory for a double precision array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = malloc(nx * sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timeval tmp; + gettimeofday(&tmp, (struct timezone *)0); + double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; + + return sec; +} diff --git a/jacobi/mpi/Send_Recv_nonblocking/Makefile b/jacobi/mpi/Send_Recv_nonblocking/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..4d38d38eabe9aa2bade6928c4395d23debcbd4e0 --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/Makefile @@ -0,0 +1,57 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +include make_mpi_path +####################################################################### + +.PHONY: info mpi valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean + +info: + @echo ' ' + @echo '-----------------------------------------------------------------------------------------' + @echo '$$ make ---> compile the mpi application ' + @echo '$$ make valgrind_memcheck ---> run the mpi application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the mpi application using Valgrind under Callgrind ' + @echo '$$ make valgrind_cachegrind ---> run the 
mpi application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '-----------------------------------------------------------------------------------------' + @echo ' ' + +mpi: $(PROG) + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + mpirun -n 2 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 5 5 + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... valgrind_callgrind ...OOOooo' + mpirun -n 2 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + mpirun -n 2 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin diff --git a/jacobi/mpi/Send_Recv_nonblocking/include/allvars.h b/jacobi/mpi/Send_Recv_nonblocking/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..798e4e6a2f2a740b3ab5a1394ac85eef8da1255c --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/include/allvars.h @@ -0,0 +1,20 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +#include <mpi.h> + +#define MASTER 0 + +#if defined(SINGLE_PRECISION) +typedef float MyData; +#define MPI_MyDatatype MPI_FLOAT_PRECISION +#else +/* double precision is assumed by default */ +typedef double MyData; +#define MPI_MyDatatype MPI_DOUBLE_PRECISION +#endif /* SINGLE_PRECISION */ diff --git a/jacobi/mpi/Send_Recv_nonblocking/include/tools.h b/jacobi/mpi/Send_Recv_nonblocking/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154 --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/include/tools.h @@ -0,0 +1,18 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); diff --git a/jacobi/mpi/Send_Recv_nonblocking/make.def b/jacobi/mpi/Send_Recv_nonblocking/make.def new file mode 100644 index 0000000000000000000000000000000000000000..949ebfc52229ca2c73fa1cf642bc1b5219f8dd3b --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/make.def @@ -0,0 +1,45 @@ +CC ?= gcc +CFLAGS ?= -Wall -Wextra -march=native +LIBS ?= -lm -lmpi + +SYSTYPE = $(strip $(shell uname -n)) + +PROG ?= jacobi_mpi_Send_Recv_nonblocking_$(SYSTYPE) +PROG_DEBUG = $(PROG)_DEBUG +PROG_MEMCHECK = $(PROG)_MEMCHECK +PROG_CALLGRIND = 
$(PROG)_CALLGRIND +PROG_CACHEGRIND = $(PROG)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/mpi/Send_Recv_nonblocking/make_mpi_path b/jacobi/mpi/Send_Recv_nonblocking/make_mpi_path new file mode 100644 index 0000000000000000000000000000000000000000..f2c3de9e0d1796034a518159f4c53a40e76919fc --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/make_mpi_path @@ -0,0 +1,9 @@ +# set the MPI install path + +MPI_INSTALL_PATH = /home/gozzilla/software/openmpi/openmpi-5.0.3 + + + +MPI = $(MPI_INSTALL_PATH) +MPI_INC = $(MPI)/include +MPI_LIB = $(MPI)/lib \ No newline at end of file diff --git a/jacobi/mpi/Send_Recv_nonblocking/src/jacobi_2D_mpi_send_recv_nonblocking.c b/jacobi/mpi/Send_Recv_nonblocking/src/jacobi_2D_mpi_send_recv_nonblocking.c new file mode 100644 index 0000000000000000000000000000000000000000..8add90d0298826b835d88f10ff2c55518fa37960 --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/src/jacobi_2D_mpi_send_recv_nonblocking.c @@ -0,0 +1,522 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. 
Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +/* #define DEBUG */ + +typedef struct MyRows +{ + int start; + int end; +} myDomain; + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + const int, + const int); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict, + const MyData *const restrict, const int, const int, const int, const int); + +void get_domains(MyData **const buffer, + myDomain *const domain, + const int grid_dim, + const int rank, + const int nranks, + const MPI_Comm comm); + +void mpi_exchange_nonblocking_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d); + +void WriteSolution(MyData **const phi, const int nx, const int ny); + +void copy_grids(MyData **const restrict A, + MyData **const restrict B, + const int xbeg, + const int xend, + const int ybeg, + const int yend); + +int main(int argc, char **argv) +{ + int rank, Nranks; + const MPI_Comm comm = MPI_COMM_WORLD; + MPI_Init(&argc, &argv); + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &Nranks); + + if ((argc <= 1) && !rank) + { + printf("\n\t Usage: <executable> <grid_size> \n\n"); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* global X and Y grid size (square matrix supposed) */ + const int NX_GLOB = (int) strtol(argv[1], NULL, 10); + const int NY_GLOB = NX_GLOB; + + /******************************************************************************************************/ + /* 1D MPI-decomposition: + the physical domain is sliced into slabs along the vertical direction, + while the grids are replicated across the MPI processes. + This approach is not the most efficient in terms of memory usage, + because the arrays are replicated across MPI process instead of to be distributed */ + + /* get the reminder, i.e. take into account uneven + decomposition of points among the processes */ + const int rem = (NX_GLOB + 2) % Nranks; + + /* get the amount of data for each MPI process: + - chunk is supposed to be >= 1 */ + const int chunk = (NX_GLOB + 2 - rem) / Nranks; + if (chunk < 1) + { + printf("\n\t chunk < 1 ... aborting ...[rank: %d]\n", rank); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* get the slab dimension along vertical direction: */ + int incr, offset; + if (rank < rem) + { + incr = chunk + 1; + offset = 0; + } + else + { + incr = chunk; + offset = rem; + } + + myDomain domain; + domain.start = ((rank * incr) + offset); + domain.end = (domain.start + incr) - 1; + /* boundaries */ + domain.start = ((domain.start == 0) ? NGHOST : domain.start); + domain.end = ((domain.end == NX_GLOB + 1) ? NX_GLOB : domain.end); + + /* rank of the top process */ + const int nbrtop = (((rank + 1) >= Nranks) ? MPI_PROC_NULL : (rank + 1)); + /* rank of the bottom process */ + const int nbrbottom = (((rank - 1) < 0) ? 
MPI_PROC_NULL : (rank - 1)); + + /******************************************************************************************************/ + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int iend = ibeg + NX_GLOB - 1; + const int nx = iend - ibeg + 1; + const int nx_tot = nx + 2 * NGHOST; + + const int jbeg = NGHOST; + const int jend = jbeg + NY_GLOB - 1; + const int ny = jend - jbeg + 1; + const int ny_tot = ny + 2 * NGHOST; + + if (rank == MASTER) + { + printf("\n\t Grid indices:"); + printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot); + printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot); + } + +#if defined(DEBUG) + for (int task=0 ; task<Nranks ; task++) + { + MPI_Barrier(comm); + + if (task == rank) + { + printf("\n\t rank: %d", rank); + printf("\n\t\t domain.start: %d - domain.end: %d", domain.start, domain.end); + printf("\n\t\t nbrtop: %d - nbrbottom: %d \n", nbrtop, nbrbottom); + fflush(stdout); + } + } + + MPI_Barrier(comm); +#endif /* DEBUG */ + + /* -------------------------------------------------------- + 2. Generate grid, allocate memory + Not optimized because the grids are (unnecessarily) + replicated across MPI processes + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + MyData *x = xg; /* Global and local grids are the same */ + MyData *y = yg; /* for serial version of the code */ + + /* grids memory allocation */ + MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot); + MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot); + + /* -------------------------------------------------------- + 3. Initialize solution array to 0 + -------------------------------------------------------- */ + + for (int j=jbeg ; j<=jend ; j++) + for (int i=ibeg ; i<=iend ; i++) + { + phi0[j][i] = 0.0; + phi[j][i] = 0.0; + } + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = MPI_Wtime(); + + /* -- 4a. Set boundary conditions first -- */ + BoundaryConditions(phi0, x, y, nx, ny); + BoundaryConditions(phi, x, y, nx, ny); + + MyData err = 1.0; + /* iterations */ + int k = 0; + while (1) + { + /* -- 4c. 
Jacobi's method and residual (interior points) -- */ + /* core algorithm */ + + err = 0.0; + JacobiAlgorithm(phi, phi0, &err, delta, + ibeg, iend, domain.start, domain.end); + + if (!rank) + printf("\n\t Iteration = %d - err = %lg\n",k, err); + + /* increase the counter of loop iterations */ + k++; + + /* get the total error */ + MyData toterr; + /* combines values from all processes and distributes the result back to all processes */ + MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm); + + /* check convergence */ + if (toterr <= TOL) + { + /* master task gathers all the domains */ + get_domains(phi, &domain, NY_GLOB, rank, Nranks, comm); + + break; + } + + /* -- 4b. MPI communications */ + mpi_exchange_nonblocking_1d(phi, NX_GLOB, + nbrtop, nbrbottom, + domain.start, domain.end, comm); + + /* swap the pointers */ + MyData **tmp = phi; + phi = phi0; + phi0 = tmp; + } + + /* master rank writes the solution */ + if (!rank) + { + WriteSolution(phi, nx, ny); + + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start); + } + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + if (yg) + free(yg); + + if (xg) + free(xg); + + MPI_Finalize(); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int iend = ibeg + nx - 1; + + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + int i,j; + + /* -- Left -- */ + i = ibeg - 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (1.0 - y[j]); + + /* -- Right -- */ + i = jend + 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (y[j] * y[j]); + + /* -- Bottom -- */ + j = jbeg - 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = (1.0 - x[i]); + + /* -- Top -- */ + j = jend + 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = x[i]; + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData *const restrict delta, + const int ibeg, + const int iend, + const int jbeg, + const int jend) +{ + *error = 0.0; + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] + + Phi0[j-1][i] + Phi0[j+1][i]); + + *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]); + } /* loop over columns */ + } /* loop over rows */ + + return; +} + +/* ********************************************************************* */ +void mpi_exchange_nonblocking_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d) +{ + /* The function is called by each MPI rank + - nbrtop is the MPI process with rank + 1 + - nbrbottom is the MPI process with rank - 1 */ + + /* communication request (handle) */ + MPI_Request request[4]; + + /************************* Process is ready to receive data ****************************/ + /* Perform a nonblocking receive from the bottom (rank-1) process */ + MPI_Irecv(&buffer[start-1][1], n, MPI_MyDatatype, nbrbottom, 0, comm1d, &request[0]); + + /* Perform a nonblocking receive from 
the top (rank+1) process */ + MPI_Irecv(&buffer[end+1][1], n, MPI_MyDatatype, nbrtop, 1, comm1d, &request[1]); + /***************************************************************************************/ + + /*************** Process begins to send data *******************************************/ + /* Perform a nonblocking send to the top (rank+1) process */ + MPI_Isend(&buffer[end][1], n, MPI_MyDatatype, nbrtop, 0, comm1d, &request[2]); + + /* - Perform a nonblocking send to the bottom (rank-1) process */ + MPI_Isend(&buffer[start][1], n, MPI_MyDatatype, nbrbottom, 1, comm1d, &request[3]); + /***************************************************************************************/ + + /* some useful work (computation) could be performed (while the buffer containing the */ + /* message has not to be modified) */ + + /******************** Waits for all given MPI Requests to complete *********************/ + MPI_Waitall(4, request, MPI_STATUSES_IGNORE); + + return; +} + +/* ********************************************************************* */ + +void get_domains(MyData **const buffer, + myDomain *const domain, + const int grid_dim, + const int rank, + const int nranks, + const MPI_Comm comm) +{ + /* Master process gathers all the domains */ + + /***************************** get the domain boundaries from each process *************************************/ + myDomain *boundaries = NULL; + if (rank == MASTER) + { + boundaries = (myDomain *)malloc(nranks * sizeof(*boundaries)); + if (boundaries == NULL) + { + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + boundaries[MASTER].start = domain->start; + boundaries[MASTER].end = domain->end; + } + + for (int task=0 ; task<nranks ; task++) + { + if (rank == MASTER) + { + if (task) + { + MPI_Status status; + MPI_Recv((void *)&boundaries[task], sizeof(myDomain), MPI_BYTE, task, 0, comm, &status); + if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) + { + free(boundaries); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + } +#if defined(DEBUG) + printf("\n\t Diplacements[%d].start = %d - Diplacements[%d].end = %d", + task, boundaries[task].start, task, boundaries[task].end); +#endif /* DEBUG */ + } /* MASTER */ + else if (task == rank) + { + MPI_Send((void *)domain, sizeof(myDomain), MPI_BYTE, MASTER, 0, comm); + } + } /* loop over nranks */ + +#if defined(DEBUG) + if (rank == MASTER) + { + printf("\n"); + fflush(stdout); + } +#endif /* DEBUG */ + + /**************************************************************************************************/ + + /***************************************** get the domain from each process *************************/ + + for (int task=0 ; task<nranks ; task++) + { + if (rank == MASTER) + { + if (task) + { + MPI_Status status; + /* number of grid points to receive (including ghost points) */ + const int nrows = (boundaries[task].end - boundaries[task].start + 1); + const int elements = (nrows * (grid_dim + 2)); + MPI_Recv((void *)&buffer[boundaries[task].start][0], elements, MPI_MyDatatype, task, 0, comm, &status); + if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) + { + free(boundaries); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + } + } /* MASTER */ + else if (task == rank) + { + const int nrows = (domain->end - domain->start + 1); + const int elements = (nrows * (grid_dim + 2)); + MPI_Send((void *)&buffer[domain->start][0], elements, MPI_MyDatatype, MASTER, 0, comm); + } + } /* loop over nranks */ + + 
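/* A possible alternative, shown only as a sketch and not part of the patch above:
   because each rank's slab is one contiguous block of (end - start + 1) rows of
   (grid_dim + 2) elements, the point-to-point loop could be collapsed into a single
   MPI_Gatherv, much like the cartesian variant added later in this patch. The names
   counts, displs and myelems below are illustrative only. */
#if 0
  int *counts = NULL, *displs = NULL;
  if (rank == MASTER)
    {
      counts = (int *)malloc(nranks * sizeof(*counts));
      displs = (int *)malloc(nranks * sizeof(*displs));
      assert((counts != NULL) && (displs != NULL));
      for (int task=0 ; task<nranks ; task++)
        {
          /* how many elements each rank contributes and where its rows start */
          counts[task] = (boundaries[task].end - boundaries[task].start + 1) * (grid_dim + 2);
          displs[task] = boundaries[task].start * (grid_dim + 2);
        }
    }
  const int myelems = (domain->end - domain->start + 1) * (grid_dim + 2);
  /* the master's slab already sits at the right offset in the replicated grid,
     so the master uses MPI_IN_PLACE instead of sending to itself */
  MPI_Gatherv((rank == MASTER) ? MPI_IN_PLACE : (void *)&buffer[domain->start][0],
              myelems, MPI_MyDatatype,
              &buffer[0][0], counts, displs, MPI_MyDatatype, MASTER, comm);
  if (rank == MASTER)
    {
      free(displs);
      free(counts);
    }
#endif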
/***************************************************************************************************/ + + if (rank == MASTER) + free(boundaries); + + return; +} + +/* ********************************************************************* */ +void WriteSolution(MyData **const phi, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + static int nfile = 0; /* File counter */ + + char fname[128]; + sprintf(fname,"jacobi2D_mpi_send_recv_nonblocking_%02d.bin", nfile); + + FILE *fp; + printf ("> Writing %s\n",fname); + fp = fopen(fname, "wb"); + + /* discard boundaies */ + for (int j=jbeg ; j<=jend ; j++) + { + fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp); + } + + nfile++; + fclose(fp); +} diff --git a/jacobi/mpi/Send_Recv_nonblocking/src/tools.c b/jacobi/mpi/Send_Recv_nonblocking/src/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/src/tools.c @@ -0,0 +1,59 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate memory for a double precision array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = malloc(nx * sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timeval tmp; + gettimeofday(&tmp, (struct timezone *)0); + double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; + + return sec; +} diff --git a/jacobi/mpi/Send_Recv_paired/Makefile b/jacobi/mpi/Send_Recv_paired/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..4d38d38eabe9aa2bade6928c4395d23debcbd4e0 --- /dev/null +++ b/jacobi/mpi/Send_Recv_paired/Makefile @@ -0,0 +1,57 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +include make_mpi_path +####################################################################### + 
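# Typical usage, assuming make_mpi_path points to a valid MPI installation
# (the executable name follows PROG in make.def, i.e. the suffix is `uname -n`):
#   $ make mpi
#   $ mpirun -n 4 ./jacobi_mpi_Send_Recv_paired_<hostname> 128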
+.PHONY: info mpi valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean + +info: + @echo ' ' + @echo '-----------------------------------------------------------------------------------------' + @echo '$$ make ---> compile the mpi application ' + @echo '$$ make valgrind_memcheck ---> run the mpi application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the mpi application using Valgrind under Callgrind ' + @echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '-----------------------------------------------------------------------------------------' + @echo ' ' + +mpi: $(PROG) + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + mpirun -n 2 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 5 5 + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... valgrind_callgrind ...OOOooo' + mpirun -n 2 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + mpirun -n 2 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... 
valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin diff --git a/jacobi/mpi/Send_Recv_paired/include/allvars.h b/jacobi/mpi/Send_Recv_paired/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..798e4e6a2f2a740b3ab5a1394ac85eef8da1255c --- /dev/null +++ b/jacobi/mpi/Send_Recv_paired/include/allvars.h @@ -0,0 +1,20 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +#include <mpi.h> + +#define MASTER 0 + +#if defined(SINGLE_PRECISION) +typedef float MyData; +#define MPI_MyDatatype MPI_FLOAT_PRECISION +#else +/* double precision is assumed by default */ +typedef double MyData; +#define MPI_MyDatatype MPI_DOUBLE_PRECISION +#endif /* SINGLE_PRECISION */ diff --git a/jacobi/mpi/Send_Recv_paired/include/tools.h b/jacobi/mpi/Send_Recv_paired/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154 --- /dev/null +++ b/jacobi/mpi/Send_Recv_paired/include/tools.h @@ -0,0 +1,18 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); diff --git a/jacobi/mpi/Send_Recv_paired/make.def b/jacobi/mpi/Send_Recv_paired/make.def new file mode 100644 index 0000000000000000000000000000000000000000..3a66a18bfeab4440599ee89539e658d9a376dbf6 --- /dev/null +++ b/jacobi/mpi/Send_Recv_paired/make.def @@ -0,0 +1,45 @@ +CC ?= gcc +CFLAGS ?= -Wall -Wextra -march=native +LIBS ?= -lm -lmpi + +SYSTYPE = $(strip $(shell uname -n)) + +PROG ?= jacobi_mpi_Send_Recv_paired_$(SYSTYPE) +PROG_DEBUG = $(PROG)_DEBUG +PROG_MEMCHECK = $(PROG)_MEMCHECK +PROG_CALLGRIND = $(PROG)_CALLGRIND +PROG_CACHEGRIND = $(PROG)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/mpi/Send_Recv_paired/make_mpi_path b/jacobi/mpi/Send_Recv_paired/make_mpi_path new file mode 100644 
index 0000000000000000000000000000000000000000..f2c3de9e0d1796034a518159f4c53a40e76919fc --- /dev/null +++ b/jacobi/mpi/Send_Recv_paired/make_mpi_path @@ -0,0 +1,9 @@ +# set the MPI install path + +MPI_INSTALL_PATH = /home/gozzilla/software/openmpi/openmpi-5.0.3 + + + +MPI = $(MPI_INSTALL_PATH) +MPI_INC = $(MPI)/include +MPI_LIB = $(MPI)/lib \ No newline at end of file diff --git a/jacobi/mpi/Send_Recv_paired/src/jacobi_2D_mpi_send_recv_paired.c b/jacobi/mpi/Send_Recv_paired/src/jacobi_2D_mpi_send_recv_paired.c new file mode 100644 index 0000000000000000000000000000000000000000..c78c890e39e8738b54a6d54025b607eefd24b3f5 --- /dev/null +++ b/jacobi/mpi/Send_Recv_paired/src/jacobi_2D_mpi_send_recv_paired.c @@ -0,0 +1,537 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +/* #define DEBUG */ + +typedef struct MyRows +{ + int start; + int end; +} myDomain; + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + const int, + const int); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict, + const MyData *const restrict, const int, const int, const int, const int); + +void get_domains(MyData **const buffer, + myDomain *const domain, + const int grid_dim, + const int rank, + const int nranks, + const MPI_Comm comm); + +void mpi_exchange_paired_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d); + +void WriteSolution(MyData **const phi, const int nx, const int ny); + +void copy_grids(MyData **const restrict A, + MyData **const restrict B, + const int xbeg, + const int xend, + const int ybeg, + const int yend); + +int main(int argc, char **argv) +{ + int rank, Nranks; + const MPI_Comm comm = MPI_COMM_WORLD; + MPI_Init(&argc, &argv); + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &Nranks); + + if ((argc <= 1) && !rank) + { + printf("\n\t Usage: <executable> <grid_size> \n\n"); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* global X and Y grid size (square matrix supposed) */ + const int NX_GLOB = (int) strtol(argv[1], NULL, 10); + const int NY_GLOB = NX_GLOB; + + /******************************************************************************************************/ + /* 1D MPI-decomposition: + the physical domain is sliced into slabs along the vertical direction, + while the grids are replicated across the MPI processes. + This approach is not the most efficient in terms of memory usage, + because the arrays are replicated across MPI process instead of to be distributed */ + + /* get the reminder, i.e. take into account uneven + decomposition of points among the processes */ + const int rem = (NX_GLOB + 2) % Nranks; + + /* get the amount of data for each MPI process: + - chunk is supposed to be >= 1 */ + const int chunk = (NX_GLOB + 2 - rem) / Nranks; + if (chunk < 1) + { + printf("\n\t chunk < 1 ... 
aborting ...[rank: %d]\n", rank); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* get the slab dimension along vertical direction: */ + int incr, offset; + if (rank < rem) + { + incr = chunk + 1; + offset = 0; + } + else + { + incr = chunk; + offset = rem; + } + + myDomain domain; + domain.start = ((rank * incr) + offset); + domain.end = (domain.start + incr) - 1; + /* boundaries */ + domain.start = ((domain.start == 0) ? NGHOST : domain.start); + domain.end = ((domain.end == NX_GLOB + 1) ? NX_GLOB : domain.end); + + /* rank of the top process */ + const int nbrtop = (((rank + 1) >= Nranks) ? MPI_PROC_NULL : (rank + 1)); + /* rank of the bottom process */ + const int nbrbottom = (((rank - 1) < 0) ? MPI_PROC_NULL : (rank - 1)); + + /******************************************************************************************************/ + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int iend = ibeg + NX_GLOB - 1; + const int nx = iend - ibeg + 1; + const int nx_tot = nx + 2 * NGHOST; + + const int jbeg = NGHOST; + const int jend = jbeg + NY_GLOB - 1; + const int ny = jend - jbeg + 1; + const int ny_tot = ny + 2 * NGHOST; + + if (rank == MASTER) + { + printf("\n\t Grid indices:"); + printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot); + printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot); + } + +#if defined(DEBUG) + for (int task=0 ; task<Nranks ; task++) + { + MPI_Barrier(comm); + + if (task == rank) + { + printf("\n\t rank: %d", rank); + printf("\n\t\t domain.start: %d - domain.end: %d", domain.start, domain.end); + printf("\n\t\t nbrtop: %d - nbrbottom: %d \n", nbrtop, nbrbottom); + fflush(stdout); + } + } + + MPI_Barrier(comm); +#endif /* DEBUG */ + + /* -------------------------------------------------------- + 2. Generate grid, allocate memory + Not optimized because the grids are (unnecessarily) + replicated across MPI processes + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + MyData *x = xg; /* Global and local grids are the same */ + MyData *y = yg; /* for serial version of the code */ + + /* grids memory allocation */ + MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot); + MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot); + + /* -------------------------------------------------------- + 3. Initialize solution array to 0 + -------------------------------------------------------- */ + + for (int j=jbeg ; j<=jend ; j++) + for (int i=ibeg ; i<=iend ; i++) + { + phi0[j][i] = 0.0; + phi[j][i] = 0.0; + } + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = MPI_Wtime(); + + /* -- 4a. 
Set boundary conditions first -- */ + BoundaryConditions(phi0, x, y, nx, ny); + BoundaryConditions(phi, x, y, nx, ny); + + MyData err = 1.0; + /* iterations */ + int k = 0; + while (1) + { + /* -- 4c. Jacobi's method and residual (interior points) -- */ + /* core algorithm */ + + err = 0.0; + JacobiAlgorithm(phi, phi0, &err, delta, + ibeg, iend, domain.start, domain.end); + + if (!rank) + printf("\n\t Iteration = %d - err = %lg\n",k, err); + + /* increase the counter of loop iterations */ + k++; + + /* get the total error */ + MyData toterr; + /* combines values from all processes and distributes the result back to all processes */ + MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm); + + /* check convergence */ + if (toterr <= TOL) + { + /* master task gathers all the domains */ + get_domains(phi, &domain, NY_GLOB, rank, Nranks, comm); + + break; + } + + /* -- 4b. MPI communications */ + mpi_exchange_paired_1d(phi, NX_GLOB, + nbrtop, nbrbottom, + domain.start, domain.end, comm); + + /* swap the pointers */ + MyData **tmp = phi; + phi = phi0; + phi0 = tmp; + } + + /* master rank writes the solution */ + if (!rank) + { + WriteSolution(phi, nx, ny); + + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start); + } + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + if (yg) + free(yg); + + if (xg) + free(xg); + + MPI_Finalize(); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int iend = ibeg + nx - 1; + + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + int i,j; + + /* -- Left -- */ + i = ibeg - 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (1.0 - y[j]); + + /* -- Right -- */ + i = jend + 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (y[j] * y[j]); + + /* -- Bottom -- */ + j = jbeg - 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = (1.0 - x[i]); + + /* -- Top -- */ + j = jend + 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = x[i]; + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData *const restrict delta, + const int ibeg, + const int iend, + const int jbeg, + const int jend) +{ + *error = 0.0; + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] + + Phi0[j-1][i] + Phi0[j+1][i]); + + *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]); + } /* loop over columns */ + } /* loop over rows */ + + return; +} + +/* ********************************************************************* */ +void mpi_exchange_paired_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d) +{ + /* The function is called by each MPI rank */ + /* - nbrtop is the MPI process with rank + 1 */ + /* - nbrbottom is the MPI process with rank - 1 */ + + int rank; + MPI_Comm_rank(comm1d, &rank); + + if ((rank % 2) == 0) /* even rank */ + { + /***************** First communication stage 
************************************************/ + /* Perform a blocking send to the top (rank+1) process */ + MPI_Send(&buffer[end][1], n, MPI_MyDatatype, nbrtop, 0, comm1d); + + /* Perform a blocking receive from the bottom (rank-1) process */ + MPI_Recv(&buffer[start-1][1], n, MPI_MyDatatype, nbrbottom, 0, comm1d, MPI_STATUS_IGNORE); + /********************************************************************************************/ + + /**************** Second communication stage ************************************************/ + /* - Perform a blocking send to the bottom (rank-1) process */ + MPI_Send(&buffer[start][1], n, MPI_MyDatatype, nbrbottom, 1, comm1d); + + /* Perform a blocking receive from the top (rank+1) process */ + MPI_Recv(&buffer[end+1][1], n, MPI_MyDatatype, nbrtop, 1, comm1d, MPI_STATUS_IGNORE); + /********************************************************************************************/ + } + else /* odd rank */ + { + /***************** First communication stage ************************************************/ + /* Perform a blocking receive from the bottom (rank-1) process */ + MPI_Recv(&buffer[start-1][1], n, MPI_MyDatatype, nbrbottom, 0, comm1d, MPI_STATUS_IGNORE); + + /* Perform a blocking send to the top (rank+1) process */ + MPI_Send(&buffer[end][1], n, MPI_MyDatatype, nbrtop, 0, comm1d); + /********************************************************************************************/ + + /**************** Second communication stage ************************************************/ + /* Perform a blocking receive from the top (rank+1) process */ + MPI_Recv(&buffer[end+1][1], n, MPI_MyDatatype, nbrtop, 1, comm1d, MPI_STATUS_IGNORE); + + /* - Perform a blocking send to the bottom (rank-1) process */ + MPI_Send(&buffer[start][1], n, MPI_MyDatatype, nbrbottom, 1, comm1d); + /********************************************************************************************/ + } + + return; +} + +/* ********************************************************************* */ + +void get_domains(MyData **const buffer, + myDomain *const domain, + const int grid_dim, + const int rank, + const int nranks, + const MPI_Comm comm) +{ + /* Master process gathers all the domains */ + + /***************************** get the domain boundaries from each process *************************************/ + myDomain *boundaries = NULL; + if (rank == MASTER) + { + boundaries = (myDomain *)malloc(nranks * sizeof(*boundaries)); + if (boundaries == NULL) + { + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + boundaries[MASTER].start = domain->start; + boundaries[MASTER].end = domain->end; + } + + for (int task=0 ; task<nranks ; task++) + { + if (rank == MASTER) + { + if (task) + { + MPI_Status status; + MPI_Recv((void *)&boundaries[task], sizeof(myDomain), MPI_BYTE, task, 0, comm, &status); + if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) + { + free(boundaries); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + } +#if defined(DEBUG) + printf("\n\t Diplacements[%d].start = %d - Diplacements[%d].end = %d", + task, boundaries[task].start, task, boundaries[task].end); +#endif /* DEBUG */ + } /* MASTER */ + else if (task == rank) + { + MPI_Send((void *)domain, sizeof(myDomain), MPI_BYTE, MASTER, 0, comm); + } + } /* loop over nranks */ + +#if defined(DEBUG) + if (rank == MASTER) + { + printf("\n"); + fflush(stdout); + } +#endif /* DEBUG */ + + /**************************************************************************************************/ + + 
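/* Sketch of an equivalent collective formulation (not part of the patch): since
   myDomain holds two ints, the boundary collection above can be done with one
   MPI_Gather instead of a loop of Send/Recv, similarly to what the cartesian
   variant later in this patch does. The names bounds and all_bounds are
   illustrative only. */
#if 0
  int bounds[2] = {domain->start, domain->end};
  int *all_bounds = NULL;
  if (rank == MASTER)
    {
      all_bounds = (int *)malloc(2 * nranks * sizeof(*all_bounds));
      assert(all_bounds != NULL);
    }
  /* every rank contributes 2 ints; the master receives 2 ints from each rank */
  MPI_Gather(bounds, 2, MPI_INT, all_bounds, 2, MPI_INT, MASTER, comm);
  if (rank == MASTER)
    {
      for (int task=0 ; task<nranks ; task++)
        {
          boundaries[task].start = all_bounds[(2 * task)];
          boundaries[task].end   = all_bounds[(2 * task) + 1];
        }
      free(all_bounds);
    }
#endif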
/***************************************** get the domain from each process *************************/ + + for (int task=0 ; task<nranks ; task++) + { + if (rank == MASTER) + { + if (task) + { + MPI_Status status; + /* number of grid points to receive (including ghost points) */ + const int nrows = (boundaries[task].end - boundaries[task].start + 1); + const int elements = (nrows * (grid_dim + 2)); + MPI_Recv((void *)&buffer[boundaries[task].start][0], elements, MPI_MyDatatype, task, 0, comm, &status); + if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) + { + free(boundaries); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + } + } /* MASTER */ + else if (task == rank) + { + const int nrows = (domain->end - domain->start + 1); + const int elements = (nrows * (grid_dim + 2)); + MPI_Send((void *)&buffer[domain->start][0], elements, MPI_MyDatatype, MASTER, 0, comm); + } + } /* loop over nranks */ + + /***************************************************************************************************/ + + if (rank == MASTER) + free(boundaries); + + return; +} + +/* ********************************************************************* */ +void WriteSolution(MyData **const phi, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + static int nfile = 0; /* File counter */ + + char fname[32]; + sprintf(fname,"jacobi2D_mpi_send_recv_paired_%02d.bin", nfile); + + FILE *fp; + printf ("> Writing %s\n",fname); + fp = fopen(fname, "wb"); + + /* discard boundaies */ + for (int j=jbeg ; j<=jend ; j++) + { + fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp); + } + + nfile++; + fclose(fp); +} diff --git a/jacobi/mpi/Send_Recv_paired/src/tools.c b/jacobi/mpi/Send_Recv_paired/src/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce --- /dev/null +++ b/jacobi/mpi/Send_Recv_paired/src/tools.c @@ -0,0 +1,59 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. 
Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate memory for a double precision array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = malloc(nx * sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timeval tmp; + gettimeofday(&tmp, (struct timezone *)0); + double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; + + return sec; +} diff --git a/jacobi/mpi/cartesian/Makefile b/jacobi/mpi/cartesian/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..02695b48106ca217759865cd3708f7ffe738fb1c --- /dev/null +++ b/jacobi/mpi/cartesian/Makefile @@ -0,0 +1,57 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +include make_mpi_path +####################################################################### + +.PHONY: info mpi valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean + +info: + @echo ' ' + @echo '-----------------------------------------------------------------------------------------' + @echo '$$ make ---> compile the mpi application ' + @echo '$$ make valgrind_memcheck ---> run the mpi application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the mpi application using Valgrind under Callgrind ' + @echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '-----------------------------------------------------------------------------------------' + @echo ' ' + +mpi: $(PROG) + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + mpirun -n 4 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 9 2 + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... 
valgrind_callgrind ...OOOooo' + mpirun -n 4 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 9 2 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + mpirun -n 4 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 9 2 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin diff --git a/jacobi/mpi/cartesian/include/allvars.h b/jacobi/mpi/cartesian/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..798e4e6a2f2a740b3ab5a1394ac85eef8da1255c --- /dev/null +++ b/jacobi/mpi/cartesian/include/allvars.h @@ -0,0 +1,20 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +#include <mpi.h> + +#define MASTER 0 + +#if defined(SINGLE_PRECISION) +typedef float MyData; +#define MPI_MyDatatype MPI_FLOAT_PRECISION +#else +/* double precision is assumed by default */ +typedef double MyData; +#define MPI_MyDatatype MPI_DOUBLE_PRECISION +#endif /* SINGLE_PRECISION */ diff --git a/jacobi/mpi/cartesian/include/tools.h b/jacobi/mpi/cartesian/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154 --- /dev/null +++ b/jacobi/mpi/cartesian/include/tools.h @@ -0,0 +1,18 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); diff --git a/jacobi/mpi/cartesian/make.def b/jacobi/mpi/cartesian/make.def new file mode 100644 index 0000000000000000000000000000000000000000..9b011868551a84d967d65b3d5b9ab4cab794e61e --- /dev/null +++ b/jacobi/mpi/cartesian/make.def @@ -0,0 +1,45 @@ +CC ?= gcc +CFLAGS ?= -Wall -Wextra -march=native +LIBS ?= -lm -lmpi + +SYSTYPE = $(strip $(shell uname -n)) + +PROG ?= jacobi_mpi_cartesian_$(SYSTYPE) +PROG_DEBUG = $(PROG)_DEBUG +PROG_MEMCHECK = $(PROG)_MEMCHECK +PROG_CALLGRIND = $(PROG)_CALLGRIND +PROG_CACHEGRIND = $(PROG)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o 
$@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/mpi/cartesian/make_mpi_path b/jacobi/mpi/cartesian/make_mpi_path new file mode 100644 index 0000000000000000000000000000000000000000..f2c3de9e0d1796034a518159f4c53a40e76919fc --- /dev/null +++ b/jacobi/mpi/cartesian/make_mpi_path @@ -0,0 +1,9 @@ +# set the MPI install path + +MPI_INSTALL_PATH = /home/gozzilla/software/openmpi/openmpi-5.0.3 + + + +MPI = $(MPI_INSTALL_PATH) +MPI_INC = $(MPI)/include +MPI_LIB = $(MPI)/lib \ No newline at end of file diff --git a/jacobi/mpi/cartesian/src/jacobi_2D_mpi_cartesian.c b/jacobi/mpi/cartesian/src/jacobi_2D_mpi_cartesian.c new file mode 100644 index 0000000000000000000000000000000000000000..826c7188482bbfe40983df8f4098cfdfa9297d35 --- /dev/null +++ b/jacobi/mpi/cartesian/src/jacobi_2D_mpi_cartesian.c @@ -0,0 +1,626 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> + +/* #define DEBUG */ + +typedef struct MyGrid +{ + int start[NDIM]; + int end[NDIM]; +} myDomain; + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + const int, + const int); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict, + const MyData *const restrict, const int, const int, const int, const int); + +void get_domains(MyData **const buffer, + myDomain *const domain, + const MPI_Comm comm); + +void mpi_exchange_cartesian(MyData **const buffer, + const int nbrtop, + const int nbrbottom, + const int nbrleft, + const int nbrright, + myDomain *const domain, + const int grid_y, + const MPI_Comm comm2d); + +void WriteSolution(MyData **const phi, const int nx, const int ny); + +void copy_grids(MyData **const restrict A, + MyData **const restrict B, + const int xbeg, + const int xend, + const int ybeg, + const int yend); + +int main(int argc, char **argv) +{ + int rank, Nranks; + const MPI_Comm comm = MPI_COMM_WORLD; + MPI_Init(&argc, &argv); + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &Nranks); + + if ((argc <= 2) && (!rank)) + { + printf("\n\t Usage: <executable> <grid_size> <cartesian communicator size_X>\n\n"); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* global X and Y grid size (square matrix supposed) */ + const int NX_GLOB = (int) strtol(argv[1], NULL, 10); + const int NY_GLOB = NX_GLOB; + + /********************************** Cartesin topology *******************************************************/ + const int cartesian_grid_x = (int)strtol(argv[2], NULL, 10); + const int cartesian_grid_y = ((Nranks % cartesian_grid_x == 0) ? 
(Nranks / cartesian_grid_x) : -1); + if (cartesian_grid_y == -1) + { + if (!rank) + { + printf("\n\t Nranks mod cartesian_grid_x != 0 ... aborting ...\n"); + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + } + + const int dims[NDIM] = {cartesian_grid_x, cartesian_grid_y}; + const int periods[NDIM] = {0 ,0}; /* not periodic */ + const int reorder = 0; /* setting the logical value reorder to TRUE allows MPI to */ + /* reorder the rank of the processes figuring out the */ + /* neighbors in the actual hardware for better performance */ + + /* make a new communicator to which topology information has been attached */ + MPI_Comm comm2d = MPI_COMM_NULL; + MPI_Cart_create(MPI_COMM_WORLD, NDIM, dims, periods, reorder, &comm2d); + if (comm2d == MPI_COMM_NULL) + { + printf("\n\t Process %d is not part of the new communicator \n", rank); + fflush(stdout); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* determine the process coords in cartesian topology given rank in group */ + int coords[NDIM]; + MPI_Cart_coords(comm2d, rank, NDIM, coords); + + /* get bottom and top neighbors (X direction) */ + int nbrtop, nbrbottom; + MPI_Cart_shift(comm2d, X, 1, &nbrbottom, &nbrtop); + + /* get left and right neighbors (Y direction) */ + int nbrright, nbrleft; + MPI_Cart_shift(comm2d, Y, 1, &nbrleft, &nbrright); + + /************************************************************************************************************/ + /* 2D MPI-cartesian decomposition: + the grids are replicated across the MPI processes. + This approach is not the most efficient in terms of memory usage, + because the arrays are replicated across MPI process instead of to be distributed */ + + /* get the reminder, i.e. take into account uneven + decomposition of points among the processes */ + const int rem[NDIM] = {(NX_GLOB + 2) % dims[X], + (NY_GLOB + 2) % dims[Y]}; + + /* get the amount of data for each MPI process: + - chunk is supposed to be >= 1 */ + const int chunk[NDIM] = {(NX_GLOB + 2 - rem[X]) / dims[X], + (NY_GLOB + 2 - rem[Y]) / dims[Y]}; + if ((chunk[X] < 1) || (chunk[Y] < 1)) + { + printf("\n\t chunk[X] < 1 || chunk[Y] < 1 ... aborting ...[rank: %d]\n", rank); + MPI_Comm_free(&comm2d); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* get the subgrid dimension along X and Y directions: */ + int incr[NDIM], offset[NDIM]; + for (int dim=0 ; dim<NDIM ; dim++) + { + if (coords[dim] < rem[dim]) + { + incr[dim] = chunk[dim] + 1; + offset[dim] = 0; + } + else + { + incr[dim] = chunk[dim]; + offset[dim] = rem[dim]; + } + } + + /* subdomain managed by the task */ + myDomain domain; + for (int dim=0 ; dim<NDIM ; dim++) + { + domain.start[dim] = ((coords[dim] * incr[dim]) + offset[dim]); + domain.end[dim] = (domain.start[dim] + incr[dim]) - 1; + + /* boundaries */ + domain.start[dim] = ((domain.start[dim] == 0) ? NGHOST : domain.start[dim]); + domain.end[dim] = ((domain.end[dim] == NX_GLOB + 1) ? 
NX_GLOB : domain.end[dim]); + } + +#if defined(DEBUG) + for (int task=0 ; task<Nranks ; task++) + { + MPI_Barrier(comm); + + if (task == rank) + { + printf("\n\t rank: %d", rank); + printf("\n\t\t coords = [%d, %d]", coords[X], coords[Y]); + printf("\n\t\t domain.start[X] = %d - domain.end[X] = %d", domain.start[X], domain.end[X]); + printf("\n\t\t domain.start[Y] = %d - domain.end[Y] = %d", domain.start[Y], domain.end[Y]); + printf("\n\t\t nbrtop : %d - nbrbottom: %d", nbrtop, nbrbottom); + printf("\n\t\t nbrright: %d - nbrleft : %d \n", nbrright, nbrleft); + fflush(stdout); + } + } + + MPI_Barrier(comm); +#endif /* DEBUG */ + + /******************************************************************************************************/ + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int iend = ibeg + NX_GLOB - 1; + const int nx = iend - ibeg + 1; + const int nx_tot = nx + 2 * NGHOST; + + const int jbeg = NGHOST; + const int jend = jbeg + NY_GLOB - 1; + const int ny = jend - jbeg + 1; + const int ny_tot = ny + 2 * NGHOST; + + if (rank == MASTER) + { + printf("\n\t Grid indices:"); + printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot); + printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot); + } + + /* -------------------------------------------------------- + 2. Generate grid, allocate memory + Not optimized because the grids are (unnecessarily) + replicated across MPI processes + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + MyData *x = xg; /* Global and local grids are the same */ + MyData *y = yg; /* for serial version of the code */ + + /* grids memory allocation */ + MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot); + MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot); + + /* -------------------------------------------------------- + 3. Initialize solution array to 0 + -------------------------------------------------------- */ + + for (int j=jbeg ; j<=jend ; j++) + for (int i=ibeg ; i<=iend ; i++) + { + phi0[j][i] = 0.0; + phi[j][i] = 0.0; + } + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = MPI_Wtime(); + + /* -- 4a. Set boundary conditions first -- */ + BoundaryConditions(phi0, x, y, nx, ny); + BoundaryConditions(phi, x, y, nx, ny); + + MyData err = 1.0; + /* iterations */ + int k = 0; + while (1) + { + /* -- 4c. 
Jacobi's method and residual (interior points) -- */ + /* core algorithm */ + + err = 0.0; + JacobiAlgorithm(phi, phi0, &err, delta, + domain.start[Y], domain.end[Y], + domain.start[X], domain.end[X]); + + if (!rank) + printf("\n\t Iteration = %d - err = %lg\n",k, err); + + /* increase the counter of loop iterations */ + k++; + + /* get the total error */ + MyData toterr; + /* combines values from all processes and distributes the result back to all processes */ + MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm2d); + + /* check convergence */ + if (toterr <= TOL) + { + /* master task gathers all the domains */ + get_domains(phi, &domain, comm2d); + + break; + } + + /* -- 4b. MPI communications */ + mpi_exchange_cartesian(phi, + nbrtop, nbrbottom, nbrleft, nbrright, + &domain, NY_GLOB, + comm2d); + + /* swap the pointers */ + MyData **tmp = phi; + phi = phi0; + phi0 = tmp; + } + + /* master rank writes the solution */ + if (!rank) + { + WriteSolution(phi, nx, ny); + + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start); + } + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + if (yg) + free(yg); + + if (xg) + free(xg); + + MPI_Comm_free(&comm2d); + + MPI_Finalize(); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int iend = ibeg + nx - 1; + + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + int i,j; + + /* -- Left -- */ + i = ibeg - 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (1.0 - y[j]); + + /* -- Right -- */ + i = jend + 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (y[j] * y[j]); + + /* -- Bottom -- */ + j = jbeg - 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = (1.0 - x[i]); + + /* -- Top -- */ + j = jend + 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = x[i]; + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData *const restrict delta, + const int ibeg, + const int iend, + const int jbeg, + const int jend) +{ + *error = 0.0; + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] + + Phi0[j-1][i] + Phi0[j+1][i]); + + *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]); + } /* loop over columns */ + } /* loop over rows */ + + return; +} + +/* ********************************************************************* */ +void mpi_exchange_cartesian(MyData **const buffer, + const int nbrtop, + const int nbrbottom, + const int nbrleft, + const int nbrright, + myDomain *const domain, + const int grid_y, + const MPI_Comm comm2d) +{ + /* In the data exchange 1D routines seen so far, buffers are contiguous areas in memory, + so that, for example, the datatypes MPI_DOUBLE_PRECISION coupled with the count of + occurrences are sufficient to describe the buffer to be sent. 
+ MPI derived datatypes allow the programmer to specify non-contiguous areas in memory, + such as a column of an array stored in a row-major order like in C programming language */ + + /* Creates a vector (strided) datatype: */ + /* - 1st argument (count) : number of blocks; */ + /* - 2nd argument (blocklength): number of elements in each block; */ + /* - 3rd argument (stride) : number of elements between the start of each block; */ + /* - 4th argument (oldtype) : old datatype; */ + /* - 5th argument (newtype) : new datatype */ + + /* count = (x_end - x_start + 1), i.e. the size of each column */ + /* blocklength = 1, i.e. one double (old datatype) in each block */ + /* stride = NY_GLOB + 2, i.e. ghost points are discarded (grid is replicated) */ + /* oldtype = MPI_MyDatatype */ + + MPI_Datatype column; + MPI_Type_vector((domain->end[X] - domain->start[X] + 1), 1, grid_y + 2, MPI_MyDatatype, &column); + /* commits the datatype */ + MPI_Type_commit(&column); + + /* communication with nbrtop and nbrbottom */ + const int data_row_size = (domain->end[Y] - domain->start[Y] + 1); + MPI_Sendrecv(&buffer[domain->end[X]][domain->start[Y]] , data_row_size, MPI_MyDatatype, nbrtop, 0, + &buffer[domain->start[X]-1][domain->start[Y]], data_row_size, MPI_MyDatatype, nbrbottom, 0, + comm2d, MPI_STATUS_IGNORE); + + MPI_Sendrecv(&buffer[domain->start[X]][domain->start[Y]], data_row_size, MPI_MyDatatype, nbrbottom, 1, + &buffer[domain->end[X]+1][domain->start[Y]], data_row_size, MPI_MyDatatype, nbrtop, 1, + comm2d, MPI_STATUS_IGNORE); + + /* communication with nbrleft, nbrright - vector datatype is used */ + MPI_Sendrecv(&buffer[domain->start[X]][domain->end[Y]], 1, column, nbrright, 0, + &buffer[domain->start[X]][domain->start[Y]-1], 1, column, nbrleft, 0, + comm2d, MPI_STATUS_IGNORE); + + MPI_Sendrecv(&buffer[domain->start[X]][domain->start[Y]], 1, column, nbrleft, 1, + &buffer[domain->start[X]][domain->end[Y]+1], 1, column, nbrright, 1, + comm2d, MPI_STATUS_IGNORE); + + /* when the datatype is no longer needed, it should be freed, */ + /* i.e. 
datatype is set to MPI_TYPE_NULL */ + MPI_Type_free(&column); + + return; +} + +/* ********************************************************************* */ + +void get_domains(MyData **const buffer, + myDomain *const domain, + const MPI_Comm comm) +{ + int rank, nranks; + MPI_Comm_size(comm, &nranks); + MPI_Comm_rank(comm, &rank); + + /* Master process gathers all the domains */ + + /***************************** get the domain boundaries from each process *************************************/ + myDomain *boundaries = NULL; + if (rank == MASTER) + { + boundaries = (myDomain *)malloc(nranks * sizeof(*boundaries)); + assert(boundaries != NULL); + } + + MPI_Gather(domain, sizeof(*domain), MPI_BYTE, + boundaries, sizeof(*domain), MPI_BYTE, + MASTER, comm); + +#if defined(DEBUG) + if (rank == MASTER) + { + for (int task=0 ; task<nranks ; task++) + { + printf("\n\t Task: %d", task); + printf("\n\t\t boundaries[%d].start[X] = %d - boundaries[%d].end[X] = %d", + task, boundaries[task].start[X], task, boundaries[task].end[X]); + printf("\n\t\t boundaries[%d].start[Y] = %d - boundaries[%d].end[Y] = %d\n", + task, boundaries[task].start[Y], task, boundaries[task].end[Y]); + } + printf("\n"); + fflush(stdout); + } +#endif /* DEBUG */ + + /**************************************************************************************************/ + + /***************************************** get the domain from each process *************************/ + /* MPI_Gatherv + Gathers into specified locations from all processes in a group + + Synopsis + int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], + MPI_Datatype recvtype, int root, MPI_Comm comm) */ + + int *recvcounts = NULL; + int *displs = NULL; + MyData *recvbuf = NULL; + MyData *sendbuf = NULL; + + if (rank == MASTER) + { + recvcounts = (int *)malloc(nranks * sizeof(*recvcounts)); + assert(recvcounts != NULL); + + displs = (int *)malloc(nranks * sizeof(*displs)); + assert(displs != NULL); + + for (int task=0 ; task<nranks ; task++) + { + const int nrows = (boundaries[task].end[X] - boundaries[task].start[X] + 1); + const int ncols = (boundaries[task].end[Y] - boundaries[task].start[Y] + 1); + recvcounts[task] = (nrows * ncols); + } + + displs[0] = 0; + for (int task=1 ; task<nranks ; task++) + displs[task] = (recvcounts[task - 1] + displs[task - 1]); + + /* allocate the recvbuffer */ + recvbuf = (MyData *)malloc((displs[nranks - 1] + recvcounts[nranks - 1]) * sizeof(*recvbuf)); + assert(recvbuf != NULL); + +#if defined(DEBUG) + printf("\n\t********************** recvcounts - displs *************************"); + for (int task=0 ; task<nranks ; task++) + { + printf("\n\t recvcounts[%d] = %d - displs[%d] = %d", + task, recvcounts[task], task, displs[task]); + } + printf("\n\t********************************************************************\n"); + fflush(stdout); +#endif /* DEBUG */ + + } /* MASTER */ + + /* every process sets up the sendbuf */ + const int nrows = (domain->end[X] - domain->start[X] + 1); + const int ncols = (domain->end[Y] - domain->start[Y] + 1); + sendbuf = (MyData *)malloc(nrows * ncols * sizeof(*sendbuf)); + assert(sendbuf != NULL); + /* store the actual grid owned by each process into sendbuf */ + for (int row=0 ; row<nrows ; row++) + memcpy(&sendbuf[row * ncols], &buffer[domain->start[X]+row][domain->start[Y]], (ncols * sizeof(MyData))); + + /* perform the MPI collective */ + MPI_Gatherv(sendbuf, (nrows * ncols), MPI_MyDatatype, + recvbuf, 
recvcounts, displs, MPI_MyDatatype, + MASTER, comm); + + /* store the recvbuf into the actual grid owned by the MASTER task */ + if (rank == MASTER) + { + for (int task=1 ; task<nranks ; task++) + { + const int nrows = (boundaries[task].end[X] - boundaries[task].start[X] + 1); + const int ncols = (boundaries[task].end[Y] - boundaries[task].start[Y] + 1); + for (int row=0 ; row<nrows ; row++) + memcpy(&buffer[boundaries[task].start[X] + row][boundaries[task].start[Y]], + &recvbuf[displs[task] + (row * ncols)], (ncols * sizeof(MyData))); + } + } + + free(sendbuf); + if (rank == MASTER) + { + free(recvbuf); + free(displs); + free(recvcounts); + free(boundaries); + } + + return; +} + +/* ********************************************************************* */ +void WriteSolution(MyData **const phi, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + static int nfile = 0; /* File counter */ + + char fname[128]; + sprintf(fname,"jacobi2D_mpi_cartesian_%02d.bin", nfile); + + FILE *fp; + printf ("> Writing %s\n",fname); + fp = fopen(fname, "wb"); + + /* discard boundaies */ + for (int j=jbeg ; j<=jend ; j++) + { + fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp); + } + + nfile++; + fclose(fp); +} diff --git a/jacobi/mpi/cartesian/src/tools.c b/jacobi/mpi/cartesian/src/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce --- /dev/null +++ b/jacobi/mpi/cartesian/src/tools.c @@ -0,0 +1,59 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. 
Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate memory for a double precision array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = malloc(nx * sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timeval tmp; + gettimeofday(&tmp, (struct timezone *)0); + double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; + + return sec; +} diff --git a/jacobi/mpi/miscellaneous/cartesian.cpp b/jacobi/mpi/miscellaneous/cartesian.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c4a97031a79e7db5cdda3bc42efe3f3afdc7e39c --- /dev/null +++ b/jacobi/mpi/miscellaneous/cartesian.cpp @@ -0,0 +1,71 @@ +#include <mpi.h> +#include <iostream> + +#define SIZE 2 +#define X 0 +#define Y 1 + +int main(int argc, char **argv) +{ + using namespace std; + + int task, ntasks; + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &ntasks); + MPI_Comm_rank(MPI_COMM_WORLD, &task); + + if (argc < 1) + { + if (!task) + { + cout << "\n\t Usage: <executable> <number processes along X> \n" << endl; + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + } + + const int cartesian_grid_x = (int)strtol(argv[1], NULL, 10); + const int cartesian_grid_y = ((ntasks % cartesian_grid_x == 0) ? (ntasks / cartesian_grid_x) : -1); + if (cartesian_grid_y == -1) + { + if (!task) + { + cout << "\n\t ntasks % cartesian_grid_x != 0 ... 
aborting ...\n" << endl; + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + } + + static int dims[SIZE] = {cartesian_grid_x, cartesian_grid_y}; + static int periods[SIZE] = {0, 0}; + static int reorder = 0; + MPI_Comm comm2d; + MPI_Cart_create(MPI_COMM_WORLD, SIZE, dims, periods, reorder, &comm2d); + + int coords[SIZE]; + MPI_Cart_coords(comm2d, task, SIZE, coords); + + int nbrright, nbrleft; + MPI_Cart_shift(comm2d, Y, 1, &nbrleft, &nbrright); + + int nbrtop, nbrbottom; + MPI_Cart_shift(comm2d, X, 1, &nbrbottom, &nbrtop); + + for (int rank=0 ; rank<ntasks; rank++) + { + MPI_Barrier(MPI_COMM_WORLD); + if (rank == task) + { + cout << "\n\t Task: " << task << endl; + cout << "\t\t coords[" << coords[X] << "," << coords[Y] << "]" << endl; + cout << "\t\t nbrright: " << nbrright << " - nbrleft : " << nbrleft << endl; + cout << "\t\t nbrtop : " << nbrtop << " - nbrbottom: " << nbrbottom << endl; + cout << endl; + } + } + + MPI_Comm_free(&comm2d); + MPI_Finalize(); + + return 0; +} diff --git a/jacobi/openmp/not_opt/Makefile b/jacobi/openmp/not_opt/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f1dca1984b87035b5b6a384244aaae3497018b6e --- /dev/null +++ b/jacobi/openmp/not_opt/Makefile @@ -0,0 +1,65 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +####################################################################### + +.PHONY: info serial omp valgrind_memcheck valgrind_callgrind valgrind_cachegrind debug clean + +info: + @echo ' ' + @echo '--------------------------------------------------------------------------------------------' + @echo '$$ make serial ---> compile the serial application ' + @echo '$$ make omp ---> compile the omp application ' + @echo '$$ make debug ---> debug the omp application ' + @echo '$$ make valgrind_memcheck ---> run the omp application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the omp application using Valgrind under Callgrind ' + @echo '$$ make valgrind_cachegrind ---> run the omp application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '--------------------------------------------------------------------------------------------' + @echo ' ' + +serial: $(PROG) + +omp: $(PROG_OMP) + +debug: $(PROG_DEBUG) + @echo 'OOOooo... debugging ...oooOOO' + gdb --args ./$< + @echo 'OOOooo... debugging ... oooOOO' + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + valgrind --tool=memcheck -s --leak-check=full --show-leak-kinds=all --track-origins=yes --read-var-info=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 10 10 2 + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... 
valgrind_callgrind ...OOOooo' + valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128 2 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128 2 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_OMP) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin diff --git a/jacobi/openmp/not_opt/include/allvars.h b/jacobi/openmp/not_opt/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..6ffdbdae9e5b89a38a5dc6747bbd73edecb77ba3 --- /dev/null +++ b/jacobi/openmp/not_opt/include/allvars.h @@ -0,0 +1,20 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +typedef double MyData; + +#if defined(_OPENMP) + +#include <omp.h> + +#else + +#define omp_get_thread_num() 0 +#define omp_get_num_threads() 1 + +#endif /* _OPENMP */ diff --git a/jacobi/openmp/not_opt/include/tools.h b/jacobi/openmp/not_opt/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154 --- /dev/null +++ b/jacobi/openmp/not_opt/include/tools.h @@ -0,0 +1,18 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); diff --git a/jacobi/openmp/not_opt/make.def b/jacobi/openmp/not_opt/make.def new file mode 100644 index 0000000000000000000000000000000000000000..ea4ceb7059794adfebd9bf8a956567451d1eb879 --- /dev/null +++ b/jacobi/openmp/not_opt/make.def @@ -0,0 +1,53 @@ +CC ?= gcc +CFLAGS ?= -Wall -Wextra -march=native +OMP ?= -fopenmp +LIBS ?= -lm + +SYSTYPE = $(strip $(shell uname -n)) + +PROG ?= jacobi_serial_not_opt_$(SYSTYPE) +PROG_OMP ?= jacobi_omp_not_opt_$(SYSTYPE) +PROG_DEBUG = $(PROG_OMP)_DEBUG +PROG_MEMCHECK = $(PROG_OMP)_MEMCHECK +PROG_CALLGRIND = $(PROG_OMP)_CALLGRIND +PROG_CACHEGRIND = $(PROG_OMP)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_OMP): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 $(OMP) -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -ggdb3 $(OMP) -fno-omit-frame-pointer -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' 
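# --- Editorial usage note (not part of the committed make.def) -----------------
# The OpenMP binary produced by '$ make omp' takes the grid sizes and the OpenMP
# thread count on the command line, mirroring the './$< 128 128 2' invocations in
# the valgrind targets of the Makefile above. An assumed example session:
#   $ make omp
#   $ ./jacobi_omp_not_opt_<hostname> 512 512 4
# Here 512 512 are the X/Y grid sizes and 4 is the thread count that the program
# forwards to the OpenMP parallel region of its Jacobi kernel (see
# jacobi_2D_omp_not_opt.c later in this diff); <hostname> stands for the
# $(SYSTYPE) suffix defined above.
# --------------------------------------------------------------------------------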
+ +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og $(OMP) -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 $(OMP) -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 $(OMP) -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/openmp/not_opt/src/jacobi_2D_omp_not_opt.c b/jacobi/openmp/not_opt/src/jacobi_2D_omp_not_opt.c new file mode 100644 index 0000000000000000000000000000000000000000..a43c5bbcb2f75e956b8933784327773c9ce94e46 --- /dev/null +++ b/jacobi/openmp/not_opt/src/jacobi_2D_omp_not_opt.c @@ -0,0 +1,309 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + const int, + const int); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict, + const MyData *const restrict, int, int, int, int, const int); + +void WriteSolution(MyData **const phi, const int nx, const int ny); + +void copy_grids(MyData **const restrict A, + MyData **const restrict B, + const int xbeg, + const int xend, + const int ybeg, + const int yend); + +int main(int argc, char **argv) +{ +#if defined(_OPENMP) + if (argc <= 3) + + { + printf("\n\t Usage: <executable> <x_grid_size> <y_grid_size> <omp threads>\n\n"); + exit(EXIT_FAILURE); + } +#else + if (argc <= 2) + { + printf("\n\t Usage: <executable> <x_grid_size> <y_grid_size>\n\n"); + exit(EXIT_FAILURE); + } +#endif /* _OPENMP */ + + /* global X and Y grid size */ + const int NX_GLOB = (int) strtol(argv[1], NULL, 10); + const int NY_GLOB = (int) strtol(argv[2], NULL, 10); +#if defined(_OPENMP) + const int Nthread = (int) strtol(argv[3], NULL, 10); +#else + const int Nthread = 1; +#endif /* _OPENMP */ + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int iend = ibeg + NX_GLOB - 1; + const int nx = iend - ibeg + 1; + const int nx_tot = nx + 2 * NGHOST; + + const int jbeg = NGHOST; + const int jend = jbeg + NY_GLOB - 1; + const int ny = jend - jbeg + 1; + const int ny_tot = ny + 2 * NGHOST; + + printf("\n\t Grid indices:"); + printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot); + printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot); + + /* -------------------------------------------------------- + 2. 
Generate grid, allocate memory + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + MyData *x = xg; /* Global and local grids are the same */ + MyData *y = yg; /* for serial version of the code */ + + /* grids memory allocation */ + MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot); + MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot); + + /* -------------------------------------------------------- + 3. Initialize solution array to 0 + -------------------------------------------------------- */ + + for (int j=jbeg ; j<=jend ; j++) + for (int i=ibeg ; i<=iend ; i++) + phi0[j][i] = 0.0; + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = seconds(); + + MyData err = 1.0; + /* iterations */ + int k = 0; + while (err > TOL) + { + /* -- 4a. Set boundary conditions first -- */ + + BoundaryConditions(phi0, x, y, nx, ny); + + /* -- 4b. Jacobi's method and residual (interior points) -- */ + /* core algorithm */ + + err = 0.0; + JacobiAlgorithm(phi, phi0, &err, delta, + ibeg, iend, jbeg, jend, Nthread); + + /* -- 4c. Copy grids --*/ + copy_grids(phi0, phi, + jbeg, jend, + ibeg, jend); + + printf ("\n\t Iteration = %d - err = %lg\n",k, err); + + /* increase the counter of loop iterations */ + k++; + } + + WriteSolution(phi, nx, ny); + + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", seconds() - time_start); + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + if (yg) + free(yg); + + if (xg) + free(xg); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int iend = ibeg + nx - 1; + + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + int i,j; + + /* -- Left -- */ + i = ibeg - 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (1.0 - y[j]); + + /* -- Right -- */ + i = jend + 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (y[j] * y[j]); + + /* -- Bottom -- */ + j = jbeg - 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = (1.0 - x[i]); + + /* -- Top -- */ + j = jend + 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = x[i]; + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData *const restrict delta, + int ibeg, + int iend, + int jbeg, + int jend, + const int Nthreads) +{ + *error = 0.0; + +#pragma omp parallel default(none) shared(error, delta, Phi, Phi0) \ + firstprivate(ibeg, iend, jbeg, jend) \ + num_threads(Nthreads) + { +#if defined(DEBUG) + const int id = omp_get_thread_num(); + const int thr = omp_get_num_threads(); + printf("\n\t I am 
the omp thread %d out of %d", id, thr); + #pragma omp barrier +#endif /* DEBUG */ + + #pragma omp for schedule(static) + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] + + Phi0[j-1][i] + Phi0[j+1][i]); + + #pragma omp critical + { + *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]); + } + } /* loop over columns */ + } /* loop over rows */ + } /* omp parallel */ + + return; +} + +/* ********************************************************************* */ + +/* ********************************************************************* */ +void copy_grids(MyData **const restrict A, + MyData **const restrict B, + const int xbeg, + const int xend, + const int ybeg, + const int yend) +{ + for (int i=xbeg ; i<=xend ; i++) + for (int j=ybeg ; j<=yend ; j++) + A[i][j] = B[i][j]; + + return; +} +/* +************************************************************************ */ + + +/* ********************************************************************* */ +void WriteSolution (MyData **const phi, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + static int nfile = 0; /* File counter */ + + char fname[32]; +#if defined(_OPENMP) + sprintf(fname,"jacobi2D_omp_not_opt_%02d.bin", nfile); +#else + sprintf(fname,"jacobi2D_serial_not_opt_%02d.bin", nfile); +#endif /* _OPENMP */ + + FILE *fp; + printf ("> Writing %s\n",fname); + fp = fopen(fname, "wb"); + + for (int j=jbeg ; j<=jend ; j++) + { + fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp); + } + + nfile++; + fclose(fp); +} diff --git a/jacobi/openmp/not_opt/src/tools.c b/jacobi/openmp/not_opt/src/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce --- /dev/null +++ b/jacobi/openmp/not_opt/src/tools.c @@ -0,0 +1,59 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. 
Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate memory for a double precision array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = malloc(nx * sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timeval tmp; + gettimeofday(&tmp, (struct timezone *)0); + double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; + + return sec; +} diff --git a/jacobi/openmp/opt/Makefile b/jacobi/openmp/opt/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f1dca1984b87035b5b6a384244aaae3497018b6e --- /dev/null +++ b/jacobi/openmp/opt/Makefile @@ -0,0 +1,65 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +####################################################################### + +.PHONY: info serial omp valgrind_memcheck valgrind_callgrind valgrind_cachegrind debug clean + +info: + @echo ' ' + @echo '--------------------------------------------------------------------------------------------' + @echo '$$ make serial ---> compile the serial application ' + @echo '$$ make omp ---> compile the omp application ' + @echo '$$ make debug ---> debug the omp application ' + @echo '$$ make valgrind_memcheck ---> run the omp application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the omp application using Valgrind under Callgrind ' + @echo '$$ make valgrind_cachegrind ---> run the omp application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '--------------------------------------------------------------------------------------------' + @echo ' ' + +serial: $(PROG) + +omp: $(PROG_OMP) + +debug: $(PROG_DEBUG) + @echo 'OOOooo... debugging ...oooOOO' + gdb --args ./$< + @echo 'OOOooo... debugging ... oooOOO' + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + valgrind --tool=memcheck -s --leak-check=full --show-leak-kinds=all --track-origins=yes --read-var-info=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 10 10 2 + @echo 'oooOOO... 
valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... valgrind_callgrind ...OOOooo' + valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128 2 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128 2 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_OMP) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin diff --git a/jacobi/openmp/opt/include/allvars.h b/jacobi/openmp/opt/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..468de4a19bebf2c0d5c59cd24ad3aab94b8df0f6 --- /dev/null +++ b/jacobi/openmp/opt/include/allvars.h @@ -0,0 +1,9 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +typedef double MyData; diff --git a/jacobi/openmp/opt/include/tools.h b/jacobi/openmp/opt/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154 --- /dev/null +++ b/jacobi/openmp/opt/include/tools.h @@ -0,0 +1,18 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); diff --git a/jacobi/openmp/opt/make.def b/jacobi/openmp/opt/make.def new file mode 100644 index 0000000000000000000000000000000000000000..a5e01e252c90e4ad677132cbfbe3b7e0a9885b1e --- /dev/null +++ b/jacobi/openmp/opt/make.def @@ -0,0 +1,53 @@ +CC ?= gcc +CFLAGS ?= -Wall -Wextra -march=native +OMP ?= -fopenmp +LIBS ?= -lm + +SYSTYPE = $(strip $(shell uname -n)) + +PROG ?= jacobi_serial_opt_$(SYSTYPE) +PROG_OMP ?= jacobi_omp_opt_$(SYSTYPE) +PROG_DEBUG = $(PROG_OMP)_DEBUG +PROG_MEMCHECK = $(PROG_OMP)_MEMCHECK +PROG_CALLGRIND = $(PROG_OMP)_CALLGRIND +PROG_CACHEGRIND = $(PROG_OMP)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_OMP): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 $(OMP) -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -ggdb3 $(OMP) -fno-omit-frame-pointer -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og $(OMP) -I./include $(SOURCES) -o $@ 
$(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 $(OMP) -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 $(OMP) -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/openmp/opt/src/jacobi_2D_omp_opt.c b/jacobi/openmp/opt/src/jacobi_2D_omp_opt.c new file mode 100644 index 0000000000000000000000000000000000000000..2a47ed64cf5b577ef13eb78610e14b7d3677d9eb --- /dev/null +++ b/jacobi/openmp/opt/src/jacobi_2D_omp_opt.c @@ -0,0 +1,277 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + const int, + const int); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict, + const MyData *const restrict, const int, const int, const int, const int, const int); + +void WriteSolution(MyData **const phi, const int nx, const int ny); + +int main(int argc, char **argv) +{ +#if defined(_OPENMP) + if (argc <= 3) + + { + printf("\n\t Usage: <executable> <x_grid_size> <y_grid_size> <omp threads>\n\n"); + exit(EXIT_FAILURE); + } +#else + if (argc <= 2) + { + printf("\n\t Usage: <executable> <x_grid_size> <y_grid_size>\n\n"); + exit(EXIT_FAILURE); + } +#endif /* _OPENMP */ + + /* global X and Y grid size */ + const int NX_GLOB = (int) strtol(argv[1], NULL, 10); + const int NY_GLOB = (int) strtol(argv[2], NULL, 10); +#if defined(_OPENMP) + const int Nthread = (int) strtol(argv[3], NULL, 10); +#else + const int Nthread = 1; +#endif /* _OPENMP */ + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int iend = ibeg + NX_GLOB - 1; + const int nx = iend - ibeg + 1; + const int nx_tot = nx + 2 * NGHOST; + + const int jbeg = NGHOST; + const int jend = jbeg + NY_GLOB - 1; + const int ny = jend - jbeg + 1; + const int ny_tot = ny + 2 * NGHOST; + + printf("\n\t Grid indices:"); + printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot); + printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot); + + /* -------------------------------------------------------- + 2. 
Generate grid, allocate memory + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + MyData *x = xg; /* Global and local grids are the same */ + MyData *y = yg; /* for serial version of the code */ + + /* grids memory allocation */ + MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot); + MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot); + + /* -------------------------------------------------------- + 3. Initialize solution array to 0 + -------------------------------------------------------- */ + + for (int j=jbeg ; j<=jend ; j++) + for (int i=ibeg ; i<=iend ; i++) + phi0[j][i] = 0.0; + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = seconds(); + + /* -- 4a. Set boundary conditions first -- */ + + BoundaryConditions(phi0, x, y, nx, ny); + BoundaryConditions(phi, x, y, nx, ny); + + MyData err = 1.0; + /* iterations */ + int k = 0; + while (1) + { + /* -- 4b. Jacobi's method and residual (interior points) -- */ + /* core algorithm */ + + err = 0.0; + JacobiAlgorithm(phi, phi0, &err, delta, + ibeg, iend, jbeg, jend, Nthread); + + printf ("\n\t Iteration = %d - err = %lg\n",k, err); + + /* increase the counter of loop iterations */ + k++; + + /* check convergence */ + if (err <= TOL) + break; + + /* swap the pointers */ + MyData **tmp = phi; + phi = phi0; + phi0 = tmp; + } + + WriteSolution(phi, nx, ny); + + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", seconds() - time_start); + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + if (yg) + free(yg); + + if (xg) + free(xg); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int iend = ibeg + nx - 1; + + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + int i,j; + + /* -- Left -- */ + i = ibeg - 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (1.0 - y[j]); + + /* -- Right -- */ + i = jend + 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (y[j] * y[j]); + + /* -- Bottom -- */ + j = jbeg - 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = (1.0 - x[i]); + + /* -- Top -- */ + j = jend + 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = x[i]; + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData *const restrict delta, + const int ibeg, + const int iend, + const int jbeg, + const int jend, + const int Nthreads) +{ + double err = 0.0; + +#pragma omp parallel for collapse(2) schedule(static) reduction(+: err) num_threads(Nthreads) + for (int j=jbeg ; j<=jend ; j++) + { + for (int 
i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] + + Phi0[j-1][i] + Phi0[j+1][i]); + + err += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]); + } /* loop over columns */ + } /* loop over rows */ + + *error = err; + + return; +} + +/* ********************************************************************* */ + +/* ********************************************************************* */ +void WriteSolution (MyData **const phi, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + static int nfile = 0; /* File counter */ + + char fname[32]; +#if defined(_OPENMP) + sprintf(fname,"jacobi2D_omp_opt_%02d.bin", nfile); +#else + sprintf(fname,"jacobi2D_serial_opt_%02d.bin", nfile); +#endif /* _OPENMP */ + + FILE *fp; + printf ("> Writing %s\n",fname); + fp = fopen(fname, "wb"); + + for (int j=jbeg ; j<=jend ; j++) + { + fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp); + } + + nfile++; + fclose(fp); +} diff --git a/jacobi/openmp/opt/src/tools.c b/jacobi/openmp/opt/src/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce --- /dev/null +++ b/jacobi/openmp/opt/src/tools.c @@ -0,0 +1,59 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate memory for a double precision array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = malloc(nx * sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timeval tmp; + gettimeofday(&tmp, (struct timezone *)0); + double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; + + return sec; +} diff --git a/jacobi/serial/not_opt/Makefile b/jacobi/serial/not_opt/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..01dd641863b57c577e1dcc2de14db298269cbf62 --- /dev/null +++ b/jacobi/serial/not_opt/Makefile @@ -0,0 +1,62 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info 
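Editorial aside: a minimal, self-contained sketch (not taken from the repository) of the difference between the two OpenMP residual accumulations shown earlier in this diff. The not_opt kernel serializes every update of the shared error through '#pragma omp critical', while the opt kernel keeps a private partial sum per thread and combines them once via 'reduction(+: err)' (and swaps the phi/phi0 pointers instead of copying the whole grid). The helper name, loop length and step used below are illustrative only.

#include <math.h>
#include <stdio.h>

/* Hypothetical helper: accumulate |sin(i*h)| over n terms with OpenMP.
   use_reduction == 0 mimics the not_opt kernel (critical section),
   use_reduction == 1 mimics the opt kernel (reduction clause).        */
static double accumulate(const int n, const double h, const int use_reduction)
{
  double sum = 0.0;

  if (use_reduction)
  {
    #pragma omp parallel for reduction(+: sum)
    for (int i = 0; i < n; i++)
      sum += fabs(sin(i * h));          /* each thread sums privately */
  }
  else
  {
    #pragma omp parallel for
    for (int i = 0; i < n; i++)
    {
      const double term = fabs(sin(i * h));
      #pragma omp critical              /* one thread at a time: correct but slow */
      { sum += term; }
    }
  }

  return sum;
}

int main(void)
{
  printf("critical : %g\n", accumulate(1000000, 1.0e-3, 0));
  printf("reduction: %g\n", accumulate(1000000, 1.0e-3, 1));
  return 0;
}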
+####################################################################### + +# make.def defines how the application is compiled +include make.def +####################################################################### + +.PHONY: info serial valgrind_memcheck valgrind_callgrind valgrind_cachegrind debug clean + +info: + @echo ' ' + @echo '-----------------------------------------------------------------------------------------------' + @echo '$$ make serial ---> compile the serial application ' + @echo '$$ make debug ---> debug the serial application ' + @echo '$$ make valgrind_memcheck ---> run the serial application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the serial application using Valgrind under Callgrind ' + @echo '$$ make valgrind_cachegrind ---> run the serial application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '-----------------------------------------------------------------------------------------------' + @echo ' ' + +serial: $(PROG) + +debug: $(PROG_DEBUG) + @echo 'OOOooo... debugging ...oooOOO' + gdb --args ./$< + @echo 'OOOooo... debugging ... oooOOO' + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + valgrind --tool=memcheck -s --leak-check=full --show-leak-kinds=all --track-origins=yes --read-var-info=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 10 10 + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... valgrind_callgrind ...OOOooo' + valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... 
valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin diff --git a/jacobi/serial/not_opt/include/allvars.h b/jacobi/serial/not_opt/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..468de4a19bebf2c0d5c59cd24ad3aab94b8df0f6 --- /dev/null +++ b/jacobi/serial/not_opt/include/allvars.h @@ -0,0 +1,9 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +typedef double MyData; diff --git a/jacobi/serial/not_opt/include/tools.h b/jacobi/serial/not_opt/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154 --- /dev/null +++ b/jacobi/serial/not_opt/include/tools.h @@ -0,0 +1,18 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); diff --git a/jacobi/serial/not_opt/make.def b/jacobi/serial/not_opt/make.def new file mode 100644 index 0000000000000000000000000000000000000000..86d495638cbbd34c831b616944ed89375e47a43a --- /dev/null +++ b/jacobi/serial/not_opt/make.def @@ -0,0 +1,45 @@ +CC ?= gcc +CFLAGS ?= -Wall -Wextra -march=native +LIBS ?= -lm + +SYSTYPE = $(strip $(shell uname -n)) + +PROG ?= jacobi_serial_not_opt_$(SYSTYPE) +PROG_DEBUG = $(PROG)_DEBUG +PROG_MEMCHECK = $(PROG)_MEMCHECK +PROG_CALLGRIND = $(PROG)_CALLGRIND +PROG_CACHEGRIND = $(PROG)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/serial/not_opt/script/README b/jacobi/serial/not_opt/script/README new file mode 100644 index 0000000000000000000000000000000000000000..af3694a9dffe54c3a06ea8f1b574e09613071cf3 --- /dev/null +++ b/jacobi/serial/not_opt/script/README @@ -0,0 +1,7 @@ +This folder contains bash scripts conceived to test +the C application varying the number of +MPI tasks, OMP threads, and particles. 
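Editorial illustration (the loop below is not part of the scripts; it only mirrors the nested loops in run.sh further down): with the committed input_parameters values N=(65536), MPI=(1 2) and THREADS=(1 2 4), the sweep amounts to 1 x 2 x 3 = 6 runs.

for PART in 65536; do
  for TASK in 1 2; do
    for THR in 1 2 4; do
      echo "run: N=${PART}, MPI tasks=${TASK}, OMP threads=${THR}"
    done
  done
done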
+ +- parameters setting 'input_parameters' +- executable of the application should reside in the parent directory +- $ ./run.sh \ No newline at end of file diff --git a/jacobi/serial/not_opt/script/input_parameters b/jacobi/serial/not_opt/script/input_parameters new file mode 100644 index 0000000000000000000000000000000000000000..8d6804aece28848578979767e3f3247e669e6095 --- /dev/null +++ b/jacobi/serial/not_opt/script/input_parameters @@ -0,0 +1,23 @@ +########################################################################## +####################### paramfile ######################################## + +# set the template of the paramfile +TEMPLATE="paramfile_template.txt" +# set the name of the paramfile used for each run +PARAMFILE="paramfile_JUWELS.txt" + +########################################################################## +####################### run parameters ################################### + +# Delete output files (Yes if 1) +DELETE_OUTPUT_FILES=0 +# Output directory +OUTPUTDIR="./TEST/" +# number of particles +N=(65536) +# MPI tasks +MPI=(1 2) +# OMP threads +THREADS=(1 2 4) + +########################################################################## diff --git a/jacobi/serial/not_opt/script/run.sh b/jacobi/serial/not_opt/script/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..d88cce02f2694fd9a6dcc487090e9cba366a5ac0 --- /dev/null +++ b/jacobi/serial/not_opt/script/run.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# compile the application +# select flags inside the Makefile (parent directory) +cd .. && make mpi_omp_step_profiling -f Makefile + +if [[ "$?" != "0" ]] +then + echo "Cannot compile the application ...aborting..." + exit 1 +fi + +# the executable +EXEC=$(find . -name *$(hostname)* -executable -type f) +if [[ "$?" != "0" ]] +then + echo "Cannot find the executable ...aborting..." + exit 2 +fi + +# return to ./script and source input parameters and function +cd - && source input_parameters && source write_paramfile.sh + +# loop over particles +for PART in "${N[@]}" +do + # loop over MPI tasks + for TASK in "${MPI[@]}" + do + # loop over OMP threads + for THR in "${THREADS[@]}" + do + # write the paramfile + write_paramfile ${TEMPLATE} ${PARAMFILE} \ + ${DELETE_OUTPUT_FILES} ${OUTPUTDIR} \ + ${THR} ${PART} + + # check the status + if [[ "$?" != "0" ]] + then + echo "Error while writing ${PARAMFILE} ...aborting..." + exit 3 + fi + + # move the paramfile and move to the parent directory where the executable resides + mv ${PARAMFILE} ../ && cd .. + + # run the application + mpirun -np ${TASK} ${EXEC} ${PARAMFILE} + # check exit status + if [[ "$?" != "0" ]] + then + echo "####################################################################" + echo "ERROR while running the app using: ${TASK} MPI tasks and ${GPU} GPUs" + echo "####################################################################" + fi + + # delete the paramfile and return to the previous directory + rm -f ${PARAMFILE} && cd - + done # loop over THR + done # loop over TASKS +done # looop over particles + +# Everything is OK! 
+exit 0 diff --git a/jacobi/serial/not_opt/script/run_JUWELS.sh b/jacobi/serial/not_opt/script/run_JUWELS.sh new file mode 100755 index 0000000000000000000000000000000000000000..b3908e9bb94c04dc637ca947c1958bda95e8c538 --- /dev/null +++ b/jacobi/serial/not_opt/script/run_JUWELS.sh @@ -0,0 +1,95 @@ +#!/bin/bash -x + +######################### RESOURSE ALLOCATION ##################################### +# ACCOUNT = prpb93 +# NODES = 2 (2 nodes) +# PARTITION = batch (use Cluster Module) +# ERROR = Nbody-err.%j (error handle) +# TIME = 00:15:00 (enough for tests) +# EXCLUSIVE = node should not be shared with other jobs + +#SBATCH --account=prpb93 +#SBATCH --nodes=1 +#SBATCH --partition=batch +#SBATCH --error=Nbody-err.%j +#SBATCH --time=00:03:00 +#SBATCH --job-name="Nbody" +#SBATCH --exclusive +################################################################################### + +################################## MODULES ######################################## +# module purge +module purge +# load GCC +module load GCC/9.3.0 +# load OpenMPI +module load OpenMPI +################################################################################### + +# compile the application +# select flags inside the Makefile (parent directory) +cd .. && make mpi_omp_step_profiling -f Makefile + +if [[ "$?" != "0" ]] +then + echo "Cannot compile the application ...aborting..." + exit 1 +fi + +# the executable +EXEC=$(find . -name *$(hostname)* -executable -type f) +if [[ "$?" != "0" ]] +then + echo "Cannot find the executable ...aborting..." + exit 2 +fi + +# return to ./script and source input parameters and function +cd - && source input_parameters && source write_paramfile.sh + +# loop over particles +for PART in "${N[@]}" +do + # loop over MPI tasks + for TASK in "${MPI[@]}" + do + # loop over OMP threads + for THR in "${THREADS[@]}" + do + # write the paramfile + write_paramfile ${TEMPLATE} ${PARAMFILE} \ + ${DELETE_OUTPUT_FILES} ${OUTPUTDIR} \ + ${THR} ${PART} + + # check the status + if [[ "$?" != "0" ]] + then + echo "Error while writing ${PARAMFILE} ...aborting..." + exit 3 + fi + + # move the paramfile and move to the parent directory where the executable resides + mv ${PARAMFILE} ../ && cd .. + + # run the application + srun --ntasks ${TASK} ${EXEC} ${PARAMFILE} >& $SLURM_JOB_ID.log + + # check exit status + if [[ "$?" != "0" ]] + then + echo "####################################################################" + echo "ERROR while running the app using: ${TASK} MPI tasks and ${GPU} GPUs" + echo "####################################################################" + fi + + # delete the paramfile and return to the previous directory + rm -f ${PARAMFILE} && cd - + done # loop over THR + done # loop over TASKS +done # looop over particles + +# make clean +cd .. && make clean + +# Everything is OK! +exit 0 diff --git a/jacobi/serial/not_opt/script/write_paramfile.sh b/jacobi/serial/not_opt/script/write_paramfile.sh new file mode 100755 index 0000000000000000000000000000000000000000..6af6591767b2574370a67b652024d7da4065c9c3 --- /dev/null +++ b/jacobi/serial/not_opt/script/write_paramfile.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# NAME: write_paramfile +# PURPOSE: to write a user defined paramfile for the X NBody app +# USAGE: write_paramfile [template_file] [paramfile_to_write] \ +# [DeleteOutputFiles] [OutputDir] \ +# [OMP threads] [N] + +function write_paramfile() +{ + local TEMPLATE=$1 + + # check if TEMPLATE is a regular file + if [ ! -f ${TEMPLATE} ] + then + echo "${TEMPLATE} is not a regular file.. 
aborting..." + return 1 + fi + + # copy template file + local PARAMFILE=$2 + cp -v ${TEMPLATE} ${PARAMFILE} + + local DELETE_OUTPUT_FILES=$3 + local OUTPUTDIR=$4 + local THREADS=$5 + local N=$6 + + # add parameters + echo "DeleteOutputFiles ${DELETE_OUTPUT_FILES}" >> ${PARAMFILE} + echo "OutputDir ${OUTPUTDIR}" >> ${PARAMFILE} + echo "RequestedNThreads ${THREADS}" >> ${PARAMFILE} + echo "N ${N}" >> ${PARAMFILE} + + echo "${PARAMFILE} written" + + return 0 +} diff --git a/jacobi/serial/not_opt/src/jacobi_2D_serial_not_opt.c b/jacobi/serial/not_opt/src/jacobi_2D_serial_not_opt.c new file mode 100644 index 0000000000000000000000000000000000000000..ba53f321242432290dca329fae94271189fadf82 --- /dev/null +++ b/jacobi/serial/not_opt/src/jacobi_2D_serial_not_opt.c @@ -0,0 +1,274 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + const int, + const int); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict, + const MyData *const restrict, const int, const int, const int, const int); + +void WriteSolution(MyData **const phi, const int nx, const int ny); + +void copy_grids(MyData **const restrict A, + MyData **const restrict B, + const int xbeg, + const int xend, + const int ybeg, + const int yend); + +int main(int argc, char **argv) +{ + if (argc <= 2) + { + printf("\n\t Usage: <executable> <x_grid_size> <y_grid_size> \n\n"); + exit(EXIT_FAILURE); + } + + /* global X and Y grid size */ + const int NX_GLOB = (int) strtol(argv[1], NULL, 10); + const int NY_GLOB = (int) strtol(argv[2], NULL, 10); + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int iend = ibeg + NX_GLOB - 1; + const int nx = iend - ibeg + 1; + const int nx_tot = nx + 2 * NGHOST; + + const int jbeg = NGHOST; + const int jend = jbeg + NY_GLOB - 1; + const int ny = jend - jbeg + 1; + const int ny_tot = ny + 2 * NGHOST; + + printf("\n\t Grid indices:"); + printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot); + printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot); + + /* -------------------------------------------------------- + 2. 
Generate grid, allocate memory + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + MyData *x = xg; /* Global and local grids are the same */ + MyData *y = yg; /* for serial version of the code */ + + /* grids memory allocation */ + MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot); + MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot); + + /* -------------------------------------------------------- + 3. Initialize solution array to 0 + -------------------------------------------------------- */ + + for (int j=jbeg ; j<=jend ; j++) + for (int i=ibeg ; i<=iend ; i++) + phi0[j][i] = 0.0; + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = seconds(); + + MyData err = 1.0; + /* iterations */ + int k = 0; + while (err > TOL) + { + /* -- 4a. Set boundary conditions first -- */ + + BoundaryConditions(phi0, x, y, nx, ny); + + /* -- 4b. Jacobi's method and residual (interior points) -- */ + /* core algorithm */ + + err = 0.0; + JacobiAlgorithm(phi, phi0, &err, delta, + ibeg, iend, jbeg, jend); + + /* -- 4c. Copy grids --*/ + copy_grids(phi0, phi, + jbeg, jend, + ibeg, jend); + + printf ("\n\t Iteration = %d - err = %lg\n",k, err); + + /* increase the counter of loop iterations */ + k++; + } + + WriteSolution(phi, nx, ny); + + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", seconds() - time_start); + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + if (yg) + free(yg); + + if (xg) + free(xg); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int iend = ibeg + nx - 1; + + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + int i,j; + + /* -- Left -- */ + i = ibeg - 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (1.0 - y[j]); + + /* -- Right -- */ + i = jend + 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (y[j] * y[j]); + + /* -- Bottom -- */ + j = jbeg - 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = (1.0 - x[i]); + + /* -- Top -- */ + j = jend + 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = x[i]; + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData *const restrict delta, + const int ibeg, + const int iend, + const int jbeg, + const int jend) +{ + *error = 0.0; + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] + + Phi0[j-1][i] + Phi0[j+1][i]); + + *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]); + } /* loop over columns */ + } /* loop 
over rows */ + + return; +} + +/* ********************************************************************* */ + +/* ********************************************************************* */ +void copy_grids(MyData **const restrict A, + MyData **const restrict B, + const int xbeg, + const int xend, + const int ybeg, + const int yend) +{ + for (int i=xbeg ; i<=xend ; i++) + for (int j=ybeg ; j<=yend ; j++) + A[i][j] = B[i][j]; + + return; +} +/* +************************************************************************ */ + + +/* ********************************************************************* */ +void WriteSolution (MyData **const phi, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + static int nfile = 0; /* File counter */ + + char fname[32]; + sprintf(fname,"jacobi2D_serial_not_opt_%02d.bin", nfile); + + FILE *fp; + printf ("> Writing %s\n",fname); + fp = fopen(fname, "wb"); + + for (int j=jbeg ; j<=jend ; j++) + { + fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp); + } + + nfile++; + fclose(fp); +} diff --git a/jacobi/serial/not_opt/src/tools.c b/jacobi/serial/not_opt/src/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce --- /dev/null +++ b/jacobi/serial/not_opt/src/tools.c @@ -0,0 +1,59 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate memory for a double precision array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = malloc(nx * sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timeval tmp; + gettimeofday(&tmp, (struct timezone *)0); + double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; + + return sec; +} diff --git a/jacobi/serial/opt/Makefile b/jacobi/serial/opt/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..01dd641863b57c577e1dcc2de14db298269cbf62 --- /dev/null +++ b/jacobi/serial/opt/Makefile @@ -0,0 +1,62 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options 
+# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +####################################################################### + +.PHONY: info serial valgrind_memcheck valgrind_callgrind valgrind_cachegrind debug clean + +info: + @echo ' ' + @echo '-----------------------------------------------------------------------------------------------' + @echo '$$ make serial ---> compile the serial application ' + @echo '$$ make debug ---> debug the serial application ' + @echo '$$ make valgrind_memcheck ---> run the serial application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the serial application using Valgrind under Callgrind ' + @echo '$$ make valgrind_cachegrind ---> run the serial application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '-----------------------------------------------------------------------------------------------' + @echo ' ' + +serial: $(PROG) + +debug: $(PROG_DEBUG) + @echo 'OOOooo... debugging ...oooOOO' + gdb --args ./$< + @echo 'OOOooo... debugging ... oooOOO' + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + valgrind --tool=memcheck -s --leak-check=full --show-leak-kinds=all --track-origins=yes --read-var-info=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 10 10 + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... valgrind_callgrind ...OOOooo' + valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... 
valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin diff --git a/jacobi/serial/opt/include/allvars.h b/jacobi/serial/opt/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..468de4a19bebf2c0d5c59cd24ad3aab94b8df0f6 --- /dev/null +++ b/jacobi/serial/opt/include/allvars.h @@ -0,0 +1,9 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +typedef double MyData; diff --git a/jacobi/serial/opt/include/tools.h b/jacobi/serial/opt/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154 --- /dev/null +++ b/jacobi/serial/opt/include/tools.h @@ -0,0 +1,18 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); diff --git a/jacobi/serial/opt/make.def b/jacobi/serial/opt/make.def new file mode 100644 index 0000000000000000000000000000000000000000..55e3b7d2673ab472796fc226043b98e57802d656 --- /dev/null +++ b/jacobi/serial/opt/make.def @@ -0,0 +1,45 @@ +CC ?= gcc +CFLAGS ?= -Wall -Wextra -march=native +LIBS ?= -lm + +SYSTYPE = $(strip $(shell uname -n)) + +PROG ?= jacobi_serial_opt_$(SYSTYPE) +PROG_DEBUG = $(PROG)_DEBUG +PROG_MEMCHECK = $(PROG)_MEMCHECK +PROG_CALLGRIND = $(PROG)_CALLGRIND +PROG_CACHEGRIND = $(PROG)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include $(SOURCES) -o $@ $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/serial/opt/src/jacobi_2D_serial_opt.c b/jacobi/serial/opt/src/jacobi_2D_serial_opt.c new file mode 100644 index 0000000000000000000000000000000000000000..a14c810679a0d4087e1df3159ec7f1a8d98f238f --- /dev/null +++ b/jacobi/serial/opt/src/jacobi_2D_serial_opt.c @@ -0,0 +1,254 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. 
Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + const int, + const int); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict, + const MyData *const restrict, const int, const int, const int, const int); + +void WriteSolution(MyData **const phi, const int nx, const int ny); + +int main(int argc, char **argv) +{ + if (argc <= 2) + { + printf("\n\t Usage: <executable> <x_grid_size> <y_grid_size> \n\n"); + exit(EXIT_FAILURE); + } + + /* global X and Y grid size */ + const int NX_GLOB = (int) strtol(argv[1], NULL, 10); + const int NY_GLOB = (int) strtol(argv[2], NULL, 10); + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int iend = ibeg + NX_GLOB - 1; + const int nx = iend - ibeg + 1; + const int nx_tot = nx + 2 * NGHOST; + + const int jbeg = NGHOST; + const int jend = jbeg + NY_GLOB - 1; + const int ny = jend - jbeg + 1; + const int ny_tot = ny + 2 * NGHOST; + + printf("\n\t Grid indices:"); + printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot); + printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot); + + /* -------------------------------------------------------- + 2. Generate grid, allocate memory + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + MyData *x = xg; /* Global and local grids are the same */ + MyData *y = yg; /* for serial version of the code */ + + /* grids memory allocation */ + MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot); + MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot); + + /* -------------------------------------------------------- + 3. Initialize solution array to 0 + -------------------------------------------------------- */ + + for (int j=jbeg ; j<=jend ; j++) + for (int i=ibeg ; i<=iend ; i++) + phi0[j][i] = 0.0; + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = seconds(); + + /* -- 4a. Set boundary conditions first -- */ + + BoundaryConditions(phi0, x, y, nx, ny); + BoundaryConditions(phi, x, y, nx, ny); + + MyData err = 1.0; + /* iterations */ + int k = 0; + while (1) + { + /* -- 4b. 
Jacobi's method and residual (interior points) -- */ + /* core algorithm */ + + err = 0.0; + JacobiAlgorithm(phi, phi0, &err, delta, + ibeg, iend, jbeg, jend); + + printf ("\n\t Iteration = %d - err = %lg\n",k, err); + + /* increase the counter of loop iterations */ + k++; + + /* check convergence */ + if (err <= TOL) + break; + + /* swap the pointers */ + MyData **tmp = phi; + phi = phi0; + phi0 = tmp; + } + + WriteSolution(phi, nx, ny); + + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", seconds() - time_start); + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + if (yg) + free(yg); + + if (xg) + free(xg); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int iend = ibeg + nx - 1; + + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + int i,j; + + /* -- Left -- */ + i = ibeg - 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (1.0 - y[j]); + + /* -- Right -- */ + i = iend + 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (y[j] * y[j]); + + /* -- Bottom -- */ + j = jbeg - 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = (1.0 - x[i]); + + /* -- Top -- */ + j = jend + 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = x[i]; + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData *const restrict delta, + const int ibeg, + const int iend, + const int jbeg, + const int jend) +{ + *error = 0.0; + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] + + Phi0[j-1][i] + Phi0[j+1][i]); + + *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]); + } /* loop over columns */ + } /* loop over rows */ + + return; +} + +/* ********************************************************************* */ + +/* ********************************************************************* */ +void WriteSolution (MyData **const phi, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + static int nfile = 0; /* File counter */ + + char fname[32]; + sprintf(fname,"jacobi2D_serial_opt_%02d.bin", nfile); + + FILE *fp; + printf ("> Writing %s\n",fname); + fp = fopen(fname, "wb"); + + for (int j=jbeg ; j<=jend ; j++) + { + fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp); + } + + nfile++; + fclose(fp); +} diff --git a/jacobi/serial/opt/src/tools.c b/jacobi/serial/opt/src/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce --- /dev/null +++ b/jacobi/serial/opt/src/tools.c @@ -0,0 +1,59 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D.
Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate memory for a double precision array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = malloc(nx * sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timeval tmp; + gettimeofday(&tmp, (struct timezone *)0); + double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; + + return sec; +}
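Note (illustrative sketch, not part of the patch): WriteSolution dumps only the interior of the grid, as ny consecutive rows of nx values of type MyData (double in this serial/opt build), with no header. The reader below is a minimal stand-alone sketch of how such a file could be inspected; the file name read_solution.c, the command-line interface, and the grid sizes passed as arguments are assumptions for illustration, not files or options provided by this repository.

    /* read_solution.c - hypothetical helper, not included in this patch.
     * Assumes the file holds ny rows of nx doubles (interior points only),
     * exactly as written by WriteSolution in the default double-precision build.
     * Assumed usage: ./read_solution jacobi2D_serial_opt_00.bin <nx> <ny>
     */
    #include <stdio.h>
    #include <stdlib.h>

    int main(int argc, char **argv)
    {
      if (argc != 4)
        {
          fprintf(stderr, "Usage: %s <file.bin> <nx> <ny>\n", argv[0]);
          return EXIT_FAILURE;
        }

      const int nx = (int) strtol(argv[2], NULL, 10);
      const int ny = (int) strtol(argv[3], NULL, 10);

      FILE *fp = fopen(argv[1], "rb");
      if (fp == NULL)
        {
          perror("fopen");
          return EXIT_FAILURE;
        }

      double *row = malloc(nx * sizeof(double));
      if (row == NULL)
        {
          fclose(fp);
          return EXIT_FAILURE;
        }

      /* read and print one row of the solution per line */
      for (int j=0 ; j<ny ; j++)
        {
          if (fread(row, sizeof(double), nx, fp) != (size_t)nx)
            {
              fprintf(stderr, "Short read at row %d\n", j);
              break;
            }
          for (int i=0 ; i<nx ; i++)
            printf("%8.4f ", row[i]);
          printf("\n");
        }

      free(row);
      fclose(fp);
      return EXIT_SUCCESS;
    }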