diff --git a/jacobi/mpi/SendRecv/Makefile b/jacobi/mpi/SendRecv/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d37755442afd19b56734dad95d99da5bad2da49c --- /dev/null +++ b/jacobi/mpi/SendRecv/Makefile @@ -0,0 +1,61 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +include make_mpi_path +####################################################################### + +.PHONY: info mpi debug valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean + +info: + @echo ' ' + @echo '-----------------------------------------------------------------------------------------' + @echo '$$ make mpi ---> compile the mpi application ' + @echo '$$ make debug ---> compile the mpi application for debugger ' + @echo '$$ make valgrind_memcheck ---> run the mpi application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the mpi application using Valgrind under Callgrind ' + @echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '-----------------------------------------------------------------------------------------' + @echo ' ' + +mpi: $(PROG) + +debug: $(PROG_DEBUG) + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + mpirun -n 2 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 5 5 + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... valgrind_callgrind ...OOOooo' + mpirun -n 2 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + mpirun -n 2 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... 
valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin + rm -f jacobi_mpi_SendRecv_* diff --git a/jacobi/mpi/SendRecv/include/allvars.h b/jacobi/mpi/SendRecv/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..798e4e6a2f2a740b3ab5a1394ac85eef8da1255c --- /dev/null +++ b/jacobi/mpi/SendRecv/include/allvars.h @@ -0,0 +1,20 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +#include <mpi.h> + +#define MASTER 0 + +#if defined(SINGLE_PRECISION) +typedef float MyData; +#define MPI_MyDatatype MPI_FLOAT_PRECISION +#else +/* double precision is assumed by default */ +typedef double MyData; +#define MPI_MyDatatype MPI_DOUBLE_PRECISION +#endif /* SINGLE_PRECISION */ diff --git a/jacobi/mpi/SendRecv/include/tools.h b/jacobi/mpi/SendRecv/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154 --- /dev/null +++ b/jacobi/mpi/SendRecv/include/tools.h @@ -0,0 +1,18 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); diff --git a/jacobi/mpi/SendRecv/make.def b/jacobi/mpi/SendRecv/make.def new file mode 100644 index 0000000000000000000000000000000000000000..6548f4b6b2d1ef3c78434166f88adac85b12801b --- /dev/null +++ b/jacobi/mpi/SendRecv/make.def @@ -0,0 +1,45 @@ +CC = mpicc +CFLAGS = -Wall -Wextra -march=native +LIBS = -lm -lmpi + +SYSTYPE = $(strip $(shell uname -n)) + +PROG = jacobi_mpi_SendRecv_$(SYSTYPE) +PROG_DEBUG = $(PROG)_DEBUG +PROG_MEMCHECK = $(PROG)_MEMCHECK +PROG_CALLGRIND = $(PROG)_CALLGRIND +PROG_CACHEGRIND = $(PROG)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -ggdb3 -DDEBUG -fno-omit-frame-pointer -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/mpi/SendRecv/make_mpi_path b/jacobi/mpi/SendRecv/make_mpi_path new file mode 100644 index 
0000000000000000000000000000000000000000..3fdf888da9d4837eb7eafb51c93e53516774ce93 --- /dev/null +++ b/jacobi/mpi/SendRecv/make_mpi_path @@ -0,0 +1,10 @@ +# set the MPI install path + +# pleiadi +MPI_INSTALL_PATH = /opt/cluster/spack/opt/spack/linux-centos7-broadwell/gcc-11.2.0/openmpi-4.1.3-djxjqlmzbqwq76bhh3wvgxaefnoczleg + + + +MPI = $(MPI_INSTALL_PATH) +MPI_INC = $(MPI)/include +MPI_LIB = $(MPI)/lib \ No newline at end of file diff --git a/jacobi/mpi/SendRecv/src/jacobi_2D_mpi_sendrecv.c b/jacobi/mpi/SendRecv/src/jacobi_2D_mpi_sendrecv.c new file mode 100644 index 0000000000000000000000000000000000000000..a0cd0c19d039a6c6f38244e2ffe7e01d04313423 --- /dev/null +++ b/jacobi/mpi/SendRecv/src/jacobi_2D_mpi_sendrecv.c @@ -0,0 +1,502 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +/* #define DEBUG */ + +typedef struct MyRows +{ + int start; + int end; +} myDomain; + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + const int, + const int); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict, + const MyData *const restrict, const int, const int, const int, const int); + +void get_domains(MyData **const buffer, + myDomain *const domain, + const int grid_dim, + const int rank, + const int nranks, + const MPI_Comm comm); + +void mpi_exchange_sendrecv_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d); + +void WriteSolution(MyData **const phi, const int nx, const int ny); + +void copy_grids(MyData **const restrict A, + MyData **const restrict B, + const int xbeg, + const int xend, + const int ybeg, + const int yend); + +int main(int argc, char **argv) +{ + int rank, Nranks; + const MPI_Comm comm = MPI_COMM_WORLD; + MPI_Init(&argc, &argv); + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &Nranks); + + if ((argc <= 1) && !rank) + { + printf("\n\t Usage: <executable> <grid_size> \n\n"); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* global X and Y grid size (square matrix supposed) */ + const int NX_GLOB = (int) strtol(argv[1], NULL, 10); + const int NY_GLOB = NX_GLOB; + + /******************************************************************************************************/ + /* 1D MPI-decomposition: + the physical domain is sliced into slabs along the vertical direction, + while the grids are replicated across the MPI processes. + This approach is not the most efficient in terms of memory usage, + because the arrays are replicated across MPI process instead of to be distributed */ + + /* get the reminder, i.e. take into account uneven + decomposition of points among the processes */ + const int rem = (NX_GLOB + 2) % Nranks; + + /* get the amount of data for each MPI process: + - chunk is supposed to be >= 1 */ + const int chunk = (NX_GLOB + 2 - rem) / Nranks; + if (chunk < 1) + { + printf("\n\t chunk < 1 ... 
aborting ...[rank: %d]\n", rank); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* get the slab dimension along vertical direction: */ + int incr, offset; + if (rank < rem) + { + incr = chunk + 1; + offset = 0; + } + else + { + incr = chunk; + offset = rem; + } + + myDomain domain; + domain.start = ((rank * incr) + offset); + domain.end = (domain.start + incr) - 1; + /* boundaries */ + domain.start = ((domain.start == 0) ? NGHOST : domain.start); + domain.end = ((domain.end == NX_GLOB + 1) ? NX_GLOB : domain.end); + + /* rank of the top process */ + const int nbrtop = (((rank + 1) >= Nranks) ? MPI_PROC_NULL : (rank + 1)); + /* rank of the bottom process */ + const int nbrbottom = (((rank - 1) < 0) ? MPI_PROC_NULL : (rank - 1)); + + /******************************************************************************************************/ + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int iend = ibeg + NX_GLOB - 1; + const int nx = iend - ibeg + 1; + const int nx_tot = nx + 2 * NGHOST; + + const int jbeg = NGHOST; + const int jend = jbeg + NY_GLOB - 1; + const int ny = jend - jbeg + 1; + const int ny_tot = ny + 2 * NGHOST; + + if (rank == MASTER) + { + printf("\n\t Grid indices:"); + printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot); + printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot); + } + +#if defined(DEBUG) + for (int task=0 ; task<Nranks ; task++) + { + MPI_Barrier(comm); + + if (task == rank) + { + printf("\n\t rank: %d", rank); + printf("\n\t\t domain.start: %d - domain.end: %d", domain.start, domain.end); + printf("\n\t\t nbrtop: %d - nbrbottom: %d \n", nbrtop, nbrbottom); + fflush(stdout); + } + } + + MPI_Barrier(comm); +#endif /* DEBUG */ + + /* -------------------------------------------------------- + 2. Generate grid, allocate memory + Not optimized because the grids are (unnecessarily) + replicated across MPI processes + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + MyData *x = xg; /* Global and local grids are the same */ + MyData *y = yg; /* for serial version of the code */ + + /* grids memory allocation */ + MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot); + MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot); + + /* -------------------------------------------------------- + 3. Initialize solution array to 0 + -------------------------------------------------------- */ + + for (int j=jbeg ; j<=jend ; j++) + for (int i=ibeg ; i<=iend ; i++) + { + phi0[j][i] = 0.0; + phi[j][i] = 0.0; + } + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = MPI_Wtime(); + + /* -- 4a. 
Set boundary conditions first -- */ + BoundaryConditions(phi0, x, y, nx, ny); + BoundaryConditions(phi, x, y, nx, ny); + + MyData err = 1.0; + /* iterations */ + int k = 0; + while (1) + { + /* -- 4c. Jacobi's method and residual (interior points) -- */ + /* core algorithm */ + + err = 0.0; + JacobiAlgorithm(phi, phi0, &err, delta, + ibeg, iend, domain.start, domain.end); + + if (!rank) + printf("\n\t Iteration = %d - err = %lg\n",k, err); + + /* increase the counter of loop iterations */ + k++; + + /* get the total error */ + MyData toterr; + /* combines values from all processes and distributes the result back to all processes */ + MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm); + + /* check convergence */ + if (toterr <= TOL) + { + /* master task gathers all the domains */ + get_domains(phi, &domain, NY_GLOB, rank, Nranks, comm); + + break; + } + + /* -- 4b. MPI communications */ + mpi_exchange_sendrecv_1d(phi, NX_GLOB, + nbrtop, nbrbottom, + domain.start, domain.end, comm); + + /* swap the pointers */ + MyData **tmp = phi; + phi = phi0; + phi0 = tmp; + } + + /* master rank writes the solution */ + if (!rank) + { + WriteSolution(phi, nx, ny); + + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start); + } + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + if (yg) + free(yg); + + if (xg) + free(xg); + + MPI_Finalize(); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int iend = ibeg + nx - 1; + + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + int i,j; + + /* -- Left -- */ + i = ibeg - 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (1.0 - y[j]); + + /* -- Right -- */ + i = jend + 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (y[j] * y[j]); + + /* -- Bottom -- */ + j = jbeg - 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = (1.0 - x[i]); + + /* -- Top -- */ + j = jend + 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = x[i]; + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData *const restrict delta, + const int ibeg, + const int iend, + const int jbeg, + const int jend) +{ + *error = 0.0; + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] + + Phi0[j-1][i] + Phi0[j+1][i]); + + *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]); + } /* loop over columns */ + } /* loop over rows */ + + return; +} + +/* ********************************************************************* */ +void mpi_exchange_sendrecv_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d) +{ + /* The function is called by each MPI rank */ + /* - nbrtop is the MPI process with rank + 1 */ + /* - nbrbottom is the MPI process with rank - 1 */ + + /* SendRecv to/from nbrtop/nbrbottom */ + + + /* SendRecv to/from nbrbottom/nbrtop */ + + return; +} + +/* 
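A minimal sketch of how the body of mpi_exchange_sendrecv_1d() above could be filled in, assuming each row of buffer stores n + 2 contiguous values (the interior points plus the two ghost columns); the tag values and the use of MPI_STATUS_IGNORE are illustrative choices, not requirements:

    /* send the last interior row to nbrtop and, in the same call,
       receive the bottom ghost row (start - 1) from nbrbottom     */
    MPI_Sendrecv(&buffer[end][0],       n + 2, MPI_MyDatatype, nbrtop,    0,
                 &buffer[start - 1][0], n + 2, MPI_MyDatatype, nbrbottom, 0,
                 comm1d, MPI_STATUS_IGNORE);

    /* send the first interior row to nbrbottom and receive
       the top ghost row (end + 1) from nbrtop                */
    MPI_Sendrecv(&buffer[start][0],     n + 2, MPI_MyDatatype, nbrbottom, 1,
                 &buffer[end + 1][0],   n + 2, MPI_MyDatatype, nbrtop,    1,
                 comm1d, MPI_STATUS_IGNORE);

Ranks at the top and bottom of the domain have MPI_PROC_NULL neighbours, so the corresponding transfers degenerate to no-ops and need no special casing.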
********************************************************************* */ + +void get_domains(MyData **const buffer, + myDomain *const domain, + const int grid_dim, + const int rank, + const int nranks, + const MPI_Comm comm) +{ + /* Master process gathers all the domains */ + + /***************************** get the domain boundaries from each process *************************************/ + myDomain *boundaries = NULL; + if (rank == MASTER) + { + boundaries = (myDomain *)malloc(nranks * sizeof(*boundaries)); + if (boundaries == NULL) + { + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + boundaries[MASTER].start = domain->start; + boundaries[MASTER].end = domain->end; + } + + for (int task=0 ; task<nranks ; task++) + { + if (rank == MASTER) + { + if (task) + { + MPI_Status status; + MPI_Recv((void *)&boundaries[task], sizeof(myDomain), MPI_BYTE, task, 0, comm, &status); + /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */ + /* { */ + /* free(boundaries); */ + /* MPI_Abort(comm, EXIT_FAILURE); */ + /* exit(EXIT_FAILURE); */ + /* } */ + } +#if defined(DEBUG) + printf("\n\t Diplacements[%d].start = %d - Diplacements[%d].end = %d", + task, boundaries[task].start, task, boundaries[task].end); +#endif /* DEBUG */ + } /* MASTER */ + else if (task == rank) + { + MPI_Send((void *)domain, sizeof(myDomain), MPI_BYTE, MASTER, 0, comm); + } + } /* loop over nranks */ + +#if defined(DEBUG) + if (rank == MASTER) + { + printf("\n"); + fflush(stdout); + } +#endif /* DEBUG */ + + /**************************************************************************************************/ + + /***************************************** get the domain from each process *************************/ + + for (int task=0 ; task<nranks ; task++) + { + if (rank == MASTER) + { + if (task) + { + MPI_Status status; + /* number of grid points to receive (including ghost points) */ + const int nrows = (boundaries[task].end - boundaries[task].start + 1); + const int elements = (nrows * (grid_dim + 2)); + MPI_Recv((void *)&buffer[boundaries[task].start][0], elements, MPI_MyDatatype, task, 0, comm, &status); + /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */ + /* { */ + /* free(boundaries); */ + /* MPI_Abort(comm, EXIT_FAILURE); */ + /* exit(EXIT_FAILURE); */ + /* } */ + } + } /* MASTER */ + else if (task == rank) + { + const int nrows = (domain->end - domain->start + 1); + const int elements = (nrows * (grid_dim + 2)); + MPI_Send((void *)&buffer[domain->start][0], elements, MPI_MyDatatype, MASTER, 0, comm); + } + } /* loop over nranks */ + + /***************************************************************************************************/ + + if (rank == MASTER) + free(boundaries); + + return; +} + +/* ********************************************************************* */ +void WriteSolution(MyData **const phi, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + static int nfile = 0; /* File counter */ + + char fname[128]; + sprintf(fname,"jacobi2D_mpi_sendrecv_%02d.bin", nfile); + + FILE *fp; + printf ("> Writing %s\n",fname); + fp = fopen(fname, "wb"); + + /* discard boundaies */ + for (int j=jbeg ; j<=jend ; j++) + { + fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp); + } + + nfile++; + fclose(fp); +} diff --git a/jacobi/mpi/SendRecv/src/tools.c b/jacobi/mpi/SendRecv/src/tools.c new file mode 100644 index 
0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce --- /dev/null +++ b/jacobi/mpi/SendRecv/src/tools.c @@ -0,0 +1,59 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate memory for a double precision array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = malloc(nx * sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timeval tmp; + gettimeofday(&tmp, (struct timezone *)0); + double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; + + return sec; +} diff --git a/jacobi/mpi/Send_Recv_blocking/Makefile b/jacobi/mpi/Send_Recv_blocking/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..aa0427cc27994561b2d6e0aa13eb77f8f9cd8b19 --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/Makefile @@ -0,0 +1,61 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +include make_mpi_path +####################################################################### + +.PHONY: info mpi debug valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean + +info: + @echo ' ' + @echo '-----------------------------------------------------------------------------------------' + @echo '$$ make ---> compile the mpi application ' + @echo '$$ make debug ---> compile the mpi application for debugging ' + @echo '$$ make valgrind_memcheck ---> run the mpi application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the mpi application using Valgrind under Callgrind ' + @echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '-----------------------------------------------------------------------------------------' + @echo ' ' + +mpi: $(PROG) + +debug: $(PROG_DEBUG) + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... 
valgrind_memcheck ...OOOooo' + mpirun -n 2 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 5 5 + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... valgrind_callgrind ...OOOooo' + mpirun -n 2 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + mpirun -n 2 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin + rm -f jacobi_mpi_Send_Recv_blocking_* diff --git a/jacobi/mpi/Send_Recv_blocking/include/allvars.h b/jacobi/mpi/Send_Recv_blocking/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..798e4e6a2f2a740b3ab5a1394ac85eef8da1255c --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/include/allvars.h @@ -0,0 +1,20 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +#include <mpi.h> + +#define MASTER 0 + +#if defined(SINGLE_PRECISION) +typedef float MyData; +#define MPI_MyDatatype MPI_FLOAT_PRECISION +#else +/* double precision is assumed by default */ +typedef double MyData; +#define MPI_MyDatatype MPI_DOUBLE_PRECISION +#endif /* SINGLE_PRECISION */ diff --git a/jacobi/mpi/Send_Recv_blocking/include/tools.h b/jacobi/mpi/Send_Recv_blocking/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..066ae344777161b1979234fbf4fa9eece99d03b5 --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/include/tools.h @@ -0,0 +1,19 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); + diff --git a/jacobi/mpi/Send_Recv_blocking/make.def b/jacobi/mpi/Send_Recv_blocking/make.def new file mode 100644 index 0000000000000000000000000000000000000000..b6e3ba0df86a743b4f7af448d637d5b096080273 --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/make.def @@ -0,0 +1,45 @@ +CC = mpicc +CFLAGS = -Wall -Wextra -march=native +LIBS = -lm -lmpi + +SYSTYPE = $(strip $(shell uname -n)) + +PROG = jacobi_mpi_Send_Recv_blocking_$(SYSTYPE) +PROG_DEBUG = $(PROG)_DEBUG +PROG_MEMCHECK = $(PROG)_MEMCHECK +PROG_CALLGRIND = $(PROG)_CALLGRIND +PROG_CACHEGRIND = $(PROG)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' 
$(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -DDEBUG -Og -ggdb3 -fno-omit-frame-pointer -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/mpi/Send_Recv_blocking/make_mpi_path b/jacobi/mpi/Send_Recv_blocking/make_mpi_path new file mode 100644 index 0000000000000000000000000000000000000000..3fdf888da9d4837eb7eafb51c93e53516774ce93 --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/make_mpi_path @@ -0,0 +1,10 @@ +# set the MPI install path + +# pleiadi +MPI_INSTALL_PATH = /opt/cluster/spack/opt/spack/linux-centos7-broadwell/gcc-11.2.0/openmpi-4.1.3-djxjqlmzbqwq76bhh3wvgxaefnoczleg + + + +MPI = $(MPI_INSTALL_PATH) +MPI_INC = $(MPI)/include +MPI_LIB = $(MPI)/lib \ No newline at end of file diff --git a/jacobi/mpi/Send_Recv_blocking/script/input_parameters b/jacobi/mpi/Send_Recv_blocking/script/input_parameters new file mode 100644 index 0000000000000000000000000000000000000000..be0cc32d813a1c58dfc2623b7c33ba4efd81bde7 --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/script/input_parameters @@ -0,0 +1,10 @@ +########################################################################## + +# set the grid size + +GRID_SIZE_X=128 +GRID_SIZE_Y=128 + +TASKS=(2 4 8) + +########################################################################## diff --git a/jacobi/mpi/Send_Recv_blocking/script/run_pleiadi.sh b/jacobi/mpi/Send_Recv_blocking/script/run_pleiadi.sh new file mode 100755 index 0000000000000000000000000000000000000000..1043d5ac7180019a9f27d9b8c8524c129c2df20a --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/script/run_pleiadi.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +######################### RESOURSE ALLOCATION ##################################### +##SBATCH --account=???????? + +#SBATCH --partition=pleiadi +#SBATCH --job-name="Jacobi" +#SBATCH --nodes=1 +#SBATCH --exclusive +#SBATCH --output=Jacobi-mpi-Send_Recv_nonblocking-%j.out +#SBATCH --error=Jacobi-mpi-Send_Recv_nonblocking.%j.err +#SBATCH --time=00:05:00 +################################################################################### + +################################## MODULES ######################################## +export MODULE_VERSION=5.0.1 +source /opt/cluster/spack/share/spack/setup-env.sh + +# module purge +module purge +# load GCC +module load default-gcc-11.2.0 +################################################################################### + +# input parameters +source input_parameters + +WORKDIR=${PWD} +# compile the application +cd .. && make clean && make mpi +if [[ "$?" != "0" ]] +then + echo "Cannot compile the application ...aborting..." + exit 1 +fi + +# get the executable +EXEC=$(find $(realpath ./) -maxdepth 1 -executable -name "jacobi_*" -type f -print) +if [[ "$?" 
!= "0" ]] +then + echo "Cannot find the executable ...aborting..." + exit 2 +fi + +for TASK in ${TASKS[@]} +do + # run the application + time mpirun -n ${TASK} ${EXEC} ${GRID_SIZE_X} ${GRID_SIZE_Y} |& tee ${EXEC}_TASK_${TASK}_output.txt +done + +cd ${WORKDIR} + +exit 0 diff --git a/jacobi/mpi/Send_Recv_blocking/src/jacobi_2D_mpi_send_recv_blocking.c b/jacobi/mpi/Send_Recv_blocking/src/jacobi_2D_mpi_send_recv_blocking.c new file mode 100644 index 0000000000000000000000000000000000000000..b33bbedfa2a19cfc30482e26702540c3de12d2a6 --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/src/jacobi_2D_mpi_send_recv_blocking.c @@ -0,0 +1,524 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +/* #define DEBUG */ + +typedef struct MyRows +{ + int start; + int end; +} myDomain; + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + const int, + const int); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict, + const MyData *const restrict, const int, const int, const int, const int); + +void get_domains(MyData **const buffer, + myDomain *const domain, + const int grid_dim, + const int rank, + const int nranks, + const MPI_Comm comm); + +void mpi_exchange_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d); + +void WriteSolution(MyData **const phi, const int nx, const int ny); + +void copy_grids(MyData **const restrict A, + MyData **const restrict B, + const int xbeg, + const int xend, + const int ybeg, + const int yend); + +int main(int argc, char **argv) +{ + int rank, Nranks; + const MPI_Comm comm = MPI_COMM_WORLD; + MPI_Init(&argc, &argv); + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &Nranks); + + if ((argc <= 1) && !rank) + { + printf("\n\t Usage: <executable> <grid_size> \n\n"); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* global X and Y grid size (square matrix supposed) */ + const int NX_GLOB = (int) strtol(argv[1], NULL, 10); + const int NY_GLOB = NX_GLOB; + + /******************************************************************************************************/ + /* 1D MPI-decomposition: + the physical domain is sliced into slabs along the vertical direction, + while the grids are replicated across the MPI processes. + This approach is not the most efficient in terms of memory usage, + because the arrays are replicated across MPI process instead of to be distributed */ + + /* get the reminder, i.e. take into account uneven + decomposition of points among the processes */ + const int rem = (NX_GLOB + 2) % Nranks; + + /* get the amount of data for each MPI process: + - chunk is supposed to be >= 1 */ + const int chunk = (NX_GLOB + 2 - rem) / Nranks; + if (chunk < 1) + { + printf("\n\t chunk < 1 ... 
aborting ...[rank: %d]\n", rank); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* get the slab dimension along vertical direction: */ + int incr, offset; + if (rank < rem) + { + incr = chunk + 1; + offset = 0; + } + else + { + incr = chunk; + offset = rem; + } + + myDomain domain; + domain.start = ((rank * incr) + offset); + domain.end = (domain.start + incr) - 1; + /* boundaries */ + domain.start = ((domain.start == 0) ? NGHOST : domain.start); + domain.end = ((domain.end == NX_GLOB + 1) ? NX_GLOB : domain.end); + + /* rank of the top process */ + const int nbrtop = (((rank + 1) >= Nranks) ? MPI_PROC_NULL : (rank + 1)); + /* rank of the bottom process */ + const int nbrbottom = (((rank - 1) < 0) ? MPI_PROC_NULL : (rank - 1)); + + /******************************************************************************************************/ + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int iend = ibeg + NX_GLOB - 1; + const int nx = iend - ibeg + 1; + const int nx_tot = nx + 2 * NGHOST; + + const int jbeg = NGHOST; + const int jend = jbeg + NY_GLOB - 1; + const int ny = jend - jbeg + 1; + const int ny_tot = ny + 2 * NGHOST; + + if (rank == MASTER) + { + printf("\n\t Grid indices:"); + printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot); + printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot); + } + +#if defined(DEBUG) + for (int task=0 ; task<Nranks ; task++) + { + MPI_Barrier(comm); + + if (task == rank) + { + printf("\n\t rank: %d", rank); + printf("\n\t\t domain.start: %d - domain.end: %d", domain.start, domain.end); + printf("\n\t\t nbrtop: %d - nbrbottom: %d \n", nbrtop, nbrbottom); + fflush(stdout); + } + } + + MPI_Barrier(comm); +#endif /* DEBUG */ + + /* -------------------------------------------------------- + 2. Generate grid, allocate memory + Not optimized because the grids are (unnecessarily) + replicated across MPI processes + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + MyData *x = xg; /* Global and local grids are the same */ + MyData *y = yg; /* for serial version of the code */ + + /* grids memory allocation */ + MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot); + MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot); + + /* -------------------------------------------------------- + 3. Initialize solution array to 0 + -------------------------------------------------------- */ + + for (int j=jbeg ; j<=jend ; j++) + for (int i=ibeg ; i<=iend ; i++) + { + phi0[j][i] = 0.0; + phi[j][i] = 0.0; + } + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = MPI_Wtime(); + + /* -- 4a. 
Set boundary conditions first -- */ + BoundaryConditions(phi0, x, y, nx, ny); + BoundaryConditions(phi, x, y, nx, ny); + + MyData err = 1.0; + /* iterations */ + int k = 0; + while (1) + { + /* -- 4c. Jacobi's method and residual (interior points) -- */ + /* core algorithm */ + + err = 0.0; + JacobiAlgorithm(phi, phi0, &err, delta, + ibeg, iend, domain.start, domain.end); + + if (!rank) + printf("\n\t Iteration = %d - err = %lg\n",k, err); + + /* increase the counter of loop iterations */ + k++; + + /* get the total error */ + MyData toterr; + /* combines values from all processes and distributes the result back to all processes */ + MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm); + + /* check convergence */ + if (toterr <= TOL) + { + /* master task gathers all the domains */ + get_domains(phi, &domain, NY_GLOB, rank, Nranks, comm); + + break; + } + + /* -- 4b. MPI communications */ + mpi_exchange_1d(phi, NX_GLOB, + nbrtop, nbrbottom, + domain.start, domain.end, comm); + + /* swap the pointers */ + MyData **tmp = phi; + phi = phi0; + phi0 = tmp; + } + + /* master rank writes the solution */ + if (!rank) + { + WriteSolution(phi, nx, ny); + + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start); + } + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + if (yg) + free(yg); + + if (xg) + free(xg); + + MPI_Finalize(); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int iend = ibeg + nx - 1; + + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + int i,j; + + /* -- Left -- */ + i = ibeg - 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (1.0 - y[j]); + + /* -- Right -- */ + i = jend + 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (y[j] * y[j]); + + /* -- Bottom -- */ + j = jbeg - 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = (1.0 - x[i]); + + /* -- Top -- */ + j = jend + 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = x[i]; + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData *const restrict delta, + const int ibeg, + const int iend, + const int jbeg, + const int jend) +{ + *error = 0.0; + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] + + Phi0[j-1][i] + Phi0[j+1][i]); + + *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]); + } /* loop over columns */ + } /* loop over rows */ + + return; +} + +/* ********************************************************************* */ +void mpi_exchange_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d) +{ + /* The function is called by each MPI rank + - nbrtop is the MPI process with rank + 1 + - nbrbottom is the MPI process with rank - 1 */ + + /***************** First communication stage *******************/ + /* Perform a blocking send to the top (rank+1) process */ + + + + + /* Perform a blocking receive from the bottom 
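A sketch of the two blocking stages that the placeholder comments in mpi_exchange_1d() describe, again assuming n + 2 contiguous values per row (interior points plus the two ghost columns); the tags are illustrative:

    MPI_Status status;

    /* first stage: send the last interior row to nbrtop,
       then receive the bottom ghost row from nbrbottom    */
    MPI_Send(&buffer[end][0],       n + 2, MPI_MyDatatype, nbrtop,    0, comm1d);
    MPI_Recv(&buffer[start - 1][0], n + 2, MPI_MyDatatype, nbrbottom, 0, comm1d, &status);

    /* second stage: send the first interior row to nbrbottom,
       then receive the top ghost row from nbrtop             */
    MPI_Send(&buffer[start][0],     n + 2, MPI_MyDatatype, nbrbottom, 1, comm1d);
    MPI_Recv(&buffer[end + 1][0],   n + 2, MPI_MyDatatype, nbrtop,    1, comm1d, &status);

Note that this send-before-receive ordering relies on the MPI library buffering the standard-mode sends; with very large rows a safer variant pairs the calls (as in the SendRecv version) or posts the receives first.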
(rank-1) process */ + + + + + + /***************************************************************/ + + /**************** Second communication stage *******************/ + /* - Perform a blocking send to the bottom (rank-1) process */ + + + + + /* Perform a blocking receive from the top (rank+1) process */ + + + + + /***************************************************************/ + + return; +} + +/* ********************************************************************* */ + +void get_domains(MyData **const buffer, + myDomain *const domain, + const int grid_dim, + const int rank, + const int nranks, + const MPI_Comm comm) +{ + /* Master process gathers all the domains */ + + /***************************** get the domain boundaries from each process *************************************/ + myDomain *boundaries = NULL; + if (rank == MASTER) + { + boundaries = (myDomain *)malloc(nranks * sizeof(*boundaries)); + if (boundaries == NULL) + { + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + boundaries[MASTER].start = domain->start; + boundaries[MASTER].end = domain->end; + } + + for (int task=0 ; task<nranks ; task++) + { + if (rank == MASTER) + { + if (task) + { + MPI_Status status; + MPI_Recv((void *)&boundaries[task], sizeof(myDomain), MPI_BYTE, task, 0, comm, &status); + /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */ + /* { */ + /* free(boundaries); */ + /* MPI_Abort(comm, EXIT_FAILURE); */ + /* exit(EXIT_FAILURE); */ + /* } */ + } +#if defined(DEBUG) + printf("\n\t Diplacements[%d].start = %d - Diplacements[%d].end = %d", + task, boundaries[task].start, task, boundaries[task].end); +#endif /* DEBUG */ + } /* MASTER */ + else if (task == rank) + { + MPI_Send((void *)domain, sizeof(myDomain), MPI_BYTE, MASTER, 0, comm); + } + } /* loop over nranks */ + +#if defined(DEBUG) + if (rank == MASTER) + { + printf("\n"); + fflush(stdout); + } +#endif /* DEBUG */ + + /**************************************************************************************************/ + + /***************************************** get the domain from each process *************************/ + + for (int task=0 ; task<nranks ; task++) + { + if (rank == MASTER) + { + if (task) + { + MPI_Status status; + /* number of grid points to receive (including ghost points) */ + const int nrows = (boundaries[task].end - boundaries[task].start + 1); + const int elements = (nrows * (grid_dim + 2)); + MPI_Recv((void *)&buffer[boundaries[task].start][0], elements, MPI_MyDatatype, task, 0, comm, &status); + /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */ + /* { */ + /* free(boundaries); */ + /* MPI_Abort(comm, EXIT_FAILURE); */ + /* exit(EXIT_FAILURE); */ + /* } */ + } + } /* MASTER */ + else if (task == rank) + { + const int nrows = (domain->end - domain->start + 1); + const int elements = (nrows * (grid_dim + 2)); + MPI_Send((void *)&buffer[domain->start][0], elements, MPI_MyDatatype, MASTER, 0, comm); + } + } /* loop over nranks */ + + /***************************************************************************************************/ + + if (rank == MASTER) + free(boundaries); + + return; +} + +/* ********************************************************************* */ +void WriteSolution(MyData **const phi, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + static int nfile = 0; /* File counter */ + + char 
fname[128]; + sprintf(fname,"jacobi2D_mpi_send_recv_blocking_%02d.bin", nfile); + + FILE *fp; + printf ("> Writing %s\n",fname); + fp = fopen(fname, "wb"); + + /* discard boundaies */ + for (int j=jbeg ; j<=jend ; j++) + { + fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp); + } + + nfile++; + fclose(fp); +} diff --git a/jacobi/mpi/Send_Recv_blocking/src/tools.c b/jacobi/mpi/Send_Recv_blocking/src/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce --- /dev/null +++ b/jacobi/mpi/Send_Recv_blocking/src/tools.c @@ -0,0 +1,59 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate memory for a double precision array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = malloc(nx * sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timeval tmp; + gettimeofday(&tmp, (struct timezone *)0); + double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; + + return sec; +} diff --git a/jacobi/mpi/Send_Recv_nonblocking/Makefile b/jacobi/mpi/Send_Recv_nonblocking/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..7013e43ef2a715a3f238199d3030392d79048527 --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/Makefile @@ -0,0 +1,61 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +include make_mpi_path +####################################################################### + +.PHONY: info mpi debug valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean + +info: + @echo ' ' + @echo '-----------------------------------------------------------------------------------------' + @echo '$$ make mpi ---> compile the mpi application ' + @echo '$$ make debug ---> compile the mpi application for debugger ' + @echo '$$ make valgrind_memcheck ---> run the mpi application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the mpi application using Valgrind under Callgrind ' + @echo 
'$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '-----------------------------------------------------------------------------------------' + @echo ' ' + +mpi: $(PROG) + +debug: $(PROG_DEBUG) + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + mpirun -n 2 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 5 5 + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... valgrind_callgrind ...OOOooo' + mpirun -n 2 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + mpirun -n 2 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin + rm -f jacobi_mpi_Send_Recv_nonblocking_* diff --git a/jacobi/mpi/Send_Recv_nonblocking/include/allvars.h b/jacobi/mpi/Send_Recv_nonblocking/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..798e4e6a2f2a740b3ab5a1394ac85eef8da1255c --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/include/allvars.h @@ -0,0 +1,20 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +#include <mpi.h> + +#define MASTER 0 + +#if defined(SINGLE_PRECISION) +typedef float MyData; +#define MPI_MyDatatype MPI_FLOAT_PRECISION +#else +/* double precision is assumed by default */ +typedef double MyData; +#define MPI_MyDatatype MPI_DOUBLE_PRECISION +#endif /* SINGLE_PRECISION */ diff --git a/jacobi/mpi/Send_Recv_nonblocking/include/tools.h b/jacobi/mpi/Send_Recv_nonblocking/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154 --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/include/tools.h @@ -0,0 +1,18 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); diff --git a/jacobi/mpi/Send_Recv_nonblocking/make.def b/jacobi/mpi/Send_Recv_nonblocking/make.def new file mode 100644 index 0000000000000000000000000000000000000000..31f5cdfbddd736474560463e767954e9cf870389 --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/make.def @@ -0,0 +1,45 @@ +CC = mpicc +CFLAGS = -Wall -Wextra -march=native +LIBS = -lm -lmpi + +SYSTYPE = $(strip $(shell uname -n)) + +PROG = 
jacobi_mpi_Send_Recv_nonblocking_$(SYSTYPE) +PROG_DEBUG = $(PROG)_DEBUG +PROG_MEMCHECK = $(PROG)_MEMCHECK +PROG_CALLGRIND = $(PROG)_CALLGRIND +PROG_CACHEGRIND = $(PROG)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -ggdb3 -DDEBUG -fno-omit-frame-pointer -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/mpi/Send_Recv_nonblocking/make_mpi_path b/jacobi/mpi/Send_Recv_nonblocking/make_mpi_path new file mode 100644 index 0000000000000000000000000000000000000000..3fdf888da9d4837eb7eafb51c93e53516774ce93 --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/make_mpi_path @@ -0,0 +1,10 @@ +# set the MPI install path + +# pleiadi +MPI_INSTALL_PATH = /opt/cluster/spack/opt/spack/linux-centos7-broadwell/gcc-11.2.0/openmpi-4.1.3-djxjqlmzbqwq76bhh3wvgxaefnoczleg + + + +MPI = $(MPI_INSTALL_PATH) +MPI_INC = $(MPI)/include +MPI_LIB = $(MPI)/lib \ No newline at end of file diff --git a/jacobi/mpi/Send_Recv_nonblocking/script/input_parameters b/jacobi/mpi/Send_Recv_nonblocking/script/input_parameters new file mode 100644 index 0000000000000000000000000000000000000000..be0cc32d813a1c58dfc2623b7c33ba4efd81bde7 --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/script/input_parameters @@ -0,0 +1,10 @@ +########################################################################## + +# set the grid size + +GRID_SIZE_X=128 +GRID_SIZE_Y=128 + +TASKS=(2 4 8) + +########################################################################## diff --git a/jacobi/mpi/Send_Recv_nonblocking/script/run_pleiadi.sh b/jacobi/mpi/Send_Recv_nonblocking/script/run_pleiadi.sh new file mode 100755 index 0000000000000000000000000000000000000000..1043d5ac7180019a9f27d9b8c8524c129c2df20a --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/script/run_pleiadi.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +######################### RESOURSE ALLOCATION ##################################### +##SBATCH --account=???????? 
+ +#SBATCH --partition=pleiadi +#SBATCH --job-name="Jacobi" +#SBATCH --nodes=1 +#SBATCH --exclusive +#SBATCH --output=Jacobi-mpi-Send_Recv_nonblocking-%j.out +#SBATCH --error=Jacobi-mpi-Send_Recv_nonblocking.%j.err +#SBATCH --time=00:05:00 +################################################################################### + +################################## MODULES ######################################## +export MODULE_VERSION=5.0.1 +source /opt/cluster/spack/share/spack/setup-env.sh + +# module purge +module purge +# load GCC +module load default-gcc-11.2.0 +################################################################################### + +# input parameters +source input_parameters + +WORKDIR=${PWD} +# compile the application +cd .. && make clean && make mpi +if [[ "$?" != "0" ]] +then + echo "Cannot compile the application ...aborting..." + exit 1 +fi + +# get the executable +EXEC=$(find $(realpath ./) -maxdepth 1 -executable -name "jacobi_*" -type f -print) +if [[ "$?" != "0" ]] +then + echo "Cannot find the executable ...aborting..." + exit 2 +fi + +for TASK in ${TASKS[@]} +do + # run the application + time mpirun -n ${TASK} ${EXEC} ${GRID_SIZE_X} ${GRID_SIZE_Y} |& tee ${EXEC}_TASK_${TASK}_output.txt +done + +cd ${WORKDIR} + +exit 0 diff --git a/jacobi/mpi/Send_Recv_nonblocking/src/jacobi_2D_mpi_send_recv_nonblocking.c b/jacobi/mpi/Send_Recv_nonblocking/src/jacobi_2D_mpi_send_recv_nonblocking.c new file mode 100644 index 0000000000000000000000000000000000000000..e0200d938eee8804c6578a7e98213f7d8892e95d --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/src/jacobi_2D_mpi_send_recv_nonblocking.c @@ -0,0 +1,533 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. 
Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +/* #define DEBUG */ + +typedef struct MyRows +{ + int start; + int end; +} myDomain; + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + const int, + const int); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict, + const MyData *const restrict, const int, const int, const int, const int); + +void get_domains(MyData **const buffer, + myDomain *const domain, + const int grid_dim, + const int rank, + const int nranks, + const MPI_Comm comm); + +void mpi_exchange_nonblocking_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d); + +void WriteSolution(MyData **const phi, const int nx, const int ny); + +void copy_grids(MyData **const restrict A, + MyData **const restrict B, + const int xbeg, + const int xend, + const int ybeg, + const int yend); + +int main(int argc, char **argv) +{ + int rank, Nranks; + const MPI_Comm comm = MPI_COMM_WORLD; + MPI_Init(&argc, &argv); + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &Nranks); + + if ((argc <= 1) && !rank) + { + printf("\n\t Usage: <executable> <grid_size> \n\n"); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* global X and Y grid size (square matrix supposed) */ + const int NX_GLOB = (int) strtol(argv[1], NULL, 10); + const int NY_GLOB = NX_GLOB; + + /******************************************************************************************************/ + /* 1D MPI-decomposition: + the physical domain is sliced into slabs along the vertical direction, + while the grids are replicated across the MPI processes. + This approach is not the most efficient in terms of memory usage, + because the arrays are replicated across MPI process instead of to be distributed */ + + /* get the reminder, i.e. take into account uneven + decomposition of points among the processes */ + const int rem = (NX_GLOB + 2) % Nranks; + + /* get the amount of data for each MPI process: + - chunk is supposed to be >= 1 */ + const int chunk = (NX_GLOB + 2 - rem) / Nranks; + if (chunk < 1) + { + printf("\n\t chunk < 1 ... aborting ...[rank: %d]\n", rank); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* get the slab dimension along vertical direction: */ + int incr, offset; + if (rank < rem) + { + incr = chunk + 1; + offset = 0; + } + else + { + incr = chunk; + offset = rem; + } + + myDomain domain; + domain.start = ((rank * incr) + offset); + domain.end = (domain.start + incr) - 1; + /* boundaries */ + domain.start = ((domain.start == 0) ? NGHOST : domain.start); + domain.end = ((domain.end == NX_GLOB + 1) ? NX_GLOB : domain.end); + + /* rank of the top process */ + const int nbrtop = (((rank + 1) >= Nranks) ? MPI_PROC_NULL : (rank + 1)); + /* rank of the bottom process */ + const int nbrbottom = (((rank - 1) < 0) ? 
MPI_PROC_NULL : (rank - 1)); + + /******************************************************************************************************/ + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int iend = ibeg + NX_GLOB - 1; + const int nx = iend - ibeg + 1; + const int nx_tot = nx + 2 * NGHOST; + + const int jbeg = NGHOST; + const int jend = jbeg + NY_GLOB - 1; + const int ny = jend - jbeg + 1; + const int ny_tot = ny + 2 * NGHOST; + + if (rank == MASTER) + { + printf("\n\t Grid indices:"); + printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot); + printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot); + } + +#if defined(DEBUG) + for (int task=0 ; task<Nranks ; task++) + { + MPI_Barrier(comm); + + if (task == rank) + { + printf("\n\t rank: %d", rank); + printf("\n\t\t domain.start: %d - domain.end: %d", domain.start, domain.end); + printf("\n\t\t nbrtop: %d - nbrbottom: %d \n", nbrtop, nbrbottom); + fflush(stdout); + } + } + + MPI_Barrier(comm); +#endif /* DEBUG */ + + /* -------------------------------------------------------- + 2. Generate grid, allocate memory + Not optimized because the grids are (unnecessarily) + replicated across MPI processes + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + MyData *x = xg; /* Global and local grids are the same */ + MyData *y = yg; /* for serial version of the code */ + + /* grids memory allocation */ + MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot); + MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot); + + /* -------------------------------------------------------- + 3. Initialize solution array to 0 + -------------------------------------------------------- */ + + for (int j=jbeg ; j<=jend ; j++) + for (int i=ibeg ; i<=iend ; i++) + { + phi0[j][i] = 0.0; + phi[j][i] = 0.0; + } + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = MPI_Wtime(); + + /* -- 4a. Set boundary conditions first -- */ + BoundaryConditions(phi0, x, y, nx, ny); + BoundaryConditions(phi, x, y, nx, ny); + + MyData err = 1.0; + /* iterations */ + int k = 0; + while (1) + { + /* -- 4c. 
Jacobi's method and residual (interior points) -- */ + /* core algorithm */ + + err = 0.0; + JacobiAlgorithm(phi, phi0, &err, delta, + ibeg, iend, domain.start, domain.end); + + if (!rank) + printf("\n\t Iteration = %d - err = %lg\n",k, err); + + /* increase the counter of loop iterations */ + k++; + + /* get the total error */ + MyData toterr; + /* combines values from all processes and distributes the result back to all processes */ + MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm); + + /* check convergence */ + if (toterr <= TOL) + { + /* master task gathers all the domains */ + get_domains(phi, &domain, NY_GLOB, rank, Nranks, comm); + + break; + } + + /* -- 4b. MPI communications */ + mpi_exchange_nonblocking_1d(phi, NX_GLOB, + nbrtop, nbrbottom, + domain.start, domain.end, comm); + + /* swap the pointers */ + MyData **tmp = phi; + phi = phi0; + phi0 = tmp; + } + + /* master rank writes the solution */ + if (!rank) + { + WriteSolution(phi, nx, ny); + + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start); + } + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + if (yg) + free(yg); + + if (xg) + free(xg); + + MPI_Finalize(); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int iend = ibeg + nx - 1; + + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + int i,j; + + /* -- Left -- */ + i = ibeg - 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (1.0 - y[j]); + + /* -- Right -- */ + i = jend + 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (y[j] * y[j]); + + /* -- Bottom -- */ + j = jbeg - 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = (1.0 - x[i]); + + /* -- Top -- */ + j = jend + 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = x[i]; + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData *const restrict delta, + const int ibeg, + const int iend, + const int jbeg, + const int jend) +{ + *error = 0.0; + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] + + Phi0[j-1][i] + Phi0[j+1][i]); + + *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]); + } /* loop over columns */ + } /* loop over rows */ + + return; +} + +/* ********************************************************************* */ +void mpi_exchange_nonblocking_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d) +{ + /* The function is called by each MPI rank + - nbrtop is the MPI process with rank + 1 + - nbrbottom is the MPI process with rank - 1 */ + + /* communication request (handle) */ + + + + /************************* Process is ready to receive data ****************************/ + /* Perform a nonblocking receive from the bottom (rank-1) process */ + + + + /* Perform a nonblocking receive from the top (rank+1) process */ + + + + + 
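+  /* A possible completion of the exchange, shown here as a sketch rather than
+     the intended solution: post both nonblocking receives, then both
+     nonblocking sends, and wait on all four requests. The request array name,
+     the tag values (0 and 1), and the choice of exchanging only the n interior
+     values of each row (starting at column 1) are assumptions of this sketch.
+     MPI_PROC_NULL neighbours at the domain edges turn the corresponding calls
+     into no-ops; the guided comments in this function outline the same
+     pattern step by step. */
+
+  MPI_Request request[4];
+
+  /* receive the ghost row below the slab (from nbrbottom) and above it (from nbrtop) */
+  MPI_Irecv(&buffer[start - 1][1], n, MPI_MyDatatype, nbrbottom, 0, comm1d, &request[0]);
+  MPI_Irecv(&buffer[end   + 1][1], n, MPI_MyDatatype, nbrtop,    1, comm1d, &request[1]);
+
+  /* send the uppermost and lowermost interior rows of the local slab */
+  MPI_Isend(&buffer[end  ][1], n, MPI_MyDatatype, nbrtop,    0, comm1d, &request[2]);
+  MPI_Isend(&buffer[start][1], n, MPI_MyDatatype, nbrbottom, 1, comm1d, &request[3]);
+
+  /* independent computation could be overlapped here; for now simply wait */
+  MPI_Waitall(4, request, MPI_STATUSES_IGNORE);
+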
+  /***************************************************************************************/
+
+  /*************** Process begins to send data *******************************************/
+  /* Perform a nonblocking send to the top (rank+1) process */
+
+
+
+
+  /* - Perform a nonblocking send to the bottom (rank-1) process */
+
+
+
+  /***************************************************************************************/
+
+  /* some useful work (computation) could be performed here (while the buffer containing */
+  /* the message must not be modified). We will see how to overlap communication and     */
+  /* computation in the next exercises                                                    */
+
+
+  /******************** Waits for all given MPI Requests to complete *********************/
+
+
+  return;
+}
+
+/* ********************************************************************* */
+
+void get_domains(MyData **const buffer,
+                 myDomain *const domain,
+                 const int grid_dim,
+                 const int rank,
+                 const int nranks,
+                 const MPI_Comm comm)
+{
+  /* Master process gathers all the domains */
+
+  /***************************** get the domain boundaries from each process *************************************/
+  myDomain *boundaries = NULL;
+  if (rank == MASTER)
+    {
+      boundaries = (myDomain *)malloc(nranks * sizeof(*boundaries));
+      if (boundaries == NULL)
+        {
+          MPI_Abort(comm, EXIT_FAILURE);
+          exit(EXIT_FAILURE);
+        }
+      boundaries[MASTER].start = domain->start;
+      boundaries[MASTER].end   = domain->end;
+    }
+
+  for (int task=0 ; task<nranks ; task++)
+    {
+      if (rank == MASTER)
+        {
+          if (task)
+            {
+              MPI_Status status;
+              MPI_Recv((void *)&boundaries[task], sizeof(myDomain), MPI_BYTE, task, 0, comm, &status);
+              /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */
+              /*   {                                                                   */
+              /*     free(boundaries);                                                 */
+              /*     MPI_Abort(comm, EXIT_FAILURE);                                    */
+              /*     exit(EXIT_FAILURE);                                               */
+              /*   }                                                                   */
+            }
+#if defined(DEBUG)
+          printf("\n\t Displacements[%d].start = %d - Displacements[%d].end = %d",
+                 task, boundaries[task].start, task, boundaries[task].end);
+#endif /* DEBUG */
+        } /* MASTER */
+      else if (task == rank)
+        {
+          MPI_Send((void *)domain, sizeof(myDomain), MPI_BYTE, MASTER, 0, comm);
+        }
+    } /* loop over nranks */
+
+#if defined(DEBUG)
+  if (rank == MASTER)
+    {
+      printf("\n");
+      fflush(stdout);
+    }
+#endif /* DEBUG */
+
+  /**************************************************************************************************/
+
+  /***************************************** get the domain from each process ************************/
+
+  for (int task=0 ; task<nranks ; task++)
+    {
+      if (rank == MASTER)
+        {
+          if (task)
+            {
+              MPI_Status status;
+              /* number of grid points to receive (including ghost points) */
+              const int nrows    = (boundaries[task].end - boundaries[task].start + 1);
+              const int elements = (nrows * (grid_dim + 2));
+              MPI_Recv((void *)&buffer[boundaries[task].start][0], elements, MPI_MyDatatype, task, 0, comm, &status);
+              /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */
+              /*   {                                                                   */
+              /*     free(boundaries);                                                 */
+              /*     MPI_Abort(comm, EXIT_FAILURE);                                    */
+              /*     exit(EXIT_FAILURE);                                               */
+              /*   }                                                                   */
+            }
+        } /* MASTER */
+      else if (task == rank)
+        {
+          const int nrows    = (domain->end - domain->start + 1);
+          const int elements = (nrows * (grid_dim + 2));
+          MPI_Send((void *)&buffer[domain->start][0], elements, MPI_MyDatatype, MASTER, 0, comm);
+        }
+    } /* loop over nranks */
+
+  /***************************************************************************************************/
+
+  if (rank == MASTER)
+    free(boundaries);
+
+  return;
+}
+
+/* 
********************************************************************* */ +void WriteSolution(MyData **const phi, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + static int nfile = 0; /* File counter */ + + char fname[128]; + sprintf(fname,"jacobi2D_mpi_send_recv_nonblocking_%02d.bin", nfile); + + FILE *fp; + printf ("> Writing %s\n",fname); + fp = fopen(fname, "wb"); + + /* discard boundaies */ + for (int j=jbeg ; j<=jend ; j++) + { + fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp); + } + + nfile++; + fclose(fp); +} diff --git a/jacobi/mpi/Send_Recv_nonblocking/src/tools.c b/jacobi/mpi/Send_Recv_nonblocking/src/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce --- /dev/null +++ b/jacobi/mpi/Send_Recv_nonblocking/src/tools.c @@ -0,0 +1,59 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate memory for a double precision array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = malloc(nx * sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timeval tmp; + gettimeofday(&tmp, (struct timezone *)0); + double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; + + return sec; +} diff --git a/jacobi/mpi/Send_Recv_paired/Makefile b/jacobi/mpi/Send_Recv_paired/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..5fab606c821047892429f65740ee8f2f4f9473ec --- /dev/null +++ b/jacobi/mpi/Send_Recv_paired/Makefile @@ -0,0 +1,61 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +include make_mpi_path +####################################################################### + +.PHONY: info mpi debug valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean + +info: + @echo ' ' + @echo 
'-----------------------------------------------------------------------------------------' + @echo '$$ make mpi ---> compile the mpi application ' + @echo '$$ make debug ---> compile the mpi application for debugger ' + @echo '$$ make valgrind_memcheck ---> run the mpi application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the mpi application using Valgrind under Callgrind ' + @echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '-----------------------------------------------------------------------------------------' + @echo ' ' + +mpi: $(PROG) + +debug: $(PROG_DEBUG) + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + mpirun -n 2 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 5 5 + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... valgrind_callgrind ...OOOooo' + mpirun -n 2 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + mpirun -n 2 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... 
valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin + rm -f jacobi_mpi_Send_Recv_paired_* diff --git a/jacobi/mpi/Send_Recv_paired/include/allvars.h b/jacobi/mpi/Send_Recv_paired/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..798e4e6a2f2a740b3ab5a1394ac85eef8da1255c --- /dev/null +++ b/jacobi/mpi/Send_Recv_paired/include/allvars.h @@ -0,0 +1,20 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +#include <mpi.h> + +#define MASTER 0 + +#if defined(SINGLE_PRECISION) +typedef float MyData; +#define MPI_MyDatatype MPI_FLOAT_PRECISION +#else +/* double precision is assumed by default */ +typedef double MyData; +#define MPI_MyDatatype MPI_DOUBLE_PRECISION +#endif /* SINGLE_PRECISION */ diff --git a/jacobi/mpi/Send_Recv_paired/include/tools.h b/jacobi/mpi/Send_Recv_paired/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154 --- /dev/null +++ b/jacobi/mpi/Send_Recv_paired/include/tools.h @@ -0,0 +1,18 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); diff --git a/jacobi/mpi/Send_Recv_paired/make.def b/jacobi/mpi/Send_Recv_paired/make.def new file mode 100644 index 0000000000000000000000000000000000000000..fd62c5b50dda4b59673ee638fce2fc1c584aff37 --- /dev/null +++ b/jacobi/mpi/Send_Recv_paired/make.def @@ -0,0 +1,45 @@ +CC = mpicc +CFLAGS = -Wall -Wextra -march=native +LIBS = -lm -lmpi + +SYSTYPE = $(strip $(shell uname -n)) + +PROG = jacobi_mpi_Send_Recv_paired_$(SYSTYPE) +PROG_DEBUG = $(PROG)_DEBUG +PROG_MEMCHECK = $(PROG)_MEMCHECK +PROG_CALLGRIND = $(PROG)_CALLGRIND +PROG_CACHEGRIND = $(PROG)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -ggdb3 -DDEBUG -fno-omit-frame-pointer -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/mpi/Send_Recv_paired/make_mpi_path 
b/jacobi/mpi/Send_Recv_paired/make_mpi_path new file mode 100644 index 0000000000000000000000000000000000000000..3fdf888da9d4837eb7eafb51c93e53516774ce93 --- /dev/null +++ b/jacobi/mpi/Send_Recv_paired/make_mpi_path @@ -0,0 +1,10 @@ +# set the MPI install path + +# pleiadi +MPI_INSTALL_PATH = /opt/cluster/spack/opt/spack/linux-centos7-broadwell/gcc-11.2.0/openmpi-4.1.3-djxjqlmzbqwq76bhh3wvgxaefnoczleg + + + +MPI = $(MPI_INSTALL_PATH) +MPI_INC = $(MPI)/include +MPI_LIB = $(MPI)/lib \ No newline at end of file diff --git a/jacobi/mpi/Send_Recv_paired/src/jacobi_2D_mpi_send_recv_paired.c b/jacobi/mpi/Send_Recv_paired/src/jacobi_2D_mpi_send_recv_paired.c new file mode 100644 index 0000000000000000000000000000000000000000..580e80509d27bebc360590223c288f959e587847 --- /dev/null +++ b/jacobi/mpi/Send_Recv_paired/src/jacobi_2D_mpi_send_recv_paired.c @@ -0,0 +1,542 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +/* #define DEBUG */ + +typedef struct MyRows +{ + int start; + int end; +} myDomain; + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + const int, + const int); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict, + const MyData *const restrict, const int, const int, const int, const int); + +void get_domains(MyData **const buffer, + myDomain *const domain, + const int grid_dim, + const int rank, + const int nranks, + const MPI_Comm comm); + +void mpi_exchange_paired_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d); + +void WriteSolution(MyData **const phi, const int nx, const int ny); + +void copy_grids(MyData **const restrict A, + MyData **const restrict B, + const int xbeg, + const int xend, + const int ybeg, + const int yend); + +int main(int argc, char **argv) +{ + int rank, Nranks; + const MPI_Comm comm = MPI_COMM_WORLD; + MPI_Init(&argc, &argv); + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &Nranks); + + if ((argc <= 1) && !rank) + { + printf("\n\t Usage: <executable> <grid_size> \n\n"); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* global X and Y grid size (square matrix supposed) */ + const int NX_GLOB = (int) strtol(argv[1], NULL, 10); + const int NY_GLOB = NX_GLOB; + + /******************************************************************************************************/ + /* 1D MPI-decomposition: + the physical domain is sliced into slabs along the vertical direction, + while the grids are replicated across the MPI processes. + This approach is not the most efficient in terms of memory usage, + because the arrays are replicated across MPI process instead of to be distributed */ + + /* get the reminder, i.e. take into account uneven + decomposition of points among the processes */ + const int rem = (NX_GLOB + 2) % Nranks; + + /* get the amount of data for each MPI process: + - chunk is supposed to be >= 1 */ + const int chunk = (NX_GLOB + 2 - rem) / Nranks; + if (chunk < 1) + { + printf("\n\t chunk < 1 ... 
aborting ...[rank: %d]\n", rank); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* get the slab dimension along vertical direction: */ + int incr, offset; + if (rank < rem) + { + incr = chunk + 1; + offset = 0; + } + else + { + incr = chunk; + offset = rem; + } + + myDomain domain; + domain.start = ((rank * incr) + offset); + domain.end = (domain.start + incr) - 1; + /* boundaries */ + domain.start = ((domain.start == 0) ? NGHOST : domain.start); + domain.end = ((domain.end == NX_GLOB + 1) ? NX_GLOB : domain.end); + + /* rank of the top process */ + const int nbrtop = (((rank + 1) >= Nranks) ? MPI_PROC_NULL : (rank + 1)); + /* rank of the bottom process */ + const int nbrbottom = (((rank - 1) < 0) ? MPI_PROC_NULL : (rank - 1)); + + /******************************************************************************************************/ + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int iend = ibeg + NX_GLOB - 1; + const int nx = iend - ibeg + 1; + const int nx_tot = nx + 2 * NGHOST; + + const int jbeg = NGHOST; + const int jend = jbeg + NY_GLOB - 1; + const int ny = jend - jbeg + 1; + const int ny_tot = ny + 2 * NGHOST; + + if (rank == MASTER) + { + printf("\n\t Grid indices:"); + printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot); + printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot); + } + +#if defined(DEBUG) + for (int task=0 ; task<Nranks ; task++) + { + MPI_Barrier(comm); + + if (task == rank) + { + printf("\n\t rank: %d", rank); + printf("\n\t\t domain.start: %d - domain.end: %d", domain.start, domain.end); + printf("\n\t\t nbrtop: %d - nbrbottom: %d \n", nbrtop, nbrbottom); + fflush(stdout); + } + } + + MPI_Barrier(comm); +#endif /* DEBUG */ + + /* -------------------------------------------------------- + 2. Generate grid, allocate memory + Not optimized because the grids are (unnecessarily) + replicated across MPI processes + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + MyData *x = xg; /* Global and local grids are the same */ + MyData *y = yg; /* for serial version of the code */ + + /* grids memory allocation */ + MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot); + MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot); + + /* -------------------------------------------------------- + 3. Initialize solution array to 0 + -------------------------------------------------------- */ + + for (int j=jbeg ; j<=jend ; j++) + for (int i=ibeg ; i<=iend ; i++) + { + phi0[j][i] = 0.0; + phi[j][i] = 0.0; + } + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = MPI_Wtime(); + + /* -- 4a. 
Set boundary conditions first -- */ + BoundaryConditions(phi0, x, y, nx, ny); + BoundaryConditions(phi, x, y, nx, ny); + + MyData err = 1.0; + /* iterations */ + int k = 0; + while (1) + { + /* -- 4c. Jacobi's method and residual (interior points) -- */ + /* core algorithm */ + + err = 0.0; + JacobiAlgorithm(phi, phi0, &err, delta, + ibeg, iend, domain.start, domain.end); + + if (!rank) + printf("\n\t Iteration = %d - err = %lg\n",k, err); + + /* increase the counter of loop iterations */ + k++; + + /* get the total error */ + MyData toterr; + /* combines values from all processes and distributes the result back to all processes */ + MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm); + + /* check convergence */ + if (toterr <= TOL) + { + /* master task gathers all the domains */ + get_domains(phi, &domain, NY_GLOB, rank, Nranks, comm); + + break; + } + + /* -- 4b. MPI communications */ + mpi_exchange_paired_1d(phi, NX_GLOB, + nbrtop, nbrbottom, + domain.start, domain.end, comm); + + /* swap the pointers */ + MyData **tmp = phi; + phi = phi0; + phi0 = tmp; + } + + /* master rank writes the solution */ + if (!rank) + { + WriteSolution(phi, nx, ny); + + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start); + } + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + if (yg) + free(yg); + + if (xg) + free(xg); + + MPI_Finalize(); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int iend = ibeg + nx - 1; + + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + int i,j; + + /* -- Left -- */ + i = ibeg - 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (1.0 - y[j]); + + /* -- Right -- */ + i = jend + 1; + for (int j=jbeg ; j<=jend ; j++) + phi[j][i] = (y[j] * y[j]); + + /* -- Bottom -- */ + j = jbeg - 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = (1.0 - x[i]); + + /* -- Top -- */ + j = jend + 1; + for (int i=ibeg ; i<=iend ; i++) + phi[j][i] = x[i]; + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData *const restrict delta, + const int ibeg, + const int iend, + const int jbeg, + const int jend) +{ + *error = 0.0; + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] + + Phi0[j-1][i] + Phi0[j+1][i]); + + *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]); + } /* loop over columns */ + } /* loop over rows */ + + return; +} + +/* ********************************************************************* */ +void mpi_exchange_paired_1d(MyData **const buffer, + const int n, + const int nbrtop, + const int nbrbottom, + const int start, + const int end, + const MPI_Comm comm1d) +{ + /* The function is called by each MPI rank */ + /* - nbrtop is the MPI process with rank + 1 */ + /* - nbrbottom is the MPI process with rank - 1 */ + + int rank; + MPI_Comm_rank(comm1d, &rank); + + if ((rank % 2) == 0) /* even rank */ + { + /***************** First communication stage 
************************************************/ + /* Perform a blocking send to the top (rank+1) process */ + + + /* Perform a blocking receive from the bottom (rank-1) process */ + + + + /********************************************************************************************/ + + /**************** Second communication stage ************************************************/ + /* - Perform a blocking send to the bottom (rank-1) process */ + + + /* Perform a blocking receive from the top (rank+1) process */ + + + /********************************************************************************************/ + } + else /* odd rank */ + { + /***************** First communication stage ************************************************/ + /* Perform a blocking receive from the bottom (rank-1) process */ + + + /* Perform a blocking send to the top (rank+1) process */ + + + /********************************************************************************************/ + + /**************** Second communication stage ************************************************/ + /* Perform a blocking receive from the top (rank+1) process */ + + + /* - Perform a blocking send to the bottom (rank-1) process */ + + + /********************************************************************************************/ + } + + return; +} + +/* ********************************************************************* */ + +void get_domains(MyData **const buffer, + myDomain *const domain, + const int grid_dim, + const int rank, + const int nranks, + const MPI_Comm comm) +{ + /* Master process gathers all the domains */ + + /***************************** get the domain boundaries from each process *************************************/ + myDomain *boundaries = NULL; + if (rank == MASTER) + { + boundaries = (myDomain *)malloc(nranks * sizeof(*boundaries)); + if (boundaries == NULL) + { + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + boundaries[MASTER].start = domain->start; + boundaries[MASTER].end = domain->end; + } + + for (int task=0 ; task<nranks ; task++) + { + if (rank == MASTER) + { + if (task) + { + MPI_Status status; + MPI_Recv((void *)&boundaries[task], sizeof(myDomain), MPI_BYTE, task, 0, comm, &status); + /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */ + /* { */ + /* free(boundaries); */ + /* MPI_Abort(comm, EXIT_FAILURE); */ + /* exit(EXIT_FAILURE); */ + /* } */ + } +#if defined(DEBUG) + printf("\n\t Diplacements[%d].start = %d - Diplacements[%d].end = %d", + task, boundaries[task].start, task, boundaries[task].end); +#endif /* DEBUG */ + } /* MASTER */ + else if (task == rank) + { + MPI_Send((void *)domain, sizeof(myDomain), MPI_BYTE, MASTER, 0, comm); + } + } /* loop over nranks */ + +#if defined(DEBUG) + if (rank == MASTER) + { + printf("\n"); + fflush(stdout); + } +#endif /* DEBUG */ + + /**************************************************************************************************/ + + /***************************************** get the domain from each process *************************/ + + for (int task=0 ; task<nranks ; task++) + { + if (rank == MASTER) + { + if (task) + { + MPI_Status status; + /* number of grid points to receive (including ghost points) */ + const int nrows = (boundaries[task].end - boundaries[task].start + 1); + const int elements = (nrows * (grid_dim + 2)); + MPI_Recv((void *)&buffer[boundaries[task].start][0], elements, MPI_MyDatatype, task, 0, comm, &status); + /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */ + 
/* { */ + /* free(boundaries); */ + /* MPI_Abort(comm, EXIT_FAILURE); */ + /* exit(EXIT_FAILURE); */ + /* } */ + } + } /* MASTER */ + else if (task == rank) + { + const int nrows = (domain->end - domain->start + 1); + const int elements = (nrows * (grid_dim + 2)); + MPI_Send((void *)&buffer[domain->start][0], elements, MPI_MyDatatype, MASTER, 0, comm); + } + } /* loop over nranks */ + + /***************************************************************************************************/ + + if (rank == MASTER) + free(boundaries); + + return; +} + +/* ********************************************************************* */ +void WriteSolution(MyData **const phi, + const int nx, + const int ny) +/* +*********************************************************************** */ +{ + const int ibeg = NGHOST; + const int jbeg = NGHOST; + const int jend = jbeg + ny - 1; + + static int nfile = 0; /* File counter */ + + char fname[128]; + sprintf(fname,"jacobi2D_mpi_send_recv_paired_%02d.bin", nfile); + + FILE *fp; + printf ("> Writing %s\n",fname); + fp = fopen(fname, "wb"); + + /* discard boundaies */ + for (int j=jbeg ; j<=jend ; j++) + { + fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp); + } + + nfile++; + fclose(fp); +} diff --git a/jacobi/mpi/Send_Recv_paired/src/tools.c b/jacobi/mpi/Send_Recv_paired/src/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce --- /dev/null +++ b/jacobi/mpi/Send_Recv_paired/src/tools.c @@ -0,0 +1,59 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate memory for a double precision array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = malloc(nx * sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray(const MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timeval tmp; + gettimeofday(&tmp, (struct timezone *)0); + double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; + + return sec; +} diff --git a/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Broadcast_1.cc b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Broadcast_1.cc new file mode 100644 index 0000000000000000000000000000000000000000..88302cf76d4366a1831e639d3f554d28e2dd320f --- /dev/null +++ b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Broadcast_1.cc @@ -0,0 +1,88 @@ +#include <iostream> + +#include <mpi.h> +#include 
<stdio.h> +#include <stdlib.h> + +#define NELEMENTS 7 + +using namespace std; + +int main(int argc, char ** argv) +{ + int rank, size; + int buf[NELEMENTS]; + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + if (rank == 0) { + for(int i = 0; i < NELEMENTS; i++) buf[i] = 1 + i*i; + } + + MPI_Bcast(buf, NELEMENTS, MPI_INT, 0, MPI_COMM_WORLD); + +// for(rank = 1; rank < size; rank++) +// { +// for(int i = 0; i < NELEMENTS; i++) +// { +// cout<<"buf["<<i<<"] = "<<buf[i]<<endl; +// } +// cout<<endl; +// } + + if (rank == 0) { + cout<<"I am processor "<<rank<<endl; + cout<<endl; + for(int i = 0; i < NELEMENTS; i++) + { + cout<<"buf["<<i<<"] = "<<buf[i]<<endl; + } + } + + cout<<endl; + + if (rank == 1) { + cout<<"I am processor "<<rank<<endl; + cout<<endl; + for(int i = 0; i < NELEMENTS; i++) + { + cout<<"buf["<<i<<"] = "<<buf[i]<<endl; + } + } + + cout<<endl; + + if (rank == 2) { + cout<<"I am processor "<<rank<<endl; + cout<<endl; + for(int i = 0; i < NELEMENTS; i++) + { + cout<<"buf["<<i<<"] = "<<buf[i]<<endl; + } + } + + cout<<endl; + + if (rank == 3) { + cout<<"I am processor "<<rank<<endl; + cout<<endl; + for(int i = 0; i < NELEMENTS; i++) + { + cout<<"buf["<<i<<"] = "<<buf[i]<<endl; + } + } + +// if (rank == 0) { +// cout<<"I am processor "<<rank<<endl; +// cout<<endl; +// for(int i = 0; i < NELEMENTS; i++) +// { +// cout<<"buf["<<i<<"] = "<<buf[i]<<endl; +// } +// } + + MPI_Finalize(); + return 0; +} diff --git a/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Broadcast_2.cc b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Broadcast_2.cc new file mode 100644 index 0000000000000000000000000000000000000000..b23b1c8dcf03d484cf0f5d3fb72249a2815be937 --- /dev/null +++ b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Broadcast_2.cc @@ -0,0 +1,99 @@ +#include <iostream> + +#include <mpi.h> +#include <stdio.h> +#include <stdlib.h> + +//#define STR_LENGTH 32 +#define STR_LENGTH 21 + +using namespace std; + +int main(int argc, char ** argv) +{ + int rank, size; + /* NONONONONO*/ +// char string[STR_LENGTH]; + +// string[] = {'H','e','l','l','o',',',' ','I',' ','a','m',' ','p','r','o','c','e','s','s','o','r'}; + +// for(int i = 0; i < STR_LENGTH; i++) string[i] = "Hello, I'm processor"; + +// for(int i = 0; i < STR_LENGTH; i++) cout<<string[i]<<endl;; + +// char mystring[] = { 'H', 'e', 'l', 'l', 'o', '\0' }; +// for(int i = 0; i < STR_LENGTH; i++) + /* NONONONONO*/ + + + + + char mystring[] = {'H','e','l','l','o',',',' ','I',' ','a','m',' ','p','r','o','c','e','s','s','o','r','\0'}; + cout<<mystring<<endl; + + char string[STR_LENGTH]; + +// for(int i = 0; i < STR_LENGTH; i++) string[i] = mystring[i]; +// +// for(int i = 0; i < STR_LENGTH; i++) cout<<string[i]; +// for(int i = 0; i < STR_LENGTH; i++) cout<<mystring[i]; +// cout<<endl; + + + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + +// if (rank == 0) { +// for(int i = 0; i < STR_LENGTH; i++) string[i] = mystring[i]; +// for(int i = 0; i < STR_LENGTH; i++) cout<<string[i]; +// cout<<endl; +// } + + if (rank == 0) { + for(int i = 0; i < STR_LENGTH; i++) string[i] = mystring[i]; + for(int i = 0; i < STR_LENGTH; i++) cout<<string[i]; + cout<<endl; + } + + MPI_Bcast(string, STR_LENGTH, MPI_CHAR, 0, MPI_COMM_WORLD); + +// if (rank == 0) { +// for(int i = 0; i < STR_LENGTH; i++) cout<<string[i]; +// cout<<" "<<rank<<endl; +// } +// +// cout<<endl; +// +// if (rank == 1) { +// for(int i = 0; i < STR_LENGTH; i++) 
cout<<string[i]; +// cout<<" "<<rank<<endl; +// } +// +// cout<<endl; +// +// if (rank == 2) { +// for(int i = 0; i < STR_LENGTH; i++) cout<<string[i]; +// cout<<" "<<rank<<endl; +// } +// +// cout<<endl; +// +// if (rank == 3) { +// for(int i = 0; i < STR_LENGTH; i++) cout<<string[i]; +// cout<<" "<<rank<<endl; +// } +// +// cout<<endl; + +// for(rank = 0; rank < size; rank++) + for(int n = 0; n < size; n++) + { + for(int i = 0; i < STR_LENGTH; i++) cout<<string[i]; + cout<<" "<<rank<<endl; + } + + MPI_Finalize(); + return 0; +} diff --git a/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Gather_Scatter.c b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Gather_Scatter.c new file mode 100644 index 0000000000000000000000000000000000000000..917dc94c3a19ab8a4d68c07b423174b9b83848ed --- /dev/null +++ b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Gather_Scatter.c @@ -0,0 +1,36 @@ +#include <mpi.h> +#include <stdio.h> +#include <stdlib.h> + +int main(int argc, char ** argv) +{ + int n, rank, size; + double data; + double *send_buf, *recv_buf; + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + recv_buf = (double *) malloc(size*sizeof(double)); // allocate memory + send_buf = (double *) malloc(size*sizeof(double)); + + data = rank*rank + 1.0; // generate data on different procs + MPI_Gather(&data, 1, MPI_DOUBLE, recv_buf, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + + if (rank == 0){ + printf ("[Gather()]:\n"); + for (n = 0; n < size; n++) printf ("rnd[%d] = %f\n",n,recv_buf[n]); + } + + if (rank == 0){ + for (n = 0; n < size; n++) send_buf[n] = n*n - 1.0; // Generate “size” random numbers + } + + MPI_Scatter(send_buf, 1, MPI_DOUBLE, &data, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + + printf ("[Scatter, proc #%d] = %f\n",rank,data); + + MPI_Finalize(); + return 0; +} diff --git a/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Heat_Equation_Gather.c b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Heat_Equation_Gather.c new file mode 100644 index 0000000000000000000000000000000000000000..3e80507fd25e7c20f3a6e9c79b6cd06b2d26f07d --- /dev/null +++ b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Heat_Equation_Gather.c @@ -0,0 +1,251 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/*! + \file + \brief Solve 1D heat equation. + + Solve the 1D heat equation using a 1st order explicit method + on a parallel domain. + + \author A. Mignone (mignone@to.infn.it) + \date March 12, 2020 + */ +/* ///////////////////////////////////////////////////////////////////// */ +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +#define PARALLEL + +#ifdef PARALLEL +#include <mpi.h> +#endif + +#define NX_GLOB 64 /* Global number of interior points */ +#define NGHOST 1 + +void Write (double *, double *, int, int); + +int main(int argc, char ** argv) +{ + int i, k, beg, end; + int nx_loc; /* Local grid size */ + int dstL = -1, dstR=-1; /* Rank of left and right neighbour procs */ + int rank=0, size=1; + double t, tstop, dt, cfl = 0.5; + double *u0; + double *u1; + double xbeg = 0.0; + double xend = +1.0; + double xglob[NX_GLOB + 2*NGHOST]; // Global grid array + double *xloc; + double dx; /* Mesh spacing */ +#ifdef PARALLEL + double *send_buf; + double *recv_buf; +#endif + FILE *fp; + + /* -------------------------------------------------------- + 0. 
Initialize parallel environment & get neighbour + proc rank + -------------------------------------------------------- */ + +#ifdef PARALLEL + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + dstL = rank - 1; + dstR = rank + 1; + if (dstL < 0) dstL = MPI_PROC_NULL; + if (dstR >= size) dstR = MPI_PROC_NULL; +#endif + + /* -------------------------------------------------------- + 1. Generate global & local grids + -------------------------------------------------------- */ + +#ifdef PARALLEL + nx_loc = NX_GLOB/size; + beg = NGHOST; + end = beg + nx_loc - 1; + + dx = (xend - xbeg)/(NX_GLOB+1); + for (i = 0; i < NX_GLOB + 2*NGHOST; i++){ + xglob[i] = xbeg + (i-beg+1)*dx; + } + xloc = xglob + nx_loc*rank; /* Use pointer arithmetic */ +#else + nx_loc = NX_GLOB; + beg = NGHOST; + end = beg + nx_loc - 1; + + dx = (xend - xbeg)/(NX_GLOB+1); + for (i = 0; i < NX_GLOB + 2*NGHOST; i++){ + xglob[i] = xbeg + (i-beg+1)*dx; + } + xloc = xglob; /* Use pointer arithmetic */ +#endif + + /* -------------------------------------------------------- + 2. Allocate memory on local grids + -------------------------------------------------------- */ + + u0 = (double *) malloc((nx_loc + 2*NGHOST)*sizeof(double)); + u1 = (double *) malloc((nx_loc + 2*NGHOST)*sizeof(double)); + +#ifdef PARALLEL + { + int proc, go; + for (proc = 0; proc < size; proc++){ + go = proc; + MPI_Bcast(&go, 1, MPI_INT, 0, MPI_COMM_WORLD); + if (rank == go) { + printf ("[Rank %d]\n",rank); + printf (" dstL = %d, dstR = %d\n",dstL, dstR); + printf (" beg, end = %d, %d; x = [%f, %f]\n", + beg, end, xloc[beg],xloc[end]); + } + MPI_Barrier(MPI_COMM_WORLD); + } + } +#endif + + /* -------------------------------------------------------- + 3. Set initial condition + -------------------------------------------------------- */ + + for (i = beg; i <= end; i++){ + u0[i] = sin(M_PI*xloc[i]); + } + + /* -------------------------------------------------------- + 4. Advance solution + -------------------------------------------------------- */ + + t = 0.0; + tstop = 0.1; + dt = cfl*dx*dx; + k = 0; + + Write (xloc, u0, beg, end); + while (t < tstop){ + + if (rank == 0){ + printf ("step #%d; t = %8.3e\n",k,t); + } + + /* -- 4a. Set physical boundary conditions -- */ + + if (dstL == MPI_PROC_NULL){ + u0[beg-1] = 0.0; + } + if (dstR == MPI_PROC_NULL){ + u0[end+1] = 0.0; + } + + /* -- 4b. Set inter-process boundary conditions -- */ + +#ifdef PARALLEL + send_buf = u0 + end - (NGHOST - 1); // Address of rightmost interior point + recv_buf = u0 + 0; // Address of leftmost ghost zone + MPI_Sendrecv (send_buf, NGHOST, MPI_DOUBLE, dstR, 0, + recv_buf, NGHOST, MPI_DOUBLE, dstL, 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + send_buf = u0 + beg; // Address of leftmost interior point + recv_buf = u0 + end + 1; // Address of first ghost zone on the right + MPI_Sendrecv (send_buf, NGHOST, MPI_DOUBLE, dstL, 0, + recv_buf, NGHOST, MPI_DOUBLE, dstR, 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); +#endif + + /* -- 4c. Advance solution by one time step -- */ + + for (i = beg; i <= end; i++){ + u1[i] = u0[i] + dt/(dx*dx)*(u0[i-1] - 2.0*u0[i] + u0[i+1]); + } + t += dt; + k++; + + /* -- 4d. 
Copy arrays for next time level -- */ + + for (i = beg; i <= end; i++) u0[i] = u1[i]; + } + Write (xloc, u0, beg, end); + +#ifdef PARALLEL + MPI_Finalize(); +#endif + return 0; +} + +/* ********************************************************************* */ +void Write (double *x, double *u, int beg, int end) +/* + *********************************************************************** */ +{ + int i; + int rank; + static int n = 0; /* File number */ + FILE *fp; + char fname[32]; + + /* -------------------------------------------------------- + 1. Serial output + -------------------------------------------------------- */ + +#ifndef PARALLEL + sprintf (fname,"heat_eq%02d.dat",n); + fp = fopen (fname,"w"); + for (i = beg; i <= end; i++) fprintf (fp, "%12.6e %12.6e\n", x[i], u[i]); + fclose(fp); +#endif + + /* -------------------------------------------------------- + 2. Parallel output + -------------------------------------------------------- */ + +#ifdef PARALLEL + + /* -- 2a. Process #0 gathers data and does the writing -- */ + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + int nx_loc = end - beg + 1; + static double *recv_buf; + if (recv_buf == NULL) { + recv_buf = (double *) malloc((NX_GLOB + 2*NGHOST)*sizeof(double)); + } + + MPI_Gather (u + beg, nx_loc, MPI_DOUBLE, + recv_buf + beg, nx_loc, MPI_DOUBLE, 0, MPI_COMM_WORLD); + if (rank == 0){ + sprintf (fname,"heat_eq%02d.dat",n); + fp = fopen (fname,"w"); + for (i = beg; i < beg+NX_GLOB; i++) { + fprintf (fp, "%f %f\n", x[i], recv_buf[i]); + } + fclose(fp); + } + + /* -- 2b. Shared file pointer -- */ + + /* -- 2c. Individual file pointer -- */ + +#endif + + n++; +} + + + +/* + MAPLE Script: + + restart; + u := A*exp(-D*mu^2*t)*sin(mu*x + B) + C; + eq := diff(u,t) - D*diff(diff(u,x),x); + simplify(eq); + + + */ diff --git a/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/lecture_Broadcast_Gather_Scatter.pdf b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/lecture_Broadcast_Gather_Scatter.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b31c13174dc4baf16637694a733b0125dc2fc950 Binary files /dev/null and b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/lecture_Broadcast_Gather_Scatter.pdf differ diff --git a/jacobi/mpi/miscellaneous/MPI_Datatypes/Subarray.c b/jacobi/mpi/miscellaneous/MPI_Datatypes/Subarray.c new file mode 100644 index 0000000000000000000000000000000000000000..157ed4519c5784c2730ae188d0cc4b1c290615b4 --- /dev/null +++ b/jacobi/mpi/miscellaneous/MPI_Datatypes/Subarray.c @@ -0,0 +1,96 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/*! + \file + \brief Subarray example. + + Proc #0 creates a large array Abig with NROWS rows and NCOLS columns. + Proc #1 receives a subarray with nrows_sub rows and ncols_sub + columns starting at starts[]. + + \author A. Mignone (mignone@to.infn.it) + \date March 14, 2020 + */ +/* ///////////////////////////////////////////////////////////////////// */ +#include <stdio.h> +#include <stdlib.h> +#include <mpi.h> +#include "tools.c" + +#define NROWS 5 +#define NCOLS 6 + +int main(int argc, char **argv) +{ + int i,j; + int rank, size; + int nrows_sub = 3; + int ncols_sub = 2; + int starts[2] = {1,3}; + int subsizes[2] = {nrows_sub,ncols_sub}; + int bigsizes[2] = {NROWS, NCOLS}; + int **Abig; + int **Asub; + MPI_Datatype MPI_Subarr; + + /* -------------------------------------------------------- + 0. 
Initialize the MPI execution environment, + create subarray type + -------------------------------------------------------- */ + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + MPI_Type_create_subarray(2, bigsizes, subsizes, starts, + MPI_ORDER_C, MPI_INT, &MPI_Subarr); + MPI_Type_commit(&MPI_Subarr); + + if (size < 2) { + if (rank == 0){ + fprintf(stderr,"! Need at least 2 processors.\n"); + } + MPI_Finalize(); + return 1; + } + + /* -------------------------------------------------------- + 1. Proc #0 creates the big array, + Proc #1 receives the subarray. + -------------------------------------------------------- */ + + if (rank == 0) { + Abig = Allocate_2DintArray(NROWS, NCOLS); + + /* -- 1a. Fill array -- */ + + for (i = 0; i < NROWS; i++){ + for (j = 0; j < NCOLS; j++){ + Abig[i][j] = j + i*NCOLS; + }} + + /* -- 1b. Show array -- */ + + Show_2DintArray(Abig, NROWS, NCOLS, "Big array (proc #0):"); + + MPI_Send(&(Abig[0][0]), 1, MPI_Subarr, 1, 123, MPI_COMM_WORLD); + + free(Abig[0]); + free(Abig); + + } else if (rank == 1) { + + Asub = Allocate_2DintArray(nrows_sub, ncols_sub); + + MPI_Recv(&(Asub[0][0]), nrows_sub*ncols_sub, MPI_INT, 0, + 123, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + Show_2DintArray(Asub, nrows_sub, ncols_sub, "Received subarray (proc #1):"); + + free(Asub[0]); + free(Asub); + } + + MPI_Type_free(&MPI_Subarr); + MPI_Finalize(); + return 0; +} diff --git a/jacobi/mpi/miscellaneous/MPI_Datatypes/lecture_MPI_Datatypes.pdf b/jacobi/mpi/miscellaneous/MPI_Datatypes/lecture_MPI_Datatypes.pdf new file mode 100644 index 0000000000000000000000000000000000000000..088b82d3adff8d2871ec9d9294f6640900f6cbef Binary files /dev/null and b/jacobi/mpi/miscellaneous/MPI_Datatypes/lecture_MPI_Datatypes.pdf differ diff --git a/jacobi/mpi/miscellaneous/MPI_Datatypes/tools.c b/jacobi/mpi/miscellaneous/MPI_Datatypes/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..58d03366e6ba5027d07cd155f396375e8d991d0d --- /dev/null +++ b/jacobi/mpi/miscellaneous/MPI_Datatypes/tools.c @@ -0,0 +1,104 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/*! + \file + \brief Collection of useful functions for the MPI course. + + This file provides some simple function for memory allocation + (dbl and int 2D array), printing. + Function prototyping is already included here and should not be + repeated elsewhere. + Simply include this file after the main header section when needed: + \code + #include <stdio.h> + ... + #include "tools.c" + ... + int main() + { + . + . + . + } + \endcodes + + \author A. 
Mignone (mignone@to.infn.it) + \date March 14, 2020 + */ +/* ///////////////////////////////////////////////////////////////////// */ +double **Allocate_2DdblArray(int, int); +int **Allocate_2DintArray(int, int); +void Show_2DdblArray(double **, int, int, const char *); +void Show_2DintArray(int **, int, int, const char *); + +/* ********************************************************************* */ +double **Allocate_2DdblArray(int nx, int ny) +/* + * Allocate memory for a double precision array with + * nx rows and ny columns + *********************************************************************** */ +{ + int i,j; + double **buf; + + buf = (double **)malloc (nx*sizeof(double *)); + buf[0] = (double *) malloc (nx*ny*sizeof(double)); + for (j = 1; j < nx; j++) buf[j] = buf[j-1] + ny; + + return buf; +} +/* ********************************************************************* */ +int **Allocate_2DintArray(int nx, int ny) +/* + * Allocate memory for an integer-type array with + * nx rows and ny columns + *********************************************************************** */ +{ + int i,j; + int **buf; + + buf = (int **)malloc (nx*sizeof(int *)); + buf[0] = (int *) malloc (nx*ny*sizeof(int)); + for (j = 1; j < nx; j++) buf[j] = buf[j-1] + ny; + + return buf; +} + + +/* ********************************************************************* */ +void Show_2DdblArray(double **A, int nx, int ny, const char *string) +/* + *********************************************************************** */ +{ + int i, j; + + printf ("%s\n",string); + printf ("------------------------------\n"); + for (i = 0; i < nx; i++) { + for (j = 0; j < ny; j++) { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ +void Show_2DintArray(int **A, int nx, int ny, const char *string) +/* + *********************************************************************** */ +{ + int i, j; + + printf ("%s\n",string); + for (j = 0; j < ny; j++) printf ("-----"); + printf ("\n"); + + for (i = 0; i < nx; i++) { + for (j = 0; j < ny; j++) { + printf ("%03d ", A[i][j]); + } + printf ("\n"); + } + + for (j = 0; j < ny; j++) printf ("-----"); + printf ("\n"); +} diff --git a/jacobi/mpi/miscellaneous/Parallel_IO/lecture_Parallel_IO.pdf b/jacobi/mpi/miscellaneous/Parallel_IO/lecture_Parallel_IO.pdf new file mode 100644 index 0000000000000000000000000000000000000000..befbe43337c70f92e19b5af9857fda500df097fc Binary files /dev/null and b/jacobi/mpi/miscellaneous/Parallel_IO/lecture_Parallel_IO.pdf differ diff --git a/jacobi/mpi/miscellaneous/Parallel_IO/write_arr1D.c b/jacobi/mpi/miscellaneous/Parallel_IO/write_arr1D.c new file mode 100644 index 0000000000000000000000000000000000000000..cf1cf14136b40806c3c3df95bac48f75ba21ffbc --- /dev/null +++ b/jacobi/mpi/miscellaneous/Parallel_IO/write_arr1D.c @@ -0,0 +1,101 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/*! + \file + \brief Writing of a 1D buffer in parallel. + + Write a 1D buffer in parallel using 4 different versions. 
+ For a contiguous data type, use: + + VERSION == 1 employs shared file pointer + VERSION == 2 employs individual file pointer with offset computed + by the MPI_File_seek() + VERSION == 3 defines a file view with offset depending on the process + rank + VERSION == 4 similar to VERSION == 3, but using a contiguous MPI + datatype + + A non-contiguous version is handled with VERSION == 5, which defines + a MPI vector datatype and a file view. + + \author A. Mignone (mignone@to.infn.it) + \date March 1, 2020 + */ +/* ///////////////////////////////////////////////////////////////////// */ +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <mpi.h> + +#define NELEM 3 + +#define VERSION 5 +int main(int argc, char **argv) +{ + int i, rank, size; + double buf[NELEM]; + char fname[] = "arr1D.bin"; + MPI_File fh; + MPI_Offset disp; + + /* -------------------------------------------------------- + 0. Initialize the MPI execution environment + -------------------------------------------------------- */ + + MPI_Init (&argc, &argv); + MPI_Comm_rank (MPI_COMM_WORLD, &rank); + MPI_Comm_size (MPI_COMM_WORLD, &size); + + /* -------------------------------------------------------- + 1. Initialize array + -------------------------------------------------------- */ + + for (i = 0; i < NELEM; i++) buf[i] = rank; + + /* -------------------------------------------------------- + 2. Delete, re-open file and write + -------------------------------------------------------- */ + + MPI_File_delete(fname, MPI_INFO_NULL); + MPI_File_open( MPI_COMM_WORLD, fname, + MPI_MODE_CREATE | MPI_MODE_RDWR, + MPI_INFO_NULL, &fh ); + +#if VERSION == 1 // Contiguous data, shared file pointer + MPI_File_write_ordered(fh, buf, NELEM, MPI_DOUBLE, MPI_STATUS_IGNORE); +#elif VERSION == 2 // Contiguous data, individual file pointer + disp = rank*NELEM*sizeof(double); // In bytes + MPI_File_seek(fh, disp, MPI_SEEK_SET); + MPI_File_write(fh, buf, NELEM, MPI_DOUBLE, MPI_STATUS_IGNORE); +#elif VERSION == 3 // Contiguous data, file view + disp = rank*NELEM*sizeof(double); + MPI_File_set_view(fh, disp, MPI_DOUBLE, MPI_DOUBLE, "native", MPI_INFO_NULL); + MPI_File_write(fh, buf, NELEM, MPI_DOUBLE, MPI_STATUS_IGNORE); +#elif VERSION == 4 // Contiguous data, file view with MPI datatype + MPI_Datatype cntg_type; + + MPI_Type_contiguous(NELEM, MPI_DOUBLE, &cntg_type); + MPI_Type_commit(&cntg_type); + + disp = rank*NELEM*sizeof(double); + + MPI_File_set_view(fh, disp, MPI_BYTE, cntg_type, "native", MPI_INFO_NULL); + MPI_File_write(fh, buf, 1, cntg_type, MPI_STATUS_IGNORE); + MPI_Type_free(&cntg_type); +#elif VERSION == 5 // Non-contiguous data, file view, vector type + MPI_Datatype vec_type; + + for (i = 0; i < NELEM; i++) buf[i] = rank + 0.1*i; + + MPI_Type_vector(NELEM, 1, size, MPI_DOUBLE, &vec_type); + MPI_Type_commit(&vec_type); + + disp = rank*sizeof(double); + MPI_File_set_view(fh, disp, MPI_DOUBLE, vec_type, "native", MPI_INFO_NULL); + MPI_File_write(fh, buf, NELEM, MPI_DOUBLE, MPI_STATUS_IGNORE); + MPI_Type_free(&vec_type); +#endif + + MPI_File_close(&fh); + MPI_Finalize(); + return 0; +} diff --git a/jacobi/mpi/miscellaneous/Parallel_IO/write_arr2D.c b/jacobi/mpi/miscellaneous/Parallel_IO/write_arr2D.c new file mode 100644 index 0000000000000000000000000000000000000000..1aaf24504595d0ebf2e3d5e3212f03179c5955d1 --- /dev/null +++ b/jacobi/mpi/miscellaneous/Parallel_IO/write_arr2D.c @@ -0,0 +1,105 @@ +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <mpi.h> +#include "tools.c" + +#define NDIM 2 +#define 
diff --git a/jacobi/mpi/miscellaneous/Parallel_IO/write_arr2D.c b/jacobi/mpi/miscellaneous/Parallel_IO/write_arr2D.c
new file mode 100644
index 0000000000000000000000000000000000000000..1aaf24504595d0ebf2e3d5e3212f03179c5955d1
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/Parallel_IO/write_arr2D.c
@@ -0,0 +1,105 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <mpi.h>
+#include "tools.c"
+
+#define NDIM      2
+#define NX_GLOB   16
+#define NY_GLOB   8
+
+#define VERSION   2
+
+int main(int argc, char ** argv)
+{
+  int i, j, rank, size;
+  int nx, ny;
+  int nprocs[NDIM];
+  int gsizes[NDIM];
+  int lsizes[NDIM];
+  int periods[NDIM] = {0,0};
+  int coords[NDIM];
+  int start[NDIM];
+  double **A;
+  MPI_Datatype subarr_type;
+  MPI_Comm MPI_COMM_CART;
+  char fname[] = "arr2D.bin";
+
+  /* --------------------------------------------------------
+     0. Initialize the MPI execution environment
+     -------------------------------------------------------- */
+
+  MPI_Init (&argc, &argv);
+  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
+  MPI_Comm_size (MPI_COMM_WORLD, &size);
+
+  /* --------------------------------------------------------
+     1. Create a 2D domain decomposition
+     -------------------------------------------------------- */
+
+  /* -- 1a. Attempt to find a maximally square decomposition -- */
+
+  nprocs[0] = (int)sqrt(size);
+  nprocs[1] = size/nprocs[0];
+  if (nprocs[0]*nprocs[1] != size){
+    if (rank == 0) printf ("! Cannot decompose\n");
+    MPI_Finalize();
+    return 1;
+  }
+
+  /* -- 1b. Create communicator -- */
+
+  MPI_Cart_create(MPI_COMM_WORLD, NDIM, nprocs, periods, 0, &MPI_COMM_CART);
+  MPI_Cart_get(MPI_COMM_CART, NDIM, nprocs, periods, coords);
+
+  gsizes[0] = NX_GLOB;
+  gsizes[1] = NY_GLOB;
+
+  lsizes[0] = nx = NX_GLOB/nprocs[0];
+  lsizes[1] = ny = NY_GLOB/nprocs[1];
+
+  if (rank == 0){
+    printf ("Domain decomposed in %d X %d procs\n",nprocs[0],nprocs[1]);
+    printf ("Local grid size = %d X %d\n",lsizes[0], lsizes[1]);
+  }
+
+  /* --------------------------------------------------------
+     2. Allocate memory and fill 2D array on local domain
+     -------------------------------------------------------- */
+
+  A = Allocate_2DdblArray(ny,nx);
+  for (j = 0; j < ny; j++) {
+  for (i = 0; i < nx; i++) {
+    A[j][i] = rank;
+  }}
+
+  /* --------------------------------------------------------
+     3. Create new datatypes
+     -------------------------------------------------------- */
+
+  start[0] = coords[0]*lsizes[0];
+  start[1] = coords[1]*lsizes[1];
+
+  MPI_Type_create_subarray (NDIM, gsizes, lsizes, start, MPI_ORDER_FORTRAN,
+                            MPI_DOUBLE, &subarr_type);
+  MPI_Type_commit (&subarr_type);
+
+  /* --------------------------------------------------------
+     4. Open file for writing
+     -------------------------------------------------------- */
+
+  MPI_File_delete(fname, MPI_INFO_NULL);
+
+  MPI_File fh;
+  MPI_Status status;
+
+  MPI_File_open(MPI_COMM_CART, fname,
+                MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
+  MPI_File_set_view(fh, 0, MPI_DOUBLE, subarr_type, "native", MPI_INFO_NULL);
+  MPI_File_write_all(fh, A[0], nx*ny, MPI_DOUBLE, MPI_STATUS_IGNORE);
+  MPI_File_close(&fh);
+
+  MPI_Type_free(&subarr_type);
+  MPI_Finalize();
+  return 0;
+}
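Editorial note: because the file view above is built with MPI_ORDER_FORTRAN, dimension 0 (x) varies fastest in the file, so arr2D.bin should contain NY_GLOB consecutive rows of NX_GLOB doubles holding the rank map. The reader below is a minimal sketch under that assumption and is not part of the patch (the name check_arr2D.c is made up).

/* check_arr2D.c (hypothetical helper): print arr2D.bin as NY_GLOB rows of NX_GLOB doubles */
#include <stdio.h>

#define NX_GLOB 16
#define NY_GLOB 8

int main(void)
{
  double row[NX_GLOB];
  FILE *fp = fopen("arr2D.bin", "rb");
  if (fp == NULL){
    printf ("! Cannot open arr2D.bin\n");
    return 1;
  }

  for (int j = 0; j < NY_GLOB; j++){
    if (fread(row, sizeof(double), NX_GLOB, fp) != (size_t)NX_GLOB) break;
    for (int i = 0; i < NX_GLOB; i++) printf ("%4.1f ", row[i]);
    printf ("\n");
  }

  fclose(fp);
  return 0;
}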
diff --git a/jacobi/mpi/miscellaneous/cartesian b/jacobi/mpi/miscellaneous/cartesian
new file mode 100644
index 0000000000000000000000000000000000000000..cef6cb6704f8709644945aeb816d610ad2814635
Binary files /dev/null and b/jacobi/mpi/miscellaneous/cartesian differ
diff --git a/jacobi/mpi/miscellaneous/cartesian.c b/jacobi/mpi/miscellaneous/cartesian.c
new file mode 100644
index 0000000000000000000000000000000000000000..8778f5828c609db41ef8aef95f8bcd484fdceb25
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/cartesian.c
@@ -0,0 +1,71 @@
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define SIZE 2
+#define X    0
+#define Y    1
+
+int main(int argc, char **argv)
+{
+  int task, ntasks;
+  MPI_Init(&argc, &argv);
+  MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
+  MPI_Comm_rank(MPI_COMM_WORLD, &task);
+
+  if (argc <= 1)
+    {
+      if (!task)
+        {
+          printf("\n\t Usage: <executable> <number processes along X> \n");
+          MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+          exit(EXIT_FAILURE);
+        }
+    }
+
+  const int cartesian_grid_x = (int)strtol(argv[1], NULL, 10);
+  const int cartesian_grid_y = ((ntasks % cartesian_grid_x == 0) ? (ntasks / cartesian_grid_x) : -1);
+  if (cartesian_grid_y == -1)
+    {
+      if (!task)
+        {
+          printf("\n\t ntasks %% cartesian_grid_x != 0 ... aborting ...\n");
+          fflush(stdout);
+          MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+          exit(EXIT_FAILURE);
+        }
+    }
+
+  const int dims[SIZE]    = {cartesian_grid_x, cartesian_grid_y};
+  const int periods[SIZE] = {0, 0};
+  const int reorder       = 0;
+  MPI_Comm comm2d;
+  MPI_Cart_create(MPI_COMM_WORLD, SIZE, dims, periods, reorder, &comm2d);
+
+  int coords[SIZE];
+  MPI_Cart_coords(comm2d, task, SIZE, coords);
+
+  int nbrright, nbrleft;
+  MPI_Cart_shift(comm2d, Y, 1, &nbrleft, &nbrright);
+
+  int nbrtop, nbrbottom;
+  MPI_Cart_shift(comm2d, X, 1, &nbrbottom, &nbrtop);
+
+  for (int rank=0 ; rank<ntasks; rank++)
+    {
+      MPI_Barrier(MPI_COMM_WORLD);
+      if (rank == task)
+        {
+          printf("\n\t Task: %d\n", task);
+          printf("\t\t coords[%d, %d]", coords[X], coords[Y]);
+          printf("\n\t\t nbrright: %d - nbrleft : %d", nbrright, nbrleft);
+          printf("\n\t\t nbrtop  : %d - nbrbottom: %d \n\n", nbrtop, nbrbottom);
+          fflush(stdout);
+        }
+    }
+
+  MPI_Comm_free(&comm2d);
+  MPI_Finalize();
+
+  return 0;
+}
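Editorial note: at the non-periodic domain edges MPI_Cart_shift returns MPI_PROC_NULL, and any send or receive involving MPI_PROC_NULL completes immediately without touching the buffers. The sketch below (hypothetical, not part of the patch) uses this to exchange one value with the neighbours obtained as above, so boundary tasks need no special casing; the same idea underlies the halo exchange of the Jacobi solver.

/* cart_shift_exchange.c (hypothetical): one-value exchange using the
   neighbours returned by MPI_Cart_shift on a 1D Cartesian topology. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
  int task, ntasks;
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
  MPI_Comm_rank(MPI_COMM_WORLD, &task);

  /* a 1D, non-periodic decomposition is enough for the illustration */
  int dims[1] = {ntasks}, periods[1] = {0};
  MPI_Comm comm1d;
  MPI_Cart_create(MPI_COMM_WORLD, 1, dims, periods, 0, &comm1d);

  int nbrbottom, nbrtop;
  MPI_Cart_shift(comm1d, 0, 1, &nbrbottom, &nbrtop);

  /* Send my rank upward, receive the rank below; at the edges the partner
     is MPI_PROC_NULL and recv_below keeps its initial value (-1.0). */
  double send_up = (double)task, recv_below = -1.0;
  MPI_Sendrecv(&send_up,    1, MPI_DOUBLE, nbrtop,    0,
               &recv_below, 1, MPI_DOUBLE, nbrbottom, 0,
               comm1d, MPI_STATUS_IGNORE);

  printf("Task %d: received %4.1f from below\n", task, recv_below);

  MPI_Comm_free(&comm1d);
  MPI_Finalize();
  return 0;
}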
diff --git a/jacobi/mpi/miscellaneous/cartesian.cpp b/jacobi/mpi/miscellaneous/cartesian.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7b5a7e505a9d63ade4f4e8e35f48cb895e8e1660
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/cartesian.cpp
@@ -0,0 +1,72 @@
+#include <mpi.h>
+#include <iostream>
+#include <cstdlib>
+
+#define SIZE 2
+#define X    0
+#define Y    1
+
+int main(int argc, char **argv)
+{
+  using namespace std;
+
+  int task, ntasks;
+  MPI_Init(&argc, &argv);
+  MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
+  MPI_Comm_rank(MPI_COMM_WORLD, &task);
+
+  if (argc <= 1)
+    {
+      if (!task)
+        {
+          cout << "\n\t Usage: <executable> <number processes along X> \n" << endl;
+          MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+          exit(EXIT_FAILURE);
+        }
+    }
+
+  const int cartesian_grid_x = (int)strtol(argv[1], NULL, 10);
+  const int cartesian_grid_y = ((ntasks % cartesian_grid_x == 0) ? (ntasks / cartesian_grid_x) : -1);
+  if (cartesian_grid_y == -1)
+    {
+      if (!task)
+        {
+          cout << "\n\t ntasks % cartesian_grid_x != 0 ... aborting ...\n" << endl;
+          MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+          exit(EXIT_FAILURE);
+        }
+    }
+
+  static int dims[SIZE]    = {cartesian_grid_x, cartesian_grid_y};
+  static int periods[SIZE] = {0, 0};
+  static int reorder       = 0;
+  MPI_Comm comm2d;
+  MPI_Cart_create(MPI_COMM_WORLD, SIZE, dims, periods, reorder, &comm2d);
+
+  int coords[SIZE];
+  MPI_Cart_coords(comm2d, task, SIZE, coords);
+
+  int nbrright, nbrleft;
+  MPI_Cart_shift(comm2d, Y, 1, &nbrleft, &nbrright);
+
+  int nbrtop, nbrbottom;
+  MPI_Cart_shift(comm2d, X, 1, &nbrbottom, &nbrtop);
+
+  for (int rank=0 ; rank<ntasks; rank++)
+    {
+      MPI_Barrier(MPI_COMM_WORLD);
+      if (rank == task)
+        {
+          cout << "\n\t Task: " << task << endl;
+          cout << "\t\t coords[" << coords[X] << "," << coords[Y] << "]" << endl;
+          cout << "\t\t nbrright: " << nbrright << " - nbrleft : " << nbrleft << endl;
+          cout << "\t\t nbrtop  : " << nbrtop << " - nbrbottom: " << nbrbottom << endl;
+          cout << endl;
+        }
+    }
+
+  MPI_Comm_free(&comm2d);
+  MPI_Finalize();
+
+  return 0;
+}
diff --git a/jacobi/mpi/miscellaneous/color_ring.c b/jacobi/mpi/miscellaneous/color_ring.c
new file mode 100644
index 0000000000000000000000000000000000000000..70ff1b544ee674dd68d785f565de4958344e82fb
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/color_ring.c
@@ -0,0 +1,88 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/*!
+  \file
+  \brief Data communication in a ring.
+
+  For each process, define a unique string holding the process color
+  and perform as many cyclic permutations as there are processes by
+  transferring the color name to the process to the right.
+  Three versions are provided:
+
+   i)   using MPI_Send / Recv()   (select VERSION == 1);
+   ii)  using MPI_Sendrecv()      (select VERSION == 2);
+   iii) using MPI_Isend / Irecv() (select VERSION == 3).
+
+  \author A. Mignone (mignone@to.infn.it)
+  \date   March 10, 2020
+ */
+/* ///////////////////////////////////////////////////////////////////// */
+
+
+#include <mpi.h>
+#include <stdio.h>
+
+#define STR_LENGTH  32
+#define VERSION     1
+
+int main(int argc, char ** argv)
+{
+  int rank, size;
+  int dstL, dstR;
+  char recv_buf[STR_LENGTH];
+  char send_buf[STR_LENGTH];
+  MPI_Request req[2];
+
+  /* -- 1. Initialize the MPI execution environment -- */
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+  /* -- 2. Define color -- */
+
+  if (rank == 0) sprintf (send_buf, "red");
+  if (rank == 1) sprintf (send_buf, "blue");
+  if (rank == 2) sprintf (send_buf, "green");
+  if (rank == 3) sprintf (send_buf, "black");
+  if (rank == 4) sprintf (send_buf, "orange");
+  if (rank == 5) sprintf (send_buf, "yellow");
+
+  if (size > 6){
+    if (rank == 0) printf ("! Cannot execute with more than six processes\n");
+    MPI_Finalize();
+    return 0;
+  }
+
+  /* -- 3. Determine neighbour processes -- */
+
+  dstL = rank - 1;
+  dstR = rank + 1;
+  if (dstL < 0)     dstL = size-1;
+  if (dstR >= size) dstR = 0;
+
+  int n;
+  for (n = 0; n < size; n++){
+#if VERSION == 1
+    MPI_Send(send_buf, STR_LENGTH, MPI_CHAR, dstR, 0, MPI_COMM_WORLD);
+    MPI_Recv(recv_buf, STR_LENGTH, MPI_CHAR, dstL, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+#elif VERSION == 2
+    MPI_Sendrecv(send_buf, STR_LENGTH, MPI_CHAR, dstR, 0,
+                 recv_buf, STR_LENGTH, MPI_CHAR, dstL, 0, MPI_COMM_WORLD,
+                 MPI_STATUS_IGNORE);
+#elif VERSION == 3
+    MPI_Isend(send_buf, STR_LENGTH, MPI_CHAR, dstR, 0, MPI_COMM_WORLD, &req[0]);
+    MPI_Irecv(recv_buf, STR_LENGTH, MPI_CHAR, dstL, 0, MPI_COMM_WORLD, &req[1]);
+    MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
+#endif
+
+    /* -- Replace color -- */
+
+    sprintf (send_buf,"%s",recv_buf);
+    if (rank == 0){
+      printf ("Proc #%d, my new color is %s\n", rank, send_buf);
+    }
+  }
+
+  MPI_Finalize();
+  return 0;
+}
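Editorial note (usage): the loop runs once per process, so each color travels fully around the ring and every process ends up with its original color again; building with mpicc color_ring.c -o color_ring and running mpirun -n 4 ./color_ring prints rank 0's color after each shift. Note that VERSION == 1 posts MPI_Send before MPI_Recv on every rank and therefore relies on eager buffering of the short messages; VERSION == 2 and VERSION == 3 avoid that ordering dependence.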
diff --git a/jacobi/mpi/miscellaneous/lecture_color_ring.pdf b/jacobi/mpi/miscellaneous/lecture_color_ring.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..dc1f501c160a2157484c67d32cf6a185a826eb40
Binary files /dev/null and b/jacobi/mpi/miscellaneous/lecture_color_ring.pdf differ
diff --git a/jacobi/mpi/miscellaneous/subarray b/jacobi/mpi/miscellaneous/subarray
new file mode 100644
index 0000000000000000000000000000000000000000..5295b41860fe0dfe7b99d8b60a0c49a893e90303
Binary files /dev/null and b/jacobi/mpi/miscellaneous/subarray differ
diff --git a/jacobi/mpi/miscellaneous/subarray.c b/jacobi/mpi/miscellaneous/subarray.c
new file mode 100644
index 0000000000000000000000000000000000000000..254b3d43bb7c4cd9af9a675e29450f12f8eb185b
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/subarray.c
@@ -0,0 +1,114 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <mpi.h>
+#include <assert.h>
+
+void printarr(int **data, int n, char *str);
+int **allocarray(const int n);
+void deallocarray(int **);
+
+int main(int argc, char **argv)
+{
+  /* array sizes */
+  const int bigsize = 10;
+  const int subsize = 5;
+
+  /* communications parameters */
+  const int sender   = 0;
+  const int receiver = 1;
+  const int ourtag   = 2;
+
+  int rank, size;
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+  if (size < (receiver + 1))
+    {
+      if (rank == 0)
+        fprintf(stderr, "\n\t %s: Needs at least %d processes.\n", argv[0], receiver+1);
+
+      MPI_Finalize();
+      return 1;
+    }
+
+  if (rank == sender)
+    {
+      int **bigarray = allocarray(bigsize);
+      for (int i=0 ; i<bigsize ; i++)
+        for (int j=0 ; j<bigsize ; j++)
+          bigarray[i][j] = (i * bigsize + j);
+
+      printarr(bigarray, bigsize, "\n Sender: Big array \n");
+
+      MPI_Datatype mysubarray;
+      int starts[2]   = {5,3};
+      int subsizes[2] = {subsize,subsize};
+      int bigsizes[2] = {bigsize, bigsize};
+      MPI_Type_create_subarray(2, bigsizes, subsizes, starts,
+                               MPI_ORDER_C, MPI_INT, &mysubarray);
+      MPI_Type_commit(&mysubarray);
+
+      MPI_Send(&(bigarray[0][0]), 1, mysubarray, receiver, ourtag, MPI_COMM_WORLD);
+      MPI_Type_free(&mysubarray);
+
+      deallocarray(bigarray);
+    }
+  else if (rank == receiver)
+    {
+      int **subarray = allocarray(subsize);
+
+      for (int i=0; i<subsize; i++)
+        for (int j=0; j<subsize; j++)
+          subarray[i][j] = 0;
+
+      MPI_Recv(&(subarray[0][0]), subsize * subsize, MPI_INT, sender, ourtag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+      printarr(subarray, subsize, " Receiver: Subarray -- after receive");
+
+      deallocarray(subarray);
+    }
+
+  MPI_Finalize();
+  return 0;
+}
+
+void printarr(int **data,
+              int n,
+              char *str)
+{
+  printf("-- %s --\n", str);
+  for (int i=0; i<n; i++)
+    {
+      for (int j=0; j<n; j++)
+        {
+          printf("%3d ", data[i][j]);
+        }
+      printf("\n");
+    }
+}
+
+int **allocarray(const int n)
+{
+  int *data = malloc(n * n * sizeof(int));
+  assert(data != NULL);
+
+  int **arr = malloc(n * sizeof(int *));
+  assert(arr != NULL);
+
+  for (int i=0 ; i<n ; i++)
+    arr[i] = &(data[i * n]);
+
+  return arr;
+}
+
+void deallocarray(int **array)
+{
+  if (array[0])
+    free(array[0]);
+
+  if (array)
+    free(array);
+
+  return;
+}