diff --git a/.gitignore b/.gitignore
index c18d596b8fd9d43974ccc340974174d7b0d22cbc..7b3ba824bf13a3947e7a6cd9f04a851362f0a4a8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 # put here the files which are not going to be committed
 *.txt
 *~
+*.bin
\ No newline at end of file
diff --git a/jacobi/mpi/comp_comm/Makefile b/jacobi/mpi/comp_comm/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..02695b48106ca217759865cd3708f7ffe738fb1c
--- /dev/null
+++ b/jacobi/mpi/comp_comm/Makefile
@@ -0,0 +1,57 @@
+#######################################################################
+# Author: David Goz (david.goz@inaf.it) #
+# June 2024 #
+#######################################################################
+#
+# To see all the compilation options
+# $ make info
+#######################################################################
+
+# make.def defines how the application is compiled
+include make.def
+include make_mpi_path
+#######################################################################
+
+.PHONY: info mpi valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean
+
+info:
+	@echo ' '
+	@echo '-----------------------------------------------------------------------------------------'
+	@echo '$$ make ---> compile the mpi application '
+	@echo '$$ make valgrind_memcheck ---> run the mpi application using Valgrind under Memcheck '
+	@echo '$$ make valgrind_callgrind ---> run the mpi application using Valgrind under Callgrind '
+	@echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind '
+	@echo '$$ make clean ---> clean up all '
+	@echo '$$ make info ---> get make info '
+	@echo '-----------------------------------------------------------------------------------------'
+	@echo ' '
+
+mpi: $(PROG)
+
+valgrind_memcheck: $(PROG_MEMCHECK)
+	@echo 'oooOOO... valgrind_memcheck ...OOOooo'
+	mpirun -n 4 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 9 2
+	@echo 'oooOOO... valgrind_memcheck ...OOOooo'
+
+valgrind_callgrind: $(PROG_CALLGRIND)
+	@echo 'oooOOO... valgrind_callgrind ...OOOooo'
+	mpirun -n 4 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 9 2
+	@echo ' '
+	@echo 'To generate a function-by-function summary from the profile data file:'
+	@echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less'
+	@echo '(kcachegrind is required in order to visualize the output using the GUI)'
+
+valgrind_cachegrind: $(PROG_CACHEGRIND)
+	@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
+	mpirun -n 4 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 9 2
+	@echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less'
+	@echo '(kcachegrind is required in order to visualize the output using the GUI)'
+	@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
+
+clean:
+	rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~
+	rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND)
+	rm -f valgrind_*.txt
+	rm -f cachegrind.out.*
+	rm -f callgrind.*
+	rm -f *bin
diff --git a/jacobi/mpi/comp_comm/include/allvars.h b/jacobi/mpi/comp_comm/include/allvars.h
new file mode 100644
index 0000000000000000000000000000000000000000..798e4e6a2f2a740b3ab5a1394ac85eef8da1255c
--- /dev/null
+++ b/jacobi/mpi/comp_comm/include/allvars.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#define NGHOST 1
+#define NDIM 2 /* 2D */
+#define X 0
+#define Y 1
+#define TOL 1.e-5
+
+#include <mpi.h>
+
+#define MASTER 0
+
+#if defined(SINGLE_PRECISION)
+typedef float MyData;
+#define MPI_MyDatatype MPI_FLOAT
+#else
+/* double precision is assumed by default */
+typedef double MyData;
+#define MPI_MyDatatype MPI_DOUBLE
+#endif /* SINGLE_PRECISION */
diff --git a/jacobi/mpi/comp_comm/include/tools.h b/jacobi/mpi/comp_comm/include/tools.h
new file mode 100644
index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154
--- /dev/null
+++ b/jacobi/mpi/comp_comm/include/tools.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "allvars.h"
+#include <assert.h>
+#include <sys/time.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* function prototypes */
+MyData **Allocate_2DdblArray(const int nx, const int ny);
+void Show_2DdblArray(const MyData **const A,
+                     const int nx,
+                     const int ny,
+                     const char *const string);
+
+double seconds(void);
diff --git a/jacobi/mpi/comp_comm/make.def b/jacobi/mpi/comp_comm/make.def
new file mode 100644
index 0000000000000000000000000000000000000000..082a9474ae3c4a9da1a2f23d1e9be1298faedc1e
--- /dev/null
+++ b/jacobi/mpi/comp_comm/make.def
@@ -0,0 +1,45 @@
+CC ?= gcc
+CFLAGS ?= -Wall -Wextra -march=native
+LIBS ?= -lm -lmpi
+
+SYSTYPE = $(strip $(shell uname -n))
+
+PROG ?= jacobi_mpi_comp_comm_$(SYSTYPE)
+PROG_DEBUG = $(PROG)_DEBUG
+PROG_MEMCHECK = $(PROG)_MEMCHECK
+PROG_CALLGRIND = $(PROG)_CALLGRIND
+PROG_CACHEGRIND = $(PROG)_CACHEGRIND
+
+HEADERS = $(wildcard ./include/*.h)
+SOURCES = $(wildcard ./src/*.c)
+DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile
+
+$(PROG): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_DEBUG): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_MEMCHECK): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_CALLGRIND): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_CACHEGRIND): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
diff --git a/jacobi/mpi/comp_comm/make_mpi_path b/jacobi/mpi/comp_comm/make_mpi_path
new file mode 100644
index 0000000000000000000000000000000000000000..f2c3de9e0d1796034a518159f4c53a40e76919fc
--- /dev/null
+++ b/jacobi/mpi/comp_comm/make_mpi_path
@@ -0,0 +1,9 @@
+# set the MPI install path
+
+MPI_INSTALL_PATH = /home/gozzilla/software/openmpi/openmpi-5.0.3
+
+
+
+MPI = $(MPI_INSTALL_PATH)
+MPI_INC = $(MPI)/include
+MPI_LIB = $(MPI)/lib
\ No newline at end of file
diff --git a/jacobi/mpi/comp_comm/src/jacobi_2D_mpi_comp_comm.c b/jacobi/mpi/comp_comm/src/jacobi_2D_mpi_comp_comm.c
new file mode 100644
index 0000000000000000000000000000000000000000..b36017e892886068eaeb932ea1fbb79837230513
--- /dev/null
+++ b/jacobi/mpi/comp_comm/src/jacobi_2D_mpi_comp_comm.c
@@ -0,0 +1,620 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/* Authors: A. Mignone (mignone@to.infn.it) */
+/* V. Cesare (valentina.cesare@inaf.it) */
+/* D. Goz (david.goz@inaf.it) */
+/* */
+/* Date : June 2024 */
+/* */
+/* ///////////////////////////////////////////////////////////////////// */
+
+#include "allvars.h"
+#include "tools.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+/* #define DEBUG */
+
+typedef struct MyGrid
+{
+  int start[NDIM];
+  int end[NDIM];
+} myDomain;
+
+
+typedef struct Task_2D_Cartesian
+{
+  int rank;
+  int nranks;
+  int coords[NDIM];
+  myDomain domain;
+  int nbrtop;
+  int nbrbottom;
+  int nbrleft;
+  int nbrright;
+} Task;
+
+/* function prototypes */
+void BoundaryConditions(MyData **const restrict,
+                        MyData *const restrict,
+                        MyData *const restrict,
+                        const int,
+                        const int);
+
+void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, const MyData *restrict,
+                     const int, const int, const int, const int, MyData *const restrict);
+
+void Jacobi_Communication(MyData **const restrict Phi,
+                          MyData **const restrict Phi0,
+                          MyData *const restrict error,
+                          const MyData * restrict delta,
+                          const int grid_y,
+                          Task *const restrict ThisTask,
+                          MPI_Comm *const restrict comm);
+
+void get_domains(MyData **const buffer,
+                 Task *const ThisTask,
+                 const MPI_Comm comm);
+
+void WriteSolution(MyData **const phi, const int nx, const int ny);
+
+int main(int argc, char **argv)
+{
+  int rank, Nranks;
+  const MPI_Comm comm = MPI_COMM_WORLD;
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(comm, &rank);
+  MPI_Comm_size(comm, &Nranks);
+
+  if ((argc <= 2) && (!rank))
+    {
+      printf("\n\t Usage: <executable> <grid_size> <cartesian communicator size_X>\n\n");
+      MPI_Abort(comm, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+
+  /* global X and Y grid size (square matrix assumed) */
+  const int NX_GLOB = (int) strtol(argv[1], NULL, 10);
+  const int NY_GLOB = NX_GLOB;
+
+  /********************************** Cartesian topology *******************************************************/
+  const int cartesian_grid_x = (int)strtol(argv[2], NULL, 10);
+  const int cartesian_grid_y = ((Nranks % cartesian_grid_x == 0) ? (Nranks / cartesian_grid_x) : -1);
+  if (cartesian_grid_y == -1)
+    {
+      if (!rank)
+        {
+          printf("\n\t Nranks mod cartesian_grid_x != 0 ... aborting ...\n");
+          MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+          exit(EXIT_FAILURE);
+        }
+    }
+
+  const int dims[NDIM] = {cartesian_grid_x, cartesian_grid_y};
+  const int periods[NDIM] = {0 ,0}; /* not periodic */
+  const int reorder = 0; /* setting the logical value reorder to TRUE allows MPI to */
+                         /* reorder the rank of the processes figuring out the      */
+                         /* neighbors in the actual hardware for better performance */
+
+  /* make a new communicator to which topology information has been attached */
+  MPI_Comm comm2d = MPI_COMM_NULL;
+  MPI_Cart_create(MPI_COMM_WORLD, NDIM, dims, periods, reorder, &comm2d);
+  if (comm2d == MPI_COMM_NULL)
+    {
+      printf("\n\t Process %d is not part of the new communicator \n", rank);
+      fflush(stdout);
+      MPI_Abort(comm, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+
+  Task ThisTask;
+
+  /* get the comm size */
+  MPI_Comm_size(comm2d, &ThisTask.nranks);
+
+  /* determine the process coords in cartesian topology given rank in group */
+  MPI_Cart_coords(comm2d, rank, NDIM, ThisTask.coords);
+
+  /* determine the process rank in communicator given Cartesian location */
+  MPI_Cart_rank(comm2d, ThisTask.coords, &ThisTask.rank);
+
+  /* get bottom and top neighbors (X direction) */
+  MPI_Cart_shift(comm2d, X, 1, &ThisTask.nbrbottom, &ThisTask.nbrtop);
+
+  /* get left and right neighbors (Y direction) */
+  MPI_Cart_shift(comm2d, Y, 1, &ThisTask.nbrleft, &ThisTask.nbrright);
+
+  /************************************************************************************************************/
+  /* 2D MPI-cartesian decomposition:
+     the grids are replicated across the MPI processes.
+     This approach is not the most efficient in terms of memory usage,
+     because the arrays are replicated across MPI processes instead of being distributed */
+
+  /* get the remainder, i.e. take into account uneven
+     decomposition of points among the processes */
+  const int rem[NDIM] = {(NX_GLOB + 2) % dims[X],
+                         (NY_GLOB + 2) % dims[Y]};
+
+  /* get the amount of data for each MPI process:
+     - chunk is supposed to be >= 1 */
+  const int chunk[NDIM] = {(NX_GLOB + 2 - rem[X]) / dims[X],
+                           (NY_GLOB + 2 - rem[Y]) / dims[Y]};
+  if ((chunk[X] < 1) || (chunk[Y] < 1))
+    {
+      printf("\n\t chunk[X] < 1 || chunk[Y] < 1 ... aborting ...[rank: %d]\n", rank);
+      MPI_Comm_free(&comm2d);
+      MPI_Abort(comm, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+
+  /* get the subgrid dimension along X and Y directions: */
+  int incr[NDIM], offset[NDIM];
+  for (int dim=0 ; dim<NDIM ; dim++)
+    {
+      if (ThisTask.coords[dim] < rem[dim])
+        {
+          incr[dim] = chunk[dim] + 1;
+          offset[dim] = 0;
+        }
+      else
+        {
+          incr[dim] = chunk[dim];
+          offset[dim] = rem[dim];
+        }
+    }
+
+  /* subdomain managed by the task */
+  for (int dim=0 ; dim<NDIM ; dim++)
+    {
+      ThisTask.domain.start[dim] = ((ThisTask.coords[dim] * incr[dim]) + offset[dim]);
+      ThisTask.domain.end[dim] = (ThisTask.domain.start[dim] + incr[dim]) - 1;
+
+      /* boundaries */
+      ThisTask.domain.start[dim] = ((ThisTask.domain.start[dim] == 0) ? NGHOST : ThisTask.domain.start[dim]);
+      ThisTask.domain.end[dim] = ((ThisTask.domain.end[dim] == NX_GLOB + 1) ? NX_GLOB : ThisTask.domain.end[dim]);
+    }
+
+#if defined(DEBUG)
+  for (int task=0 ; task<ThisTask.nranks ; task++)
+    {
+      if (ThisTask.rank == task)
+        {
+          printf("\n\t rank: %d", task);
+          printf("\n\t\t coords = [%d, %d]", ThisTask.coords[X], ThisTask.coords[Y]);
+          printf("\n\t\t domain.start[X] = %d - domain.end[X] = %d", ThisTask.domain.start[X], ThisTask.domain.end[X]);
+          printf("\n\t\t domain.start[Y] = %d - domain.end[Y] = %d", ThisTask.domain.start[Y], ThisTask.domain.end[Y]);
+          printf("\n\t\t nbrtop = %d - nbrbottom = %d", ThisTask.nbrtop, ThisTask.nbrbottom);
+          printf("\n\t\t nbrleft = %d - nbrright = %d\n", ThisTask.nbrleft, ThisTask.nbrright);
+          fflush(stdout);
+        }
+      MPI_Barrier(comm2d);
+    }
+#endif /* DEBUG */
+
+  /******************************************************************************************************/
+
+  const MyData xbeg = 0.0;
+  const MyData xend = 1.0;
+  const MyData ybeg = 0.0;
+  const MyData yend = 1.0;
+
+  const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1),
+                              (yend - ybeg)/(NY_GLOB + 1)};
+
+  /* --------------------------------------------------------
+     1. Set grid indices
+     -------------------------------------------------------- */
+
+  const int ibeg = NGHOST;
+  const int iend = ibeg + NX_GLOB - 1;
+  const int nx = iend - ibeg + 1;
+  const int nx_tot = nx + 2 * NGHOST;
+
+  const int jbeg = NGHOST;
+  const int jend = jbeg + NY_GLOB - 1;
+  const int ny = jend - jbeg + 1;
+  const int ny_tot = ny + 2 * NGHOST;
+
+  if (rank == MASTER)
+    {
+      printf("\n\t Grid indices:");
+      printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot);
+      printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot);
+    }
+
+  /* --------------------------------------------------------
+     2. Generate grid, allocate memory
+        Not optimized because the grids are (unnecessarily)
+        replicated across MPI processes
+     -------------------------------------------------------- */
+
+  /* memory allocation */
+  MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData));
+  MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData));
+  assert((xg != NULL) && (yg != NULL));
+
+  /* initial conditions */
+  for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X];
+  for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y];
+  MyData *x = xg; /* Global and local grids are the same */
+  MyData *y = yg; /* for serial version of the code */
+
+  /* grids memory allocation */
+  MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot);
+  MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot);
+
+  /* --------------------------------------------------------
+     3. Initialize solution array to 0
+     -------------------------------------------------------- */
+
+  for (int j=jbeg ; j<=jend ; j++)
+    for (int i=ibeg ; i<=iend ; i++)
+      {
+        phi0[j][i] = 0.0;
+        phi[j][i] = 0.0;
+      }
+
+  /* --------------------------------------------------------
+     4. Main iteration cycle
+     -------------------------------------------------------- */
+
+  const double time_start = MPI_Wtime();
+
+  /* -- 4a. Set boundary conditions first -- */
+  BoundaryConditions(phi0, x, y, nx, ny);
+  BoundaryConditions(phi, x, y, nx, ny);
+
+  MyData err = 1.0;
+  /* iterations */
+  int k = 0;
+  while (1)
+    {
+      /* -- 4c. Jacobi's method and residual (interior points) -- */
+      /* core algorithm */
+
+      err = 0.0;
+      Jacobi_Communication(phi, phi0, &err, delta, NY_GLOB, &ThisTask, &comm2d);
+
+      if (!rank)
+        printf("\n\t Iteration = %d - err = %lg\n",k, err);
+
+      /* increase the counter of loop iterations */
+      k++;
+
+      /* get the total error */
+      MyData toterr;
+      /* combines values from all processes and distributes the result back to all processes */
+      MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm2d);
+
+      /* check convergence */
+      if (toterr <= TOL)
+        {
+          /* master task gathers all the domains */
+          get_domains(phi, &ThisTask, comm2d);
+
+          break;
+        }
+
+      /* swap the pointers */
+      MyData **tmp = phi;
+      phi = phi0;
+      phi0 = tmp;
+    }
+
+  /* master rank writes the solution */
+  if (!rank)
+    {
+      WriteSolution(phi, nx, ny);
+
+      printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB);
+      printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start);
+    }
+
+  // free memory
+  if (phi0)
+    {
+      free(phi0[0]);
+      free(phi0);
+    }
+
+  if (phi)
+    {
+      free(phi[0]);
+      free(phi);
+    }
+
+  if (yg)
+    free(yg);
+
+  if (xg)
+    free(xg);
+
+  MPI_Comm_free(&comm2d);
+
+  MPI_Finalize();
+
+  return 0;
+}
+
+/* ********************************************************************* */
+void BoundaryConditions(MyData **const restrict phi,
+                        MyData *const restrict x,
+                        MyData *const restrict y,
+                        const int nx,
+                        const int ny)
+/*
+*********************************************************************** */
+{
+  const int ibeg = NGHOST;
+  const int iend = ibeg + nx - 1;
+
+  const int jbeg = NGHOST;
+  const int jend = jbeg + ny - 1;
+
+  int i,j;
+
+  /* -- Left -- */
+  i = ibeg - 1;
+  for (int j=jbeg ; j<=jend ; j++)
+    phi[j][i] = (1.0 - y[j]);
+
+  /* -- Right -- */
+  i = iend + 1;
+  for (int j=jbeg ; j<=jend ; j++)
+    phi[j][i] = (y[j] * y[j]);
+
+  /* -- Bottom -- */
+  j = jbeg - 1;
+  for (int i=ibeg ; i<=iend ; i++)
+    phi[j][i] = (1.0 - x[i]);
+
+  /* -- Top -- */
+  j = jend + 1;
+  for (int i=ibeg ; i<=iend ; i++)
+    phi[j][i] = x[i];
+
+  return;
+}
+
+/* ********************************************************************* */
+
+void JacobiAlgorithm(MyData **const restrict Phi,
+                     MyData **const restrict Phi0,
+                     const MyData *restrict delta,
+                     const int jbeg,
+                     const int jend,
+                     const int ibeg,
+                     const int iend,
+                     MyData *const restrict error)
+{
+  for (int j=jbeg ; j<=jend ; j++)
+    {
+      for (int i=ibeg ; i<=iend ; i++)
+        {
+          Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] +
+                              Phi0[j-1][i] + Phi0[j+1][i]);
+
+          *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]);
+        } /* loop over columns */
+    } /* loop over rows */
+
+  return;
+}
+
+void Jacobi_Communication(MyData **const restrict Phi,
+                          MyData **const restrict Phi0,
+                          MyData *const restrict error,
+                          const MyData * restrict delta,
+                          const int grid_y,
+                          Task *const restrict ThisTask,
+                          MPI_Comm *const restrict comm)
+{
+  /* custom datatype */
+  MPI_Datatype column;
+  MPI_Type_vector((ThisTask->domain.end[X] - ThisTask->domain.start[X] + 1), 1, grid_y + 2, MPI_MyDatatype, &column);
+  /* commits the datatype */
+  MPI_Type_commit(&column);
+
+  const int data_row_size = (ThisTask->domain.end[Y] - ThisTask->domain.start[Y] + 1);
+
+  /* First task: issue the communication */
+  MPI_Request request[8];
+
+  MyData **const restrict buffer = Phi0;
+
+  /* 4 nonblocking MPI_Irecv */
+  MPI_Irecv(&buffer[ThisTask->domain.start[X] - 1][ThisTask->domain.start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 0, *comm, &request[0]);
+  MPI_Irecv(&buffer[ThisTask->domain.end[X] + 1][ThisTask->domain.start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop, 1, *comm, &request[1]);
+  MPI_Irecv(&buffer[ThisTask->domain.start[X] ][ThisTask->domain.start[Y] - 1], 1, column, ThisTask->nbrleft, 2, *comm, &request[2]);
+  MPI_Irecv(&buffer[ThisTask->domain.start[X] ][ThisTask->domain.end[Y] + 1], 1, column, ThisTask->nbrright, 3, *comm, &request[3]);
+
+  /* 4 nonblocking MPI_Isend */
+  MPI_Isend(&buffer[ThisTask->domain.end[X] ][ThisTask->domain.start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop, 0, *comm, &request[4]);
+  MPI_Isend(&buffer[ThisTask->domain.start[X] ][ThisTask->domain.start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 1, *comm, &request[5]);
+  MPI_Isend(&buffer[ThisTask->domain.start[X] ][ThisTask->domain.end[Y] ], 1, column, ThisTask->nbrright, 2, *comm, &request[6]);
+  MPI_Isend(&buffer[ThisTask->domain.start[X] ][ThisTask->domain.start[Y] ], 1, column, ThisTask->nbrleft, 3, *comm, &request[7]);
+
+  /**************************************** computation ****************************************/
+  /* perform the computation with the local data, (i.e. ghost cells are not required) */
+  /* so overlapping computation and communication */
+
+  const int jbeg = ThisTask->domain.start[X] + 1;
+  const int jend = ThisTask->domain.end[X] - 1;
+  const int ibeg = ThisTask->domain.start[Y] + 1;
+  const int iend = ThisTask->domain.end[Y] - 1;
+
+  *error = 0.0;
+  JacobiAlgorithm(Phi, Phi0, delta, jbeg, jend, ibeg, iend, error);
+
+  /**********************************************************************************************/
+
+  /* waits for any specified MPI Request to complete */
+  for (int req=0 ; req<8 ; req++)
+    {
+      int idx;
+      MPI_Status status;
+
+      MPI_Waitany(8, request, &idx, &status);
+
+      switch(status.MPI_TAG)
+        {
+          /* communication with tag 0 completed */
+        case 0:
+          /* data from nbrbottom have been received */
+          JacobiAlgorithm(Phi, Phi0, delta, (jbeg - 1), (jbeg - 1), (ibeg - 1) , (iend + 1), error);
+          break;
+
+          /* communication with tag 1 completed */
+        case 1:
+          /* data from nbrtop have been received */
+          JacobiAlgorithm(Phi, Phi0, delta, (jend + 1), (jend + 1), (ibeg - 1) , (iend + 1), error);
+          break;
+
+          /* communication with tag 2 completed */
+        case 2:
+          /* data from nbrleft have been received */
+          JacobiAlgorithm(Phi, Phi0, delta, (jbeg - 1), (jend + 1), (ibeg - 1), (ibeg - 1), error);
+          break;
+
+          /* communication with tag 3 completed */
+        case 3:
+          /* data from nbrright have been received */
+          JacobiAlgorithm(Phi, Phi0, delta, (jbeg - 1), (jend + 1), (iend + 1), (iend + 1), error);
+          break;
+        }
+    }
+
+  MPI_Type_free(&column);
+
+  return;
+}
+
+/* ********************************************************************* */
+
+void get_domains(MyData **const buffer,
+                 Task *const ThisTask,
+                 const MPI_Comm comm)
+{
+  /* Master process gathers all the domains */
+
+  /***************************** get the domain boundaries from each process *************************************/
+  myDomain *boundaries = NULL;
+  if (ThisTask->rank == MASTER)
+    {
+      boundaries = (myDomain *)malloc(ThisTask->nranks * sizeof(*boundaries));
+      assert(boundaries != NULL);
+    }
+
+  MPI_Gather(&ThisTask->domain, sizeof(myDomain), MPI_BYTE,
+             boundaries, sizeof(myDomain), MPI_BYTE,
+             MASTER, comm);
+
+  /**************************************************************************************************/
+
+  /*************************************** get the domain from each process *************************/
+  /* MPI_Gatherv
+     Gathers into specified locations from all processes in a group
+
+     Synopsis
+     int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                     void *recvbuf, const int recvcounts[], const int displs[],
+                     MPI_Datatype recvtype, int root, MPI_Comm comm) */
+
+  int *recvcounts = NULL;
+  int *displs = NULL;
+  MyData *recvbuf = NULL;
+  MyData *sendbuf = NULL;
+
+  if (ThisTask->rank == MASTER)
+    {
+      recvcounts = (int *)malloc(ThisTask->nranks * sizeof(*recvcounts));
+      assert(recvcounts != NULL);
+
+      displs = (int *)malloc(ThisTask->nranks * sizeof(*displs));
+      assert(displs != NULL);
+
+      for (int task=0 ; task<ThisTask->nranks ; task++)
+        {
+          const int nrows = (boundaries[task].end[X] - boundaries[task].start[X] + 1);
+          const int ncols = (boundaries[task].end[Y] - boundaries[task].start[Y] + 1);
+          recvcounts[task] = (nrows * ncols);
+        }
+
+      displs[0] = 0;
+      for (int task=1 ; task<ThisTask->nranks ; task++)
+        displs[task] = (recvcounts[task - 1] + displs[task - 1]);
+
+      /* allocate the recvbuffer */
+      recvbuf = (MyData *)malloc((displs[ThisTask->nranks - 1] + recvcounts[ThisTask->nranks - 1]) * sizeof(*recvbuf));
+      assert(recvbuf != NULL);
+    } /* MASTER */
+
+  /* every process sets up the sendbuf */
+  const int nrows = (ThisTask->domain.end[X] - ThisTask->domain.start[X] + 1);
+  const int ncols = (ThisTask->domain.end[Y] - ThisTask->domain.start[Y] + 1);
+  sendbuf = (MyData *)malloc(nrows * ncols * sizeof(*sendbuf));
+  assert(sendbuf != NULL);
+  /* store the actual grid owned by each process into sendbuf */
+  for (int row=0 ; row<nrows ; row++)
+    memcpy(&sendbuf[row * ncols], &buffer[ThisTask->domain.start[X] + row][ThisTask->domain.start[Y]], (ncols * sizeof(MyData)));
+
+  /* perform the MPI collective */
+  MPI_Gatherv(sendbuf, (nrows * ncols), MPI_MyDatatype,
+              recvbuf, recvcounts, displs, MPI_MyDatatype,
+              MASTER, comm);
+
+  /* store the recvbuf into the actual grid owned by the MASTER task */
+  if (ThisTask->rank == MASTER)
+    {
+      for (int task=1 ; task<ThisTask->nranks ; task++)
+        {
+          const int nrows = (boundaries[task].end[X] - boundaries[task].start[X] + 1);
+          const int ncols = (boundaries[task].end[Y] - boundaries[task].start[Y] + 1);
+          for (int row=0 ; row<nrows ; row++)
+            memcpy(&buffer[boundaries[task].start[X] + row][boundaries[task].start[Y]],
+                   &recvbuf[displs[task] + (row * ncols)], (ncols * sizeof(MyData)));
+        }
+    }
+
+  free(sendbuf);
+  if (ThisTask->rank == MASTER)
+    {
+      free(recvbuf);
+      free(displs);
+      free(recvcounts);
+      free(boundaries);
+    }
+
+  return;
+}
+
+/* ********************************************************************* */
+void WriteSolution(MyData **const phi,
+                   const int nx,
+                   const int ny)
+/*
+*********************************************************************** */
+{
+  const int ibeg = NGHOST;
+  const int jbeg = NGHOST;
+  const int jend = jbeg + ny - 1;
+
+  static int nfile = 0; /* File counter */
+
+  char fname[128];
+  sprintf(fname,"jacobi_2D_mpi_comp_comm_%02d.bin", nfile);
+
+  FILE *fp;
+  printf ("> Writing %s\n",fname);
+  fp = fopen(fname, "wb");
+
+  /* discard boundaries */
+  for (int j=jbeg ; j<=jend ; j++)
+    {
+      fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp);
+    }
+
+  nfile++;
+  fclose(fp);
+}
diff --git a/jacobi/mpi/comp_comm/src/tools.c b/jacobi/mpi/comp_comm/src/tools.c
new file mode 100644
index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce
--- /dev/null
+++ b/jacobi/mpi/comp_comm/src/tools.c
@@ -0,0 +1,59 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/* Authors: A. Mignone (mignone@to.infn.it) */
+/* V. Cesare (valentina.cesare@inaf.it) */
+/* D. Goz (david.goz@inaf.it) */
+/* */
+/* Date : June 2024 */
+/* */
+/* ///////////////////////////////////////////////////////////////////// */
+
+#include "tools.h"
+
+/* ********************************************************************* */
+MyData **Allocate_2DdblArray(const int nx, const int ny)
+/*
+ * Allocate memory for a MyData array (double precision by default) with
+ * nx rows and ny columns
+ *********************************************************************** */
+{
+  MyData **buf = malloc(nx * sizeof(MyData *));
+  assert(buf != NULL);
+
+  buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData));
+  assert(buf[0] != NULL);
+
+  for (int j=1 ; j<nx ; j++)
+    buf[j] = buf[j-1] + ny;
+
+  return buf;
+}
+
+/* ********************************************************************* */
+void Show_2DdblArray(const MyData **const A,
+                     const int nx,
+                     const int ny,
+                     const char *const string)
+/* *********************************************************************** */
+{
+  printf ("%s\n",string);
+  printf ("------------------------------\n");
+  for (int i=0 ; i<nx ; i++)
+    {
+      for (int j=0 ; j<ny ; j++)
+        {
+          printf ("%8.2f ", A[i][j]);
+        }
+      printf ("\n");
+    }
+  printf ("------------------------------\n");
+}
+/* ********************************************************************* */
+
+double seconds()
+{
+  struct timeval tmp;
+  gettimeofday(&tmp, (struct timezone *)0);
+  double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0;
+
+  return sec;
+}