From 2fb621e00f30da096cb7f32ba3012d911b36426f Mon Sep 17 00:00:00 2001 From: David Goz <david.goz@inaf.it> Date: Thu, 20 Jun 2024 19:27:16 +0200 Subject: [PATCH] comp_comm debugging phase --- jacobi/mpi/comp_comm/Makefile | 3 + jacobi/mpi/comp_comm/include/allvars.h | 2 +- jacobi/mpi/comp_comm/include/tools.h | 2 +- jacobi/mpi/comp_comm/make.def | 2 +- .../comp_comm/src/jacobi_2D_mpi_comp_comm.c | 384 ++++++++++-------- jacobi/mpi/comp_comm/src/tools.c | 2 +- 6 files changed, 224 insertions(+), 171 deletions(-) diff --git a/jacobi/mpi/comp_comm/Makefile b/jacobi/mpi/comp_comm/Makefile index 02695b4..9a158f6 100644 --- a/jacobi/mpi/comp_comm/Makefile +++ b/jacobi/mpi/comp_comm/Makefile @@ -18,6 +18,7 @@ info: @echo ' ' @echo '-----------------------------------------------------------------------------------------' @echo '$$ make ---> compile the mpi application ' + @echo '$$ make debug ---> compile the mpi application for debugging ' @echo '$$ make valgrind_memcheck ---> run the mpi application using Valgrind under Memcheck ' @echo '$$ make valgrind_callgrind ---> run the mpi application using Valgrind under Callgrind ' @echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind ' @@ -28,6 +29,8 @@ info: mpi: $(PROG) +debug: $(PROG_DEBUG) + valgrind_memcheck: $(PROG_MEMCHECK) @echo 'oooOOO... valgrind_memcheck ...OOOooo' mpirun -n 4 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 9 2 diff --git a/jacobi/mpi/comp_comm/include/allvars.h b/jacobi/mpi/comp_comm/include/allvars.h index 798e4e6..f6f93c1 100644 --- a/jacobi/mpi/comp_comm/include/allvars.h +++ b/jacobi/mpi/comp_comm/include/allvars.h @@ -8,7 +8,7 @@ #include <mpi.h> -#define MASTER 0 +#define MASTERTASK 0 #if defined(SINGLE_PRECISION) typedef float MyData; diff --git a/jacobi/mpi/comp_comm/include/tools.h b/jacobi/mpi/comp_comm/include/tools.h index 82859ec..0167c89 100644 --- a/jacobi/mpi/comp_comm/include/tools.h +++ b/jacobi/mpi/comp_comm/include/tools.h @@ -10,7 +10,7 @@ /* function prototypes */ MyData **Allocate_2DdblArray(const int nx, const int ny); -void Show_2DdblArray(const MyData **const A, +void Show_2DdblArray( MyData **const A, const int nx, const int ny, const char *const string); diff --git a/jacobi/mpi/comp_comm/make.def b/jacobi/mpi/comp_comm/make.def index 082a947..bacfcdd 100644 --- a/jacobi/mpi/comp_comm/make.def +++ b/jacobi/mpi/comp_comm/make.def @@ -21,7 +21,7 @@ $(PROG): $(DEPENDENCIES) @echo ' ' $(PROG_DEBUG): $(DEPENDENCIES) - $(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + $(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) @echo ' ' @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' @echo ' ' diff --git a/jacobi/mpi/comp_comm/src/jacobi_2D_mpi_comp_comm.c b/jacobi/mpi/comp_comm/src/jacobi_2D_mpi_comp_comm.c index 234d06a..bbd6ffb 100644 --- a/jacobi/mpi/comp_comm/src/jacobi_2D_mpi_comp_comm.c +++ b/jacobi/mpi/comp_comm/src/jacobi_2D_mpi_comp_comm.c @@ -14,12 +14,17 @@ #include <math.h> #include <string.h> -/* #define DEBUG */ +#define DEBUG + +static int NX_GLOB, NY_GLOB; +MyData **global_phi; typedef struct MyGrid { - int start[NDIM]; - int end[NDIM]; + int local_start[NDIM]; + int local_end[NDIM]; + int global_start[NDIM]; + int global_end[NDIM]; int dim[NDIM]; } myDomain; @@ -34,14 +39,14 @@ typedef struct Task_2D_Cartesian int nbrbottom; int nbrleft; int nbrright; + 
MPI_Comm comm2d; } Task; /* function prototypes */ void BoundaryConditions(MyData **const restrict, MyData *const restrict, MyData *const restrict, - const int, - const int); + Task *const restrict); void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, const MyData *restrict, const int, const int, const int, const int, MyData *const restrict); @@ -50,13 +55,10 @@ void Jacobi_Communication(MyData **const restrict Phi, MyData **const restrict Phi0, MyData *const restrict error, const MyData * restrict delta, - const int grid_y, - Task *const restrict ThisTask, - MPI_Comm *const restrict comm); + Task *const restrict ThisTask); void get_domains(MyData **const buffer, - Task *const ThisTask, - const MPI_Comm comm); + Task *const ThisTask); void WriteSolution(MyData **const phi, const int nx, const int ny); @@ -76,8 +78,8 @@ int main(int argc, char **argv) } /* global X and Y grid size (square matrix supposed) */ - const int NX_GLOB = (int) strtol(argv[1], NULL, 10); - const int NY_GLOB = NX_GLOB; + NX_GLOB = (int) strtol(argv[1], NULL, 10); + NY_GLOB = NX_GLOB; /********************************** Cartesin topology *******************************************************/ const int cartesian_grid_x = (int)strtol(argv[2], NULL, 10); @@ -99,9 +101,10 @@ int main(int argc, char **argv) /* neighbors in the actual hardware for better performance */ /* make a new communicator to which topology information has been attached */ - MPI_Comm comm2d = MPI_COMM_NULL; - MPI_Cart_create(MPI_COMM_WORLD, NDIM, dims, periods, reorder, &comm2d); - if (comm2d == MPI_COMM_NULL) + Task ThisTask; + ThisTask.comm2d = MPI_COMM_NULL; + MPI_Cart_create(MPI_COMM_WORLD, NDIM, dims, periods, reorder, &ThisTask.comm2d); + if (ThisTask.comm2d == MPI_COMM_NULL) { printf("\n\t Process %d is not part of the new communicator \n", rank); fflush(stdout); @@ -109,22 +112,22 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - Task ThisTask; + /* get the comm size */ - MPI_Comm_size(comm2d, &ThisTask.nranks); + MPI_Comm_size(ThisTask.comm2d, &ThisTask.nranks); /* determine the process coords in cartesian topology given rank in group */ - MPI_Cart_coords(comm2d, rank, NDIM, ThisTask.coords); + MPI_Cart_coords(ThisTask.comm2d, rank, NDIM, ThisTask.coords); /* determines process rank in communicator given Cartesian location */ - MPI_Cart_rank(comm2d, ThisTask.coords, &ThisTask.rank); + MPI_Cart_rank(ThisTask.comm2d, ThisTask.coords, &ThisTask.rank); /* get bottom and top neighbors (X direction) */ - MPI_Cart_shift(comm2d, X, 1, &ThisTask.nbrbottom, &ThisTask.nbrtop); + MPI_Cart_shift(ThisTask.comm2d, X, 1, &ThisTask.nbrbottom, &ThisTask.nbrtop); /* get left and right neighbors (Y direction) */ - MPI_Cart_shift(comm2d, Y, 1, &ThisTask.nbrleft, &ThisTask.nbrright); + MPI_Cart_shift(ThisTask.comm2d, Y, 1, &ThisTask.nbrleft, &ThisTask.nbrright); /************************************************************************************************************/ /* 2D MPI-cartesian decomposition: @@ -144,7 +147,7 @@ int main(int argc, char **argv) if ((chunk[X] < 1) || (chunk[Y] < 1)) { printf("\n\t chunk[X] < 1 || chunk[Y] < 1 ... 
aborting ...[rank: %d]\n", rank); - MPI_Comm_free(&comm2d); + MPI_Comm_free(&ThisTask.comm2d); MPI_Abort(comm, EXIT_FAILURE); exit(EXIT_FAILURE); } @@ -168,15 +171,20 @@ int main(int argc, char **argv) /* subdomain managed by the task */ for (int dim=0 ; dim<NDIM ; dim++) { - ThisTask.domain.start[dim] = ((ThisTask.coords[dim] * incr[dim]) + offset[dim]); - ThisTask.domain.end[dim] = (ThisTask.domain.start[dim] + incr[dim]) - 1; + ThisTask.domain.global_start[dim] = ((ThisTask.coords[dim] * incr[dim]) + offset[dim]); + ThisTask.domain.global_end[dim] = (ThisTask.domain.global_start[dim] + incr[dim]) - 1; /* boundaries */ - ThisTask.domain.start[dim] = ((ThisTask.domain.start[dim] == 0) ? NGHOST : ThisTask.domain.start[dim]); - ThisTask.domain.end[dim] = ((ThisTask.domain.end[dim] == NX_GLOB + 1) ? NX_GLOB : ThisTask.domain.end[dim]); + ThisTask.domain.global_start[dim] = ((ThisTask.domain.global_start[dim] == 0) ? NGHOST : ThisTask.domain.global_start[dim]); + ThisTask.domain.global_end[dim] = ((ThisTask.domain.global_end[dim] == NX_GLOB + 1) ? NX_GLOB : ThisTask.domain.global_end[dim]); + + ThisTask.domain.dim[X] = (ThisTask.domain.global_end[X] - ThisTask.domain.global_start[X] + 1); + ThisTask.domain.dim[Y] = (ThisTask.domain.global_end[Y] - ThisTask.domain.global_start[Y] + 1); - ThisTask.domain.dim[X] = (ThisTask.domain.end[X] - ThisTask.domain.start[X]); - ThisTask.domain.dim[Y] = (ThisTask.domain.end[Y] - ThisTask.domain.start[Y]); + /* local index */ + ThisTask.domain.local_start[X] = ThisTask.domain.local_start[Y] = 1; + ThisTask.domain.local_end[X] = ThisTask.domain.dim[X]; + ThisTask.domain.local_end[Y] = ThisTask.domain.dim[Y]; } #if defined(DEBUG) @@ -186,13 +194,16 @@ int main(int argc, char **argv) { printf("\n\t rank: %d", task); printf("\n\t\t coords = [%d, %d]", ThisTask.coords[X], ThisTask.coords[Y]); - printf("\n\t\t domain.start[X] = %d - domain.end[X] = %d", ThisTask.domain.start[X], ThisTask.domain.end[X]); - printf("\n\t\t domain.start[Y] = %d - domain.end[Y] = %d", ThisTask.domain.start[Y], ThisTask.domain.end[Y]); + printf("\n\t\t domain.global_start[X] = %d - domain.global_end[X] = %d", ThisTask.domain.global_start[X], ThisTask.domain.global_end[X]); + printf("\n\t\t domain.global_start[Y] = %d - domain.global_end[Y] = %d", ThisTask.domain.global_start[Y], ThisTask.domain.global_end[Y]); + printf("\n\t\t domain.local_start[X] = %d - domain.local_end[X] = %d", ThisTask.domain.local_start[X], ThisTask.domain.local_end[X]); + printf("\n\t\t domain.local_start[Y] = %d - domain.local_end[Y] = %d", ThisTask.domain.local_start[Y], ThisTask.domain.local_end[Y]); + printf("\n\t\t domain.dim[X] = %d - domain.dim[Y] = %d", ThisTask.domain.dim[X], ThisTask.domain.dim[Y]); printf("\n\t\t nbrtop = %d - nbrbottom = %d", ThisTask.nbrtop, ThisTask.nbrbottom); printf("\n\t\t nbrleft = %d - nbrright = %d\n", ThisTask.nbrleft, ThisTask.nbrright); fflush(stdout); } - MPI_Barrier(comm2d); + MPI_Barrier(ThisTask.comm2d); } #endif /* DEBUG */ @@ -242,22 +253,16 @@ int main(int argc, char **argv) for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; /* grids memory allocation distributed across MPI processes */ - MyData **phi = Allocate_2DdblArray(ThisTask.domain.dim[Y] + 2, ThisTask.domain.dim[X] + 2); - MyData **phi0 = Allocate_2DdblArray(ThisTask.domain.dim[Y] + 2, ThisTask.domain.dim[X] + 2); + MyData **phi = Allocate_2DdblArray(ThisTask.domain.dim[X] + 2, ThisTask.domain.dim[Y] + 2); + MyData **phi0 = Allocate_2DdblArray(ThisTask.domain.dim[X] + 2, 
ThisTask.domain.dim[Y] + 2); /* -------------------------------------------------------- - 3. Set boundary conditions + 3. Set boundary conditions -------------------------------------------------------- */ - - BoundaryConditions(phi0, xg, yg, nx, ny); - BoundaryConditions(phi, xg, yg, nx, ny); - - - - - - - + BoundaryConditions(phi0, xg, yg, &ThisTask); + BoundaryConditions(phi, xg, yg, &ThisTask); + free(yg); + free(xg); /* -------------------------------------------------------- 4. Main iteration cycle @@ -265,35 +270,30 @@ int main(int argc, char **argv) const double time_start = MPI_Wtime(); - /* -- 4a. Set boundary conditions first -- */ - - MyData err = 1.0; /* iterations */ int k = 0; while (1) { - /* -- 4c. Jacobi's method and residual (interior points) -- */ - /* core algorithm */ - - err = 0.0; - Jacobi_Communication(phi, phi0, &err, delta, NY_GLOB, &ThisTask, &comm2d); + /* -- 4a. Jacobi algorithm overlapping computation and communication */ + MyData err; + Jacobi_Communication(phi, phi0, &err, delta, &ThisTask); - if (!rank) - printf("\n\t Iteration = %d - err = %lg\n",k, err); + /* -- 4b. Get the total error */ + /* combines values from all processes and distributes the result back to all processes */ + MyData toterr; + MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, ThisTask.comm2d); + + if (ThisTask.rank == MASTERTASK) + printf("\n\t Iteration = %d - err = %lg\n",k, toterr); /* increase the counter of loop iterations */ k++; - - /* get the total error */ - MyData toterr; - /* combines values from all processes and distributes the result back to all processes */ - MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm2d); /* check convergence */ if (toterr <= TOL) { /* master task gathers all the domains */ - get_domains(phi, &ThisTask, comm2d); + get_domains(phi, &ThisTask); break; } @@ -305,9 +305,9 @@ int main(int argc, char **argv) } /* master rank writes the solution */ - if (!rank) + if (ThisTask.rank == MASTERTASK) { - WriteSolution(phi, nx, ny); + WriteSolution(global_phi, NX_GLOB, NY_GLOB); printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start); @@ -326,13 +326,11 @@ int main(int argc, char **argv) free(phi); } - if (yg) - free(yg); + if (ThisTask.rank == MASTERTASK) + if (global_phi) + free(global_phi); - if (xg) - free(xg); - - MPI_Comm_free(&comm2d); + MPI_Comm_free(&ThisTask.comm2d); MPI_Finalize(); @@ -343,71 +341,74 @@ int main(int argc, char **argv) void BoundaryConditions(MyData **const restrict phi, MyData *const restrict x, MyData *const restrict y, - const int nx, - const int ny, Task *const restrict ThisTask) -/**********************************************************************************/ +/*************************************************************************/ { /* left */ if (ThisTask->nbrleft == MPI_PROC_NULL) /* no left neighbor */ { - for (int j=ThisTask->domain.start[X] ; j<ThisTask->domain.end[X] ; j++) - phi[j][0] = (1.0 - y[j]); + const int global_start_j = ThisTask->domain.global_start[X]; + for (int j=ThisTask->domain.local_start[X] ; j<=ThisTask->domain.local_end[X] ; j++) + phi[j][0] = (1.0 - y[global_start_j + j - 1]); } /* right */ if (ThisTask->nbrright == MPI_PROC_NULL) /* no right neighbor */ { - for (int j=ThisTask->domain.start[X] ; j<ThisTask->domain.end[X] ; j++) - phi[j][ThisTask->domain.end[Y] + 1] = (y[j] * y[j]); + const int global_start_j = ThisTask->domain.global_start[X]; + for (int j=ThisTask->domain.local_start[X] ; 
j<=ThisTask->domain.local_end[X] ; j++) + phi[j][ThisTask->domain.local_end[Y] + 1] = (y[global_start_j + j - 1] * y[global_start_j + j - 1]); } - + /* bottom */ if (ThisTask->nbrbottom == MPI_PROC_NULL) /* no bottom neighbor */ { - for (int i=ThisTask->domain.start[Y] ; i<ThisTask->domain.end[Y] ; i++) - phi[0][i] = (1.0 - x[i]); + const int global_start_i = ThisTask->domain.global_start[Y]; + for (int i=ThisTask->domain.local_start[Y] ; i<=ThisTask->domain.local_end[Y] ; i++) + phi[0][i] = (1.0 - x[global_start_i + i - 1]); } /* top */ - if (ThisTask->nbrtop == MPI_PROC_NULL) /* no top neghbor */ + if (ThisTask->nbrtop == MPI_PROC_NULL) /* no top neighbor */ { - for (int i=ThisTask->domain.start[Y] ; i<ThisTask->domain.end[Y] ; i++) - phi[ThisTask->domain.end[X] + 1][i] = x[i]; + const int global_start_i = ThisTask->domain.global_start[Y]; + for (int i=ThisTask->domain.local_start[Y] ; i<=ThisTask->domain.local_end[Y] ; i++) + phi[ThisTask->domain.local_end[X] + 1][i] = x[global_start_i + i - 1]; } +#if defined(DEBUG) - - return; + if (ThisTask->rank == MASTERTASK) + { + printf("\n"); + for (int i=0 ; i<NX_GLOB+2*NGHOST ; i++) + printf("\n\t x[%d] = %8.2f", i, x[i]); - const int ibeg = NGHOST; - const int iend = ibeg + nx - 1; - - const int jbeg = NGHOST; - const int jend = jbeg + ny - 1; + printf("\n"); + for (int i=0 ; i<NX_GLOB+2*NGHOST ; i++) + printf("\n\t y[%d] = %8.2f", i, y[i]); - int i,j; - - /* -- Left -- */ - i = ibeg - 1; - for (int j=jbeg ; j<=jend ; j++) - phi[j][i] = (1.0 - y[j]); - - /* -- Right -- */ - i = jend + 1; - for (int j=jbeg ; j<=jend ; j++) - phi[j][i] = (y[j] * y[j]); - - /* -- Bottom -- */ - j = jbeg - 1; - for (int i=ibeg ; i<=iend ; i++) - phi[j][i] = (1.0 - x[i]); - - /* -- Top -- */ - j = jend + 1; - for (int i=ibeg ; i<=iend ; i++) - phi[j][i] = x[i]; + printf("\n"); + fflush(stdout); + } + + MPI_Barrier(ThisTask->comm2d); + for (int task=0 ; task<ThisTask->nranks ; task++) + { + if (task == ThisTask->rank) + { + char string[128]; + sprintf(string, "Task %d - Phi", task); + + Show_2DdblArray(phi, ThisTask->domain.dim[X] + 2, ThisTask->domain.dim[Y] + 2, string); + } + + MPI_Barrier(ThisTask->comm2d); + } + +#endif /* DEBUG */ + return; } @@ -436,22 +437,19 @@ void JacobiAlgorithm(MyData **const restrict Phi, return; } - void Jacobi_Communication(MyData **const restrict Phi, MyData **const restrict Phi0, MyData *const restrict error, const MyData * restrict delta, - const int grid_y, - Task *const restrict ThisTask, - MPI_Comm *const restrict comm) + Task *const restrict ThisTask) { /* custom datatype */ MPI_Datatype column; - MPI_Type_vector((ThisTask->domain.end[X] - ThisTask->domain.start[X] + 1), 1, grid_y + 2, MPI_MyDatatype, &column); + MPI_Type_vector(ThisTask->domain.dim[X], 1, ThisTask->domain.dim[Y] + 2, MPI_MyDatatype, &column); /* commits the datatype */ MPI_Type_commit(&column); - const int data_row_size = (ThisTask->domain.end[Y] - ThisTask->domain.start[Y] + 1); + const int data_row_size = ThisTask->domain.dim[Y]; /* First task: issue the communication */ MPI_Request request[8]; @@ -459,30 +457,67 @@ void Jacobi_Communication(MyData **const restrict Phi, MyData **const restrict buffer = Phi0; /* 4 nonblocking MPI_Irecv */ - MPI_Irecv(&buffer[ThisTask->domain.start[X] - 1][ThisTask->domain.start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 0, *comm, &request[0]); - MPI_Irecv(&buffer[ThisTask->domain.end[X] + 1][ThisTask->domain.start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop, 1, *comm, &request[1]); - 
MPI_Irecv(&buffer[ThisTask->domain.start[X] ][ThisTask->domain.start[Y] - 1], 1, column, ThisTask->nbrleft, 2, *comm, &request[2]); - MPI_Irecv(&buffer[ThisTask->domain.start[X] ][ThisTask->domain.end[Y] + 1], 1, column, ThisTask->nbrright, 3, *comm, &request[3]); + MPI_Irecv(&buffer[ThisTask->domain.local_start[X] - 1][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 0, ThisTask->comm2d, &request[0]); + MPI_Irecv(&buffer[ThisTask->domain.local_end[X] + 1][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop, 1, ThisTask->comm2d, &request[1]); + MPI_Irecv(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_start[Y] - 1], 1, column, ThisTask->nbrleft, 2, ThisTask->comm2d, &request[2]); + MPI_Irecv(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_end[Y] + 1], 1, column, ThisTask->nbrright, 3, ThisTask->comm2d, &request[3]); /* 4 nonblocking MPI_Isend */ - MPI_Isend(&buffer[ThisTask->domain.end[X] ][ThisTask->domain.start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop, 0, *comm, &request[4]); - MPI_Isend(&buffer[ThisTask->domain.start[X] ][ThisTask->domain.start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 1, *comm, &request[5]); - MPI_Isend(&buffer[ThisTask->domain.start[X] ][ThisTask->domain.end[Y] ], 1, column, ThisTask->nbrright, 2, *comm, &request[6]); - MPI_Isend(&buffer[ThisTask->domain.start[X] ][ThisTask->domain.start[Y] ], 1, column, ThisTask->nbrleft, 3, *comm, &request[7]); + MPI_Isend(&buffer[ThisTask->domain.local_end[X] ][ThisTask->domain.local_start[Y]], data_row_size, MPI_MyDatatype, ThisTask->nbrtop, 0, ThisTask->comm2d, &request[4]); + MPI_Isend(&buffer[ThisTask->domain.local_start[X]][ThisTask->domain.local_start[Y]], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 1, ThisTask->comm2d, &request[5]); + MPI_Isend(&buffer[ThisTask->domain.local_start[X]][ThisTask->domain.local_end[Y] ], 1, column, ThisTask->nbrright, 2, ThisTask->comm2d, &request[6]); + MPI_Isend(&buffer[ThisTask->domain.local_start[X]][ThisTask->domain.local_start[Y]], 1, column, ThisTask->nbrleft, 3, ThisTask->comm2d, &request[7]); /**************************************** computation ****************************************/ /* perform the computation with the local data, (i.e. 
ghost cells are not required) */ /* so overlapping computation and communication */ - const int jbeg = ThisTask->domain.start[X] + 1; - const int jend = ThisTask->domain.end[X] - 1; - const int ibeg = ThisTask->domain.start[Y] + 1; - const int iend = ThisTask->domain.end[Y] - 1; + const int jbeg = ThisTask->domain.local_start[X] + 1; + const int jend = ThisTask->domain.local_end[X] - 1; + const int ibeg = ThisTask->domain.local_start[Y] + 1; + const int iend = ThisTask->domain.local_end[Y] - 1; + +#if defined(DEBUG) + + MPI_Waitall(8, request, MPI_STATUSES_IGNORE); *error = 0.0; - JacobiAlgorithm(Phi, Phi0, delta, jbeg, jend, ibeg, iend, error); + /* JacobiAlgorithm(Phi, Phi0, delta, jbeg, jend, ibeg, iend, error); */ + JacobiAlgorithm(Phi, Phi0, delta, + ThisTask->domain.local_start[X]+1, ThisTask->domain.local_end[X]-1, + ThisTask->domain.local_start[Y] , ThisTask->domain.local_end[Y], + error); + + /* /\* data from nbrbottom have been received *\/ */ + JacobiAlgorithm(Phi, Phi0, delta, + ThisTask->domain.local_start[X], ThisTask->domain.local_start[X], + ThisTask->domain.local_start[Y], ThisTask->domain.local_end[Y], + error); + + /* /\* data from nbrtop have been received *\/ */ + JacobiAlgorithm(Phi, Phi0, delta, + ThisTask->domain.local_end[X], ThisTask->domain.local_end[X], + ThisTask->domain.local_start[Y], ThisTask->domain.local_end[Y], + error); + + /* /\* data from nbrleft have been received *\/ */ + /* JacobiAlgorithm(Phi, Phi0, delta, */ + /* ThisTask->domain.local_start[X], ThisTask->domain.local_end[X], */ + /* ThisTask->domain.local_start[Y], ThisTask->domain.local_start[Y], */ + /* error); */ + + /* /\* data from nbrright have been received *\/ */ + /* JacobiAlgorithm(Phi, Phi0, delta, */ + /* ThisTask->domain.local_start[X], ThisTask->domain.local_end[X], */ + /* ThisTask->domain.local_end[Y], ThisTask->domain.local_end[Y], */ + /* error); */ + +#else /**********************************************************************************************/ + + *error = 0.0; + JacobiAlgorithm(Phi, Phi0, delta, jbeg, jend, ibeg, iend, error); /* waits for any specified MPI Request to complete */ for (int req=0 ; req<8 ; req++) @@ -497,28 +532,42 @@ void Jacobi_Communication(MyData **const restrict Phi, /* communication with tag 0 completed */ case 0: /* data from nbrbottom have been received */ - JacobiAlgorithm(Phi, Phi0, delta, (jbeg - 1), (jbeg - 1), (ibeg - 1) , (iend + 1), error); + JacobiAlgorithm(Phi, Phi0, delta, + ThisTask->domain.local_start[X], ThisTask->domain.local_start[X], + ThisTask->domain.local_start[Y], ThisTask->domain.local_end[Y], + error); break; /* communication with tag 1 completed */ case 1: /* data from nbrtop have been received */ - JacobiAlgorithm(Phi, Phi0, delta, (jend + 1), (jend + 1), (ibeg - 1) , (iend + 1), error); + JacobiAlgorithm(Phi, Phi0, delta, + ThisTask->domain.local_end[X], ThisTask->domain.local_end[X], + ThisTask->domain.local_start[Y], ThisTask->domain.local_end[Y], + error); break; /* communication with tag 2 completed */ case 2: /* data from nbrleft have been received */ - JacobiAlgorithm(Phi, Phi0, delta, (jbeg - 1), (jend + 1), (ibeg - 1), (ibeg - 1), error); + JacobiAlgorithm(Phi, Phi0, delta, + ThisTask->domain.local_start[X], ThisTask->domain.local_end[X], + ThisTask->domain.local_start[Y], ThisTask->domain.local_start[Y], + error); break; /* communication with tag 3 completed */ case 3: /* data from nbrright have been received */ - JacobiAlgorithm(Phi, Phi0, delta, (jbeg - 1), (jend + 1), (iend + 1), (iend + 1), error); + 
JacobiAlgorithm(Phi, Phi0, delta, + ThisTask->domain.local_start[X], ThisTask->domain.local_end[X], + ThisTask->domain.local_end[Y], ThisTask->domain.local_end[Y], + error); break; } } + +#endif /* DEBUG */ MPI_Type_free(&column); @@ -528,14 +577,13 @@ void Jacobi_Communication(MyData **const restrict Phi, /* ********************************************************************* */ void get_domains(MyData **const buffer, - Task *const ThisTask, - const MPI_Comm comm) + Task *const ThisTask) { /* Master process gathers all the domains */ /***************************** get the domain boundaries from each process *************************************/ myDomain *boundaries = NULL; - if (ThisTask->rank == MASTER) + if (ThisTask->rank == MASTERTASK) { boundaries = (myDomain *)malloc(ThisTask->nranks * sizeof(*boundaries)); assert(boundaries != NULL); @@ -543,7 +591,7 @@ void get_domains(MyData **const buffer, MPI_Gather(&ThisTask->domain, sizeof(myDomain), MPI_BYTE, boundaries, sizeof(myDomain), MPI_BYTE, - MASTER, comm); + MASTERTASK, ThisTask->comm2d); /**************************************************************************************************/ @@ -561,7 +609,7 @@ void get_domains(MyData **const buffer, MyData *recvbuf = NULL; MyData *sendbuf = NULL; - if (ThisTask->rank == MASTER) + if (ThisTask->rank == MASTERTASK) { recvcounts = (int *)malloc(ThisTask->nranks * sizeof(*recvcounts)); assert(recvcounts != NULL); @@ -570,56 +618,58 @@ void get_domains(MyData **const buffer, assert(displs != NULL); for (int task=0 ; task<ThisTask->nranks ; task++) - { - const int nrows = (boundaries[task].end[X] - boundaries[task].start[X] + 1); - const int ncols = (boundaries[task].end[Y] - boundaries[task].start[Y] + 1); - recvcounts[task] = (nrows * ncols); - } + recvcounts[task] = (boundaries[task].dim[X] * boundaries[task].dim[Y]); displs[0] = 0; for (int task=1 ; task<ThisTask->nranks ; task++) displs[task] = (recvcounts[task - 1] + displs[task - 1]); + /* check if the size is the entire domain */ + assert((displs[ThisTask->nranks - 1] + recvcounts[ThisTask->nranks - 1]) == (NX_GLOB * NY_GLOB)); + /* allocate the recvbuffer */ - recvbuf = (MyData *)malloc((displs[ThisTask->nranks - 1] + recvcounts[ThisTask->nranks - 1]) * sizeof(*recvbuf)); + recvbuf = (MyData *)malloc(NX_GLOB * NY_GLOB * sizeof(*recvbuf)); assert(recvbuf != NULL); - } /* MASTER */ + } /* MASTERTASK */ /* every process sets up the sendbuf */ - const int nrows = (ThisTask->domain.end[X] - ThisTask->domain.start[X] + 1); - const int ncols = (ThisTask->domain.end[Y] - ThisTask->domain.start[Y] + 1); - sendbuf = (MyData *)malloc(nrows * ncols * sizeof(*sendbuf)); + sendbuf = (MyData *)malloc(ThisTask->domain.dim[X] * ThisTask->domain.dim[Y] * sizeof(*sendbuf)); assert(sendbuf != NULL); /* store the actual grid owned by each process into sendbuf */ - for (int row=0 ; row<nrows ; row++) - memcpy(&sendbuf[row * ncols], &buffer[ThisTask->domain.start[X] + row][ThisTask->domain.start[Y]], (ncols * sizeof(MyData))); + for (int row=0 ; row<ThisTask->domain.dim[X] ; row++) + memcpy(&sendbuf[row * ThisTask->domain.dim[Y]], &buffer[ThisTask->domain.local_start[X] + row][ThisTask->domain.local_start[Y]], (ThisTask->domain.dim[Y] * sizeof(MyData))); /* perform the MPI collective */ - MPI_Gatherv(sendbuf, (nrows * ncols), MPI_MyDatatype, + MPI_Gatherv(sendbuf, (ThisTask->domain.dim[X] * ThisTask->domain.dim[Y]), MPI_MyDatatype, recvbuf, recvcounts, displs, MPI_MyDatatype, - MASTER, comm); + MASTERTASK, ThisTask->comm2d); - /* store the recvbuf into 
the actual grid owned by the MASTER task */ - if (ThisTask->rank == MASTER) + free(sendbuf); + if (ThisTask->rank == MASTERTASK) { - for (int task=1 ; task<ThisTask->nranks ; task++) - { - const int nrows = (boundaries[task].end[X] - boundaries[task].start[X] + 1); - const int ncols = (boundaries[task].end[Y] - boundaries[task].start[Y] + 1); - for (int row=0 ; row<nrows ; row++) - memcpy(&buffer[boundaries[task].start[X] + row][boundaries[task].start[Y]], - &recvbuf[displs[task] + (row * ncols)], (ncols * sizeof(MyData))); - } + free(recvcounts); } - free(sendbuf); - if (ThisTask->rank == MASTER) + /* store the recvbuf into the global_grid allocated by the MASTER task */ + if (ThisTask->rank == MASTERTASK) { - free(recvbuf); + global_phi = Allocate_2DdblArray(NX_GLOB + 2, NY_GLOB + 2); + assert(global_phi != NULL); + + for (int task=0 ; task<ThisTask->nranks ; task++) + { + for (int row=0 ; row<boundaries[task].dim[X] ; row++) + { + memcpy(&global_phi[boundaries[task].global_start[X] + row][boundaries[task].global_start[Y]], + &recvbuf[displs[task] + (row * boundaries[task].dim[Y])], + (boundaries[task].dim[Y] * sizeof(MyData))); + } + } + free(displs); - free(recvcounts); + free(recvbuf); free(boundaries); - } + } /* MASTERTASK */ return; } @@ -631,7 +681,7 @@ void WriteSolution(MyData **const phi, /* *********************************************************************** */ { - const int ibeg = NGHOST; + const int ibeg = NGHOST; const int jbeg = NGHOST; const int jend = jbeg + ny - 1; diff --git a/jacobi/mpi/comp_comm/src/tools.c b/jacobi/mpi/comp_comm/src/tools.c index 547d9a9..0e282ee 100644 --- a/jacobi/mpi/comp_comm/src/tools.c +++ b/jacobi/mpi/comp_comm/src/tools.c @@ -29,7 +29,7 @@ MyData **Allocate_2DdblArray(const int nx, const int ny) } /* ********************************************************************* */ -void Show_2DdblArray(const MyData **const A, +void Show_2DdblArray( MyData **const A, const int nx, const int ny, const char *const string) -- GitLab
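
Note on the overlap pattern (an illustration for review, not part of the patch): the substantive change in jacobi_2D_mpi_comp_comm.c is the move to local indexing plus the overlap of the Jacobi update with the nonblocking halo exchange, finished by a Waitany loop over the edge strips. The sketch below shows that pattern in isolation, assuming a single contiguous (nx+2) x (ny+2) block of doubles with one ghost layer and an existing 2D Cartesian communicator. Every name in it (sweep, halo_step, nbr[], IDX, the toy main) is illustrative and does not come from the patch, which instead works on the Allocate_2DdblArray pointer-array layout with MyData/MPI_MyDatatype and the Task structure.

/* minimal sketch of the overlapped halo exchange; illustration only */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define IDX(i, j, ld) ((i) * (ld) + (j))
enum { BOTTOM, TOP, LEFT, RIGHT };                 /* order of nbr[] and of the recv tags */

/* one Jacobi sweep over rows [i0,i1] x columns [j0,j1]; accumulates the local residual */
static void sweep(double *phi, const double *phi0, const int i0, const int i1,
                  const int j0, const int j1, const int ld, double *err)
{
  for (int i = i0; i <= i1; i++)
    for (int j = j0; j <= j1; j++) {
      const double v = 0.25 * (phi0[IDX(i - 1, j, ld)] + phi0[IDX(i + 1, j, ld)] +
                               phi0[IDX(i, j - 1, ld)] + phi0[IDX(i, j + 1, ld)]);
      *err += fabs(v - phi0[IDX(i, j, ld)]);
      phi[IDX(i, j, ld)] = v;
    }
}

/* one overlapped step: post the halo exchange, update the interior while the messages
   are in flight, finish each edge as its halo lands, and do the corners last          */
static double halo_step(double *phi, double *phi0, const int nx, const int ny,
                        const int nbr[4], MPI_Comm comm)
{
  const int ld = ny + 2;                           /* row stride including ghost cells  */
  double err = 0.0;

  MPI_Datatype column;                             /* non-contiguous column, nx entries */
  MPI_Type_vector(nx, 1, ld, MPI_DOUBLE, &column);
  MPI_Type_commit(&column);

  MPI_Request req[8];
  /* receive into the ghost row/column of each side ...                                 */
  MPI_Irecv(&phi0[IDX(0,      1, ld)], ny, MPI_DOUBLE, nbr[BOTTOM], 0, comm, &req[0]);
  MPI_Irecv(&phi0[IDX(nx + 1, 1, ld)], ny, MPI_DOUBLE, nbr[TOP],    1, comm, &req[1]);
  MPI_Irecv(&phi0[IDX(1,      0, ld)], 1,  column,     nbr[LEFT],   2, comm, &req[2]);
  MPI_Irecv(&phi0[IDX(1, ny + 1, ld)], 1,  column,     nbr[RIGHT],  3, comm, &req[3]);
  /* ... filled from the matching first/last interior row/column of each neighbour      */
  MPI_Isend(&phi0[IDX(nx, 1, ld)], ny, MPI_DOUBLE, nbr[TOP],    0, comm, &req[4]);
  MPI_Isend(&phi0[IDX(1,  1, ld)], ny, MPI_DOUBLE, nbr[BOTTOM], 1, comm, &req[5]);
  MPI_Isend(&phi0[IDX(1, ny, ld)], 1,  column,     nbr[RIGHT],  2, comm, &req[6]);
  MPI_Isend(&phi0[IDX(1,  1, ld)], 1,  column,     nbr[LEFT],   3, comm, &req[7]);

  /* interior points need no ghost data: overlap them with the communication            */
  sweep(phi, phi0, 2, nx - 1, 2, ny - 1, ld, &err);

  for (int done = 0; done < 8; done++) {
    int idx;                                       /* indices 4..7 are completed sends   */
    MPI_Waitany(8, req, &idx, MPI_STATUS_IGNORE);
    if      (idx == BOTTOM) sweep(phi, phi0, 1,      1,      2,      ny - 1, ld, &err);
    else if (idx == TOP)    sweep(phi, phi0, nx,     nx,     2,      ny - 1, ld, &err);
    else if (idx == LEFT)   sweep(phi, phi0, 2,      nx - 1, 1,      1,      ld, &err);
    else if (idx == RIGHT)  sweep(phi, phi0, 2,      nx - 1, ny,     ny,     ld, &err);
  }

  /* corners: each one reads two different ghost lines, so only now are they safe       */
  sweep(phi, phi0, 1,  1,  1, 1, ld, &err);  sweep(phi, phi0, 1,  1,  ny, ny, ld, &err);
  sweep(phi, phi0, nx, nx, 1, 1, ld, &err);  sweep(phi, phi0, nx, nx, ny, ny, ld, &err);

  MPI_Type_free(&column);
  return err;                                      /* local residual: Allreduce it       */
}

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  int nprocs, rank, dims[2] = {0, 0}, periods[2] = {0, 0}, nbr[4];
  MPI_Comm comm2d;
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  MPI_Dims_create(nprocs, 2, dims);
  MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm2d);
  MPI_Comm_rank(comm2d, &rank);
  MPI_Cart_shift(comm2d, 0, 1, &nbr[BOTTOM], &nbr[TOP]);
  MPI_Cart_shift(comm2d, 1, 1, &nbr[LEFT],   &nbr[RIGHT]);

  const int nx = 64, ny = 64;                      /* toy local block; nx, ny >= 2       */
  double *phi  = calloc((nx + 2) * (ny + 2), sizeof(*phi));
  double *phi0 = calloc((nx + 2) * (ny + 2), sizeof(*phi0));
  phi0[IDX(nx / 2, ny / 2, ny + 2)] = 1.0;         /* something to smooth                */

  const double local_err = halo_step(phi, phi0, nx, ny, nbr, comm2d);
  double toterr;
  MPI_Allreduce(&local_err, &toterr, 1, MPI_DOUBLE, MPI_SUM, comm2d);
  if (rank == 0)
    printf("residual after one overlapped Jacobi step: %g\n", toterr);

  free(phi0); free(phi);
  MPI_Comm_free(&comm2d);
  MPI_Finalize();
  return 0;
}

The one deliberate difference from the patch is that the edge sweeps above exclude the four corner points and update them only after every receive has completed: each corner reads two different ghost lines, so deferring them keeps the result independent of the order in which the halos arrive.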