Commit 2fb621e0 authored by David Goz

comp_comm debugging phase

parent c53a7c34
@@ -18,6 +18,7 @@ info:
 	@echo ' '
 	@echo '-----------------------------------------------------------------------------------------'
 	@echo '$$ make                     ---> compile the mpi application '
+	@echo '$$ make debug               ---> compile the mpi application for debugging '
 	@echo '$$ make valgrind_memcheck   ---> run the mpi application using Valgrind under Memcheck '
 	@echo '$$ make valgrind_callgrind  ---> run the mpi application using Valgrind under Callgrind '
 	@echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind '
@@ -28,6 +29,8 @@ info:
 mpi: $(PROG)
 
+debug: $(PROG_DEBUG)
+
 valgrind_memcheck: $(PROG_MEMCHECK)
 	@echo 'oooOOO... valgrind_memcheck ...OOOooo'
 	mpirun -n 4 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 9 2
......
@@ -8,7 +8,7 @@
 #include <mpi.h>
 
-#define MASTER     0
+#define MASTERTASK 0
 
 #if defined(SINGLE_PRECISION)
 typedef float MyData;
......
@@ -10,7 +10,7 @@
 /* function prototypes */
 MyData **Allocate_2DdblArray(const int nx, const int ny);
-void Show_2DdblArray(const MyData **const A,
+void Show_2DdblArray(      MyData **const A,
                      const int nx,
                      const int ny,
                      const char *const string);
......
@@ -21,7 +21,7 @@ $(PROG): $(DEPENDENCIES)
 	@echo ' '
 
 $(PROG_DEBUG): $(DEPENDENCIES)
-	$(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	$(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
 	@echo ' '
 	@echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine'
 	@echo ' '
......
@@ -14,12 +14,17 @@
 #include <math.h>
 #include <string.h>
 
-/* #define DEBUG */
+#define DEBUG
+
+static int NX_GLOB, NY_GLOB;
+MyData **global_phi;
 
 typedef struct MyGrid
 {
-  int start[NDIM];
-  int end[NDIM];
+  int local_start[NDIM];
+  int local_end[NDIM];
+  int global_start[NDIM];
+  int global_end[NDIM];
   int dim[NDIM];
 } myDomain;
@@ -34,14 +39,14 @@ typedef struct Task_2D_Cartesian
   int nbrbottom;
   int nbrleft;
   int nbrright;
+  MPI_Comm comm2d;
 } Task;
 
 /* function prototypes */
 void BoundaryConditions(MyData **const restrict,
                         MyData *const restrict,
                         MyData *const restrict,
-                        const int,
-                        const int);
+                        Task *const restrict);
 
 void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, const MyData *restrict,
                      const int, const int, const int, const int, MyData *const restrict);
@@ -50,13 +55,10 @@ void Jacobi_Communication(MyData **const restrict Phi,
                           MyData **const restrict Phi0,
                           MyData *const restrict error,
                           const MyData * restrict delta,
-                          const int grid_y,
-                          Task *const restrict ThisTask,
-                          MPI_Comm *const restrict comm);
+                          Task *const restrict ThisTask);
 
 void get_domains(MyData **const buffer,
-                 Task *const ThisTask,
-                 const MPI_Comm comm);
+                 Task *const ThisTask);
 
 void WriteSolution(MyData **const phi, const int nx, const int ny);
@@ -76,8 +78,8 @@ int main(int argc, char **argv)
     }
 
   /* global X and Y grid size (square matrix supposed) */
-  const int NX_GLOB = (int) strtol(argv[1], NULL, 10);
-  const int NY_GLOB = NX_GLOB;
+  NX_GLOB = (int) strtol(argv[1], NULL, 10);
+  NY_GLOB = NX_GLOB;
 
   /********************************** Cartesin topology *******************************************************/
   const int cartesian_grid_x = (int)strtol(argv[2], NULL, 10);
@@ -99,9 +101,10 @@ int main(int argc, char **argv)
   /* neighbors in the actual hardware for better performance */
   /* make a new communicator to which topology information has been attached */
-  MPI_Comm comm2d = MPI_COMM_NULL;
-  MPI_Cart_create(MPI_COMM_WORLD, NDIM, dims, periods, reorder, &comm2d);
-  if (comm2d == MPI_COMM_NULL)
+  Task ThisTask;
+  ThisTask.comm2d = MPI_COMM_NULL;
+  MPI_Cart_create(MPI_COMM_WORLD, NDIM, dims, periods, reorder, &ThisTask.comm2d);
+  if (ThisTask.comm2d == MPI_COMM_NULL)
     {
       printf("\n\t Process %d is not part of the new communicator \n", rank);
       fflush(stdout);
@@ -109,22 +112,22 @@ int main(int argc, char **argv)
       exit(EXIT_FAILURE);
     }
 
-  Task ThisTask;
-
   /* get the comm size */
-  MPI_Comm_size(comm2d, &ThisTask.nranks);
+  MPI_Comm_size(ThisTask.comm2d, &ThisTask.nranks);
 
   /* determine the process coords in cartesian topology given rank in group */
-  MPI_Cart_coords(comm2d, rank, NDIM, ThisTask.coords);
+  MPI_Cart_coords(ThisTask.comm2d, rank, NDIM, ThisTask.coords);
 
   /* determines process rank in communicator given Cartesian location */
-  MPI_Cart_rank(comm2d, ThisTask.coords, &ThisTask.rank);
+  MPI_Cart_rank(ThisTask.comm2d, ThisTask.coords, &ThisTask.rank);
 
   /* get bottom and top neighbors (X direction) */
-  MPI_Cart_shift(comm2d, X, 1, &ThisTask.nbrbottom, &ThisTask.nbrtop);
+  MPI_Cart_shift(ThisTask.comm2d, X, 1, &ThisTask.nbrbottom, &ThisTask.nbrtop);
 
   /* get left and right neighbors (Y direction) */
-  MPI_Cart_shift(comm2d, Y, 1, &ThisTask.nbrleft, &ThisTask.nbrright);
+  MPI_Cart_shift(ThisTask.comm2d, Y, 1, &ThisTask.nbrleft, &ThisTask.nbrright);
 
   /************************************************************************************************************/
 
   /* 2D MPI-cartesian decomposition:
@@ -144,7 +147,7 @@ int main(int argc, char **argv)
   if ((chunk[X] < 1) || (chunk[Y] < 1))
     {
       printf("\n\t chunk[X] < 1 || chunk[Y] < 1 ... aborting ...[rank: %d]\n", rank);
-      MPI_Comm_free(&comm2d);
+      MPI_Comm_free(&ThisTask.comm2d);
       MPI_Abort(comm, EXIT_FAILURE);
       exit(EXIT_FAILURE);
     }
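For context on the hunks above, which move the Cartesian communicator into the Task struct: the topology calls used there (MPI_Cart_create, MPI_Cart_coords, MPI_Cart_shift) can be exercised in isolation. Below is a minimal, self-contained sketch of the same calls; the file name, grid factorization and variable names are illustrative, not taken from the repository.

/* sketch_cart_topology.c - illustrative only; build with: mpicc sketch_cart_topology.c */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  int rank, nranks;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nranks);

  /* let MPI factorize the processes into a 2D grid */
  int dims[2] = {0, 0};
  MPI_Dims_create(nranks, 2, dims);

  int periods[2] = {0, 0};   /* non-periodic domain */
  MPI_Comm comm2d;
  MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm2d);

  int coords[2];
  MPI_Cart_coords(comm2d, rank, 2, coords);

  /* neighbors; MPI_PROC_NULL is returned at a physical boundary */
  int nbrbottom, nbrtop, nbrleft, nbrright;
  MPI_Cart_shift(comm2d, 0, 1, &nbrbottom, &nbrtop);
  MPI_Cart_shift(comm2d, 1, 1, &nbrleft,   &nbrright);

  printf("rank %d coords (%d,%d) top %d bottom %d left %d right %d\n",
         rank, coords[0], coords[1], nbrtop, nbrbottom, nbrleft, nbrright);

  MPI_Comm_free(&comm2d);
  MPI_Finalize();
  return 0;
}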
@@ -168,15 +171,20 @@ int main(int argc, char **argv)
   /* subdomain managed by the task */
   for (int dim=0 ; dim<NDIM ; dim++)
     {
-      ThisTask.domain.start[dim] = ((ThisTask.coords[dim] * incr[dim]) + offset[dim]);
-      ThisTask.domain.end[dim]   = (ThisTask.domain.start[dim] + incr[dim]) - 1;
+      ThisTask.domain.global_start[dim] = ((ThisTask.coords[dim] * incr[dim]) + offset[dim]);
+      ThisTask.domain.global_end[dim]   = (ThisTask.domain.global_start[dim] + incr[dim]) - 1;
 
       /* boundaries */
-      ThisTask.domain.start[dim] = ((ThisTask.domain.start[dim] == 0)           ? NGHOST  : ThisTask.domain.start[dim]);
-      ThisTask.domain.end[dim]   = ((ThisTask.domain.end[dim]   == NX_GLOB + 1) ? NX_GLOB : ThisTask.domain.end[dim]);
-
-      ThisTask.domain.dim[X] = (ThisTask.domain.end[X] - ThisTask.domain.start[X]);
-      ThisTask.domain.dim[Y] = (ThisTask.domain.end[Y] - ThisTask.domain.start[Y]);
+      ThisTask.domain.global_start[dim] = ((ThisTask.domain.global_start[dim] == 0)           ? NGHOST  : ThisTask.domain.global_start[dim]);
+      ThisTask.domain.global_end[dim]   = ((ThisTask.domain.global_end[dim]   == NX_GLOB + 1) ? NX_GLOB : ThisTask.domain.global_end[dim]);
+
+      ThisTask.domain.dim[X] = (ThisTask.domain.global_end[X] - ThisTask.domain.global_start[X] + 1);
+      ThisTask.domain.dim[Y] = (ThisTask.domain.global_end[Y] - ThisTask.domain.global_start[Y] + 1);
+
+      /* local index */
+      ThisTask.domain.local_start[X] = ThisTask.domain.local_start[Y] = 1;
+      ThisTask.domain.local_end[X]   = ThisTask.domain.dim[X];
+      ThisTask.domain.local_end[Y]   = ThisTask.domain.dim[Y];
     }
 
 #if defined(DEBUG)
@@ -186,13 +194,16 @@ int main(int argc, char **argv)
     {
       printf("\n\t rank: %d", task);
       printf("\n\t\t coords = [%d, %d]", ThisTask.coords[X], ThisTask.coords[Y]);
-      printf("\n\t\t domain.start[X] = %d - domain.end[X] = %d", ThisTask.domain.start[X], ThisTask.domain.end[X]);
-      printf("\n\t\t domain.start[Y] = %d - domain.end[Y] = %d", ThisTask.domain.start[Y], ThisTask.domain.end[Y]);
+      printf("\n\t\t domain.global_start[X] = %d - domain.global_end[X] = %d", ThisTask.domain.global_start[X], ThisTask.domain.global_end[X]);
+      printf("\n\t\t domain.global_start[Y] = %d - domain.global_end[Y] = %d", ThisTask.domain.global_start[Y], ThisTask.domain.global_end[Y]);
+      printf("\n\t\t domain.local_start[X] = %d - domain.local_end[X] = %d", ThisTask.domain.local_start[X], ThisTask.domain.local_end[X]);
+      printf("\n\t\t domain.local_start[Y] = %d - domain.local_end[Y] = %d", ThisTask.domain.local_start[Y], ThisTask.domain.local_end[Y]);
+      printf("\n\t\t domain.dim[X] = %d - domain.dim[Y] = %d", ThisTask.domain.dim[X], ThisTask.domain.dim[Y]);
       printf("\n\t\t nbrtop = %d - nbrbottom = %d", ThisTask.nbrtop, ThisTask.nbrbottom);
       printf("\n\t\t nbrleft = %d - nbrright = %d\n", ThisTask.nbrleft, ThisTask.nbrright);
       fflush(stdout);
     }
-      MPI_Barrier(comm2d);
+      MPI_Barrier(ThisTask.comm2d);
   }
 #endif /* DEBUG */
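The split of start/end into global_* and local_* above fixes each task's interior to local indices 1 .. dim[] (ghost rows/columns sit at 0 and dim[] + 1), and the matching point in the global grid is global_start + local - 1, which is exactly the offset used later in BoundaryConditions. A small sketch of that mapping, with names chosen here only for illustration:

/* sketch_index_map.c - mapping between local and global grid indices */
#include <assert.h>
#include <stdio.h>

/* local interior indices run over 1 .. dim (ghosts at 0 and dim + 1);      */
/* global_start is the global index of the first interior cell of the task */
static int local_to_global(const int local, const int global_start)
{
  return (global_start + local - 1);
}

int main(void)
{
  const int global_start = 5;   /* stands in for ThisTask.domain.global_start[X] */
  const int dim          = 4;   /* stands in for ThisTask.domain.dim[X]          */

  for (int local=1 ; local<=dim ; local++)
    printf("local %d -> global %d\n", local, local_to_global(local, global_start));

  /* the first interior cell maps back to global_start itself */
  assert(local_to_global(1, global_start) == global_start);

  return 0;
}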
@@ -242,22 +253,16 @@ int main(int argc, char **argv)
   for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y];
 
   /* grids memory allocation distributed across MPI processes */
-  MyData **phi  = Allocate_2DdblArray(ThisTask.domain.dim[Y] + 2, ThisTask.domain.dim[X] + 2);
-  MyData **phi0 = Allocate_2DdblArray(ThisTask.domain.dim[Y] + 2, ThisTask.domain.dim[X] + 2);
+  MyData **phi  = Allocate_2DdblArray(ThisTask.domain.dim[X] + 2, ThisTask.domain.dim[Y] + 2);
+  MyData **phi0 = Allocate_2DdblArray(ThisTask.domain.dim[X] + 2, ThisTask.domain.dim[Y] + 2);
 
   /* --------------------------------------------------------
      3. Set boundary conditions
      -------------------------------------------------------- */
-  BoundaryConditions(phi0, xg, yg, nx, ny);
-  BoundaryConditions(phi, xg, yg, nx, ny);
+  BoundaryConditions(phi0, xg, yg, &ThisTask);
+  BoundaryConditions(phi, xg, yg, &ThisTask);
+
+  free(yg);
+  free(xg);
 
   /* --------------------------------------------------------
      4. Main iteration cycle
@@ -265,35 +270,30 @@ int main(int argc, char **argv)
   const double time_start = MPI_Wtime();
 
-  /* -- 4a. Set boundary conditions first -- */
-
-  MyData err = 1.0;
-
   /* iterations */
   int k = 0;
   while (1)
     {
-      /* -- 4c. Jacobi's method and residual (interior points) -- */
-      /* core algorithm */
-
-      err = 0.0;
-      Jacobi_Communication(phi, phi0, &err, delta, NY_GLOB, &ThisTask, &comm2d);
+      /* -- 4a. Jacobi algorithm overlapping computation and communication */
+      MyData err;
+      Jacobi_Communication(phi, phi0, &err, delta, &ThisTask);
 
-      if (!rank)
-        printf("\n\t Iteration = %d - err = %lg\n",k, err);
+      /* -- 4b. Get the total error */
+      /* combines values from all processes and distributes the result back to all processes */
+      MyData toterr;
+      MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, ThisTask.comm2d);
+
+      if (ThisTask.rank == MASTERTASK)
+        printf("\n\t Iteration = %d - err = %lg\n",k, toterr);
 
       /* increase the counter of loop iterations */
       k++;
 
-      /* get the total error */
-      MyData toterr;
-      /* combines values from all processes and distributes the result back to all processes */
-      MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm2d);
-
       /* check convergence */
       if (toterr <= TOL)
         {
          /* master task gathers all the domains */
-         get_domains(phi, &ThisTask, comm2d);
+         get_domains(phi, &ThisTask);
 
          break;
        }
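The reworked loop above computes a per-task residual inside Jacobi_Communication and sums it with MPI_Allreduce before testing against TOL, so every rank takes the same branch and no rank leaves the loop early. A stripped-down sketch of that pattern; the residual update here is a placeholder, not the repository's JacobiAlgorithm:

/* sketch_allreduce_convergence.c - all ranks must agree on when to stop */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  const double TOL = 1.0e-5;
  double local_err = 1.0;          /* stands in for the per-task residual */
  int k = 0;

  while (1)
    {
      local_err *= 0.5;            /* placeholder for one Jacobi sweep */

      /* sum the local residuals; every rank receives the same total */
      double toterr;
      MPI_Allreduce(&local_err, &toterr, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

      if (rank == 0)
        printf("iteration %d - err = %lg\n", k, toterr);

      k++;

      if (toterr <= TOL)
        break;                     /* identical decision on every rank: no deadlock */
    }

  MPI_Finalize();
  return 0;
}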
@@ -305,9 +305,9 @@ int main(int argc, char **argv)
     }
 
   /* master rank writes the solution */
-  if (!rank)
+  if (ThisTask.rank == MASTERTASK)
     {
-      WriteSolution(phi, nx, ny);
+      WriteSolution(global_phi, NX_GLOB, NY_GLOB);
 
       printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB);
       printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start);
@@ -326,13 +326,11 @@ int main(int argc, char **argv)
       free(phi);
     }
 
-  if (yg)
-    free(yg);
-
-  if (xg)
-    free(xg);
+  if (ThisTask.rank == MASTERTASK)
+    if (global_phi)
+      free(global_phi);
 
-  MPI_Comm_free(&comm2d);
+  MPI_Comm_free(&ThisTask.comm2d);
 
   MPI_Finalize();
@@ -343,70 +341,73 @@ int main(int argc, char **argv)
 void BoundaryConditions(MyData **const restrict phi,
                         MyData *const restrict x,
                         MyData *const restrict y,
-                        const int nx,
-                        const int ny,
                         Task *const restrict ThisTask)
-/**********************************************************************************/
+/*************************************************************************/
 {
   /* left */
   if (ThisTask->nbrleft == MPI_PROC_NULL) /* no left neighbor */
     {
-      for (int j=ThisTask->domain.start[X] ; j<ThisTask->domain.end[X] ; j++)
-        phi[j][0] = (1.0 - y[j]);
+      const int global_start_j = ThisTask->domain.global_start[X];
+      for (int j=ThisTask->domain.local_start[X] ; j<=ThisTask->domain.local_end[X] ; j++)
+        phi[j][0] = (1.0 - y[global_start_j + j - 1]);
     }
 
   /* right */
   if (ThisTask->nbrright == MPI_PROC_NULL) /* no right neighbor */
     {
-      for (int j=ThisTask->domain.start[X] ; j<ThisTask->domain.end[X] ; j++)
-        phi[j][ThisTask->domain.end[Y] + 1] = (y[j] * y[j]);
+      const int global_start_j = ThisTask->domain.global_start[X];
+      for (int j=ThisTask->domain.local_start[X] ; j<=ThisTask->domain.local_end[X] ; j++)
+        phi[j][ThisTask->domain.local_end[Y] + 1] = (y[global_start_j + j - 1] * y[global_start_j + j - 1]);
     }
 
   /* bottom */
   if (ThisTask->nbrbottom == MPI_PROC_NULL) /* no bottom neighbor */
     {
-      for (int i=ThisTask->domain.start[Y] ; i<ThisTask->domain.end[Y] ; i++)
-        phi[0][i] = (1.0 - x[i]);
+      const int global_start_i = ThisTask->domain.global_start[Y];
+      for (int i=ThisTask->domain.local_start[Y] ; i<=ThisTask->domain.local_end[Y] ; i++)
+        phi[0][i] = (1.0 - x[global_start_i + i - 1]);
    }
 
   /* top */
-  if (ThisTask->nbrtop == MPI_PROC_NULL) /* no top neghbor */
+  if (ThisTask->nbrtop == MPI_PROC_NULL) /* no top neighbor */
     {
-      for (int i=ThisTask->domain.start[Y] ; i<ThisTask->domain.end[Y] ; i++)
-        phi[ThisTask->domain.end[X] + 1][i] = x[i];
+      const int global_start_i = ThisTask->domain.global_start[Y];
+      for (int i=ThisTask->domain.local_start[Y] ; i<=ThisTask->domain.local_end[Y] ; i++)
+        phi[ThisTask->domain.local_end[X] + 1][i] = x[global_start_i + i - 1];
     }
 
-  return;
-
-  const int ibeg = NGHOST;
-  const int iend = ibeg + nx - 1;
-
-  const int jbeg = NGHOST;
-  const int jend = jbeg + ny - 1;
-
-  int i,j;
-
-  /* -- Left -- */
-  i = ibeg - 1;
-  for (int j=jbeg ; j<=jend ; j++)
-    phi[j][i] = (1.0 - y[j]);
-
-  /* -- Right -- */
-  i = jend + 1;
-  for (int j=jbeg ; j<=jend ; j++)
-    phi[j][i] = (y[j] * y[j]);
-
-  /* -- Bottom -- */
-  j = jbeg - 1;
-  for (int i=ibeg ; i<=iend ; i++)
-    phi[j][i] = (1.0 - x[i]);
-
-  /* -- Top -- */
-  j = jend + 1;
-  for (int i=ibeg ; i<=iend ; i++)
-    phi[j][i] = x[i];
+#if defined(DEBUG)
+  if (ThisTask->rank == MASTERTASK)
+    {
+      printf("\n");
+      for (int i=0 ; i<NX_GLOB+2*NGHOST ; i++)
+        printf("\n\t x[%d] = %8.2f", i, x[i]);
+
+      printf("\n");
+      for (int i=0 ; i<NX_GLOB+2*NGHOST ; i++)
+        printf("\n\t y[%d] = %8.2f", i, y[i]);
+
+      printf("\n");
+      fflush(stdout);
+    }
+
+  MPI_Barrier(ThisTask->comm2d);
+
+  for (int task=0 ; task<ThisTask->nranks ; task++)
+    {
+      if (task == ThisTask->rank)
+        {
+          char string[128];
+          sprintf(string, "Task %d - Phi", task);
+          Show_2DdblArray(phi, ThisTask->domain.dim[X] + 2, ThisTask->domain.dim[Y] + 2, string);
+        }
+
+      MPI_Barrier(ThisTask->comm2d);
+    }
+#endif /* DEBUG */
 
   return;
 }
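The new DEBUG block above serializes per-task output with a barrier loop so each rank dumps its sub-grid in turn. The same idea in isolation (illustrative only; note that even with barriers, interleaving of stdout is not strictly guaranteed by the MPI standard, although it works in practice on most launchers):

/* sketch_ordered_print.c - one rank prints at a time, separated by barriers */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  int rank, nranks;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nranks);

  for (int task=0 ; task<nranks ; task++)
    {
      if (task == rank)
        {
          printf("Task %d reporting\n", rank);
          fflush(stdout);
        }

      /* nobody moves on until the current task has (very likely) flushed */
      MPI_Barrier(MPI_COMM_WORLD);
    }

  MPI_Finalize();
  return 0;
}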
@@ -436,22 +437,19 @@ void JacobiAlgorithm(MyData **const restrict Phi,
   return;
 }
 
 void Jacobi_Communication(MyData **const restrict Phi,
                           MyData **const restrict Phi0,
                           MyData *const restrict error,
                           const MyData * restrict delta,
-                          const int grid_y,
-                          Task *const restrict ThisTask,
-                          MPI_Comm *const restrict comm)
+                          Task *const restrict ThisTask)
 {
   /* custom datatype */
   MPI_Datatype column;
-  MPI_Type_vector((ThisTask->domain.end[X] - ThisTask->domain.start[X] + 1), 1, grid_y + 2, MPI_MyDatatype, &column);
+  MPI_Type_vector(ThisTask->domain.dim[X], 1, ThisTask->domain.dim[Y] + 2, MPI_MyDatatype, &column);
 
   /* commits the datatype */
   MPI_Type_commit(&column);
 
-  const int data_row_size = (ThisTask->domain.end[Y] - ThisTask->domain.start[Y] + 1);
+  const int data_row_size = ThisTask->domain.dim[Y];
 
   /* First task: issue the communication */
   MPI_Request request[8];
@@ -459,31 +457,68 @@ void Jacobi_Communication(MyData **const restrict Phi,
   MyData **const restrict buffer = Phi0;
 
   /* 4 nonblocking MPI_Irecv */
-  MPI_Irecv(&buffer[ThisTask->domain.start[X] - 1][ThisTask->domain.start[Y]    ], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 0, *comm, &request[0]);
-  MPI_Irecv(&buffer[ThisTask->domain.end[X]   + 1][ThisTask->domain.start[Y]    ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop,    1, *comm, &request[1]);
-  MPI_Irecv(&buffer[ThisTask->domain.start[X]    ][ThisTask->domain.start[Y] - 1], 1,             column,         ThisTask->nbrleft,   2, *comm, &request[2]);
-  MPI_Irecv(&buffer[ThisTask->domain.start[X]    ][ThisTask->domain.end[Y]   + 1], 1,             column,         ThisTask->nbrright,  3, *comm, &request[3]);
+  MPI_Irecv(&buffer[ThisTask->domain.local_start[X] - 1][ThisTask->domain.local_start[Y]    ], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 0, ThisTask->comm2d, &request[0]);
+  MPI_Irecv(&buffer[ThisTask->domain.local_end[X]   + 1][ThisTask->domain.local_start[Y]    ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop,    1, ThisTask->comm2d, &request[1]);
+  MPI_Irecv(&buffer[ThisTask->domain.local_start[X]    ][ThisTask->domain.local_start[Y] - 1], 1,             column,         ThisTask->nbrleft,   2, ThisTask->comm2d, &request[2]);
+  MPI_Irecv(&buffer[ThisTask->domain.local_start[X]    ][ThisTask->domain.local_end[Y]   + 1], 1,             column,         ThisTask->nbrright,  3, ThisTask->comm2d, &request[3]);
 
   /* 4 nonblocking MPI_Isend */
-  MPI_Isend(&buffer[ThisTask->domain.end[X]  ][ThisTask->domain.start[Y]], data_row_size, MPI_MyDatatype, ThisTask->nbrtop,    0, *comm, &request[4]);
-  MPI_Isend(&buffer[ThisTask->domain.start[X]][ThisTask->domain.start[Y]], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 1, *comm, &request[5]);
-  MPI_Isend(&buffer[ThisTask->domain.start[X]][ThisTask->domain.end[Y]  ], 1,             column,         ThisTask->nbrright,  2, *comm, &request[6]);
-  MPI_Isend(&buffer[ThisTask->domain.start[X]][ThisTask->domain.start[Y]], 1,             column,         ThisTask->nbrleft,   3, *comm, &request[7]);
+  MPI_Isend(&buffer[ThisTask->domain.local_end[X]  ][ThisTask->domain.local_start[Y]], data_row_size, MPI_MyDatatype, ThisTask->nbrtop,    0, ThisTask->comm2d, &request[4]);
+  MPI_Isend(&buffer[ThisTask->domain.local_start[X]][ThisTask->domain.local_start[Y]], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 1, ThisTask->comm2d, &request[5]);
+  MPI_Isend(&buffer[ThisTask->domain.local_start[X]][ThisTask->domain.local_end[Y]  ], 1,             column,         ThisTask->nbrright,  2, ThisTask->comm2d, &request[6]);
+  MPI_Isend(&buffer[ThisTask->domain.local_start[X]][ThisTask->domain.local_start[Y]], 1,             column,         ThisTask->nbrleft,   3, ThisTask->comm2d, &request[7]);
 
   /**************************************** computation ****************************************/
   /* perform the computation with the local data, (i.e. ghost cells are not required) */
   /* so overlapping computation and communication */
 
-  const int jbeg = ThisTask->domain.start[X] + 1;
-  const int jend = ThisTask->domain.end[X]   - 1;
-  const int ibeg = ThisTask->domain.start[Y] + 1;
-  const int iend = ThisTask->domain.end[Y]   - 1;
-
-  *error = 0.0;
-  JacobiAlgorithm(Phi, Phi0, delta, jbeg, jend, ibeg, iend, error);
+  const int jbeg = ThisTask->domain.local_start[X] + 1;
+  const int jend = ThisTask->domain.local_end[X]   - 1;
+  const int ibeg = ThisTask->domain.local_start[Y] + 1;
+  const int iend = ThisTask->domain.local_end[Y]   - 1;
+
+#if defined(DEBUG)
+  MPI_Waitall(8, request, MPI_STATUSES_IGNORE);
+
+  *error = 0.0;
+  /* JacobiAlgorithm(Phi, Phi0, delta, jbeg, jend, ibeg, iend, error); */
+
+  JacobiAlgorithm(Phi, Phi0, delta,
+                  ThisTask->domain.local_start[X]+1, ThisTask->domain.local_end[X]-1,
+                  ThisTask->domain.local_start[Y]  , ThisTask->domain.local_end[Y],
+                  error);
+
+  /* /\* data from nbrbottom have been received *\/ */
+  JacobiAlgorithm(Phi, Phi0, delta,
+                  ThisTask->domain.local_start[X], ThisTask->domain.local_start[X],
+                  ThisTask->domain.local_start[Y], ThisTask->domain.local_end[Y],
+                  error);
+
+  /* /\* data from nbrtop have been received *\/ */
+  JacobiAlgorithm(Phi, Phi0, delta,
+                  ThisTask->domain.local_end[X],   ThisTask->domain.local_end[X],
+                  ThisTask->domain.local_start[Y], ThisTask->domain.local_end[Y],
+                  error);
+
+  /* /\* data from nbrleft have been received *\/ */
+  /* JacobiAlgorithm(Phi, Phi0, delta,                                                 */
+  /*                 ThisTask->domain.local_start[X], ThisTask->domain.local_end[X],   */
+  /*                 ThisTask->domain.local_start[Y], ThisTask->domain.local_start[Y], */
+  /*                 error);                                                           */
+
+  /* /\* data from nbrright have been received *\/ */
+  /* JacobiAlgorithm(Phi, Phi0, delta,                                                 */
+  /*                 ThisTask->domain.local_start[X], ThisTask->domain.local_end[X],   */
+  /*                 ThisTask->domain.local_end[Y],   ThisTask->domain.local_end[Y],   */
+  /*                 error);                                                           */
+#else
 
   /**********************************************************************************************/
 
+  *error = 0.0;
+  JacobiAlgorithm(Phi, Phi0, delta, jbeg, jend, ibeg, iend, error);
+
   /* waits for any specified MPI Request to complete */
   for (int req=0 ; req<8 ; req++)
     {
@@ -497,29 +532,43 @@ void Jacobi_Communication(MyData **const restrict Phi,
           /* communication with tag 0 completed */
           case 0:
             /* data from nbrbottom have been received */
-            JacobiAlgorithm(Phi, Phi0, delta, (jbeg - 1), (jbeg - 1), (ibeg - 1), (iend + 1), error);
+            JacobiAlgorithm(Phi, Phi0, delta,
+                            ThisTask->domain.local_start[X], ThisTask->domain.local_start[X],
+                            ThisTask->domain.local_start[Y], ThisTask->domain.local_end[Y],
+                            error);
             break;
 
           /* communication with tag 1 completed */
           case 1:
             /* data from nbrtop have been received */
-            JacobiAlgorithm(Phi, Phi0, delta, (jend + 1), (jend + 1), (ibeg - 1), (iend + 1), error);
+            JacobiAlgorithm(Phi, Phi0, delta,
+                            ThisTask->domain.local_end[X],   ThisTask->domain.local_end[X],
+                            ThisTask->domain.local_start[Y], ThisTask->domain.local_end[Y],
+                            error);
             break;
 
           /* communication with tag 2 completed */
           case 2:
             /* data from nbrleft have been received */
-            JacobiAlgorithm(Phi, Phi0, delta, (jbeg - 1), (jend + 1), (ibeg - 1), (ibeg - 1), error);
+            JacobiAlgorithm(Phi, Phi0, delta,
+                            ThisTask->domain.local_start[X], ThisTask->domain.local_end[X],
+                            ThisTask->domain.local_start[Y], ThisTask->domain.local_start[Y],
+                            error);
             break;
 
           /* communication with tag 3 completed */
           case 3:
             /* data from nbrright have been received */
-            JacobiAlgorithm(Phi, Phi0, delta, (jbeg - 1), (jend + 1), (iend + 1), (iend + 1), error);
+            JacobiAlgorithm(Phi, Phi0, delta,
+                            ThisTask->domain.local_start[X], ThisTask->domain.local_end[X],
+                            ThisTask->domain.local_end[Y],   ThisTask->domain.local_end[Y],
+                            error);
             break;
         }
     }
+#endif /* DEBUG */
 
   MPI_Type_free(&column);
 
   return;
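Jacobi_Communication above builds the `column` datatype with MPI_Type_vector so that a non-contiguous column of the row-major local array can be sent as one message: dim[X] elements, block length 1, stride dim[Y] + 2 (the padded row length). A self-contained sketch of the same construction on a small array; sizes and names here are illustrative, not the repository's:

/* sketch_column_type.c - send one column of a row-major 2D block; run with 2 ranks */
#include <mpi.h>
#include <stdio.h>

#define NROWS 4
#define NCOLS 6   /* padded row length, i.e. the stride between column elements */

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  int rank, nranks;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nranks);

  /* a column is NROWS elements, 1 per row, separated by NCOLS elements */
  MPI_Datatype column;
  MPI_Type_vector(NROWS, 1, NCOLS, MPI_DOUBLE, &column);
  MPI_Type_commit(&column);

  double grid[NROWS][NCOLS];
  for (int i=0 ; i<NROWS ; i++)
    for (int j=0 ; j<NCOLS ; j++)
      grid[i][j] = (rank == 0) ? (i * NCOLS + j) : -1.0;

  if (nranks >= 2)
    {
      if (rank == 0)        /* send column 2 as a single message */
        MPI_Send(&grid[0][2], 1, column, 1, 0, MPI_COMM_WORLD);
      else if (rank == 1)   /* receive it into column 2 of the local array */
        {
          MPI_Recv(&grid[0][2], 1, column, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
          for (int i=0 ; i<NROWS ; i++)
            printf("rank 1: grid[%d][2] = %g\n", i, grid[i][2]);
        }
    }

  MPI_Type_free(&column);
  MPI_Finalize();
  return 0;
}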
@@ -528,14 +577,13 @@ void Jacobi_Communication(MyData **const restrict Phi,
 /* ********************************************************************* */
 void get_domains(MyData **const buffer,
-                 Task *const ThisTask,
-                 const MPI_Comm comm)
+                 Task *const ThisTask)
 {
   /* Master process gathers all the domains */
 
   /***************************** get the domain boundaries from each process *************************************/
   myDomain *boundaries = NULL;
-  if (ThisTask->rank == MASTER)
+  if (ThisTask->rank == MASTERTASK)
     {
       boundaries = (myDomain *)malloc(ThisTask->nranks * sizeof(*boundaries));
       assert(boundaries != NULL);
@@ -543,7 +591,7 @@ void get_domains(MyData **const buffer,
   MPI_Gather(&ThisTask->domain, sizeof(myDomain), MPI_BYTE,
              boundaries,        sizeof(myDomain), MPI_BYTE,
-             MASTER, comm);
+             MASTERTASK, ThisTask->comm2d);
 
   /**************************************************************************************************/
@@ -561,7 +609,7 @@ void get_domains(MyData **const buffer,
   MyData *recvbuf = NULL;
   MyData *sendbuf = NULL;
 
-  if (ThisTask->rank == MASTER)
+  if (ThisTask->rank == MASTERTASK)
     {
       recvcounts = (int *)malloc(ThisTask->nranks * sizeof(*recvcounts));
       assert(recvcounts != NULL);
@@ -570,56 +618,58 @@ void get_domains(MyData **const buffer,
       assert(displs != NULL);
 
       for (int task=0 ; task<ThisTask->nranks ; task++)
-        {
-          const int nrows = (boundaries[task].end[X] - boundaries[task].start[X] + 1);
-          const int ncols = (boundaries[task].end[Y] - boundaries[task].start[Y] + 1);
-          recvcounts[task] = (nrows * ncols);
-        }
+        recvcounts[task] = (boundaries[task].dim[X] * boundaries[task].dim[Y]);
 
       displs[0] = 0;
       for (int task=1 ; task<ThisTask->nranks ; task++)
         displs[task] = (recvcounts[task - 1] + displs[task - 1]);
 
-      /* check if the size is the entire domain */
-      assert((displs[ThisTask->nranks - 1] + recvcounts[ThisTask->nranks - 1]) == (NX_GLOB * NY_GLOB));
-
       /* allocate the recvbuffer */
-      recvbuf = (MyData *)malloc((displs[ThisTask->nranks - 1] + recvcounts[ThisTask->nranks - 1]) * sizeof(*recvbuf));
+      recvbuf = (MyData *)malloc(NX_GLOB * NY_GLOB * sizeof(*recvbuf));
       assert(recvbuf != NULL);
-    } /* MASTER */
+    } /* MASTERTASK */
 
   /* every process sets up the sendbuf */
-  const int nrows = (ThisTask->domain.end[X] - ThisTask->domain.start[X] + 1);
-  const int ncols = (ThisTask->domain.end[Y] - ThisTask->domain.start[Y] + 1);
-  sendbuf = (MyData *)malloc(nrows * ncols * sizeof(*sendbuf));
+  sendbuf = (MyData *)malloc(ThisTask->domain.dim[X] * ThisTask->domain.dim[Y] * sizeof(*sendbuf));
   assert(sendbuf != NULL);
 
   /* store the actual grid owned by each process into sendbuf */
-  for (int row=0 ; row<nrows ; row++)
-    memcpy(&sendbuf[row * ncols], &buffer[ThisTask->domain.start[X] + row][ThisTask->domain.start[Y]], (ncols * sizeof(MyData)));
+  for (int row=0 ; row<ThisTask->domain.dim[X] ; row++)
+    memcpy(&sendbuf[row * ThisTask->domain.dim[Y]], &buffer[ThisTask->domain.local_start[X] + row][ThisTask->domain.local_start[Y]], (ThisTask->domain.dim[Y] * sizeof(MyData)));
 
   /* perform the MPI collective */
-  MPI_Gatherv(sendbuf, (nrows * ncols), MPI_MyDatatype,
+  MPI_Gatherv(sendbuf, (ThisTask->domain.dim[X] * ThisTask->domain.dim[Y]), MPI_MyDatatype,
               recvbuf, recvcounts, displs, MPI_MyDatatype,
-              MASTER, comm);
+              MASTERTASK, ThisTask->comm2d);
 
-  /* store the recvbuf into the actual grid owned by the MASTER task */
-  if (ThisTask->rank == MASTER)
-    {
-      for (int task=1 ; task<ThisTask->nranks ; task++)
-        {
-          const int nrows = (boundaries[task].end[X] - boundaries[task].start[X] + 1);
-          const int ncols = (boundaries[task].end[Y] - boundaries[task].start[Y] + 1);
-          for (int row=0 ; row<nrows ; row++)
-            memcpy(&buffer[boundaries[task].start[X] + row][boundaries[task].start[Y]],
-                   &recvbuf[displs[task] + (row * ncols)], (ncols * sizeof(MyData)));
-        }
-    }
-
-  free(sendbuf);
-
-  if (ThisTask->rank == MASTER)
-    {
-      free(recvbuf);
-      free(displs);
-      free(recvcounts);
-      free(boundaries);
-    }
+  free(sendbuf);
+
+  if (ThisTask->rank == MASTERTASK)
+    {
+      free(recvcounts);
+    }
+
+  /* store the recvbuf into the global_grid allocated by the MASTER task */
+  if (ThisTask->rank == MASTERTASK)
+    {
+      global_phi = Allocate_2DdblArray(NX_GLOB + 2, NY_GLOB + 2);
+      assert(global_phi != NULL);
+
+      for (int task=0 ; task<ThisTask->nranks ; task++)
+        {
+          for (int row=0 ; row<boundaries[task].dim[X] ; row++)
+            {
+              memcpy(&global_phi[boundaries[task].global_start[X] + row][boundaries[task].global_start[Y]],
+                     &recvbuf[displs[task] + (row * boundaries[task].dim[Y])],
+                     (boundaries[task].dim[Y] * sizeof(MyData)));
+            }
+        }
+
+      free(displs);
+      free(recvbuf);
+      free(boundaries);
+    } /* MASTERTASK */
 
   return;
 }
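get_domains above gathers variable-size sub-grids with MPI_Gatherv, where recvcounts[task] is each task's element count and displs[task] is its offset in the flat receive buffer on the master. A minimal sketch of that collective with made-up contribution sizes (rank r sends r + 1 values), independent of the repository's data layout:

/* sketch_gatherv.c - gather variable-length contributions on rank 0 */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  int rank, nranks;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nranks);

  /* each rank contributes (rank + 1) doubles */
  const int sendcount = rank + 1;
  double *sendbuf = malloc(sendcount * sizeof(*sendbuf));
  assert(sendbuf != NULL);
  for (int i=0 ; i<sendcount ; i++)
    sendbuf[i] = rank;

  int *recvcounts = NULL, *displs = NULL;
  double *recvbuf = NULL;
  if (rank == 0)
    {
      recvcounts = malloc(nranks * sizeof(*recvcounts));
      displs     = malloc(nranks * sizeof(*displs));
      assert((recvcounts != NULL) && (displs != NULL));

      for (int task=0 ; task<nranks ; task++)
        recvcounts[task] = task + 1;

      displs[0] = 0;
      for (int task=1 ; task<nranks ; task++)
        displs[task] = displs[task - 1] + recvcounts[task - 1];

      recvbuf = malloc((displs[nranks - 1] + recvcounts[nranks - 1]) * sizeof(*recvbuf));
      assert(recvbuf != NULL);
    }

  MPI_Gatherv(sendbuf, sendcount, MPI_DOUBLE,
              recvbuf, recvcounts, displs, MPI_DOUBLE,
              0, MPI_COMM_WORLD);

  if (rank == 0)
    {
      for (int task=0 ; task<nranks ; task++)
        printf("task %d starts at offset %d with %d values\n", task, displs[task], recvcounts[task]);
      free(recvbuf); free(displs); free(recvcounts);
    }

  free(sendbuf);
  MPI_Finalize();
  return 0;
}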
......
@@ -29,7 +29,7 @@ MyData **Allocate_2DdblArray(const int nx, const int ny)
 }
 
 /* ********************************************************************* */
-void Show_2DdblArray(const MyData **const A,
+void Show_2DdblArray(      MyData **const A,
                      const int nx,
                      const int ny,
                      const char *const string)
......