Commit 26284f2f authored by David Goz

mpi overlapping computation and communication added

parent 997e7058
# put here the files that should not be committed
*.txt
*~
*.bin
#######################################################################
# Author: David Goz (david.goz@inaf.it) #
# June 2024 #
#######################################################################
#
# To see all the compilation options
# $ make info
#######################################################################
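# Typical usage once compiled (the program expects <grid_size> and the
# Cartesian communicator size along X as command-line arguments), e.g.:
#   $ make
#   $ mpirun -n 4 ./jacobi_mpi_comp_comm_<hostname> 128 2
#######################################################################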
# make.def defines how the application is compiled
include make.def
include make_mpi_path
#######################################################################
.PHONY: info mpi valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean
info:
@echo ' '
@echo '-----------------------------------------------------------------------------------------'
@echo '$$ make ---> compile the mpi application '
@echo '$$ make valgrind_memcheck ---> run the mpi application using Valgrind under Memcheck '
@echo '$$ make valgrind_callgrind ---> run the mpi application using Valgrind under Callgrind '
@echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind '
@echo '$$ make clean ---> clean up all '
@echo '$$ make info ---> get make info '
@echo '-----------------------------------------------------------------------------------------'
@echo ' '
mpi: $(PROG)
valgrind_memcheck: $(PROG_MEMCHECK)
@echo 'oooOOO... valgrind_memcheck ...OOOooo'
mpirun -n 4 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 9 2
@echo 'oooOOO... valgrind_memcheck ...OOOooo'
valgrind_callgrind: $(PROG_CALLGRIND)
@echo 'oooOOO... valgrind_callgrind ...OOOooo'
mpirun -n 4 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_%p.txt ./$< 9 2
@echo ' '
@echo 'To generate a function-by-function summary from the profile data file:'
@echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less'
@echo '(kcachegrind is required in order to visualize the output using the GUI)'
valgrind_cachegrind: $(PROG_CACHEGRIND)
@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
mpirun -n 4 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_%p.txt ./$< 9 2
@echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less'
@echo '(kcachegrind is required in order to visualize the output using the GUI)'
@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
clean:
rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~
rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND)
rm -f valgrind_*.txt
rm -f cachegrind.out.*
rm -f callgrind.*
rm -f *bin
#pragma once
#define NGHOST 1
#define NDIM 2 /* 2D */
#define X 0
#define Y 1
#define TOL 1.e-5
#include <mpi.h>
#define MASTER 0
#if defined(SINGLE_PRECISION)
typedef float MyData;
#define MPI_MyDatatype MPI_FLOAT
#else
/* double precision is assumed by default */
typedef double MyData;
#define MPI_MyDatatype MPI_DOUBLE
#endif /* SINGLE_PRECISION */
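/* note: single precision is enabled by compiling with -DSINGLE_PRECISION
   (e.g. by adding it to CFLAGS in make.def); double precision is the default */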
#pragma once
#include "allvars.h"
#include <assert.h>
#include <sys/time.h>
#include <time.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
/* function prototypes */
MyData **Allocate_2DdblArray(const int nx, const int ny);
void Show_2DdblArray(const MyData **const A,
const int nx,
const int ny,
const char *const string);
double seconds(void);
CC ?= gcc
CFLAGS ?= -Wall -Wextra -march=native
LIBS ?= -lm -lmpi
SYSTYPE = $(strip $(shell uname -n))
PROG ?= jacobi_mpi_comp_comm_$(SYSTYPE)
PROG_DEBUG = $(PROG)_DEBUG
PROG_MEMCHECK = $(PROG)_MEMCHECK
PROG_CALLGRIND = $(PROG)_CALLGRIND
PROG_CACHEGRIND = $(PROG)_CACHEGRIND
HEADERS = $(wildcard ./include/*.h)
SOURCES = $(wildcard ./src/*.c)
DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile
$(PROG): $(DEPENDENCIES)
$(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
@echo ' '
@echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine'
@echo ' '
$(PROG_DEBUG): $(DEPENDENCIES)
$(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
@echo ' '
@echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine'
@echo ' '
$(PROG_MEMCHECK): $(DEPENDENCIES)
$(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
@echo ' '
@echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine'
@echo ' '
$(PROG_CALLGRIND): $(DEPENDENCIES)
$(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
@echo ' '
@echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine'
@echo ' '
$(PROG_CACHEGRIND): $(DEPENDENCIES)
$(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
@echo ' '
@echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine'
@echo ' '
# set the MPI install path
MPI_INSTALL_PATH = /home/gozzilla/software/openmpi/openmpi-5.0.3
MPI = $(MPI_INSTALL_PATH)
MPI_INC = $(MPI)/include
MPI_LIB = $(MPI)/lib
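# NOTE: adjust MPI_INSTALL_PATH to the OpenMPI (or other MPI) installation
# available on the target machine; the path above is machine-specific.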
/* ///////////////////////////////////////////////////////////////////// */
/* Authors: A. Mignone (mignone@to.infn.it) */
/* V. Cesare (valentina.cesare@inaf.it) */
/* D. Goz (david.goz@inaf.it) */
/* */
/* Date : June 2024 */
/* */
/* ///////////////////////////////////////////////////////////////////// */
#include "allvars.h"
#include "tools.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
/* #define DEBUG */
typedef struct MyGrid
{
int start[NDIM];
int end[NDIM];
} myDomain;
typedef struct Task_2D_Cartesian
{
int rank;
int nranks;
int coords[NDIM];
myDomain domain;
int nbrtop;
int nbrbottom;
int nbrleft;
int nbrright;
} Task;
/* function prototypes */
void BoundaryConditions(MyData **const restrict,
MyData *const restrict,
MyData *const restrict,
const int,
const int);
void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, const MyData *restrict,
const int, const int, const int, const int, MyData *const restrict);
void Jacobi_Communication(MyData **const restrict Phi,
MyData **const restrict Phi0,
MyData *const restrict error,
const MyData * restrict delta,
const int grid_y,
Task *const restrict ThisTask,
MPI_Comm *const restrict comm);
void get_domains(MyData **const buffer,
Task *const ThisTask,
const MPI_Comm comm);
void WriteSolution(MyData **const phi, const int nx, const int ny);
int main(int argc, char **argv)
{
int rank, Nranks;
const MPI_Comm comm = MPI_COMM_WORLD;
MPI_Init(&argc, &argv);
MPI_Comm_rank(comm, &rank);
MPI_Comm_size(comm, &Nranks);
if (argc <= 2)
{
/* every rank must bail out; only the master prints the usage message */
if (!rank)
printf("\n\t Usage: <executable> <grid_size> <cartesian communicator size_X>\n\n");
MPI_Abort(comm, EXIT_FAILURE);
exit(EXIT_FAILURE);
}
/* global X and Y grid size (a square grid is assumed) */
const int NX_GLOB = (int) strtol(argv[1], NULL, 10);
const int NY_GLOB = NX_GLOB;
/********************************** Cartesian topology ******************************************************/
const int cartesian_grid_x = (int)strtol(argv[2], NULL, 10);
const int cartesian_grid_y = ((Nranks % cartesian_grid_x == 0) ? (Nranks / cartesian_grid_x) : -1);
if (cartesian_grid_y == -1)
{
if (!rank)
printf("\n\t Nranks mod cartesian_grid_x != 0 ... aborting ...\n");
/* abort from every rank, not only from the master */
MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
exit(EXIT_FAILURE);
}
const int dims[NDIM] = {cartesian_grid_x, cartesian_grid_y};
const int periods[NDIM] = {0, 0}; /* not periodic */
const int reorder = 0; /* setting reorder to a non-zero (true) value allows MPI to */
/* reorder the process ranks so that neighbors are mapped */
/* onto the actual hardware topology for better performance */
/* make a new communicator to which topology information has been attached */
MPI_Comm comm2d = MPI_COMM_NULL;
MPI_Cart_create(MPI_COMM_WORLD, NDIM, dims, periods, reorder, &comm2d);
if (comm2d == MPI_COMM_NULL)
{
printf("\n\t Process %d is not part of the new communicator \n", rank);
fflush(stdout);
MPI_Abort(comm, EXIT_FAILURE);
exit(EXIT_FAILURE);
}
Task ThisTask;
/* get the comm size */
MPI_Comm_size(comm2d, &ThisTask.nranks);
/* determine the process coords in cartesian topology given rank in group */
MPI_Cart_coords(comm2d, rank, NDIM, ThisTask.coords);
/* determines process rank in communicator given Cartesian location */
MPI_Cart_rank(comm2d, ThisTask.coords, &ThisTask.rank);
/* get bottom and top neighbors (X direction) */
MPI_Cart_shift(comm2d, X, 1, &ThisTask.nbrbottom, &ThisTask.nbrtop);
/* get left and right neighbors (Y direction) */
MPI_Cart_shift(comm2d, Y, 1, &ThisTask.nbrleft, &ThisTask.nbrright);
/************************************************************************************************************/
/* 2D MPI-Cartesian decomposition:
the grids are replicated across the MPI processes.
This approach is not the most efficient in terms of memory usage,
because the arrays are replicated on every MPI process instead of being distributed */
/* get the remainder, i.e. take into account an uneven
decomposition of points among the processes */
const int rem[NDIM] = {(NX_GLOB + 2) % dims[X],
(NY_GLOB + 2) % dims[Y]};
/* get the amount of data for each MPI process:
- chunk is supposed to be >= 1 */
const int chunk[NDIM] = {(NX_GLOB + 2 - rem[X]) / dims[X],
(NY_GLOB + 2 - rem[Y]) / dims[Y]};
if ((chunk[X] < 1) || (chunk[Y] < 1))
{
printf("\n\t chunk[X] < 1 || chunk[Y] < 1 ... aborting ...[rank: %d]\n", rank);
MPI_Comm_free(&comm2d);
MPI_Abort(comm, EXIT_FAILURE);
exit(EXIT_FAILURE);
}
/* get the subgrid dimension along X and Y directions: */
int incr[NDIM], offset[NDIM];
for (int dim=0 ; dim<NDIM ; dim++)
{
if (ThisTask.coords[dim] < rem[dim])
{
incr[dim] = chunk[dim] + 1;
offset[dim] = 0;
}
else
{
incr[dim] = chunk[dim];
offset[dim] = rem[dim];
}
}
/* subdomain managed by the task */
for (int dim=0 ; dim<NDIM ; dim++)
{
ThisTask.domain.start[dim] = ((ThisTask.coords[dim] * incr[dim]) + offset[dim]);
ThisTask.domain.end[dim] = (ThisTask.domain.start[dim] + incr[dim]) - 1;
/* boundaries */
ThisTask.domain.start[dim] = ((ThisTask.domain.start[dim] == 0) ? NGHOST : ThisTask.domain.start[dim]);
ThisTask.domain.end[dim] = ((ThisTask.domain.end[dim] == NX_GLOB + 1) ? NX_GLOB : ThisTask.domain.end[dim]);
}
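/* worked example (the Makefile's valgrind targets run "mpirun -n 4 ... 9 2"):
NX_GLOB = 9 and dims = {2, 2} give (NX_GLOB + 2) = 11 points per direction,
rem = 11 % 2 = 1 and chunk = (11 - 1) / 2 = 5;
coords 0: incr = 6, offset = 0 -> start = 0, end = 5  -> clamped to [1, 5]
coords 1: incr = 5, offset = 1 -> start = 6, end = 10 -> clamped to [6, 9]
so the interior points 1..NX_GLOB are covered exactly once */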
#if defined(DEBUG)
for (int task=0 ; task<ThisTask.nranks ; task++)
{
if (ThisTask.rank == task)
{
printf("\n\t rank: %d", task);
printf("\n\t\t coords = [%d, %d]", ThisTask.coords[X], ThisTask.coords[Y]);
printf("\n\t\t domain.start[X] = %d - domain.end[X] = %d", ThisTask.domain.start[X], ThisTask.domain.end[X]);
printf("\n\t\t domain.start[Y] = %d - domain.end[Y] = %d", ThisTask.domain.start[Y], ThisTask.domain.end[Y]);
printf("\n\t\t nbrtop = %d - nbrbottom = %d", ThisTask.nbrtop, ThisTask.nbrbottom);
printf("\n\t\t nbrleft = %d - nbrright = %d\n", ThisTask.nbrleft, ThisTask.nbrright);
fflush(stdout);
}
MPI_Barrier(comm2d);
}
#endif /* DEBUG */
/******************************************************************************************************/
const MyData xbeg = 0.0;
const MyData xend = 1.0;
const MyData ybeg = 0.0;
const MyData yend = 1.0;
const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1),
(yend - ybeg)/(NY_GLOB + 1)};
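/* delta is the uniform mesh spacing h = 1/(N_GLOB + 1) in each direction of the unit square [0,1] x [0,1] */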
/* --------------------------------------------------------
1. Set grid indices
-------------------------------------------------------- */
const int ibeg = NGHOST;
const int iend = ibeg + NX_GLOB - 1;
const int nx = iend - ibeg + 1;
const int nx_tot = nx + 2 * NGHOST;
const int jbeg = NGHOST;
const int jend = jbeg + NY_GLOB - 1;
const int ny = jend - jbeg + 1;
const int ny_tot = ny + 2 * NGHOST;
if (rank == MASTER)
{
printf("\n\t Grid indices:");
printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot);
printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot);
}
/* --------------------------------------------------------
2. Generate grid, allocate memory
Not optimized because the grids are (unnecessarily)
replicated across MPI processes
-------------------------------------------------------- */
/* memory allocation */
MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData));
MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData));
assert((xg != NULL) && (yg != NULL));
/* global grid coordinates */
for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X];
for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y];
MyData *x = xg; /* global and local grids coincide because */
MyData *y = yg; /* the grid is replicated on every process  */
/* grids memory allocation */
MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot);
MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot);
/* --------------------------------------------------------
3. Initialize solution array to 0
-------------------------------------------------------- */
for (int j=jbeg ; j<=jend ; j++)
for (int i=ibeg ; i<=iend ; i++)
{
phi0[j][i] = 0.0;
phi[j][i] = 0.0;
}
/* --------------------------------------------------------
4. Main iteration cycle
-------------------------------------------------------- */
const double time_start = MPI_Wtime();
/* -- 4a. Set boundary conditions first -- */
BoundaryConditions(phi0, x, y, nx, ny);
BoundaryConditions(phi, x, y, nx, ny);
MyData err = 1.0;
/* iterations */
int k = 0;
while (1)
{
/* -- 4c. Jacobi's method and residual (interior points) -- */
/* core algorithm */
err = 0.0;
Jacobi_Communication(phi, phi0, &err, delta, NY_GLOB, &ThisTask, &comm2d);
if (!rank)
printf("\n\t Iteration = %d - err = %lg\n",k, err);
/* increase the counter of loop iterations */
k++;
/* get the total error */
MyData toterr;
/* combines values from all processes and distributes the result back to all processes */
MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm2d);
/* check convergence */
if (toterr <= TOL)
{
/* master task gathers all the domains */
get_domains(phi, &ThisTask, comm2d);
break;
}
/* swap the pointers */
MyData **tmp = phi;
phi = phi0;
phi0 = tmp;
}
/* master rank writes the solution */
if (!rank)
{
WriteSolution(phi, nx, ny);
printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB);
printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start);
}
// free memory
if (phi0)
{
free(phi0[0]);
free(phi0);
}
if (phi)
{
free(phi[0]);
free(phi);
}
if (yg)
free(yg);
if (xg)
free(xg);
MPI_Comm_free(&comm2d);
MPI_Finalize();
return 0;
}
/* ********************************************************************* */
void BoundaryConditions(MyData **const restrict phi,
MyData *const restrict x,
MyData *const restrict y,
const int nx,
const int ny)
/*
*********************************************************************** */
{
const int ibeg = NGHOST;
const int iend = ibeg + nx - 1;
const int jbeg = NGHOST;
const int jend = jbeg + ny - 1;
int i,j;
/* -- Left -- */
i = ibeg - 1;
for (int j=jbeg ; j<=jend ; j++)
phi[j][i] = (1.0 - y[j]);
/* -- Right -- */
i = iend + 1;
for (int j=jbeg ; j<=jend ; j++)
phi[j][i] = (y[j] * y[j]);
/* -- Bottom -- */
j = jbeg - 1;
for (int i=ibeg ; i<=iend ; i++)
phi[j][i] = (1.0 - x[i]);
/* -- Top -- */
j = jend + 1;
for (int i=ibeg ; i<=iend ; i++)
phi[j][i] = x[i];
return;
}
/* ********************************************************************* */
void JacobiAlgorithm(MyData **const restrict Phi,
MyData **const restrict Phi0,
const MyData *restrict delta,
const int jbeg,
const int jend,
const int ibeg,
const int iend,
MyData *const restrict error)
{
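/* Jacobi update for the 2D Laplace equation: each point is replaced by the
average of its four neighbours taken from the previous iterate Phi0, and the
residual is accumulated as the delta-weighted L1 norm of the change between
the two iterates */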
for (int j=jbeg ; j<=jend ; j++)
{
for (int i=ibeg ; i<=iend ; i++)
{
Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] +
Phi0[j-1][i] + Phi0[j+1][i]);
*error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]);
} /* loop over columns */
} /* loop over rows */
return;
}
void Jacobi_Communication(MyData **const restrict Phi,
MyData **const restrict Phi0,
MyData *const restrict error,
const MyData * restrict delta,
const int grid_y,
Task *const restrict ThisTask,
MPI_Comm *const restrict comm)
{
/* custom datatype */
MPI_Datatype column;
MPI_Type_vector((ThisTask->domain.end[X] - ThisTask->domain.start[X] + 1), 1, grid_y + 2, MPI_MyDatatype, &column);
/* commits the datatype */
MPI_Type_commit(&column);
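/* "column" describes one element per local row with a stride of (grid_y + 2)
elements, i.e. the padded row length of the (square) global grid, so that a
whole left/right ghost column can be exchanged with a single message */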
const int data_row_size = (ThisTask->domain.end[Y] - ThisTask->domain.start[Y] + 1);
/* first step: post the halo-exchange communication */
MPI_Request request[8];
MyData **const restrict buffer = Phi0;
/* 4 nonblocking MPI_Irecv */
MPI_Irecv(&buffer[ThisTask->domain.start[X] - 1][ThisTask->domain.start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 0, *comm, &request[0]);
MPI_Irecv(&buffer[ThisTask->domain.end[X] + 1][ThisTask->domain.start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop, 1, *comm, &request[1]);
MPI_Irecv(&buffer[ThisTask->domain.start[X] ][ThisTask->domain.start[Y] - 1], 1, column, ThisTask->nbrleft, 2, *comm, &request[2]);
MPI_Irecv(&buffer[ThisTask->domain.start[X] ][ThisTask->domain.end[Y] + 1], 1, column, ThisTask->nbrright, 3, *comm, &request[3]);
/* 4 nonblocking MPI_Isend */
MPI_Isend(&buffer[ThisTask->domain.end[X] ][ThisTask->domain.start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop, 0, *comm, &request[4]);
MPI_Isend(&buffer[ThisTask->domain.start[X] ][ThisTask->domain.start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 1, *comm, &request[5]);
MPI_Isend(&buffer[ThisTask->domain.start[X] ][ThisTask->domain.end[Y] ], 1, column, ThisTask->nbrright, 2, *comm, &request[6]);
MPI_Isend(&buffer[ThisTask->domain.start[X] ][ThisTask->domain.start[Y] ], 1, column, ThisTask->nbrleft, 3, *comm, &request[7]);
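/* tags pair each MPI_Isend with the matching MPI_Irecv on the neighbour:
as seen by the receiver, tag 0 fills the bottom ghost row, tag 1 the top
ghost row, tag 2 the left ghost column and tag 3 the right ghost column */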
/**************************************** computation ****************************************/
/* update the interior of the local domain, which does not require ghost cells, */
/* thus overlapping computation and communication; the first/last local row and */
/* column touch ghost values and are updated later, as each receive completes   */
const int jbeg = ThisTask->domain.start[X] + 1;
const int jend = ThisTask->domain.end[X] - 1;
const int ibeg = ThisTask->domain.start[Y] + 1;
const int iend = ThisTask->domain.end[Y] - 1;
*error = 0.0;
JacobiAlgorithm(Phi, Phi0, delta, jbeg, jend, ibeg, iend, error);
/**********************************************************************************************/
/* wait for the four receives in whatever order they complete; the request
index identifies which ghost region is now valid (the status of a completed
send carries no meaningful tag, so it cannot be used to dispatch) */
for (int req=0 ; req<4 ; req++)
{
int idx;
MPI_Waitany(4, request, &idx, MPI_STATUS_IGNORE);
switch(idx)
{
/* receive with tag 0 completed */
case 0:
/* ghost row from nbrbottom has been received */
JacobiAlgorithm(Phi, Phi0, delta, (jbeg - 1), (jbeg - 1), (ibeg - 1), (iend + 1), error);
break;
/* receive with tag 1 completed */
case 1:
/* ghost row from nbrtop has been received */
JacobiAlgorithm(Phi, Phi0, delta, (jend + 1), (jend + 1), (ibeg - 1), (iend + 1), error);
break;
/* receive with tag 2 completed */
case 2:
/* ghost column from nbrleft has been received */
JacobiAlgorithm(Phi, Phi0, delta, (jbeg - 1), (jend + 1), (ibeg - 1), (ibeg - 1), error);
break;
/* receive with tag 3 completed */
case 3:
/* ghost column from nbrright has been received */
JacobiAlgorithm(Phi, Phi0, delta, (jbeg - 1), (jend + 1), (iend + 1), (iend + 1), error);
break;
}
}
/* complete the four sends before Phi0 is swapped and reused */
MPI_Waitall(4, &request[4], MPI_STATUSES_IGNORE);
MPI_Type_free(&column);
return;
}
/* ********************************************************************* */
void get_domains(MyData **const buffer,
Task *const ThisTask,
const MPI_Comm comm)
{
/* Master process gathers all the domains */
/***************************** get the domain boundaries from each process *************************************/
myDomain *boundaries = NULL;
if (ThisTask->rank == MASTER)
{
boundaries = (myDomain *)malloc(ThisTask->nranks * sizeof(*boundaries));
assert(boundaries != NULL);
}
MPI_Gather(&ThisTask->domain, sizeof(myDomain), MPI_BYTE,
boundaries, sizeof(myDomain), MPI_BYTE,
MASTER, comm);
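/* MASTER now knows the sub-domain boundaries of every rank and can size
the receive buffer and displacements for the following MPI_Gatherv */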
/**************************************************************************************************/
/*************************************** get the domain from each process *************************/
/* MPI_Gatherv
Gathers into specified locations from all processes in a group
Synopsis
int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, const int recvcounts[], const int displs[],
MPI_Datatype recvtype, int root, MPI_Comm comm) */
int *recvcounts = NULL;
int *displs = NULL;
MyData *recvbuf = NULL;
MyData *sendbuf = NULL;
if (ThisTask->rank == MASTER)
{
recvcounts = (int *)malloc(ThisTask->nranks * sizeof(*recvcounts));
assert(recvcounts != NULL);
displs = (int *)malloc(ThisTask->nranks * sizeof(*displs));
assert(displs != NULL);
for (int task=0 ; task<ThisTask->nranks ; task++)
{
const int nrows = (boundaries[task].end[X] - boundaries[task].start[X] + 1);
const int ncols = (boundaries[task].end[Y] - boundaries[task].start[Y] + 1);
recvcounts[task] = (nrows * ncols);
}
displs[0] = 0;
for (int task=1 ; task<ThisTask->nranks ; task++)
displs[task] = (recvcounts[task - 1] + displs[task - 1]);
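/* e.g. for the 9 x 9 grid on 4 ranks the local sub-grids are
{5x5, 5x4, 4x5, 4x4}, hence recvcounts = {25, 20, 20, 16}
and displs = {0, 25, 45, 65} */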
/* allocate the recvbuffer */
recvbuf = (MyData *)malloc((displs[ThisTask->nranks - 1] + recvcounts[ThisTask->nranks - 1]) * sizeof(*recvbuf));
assert(recvbuf != NULL);
} /* MASTER */
/* every process sets up the sendbuf */
const int nrows = (ThisTask->domain.end[X] - ThisTask->domain.start[X] + 1);
const int ncols = (ThisTask->domain.end[Y] - ThisTask->domain.start[Y] + 1);
sendbuf = (MyData *)malloc(nrows * ncols * sizeof(*sendbuf));
assert(sendbuf != NULL);
/* store the actual grid owned by each process into sendbuf */
for (int row=0 ; row<nrows ; row++)
memcpy(&sendbuf[row * ncols], &buffer[ThisTask->domain.start[X] + row][ThisTask->domain.start[Y]], (ncols * sizeof(MyData)));
/* perform the MPI collective */
MPI_Gatherv(sendbuf, (nrows * ncols), MPI_MyDatatype,
recvbuf, recvcounts, displs, MPI_MyDatatype,
MASTER, comm);
/* store the recvbuf into the actual grid owned by the MASTER task */
if (ThisTask->rank == MASTER)
{
for (int task=1 ; task<ThisTask->nranks ; task++)
{
const int nrows = (boundaries[task].end[X] - boundaries[task].start[X] + 1);
const int ncols = (boundaries[task].end[Y] - boundaries[task].start[Y] + 1);
for (int row=0 ; row<nrows ; row++)
memcpy(&buffer[boundaries[task].start[X] + row][boundaries[task].start[Y]],
&recvbuf[displs[task] + (row * ncols)], (ncols * sizeof(MyData)));
}
}
free(sendbuf);
if (ThisTask->rank == MASTER)
{
free(recvbuf);
free(displs);
free(recvcounts);
free(boundaries);
}
return;
}
/* ********************************************************************* */
void WriteSolution(MyData **const phi,
const int nx,
const int ny)
/*
*********************************************************************** */
{
const int ibeg = NGHOST;
const int jbeg = NGHOST;
const int jend = jbeg + ny - 1;
static int nfile = 0; /* File counter */
char fname[128];
sprintf(fname,"jacobi_2D_mpi_comp_comm_%02d.bin", nfile);
FILE *fp;
printf ("> Writing %s\n",fname);
fp = fopen(fname, "wb");
/* discard boundaries */
for (int j=jbeg ; j<=jend ; j++)
{
fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp);
}
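/* the file thus contains ny rows of nx MyData values in row-major order,
with the ghost frame stripped */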
nfile++;
fclose(fp);
}
/* ///////////////////////////////////////////////////////////////////// */
/* Authors: A. Mignone (mignone@to.infn.it) */
/* V. Cesare (valentina.cesare@inaf.it) */
/* D. Goz (david.goz@inaf.it) */
/* */
/* Date : June 2024 */
/* */
/* ///////////////////////////////////////////////////////////////////// */
#include "tools.h"
/* ********************************************************************* */
MyData **Allocate_2DdblArray(const int nx, const int ny)
/*
* Allocate memory for an nx-by-ny array of MyData
* (one contiguous block plus an array of row pointers)
*********************************************************************** */
{
MyData **buf = malloc(nx * sizeof(MyData *));
assert(buf != NULL);
buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData));
assert(buf[0] != NULL);
for (int j=1 ; j<nx ; j++)
buf[j] = buf[j-1] + ny;
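/* the single contiguous block is what the MPI_Type_vector stride and the
row-wise memcpy packing in the solver rely on */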
return buf;
}
/* ********************************************************************* */
void Show_2DdblArray(const MyData **const A,
const int nx,
const int ny,
const char *const string)
/* *********************************************************************** */
{
printf ("%s\n",string);
printf ("------------------------------\n");
for (int i=0 ; i<nx ; i++)
{
for (int j=0 ; j<ny ; j++)
{
printf ("%8.2f ", A[i][j]);
}
printf ("\n");
}
printf ("------------------------------\n");
}
/* ********************************************************************* */
double seconds(void)
{
struct timeval tmp;
gettimeofday(&tmp, (struct timezone *)0);
double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0;
return sec;
}