diff --git a/jacobi/mpi/comp_comm/src/jacobi_2D_mpi_comp_comm.c b/jacobi/mpi/comp_comm/src/jacobi_2D_mpi_comp_comm.c index d4de3f485216ce03e65ce3e478001a79c16cfb26..0d9787146ffdcbb574b640b0bc00d6837b8a90c1 100644 --- a/jacobi/mpi/comp_comm/src/jacobi_2D_mpi_comp_comm.c +++ b/jacobi/mpi/comp_comm/src/jacobi_2D_mpi_comp_comm.c @@ -136,7 +136,7 @@ int main(int argc, char **argv) /************************************************************************************************************/ /* 2D MPI-cartesian decomposition: - the grids are distributed across the MPI processes. + the grids are distributed across the MPI processes. */ /* get the reminder, i.e. take into account uneven decomposition of points among the processes */ @@ -593,7 +593,6 @@ void get_domains(MyData **const buffer, int *recvcounts = NULL; int *displs = NULL; MyData *recvbuf = NULL; - MyData *sendbuf = NULL; if (ThisTask->rank == MASTERTASK) { @@ -618,19 +617,37 @@ void get_domains(MyData **const buffer, assert(recvbuf != NULL); } /* MASTERTASK */ - /* every process sets up the sendbuf */ - sendbuf = (MyData *)malloc(ThisTask->domain.dim[X] * ThisTask->domain.dim[Y] * sizeof(*sendbuf)); - assert(sendbuf != NULL); - /* store the actual grid owned by each process into sendbuf */ - for (int row=0 ; row<ThisTask->domain.dim[X] ; row++) - memcpy(&sendbuf[row * ThisTask->domain.dim[Y]], &buffer[ThisTask->domain.local_start[X] + row][ThisTask->domain.local_start[Y]], (ThisTask->domain.dim[Y] * sizeof(MyData))); + /*************************************** CUSTOM subarray ******************************************************************************/ + /* Do not do this !!! */ + /* Use MPI_Type_create_subarray instead */ + /* */ + /* MyData *sendbuf = (MyData *)malloc(ThisTask->domain.dim[X] * ThisTask->domain.dim[Y] * sizeof(*sendbuf)); */ + /* for (int row=0 ; row<ThisTask->domain.dim[X] ; row++) */ + /* memcpy(&sendbuf[row * ThisTask->domain.dim[Y]], &buffer[ThisTask->domain.local_start[X] + row][ThisTask->domain.local_start[Y]], */ + /* (ThisTask->domain.dim[Y] * sizeof(MyData))); */ + /* */ + /* Perform MPI_Gatherv collective... 
*/ + /* */ + /* MPI_Gatherv(sendbuf, (ThisTask->domain.dim[X] * ThisTask->domain.dim[Y]), MPI_MyDatatype, */ + /* recvbuf, recvcounts, displs, MPI_MyDatatype, */ + /* MASTERTASK, ThisTask->comm2d); */ + /**************************************************************************************************************************************/ + + /* every process sets up the subarray */ + MPI_Datatype subarray; + const int subsize[NDIM] = {ThisTask->domain.dim[X], ThisTask->domain.dim[Y]}; + const int bigsize[NDIM] = {subsize[X] + 2 *NGHOST, subsize[Y] + 2 *NGHOST}; + const int start[NDIM] = {NGHOST, NGHOST}; + MPI_Type_create_subarray(NDIM, bigsize, subsize, start, MPI_ORDER_C, MPI_MyDatatype, &subarray); + MPI_Type_commit(&subarray); /* perform the MPI collective */ - MPI_Gatherv(sendbuf, (ThisTask->domain.dim[X] * ThisTask->domain.dim[Y]), MPI_MyDatatype, + MPI_Gatherv(&buffer[0][0], 1, subarray, recvbuf, recvcounts, displs, MPI_MyDatatype, MASTERTASK, ThisTask->comm2d); - free(sendbuf); + MPI_Type_free(&subarray); + if (ThisTask->rank == MASTERTASK) { free(recvcounts); diff --git a/jacobi/mpi/comp_comm_io/Makefile b/jacobi/mpi/comp_comm_io/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f83868ef964675323e46eabe255b84d79876d062 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/Makefile @@ -0,0 +1,61 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +include make_mpi_path +####################################################################### + +.PHONY: info mpi valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean + +info: + @echo ' ' + @echo '-----------------------------------------------------------------------------------------' + @echo '$$ make mpi ---> compile the mpi application ' + @echo '$$ make debug ---> compile the mpi application for debugging ' + @echo '$$ make valgrind_memcheck ---> run the mpi application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the mpi application using Valgrind under Callgrind ' + @echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '-----------------------------------------------------------------------------------------' + @echo ' ' + +mpi: $(PROG) + +debug: $(PROG_DEBUG) + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + mpirun -n 4 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 9 2 + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... valgrind_callgrind ...OOOooo' + mpirun -n 4 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 9 2 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... 
valgrind_cachegrind ...OOOooo' + mpirun -n 4 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 9 2 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin + rm -f jacobi_mpi_comp_comm_* diff --git a/jacobi/mpi/comp_comm_io/include/allvars.h b/jacobi/mpi/comp_comm_io/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..c065228237923d421eff50a673dfbd54d5c70f90 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/include/allvars.h @@ -0,0 +1,20 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +#include <mpi.h> + +#define MASTERTASK 0 + +#if defined(SINGLE_PRECISION) +typedef float MyData; +#define MPI_MyDatatype MPI_FLOAT +#else +/* double precision is assumed by default */ +typedef double MyData; +#define MPI_MyDatatype MPI_DOUBLE +#endif /* SINGLE_PRECISION */ diff --git a/jacobi/mpi/comp_comm_io/include/tools.h b/jacobi/mpi/comp_comm_io/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..0167c89d349118ae2d970b23308de8abe19f350b --- /dev/null +++ b/jacobi/mpi/comp_comm_io/include/tools.h @@ -0,0 +1,18 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray( MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); diff --git a/jacobi/mpi/comp_comm_io/make.def b/jacobi/mpi/comp_comm_io/make.def new file mode 100644 index 0000000000000000000000000000000000000000..911e34103f99fade5c989d4c1589588d139a2d26 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/make.def @@ -0,0 +1,45 @@ +CC = mpicc +CFLAGS = -Wall -Wextra -march=native +LIBS = -lm -lmpi + +SYSTYPE = $(strip $(shell uname -n)) + +PROG = jacobi_mpi_comp_comm_$(SYSTYPE) +PROG_DEBUG = $(PROG)_DEBUG +PROG_MEMCHECK = $(PROG)_MEMCHECK +PROG_CALLGRIND = $(PROG)_CALLGRIND +PROG_CACHEGRIND = $(PROG)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -ggdb3 -DDEBUG -fno-omit-frame-pointer -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g 
-O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/mpi/comp_comm_io/make_mpi_path b/jacobi/mpi/comp_comm_io/make_mpi_path new file mode 100644 index 0000000000000000000000000000000000000000..3fdf888da9d4837eb7eafb51c93e53516774ce93 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/make_mpi_path @@ -0,0 +1,10 @@ +# set the MPI install path + +# pleiadi +MPI_INSTALL_PATH = /opt/cluster/spack/opt/spack/linux-centos7-broadwell/gcc-11.2.0/openmpi-4.1.3-djxjqlmzbqwq76bhh3wvgxaefnoczleg + + + +MPI = $(MPI_INSTALL_PATH) +MPI_INC = $(MPI)/include +MPI_LIB = $(MPI)/lib \ No newline at end of file diff --git a/jacobi/mpi/comp_comm_io/script/input_parameters b/jacobi/mpi/comp_comm_io/script/input_parameters new file mode 100644 index 0000000000000000000000000000000000000000..53a1b43b137c6fdcb3cf54696925d0b310b2a369 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/script/input_parameters @@ -0,0 +1,10 @@ +########################################################################## + +# set the grid size + +GRID_SIZE_X=128 +GRID_SIZE_Y=128 + +TASKS=(2 4 8 16) + +########################################################################## diff --git a/jacobi/mpi/comp_comm_io/script/run_pleiadi.sh b/jacobi/mpi/comp_comm_io/script/run_pleiadi.sh new file mode 100755 index 0000000000000000000000000000000000000000..225dedb5e28e045ac243e1f79aa9a79ee7efa546 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/script/run_pleiadi.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +######################### RESOURSE ALLOCATION ##################################### +##SBATCH --account=???????? + +#SBATCH --partition=pleiadi +#SBATCH --job-name="Jacobi" +#SBATCH --nodes=1 +#SBATCH --exclusive +#SBATCH --output=Jacobi-mpi-comp-comm-%j.out +#SBATCH --error=Jacobi-mpi-comp-comm.%j.err +#SBATCH --time=00:05:00 +################################################################################### + +################################## MODULES ######################################## +export MODULE_VERSION=5.0.1 +source /opt/cluster/spack/share/spack/setup-env.sh + +# module purge +module purge +# load GCC +module load default-gcc-11.2.0 +################################################################################### + +# input parameters +source input_parameters + +WORKDIR=${PWD} +# compile the application +cd .. && make clean && make mpi +if [[ "$?" != "0" ]] +then + echo "Cannot compile the application ...aborting..." + exit 1 +fi + +# get the executable +EXEC=$(find $(realpath ./) -maxdepth 1 -executable -name "jacobi_*" -type f -print) +if [[ "$?" != "0" ]] +then + echo "Cannot find the executable ...aborting..." + exit 2 +fi + +for TASK in ${TASKS[@]} +do + # run the application + time mpirun -n ${TASK} ${EXEC} ${GRID_SIZE_X} ${GRID_SIZE_Y} |& tee ${EXEC}_TASK_${TASK}_output.txt +done + +cd ${WORKDIR} + +exit 0 diff --git a/jacobi/mpi/comp_comm_io/src/jacobi_2D_mpi_comp_comm_io.c b/jacobi/mpi/comp_comm_io/src/jacobi_2D_mpi_comp_comm_io.c new file mode 100644 index 0000000000000000000000000000000000000000..9522d6c149b73cdd216ae3e573821ecc919398d7 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/src/jacobi_2D_mpi_comp_comm_io.c @@ -0,0 +1,597 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. 
Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> + +static int NX_GLOB, NY_GLOB; + +typedef struct MyGrid +{ + int local_start[NDIM]; /* Local start index in each dimension */ + int local_end[NDIM]; /* Local end index in each dimension */ + int global_start[NDIM]; /* Global start index in each dimension */ + int global_end[NDIM]; /* Global end index in each dimension */ + int dim[NDIM]; /* Local domain size (no ghosts) */ +} myDomain; + + +typedef struct Task_2D_Cartesian +{ + int rank; /* Local process rank */ + int nranks; /* Communicator size */ + int coords[NDIM]; /* Cartesian topology coordinate */ + myDomain domain; /* MyGrid structure (defined above) */ + int nbrtop; /* Top neighbor process in cartesian topology */ + int nbrbottom; /* Bottom neighbor process in cartesian topology */ + int nbrleft; /* Left neighbor process in cartesian topology */ + int nbrright; /* Right neighbor process in cartesian topology */ + MPI_Comm comm2d; /* Cartesian communicator */ +} Task; + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + Task *const restrict); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, const MyData *restrict, + const int, const int, const int, const int, MyData *const restrict); + +void Jacobi_Communication(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData * restrict delta, + Task *const restrict ThisTask); + +/* void WriteSolution(MyData **const phi, const int nx, const int ny); */ +void WriteSolutionParallel(MyData **const phi, Task *const ThisTask); + +int main(int argc, char **argv) +{ + int rank, Nranks; + const MPI_Comm comm = MPI_COMM_WORLD; + MPI_Init(&argc, &argv); + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &Nranks); + + if ((argc <= 2) && (!rank)) + { + printf("\n\t Usage: <executable> <grid_size> <cartesian communicator size_X>\n\n"); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* global X and Y grid size (square matrix supposed) */ + NX_GLOB = (int) strtol(argv[1], NULL, 10); + NY_GLOB = NX_GLOB; + + /********************************** Cartesin topology *******************************************************/ + const int cartesian_grid_x = (int)strtol(argv[2], NULL, 10); + const int cartesian_grid_y = ((Nranks % cartesian_grid_x == 0) ? (Nranks / cartesian_grid_x) : -1); + if (cartesian_grid_y == -1) + { + if (!rank) + { + printf("\n\t Nranks mod cartesian_grid_x != 0 ... 
aborting ...\n"); + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + } + + const int dims[NDIM] = {cartesian_grid_x, cartesian_grid_y}; + const int periods[NDIM] = {0 ,0}; /* not periodic */ + const int reorder = 0; /* setting the logical value reorder to TRUE allows MPI */ + /* to reorder the rank of the processes figuring out the */ + /* neighbors in the actual hardware for better performance */ + + /* make a new communicator to which topology information has been attached */ + Task ThisTask; + ThisTask.comm2d = MPI_COMM_NULL; + MPI_Cart_create(MPI_COMM_WORLD, NDIM, dims, periods, reorder, &ThisTask.comm2d); + if (ThisTask.comm2d == MPI_COMM_NULL) + { + printf("\n\t Process %d is not part of the new communicator \n", rank); + fflush(stdout); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* get the comm size */ + MPI_Comm_size(ThisTask.comm2d, &ThisTask.nranks); + + /* determine the process coords in cartesian topology given rank in group */ + MPI_Cart_coords(ThisTask.comm2d, rank, NDIM, ThisTask.coords); + + /* determines process rank in communicator given Cartesian location */ + MPI_Cart_rank(ThisTask.comm2d, ThisTask.coords, &ThisTask.rank); + + /* get bottom and top neighbors (X direction) */ + MPI_Cart_shift(ThisTask.comm2d, X, 1, &ThisTask.nbrbottom, &ThisTask.nbrtop); + + /* get left and right neighbors (Y direction) */ + MPI_Cart_shift(ThisTask.comm2d, Y, 1, &ThisTask.nbrleft, &ThisTask.nbrright); + + /************************************************************************************************************/ + /* 2D MPI-cartesian decomposition: + the grids are distributed across the MPI processes. */ + + /* get the reminder, i.e. take into account uneven + decomposition of points among the processes */ + const int rem[NDIM] = {(NX_GLOB + 2) % dims[X], + (NY_GLOB + 2) % dims[Y]}; + + /* get the amount of data for each MPI process: + - chunk is supposed to be >= 1 */ + const int chunk[NDIM] = {(NX_GLOB + 2 - rem[X]) / dims[X], + (NY_GLOB + 2 - rem[Y]) / dims[Y]}; + if ((chunk[X] < 1) || (chunk[Y] < 1)) + { + printf("\n\t chunk[X] < 1 || chunk[Y] < 1 ... aborting ...[rank: %d]\n", rank); + MPI_Comm_free(&ThisTask.comm2d); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* get the subgrid dimension along X and Y directions: */ + int incr[NDIM], offset[NDIM]; + for (int dim=0 ; dim<NDIM ; dim++) + { + if (ThisTask.coords[dim] < rem[dim]) + { + incr[dim] = chunk[dim] + 1; + offset[dim] = 0; + } + else + { + incr[dim] = chunk[dim]; + offset[dim] = rem[dim]; + } + } + + /* subdomain managed by the task */ + for (int dim=0 ; dim<NDIM ; dim++) + { + ThisTask.domain.global_start[dim] = ((ThisTask.coords[dim] * incr[dim]) + offset[dim]); + ThisTask.domain.global_end[dim] = (ThisTask.domain.global_start[dim] + incr[dim]) - 1; + + /* boundaries */ + ThisTask.domain.global_start[dim] = ((ThisTask.domain.global_start[dim] == 0) ? NGHOST : ThisTask.domain.global_start[dim]); + ThisTask.domain.global_end[dim] = ((ThisTask.domain.global_end[dim] == NX_GLOB + 1) ? 
NX_GLOB : ThisTask.domain.global_end[dim]); + + ThisTask.domain.dim[X] = (ThisTask.domain.global_end[X] - ThisTask.domain.global_start[X] + 1); + ThisTask.domain.dim[Y] = (ThisTask.domain.global_end[Y] - ThisTask.domain.global_start[Y] + 1); + + /* local index */ + ThisTask.domain.local_start[X] = ThisTask.domain.local_start[Y] = 1; + ThisTask.domain.local_end[X] = ThisTask.domain.dim[X]; + ThisTask.domain.local_end[Y] = ThisTask.domain.dim[Y]; + } + +#if defined(DEBUG) + for (int task=0 ; task<ThisTask.nranks ; task++) + { + if (ThisTask.rank == task) + { + printf("\n\t rank: %d", task); + printf("\n\t\t coords = [%d, %d]", ThisTask.coords[X], ThisTask.coords[Y]); + printf("\n\t\t domain.global_start[X] = %d - domain.global_end[X] = %d", ThisTask.domain.global_start[X], ThisTask.domain.global_end[X]); + printf("\n\t\t domain.global_start[Y] = %d - domain.global_end[Y] = %d", ThisTask.domain.global_start[Y], ThisTask.domain.global_end[Y]); + printf("\n\t\t domain.local_start[X] = %d - domain.local_end[X] = %d", ThisTask.domain.local_start[X], ThisTask.domain.local_end[X]); + printf("\n\t\t domain.local_start[Y] = %d - domain.local_end[Y] = %d", ThisTask.domain.local_start[Y], ThisTask.domain.local_end[Y]); + printf("\n\t\t domain.dim[X] = %d - domain.dim[Y] = %d", ThisTask.domain.dim[X], ThisTask.domain.dim[Y]); + printf("\n\t\t nbrtop = %d - nbrbottom = %d", ThisTask.nbrtop, ThisTask.nbrbottom); + printf("\n\t\t nbrleft = %d - nbrright = %d\n", ThisTask.nbrleft, ThisTask.nbrright); + fflush(stdout); + } + MPI_Barrier(ThisTask.comm2d); + } +#endif /* DEBUG */ + + /******************************************************************************************************/ + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int jbeg = NGHOST; + + /* -------------------------------------------------------- + 2. Generate grids, allocate memory + distributed across MPI processes + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + + /* grids memory allocation distributed across MPI processes */ + MyData **phi = Allocate_2DdblArray(ThisTask.domain.dim[X] + 2, ThisTask.domain.dim[Y] + 2); + MyData **phi0 = Allocate_2DdblArray(ThisTask.domain.dim[X] + 2, ThisTask.domain.dim[Y] + 2); + + /* -------------------------------------------------------- + 3. Set boundary conditions + -------------------------------------------------------- */ + BoundaryConditions(phi0, xg, yg, &ThisTask); + BoundaryConditions(phi, xg, yg, &ThisTask); + free(yg); + free(xg); + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = MPI_Wtime(); + + /* iterations */ + int k = 0; + while (1) + { + /* -- 4a. 
Jacobi algorithm overlapping computation and communication */ + MyData err; + Jacobi_Communication(phi, phi0, &err, delta, &ThisTask); + + /* -- 4b. Get the total error */ + /* combines values from all processes and distributes the result back to all processes */ + MyData toterr; + MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, ThisTask.comm2d); + + if (ThisTask.rank == MASTERTASK) + printf("\n\t Iteration = %d - err = %lg\n",k, toterr); + + /* increase the counter of loop iterations */ + k++; + + /* check convergence */ + if (toterr <= TOL) + { + break; + } + + /* swap the pointers */ + MyData **tmp = phi; + phi = phi0; + phi0 = tmp; + } + + /* ranks write the solution in parallel */ + WriteSolutionParallel(phi, &ThisTask); + + if (ThisTask.rank == MASTERTASK) + { + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start); + } + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + MPI_Comm_free(&ThisTask.comm2d); + + MPI_Finalize(); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + Task *const restrict ThisTask) +/*************************************************************************/ +{ + /* left */ + if (ThisTask->nbrleft == MPI_PROC_NULL) /* no left neighbor */ + { + const int global_start_j = ThisTask->domain.global_start[X]; + for (int j=ThisTask->domain.local_start[X] ; j<=ThisTask->domain.local_end[X] ; j++) + phi[j][0] = (1.0 - y[global_start_j + j - 1]); + } + + /* right */ + if (ThisTask->nbrright == MPI_PROC_NULL) /* no right neighbor */ + { + const int global_start_j = ThisTask->domain.global_start[X]; + for (int j=ThisTask->domain.local_start[X] ; j<=ThisTask->domain.local_end[X] ; j++) + phi[j][ThisTask->domain.local_end[Y] + 1] = (y[global_start_j + j - 1] * y[global_start_j + j - 1]); + } + + /* bottom */ + if (ThisTask->nbrbottom == MPI_PROC_NULL) /* no bottom neighbor */ + { + const int global_start_i = ThisTask->domain.global_start[Y]; + for (int i=ThisTask->domain.local_start[Y] ; i<=ThisTask->domain.local_end[Y] ; i++) + phi[0][i] = (1.0 - x[global_start_i + i - 1]); + } + + /* top */ + if (ThisTask->nbrtop == MPI_PROC_NULL) /* no top neighbor */ + { + const int global_start_i = ThisTask->domain.global_start[Y]; + for (int i=ThisTask->domain.local_start[Y] ; i<=ThisTask->domain.local_end[Y] ; i++) + phi[ThisTask->domain.local_end[X] + 1][i] = x[global_start_i + i - 1]; + } + +#if defined(DEBUG) + + if (ThisTask->rank == MASTERTASK) + { + printf("\n"); + for (int i=0 ; i<NX_GLOB+2*NGHOST ; i++) + printf("\n\t x[%d] = %8.2f", i, x[i]); + + printf("\n"); + for (int i=0 ; i<NX_GLOB+2*NGHOST ; i++) + printf("\n\t y[%d] = %8.2f", i, y[i]); + + printf("\n"); + fflush(stdout); + } + + MPI_Barrier(ThisTask->comm2d); + + for (int task=0 ; task<ThisTask->nranks ; task++) + { + if (task == ThisTask->rank) + { + char string[128]; + sprintf(string, "Task %d - Phi", task); + + Show_2DdblArray(phi, ThisTask->domain.dim[X] + 2, ThisTask->domain.dim[Y] + 2, string); + } + + MPI_Barrier(ThisTask->comm2d); + } + +#endif /* DEBUG */ + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + const MyData *restrict delta, + const int jbeg, + const int jend, + const 
int ibeg, + const int iend, + MyData *const restrict error) +{ + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] + + Phi0[j-1][i] + Phi0[j+1][i]); + + *error += (delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i])); + } /* loop over columns */ + } /* loop over rows */ + + return; +} + +void get_error(MyData **const restrict Phi, + MyData **const restrict Phi0, + const MyData *restrict delta, + const int jbeg, + const int jend, + const int ibeg, + const int iend, + MyData *const restrict error) +{ + *error = 0.0; + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + *error += (delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i])); + } /* loop over columns */ + } /* loop over rows */ + + return; +} + +void Jacobi_Communication(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData * restrict delta, + Task *const restrict ThisTask) +{ + /* custom datatype */ + MPI_Datatype column; + MPI_Type_vector(ThisTask->domain.dim[X], 1, ThisTask->domain.dim[Y] + 2, MPI_MyDatatype, &column); + /* commits the datatype */ + MPI_Type_commit(&column); + + const int data_row_size = ThisTask->domain.dim[Y]; + + /* First task: issue the communication */ + + MyData **const restrict buffer = Phi0; + +#if MPI_VERSION > 4 + + MPI_Request request[4]; + + MPI_Isendrecv(&buffer[ThisTask->domain.local_end[X] ][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop, 0, + &buffer[ThisTask->domain.local_start[X] - 1][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 0, + ThisTask->comm2d, &request[0]); + + MPI_Isendrecv(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 1, + &buffer[ThisTask->domain.local_end[X] + 1][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop, 1, + ThisTask->comm2d, &request[1]); + + MPI_Isendrecv(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_end[Y] ], 1, column, ThisTask->nbrright, 2, + &buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_start[Y] - 1], 1, column, ThisTask->nbrleft, 2, + ThisTask->comm2d, &request[2]); + + MPI_Isendrecv(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_start[Y] ], 1, column, ThisTask->nbrleft, 3, + &buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_end[Y] + 1], 1, column, ThisTask->nbrright, 3, + ThisTask->comm2d, &request[3]); + +#else + + MPI_Request request[8]; + + MPI_Irecv(&buffer[ThisTask->domain.local_start[X] - 1][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 0, ThisTask->comm2d, &request[0]); + MPI_Irecv(&buffer[ThisTask->domain.local_end[X] + 1][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop , 1, ThisTask->comm2d, &request[1]); + MPI_Irecv(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_start[Y] - 1], 1, column, ThisTask->nbrleft, 2, ThisTask->comm2d, &request[2]); + MPI_Irecv(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_end[Y] + 1], 1, column, ThisTask->nbrright, 3, ThisTask->comm2d, &request[3]); + + MPI_Isend(&buffer[ThisTask->domain.local_end[X] ][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop, 0, ThisTask->comm2d, &request[4]); + MPI_Isend(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_start[Y] ], 
data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 1, ThisTask->comm2d, &request[5]); + MPI_Isend(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_end[Y] ], 1, column, ThisTask->nbrright, 2, ThisTask->comm2d, &request[6]); + MPI_Isend(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_start[Y] ], 1, column, ThisTask->nbrleft, 3, ThisTask->comm2d, &request[7]); + +#endif + + /**************************************** computation ****************************************/ + /* perform the computation with the local data, (i.e. ghost cells are not required) */ + /* so overlapping computation and communication */ + + const int jbeg = ThisTask->domain.local_start[X] + 1; + const int jend = ThisTask->domain.local_end[X] - 1; + const int ibeg = ThisTask->domain.local_start[Y] + 1; + const int iend = ThisTask->domain.local_end[Y] - 1; + + *error = 0.0; + JacobiAlgorithm(Phi, Phi0, delta, jbeg, jend, ibeg, iend, error); + + /*********************************************************************************************/ + +#if MPI_VERSION > 4 + /* wait the data on the boundaries */ + MPI_Waitall(4, request, MPI_STATUSES_IGNORE); +#else + MPI_Waitall(8, request, MPI_STATUSES_IGNORE); +#endif + + /* nbrbottom */ + JacobiAlgorithm(Phi, Phi0, delta, + ThisTask->domain.local_start[X], ThisTask->domain.local_start[X], + ThisTask->domain.local_start[Y], ThisTask->domain.local_end[Y], + error); + + /* nbrtop */ + JacobiAlgorithm(Phi, Phi0, delta, + ThisTask->domain.local_end[X], ThisTask->domain.local_end[X], + ThisTask->domain.local_start[Y], ThisTask->domain.local_end[Y], + error); + + /* nbrleft */ + JacobiAlgorithm(Phi, Phi0, delta, + ThisTask->domain.local_start[X], ThisTask->domain.local_end[X], + ThisTask->domain.local_start[Y], ThisTask->domain.local_start[Y], + error); + + /* nbrright */ + JacobiAlgorithm(Phi, Phi0, delta, + ThisTask->domain.local_start[X], ThisTask->domain.local_end[X], + ThisTask->domain.local_end[Y], ThisTask->domain.local_end[Y], + error); + + /* Round-off error fixing ??? */ + { + *error = 0.0; + get_error(Phi, Phi0, delta, + ThisTask->domain.local_start[X], ThisTask->domain.local_end[X], + ThisTask->domain.local_start[Y], ThisTask->domain.local_end[Y], + error); + } + + MPI_Type_free(&column); + + return; +} + +void WriteSolutionParallel(MyData **const phi, Task *const ThisTask) +{ + /* 1. Every process sets up the subarray */ + /* (i.e. its own domain whithout ghost cells) */ + /* We will use this as arguments to MPI_File_write(), */ + /* which allows the program to write the file in parallel */ + MPI_Datatype subDomain; + /* (local) sub-domain size */ + const int lsize[NDIM] = {ThisTask->domain.dim[X], ThisTask->domain.dim[Y]}; + /* entire domain (i.e. with ghost cells) */ + const int domain[NDIM] = {lsize[X] + 2 * NGHOST, lsize[Y] + 2 * NGHOST}; + /* (local) starting coordinates of the subDomain in each dimension */ + const int lstart[NDIM] = {ThisTask->domain.local_start[X], ThisTask->domain.local_start[Y]}; + MPI_Type_create_subarray(NDIM, domain, lsize, lstart, MPI_ORDER_C, MPI_MyDatatype, &subDomain); + MPI_Type_commit(&subDomain); + + /* 2. The file view must be set by creating a second subarray datatype, */ + /* defining the process' view on the file. 
*/ + MPI_Datatype ProcessDomain; + /* (global) domain size */ + const int gsize[NDIM] = {NX_GLOB, NY_GLOB}; + /* (global) starting coordinates of the subDomain in each dimension */ + const int gstart[NDIM] = {ThisTask->domain.global_start[X] - 1, ThisTask->domain.global_start[Y] - 1}; + MPI_Type_create_subarray(NDIM, gsize, lsize, gstart, MPI_ORDER_C, MPI_MyDatatype, &ProcessDomain); + MPI_Type_commit(&ProcessDomain); + + /* 3.a Open file for writing */ + static int nfile = 0; + char fname[128]; + sprintf(fname,"jacobi_2D_mpi_comp_comm_io_%02d.bin", nfile); + /* delete the file if already exists */ + /* MPI_File_delete(fname, MPI_INFO_NULL); */ + MPI_File fh; + MPI_File_open(ThisTask->comm2d, fname, + MPI_MODE_CREATE | MPI_MODE_WRONLY, + MPI_INFO_NULL, &fh); + + + /* 3.b Set the process’s view of the data in the file */ + MPI_File_set_view(fh, 0, MPI_MyDatatype, ProcessDomain, "native", MPI_INFO_NULL); + + /* 3.c Writes a file starting at the locations specified by individual file pointers */ + /* (blocking collective) */ + MPI_File_write_all(fh, phi[0], 1, subDomain, MPI_STATUS_IGNORE); + + /* 3.d Close a file (collective). */ + MPI_File_close(&fh); + + /* free the MPI_Datatype */ + MPI_Type_free(&subDomain); + MPI_Type_free(&ProcessDomain); + + return; +} diff --git a/jacobi/mpi/comp_comm_io/src/tools.c b/jacobi/mpi/comp_comm_io/src/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..0e282ee7953ced36d2d8d3060bad943ccf86f947 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/src/tools.c @@ -0,0 +1,59 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate contiguous memory for a MyData array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = calloc(nx, sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) calloc(nx * ny, sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray( MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timeval tmp; + gettimeofday(&tmp, (struct timezone *)0); + double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; + + return sec; +} diff --git a/jacobi/mpi/miscellaneous/cartesian b/jacobi/mpi/miscellaneous/cartesian new file mode 100755 index 0000000000000000000000000000000000000000..cef6cb6704f8709644945aeb816d610ad2814635 Binary files /dev/null and b/jacobi/mpi/miscellaneous/cartesian differ diff --git a/jacobi/mpi/miscellaneous/cartesian.cpp b/jacobi/mpi/miscellaneous/cartesian.c similarity index 64% 
rename from jacobi/mpi/miscellaneous/cartesian.cpp rename to jacobi/mpi/miscellaneous/cartesian.c index 7b5a7e505a9d63ade4f4e8e35f48cb895e8e1660..8778f5828c609db41ef8aef95f8bcd484fdceb25 100644 --- a/jacobi/mpi/miscellaneous/cartesian.cpp +++ b/jacobi/mpi/miscellaneous/cartesian.c @@ -1,5 +1,6 @@ #include <mpi.h> -#include <iostream> +#include <stdio.h> +#include <stdlib.h> #define SIZE 2 #define X 0 @@ -7,8 +8,6 @@ int main(int argc, char **argv) { - using namespace std; - int task, ntasks; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &ntasks); @@ -18,7 +17,7 @@ int main(int argc, char **argv) { if (!task) { - cout << "\n\t Usage: <executable> <number processes along X> \n" << endl; + printf("\n\t Usage: <executable> <number processes along X> \n"); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); exit(EXIT_FAILURE); } @@ -30,15 +29,16 @@ int main(int argc, char **argv) { if (!task) { - cout << "\n\t ntasks % cartesian_grid_x != 0 ... aborting ...\n" << endl; + printf("\n\t ntasks %% cartesian_grid_x != 0 ... aborting ...\n"); + fflush(stdout); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); exit(EXIT_FAILURE); } } - static int dims[SIZE] = {cartesian_grid_x, cartesian_grid_y}; - static int periods[SIZE] = {0, 0}; - static int reorder = 0; + const int dims[SIZE] = {cartesian_grid_x, cartesian_grid_y}; + const int periods[SIZE] = {0, 0}; + const int reorder = 0; MPI_Comm comm2d; MPI_Cart_create(MPI_COMM_WORLD, SIZE, dims, periods, reorder, &comm2d); @@ -56,11 +56,11 @@ int main(int argc, char **argv) MPI_Barrier(MPI_COMM_WORLD); if (rank == task) { - cout << "\n\t Task: " << task << endl; - cout << "\t\t coords[" << coords[X] << "," << coords[Y] << "]" << endl; - cout << "\t\t nbrright: " << nbrright << " - nbrleft : " << nbrleft << endl; - cout << "\t\t nbrtop : " << nbrtop << " - nbrbottom: " << nbrbottom << endl; - cout << endl; + printf("\n\t Task: %d\n", task); + printf("\t\t coords[%d, %d]", coords[X], coords[Y]); + printf("\n\t\t nbrright: %d - nbrleft : %d", nbrright, nbrleft); + printf("\n\t\t nbrtop : %d - nbrbottom: %d \n\n", nbrtop, nbrbottom); + fflush(stdout); } } diff --git a/jacobi/mpi/miscellaneous/subarray b/jacobi/mpi/miscellaneous/subarray new file mode 100755 index 0000000000000000000000000000000000000000..5295b41860fe0dfe7b99d8b60a0c49a893e90303 Binary files /dev/null and b/jacobi/mpi/miscellaneous/subarray differ diff --git a/jacobi/mpi/miscellaneous/subarray.c b/jacobi/mpi/miscellaneous/subarray.c new file mode 100644 index 0000000000000000000000000000000000000000..254b3d43bb7c4cd9af9a675e29450f12f8eb185b --- /dev/null +++ b/jacobi/mpi/miscellaneous/subarray.c @@ -0,0 +1,114 @@ +#include <stdio.h> +#include <stdlib.h> +#include <mpi.h> +#include <assert.h> + +void printarr(int **data, int n, char *str); +int **allocarray(const int n); +void deallocarray(int **); + +int main(int argc, char **argv) +{ + /* array sizes */ + const int bigsize = 10; + const int subsize = 5; + + /* communications parameters */ + const int sender = 0; + const int receiver = 1; + const int ourtag = 2; + + int rank, size; + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + if (size < (receiver + 1)) + { + if (rank == 0) + fprintf(stderr,"\n\t %s: Needs at least %d processors.\n", argv[0], receiver+1); + + MPI_Finalize(); + return 1; + } + + if (rank == sender) + { + int **bigarray = allocarray(bigsize); + for (int i=0 ; i<bigsize ; i++) + for (int j=0 ; j<bigsize ; j++) + bigarray[i][j] = (i * bigsize + j); + + printarr(bigarray, bigsize, "\n 
Sender: Big array \n"); + + MPI_Datatype mysubarray; + int starts[2] = {5,3}; + int subsizes[2] = {subsize,subsize}; + int bigsizes[2] = {bigsize, bigsize}; + MPI_Type_create_subarray(2, bigsizes, subsizes, starts, + MPI_ORDER_C, MPI_INT, &mysubarray); + MPI_Type_commit(&mysubarray); + + MPI_Send(&(bigarray[0][0]), 1, mysubarray, receiver, ourtag, MPI_COMM_WORLD); + MPI_Type_free(&mysubarray); + + deallocarray(bigarray); + } + else if (rank == receiver) + { + int **subarray = allocarray(subsize); + + for (int i=0; i<subsize; i++) + for (int j=0; j<subsize; j++) + subarray[i][j] = 0; + + MPI_Recv(&(subarray[0][0]), subsize * subsize, MPI_INT, sender, ourtag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + printarr(subarray, subsize, " Receiver: Subarray -- after receive"); + + deallocarray(subarray); + } + + MPI_Finalize(); + return 0; +} + +void printarr(int **data, + int n, + char *str) +{ + printf("-- %s --\n", str); + for (int i=0; i<n; i++) + { + for (int j=0; j<n; j++) + { + printf("%3d ", data[i][j]); + } + printf("\n"); + } +} + +int **allocarray(const int n) +{ + int *data = malloc(n * n * sizeof(int)); + assert(data != NULL); + + int **arr = malloc(n * sizeof(int *)); + assert(arr != NULL); + + for (int i=0 ; i<n ; i++) + arr[i] = &(data[i * n]); + + return arr; +} + +void deallocarray(int **array) +{ + if (array[0]) + free(array[0]); + + if (array) + free(array); + + return; +}