diff --git a/jacobi/mpi/SendRecv/Makefile b/jacobi/mpi/SendRecv/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..d37755442afd19b56734dad95d99da5bad2da49c
--- /dev/null
+++ b/jacobi/mpi/SendRecv/Makefile
@@ -0,0 +1,61 @@
+#######################################################################
+# Author: David Goz (david.goz@inaf.it)                               #
+# June  2024                                                          #
+#######################################################################
+#
+# To see all the compilation options
+# $ make info
+#######################################################################
+
+# make.def defines how the application is compiled
+include make.def
+include make_mpi_path
+#######################################################################
+
+.PHONY: info mpi debug valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean
+
+info:
+	@echo ' '
+	@echo '-----------------------------------------------------------------------------------------'
+	@echo '$$ make mpi                 ---> compile the mpi application                             '
+	@echo '$$ make debug               ---> compile the mpi application for debugging               '
+	@echo '$$ make valgrind_memcheck   ---> run the mpi application using Valgrind under Memcheck   '
+	@echo '$$ make valgrind_callgrind  ---> run the mpi application using Valgrind under Callgrind  '
+	@echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind '
+	@echo '$$ make clean               ---> clean up all                                            '
+	@echo '$$ make info                ---> get make info                                           '
+	@echo '-----------------------------------------------------------------------------------------'
+	@echo ' '
+
+mpi: $(PROG)
+
+debug: $(PROG_DEBUG)
+
+valgrind_memcheck: $(PROG_MEMCHECK)
+	@echo 'oooOOO... valgrind_memcheck ...OOOooo'
+	mpirun -n 2 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 5 5
+	@echo 'oooOOO... valgrind_memcheck ...OOOooo'
+
+valgrind_callgrind: $(PROG_CALLGRIND)
+	@echo 'oooOOO... valgrind_callgrind ...OOOooo'
+	mpirun -n 2 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128
+	@echo ' '
+	@echo 'To generate a function-by-function summary from the profile data file:'
+	@echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less'
+	@echo '(kcachegrind is required in order to visualize the output using the GUI)'
+
+valgrind_cachegrind: $(PROG_CACHEGRIND)
+	@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
+	mpirun -n 2 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128
+	@echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less'
+	@echo '(kcachegrind is required in order to visualize the output using the GUI)'
+	@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
+
+clean:
+	rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~
+	rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND)
+	rm -f valgrind_*.txt
+	rm -f cachegrind.out.*
+	rm -f callgrind.*
+	rm -f *bin
+	rm -f jacobi_mpi_SendRecv_*
diff --git a/jacobi/mpi/SendRecv/include/allvars.h b/jacobi/mpi/SendRecv/include/allvars.h
new file mode 100644
index 0000000000000000000000000000000000000000..798e4e6a2f2a740b3ab5a1394ac85eef8da1255c
--- /dev/null
+++ b/jacobi/mpi/SendRecv/include/allvars.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#define NGHOST   1
+#define NDIM     2 /* 2D */
+#define X        0
+#define Y        1
+#define TOL      1.e-5
+
+#include <mpi.h>
+
+#define MASTER 0
+
+#if defined(SINGLE_PRECISION)
+typedef float MyData;
+#define MPI_MyDatatype MPI_FLOAT
+#else
+/* double precision is assumed by default */
+typedef double MyData;
+#define MPI_MyDatatype MPI_DOUBLE
+#endif /* SINGLE_PRECISION */
diff --git a/jacobi/mpi/SendRecv/include/tools.h b/jacobi/mpi/SendRecv/include/tools.h
new file mode 100644
index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154
--- /dev/null
+++ b/jacobi/mpi/SendRecv/include/tools.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "allvars.h"
+#include <assert.h>
+#include <sys/time.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* function prototypes */
+MyData **Allocate_2DdblArray(const int nx, const int ny);
+void Show_2DdblArray(const MyData **const A,
+                     const int            nx,
+                     const int            ny,
+                     const char *const    string);
+
+double seconds(void);
diff --git a/jacobi/mpi/SendRecv/make.def b/jacobi/mpi/SendRecv/make.def
new file mode 100644
index 0000000000000000000000000000000000000000..6548f4b6b2d1ef3c78434166f88adac85b12801b
--- /dev/null
+++ b/jacobi/mpi/SendRecv/make.def
@@ -0,0 +1,45 @@
+CC     = mpicc
+CFLAGS = -Wall -Wextra -march=native
+LIBS   = -lm -lmpi
+
+SYSTYPE = $(strip $(shell uname -n))
+
+PROG            = jacobi_mpi_SendRecv_$(SYSTYPE)
+PROG_DEBUG      = $(PROG)_DEBUG
+PROG_MEMCHECK   = $(PROG)_MEMCHECK
+PROG_CALLGRIND  = $(PROG)_CALLGRIND
+PROG_CACHEGRIND = $(PROG)_CACHEGRIND
+
+HEADERS       = $(wildcard ./include/*.h)
+SOURCES       = $(wildcard ./src/*.c)
+DEPENDENCIES  = $(SOURCES) $(HEADERS) Makefile
+
+$(PROG): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_DEBUG): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -Og -ggdb3 -DDEBUG -fno-omit-frame-pointer -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_MEMCHECK): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_CALLGRIND): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_CACHEGRIND): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
diff --git a/jacobi/mpi/SendRecv/make_mpi_path b/jacobi/mpi/SendRecv/make_mpi_path
new file mode 100644
index 0000000000000000000000000000000000000000..3fdf888da9d4837eb7eafb51c93e53516774ce93
--- /dev/null
+++ b/jacobi/mpi/SendRecv/make_mpi_path
@@ -0,0 +1,10 @@
+# set the MPI install path
+
+# pleiadi
+MPI_INSTALL_PATH = /opt/cluster/spack/opt/spack/linux-centos7-broadwell/gcc-11.2.0/openmpi-4.1.3-djxjqlmzbqwq76bhh3wvgxaefnoczleg
+
+
+
+MPI        = $(MPI_INSTALL_PATH)
+MPI_INC    = $(MPI)/include
+MPI_LIB    = $(MPI)/lib
\ No newline at end of file
diff --git a/jacobi/mpi/SendRecv/src/jacobi_2D_mpi_sendrecv.c b/jacobi/mpi/SendRecv/src/jacobi_2D_mpi_sendrecv.c
new file mode 100644
index 0000000000000000000000000000000000000000..a0cd0c19d039a6c6f38244e2ffe7e01d04313423
--- /dev/null
+++ b/jacobi/mpi/SendRecv/src/jacobi_2D_mpi_sendrecv.c
@@ -0,0 +1,502 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/* Authors:  A. Mignone (mignone@to.infn.it)                             */
+/*           V. Cesare  (valentina.cesare@inaf.it)                       */
+/*           D. Goz     (david.goz@inaf.it)                              */
+/*                                                                       */
+/* Date   : June 2024                                                    */
+/*                                                                       */
+/* ///////////////////////////////////////////////////////////////////// */
+
+#include "allvars.h"
+#include "tools.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+/* #define DEBUG */
+
+typedef struct MyRows
+{
+  int start;
+  int end;
+} myDomain;
+
+/* function prototypes */
+void BoundaryConditions(MyData **const restrict,
+			MyData  *const restrict,
+			MyData  *const restrict,
+			const int,
+			const int);
+
+void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict,
+		     const MyData *const restrict, const int, const int, const int, const int);
+
+void get_domains(MyData  **const buffer,
+		 myDomain *const domain,
+		 const int       grid_dim,
+		 const int       rank,
+		 const int       nranks,
+		 const MPI_Comm  comm);
+
+void mpi_exchange_sendrecv_1d(MyData **const buffer,
+			      const int      n,
+			      const int      nbrtop,
+			      const int      nbrbottom,
+			      const int      start,
+			      const int      end,
+			      const MPI_Comm comm1d);
+
+void WriteSolution(MyData **const phi, const int nx, const int ny);
+
+void copy_grids(MyData **const restrict A,
+                MyData **const restrict B,
+                const int               xbeg,
+                const int               xend,
+                const int               ybeg,
+		const int               yend);
+
+int main(int argc, char **argv)
+{
+  int rank, Nranks;
+  const MPI_Comm comm = MPI_COMM_WORLD;
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(comm, &rank);
+  MPI_Comm_size(comm, &Nranks);
+
+  if ((argc <= 1) && !rank)
+    {
+      printf("\n\t Usage: <executable> <grid_size> \n\n");
+      MPI_Abort(comm, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+
+  /* global X and Y grid size (square matrix supposed) */
+  const int NX_GLOB = (int) strtol(argv[1], NULL, 10);
+  const int NY_GLOB = NX_GLOB;
+
+  /******************************************************************************************************/
+  /* 1D MPI-decomposition:
+     the physical domain is sliced into slabs along the vertical direction,
+     while the grids are replicated across the MPI processes.
+     This approach is not the most efficient in terms of memory usage,
+     because the arrays are replicated across the MPI processes instead of being distributed */
+
+  /* get the remainder, i.e. take into account the uneven
+     decomposition of points among the processes    */
+  const int rem = (NX_GLOB + 2) % Nranks;
+
+  /* get the amount of data for each MPI process:
+     - chunk is supposed to be >= 1 */
+  const int chunk = (NX_GLOB + 2 - rem) / Nranks;
+  if (chunk < 1)
+    {
+      printf("\n\t chunk < 1 ... aborting ...[rank: %d]\n", rank);
+      MPI_Abort(comm, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+  
+  /* get the slab dimension along vertical direction: */
+  int incr, offset;
+  if (rank < rem)
+    {
+      incr = chunk + 1;
+      offset = 0;
+    }
+  else
+    {
+      incr = chunk;
+      offset = rem;
+    }
+
+  myDomain domain;
+  domain.start = ((rank * incr) + offset);
+  domain.end   = (domain.start + incr) - 1;
+  /* boundaries */
+  domain.start = ((domain.start == 0)         ? NGHOST  : domain.start);
+  domain.end   = ((domain.end == NX_GLOB + 1) ? NX_GLOB : domain.end);
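+
+  /* Illustrative example (assumed numbers): with NX_GLOB = 8 and Nranks = 3,
+     the (NX_GLOB + 2) = 10 rows give rem = 1 and chunk = 3, so rank 0 spans
+     rows 0-3 (clipped to 1-3 above), rank 1 spans rows 4-6, and rank 2 spans
+     rows 7-9 (clipped to 7-8)                                                */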
+
+  /* rank of the top process */
+  const int nbrtop    = (((rank + 1) >= Nranks) ? MPI_PROC_NULL : (rank + 1));
+  /* rank of the bottom process */
+  const int nbrbottom = (((rank - 1) < 0)       ? MPI_PROC_NULL : (rank - 1));
+
+  /******************************************************************************************************/
+  
+  const MyData xbeg = 0.0;
+  const MyData xend = 1.0;
+  const MyData ybeg = 0.0;
+  const MyData yend = 1.0;
+
+  const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1),
+			      (yend - ybeg)/(NY_GLOB + 1)};
+
+  /* --------------------------------------------------------
+     1. Set grid indices
+     -------------------------------------------------------- */
+    
+  const int ibeg   = NGHOST;
+  const int iend   = ibeg + NX_GLOB - 1;
+  const int nx     = iend - ibeg + 1;
+  const int nx_tot = nx + 2 * NGHOST;
+    
+  const int jbeg   = NGHOST;
+  const int jend   = jbeg + NY_GLOB - 1;
+  const int ny     = jend - jbeg + 1;
+  const int ny_tot = ny + 2 * NGHOST;
+
+  if (rank == MASTER)
+    {
+      printf("\n\t Grid indices:");
+      printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d"    ,ibeg, iend, nx_tot);
+      printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot);
+    }
+
+#if defined(DEBUG)
+  for (int task=0 ; task<Nranks ; task++)
+    {
+      MPI_Barrier(comm);
+      
+      if (task == rank)
+	{
+	  printf("\n\t rank: %d", rank);
+	  printf("\n\t\t domain.start: %d - domain.end: %d", domain.start, domain.end);
+	  printf("\n\t\t nbrtop: %d - nbrbottom: %d \n", nbrtop, nbrbottom);
+	  fflush(stdout);
+	}
+    }
+
+  MPI_Barrier(comm);
+#endif /* DEBUG */
+  
+  /* --------------------------------------------------------
+     2. Generate grid, allocate memory
+        Not optimized because the grids are (unnecessarily)
+	replicated across MPI processes
+     -------------------------------------------------------- */
+
+  /* memory allocation */
+  MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData));
+  MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData));
+  assert((xg != NULL) && (yg != NULL));
+
+  /* initial conditions */
+  for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X];
+  for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y];
+  MyData *x = xg; /* Global and local grids are the same  */
+  MyData *y = yg; /* for serial version of the code       */
+
+  /* grids memory allocation */
+  MyData **phi  = Allocate_2DdblArray(ny_tot, nx_tot);
+  MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot);
+    
+  /* --------------------------------------------------------
+     3. Initialize solution array to 0
+     -------------------------------------------------------- */
+    
+  for (int j=jbeg ; j<=jend ; j++)
+    for (int i=ibeg ; i<=iend ; i++)
+      {
+	phi0[j][i] = 0.0;
+	phi[j][i]  = 0.0;
+      }
+  
+  /* --------------------------------------------------------
+     4. Main iteration cycle
+     -------------------------------------------------------- */
+
+  const double time_start = MPI_Wtime();
+  
+  /* -- 4a. Set boundary conditions first -- */  
+  BoundaryConditions(phi0, x, y, nx, ny);
+  BoundaryConditions(phi, x, y, nx, ny);
+
+  MyData err = 1.0;
+  /* iterations */
+  int k = 0;
+  while (1)
+    {      
+      /* -- 4b. Jacobi's method and residual (interior points) -- */
+      /*       core algorithm                                     */
+
+      err = 0.0;
+      JacobiAlgorithm(phi, phi0, &err, delta,
+		      ibeg, iend, domain.start, domain.end);
+
+      if (!rank)
+	printf("\n\t Iteration = %d - err = %lg\n",k, err);
+
+      /* increase the counter of loop iterations */
+      k++;
+
+      /* get the total error */
+      MyData toterr;
+      /* combines values from all processes and distributes the result back to all processes */
+      MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm);
+      
+      /* check convergence */
+      if (toterr <= TOL)
+	{
+	  /* master task gathers all the domains */
+	  get_domains(phi, &domain, NY_GLOB, rank, Nranks, comm);
+
+	  break;
+	}
+
+      /* -- 4c. MPI communications */
+      mpi_exchange_sendrecv_1d(phi, NX_GLOB,
+			       nbrtop, nbrbottom,
+			       domain.start, domain.end, comm);
+
+      /* swap the pointers */
+      MyData **tmp = phi;
+      phi = phi0;
+      phi0 = tmp;
+    }
+
+  /* master rank writes the solution */
+  if (!rank)
+    {
+      WriteSolution(phi, nx, ny);
+      
+      printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB);
+      printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start);
+    }
+
+  // free memory
+  if (phi0)
+    {
+      free(phi0[0]);
+      free(phi0);
+    }
+
+  if (phi)
+    {
+      free(phi[0]);
+      free(phi);
+    }
+
+  if (yg)
+    free(yg);
+  
+  if (xg)
+    free(xg);
+
+  MPI_Finalize();
+  
+  return 0;
+}
+
+/* ********************************************************************* */
+void BoundaryConditions(MyData **const restrict phi,
+			MyData  *const restrict x,
+			MyData  *const restrict y,
+                        const int               nx,
+			const int               ny)
+/*
+*********************************************************************** */
+{
+  const int ibeg = NGHOST;
+  const int iend = ibeg + nx - 1;
+    
+  const int jbeg = NGHOST;
+  const int jend = jbeg + ny - 1;
+
+  int i,j;
+  
+  /* -- Left -- */
+  i = ibeg - 1;
+  for (int j=jbeg ; j<=jend ; j++)
+    phi[j][i] = (1.0 - y[j]);
+    
+  /* -- Right -- */
+  i = iend + 1;
+  for (int j=jbeg ; j<=jend ; j++)
+    phi[j][i] = (y[j] * y[j]);
+    
+  /* -- Bottom -- */    
+  j = jbeg - 1;
+  for (int i=ibeg ; i<=iend ; i++)
+    phi[j][i] = (1.0 - x[i]);
+    
+  /* -- Top -- */
+  j = jend + 1;
+  for (int i=ibeg ; i<=iend ; i++)
+    phi[j][i] = x[i];
+
+  return;
+}
+
+/* ********************************************************************* */
+
+void JacobiAlgorithm(MyData      **const restrict Phi,
+		     MyData      **const restrict Phi0,
+		     MyData       *const restrict error,
+		     const MyData *const restrict delta,
+		     const int                    ibeg,
+		     const int                    iend,
+		     const int                    jbeg,
+		     const int                    jend)
+{
+  *error = 0.0;
+  for (int j=jbeg ; j<=jend ; j++)
+    {
+      for (int i=ibeg ; i<=iend ; i++)
+	{
+	  Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] +
+			      Phi0[j-1][i] + Phi0[j+1][i]);
+                
+	  *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]);
+	} /* loop over columns */
+    } /* loop over rows */
+  
+  return;
+}
+
+/* ********************************************************************* */
+void mpi_exchange_sendrecv_1d(MyData **const buffer,
+			      const int      n,
+			      const int      nbrtop,
+			      const int      nbrbottom,
+			      const int      start,
+			      const int      end,
+			      const MPI_Comm comm1d)
+{
+  /* The function is called by each MPI rank      */
+  /* - nbrtop    is the MPI process with rank + 1 */
+  /* - nbrbottom is the MPI process with rank - 1 */
+
+  /* SendRecv to/from nbrtop/nbrbottom */
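+  /* Minimal sketch (assumed implementation, not part of the original
+     skeleton): each row holds (n + 2) contiguous values including ghosts.
+     Send the last owned row to nbrtop and receive the bottom ghost row
+     from nbrbottom; MPI_PROC_NULL neighbours make that half a no-op.    */
+  MPI_Sendrecv(&buffer[end][0],       n + 2, MPI_MyDatatype, nbrtop,    0,
+               &buffer[start - 1][0], n + 2, MPI_MyDatatype, nbrbottom, 0,
+               comm1d, MPI_STATUS_IGNORE);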
+
+  
+  /* SendRecv to/from nbrbottom/nbrtop */
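+  /* Minimal sketch (assumed): send the first owned row to nbrbottom and
+     receive the top ghost row from nbrtop                               */
+  MPI_Sendrecv(&buffer[start][0],   n + 2, MPI_MyDatatype, nbrbottom, 1,
+               &buffer[end + 1][0], n + 2, MPI_MyDatatype, nbrtop,    1,
+               comm1d, MPI_STATUS_IGNORE);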
+  
+  return;
+}
+
+/* ********************************************************************* */
+
+void get_domains(MyData  **const buffer,
+		 myDomain *const domain,
+		 const int       grid_dim,
+		 const int       rank,
+		 const int       nranks,
+		 const MPI_Comm  comm)
+{
+  /* Master process gathers all the domains */
+  
+  /***************************** get the domain boundaries from each process *************************************/
+  myDomain *boundaries = NULL;
+  if (rank == MASTER)
+    {
+      boundaries = (myDomain *)malloc(nranks * sizeof(*boundaries));
+      if (boundaries == NULL)
+	{
+	  MPI_Abort(comm, EXIT_FAILURE);
+	  exit(EXIT_FAILURE);
+	}
+      boundaries[MASTER].start = domain->start;
+      boundaries[MASTER].end   = domain->end;
+    }
+
+  for (int task=0 ; task<nranks ; task++)
+    {
+      if (rank == MASTER)
+	{
+	  if (task)
+	    {
+	      MPI_Status status;
+	      MPI_Recv((void *)&boundaries[task], sizeof(myDomain), MPI_BYTE, task, 0, comm, &status);
+	      /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */
+	      /* 	{ */
+	      /* 	  free(boundaries); */
+	      /* 	  MPI_Abort(comm, EXIT_FAILURE); */
+	      /* 	  exit(EXIT_FAILURE); */
+	      /* 	} */
+	    }
+#if defined(DEBUG)
+	  printf("\n\t boundaries[%d].start = %d - boundaries[%d].end = %d",
+		 task, boundaries[task].start, task, boundaries[task].end);
+#endif /* DEBUG */	  
+	} /* MASTER */
+      else if (task == rank)
+	{
+	  MPI_Send((void *)domain, sizeof(myDomain), MPI_BYTE, MASTER, 0, comm);
+	}
+    } /* loop over nranks */
+
+#if defined(DEBUG)
+  if (rank == MASTER)
+    {
+      printf("\n");
+      fflush(stdout);
+    }
+#endif /* DEBUG */
+
+  /**************************************************************************************************/
+
+  /***************************************** get the domain from each process *************************/
+
+  for (int task=0 ; task<nranks ; task++)
+    {
+      if (rank == MASTER)
+	{
+	  if (task)
+	    {
+	      MPI_Status status;
+	      /* number of grid points to receive (including ghost points) */
+	      const int nrows = (boundaries[task].end - boundaries[task].start + 1);
+	      const int elements = (nrows * (grid_dim + 2));
+	      MPI_Recv((void *)&buffer[boundaries[task].start][0], elements, MPI_MyDatatype, task, 0, comm, &status);
+	      /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */
+	      /* 	{ */
+	      /* 	  free(boundaries); */
+	      /* 	  MPI_Abort(comm, EXIT_FAILURE); */
+	      /* 	  exit(EXIT_FAILURE); */
+	      /* 	} */
+	    }
+	} /* MASTER */
+      else if (task == rank)
+	{
+	  const int nrows = (domain->end - domain->start + 1);
+	  const int elements = (nrows * (grid_dim + 2));
+	  MPI_Send((void *)&buffer[domain->start][0], elements, MPI_MyDatatype, MASTER, 0, comm);
+	}
+    } /* loop over nranks */  
+  
+  /***************************************************************************************************/
+  
+  if (rank == MASTER)
+    free(boundaries);
+  
+  return;
+}
+
+/* ********************************************************************* */
+void WriteSolution(MyData **const phi,
+		   const int      nx,
+		   const int      ny)
+/*
+*********************************************************************** */
+{
+  const int ibeg = NGHOST;    
+  const int jbeg = NGHOST;
+  const int jend = jbeg + ny - 1;
+    
+  static int nfile = 0;  /* File counter */
+
+  char fname[128];
+  sprintf(fname,"jacobi2D_mpi_sendrecv_%02d.bin", nfile);
+    
+  FILE *fp;
+  printf ("> Writing %s\n",fname);
+  fp = fopen(fname, "wb");
+
+  /* discard boundaries */
+  for (int j=jbeg ; j<=jend ; j++)
+    {
+      fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp);
+    }
+    
+  nfile++;
+  fclose(fp);
+}
diff --git a/jacobi/mpi/SendRecv/src/tools.c b/jacobi/mpi/SendRecv/src/tools.c
new file mode 100644
index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce
--- /dev/null
+++ b/jacobi/mpi/SendRecv/src/tools.c
@@ -0,0 +1,59 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/* Authors:  A. Mignone (mignone@to.infn.it)                             */
+/*           V. Cesare  (valentina.cesare@inaf.it)                       */
+/*           D. Goz     (david.goz@inaf.it)                              */
+/*                                                                       */
+/* Date   : June 2024                                                    */
+/*                                                                       */
+/* ///////////////////////////////////////////////////////////////////// */
+
+#include "tools.h"
+
+/* ********************************************************************* */
+MyData **Allocate_2DdblArray(const int nx, const int ny)
+/*
+ * Allocate memory for a double precision array with
+ * nx rows and ny columns
+ *********************************************************************** */
+{
+  MyData **buf = malloc(nx * sizeof(MyData *));
+  assert(buf != NULL);
+
+  buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData));
+  assert(buf[0] != NULL);
+  
+  for (int j=1 ; j<nx ; j++)
+    buf[j] = buf[j-1] + ny;
+    
+  return buf;
+}
+
+/* ********************************************************************* */
+void Show_2DdblArray(const MyData **const A,
+		     const int            nx,
+		     const int            ny,
+		     const char *const    string)
+/* *********************************************************************** */
+{
+  printf ("%s\n",string);
+  printf ("------------------------------\n");
+  for (int i=0 ; i<nx ; i++)
+    {
+      for (int j=0 ; j<ny ; j++)
+	{
+	  printf ("%8.2f  ", A[i][j]);
+	}
+      printf ("\n");
+    }
+  printf ("------------------------------\n");
+}
+/* ********************************************************************* */
+
+double seconds()
+{
+  struct timeval tmp;
+  gettimeofday(&tmp, (struct timezone *)0);
+  double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0;
+
+  return sec;
+}
diff --git a/jacobi/mpi/Send_Recv_blocking/Makefile b/jacobi/mpi/Send_Recv_blocking/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..aa0427cc27994561b2d6e0aa13eb77f8f9cd8b19
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_blocking/Makefile
@@ -0,0 +1,61 @@
+#######################################################################
+# Author: David Goz (david.goz@inaf.it)                               #
+# June  2024                                                          #
+#######################################################################
+#
+# To see all the compilation options
+# $ make info
+#######################################################################
+
+# make.def defines how the application is compiled
+include make.def
+include make_mpi_path
+#######################################################################
+
+.PHONY: info mpi debug valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean
+
+info:
+	@echo ' '
+	@echo '-----------------------------------------------------------------------------------------'
+	@echo '$$ make mpi                 ---> compile the mpi application                             '
+	@echo '$$ make debug               ---> compile the mpi application for debugging               '
+	@echo '$$ make valgrind_memcheck   ---> run the mpi application using Valgrind under Memcheck   '
+	@echo '$$ make valgrind_callgrind  ---> run the mpi application using Valgrind under Callgrind  '
+	@echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind '
+	@echo '$$ make clean               ---> clean up all                                            '
+	@echo '$$ make info                ---> get make info                                           '
+	@echo '-----------------------------------------------------------------------------------------'
+	@echo ' '
+
+mpi: $(PROG)
+
+debug: $(PROG_DEBUG)
+
+valgrind_memcheck: $(PROG_MEMCHECK)
+	@echo 'oooOOO... valgrind_memcheck ...OOOooo'
+	mpirun -n 2 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 5 5
+	@echo 'oooOOO... valgrind_memcheck ...OOOooo'
+
+valgrind_callgrind: $(PROG_CALLGRIND)
+	@echo 'oooOOO... valgrind_callgrind ...OOOooo'
+	mpirun -n 2 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128
+	@echo ' '
+	@echo 'To generate a function-by-function summary from the profile data file:'
+	@echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less'
+	@echo '(kcachegrind is required in order to visualize the output using the GUI)'
+
+valgrind_cachegrind: $(PROG_CACHEGRIND)
+	@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
+	mpirun -n 2 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128
+	@echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less'
+	@echo '(kcachegrind is required in order to visualize the output using the GUI)'
+	@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
+
+clean:
+	rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~
+	rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND)
+	rm -f valgrind_*.txt
+	rm -f cachegrind.out.*
+	rm -f callgrind.*
+	rm -f *bin
+	rm -f jacobi_mpi_Send_Recv_blocking_*
diff --git a/jacobi/mpi/Send_Recv_blocking/include/allvars.h b/jacobi/mpi/Send_Recv_blocking/include/allvars.h
new file mode 100644
index 0000000000000000000000000000000000000000..798e4e6a2f2a740b3ab5a1394ac85eef8da1255c
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_blocking/include/allvars.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#define NGHOST   1
+#define NDIM     2 /* 2D */
+#define X        0
+#define Y        1
+#define TOL      1.e-5
+
+#include <mpi.h>
+
+#define MASTER 0
+
+#if defined(SINGLE_PRECISION)
+typedef float MyData;
+#define MPI_MyDatatype MPI_FLOAT
+#else
+/* double precision is assumed by default */
+typedef double MyData;
+#define MPI_MyDatatype MPI_DOUBLE
+#endif /* SINGLE_PRECISION */
diff --git a/jacobi/mpi/Send_Recv_blocking/include/tools.h b/jacobi/mpi/Send_Recv_blocking/include/tools.h
new file mode 100644
index 0000000000000000000000000000000000000000..066ae344777161b1979234fbf4fa9eece99d03b5
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_blocking/include/tools.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "allvars.h"
+#include <assert.h>
+#include <sys/time.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* function prototypes */
+MyData **Allocate_2DdblArray(const int nx, const int ny);
+void Show_2DdblArray(const MyData **const A,
+                     const int            nx,
+                     const int            ny,
+                     const char *const    string);
+
+double seconds(void);
+
diff --git a/jacobi/mpi/Send_Recv_blocking/make.def b/jacobi/mpi/Send_Recv_blocking/make.def
new file mode 100644
index 0000000000000000000000000000000000000000..b6e3ba0df86a743b4f7af448d637d5b096080273
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_blocking/make.def
@@ -0,0 +1,45 @@
+CC      = mpicc
+CFLAGS  = -Wall -Wextra -march=native
+LIBS    = -lm -lmpi
+
+SYSTYPE = $(strip $(shell uname -n))
+
+PROG            = jacobi_mpi_Send_Recv_blocking_$(SYSTYPE)
+PROG_DEBUG      = $(PROG)_DEBUG
+PROG_MEMCHECK   = $(PROG)_MEMCHECK
+PROG_CALLGRIND  = $(PROG)_CALLGRIND
+PROG_CACHEGRIND = $(PROG)_CACHEGRIND
+
+HEADERS       = $(wildcard ./include/*.h)
+SOURCES       = $(wildcard ./src/*.c)
+DEPENDENCIES  = $(SOURCES) $(HEADERS) Makefile
+
+$(PROG): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_DEBUG): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -DDEBUG -Og -ggdb3 -fno-omit-frame-pointer -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_MEMCHECK): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_CALLGRIND): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_CACHEGRIND): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
diff --git a/jacobi/mpi/Send_Recv_blocking/make_mpi_path b/jacobi/mpi/Send_Recv_blocking/make_mpi_path
new file mode 100644
index 0000000000000000000000000000000000000000..3fdf888da9d4837eb7eafb51c93e53516774ce93
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_blocking/make_mpi_path
@@ -0,0 +1,10 @@
+# set the MPI install path
+
+# pleiadi
+MPI_INSTALL_PATH = /opt/cluster/spack/opt/spack/linux-centos7-broadwell/gcc-11.2.0/openmpi-4.1.3-djxjqlmzbqwq76bhh3wvgxaefnoczleg
+
+
+
+MPI        = $(MPI_INSTALL_PATH)
+MPI_INC    = $(MPI)/include
+MPI_LIB    = $(MPI)/lib
\ No newline at end of file
diff --git a/jacobi/mpi/Send_Recv_blocking/script/input_parameters b/jacobi/mpi/Send_Recv_blocking/script/input_parameters
new file mode 100644
index 0000000000000000000000000000000000000000..be0cc32d813a1c58dfc2623b7c33ba4efd81bde7
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_blocking/script/input_parameters
@@ -0,0 +1,10 @@
+##########################################################################
+
+# set the grid size
+
+GRID_SIZE_X=128
+GRID_SIZE_Y=128
+
+TASKS=(2 4 8)
+
+##########################################################################
diff --git a/jacobi/mpi/Send_Recv_blocking/script/run_pleiadi.sh b/jacobi/mpi/Send_Recv_blocking/script/run_pleiadi.sh
new file mode 100755
index 0000000000000000000000000000000000000000..1043d5ac7180019a9f27d9b8c8524c129c2df20a
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_blocking/script/run_pleiadi.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+######################### RESOURCE ALLOCATION #####################################
+##SBATCH --account=????????
+
+#SBATCH --partition=pleiadi
+#SBATCH --job-name="Jacobi"
+#SBATCH --nodes=1
+#SBATCH --exclusive
+#SBATCH --output=Jacobi-mpi-Send_Recv_blocking-%j.out
+#SBATCH --error=Jacobi-mpi-Send_Recv_blocking.%j.err
+#SBATCH --time=00:05:00
+###################################################################################
+
+################################## MODULES ########################################
+export MODULE_VERSION=5.0.1
+source /opt/cluster/spack/share/spack/setup-env.sh
+
+# module purge
+module purge
+# load GCC
+module load default-gcc-11.2.0
+###################################################################################
+
+# input parameters
+source input_parameters
+
+WORKDIR=${PWD}
+# compile the application
+cd .. && make clean && make mpi
+if [[ "$?" != "0" ]]
+then
+    echo "Cannot compile the application ...aborting..."
+    exit 1
+fi
+
+# get the executable
+EXEC=$(find $(realpath ./) -maxdepth 1 -executable -name "jacobi_*" -type f -print)
+if [[ -z "${EXEC}" ]]
+then
+    echo "Cannot find the executable ...aborting..."
+    exit 2
+fi
+
+for TASK in ${TASKS[@]}
+do
+    # run the application
+    time mpirun -n ${TASK} ${EXEC} ${GRID_SIZE_X} ${GRID_SIZE_Y} |& tee ${EXEC}_TASK_${TASK}_output.txt
+done
+
+cd ${WORKDIR}
+
+exit 0
diff --git a/jacobi/mpi/Send_Recv_blocking/src/jacobi_2D_mpi_send_recv_blocking.c b/jacobi/mpi/Send_Recv_blocking/src/jacobi_2D_mpi_send_recv_blocking.c
new file mode 100644
index 0000000000000000000000000000000000000000..b33bbedfa2a19cfc30482e26702540c3de12d2a6
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_blocking/src/jacobi_2D_mpi_send_recv_blocking.c
@@ -0,0 +1,524 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/* Authors:  A. Mignone (mignone@to.infn.it)                             */
+/*           V. Cesare  (valentina.cesare@inaf.it)                       */
+/*           D. Goz     (david.goz@inaf.it)                              */
+/*                                                                       */
+/* Date   : June 2024                                                    */
+/*                                                                       */
+/* ///////////////////////////////////////////////////////////////////// */
+
+#include "allvars.h"
+#include "tools.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+/* #define DEBUG */
+
+typedef struct MyRows
+{
+  int start;
+  int end;
+} myDomain;
+
+/* function prototypes */
+void BoundaryConditions(MyData **const restrict,
+			MyData  *const restrict,
+			MyData  *const restrict,
+			const int,
+			const int);
+
+void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict,
+		     const MyData *const restrict, const int, const int, const int, const int);
+
+void get_domains(MyData  **const buffer,
+		 myDomain *const domain,
+		 const int       grid_dim,
+		 const int       rank,
+		 const int       nranks,
+		 const MPI_Comm  comm);
+
+void mpi_exchange_1d(MyData **const buffer,
+		     const int      n,
+		     const int      nbrtop,
+		     const int      nbrbottom,
+		     const int      start,
+		     const int      end,
+		     const MPI_Comm comm1d);
+
+void WriteSolution(MyData **const phi, const int nx, const int ny);
+
+void copy_grids(MyData **const restrict A,
+                MyData **const restrict B,
+                const int               xbeg,
+                const int               xend,
+                const int               ybeg,
+		const int               yend);
+
+int main(int argc, char **argv)
+{
+  int rank, Nranks;
+  const MPI_Comm comm = MPI_COMM_WORLD;
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(comm, &rank);
+  MPI_Comm_size(comm, &Nranks);
+
+  if ((argc <= 1) && !rank)
+    {
+      printf("\n\t Usage: <executable> <grid_size> \n\n");
+      MPI_Abort(comm, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+
+  /* global X and Y grid size (square matrix supposed) */
+  const int NX_GLOB = (int) strtol(argv[1], NULL, 10);
+  const int NY_GLOB = NX_GLOB;
+
+  /******************************************************************************************************/
+  /* 1D MPI-decomposition:
+     the physical domain is sliced into slabs along the vertical direction,
+     while the grids are replicated across the MPI processes.
+     This approach is not the most efficient in terms of memory usage,
+     because the arrays are replicated across the MPI processes instead of being distributed */
+
+  /* get the remainder, i.e. take into account the uneven
+     decomposition of points among the processes    */
+  const int rem = (NX_GLOB + 2) % Nranks;
+
+  /* get the amount of data for each MPI process:
+     - chunk is supposed to be >= 1 */
+  const int chunk = (NX_GLOB + 2 - rem) / Nranks;
+  if (chunk < 1)
+    {
+      printf("\n\t chunk < 1 ... aborting ...[rank: %d]\n", rank);
+      MPI_Abort(comm, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+  
+  /* get the slab dimension along vertical direction: */
+  int incr, offset;
+  if (rank < rem)
+    {
+      incr = chunk + 1;
+      offset = 0;
+    }
+  else
+    {
+      incr = chunk;
+      offset = rem;
+    }
+
+  myDomain domain;
+  domain.start = ((rank * incr) + offset);
+  domain.end   = (domain.start + incr) - 1;
+  /* boundaries */
+  domain.start = ((domain.start == 0)         ? NGHOST  : domain.start);
+  domain.end   = ((domain.end == NX_GLOB + 1) ? NX_GLOB : domain.end);
+
+  /* rank of the top process */
+  const int nbrtop    = (((rank + 1) >= Nranks) ? MPI_PROC_NULL : (rank + 1));
+  /* rank of the bottom process */
+  const int nbrbottom = (((rank - 1) < 0)       ? MPI_PROC_NULL : (rank - 1));
+
+  /******************************************************************************************************/
+  
+  const MyData xbeg = 0.0;
+  const MyData xend = 1.0;
+  const MyData ybeg = 0.0;
+  const MyData yend = 1.0;
+
+  const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1),
+			      (yend - ybeg)/(NY_GLOB + 1)};
+
+  /* --------------------------------------------------------
+     1. Set grid indices
+     -------------------------------------------------------- */
+    
+  const int ibeg   = NGHOST;
+  const int iend   = ibeg + NX_GLOB - 1;
+  const int nx     = iend - ibeg + 1;
+  const int nx_tot = nx + 2 * NGHOST;
+    
+  const int jbeg   = NGHOST;
+  const int jend   = jbeg + NY_GLOB - 1;
+  const int ny     = jend - jbeg + 1;
+  const int ny_tot = ny + 2 * NGHOST;
+
+  if (rank == MASTER)
+    {
+      printf("\n\t Grid indices:");
+      printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d"    ,ibeg, iend, nx_tot);
+      printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot);
+    }
+
+#if defined(DEBUG)
+  for (int task=0 ; task<Nranks ; task++)
+    {
+      MPI_Barrier(comm);
+      
+      if (task == rank)
+	{
+	  printf("\n\t rank: %d", rank);
+	  printf("\n\t\t domain.start: %d - domain.end: %d", domain.start, domain.end);
+	  printf("\n\t\t nbrtop: %d - nbrbottom: %d \n", nbrtop, nbrbottom);
+	  fflush(stdout);
+	}
+    }
+
+  MPI_Barrier(comm);
+#endif /* DEBUG */
+  
+  /* --------------------------------------------------------
+     2. Generate grid, allocate memory
+        Not optimized because the grids are (unnecessarily)
+	replicated across MPI processes
+     -------------------------------------------------------- */
+
+  /* memory allocation */
+  MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData));
+  MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData));
+  assert((xg != NULL) && (yg != NULL));
+
+  /* initial conditions */
+  for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X];
+  for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y];
+  MyData *x = xg; /* Global and local grids are the same  */
+  MyData *y = yg; /* for serial version of the code       */
+
+  /* grids memory allocation */
+  MyData **phi  = Allocate_2DdblArray(ny_tot, nx_tot);
+  MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot);
+    
+  /* --------------------------------------------------------
+     3. Initialize solution array to 0
+     -------------------------------------------------------- */
+    
+  for (int j=jbeg ; j<=jend ; j++)
+    for (int i=ibeg ; i<=iend ; i++)
+      {
+	phi0[j][i] = 0.0;
+	phi[j][i]  = 0.0;
+      }
+  
+  /* --------------------------------------------------------
+     4. Main iteration cycle
+     -------------------------------------------------------- */
+
+  const double time_start = MPI_Wtime();
+  
+  /* -- 4a. Set boundary conditions first -- */  
+  BoundaryConditions(phi0, x, y, nx, ny);
+  BoundaryConditions(phi, x, y, nx, ny);
+
+  MyData err = 1.0;
+  /* iterations */
+  int k = 0;
+  while (1)
+    {      
+      /* -- 4b. Jacobi's method and residual (interior points) -- */
+      /*       core algorithm                                     */
+
+      err = 0.0;
+      JacobiAlgorithm(phi, phi0, &err, delta,
+		      ibeg, iend, domain.start, domain.end);
+
+      if (!rank)
+	printf("\n\t Iteration = %d - err = %lg\n",k, err);
+
+      /* increase the counter of loop iterations */
+      k++;
+
+      /* get the total error */
+      MyData toterr;
+      /* combines values from all processes and distributes the result back to all processes */
+      MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm);
+      
+      /* check convergence */
+      if (toterr <= TOL)
+	{
+	  /* master task gathers all the domains */
+	  get_domains(phi, &domain, NY_GLOB, rank, Nranks, comm);
+
+	  break;
+	}
+
+      /* -- 4c. MPI communications */
+      mpi_exchange_1d(phi, NX_GLOB,
+		      nbrtop, nbrbottom,
+		      domain.start, domain.end, comm);
+
+      /* swap the pointers */
+      MyData **tmp = phi;
+      phi = phi0;
+      phi0 = tmp;
+    }
+
+  /* master rank writes the solution */
+  if (!rank)
+    {
+      WriteSolution(phi, nx, ny);
+      
+      printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB);
+      printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start);
+    }
+
+  // free memory
+  if (phi0)
+    {
+      free(phi0[0]);
+      free(phi0);
+    }
+
+  if (phi)
+    {
+      free(phi[0]);
+      free(phi);
+    }
+
+  if (yg)
+    free(yg);
+  
+  if (xg)
+    free(xg);
+
+  MPI_Finalize();
+  
+  return 0;
+}
+
+/* ********************************************************************* */
+void BoundaryConditions(MyData **const restrict phi,
+			MyData  *const restrict x,
+			MyData  *const restrict y,
+                        const int               nx,
+			const int               ny)
+/*
+*********************************************************************** */
+{
+  const int ibeg = NGHOST;
+  const int iend = ibeg + nx - 1;
+    
+  const int jbeg = NGHOST;
+  const int jend = jbeg + ny - 1;
+
+  int i,j;
+  
+  /* -- Left -- */
+  i = ibeg - 1;
+  for (int j=jbeg ; j<=jend ; j++)
+    phi[j][i] = (1.0 - y[j]);
+    
+  /* -- Right -- */
+  i = iend + 1;
+  for (int j=jbeg ; j<=jend ; j++)
+    phi[j][i] = (y[j] * y[j]);
+    
+  /* -- Bottom -- */    
+  j = jbeg - 1;
+  for (int i=ibeg ; i<=iend ; i++)
+    phi[j][i] = (1.0 - x[i]);
+    
+  /* -- Top -- */
+  j = jend + 1;
+  for (int i=ibeg ; i<=iend ; i++)
+    phi[j][i] = x[i];
+
+  return;
+}
+
+/* ********************************************************************* */
+
+void JacobiAlgorithm(MyData      **const restrict Phi,
+		     MyData      **const restrict Phi0,
+		     MyData       *const restrict error,
+		     const MyData *const restrict delta,
+		     const int                    ibeg,
+		     const int                    iend,
+		     const int                    jbeg,
+		     const int                    jend)
+{
+  *error = 0.0;
+  for (int j=jbeg ; j<=jend ; j++)
+    {
+      for (int i=ibeg ; i<=iend ; i++)
+	{
+	  Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] +
+			      Phi0[j-1][i] + Phi0[j+1][i]);
+                
+	  *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]);
+	} /* loop over columns */
+    } /* loop over rows */
+  
+  return;
+}
+
+/* ********************************************************************* */
+void mpi_exchange_1d(MyData **const buffer,
+		     const int      n,
+		     const int      nbrtop,
+		     const int      nbrbottom,
+		     const int      start,
+		     const int      end,
+		     const MPI_Comm comm1d)
+{
+  /* The function is called by each MPI rank
+     - nbrtop is the MPI process with rank + 1
+     - nbrbottom is the MPI process with rank - 1 */
+
+  /***************** First communication stage *******************/
+  /* Perform a blocking send to the top (rank+1) process */
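+  /* Minimal sketch (assumed implementation, not part of the original
+     skeleton): each row holds (n + 2) contiguous values including ghosts.
+     A blocking MPI_Send may rely on internal buffering; a MPI_PROC_NULL
+     destination turns the call into a no-op.                            */
+  MPI_Send(&buffer[end][0], n + 2, MPI_MyDatatype, nbrtop, 0, comm1d);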
+
+
+
+  
+  /* Perform a blocking receive from the bottom (rank-1) process */
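+  /* Minimal sketch (assumed): fill the bottom ghost row from nbrbottom */
+  MPI_Recv(&buffer[start - 1][0], n + 2, MPI_MyDatatype, nbrbottom, 0,
+           comm1d, MPI_STATUS_IGNORE);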
+
+
+
+
+  
+  /***************************************************************/
+
+  /**************** Second communication stage *******************/
+  /* - Perform a blocking send to the bottom (rank-1) process */
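+  /* Minimal sketch (assumed): send the first owned row to nbrbottom */
+  MPI_Send(&buffer[start][0], n + 2, MPI_MyDatatype, nbrbottom, 1, comm1d);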
+
+
+  
+
+  /* Perform a blocking receive from the top (rank+1) process */
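+  /* Minimal sketch (assumed): fill the top ghost row from nbrtop */
+  MPI_Recv(&buffer[end + 1][0], n + 2, MPI_MyDatatype, nbrtop, 1,
+           comm1d, MPI_STATUS_IGNORE);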
+
+
+  
+  
+  /***************************************************************/
+  
+  return;
+}
+
+/* ********************************************************************* */
+
+void get_domains(MyData  **const buffer,
+		 myDomain *const domain,
+		 const int       grid_dim,
+		 const int       rank,
+		 const int       nranks,
+		 const MPI_Comm  comm)
+{
+  /* Master process gathers all the domains */
+  
+  /***************************** get the domain boundaries from each process *************************************/
+  myDomain *boundaries = NULL;
+  if (rank == MASTER)
+    {
+      boundaries = (myDomain *)malloc(nranks * sizeof(*boundaries));
+      if (boundaries == NULL)
+	{
+	  MPI_Abort(comm, EXIT_FAILURE);
+	  exit(EXIT_FAILURE);
+	}
+      boundaries[MASTER].start = domain->start;
+      boundaries[MASTER].end   = domain->end;
+    }
+
+  for (int task=0 ; task<nranks ; task++)
+    {
+      if (rank == MASTER)
+	{
+	  if (task)
+	    {
+	      MPI_Status status;
+	      MPI_Recv((void *)&boundaries[task], sizeof(myDomain), MPI_BYTE, task, 0, comm, &status);
+	      /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */
+	      /* 	{ */
+	      /* 	  free(boundaries); */
+	      /* 	  MPI_Abort(comm, EXIT_FAILURE); */
+	      /* 	  exit(EXIT_FAILURE); */
+	      /* 	} */
+	    }
+#if defined(DEBUG)
+	  printf("\n\t boundaries[%d].start = %d - boundaries[%d].end = %d",
+		 task, boundaries[task].start, task, boundaries[task].end);
+#endif /* DEBUG */	  
+	} /* MASTER */
+      else if (task == rank)
+	{
+	  MPI_Send((void *)domain, sizeof(myDomain), MPI_BYTE, MASTER, 0, comm);
+	}
+    } /* loop over nranks */
+
+#if defined(DEBUG)
+  if (rank == MASTER)
+    {
+      printf("\n");
+      fflush(stdout);
+    }
+#endif /* DEBUG */
+
+  /**************************************************************************************************/
+
+  /***************************************** get the domain from each process *************************/
+
+  for (int task=0 ; task<nranks ; task++)
+    {
+      if (rank == MASTER)
+	{
+	  if (task)
+	    {
+	      MPI_Status status;
+	      /* number of grid points to receive (including ghost points) */
+	      const int nrows = (boundaries[task].end - boundaries[task].start + 1);
+	      const int elements = (nrows * (grid_dim + 2));
+	      MPI_Recv((void *)&buffer[boundaries[task].start][0], elements, MPI_MyDatatype, task, 0, comm, &status);
+	      /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */
+	      /* 	{ */
+	      /* 	  free(boundaries); */
+	      /* 	  MPI_Abort(comm, EXIT_FAILURE); */
+	      /* 	  exit(EXIT_FAILURE); */
+	      /* 	} */
+	    }
+	} /* MASTER */
+      else if (task == rank)
+	{
+	  const int nrows = (domain->end - domain->start + 1);
+	  const int elements = (nrows * (grid_dim + 2));
+	  MPI_Send((void *)&buffer[domain->start][0], elements, MPI_MyDatatype, MASTER, 0, comm);
+	}
+    } /* loop over nranks */  
+  
+  /***************************************************************************************************/
+  
+  if (rank == MASTER)
+    free(boundaries);
+  
+  return;
+}
+
+/* ********************************************************************* */
+void WriteSolution(MyData **const phi,
+		   const int      nx,
+		   const int      ny)
+/*
+*********************************************************************** */
+{
+  const int ibeg = NGHOST;    
+  const int jbeg = NGHOST;
+  const int jend = jbeg + ny - 1;
+    
+  static int nfile = 0;  /* File counter */
+
+  char fname[128];
+  sprintf(fname,"jacobi2D_mpi_send_recv_blocking_%02d.bin", nfile);
+    
+  FILE *fp;
+  printf ("> Writing %s\n",fname);
+  fp = fopen(fname, "wb");
+
+  /* discard boundaries */
+  for (int j=jbeg ; j<=jend ; j++)
+    {
+      fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp);
+    }
+    
+  nfile++;
+  fclose(fp);
+}
diff --git a/jacobi/mpi/Send_Recv_blocking/src/tools.c b/jacobi/mpi/Send_Recv_blocking/src/tools.c
new file mode 100644
index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_blocking/src/tools.c
@@ -0,0 +1,59 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/* Authors:  A. Mignone (mignone@to.infn.it)                             */
+/*           V. Cesare  (valentina.cesare@inaf.it)                       */
+/*           D. Goz     (david.goz@inaf.it)                              */
+/*                                                                       */
+/* Date   : June 2024                                                    */
+/*                                                                       */
+/* ///////////////////////////////////////////////////////////////////// */
+
+#include "tools.h"
+
+/* ********************************************************************* */
+MyData **Allocate_2DdblArray(const int nx, const int ny)
+/*
+ * Allocate memory for a double precision array with
+ * nx rows and ny columns
+ *********************************************************************** */
+{
+  MyData **buf = malloc(nx * sizeof(MyData *));
+  assert(buf != NULL);
+
+  buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData));
+  assert(buf[0] != NULL);
+  
+  for (int j=1 ; j<nx ; j++)
+    buf[j] = buf[j-1] + ny;
+    
+  return buf;
+}
+
+/* ********************************************************************* */
+void Show_2DdblArray(const MyData **const A,
+		     const int            nx,
+		     const int            ny,
+		     const char *const    string)
+/* *********************************************************************** */
+{
+  printf ("%s\n",string);
+  printf ("------------------------------\n");
+  for (int i=0 ; i<nx ; i++)
+    {
+      for (int j=0 ; j<ny ; j++)
+	{
+	  printf ("%8.2f  ", A[i][j]);
+	}
+      printf ("\n");
+    }
+  printf ("------------------------------\n");
+}
+/* ********************************************************************* */
+
+double seconds()
+{
+  struct timeval tmp;
+  gettimeofday(&tmp, (struct timezone *)0);
+  double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0;
+
+  return sec;
+}
diff --git a/jacobi/mpi/Send_Recv_nonblocking/Makefile b/jacobi/mpi/Send_Recv_nonblocking/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..7013e43ef2a715a3f238199d3030392d79048527
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_nonblocking/Makefile
@@ -0,0 +1,61 @@
+#######################################################################
+# Author: David Goz (david.goz@inaf.it)                               #
+# June  2024                                                          #
+#######################################################################
+#
+# To see all the compilation options
+# $ make info
+#######################################################################
+
+# make.def defines how the application is compiled
+include make.def
+include make_mpi_path
+#######################################################################
+
+.PHONY: info mpi debug valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean
+
+info:
+	@echo ' '
+	@echo '-----------------------------------------------------------------------------------------'
+	@echo '$$ make mpi                 ---> compile the mpi application                             '
+	@echo '$$ make debug               ---> compile the mpi application for debugging               '
+	@echo '$$ make valgrind_memcheck   ---> run the mpi application using Valgrind under Memcheck   '
+	@echo '$$ make valgrind_callgrind  ---> run the mpi application using Valgrind under Callgrind  '
+	@echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind '
+	@echo '$$ make clean               ---> clean up all                                            '
+	@echo '$$ make info                ---> get make info                                           '
+	@echo '-----------------------------------------------------------------------------------------'
+	@echo ' '
+
+mpi: $(PROG)
+
+debug: $(PROG_DEBUG)
+
+valgrind_memcheck: $(PROG_MEMCHECK)
+	@echo 'oooOOO... valgrind_memcheck ...OOOooo'
+	mpirun -n 2 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 5 5
+	@echo 'oooOOO... valgrind_memcheck ...OOOooo'
+
+valgrind_callgrind: $(PROG_CALLGRIND)
+	@echo 'oooOOO... valgrind_callgrind ...OOOooo'
+	mpirun -n 2 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128
+	@echo ' '
+	@echo 'To generate a function-by-function summary from the profile data file:'
+	@echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less'
+	@echo '(kcachegrind is required in order to visualize the output using the GUI)'
+
+valgrind_cachegrind: $(PROG_CACHEGRIND)
+	@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
+	mpirun -n 2 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128
+	@echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less'
+	@echo '(kcachegrind is required in order to visualize the output using the GUI)'
+	@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
+
+clean:
+	rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~
+	rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND)
+	rm -f valgrind_*.txt
+	rm -f cachegrind.out.*
+	rm -f callgrind.*
+	rm -f *bin
+	rm -f jacobi_mpi_Send_Recv_nonblocking_*
diff --git a/jacobi/mpi/Send_Recv_nonblocking/include/allvars.h b/jacobi/mpi/Send_Recv_nonblocking/include/allvars.h
new file mode 100644
index 0000000000000000000000000000000000000000..798e4e6a2f2a740b3ab5a1394ac85eef8da1255c
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_nonblocking/include/allvars.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#define NGHOST   1
+#define NDIM     2 /* 2D */
+#define X        0
+#define Y        1
+#define TOL      1.e-5
+
+#include <mpi.h>
+
+#define MASTER 0
+
+#if defined(SINGLE_PRECISION)
+typedef float MyData;
+#define MPI_MyDatatype MPI_FLOAT
+#else
+/* double precision is assumed by default */
+typedef double MyData;
+#define MPI_MyDatatype MPI_DOUBLE
+#endif /* SINGLE_PRECISION */
diff --git a/jacobi/mpi/Send_Recv_nonblocking/include/tools.h b/jacobi/mpi/Send_Recv_nonblocking/include/tools.h
new file mode 100644
index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_nonblocking/include/tools.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "allvars.h"
+#include <assert.h>
+#include <sys/time.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* function prototypes */
+MyData **Allocate_2DdblArray(const int nx, const int ny);
+void Show_2DdblArray(const MyData **const A,
+                     const int            nx,
+                     const int            ny,
+                     const char *const    string);
+
+double seconds(void);
diff --git a/jacobi/mpi/Send_Recv_nonblocking/make.def b/jacobi/mpi/Send_Recv_nonblocking/make.def
new file mode 100644
index 0000000000000000000000000000000000000000..31f5cdfbddd736474560463e767954e9cf870389
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_nonblocking/make.def
@@ -0,0 +1,45 @@
+CC     = mpicc
+CFLAGS = -Wall -Wextra -march=native
+LIBS   = -lm -lmpi
+
+SYSTYPE = $(strip $(shell uname -n))
+
+PROG            = jacobi_mpi_Send_Recv_nonblocking_$(SYSTYPE)
+PROG_DEBUG      = $(PROG)_DEBUG
+PROG_MEMCHECK   = $(PROG)_MEMCHECK
+PROG_CALLGRIND  = $(PROG)_CALLGRIND
+PROG_CACHEGRIND = $(PROG)_CACHEGRIND
+
+HEADERS       = $(wildcard ./include/*.h)
+SOURCES       = $(wildcard ./src/*.c)
+DEPENDENCIES  = $(SOURCES) $(HEADERS) Makefile
+
+$(PROG): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_DEBUG): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -Og -ggdb3 -DDEBUG -fno-omit-frame-pointer -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_MEMCHECK): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_CALLGRIND): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_CACHEGRIND): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
diff --git a/jacobi/mpi/Send_Recv_nonblocking/make_mpi_path b/jacobi/mpi/Send_Recv_nonblocking/make_mpi_path
new file mode 100644
index 0000000000000000000000000000000000000000..3fdf888da9d4837eb7eafb51c93e53516774ce93
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_nonblocking/make_mpi_path
@@ -0,0 +1,10 @@
+# set the MPI install path
+
+# pleiadi
+MPI_INSTALL_PATH = /opt/cluster/spack/opt/spack/linux-centos7-broadwell/gcc-11.2.0/openmpi-4.1.3-djxjqlmzbqwq76bhh3wvgxaefnoczleg
+
+
+
+MPI        = $(MPI_INSTALL_PATH)
+MPI_INC    = $(MPI)/include
+MPI_LIB    = $(MPI)/lib
\ No newline at end of file
diff --git a/jacobi/mpi/Send_Recv_nonblocking/script/input_parameters b/jacobi/mpi/Send_Recv_nonblocking/script/input_parameters
new file mode 100644
index 0000000000000000000000000000000000000000..be0cc32d813a1c58dfc2623b7c33ba4efd81bde7
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_nonblocking/script/input_parameters
@@ -0,0 +1,10 @@
+##########################################################################
+
+# set the grid size
+
+GRID_SIZE_X=128
+GRID_SIZE_Y=128
+
+TASKS=(2 4 8)
+
+##########################################################################
diff --git a/jacobi/mpi/Send_Recv_nonblocking/script/run_pleiadi.sh b/jacobi/mpi/Send_Recv_nonblocking/script/run_pleiadi.sh
new file mode 100755
index 0000000000000000000000000000000000000000..1043d5ac7180019a9f27d9b8c8524c129c2df20a
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_nonblocking/script/run_pleiadi.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+######################### RESOURCE ALLOCATION ####################################
+##SBATCH --account=????????
+
+#SBATCH --partition=pleiadi
+#SBATCH --job-name="Jacobi"
+#SBATCH --nodes=1
+#SBATCH --exclusive
+#SBATCH --output=Jacobi-mpi-Send_Recv_nonblocking-%j.out
+#SBATCH --error=Jacobi-mpi-Send_Recv_nonblocking.%j.err
+#SBATCH --time=00:05:00
+###################################################################################
+
+################################## MODULES ########################################
+export MODULE_VERSION=5.0.1
+source /opt/cluster/spack/share/spack/setup-env.sh
+
+# module purge
+module purge
+# load GCC
+module load default-gcc-11.2.0
+###################################################################################
+
+# input parameters
+source input_parameters
+
+WORKDIR=${PWD}
+# compile the application
+cd .. && make clean && make mpi
+if [[ "$?" != "0" ]]
+then
+    echo "Cannot compile the application ...aborting..."
+    exit 1
+fi
+
+# get the executable
+EXEC=$(find $(realpath ./) -maxdepth 1 -executable -name "jacobi_*" -type f -print)
+if [[ -z "${EXEC}" ]]
+then
+    echo "Cannot find the executable ...aborting..."
+    exit 2
+fi
+
+for TASK in ${TASKS[@]}
+do
+    # run the application
+    time mpirun -n ${TASK} ${EXEC} ${GRID_SIZE_X} ${GRID_SIZE_Y} |& tee ${EXEC}_TASK_${TASK}_output.txt
+done
+
+cd ${WORKDIR}
+
+exit 0
diff --git a/jacobi/mpi/Send_Recv_nonblocking/src/jacobi_2D_mpi_send_recv_nonblocking.c b/jacobi/mpi/Send_Recv_nonblocking/src/jacobi_2D_mpi_send_recv_nonblocking.c
new file mode 100644
index 0000000000000000000000000000000000000000..e0200d938eee8804c6578a7e98213f7d8892e95d
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_nonblocking/src/jacobi_2D_mpi_send_recv_nonblocking.c
@@ -0,0 +1,533 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/* Authors:  A. Mignone (mignone@to.infn.it)                             */
+/*           V. Cesare  (valentina.cesare@inaf.it)                       */
+/*           D. Goz     (david.goz@inaf.it)                              */
+/*                                                                       */
+/* Date   : June 2024                                                    */
+/*                                                                       */
+/* ///////////////////////////////////////////////////////////////////// */
+
+#include "allvars.h"
+#include "tools.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+/* #define DEBUG */
+
+typedef struct MyRows
+{
+  int start;
+  int end;
+} myDomain;
+
+/* function prototypes */
+void BoundaryConditions(MyData **const restrict,
+			MyData  *const restrict,
+			MyData  *const restrict,
+			const int,
+			const int);
+
+void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict,
+		     const MyData *const restrict, const int, const int, const int, const int);
+
+void get_domains(MyData  **const buffer,
+		 myDomain *const domain,
+		 const int       grid_dim,
+		 const int       rank,
+		 const int       nranks,
+		 const MPI_Comm  comm);
+
+void mpi_exchange_nonblocking_1d(MyData **const buffer,
+				 const int      n,
+				 const int      nbrtop,
+				 const int      nbrbottom,
+				 const int      start,
+				 const int      end,
+				 const MPI_Comm comm1d);
+
+void WriteSolution(MyData **const phi, const int nx, const int ny);
+
+void copy_grids(MyData **const restrict A,
+                MyData **const restrict B,
+                const int               xbeg,
+                const int               xend,
+                const int               ybeg,
+		const int               yend);
+
+int main(int argc, char **argv)
+{
+  int rank, Nranks;
+  const MPI_Comm comm = MPI_COMM_WORLD;
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(comm, &rank);
+  MPI_Comm_size(comm, &Nranks);
+
+  if ((argc <= 1) && !rank)
+    {
+      printf("\n\t Usage: <executable> <grid_size> \n\n");
+      MPI_Abort(comm, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+
+  /* global X and Y grid size (square grid assumed) */
+  const int NX_GLOB = (int) strtol(argv[1], NULL, 10);
+  const int NY_GLOB = NX_GLOB;
+
+  /******************************************************************************************************/
+  /* 1D MPI decomposition:
+     the physical domain is sliced into slabs along the vertical direction,
+     while the grids are replicated across the MPI processes.
+     This approach is not the most efficient in terms of memory usage,
+     because the arrays are replicated on every MPI process instead of being distributed */
+
+  /* get the remainder, i.e. take into account an uneven
+     decomposition of points among the processes    */
+  const int rem = (NX_GLOB + 2) % Nranks;
+
+  /* get the amount of data for each MPI process:
+     - chunk is supposed to be >= 1 */
+  const int chunk = (NX_GLOB + 2 - rem) / Nranks;
+  if (chunk < 1)
+    {
+      printf("\n\t chunk < 1 ... aborting ...[rank: %d]\n", rank);
+      MPI_Abort(comm, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+  
+  /* get the slab dimension along vertical direction: */
+  int incr, offset;
+  if (rank < rem)
+    {
+      incr = chunk + 1;
+      offset = 0;
+    }
+  else
+    {
+      incr = chunk;
+      offset = rem;
+    }
+
+  myDomain domain;
+  domain.start = ((rank * incr) + offset);
+  domain.end   = (domain.start + incr) - 1;
+  /* boundaries */
+  domain.start = ((domain.start == 0)         ? NGHOST  : domain.start);
+  domain.end   = ((domain.end == NX_GLOB + 1) ? NX_GLOB : domain.end);
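+
+  /* Worked example (illustration only, not part of the original code): with
+     NX_GLOB = 8 and Nranks = 3, the (NX_GLOB + 2) = 10 rows give rem = 1 and
+     chunk = 3, so rank 0 gets rows 0-3 (clipped to 1-3 after the boundary
+     adjustment), rank 1 gets rows 4-6, and rank 2 gets rows 7-9 (clipped to
+     7-8). The interior rows 1..8 are thus covered exactly once.            */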
+
+  /* rank of the top process */
+  const int nbrtop    = (((rank + 1) >= Nranks) ? MPI_PROC_NULL : (rank + 1));
+  /* rank of the bottom process */
+  const int nbrbottom = (((rank - 1) < 0)       ? MPI_PROC_NULL : (rank - 1));
+
+  /******************************************************************************************************/
+  
+  const MyData xbeg = 0.0;
+  const MyData xend = 1.0;
+  const MyData ybeg = 0.0;
+  const MyData yend = 1.0;
+
+  const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1),
+			      (yend - ybeg)/(NY_GLOB + 1)};
+
+  /* --------------------------------------------------------
+     1. Set grid indices
+     -------------------------------------------------------- */
+    
+  const int ibeg   = NGHOST;
+  const int iend   = ibeg + NX_GLOB - 1;
+  const int nx     = iend - ibeg + 1;
+  const int nx_tot = nx + 2 * NGHOST;
+    
+  const int jbeg   = NGHOST;
+  const int jend   = jbeg + NY_GLOB - 1;
+  const int ny     = jend - jbeg + 1;
+  const int ny_tot = ny + 2 * NGHOST;
+
+  if (rank == MASTER)
+    {
+      printf("\n\t Grid indices:");
+      printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d"    ,ibeg, iend, nx_tot);
+      printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot);
+    }
+
+#if defined(DEBUG)
+  for (int task=0 ; task<Nranks ; task++)
+    {
+      MPI_Barrier(comm);
+      
+      if (task == rank)
+	{
+	  printf("\n\t rank: %d", rank);
+	  printf("\n\t\t domain.start: %d - domain.end: %d", domain.start, domain.end);
+	  printf("\n\t\t nbrtop: %d - nbrbottom: %d \n", nbrtop, nbrbottom);
+	  fflush(stdout);
+	}
+    }
+
+  MPI_Barrier(comm);
+#endif /* DEBUG */
+  
+  /* --------------------------------------------------------
+     2. Generate grid, allocate memory
+        Not optimized because the grids are (unnecessarily)
+	replicated across MPI processes
+     -------------------------------------------------------- */
+
+  /* memory allocation */
+  MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData));
+  MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData));
+  assert((xg != NULL) && (yg != NULL));
+
+  /* initial conditions */
+  for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X];
+  for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y];
+  MyData *x = xg; /* Global and local grids are the same  */
+  MyData *y = yg; /* for serial version of the code       */
+
+  /* grids memory allocation */
+  MyData **phi  = Allocate_2DdblArray(ny_tot, nx_tot);
+  MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot);
+    
+  /* --------------------------------------------------------
+     3. Initialize solution array to 0
+     -------------------------------------------------------- */
+    
+  for (int j=jbeg ; j<=jend ; j++)
+    for (int i=ibeg ; i<=iend ; i++)
+      {
+	phi0[j][i] = 0.0;
+	phi[j][i]  = 0.0;
+      }
+  
+  /* --------------------------------------------------------
+     4. Main iteration cycle
+     -------------------------------------------------------- */
+
+  const double time_start = MPI_Wtime();
+  
+  /* -- 4a. Set boundary conditions first -- */  
+  BoundaryConditions(phi0, x, y, nx, ny);
+  BoundaryConditions(phi, x, y, nx, ny);
+
+  MyData err = 1.0;
+  /* iterations */
+  int k = 0;
+  while (1)
+    {      
+      /* -- 4c. Jacobi's method and residual (interior points) -- */
+      /*       core algorithm                                     */
+
+      err = 0.0;
+      JacobiAlgorithm(phi, phi0, &err, delta,
+		      ibeg, iend, domain.start, domain.end);
+
+      if (!rank)
+	printf("\n\t Iteration = %d - err = %lg\n",k, err);
+
+      /* increase the counter of loop iterations */
+      k++;
+
+      /* get the total error */
+      MyData toterr;
+      /* combines values from all processes and distributes the result back to all processes */
+      MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm);
+      
+      /* check convergence */
+      if (toterr <= TOL)
+	{
+	  /* master task gathers all the domains */
+	  get_domains(phi, &domain, NY_GLOB, rank, Nranks, comm);
+
+	  break;
+	}
+
+      /* -- 4b. MPI communications */
+      mpi_exchange_nonblocking_1d(phi, NX_GLOB,
+				  nbrtop, nbrbottom,
+				  domain.start, domain.end, comm);
+
+      /* swap the pointers */
+      MyData **tmp = phi;
+      phi = phi0;
+      phi0 = tmp;
+    }
+
+  /* master rank writes the solution */
+  if (!rank)
+    {
+      WriteSolution(phi, nx, ny);
+      
+      printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB);
+      printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start);
+    }
+
+  // free memory
+  if (phi0)
+    {
+      free(phi0[0]);
+      free(phi0);
+    }
+
+  if (phi)
+    {
+      free(phi[0]);
+      free(phi);
+    }
+
+  if (yg)
+    free(yg);
+  
+  if (xg)
+    free(xg);
+
+  MPI_Finalize();
+  
+  return 0;
+}
+
+/* ********************************************************************* */
+void BoundaryConditions(MyData **const restrict phi,
+			MyData  *const restrict x,
+			MyData  *const restrict y,
+                        const int               nx,
+			const int               ny)
+/*
+*********************************************************************** */
+{
+  const int ibeg = NGHOST;
+  const int iend = ibeg + nx - 1;
+    
+  const int jbeg = NGHOST;
+  const int jend = jbeg + ny - 1;
+
+  int i,j;
+  
+  /* -- Left -- */
+  i = ibeg - 1;
+  for (int j=jbeg ; j<=jend ; j++)
+    phi[j][i] = (1.0 - y[j]);
+    
+  /* -- Right -- */
+  i = iend + 1;
+  for (int j=jbeg ; j<=jend ; j++)
+    phi[j][i] = (y[j] * y[j]);
+    
+  /* -- Bottom -- */    
+  j = jbeg - 1;
+  for (int i=ibeg ; i<=iend ; i++)
+    phi[j][i] = (1.0 - x[i]);
+    
+  /* -- Top -- */
+  j = jend + 1;
+  for (int i=ibeg ; i<=iend ; i++)
+    phi[j][i] = x[i];
+
+  return;
+}
+
+/* ********************************************************************* */
+
+void JacobiAlgorithm(MyData      **const restrict Phi,
+		     MyData      **const restrict Phi0,
+		     MyData       *const restrict error,
+		     const MyData *const restrict delta,
+		     const int                    ibeg,
+		     const int                    iend,
+		     const int                    jbeg,
+		     const int                    jend)
+{
+  *error = 0.0;
+  for (int j=jbeg ; j<=jend ; j++)
+    {
+      for (int i=ibeg ; i<=iend ; i++)
+	{
+	  Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] +
+			      Phi0[j-1][i] + Phi0[j+1][i]);
+                
+	  *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]);
+	} /* loop over columns */
+    } /* loop over rows */
+  
+  return;
+}
+
+/* ********************************************************************* */
+void mpi_exchange_nonblocking_1d(MyData **const buffer,
+				 const int      n,
+				 const int      nbrtop,
+				 const int      nbrbottom,
+				 const int      start,
+				 const int      end,
+				 const MPI_Comm comm1d)
+{
+  /* The function is called by each MPI rank
+     - nbrtop is the MPI process with rank + 1
+     - nbrbottom is the MPI process with rank - 1 */
+
+  /* communication request (handle) */
+  
+
+  
+  /************************* Process is ready to receive data ****************************/
+  /* Perform a nonblocking receive from the bottom (rank-1) process                      */
+
+
+  
+  /* Perform a nonblocking receive from the top (rank+1) process                         */
+
+  
+
+  
+  /***************************************************************************************/
+
+  /*************** Process begins to send data *******************************************/
+  /* Perform a nonblocking send to the top (rank+1) process                              */
+
+
+  
+
+  /* - Perform a nonblocking send to the bottom (rank-1) process                         */
+
+
+
+  /***************************************************************************************/
+
+  /* some useful work (computation) could be performed here, as long as the buffers     */
+  /* involved in the pending messages are not modified. We will see how to overlap      */
+  /* communication and computation in the next exercises                                */
+
+
+  /******************** Waits for all given MPI Requests to complete *********************/
+
+  
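+  /* Illustrative sketch (added for clarity, not part of the original exercise
+     skeleton): one possible completion of the sections above, assuming the
+     row-contiguous layout produced by Allocate_2DdblArray, with n + 2 values
+     per row (n interior columns plus the two ghost columns).                 */
+  MPI_Request requests[4];
+
+  /* post the receives first: ghost row below from nbrbottom, ghost row above from nbrtop */
+  MPI_Irecv(&buffer[start - 1][0], n + 2, MPI_MyDatatype, nbrbottom, 0, comm1d, &requests[0]);
+  MPI_Irecv(&buffer[end + 1][0],   n + 2, MPI_MyDatatype, nbrtop,    1, comm1d, &requests[1]);
+
+  /* then send the last and first interior rows to the matching neighbours */
+  MPI_Isend(&buffer[end][0],       n + 2, MPI_MyDatatype, nbrtop,    0, comm1d, &requests[2]);
+  MPI_Isend(&buffer[start][0],     n + 2, MPI_MyDatatype, nbrbottom, 1, comm1d, &requests[3]);
+
+  /* wait for all four requests before the ghost rows are used */
+  MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
+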
+  return;
+}
+
+/* ********************************************************************* */
+
+void get_domains(MyData  **const buffer,
+		 myDomain *const domain,
+		 const int       grid_dim,
+		 const int       rank,
+		 const int       nranks,
+		 const MPI_Comm  comm)
+{
+  /* Master process gathers all the domains */
+  
+  /***************************** get the domain boundaries from each process *************************************/
+  myDomain *boundaries = NULL;
+  if (rank == MASTER)
+    {
+      boundaries = (myDomain *)malloc(nranks * sizeof(*boundaries));
+      if (boundaries == NULL)
+	{
+	  MPI_Abort(comm, EXIT_FAILURE);
+	  exit(EXIT_FAILURE);
+	}
+      boundaries[MASTER].start = domain->start;
+      boundaries[MASTER].end   = domain->end;
+    }
+
+  for (int task=0 ; task<nranks ; task++)
+    {
+      if (rank == MASTER)
+	{
+	  if (task)
+	    {
+	      MPI_Status status;
+	      MPI_Recv((void *)&boundaries[task], sizeof(myDomain), MPI_BYTE, task, 0, comm, &status);
+	      /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */
+	      /* 	{ */
+	      /* 	  free(boundaries); */
+	      /* 	  MPI_Abort(comm, EXIT_FAILURE); */
+	      /* 	  exit(EXIT_FAILURE); */
+	      /* 	} */
+	    }
+#if defined(DEBUG)
+	  printf("\n\t Displacements[%d].start = %d - Displacements[%d].end = %d",
+		 task, boundaries[task].start, task, boundaries[task].end);
+#endif /* DEBUG */	  
+	} /* MASTER */
+      else if (task == rank)
+	{
+	  MPI_Send((void *)domain, sizeof(myDomain), MPI_BYTE, MASTER, 0, comm);
+	}
+    } /* loop over nranks */
+
+#if defined(DEBUG)
+  if (rank == MASTER)
+    {
+      printf("\n");
+      fflush(stdout);
+    }
+#endif /* DEBUG */
+
+  /**************************************************************************************************/
+
+  /***************************************** get the domain from each process *************************/
+
+  for (int task=0 ; task<nranks ; task++)
+    {
+      if (rank == MASTER)
+	{
+	  if (task)
+	    {
+	      MPI_Status status;
+	      /* number of grid points to receive (including ghost points) */
+	      const int nrows = (boundaries[task].end - boundaries[task].start + 1);
+	      const int elements = (nrows * (grid_dim + 2));
+	      MPI_Recv((void *)&buffer[boundaries[task].start][0], elements, MPI_MyDatatype, task, 0, comm, &status);
+	      /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */
+	      /* 	{ */
+	      /* 	  free(boundaries); */
+	      /* 	  MPI_Abort(comm, EXIT_FAILURE); */
+	      /* 	  exit(EXIT_FAILURE); */
+	      /* 	} */
+	    }
+	} /* MASTER */
+      else if (task == rank)
+	{
+	  const int nrows = (domain->end - domain->start + 1);
+	  const int elements = (nrows * (grid_dim + 2));
+	  MPI_Send((void *)&buffer[domain->start][0], elements, MPI_MyDatatype, MASTER, 0, comm);
+	}
+    } /* loop over nranks */  
+  
+  /***************************************************************************************************/
+  
+  if (rank == MASTER)
+    free(boundaries);
+  
+  return;
+}
+
+/* ********************************************************************* */
+void WriteSolution(MyData **const phi,
+		   const int      nx,
+		   const int      ny)
+/*
+*********************************************************************** */
+{
+  const int ibeg = NGHOST;    
+  const int jbeg = NGHOST;
+  const int jend = jbeg + ny - 1;
+    
+  static int nfile = 0;  /* File counter */
+
+  char fname[128];
+  sprintf(fname,"jacobi2D_mpi_send_recv_nonblocking_%02d.bin", nfile);
+    
+  FILE *fp;
+  printf ("> Writing %s\n",fname);
+  fp = fopen(fname, "wb");
+
+  /* discard boundaries */
+  for (int j=jbeg ; j<=jend ; j++)
+    {
+      fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp);
+    }
+    
+  nfile++;
+  fclose(fp);
+}
diff --git a/jacobi/mpi/Send_Recv_nonblocking/src/tools.c b/jacobi/mpi/Send_Recv_nonblocking/src/tools.c
new file mode 100644
index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_nonblocking/src/tools.c
@@ -0,0 +1,59 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/* Authors:  A. Mignone (mignone@to.infn.it)                             */
+/*           V. Cesare  (valentina.cesare@inaf.it)                       */
+/*           D. Goz     (david.goz@inaf.it)                              */
+/*                                                                       */
+/* Date   : June 2024                                                    */
+/*                                                                       */
+/* ///////////////////////////////////////////////////////////////////// */
+
+#include "tools.h"
+
+/* ********************************************************************* */
+MyData **Allocate_2DdblArray(const int nx, const int ny)
+/*
+ * Allocate memory for a double precision array with
+ * nx rows and ny columns
+ *********************************************************************** */
+{
+  MyData **buf = malloc(nx * sizeof(MyData *));
+  assert(buf != NULL);
+
+  buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData));
+  assert(buf[0] != NULL);
+  
+  for (int j=1 ; j<nx ; j++)
+    buf[j] = buf[j-1] + ny;
+    
+  return buf;
+}
+
+/* ********************************************************************* */
+void Show_2DdblArray(const MyData **const A,
+		     const int            nx,
+		     const int            ny,
+		     const char *const    string)
+/* *********************************************************************** */
+{
+  printf ("%s\n",string);
+  printf ("------------------------------\n");
+  for (int i=0 ; i<nx ; i++)
+    {
+      for (int j=0 ; j<ny ; j++)
+	{
+	  printf ("%8.2f  ", A[i][j]);
+	}
+      printf ("\n");
+    }
+  printf ("------------------------------\n");
+}
+/* ********************************************************************* */
+
+double seconds()
+{
+  struct timeval tmp;
+  gettimeofday(&tmp, (struct timezone *)0);
+  double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0;
+
+  return sec;
+}
diff --git a/jacobi/mpi/Send_Recv_paired/Makefile b/jacobi/mpi/Send_Recv_paired/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..5fab606c821047892429f65740ee8f2f4f9473ec
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_paired/Makefile
@@ -0,0 +1,61 @@
+#######################################################################
+# Author: David Goz (david.goz@inaf.it)                               #
+# June  2024                                                          #
+#######################################################################
+#
+# To see all the compilation options
+# $ make info
+#######################################################################
+
+# make.def defines how the application is compiled
+include make.def
+include make_mpi_path
+#######################################################################
+
+.PHONY: info mpi debug valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean
+
+info:
+	@echo ' '
+	@echo '-----------------------------------------------------------------------------------------'
+	@echo '$$ make mpi                 ---> compile the mpi application                             '
+	@echo '$$ make debug               ---> compile the mpi application for debugger                '
+	@echo '$$ make valgrind_memcheck   ---> run the mpi application using Valgrind under Memcheck   '
+	@echo '$$ make valgrind_callgrind  ---> run the mpi application using Valgrind under Callgrind  '
+	@echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind '
+	@echo '$$ make clean               ---> clean up all                                            '
+	@echo '$$ make info                ---> get make info                                           '
+	@echo '-----------------------------------------------------------------------------------------'
+	@echo ' '
+
+mpi: $(PROG)
+
+debug: $(PROG_DEBUG)
+
+valgrind_memcheck: $(PROG_MEMCHECK)
+	@echo 'oooOOO... valgrind_memcheck ...OOOooo'
+	mpirun -n 2 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 5 5
+	@echo 'oooOOO... valgrind_memcheck ...OOOooo'
+
+valgrind_callgrind: $(PROG_CALLGRIND)
+	@echo 'oooOOO... valgrind_callgrind ...OOOooo'
+	mpirun -n 2 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128
+	@echo ' '
+	@echo 'To generate a function-by-function summary from the profile data file:'
+	@echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less'
+	@echo '(kcachegrind is required in order to visualize the output using the GUI)'
+
+valgrind_cachegrind: $(PROG_CACHEGRIND)
+	@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
+	mpirun -n 2 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128
+	@echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less'
+	@echo '(kcachegrind is required in order to visualize the output using the GUI)'
+	@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
+
+clean:
+	rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~
+	rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND)
+	rm -f valgrind_*.txt
+	rm -f cachegrind.out.*
+	rm -f callgrind.*
+	rm -f *bin
+	rm -f jacobi_mpi_Send_Recv_paired_*
diff --git a/jacobi/mpi/Send_Recv_paired/include/allvars.h b/jacobi/mpi/Send_Recv_paired/include/allvars.h
new file mode 100644
index 0000000000000000000000000000000000000000..798e4e6a2f2a740b3ab5a1394ac85eef8da1255c
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_paired/include/allvars.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#define NGHOST   1
+#define NDIM     2 /* 2D */
+#define X        0
+#define Y        1
+#define TOL      1.e-5
+
+#include <mpi.h>
+
+#define MASTER 0
+
+#if defined(SINGLE_PRECISION)
+typedef float MyData;
+#define MPI_MyDatatype MPI_FLOAT
+#else
+/* double precision is assumed by default */
+typedef double MyData;
+#define MPI_MyDatatype MPI_DOUBLE
+#endif /* SINGLE_PRECISION */
diff --git a/jacobi/mpi/Send_Recv_paired/include/tools.h b/jacobi/mpi/Send_Recv_paired/include/tools.h
new file mode 100644
index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_paired/include/tools.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "allvars.h"
+#include <assert.h>
+#include <sys/time.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* function prototypes */
+MyData **Allocate_2DdblArray(const int nx, const int ny);
+void Show_2DdblArray(const MyData **const A,
+                     const int            nx,
+                     const int            ny,
+                     const char *const    string);
+
+double seconds(void);
diff --git a/jacobi/mpi/Send_Recv_paired/make.def b/jacobi/mpi/Send_Recv_paired/make.def
new file mode 100644
index 0000000000000000000000000000000000000000..fd62c5b50dda4b59673ee638fce2fc1c584aff37
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_paired/make.def
@@ -0,0 +1,45 @@
+CC     = mpicc
+CFLAGS = -Wall -Wextra -march=native
+LIBS   = -lm -lmpi
+
+SYSTYPE = $(strip $(shell uname -n))
+
+PROG            = jacobi_mpi_Send_Recv_paired_$(SYSTYPE)
+PROG_DEBUG      = $(PROG)_DEBUG
+PROG_MEMCHECK   = $(PROG)_MEMCHECK
+PROG_CALLGRIND  = $(PROG)_CALLGRIND
+PROG_CACHEGRIND = $(PROG)_CACHEGRIND
+
+HEADERS       = $(wildcard ./include/*.h)
+SOURCES       = $(wildcard ./src/*.c)
+DEPENDENCIES  = $(SOURCES) $(HEADERS) Makefile
+
+$(PROG): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_DEBUG): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -Og -ggdb3 -DDEBUG -fno-omit-frame-pointer -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_MEMCHECK): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_CALLGRIND): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_CACHEGRIND): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
diff --git a/jacobi/mpi/Send_Recv_paired/make_mpi_path b/jacobi/mpi/Send_Recv_paired/make_mpi_path
new file mode 100644
index 0000000000000000000000000000000000000000..3fdf888da9d4837eb7eafb51c93e53516774ce93
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_paired/make_mpi_path
@@ -0,0 +1,10 @@
+# set the MPI install path
+
+# pleiadi
+MPI_INSTALL_PATH = /opt/cluster/spack/opt/spack/linux-centos7-broadwell/gcc-11.2.0/openmpi-4.1.3-djxjqlmzbqwq76bhh3wvgxaefnoczleg
+
+
+
+MPI        = $(MPI_INSTALL_PATH)
+MPI_INC    = $(MPI)/include
+MPI_LIB    = $(MPI)/lib
\ No newline at end of file
diff --git a/jacobi/mpi/Send_Recv_paired/src/jacobi_2D_mpi_send_recv_paired.c b/jacobi/mpi/Send_Recv_paired/src/jacobi_2D_mpi_send_recv_paired.c
new file mode 100644
index 0000000000000000000000000000000000000000..580e80509d27bebc360590223c288f959e587847
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_paired/src/jacobi_2D_mpi_send_recv_paired.c
@@ -0,0 +1,542 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/* Authors:  A. Mignone (mignone@to.infn.it)                             */
+/*           V. Cesare  (valentina.cesare@inaf.it)                       */
+/*           D. Goz     (david.goz@inaf.it)                              */
+/*                                                                       */
+/* Date   : June 2024                                                    */
+/*                                                                       */
+/* ///////////////////////////////////////////////////////////////////// */
+
+#include "allvars.h"
+#include "tools.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+/* #define DEBUG */
+
+typedef struct MyRows
+{
+  int start;
+  int end;
+} myDomain;
+
+/* function prototypes */
+void BoundaryConditions(MyData **const restrict,
+			MyData  *const restrict,
+			MyData  *const restrict,
+			const int,
+			const int);
+
+void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict,
+		     const MyData *const restrict, const int, const int, const int, const int);
+
+void get_domains(MyData  **const buffer,
+		 myDomain *const domain,
+		 const int       grid_dim,
+		 const int       rank,
+		 const int       nranks,
+		 const MPI_Comm  comm);
+
+void mpi_exchange_paired_1d(MyData **const buffer,
+			    const int      n,
+			    const int      nbrtop,
+			    const int      nbrbottom,
+			    const int      start,
+			    const int      end,
+			    const MPI_Comm comm1d);
+
+void WriteSolution(MyData **const phi, const int nx, const int ny);
+
+void copy_grids(MyData **const restrict A,
+                MyData **const restrict B,
+                const int               xbeg,
+                const int               xend,
+                const int               ybeg,
+		const int               yend);
+
+int main(int argc, char **argv)
+{
+  int rank, Nranks;
+  const MPI_Comm comm = MPI_COMM_WORLD;
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(comm, &rank);
+  MPI_Comm_size(comm, &Nranks);
+
+  if ((argc <= 1) && !rank)
+    {
+      printf("\n\t Usage: <executable> <grid_size> \n\n");
+      MPI_Abort(comm, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+
+  /* global X and Y grid size (square grid assumed) */
+  const int NX_GLOB = (int) strtol(argv[1], NULL, 10);
+  const int NY_GLOB = NX_GLOB;
+
+  /******************************************************************************************************/
+  /* 1D MPI decomposition:
+     the physical domain is sliced into slabs along the vertical direction,
+     while the grids are replicated across the MPI processes.
+     This approach is not the most efficient in terms of memory usage,
+     because the arrays are replicated on every MPI process instead of being distributed */
+
+  /* get the remainder, i.e. take into account an uneven
+     decomposition of points among the processes    */
+  const int rem = (NX_GLOB + 2) % Nranks;
+
+  /* get the amount of data for each MPI process:
+     - chunk is supposed to be >= 1 */
+  const int chunk = (NX_GLOB + 2 - rem) / Nranks;
+  if (chunk < 1)
+    {
+      printf("\n\t chunk < 1 ... aborting ...[rank: %d]\n", rank);
+      MPI_Abort(comm, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+  
+  /* get the slab dimension along vertical direction: */
+  int incr, offset;
+  if (rank < rem)
+    {
+      incr = chunk + 1;
+      offset = 0;
+    }
+  else
+    {
+      incr = chunk;
+      offset = rem;
+    }
+
+  myDomain domain;
+  domain.start = ((rank * incr) + offset);
+  domain.end   = (domain.start + incr) - 1;
+  /* boundaries */
+  domain.start = ((domain.start == 0)         ? NGHOST  : domain.start);
+  domain.end   = ((domain.end == NX_GLOB + 1) ? NX_GLOB : domain.end);
+
+  /* rank of the top process */
+  const int nbrtop    = (((rank + 1) >= Nranks) ? MPI_PROC_NULL : (rank + 1));
+  /* rank of the bottom process */
+  const int nbrbottom = (((rank - 1) < 0)       ? MPI_PROC_NULL : (rank - 1));
+
+  /******************************************************************************************************/
+  
+  const MyData xbeg = 0.0;
+  const MyData xend = 1.0;
+  const MyData ybeg = 0.0;
+  const MyData yend = 1.0;
+
+  const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1),
+			      (yend - ybeg)/(NY_GLOB + 1)};
+
+  /* --------------------------------------------------------
+     1. Set grid indices
+     -------------------------------------------------------- */
+    
+  const int ibeg   = NGHOST;
+  const int iend   = ibeg + NX_GLOB - 1;
+  const int nx     = iend - ibeg + 1;
+  const int nx_tot = nx + 2 * NGHOST;
+    
+  const int jbeg   = NGHOST;
+  const int jend   = jbeg + NY_GLOB - 1;
+  const int ny     = jend - jbeg + 1;
+  const int ny_tot = ny + 2 * NGHOST;
+
+  if (rank == MASTER)
+    {
+      printf("\n\t Grid indices:");
+      printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d"    ,ibeg, iend, nx_tot);
+      printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot);
+    }
+
+#if defined(DEBUG)
+  for (int task=0 ; task<Nranks ; task++)
+    {
+      MPI_Barrier(comm);
+      
+      if (task == rank)
+	{
+	  printf("\n\t rank: %d", rank);
+	  printf("\n\t\t domain.start: %d - domain.end: %d", domain.start, domain.end);
+	  printf("\n\t\t nbrtop: %d - nbrbottom: %d \n", nbrtop, nbrbottom);
+	  fflush(stdout);
+	}
+    }
+
+  MPI_Barrier(comm);
+#endif /* DEBUG */
+  
+  /* --------------------------------------------------------
+     2. Generate grid, allocate memory
+        Not optimized because the grids are (unnecessarily)
+	replicated across MPI processes
+     -------------------------------------------------------- */
+
+  /* memory allocation */
+  MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData));
+  MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData));
+  assert((xg != NULL) && (yg != NULL));
+
+  /* initial conditions */
+  for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X];
+  for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y];
+  MyData *x = xg; /* Global and local grids are the same  */
+  MyData *y = yg; /* for serial version of the code       */
+
+  /* grids memory allocation */
+  MyData **phi  = Allocate_2DdblArray(ny_tot, nx_tot);
+  MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot);
+    
+  /* --------------------------------------------------------
+     3. Initialize solution array to 0
+     -------------------------------------------------------- */
+    
+  for (int j=jbeg ; j<=jend ; j++)
+    for (int i=ibeg ; i<=iend ; i++)
+      {
+	phi0[j][i] = 0.0;
+	phi[j][i]  = 0.0;
+      }
+  
+  /* --------------------------------------------------------
+     4. Main iteration cycle
+     -------------------------------------------------------- */
+
+  const double time_start = MPI_Wtime();
+  
+  /* -- 4a. Set boundary conditions first -- */  
+  BoundaryConditions(phi0, x, y, nx, ny);
+  BoundaryConditions(phi, x, y, nx, ny);
+
+  MyData err = 1.0;
+  /* iterations */
+  int k = 0;
+  while (1)
+    {      
+      /* -- 4c. Jacobi's method and residual (interior points) -- */
+      /*       core algorithm                                     */
+
+      err = 0.0;
+      JacobiAlgorithm(phi, phi0, &err, delta,
+		      ibeg, iend, domain.start, domain.end);
+
+      if (!rank)
+	printf("\n\t Iteration = %d - err = %lg\n",k, err);
+
+      /* increase the counter of loop iterations */
+      k++;
+
+      /* get the total error */
+      MyData toterr;
+      /* combines values from all processes and distributes the result back to all processes */
+      MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm);
+      
+      /* check convergence */
+      if (toterr <= TOL)
+	{
+	  /* master task gathers all the domains */
+	  get_domains(phi, &domain, NY_GLOB, rank, Nranks, comm);
+
+	  break;
+	}
+
+      /* -- 4b. MPI communications */
+      mpi_exchange_paired_1d(phi, NX_GLOB,
+			     nbrtop, nbrbottom,
+			     domain.start, domain.end, comm);
+
+      /* swap the pointers */
+      MyData **tmp = phi;
+      phi = phi0;
+      phi0 = tmp;
+    }
+
+  /* master rank writes the solution */
+  if (!rank)
+    {
+      WriteSolution(phi, nx, ny);
+      
+      printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB);
+      printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start);
+    }
+
+  // free memory
+  if (phi0)
+    {
+      free(phi0[0]);
+      free(phi0);
+    }
+
+  if (phi)
+    {
+      free(phi[0]);
+      free(phi);
+    }
+
+  if (yg)
+    free(yg);
+  
+  if (xg)
+    free(xg);
+
+  MPI_Finalize();
+  
+  return 0;
+}
+
+/* ********************************************************************* */
+void BoundaryConditions(MyData **const restrict phi,
+			MyData  *const restrict x,
+			MyData  *const restrict y,
+                        const int               nx,
+			const int               ny)
+/*
+*********************************************************************** */
+{
+  const int ibeg = NGHOST;
+  const int iend = ibeg + nx - 1;
+    
+  const int jbeg = NGHOST;
+  const int jend = jbeg + ny - 1;
+
+  int i,j;
+  
+  /* -- Left -- */
+  i = ibeg - 1;
+  for (int j=jbeg ; j<=jend ; j++)
+    phi[j][i] = (1.0 - y[j]);
+    
+  /* -- Right -- */
+  i = iend + 1;
+  for (int j=jbeg ; j<=jend ; j++)
+    phi[j][i] = (y[j] * y[j]);
+    
+  /* -- Bottom -- */    
+  j = jbeg - 1;
+  for (int i=ibeg ; i<=iend ; i++)
+    phi[j][i] = (1.0 - x[i]);
+    
+  /* -- Top -- */
+  j = jend + 1;
+  for (int i=ibeg ; i<=iend ; i++)
+    phi[j][i] = x[i];
+
+  return;
+}
+
+/* ********************************************************************* */
+
+void JacobiAlgorithm(MyData      **const restrict Phi,
+		     MyData      **const restrict Phi0,
+		     MyData       *const restrict error,
+		     const MyData *const restrict delta,
+		     const int                    ibeg,
+		     const int                    iend,
+		     const int                    jbeg,
+		     const int                    jend)
+{
+  *error = 0.0;
+  for (int j=jbeg ; j<=jend ; j++)
+    {
+      for (int i=ibeg ; i<=iend ; i++)
+	{
+	  Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] +
+			      Phi0[j-1][i] + Phi0[j+1][i]);
+                
+	  *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]);
+	} /* loop over columns */
+    } /* loop over rows */
+  
+  return;
+}
+
+/* ********************************************************************* */
+void mpi_exchange_paired_1d(MyData **const buffer,
+			    const int      n,
+			    const int      nbrtop,
+			    const int      nbrbottom,
+			    const int      start,
+			    const int      end,
+			    const MPI_Comm comm1d)
+{
+  /* The function is called by each MPI rank      */
+  /* - nbrtop    is the MPI process with rank + 1 */
+  /* - nbrbottom is the MPI process with rank - 1 */
+
+  int rank;
+  MPI_Comm_rank(comm1d, &rank);
+  
+  if ((rank % 2) == 0) /* even rank */
+    {
+      /***************** First communication stage ************************************************/
+      /* Perform a blocking send to the top (rank+1) process */
+
+
+      /* Perform a blocking receive from the bottom (rank-1) process */
+
+
+      
+      /********************************************************************************************/
+
+      /**************** Second communication stage ************************************************/
+      /* - Perform a blocking send to the bottom (rank-1) process */
+
+
+      /* Perform a blocking receive from the top (rank+1) process */
+
+
+      /********************************************************************************************/
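+
+      /* Illustrative sketch (added for clarity, not part of the original skeleton):
+         one possible completion of the two stages described above, assuming the
+         row-contiguous layout of Allocate_2DdblArray and n + 2 values per row.   */
+      MPI_Send(&buffer[end][0],       n + 2, MPI_MyDatatype, nbrtop,    0, comm1d);
+      MPI_Recv(&buffer[start - 1][0], n + 2, MPI_MyDatatype, nbrbottom, 0, comm1d, MPI_STATUS_IGNORE);
+
+      MPI_Send(&buffer[start][0],     n + 2, MPI_MyDatatype, nbrbottom, 1, comm1d);
+      MPI_Recv(&buffer[end + 1][0],   n + 2, MPI_MyDatatype, nbrtop,    1, comm1d, MPI_STATUS_IGNORE);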
+    }
+  else /* odd rank */
+    {
+      /***************** First communication stage ************************************************/
+      /* Perform a blocking receive from the bottom (rank-1) process */
+
+
+      /* Perform a blocking send to the top (rank+1) process */
+
+      
+      /********************************************************************************************/
+
+      /**************** Second communication stage ************************************************/
+      /* Perform a blocking receive from the top (rank+1) process */
+
+
+      /* - Perform a blocking send to the bottom (rank-1) process */
+
+
+      /********************************************************************************************/      
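+
+      /* Illustrative sketch (added for clarity, not part of the original skeleton):
+         the mirror ordering of the even-rank branch, so every blocking send is
+         matched by an already-posted receive on the neighbouring rank.           */
+      MPI_Recv(&buffer[start - 1][0], n + 2, MPI_MyDatatype, nbrbottom, 0, comm1d, MPI_STATUS_IGNORE);
+      MPI_Send(&buffer[end][0],       n + 2, MPI_MyDatatype, nbrtop,    0, comm1d);
+
+      MPI_Recv(&buffer[end + 1][0],   n + 2, MPI_MyDatatype, nbrtop,    1, comm1d, MPI_STATUS_IGNORE);
+      MPI_Send(&buffer[start][0],     n + 2, MPI_MyDatatype, nbrbottom, 1, comm1d);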
+    }
+
+  return;
+}
+
+/* ********************************************************************* */
+
+void get_domains(MyData  **const buffer,
+		 myDomain *const domain,
+		 const int       grid_dim,
+		 const int       rank,
+		 const int       nranks,
+		 const MPI_Comm  comm)
+{
+  /* Master process gathers all the domains */
+  
+  /***************************** get the domain boundaries from each process *************************************/
+  myDomain *boundaries = NULL;
+  if (rank == MASTER)
+    {
+      boundaries = (myDomain *)malloc(nranks * sizeof(*boundaries));
+      if (boundaries == NULL)
+	{
+	  MPI_Abort(comm, EXIT_FAILURE);
+	  exit(EXIT_FAILURE);
+	}
+      boundaries[MASTER].start = domain->start;
+      boundaries[MASTER].end   = domain->end;
+    }
+
+  for (int task=0 ; task<nranks ; task++)
+    {
+      if (rank == MASTER)
+	{
+	  if (task)
+	    {
+	      MPI_Status status;
+	      MPI_Recv((void *)&boundaries[task], sizeof(myDomain), MPI_BYTE, task, 0, comm, &status);
+	      /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */
+	      /* 	{ */
+	      /* 	  free(boundaries); */
+	      /* 	  MPI_Abort(comm, EXIT_FAILURE); */
+	      /* 	  exit(EXIT_FAILURE); */
+	      /* 	} */
+	    }
+#if defined(DEBUG)
+	  printf("\n\t Displacements[%d].start = %d - Displacements[%d].end = %d",
+		 task, boundaries[task].start, task, boundaries[task].end);
+#endif /* DEBUG */	  
+	} /* MASTER */
+      else if (task == rank)
+	{
+	  MPI_Send((void *)domain, sizeof(myDomain), MPI_BYTE, MASTER, 0, comm);
+	}
+    } /* loop over nranks */
+
+#if defined(DEBUG)
+  if (rank == MASTER)
+    {
+      printf("\n");
+      fflush(stdout);
+    }
+#endif /* DEBUG */
+
+  /**************************************************************************************************/
+
+  /***************************************** get the domain from each process *************************/
+
+  for (int task=0 ; task<nranks ; task++)
+    {
+      if (rank == MASTER)
+	{
+	  if (task)
+	    {
+	      MPI_Status status;
+	      /* number of grid points to receive (including ghost points) */
+	      const int nrows = (boundaries[task].end - boundaries[task].start + 1);
+	      const int elements = (nrows * (grid_dim + 2));
+	      MPI_Recv((void *)&buffer[boundaries[task].start][0], elements, MPI_MyDatatype, task, 0, comm, &status);
+	      /* if ((status.MPI_ERROR != MPI_SUCCESS) || (status.MPI_SOURCE != task)) */
+	      /* 	{ */
+	      /* 	  free(boundaries); */
+	      /* 	  MPI_Abort(comm, EXIT_FAILURE); */
+	      /* 	  exit(EXIT_FAILURE); */
+	      /* 	} */
+	    }
+	} /* MASTER */
+      else if (task == rank)
+	{
+	  const int nrows = (domain->end - domain->start + 1);
+	  const int elements = (nrows * (grid_dim + 2));
+	  MPI_Send((void *)&buffer[domain->start][0], elements, MPI_MyDatatype, MASTER, 0, comm);
+	}
+    } /* loop over nranks */  
+  
+  /***************************************************************************************************/
+  
+  if (rank == MASTER)
+    free(boundaries);
+  
+  return;
+}
+
+/* ********************************************************************* */
+void WriteSolution(MyData **const phi,
+		   const int      nx,
+		   const int      ny)
+/*
+*********************************************************************** */
+{
+  const int ibeg = NGHOST;    
+  const int jbeg = NGHOST;
+  const int jend = jbeg + ny - 1;
+    
+  static int nfile = 0;  /* File counter */
+
+  char fname[128];
+  sprintf(fname,"jacobi2D_mpi_send_recv_paired_%02d.bin", nfile);
+    
+  FILE *fp;
+  printf ("> Writing %s\n",fname);
+  fp = fopen(fname, "wb");
+
+  /* discard boundaries */
+  for (int j=jbeg ; j<=jend ; j++)
+    {
+      fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp);
+    }
+    
+  nfile++;
+  fclose(fp);
+}
diff --git a/jacobi/mpi/Send_Recv_paired/src/tools.c b/jacobi/mpi/Send_Recv_paired/src/tools.c
new file mode 100644
index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce
--- /dev/null
+++ b/jacobi/mpi/Send_Recv_paired/src/tools.c
@@ -0,0 +1,59 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/* Authors:  A. Mignone (mignone@to.infn.it)                             */
+/*           V. Cesare  (valentina.cesare@inaf.it)                       */
+/*           D. Goz     (david.goz@inaf.it)                              */
+/*                                                                       */
+/* Date   : June 2024                                                    */
+/*                                                                       */
+/* ///////////////////////////////////////////////////////////////////// */
+
+#include "tools.h"
+
+/* ********************************************************************* */
+MyData **Allocate_2DdblArray(const int nx, const int ny)
+/*
+ * Allocate memory for a double precision array with
+ * nx rows and ny columns
+ *********************************************************************** */
+{
+  MyData **buf = malloc(nx * sizeof(MyData *));
+  assert(buf != NULL);
+
+  buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData));
+  assert(buf[0] != NULL);
+  
+  for (int j=1 ; j<nx ; j++)
+    buf[j] = buf[j-1] + ny;
+    
+  return buf;
+}
+
+/* ********************************************************************* */
+void Show_2DdblArray(const MyData **const A,
+		     const int            nx,
+		     const int            ny,
+		     const char *const    string)
+/* *********************************************************************** */
+{
+  printf ("%s\n",string);
+  printf ("------------------------------\n");
+  for (int i=0 ; i<nx ; i++)
+    {
+      for (int j=0 ; j<ny ; j++)
+	{
+	  printf ("%8.2f  ", A[i][j]);
+	}
+      printf ("\n");
+    }
+  printf ("------------------------------\n");
+}
+/* ********************************************************************* */
+
+double seconds()
+{
+  struct timeval tmp;
+  gettimeofday(&tmp, (struct timezone *)0);
+  double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0;
+
+  return sec;
+}
diff --git a/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Broadcast_1.cc b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Broadcast_1.cc
new file mode 100644
index 0000000000000000000000000000000000000000..88302cf76d4366a1831e639d3f554d28e2dd320f
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Broadcast_1.cc
@@ -0,0 +1,88 @@
+#include <iostream>
+
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define NELEMENTS 7
+
+using namespace std;
+
+int main(int argc, char ** argv)
+{
+    int rank, size;
+    int buf[NELEMENTS];
+    
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    
+    if (rank == 0) {
+        for(int i = 0; i < NELEMENTS; i++) buf[i] = 1 + i*i;
+    }
+    
+    MPI_Bcast(buf, NELEMENTS, MPI_INT, 0, MPI_COMM_WORLD);
+    
+//    for(rank = 1; rank < size; rank++)
+//    {
+//        for(int i = 0; i < NELEMENTS; i++)
+//        {
+//            cout<<"buf["<<i<<"] = "<<buf[i]<<endl;
+//        }
+//        cout<<endl;
+//    }
+    
+    if (rank == 0) {
+        cout<<"I am processor "<<rank<<endl;
+        cout<<endl;
+        for(int i = 0; i < NELEMENTS; i++)
+        {
+            cout<<"buf["<<i<<"] = "<<buf[i]<<endl;
+        }
+    }
+    
+    cout<<endl;
+    
+    if (rank == 1) {
+        cout<<"I am processor "<<rank<<endl;
+        cout<<endl;
+        for(int i = 0; i < NELEMENTS; i++)
+        {
+            cout<<"buf["<<i<<"] = "<<buf[i]<<endl;
+        }
+    }
+    
+    cout<<endl;
+    
+    if (rank == 2) {
+        cout<<"I am processor "<<rank<<endl;
+        cout<<endl;
+        for(int i = 0; i < NELEMENTS; i++)
+        {
+            cout<<"buf["<<i<<"] = "<<buf[i]<<endl;
+        }
+    }
+    
+    cout<<endl;
+    
+    if (rank == 3) {
+        cout<<"I am processor "<<rank<<endl;
+        cout<<endl;
+        for(int i = 0; i < NELEMENTS; i++)
+        {
+            cout<<"buf["<<i<<"] = "<<buf[i]<<endl;
+        }
+    }
+    
+//    if (rank == 0) {
+//        cout<<"I am processor "<<rank<<endl;
+//        cout<<endl;
+//        for(int i = 0; i < NELEMENTS; i++)
+//        {
+//            cout<<"buf["<<i<<"] = "<<buf[i]<<endl;
+//        }
+//    }
+    
+    MPI_Finalize();
+    return 0;
+}
diff --git a/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Broadcast_2.cc b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Broadcast_2.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b23b1c8dcf03d484cf0f5d3fb72249a2815be937
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Broadcast_2.cc
@@ -0,0 +1,99 @@
+#include <iostream>
+
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+//#define STR_LENGTH 32
+#define STR_LENGTH 21
+
+using namespace std;
+
+int main(int argc, char ** argv)
+{
+    int rank, size;
+    /* NONONONONO*/
+//    char string[STR_LENGTH];
+    
+//    string[] = {'H','e','l','l','o',',',' ','I',' ','a','m',' ','p','r','o','c','e','s','s','o','r'};
+    
+//    for(int i = 0; i < STR_LENGTH; i++) string[i] = "Hello, I'm processor";
+    
+//    for(int i = 0; i < STR_LENGTH; i++) cout<<string[i]<<endl;;
+    
+//    char mystring[] = { 'H', 'e', 'l', 'l', 'o', '\0' };
+//    for(int i = 0; i < STR_LENGTH; i++)
+    /* NONONONONO*/
+ 
+    
+    
+    
+    char mystring[] = {'H','e','l','l','o',',',' ','I',' ','a','m',' ','p','r','o','c','e','s','s','o','r','\0'};
+    cout<<mystring<<endl;
+    
+    char string[STR_LENGTH];
+    
+//    for(int i = 0; i < STR_LENGTH; i++) string[i] = mystring[i];
+//    
+//    for(int i = 0; i < STR_LENGTH; i++) cout<<string[i];
+//    for(int i = 0; i < STR_LENGTH; i++) cout<<mystring[i];
+//    cout<<endl;
+    
+
+    
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+//    if (rank == 0) {
+//        for(int i = 0; i < STR_LENGTH; i++) string[i] = mystring[i];
+//        for(int i = 0; i < STR_LENGTH; i++) cout<<string[i];
+//        cout<<endl;
+//    }
+    
+    if (rank == 0) {
+        for(int i = 0; i < STR_LENGTH; i++) string[i] = mystring[i];
+        for(int i = 0; i < STR_LENGTH; i++) cout<<string[i];
+        cout<<endl;
+    }
+    
+    MPI_Bcast(string, STR_LENGTH, MPI_CHAR, 0, MPI_COMM_WORLD);
+    
+//    if (rank == 0) {
+//        for(int i = 0; i < STR_LENGTH; i++) cout<<string[i];
+//        cout<<" "<<rank<<endl;
+//    }
+//    
+//    cout<<endl;
+//    
+//    if (rank == 1) {
+//        for(int i = 0; i < STR_LENGTH; i++) cout<<string[i];
+//        cout<<" "<<rank<<endl;
+//    }
+//    
+//    cout<<endl;
+//    
+//    if (rank == 2) {
+//        for(int i = 0; i < STR_LENGTH; i++) cout<<string[i];
+//        cout<<" "<<rank<<endl;
+//    }
+//    
+//    cout<<endl;
+//    
+//    if (rank == 3) {
+//        for(int i = 0; i < STR_LENGTH; i++) cout<<string[i];
+//        cout<<" "<<rank<<endl;
+//    }
+//    
+//    cout<<endl;
+    
+//    for(rank = 0; rank < size; rank++)
+    for(int n = 0; n < size; n++)
+    {
+        for(int i = 0; i < STR_LENGTH; i++) cout<<string[i];
+        cout<<" "<<rank<<endl;
+    }
+
+    MPI_Finalize();
+    return 0;
+}
diff --git a/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Gather_Scatter.c b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Gather_Scatter.c
new file mode 100644
index 0000000000000000000000000000000000000000..917dc94c3a19ab8a4d68c07b423174b9b83848ed
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Gather_Scatter.c
@@ -0,0 +1,36 @@
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char ** argv)
+{
+    int n, rank, size;
+    double data;
+    double *send_buf, *recv_buf;
+    
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    
+    recv_buf = (double *) malloc(size*sizeof(double)); // allocate memory
+    send_buf = (double *) malloc(size*sizeof(double));
+    
+    data = rank*rank + 1.0; // generate data on different procs
+    MPI_Gather(&data,    1, MPI_DOUBLE, recv_buf, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    
+    if (rank == 0){
+        printf ("[Gather()]:\n");
+        for (n = 0; n < size; n++) printf ("rnd[%d] = %f\n",n,recv_buf[n]);
+    }
+    
+    if (rank == 0){
+        for (n = 0; n < size; n++) send_buf[n] = n*n - 1.0; // generate "size" values on the root process
+    }
+    
+    MPI_Scatter(send_buf, 1, MPI_DOUBLE, &data,    1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    
+    printf ("[Scatter, proc #%d] = %f\n",rank,data);
+    
+    MPI_Finalize();
+    return 0;
+}
diff --git a/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Heat_Equation_Gather.c b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Heat_Equation_Gather.c
new file mode 100644
index 0000000000000000000000000000000000000000..3e80507fd25e7c20f3a6e9c79b6cd06b2d26f07d
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/Heat_Equation_Gather.c
@@ -0,0 +1,251 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/*!
+ \file
+ \brief Solve 1D heat equation.
+ 
+ Solve the 1D heat equation using a 1st order explicit method
+ on a parallel domain.
+ 
+ \author A. Mignone (mignone@to.infn.it)
+ \date   March 12, 2020
+ */
+/* ///////////////////////////////////////////////////////////////////// */
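+
+/* Scheme used below (forward Euler in time, central differences in space):
+ *
+ *   u_i^{n+1} = u_i^n + dt/dx^2 * (u_{i-1}^n - 2 u_i^n + u_{i+1}^n)
+ *
+ * which is stable for dt <= dx^2/2 (unit diffusion coefficient); the code
+ * sets dt = cfl*dx*dx with cfl = 0.5, i.e. the marginal value.
+ */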
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#define PARALLEL
+
+#ifdef PARALLEL
+#include <mpi.h>
+#endif
+
+#define NX_GLOB   64  /* Global number of interior points */
+#define NGHOST     1
+
+void Write (double *, double *, int, int);
+
+int main(int argc, char ** argv)
+{
+    int    i, k, beg, end;
+    int    nx_loc;               /* Local grid size */
+    int    dstL = -1, dstR=-1;   /* Rank of left and right neighbour procs */
+    int    rank=0, size=1;
+    double t, tstop, dt, cfl = 0.5;
+    double *u0;
+    double *u1;
+    double xbeg =  0.0;
+    double xend = +1.0;
+    double xglob[NX_GLOB + 2*NGHOST];   // Global grid array
+    double *xloc;
+    double dx;    /* Mesh spacing */
+#ifdef PARALLEL
+    double *send_buf;
+    double *recv_buf;
+#endif
+    
+    /* --------------------------------------------------------
+     0. Initialize parallel environment & get neighbour
+     proc rank
+     -------------------------------------------------------- */
+    
+#ifdef PARALLEL
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    
+    dstL   = rank - 1;
+    dstR   = rank + 1;
+    if (dstL < 0)     dstL = MPI_PROC_NULL;
+    if (dstR >= size) dstR = MPI_PROC_NULL;
+#endif
+    
+    /* --------------------------------------------------------
+     1. Generate global & local grids
+     -------------------------------------------------------- */
+    
+#ifdef PARALLEL
+    nx_loc = NX_GLOB/size;   /* assumes NX_GLOB is divisible by the number of processes */
+    beg    = NGHOST;
+    end    = beg + nx_loc - 1;
+    
+    dx = (xend - xbeg)/(NX_GLOB+1);
+    for (i = 0; i < NX_GLOB + 2*NGHOST; i++){
+        xglob[i] = xbeg + (i-beg+1)*dx;
+    }
+    xloc = xglob + nx_loc*rank;  /* Use pointer arithmetic */
+#else
+    nx_loc = NX_GLOB;
+    beg    = NGHOST;
+    end    = beg + nx_loc - 1;
+    
+    dx = (xend - xbeg)/(NX_GLOB+1);
+    for (i = 0; i < NX_GLOB + 2*NGHOST; i++){
+        xglob[i] = xbeg + (i-beg+1)*dx;
+    }
+    xloc = xglob;  /* Use pointer arithmetic */
+#endif
+    
+    /* --------------------------------------------------------
+     2. Allocate memory on local grids
+     -------------------------------------------------------- */
+    
+    u0 = (double *) malloc((nx_loc + 2*NGHOST)*sizeof(double));
+    u1 = (double *) malloc((nx_loc + 2*NGHOST)*sizeof(double));
+    
+#ifdef PARALLEL
+    {
+        int proc, go;
+        for (proc = 0; proc < size; proc++){
+            go = proc;
+            MPI_Bcast(&go, 1, MPI_INT, 0, MPI_COMM_WORLD);
+            if (rank == go) {
+                printf ("[Rank %d]\n",rank);
+                printf ("  dstL = %d, dstR = %d\n",dstL, dstR);
+                printf ("  beg, end = %d, %d; x = [%f, %f]\n",
+                        beg, end, xloc[beg],xloc[end]);
+            }
+            MPI_Barrier(MPI_COMM_WORLD);
+        }
+    }
+#endif
+    
+    /* --------------------------------------------------------
+     3. Set initial condition
+     -------------------------------------------------------- */
+    
+    for (i = beg; i <= end; i++){
+        u0[i] = sin(M_PI*xloc[i]);
+    }
+    
+    /* --------------------------------------------------------
+     4. Advance solution
+     -------------------------------------------------------- */
+    
+    t     = 0.0;
+    tstop = 0.1;
+    dt    = cfl*dx*dx;
+    k     = 0;
+    
+    Write (xloc, u0, beg, end);
+    while (t < tstop){
+        
+        if (rank == 0){
+            printf ("step #%d; t = %8.3e\n",k,t);
+        }
+        
+        /* -- 4a. Set physical boundary conditions -- */
+        
+        if (dstL == MPI_PROC_NULL){
+            u0[beg-1] = 0.0;
+        }
+        if (dstR == MPI_PROC_NULL){
+            u0[end+1] = 0.0;
+        }
+        
+        /* -- 4b. Set inter-process boundary conditions -- */
+        
+#ifdef PARALLEL
+        send_buf = u0 + end - (NGHOST - 1);  // Address of rightmost interior point
+        recv_buf = u0 + 0;                   // Address of leftmost ghost zone
+        MPI_Sendrecv (send_buf, NGHOST, MPI_DOUBLE, dstR, 0,
+                      recv_buf, NGHOST, MPI_DOUBLE, dstL, 0,
+                      MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        
+        send_buf = u0 + beg;     // Address of leftmost interior point
+        recv_buf = u0 + end + 1; // Address of first ghost zone on the right
+        MPI_Sendrecv (send_buf, NGHOST, MPI_DOUBLE, dstL, 0,
+                      recv_buf, NGHOST, MPI_DOUBLE, dstR, 0,
+                      MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+#endif
+        
+        /* -- 4c. Advance solution by one time step -- */
+        
+        for (i = beg; i <= end; i++){
+            u1[i] = u0[i] + dt/(dx*dx)*(u0[i-1] - 2.0*u0[i] + u0[i+1]);
+        }
+        t += dt;
+        k++;
+        
+        /* -- 4d. Copy arrays for next time level -- */
+        
+        for (i = beg; i <= end; i++) u0[i] = u1[i];
+    }
+    Write (xloc, u0, beg, end);
+    
+    free(u0);   // release the solution arrays
+    free(u1);
+    
+#ifdef PARALLEL
+    MPI_Finalize();
+#endif
+    return 0;
+}
+
+/* ********************************************************************* */
+void Write (double *x, double *u, int beg, int end)
+/*
+ *********************************************************************** */
+{
+    int    i;
+    int    rank;
+    static int n = 0;  /* File number */
+    FILE *fp;
+    char fname[32];
+    
+    /* --------------------------------------------------------
+     1. Serial output
+     -------------------------------------------------------- */
+    
+#ifndef PARALLEL
+    sprintf (fname,"heat_eq%02d.dat",n);
+    fp = fopen (fname,"w");
+    for (i = beg; i <= end; i++) fprintf (fp, "%12.6e  %12.6e\n", x[i], u[i]);
+    fclose(fp);
+#endif
+    
+    /* --------------------------------------------------------
+     2. Parallel output
+     -------------------------------------------------------- */
+    
+#ifdef PARALLEL
+    
+    /* -- 2a. Process #0 gathers data and does the writing -- */
+    
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    int nx_loc = end - beg + 1;
+    static double *recv_buf;
+    if (recv_buf == NULL) {
+        recv_buf = (double *) malloc((NX_GLOB + 2*NGHOST)*sizeof(double));
+    }
+    
+    MPI_Gather (u + beg, nx_loc, MPI_DOUBLE,
+                recv_buf + beg, nx_loc, MPI_DOUBLE, 0, MPI_COMM_WORLD);
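+    /* Contributions arrive in rank order; since the domain is split into
+       contiguous blocks of nx_loc cells, this is also the global grid order. */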
+    if (rank == 0){
+        sprintf (fname,"heat_eq%02d.dat",n);
+        fp = fopen (fname,"w");
+        for (i = beg; i < beg+NX_GLOB; i++) {
+            fprintf (fp, "%f  %f\n", x[i], recv_buf[i]);
+        }   
+        fclose(fp);
+    }
+    
+    /* -- 2b. Shared file pointer -- */
+    
+    /* -- 2c. Individual file pointer -- */
+    
+#endif
+    
+    n++;
+}
+
+
+
+/*
+ MAPLE Script:
+ 
+ restart;
+ u := A*exp(-D*mu^2*t)*sin(mu*x + B) + C;
+ eq := diff(u,t) - D*diff(diff(u,x),x);
+ simplify(eq);
+ 
+ 
+ */
diff --git a/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/lecture_Broadcast_Gather_Scatter.pdf b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/lecture_Broadcast_Gather_Scatter.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..b31c13174dc4baf16637694a733b0125dc2fc950
Binary files /dev/null and b/jacobi/mpi/miscellaneous/Broadcast_Gather_Scatter/lecture_Broadcast_Gather_Scatter.pdf differ
diff --git a/jacobi/mpi/miscellaneous/MPI_Datatypes/Subarray.c b/jacobi/mpi/miscellaneous/MPI_Datatypes/Subarray.c
new file mode 100644
index 0000000000000000000000000000000000000000..157ed4519c5784c2730ae188d0cc4b1c290615b4
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/MPI_Datatypes/Subarray.c
@@ -0,0 +1,96 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/*!
+ \file
+ \brief Subarray example.
+ 
+ Proc #0 creates a large array Abig with NROWS rows and NCOLS columns.
+ Proc #1 receives a subarray with nrows_sub rows and ncols_sub
+ columns starting at starts[].
+ 
+ \author A. Mignone (mignone@to.infn.it)
+ \date   March 14, 2020
+ */
+/* ///////////////////////////////////////////////////////////////////// */
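+
+/* With the sizes used below (bigsizes = {5,6}, subsizes = {3,2},
+ * starts = {1,3}, C ordering) the subarray received by proc #1 is
+ *
+ *   009  010
+ *   015  016
+ *   021  022
+ */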
+#include <stdio.h>
+#include <stdlib.h>
+#include <mpi.h>
+#include "tools.c"
+
+#define NROWS   5
+#define NCOLS   6
+
+int main(int argc, char **argv)
+{
+    int i,j;
+    int rank, size;
+    int nrows_sub = 3;
+    int ncols_sub = 2;
+    int starts[2]    = {1,3};
+    int subsizes[2]  = {nrows_sub,ncols_sub};
+    int bigsizes[2]  = {NROWS, NCOLS};
+    int **Abig;
+    int **Asub;
+    MPI_Datatype MPI_Subarr;
+    
+    /* --------------------------------------------------------
+     0. Initialize the MPI execution environment,
+     create subarray type
+     -------------------------------------------------------- */
+    
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    
+    MPI_Type_create_subarray(2, bigsizes, subsizes, starts,
+                             MPI_ORDER_C, MPI_INT, &MPI_Subarr);
+    MPI_Type_commit(&MPI_Subarr);
+    
+    if (size < 2) {
+        if (rank == 0){
+            fprintf(stderr,"! Need at least 2 processes.\n");
+        }
+        MPI_Finalize();
+        return 1;
+    }
+    
+    /* --------------------------------------------------------
+     1. Proc #0 creates the big array,
+     Proc #1 receives the subarray.
+     -------------------------------------------------------- */
+    
+    if (rank == 0) {
+        Abig = Allocate_2DintArray(NROWS, NCOLS);
+        
+        /* -- 1a. Fill array -- */
+        
+        for (i = 0; i < NROWS; i++){
+            for (j = 0; j < NCOLS; j++){
+                Abig[i][j] = j + i*NCOLS;
+            }}
+        
+        /* -- 1b. Show array -- */
+        
+        Show_2DintArray(Abig, NROWS, NCOLS, "Big array (proc #0):");
+        
+        MPI_Send(&(Abig[0][0]), 1, MPI_Subarr, 1, 123, MPI_COMM_WORLD);
+        
+        free(Abig[0]);
+        free(Abig);
+        
+    } else if (rank == 1) {
+        
+        Asub = Allocate_2DintArray(nrows_sub, ncols_sub);
+        
+        MPI_Recv(&(Asub[0][0]), nrows_sub*ncols_sub, MPI_INT, 0,
+                 123, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        
+        Show_2DintArray(Asub, nrows_sub, ncols_sub, "Received subarray (proc #1):");
+        
+        free(Asub[0]);
+        free(Asub);
+    }
+    
+    MPI_Type_free(&MPI_Subarr);
+    MPI_Finalize();
+    return 0;
+}
diff --git a/jacobi/mpi/miscellaneous/MPI_Datatypes/lecture_MPI_Datatypes.pdf b/jacobi/mpi/miscellaneous/MPI_Datatypes/lecture_MPI_Datatypes.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..088b82d3adff8d2871ec9d9294f6640900f6cbef
Binary files /dev/null and b/jacobi/mpi/miscellaneous/MPI_Datatypes/lecture_MPI_Datatypes.pdf differ
diff --git a/jacobi/mpi/miscellaneous/MPI_Datatypes/tools.c b/jacobi/mpi/miscellaneous/MPI_Datatypes/tools.c
new file mode 100644
index 0000000000000000000000000000000000000000..58d03366e6ba5027d07cd155f396375e8d991d0d
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/MPI_Datatypes/tools.c
@@ -0,0 +1,104 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/*!
+ \file
+ \brief Collection of useful functions for the MPI course.
+ 
+ This file provides some simple functions for memory allocation
+ (double and int 2D arrays) and for printing them.
+ Function prototyping is already included here and should not be
+ repeated elsewhere.
+ Simply include this file after the main header section when needed:
+ \code
+ #include <stdio.h>
+ ...
+ #include "tools.c"
+ ...
+ int main()
+ {
+ .
+ .
+ .
+ }
+ \endcode
+ 
+ \author A. Mignone (mignone@to.infn.it)
+ \date   March 14, 2020
+ */
+/* ///////////////////////////////////////////////////////////////////// */
+double **Allocate_2DdblArray(int, int);
+int    **Allocate_2DintArray(int, int);
+void   Show_2DdblArray(double **, int, int, const char *);
+void   Show_2DintArray(int **, int, int, const char *);
+
+/* ********************************************************************* */
+double **Allocate_2DdblArray(int nx, int ny)
+/*
+ * Allocate memory for a double precision array with
+ * nx rows and ny columns
+ *********************************************************************** */
+{
+    int j;
+    double **buf;
+    
+    buf    = (double **)malloc (nx*sizeof(double *));
+    buf[0] = (double *) malloc (nx*ny*sizeof(double));
+    for (j = 1; j < nx; j++) buf[j] = buf[j-1] + ny;
+    
+    return buf;
+}
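+/* Note: the array is allocated as a single contiguous block of nx*ny
+ * elements pointed to by buf[0], plus a vector of row pointers, so it can
+ * be passed to MPI calls as &A[0][0] and released with free(A[0]); free(A);
+ */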
+/* ********************************************************************* */
+int **Allocate_2DintArray(int nx, int ny)
+/*
+ * Allocate memory for an integer-type array with
+ * nx rows and ny columns
+ *********************************************************************** */
+{
+    int j;
+    int **buf;
+    
+    buf    = (int **)malloc (nx*sizeof(int *));
+    buf[0] = (int *) malloc (nx*ny*sizeof(int));
+    for (j = 1; j < nx; j++) buf[j] = buf[j-1] + ny;
+    
+    return buf;
+}
+
+
+/* ********************************************************************* */
+void Show_2DdblArray(double **A, int nx, int ny, const char *string)
+/*
+ *********************************************************************** */
+{
+    int i, j;
+    
+    printf ("%s\n",string);
+    printf ("------------------------------\n");
+    for (i = 0; i < nx; i++) {
+        for (j = 0; j < ny; j++) {
+            printf ("%8.2f  ", A[i][j]);
+        }
+        printf ("\n");
+    }
+    printf ("------------------------------\n");
+}
+/* ********************************************************************* */
+void Show_2DintArray(int **A, int nx, int ny, const char *string)
+/*
+ *********************************************************************** */
+{
+    int i, j;
+    
+    printf ("%s\n",string);
+    for (j = 0; j < ny; j++) printf ("-----");
+    printf ("\n");
+    
+    for (i = 0; i < nx; i++) {
+        for (j = 0; j < ny; j++) {
+            printf ("%03d  ", A[i][j]);
+        }
+        printf ("\n");
+    }
+    
+    for (j = 0; j < ny; j++) printf ("-----");
+    printf ("\n");
+}
diff --git a/jacobi/mpi/miscellaneous/Parallel_IO/lecture_Parallel_IO.pdf b/jacobi/mpi/miscellaneous/Parallel_IO/lecture_Parallel_IO.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..befbe43337c70f92e19b5af9857fda500df097fc
Binary files /dev/null and b/jacobi/mpi/miscellaneous/Parallel_IO/lecture_Parallel_IO.pdf differ
diff --git a/jacobi/mpi/miscellaneous/Parallel_IO/write_arr1D.c b/jacobi/mpi/miscellaneous/Parallel_IO/write_arr1D.c
new file mode 100644
index 0000000000000000000000000000000000000000..cf1cf14136b40806c3c3df95bac48f75ba21ffbc
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/Parallel_IO/write_arr1D.c
@@ -0,0 +1,101 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/*!
+ \file
+ \brief Writing of a 1D buffer in parallel.
+ 
+ Write a 1D buffer in parallel using 4 different versions.
+ For a contiguous data type, use:
+ 
+ VERSION == 1 employs shared file pointer
+ VERSION == 2 employs individual file pointer with offset computed
+ by the MPI_File_seek()
+ VERSION == 3 defines a file view with offset depending on the process
+ rank
+ VERSION == 4 similar to VERSION == 3, but using a contiguous MPI
+ datatype
+ 
+ A non-contiguous version is handled with VERSION == 5, which defines
+ a MPI vector datatype and a file view.
+ 
+ \author A. Mignone (mignone@to.infn.it)
+ \date   March 1, 2020
+ */
+/* ///////////////////////////////////////////////////////////////////// */
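+
+/* Example of the resulting file layout (a sketch, assuming VERSION == 5,
+ * 2 processes and NELEM = 3): each rank writes buf[i] = rank + 0.1*i with a
+ * stride of "size" doubles, so arr1D.bin contains the interleaved sequence
+ *
+ *   0.0  1.0  0.1  1.1  0.2  1.2
+ */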
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <mpi.h>
+
+#define NELEM    3
+
+#define VERSION  5
+int main(int argc, char **argv)
+{
+    int i, rank, size;
+    double buf[NELEM];
+    char fname[] = "arr1D.bin";
+    MPI_File   fh;
+    MPI_Offset disp;
+    
+    /* --------------------------------------------------------
+     0. Initialize the MPI execution environment
+     -------------------------------------------------------- */
+    
+    MPI_Init (&argc, &argv);
+    MPI_Comm_rank (MPI_COMM_WORLD, &rank);
+    MPI_Comm_size (MPI_COMM_WORLD, &size);
+    
+    /* --------------------------------------------------------
+     1. Initialize array
+     -------------------------------------------------------- */
+    
+    for (i = 0; i < NELEM; i++) buf[i] = rank;
+    
+    /* --------------------------------------------------------
+     2. Delete, re-open file and write
+     -------------------------------------------------------- */
+    
+    MPI_File_delete(fname, MPI_INFO_NULL);
+    MPI_File_open(  MPI_COMM_WORLD, fname,
+                  MPI_MODE_CREATE | MPI_MODE_RDWR,
+                  MPI_INFO_NULL, &fh  );
+    
+#if VERSION == 1     // Contiguous data, shared file pointer
+    MPI_File_write_ordered(fh, buf, NELEM, MPI_DOUBLE, MPI_STATUS_IGNORE);
+#elif VERSION == 2   // Contiguous data, individual file pointer
+    disp = rank*NELEM*sizeof(double);   // In bytes
+    MPI_File_seek(fh, disp, MPI_SEEK_SET);
+    MPI_File_write(fh, buf, NELEM, MPI_DOUBLE, MPI_STATUS_IGNORE);
+#elif VERSION == 3   // Contiguous data, file view
+    disp = rank*NELEM*sizeof(double);
+    MPI_File_set_view(fh, disp, MPI_DOUBLE, MPI_DOUBLE, "native", MPI_INFO_NULL);
+    MPI_File_write(fh, buf, NELEM, MPI_DOUBLE, MPI_STATUS_IGNORE);
+#elif VERSION == 4   // Contiguous data, file view with MPI datatype
+    MPI_Datatype cntg_type;
+    
+    MPI_Type_contiguous(NELEM, MPI_DOUBLE, &cntg_type);
+    MPI_Type_commit(&cntg_type);
+    
+    disp = rank*NELEM*sizeof(double);
+    
+    MPI_File_set_view(fh, disp, MPI_BYTE, cntg_type, "native", MPI_INFO_NULL);
+    MPI_File_write(fh, buf, 1, cntg_type, MPI_STATUS_IGNORE);
+    MPI_Type_free(&cntg_type);
+#elif VERSION == 5   // Non-contiguous data, file view, vector type
+    MPI_Datatype vec_type;
+    
+    for (i = 0; i < NELEM; i++) buf[i] = rank + 0.1*i;
+    
+    MPI_Type_vector(NELEM, 1, size, MPI_DOUBLE, &vec_type);
+    MPI_Type_commit(&vec_type);
+    
+    disp = rank*sizeof(double);
+    MPI_File_set_view(fh, disp, MPI_DOUBLE, vec_type, "native", MPI_INFO_NULL);
+    MPI_File_write(fh, buf, NELEM, MPI_DOUBLE, MPI_STATUS_IGNORE);
+    MPI_Type_free(&vec_type);
+#endif
+    
+    MPI_File_close(&fh);
+    MPI_Finalize();
+    return 0;
+}
diff --git a/jacobi/mpi/miscellaneous/Parallel_IO/write_arr2D.c b/jacobi/mpi/miscellaneous/Parallel_IO/write_arr2D.c
new file mode 100644
index 0000000000000000000000000000000000000000..1aaf24504595d0ebf2e3d5e3212f03179c5955d1
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/Parallel_IO/write_arr2D.c
@@ -0,0 +1,105 @@
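+/* ///////////////////////////////////////////////////////////////////// */
+/*!
+ \file
+ \brief Writing of a 2D array in parallel.
+ 
+ Each process fills its local nx X ny block of an NX_GLOB X NY_GLOB array
+ with its own rank, and all processes write the global array to a single
+ binary file ("arr2D.bin") through an MPI subarray file view and
+ MPI_File_write_all().
+ */
+/* ///////////////////////////////////////////////////////////////////// */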
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <mpi.h>
+#include "tools.c"
+
+#define NDIM      2
+#define NX_GLOB   16
+#define NY_GLOB   8
+
+#define VERSION   2
+
+int main(int argc, char ** argv)
+{
+    int i, j, rank, size;
+    int nx, ny;
+    int nprocs[NDIM];
+    int gsizes[NDIM];
+    int lsizes[NDIM];
+    int periods[NDIM] = {0,0};
+    int coords[NDIM];
+    int start[NDIM];
+    double **A;
+    MPI_Datatype subarr_type;
+    MPI_Comm MPI_COMM_CART;
+    char fname[] = "arr2D.bin";
+    
+    /* --------------------------------------------------------
+     0. Initialize the MPI execution environment
+     -------------------------------------------------------- */
+    
+    MPI_Init (&argc, &argv);
+    MPI_Comm_rank (MPI_COMM_WORLD, &rank);
+    MPI_Comm_size (MPI_COMM_WORLD, &size);
+    
+    /* --------------------------------------------------------
+     1. Create a 2D domain decomposition
+     -------------------------------------------------------- */
+    
+    /* -- 1a. Attempt to find a process decomposition as close to square as possible -- */
+    
+    nprocs[0] = (int)sqrt(size);
+    nprocs[1] = size/nprocs[0];
+    if (nprocs[0]*nprocs[1] != size){
+        if (rank == 0) printf ("! Cannot decompose\n");
+        MPI_Finalize();
+        return 1;
+    }
+    
+    /* -- 1b. Create communicator -- */
+    
+    MPI_Cart_create(MPI_COMM_WORLD, NDIM, nprocs, periods, 0, &MPI_COMM_CART);
+    MPI_Cart_get(MPI_COMM_CART, NDIM, nprocs, periods, coords);
+    
+    gsizes[0] = NX_GLOB;
+    gsizes[1] = NY_GLOB;
+    
+    lsizes[0] = nx = NX_GLOB/nprocs[0];
+    lsizes[1] = ny = NY_GLOB/nprocs[1];
+    
+    if (rank == 0){
+        printf ("Domain decomposed in %d X %d procs\n",nprocs[0],nprocs[1]);
+        printf ("Local grid size = %d X %d\n",lsizes[0], lsizes[1]);
+    }
+    
+    /* --------------------------------------------------------
+     2. Allocate memory and fill 2D array on local domain
+     -------------------------------------------------------- */
+    
+    A = Allocate_2DdblArray(ny,nx);
+    for (j = 0; j < ny; j++) {
+        for (i = 0; i < nx; i++) {
+            A[j][i] = rank;
+        }}
+    
+    /* --------------------------------------------------------
+     3. Create new datatypes
+     -------------------------------------------------------- */
+    
+    start[0] = coords[0]*lsizes[0];
+    start[1] = coords[1]*lsizes[1];
+    
+    MPI_Type_create_subarray (NDIM, gsizes, lsizes, start, MPI_ORDER_FORTRAN,
+                              MPI_DOUBLE, &subarr_type);
+    MPI_Type_commit (&subarr_type);
+    
+    /* --------------------------------------------------------
+     4. Open file for writing
+     -------------------------------------------------------- */
+    
+    MPI_File_delete(fname, MPI_INFO_NULL);
+    
+    MPI_File fh;
+    
+    MPI_File_open(MPI_COMM_CART, fname,
+                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
+    MPI_File_set_view(fh, 0, MPI_DOUBLE, subarr_type, "native", MPI_INFO_NULL);
+    MPI_File_write_all(fh, A[0], nx*ny, MPI_DOUBLE, MPI_STATUS_IGNORE);
+    MPI_File_close(&fh);
+    
+    MPI_Type_free(&subarr_type);
+    MPI_Finalize();
+    return 0;
+}
diff --git a/jacobi/mpi/miscellaneous/cartesian b/jacobi/mpi/miscellaneous/cartesian
new file mode 100644
index 0000000000000000000000000000000000000000..cef6cb6704f8709644945aeb816d610ad2814635
Binary files /dev/null and b/jacobi/mpi/miscellaneous/cartesian differ
diff --git a/jacobi/mpi/miscellaneous/cartesian.c b/jacobi/mpi/miscellaneous/cartesian.c
new file mode 100644
index 0000000000000000000000000000000000000000..8778f5828c609db41ef8aef95f8bcd484fdceb25
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/cartesian.c
@@ -0,0 +1,71 @@
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define SIZE 2
+#define X    0
+#define Y    1
+
+int main(int argc, char **argv)
+{
+  int task, ntasks;
+  MPI_Init(&argc, &argv);
+  MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
+  MPI_Comm_rank(MPI_COMM_WORLD, &task);
+
+  if (argc <= 1)
+    {
+      if (!task)
+        printf("\n\t Usage: <executable> <number processes along X> \n");
+
+      /* Abort on all ranks: otherwise the remaining processes would fall
+         through and dereference argv[1] below. */
+      MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+  
+  const int cartesian_grid_x = (int)strtol(argv[1], NULL, 10);
+  const int cartesian_grid_y = ((ntasks % cartesian_grid_x == 0) ? (ntasks / cartesian_grid_x) : -1);
+  if (cartesian_grid_y == -1)
+    {
+      if (!task)
+	{
+	  printf("\n\t ntasks %% cartesian_grid_x != 0 ... aborting ...\n");
+	  fflush(stdout);
+	}
+
+      MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+  
+  const int dims[SIZE] = {cartesian_grid_x, cartesian_grid_y};
+  const int periods[SIZE] = {0, 0};
+  const int reorder = 0;
+  MPI_Comm comm2d;
+  MPI_Cart_create(MPI_COMM_WORLD, SIZE, dims, periods, reorder, &comm2d);
+
+  int coords[SIZE];
+  MPI_Cart_coords(comm2d, task, SIZE, coords);
+
+  int nbrright, nbrleft;
+  MPI_Cart_shift(comm2d, Y, 1, &nbrleft, &nbrright);
+
+  int nbrtop, nbrbottom;
+  MPI_Cart_shift(comm2d, X, 1, &nbrbottom, &nbrtop);
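+
+  /* With non-periodic dimensions (periods = {0,0}) MPI_Cart_shift returns
+     MPI_PROC_NULL (printed as a negative value below) for neighbours that
+     fall outside the grid. */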
+
+  for (int rank=0 ; rank<ntasks; rank++)
+    {
+      MPI_Barrier(MPI_COMM_WORLD);
+      if (rank == task)
+	{
+	  printf("\n\t Task: %d\n", task);
+	  printf("\t\t coords[%d, %d]", coords[X], coords[Y]);
+	  printf("\n\t\t nbrright: %d - nbrleft  : %d", nbrright, nbrleft);
+	  printf("\n\t\t nbrtop  : %d - nbrbottom: %d \n\n", nbrtop, nbrbottom);
+	  fflush(stdout);
+	}
+    }
+
+  MPI_Comm_free(&comm2d);
+  MPI_Finalize();
+  
+  return 0;
+}
diff --git a/jacobi/mpi/miscellaneous/cartesian.cpp b/jacobi/mpi/miscellaneous/cartesian.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7b5a7e505a9d63ade4f4e8e35f48cb895e8e1660
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/cartesian.cpp
@@ -0,0 +1,71 @@
+#include <mpi.h>
+#include <cstdlib>   // strtol(), exit(), EXIT_FAILURE
+#include <iostream>
+
+#define SIZE 2
+#define X    0
+#define Y    1
+
+int main(int argc, char **argv)
+{
+  using namespace std;
+  
+  int task, ntasks;
+  MPI_Init(&argc, &argv);
+  MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
+  MPI_Comm_rank(MPI_COMM_WORLD, &task);
+
+  if (argc <= 1)
+    {
+      if (!task)
+        cout << "\n\t Usage: <executable> <number processes along X> \n" << endl;
+
+      // Abort on all ranks: otherwise the remaining processes would fall
+      // through and dereference argv[1] below.
+      MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+  
+  const int cartesian_grid_x = (int)strtol(argv[1], NULL, 10);
+  const int cartesian_grid_y = ((ntasks % cartesian_grid_x == 0) ? (ntasks / cartesian_grid_x) : -1);
+  if (cartesian_grid_y == -1)
+    {
+      if (!task)
+        cout << "\n\t ntasks % cartesian_grid_x != 0 ... aborting ...\n" << endl;
+
+      MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+  
+  static int dims[SIZE] = {cartesian_grid_x, cartesian_grid_y};
+  static int periods[SIZE] = {0, 0};
+  static int reorder = 0;
+  MPI_Comm comm2d;
+  MPI_Cart_create(MPI_COMM_WORLD, SIZE, dims, periods, reorder, &comm2d);
+
+  int coords[SIZE];
+  MPI_Cart_coords(comm2d, task, SIZE, coords);
+
+  int nbrright, nbrleft;
+  MPI_Cart_shift(comm2d, Y, 1, &nbrleft, &nbrright);
+
+  int nbrtop, nbrbottom;
+  MPI_Cart_shift(comm2d, X, 1, &nbrbottom, &nbrtop);
+
+  for (int rank=0 ; rank<ntasks; rank++)
+    {
+      MPI_Barrier(MPI_COMM_WORLD);
+      if (rank == task)
+	{
+	  cout << "\n\t Task: " << task << endl;
+	  cout << "\t\t coords[" << coords[X] << "," << coords[Y] << "]" << endl;
+	  cout << "\t\t nbrright: " << nbrright << " - nbrleft  : " << nbrleft << endl;
+	  cout << "\t\t nbrtop  : " << nbrtop   << " - nbrbottom: " << nbrbottom << endl;
+	  cout << endl;
+	}
+    }
+
+  MPI_Comm_free(&comm2d);
+  MPI_Finalize();
+  
+  return 0;
+}
diff --git a/jacobi/mpi/miscellaneous/color_ring.c b/jacobi/mpi/miscellaneous/color_ring.c
new file mode 100644
index 0000000000000000000000000000000000000000..70ff1b544ee674dd68d785f565de4958344e82fb
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/color_ring.c
@@ -0,0 +1,88 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/*!
+ \file
+ \brief Data communication in a ring.
+ 
+ For each processor, define a unique string defining the process
+ color and perform n cyclic permutations by transferring the color
+ name to the process to the right.
+ Three versions are provided:
+ 
+ i)   using MPI_Send / Recv()   (select VERSION == 1);
+ ii)  using MPI_Sendrecv()      (select VERSION == 2);
+ iii) using MPI_Isend / Irecv() (select VERSION == 3).
+ 
+ \author A. Mignone (mignone@to.infn.it)
+ \date   March 10, 2020
+ */
+/* ///////////////////////////////////////////////////////////////////// */
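+
+/* Expected behaviour (a sketch, assuming 4 processes, e.g. "mpirun -n 4"):
+ * after "size" shifts every process recovers its original colour, and since
+ * only rank 0 prints, the output lists the colours black, green, blue and
+ * finally red arriving at rank 0 on successive iterations.
+ */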
+
+
+#include <mpi.h>
+#include <stdio.h>
+
+#define STR_LENGTH 32
+#define VERSION 1
+
+int main(int argc, char ** argv)
+{
+    int rank, size;
+    int dstL, dstR;
+    char recv_buf[STR_LENGTH];
+    char send_buf[STR_LENGTH];
+    MPI_Request req[2];
+    
+    /* -- 1. Initialize the MPI execution environment -- */
+    
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    
+    /* -- 2. Define color -- */
+    
+    if (rank == 0) sprintf (send_buf, "red");
+    if (rank == 1) sprintf (send_buf, "blue");
+    if (rank == 2) sprintf (send_buf, "green");
+    if (rank == 3) sprintf (send_buf, "black");
+    if (rank == 4) sprintf (send_buf, "orange");
+    if (rank == 5) sprintf (send_buf, "yellow");
+    
+    if (size > 6){
+        if (rank == 0) printf ("! Cannot execute with more than six processes\n");
+        MPI_Finalize();
+        return 0;
+    }
+    
+    /* -- 3. Determine neighbour processors -- */
+    
+    dstL  = rank - 1;
+    dstR  = rank + 1;
+    if (dstL < 0)     dstL = size-1;
+    if (dstR >= size) dstR = 0;
+    
+    int n;
+    for (n = 0; n < size; n++){
+#if VERSION == 1
+        MPI_Send(send_buf, STR_LENGTH, MPI_CHAR, dstR, 0, MPI_COMM_WORLD);
+        MPI_Recv(recv_buf, STR_LENGTH, MPI_CHAR, dstL, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
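+        /* Note: every rank calls MPI_Send before MPI_Recv; for these short
+           messages this relies on eager buffering and could deadlock for
+           large ones, which is what MPI_Sendrecv (VERSION 2) avoids. */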
+#elif VERSION == 2
+        MPI_Sendrecv(send_buf, STR_LENGTH, MPI_CHAR, dstR, 0,
+                     recv_buf, STR_LENGTH, MPI_CHAR, dstL, 0, MPI_COMM_WORLD,
+                     MPI_STATUS_IGNORE);
+#elif VERSION == 3
+        MPI_Isend(send_buf, STR_LENGTH, MPI_CHAR, dstR, 0, MPI_COMM_WORLD, &req[0]);
+        MPI_Irecv(recv_buf, STR_LENGTH, MPI_CHAR, dstL, 0, MPI_COMM_WORLD, &req[1]);
+        MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
+#endif
+        
+        /* -- Replace color -- */
+        
+        sprintf (send_buf,"%s",recv_buf);
+        if (rank == 0){
+            printf ("Proc #%d, my color is now %s\n", rank, send_buf);
+        }
+    }
+    
+    MPI_Finalize();
+    return 0;
+}
diff --git a/jacobi/mpi/miscellaneous/lecture_color_ring.pdf b/jacobi/mpi/miscellaneous/lecture_color_ring.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..dc1f501c160a2157484c67d32cf6a185a826eb40
Binary files /dev/null and b/jacobi/mpi/miscellaneous/lecture_color_ring.pdf differ
diff --git a/jacobi/mpi/miscellaneous/subarray b/jacobi/mpi/miscellaneous/subarray
new file mode 100644
index 0000000000000000000000000000000000000000..5295b41860fe0dfe7b99d8b60a0c49a893e90303
Binary files /dev/null and b/jacobi/mpi/miscellaneous/subarray differ
diff --git a/jacobi/mpi/miscellaneous/subarray.c b/jacobi/mpi/miscellaneous/subarray.c
new file mode 100644
index 0000000000000000000000000000000000000000..254b3d43bb7c4cd9af9a675e29450f12f8eb185b
--- /dev/null
+++ b/jacobi/mpi/miscellaneous/subarray.c
@@ -0,0 +1,114 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <mpi.h>
+#include <assert.h>
+
+void printarr(int **data, int n, char *str);
+int **allocarray(const int n);
+void deallocarray(int **);
+
+int main(int argc, char **argv)
+{
+  /* array sizes */
+  const int bigsize = 10;
+  const int subsize = 5;
+
+  /* communications parameters */
+  const int sender   = 0;
+  const int receiver = 1;
+  const int ourtag   = 2;
+
+  int rank, size;
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+  if (size < (receiver + 1))
+    {
+      if (rank == 0)
+	fprintf(stderr,"\n\t %s: needs at least %d processes.\n", argv[0], receiver+1);
+
+      MPI_Finalize();
+      return 1;
+    }
+
+  if (rank == sender)
+    {
+      int **bigarray = allocarray(bigsize);
+      for (int i=0 ; i<bigsize ; i++)
+	for (int j=0 ; j<bigsize ; j++)
+	  bigarray[i][j] = (i * bigsize + j);
+
+      printarr(bigarray, bigsize, "\n Sender: Big array \n");
+
+      MPI_Datatype mysubarray;
+      int starts[2]   = {5,3};
+      int subsizes[2] = {subsize,subsize};
+      int bigsizes[2] = {bigsize, bigsize};
+      MPI_Type_create_subarray(2, bigsizes, subsizes, starts,
+			       MPI_ORDER_C, MPI_INT, &mysubarray);
+      MPI_Type_commit(&mysubarray);
+
+      MPI_Send(&(bigarray[0][0]), 1, mysubarray, receiver, ourtag, MPI_COMM_WORLD);
+      MPI_Type_free(&mysubarray);
+
+      deallocarray(bigarray);
+    }
+  else if (rank == receiver)
+    {
+      int **subarray = allocarray(subsize);
+
+      for (int i=0; i<subsize; i++)
+	for (int j=0; j<subsize; j++)
+	  subarray[i][j] = 0;
+
+      MPI_Recv(&(subarray[0][0]), subsize * subsize, MPI_INT, sender, ourtag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+      printarr(subarray, subsize, " Receiver: Subarray -- after receive");
+
+      deallocarray(subarray);
+    }
+
+  MPI_Finalize();
+  return 0;
+}
+
+void printarr(int **data,
+	      int   n,
+	      char *str)
+{
+  printf("-- %s --\n", str);
+  for (int i=0; i<n; i++)
+    {
+      for (int j=0; j<n; j++)
+	{
+	  printf("%3d ", data[i][j]);
+	}
+      printf("\n");
+    }
+}
+
+int **allocarray(const int n)
+{
+  int *data = malloc(n * n * sizeof(int));
+  assert(data != NULL);
+
+  int **arr = malloc(n * sizeof(int *));
+  assert(arr != NULL);
+  
+  for (int i=0 ; i<n ; i++)
+    arr[i] = &(data[i * n]);
+
+  return arr;
+}
+
+void deallocarray(int **array)
+{
+  if (array)
+    {
+      free(array[0]);
+      free(array);
+    }
+
+  return;
+}