diff --git a/jacobi/mpi/cartesian/Makefile b/jacobi/mpi/cartesian/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..8b57b97d133c4acefdd2c0adc1a8966f4b6dc770
--- /dev/null
+++ b/jacobi/mpi/cartesian/Makefile
@@ -0,0 +1,61 @@
+#######################################################################
+# Author: David Goz (david.goz@inaf.it)                               #
+# June  2024                                                          #
+#######################################################################
+#
+# To see all the compilation options
+# $ make info
+#######################################################################
+
+# make.def defines how the application is compiled
+include make.def
+include make_mpi_path
+#######################################################################
+
+.PHONY: info mpi debug valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean
+
+info:
+	@echo ' '
+	@echo '-----------------------------------------------------------------------------------------'
+	@echo '$$ make                     ---> compile the mpi application                             '
+	@echo '$$ make debug               ---> compile the mpi application for debugger                '
+	@echo '$$ make valgrind_memcheck   ---> run the mpi application using Valgrind under Memcheck   '
+	@echo '$$ make valgrind_callgrind  ---> run the mpi application using Valgrind under Callgrind  '
+	@echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind '
+	@echo '$$ make clean               ---> clean up all                                            '
+	@echo '$$ make info                ---> get make info                                           '
+	@echo '-----------------------------------------------------------------------------------------'
+	@echo ' '
+
+mpi: $(PROG)
+
+debug: $(PROG_DEBUG)
+
+valgrind_memcheck: $(PROG_MEMCHECK)
+	@echo 'oooOOO... valgrind_memcheck ...OOOooo'
+	mpirun -n 4 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 9 2
+	@echo 'oooOOO... valgrind_memcheck ...OOOooo'
+
+valgrind_callgrind: $(PROG_CALLGRIND)
+	@echo 'oooOOO... valgrind_callgrind ...OOOooo'
+	mpirun -n 4 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_%p.txt ./$< 9 2
+	@echo ' '
+	@echo 'To generate a function-by-function summary from the profile data file:'
+	@echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less'
+	@echo '(kcachegrind is required in order to visualize the output using the GUI)'
+
+valgrind_cachegrind: $(PROG_CACHEGRIND)
+	@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
+	mpirun -n 4 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_%p.txt ./$< 9 2
+	@echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less'
+	@echo '(kcachegrind is required in order to visualize the output using the GUI)'
+	@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
+
+clean:
+	rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*#
+	rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND)
+	rm -f valgrind_*.txt
+	rm -f cachegrind.out.*
+	rm -f callgrind.*
+	rm -f *bin
+	rm -f jacobi_mpi_cartesian_*
diff --git a/jacobi/mpi/cartesian/include/allvars.h b/jacobi/mpi/cartesian/include/allvars.h
new file mode 100644
index 0000000000000000000000000000000000000000..798e4e6a2f2a740b3ab5a1394ac85eef8da1255c
--- /dev/null
+++ b/jacobi/mpi/cartesian/include/allvars.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#define NGHOST   1
+#define NDIM     2 /* 2D */
+#define X        0
+#define Y        1
+#define TOL      1.e-5
+
+#include <mpi.h>
+
+#define MASTER 0
+
+#if defined(SINGLE_PRECISION)
+typedef float MyData;
+#define MPI_MyDatatype MPI_FLOAT
+#else
+/* double precision is assumed by default */
+typedef double MyData;
+#define MPI_MyDatatype MPI_DOUBLE
+#endif /* SINGLE_PRECISION */
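+
+/* e.g. compile with -DSINGLE_PRECISION (appended to CFLAGS in make.def)
+   to switch MyData and MPI_MyDatatype to single precision               */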
diff --git a/jacobi/mpi/cartesian/include/tools.h b/jacobi/mpi/cartesian/include/tools.h
new file mode 100644
index 0000000000000000000000000000000000000000..82859ec511710474547355ede3fc38cf33a92154
--- /dev/null
+++ b/jacobi/mpi/cartesian/include/tools.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "allvars.h"
+#include <assert.h>
+#include <sys/time.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* function prototypes */
+MyData **Allocate_2DdblArray(const int nx, const int ny);
+void Show_2DdblArray(const MyData **const A,
+                     const int            nx,
+                     const int            ny,
+                     const char *const    string);
+
+double seconds(void);
diff --git a/jacobi/mpi/cartesian/make.def b/jacobi/mpi/cartesian/make.def
new file mode 100644
index 0000000000000000000000000000000000000000..2a18b507c5fb84155da02b47961bfdc51d4253e0
--- /dev/null
+++ b/jacobi/mpi/cartesian/make.def
@@ -0,0 +1,45 @@
+CC     = mpicc
+CFLAGS = -Wall -Wextra -march=native
+LIBS   = -lm -lmpi
+
+SYSTYPE = $(strip $(shell uname -n))
+
+PROG            = jacobi_mpi_cartesian_$(SYSTYPE)
+PROG_DEBUG      = $(PROG)_DEBUG
+PROG_MEMCHECK   = $(PROG)_MEMCHECK
+PROG_CALLGRIND  = $(PROG)_CALLGRIND
+PROG_CACHEGRIND = $(PROG)_CACHEGRIND
+
+HEADERS       = $(wildcard ./include/*.h)
+SOURCES       = $(wildcard ./src/*.c)
+DEPENDENCIES  = $(SOURCES) $(HEADERS) Makefile
+
+$(PROG): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_DEBUG): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -Og -ggdb3 -DDEBUG -fno-omit-frame-pointer -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_MEMCHECK): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_CALLGRIND): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
+
+$(PROG_CACHEGRIND): $(DEPENDENCIES)
+	$(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS)
+	@echo ' '
+	@echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine'
+	@echo ' '
diff --git a/jacobi/mpi/cartesian/make_mpi_path b/jacobi/mpi/cartesian/make_mpi_path
new file mode 100644
index 0000000000000000000000000000000000000000..3fdf888da9d4837eb7eafb51c93e53516774ce93
--- /dev/null
+++ b/jacobi/mpi/cartesian/make_mpi_path
@@ -0,0 +1,10 @@
+# set the MPI install path
+
+# pleiadi
+MPI_INSTALL_PATH = /opt/cluster/spack/opt/spack/linux-centos7-broadwell/gcc-11.2.0/openmpi-4.1.3-djxjqlmzbqwq76bhh3wvgxaefnoczleg
+
+
+
+MPI        = $(MPI_INSTALL_PATH)
+MPI_INC    = $(MPI)/include
+MPI_LIB    = $(MPI)/lib
\ No newline at end of file
diff --git a/jacobi/mpi/cartesian/src/jacobi_2D_mpi_cartesian.c b/jacobi/mpi/cartesian/src/jacobi_2D_mpi_cartesian.c
new file mode 100644
index 0000000000000000000000000000000000000000..826c7188482bbfe40983df8f4098cfdfa9297d35
--- /dev/null
+++ b/jacobi/mpi/cartesian/src/jacobi_2D_mpi_cartesian.c
@@ -0,0 +1,626 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/* Authors:  A. Mignone (mignone@to.infn.it)                             */
+/*           V. Cesare  (valentina.cesare@inaf.it)                       */
+/*           D. Goz     (david.goz@inaf.it)                              */
+/*                                                                       */
+/* Date   : June 2024                                                    */
+/*                                                                       */
+/* ///////////////////////////////////////////////////////////////////// */
+
+#include "allvars.h"
+#include "tools.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+/* #define DEBUG */
+
+typedef struct MyGrid
+{
+  int start[NDIM];
+  int end[NDIM];
+} myDomain;
+
+/* function prototypes */
+void BoundaryConditions(MyData **const restrict,
+			MyData  *const restrict,
+			MyData  *const restrict,
+			const int,
+			const int);
+
+void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict,
+		     const MyData *const restrict, const int, const int, const int, const int);
+
+void get_domains(MyData  **const buffer,
+		 myDomain *const domain,
+		 const MPI_Comm  comm);
+
+void mpi_exchange_cartesian(MyData **const  buffer,
+			    const int       nbrtop,
+			    const int       nbrbottom,
+			    const int       nbrleft,
+			    const int       nbrright,
+			    myDomain *const domain,
+			    const int       grid_y,
+			    const MPI_Comm  comm2d);
+
+void WriteSolution(MyData **const phi, const int nx, const int ny);
+
+void copy_grids(MyData **const restrict A,
+                MyData **const restrict B,
+                const int               xbeg,
+                const int               xend,
+                const int               ybeg,
+		const int               yend);
+
+int main(int argc, char **argv)
+{
+  int rank, Nranks;
+  const MPI_Comm comm = MPI_COMM_WORLD;
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(comm, &rank);
+  MPI_Comm_size(comm, &Nranks);
+
+  if (argc <= 2)
+    {
+      if (!rank)
+        printf("\n\t Usage: <executable> <grid_size> <cartesian communicator size_X>\n\n");
+
+      MPI_Abort(comm, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+
+  /* global X and Y grid size (a square grid is assumed) */
+  const int NX_GLOB = (int) strtol(argv[1], NULL, 10);
+  const int NY_GLOB = NX_GLOB;
+
+  /********************************** Cartesian topology ******************************************************/
+  const int cartesian_grid_x = (int)strtol(argv[2], NULL, 10);
+  const int cartesian_grid_y = ((Nranks % cartesian_grid_x == 0) ? (Nranks / cartesian_grid_x) : -1);
+  if (cartesian_grid_y == -1)
+    {
+      if (!rank)
+        printf("\n\t Nranks mod cartesian_grid_x != 0 ... aborting ...\n");
+
+      MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+  
+  const int dims[NDIM] = {cartesian_grid_x, cartesian_grid_y};
+  const int periods[NDIM] = {0, 0}; /* not periodic */
+  const int reorder = 0;            /* if set to true (non-zero), MPI may reorder the    */
+                                    /* process ranks so that neighbors in the topology   */
+                                    /* map onto nearby hardware, for better performance  */
+
+  /* make a new communicator to which topology information has been attached */
+  MPI_Comm comm2d = MPI_COMM_NULL;
+  MPI_Cart_create(MPI_COMM_WORLD, NDIM, dims, periods, reorder, &comm2d);
+  if (comm2d == MPI_COMM_NULL)
+    {
+      printf("\n\t Process %d is not part of the new communicator \n", rank);
+      fflush(stdout);
+      MPI_Abort(comm, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+
+  /* determine the process coords in cartesian topology given rank in group */
+  int coords[NDIM];
+  MPI_Cart_coords(comm2d, rank, NDIM, coords);
+
+  /* get bottom and top neighbors (X direction) */
+  int nbrtop, nbrbottom;
+  MPI_Cart_shift(comm2d, X, 1, &nbrbottom, &nbrtop);
+
+  /* get left and right neighbors (Y direction) */
+  int nbrright, nbrleft;
+  MPI_Cart_shift(comm2d, Y, 1, &nbrleft, &nbrright);
+  
+  /************************************************************************************************************/
+  /* 2D MPI-cartesian decomposition:
+     the grids are replicated across the MPI processes.
+     This approach is not the most memory-efficient, because every rank
+     allocates the full arrays instead of only its own subdomain */
+
+  /* get the remainder, i.e. take into account the uneven
+     decomposition of points among the processes          */
+  const int rem[NDIM] = {(NX_GLOB + 2) % dims[X],
+                         (NY_GLOB + 2) % dims[Y]};
+
+  /* get the amount of data for each MPI process:
+     - chunk is supposed to be >= 1 */
+  const int chunk[NDIM] = {(NX_GLOB + 2 - rem[X]) / dims[X],
+                           (NY_GLOB + 2 - rem[Y]) / dims[Y]};
+  if ((chunk[X] < 1) || (chunk[Y] < 1))
+    {
+      printf("\n\t chunk[X] < 1 || chunk[Y] < 1 ... aborting ...[rank: %d]\n", rank);
+      MPI_Comm_free(&comm2d);
+      MPI_Abort(comm, EXIT_FAILURE);
+      exit(EXIT_FAILURE);
+    }
+  
+  /* get the subgrid dimension along X and Y directions: */
+  int incr[NDIM], offset[NDIM];
+  for (int dim=0 ; dim<NDIM ; dim++)
+    {
+      if (coords[dim] < rem[dim])
+	{
+	  incr[dim] = chunk[dim] + 1;
+	  offset[dim] = 0;
+	}
+      else
+	{
+	  incr[dim] = chunk[dim];
+	  offset[dim] = rem[dim];
+	}
+    }
+
+  /* subdomain managed by the task */
+  myDomain domain;
+  for (int dim=0 ; dim<NDIM ; dim++)
+    {
+      domain.start[dim] = ((coords[dim] * incr[dim]) + offset[dim]);
+      domain.end[dim]   = (domain.start[dim] + incr[dim]) - 1;
+
+      /* boundaries (the same bound applies to both dims since NX_GLOB == NY_GLOB) */
+      domain.start[dim] = ((domain.start[dim] == 0)         ? NGHOST  : domain.start[dim]);
+      domain.end[dim]   = ((domain.end[dim] == NX_GLOB + 1) ? NX_GLOB : domain.end[dim]);
+    }
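+
+  /* Worked example (grid size and process grid used by the Makefile valgrind
+     targets, i.e. "<exe> 9 2" on 4 ranks): NX_GLOB = 9 and dims[X] = 2 give
+     11 points per direction (9 interior + 2 boundaries), hence rem[X] = 1 and
+     chunk[X] = 5; coords[X] = 0 gets start = 0 -> 1 (clamped to the first
+     interior row) and end = 5, coords[X] = 1 gets start = 6 and end = 10 -> 9,
+     so the 9 interior rows are split 5 + 4. The Y direction is analogous     */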
+
+#if defined(DEBUG)
+  for (int task=0 ; task<Nranks ; task++)
+    {
+      MPI_Barrier(comm);
+      
+      if (task == rank)
+	{
+	  printf("\n\t rank: %d", rank);
+	  printf("\n\t\t coords = [%d, %d]", coords[X], coords[Y]);
+	  printf("\n\t\t domain.start[X] = %d - domain.end[X] = %d", domain.start[X], domain.end[X]);
+	  printf("\n\t\t domain.start[Y] = %d - domain.end[Y] = %d", domain.start[Y], domain.end[Y]);
+	  printf("\n\t\t nbrtop  : %d - nbrbottom: %d", nbrtop, nbrbottom);
+	  printf("\n\t\t nbrright: %d - nbrleft  : %d \n", nbrright, nbrleft);
+	  fflush(stdout);
+	}
+    }
+
+  MPI_Barrier(comm);
+#endif /* DEBUG */
+  
+  /******************************************************************************************************/
+  
+  const MyData xbeg = 0.0;
+  const MyData xend = 1.0;
+  const MyData ybeg = 0.0;
+  const MyData yend = 1.0;
+
+  const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1),
+			      (yend - ybeg)/(NY_GLOB + 1)};
+
+  /* --------------------------------------------------------
+     1. Set grid indices
+     -------------------------------------------------------- */
+    
+  const int ibeg   = NGHOST;
+  const int iend   = ibeg + NX_GLOB - 1;
+  const int nx     = iend - ibeg + 1;
+  const int nx_tot = nx + 2 * NGHOST;
+    
+  const int jbeg   = NGHOST;
+  const int jend   = jbeg + NY_GLOB - 1;
+  const int ny     = jend - jbeg + 1;
+  const int ny_tot = ny + 2 * NGHOST;
+
+  if (rank == MASTER)
+    {
+      printf("\n\t Grid indices:");
+      printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d"    ,ibeg, iend, nx_tot);
+      printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot);
+    }
+  
+  /* --------------------------------------------------------
+     2. Generate grid, allocate memory
+        Not optimized because the grids are (unnecessarily)
+	replicated across MPI processes
+     -------------------------------------------------------- */
+
+  /* memory allocation */
+  MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData));
+  MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData));
+  assert((xg != NULL) && (yg != NULL));
+
+  /* grid coordinates */
+  for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X];
+  for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y];
+  MyData *x = xg; /* global and local grids coincide because  */
+  MyData *y = yg; /* the grid is replicated on every MPI rank */
+
+  /* grids memory allocation */
+  MyData **phi  = Allocate_2DdblArray(ny_tot, nx_tot);
+  MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot);
+    
+  /* --------------------------------------------------------
+     3. Initialize solution array to 0
+     -------------------------------------------------------- */
+    
+  for (int j=jbeg ; j<=jend ; j++)
+    for (int i=ibeg ; i<=iend ; i++)
+      {
+	phi0[j][i] = 0.0;
+	phi[j][i]  = 0.0;
+      }
+  
+  /* --------------------------------------------------------
+     4. Main iteration cycle
+     -------------------------------------------------------- */
+
+  const double time_start = MPI_Wtime();
+  
+  /* -- 4a. Set boundary conditions first -- */  
+  BoundaryConditions(phi0, x, y, nx, ny);
+  BoundaryConditions(phi, x, y, nx, ny);
+
+  MyData err = 1.0;
+  /* iterations */
+  int k = 0;
+  while (1)
+    {
+      /* -- 4b. Jacobi's method and residual (interior points) -- */
+      /*       core algorithm                                     */
+
+      err = 0.0;
+      JacobiAlgorithm(phi, phi0, &err, delta,
+		      domain.start[Y], domain.end[Y],
+		      domain.start[X], domain.end[X]);
+
+      if (!rank)
+	printf("\n\t Iteration = %d - err = %lg\n",k, err);
+
+      /* increase the counter of loop iterations */
+      k++;
+
+      /* get the total error */
+      MyData toterr;
+      /* combines values from all processes and distributes the result back to all processes */
+      MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, comm2d);
+      
+      /* check convergence */
+      if (toterr <= TOL)
+	{
+	  /* master task gathers all the domains */
+	  get_domains(phi, &domain, comm2d);
+
+	  break;
+	}
+
+      /* -- 4c. MPI communications -- */
+      mpi_exchange_cartesian(phi,
+			     nbrtop, nbrbottom, nbrleft, nbrright,
+			     &domain, NY_GLOB,
+			     comm2d);
+
+      /* swap the pointers */
+      MyData **tmp = phi;
+      phi = phi0;
+      phi0 = tmp;
+    }
+
+  /* master rank writes the solution */
+  if (!rank)
+    {
+      WriteSolution(phi, nx, ny);
+      
+      printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB);
+      printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start);
+    }
+
+  /* free memory */
+  if (phi0)
+    {
+      free(phi0[0]);
+      free(phi0);
+    }
+
+  if (phi)
+    {
+      free(phi[0]);
+      free(phi);
+    }
+
+  if (yg)
+    free(yg);
+  
+  if (xg)
+    free(xg);
+
+  MPI_Comm_free(&comm2d);
+  
+  MPI_Finalize();
+  
+  return 0;
+}
+
+/* ********************************************************************* */
+void BoundaryConditions(MyData **const restrict phi,
+			MyData  *const restrict x,
+			MyData  *const restrict y,
+                        const int               nx,
+			const int               ny)
+/*
+*********************************************************************** */
+{
+  const int ibeg = NGHOST;
+  const int iend = ibeg + nx - 1;
+    
+  const int jbeg = NGHOST;
+  const int jend = jbeg + ny - 1;
+
+  int i,j;
+  
+  /* -- Left -- */
+  i = ibeg - 1;
+  for (int j=jbeg ; j<=jend ; j++)
+    phi[j][i] = (1.0 - y[j]);
+    
+  /* -- Right -- */
+  i = iend + 1;
+  for (int j=jbeg ; j<=jend ; j++)
+    phi[j][i] = (y[j] * y[j]);
+    
+  /* -- Bottom -- */    
+  j = jbeg - 1;
+  for (int i=ibeg ; i<=iend ; i++)
+    phi[j][i] = (1.0 - x[i]);
+    
+  /* -- Top -- */
+  j = jend + 1;
+  for (int i=ibeg ; i<=iend ; i++)
+    phi[j][i] = x[i];
+
+  return;
+}
+
+/* ********************************************************************* */
+
+void JacobiAlgorithm(MyData      **const restrict Phi,
+		     MyData      **const restrict Phi0,
+		     MyData       *const restrict error,
+		     const MyData *const restrict delta,
+		     const int                    ibeg,
+		     const int                    iend,
+		     const int                    jbeg,
+		     const int                    jend)
+{
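+  /* the residual accumulated below is the L1 norm of the update between two
+     successive iterates, weighted by the cell area delta[X] * delta[Y]      */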
+  *error = 0.0;
+  for (int j=jbeg ; j<=jend ; j++)
+    {
+      for (int i=ibeg ; i<=iend ; i++)
+	{
+	  Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] +
+			      Phi0[j-1][i] + Phi0[j+1][i]);
+                
+	  *error += delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i]);
+	} /* loop over columns */
+    } /* loop over rows */
+  
+  return;
+}
+
+/* ********************************************************************* */
+void mpi_exchange_cartesian(MyData **const  buffer,
+			    const int       nbrtop,
+			    const int       nbrbottom,
+			    const int       nbrleft,
+			    const int       nbrright,
+			    myDomain *const domain,
+			    const int       grid_y,
+			    const MPI_Comm  comm2d)
+{
+  /* In the data-exchange routines seen so far, the buffers are contiguous areas of
+     memory, so a basic datatype (e.g. MPI_DOUBLE) together with a count of occurrences
+     is sufficient to describe the buffer to be sent.
+     MPI derived datatypes allow the programmer to describe non-contiguous areas of
+     memory, such as a column of an array stored in row-major order, as in C */
+  
+  /* Creates a vector (strided) datatype:                                              */
+  /* - 1st argument (count)      : number of blocks;                                   */
+  /* - 2nd argument (blocklength): number of elements in each block;                   */
+  /* - 3rd argument (stride)     : number of elements between the start of each block; */
+  /* - 4th argument (oldtype)    : old datatype;                                       */
+  /* - 5th argument (newtype)    : new datatype                                        */
+
+  /* count = (x_end - x_start + 1), i.e. the column height (rows owned by the process) */
+  /* blocklength = 1, i.e. one element (old datatype) in each block                    */
+  /* stride = NY_GLOB + 2, i.e. the length of a full row, including the two            */
+  /*          boundary columns (the grid is replicated on every rank)                  */
+  /* oldtype = MPI_MyDatatype                                                          */
+  
+  MPI_Datatype column;
+  MPI_Type_vector((domain->end[X] - domain->start[X] + 1), 1, grid_y + 2, MPI_MyDatatype, &column);
+  /* commits the datatype */
+  MPI_Type_commit(&column);
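+
+  /* e.g. with NY_GLOB = 9 each row holds 11 MyData, so 'column' selects one
+     element every 11: exactly one entry per owned row, i.e. a single column
+     of the replicated grid                                                  */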
+
+  /* communication with nbrtop and nbrbottom */
+  const int data_row_size = (domain->end[Y] - domain->start[Y] + 1);
+  MPI_Sendrecv(&buffer[domain->end[X]][domain->start[Y]]    , data_row_size, MPI_MyDatatype, nbrtop,    0,
+	       &buffer[domain->start[X]-1][domain->start[Y]], data_row_size, MPI_MyDatatype, nbrbottom, 0,
+	       comm2d, MPI_STATUS_IGNORE);
+
+  MPI_Sendrecv(&buffer[domain->start[X]][domain->start[Y]], data_row_size, MPI_MyDatatype, nbrbottom, 1,
+	       &buffer[domain->end[X]+1][domain->start[Y]], data_row_size, MPI_MyDatatype, nbrtop,    1,
+	       comm2d, MPI_STATUS_IGNORE);
+
+  /* communication with nbrleft, nbrright - vector datatype is used */
+  MPI_Sendrecv(&buffer[domain->start[X]][domain->end[Y]],     1, column, nbrright, 0,
+	       &buffer[domain->start[X]][domain->start[Y]-1], 1, column, nbrleft,  0,
+	       comm2d, MPI_STATUS_IGNORE);
+
+  MPI_Sendrecv(&buffer[domain->start[X]][domain->start[Y]], 1, column, nbrleft, 1,
+	       &buffer[domain->start[X]][domain->end[Y]+1], 1, column, nbrright, 1,
+	       comm2d, MPI_STATUS_IGNORE);  
+  
+  /* when the datatype is no longer needed, it should be freed, */
+  /* i.e. the handle is set to MPI_DATATYPE_NULL                */
+  MPI_Type_free(&column);
+  
+  return;
+}
+
+/* ********************************************************************* */
+
+void get_domains(MyData  **const buffer,
+		 myDomain *const domain,
+		 const MPI_Comm  comm)
+{
+  int rank, nranks;
+  MPI_Comm_size(comm, &nranks);
+  MPI_Comm_rank(comm, &rank);
+  
+  /* Master process gathers all the domains */
+  
+  /***************************** get the domain boundaries from each process *************************************/
+  myDomain *boundaries = NULL;
+  if (rank == MASTER)
+    {
+      boundaries = (myDomain *)malloc(nranks * sizeof(*boundaries));
+      assert(boundaries != NULL);
+    }
+
+  MPI_Gather(domain,     sizeof(*domain), MPI_BYTE,
+	     boundaries, sizeof(*domain), MPI_BYTE,
+	     MASTER, comm);
+
+#if defined(DEBUG)
+  if (rank == MASTER)
+    {
+      for (int task=0 ; task<nranks ; task++)
+	{
+	  printf("\n\t Task: %d", task);
+	  printf("\n\t\t boundaries[%d].start[X] = %d - boundaries[%d].end[X] = %d",
+		 task, boundaries[task].start[X], task, boundaries[task].end[X]);
+	  printf("\n\t\t boundaries[%d].start[Y] = %d - boundaries[%d].end[Y] = %d\n",
+		 task, boundaries[task].start[Y], task, boundaries[task].end[Y]);
+	}
+      printf("\n");
+      fflush(stdout);
+    }
+#endif /* DEBUG */
+
+  /**************************************************************************************************/
+  
+  /***************************************** get the domain from each process *************************/
+  /* MPI_Gatherv
+     Gathers into specified locations from all processes in a group
+
+     Synopsis
+     int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                     void *recvbuf, const int recvcounts[], const int displs[],
+                     MPI_Datatype recvtype, int root, MPI_Comm comm) */
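+
+  /* For instance, on a 9 x 9 grid with a 2 x 2 process grid (default row-major
+     rank placement), the four subdomains hold 25, 20, 20 and 16 points, so
+     recvcounts = {25, 20, 20, 16}, displs = {0, 25, 45, 65}, and recvbuf is the
+     concatenation of the per-rank subdomains in rank order (81 points in total) */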
+
+  int *recvcounts = NULL;
+  int *displs = NULL;
+  MyData *recvbuf = NULL;
+  MyData *sendbuf = NULL;
+  
+  if (rank == MASTER)
+    {
+      recvcounts = (int *)malloc(nranks * sizeof(*recvcounts));
+      assert(recvcounts != NULL);
+
+      displs = (int *)malloc(nranks * sizeof(*displs));
+      assert(displs != NULL);
+
+      for (int task=0 ; task<nranks ; task++)
+	{
+	  const int nrows = (boundaries[task].end[X] - boundaries[task].start[X] + 1);
+	  const int ncols = (boundaries[task].end[Y] - boundaries[task].start[Y] + 1);
+	  recvcounts[task] = (nrows * ncols);
+	}
+
+      displs[0] = 0;
+      for (int task=1 ; task<nranks ; task++)
+	displs[task] = (recvcounts[task - 1] + displs[task - 1]);
+
+      /* allocate the recvbuffer */
+      recvbuf = (MyData *)malloc((displs[nranks - 1] + recvcounts[nranks - 1]) * sizeof(*recvbuf));
+      assert(recvbuf != NULL);
+      
+#if defined(DEBUG)
+      printf("\n\t********************** recvcounts - displs *************************");
+      for (int task=0 ; task<nranks ; task++)
+	{
+	  printf("\n\t recvcounts[%d] = %d - displs[%d] = %d",
+		 task, recvcounts[task], task, displs[task]);
+	}
+      printf("\n\t********************************************************************\n");
+      fflush(stdout);
+#endif /* DEBUG */
+
+    } /* MASTER */
+
+  /* every process sets up the sendbuf */
+  const int nrows = (domain->end[X] - domain->start[X] + 1);
+  const int ncols = (domain->end[Y] - domain->start[Y] + 1);
+  sendbuf = (MyData *)malloc(nrows * ncols * sizeof(*sendbuf));
+  assert(sendbuf != NULL);
+  /* store the actual grid owned by each process into sendbuf */
+  for (int row=0 ; row<nrows ; row++)
+    memcpy(&sendbuf[row * ncols], &buffer[domain->start[X]+row][domain->start[Y]], (ncols * sizeof(MyData)));
+  
+  /* perform the MPI collective */
+  MPI_Gatherv(sendbuf, (nrows * ncols), MPI_MyDatatype,
+	      recvbuf, recvcounts, displs, MPI_MyDatatype,
+	      MASTER, comm);
+
+  /* store the recvbuf into the actual grid owned by the MASTER task */
+  if (rank == MASTER)
+    {
+      for (int task=1 ; task<nranks ; task++)
+	{
+	  const int nrows = (boundaries[task].end[X] - boundaries[task].start[X] + 1);
+	  const int ncols = (boundaries[task].end[Y] - boundaries[task].start[Y] + 1);
+	  for (int row=0 ; row<nrows ; row++)
+	    memcpy(&buffer[boundaries[task].start[X] + row][boundaries[task].start[Y]],
+		   &recvbuf[displs[task] + (row * ncols)], (ncols * sizeof(MyData)));
+	}
+    }
+
+  free(sendbuf);
+  if (rank == MASTER)
+    {
+      free(recvbuf);
+      free(displs);
+      free(recvcounts);
+      free(boundaries);
+    }
+  
+  return;
+}
+
+/* ********************************************************************* */
+void WriteSolution(MyData **const phi,
+		   const int      nx,
+		   const int      ny)
+/*
+*********************************************************************** */
+{
+  const int ibeg = NGHOST;    
+  const int jbeg = NGHOST;
+  const int jend = jbeg + ny - 1;
+    
+  static int nfile = 0;  /* File counter */
+
+  char fname[128];
+  sprintf(fname,"jacobi2D_mpi_cartesian_%02d.bin", nfile);
+    
+  FILE *fp;
+  printf ("> Writing %s\n",fname);
+  fp = fopen(fname, "wb");
+  assert(fp != NULL);
+
+  /* discard boundaries */
+  for (int j=jbeg ; j<=jend ; j++)
+    {
+      fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp);
+    }
+    
+  nfile++;
+  fclose(fp);
+}
diff --git a/jacobi/mpi/cartesian/src/tools.c b/jacobi/mpi/cartesian/src/tools.c
new file mode 100644
index 0000000000000000000000000000000000000000..be2761d170c23fde4d5d4aeda54af00e56abf3ce
--- /dev/null
+++ b/jacobi/mpi/cartesian/src/tools.c
@@ -0,0 +1,59 @@
+/* ///////////////////////////////////////////////////////////////////// */
+/* Authors:  A. Mignone (mignone@to.infn.it)                             */
+/*           V. Cesare  (valentina.cesare@inaf.it)                       */
+/*           D. Goz     (david.goz@inaf.it)                              */
+/*                                                                       */
+/* Date   : June 2024                                                    */
+/*                                                                       */
+/* ///////////////////////////////////////////////////////////////////// */
+
+#include "tools.h"
+
+/* ********************************************************************* */
+MyData **Allocate_2DdblArray(const int nx, const int ny)
+/*
+ * Allocate memory for a 2D array of MyData (double by default)
+ * with nx rows and ny columns, stored contiguously
+ *********************************************************************** */
+{
+  MyData **buf = malloc(nx * sizeof(MyData *));
+  assert(buf != NULL);
+
+  buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData));
+  assert(buf[0] != NULL);
+  
+  for (int j=1 ; j<nx ; j++)
+    buf[j] = buf[j-1] + ny;
+    
+  return buf;
+}
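+
+/* Usage sketch: MyData **A = Allocate_2DdblArray(nx, ny); the data live in one
+   contiguous block A[0], so the matching cleanup is free(A[0]); free(A);
+   (as done in main())                                                          */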
+
+/* ********************************************************************* */
+void Show_2DdblArray(const MyData **const A,
+		     const int            nx,
+		     const int            ny,
+		     const char *const    string)
+/* *********************************************************************** */
+{
+  printf ("%s\n",string);
+  printf ("------------------------------\n");
+  for (int i=0 ; i<nx ; i++)
+    {
+      for (int j=0 ; j<ny ; j++)
+	{
+	  printf ("%8.2f  ", A[i][j]);
+	}
+      printf ("\n");
+    }
+  printf ("------------------------------\n");
+}
+/* ********************************************************************* */
+
+double seconds()
+{
+  struct timeval tmp;
+  gettimeofday(&tmp, (struct timezone *)0);
+  double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0;
+
+  return sec;
+}
diff --git a/jacobi/mpi/miscellaneous/cartesian b/jacobi/mpi/miscellaneous/cartesian
old mode 100644
new mode 100755
index cef6cb6704f8709644945aeb816d610ad2814635..c0bc080fe492702435b30f6f5d19d4a8fe49a821
Binary files a/jacobi/mpi/miscellaneous/cartesian and b/jacobi/mpi/miscellaneous/cartesian differ
diff --git a/jacobi/openmp/not_opt/script/run.sh b/jacobi/openmp/not_opt/script/run.sh
index eb86912f5c6b3abdfc8294465536de7e2d35e85a..4b5f02b60676e23d1f25c7ee20645bf512707bf1 100755
--- a/jacobi/openmp/not_opt/script/run.sh
+++ b/jacobi/openmp/not_opt/script/run.sh
@@ -47,7 +47,7 @@ fi
 export OMP_DISPLAY_AFFINITY=1
 export OMP_WAIT_POLICY=ACTIVE
 # export OMP_PLACES=( "cores", "threads" )
-# export BINDING=( "master" "close" "spread" )
+# export BINDING=( "none" "master" "close" "spread" )
 
 for OMP in ${OMP_THREADS[@]}
 do
diff --git a/material/slides/INAF_HPC_course_2024_hands-on_session-MPI.pdf b/material/slides/INAF_HPC_course_2024_hands-on_session-MPI.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..cf2d817f95ce79830bcbbf70da472330ca22598b
Binary files /dev/null and b/material/slides/INAF_HPC_course_2024_hands-on_session-MPI.pdf differ