diff --git a/jacobi/mpi/comp_comm/src/jacobi_2D_mpi_comp_comm.c b/jacobi/mpi/comp_comm/src/jacobi_2D_mpi_comp_comm.c index d4de3f485216ce03e65ce3e478001a79c16cfb26..0d9787146ffdcbb574b640b0bc00d6837b8a90c1 100644 --- a/jacobi/mpi/comp_comm/src/jacobi_2D_mpi_comp_comm.c +++ b/jacobi/mpi/comp_comm/src/jacobi_2D_mpi_comp_comm.c @@ -136,7 +136,7 @@ int main(int argc, char **argv) /************************************************************************************************************/ /* 2D MPI-cartesian decomposition: - the grids are distributed across the MPI processes. + the grids are distributed across the MPI processes. */ /* get the reminder, i.e. take into account uneven decomposition of points among the processes */ @@ -593,7 +593,6 @@ void get_domains(MyData **const buffer, int *recvcounts = NULL; int *displs = NULL; MyData *recvbuf = NULL; - MyData *sendbuf = NULL; if (ThisTask->rank == MASTERTASK) { @@ -618,19 +617,37 @@ void get_domains(MyData **const buffer, assert(recvbuf != NULL); } /* MASTERTASK */ - /* every process sets up the sendbuf */ - sendbuf = (MyData *)malloc(ThisTask->domain.dim[X] * ThisTask->domain.dim[Y] * sizeof(*sendbuf)); - assert(sendbuf != NULL); - /* store the actual grid owned by each process into sendbuf */ - for (int row=0 ; row<ThisTask->domain.dim[X] ; row++) - memcpy(&sendbuf[row * ThisTask->domain.dim[Y]], &buffer[ThisTask->domain.local_start[X] + row][ThisTask->domain.local_start[Y]], (ThisTask->domain.dim[Y] * sizeof(MyData))); + /*************************************** CUSTOM subarray ******************************************************************************/ + /* Do not do this !!! */ + /* Use MPI_Type_create_subarray instead */ + /* */ + /* MyData *sendbuf = (MyData *)malloc(ThisTask->domain.dim[X] * ThisTask->domain.dim[Y] * sizeof(*sendbuf)); */ + /* for (int row=0 ; row<ThisTask->domain.dim[X] ; row++) */ + /* memcpy(&sendbuf[row * ThisTask->domain.dim[Y]], &buffer[ThisTask->domain.local_start[X] + row][ThisTask->domain.local_start[Y]], */ + /* (ThisTask->domain.dim[Y] * sizeof(MyData))); */ + /* */ + /* Perform MPI_Gatherv collective... 
*/ + /* */ + /* MPI_Gatherv(sendbuf, (ThisTask->domain.dim[X] * ThisTask->domain.dim[Y]), MPI_MyDatatype, */ + /* recvbuf, recvcounts, displs, MPI_MyDatatype, */ + /* MASTERTASK, ThisTask->comm2d); */ + /**************************************************************************************************************************************/ + + /* every process sets up the subarray */ + MPI_Datatype subarray; + const int subsize[NDIM] = {ThisTask->domain.dim[X], ThisTask->domain.dim[Y]}; + const int bigsize[NDIM] = {subsize[X] + 2 *NGHOST, subsize[Y] + 2 *NGHOST}; + const int start[NDIM] = {NGHOST, NGHOST}; + MPI_Type_create_subarray(NDIM, bigsize, subsize, start, MPI_ORDER_C, MPI_MyDatatype, &subarray); + MPI_Type_commit(&subarray); /* perform the MPI collective */ - MPI_Gatherv(sendbuf, (ThisTask->domain.dim[X] * ThisTask->domain.dim[Y]), MPI_MyDatatype, + MPI_Gatherv(&buffer[0][0], 1, subarray, recvbuf, recvcounts, displs, MPI_MyDatatype, MASTERTASK, ThisTask->comm2d); - free(sendbuf); + MPI_Type_free(&subarray); + if (ThisTask->rank == MASTERTASK) { free(recvcounts); diff --git a/jacobi/mpi/comp_comm_io/Makefile b/jacobi/mpi/comp_comm_io/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f83868ef964675323e46eabe255b84d79876d062 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/Makefile @@ -0,0 +1,61 @@ +####################################################################### +# Author: David Goz (david.goz@inaf.it) # +# June 2024 # +####################################################################### +# +# To see all the compilation options +# $ make info +####################################################################### + +# make.def defines how the application is compiled +include make.def +include make_mpi_path +####################################################################### + +.PHONY: info mpi valgrind_memcheck valgrind_callgrind valgrind_cachegrind clean + +info: + @echo ' ' + @echo '-----------------------------------------------------------------------------------------' + @echo '$$ make mpi ---> compile the mpi application ' + @echo '$$ make debug ---> compile the mpi application for debugging ' + @echo '$$ make valgrind_memcheck ---> run the mpi application using Valgrind under Memcheck ' + @echo '$$ make valgrind_callgrind ---> run the mpi application using Valgrind under Callgrind ' + @echo '$$ make valgrind_cachegrind ---> run the mpi application using Valgrind under Cachegrind ' + @echo '$$ make clean ---> clean up all ' + @echo '$$ make info ---> get make info ' + @echo '-----------------------------------------------------------------------------------------' + @echo ' ' + +mpi: $(PROG) + +debug: $(PROG_DEBUG) + +valgrind_memcheck: $(PROG_MEMCHECK) + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + mpirun -n 4 valgrind --tool=memcheck -s --default-suppressions=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 9 2 + @echo 'oooOOO... valgrind_memcheck ...OOOooo' + +valgrind_callgrind: $(PROG_CALLGRIND) + @echo 'oooOOO... valgrind_callgrind ...OOOooo' + mpirun -n 4 valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 9 2 + @echo ' ' + @echo 'To generate a function-by-function summary from the profile data file:' + @echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + +valgrind_cachegrind: $(PROG_CACHEGRIND) + @echo 'oooOOO... 
valgrind_cachegrind ...OOOooo' + mpirun -n 4 valgrind --tool=cachegrind --log-file=valgrind_cachegrind_log_.%p.txt ./$< 9 2 + @echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less' + @echo '(kcachegrind is required in order to visualize the output using the GUI)' + @echo 'oooOOO... valgrind_cachegrind ...OOOooo' + +clean: + rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~ + rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND) + rm -f valgrind_*.txt + rm -f cachegrind.out.* + rm -f callgrind.* + rm -f *bin + rm -f jacobi_mpi_comp_comm_* diff --git a/jacobi/mpi/comp_comm_io/include/allvars.h b/jacobi/mpi/comp_comm_io/include/allvars.h new file mode 100644 index 0000000000000000000000000000000000000000..c065228237923d421eff50a673dfbd54d5c70f90 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/include/allvars.h @@ -0,0 +1,20 @@ +#pragma once + +#define NGHOST 1 +#define NDIM 2 /* 2D */ +#define X 0 +#define Y 1 +#define TOL 1.e-5 + +#include <mpi.h> + +#define MASTERTASK 0 + +#if defined(SINGLE_PRECISION) +typedef float MyData; +#define MPI_MyDatatype MPI_FLOAT +#else +/* double precision is assumed by default */ +typedef double MyData; +#define MPI_MyDatatype MPI_DOUBLE +#endif /* SINGLE_PRECISION */ diff --git a/jacobi/mpi/comp_comm_io/include/tools.h b/jacobi/mpi/comp_comm_io/include/tools.h new file mode 100644 index 0000000000000000000000000000000000000000..0167c89d349118ae2d970b23308de8abe19f350b --- /dev/null +++ b/jacobi/mpi/comp_comm_io/include/tools.h @@ -0,0 +1,18 @@ +#pragma once + +#include "allvars.h" +#include <assert.h> +#include <sys/time.h> +#include <time.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> + +/* function prototypes */ +MyData **Allocate_2DdblArray(const int nx, const int ny); +void Show_2DdblArray( MyData **const A, + const int nx, + const int ny, + const char *const string); + +double seconds(void); diff --git a/jacobi/mpi/comp_comm_io/make.def b/jacobi/mpi/comp_comm_io/make.def new file mode 100644 index 0000000000000000000000000000000000000000..911e34103f99fade5c989d4c1589588d139a2d26 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/make.def @@ -0,0 +1,45 @@ +CC = mpicc +CFLAGS = -Wall -Wextra -march=native +LIBS = -lm -lmpi + +SYSTYPE = $(strip $(shell uname -n)) + +PROG = jacobi_mpi_comp_comm_$(SYSTYPE) +PROG_DEBUG = $(PROG)_DEBUG +PROG_MEMCHECK = $(PROG)_MEMCHECK +PROG_CALLGRIND = $(PROG)_CALLGRIND +PROG_CACHEGRIND = $(PROG)_CACHEGRIND + +HEADERS = $(wildcard ./include/*.h) +SOURCES = $(wildcard ./src/*.c) +DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile + +$(PROG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_DEBUG): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -ggdb3 -DDEBUG -fno-omit-frame-pointer -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_MEMCHECK): $(DEPENDENCIES) + $(CC) $(CFLAGS) -Og -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CALLGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g -O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' + +$(PROG_CACHEGRIND): $(DEPENDENCIES) + $(CC) $(CFLAGS) -g 
-O3 -I./include -I$(MPI_INC) $(SOURCES) -o $@ -L$(MPI_LIB) $(LIBS) + @echo ' ' + @echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine' + @echo ' ' diff --git a/jacobi/mpi/comp_comm_io/make_mpi_path b/jacobi/mpi/comp_comm_io/make_mpi_path new file mode 100644 index 0000000000000000000000000000000000000000..3fdf888da9d4837eb7eafb51c93e53516774ce93 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/make_mpi_path @@ -0,0 +1,10 @@ +# set the MPI install path + +# pleiadi +MPI_INSTALL_PATH = /opt/cluster/spack/opt/spack/linux-centos7-broadwell/gcc-11.2.0/openmpi-4.1.3-djxjqlmzbqwq76bhh3wvgxaefnoczleg + + + +MPI = $(MPI_INSTALL_PATH) +MPI_INC = $(MPI)/include +MPI_LIB = $(MPI)/lib \ No newline at end of file diff --git a/jacobi/mpi/comp_comm_io/script/input_parameters b/jacobi/mpi/comp_comm_io/script/input_parameters new file mode 100644 index 0000000000000000000000000000000000000000..53a1b43b137c6fdcb3cf54696925d0b310b2a369 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/script/input_parameters @@ -0,0 +1,10 @@ +########################################################################## + +# set the grid size + +GRID_SIZE_X=128 +GRID_SIZE_Y=128 + +TASKS=(2 4 8 16) + +########################################################################## diff --git a/jacobi/mpi/comp_comm_io/script/run_pleiadi.sh b/jacobi/mpi/comp_comm_io/script/run_pleiadi.sh new file mode 100755 index 0000000000000000000000000000000000000000..225dedb5e28e045ac243e1f79aa9a79ee7efa546 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/script/run_pleiadi.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +######################### RESOURSE ALLOCATION ##################################### +##SBATCH --account=???????? + +#SBATCH --partition=pleiadi +#SBATCH --job-name="Jacobi" +#SBATCH --nodes=1 +#SBATCH --exclusive +#SBATCH --output=Jacobi-mpi-comp-comm-%j.out +#SBATCH --error=Jacobi-mpi-comp-comm.%j.err +#SBATCH --time=00:05:00 +################################################################################### + +################################## MODULES ######################################## +export MODULE_VERSION=5.0.1 +source /opt/cluster/spack/share/spack/setup-env.sh + +# module purge +module purge +# load GCC +module load default-gcc-11.2.0 +################################################################################### + +# input parameters +source input_parameters + +WORKDIR=${PWD} +# compile the application +cd .. && make clean && make mpi +if [[ "$?" != "0" ]] +then + echo "Cannot compile the application ...aborting..." + exit 1 +fi + +# get the executable +EXEC=$(find $(realpath ./) -maxdepth 1 -executable -name "jacobi_*" -type f -print) +if [[ "$?" != "0" ]] +then + echo "Cannot find the executable ...aborting..." + exit 2 +fi + +for TASK in ${TASKS[@]} +do + # run the application + time mpirun -n ${TASK} ${EXEC} ${GRID_SIZE_X} ${GRID_SIZE_Y} |& tee ${EXEC}_TASK_${TASK}_output.txt +done + +cd ${WORKDIR} + +exit 0 diff --git a/jacobi/mpi/comp_comm_io/src/jacobi_2D_mpi_comp_comm_io.c b/jacobi/mpi/comp_comm_io/src/jacobi_2D_mpi_comp_comm_io.c new file mode 100644 index 0000000000000000000000000000000000000000..9522d6c149b73cdd216ae3e573821ecc919398d7 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/src/jacobi_2D_mpi_comp_comm_io.c @@ -0,0 +1,597 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. 
Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "allvars.h" +#include "tools.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> + +static int NX_GLOB, NY_GLOB; + +typedef struct MyGrid +{ + int local_start[NDIM]; /* Local start index in each dimension */ + int local_end[NDIM]; /* Local end index in each dimension */ + int global_start[NDIM]; /* Global start index in each dimension */ + int global_end[NDIM]; /* Global end index in each dimension */ + int dim[NDIM]; /* Local domain size (no ghosts) */ +} myDomain; + + +typedef struct Task_2D_Cartesian +{ + int rank; /* Local process rank */ + int nranks; /* Communicator size */ + int coords[NDIM]; /* Cartesian topology coordinate */ + myDomain domain; /* MyGrid structure (defined above) */ + int nbrtop; /* Top neighbor process in cartesian topology */ + int nbrbottom; /* Bottom neighbor process in cartesian topology */ + int nbrleft; /* Left neighbor process in cartesian topology */ + int nbrright; /* Right neighbor process in cartesian topology */ + MPI_Comm comm2d; /* Cartesian communicator */ +} Task; + +/* function prototypes */ +void BoundaryConditions(MyData **const restrict, + MyData *const restrict, + MyData *const restrict, + Task *const restrict); + +void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, const MyData *restrict, + const int, const int, const int, const int, MyData *const restrict); + +void Jacobi_Communication(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData * restrict delta, + Task *const restrict ThisTask); + +/* void WriteSolution(MyData **const phi, const int nx, const int ny); */ +void WriteSolutionParallel(MyData **const phi, Task *const ThisTask); + +int main(int argc, char **argv) +{ + int rank, Nranks; + const MPI_Comm comm = MPI_COMM_WORLD; + MPI_Init(&argc, &argv); + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &Nranks); + + if ((argc <= 2) && (!rank)) + { + printf("\n\t Usage: <executable> <grid_size> <cartesian communicator size_X>\n\n"); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* global X and Y grid size (square matrix supposed) */ + NX_GLOB = (int) strtol(argv[1], NULL, 10); + NY_GLOB = NX_GLOB; + + /********************************** Cartesin topology *******************************************************/ + const int cartesian_grid_x = (int)strtol(argv[2], NULL, 10); + const int cartesian_grid_y = ((Nranks % cartesian_grid_x == 0) ? (Nranks / cartesian_grid_x) : -1); + if (cartesian_grid_y == -1) + { + if (!rank) + { + printf("\n\t Nranks mod cartesian_grid_x != 0 ... 
aborting ...\n"); + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + } + + const int dims[NDIM] = {cartesian_grid_x, cartesian_grid_y}; + const int periods[NDIM] = {0 ,0}; /* not periodic */ + const int reorder = 0; /* setting the logical value reorder to TRUE allows MPI */ + /* to reorder the rank of the processes figuring out the */ + /* neighbors in the actual hardware for better performance */ + + /* make a new communicator to which topology information has been attached */ + Task ThisTask; + ThisTask.comm2d = MPI_COMM_NULL; + MPI_Cart_create(MPI_COMM_WORLD, NDIM, dims, periods, reorder, &ThisTask.comm2d); + if (ThisTask.comm2d == MPI_COMM_NULL) + { + printf("\n\t Process %d is not part of the new communicator \n", rank); + fflush(stdout); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* get the comm size */ + MPI_Comm_size(ThisTask.comm2d, &ThisTask.nranks); + + /* determine the process coords in cartesian topology given rank in group */ + MPI_Cart_coords(ThisTask.comm2d, rank, NDIM, ThisTask.coords); + + /* determines process rank in communicator given Cartesian location */ + MPI_Cart_rank(ThisTask.comm2d, ThisTask.coords, &ThisTask.rank); + + /* get bottom and top neighbors (X direction) */ + MPI_Cart_shift(ThisTask.comm2d, X, 1, &ThisTask.nbrbottom, &ThisTask.nbrtop); + + /* get left and right neighbors (Y direction) */ + MPI_Cart_shift(ThisTask.comm2d, Y, 1, &ThisTask.nbrleft, &ThisTask.nbrright); + + /************************************************************************************************************/ + /* 2D MPI-cartesian decomposition: + the grids are distributed across the MPI processes. */ + + /* get the reminder, i.e. take into account uneven + decomposition of points among the processes */ + const int rem[NDIM] = {(NX_GLOB + 2) % dims[X], + (NY_GLOB + 2) % dims[Y]}; + + /* get the amount of data for each MPI process: + - chunk is supposed to be >= 1 */ + const int chunk[NDIM] = {(NX_GLOB + 2 - rem[X]) / dims[X], + (NY_GLOB + 2 - rem[Y]) / dims[Y]}; + if ((chunk[X] < 1) || (chunk[Y] < 1)) + { + printf("\n\t chunk[X] < 1 || chunk[Y] < 1 ... aborting ...[rank: %d]\n", rank); + MPI_Comm_free(&ThisTask.comm2d); + MPI_Abort(comm, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + + /* get the subgrid dimension along X and Y directions: */ + int incr[NDIM], offset[NDIM]; + for (int dim=0 ; dim<NDIM ; dim++) + { + if (ThisTask.coords[dim] < rem[dim]) + { + incr[dim] = chunk[dim] + 1; + offset[dim] = 0; + } + else + { + incr[dim] = chunk[dim]; + offset[dim] = rem[dim]; + } + } + + /* subdomain managed by the task */ + for (int dim=0 ; dim<NDIM ; dim++) + { + ThisTask.domain.global_start[dim] = ((ThisTask.coords[dim] * incr[dim]) + offset[dim]); + ThisTask.domain.global_end[dim] = (ThisTask.domain.global_start[dim] + incr[dim]) - 1; + + /* boundaries */ + ThisTask.domain.global_start[dim] = ((ThisTask.domain.global_start[dim] == 0) ? NGHOST : ThisTask.domain.global_start[dim]); + ThisTask.domain.global_end[dim] = ((ThisTask.domain.global_end[dim] == NX_GLOB + 1) ? 
NX_GLOB : ThisTask.domain.global_end[dim]); + + ThisTask.domain.dim[X] = (ThisTask.domain.global_end[X] - ThisTask.domain.global_start[X] + 1); + ThisTask.domain.dim[Y] = (ThisTask.domain.global_end[Y] - ThisTask.domain.global_start[Y] + 1); + + /* local index */ + ThisTask.domain.local_start[X] = ThisTask.domain.local_start[Y] = 1; + ThisTask.domain.local_end[X] = ThisTask.domain.dim[X]; + ThisTask.domain.local_end[Y] = ThisTask.domain.dim[Y]; + } + +#if defined(DEBUG) + for (int task=0 ; task<ThisTask.nranks ; task++) + { + if (ThisTask.rank == task) + { + printf("\n\t rank: %d", task); + printf("\n\t\t coords = [%d, %d]", ThisTask.coords[X], ThisTask.coords[Y]); + printf("\n\t\t domain.global_start[X] = %d - domain.global_end[X] = %d", ThisTask.domain.global_start[X], ThisTask.domain.global_end[X]); + printf("\n\t\t domain.global_start[Y] = %d - domain.global_end[Y] = %d", ThisTask.domain.global_start[Y], ThisTask.domain.global_end[Y]); + printf("\n\t\t domain.local_start[X] = %d - domain.local_end[X] = %d", ThisTask.domain.local_start[X], ThisTask.domain.local_end[X]); + printf("\n\t\t domain.local_start[Y] = %d - domain.local_end[Y] = %d", ThisTask.domain.local_start[Y], ThisTask.domain.local_end[Y]); + printf("\n\t\t domain.dim[X] = %d - domain.dim[Y] = %d", ThisTask.domain.dim[X], ThisTask.domain.dim[Y]); + printf("\n\t\t nbrtop = %d - nbrbottom = %d", ThisTask.nbrtop, ThisTask.nbrbottom); + printf("\n\t\t nbrleft = %d - nbrright = %d\n", ThisTask.nbrleft, ThisTask.nbrright); + fflush(stdout); + } + MPI_Barrier(ThisTask.comm2d); + } +#endif /* DEBUG */ + + /******************************************************************************************************/ + + const MyData xbeg = 0.0; + const MyData xend = 1.0; + const MyData ybeg = 0.0; + const MyData yend = 1.0; + + const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1), + (yend - ybeg)/(NY_GLOB + 1)}; + + /* -------------------------------------------------------- + 1. Set grid indices + -------------------------------------------------------- */ + + const int ibeg = NGHOST; + const int jbeg = NGHOST; + + /* -------------------------------------------------------- + 2. Generate grids, allocate memory + distributed across MPI processes + -------------------------------------------------------- */ + + /* memory allocation */ + MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData)); + MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData)); + assert((xg != NULL) && (yg != NULL)); + + /* initial conditions */ + for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X]; + for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y]; + + /* grids memory allocation distributed across MPI processes */ + MyData **phi = Allocate_2DdblArray(ThisTask.domain.dim[X] + 2, ThisTask.domain.dim[Y] + 2); + MyData **phi0 = Allocate_2DdblArray(ThisTask.domain.dim[X] + 2, ThisTask.domain.dim[Y] + 2); + + /* -------------------------------------------------------- + 3. Set boundary conditions + -------------------------------------------------------- */ + BoundaryConditions(phi0, xg, yg, &ThisTask); + BoundaryConditions(phi, xg, yg, &ThisTask); + free(yg); + free(xg); + + /* -------------------------------------------------------- + 4. Main iteration cycle + -------------------------------------------------------- */ + + const double time_start = MPI_Wtime(); + + /* iterations */ + int k = 0; + while (1) + { + /* -- 4a. 
Jacobi algorithm overlapping computation and communication */ + MyData err; + Jacobi_Communication(phi, phi0, &err, delta, &ThisTask); + + /* -- 4b. Get the total error */ + /* combines values from all processes and distributes the result back to all processes */ + MyData toterr; + MPI_Allreduce(&err, &toterr, 1, MPI_MyDatatype, MPI_SUM, ThisTask.comm2d); + + if (ThisTask.rank == MASTERTASK) + printf("\n\t Iteration = %d - err = %lg\n",k, toterr); + + /* increase the counter of loop iterations */ + k++; + + /* check convergence */ + if (toterr <= TOL) + { + break; + } + + /* swap the pointers */ + MyData **tmp = phi; + phi = phi0; + phi0 = tmp; + } + + /* ranks write the solution in parallel */ + WriteSolutionParallel(phi, &ThisTask); + + if (ThisTask.rank == MASTERTASK) + { + printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB); + printf("\n\t Time = %lf [s]\n\n", MPI_Wtime() - time_start); + } + + // free memory + if (phi0) + { + free(phi0[0]); + free(phi0); + } + + if (phi) + { + free(phi[0]); + free(phi); + } + + MPI_Comm_free(&ThisTask.comm2d); + + MPI_Finalize(); + + return 0; +} + +/* ********************************************************************* */ +void BoundaryConditions(MyData **const restrict phi, + MyData *const restrict x, + MyData *const restrict y, + Task *const restrict ThisTask) +/*************************************************************************/ +{ + /* left */ + if (ThisTask->nbrleft == MPI_PROC_NULL) /* no left neighbor */ + { + const int global_start_j = ThisTask->domain.global_start[X]; + for (int j=ThisTask->domain.local_start[X] ; j<=ThisTask->domain.local_end[X] ; j++) + phi[j][0] = (1.0 - y[global_start_j + j - 1]); + } + + /* right */ + if (ThisTask->nbrright == MPI_PROC_NULL) /* no right neighbor */ + { + const int global_start_j = ThisTask->domain.global_start[X]; + for (int j=ThisTask->domain.local_start[X] ; j<=ThisTask->domain.local_end[X] ; j++) + phi[j][ThisTask->domain.local_end[Y] + 1] = (y[global_start_j + j - 1] * y[global_start_j + j - 1]); + } + + /* bottom */ + if (ThisTask->nbrbottom == MPI_PROC_NULL) /* no bottom neighbor */ + { + const int global_start_i = ThisTask->domain.global_start[Y]; + for (int i=ThisTask->domain.local_start[Y] ; i<=ThisTask->domain.local_end[Y] ; i++) + phi[0][i] = (1.0 - x[global_start_i + i - 1]); + } + + /* top */ + if (ThisTask->nbrtop == MPI_PROC_NULL) /* no top neighbor */ + { + const int global_start_i = ThisTask->domain.global_start[Y]; + for (int i=ThisTask->domain.local_start[Y] ; i<=ThisTask->domain.local_end[Y] ; i++) + phi[ThisTask->domain.local_end[X] + 1][i] = x[global_start_i + i - 1]; + } + +#if defined(DEBUG) + + if (ThisTask->rank == MASTERTASK) + { + printf("\n"); + for (int i=0 ; i<NX_GLOB+2*NGHOST ; i++) + printf("\n\t x[%d] = %8.2f", i, x[i]); + + printf("\n"); + for (int i=0 ; i<NX_GLOB+2*NGHOST ; i++) + printf("\n\t y[%d] = %8.2f", i, y[i]); + + printf("\n"); + fflush(stdout); + } + + MPI_Barrier(ThisTask->comm2d); + + for (int task=0 ; task<ThisTask->nranks ; task++) + { + if (task == ThisTask->rank) + { + char string[128]; + sprintf(string, "Task %d - Phi", task); + + Show_2DdblArray(phi, ThisTask->domain.dim[X] + 2, ThisTask->domain.dim[Y] + 2, string); + } + + MPI_Barrier(ThisTask->comm2d); + } + +#endif /* DEBUG */ + + return; +} + +/* ********************************************************************* */ + +void JacobiAlgorithm(MyData **const restrict Phi, + MyData **const restrict Phi0, + const MyData *restrict delta, + const int jbeg, + const int jend, + const 
int ibeg, + const int iend, + MyData *const restrict error) +{ + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + Phi[j][i] = 0.25 * (Phi0[j][i-1] + Phi0[j][i+1] + + Phi0[j-1][i] + Phi0[j+1][i]); + + *error += (delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i])); + } /* loop over columns */ + } /* loop over rows */ + + return; +} + +void get_error(MyData **const restrict Phi, + MyData **const restrict Phi0, + const MyData *restrict delta, + const int jbeg, + const int jend, + const int ibeg, + const int iend, + MyData *const restrict error) +{ + *error = 0.0; + for (int j=jbeg ; j<=jend ; j++) + { + for (int i=ibeg ; i<=iend ; i++) + { + *error += (delta[X] * delta[Y] * fabs(Phi[j][i] - Phi0[j][i])); + } /* loop over columns */ + } /* loop over rows */ + + return; +} + +void Jacobi_Communication(MyData **const restrict Phi, + MyData **const restrict Phi0, + MyData *const restrict error, + const MyData * restrict delta, + Task *const restrict ThisTask) +{ + /* custom datatype */ + MPI_Datatype column; + MPI_Type_vector(ThisTask->domain.dim[X], 1, ThisTask->domain.dim[Y] + 2, MPI_MyDatatype, &column); + /* commits the datatype */ + MPI_Type_commit(&column); + + const int data_row_size = ThisTask->domain.dim[Y]; + + /* First task: issue the communication */ + + MyData **const restrict buffer = Phi0; + +#if MPI_VERSION > 4 + + MPI_Request request[4]; + + MPI_Isendrecv(&buffer[ThisTask->domain.local_end[X] ][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop, 0, + &buffer[ThisTask->domain.local_start[X] - 1][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 0, + ThisTask->comm2d, &request[0]); + + MPI_Isendrecv(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 1, + &buffer[ThisTask->domain.local_end[X] + 1][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop, 1, + ThisTask->comm2d, &request[1]); + + MPI_Isendrecv(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_end[Y] ], 1, column, ThisTask->nbrright, 2, + &buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_start[Y] - 1], 1, column, ThisTask->nbrleft, 2, + ThisTask->comm2d, &request[2]); + + MPI_Isendrecv(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_start[Y] ], 1, column, ThisTask->nbrleft, 3, + &buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_end[Y] + 1], 1, column, ThisTask->nbrright, 3, + ThisTask->comm2d, &request[3]); + +#else + + MPI_Request request[8]; + + MPI_Irecv(&buffer[ThisTask->domain.local_start[X] - 1][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 0, ThisTask->comm2d, &request[0]); + MPI_Irecv(&buffer[ThisTask->domain.local_end[X] + 1][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop , 1, ThisTask->comm2d, &request[1]); + MPI_Irecv(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_start[Y] - 1], 1, column, ThisTask->nbrleft, 2, ThisTask->comm2d, &request[2]); + MPI_Irecv(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_end[Y] + 1], 1, column, ThisTask->nbrright, 3, ThisTask->comm2d, &request[3]); + + MPI_Isend(&buffer[ThisTask->domain.local_end[X] ][ThisTask->domain.local_start[Y] ], data_row_size, MPI_MyDatatype, ThisTask->nbrtop, 0, ThisTask->comm2d, &request[4]); + MPI_Isend(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_start[Y] ], 
data_row_size, MPI_MyDatatype, ThisTask->nbrbottom, 1, ThisTask->comm2d, &request[5]); + MPI_Isend(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_end[Y] ], 1, column, ThisTask->nbrright, 2, ThisTask->comm2d, &request[6]); + MPI_Isend(&buffer[ThisTask->domain.local_start[X] ][ThisTask->domain.local_start[Y] ], 1, column, ThisTask->nbrleft, 3, ThisTask->comm2d, &request[7]); + +#endif + + /**************************************** computation ****************************************/ + /* perform the computation with the local data, (i.e. ghost cells are not required) */ + /* so overlapping computation and communication */ + + const int jbeg = ThisTask->domain.local_start[X] + 1; + const int jend = ThisTask->domain.local_end[X] - 1; + const int ibeg = ThisTask->domain.local_start[Y] + 1; + const int iend = ThisTask->domain.local_end[Y] - 1; + + *error = 0.0; + JacobiAlgorithm(Phi, Phi0, delta, jbeg, jend, ibeg, iend, error); + + /*********************************************************************************************/ + +#if MPI_VERSION > 4 + /* wait the data on the boundaries */ + MPI_Waitall(4, request, MPI_STATUSES_IGNORE); +#else + MPI_Waitall(8, request, MPI_STATUSES_IGNORE); +#endif + + /* nbrbottom */ + JacobiAlgorithm(Phi, Phi0, delta, + ThisTask->domain.local_start[X], ThisTask->domain.local_start[X], + ThisTask->domain.local_start[Y], ThisTask->domain.local_end[Y], + error); + + /* nbrtop */ + JacobiAlgorithm(Phi, Phi0, delta, + ThisTask->domain.local_end[X], ThisTask->domain.local_end[X], + ThisTask->domain.local_start[Y], ThisTask->domain.local_end[Y], + error); + + /* nbrleft */ + JacobiAlgorithm(Phi, Phi0, delta, + ThisTask->domain.local_start[X], ThisTask->domain.local_end[X], + ThisTask->domain.local_start[Y], ThisTask->domain.local_start[Y], + error); + + /* nbrright */ + JacobiAlgorithm(Phi, Phi0, delta, + ThisTask->domain.local_start[X], ThisTask->domain.local_end[X], + ThisTask->domain.local_end[Y], ThisTask->domain.local_end[Y], + error); + + /* Round-off error fixing ??? */ + { + *error = 0.0; + get_error(Phi, Phi0, delta, + ThisTask->domain.local_start[X], ThisTask->domain.local_end[X], + ThisTask->domain.local_start[Y], ThisTask->domain.local_end[Y], + error); + } + + MPI_Type_free(&column); + + return; +} + +void WriteSolutionParallel(MyData **const phi, Task *const ThisTask) +{ + /* 1. Every process sets up the subarray */ + /* (i.e. its own domain whithout ghost cells) */ + /* We will use this as arguments to MPI_File_write(), */ + /* which allows the program to write the file in parallel */ + MPI_Datatype subDomain; + /* (local) sub-domain size */ + const int lsize[NDIM] = {ThisTask->domain.dim[X], ThisTask->domain.dim[Y]}; + /* entire domain (i.e. with ghost cells) */ + const int domain[NDIM] = {lsize[X] + 2 * NGHOST, lsize[Y] + 2 * NGHOST}; + /* (local) starting coordinates of the subDomain in each dimension */ + const int lstart[NDIM] = {ThisTask->domain.local_start[X], ThisTask->domain.local_start[Y]}; + MPI_Type_create_subarray(NDIM, domain, lsize, lstart, MPI_ORDER_C, MPI_MyDatatype, &subDomain); + MPI_Type_commit(&subDomain); + + /* 2. The file view must be set by creating a second subarray datatype, */ + /* defining the process' view on the file. 
*/ + MPI_Datatype ProcessDomain; + /* (global) domain size */ + const int gsize[NDIM] = {NX_GLOB, NY_GLOB}; + /* (global) starting coordinates of the subDomain in each dimension */ + const int gstart[NDIM] = {ThisTask->domain.global_start[X] - 1, ThisTask->domain.global_start[Y] - 1}; + MPI_Type_create_subarray(NDIM, gsize, lsize, gstart, MPI_ORDER_C, MPI_MyDatatype, &ProcessDomain); + MPI_Type_commit(&ProcessDomain); + + /* 3.a Open file for writing */ + static int nfile = 0; + char fname[128]; + sprintf(fname,"jacobi_2D_mpi_comp_comm_io_%02d.bin", nfile); + /* delete the file if already exists */ + /* MPI_File_delete(fname, MPI_INFO_NULL); */ + MPI_File fh; + MPI_File_open(ThisTask->comm2d, fname, + MPI_MODE_CREATE | MPI_MODE_WRONLY, + MPI_INFO_NULL, &fh); + + + /* 3.b Set the process’s view of the data in the file */ + MPI_File_set_view(fh, 0, MPI_MyDatatype, ProcessDomain, "native", MPI_INFO_NULL); + + /* 3.c Writes a file starting at the locations specified by individual file pointers */ + /* (blocking collective) */ + MPI_File_write_all(fh, phi[0], 1, subDomain, MPI_STATUS_IGNORE); + + /* 3.d Close a file (collective). */ + MPI_File_close(&fh); + + /* free the MPI_Datatype */ + MPI_Type_free(&subDomain); + MPI_Type_free(&ProcessDomain); + + return; +} diff --git a/jacobi/mpi/comp_comm_io/src/tools.c b/jacobi/mpi/comp_comm_io/src/tools.c new file mode 100644 index 0000000000000000000000000000000000000000..0e282ee7953ced36d2d8d3060bad943ccf86f947 --- /dev/null +++ b/jacobi/mpi/comp_comm_io/src/tools.c @@ -0,0 +1,59 @@ +/* ///////////////////////////////////////////////////////////////////// */ +/* Authors: A. Mignone (mignone@to.infn.it) */ +/* V. Cesare (valentina.cesare@inaf.it) */ +/* D. Goz (david.goz@inaf.it) */ +/* */ +/* Date : June 2024 */ +/* */ +/* ///////////////////////////////////////////////////////////////////// */ + +#include "tools.h" + +/* ********************************************************************* */ +MyData **Allocate_2DdblArray(const int nx, const int ny) +/* + * Allocate contiguous memory for a MyData array with + * nx rows and ny columns + *********************************************************************** */ +{ + MyData **buf = calloc(nx, sizeof(MyData *)); + assert(buf != NULL); + + buf[0] = (MyData *) calloc(nx * ny, sizeof(MyData)); + assert(buf[0] != NULL); + + for (int j=1 ; j<nx ; j++) + buf[j] = buf[j-1] + ny; + + return buf; +} + +/* ********************************************************************* */ +void Show_2DdblArray( MyData **const A, + const int nx, + const int ny, + const char *const string) +/* *********************************************************************** */ +{ + printf ("%s\n",string); + printf ("------------------------------\n"); + for (int i=0 ; i<nx ; i++) + { + for (int j=0 ; j<ny ; j++) + { + printf ("%8.2f ", A[i][j]); + } + printf ("\n"); + } + printf ("------------------------------\n"); +} +/* ********************************************************************* */ + +double seconds() +{ + struct timeval tmp; + gettimeofday(&tmp, (struct timezone *)0); + double sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; + + return sec; +} diff --git a/jacobi/mpi/miscellaneous/cartesian b/jacobi/mpi/miscellaneous/cartesian new file mode 100755 index 0000000000000000000000000000000000000000..cef6cb6704f8709644945aeb816d610ad2814635 Binary files /dev/null and b/jacobi/mpi/miscellaneous/cartesian differ diff --git a/jacobi/mpi/miscellaneous/cartesian.cpp b/jacobi/mpi/miscellaneous/cartesian.c similarity index 64% 
rename from jacobi/mpi/miscellaneous/cartesian.cpp rename to jacobi/mpi/miscellaneous/cartesian.c index 7b5a7e505a9d63ade4f4e8e35f48cb895e8e1660..8778f5828c609db41ef8aef95f8bcd484fdceb25 100644 --- a/jacobi/mpi/miscellaneous/cartesian.cpp +++ b/jacobi/mpi/miscellaneous/cartesian.c @@ -1,5 +1,6 @@ #include <mpi.h> -#include <iostream> +#include <stdio.h> +#include <stdlib.h> #define SIZE 2 #define X 0 @@ -7,8 +8,6 @@ int main(int argc, char **argv) { - using namespace std; - int task, ntasks; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &ntasks); @@ -18,7 +17,7 @@ int main(int argc, char **argv) { if (!task) { - cout << "\n\t Usage: <executable> <number processes along X> \n" << endl; + printf("\n\t Usage: <executable> <number processes along X> \n"); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); exit(EXIT_FAILURE); } @@ -30,15 +29,16 @@ int main(int argc, char **argv) { if (!task) { - cout << "\n\t ntasks % cartesian_grid_x != 0 ... aborting ...\n" << endl; + printf("\n\t ntasks %% cartesian_grid_x != 0 ... aborting ...\n"); + fflush(stdout); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); exit(EXIT_FAILURE); } } - static int dims[SIZE] = {cartesian_grid_x, cartesian_grid_y}; - static int periods[SIZE] = {0, 0}; - static int reorder = 0; + const int dims[SIZE] = {cartesian_grid_x, cartesian_grid_y}; + const int periods[SIZE] = {0, 0}; + const int reorder = 0; MPI_Comm comm2d; MPI_Cart_create(MPI_COMM_WORLD, SIZE, dims, periods, reorder, &comm2d); @@ -56,11 +56,11 @@ int main(int argc, char **argv) MPI_Barrier(MPI_COMM_WORLD); if (rank == task) { - cout << "\n\t Task: " << task << endl; - cout << "\t\t coords[" << coords[X] << "," << coords[Y] << "]" << endl; - cout << "\t\t nbrright: " << nbrright << " - nbrleft : " << nbrleft << endl; - cout << "\t\t nbrtop : " << nbrtop << " - nbrbottom: " << nbrbottom << endl; - cout << endl; + printf("\n\t Task: %d\n", task); + printf("\t\t coords[%d, %d]", coords[X], coords[Y]); + printf("\n\t\t nbrright: %d - nbrleft : %d", nbrright, nbrleft); + printf("\n\t\t nbrtop : %d - nbrbottom: %d \n\n", nbrtop, nbrbottom); + fflush(stdout); } } diff --git a/jacobi/mpi/miscellaneous/subarray b/jacobi/mpi/miscellaneous/subarray new file mode 100755 index 0000000000000000000000000000000000000000..5295b41860fe0dfe7b99d8b60a0c49a893e90303 Binary files /dev/null and b/jacobi/mpi/miscellaneous/subarray differ diff --git a/jacobi/mpi/miscellaneous/subarray.c b/jacobi/mpi/miscellaneous/subarray.c new file mode 100644 index 0000000000000000000000000000000000000000..254b3d43bb7c4cd9af9a675e29450f12f8eb185b --- /dev/null +++ b/jacobi/mpi/miscellaneous/subarray.c @@ -0,0 +1,114 @@ +#include <stdio.h> +#include <stdlib.h> +#include <mpi.h> +#include <assert.h> + +void printarr(int **data, int n, char *str); +int **allocarray(const int n); +void deallocarray(int **); + +int main(int argc, char **argv) +{ + /* array sizes */ + const int bigsize = 10; + const int subsize = 5; + + /* communications parameters */ + const int sender = 0; + const int receiver = 1; + const int ourtag = 2; + + int rank, size; + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + if (size < (receiver + 1)) + { + if (rank == 0) + fprintf(stderr,"\n\t %s: Needs at least %d processors.\n", argv[0], receiver+1); + + MPI_Finalize(); + return 1; + } + + if (rank == sender) + { + int **bigarray = allocarray(bigsize); + for (int i=0 ; i<bigsize ; i++) + for (int j=0 ; j<bigsize ; j++) + bigarray[i][j] = (i * bigsize + j); + + printarr(bigarray, bigsize, "\n 
Sender: Big array \n"); + + MPI_Datatype mysubarray; + int starts[2] = {5,3}; + int subsizes[2] = {subsize,subsize}; + int bigsizes[2] = {bigsize, bigsize}; + MPI_Type_create_subarray(2, bigsizes, subsizes, starts, + MPI_ORDER_C, MPI_INT, &mysubarray); + MPI_Type_commit(&mysubarray); + + MPI_Send(&(bigarray[0][0]), 1, mysubarray, receiver, ourtag, MPI_COMM_WORLD); + MPI_Type_free(&mysubarray); + + deallocarray(bigarray); + } + else if (rank == receiver) + { + int **subarray = allocarray(subsize); + + for (int i=0; i<subsize; i++) + for (int j=0; j<subsize; j++) + subarray[i][j] = 0; + + MPI_Recv(&(subarray[0][0]), subsize * subsize, MPI_INT, sender, ourtag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + printarr(subarray, subsize, " Receiver: Subarray -- after receive"); + + deallocarray(subarray); + } + + MPI_Finalize(); + return 0; +} + +void printarr(int **data, + int n, + char *str) +{ + printf("-- %s --\n", str); + for (int i=0; i<n; i++) + { + for (int j=0; j<n; j++) + { + printf("%3d ", data[i][j]); + } + printf("\n"); + } +} + +int **allocarray(const int n) +{ + int *data = malloc(n * n * sizeof(int)); + assert(data != NULL); + + int **arr = malloc(n * sizeof(int *)); + assert(arr != NULL); + + for (int i=0 ; i<n ; i++) + arr[i] = &(data[i * n]); + + return arr; +} + +void deallocarray(int **array) +{ + if (array[0]) + free(array[0]); + + if (array) + free(array); + + return; +}