diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 2e8242e27d8d8d75520fead5f939d5c103031c1e..18649ede698c6695a8dfe7342fe74da469a99d94 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -200,6 +200,11 @@ sanity_stage:
       - chmod +x test_inclusion_outputs
       - valgrind --leak-check=full --log-file=valgrind_inclusion.log ./test_inclusion_outputs
       - grep "0 errors from 0 contexts" valgrind_inclusion.log
+      - rm -rf c_OINCLU
+      - chmod +x test_sphere_outputs
+      - valgrind --leak-check=full --log-file=valgrind_sphere.log ./test_sphere_outputs
+      - grep "0 errors from 0 contexts" valgrind_sphere.log
+      - rm -rf c_OSPH
       
 running_stage:
    stage: run
@@ -230,7 +235,7 @@ running_stage:
       - cd ../sphere
       - echo "Running np_sphere"
       - chmod +x np_sphere
-      - ./np_sphere
+      - OMP_NUM_THREADS=1 ./np_sphere
       - cd ../cluster
       - echo "Running np_cluster"
       - chmod +x np_cluster
@@ -308,4 +313,9 @@ testing_stage:
       - export FFILE=../../test_data/inclusion/OINCLU
       - python3 ../../src/scripts/pycompare.py --no-progress --ffile $FFILE --cfile c_OINCLU
       - rm -rf c_OINCLU
+      - chmod u+x test_sphere_outputs
+      - ./test_sphere_outputs
+      - export FFILE=../../test_data/sphere/OSPH
+      - python3 ../../src/scripts/pycompare.py --no-progress --ffile $FFILE --cfile c_OSPH
+      - rm -rf c_OSPH
       
\ No newline at end of file
diff --git a/README.md b/README.md
index 88a2a929a0d03e0c122d92e7f7b8dbdcc4ce8f06..c92092892c2af54f6c7176a50a137d0044f60883 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ Distributing the code and its sources is possible under the terms of the GNU GPL
 - Saija et al. 2001, ApJ, 559, 993, DOI:10.1086/322350
 - Borghese, Denti, Saija 2007, Scattering from Model Nonspherical Particles (ISBN 978-3-540-37413-8), DOI:10.1007/978-3-540-37414-5
 
-*NOTE:* The building process requires a working installation of a C++ and a FORTRAN compiler. Many solutions are available, but the recommended option is the *GNU Compiler Collection* `gcc` with the addition of `g++` and `gfortran`. The parallel code implementation further requires the use of parallel compilers complying with the MPI standard (*OpenMPI*, *MPICH*).
+*NOTE:* The building process requires a working installation of a C++ and a FORTRAN compiler. Many solutions are available, but the recommended option is the *GNU Compiler Collection* `gcc` with the addition of `g++` and `gfortran`. Single-workstation multi-threaded parallelism is supported through _OpenMP_, while multi-node execution further requires parallel compilers complying with the _MPI_ standard (_OpenMPI_, _MPICH_).
 
 # Acknowledgments
 
diff --git a/build/Makefile b/build/Makefile
index cba30075e68395cbbb87ea589cb98b5d691a00da..5ae144d03206c40cfbf01114f487808653b49d8d 100644
--- a/build/Makefile
+++ b/build/Makefile
@@ -29,8 +29,8 @@ NP_SPHERE_BINS=sphere/np_sphere
 NP_TRAPPING_SRCS=../src/trapping/np_trapping.cpp ../src/trapping/cfrfme.cpp ../src/trapping/clffft.cpp
 NP_TRAPPING_OBJS=../src/trapping/np_trapping.o ../src/trapping/cfrfme.o ../src/trapping/clffft.o
 NP_TRAPPING_BINS=trapping/np_trapping
-NP_TESTING_OBJS=../src/testing/test_cluster_outputs.o ../src/testing/test_inclusion_outputs.o ../src/testing/test_ParticleDescriptor.o ../src/testing/test_TEDF.o ../src/testing/test_TTMS.o
-NP_TESTING_BINS=testing/test_cluster_outputs testing/test_inclusion_outputs testing/test_ParticleDescriptor testing/test_TEDF testing/test_TTMS
+NP_TESTING_OBJS=../src/testing/test_cluster_outputs.o ../src/testing/test_inclusion_outputs.o ../src/testing/test_sphere_outputs.o ../src/testing/test_ParticleDescriptor.o ../src/testing/test_TEDF.o ../src/testing/test_TTMS.o
+NP_TESTING_BINS=testing/test_cluster_outputs testing/test_inclusion_outputs testing/test_sphere_outputs testing/test_ParticleDescriptor testing/test_TEDF testing/test_TTMS
 
 all: $(NPTM_LIB) $(FORTRAN_BINS) $(NP_CLUSTER_BINS) $(NP_INCLUSION_BINS) $(NP_SPHERE_BINS) $(NP_TRAPPING_BINS) $(NP_TESTING_BINS)
 
@@ -89,6 +89,9 @@ testing/test_cluster_outputs: $(NPTM_LIB) ../src/testing/test_cluster_outputs.o
 testing/test_inclusion_outputs: $(NPTM_LIB) ../src/testing/test_inclusion_outputs.o
 	$(CXX) $(CXXFLAGS) ../src/testing/test_inclusion_outputs.o -o $@ $(CXXLDFLAGS)
 
+testing/test_sphere_outputs: $(NPTM_LIB) ../src/testing/test_sphere_outputs.o
+	$(CXX) $(CXXFLAGS) ../src/testing/test_sphere_outputs.o -o $@ $(CXXLDFLAGS)
+
 testing/test_TEDF: $(NPTM_LIB) ../src/testing/test_TEDF.o
 	$(CXX) $(CXXFLAGS) ../src/testing/test_TEDF.o -o $@ $(CXXLDFLAGS)
 
diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp
index 7d2533eac1e1f313fcd14b29e8805c0aeb229534..3b50640f5c4ab253c5a40e8b8b0d1217cdaf4f32 100644
--- a/src/cluster/cluster.cpp
+++ b/src/cluster/cluster.cpp
@@ -95,9 +95,11 @@
 #include "../include/outputs.h"
 #endif
 
-using namespace std;
+#ifndef INCLUDE_ITERATION_DATA_H_
+#include "../include/IterationData.h"
+#endif
 
-// I would like to put it all in a struct, but then I'd have to write a constructor for it, due to members defined as references, creating a worse nightmare than the one I'd like to simplify...
+using namespace std;
 
 /*! \brief Main calculation loop.
  *
@@ -226,7 +228,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
     int nsph = gconf->number_of_spheres;
     // Sanity check on number of sphere consistency, should always be verified
     if (s_nsph == nsph) {
-      // Shortcuts to variables stored in configuration objects
       ScatteringAngles *p_scattering_angles = new ScatteringAngles(gconf);
       double wp = sconf->wp;
       // ClusterOutputInfo : Thread 0 of MPI process 0 allocates the memory to
@@ -295,8 +296,8 @@ void cluster(const string& config_file, const string& data_file, const string& o
 #pragma omp single
 	{
 	  jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp);
-	}
-      }
+	} // OMP single
+      } // OMP parallel
 #ifdef USE_NVTX
       nvtxRangePop();
 #endif
@@ -542,7 +543,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
     time_logger->log(message);
     fclose(timing_file);
     delete time_logger;
-  } // end instructions block of MPI process 0
+  } // end of instruction block for MPI process 0
   
     //===============================
     // instruction block for MPI processes different from 0
@@ -656,7 +657,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
 	}
       } // ixi488: close strided loop running on MPI processes
       
-	// Clean memory
+      // Clean memory
 #pragma omp barrier
       if (myompthread == 0) {
 	delete[] p_outarray;
@@ -736,6 +737,8 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
   if (jer != 0) {
     output->vec_ier[jindex - 1] = 1;
     output->vec_jxi[jindex - 1] = -jxi488;
+    logger->log("Error in HJV for scale " + to_string(jxi488) + "!", LOG_ERRO);
+    delete logger;
     return jer;
     // break; // rewrite this to go to the end of the function, to free locally allocated variables and return jer
   }
@@ -1531,3 +1534,536 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
 
   return jer;
 }
+
+// >>> IMPLEMENTATION OF ClusterIterationData CLASS <<<
+ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count) {
+  c1 = new ParticleDescriptorCluster(gconf, sconf);
+  const int ndi = c1->nsph * c1->nlim;
+  const np_int ndit = 2 * ndi;
+  gaps = new double[c1->nsph]();
+  tqev = new double[3]();
+  tqsv = new double[3]();
+  tqse = new double*[2];
+  tqspe = new dcomplex*[2];
+  tqss = new double*[2];
+  tqsps = new dcomplex*[2];
+  tqce = new double*[2];
+  tqcpe = new dcomplex*[2];
+  tqcs = new double*[2];
+  tqcps = new dcomplex*[2];
+  for (int ti = 0; ti < 2; ti++) {
+    tqse[ti] = new double[c1->nsph]();
+    tqspe[ti] = new dcomplex[c1->nsph]();
+    tqss[ti] = new double[c1->nsph]();
+    tqsps[ti] = new dcomplex[c1->nsph]();
+    tqce[ti] = new double[3]();
+    tqcpe[ti] = new dcomplex[3]();
+    tqcs[ti] = new double[3]();
+    tqcps[ti] = new dcomplex[3]();
+  }
+  gapv = new double[3]();
+  gapp = new dcomplex*[3];
+  gappm = new dcomplex*[3];
+  gap = new double*[3];
+  gapm = new double*[3];
+  for (int gi = 0; gi < 3; gi++) {
+    gapp[gi] = new dcomplex[2]();
+    gappm[gi] = new dcomplex[2]();
+    gap[gi] = new double[2]();
+    gapm[gi] = new double[2]();
+  }
+  u = new double[3]();
+  us = new double[3]();
+  un = new double[3]();
+  uns = new double[3]();
+  up = new double[3]();
+  ups = new double[3]();
+  unmp = new double[3]();
+  unsmp = new double[3]();
+  upmp = new double[3]();
+  upsmp = new double[3]();
+  argi = new double[1]();
+  args = new double[1]();
+  duk = new double[3]();
+  cextlr = new double*[4];
+  cext = new double*[4];
+  cmullr = new double*[4];
+  cmul = new double*[4];
+  for (int ci = 0; ci < 4; ci++) {
+    cextlr[ci] = new double[4]();
+    cext[ci] = new double[4]();
+    cmullr[ci] = new double[4]();
+    cmul[ci] = new double[4]();
+  }
+  zpv = new double***[c1->lm];
+  for (int zi = 0; zi < c1->lm; zi++) {
+    zpv[zi] = new double**[3];
+    for (int zj = 0; zj < 3; zj++) {
+      zpv[zi][zj] = new double*[2];
+      for (int zk = 0; zk < 2; zk++) {
+	zpv[zi][zj][zk] = new double[2]();
+      }
+    }
+  }
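+  // Store the scattering coefficient matrix as one contiguous block, with am[] holding row pointers into it so it can be indexed as am[i][j].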
+  am_vector = new dcomplex[ndit * ndit]();
+  am = new dcomplex*[ndit];
+  for (int ai = 0; ai < ndit; ai++) {
+    am[ai] = (am_vector + ai * ndit);
+  }
+  
+  arg = 0.0 + 0.0 * I;
+  // These are suspect initializations
+  scan = 0.0;
+  cfmp = 0.0;
+  sfmp = 0.0;
+  cfsp = 0.0;
+  sfsp = 0.0;
+  // End of suspect initializations
+  wn = sconf->wp / 3.0e8;
+  xip = sconf->xip;
+  sqsfi = 1.0;
+  vk = 0.0;
+  number_of_scales = sconf->number_of_scales;
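+  // Distribute the scales across MPI ranks in contiguous blocks of xiblock elements, clamping the last block to the total number of scales.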
+  xiblock = (int) ceil(((double) (sconf->number_of_scales-1))/((double) mpidata->nprocs));
+  lastxi = ((mpidata->rank+1) * xiblock)+1;
+  firstxi = lastxi-xiblock+1;
+  if (lastxi > sconf->number_of_scales) lastxi = sconf->number_of_scales;
+
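+  // With MAGMA offload enabled, assign devices to MPI ranks in round-robin order; otherwise fall back to device 0.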
+#ifdef USE_MAGMA
+  proc_device = mpidata->rank % device_count;
+#else
+  proc_device = 0;
+#endif
+
+  // In the first iteration, if refinement is enabled, determine the number of refinement iterations required to arrive at the target accuracy (if achievable in a reasonable number of iterations)
+  refinemode = 2;
+  // maxrefiters and accuracygoal should be configurable and preferably set somewhere else
+  maxrefiters = 20;
+  accuracygoal = 1e-6;
+}
+
+ClusterIterationData::ClusterIterationData(const ClusterIterationData& rhs) {
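+  // Deep copy: allocate fresh buffers for every member and copy their contents, so the new instance owns all of its data.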
+  c1 = new ParticleDescriptorCluster(reinterpret_cast<ParticleDescriptorCluster &>(*(rhs.c1)));
+  const int ndi = c1->nsph * c1->nlim;
+  const np_int ndit = 2 * ndi;
+  gaps = new double[c1->nsph]();
+  for (int gi = 0; gi < c1->nsph; gi++) gaps[gi] = rhs.gaps[gi];
+  tqev = new double[3]();
+  tqsv = new double[3]();
+  for (int ti = 0; ti < 3; ti++) {
+    tqev[ti] = rhs.tqev[ti];
+    tqsv[ti] = rhs.tqsv[ti];
+  }
+  tqse = new double*[2];
+  tqspe = new dcomplex*[2];
+  tqss = new double*[2];
+  tqsps = new dcomplex*[2];
+  tqce = new double*[2];
+  tqcpe = new dcomplex*[2];
+  tqcs = new double*[2];
+  tqcps = new dcomplex*[2];
+  for (int ti = 0; ti < 2; ti++) {
+    tqse[ti] = new double[c1->nsph]();
+    tqspe[ti] = new dcomplex[c1->nsph]();
+    tqss[ti] = new double[c1->nsph]();
+    tqsps[ti] = new dcomplex[c1->nsph]();
+    for (int tj = 0; tj < c1->nsph; tj++) {
+      tqse[ti][tj] = rhs.tqse[ti][tj];
+      tqspe[ti][tj] = rhs.tqspe[ti][tj];
+      tqss[ti][tj] = rhs.tqss[ti][tj];
+      tqsps[ti][tj] = rhs.tqsps[ti][tj];
+    }
+    tqce[ti] = new double[3]();
+    tqcpe[ti] = new dcomplex[3]();
+    tqcs[ti] = new double[3]();
+    tqcps[ti] = new dcomplex[3]();
+    for (int tj = 0; tj < 3; tj++) {
+      tqce[ti][tj] = rhs.tqce[ti][tj];
+      tqcpe[ti][tj] = rhs.tqcpe[ti][tj];
+      tqcs[ti][tj] = rhs.tqcs[ti][tj];
+      tqcps[ti][tj] = rhs.tqcps[ti][tj];
+    }
+  }
+  gapv = new double[3]();
+  gapp = new dcomplex*[3];
+  gappm = new dcomplex*[3];
+  gap = new double*[3];
+  gapm = new double*[3];
+  for (int gi = 0; gi < 3; gi++) {
+    gapv[gi] = rhs.gapv[gi];
+    gapp[gi] = new dcomplex[2]();
+    gappm[gi] = new dcomplex[2]();
+    gap[gi] = new double[2]();
+    gapm[gi] = new double[2]();
+    for (int gj = 0; gj < 2; gj++) {
+      gapp[gi][gj] = rhs.gapp[gi][gj];
+      gappm[gi][gj] = rhs.gappm[gi][gj];
+      gap[gi][gj] = rhs.gap[gi][gj];
+      gapm[gi][gj] = rhs.gapm[gi][gj];
+    }
+  }
+  u = new double[3]();
+  us = new double[3]();
+  un = new double[3]();
+  uns = new double[3]();
+  up = new double[3]();
+  ups = new double[3]();
+  unmp = new double[3]();
+  unsmp = new double[3]();
+  upmp = new double[3]();
+  upsmp = new double[3]();
+  duk = new double[3]();
+  for (int ui = 0; ui < 3; ui++) {
+    u[ui] = rhs.u[ui];
+    us[ui] = rhs.us[ui];
+    un[ui] = rhs.un[ui];
+    uns[ui] = rhs.uns[ui];
+    up[ui] = rhs.up[ui];
+    ups[ui] = rhs.ups[ui];
+    unmp[ui] = rhs.unmp[ui];
+    unsmp[ui] = rhs.unsmp[ui];
+    upmp[ui] = rhs.upmp[ui];
+    upsmp[ui] = rhs.upsmp[ui];
+    duk[ui] = rhs.duk[ui];
+  }
+  argi = new double[1]();
+  args = new double[1]();
+  argi[0] = rhs.argi[0];
+  args[0] = rhs.args[0];
+  cextlr = new double*[4];
+  cext = new double*[4];
+  cmullr = new double*[4];
+  cmul = new double*[4];
+  for (int ci = 0; ci < 4; ci++) {
+    cextlr[ci] = new double[4]();
+    cext[ci] = new double[4]();
+    cmullr[ci] = new double[4]();
+    cmul[ci] = new double[4]();
+    for (int cj = 0; cj < 4; cj++) {
+      cextlr[ci][cj] = rhs.cextlr[ci][cj];
+      cext[ci][cj] = rhs.cext[ci][cj];
+      cmullr[ci][cj] = rhs.cmullr[ci][cj];
+      cmul[ci][cj] = rhs.cmul[ci][cj];
+    }
+  }
+  zpv = new double***[c1->lm];
+  for (int zi = 0; zi < c1->lm; zi++) {
+    zpv[zi] = new double**[3];
+    for (int zj = 0; zj < 3; zj++) {
+      zpv[zi][zj] = new double*[2];
+      for (int zk = 0; zk < 2; zk++) {
+	zpv[zi][zj][zk] = new double[2]();
+	zpv[zi][zj][zk][0] = rhs.zpv[zi][zj][zk][0];
+	zpv[zi][zj][zk][1] = rhs.zpv[zi][zj][zk][1];
+      }
+    }
+  }
+  am_vector = new dcomplex[ndit * ndit]();
+  for (np_int ai = 0; ai < ndit * ndit; ai++) am_vector[ai] = rhs.am_vector[ai];
+  am = new dcomplex*[ndit];
+  for (np_int ai = 0; ai < ndit; ai++) {
+    am[ai] = (am_vector + ai * ndit);
+  }
+  
+  arg = rhs.arg;
+  // These are suspect initializations
+  scan = rhs.scan;
+  cfmp = rhs.cfmp;
+  sfmp = rhs.sfmp;
+  cfsp = rhs.cfsp;
+  sfsp = rhs.sfsp;
+  // End of suspect initializations
+  wn = rhs.wn;
+  xip = rhs.xip;
+  sqsfi = rhs.sqsfi;
+  vk = rhs.vk;
+  firstxi = rhs.firstxi;
+  lastxi = rhs.lastxi;
+  xiblock = rhs.xiblock;
+  number_of_scales = rhs.number_of_scales;
+
+  proc_device = rhs.proc_device;
+  refinemode = rhs.refinemode;
+  maxrefiters = rhs.maxrefiters;
+  accuracygoal = rhs.accuracygoal;
+}
+
+#ifdef MPI_VERSION
+ClusterIterationData::ClusterIterationData(const mixMPI *mpidata, const int device_count) {
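+  // Non-root MPI processes allocate the same layout as rank 0 and receive the contents through MPI_Bcast calls that must mirror those issued in mpibcast().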
+  c1 = new ParticleDescriptorCluster(mpidata);
+  const int ndi = c1->nsph * c1->nlim;
+  const np_int ndit = 2 * ndi;
+  gaps = new double[c1->nsph]();
+  MPI_Bcast(gaps, c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  tqev = new double[3]();
+  tqsv = new double[3]();
+  MPI_Bcast(tqev, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(tqsv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  tqse = new double*[2];
+  tqspe = new dcomplex*[2];
+  tqss = new double*[2];
+  tqsps = new dcomplex*[2];
+  tqce = new double*[2];
+  tqcpe = new dcomplex*[2];
+  tqcs = new double*[2];
+  tqcps = new dcomplex*[2];
+  for (int ti = 0; ti < 2; ti++) {
+    tqse[ti] = new double[c1->nsph]();
+    tqspe[ti] = new dcomplex[c1->nsph]();
+    tqss[ti] = new double[c1->nsph]();
+    tqsps[ti] = new dcomplex[c1->nsph]();
+    MPI_Bcast(tqse[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqspe[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqss[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqsps[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    tqce[ti] = new double[3]();
+    tqcpe[ti] = new dcomplex[3]();
+    tqcs[ti] = new double[3]();
+    tqcps[ti] = new dcomplex[3]();
+    MPI_Bcast(tqce[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcpe[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcs[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcps[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  gapv = new double[3]();
+  gapp = new dcomplex*[3];
+  gappm = new dcomplex*[3];
+  gap = new double*[3];
+  gapm = new double*[3];
+  MPI_Bcast(gapv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  for (int gi = 0; gi < 3; gi++) {
+    gapp[gi] = new dcomplex[2]();
+    gappm[gi] = new dcomplex[2]();
+    gap[gi] = new double[2]();
+    gapm[gi] = new double[2]();
+    MPI_Bcast(gapp[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gappm[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gap[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gapm[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  }
+  u = new double[3]();
+  us = new double[3]();
+  un = new double[3]();
+  uns = new double[3]();
+  up = new double[3]();
+  ups = new double[3]();
+  unmp = new double[3]();
+  unsmp = new double[3]();
+  upmp = new double[3]();
+  upsmp = new double[3]();
+  duk = new double[3]();
+  MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  argi = new double[1]();
+  args = new double[1]();
+  MPI_Bcast(argi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  cextlr = new double*[4];
+  cext = new double*[4];
+  cmullr = new double*[4];
+  cmul = new double*[4];
+  for (int ci = 0; ci < 4; ci++) {
+    cextlr[ci] = new double[4]();
+    cext[ci] = new double[4]();
+    cmullr[ci] = new double[4]();
+    cmul[ci] = new double[4]();
+    MPI_Bcast(cextlr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cext[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cmullr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cmul[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  }
+  zpv = new double***[c1->lm];
+  for (int zi = 0; zi < c1->lm; zi++) {
+    zpv[zi] = new double**[3];
+    for (int zj = 0; zj < 3; zj++) {
+      zpv[zi][zj] = new double*[2];
+      for (int zk = 0; zk < 2; zk++) {
+	zpv[zi][zj][zk] = new double[2]();
+	MPI_Bcast(zpv[zi][zj][zk], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+      }
+    }
+  }
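+  // Receive the coefficient matrix one row at a time, since MPI_Bcast takes an int element count (see mpibcast below).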
+  am_vector = new dcomplex[ndit * ndit]();
+  am = new dcomplex*[ndit];
+  for (np_int ai = 0; ai < ndit; ai++) {
+    am[ai] = (am_vector + ai * ndit);
+    MPI_Bcast(am[ai], ndit, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sqsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  lastxi = ((mpidata->rank+1) * xiblock)+1;
+  firstxi = lastxi-xiblock+1;
+  if (lastxi > number_of_scales) lastxi = number_of_scales;
+
+#ifdef USE_MAGMA
+  proc_device = mpidata->rank % device_count;
+#else
+  proc_device = 0;
+#endif
+  MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+}
+
+void ClusterIterationData::mpibcast(const mixMPI *mpidata) {
+  c1->mpibcast(mpidata);
+  const int ndi = c1->nsph * c1->nlim;
+  const np_int ndit = 2 * ndi;
+  MPI_Bcast(gaps, c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(tqev, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(tqsv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  for (int ti = 0; ti < 2; ti++) {
+    MPI_Bcast(tqse[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqspe[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqss[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqsps[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqce[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcpe[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcs[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcps[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(gapv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  for (int gi = 0; gi < 3; gi++) {
+    MPI_Bcast(gapp[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gappm[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gap[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gapm[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(argi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  for (int ci = 0; ci < 4; ci++) {
+    MPI_Bcast(cextlr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cext[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cmullr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cmul[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  }
+  for (int zi = 0; zi < c1->lm; zi++) {
+    for (int zj = 0; zj < 3; zj++) {
+      for (int zk = 0; zk < 2; zk++) {
+	MPI_Bcast(zpv[zi][zj][zk], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+      }
+    }
+  }
+  // since MPI expects an int argument for the number of elements to transfer in one go, transfer am one row at a time
+  for (int ai = 0; ai < ndit; ai++) {
+    MPI_Bcast(am[ai], ndit, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sqsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+}
+#endif
+
+ClusterIterationData::~ClusterIterationData() {
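+  // Release every dynamically allocated member owned by this instance.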
+  const int nsph = c1->nsph;
+  delete[] am_vector;
+  delete[] am;
+  for (int zi = c1->lm - 1; zi > -1; zi--) {
+    for (int zj = 2; zj > -1; zj--) {
+      delete[] zpv[zi][zj][1];
+      delete[] zpv[zi][zj][0];
+      delete[] zpv[zi][zj];
+    }
+    delete[] zpv[zi];
+  }
+  delete[] zpv;
+  delete c1;
+  delete[] gaps;
+  for (int ti = 1; ti > -1; ti--) {
+    delete[] tqse[ti];
+    delete[] tqss[ti];
+    delete[] tqspe[ti];
+    delete[] tqsps[ti];
+    delete[] tqce[ti];
+    delete[] tqcpe[ti];
+    delete[] tqcs[ti];
+    delete[] tqcps[ti];
+  }
+  delete[] tqse;
+  delete[] tqss;
+  delete[] tqspe;
+  delete[] tqsps;
+  delete[] tqce;
+  delete[] tqcpe;
+  delete[] tqcs;
+  delete[] tqcps;
+  delete[] tqev;
+  delete[] tqsv;
+  for (int gi = 2; gi > -1; gi--) {
+    delete[] gapp[gi];
+    delete[] gappm[gi];
+    delete[] gap[gi];
+    delete[] gapm[gi];
+  }
+  delete[] gapp;
+  delete[] gappm;
+  delete[] gap;
+  delete[] gapm;
+  delete[] gapv;
+  delete[] u;
+  delete[] us;
+  delete[] un;
+  delete[] uns;
+  delete[] up;
+  delete[] ups;
+  delete[] unmp;
+  delete[] unsmp;
+  delete[] upmp;
+  delete[] upsmp;
+  delete[] argi;
+  delete[] args;
+  delete[] duk;
+  for (int ci = 3; ci > -1; ci--) {
+    delete[] cextlr[ci];
+    delete[] cext[ci];
+    delete[] cmullr[ci];
+    delete[] cmul[ci];
+  }
+  delete[] cextlr;
+  delete[] cext;
+  delete[] cmullr;
+  delete[] cmul;
+}
+// >>> END OF ClusterIterationData CLASS IMPLEMENTATION <<<
diff --git a/src/cluster/np_cluster.cpp b/src/cluster/np_cluster.cpp
index b34aca368fc9f0ebbc26dc19fcfc6a0542a2a84b..7beb785cf6003945889b2d9263bbf2fa23934942 100644
--- a/src/cluster/np_cluster.cpp
+++ b/src/cluster/np_cluster.cpp
@@ -70,26 +70,27 @@ extern void cluster(const string& config_file, const string& data_file, const st
  * \return result: `int` An exit code passed to the OS (0 for succesful execution).
  */
 int main(int argc, char **argv) {
+  int ierr = 0;
 #ifdef MPI_VERSION
-	int ierr = MPI_Init(&argc, &argv);
-	// create and initialise class with essential MPI data
-	mixMPI *mpidata = new mixMPI(MPI_COMM_WORLD);
+  ierr = MPI_Init(&argc, &argv);
+  // create and initialise class with essential MPI data
+  mixMPI *mpidata = new mixMPI(MPI_COMM_WORLD);
 #else
-	// create a the class with dummy data if we are not using MPI at all
-	mixMPI *mpidata = new mixMPI();
+  // create the class with dummy data if we are not using MPI at all
+  mixMPI *mpidata = new mixMPI();
 #endif
-  	string config_file = "../../test_data/cluster/DEDFB";
-	string data_file = "../../test_data/cluster/DCLU";
-	string output_path = ".";
-	if (argc == 4) {
-		config_file = string(argv[1]);
-		data_file = string(argv[2]);
-		output_path = string(argv[3]);
-	}
-	cluster(config_file, data_file, output_path, mpidata);
+  string config_file = "../../test_data/cluster/DEDFB";
+  string data_file = "../../test_data/cluster/DCLU";
+  string output_path = ".";
+  if (argc == 4) {
+    config_file = string(argv[1]);
+    data_file = string(argv[2]);
+    output_path = string(argv[3]);
+  }
+  cluster(config_file, data_file, output_path, mpidata);
 #ifdef MPI_VERSION
-	MPI_Finalize();
+  MPI_Finalize();
 #endif
-	delete mpidata;
-	return 0;
+  delete mpidata;
+  return ierr;
 }
diff --git a/src/include/Commons.h b/src/include/Commons.h
index 2012d7f86ce35a1d3c770e38eb4ac16087b9594a..cce0a6537d1f4854619b10d1b01fd99ea867e071 100644
--- a/src/include/Commons.h
+++ b/src/include/Commons.h
@@ -65,162 +65,6 @@ public:
   ~mixMPI();
 };
 
-/*! \brief A data structure representing the information used for a single scale
- * of the CLUSTER case.
- */
-class ClusterIterationData {
-public:
-  //! \brief Pointer to a ParticleDescriptor structure.
-  ParticleDescriptor *c1;
-  //! \brief Vector of geometric asymmetry factors.
-  double *gaps;
-  //! \brief Components of extinction contribution to radiation torque on a single sphere along k.
-  double **tqse;
-  //! \brief Components of polarized extinction contribution to radiation torque on a single sphere along k.
-  dcomplex **tqspe;
-  //! \brief Components of scattering contribution to radiation torque on a single sphere along k.
-  double **tqss;
-  //! \brief Components of polarized scattering contribution to radiation torque on a single sphere along k.
-  dcomplex **tqsps;
-  //! \brief L-dependent coefficients of the geometric asymmetry parameter.
-  double ****zpv;
-  //! \brief Mean geometric asymmetry parameters.
-  double **gapm;
-  //! \brief Mean geometric asymmetry parameters referred to polarization plane.
-  dcomplex **gappm;
-  //! \brief Imaginary part of the harmonic functions argument.
-  double *argi;
-  //! \brief Argument of the harmonic functions referred to the scattering plane.
-  double *args;
-  //! \brief Geometric asymmetry parameters.
-  double **gap;
-  //! \brief Geometric asymmetry parameters referred to polarization plane.
-  dcomplex **gapp;
-  //! \brief Components of extinction contribution to radiation torque on the cluster along k.
-  double **tqce;
-  //! \brief Components of extinction contribution to radiation torque on the cluster along k referred to polarization plane.
-  dcomplex **tqcpe;
-  //! \brief Components of scattering contribution to radiation torque on the cluster along k.
-  double **tqcs;
-  //! \brief Components of scattering contribution to radiation torque on the cluster along k referred to polarization plane.
-  dcomplex **tqcps;
-  //! \brief Variation of unitary radiation vector. QUESTION: correct?
-  double *duk;
-  //! \brief Cluster extinction cross-section components referred to scattering plane.
-  double **cextlr;
-  //! \brief Cluster extinction cross-section components referred to meridional plane.
-  double **cext;
-  //! \brief Cluster Mueller Transformation Matrix components referred to scattering plane.
-  double **cmullr;
-  //! \brief Cluster Mueller Transformation Matrix components referred to meridional plane.
-  double **cmul;
-  //! \brief Geometric asymmetry parameter components.
-  double *gapv;
-  //! \brief Radiation extinction torque components.
-  double *tqev;
-  //! \brief Radiation scattering torque components.
-  double *tqsv;
-  //! \brief Incident unitary vector components.
-  double *u;
-  //! \brief Scattered unitary vector components.
-  double *us;
-  //! \brief Normal unitary vector components.
-  double *un;
-  //! \brief Normal scattered unitary vector components.
-  double *uns;
-  //! \brief Incident unitary vector components on polarization plane.
-  double *up;
-  //! \brief Scattered unitary vector components on polarization plane.
-  double *ups;
-  //! \brief Mean unitary vector components normal to polarization plane.
-  double *unmp;
-  //! \brief Mean scattered unitary vector components normal to polarization plane.
-  double *unsmp;
-  //! \brief Mean incident unitary vector components on polarization plane.
-  double *upmp;
-  //! \brief Mean scattered unitary vector components on polarization plane.
-  double *upsmp;
-  //! \brief Scattering angle.
-  double scan;
-  //! \brief Control parameter on incidence direction referred to meridional plane.
-  double cfmp;
-  //! \brief Control parameter on scattering direction referred to meridional plane.
-  double sfmp;
-  //! \brief Control parameter on incidence direction referred to scattering plane.
-  double cfsp;
-  //! \brief Control parameter on scattering direction referred to scattering plane.
-  double sfsp;
-  //! \brief SQSFI = XI^-2
-  double sqsfi;
-  //! \brief Vectorized scattering coefficient matrix.
-  dcomplex *am_vector;
-  //! \brief Scattering coefficient matrix.
-  dcomplex **am;
-  //! \brief Argument of harmonic functions. QUESTION: correct?
-  dcomplex arg;
-  //! \brief Vacuum magnitude of wave vector.
-  double vk;
-  //! \brief Wave number.
-  double wn;
-  //! \brief Normalization scale. QUESTION: correct?
-  double xip;
-  //! \brief Number of scales (wavelengths) to be computed.
-  int number_of_scales;
-  //! \brief Size of the block of scales handled by the current process.
-  int xiblock;
-  //! \brief Index of the first scale handled by the current process.
-  int firstxi;
-  //! \brief Index of the last scale handled by the current process.
-  int lastxi;
-  //! \brief ID of the GPU used by one MPI process.
-  int proc_device;
-  //! \brief Refinement mode selction flag.
-  int refinemode;
-  //! \brief Maximum number of refinement iterations.
-  int maxrefiters;
-  //! \brief Required accuracy level.
-  double accuracygoal;
-
-  /*! \brief `ClusterIterationData` default instance constructor.
-   *
-   * \param gconf: `GeometryConfiguration *` Pointer to a `GeometryConfiguration` object.
-   * \param sconf: `ScattererConfiguration *` Pointer to a `ScattererConfiguration` object.
-   * \param mpidata: `mixMPI *` Pointer to a `mixMPI` object.
-   * \param device_count: `const int` Number of offload devices available on the system.
-   */
-  ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count);
-  
-  /*! \brief `ClusterIterationData` copy constructor.
-   *
-   * \param rhs: `const ClusterIterationData &` Reference to the `ClusterIterationData` object to be copied.
-   */
-  ClusterIterationData(const ClusterIterationData& rhs);
-
-#ifdef MPI_VERSION
-  /*! \brief `ClusterIterationData` MPI constructor.
-   *
-   * \param mpidata: `const mixMPI *` Pointer to a `mixMPI` instance.
-   * \param device_count: `const int` Number of offload devices available on the system.
-   */
-  ClusterIterationData(const mixMPI *mpidata, const int device_count);
-
-  /*! \brief Broadcast over MPI the ClusterIterationData instance from MPI process 0 to all others.
-   *
-   * When using MPI, the initial ClusterIterationData instance created by MPI process 0
-   * needs to be replicated on all other processes. This function sends it using
-   * MPI broadcast calls. The MPI broadcast calls in this function must match those
-   * in the constructor using the mixMPI pointer.
-   *
-   * \param mpidata: `mixMPI *` Pointer to the mpi structure used to do the MPI broadcast.
-   */
-  void mpibcast(const mixMPI *mpidata);
-#endif // MPI_VERSION
-
-  /*! \brief `ClusterIterationData` instance destroyer.
-   */
-  ~ClusterIterationData();
-};
-
 /*! \brief Basic data structure describing the particle model and its interaction with fields.
  *
  * This class forms a base of the data structure collections that are used by the
@@ -384,6 +228,8 @@ public:
   dcomplex *vkt;
   //! \brief Vector of sizes in units of 2*PI/LAMBDA
   double *vsz;
+  //! \brief Total geometric cross-section.
+  double gcs;
   // >>> END OF SECTION COMMON TO ALL DESCRIPTOR TYPES <<< //
   
   // >>> NEEDED BY SPHERE AND CLUSTER <<< //
@@ -461,8 +307,6 @@ public:
   const int& ndit = _ndit;
   //! \brief Read-only view of NDM.
   const int& ndm = _ndm;
-  //! \brief Total geometric cross-section.
-  double gcs;
 
   //! \brief TBD
   dcomplex *vh;
diff --git a/src/include/IterationData.h b/src/include/IterationData.h
new file mode 100644
index 0000000000000000000000000000000000000000..8b8dca9e98afeda9201b92a7b10c1478f747046f
--- /dev/null
+++ b/src/include/IterationData.h
@@ -0,0 +1,493 @@
+/* Copyright (C) 2024   INAF - Osservatorio Astronomico di Cagliari
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   
+   A copy of the GNU General Public License is distributed along with
+   this program in the COPYING file. If not, see: <https://www.gnu.org/licenses/>.
+ */
+
+/*! \file IterationData.h
+ *
+ * \brief Multi-process communication data structures.
+ *
+ */
+
+#ifndef INCLUDE_ITERATION_DATA_H_
+#define INCLUDE_ITERATION_DATA_H_
+
+// >>> DEFINITION OF ClusterIterationData CLASS <<<
+/*! \brief A data structure representing the information used for a single scale
+ * of the CLUSTER case.
+ */
+class ClusterIterationData {
+public:
+  //! \brief Pointer to a ParticleDescriptor structure.
+  ParticleDescriptor *c1;
+  //! \brief Vector of geometric asymmetry factors.
+  double *gaps;
+  //! \brief Components of extinction contribution to radiation torque on a single sphere along k.
+  double **tqse;
+  //! \brief Components of polarized extinction contribution to radiation torque on a single sphere along k.
+  dcomplex **tqspe;
+  //! \brief Components of scattering contribution to radiation torque on a single sphere along k.
+  double **tqss;
+  //! \brief Components of polarized scattering contribution to radiation torque on a single sphere along k.
+  dcomplex **tqsps;
+  //! \brief L-dependent coefficients of the geometric asymmetry parameter.
+  double ****zpv;
+  //! \brief Mean geometric asymmetry parameters.
+  double **gapm;
+  //! \brief Mean geometric asymmetry parameters referred to polarization plane.
+  dcomplex **gappm;
+  //! \brief Imaginary part of the harmonic functions argument.
+  double *argi;
+  //! \brief Argument of the harmonic functions referred to the scattering plane.
+  double *args;
+  //! \brief Geometric asymmetry parameters.
+  double **gap;
+  //! \brief Geometric asymmetry parameters referred to polarization plane.
+  dcomplex **gapp;
+  //! \brief Components of extinction contribution to radiation torque on the cluster along k.
+  double **tqce;
+  //! \brief Components of extinction contribution to radiation torque on the cluster along k referred to polarization plane.
+  dcomplex **tqcpe;
+  //! \brief Components of scattering contribution to radiation torque on the cluster along k.
+  double **tqcs;
+  //! \brief Components of scattering contribution to radiation torque on the cluster along k referred to polarization plane.
+  dcomplex **tqcps;
+  //! \brief Variation of unitary radiation vector. QUESTION: correct?
+  double *duk;
+  //! \brief Cluster extinction cross-section components referred to scattering plane.
+  double **cextlr;
+  //! \brief Cluster extinction cross-section components referred to meridional plane.
+  double **cext;
+  //! \brief Cluster Mueller Transformation Matrix components referred to scattering plane.
+  double **cmullr;
+  //! \brief Cluster Mueller Transformation Matrix components referred to meridional plane.
+  double **cmul;
+  //! \brief Geometric asymmetry parameter components.
+  double *gapv;
+  //! \brief Radiation extinction torque components.
+  double *tqev;
+  //! \brief Radiation scattering torque components.
+  double *tqsv;
+  //! \brief Incident unitary vector components.
+  double *u;
+  //! \brief Scattered unitary vector components.
+  double *us;
+  //! \brief Normal unitary vector components.
+  double *un;
+  //! \brief Normal scattered unitary vector components.
+  double *uns;
+  //! \brief Incident unitary vector components on polarization plane.
+  double *up;
+  //! \brief Scattered unitary vector components on polarization plane.
+  double *ups;
+  //! \brief Mean unitary vector components normal to polarization plane.
+  double *unmp;
+  //! \brief Mean scattered unitary vector components normal to polarization plane.
+  double *unsmp;
+  //! \brief Mean incident unitary vector components on polarization plane.
+  double *upmp;
+  //! \brief Mean scattered unitary vector components on polarization plane.
+  double *upsmp;
+  //! \brief Scattering angle.
+  double scan;
+  //! \brief Control parameter on incidence direction referred to meridional plane.
+  double cfmp;
+  //! \brief Control parameter on scattering direction referred to meridional plane.
+  double sfmp;
+  //! \brief Control parameter on incidence direction referred to scattering plane.
+  double cfsp;
+  //! \brief Control parameter on scattering direction referred to scattering plane.
+  double sfsp;
+  //! \brief SQSFI = XI^-2
+  double sqsfi;
+  //! \brief Vectorized scattering coefficient matrix.
+  dcomplex *am_vector;
+  //! \brief Scattering coefficient matrix.
+  dcomplex **am;
+  //! \brief Argument of harmonic functions. QUESTION: correct?
+  dcomplex arg;
+  //! \brief Vacuum magnitude of wave vector.
+  double vk;
+  //! \brief Wave number.
+  double wn;
+  //! \brief Normalization scale. QUESTION: correct?
+  double xip;
+  //! \brief Number of scales (wavelengths) to be computed.
+  int number_of_scales;
+  //! \brief Size of the block of scales handled by the current process.
+  int xiblock;
+  //! \brief Index of the first scale handled by the current process.
+  int firstxi;
+  //! \brief Index of the last scale handled by the current process.
+  int lastxi;
+  //! \brief ID of the GPU used by one MPI process.
+  int proc_device;
+  //! \brief Refinement mode selection flag.
+  int refinemode;
+  //! \brief Maximum number of refinement iterations.
+  int maxrefiters;
+  //! \brief Required accuracy level.
+  double accuracygoal;
+
+  /*! \brief `ClusterIterationData` default instance constructor.
+   *
+   * \param gconf: `GeometryConfiguration *` Pointer to a `GeometryConfiguration` object.
+   * \param sconf: `ScattererConfiguration *` Pointer to a `ScattererConfiguration` object.
+   * \param mpidata: `mixMPI *` Pointer to a `mixMPI` object.
+   * \param device_count: `const int` Number of offload devices available on the system.
+   */
+  ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count);
+  
+  /*! \brief `ClusterIterationData` copy constructor.
+   *
+   * \param rhs: `const ClusterIterationData &` Reference to the `ClusterIterationData` object to be copied.
+   */
+  ClusterIterationData(const ClusterIterationData& rhs);
+
+#ifdef MPI_VERSION
+  /*! \brief `ClusterIterationData` MPI constructor.
+   *
+   * \param mpidata: `const mixMPI *` Pointer to a `mixMPI` instance.
+   * \param device_count: `const int` Number of offload devices available on the system.
+   */
+  ClusterIterationData(const mixMPI *mpidata, const int device_count);
+
+  /*! \brief Broadcast over MPI the ClusterIterationData instance from MPI process 0 to all others.
+   *
+   * When using MPI, the initial ClusterIterationData instance created by MPI process 0
+   * needs to be replicated on all other processes. This function sends it using
+   * MPI broadcast calls. The MPI broadcast calls in this function must match those
+   * in the constructor using the mixMPI pointer.
+   *
+   * \param mpidata: `mixMPI *` Pointer to the mpi structure used to do the MPI broadcast.
+   */
+  void mpibcast(const mixMPI *mpidata);
+#endif // MPI_VERSION
+
+  /*! \brief `ClusterIterationData` instance destroyer.
+   */
+  ~ClusterIterationData();
+};
+// >>> END OF ClusterIterationData CLASS DEFINITION <<<
+
+// >>> DEFINITION OF InclusionIterationData CLASS <<<
+/*! \brief A data structure representing the information used for a single scale
+ * of the INCLUSION case.
+ */
+class InclusionIterationData {
+protected:
+  //! \brief Vectorized geometric asymmetry parameter components.
+  double *vec_zpv;
+  
+public:
+  //! \brief External layer index.
+  int nimd;
+  //! \brief External layer radius.
+  double extr;
+  
+  //! \brief Pointer to a ParticleDescriptor structure.
+  ParticleDescriptor *c1;
+  //! \brief Vector of geometric asymmetry factors.
+  double *gaps;
+    //! \brief Components of extinction contribution to radiation torque on a single sphere along k.
+  double **tqse;
+  //! \brief Components of polarized extinction contribution to radiation torque on a single sphere along k.
+  dcomplex **tqspe;
+  //! \brief Components of scattering contribution to radiation torque on a single sphere along k.
+  double **tqss;
+  //! \brief Components of polarized scattering contribution to radiation torque on a single sphere along k.
+  dcomplex **tqsps;
+  //! \brief L-dependent coefficients of the geometric asymmetry parameter.
+  double ****zpv;
+  //! \brief Mean geometric asymmetry parameters.
+  double **gapm;
+  //! \brief Mean geometric asymmetry parameters referred to polarization plane.
+  dcomplex **gappm;
+  //! \brief Imaginary part of the harmonic functions argument.
+  double *argi;
+  //! \brief Argument of the harmonic functions referred to the scattering plane.
+  double *args;
+  //! \brief Geometric asymmetry parameters.
+  double **gap;
+  //! \brief Geometric asymmetry parameters referred to polarization plane.
+  dcomplex **gapp;
+  //! \brief Components of extinction contribution to radiation torque on the cluster along k.
+  double **tqce;
+  //! \brief Components of extinction contribution to radiation torque on the cluster along k referred to polarization plane.
+  dcomplex **tqcpe;
+  //! \brief Components of scattering contribution to radiation torque on the cluster along k.
+  double **tqcs;
+  //! \brief Components of scattering contribution to radiation torque on the cluster along k referred to polarization plane.
+  dcomplex **tqcps;
+  //! \brief Variation of unitary radiation vector. QUESTION: correct?
+  double *duk;
+  //! \brief Cluster extinction cross-section components referred to scattering plane.
+  double **cextlr;
+  //! \brief Cluster extinction cross-section components referred to meridional plane.
+  double **cext;
+  //! \brief Cluster Mueller Transformation Matrix components referred to scattering plane.
+  double **cmullr;
+  //! \brief Cluster Mueller Transformation Matrix components referred to meridional plane.
+  double **cmul;
+  //! \brief Geometric asymmetry parameter components.
+  double *gapv;
+  //! \brief Radiation extinction torque components.
+  double *tqev;
+  //! \brief Radiation scattering torque components.
+  double *tqsv;
+  //! \brief Incident unitary vector components.
+  double *u;
+  //! \brief Scattered unitary vector components.
+  double *us;
+  //! \brief Normal unitary vector components.
+  double *un;
+  //! \brief Normal scattered unitary vector components.
+  double *uns;
+  //! \brief Incident unitary vector components on polarization plane.
+  double *up;
+  //! \brief Scattered unitary vector components on polarization plane.
+  double *ups;
+  //! \brief Mean unitary vector components normal to polarization plane.
+  double *unmp;
+  //! \brief Mean scattered unitary vector components normal to polarization plane.
+  double *unsmp;
+  //! \brief Mean incident unitary vector components on polarization plane.
+  double *upmp;
+  //! \brief Mean scattered unitary vector components on polarization plane.
+  double *upsmp;
+  //! \brief Scattering angle.
+  double scan;
+  //! \brief Control parameter on incidence direction referred to meridional plane.
+  double cfmp;
+  //! \brief Control parameter on scattering direction referred to meridional plane.
+  double sfmp;
+  //! \brief Control parameter on incidence direction referred to scattering plane.
+  double cfsp;
+  //! \brief Control parameter on scattering direction referred to scattering plane.
+  double sfsp;
+  //! \brief SQSFI = XI^-2
+  double sqsfi;
+  //! \brief Vectorized scattering coefficient matrix.
+  dcomplex *am_vector;
+  //! \brief Scattering coefficient matrix.
+  dcomplex **am;
+  //! \brief Argument of harmonic functions. QUESTION: correct?
+  dcomplex arg;
+  //! \brief Vacuum magnitude of wave vector.
+  double vk;
+  //! \brief Wave number.
+  double wn;
+  //! \brief Normalization scale. QUESTION: correct?
+  double xip;
+  //! \brief Number of scales (wavelengths) to be computed.
+  int number_of_scales;
+  //! \brief Size of the block of scales handled by the current process.
+  int xiblock;
+  //! \brief Index of the first scale handled by the current process.
+  int firstxi;
+  //! \brief Index of the last scale handled by the current process.
+  int lastxi;
+  //! \brief ID of the GPU used by one MPI process.
+  int proc_device;
+  //! \brief Refinement mode selection flag.
+  int refinemode;
+  //! \brief Maximum number of refinement iterations.
+  int maxrefiters;
+  //! \brief Required accuracy level.
+  double accuracygoal;
+
+  /*! \brief `InclusionIterationData` default instance constructor.
+   *
+   * \param gconf: `GeometryConfiguration *` Pointer to a `GeometryConfiguration` object.
+   * \param sconf: `ScattererConfiguration *` Pointer to a `ScattererConfiguration` object.
+   * \param mpidata: `mixMPI *` Pointer to a `mixMPI` object.
+   * \param device_count: `const int` Number of offload devices available on the system.
+   */
+  InclusionIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count);
+  
+  /*! \brief `InclusionIterationData` copy constructor.
+   *
+   * \param rhs: `const InclusionIterationData &` Reference to the `InclusionIterationData` object to be copied.
+   */
+  InclusionIterationData(const InclusionIterationData& rhs);
+
+#ifdef MPI_VERSION
+  /*! \brief `InclusionIterationData` MPI constructor.
+   *
+   * \param mpidata: `const mixMPI *` Pointer to a `mixMPI` instance.
+   * \param device_count: `const int` Number of offload devices available on the system.
+   */
+  InclusionIterationData(const mixMPI *mpidata, const int device_count);
+
+  /*! \brief Broadcast over MPI the InclusionIterationData instance from MPI process 0 to all others.
+   *
+   * When using MPI, the initial InclusionIterationData instance created by MPI process 0
+   * needs to be replicated on all other processes. This function sends it using
+   * MPI broadcast calls. The MPI broadcast calls in this function must match those
+   * in the constructor using the mixMPI pointer.
+   *
+   * \param mpidata: `mixMPI *` Pointer to the mpi structure used to do the MPI broadcast.
+   */
+  void mpibcast(const mixMPI *mpidata);
+#endif
+
+  /*! \brief `InclusionIterationData` instance destroyer.
+   */
+  ~InclusionIterationData();
+};
+// >>> END OF InclusionIterationData CLASS DEFINITION <<< //
+
+// >>> DEFINITION OF SphereIterationData CLASS <<<
+/*! \brief A data structure representing the information used for a single scale
+ * of the SPHERE case.
+ */
+class SphereIterationData {
+protected:
+  //! \brief Number of spheres
+  int _nsph;
+  //! \brief Maximum field expansion order.
+  int _lm;
+  //! \brief Vector of Mueller matrix components.
+  double *vec_cmul;
+  //! \brief Vector of Mueller matrix components referred to meridional plane.
+  double *vec_cmullr;
+  //! Vectorized TQSPE.
+  dcomplex *vec_tqspe;
+  //! Vectorized TQSPS.
+  dcomplex *vec_tqsps;
+  //! Vectorized TQSE.
+  double *vec_tqse;
+  //! Vectorized TQSS.
+  double *vec_tqss;
+  //! Vectorized ZPV.
+  double *vec_zpv;
+  
+public:
+  //! \brief Vacuum magnitude of wave vector.
+  double vk;
+  //! \brief Wave number.
+  double wn;
+  //! \brief Normalization scale. QUESTION: correct?
+  double xip;
+  //! \brief Number of scales (wavelengths) to be computed.
+  int number_of_scales;
+  //! \brief Size of the block of scales handled by the current process.
+  int xiblock;
+  //! \brief Index of the first scale handled by the current process.
+  int firstxi;
+  //! \brief Index of the last scale handled by the current process.
+  int lastxi;
+  //! \brief Argument of harmonic functions.
+  dcomplex arg;
+  //! \brief S0 = FSAS / (4 PI K^3).
+  dcomplex s0;
+  //! \brief Total forward scattering amplitude of the spheres.
+  dcomplex tfsas;
+  //! \brief Pointer to a sphere particle descriptor.
+  ParticleDescriptor *c1;
+  //! \brief Imaginary part of `arg`.
+  double *argi;
+  //! \brief `arg` squared.
+  double *args;
+  //! \brief Scattering angle.
+  double scan;
+  //! \brief Control parameter on incidence direction referred to meridional plane.
+  double cfmp;
+  //! \brief Control parameter on scattering direction referred to meridional plane.
+  double sfmp;
+  //! \brief Control parameter on incidence direction referred to scattering plane.
+  double cfsp;
+  //! \brief Control parameter on scattering direction referred to scattering plane.
+  double sfsp;
+  //! \brief Geometric asymmetry parameter for spheres.
+  double *gaps;
+  //! \brief Variation of unitary wave vector.
+  double *duk;
+  //! \brief Incidence direction unitary vector.
+  double *u;
+  //! \brief Scattering direction unitary vector.
+  double *us;
+  //! \brief Normal direction unitary vector.
+  double *un;
+  //! \brief Scattering normal direction unitary vector.
+  double *uns;
+  //! \brief Polarization direction unitary vector.
+  double *up;
+  //! \brief Scattered polarization direction unitary vector.
+  double *ups;
+  //! \brief Polarization direction unitary vector referred to meridional plane.
+  double *upmp;
+  //! \brief Scattered polarization direction unitary vector referred to meridional plane.
+  double *upsmp;
+  //! \brief Normal direction unitary vector referred to meridional plane.
+  double *unmp;
+  //! \brief Scattering normal direction unitary vector referred to meridional plane.
+  double *unsmp;
+  //! \brief Mueller matrix components.
+  double **cmul;
+  //! \brief Mueller matrix components referred to meridional plane.
+  double **cmullr;
+  //! \brief Polarization-dependent extinction contribution to torque for each sphere.
+  dcomplex **tqspe;
+  //! \brief Polarization-dependent scattering contribution to torque for each sphere.
+  dcomplex **tqsps;
+  //! \brief Extinction contribution to torque for each sphere.
+  double **tqse;
+  //! \brief Scattering contribution to torque for each sphere.
+  double **tqss;
+  //! \brief Scattering coefficients tensor.
+  double ****zpv;
+  
+  /*! \brief `SphereIterationData` default instance constructor.
+   *
+   * \param gconf: `GeometryConfiguration *` Pointer to a `GeometryConfiguration` object.
+   * \param sconf: `ScattererConfiguration *` Pointer to a `ScattererConfiguration` object.
+   * \param mpidata: `mixMPI *` Pointer to a `mixMPI` object.
+   * \param device_count: `const int` Number of offload devices available on the system.
+   */
+  SphereIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count);
+  
+  /*! \brief `SphereIterationData` copy constructor.
+   *
+   * \param rhs: `const SphereIterationData &` Reference to the object to be copied.
+   */
+  SphereIterationData(const SphereIterationData& rhs);
+
+#ifdef MPI_VERSION
+  /*! \brief `SphereIterationData` MPI constructor.
+   *
+   * \param mpidata: `const mixMPI *` Pointer to a `mixMPI` instance.
+   * \param device_count: `const int` Number of offload devices available on the system.
+   */
+  SphereIterationData(const mixMPI *mpidata, const int device_count);
+
+  /*! \brief Broadcast over MPI the `SphereIterationData` instance from MPI process 0 to all others.
+   *
+   * When using MPI, the initial SphereIterationData instance created by
+   * MPI process 0 needs to be replicated on all other processes. This
+   * function sends it using MPI broadcast calls. The MPI broadcast calls in
+   * this function must match those in the constructor using the mixMPI pointer.
+   *
+   * \param mpidata: `mixMPI *` Pointer to `mixMPI` instance.
+   */
+  int mpibcast(const mixMPI *mpidata);
+#endif // MPI_VERSION
+
+  /*! \brief `SphereIterationData` instance destroyer.
+   */
+  ~SphereIterationData();
+};
+// >>> END OF SphereIterationData CLASS DEFINITION <<<
+
+#endif // INCLUDE_ITERATION_DATA_H_
diff --git a/src/include/outputs.h b/src/include/outputs.h
index b508e79d02e6510fcbfac6b9fc3d96e8b82a5a6e..abea6a6f891c1e318c7c7c244f196cc469a38b48 100644
--- a/src/include/outputs.h
+++ b/src/include/outputs.h
@@ -467,9 +467,9 @@ public:
 
   /*! \brief `ClusterOutputInfo` constructor from HDF5 input.
    *
-   * \param hdf5_file_name: `const string &` Path to the HDF5 file to be read.
+   * \param hdf5_name: `const string &` Path to the HDF5 file to be read.
    */   
-  ClusterOutputInfo(const std::string &hdf5_file_name);
+  ClusterOutputInfo(const std::string &hdf5_name);
 
   /*! \brief `ClusterOutputInfo` instance destroyer.
    */
@@ -479,13 +479,13 @@ public:
    *
    * \param sc: `ScattererConfiguration *` Pointer to a `ScattererConfiguration` instance.
    * \param gc: `GeometryConfiguration *` Pointer to a `GeometryConfiguration` instance.
-   * \param first_xi: `int` Index of the first scale in output (optional, default is 0).
+   * \param first_xi: `int` Index of the first scale in output (optional, default is 1).
    * \param xi_length: `int` Number of scales tobe included in output (optional, default is all).
    * \return size: `long` Estimated instance size in bytes.
    */
   static long compute_size(
     ScattererConfiguration *sc, GeometryConfiguration *gc,
-    int first_xi = 0, int xi_length = 0
+    int first_xi = 1, int xi_length = 0
   );
   
   /*! \brief Get the size of a `ClusterOutputInfo` instance in bytes.
@@ -647,7 +647,7 @@ public:
   int jwtm;
   //! \brief Vector of scale (wavelength) indices.
   int *vec_jxi;
-  //! \brief Vector of error severities (0 - success, 1 - HJV, 2 - DME).
+  //! \brief Vector of error severities (0 - success, 1 - INDME, 2 - OSPV).
   short *vec_ier;
   //! \brief Vector of vacuum wave numbers.
   double *vec_vk;
@@ -897,9 +897,9 @@ public:
 
   /*! \brief `InclusionOutputInfo` constructor from HDF5 input.
    *
-   * \param hdf5_file_name: `const string &` Path to the HDF5 file to be read.
+   * \param hdf5_name: `const string &` Path to the HDF5 file to be read.
    */   
-  InclusionOutputInfo(const std::string &hdf5_file_name);
+  InclusionOutputInfo(const std::string &hdf5_name);
 
   /*! \brief `InclusionOutputInfo` instance destroyer.
    */
@@ -909,13 +909,13 @@ public:
    *
    * \param sc: `ScattererConfiguration *` Pointer to a `ScattererConfiguration` instance.
    * \param gc: `GeometryConfiguration *` Pointer to a `GeometryConfiguration` instance.
-   * \param first_xi: `int` Index of the first scale in output (optional, default is 0).
+   * \param first_xi: `int` Index of the first scale in output (optional, default is 1).
    * \param xi_length: `int` Number of scales tobe included in output (optional, default is all).
    * \return size: `long` Estimated instance size in bytes.
    */
   static long compute_size(
     ScattererConfiguration *sc, GeometryConfiguration *gc,
-    int first_xi = 0, int xi_length = 0
+    int first_xi = 1, int xi_length = 0
   );
   
   /*! \brief Get the size of a `ClusterOutputInfo` instance in bytes.
@@ -969,4 +969,275 @@ public:
 };
 // >>> END OF OUTPUT FOR INCLUSION <<<
 
+// >>> OUTPUT FOR SPHERE <<<
+class SphereOutputInfo {
+protected:
+  //! \brief Number of incident azimuth calculations.
+  int _num_theta;
+  //! \brief Number of scattered azimuth calculations.
+  int _num_thetas;
+  //! \brief Number of incident elevation calculations.
+  int _num_phi;
+  //! \brief Number of scattered elevation calculations.
+  int _num_phis;
+  //! \brief ID of the first computed wavelength.
+  int _first_xi;
+  
+  /*! \brief Write the output to a HDF5 file.
+   *
+   * \param file_name: `const string &` Path to the output to be written.
+   * \return result: `int` Exit code (0 if successful).
+   */
+  int write_hdf5(const std::string &file_name);
+  
+  /*! \brief Write the output to a legacy text file.
+   *
+   * This function takes care of writing the output using the legacy
+   * formatted ASCII structure. If the output file does not exist, it
+   * is created. If it exists, the new content is overwritten.
+   *
+   * \param output: `const string &` Path to the output to be written.
+   * \return result: `int` Exit code (0 if successful).
+   */
+  int write_legacy(const std::string &output);
+  
+public:
+  //! \brief Read-only view on the ID of the first scale
+  const int &first_xi = _first_xi;
+  //! \brief Number of spheres.
+  int nsph;
+  //! \brief Maximum field expansion order.
+  int lm;
+  //! \brief Incident polarization flag.
+  int inpol;
+  //! \brief Number of points for transition layer integration.
+  int npnt;
+  //! \brief Number of points for non-transition layer integration.
+  int npntts;
+  //! \brief Flag for reference to meridional plane.
+  int isam;
+  //! \brief Flag for dielectric function definition.
+  int idfc;
+  //! \brief First incident radiation azimuth angle.
+  double th;
+  //! \brief Incident radiation azimuth angle step.
+  double thstp;
+  //! \brief Last incident radiation azimuth angle.
+  double thlst;
+  //! \brief First scattered radiation azimuth angle.
+  double ths;
+  //! \brief Scattered radiation azimuth angle step.
+  double thsstp;
+  //! \brief Last scattered radiation azimuth angle.
+  double thslst;
+  //! \brief First incident radiation elevation angle.
+  double ph;
+  //! \brief Incident radiation elevation angle step.
+  double phstp;
+  //! \brief Last incident radiation elevation angle.
+  double phlst;
+  //! \brief First scattered radiation elevation angle.
+  double phs;
+  //! \brief Scattered radiation elevation angle step.
+  double phsstp;
+  //! \brief Last scattered radiation elevation angle.
+  double phslst;
+  //! \brief Number of directions to be explicitly solved.
+  int ndirs;
+  //! \brief Refractive index of external medium.
+  double exri;
+  //! \brief Number of scales (wavelengths).
+  int nxi;
+  //! \brief Number of scales handled by the current process.
+  int xi_block_size;
+  //! \brief Index of the wavelength for T-matrix output.
+  int jwtm;
+  //! \brief Number of sphere types.
+  int configurations;
+  //! \brief Highest expansion order achieved in calculations.
+  int lcalc;
+  //! \brief Harmonic functions argument.
+  dcomplex arg;
+  //! \brief Vector of scale (wavelength) indices.
+  int *vec_jxi;
+  //! \brief Vector of error severities (0 - success, 1 - DME).
+  short *vec_ier;
+  //! \brief Vector of vacuum wave numbers.
+  double *vec_vk;
+  //! \brief Vector of computed scales.
+  double *vec_xi;
+  //! \brief Vector of sphere sizes (one for every configuration and scale).
+  double *vec_sphere_sizes;
+  //! \brief Vector of sphere refractive indices (one for every configuration and scale).
+  dcomplex *vec_sphere_ref_indices;
+  //! \brief Vector of sphere scattering cross-sections.
+  double *vec_scs;
+  //! \brief Vector of sphere absorption cross-sections.
+  double *vec_abs;
+  //! \brief Vector of sphere extinction cross-sections.
+  double *vec_exs;
+  //! \brief Vector of sphere albedos.
+  double *vec_albeds;
+  //! \brief Vector of sphere scattering-to-geometric cross-sections.
+  double *vec_scsrt;
+  //! \brief Vector of sphere absorption-to-geometric cross-sections.
+  double *vec_absrt;
+  //! \brief Vector of sphere extinction-to-geometric cross-sections.
+  double *vec_exsrt;
+  //! \brief Vector of sphere forward scattering amplitudes.
+  dcomplex *vec_fsas;
+  //! \brief Vector of sphere QSCHU.
+  double *vec_qschu;
+  //! \brief Vector of sphere PSCHU.
+  double *vec_pschu;
+  //! \brief Vector of sphere S0MAG.
+  double *vec_s0mag;
+  //! \brief Vector of sphere average asymmetry parameter.
+  double *vec_cosav;
+  //! \brief Vector of sphere average radiation pressure force (N).
+  double *vec_raprs;
+  //! \brief Vector of sphere average extinction torque along incidence direction (parallel polarization).
+  double *vec_tqek1;
+  //! \brief Vector of sphere average extinction torque along incidence direction (perpendicular polarization).
+  double *vec_tqek2;
+  //! \brief Vector of sphere average scattering torque along incidence direction (parallel polarization).
+  double *vec_tqsk1;
+  //! \brief Vector of sphere average scattering torque along incidence direction (perpendicular polarization).
+  double *vec_tqsk2;
+  //! \brief Vector of total forward scattering amplitudes.
+  dcomplex *vec_fsat;
+  //! \brief Vector of total QSCHU.
+  double *vec_qschut;
+  //! \brief Vector of total PSCHU.
+  double *vec_pschut;
+  //! \brief Vector of total S0MAG.
+  double *vec_s0magt;
+  //! \brief Vector of incidence azimuth directions (one per incidence azimuth).
+  double *vec_dir_tidg;
+  //! \brief Vector of incidence elevation directions (one per incidence elevation).
+  double *vec_dir_pidg;
+  //! \brief Vector of scattering azimuth directions (one per scattering azimuth).
+  double *vec_dir_tsdg;
+  //! \brief Vector of scattering elevation directions (one per scattering elevation).
+  double *vec_dir_psdg;
+  //! \brief Vector of scattering angles (one per direction).
+  double *vec_dir_scand;
+  //! \brief Control parameter for incidence plane referred to meridional plane (one per direction).
+  double *vec_dir_cfmp;
+  //! \brief Control parameter for scattering plane referred to meridional plane (one per direction).
+  double *vec_dir_sfmp;
+  //! \brief Control parameter for incidence plane referred to scattering plane (one per direction).
+  double *vec_dir_cfsp;
+  //! \brief Control parameter for scattering plane referred to scattering plane (one per direction).
+  double *vec_dir_sfsp;
+  //! \brief Components of the unitary vector perpendicular to incidence plane (three per direction).
+  double *vec_dir_un;
+  //! \brief Components of the unitary vector perpendicular to scattering plane (three per direction).
+  double *vec_dir_uns;
+  //! \brief Vector of sphere differential scattering amplitude with polarization parallel to the parallel incidence field.
+  dcomplex *vec_dir_sas11;
+  //! \brief Vector of sphere differential scattering amplitude with polarization perpendicular to the parallel incidence field.
+  dcomplex *vec_dir_sas21;
+  //! \brief Vector of sphere differential scattering amplitude with polarization perpendicular to the perpendicular incidence field.
+  dcomplex *vec_dir_sas12;
+  //! \brief Vector of sphere differential scattering amplitude with polarization parallel to the perpendicular incidence field.
+  dcomplex *vec_dir_sas22;
+  //! \brief Vector of differential radiation pressure force components along the X axis.
+  double *vec_dir_fx;
+  //! \brief Vector of differential radiation pressure force components along the Y axis.
+  double *vec_dir_fy;
+  //! \brief Vector of differential radiation pressure force components along the Z axis.
+  double *vec_dir_fz;
+  //! \brief Vector of sphere Mueller transformation matrices referred to meridional plane.
+  double *vec_dir_muls;
+  //! \brief Vector of sphere Mueller transformation matrices referred to scattering plane.
+  double *vec_dir_mulslr;
+  
+  /*! \brief `SphereOutputInfo` default instance constructor.
+   *
+   * \param sc: `ScattererConfiguration *` Pointer to a `ScattererConfiguration` instance.
+   * \param gc: `GeometryConfiguration *` Pointer to a `GeometryConfiguration` instance.
+   * \param mpidata: `const mixMPI*` Pointer to a mixMPI instance.
+   * \param first_xi: `int` Index of the first scale in output (optional, default is 1).
+   * \param xi_length: `int` Number of scales to be included in output (optional, default is 0, meaning all).
+   */   
+  SphereOutputInfo(
+    ScattererConfiguration *sc, GeometryConfiguration *gc,
+    const mixMPI *mpidata, int first_xi = 1, int xi_length = 0
+  );
+
+  /*! \brief `SphereOutputInfo` constructor from HDF5 input.
+   *
+   * \param hdf5_name: `const string &` Path to the HDF5 file to be read.
+   */   
+  SphereOutputInfo(const std::string &hdf5_name);
+
+  /*! \brief `SphereOutputInfo` instance destroyer.
+   */
+  ~SphereOutputInfo();
+
+  /*! \brief Estimate the size of the structure that would be built for given input.
+   *
+   * \param sc: `ScattererConfiguration *` Pointer to a `ScattererConfiguration` instance.
+   * \param gc: `GeometryConfiguration *` Pointer to a `GeometryConfiguration` instance.
+   * \param first_xi: `int` Index of the first scale in output (optional, default is 1).
+   * \param xi_length: `int` Number of scales to be included in output (optional, default is all).
+   * \return size: `long` Estimated instance size in bytes.
+   */
+  static long compute_size(
+    ScattererConfiguration *sc, GeometryConfiguration *gc,
+    int first_xi = 1, int xi_length = 0
+  );
+  
+  /*! \brief Get the size of a `SphereOutputInfo` instance in bytes.
+   *
+   * \return size: `long` Estimated instance size in bytes.
+   */
+  long compute_size();
+
+  /*! \brief Insert in the current output data the data of another block.
+   *
+   * \param rhs: `const SphereOutputInfo &` Reference to the source data block.
+   * \return result: `int` Exit code (0 if successful).
+   */
+  int insert(const SphereOutputInfo &rhs);
+
+  /*! \brief Write the output to a file.
+   *
+   * \param output: `const string &` Path to the output to be written.
+   * \param format: `const string &` Output format (one of LEGACY or HDF5).
+   * \return result: `int` Exit code (0 if successful).
+   */
+  int write(const std::string &output, const std::string &format);
+
+#ifdef MPI_VERSION
+  /*! \brief Receive output data from worker processes.
+   *
+   * This function is invoked by the MPI rank-0 process to fetch the
+   * output data produced by higher rank processes. When calling this
+   * function, process 0 halts until a valid data chunk is transmitted
+   * by the queried process.
+   *
+   * \param mpidata: `const mixMPI*` Pointer to a `mixMPI` instance.
+   * \param pid: `int` Rank of the process that is transmitting data.
+   * \return result: `int` An exit code (0 for success).
+   */
+  int mpireceive(const mixMPI *mpidata, int pid);
+
+  /*! \brief Send output data to process 0.
+   *
+   * This function is invoked by non-zero ranked MPI processes when
+   * they are ready to send back the output data. When a process reaches
+   * this call, it halts until MPI process 0 requests the data
+   * transmission.
+   *
+   * \param mpidata: `const mixMPI*` Pointer to a `mixMPI` instance.
+   * \return result: `int` An exit code (0 for success).
+   */
+  int mpisend(const mixMPI *mpidata);
+#endif // MPI_VERSION
+};
+// >>> END OF OUTPUT FOR SPHERE <<<
+
 #endif // INCLUDE_OUTPUTS_H_
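
`SphereOutputInfo` is meant to be consumed like its cluster and inclusion counterparts: the full-size instance lives on MPI rank 0 / OpenMP thread 0, the other workers build single-scale blocks through the `first_xi`/`xi_length` constructor arguments, and the partial blocks are folded back with `insert()` before a final `write()` in either LEGACY or HDF5 format. A minimal sketch of that collection step, assuming the interface declared above (the helper name, the per-thread array convention and the output file names are illustrative, mirroring the `InclusionOutputInfo` usage shown in the next file):

```cpp
// Sketch only: p_outarray is assumed to hold one partial SphereOutputInfo
// per OpenMP thread, with p_outarray[0] being the full-size instance owned
// by thread 0; merge_and_write() is an illustrative helper.
#include <string>
#include "../include/outputs.h"

int merge_and_write(SphereOutputInfo **p_outarray, int nthreads,
                    const std::string &output_path) {
  for (int ti = 1; ti < nthreads; ti++) {
    if (p_outarray[ti] == NULL) continue;      // this thread had no scale to do
    p_outarray[0]->insert(*(p_outarray[ti]));  // fold the partial block in
    delete p_outarray[ti];
    p_outarray[ti] = NULL;
  }
  // Write the legacy ASCII output and its HDF5 counterpart.
  int res = p_outarray[0]->write(output_path + "/c_OSPH", "LEGACY");
  if (res == 0)
    res = p_outarray[0]->write(output_path + "/c_OSPH.hd5", "HDF5");
  return res;
}
```
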
diff --git a/src/inclusion/inclusion.cpp b/src/inclusion/inclusion.cpp
index 25b6880f7e69e83fb1101863d91bd95f85df8c58..86c73b469a07f4a89e9fac7b3420a62bc1119339 100644
--- a/src/inclusion/inclusion.cpp
+++ b/src/inclusion/inclusion.cpp
@@ -24,17 +24,21 @@
 #include <fstream>
 #include <hdf5.h>
 #include <string>
+
 #ifdef _OPENMP
 #include <omp.h>
 #endif
+
 #ifdef USE_MPI
 #ifndef MPI_VERSION
 #include <mpi.h>
 #endif
 #endif
+
 #ifdef USE_NVTX
 #include <nvtx3/nvToolsExt.h>
 #endif
+
 #ifdef USE_MAGMA
 #include <cuda_runtime.h>
 #endif
@@ -95,1243 +99,537 @@
 #include "../include/outputs.h"
 #endif
 
-using namespace std;
-
-// >>> InclusionIterationData header <<< //
-/*! \brief A data structure representing the information used for a single scale
- * of the INCLUSION case.
- */
-class InclusionIterationData {
-protected:
-  //! \brief Vectorized geometric asymmetry parameter components.
-  double *vec_zpv;
-  
-public:
-  //! \brief External layer index.
-  int nimd;
-  //! \brief External layer radius.
-  double extr;
-  
-  //! \brief Pointer to a ParticleDescriptor structure.
-  ParticleDescriptor *c1;
-  //! \brief Vector of geometric asymmetry factors.
-  double *gaps;
-    //! \brief Components of extinction contribution to radiation torque on a single sphere along k.
-  double **tqse;
-  //! \brief Components of polarized extinction contribution to radiation torque on a single sphere along k.
-  dcomplex **tqspe;
-  //! \brief Components of scattering contribution to radiation torque on a single sphere along k.
-  double **tqss;
-  //! \brief Components of polarized scattering contribution to radiation torque on a single sphere along k.
-  dcomplex **tqsps;
-  //! \brief L-dependent coefficients of the geometric asymmetry parameter.
-  double ****zpv;
-  //! \brief Mean geometric asymmetry parameters.
-  double **gapm;
-  //! \brief Mean geometric asymmetry parameters referred to polarization plane.
-  dcomplex **gappm;
-  //! \brief Imaginary part of the harmonic functions argument.
-  double *argi;
-  //! \brief Argument of the harmonic functions referred to the scattering plane.
-  double *args;
-  //! \brief Geometric asymmetry parameters.
-  double **gap;
-  //! \brief Geometric asymmetry parameters referred to polarization plane.
-  dcomplex **gapp;
-  //! \brief Components of extinction contribution to radiation torque on the cluster along k.
-  double **tqce;
-  //! \brief Components of extinction contribution to radiation torque on the cluster along k referred to polarization plane.
-  dcomplex **tqcpe;
-  //! \brief Components of scattering contribution to radiation torque on the cluster along k.
-  double **tqcs;
-  //! \brief Components of scattering contribution to radiation torque on the cluster along k referred to polarization plane.
-  dcomplex **tqcps;
-  //! \brief Variation of unitary radiation vector. QUESTION: correct?
-  double *duk;
-  //! \brief Cluster extinction cross-section components referred to scattering plane.
-  double **cextlr;
-  //! \brief Cluster extinction cross-section components referred to meridional plane.
-  double **cext;
-  //! \brief Cluster Mueller Transformation Matrix components referred to scattering plane.
-  double **cmullr;
-  //! \brief Cluster Mueller Transformation Matrix components referred to meridional plane.
-  double **cmul;
-  //! \brief Geometric asymmetry parameter components.
-  double *gapv;
-  //! \brief Radiation extinction torque components.
-  double *tqev;
-  //! \brief Radiation scattering torque components.
-  double *tqsv;
-  //! \brief Incident unitary vector components.
-  double *u;
-  //! \brief Scattered unitary vector components.
-  double *us;
-  //! \brief Normal unitary vector components.
-  double *un;
-  //! \brief Normal scattered unitary vector components.
-  double *uns;
-  //! \brief Incident unitary vector components on polarization plane.
-  double *up;
-  //! \brief Scattered unitary vector components on polarization plane.
-  double *ups;
-  //! \brief Mean unitary vector components normal to polarization plane.
-  double *unmp;
-  //! \brief Mean scattered unitary vector components normal to polarization plane.
-  double *unsmp;
-  //! \brief Mean incident unitary vector components on polarization plane.
-  double *upmp;
-  //! \brief Mean scattered unitary vector components on polarization plane.
-  double *upsmp;
-  //! \brief Scattering angle.
-  double scan;
-  //! \brief Control parameter on incidence direction referred to meridional plane.
-  double cfmp;
-  //! \brief Control parameter on scattering direction referred to meridional plane.
-  double sfmp;
-  //! \brief Control parameter on incidence direction referred to scattering plane.
-  double cfsp;
-  //! \brief Control parameter on scattering direction referred to scattering plane.
-  double sfsp;
-  //! \brief SQSFI = XI^-2
-  double sqsfi;
-  //! \brief Vectorized scattering coefficient matrix.
-  dcomplex *am_vector;
-  //! \brief Scattering coefficient matrix.
-  dcomplex **am;
-  //! \brief Argument of harmonic functions. QUESTION: correct?
-  dcomplex arg;
-  //! \brief Vacuum magnitude of wave vector.
-  double vk;
-  //! \brief Wave number.
-  double wn;
-  //! \brief Normalization scale. QUESTION: correct?
-  double xip;
-  //! \brief Number of scales (wavelengths) to be computed.
-  int number_of_scales;
-  //! \brief Size of the block of scales handled by the current process.
-  int xiblock;
-  //! \brief Index of the first scale handled by the current process.
-  int firstxi;
-  //! \brief Index of the last scale handled by the current process.
-  int lastxi;
-  //! \brief ID of the GPU used by one MPI process.
-  int proc_device;
-  //! \brief Refinement mode selction flag.
-  int refinemode;
-  //! \brief Maximum number of refinement iterations.
-  int maxrefiters;
-  //! \brief Required accuracy level.
-  double accuracygoal;
-
-  /*! \brief `InclusionIterationData` default instance constructor.
-   *
-   * \param gconf: `GeometryConfiguration *` Pointer to a `GeometryConfiguration` object.
-   * \param sconf: `ScattererConfiguration *` Pointer to a `ScattererConfiguration` object.
-   * \param mpidata: `mixMPI *` Pointer to a `mixMPI` object.
-   * \param device_count: `const int` Number of offload devices available on the system.
-   */
-  InclusionIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count);
-  
-  /*! \brief `InclusionIterationData` copy constructor.
-   *
-   * \param rhs: `const InclusionIterationData &` Reference to the `InclusionIterationData` object to be copied.
-   */
-  InclusionIterationData(const InclusionIterationData& rhs);
-
-#ifdef MPI_VERSION
-  /*! \brief `InclusionIterationData` MPI constructor.
-   *
-   * \param mpidata: `const mixMPI *` Pointer to a `mixMPI` instance.
-   * \param device_count: `const int` Number of offload devices available on the system.
-   */
-  InclusionIterationData(const mixMPI *mpidata, const int device_count);
-
-  /*! \brief Broadcast over MPI the InclusionIterationData instance from MPI process 0 to all others.
-   *
-   * When using MPI, the initial InclusionIterationData instance created by MPI process 0
-   * needs to be replicated on all other processes. This function sends it using
-   * MPI broadcast calls. The MPI broadcast calls in this function must match those
-   * in the constructor using the mixMPI pointer.
-   *
-   * \param mpidata: `mixMPI *` Pointer to the mpi structure used to do the MPI broadcast.
-   */
-  void mpibcast(const mixMPI *mpidata);
+#ifndef INCLUDE_ITERATION_DATA_H_
+#include "../include/IterationData.h"
 #endif
 
-  /*! \brief `InclusionIterationData` instance destroyer.
-   */
-  ~InclusionIterationData();
-};
+using namespace std;
 
-// >>> End of InclusionIterationData header <<< //
+/*! \brief Main calculation loop.
+ *
+ *  The solution of the scattering problem for different wavelengths is an
+ *  embarrassingly parallel task. This function, therefore, collects all the
+ *  operations that can be independently executed by different processes,
+ *  after the configuration stage and the first calculation loop have been
+ *  executed.
+ *
+ *  \param jxi488: `int` Wavelength loop index.
+ *  \param sconf: `ScattererConfiguration *` Pointer to a `ScattererConfiguration` object.
+ *  \param gconf: `GeometryConfiguration *` Pointer to a `GeometryConfiguration` object.
+ *  \param sa: `ScatteringAngles *` Pointer to a `ScatteringAngles` object.
+ *  \param cid: `InclusionIterationData *` Pointer to an `InclusionIterationData` object.
+ *  \param output: `InclusionOutputInfo *` Pointer to an `InclusionOutputInfo` object.
+ *  \param output_path: `const string &` Path to the output directory.
+ *  \param vtppoanp: `VirtualBinaryFile *` Pointer to a `VirtualBinaryFile` object.
+ */
+int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, ScatteringAngles *sa, InclusionIterationData *cid, InclusionOutputInfo *output, const string& output_path, VirtualBinaryFile *vtppoanp);
 
-// >>> InclusionIterationData implementation <<< //
-InclusionIterationData::InclusionIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count) {
-  c1 = new ParticleDescriptorInclusion(gconf, sconf);
-  const int ndi = c1->nsph * c1->nlim;
-  const np_int ndit = 2 * ndi;
-  gaps = new double[c1->nsph]();
-  tqev = new double[3]();
-  tqsv = new double[3]();
-  tqse = new double*[2];
-  tqspe = new dcomplex*[2];
-  tqss = new double*[2];
-  tqsps = new dcomplex*[2];
-  tqce = new double*[2];
-  tqcpe = new dcomplex*[2];
-  tqcs = new double*[2];
-  tqcps = new dcomplex*[2];
-  for (int ti = 0; ti < 2; ti++) {
-    tqse[ti] = new double[c1->nsph]();
-    tqspe[ti] = new dcomplex[c1->nsph]();
-    tqss[ti] = new double[c1->nsph]();
-    tqsps[ti] = new dcomplex[c1->nsph]();
-    tqce[ti] = new double[3]();
-    tqcpe[ti] = new dcomplex[3]();
-    tqcs[ti] = new double[3]();
-    tqcps[ti] = new dcomplex[3]();
-  }
-  gapv = new double[3]();
-  gapp = new dcomplex*[3];
-  gappm = new dcomplex*[3];
-  gap = new double*[3];
-  gapm = new double*[3];
-  for (int gi = 0; gi < 3; gi++) {
-    gapp[gi] = new dcomplex[2]();
-    gappm[gi] = new dcomplex[2]();
-    gap[gi] = new double[2]();
-    gapm[gi] = new double[2]();
-  }
-  u = new double[3]();
-  us = new double[3]();
-  un = new double[3]();
-  uns = new double[3]();
-  up = new double[3]();
-  ups = new double[3]();
-  unmp = new double[3]();
-  unsmp = new double[3]();
-  upmp = new double[3]();
-  upsmp = new double[3]();
-  argi = new double[1]();
-  args = new double[1]();
-  duk = new double[3]();
-  cextlr = new double*[4];
-  cext = new double*[4];
-  cmullr = new double*[4];;
-  cmul = new double*[4];
-  for (int ci = 0; ci < 4; ci++) {
-    cextlr[ci] = new double[4]();
-    cext[ci] = new double[4]();
-    cmullr[ci] = new double[4]();
-    cmul[ci] = new double[4]();
+/*! \brief C++ implementation of INCLU
+ *
+ * \param config_file: `string` Name of the configuration file.
+ * \param data_file: `string` Name of the input data file.
+ * \param output_path: `string` Directory to write the output files in.
+ * \param mpidata: `mixMPI *` Pointer to an instance of MPI data settings.
+ */
+void inclusion(const string& config_file, const string& data_file, const string& output_path, const mixMPI *mpidata) {
+  chrono::time_point<chrono::high_resolution_clock> t_start = chrono::high_resolution_clock::now();
+  chrono::duration<double> elapsed;
+  string message;
+  string timing_name;
+  FILE *timing_file;
+  Logger *time_logger;
+  if (mpidata->rank == 0) {
+    timing_name = output_path + "/c_timing_mpi"+ to_string(mpidata->rank) +".log";
+    timing_file = fopen(timing_name.c_str(), "w");
+    time_logger = new Logger(LOG_DEBG, timing_file);
   }
-  vec_zpv = new double[c1->lm * 12]();
-  zpv = new double***[c1->lm];
-  for (int zi = 0; zi < c1->lm; zi++) {
-    zpv[zi] = new double**[12];
-    for (int zj = 0; zj < 3; zj++) {
-      zpv[zi][zj] = new double*[4];
-      zpv[zi][zj][0] = vec_zpv + (zi * 12) + (zj * 4);
-      zpv[zi][zj][1] = vec_zpv + (zi * 12) + (zj * 4) + 2;
+  Logger *logger = new Logger(LOG_DEBG);
+  int device_count = 0;
+  //===========
+  // Initialise MAGMA
+  //===========
+#ifdef USE_MAGMA
+  const magma_int_t d_array_max_size = 32; // TEMPORARY: can become configurable parameter
+  magma_device_t *device_array = new magma_device_t[d_array_max_size];
+  magma_int_t num_devices;
+  magma_getdevices(device_array, d_array_max_size, &num_devices);
+  device_count = (int)num_devices;
+  delete[] device_array;
+  message = "DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " GPU ";
+  if (device_count > 1) message += "devices.\n";
+  else message += "device.\n";
+  logger->log(message, LOG_DEBG);
+  logger->log("INFO: Process " + to_string(mpidata->rank) + " initializes MAGMA.\n");
+  magma_int_t magma_result = magma_init();
+  if (magma_result != MAGMA_SUCCESS) {
+    logger->err("ERROR: Process " + to_string(mpidata->rank) + " failed to initilize MAGMA.\n");
+    logger->err("PROC-" + to_string(mpidata->rank) + ": MAGMA error code " + to_string(magma_result) + "\n");
+    if (mpidata->rank == 0) {
+      fclose(timing_file);
+      delete time_logger;
     }
+    delete logger;
+    return;
   }
-  am_vector = new dcomplex[c1->ndm * c1->ndm]();
-  am = new dcomplex*[c1->ndm];
-  for (int ai = 0; ai < c1->ndm; ai++) {
-    am[ai] = (am_vector + ai * c1->ndm);
-  }
+#endif // end MAGMA initialisation
   
-  arg = 0.0 + 0.0 * I;
-  // These are suspect initializations
-  scan = 0.0;
-  cfmp = 0.0;
-  sfmp = 0.0;
-  cfsp = 0.0;
-  sfsp = 0.0;
-  // End of suspect initializations
-  wn = sconf->wp / 3.0e8;
-  xip = sconf->xip;
-  sqsfi = 1.0;
-  vk = 0.0;
-  number_of_scales = sconf->number_of_scales;
-  xiblock = (int) ceil(((double) (sconf->number_of_scales-1))/((double) mpidata->nprocs));
-  lastxi = ((mpidata->rank+1) * xiblock)+1;
-  firstxi = lastxi-xiblock+1;
-  if (lastxi > sconf->number_of_scales) lastxi = sconf->number_of_scales;
+  //===========================
+  // the following only happens on MPI process 0
+  //===========================
+  if (mpidata->rank == 0) {
+#ifdef USE_NVTX
+    nvtxRangePush("Set up");
+#endif
+    //=======================
+    // Initialise sconf from configuration file
+    //=======================
+    logger->log("INFO: making legacy configuration...", LOG_INFO);
+    ScattererConfiguration *sconf = NULL;
+    try {
+      sconf = ScattererConfiguration::from_dedfb(config_file);
+    } catch(const OpenConfigurationFileException &ex) {
+      logger->err("\nERROR: failed to open scatterer configuration file.\n");
+      string message = "FILE: " + string(ex.what()) + "\n";
+      logger->err(message);
+      fclose(timing_file);
+      delete time_logger;
+      delete logger;
+      return;
+    }
+    sconf->write_formatted(output_path + "/c_OEDFB");
+    sconf->write_binary(output_path + "/c_TEDF");
+    sconf->write_binary(output_path + "/c_TEDF.hd5", "HDF5");
+    // end scatterer initialisation
 
-  nimd = c1->nshl[0] + 1;
-  c1->rc[0][nimd - 1] = c1->ros[0] * sconf->get_rcf(0, nimd - 1);
-  extr = c1->rc[0][nimd - 1];
-  const double pig = acos(0.0) * 2.0;
-  c1->gcs = pig * extr * extr;
-  
+    //========================
+    // Initialise gconf from configuration files
+    //========================
+    GeometryConfiguration *gconf = NULL;
+    try {
+      gconf = GeometryConfiguration::from_legacy(data_file);
+    } catch (const OpenConfigurationFileException &ex) {
+      logger->err("\nERROR: failed to open geometry configuration file.\n");
+      string message = "FILE: " + string(ex.what()) + "\n";
+      logger->err(message);
+      if (sconf) delete sconf;
+      fclose(timing_file);
+      delete time_logger;
+      delete logger;
+      return;
+    }
+    logger->log(" done.\n", LOG_INFO);
+    //end gconf initialisation
+
+#ifdef USE_NVTX
+    nvtxRangePop();
+#endif
+    int s_nsph = sconf->number_of_spheres;
+    int nsph = gconf->number_of_spheres;
+    // Sanity check on sphere number consistency; this should always hold.
+    if (s_nsph == nsph) {
+      // Shortcuts to variables stored in configuration objects
+      ScatteringAngles *p_scattering_angles = new ScatteringAngles(gconf);
+      double wp = sconf->wp;
+      // Create an empty output data structure
+      InclusionOutputInfo *p_output = new InclusionOutputInfo(sconf, gconf, mpidata);
+      InclusionIterationData *cid = new InclusionIterationData(gconf, sconf, mpidata, device_count);
+      const np_int ndi = cid->c1->nsph * cid->c1->nlim;
+      const np_int ndit = 2 * ndi;
+      logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)cid->c1->ndm) + " x " + to_string((int64_t)cid->c1->ndm) +".\n");
+      time_logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)cid->c1->ndm) + " x " + to_string((int64_t)cid->c1->ndm) +".\n");
+      
+      instr(sconf, cid->c1);
+      thdps(cid->c1->lm, cid->zpv);
+      double exdc = sconf->exdc;
+      double exri = sqrt(exdc);
+
+      // Create an empty binary file
+      VirtualBinaryFile *vtppoanp = new VirtualBinaryFile();
+      string tppoan_name = output_path + "/c_TPPOAN";
 #ifdef USE_MAGMA
-  proc_device = mpidata->rank % device_count;
+      logger->log("INFO: using MAGMA calls.\n", LOG_INFO);
+#elif defined USE_LAPACK
+      logger->log("INFO: using LAPACK calls.\n", LOG_INFO);
 #else
-  proc_device = 0;
+      logger->log("INFO: using fall-back lucin() calls.\n", LOG_INFO);
 #endif
+      int iavm = gconf->iavm;
+      int isam = gconf->isam;
+      int inpol = gconf->in_pol;
+      int nxi = sconf->number_of_scales;
+      int nth = p_scattering_angles->nth;
+      int nths = p_scattering_angles->nths;
+      int nph = p_scattering_angles->nph;
+      int nphs = p_scattering_angles->nphs;
+      
+      //========================
+      // write a block of info to virtual binary file
+      //========================
+      vtppoanp->append_line(VirtualBinaryLine(iavm));
+      vtppoanp->append_line(VirtualBinaryLine(isam));
+      vtppoanp->append_line(VirtualBinaryLine(inpol));
+      vtppoanp->append_line(VirtualBinaryLine(nxi));
+      vtppoanp->append_line(VirtualBinaryLine(nth));
+      vtppoanp->append_line(VirtualBinaryLine(nph));
+      vtppoanp->append_line(VirtualBinaryLine(nths));
+      vtppoanp->append_line(VirtualBinaryLine(nphs));
+      if (sconf->idfc < 0) {
+	cid->vk = cid->xip * cid->wn;
+	p_output->vec_vk[0] = cid->vk;
+      }
+      
+      // do the first iteration on jxi488 separately, since it seems to be different from the others
+      int jxi488 = 1;
+      int initialmaxrefiters = cid->maxrefiters;
+      
+      chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
+#ifdef USE_NVTX
+      nvtxRangePush("First iteration");
+#endif
+      // use these pragmas, which should have no effect on parallelism, just to keep the OMP nesting level of the first wavelength iteration consistent with the later ones
+      int jer = 0;
+#pragma omp parallel
+      {
+#pragma omp single
+	{
+	  jer = inclusion_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp);
+	}
+      }
+#ifdef USE_NVTX
+      nvtxRangePop();
+#endif
+      chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now();
+      elapsed = start_iter_1 - t_start;
+      string message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n";
+      logger->log(message);
+      time_logger->log(message);
+      elapsed = end_iter_1 - start_iter_1;
+      message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n";
+      logger->log(message);
+      time_logger->log(message);
+      /* for the next iterations, just always do maxiter iterations, assuming the accuracy is good enough */
+      cid->refinemode = 0;
+      /* add an extra iteration for margin, if this does not exceed initialmaxrefiters */
+      // if (cid->maxrefiters < initialmaxrefiters) cid->maxrefiters++;
+      if (jer != 0) {
+	// First loop failed. Halt the calculation.
+	fclose(timing_file);
+	delete time_logger;
+	delete p_output;
+	delete p_scattering_angles;
+	delete cid;
+	delete logger;
+	delete sconf;
+	delete gconf;
+	return;
+      }
 
-  // In the first iteration, if refinement is enabled, determine the number of refinement iterations required to arrive at the target accuracy (if achievable in a reasonable number of iterations)
-  refinemode = 2;
-  // maxrefiters and accuracygoal should be configurable and preferably set somewhere else
-  maxrefiters = 20;
-  accuracygoal = 1e-6;
-}
-
-InclusionIterationData::InclusionIterationData(const InclusionIterationData& rhs) {
-  c1 = new ParticleDescriptorInclusion(reinterpret_cast<ParticleDescriptorInclusion &>(*(rhs.c1)));
-  const int ndi = c1->nsph * c1->nlim;
-  const np_int ndit = 2 * ndi;
-  gaps = new double[c1->nsph]();
-  for (int gi = 0; gi < c1->nsph; gi++) gaps[gi] = rhs.gaps[gi];
-  tqev = new double[3]();
-  tqsv = new double[3]();
-  for (int ti = 0; ti < 3; ti++) {
-    tqev[ti] = rhs.tqev[ti];
-    tqsv[ti] = rhs.tqsv[ti];
-  }
-  tqse = new double*[2];
-  tqspe = new dcomplex*[2];
-  tqss = new double*[2];
-  tqsps = new dcomplex*[2];
-  tqce = new double*[2];
-  tqcpe = new dcomplex*[2];
-  tqcs = new double*[2];
-  tqcps = new dcomplex*[2];
-  for (int ti = 0; ti < 2; ti++) {
-    tqse[ti] = new double[c1->nsph]();
-    tqspe[ti] = new dcomplex[c1->nsph]();
-    tqss[ti] = new double[c1->nsph]();
-    tqsps[ti] = new dcomplex[c1->nsph]();
-    for (int tj = 0; tj < c1->nsph; tj++) {
-      tqse[ti][tj] = rhs.tqse[ti][tj];
-      tqspe[ti][tj] = rhs.tqspe[ti][tj];
-      tqss[ti][tj] = rhs.tqss[ti][tj];
-      tqsps[ti][tj] = rhs.tqsps[ti][tj];
-    }
-    tqce[ti] = new double[3]();
-    tqcpe[ti] = new dcomplex[3]();
-    tqcs[ti] = new double[3]();
-    tqcps[ti] = new dcomplex[3]();
-    for (int tj = 0; tj < 3; tj++) {
-      tqce[ti][tj] = rhs.tqce[ti][tj];
-      tqcpe[ti][tj] = rhs.tqcpe[ti][tj];
-      tqcs[ti][tj] = rhs.tqcs[ti][tj];
-      tqcps[ti][tj] = rhs.tqcps[ti][tj];
-    }
-  }
-  gapv = new double[3]();
-  gapp = new dcomplex*[3];
-  gappm = new dcomplex*[3];
-  gap = new double*[3];
-  gapm = new double*[3];
-  for (int gi = 0; gi < 3; gi++) {
-    gapv[gi] = rhs.gapv[gi];
-    gapp[gi] = new dcomplex[2]();
-    gappm[gi] = new dcomplex[2]();
-    gap[gi] = new double[2]();
-    gapm[gi] = new double[2]();
-    for (int gj = 0; gj < 2; gj++) {
-      gapp[gi][gj] = rhs.gapp[gi][gj];
-      gappm[gi][gj] = rhs.gappm[gi][gj];
-      gap[gi][gj] = rhs.gap[gi][gj];
-      gapm[gi][gj] = rhs.gapm[gi][gj];
-    }
-  }
-  u = new double[3]();
-  us = new double[3]();
-  un = new double[3]();
-  uns = new double[3]();
-  up = new double[3]();
-  ups = new double[3]();
-  unmp = new double[3]();
-  unsmp = new double[3]();
-  upmp = new double[3]();
-  upsmp = new double[3]();
-  duk = new double[3]();
-  for (int ui = 0; ui < 3; ui++) {
-    u[ui] = rhs.u[ui];
-    us[ui] = rhs.us[ui];
-    un[ui] = rhs.un[ui];
-    uns[ui] = rhs.uns[ui];
-    up[ui] = rhs.up[ui];
-    ups[ui] = rhs.ups[ui];
-    unmp[ui] = rhs.unmp[ui];
-    unsmp[ui] = rhs.unsmp[ui];
-    upmp[ui] = rhs.upmp[ui];
-    upsmp[ui] = rhs.upsmp[ui];
-    duk[ui] = rhs.duk[ui];
-  }
-  argi = new double[1]();
-  args = new double[1]();
-  argi[0] = rhs.argi[0];
-  args[0] = rhs.args[0];
-  cextlr = new double*[4];
-  cext = new double*[4];
-  cmullr = new double*[4];;
-  cmul = new double*[4];
-  for (int ci = 0; ci < 4; ci++) {
-    cextlr[ci] = new double[4]();
-    cext[ci] = new double[4]();
-    cmullr[ci] = new double[4]();
-    cmul[ci] = new double[4]();
-    for (int cj = 0; cj < 4; cj++) {
-      cextlr[ci][cj] = rhs.cextlr[ci][cj];
-      cext[ci][cj] = rhs.cext[ci][cj];
-      cmullr[ci][cj] = rhs.cmullr[ci][cj];
-      cmul[ci][cj] = rhs.cmul[ci][cj];
-    }
-  }
-  vec_zpv = new double[c1->lm * 12];
-  zpv = new double***[c1->lm];
-  for (int zi = 0; zi < c1->lm; zi++) {
-    zpv[zi] = new double **[12];
-    for (int zj = 0; zj < 3; zj++) {
-      zpv[zi][zj] = new double*[4];
-      zpv[zi][zj][0] = vec_zpv + (zi * 12) + (zj * 4);
-      zpv[zi][zj][1] = vec_zpv + (zi * 12) + (zj * 4) + 2;
-      zpv[zi][zj][0][0] = rhs.zpv[zi][zj][0][0];
-      zpv[zi][zj][0][1] = rhs.zpv[zi][zj][0][1];
-      zpv[zi][zj][1][0] = rhs.zpv[zi][zj][1][0];
-      zpv[zi][zj][1][1] = rhs.zpv[zi][zj][1][1];
-    }
-  }
-  am_vector = new dcomplex[c1->ndm * c1->ndm];
-  for (int ai = 0; ai < c1->ndm * c1->ndm; ai++) am_vector[ai] = rhs.am_vector[ai];
-  am = new dcomplex*[c1->ndm];
-  for (int ai = 0; ai < c1->ndm; ai++) {
-    am[ai] = (am_vector + ai * c1->ndm);
-  }
-  
-  arg = rhs.arg;
-  // These are suspect initializations
-  scan = rhs.scan;
-  cfmp = rhs.cfmp;
-  sfmp = rhs.sfmp;
-  cfsp = rhs.cfsp;
-  sfsp = rhs.sfsp;
-  // End of suspect initializations
-  wn = rhs.wn;
-  xip = rhs.xip;
-  sqsfi = rhs.sqsfi;
-  vk = rhs.vk;
-  firstxi = rhs.firstxi;
-  lastxi = rhs.lastxi;
-  xiblock = rhs.xiblock;
-  number_of_scales = rhs.number_of_scales;
+      //==================================================
+      // do the first outputs here, so that I open here the new files, afterwards I only append
+      //==================================================
+      vtppoanp->write_to_disk(output_path + "/c_TPPOAN");
+      delete vtppoanp;
+      
+      // here go the calls that send data to be duplicated on other MPI processes from process 0 to others, using MPI broadcasts, but only if MPI is actually used
+#ifdef MPI_VERSION
+      if (mpidata->mpirunning) {
+	gconf->mpibcast(mpidata);
+	sconf->mpibcast(mpidata);	    
+	cid->mpibcast(mpidata);
+	p_scattering_angles->mpibcast(mpidata);
+      }	
+#endif
+      // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
+      int ompnumthreads = 1;
+      // this is for MPI process 0 (or even if we are not using MPI at all)
+      int myjxi488startoffset = 0;
+      int myMPIstride = ompnumthreads;
+      int myMPIblock = ompnumthreads;
+      // Define here shared arrays of virtual ascii and binary files, so that thread 0 will be able to access them all later
+      InclusionOutputInfo **p_outarray = NULL;
+      VirtualBinaryFile **vtppoanarray = NULL;
 
-  nimd = rhs.nimd;
-  extr = rhs.extr;
-  
-  proc_device = rhs.proc_device;
-  refinemode = rhs.refinemode;
-  maxrefiters = rhs.maxrefiters;
-  accuracygoal = rhs.accuracygoal;
-}
+#ifdef USE_NVTX
+      nvtxRangePush("Parallel loop");
+#endif
 
-#ifdef MPI_VERSION
-InclusionIterationData::InclusionIterationData(const mixMPI *mpidata, const int device_count) {
-  c1 = new ParticleDescriptorInclusion(mpidata);
-  const int ndi = c1->nsph * c1->nlim;
-  const np_int ndit = 2 * ndi;
-  gaps = new double[c1->nsph]();
-  MPI_Bcast(gaps, c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  tqev = new double[3]();
-  tqsv = new double[3]();
-  MPI_Bcast(tqev, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(tqsv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  tqse = new double*[2];
-  tqspe = new dcomplex*[2];
-  tqss = new double*[2];
-  tqsps = new dcomplex*[2];
-  tqce = new double*[2];
-  tqcpe = new dcomplex*[2];
-  tqcs = new double*[2];
-  tqcps = new dcomplex*[2];
-  for (int ti = 0; ti < 2; ti++) {
-    tqse[ti] = new double[c1->nsph]();
-    tqspe[ti] = new dcomplex[c1->nsph]();
-    tqss[ti] = new double[c1->nsph]();
-    tqsps[ti] = new dcomplex[c1->nsph]();
-    MPI_Bcast(tqse[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqspe[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqss[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqsps[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    tqce[ti] = new double[3]();
-    tqcpe[ti] = new dcomplex[3]();
-    tqcs[ti] = new double[3]();
-    tqcps[ti] = new dcomplex[3]();
-    MPI_Bcast(tqce[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqcpe[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqcs[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqcps[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-  }
-  gapv = new double[3]();
-  gapp = new dcomplex*[3];
-  gappm = new dcomplex*[3];
-  gap = new double*[3];
-  gapm = new double*[3];
-  MPI_Bcast(gapv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  for (int gi = 0; gi < 3; gi++) {
-    gapp[gi] = new dcomplex[2]();
-    gappm[gi] = new dcomplex[2]();
-    gap[gi] = new double[2]();
-    gapm[gi] = new double[2]();
-    MPI_Bcast(gapp[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(gappm[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(gap[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(gapm[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  }
-  u = new double[3]();
-  us = new double[3]();
-  un = new double[3]();
-  uns = new double[3]();
-  up = new double[3]();
-  ups = new double[3]();
-  unmp = new double[3]();
-  unsmp = new double[3]();
-  upmp = new double[3]();
-  upsmp = new double[3]();
-  duk = new double[3]();
-  MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  argi = new double[1]();
-  args = new double[1]();
-  MPI_Bcast(argi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  cextlr = new double*[4];
-  cext = new double*[4];
-  cmullr = new double*[4];;
-  cmul = new double*[4];
-  for (int ci = 0; ci < 4; ci++) {
-    cextlr[ci] = new double[4]();
-    cext[ci] = new double[4]();
-    cmullr[ci] = new double[4]();
-    cmul[ci] = new double[4]();
-    MPI_Bcast(cextlr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(cext[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(cmullr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(cmul[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  }
-  vec_zpv = new double[c1->lm * 12];
-  MPI_Bcast(vec_zpv, c1->lm * 12, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  zpv = new double***[c1->lm];
-  for (int zi = 0; zi < c1->lm; zi++) {
-    zpv[zi] = new double **[12];
-    for (int zj = 0; zj < 3; zj++) {
-      zpv[zi][zj] = new double*[4];
-      zpv[zi][zj][0] = vec_zpv + (zi * 12) + (zj * 4);
-      zpv[zi][zj][1] = vec_zpv + (zi * 12) + (zj * 4) + 2;
-    }
-  }
-  am_vector = new dcomplex[c1->ndm * c1->ndm];
-  am = new dcomplex*[c1->ndm];
-  for (int ai = 0; ai < c1->ndm; ai++) {
-    am[ai] = (am_vector + ai * c1->ndm);
-    MPI_Bcast(am[ai], c1->ndm, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-  }
-  MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&sqsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  lastxi = ((mpidata->rank+1) * xiblock)+1;
-  firstxi = lastxi-xiblock+1;
-  if (lastxi > number_of_scales) lastxi = number_of_scales;
+      //===========================================
+      // open the OpenMP parallel context, so each thread can initialise its stuff
+      //===========================================
+#pragma omp parallel
+      {
+	// Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defined value anyway
+	int myompthread = 0;
 
-  MPI_Bcast(&nimd, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&extr, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  
-#ifdef USE_MAGMA
-  proc_device = mpidata->rank % device_count;
-#else
-  proc_device = 0;
+#ifdef _OPENMP
+	// If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files
+	myompthread = omp_get_thread_num();
+	if (myompthread == 0) ompnumthreads = omp_get_num_threads();
 #endif
-  MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-}
 
-void InclusionIterationData::mpibcast(const mixMPI *mpidata) {
-  c1->mpibcast(mpidata);
-  const int ndi = c1->nsph * c1->nlim;
-  const np_int ndit = 2 * ndi;
-  MPI_Bcast(gaps, c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(tqev, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(tqsv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  for (int ti = 0; ti < 2; ti++) {
-    MPI_Bcast(tqse[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqspe[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqss[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqsps[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqce[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqcpe[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqcs[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqcps[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-  }
-  MPI_Bcast(gapv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  for (int gi = 0; gi < 3; gi++) {
-    MPI_Bcast(gapp[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(gappm[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(gap[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(gapm[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  }
-  MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(argi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  for (int ci = 0; ci < 4; ci++) {
-    MPI_Bcast(cextlr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(cext[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(cmullr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(cmul[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  }
-  MPI_Bcast(vec_zpv, c1->lm * 12, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  // since MPI expects an int argument for the number of elements to transfer in one go, transfer am one row at a time
-  for (int ai = 0; ai < c1->ndm; ai++) {
-    MPI_Bcast(am[ai], c1->ndm, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-  }
-  MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&sqsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&nimd, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&extr, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-}
+	if (myompthread == 0) {
+	  // Initialise some shared variables only on thread 0
+	  p_outarray = new InclusionOutputInfo*[ompnumthreads];
+	  vtppoanarray = new VirtualBinaryFile*[ompnumthreads];
+	  myMPIblock = ompnumthreads;
+	  myMPIstride = myMPIblock;
+	}
+
+#ifdef MPI_VERSION
+	if (myompthread == 0) {
+	  if (mpidata->mpirunning) {
+	    // only go through this if MPI has been actually used
+	    for (int rr=1; rr<mpidata->nprocs; rr++) {
+	      // individually send their respective starting points to other MPI processes: they start immediately after the frequencies computed by previous processes so far
+	      int remotejxi488startoffset = myMPIstride;
+	      MPI_Send(&remotejxi488startoffset, 1, MPI_INT, rr, 3, MPI_COMM_WORLD);
+	      int remoteMPIblock;
+	      MPI_Recv(&remoteMPIblock, 1, MPI_INT, rr, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+	      // update myMPIstride to include the ones due to MPI process rr
+	      myMPIstride += remoteMPIblock;
+	    }
+	    // now I know the total myMPIstride, I can send it to all processes
+	    MPI_Bcast(&myMPIstride, 1, MPI_INT, 0, MPI_COMM_WORLD);
+	  }
+	}
 #endif
+	// add an omp barrier to make sure that the global variables defined by thread 0 are known to all threads below this
+#pragma omp barrier
 
-InclusionIterationData::~InclusionIterationData() {
-  const int nsph = c1->nsph;
-  delete[] am_vector;
-  delete[] am;
-  for (int zi = 0; zi < c1->lm; zi++) {
-    for (int zj = 0; zj < 3; zj++) {
-      delete[] zpv[zi][zj];
-    }
-    delete[] zpv[zi];
-  }
-  delete[] zpv;
-  delete[] vec_zpv;
-  delete c1;
-  delete[] gaps;
-  for (int ti = 1; ti > -1; ti--) {
-    delete[] tqse[ti];
-    delete[] tqss[ti];
-    delete[] tqspe[ti];
-    delete[] tqsps[ti];
-    delete[] tqce[ti];
-    delete[] tqcpe[ti];
-    delete[] tqcs[ti];
-    delete[] tqcps[ti];
-  }
-  delete[] tqse;
-  delete[] tqss;
-  delete[] tqspe;
-  delete[] tqsps;
-  delete[] tqce;
-  delete[] tqcpe;
-  delete[] tqcs;
-  delete[] tqcps;
-  delete[] tqev;
-  delete[] tqsv;
-  for (int gi = 2; gi > -1; gi--) {
-    delete[] gapp[gi];
-    delete[] gappm[gi];
-    delete[] gap[gi];
-    delete[] gapm[gi];
-  }
-  delete[] gapp;
-  delete[] gappm;
-  delete[] gap;
-  delete[] gapm;
-  delete[] gapv;
-  delete[] u;
-  delete[] us;
-  delete[] un;
-  delete[] uns;
-  delete[] up;
-  delete[] ups;
-  delete[] unmp;
-  delete[] unsmp;
-  delete[] upmp;
-  delete[] upsmp;
-  delete[] argi;
-  delete[] args;
-  delete[] duk;
-  for (int ci = 3; ci > -1; ci--) {
-    delete[] cextlr[ci];
-    delete[] cext[ci];
-    delete[] cmullr[ci];
-    delete[] cmul[ci];
-  }
-  delete[] cextlr;
-  delete[] cext;
-  delete[] cmullr;
-  delete[] cmul;
-}
-// >>> End of InclusionIterationData implementation <<< //
+	// To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one. Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number
+	InclusionIterationData *cid_2 = NULL;
+	InclusionOutputInfo *p_output_2 = NULL;
+	VirtualBinaryFile *vtppoanp_2 = NULL;
+	// for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
+	if (myompthread == 0) {
+	  cid_2 = cid;
+	  // OMP thread 0 of MPI process 0 holds the pointer to the full output structure
+	  p_output_2 = p_output;
+	  p_outarray[0] = p_output_2;
+	} else {
+	  // this is not thread 0, so do create fresh copies of all local variables
+	  cid_2 = new InclusionIterationData(*cid);
+	}
+	// make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
+	if (myompthread==0) {
+	  logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n");
+	}
+#pragma omp barrier
+	// ok, now I can actually start the parallel calculations
+	for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
+	  // the parallel loop over MPI processes covers a different set of indices for each thread
+#pragma omp barrier
+	  int myjxi488 = ixi488+myompthread;
+	  // each thread opens new virtual files and stores their pointers in the shared array
+	  vtppoanp_2 = new VirtualBinaryFile();
+	  // each thread puts a copy of the pointers to its virtual files in the shared arrays
+	  vtppoanarray[myompthread] = vtppoanp_2;
+#pragma omp barrier
 
-/*! \brief Main calculation loop.
- *
- *  The solution of the scattering problem for different wavelengths is an
- *  embarrasingly parallel task. This function, therefore, collects all the
- *  operations that can be independently executed by different processes,
- *  after the configuration stage and the first calculation loop have been
- *  executed.
- *
- *  \param jxi488: `int` Wavelength loop index.
- *  \param sconf: `ScattererConfiguration *` Pointer to a `ScattererConfiguration` object.
- *  \param gconf: `GeometryConfiguration *` Pointer to a `GeometryConfiguration` object.
- *  \param sa: `ScatteringAngles *` Pointer to a `ScatteringAngles` object.
- *  \param cid: `InclusionIterationData *` Pointer to an `InclusionIterationData` object.
- *  \param oi: `InclusionOutputInfo *` Pointer to an `InclusionOutputInfo` object.
- *  \param output_path: `const string &` Path to the output directory.
- *  \param vtppoanp: `VirtualBinaryFile *` Pointer to a `VirtualBinaryFile` object.
- */
-int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, ScatteringAngles *sa, InclusionIterationData *cid, InclusionOutputInfo *output, const string& output_path, VirtualBinaryFile *vtppoanp);
+	  // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism
+	  if (myjxi488 <= cid_2->number_of_scales) {
+	    if (myompthread > 0) {
+	      // UPDATE: non-0 threads need to allocate memory for one scale at a time.
+	      p_output_2 = new InclusionOutputInfo(sconf, gconf, mpidata, myjxi488, 1);
+	      p_outarray[myompthread] = p_output_2;
+	    }
+	    int jer = inclusion_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2);
+	  } else {
+	    if (myompthread > 0) {
+	      // If there is no input for this thread, set output pointer to NULL.
+	      p_outarray[myompthread] = NULL;
+	    }
+	  }
+#pragma omp barrier
 
-/*! \brief C++ implementation of INCLU
- *
- * \param config_file: `string` Name of the configuration file.
- * \param data_file: `string` Name of the input data file.
- * \param output_path: `string` Directory to write the output files in.
- * \param mpidata: `mixMPI *` Pointer to an instance of MPI data settings.
- */
-void inclusion(const string& config_file, const string& data_file, const string& output_path, const mixMPI *mpidata) {
-  chrono::time_point<chrono::high_resolution_clock> t_start = chrono::high_resolution_clock::now();
-  chrono::duration<double> elapsed;
-  string message;
-  string timing_name;
-  FILE *timing_file;
-  Logger *time_logger;
-  if (mpidata->rank == 0) {
-    timing_name = output_path + "/c_timing_mpi"+ to_string(mpidata->rank) +".log";
-    timing_file = fopen(timing_name.c_str(), "w");
-    time_logger = new Logger(LOG_DEBG, timing_file);
-  }
-  Logger *logger = new Logger(LOG_DEBG);
-  int device_count = 0;
-  //===========
-  // Initialise MAGMA
-  //===========
-#ifdef USE_MAGMA
-  const magma_int_t d_array_max_size = 32; // TEMPORARY: can become configurable parameter
-  magma_device_t *device_array = new magma_device_t[d_array_max_size];
-  magma_int_t num_devices;
-  magma_getdevices(device_array, d_array_max_size, &num_devices);
-  device_count = (int)num_devices;
-  delete[] device_array;
-  message = "DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " GPU ";
-  if (device_count > 1) message += "devices.\n";
-  else message += "device.\n";
-  logger->log(message, LOG_DEBG);
-  logger->log("INFO: Process " + to_string(mpidata->rank) + " initializes MAGMA.\n");
-  magma_int_t magma_result = magma_init();
-  if (magma_result != MAGMA_SUCCESS) {
-    logger->err("ERROR: Process " + to_string(mpidata->rank) + " failed to initilize MAGMA.\n");
-    logger->err("PROC-" + to_string(mpidata->rank) + ": MAGMA error code " + to_string(magma_result) + "\n");
-    if (mpidata->rank == 0) {
-      fclose(timing_file);
-      delete time_logger;
-    }
-    delete logger;
-    return;
-  }
-#endif // end MAGMA initialisation
-  
-  //===========================
-  // the following only happens on MPI process 0
-  //===========================
-  if (mpidata->rank == 0) {
 #ifdef USE_NVTX
-    nvtxRangePush("Set up");
+	  nvtxRangePush("Output concatenation");
 #endif
-    //=======================
-    // Initialise sconf from configuration file
-    //=======================
-    logger->log("INFO: making legacy configuration...", LOG_INFO);
-    ScattererConfiguration *sconf = NULL;
-    try {
-      sconf = ScattererConfiguration::from_dedfb(config_file);
-    } catch(const OpenConfigurationFileException &ex) {
-      logger->err("\nERROR: failed to open scatterer configuration file.\n");
-      string message = "FILE: " + string(ex.what()) + "\n";
-      logger->err(message);
-      fclose(timing_file);
-      delete time_logger;
-      delete logger;
-      return;
-    }
-    sconf->write_formatted(output_path + "/c_OEDFB");
-    sconf->write_binary(output_path + "/c_TEDF");
-    sconf->write_binary(output_path + "/c_TEDF.hd5", "HDF5");
-    // end scatterer initialisation
-
-    //========================
-    // Initialise gconf from configuration files
-    //========================
-    GeometryConfiguration *gconf = NULL;
-    try {
-      gconf = GeometryConfiguration::from_legacy(data_file);
-    } catch (const OpenConfigurationFileException &ex) {
-      logger->err("\nERROR: failed to open geometry configuration file.\n");
-      string message = "FILE: " + string(ex.what()) + "\n";
-      logger->err(message);
-      if (sconf) delete sconf;
-      fclose(timing_file);
-      delete time_logger;
-      delete logger;
-      return;
-    }
-    logger->log(" done.\n", LOG_INFO);
-    //end gconf initialisation
+#pragma omp barrier
+	  // threads different from 0 append their virtual files to the one of thread 0, and delete them
+	  if (myompthread == 0) {
+	    for (int ti=1; ti<ompnumthreads; ti++) {
+	      if (p_outarray[ti] != NULL) {
+		p_outarray[0]->insert(*(p_outarray[ti]));
+		delete p_outarray[ti];
+		p_outarray[ti] = NULL;
+	      }
+	      vtppoanarray[0]->append(*(vtppoanarray[ti]));
+	      delete vtppoanarray[ti];
+	    }
+	  }
+#pragma omp barrier
+	  //==============================================
+	  // Collect all virtual files on thread 0 of MPI process 0, and append them to disk
+	  //==============================================
+	  if (myompthread == 0) {
+	    // thread 0 writes its virtual files, now including contributions from all threads, to disk, and deletes them
+	    // p_outarray[0]->append_to_disk(output_path + "/c_OINCLU");
+	    // delete p_outarray[0];
+	    vtppoanarray[0]->append_to_disk(output_path + "/c_TPPOAN");
+	    delete vtppoanarray[0];
 
+#ifdef MPI_VERSION
+	    if (mpidata->mpirunning) {
+	      // only go through this if MPI has been actually used
+	      for (int rr=1; rr<mpidata->nprocs; rr++) {
+		// get the data from process rr by receiving it in total memory structure
+		p_outarray[0]->mpireceive(mpidata, rr);
+		// get the data from process rr, creating a new virtual ascii file
+		// VirtualAsciiFile *p_output = new VirtualAsciiFile(mpidata, rr);
+		// append to disk and delete virtual ascii file
+		// p_output->append_to_disk(output_path + "/c_OINCLU");
+		// delete p_output;
+		
+		// get the data from process rr, creating a new virtual binary file
+		VirtualBinaryFile *vtppoanp = new VirtualBinaryFile(mpidata, rr);
+		// append to disk and delete virtual binary file
+		vtppoanp->append_to_disk(output_path + "/c_TPPOAN");
+		delete vtppoanp;
+		int test = MPI_Barrier(MPI_COMM_WORLD);
+	      }
+	    }
+#endif
+	  }
+	  // end block writing to disk
 #ifdef USE_NVTX
-    nvtxRangePop();
+	  nvtxRangePop();
 #endif
-    int s_nsph = sconf->number_of_spheres;
-    int nsph = gconf->number_of_spheres;
-    // Sanity check on number of sphere consistency, should always be verified
-    if (s_nsph == nsph) {
-      // Shortcuts to variables stored in configuration objects
-      ScatteringAngles *p_scattering_angles = new ScatteringAngles(gconf);
-      double wp = sconf->wp;
-      // Open empty virtual ascii file for output
-      InclusionOutputInfo *p_output = new InclusionOutputInfo(sconf, gconf, mpidata);
-      InclusionIterationData *cid = new InclusionIterationData(gconf, sconf, mpidata, device_count);
-      const np_int ndi = cid->c1->nsph * cid->c1->nlim;
-      const np_int ndit = 2 * ndi;
-      logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)cid->c1->ndm) + " x " + to_string((int64_t)cid->c1->ndm) +".\n");
-      time_logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)cid->c1->ndm) + " x " + to_string((int64_t)cid->c1->ndm) +".\n");
-      
-      instr(sconf, cid->c1);
-      thdps(cid->c1->lm, cid->zpv);
-      double exdc = sconf->exdc;
-      double exri = sqrt(exdc);
+#pragma omp barrier
 
-      // Create an empty bynary file
-      VirtualBinaryFile *vtppoanp = new VirtualBinaryFile();
-      string tppoan_name = output_path + "/c_TPPOAN";
-#ifdef USE_MAGMA
-      logger->log("INFO: using MAGMA calls.\n", LOG_INFO);
-#elif defined USE_LAPACK
-      logger->log("INFO: using LAPACK calls.\n", LOG_INFO);
-#else
-      logger->log("INFO: using fall-back lucin() calls.\n", LOG_INFO);
-#endif
-      int iavm = gconf->iavm;
-      int isam = gconf->isam;
-      int inpol = gconf->in_pol;
-      int nxi = sconf->number_of_scales;
-      int nth = p_scattering_angles->nth;
-      int nths = p_scattering_angles->nths;
-      int nph = p_scattering_angles->nph;
-      int nphs = p_scattering_angles->nphs;
-      
-      //========================
-      // write a block of info to virtual binary file
-      //========================
-      vtppoanp->append_line(VirtualBinaryLine(iavm));
-      vtppoanp->append_line(VirtualBinaryLine(isam));
-      vtppoanp->append_line(VirtualBinaryLine(inpol));
-      vtppoanp->append_line(VirtualBinaryLine(nxi));
-      vtppoanp->append_line(VirtualBinaryLine(nth));
-      vtppoanp->append_line(VirtualBinaryLine(nph));
-      vtppoanp->append_line(VirtualBinaryLine(nths));
-      vtppoanp->append_line(VirtualBinaryLine(nphs));
-      if (sconf->idfc < 0) {
-	cid->vk = cid->xip * cid->wn;
-	p_output->vec_vk[0] = cid->vk;
-      }
-      
-      // do the first iteration on jxi488 separately, since it seems to be different from the others
-      int jxi488 = 1;
-      int initialmaxrefiters = cid->maxrefiters;
-      
-      chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
-#ifdef USE_NVTX
-      nvtxRangePush("First iteration");
-#endif
-      // use these pragmas, which should have no effect on parallelism, just to push OMP nested levels at the same level also in the first wavelength iteration
-      int jer = 0;
-#pragma omp parallel
-      {
-#pragma omp single
+	} // close strided loop running on MPI processes, ixi488 loop
+	// delete the shared arrays used to make the virtual files of the other threads available to thread 0
+#pragma omp barrier
+	if (myompthread == 0) {
+	  delete[] p_outarray;
+	  delete[] vtppoanarray;
+	}
 	{
-	  jer = inclusion_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp);
+	  string message = "INFO: Closing thread-local output files of thread " + to_string(myompthread) + " and syncing threads.\n";
+	  logger->log(message);
 	}
-      }
 #ifdef USE_NVTX
-      nvtxRangePop();
+	nvtxRangePop();
 #endif
-      chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now();
-      elapsed = start_iter_1 - t_start;
-      string message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n";
-      logger->log(message);
-      time_logger->log(message);
-      elapsed = end_iter_1 - start_iter_1;
-      message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n";
-      logger->log(message);
-      time_logger->log(message);
-      /* for the next iterations, just always do maxiter iterations, assuming the accuracy is good enough */
-      cid->refinemode = 0;
-      /* add an extra iteration for margin, if this does not exceed initialmaxrefiters */
-      // if (cid->maxrefiters < initialmaxrefiters) cid->maxrefiters++;
-      if (jer != 0) {
-	// First loop failed. Halt the calculation.
-	fclose(timing_file);
-	delete time_logger;
-	delete p_output;
-	delete p_scattering_angles;
-	delete cid;
-	delete logger;
-	delete sconf;
-	delete gconf;
-	return;
+	delete cid_2;
       }
+      delete p_scattering_angles;
+      p_output->write(output_path + "/c_OINCLU.hd5", "HDF5");
+      p_output->write(output_path + "/c_OINCLU", "LEGACY");
+      delete p_output;
+    } // closes s_nsph == nsph check
+	  
+    else { // Sphere number inconsistency error.
+      throw UnrecognizedConfigurationException(
+	"Inconsistent geometry and scatterer configurations."
+      );
+    }
 
-      //==================================================
-      // do the first outputs here, so that I open here the new files, afterwards I only append
-      //==================================================
-      vtppoanp->write_to_disk(output_path + "/c_TPPOAN");
-      delete vtppoanp;
-      
-      // here go the calls that send data to be duplicated on other MPI processes from process 0 to others, using MPI broadcasts, but only if MPI is actually used
-#ifdef MPI_VERSION
-      if (mpidata->mpirunning) {
-	gconf->mpibcast(mpidata);
-	sconf->mpibcast(mpidata);	    
-	cid->mpibcast(mpidata);
-	p_scattering_angles->mpibcast(mpidata);
-      }	
-#endif
-      // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
-      int ompnumthreads = 1;
-      // this is for MPI process 0 (or even if we are not using MPI at all)
-      int myjxi488startoffset = 0;
-      int myMPIstride = ompnumthreads;
-      int myMPIblock = ompnumthreads;
-      // Define here shared arrays of virtual ascii and binary files, so that thread 0 will be able to access them all later
-      InclusionOutputInfo **p_outarray = NULL;
-      VirtualBinaryFile **vtppoanarray = NULL;
-
-#ifdef USE_NVTX
-      nvtxRangePush("Parallel loop");
+    delete sconf;
+    delete gconf;
+#ifdef USE_MAGMA
+    logger->log("INFO: Process " + to_string(mpidata->rank) + " finalizes MAGMA.\n");
+    magma_finalize();
 #endif
+    chrono::time_point<chrono::high_resolution_clock> t_end = chrono::high_resolution_clock::now();
+    elapsed = t_end - t_start;
+    string message = "INFO: Calculation lasted " + to_string(elapsed.count()) + "s.\n";
+    logger->log(message);
+    logger->log("Finished: output written to " + output_path + "/c_OINCLU\n");
+    time_logger->log(message);
+    fclose(timing_file);
+    delete time_logger;
+  } // end instructions block of MPI process 0
+  
+    //===============================
+    // instruction block for MPI processes different from 0
+    //===============================
+#ifdef MPI_VERSION
+  else {
+    // here go the code for MPI processes other than 0
+    // copy gconf, sconf, cid and p_scattering_angles from MPI process 0
+    GeometryConfiguration *gconf = new GeometryConfiguration(mpidata);
+    ScattererConfiguration *sconf = new ScattererConfiguration(mpidata);
+    InclusionIterationData *cid = new InclusionIterationData(mpidata, device_count);
+    ScatteringAngles *p_scattering_angles = new ScatteringAngles(mpidata);
 
-      //===========================================
-      // open the OpenMP parallel context, so each thread can initialise its stuff
-      //===========================================
+    // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
+    int ompnumthreads = 1;
+    InclusionOutputInfo **p_outarray = NULL;
+    VirtualBinaryFile **vtppoanarray = NULL;
+    int myjxi488startoffset;
+    int myMPIstride = ompnumthreads;
+    int myMPIblock = ompnumthreads;
+      
 #pragma omp parallel
-      {
-	// Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defiled value anyway
-	int myompthread = 0;
-
+    {
+      // Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defined value anyway
+      int myompthread = 0;
 #ifdef _OPENMP
-	// If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files
-	myompthread = omp_get_thread_num();
-	if (myompthread == 0) ompnumthreads = omp_get_num_threads();
+      // If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files
+      myompthread = omp_get_thread_num();
+      if (myompthread == 0) ompnumthreads = omp_get_num_threads();
 #endif
-
-	if (myompthread == 0) {
-	  // Initialise some shared variables only on thread 0
-	  p_outarray = new InclusionOutputInfo*[ompnumthreads];
-	  vtppoanarray = new VirtualBinaryFile*[ompnumthreads];
-	  myMPIblock = ompnumthreads;
-	  myMPIstride = myMPIblock;
+      if (myompthread == 0) {
+	// receive the start parameter from MPI process 0
+	MPI_Recv(&myjxi488startoffset, 1, MPI_INT, 0, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+	// send my number of omp threads to process 0
+	MPI_Send(&ompnumthreads, 1, MPI_INT, 0, 3, MPI_COMM_WORLD);
+	// receive myMPIstride sent by MPI process 0 to all processes
+	MPI_Bcast(&myMPIstride, 1, MPI_INT, 0, MPI_COMM_WORLD);
+	// allocate virtual files for each thread
+	p_outarray = new InclusionOutputInfo*[ompnumthreads];
+	vtppoanarray = new VirtualBinaryFile*[ompnumthreads];
+      }
+#pragma omp barrier
+      // Feed inclusion_jxi488_cycle() with "clean" copies of the parameters, so that they are not modified by previous iterations and each iteration behaves like the first one. Declare all (empty) variables here, so they have the correct scope; they then get different definitions depending on the thread number.
+      InclusionIterationData *cid_2 = NULL;
+      InclusionOutputInfo *p_output_2 = NULL;
+      VirtualBinaryFile *vtppoanp_2 = NULL;
+      // for threads other than 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
+      if (myompthread == 0) {
+	cid_2 = cid;
+      } else {
+	// this is not thread 0, so do create fresh copies of all local variables
+	cid_2 = new InclusionIterationData(*cid);
+      }
+      // make sure all threads synchronise here: the following loop must not start on thread 0 and modify variables before the other threads have finished copying them
+#pragma omp barrier
+      // ok, now I can actually start the parallel calculations
+      for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
+	// the parallel loop over MPI processes covers a different set of indices for each thread
+#pragma omp barrier
+	int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
+	// each thread opens new virtual files and stores their pointers in the shared array
+	vtppoanp_2 = new VirtualBinaryFile();
+	// each thread puts a copy of the pointers to its virtual files in the shared arrays
+	vtppoanarray[myompthread] = vtppoanp_2;
+#pragma omp barrier
+	if (myompthread==0) logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n");
+	// each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism
+	if (myjxi488 <= cid_2->number_of_scales) {
+	  if (myompthread > 0) {
+	    // UPDATE: non-0 threads need to allocate memory for one scale at a time.
+	    p_output_2 = new InclusionOutputInfo(sconf, gconf, mpidata, myjxi488, 1);
+	    p_outarray[myompthread] = p_output_2;
+	  } else {
+	    // Thread 0 of non-zero MPI processes needs to allocate memory for the
+	    // output of all threads.
+	    p_output_2 = new InclusionOutputInfo(sconf, gconf, mpidata, myjxi488, ompnumthreads);
+	    p_outarray[0] = p_output_2;
+	  }
+	  int jer = inclusion_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2);
+	} else {
+	  if (myompthread > 0) {
+	    // If there is no input for this thread, set the output pointer to NULL.
+	    p_outarray[myompthread] = NULL;
+	  }	  
 	}
 
-#ifdef MPI_VERSION
-	if (myompthread == 0) {
-	  if (mpidata->mpirunning) {
-	    // only go through this if MPI has been actually used
-	    for (int rr=1; rr<mpidata->nprocs; rr++) {
-	      // individually send their respective starting points to other MPI processes: they start immediately after the frequencies computed by previous processes so far
-	      int remotejxi488startoffset = myMPIstride;
-	      MPI_Send(&remotejxi488startoffset, 1, MPI_INT, rr, 3, MPI_COMM_WORLD);
-	      int remoteMPIblock;
-	      MPI_Recv(&remoteMPIblock, 1, MPI_INT, rr, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-	      // update myMPIstride to include the ones due to MPI process rr
-	      myMPIstride += remoteMPIblock;
-	    }
-	    // now I know the total myMPIstride, I can send it to all processes
-	    MPI_Bcast(&myMPIstride, 1, MPI_INT, 0, MPI_COMM_WORLD);
-	  }
-	}
-#endif
-	// add an omp barrier to make sure that the global variables defined by thread 0 are known to all threads below this
-#pragma omp barrier
-
-	// To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one. Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number
-	InclusionIterationData *cid_2 = NULL;
-	InclusionOutputInfo *p_output_2 = NULL;
-	VirtualBinaryFile *vtppoanp_2 = NULL;
-	// for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
-	if (myompthread == 0) {
-	  cid_2 = cid;
-	  // OMP thread 0 of MPI process 0 holds the pointer to the full output structure
-	  p_output_2 = p_output;
-	  p_outarray[0] = p_output_2;
-	} else {
-	  // this is not thread 0, so do create fresh copies of all local variables
-	  cid_2 = new InclusionIterationData(*cid);
-	}
-	// make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
-	if (myompthread==0) {
-	  logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n");
-	}
-#pragma omp barrier
-	// ok, now I can actually start the parallel calculations
-	for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
-	  // the parallel loop over MPI processes covers a different set of indices for each thread
-#pragma omp barrier
-	  int myjxi488 = ixi488+myompthread;
-	  // each thread opens new virtual files and stores their pointers in the shared array
-	  vtppoanp_2 = new VirtualBinaryFile();
-	  // each thread puts a copy of the pointers to its virtual files in the shared arrays
-	  vtppoanarray[myompthread] = vtppoanp_2;
-#pragma omp barrier
-
-	  // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism
-	  if (myjxi488 <= cid_2->number_of_scales) {
-	    if (myompthread > 0) {
-	      // UPDATE: non-0 threads need to allocate memory for one scale at a time.
-	      p_output_2 = new InclusionOutputInfo(sconf, gconf, mpidata, myjxi488, 1);
-	      p_outarray[myompthread] = p_output_2;
-	    }
-	    int jer = inclusion_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2);
-	  } else {
-	    if (myompthread > 0) {
-	      // If there is no input for this thread, set output pointer to NULL.
-	      p_outarray[myompthread] = NULL;
-	    }
-	  }
-#pragma omp barrier
-
-#ifdef USE_NVTX
-	  nvtxRangePush("Output concatenation");
-#endif
-#pragma omp barrier
-	  // threads different from 0 append their virtual files to the one of thread 0, and delete them
-	  if (myompthread == 0) {
-	    for (int ti=1; ti<ompnumthreads; ti++) {
-	      if (p_outarray[ti] != NULL) {
-		p_outarray[0]->insert(*(p_outarray[ti]));
-		delete p_outarray[ti];
-		p_outarray[ti] = NULL;
-	      }
-	      vtppoanarray[0]->append(*(vtppoanarray[ti]));
-	      delete vtppoanarray[ti];
-	    }
-	  }
-#pragma omp barrier
-	  //==============================================
-	  // Collect all virtual files on thread 0 of MPI process 0, and append them to disk
-	  //==============================================
-	  if (myompthread == 0) {
-	    // thread 0 writes its virtual files, now including contributions from all threads, to disk, and deletes them
-	    // p_outarray[0]->append_to_disk(output_path + "/c_OINCLU");
-	    // delete p_outarray[0];
-	    vtppoanarray[0]->append_to_disk(output_path + "/c_TPPOAN");
-	    delete vtppoanarray[0];
-
-#ifdef MPI_VERSION
-	    if (mpidata->mpirunning) {
-	      // only go through this if MPI has been actually used
-	      for (int rr=1; rr<mpidata->nprocs; rr++) {
-		// get the data from process rr by receiving it in total memory structure
-		p_outarray[0]->mpireceive(mpidata, rr);
-		// get the data from process rr, creating a new virtual ascii file
-		// VirtualAsciiFile *p_output = new VirtualAsciiFile(mpidata, rr);
-		// append to disk and delete virtual ascii file
-		// p_output->append_to_disk(output_path + "/c_OINCLU");
-		// delete p_output;
-		
-		// get the data from process rr, creating a new virtual binary file
-		VirtualBinaryFile *vtppoanp = new VirtualBinaryFile(mpidata, rr);
-		// append to disk and delete virtual binary file
-		vtppoanp->append_to_disk(output_path + "/c_TPPOAN");
-		delete vtppoanp;
-		int test = MPI_Barrier(MPI_COMM_WORLD);
-	      }
-	    }
-#endif
-	  }
-	  // end block writing to disk
-#ifdef USE_NVTX
-	  nvtxRangePop();
-#endif
-#pragma omp barrier
-
-	} // close strided loop running on MPI processes, ixi488 loop
-	// delete the shared arrays I used to make available to thread 0 the virtual files of other threads
-#pragma omp barrier
-	if (myompthread == 0) {
-	  delete[] p_outarray;
-	  delete[] vtppoanarray;
-	}
-	{
-	  string message = "INFO: Closing thread-local output files of thread " + to_string(myompthread) + " and syncing threads.\n";
-	  logger->log(message);
-	}
-#ifdef USE_NVTX
-	nvtxRangePop();
-#endif
-	delete cid_2;
-      }
-      delete p_scattering_angles;
-      p_output->write(output_path + "/c_OINCLU.hd5", "HDF5");
-      p_output->write(output_path + "/c_OINCLU", "LEGACY");
-      delete p_output;
-    } // closes s_nsph == nsph check
-	  
-    else { // Sphere number inconsistency error.
-      throw UnrecognizedConfigurationException(
-	"Inconsistent geometry and scatterer configurations."
-      );
-    }
-
-    delete sconf;
-    delete gconf;
-#ifdef USE_MAGMA
-    logger->log("INFO: Process " + to_string(mpidata->rank) + " finalizes MAGMA.\n");
-    magma_finalize();
-#endif
-    chrono::time_point<chrono::high_resolution_clock> t_end = chrono::high_resolution_clock::now();
-    elapsed = t_end - t_start;
-    string message = "INFO: Calculation lasted " + to_string(elapsed.count()) + "s.\n";
-    logger->log(message);
-    logger->log("Finished: output written to " + output_path + "/c_OINCLU\n");
-    time_logger->log(message);
-    fclose(timing_file);
-    delete time_logger;
-  } // end instructions block of MPI process 0
-  
-    //===============================
-    // instruction block for MPI processes different from 0
-    //===============================
-#ifdef MPI_VERSION
-  else {
-    // here go the code for MPI processes other than 0
-    // copy gconf, sconf, cid and p_scattering_angles from MPI process 0
-    GeometryConfiguration *gconf = new GeometryConfiguration(mpidata);
-    ScattererConfiguration *sconf = new ScattererConfiguration(mpidata);
-    InclusionIterationData *cid = new InclusionIterationData(mpidata, device_count);
-    ScatteringAngles *p_scattering_angles = new ScatteringAngles(mpidata);
-
-    // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
-    int ompnumthreads = 1;
-    InclusionOutputInfo **p_outarray = NULL;
-    VirtualBinaryFile **vtppoanarray = NULL;
-    int myjxi488startoffset;
-    int myMPIstride = ompnumthreads;
-    int myMPIblock = ompnumthreads;
-      
-#pragma omp parallel
-    {
-      // Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defiled value anyway
-      int myompthread = 0;
-#ifdef _OPENMP
-      // If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files
-      myompthread = omp_get_thread_num();
-      if (myompthread == 0) ompnumthreads = omp_get_num_threads();
-#endif
-      if (myompthread == 0) {
-	// receive the start parameter from MPI process 0
-	MPI_Recv(&myjxi488startoffset, 1, MPI_INT, 0, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-	// send my number of omp threads to process 0
-	MPI_Send(&ompnumthreads, 1, MPI_INT, 0, 3, MPI_COMM_WORLD);
-	// receive myMPIstride sent by MPI process 0 to all processes
-	MPI_Bcast(&myMPIstride, 1, MPI_INT, 0, MPI_COMM_WORLD);
-	// allocate virtual files for each thread
-	p_outarray = new InclusionOutputInfo*[ompnumthreads];
-	vtppoanarray = new VirtualBinaryFile*[ompnumthreads];
-      }
-#pragma omp barrier
-      // To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one. Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number
-      InclusionIterationData *cid_2 = NULL;
-      InclusionOutputInfo *p_output_2 = NULL;
-      VirtualBinaryFile *vtppoanp_2 = NULL;
-      // PLACEHOLDER
-      // for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
-      if (myompthread == 0) {
-	cid_2 = cid;
-      } else {
-	// this is not thread 0, so do create fresh copies of all local variables
-	cid_2 = new InclusionIterationData(*cid);
-      }
-      // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
-#pragma omp barrier
-      // ok, now I can actually start the parallel calculations
-      for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
-	// the parallel loop over MPI processes covers a different set of indices for each thread
-#pragma omp barrier
-	int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
-	// each thread opens new virtual files and stores their pointers in the shared array
-	vtppoanp_2 = new VirtualBinaryFile();
-	// each thread puts a copy of the pointers to its virtual files in the shared arrays
-	vtppoanarray[myompthread] = vtppoanp_2;
-#pragma omp barrier
-	if (myompthread==0) logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n");
-	// ok, now I can actually start the parallel calculations
-	// each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism
-	if (myjxi488 <= cid_2->number_of_scales) {
-	  if (myompthread > 0) {
-	    // UPDATE: non-0 threads need to allocate memory for one scale at a time.
-	    p_output_2 = new InclusionOutputInfo(sconf, gconf, mpidata, myjxi488, 1);
-	    p_outarray[myompthread] = p_output_2;
-	  } else {
-	    // Thread 0 of non-zero MPI processes needs to allocate memory for the
-	    // output of all threads.
-	    p_output_2 = new InclusionOutputInfo(sconf, gconf, mpidata, myjxi488, ompnumthreads);
-	    p_outarray[0] = p_output_2;
-	  }
-	  int jer = inclusion_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2);
-	} else {
-	  if (myompthread > 0) {
-	    // If there is no input for this thread, set the output pointer to NULL.
-	    p_outarray[myompthread] = NULL;
-	  }	  
-	}
-
-#pragma omp barrier
-	// threads different from 0 append their virtual files to the one of thread 0, and delete them
+#pragma omp barrier
+	// threads different from 0 append their virtual files to the one of thread 0, and delete them
 	if (myompthread == 0) {
 	  for (int ti=1; ti<ompnumthreads; ti++) {
 	    if (p_outarray[ti] != NULL) {
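
The strided wavelength scheduling above reduces to simple index arithmetic: myMPIstride is the total number of OpenMP threads summed over all MPI ranks, each rank's myjxi488startoffset equals the number of threads owned by lower ranks, and every thread computes myjxi488 = ixi488 + myjxi488startoffset + myompthread inside a loop that starts at scale 2 (scale 1 is handled separately) and advances by myMPIstride. The standalone sketch below, which is not part of the patch and uses assumed example values for the rank and thread counts, reproduces only that arithmetic:

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const int number_of_scales = 13;               // assumed example value
  const std::vector<int> threads_per_rank{2, 3}; // assumed: 2 MPI ranks with 2 and 3 OMP threads
  // myMPIstride is the total number of OpenMP threads over all ranks.
  int myMPIstride = 0;
  for (int t : threads_per_rank) myMPIstride += t;
  int offset = 0; // cumulative thread count of the lower ranks
  for (std::size_t rank = 0; rank < threads_per_rank.size(); rank++) {
    const int myjxi488startoffset = offset;
    for (int myompthread = 0; myompthread < threads_per_rank[rank]; myompthread++) {
      // Scale 1 is computed separately, so the strided loop starts at 2.
      for (int ixi488 = 2; ixi488 <= number_of_scales; ixi488 += myMPIstride) {
        const int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
        if (myjxi488 <= number_of_scales)
          std::printf("rank %zu, thread %d -> scale %d\n", rank, myompthread, myjxi488);
      }
    }
    offset += threads_per_rank[rank];
  }
  return 0;
}

With these example values every scale from 2 to 13 is visited exactly once, which is the property the barriers and the p_outarray / vtppoanarray bookkeeping above rely on.
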
@@ -2086,9 +1384,551 @@ int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryCo
   message = "INFO: angle loop for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n";
   logger->log(message);
   
-  logger->log("INFO: finished scale iteration " + to_string(jxi488) + " of " + to_string(nxi) + ".\n");
+  logger->log("INFO: finished scale iteration " + to_string(jxi488) + " of " + to_string(nxi) + ".\n");
+
+  delete logger;
+
+  return jer;
+}
+
+// >>> IMPLEMENTATION OF InclusionIterationData CLASS <<< //
+InclusionIterationData::InclusionIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count) {
+  c1 = new ParticleDescriptorInclusion(gconf, sconf);
+  const int ndi = c1->nsph * c1->nlim;
+  const np_int ndit = 2 * ndi;
+  gaps = new double[c1->nsph]();
+  tqev = new double[3]();
+  tqsv = new double[3]();
+  tqse = new double*[2];
+  tqspe = new dcomplex*[2];
+  tqss = new double*[2];
+  tqsps = new dcomplex*[2];
+  tqce = new double*[2];
+  tqcpe = new dcomplex*[2];
+  tqcs = new double*[2];
+  tqcps = new dcomplex*[2];
+  for (int ti = 0; ti < 2; ti++) {
+    tqse[ti] = new double[c1->nsph]();
+    tqspe[ti] = new dcomplex[c1->nsph]();
+    tqss[ti] = new double[c1->nsph]();
+    tqsps[ti] = new dcomplex[c1->nsph]();
+    tqce[ti] = new double[3]();
+    tqcpe[ti] = new dcomplex[3]();
+    tqcs[ti] = new double[3]();
+    tqcps[ti] = new dcomplex[3]();
+  }
+  gapv = new double[3]();
+  gapp = new dcomplex*[3];
+  gappm = new dcomplex*[3];
+  gap = new double*[3];
+  gapm = new double*[3];
+  for (int gi = 0; gi < 3; gi++) {
+    gapp[gi] = new dcomplex[2]();
+    gappm[gi] = new dcomplex[2]();
+    gap[gi] = new double[2]();
+    gapm[gi] = new double[2]();
+  }
+  u = new double[3]();
+  us = new double[3]();
+  un = new double[3]();
+  uns = new double[3]();
+  up = new double[3]();
+  ups = new double[3]();
+  unmp = new double[3]();
+  unsmp = new double[3]();
+  upmp = new double[3]();
+  upsmp = new double[3]();
+  argi = new double[1]();
+  args = new double[1]();
+  duk = new double[3]();
+  cextlr = new double*[4];
+  cext = new double*[4];
+  cmullr = new double*[4];
+  cmul = new double*[4];
+  for (int ci = 0; ci < 4; ci++) {
+    cextlr[ci] = new double[4]();
+    cext[ci] = new double[4]();
+    cmullr[ci] = new double[4]();
+    cmul[ci] = new double[4]();
+  }
+  vec_zpv = new double[c1->lm * 12]();
+  zpv = new double***[c1->lm];
+  for (int zi = 0; zi < c1->lm; zi++) {
+    zpv[zi] = new double**[12];
+    for (int zj = 0; zj < 3; zj++) {
+      zpv[zi][zj] = new double*[4];
+      zpv[zi][zj][0] = vec_zpv + (zi * 12) + (zj * 4);
+      zpv[zi][zj][1] = vec_zpv + (zi * 12) + (zj * 4) + 2;
+    }
+  }
+  am_vector = new dcomplex[c1->ndm * c1->ndm]();
+  am = new dcomplex*[c1->ndm];
+  for (int ai = 0; ai < c1->ndm; ai++) {
+    am[ai] = (am_vector + ai * c1->ndm);
+  }
+  
+  arg = 0.0 + 0.0 * I;
+  // These are suspect initializations
+  scan = 0.0;
+  cfmp = 0.0;
+  sfmp = 0.0;
+  cfsp = 0.0;
+  sfsp = 0.0;
+  // End of suspect initializations
+  wn = sconf->wp / 3.0e8;
+  xip = sconf->xip;
+  sqsfi = 1.0;
+  vk = 0.0;
+  number_of_scales = sconf->number_of_scales;
+  xiblock = (int) ceil(((double) (sconf->number_of_scales-1))/((double) mpidata->nprocs));
+  lastxi = ((mpidata->rank+1) * xiblock)+1;
+  firstxi = lastxi-xiblock+1;
+  if (lastxi > sconf->number_of_scales) lastxi = sconf->number_of_scales;
+
+  nimd = c1->nshl[0] + 1;
+  c1->rc[0][nimd - 1] = c1->ros[0] * sconf->get_rcf(0, nimd - 1);
+  extr = c1->rc[0][nimd - 1];
+  const double pig = acos(0.0) * 2.0;
+  c1->gcs = pig * extr * extr;
+  
+#ifdef USE_MAGMA
+  proc_device = mpidata->rank % device_count;
+#else
+  proc_device = 0;
+#endif
+
+  // In the first iteration, if refinement is enabled, determine the number of refinement iterations required to arrive at the target accuracy (if achievable in a reasonable number of iterations)
+  refinemode = 2;
+  // maxrefiters and accuracygoal should be configurable and preferably set somewhere else
+  maxrefiters = 20;
+  accuracygoal = 1e-6;
+}
+
+InclusionIterationData::InclusionIterationData(const InclusionIterationData& rhs) {
+  c1 = new ParticleDescriptorInclusion(reinterpret_cast<ParticleDescriptorInclusion &>(*(rhs.c1)));
+  const int ndi = c1->nsph * c1->nlim;
+  const np_int ndit = 2 * ndi;
+  gaps = new double[c1->nsph]();
+  for (int gi = 0; gi < c1->nsph; gi++) gaps[gi] = rhs.gaps[gi];
+  tqev = new double[3]();
+  tqsv = new double[3]();
+  for (int ti = 0; ti < 3; ti++) {
+    tqev[ti] = rhs.tqev[ti];
+    tqsv[ti] = rhs.tqsv[ti];
+  }
+  tqse = new double*[2];
+  tqspe = new dcomplex*[2];
+  tqss = new double*[2];
+  tqsps = new dcomplex*[2];
+  tqce = new double*[2];
+  tqcpe = new dcomplex*[2];
+  tqcs = new double*[2];
+  tqcps = new dcomplex*[2];
+  for (int ti = 0; ti < 2; ti++) {
+    tqse[ti] = new double[c1->nsph]();
+    tqspe[ti] = new dcomplex[c1->nsph]();
+    tqss[ti] = new double[c1->nsph]();
+    tqsps[ti] = new dcomplex[c1->nsph]();
+    for (int tj = 0; tj < c1->nsph; tj++) {
+      tqse[ti][tj] = rhs.tqse[ti][tj];
+      tqspe[ti][tj] = rhs.tqspe[ti][tj];
+      tqss[ti][tj] = rhs.tqss[ti][tj];
+      tqsps[ti][tj] = rhs.tqsps[ti][tj];
+    }
+    tqce[ti] = new double[3]();
+    tqcpe[ti] = new dcomplex[3]();
+    tqcs[ti] = new double[3]();
+    tqcps[ti] = new dcomplex[3]();
+    for (int tj = 0; tj < 3; tj++) {
+      tqce[ti][tj] = rhs.tqce[ti][tj];
+      tqcpe[ti][tj] = rhs.tqcpe[ti][tj];
+      tqcs[ti][tj] = rhs.tqcs[ti][tj];
+      tqcps[ti][tj] = rhs.tqcps[ti][tj];
+    }
+  }
+  gapv = new double[3]();
+  gapp = new dcomplex*[3];
+  gappm = new dcomplex*[3];
+  gap = new double*[3];
+  gapm = new double*[3];
+  for (int gi = 0; gi < 3; gi++) {
+    gapv[gi] = rhs.gapv[gi];
+    gapp[gi] = new dcomplex[2]();
+    gappm[gi] = new dcomplex[2]();
+    gap[gi] = new double[2]();
+    gapm[gi] = new double[2]();
+    for (int gj = 0; gj < 2; gj++) {
+      gapp[gi][gj] = rhs.gapp[gi][gj];
+      gappm[gi][gj] = rhs.gappm[gi][gj];
+      gap[gi][gj] = rhs.gap[gi][gj];
+      gapm[gi][gj] = rhs.gapm[gi][gj];
+    }
+  }
+  u = new double[3]();
+  us = new double[3]();
+  un = new double[3]();
+  uns = new double[3]();
+  up = new double[3]();
+  ups = new double[3]();
+  unmp = new double[3]();
+  unsmp = new double[3]();
+  upmp = new double[3]();
+  upsmp = new double[3]();
+  duk = new double[3]();
+  for (int ui = 0; ui < 3; ui++) {
+    u[ui] = rhs.u[ui];
+    us[ui] = rhs.us[ui];
+    un[ui] = rhs.un[ui];
+    uns[ui] = rhs.uns[ui];
+    up[ui] = rhs.up[ui];
+    ups[ui] = rhs.ups[ui];
+    unmp[ui] = rhs.unmp[ui];
+    unsmp[ui] = rhs.unsmp[ui];
+    upmp[ui] = rhs.upmp[ui];
+    upsmp[ui] = rhs.upsmp[ui];
+    duk[ui] = rhs.duk[ui];
+  }
+  argi = new double[1]();
+  args = new double[1]();
+  argi[0] = rhs.argi[0];
+  args[0] = rhs.args[0];
+  cextlr = new double*[4];
+  cext = new double*[4];
+  cmullr = new double*[4];
+  cmul = new double*[4];
+  for (int ci = 0; ci < 4; ci++) {
+    cextlr[ci] = new double[4]();
+    cext[ci] = new double[4]();
+    cmullr[ci] = new double[4]();
+    cmul[ci] = new double[4]();
+    for (int cj = 0; cj < 4; cj++) {
+      cextlr[ci][cj] = rhs.cextlr[ci][cj];
+      cext[ci][cj] = rhs.cext[ci][cj];
+      cmullr[ci][cj] = rhs.cmullr[ci][cj];
+      cmul[ci][cj] = rhs.cmul[ci][cj];
+    }
+  }
+  vec_zpv = new double[c1->lm * 12];
+  zpv = new double***[c1->lm];
+  for (int zi = 0; zi < c1->lm; zi++) {
+    zpv[zi] = new double **[12];
+    for (int zj = 0; zj < 3; zj++) {
+      zpv[zi][zj] = new double*[4];
+      zpv[zi][zj][0] = vec_zpv + (zi * 12) + (zj * 4);
+      zpv[zi][zj][1] = vec_zpv + (zi * 12) + (zj * 4) + 2;
+      zpv[zi][zj][0][0] = rhs.zpv[zi][zj][0][0];
+      zpv[zi][zj][0][1] = rhs.zpv[zi][zj][0][1];
+      zpv[zi][zj][1][0] = rhs.zpv[zi][zj][1][0];
+      zpv[zi][zj][1][1] = rhs.zpv[zi][zj][1][1];
+    }
+  }
+  am_vector = new dcomplex[c1->ndm * c1->ndm];
+  for (int ai = 0; ai < c1->ndm * c1->ndm; ai++) am_vector[ai] = rhs.am_vector[ai];
+  am = new dcomplex*[c1->ndm];
+  for (int ai = 0; ai < c1->ndm; ai++) {
+    am[ai] = (am_vector + ai * c1->ndm);
+  }
+  
+  arg = rhs.arg;
+  // These are suspect initializations
+  scan = rhs.scan;
+  cfmp = rhs.cfmp;
+  sfmp = rhs.sfmp;
+  cfsp = rhs.cfsp;
+  sfsp = rhs.sfsp;
+  // End of suspect initializations
+  wn = rhs.wn;
+  xip = rhs.xip;
+  sqsfi = rhs.sqsfi;
+  vk = rhs.vk;
+  firstxi = rhs.firstxi;
+  lastxi = rhs.lastxi;
+  xiblock = rhs.xiblock;
+  number_of_scales = rhs.number_of_scales;
+
+  nimd = rhs.nimd;
+  extr = rhs.extr;
+  
+  proc_device = rhs.proc_device;
+  refinemode = rhs.refinemode;
+  maxrefiters = rhs.maxrefiters;
+  accuracygoal = rhs.accuracygoal;
+}
+
+#ifdef MPI_VERSION
+InclusionIterationData::InclusionIterationData(const mixMPI *mpidata, const int device_count) {
+  c1 = new ParticleDescriptorInclusion(mpidata);
+  const int ndi = c1->nsph * c1->nlim;
+  const np_int ndit = 2 * ndi;
+  gaps = new double[c1->nsph]();
+  MPI_Bcast(gaps, c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  tqev = new double[3]();
+  tqsv = new double[3]();
+  MPI_Bcast(tqev, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(tqsv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  tqse = new double*[2];
+  tqspe = new dcomplex*[2];
+  tqss = new double*[2];
+  tqsps = new dcomplex*[2];
+  tqce = new double*[2];
+  tqcpe = new dcomplex*[2];
+  tqcs = new double*[2];
+  tqcps = new dcomplex*[2];
+  for (int ti = 0; ti < 2; ti++) {
+    tqse[ti] = new double[c1->nsph]();
+    tqspe[ti] = new dcomplex[c1->nsph]();
+    tqss[ti] = new double[c1->nsph]();
+    tqsps[ti] = new dcomplex[c1->nsph]();
+    MPI_Bcast(tqse[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqspe[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqss[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqsps[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    tqce[ti] = new double[3]();
+    tqcpe[ti] = new dcomplex[3]();
+    tqcs[ti] = new double[3]();
+    tqcps[ti] = new dcomplex[3]();
+    MPI_Bcast(tqce[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcpe[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcs[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcps[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  gapv = new double[3]();
+  gapp = new dcomplex*[3];
+  gappm = new dcomplex*[3];
+  gap = new double*[3];
+  gapm = new double*[3];
+  MPI_Bcast(gapv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  for (int gi = 0; gi < 3; gi++) {
+    gapp[gi] = new dcomplex[2]();
+    gappm[gi] = new dcomplex[2]();
+    gap[gi] = new double[2]();
+    gapm[gi] = new double[2]();
+    MPI_Bcast(gapp[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gappm[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gap[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gapm[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  }
+  u = new double[3]();
+  us = new double[3]();
+  un = new double[3]();
+  uns = new double[3]();
+  up = new double[3]();
+  ups = new double[3]();
+  unmp = new double[3]();
+  unsmp = new double[3]();
+  upmp = new double[3]();
+  upsmp = new double[3]();
+  duk = new double[3]();
+  MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  argi = new double[1]();
+  args = new double[1]();
+  MPI_Bcast(argi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  cextlr = new double*[4];
+  cext = new double*[4];
+  cmullr = new double*[4];
+  cmul = new double*[4];
+  for (int ci = 0; ci < 4; ci++) {
+    cextlr[ci] = new double[4]();
+    cext[ci] = new double[4]();
+    cmullr[ci] = new double[4]();
+    cmul[ci] = new double[4]();
+    MPI_Bcast(cextlr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cext[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cmullr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cmul[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  }
+  vec_zpv = new double[c1->lm * 12];
+  MPI_Bcast(vec_zpv, c1->lm * 12, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  zpv = new double***[c1->lm];
+  for (int zi = 0; zi < c1->lm; zi++) {
+    zpv[zi] = new double **[12];
+    for (int zj = 0; zj < 3; zj++) {
+      zpv[zi][zj] = new double*[4];
+      zpv[zi][zj][0] = vec_zpv + (zi * 12) + (zj * 4);
+      zpv[zi][zj][1] = vec_zpv + (zi * 12) + (zj * 4) + 2;
+    }
+  }
+  am_vector = new dcomplex[c1->ndm * c1->ndm];
+  am = new dcomplex*[c1->ndm];
+  for (int ai = 0; ai < c1->ndm; ai++) {
+    am[ai] = (am_vector + ai * c1->ndm);
+    MPI_Bcast(am[ai], c1->ndm, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sqsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  lastxi = ((mpidata->rank+1) * xiblock)+1;
+  firstxi = lastxi-xiblock+1;
+  if (lastxi > number_of_scales) lastxi = number_of_scales;
+
+  MPI_Bcast(&nimd, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&extr, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  
+#ifdef USE_MAGMA
+  proc_device = mpidata->rank % device_count;
+#else
+  proc_device = 0;
+#endif
+  MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+}
 
-  delete logger;
+void InclusionIterationData::mpibcast(const mixMPI *mpidata) {
+  c1->mpibcast(mpidata);
+  const int ndi = c1->nsph * c1->nlim;
+  const np_int ndit = 2 * ndi;
+  MPI_Bcast(gaps, c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(tqev, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(tqsv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  for (int ti = 0; ti < 2; ti++) {
+    MPI_Bcast(tqse[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqspe[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqss[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqsps[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqce[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcpe[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcs[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcps[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(gapv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  for (int gi = 0; gi < 3; gi++) {
+    MPI_Bcast(gapp[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gappm[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gap[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gapm[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(argi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  for (int ci = 0; ci < 4; ci++) {
+    MPI_Bcast(cextlr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cext[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cmullr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cmul[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(vec_zpv, c1->lm * 12, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  // since MPI expects an int argument for the number of elements to transfer in one go, transfer am one row at a time
+  for (int ai = 0; ai < c1->ndm; ai++) {
+    MPI_Bcast(am[ai], c1->ndm, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sqsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nimd, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&extr, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+}
+#endif
 
-  return jer;
+InclusionIterationData::~InclusionIterationData() {
+  const int nsph = c1->nsph;
+  delete[] am_vector;
+  delete[] am;
+  for (int zi = 0; zi < c1->lm; zi++) {
+    for (int zj = 0; zj < 3; zj++) {
+      delete[] zpv[zi][zj];
+    }
+    delete[] zpv[zi];
+  }
+  delete[] zpv;
+  delete[] vec_zpv;
+  delete c1;
+  delete[] gaps;
+  for (int ti = 1; ti > -1; ti--) {
+    delete[] tqse[ti];
+    delete[] tqss[ti];
+    delete[] tqspe[ti];
+    delete[] tqsps[ti];
+    delete[] tqce[ti];
+    delete[] tqcpe[ti];
+    delete[] tqcs[ti];
+    delete[] tqcps[ti];
+  }
+  delete[] tqse;
+  delete[] tqss;
+  delete[] tqspe;
+  delete[] tqsps;
+  delete[] tqce;
+  delete[] tqcpe;
+  delete[] tqcs;
+  delete[] tqcps;
+  delete[] tqev;
+  delete[] tqsv;
+  for (int gi = 2; gi > -1; gi--) {
+    delete[] gapp[gi];
+    delete[] gappm[gi];
+    delete[] gap[gi];
+    delete[] gapm[gi];
+  }
+  delete[] gapp;
+  delete[] gappm;
+  delete[] gap;
+  delete[] gapm;
+  delete[] gapv;
+  delete[] u;
+  delete[] us;
+  delete[] un;
+  delete[] uns;
+  delete[] up;
+  delete[] ups;
+  delete[] unmp;
+  delete[] unsmp;
+  delete[] upmp;
+  delete[] upsmp;
+  delete[] argi;
+  delete[] args;
+  delete[] duk;
+  for (int ci = 3; ci > -1; ci--) {
+    delete[] cextlr[ci];
+    delete[] cext[ci];
+    delete[] cmullr[ci];
+    delete[] cmul[ci];
+  }
+  delete[] cextlr;
+  delete[] cext;
+  delete[] cmullr;
+  delete[] cmul;
 }
+// >>> END OF InclusionIterationData CLASS IMPLEMENTATION <<<
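
The InclusionIterationData constructors implemented above follow one replication pattern: the deep-copy constructor builds per-OpenMP-thread replicas of the iteration state, while the MPI constructor and mpibcast() form a matched receiver/sender pair of MPI_Bcast calls that rebuild the same state on ranks other than 0. A minimal sketch of that pattern follows; it is not part of the patch, and the IterData struct and its two fields are invented stand-ins for the real class.

#include <mpi.h>

struct IterData {              // hypothetical stand-in for InclusionIterationData
  int n;                       // e.g. number_of_scales
  double *buf;                 // e.g. one of the per-sphere work arrays

  explicit IterData(int n_) : n(n_), buf(new double[n_]()) {}

  // Deep copy: what every OpenMP thread other than 0 builds from *cid.
  IterData(const IterData &rhs) : n(rhs.n), buf(new double[rhs.n]) {
    for (int i = 0; i < n; i++) buf[i] = rhs.buf[i];
  }

  // Receiving side: ranks other than 0 rebuild the object from rank 0's broadcasts.
  explicit IterData(MPI_Comm comm) {
    MPI_Bcast(&n, 1, MPI_INT, 0, comm);
    buf = new double[n];
    MPI_Bcast(buf, n, MPI_DOUBLE, 0, comm);
  }

  // Sending side: rank 0 issues the broadcasts in exactly the same order.
  void mpibcast(MPI_Comm comm) {
    MPI_Bcast(&n, 1, MPI_INT, 0, comm);
    MPI_Bcast(buf, n, MPI_DOUBLE, 0, comm);
  }

  ~IterData() { delete[] buf; }
};

Because both sides issue the broadcasts in the same order, a rank-0 call to mpibcast() pairs exactly with the receiving constructor on every other rank; the same constraint is why the real class broadcasts am one row at a time, since MPI_Bcast takes an int element count.
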
diff --git a/src/libnptm/Commons.cpp b/src/libnptm/Commons.cpp
index e3b7eea555653d1186c28044ec36f177c90a87f7..cfdec141721873bb3ecddaaa80614fefa6a87959 100644
--- a/src/libnptm/Commons.cpp
+++ b/src/libnptm/Commons.cpp
@@ -61,537 +61,6 @@ mixMPI::mixMPI(const mixMPI& rhs) {
 mixMPI::~mixMPI() {
 }
 
-ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count) {
-  c1 = new ParticleDescriptorCluster(gconf, sconf);
-  const int ndi = c1->nsph * c1->nlim;
-  const np_int ndit = 2 * ndi;
-  gaps = new double[c1->nsph]();
-  tqev = new double[3]();
-  tqsv = new double[3]();
-  tqse = new double*[2];
-  tqspe = new dcomplex*[2];
-  tqss = new double*[2];
-  tqsps = new dcomplex*[2];
-  tqce = new double*[2];
-  tqcpe = new dcomplex*[2];
-  tqcs = new double*[2];
-  tqcps = new dcomplex*[2];
-  for (int ti = 0; ti < 2; ti++) {
-    tqse[ti] = new double[c1->nsph]();
-    tqspe[ti] = new dcomplex[c1->nsph]();
-    tqss[ti] = new double[c1->nsph]();
-    tqsps[ti] = new dcomplex[c1->nsph]();
-    tqce[ti] = new double[3]();
-    tqcpe[ti] = new dcomplex[3]();
-    tqcs[ti] = new double[3]();
-    tqcps[ti] = new dcomplex[3]();
-  }
-  gapv = new double[3]();
-  gapp = new dcomplex*[3];
-  gappm = new dcomplex*[3];
-  gap = new double*[3];
-  gapm = new double*[3];
-  for (int gi = 0; gi < 3; gi++) {
-    gapp[gi] = new dcomplex[2]();
-    gappm[gi] = new dcomplex[2]();
-    gap[gi] = new double[2]();
-    gapm[gi] = new double[2]();
-  }
-  u = new double[3]();
-  us = new double[3]();
-  un = new double[3]();
-  uns = new double[3]();
-  up = new double[3]();
-  ups = new double[3]();
-  unmp = new double[3]();
-  unsmp = new double[3]();
-  upmp = new double[3]();
-  upsmp = new double[3]();
-  argi = new double[1]();
-  args = new double[1]();
-  duk = new double[3]();
-  cextlr = new double*[4];
-  cext = new double*[4];
-  cmullr = new double*[4];;
-  cmul = new double*[4];
-  for (int ci = 0; ci < 4; ci++) {
-    cextlr[ci] = new double[4]();
-    cext[ci] = new double[4]();
-    cmullr[ci] = new double[4]();
-    cmul[ci] = new double[4]();
-  }
-  zpv = new double***[c1->lm];
-  for (int zi = 0; zi < c1->lm; zi++) {
-    zpv[zi] = new double**[3];
-    for (int zj = 0; zj < 3; zj++) {
-      zpv[zi][zj] = new double*[2];
-      for (int zk = 0; zk < 2; zk++) {
-	zpv[zi][zj][zk] = new double[2]();
-      }
-    }
-  }
-  am_vector = new dcomplex[ndit * ndit]();
-  am = new dcomplex*[ndit];
-  for (int ai = 0; ai < ndit; ai++) {
-    am[ai] = (am_vector + ai * ndit);
-  }
-  
-  arg = 0.0 + 0.0 * I;
-  // These are suspect initializations
-  scan = 0.0;
-  cfmp = 0.0;
-  sfmp = 0.0;
-  cfsp = 0.0;
-  sfsp = 0.0;
-  // End of suspect initializations
-  wn = sconf->wp / 3.0e8;
-  xip = sconf->xip;
-  sqsfi = 1.0;
-  vk = 0.0;
-  number_of_scales = sconf->number_of_scales;
-  xiblock = (int) ceil(((double) (sconf->number_of_scales-1))/((double) mpidata->nprocs));
-  lastxi = ((mpidata->rank+1) * xiblock)+1;
-  firstxi = lastxi-xiblock+1;
-  if (lastxi > sconf->number_of_scales) lastxi = sconf->number_of_scales;
-
-#ifdef USE_MAGMA
-  proc_device = mpidata->rank % device_count;
-#else
-  proc_device = 0;
-#endif
-
-  // In the first iteration, if refinement is enabled, determine the number of refinement iterations required to arrive at the target accuracy (if achievable in a reasonable number of iterations)
-  refinemode = 2;
-  // maxrefiters and accuracygoal should be configurable and preferably set somewhere else
-  maxrefiters = 20;
-  accuracygoal = 1e-6;
-}
-
-ClusterIterationData::ClusterIterationData(const ClusterIterationData& rhs) {
-  c1 = new ParticleDescriptorCluster(reinterpret_cast<ParticleDescriptorCluster &>(*(rhs.c1)));
-  const int ndi = c1->nsph * c1->nlim;
-  const np_int ndit = 2 * ndi;
-  gaps = new double[c1->nsph]();
-  for (int gi = 0; gi < c1->nsph; gi++) gaps[gi] = rhs.gaps[gi];
-  tqev = new double[3]();
-  tqsv = new double[3]();
-  for (int ti = 0; ti < 3; ti++) {
-    tqev[ti] = rhs.tqev[ti];
-    tqsv[ti] = rhs.tqsv[ti];
-  }
-  tqse = new double*[2];
-  tqspe = new dcomplex*[2];
-  tqss = new double*[2];
-  tqsps = new dcomplex*[2];
-  tqce = new double*[2];
-  tqcpe = new dcomplex*[2];
-  tqcs = new double*[2];
-  tqcps = new dcomplex*[2];
-  for (int ti = 0; ti < 2; ti++) {
-    tqse[ti] = new double[c1->nsph]();
-    tqspe[ti] = new dcomplex[c1->nsph]();
-    tqss[ti] = new double[c1->nsph]();
-    tqsps[ti] = new dcomplex[c1->nsph]();
-    for (int tj = 0; tj < c1->nsph; tj++) {
-      tqse[ti][tj] = rhs.tqse[ti][tj];
-      tqspe[ti][tj] = rhs.tqspe[ti][tj];
-      tqss[ti][tj] = rhs.tqss[ti][tj];
-      tqsps[ti][tj] = rhs.tqsps[ti][tj];
-    }
-    tqce[ti] = new double[3]();
-    tqcpe[ti] = new dcomplex[3]();
-    tqcs[ti] = new double[3]();
-    tqcps[ti] = new dcomplex[3]();
-    for (int tj = 0; tj < 3; tj++) {
-      tqce[ti][tj] = rhs.tqce[ti][tj];
-      tqcpe[ti][tj] = rhs.tqcpe[ti][tj];
-      tqcs[ti][tj] = rhs.tqcs[ti][tj];
-      tqcps[ti][tj] = rhs.tqcps[ti][tj];
-    }
-  }
-  gapv = new double[3]();
-  gapp = new dcomplex*[3];
-  gappm = new dcomplex*[3];
-  gap = new double*[3];
-  gapm = new double*[3];
-  for (int gi = 0; gi < 3; gi++) {
-    gapv[gi] = rhs.gapv[gi];
-    gapp[gi] = new dcomplex[2]();
-    gappm[gi] = new dcomplex[2]();
-    gap[gi] = new double[2]();
-    gapm[gi] = new double[2]();
-    for (int gj = 0; gj < 2; gj++) {
-      gapp[gi][gj] = rhs.gapp[gi][gj];
-      gappm[gi][gj] = rhs.gappm[gi][gj];
-      gap[gi][gj] = rhs.gap[gi][gj];
-      gapm[gi][gj] = rhs.gapm[gi][gj];
-    }
-  }
-  u = new double[3]();
-  us = new double[3]();
-  un = new double[3]();
-  uns = new double[3]();
-  up = new double[3]();
-  ups = new double[3]();
-  unmp = new double[3]();
-  unsmp = new double[3]();
-  upmp = new double[3]();
-  upsmp = new double[3]();
-  duk = new double[3]();
-  for (int ui = 0; ui < 3; ui++) {
-    u[ui] = rhs.u[ui];
-    us[ui] = rhs.us[ui];
-    un[ui] = rhs.un[ui];
-    uns[ui] = rhs.uns[ui];
-    up[ui] = rhs.up[ui];
-    ups[ui] = rhs.ups[ui];
-    unmp[ui] = rhs.unmp[ui];
-    unsmp[ui] = rhs.unsmp[ui];
-    upmp[ui] = rhs.upmp[ui];
-    upsmp[ui] = rhs.upsmp[ui];
-    duk[ui] = rhs.duk[ui];
-  }
-  argi = new double[1]();
-  args = new double[1]();
-  argi[0] = rhs.argi[0];
-  args[0] = rhs.args[0];
-  cextlr = new double*[4];
-  cext = new double*[4];
-  cmullr = new double*[4];;
-  cmul = new double*[4];
-  for (int ci = 0; ci < 4; ci++) {
-    cextlr[ci] = new double[4]();
-    cext[ci] = new double[4]();
-    cmullr[ci] = new double[4]();
-    cmul[ci] = new double[4]();
-    for (int cj = 0; cj < 4; cj++) {
-      cextlr[ci][cj] = rhs.cextlr[ci][cj];
-      cext[ci][cj] = rhs.cext[ci][cj];
-      cmullr[ci][cj] = rhs.cmullr[ci][cj];
-      cmul[ci][cj] = rhs.cmul[ci][cj];
-    }
-  }
-  zpv = new double***[c1->lm];
-  for (int zi = 0; zi < c1->lm; zi++) {
-    zpv[zi] = new double**[3];
-    for (int zj = 0; zj < 3; zj++) {
-      zpv[zi][zj] = new double*[2];
-      for (int zk = 0; zk < 2; zk++) {
-	zpv[zi][zj][zk] = new double[2]();
-	zpv[zi][zj][zk][0] = rhs.zpv[zi][zj][zk][0];
-	zpv[zi][zj][zk][1] = rhs.zpv[zi][zj][zk][1];
-      }
-    }
-  }
-  am_vector = new dcomplex[ndit * ndit]();
-  for (np_int ai = 0; ai < ndit * ndit; ai++) am_vector[ai] = rhs.am_vector[ai];
-  am = new dcomplex*[ndit];
-  for (np_int ai = 0; ai < ndit; ai++) {
-    am[ai] = (am_vector + ai * ndit);
-  }
-  
-  arg = rhs.arg;
-  // These are suspect initializations
-  scan = rhs.scan;
-  cfmp = rhs.cfmp;
-  sfmp = rhs.sfmp;
-  cfsp = rhs.cfsp;
-  sfsp = rhs.sfsp;
-  // End of suspect initializations
-  wn = rhs.wn;
-  xip = rhs.xip;
-  sqsfi = rhs.sqsfi;
-  vk = rhs.vk;
-  firstxi = rhs.firstxi;
-  lastxi = rhs.lastxi;
-  xiblock = rhs.xiblock;
-  number_of_scales = rhs.number_of_scales;
-
-  proc_device = rhs.proc_device;
-  refinemode = rhs.refinemode;
-  maxrefiters = rhs.maxrefiters;
-  accuracygoal = rhs.accuracygoal;
-}
-
-#ifdef MPI_VERSION
-ClusterIterationData::ClusterIterationData(const mixMPI *mpidata, const int device_count) {
-  c1 = new ParticleDescriptorCluster(mpidata);
-  const int ndi = c1->nsph * c1->nlim;
-  const np_int ndit = 2 * ndi;
-  gaps = new double[c1->nsph]();
-  MPI_Bcast(gaps, c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  tqev = new double[3]();
-  tqsv = new double[3]();
-  MPI_Bcast(tqev, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(tqsv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  tqse = new double*[2];
-  tqspe = new dcomplex*[2];
-  tqss = new double*[2];
-  tqsps = new dcomplex*[2];
-  tqce = new double*[2];
-  tqcpe = new dcomplex*[2];
-  tqcs = new double*[2];
-  tqcps = new dcomplex*[2];
-  for (int ti = 0; ti < 2; ti++) {
-    tqse[ti] = new double[c1->nsph]();
-    tqspe[ti] = new dcomplex[c1->nsph]();
-    tqss[ti] = new double[c1->nsph]();
-    tqsps[ti] = new dcomplex[c1->nsph]();
-    MPI_Bcast(tqse[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqspe[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqss[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqsps[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    tqce[ti] = new double[3]();
-    tqcpe[ti] = new dcomplex[3]();
-    tqcs[ti] = new double[3]();
-    tqcps[ti] = new dcomplex[3]();
-    MPI_Bcast(tqce[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqcpe[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqcs[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqcps[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-  }
-  gapv = new double[3]();
-  gapp = new dcomplex*[3];
-  gappm = new dcomplex*[3];
-  gap = new double*[3];
-  gapm = new double*[3];
-  MPI_Bcast(gapv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  for (int gi = 0; gi < 3; gi++) {
-    gapp[gi] = new dcomplex[2]();
-    gappm[gi] = new dcomplex[2]();
-    gap[gi] = new double[2]();
-    gapm[gi] = new double[2]();
-    MPI_Bcast(gapp[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(gappm[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(gap[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(gapm[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  }
-  u = new double[3]();
-  us = new double[3]();
-  un = new double[3]();
-  uns = new double[3]();
-  up = new double[3]();
-  ups = new double[3]();
-  unmp = new double[3]();
-  unsmp = new double[3]();
-  upmp = new double[3]();
-  upsmp = new double[3]();
-  duk = new double[3]();
-  MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  argi = new double[1]();
-  args = new double[1]();
-  MPI_Bcast(argi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  cextlr = new double*[4];
-  cext = new double*[4];
-  cmullr = new double*[4];;
-  cmul = new double*[4];
-  for (int ci = 0; ci < 4; ci++) {
-    cextlr[ci] = new double[4]();
-    cext[ci] = new double[4]();
-    cmullr[ci] = new double[4]();
-    cmul[ci] = new double[4]();
-    MPI_Bcast(cextlr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(cext[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(cmullr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(cmul[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  }
-  zpv = new double***[c1->lm];
-  for (int zi = 0; zi < c1->lm; zi++) {
-    zpv[zi] = new double**[3];
-    for (int zj = 0; zj < 3; zj++) {
-      zpv[zi][zj] = new double*[2];
-      for (int zk = 0; zk < 2; zk++) {
-	zpv[zi][zj][zk] = new double[2]();
-	MPI_Bcast(zpv[zi][zj][zk], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-      }
-    }
-  }
-  am_vector = new dcomplex[ndit * ndit]();
-  am = new dcomplex*[ndit];
-  for (np_int ai = 0; ai < ndit; ai++) {
-    am[ai] = (am_vector + ai * ndit);
-    MPI_Bcast(am[ai], ndit, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-  }
-  MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&sqsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  lastxi = ((mpidata->rank+1) * xiblock)+1;
-  firstxi = lastxi-xiblock+1;
-  if (lastxi > number_of_scales) lastxi = number_of_scales;
-
-#ifdef USE_MAGMA
-  proc_device = mpidata->rank % device_count;
-#else
-  proc_device = 0;
-#endif
-  MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-}
-
-void ClusterIterationData::mpibcast(const mixMPI *mpidata) {
-  c1->mpibcast(mpidata);
-  const int ndi = c1->nsph * c1->nlim;
-  const np_int ndit = 2 * ndi;
-  MPI_Bcast(gaps, c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(tqev, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(tqsv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  for (int ti = 0; ti < 2; ti++) {
-    MPI_Bcast(tqse[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqspe[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqss[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqsps[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqce[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqcpe[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqcs[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(tqcps[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-  }
-  MPI_Bcast(gapv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  for (int gi = 0; gi < 3; gi++) {
-    MPI_Bcast(gapp[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(gappm[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-    MPI_Bcast(gap[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(gapm[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  }
-  MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(argi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  for (int ci = 0; ci < 4; ci++) {
-    MPI_Bcast(cextlr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(cext[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(cmullr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-    MPI_Bcast(cmul[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  }
-  for (int zi = 0; zi < c1->lm; zi++) {
-    for (int zj = 0; zj < 3; zj++) {
-      for (int zk = 0; zk < 2; zk++) {
-	MPI_Bcast(zpv[zi][zj][zk], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-      }
-    }
-  }
-  // since MPI expects an int argument for the number of elements to transfer in one go, transfer am one row at a time
-  for (int ai = 0; ai < ndit; ai++) {
-    MPI_Bcast(am[ai], ndit, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-  }
-  MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&sqsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
-  MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-}
-#endif
-
-ClusterIterationData::~ClusterIterationData() {
-  const int nsph = c1->nsph;
-  delete[] am_vector;
-  delete[] am;
-  for (int zi = c1->lm - 1; zi > -1; zi--) {
-    for (int zj = 2; zj > -1; zj--) {
-      delete[] zpv[zi][zj][1];
-      delete[] zpv[zi][zj][0];
-      delete[] zpv[zi][zj];
-    }
-    delete[] zpv[zi];
-  }
-  delete[] zpv;
-  delete c1;
-  delete[] gaps;
-  for (int ti = 1; ti > -1; ti--) {
-    delete[] tqse[ti];
-    delete[] tqss[ti];
-    delete[] tqspe[ti];
-    delete[] tqsps[ti];
-    delete[] tqce[ti];
-    delete[] tqcpe[ti];
-    delete[] tqcs[ti];
-    delete[] tqcps[ti];
-  }
-  delete[] tqse;
-  delete[] tqss;
-  delete[] tqspe;
-  delete[] tqsps;
-  delete[] tqce;
-  delete[] tqcpe;
-  delete[] tqcs;
-  delete[] tqcps;
-  delete[] tqev;
-  delete[] tqsv;
-  for (int gi = 2; gi > -1; gi--) {
-    delete[] gapp[gi];
-    delete[] gappm[gi];
-    delete[] gap[gi];
-    delete[] gapm[gi];
-  }
-  delete[] gapp;
-  delete[] gappm;
-  delete[] gap;
-  delete[] gapm;
-  delete[] gapv;
-  delete[] u;
-  delete[] us;
-  delete[] un;
-  delete[] uns;
-  delete[] up;
-  delete[] ups;
-  delete[] unmp;
-  delete[] unsmp;
-  delete[] upmp;
-  delete[] upsmp;
-  delete[] argi;
-  delete[] args;
-  delete[] duk;
-  for (int ci = 3; ci > -1; ci--) {
-    delete[] cextlr[ci];
-    delete[] cext[ci];
-    delete[] cmullr[ci];
-    delete[] cmul[ci];
-  }
-  delete[] cextlr;
-  delete[] cext;
-  delete[] cmullr;
-  delete[] cmul;
-}
-
 // >>> ParticleDescriptor class implementation. <<< //
 ParticleDescriptor::ParticleDescriptor(GeometryConfiguration *gconf, ScattererConfiguration *sconf) {
   _class_type = BASE_TYPE;
diff --git a/src/libnptm/Configuration.cpp b/src/libnptm/Configuration.cpp
index 252e76434f3111b20e19c43e362386eba27bcf46..fe78967c8e6a18705b076d266cd29fe4686c8f69 100644
--- a/src/libnptm/Configuration.cpp
+++ b/src/libnptm/Configuration.cpp
@@ -222,13 +222,18 @@ GeometryConfiguration* GeometryConfiguration::from_legacy(const std::string& fil
     throw ex;
   }
   int _nsph = 0, _lm = 0, _in_pol = 0, _npnt = 0, _npntts = 0, _isam = 0;
-  int _li = 0, _le = 0, _iavm = 0;
+  int _li = 0, _le = 0, _iavm = 0, num_params = 0;
   np_int _mxndm = 0;
   regex re = regex("-?[0-9]+");
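+  // Count the numeric values on the current line: the legacy single-sphere header carries 6 parameters, the multi-sphere one 9; any other count is rejected below.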
+  str_target = file_lines[last_read_line];
+  while(regex_search(str_target, m, re)) {
+    str_target = m.suffix().str();
+    num_params++;
+  }
   str_target = file_lines[last_read_line++];
   regex_search(str_target, m, re);
   _nsph = stoi(m.str());
-  if (_nsph == 1) {
+  if (num_params == 6) {
     for (int ri = 0; ri < 5; ri++) {
       str_target = m.suffix().str();
       regex_search(str_target, m, re);
@@ -238,7 +243,7 @@ GeometryConfiguration* GeometryConfiguration::from_legacy(const std::string& fil
       if (ri == 3) _npntts = stoi(m.str());
       if (ri == 4) _isam = stoi(m.str());
     }
-  } else {
+  } else if (num_params == 9) {
     for (int ri = 0; ri < 8; ri++) {
       str_target = m.suffix().str();
       regex_search(str_target, m, re);
@@ -251,6 +256,9 @@ GeometryConfiguration* GeometryConfiguration::from_legacy(const std::string& fil
       if (ri == 6) _iavm = stoi(m.str());
       if (ri == 7) _isam = stoi(m.str());
     }
+  } else {
+    OpenConfigurationFileException ex("ERROR: " + file_name + " is not a recognized input file.");
+    throw ex;
   }
   double *x, *y, *z;
   x = new double[_nsph];
@@ -312,15 +320,14 @@ GeometryConfiguration* GeometryConfiguration::from_legacy(const std::string& fil
   regex_search(str_target, m, re);
   fjwtm = stoi(m.str());
   GeometryConfiguration *conf = new GeometryConfiguration(
-							  _nsph, _lm, _in_pol, _npnt, _npntts, _isam,
-							  _li, _le, _mxndm, _iavm,
-							  x, y, z,
-							  in_th_start, in_th_step, in_th_end,
-							  sc_th_start, sc_th_step, sc_th_end,
-							  in_ph_start, in_ph_step, in_ph_end,
-							  sc_ph_start, sc_ph_step, sc_ph_end,
-							  fjwtm
-							  );
+    _nsph, _lm, _in_pol, _npnt, _npntts, _isam,
+    _li, _le, _mxndm, _iavm, x, y, z,
+    in_th_start, in_th_step, in_th_end,
+    sc_th_start, sc_th_step, sc_th_end,
+    in_ph_start, in_ph_step, in_ph_end,
+    sc_ph_start, sc_ph_step, sc_ph_end,
+    fjwtm
+  );
   delete[] file_lines;
   return conf;
 }
diff --git a/src/libnptm/outputs.cpp b/src/libnptm/outputs.cpp
index 4d6e61a2fb6273f9e665e038098d6f76ad0a868b..36abbc3b2a5aad669a10e872a74e68c576061c5d 100644
--- a/src/libnptm/outputs.cpp
+++ b/src/libnptm/outputs.cpp
@@ -288,9 +288,9 @@ ClusterOutputInfo::ClusterOutputInfo(
   vec_dir_mulclr = new double[16 * ndirs * xi_block_size]();
 }
 
-ClusterOutputInfo::ClusterOutputInfo(const std::string &hdf5_file_name) {
+ClusterOutputInfo::ClusterOutputInfo(const std::string &hdf5_name) {
   unsigned int flags = H5F_ACC_RDONLY;
-  HDFFile *hdf_file = new HDFFile(hdf5_file_name, flags);
+  HDFFile *hdf_file = new HDFFile(hdf5_name, flags);
   herr_t status = hdf_file->get_status();
   string str_name, str_type;
   if (status == 0) {
@@ -475,11 +475,11 @@ ClusterOutputInfo::ClusterOutputInfo(const std::string &hdf5_file_name) {
     status = hdf_file->read("VEC_FKC1", str_type, vec_fkc1);
     vec_fkc2 = new double[xi_block_size];
     status = hdf_file->read("VEC_FKC2", str_type, vec_fkc2);
+    // Initialize directions (they are scale-independent)
     vec_dir_tidg = new double[_num_theta];
     vec_dir_pidg = new double[_num_phi];
     vec_dir_tsdg = new double[_num_thetas];
     vec_dir_psdg = new double[_num_phis];
-    // Initialize directions (they are scale-independent)
     double cti = th, cpi = ph, cts = ths, cps = phs;
     for (int di = 0; di < _num_theta; di++) {
       vec_dir_tidg[di] = cti;
@@ -696,7 +696,7 @@ ClusterOutputInfo::ClusterOutputInfo(const std::string &hdf5_file_name) {
     delete hdf_file;
   } else {
     if (hdf_file != NULL) delete hdf_file;
-    UnrecognizedFormatException ex("Error: " + hdf5_file_name + " not recognized as a valid HDF5 file!");
+    UnrecognizedFormatException ex("Error: " + hdf5_name + " not recognized as a valid HDF5 file!");
     throw ex;
   }
 }
@@ -961,6 +961,7 @@ long ClusterOutputInfo::compute_size() {
 
 int ClusterOutputInfo::insert(const ClusterOutputInfo &rhs) {
   int result = 0;
+  result += (rhs.nsph == nsph) ? 0 : 1;
   result += (rhs.inpol == inpol) ? 0 : 1;
   result += (rhs.iavm == iavm) ? 0 : 1;
   result += (rhs.isam == isam) ? 0 : 1;
@@ -2355,9 +2356,10 @@ int ClusterOutputInfo::write_legacy(const std::string &output) {
 #ifdef MPI_VERSION
 int ClusterOutputInfo::mpireceive(const mixMPI *mpidata, int pid) {
   int result = 0;
-  int chk_inpol, chk_iavm, chk_isam, chk_num_theta, chk_num_thetas;
+  int chk_nsph, chk_inpol, chk_iavm, chk_isam, chk_num_theta, chk_num_thetas;
   int chk_num_phi, chk_num_phis, chk_ndirs, chk_idfc, chk_configs;
   double chk_exri;
+  MPI_Recv(&chk_nsph, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
   MPI_Recv(&chk_inpol, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
   MPI_Recv(&chk_iavm, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
   MPI_Recv(&chk_isam, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
@@ -2369,6 +2371,7 @@ int ClusterOutputInfo::mpireceive(const mixMPI *mpidata, int pid) {
   MPI_Recv(&chk_exri, 1, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
   MPI_Recv(&chk_idfc, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
   MPI_Recv(&chk_configs, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  result += (chk_nsph == nsph) ? 0 : 1;
   result += (chk_inpol == inpol) ? 0 : 1;
   result += (chk_iavm == iavm) ? 0 : 1;
   result += (chk_isam == isam) ? 0 : 1;
@@ -2559,6 +2562,7 @@ int ClusterOutputInfo::mpisend(const mixMPI *mpidata) {
   int result = 0;
   int chunk_size;
   // Send output metadata for configuration cross-check
+  MPI_Send(&nsph, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
   MPI_Send(&inpol, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
   MPI_Send(&iavm, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
   MPI_Send(&isam, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
@@ -2932,9 +2936,9 @@ InclusionOutputInfo::InclusionOutputInfo(
   vec_dir_mulllr = new double[16 * ndirs * xi_block_size]();
 }
 
-InclusionOutputInfo::InclusionOutputInfo(const std::string &hdf5_file_name) {
+InclusionOutputInfo::InclusionOutputInfo(const std::string &hdf5_name) {
   unsigned int flags = H5F_ACC_RDONLY;
-  HDFFile *hdf_file = new HDFFile(hdf5_file_name, flags);
+  HDFFile *hdf_file = new HDFFile(hdf5_name, flags);
   herr_t status = hdf_file->get_status();
   string str_name, str_type;
   if (status == 0) {
@@ -3060,18 +3064,28 @@ InclusionOutputInfo::InclusionOutputInfo(const std::string &hdf5_file_name) {
     status = hdf_file->read("VEC_FSAS22", str_type, vec_fsas22);
     vec_fsas12 = new dcomplex[xi_block_size];
     status = hdf_file->read("VEC_FSAS12", str_type, vec_fsas12);
-    str_type = "FLOAT64_(" + to_string(_num_theta) + ")";
+    // Initialize directions (they are scale-independent)
     vec_dir_tidg = new double[_num_theta];
-    status = hdf_file->read("VEC_DIR_TIDG", str_type, vec_dir_tidg);
-    str_type = "FLOAT64_(" + to_string(_num_phi) + ")";
-    vec_dir_pidg = new double[_num_phi];
-    status = hdf_file->read("VEC_DIR_PIDG", str_type, vec_dir_pidg);
-    str_type = "FLOAT64_(" + to_string(_num_thetas) + ")";
     vec_dir_tsdg = new double[_num_thetas];
-    status = hdf_file->read("VEC_DIR_TSDG", str_type, vec_dir_tsdg);
-    str_type = "FLOAT64_(" + to_string(_num_phis) + ")";
+    vec_dir_pidg = new double[_num_phi];
     vec_dir_psdg = new double[_num_phis];
-    status = hdf_file->read("VEC_DIR_PSDG", str_type, vec_dir_psdg);
+    double cti = th, cpi = ph, cts = ths, cps = phs;
+    for (int di = 0; di < _num_theta; di++) {
+      vec_dir_tidg[di] = cti;
+      cti += thstp;
+    }
+    for (int di = 0; di < _num_thetas; di++) {
+      vec_dir_tsdg[di] = cts;
+      cts += thsstp;
+    }
+    for (int di = 0; di < _num_phi; di++) {
+      vec_dir_pidg[di] = cpi;
+      cpi += phstp;
+    }
+    for (int di = 0; di < _num_phis; di++) {
+      vec_dir_psdg[di] = cps;
+      cps += phsstp;
+    }
     str_type = "FLOAT64_(" + to_string(ndirs) + ")";
     vec_dir_scand = new double[ndirs];
     status = hdf_file->read("VEC_DIR_SCAND", str_type, vec_dir_scand);
@@ -3236,7 +3250,7 @@ InclusionOutputInfo::InclusionOutputInfo(const std::string &hdf5_file_name) {
     delete hdf_file;
   } else {
     if (hdf_file != NULL) delete hdf_file;
-    UnrecognizedFormatException ex("Error: " + hdf5_file_name + " not recognized as a valid HDF5 file!");
+    UnrecognizedFormatException ex("Error: " + hdf5_name + " not recognized as a valid HDF5 file!");
     throw ex;
   }
 }
@@ -3420,6 +3434,7 @@ long InclusionOutputInfo::compute_size(
 
 int InclusionOutputInfo::insert(const InclusionOutputInfo &rhs) {
   int result = 0;
+  result += (rhs.nsph == nsph) ? 0 : 1;
   result += (rhs.inpol == inpol) ? 0 : 1;
   result += (rhs.iavm == iavm) ? 0 : 1;
   result += (rhs.isam == isam) ? 0 : 1;
@@ -3776,22 +3791,6 @@ int InclusionOutputInfo::write_hdf5(const std::string &file_name) {
   rec_name_list->append("VEC_FSAS22");
   rec_type_list->append(str_type);
   rec_ptr_list->append(vec_fsas22);
-  str_type = "FLOAT64_(" + to_string(_num_theta) + ")";
-  rec_name_list->append("VEC_DIR_TIDG");
-  rec_type_list->append(str_type);
-  rec_ptr_list->append(vec_dir_tidg);
-  str_type = "FLOAT64_(" + to_string(_num_phi) + ")";
-  rec_name_list->append("VEC_DIR_PIDG");
-  rec_type_list->append(str_type);
-  rec_ptr_list->append(vec_dir_pidg);
-  str_type = "FLOAT64_(" + to_string(_num_thetas) + ")";
-  rec_name_list->append("VEC_DIR_TSDG");
-  rec_type_list->append(str_type);
-  rec_ptr_list->append(vec_dir_tsdg);
-  str_type = "FLOAT64_(" + to_string(_num_phis) + ")";
-  rec_name_list->append("VEC_DIR_PSDG");
-  rec_type_list->append(str_type);
-  rec_ptr_list->append(vec_dir_psdg);
   str_type = "FLOAT64_(" + to_string(ndirs) + ")";
   rec_name_list->append("VEC_DIR_SCAND");
   rec_type_list->append(str_type);
@@ -4481,9 +4480,10 @@ int InclusionOutputInfo::write_legacy(const std::string &output) {
 #ifdef MPI_VERSION
 int InclusionOutputInfo::mpireceive(const mixMPI* mpidata, int pid) {
   int result = 0;
-  int chk_inpol, chk_iavm, chk_isam, chk_num_theta, chk_num_thetas;
+  int chk_nsph, chk_inpol, chk_iavm, chk_isam, chk_num_theta, chk_num_thetas;
   int chk_num_phi, chk_num_phis, chk_ndirs, chk_idfc, chk_configs;
   double chk_exri;
+  MPI_Recv(&chk_nsph, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
   MPI_Recv(&chk_inpol, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
   MPI_Recv(&chk_iavm, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
   MPI_Recv(&chk_isam, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
@@ -4495,6 +4495,7 @@ int InclusionOutputInfo::mpireceive(const mixMPI* mpidata, int pid) {
   MPI_Recv(&chk_exri, 1, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
   MPI_Recv(&chk_idfc, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
   MPI_Recv(&chk_configs, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  result += (chk_nsph == nsph) ? 0 : 1;
   result += (chk_inpol == inpol) ? 0 : 1;
   result += (chk_iavm == iavm) ? 0 : 1;
   result += (chk_isam == isam) ? 0 : 1;
@@ -4638,6 +4639,7 @@ int InclusionOutputInfo::mpisend(const mixMPI *mpidata) {
   int result = 0;
   int chunk_size;
   // Send output metadata for configuration cross-check
+  MPI_Send(&nsph, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
   MPI_Send(&inpol, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
   MPI_Send(&iavm, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
   MPI_Send(&isam, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
@@ -4775,3 +4777,1126 @@ int InclusionOutputInfo::mpisend(const mixMPI *mpidata) {
 }
 #endif // MPI_VERSION
 // >>> END OF InclusionOutputInfo CLASS IMPLEMENTATION <<<
+
+// >>> SphereOutputInfo CLASS IMPLEMENTATION <<<
+SphereOutputInfo::SphereOutputInfo(
+  ScattererConfiguration *sc, GeometryConfiguration *gc,
+  const mixMPI *mpidata, int first_xi, int xi_length
+) {
+  _first_xi = first_xi;
+  nsph = gc->number_of_spheres;
+  lm = gc->l_max;
+  inpol = gc->in_pol;
+  npnt = gc->npnt;
+  npntts = gc->npntts;
+  isam = gc->isam;
+  idfc = sc->idfc;
+  th = gc->in_theta_start;
+  thstp = gc->in_theta_step;
+  thlst = gc->in_theta_end;
+  _num_theta = (thstp == 0.0) ? 1 : 1 + (int)((thlst - th) / thstp);
+  ths = gc->sc_theta_start;
+  thsstp = gc->sc_theta_step;
+  thslst = gc->sc_theta_end;
+  _num_thetas = (thsstp == 0.0) ? 1 : 1 + (int)((thslst - ths) / thsstp);
+  ph = gc->in_phi_start;
+  phstp = gc->in_phi_step;
+  phlst = gc->in_phi_end;
+  _num_phi = (phstp == 0.0) ? 1 : 1 + (int)((phlst - ph) / phstp);
+  phs = gc->sc_phi_start;
+  phsstp = gc->sc_phi_step;
+  phslst = gc->sc_phi_end;
+  _num_phis = (phsstp == 0.0) ? 1 : 1 + (int)((phslst - phs) / phsstp);
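+  // Total number of combinations of incidence and scattering directions.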
+  ndirs = _num_theta * _num_thetas * _num_phi * _num_phis;
+  configurations = sc->configurations;
+  double exdc = sc->exdc;
+  exri = sqrt(exdc);
+  nxi = sc->number_of_scales;
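+  // A xi_length of 0 means this instance covers the full set of scales; otherwise it holds a block of xi_length scales starting at first_xi.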
+  xi_block_size = (xi_length == 0) ? nxi : xi_length;
+  jwtm = gc->jwtm;
+  lcalc = 0;
+  arg = 0.0 + I * 0.0;
+  vec_jxi = new int[xi_block_size]();
+  vec_ier = new short[xi_block_size]();
+  vec_vk = new double[xi_block_size]();
+  vec_xi = new double[xi_block_size]();
+  vec_sphere_sizes = new double[configurations * xi_block_size]();
+  vec_sphere_ref_indices = new dcomplex[configurations * xi_block_size]();
+  vec_scs = new double[configurations * xi_block_size]();
+  vec_abs = new double[configurations * xi_block_size]();
+  vec_exs = new double[configurations * xi_block_size]();
+  vec_albeds = new double[configurations * xi_block_size]();
+  vec_scsrt = new double[configurations * xi_block_size]();
+  vec_absrt = new double[configurations * xi_block_size]();
+  vec_exsrt = new double[configurations * xi_block_size]();
+  vec_fsas = new dcomplex[configurations * xi_block_size]();
+  vec_qschu = new double[configurations * xi_block_size]();
+  vec_pschu = new double[configurations * xi_block_size]();
+  vec_s0mag = new double[configurations * xi_block_size]();
+  vec_cosav = new double[configurations * xi_block_size]();
+  vec_raprs = new double[configurations * xi_block_size]();
+  vec_tqek1 = new double[configurations * xi_block_size]();
+  vec_tqek2 = new double[configurations * xi_block_size]();
+  vec_tqsk1 = new double[configurations * xi_block_size]();
+  vec_tqsk2 = new double[configurations * xi_block_size]();
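+  // The 't'-suffixed vectors are only allocated for multi-sphere runs; the destructor and the HDF5 reader apply the same NSPH check.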
+  if (nsph == 1) {
+    vec_fsat = NULL;
+    vec_qschut = NULL;
+    vec_pschut = NULL;
+    vec_s0magt = NULL;
+  } else {
+    vec_fsat = new dcomplex[xi_block_size]();
+    vec_qschut = new double[xi_block_size]();
+    vec_pschut = new double[xi_block_size]();
+    vec_s0magt = new double[xi_block_size]();
+  }
+  // Initialize directions (they are scale-independent)
+  vec_dir_tidg = new double[_num_theta];
+  vec_dir_pidg = new double[_num_phi];
+  vec_dir_tsdg = new double[_num_thetas];
+  vec_dir_psdg = new double[_num_phis];
+  double cti = th, cpi = ph, cts = ths, cps = phs;
+  for (int di = 0; di < _num_theta; di++) {
+    vec_dir_tidg[di] = cti;
+    cti += thstp;
+  }
+  for (int di = 0; di < _num_thetas; di++) {
+    vec_dir_tsdg[di] = cts;
+    cts += thsstp;
+  }
+  for (int di = 0; di < _num_phi; di++) {
+    vec_dir_pidg[di] = cpi;
+    cpi += phstp;
+  }
+  for (int di = 0; di < _num_phis; di++) {
+    vec_dir_psdg[di] = cps;
+    cps += phsstp;
+  }
+  vec_dir_scand = new double[ndirs]();
+  vec_dir_cfmp = new double[ndirs]();
+  vec_dir_cfsp = new double[ndirs]();
+  vec_dir_sfmp = new double[ndirs]();
+  vec_dir_sfsp = new double[ndirs]();
+  vec_dir_un = new double[3 * ndirs]();
+  vec_dir_uns = new double[3 * ndirs]();
+  vec_dir_sas11 = new dcomplex[nsph * ndirs * xi_block_size]();
+  vec_dir_sas21 = new dcomplex[nsph * ndirs * xi_block_size]();
+  vec_dir_sas12 = new dcomplex[nsph * ndirs * xi_block_size]();
+  vec_dir_sas22 = new dcomplex[nsph * ndirs * xi_block_size]();
+  vec_dir_fx = new double[nsph * _num_theta * _num_phi * xi_block_size]();
+  vec_dir_fy = new double[nsph * _num_theta * _num_phi * xi_block_size]();
+  vec_dir_fz = new double[nsph * _num_theta * _num_phi * xi_block_size]();
+  vec_dir_muls = new double[16 * nsph * ndirs * xi_block_size]();
+  vec_dir_mulslr = new double[16 * nsph * ndirs * xi_block_size]();
+}
+
+SphereOutputInfo::SphereOutputInfo(const std::string &hdf5_name) {
+  unsigned int flags = H5F_ACC_RDONLY;
+  HDFFile *hdf_file = new HDFFile(hdf5_name, flags);
+  herr_t status = hdf_file->get_status();
+  string str_name, str_type;
+  if (status == 0) {
+    status = hdf_file->read("NSPH", "INT32_(1)", &nsph);
+    status = hdf_file->read("LM", "INT32_(1)", &lm);
+    status = hdf_file->read("INPOL", "INT32_(1)", &inpol);
+    status = hdf_file->read("NPNT", "INT32_(1)", &npnt);
+    status = hdf_file->read("NPNTTS", "INT32_(1)", &npntts);
+    status = hdf_file->read("ISAM", "INT32_(1)", &isam);
+    status = hdf_file->read("JWTM", "INT32_(1)", &jwtm);
+    status = hdf_file->read("TH_START", "FLOAT64_(1)", &th);
+    status = hdf_file->read("TH_STEP", "FLOAT64_(1)", &thstp);
+    status = hdf_file->read("TH_END", "FLOAT64_(1)", &thlst);
+    _num_theta = (thstp == 0.0) ? 1 : 1 + int((thlst - th) / thstp);
+    status = hdf_file->read("THS_START", "FLOAT64_(1)", &ths);
+    status = hdf_file->read("THS_STEP", "FLOAT64_(1)", &thsstp);
+    status = hdf_file->read("THS_END", "FLOAT64_(1)", &thslst);
+    _num_thetas = (thsstp == 0.0) ? 1 : 1 + int((thslst - ths) / thsstp);
+    status = hdf_file->read("PH_START", "FLOAT64_(1)", &ph);
+    status = hdf_file->read("PH_STEP", "FLOAT64_(1)", &phstp);
+    status = hdf_file->read("PH_END", "FLOAT64_(1)", &phlst);
+    _num_phi = (phstp == 0.0) ? 1 : 1 + int((phlst - ph) / phstp);
+    status = hdf_file->read("PHS_START", "FLOAT64_(1)", &phs);
+    status = hdf_file->read("PHS_STEP", "FLOAT64_(1)", &phsstp);
+    status = hdf_file->read("PHS_END", "FLOAT64_(1)", &phslst);
+    _num_phis = (phsstp == 0.0) ? 1 : 1 + int((phslst - phs) / phsstp);
+    ndirs = _num_theta * _num_thetas * _num_phi * _num_phis;
+    status = hdf_file->read("EXRI", "FLOAT64_(1)", &exri);
+    status = hdf_file->read("NUM_CONF", "INT32_(1)", &configurations);
+    status = hdf_file->read("IDFC", "INT32_(1)", &idfc);
+    status = hdf_file->read("XI1", "INT32_(1)", &_first_xi);
+    status = hdf_file->read("NXI", "INT32_(1)", &xi_block_size);
+    nxi = (_first_xi == 1) ? xi_block_size : xi_block_size + _first_xi;
+    lcalc = 0;
+    arg = 0.0 + I * 0.0;
+    str_type = "INT32_(" + to_string(xi_block_size) + ")";
+    vec_jxi = new int[xi_block_size];
+    status = hdf_file->read("VEC_JXI", str_type, vec_jxi);
+    str_type = "INT16_(" + to_string(xi_block_size) + ")";
+    vec_ier = new short[xi_block_size];
+    status = hdf_file->read("VEC_IER", str_type, vec_ier);
+    str_type = "FLOAT64_(" + to_string(xi_block_size) + ")";
+    vec_vk = new double[xi_block_size];
+    status = hdf_file->read("VEC_VK", str_type, vec_vk);
+    vec_xi = new double[xi_block_size];
+    status = hdf_file->read("VEC_XI", str_type, vec_xi);
+    str_type = "FLOAT64_(" + to_string(configurations * xi_block_size) + ")";
+    vec_sphere_sizes = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_SPH_SIZES", str_type, vec_sphere_sizes);
+    str_type = "FLOAT64_(" + to_string(2 * configurations * xi_block_size) + ")";
+    vec_sphere_ref_indices = new dcomplex[configurations * xi_block_size];
+    status = hdf_file->read("VEC_SPH_REFRI", str_type, vec_sphere_ref_indices);
+    str_type = "FLOAT64_(" + to_string(configurations * xi_block_size) + ")";
+    vec_scs = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_SCS", str_type, vec_scs);
+    vec_abs = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_ABS", str_type, vec_abs);
+    vec_exs = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_EXS", str_type, vec_exs);
+    vec_albeds = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_ALBEDS", str_type, vec_albeds);
+    vec_scsrt = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_SCSRT", str_type, vec_scsrt);
+    vec_absrt = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_ABSRT", str_type, vec_absrt);
+    vec_exsrt = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_EXSRT", str_type, vec_exsrt);
+    str_type = "FLOAT64_(" + to_string(2 * configurations * xi_block_size) + ")";
+    vec_fsas = new dcomplex[configurations * xi_block_size];
+    status = hdf_file->read("VEC_FSAS", str_type, vec_fsas);
+    str_type = "FLOAT64_(" + to_string(configurations * xi_block_size) + ")";
+    vec_qschu = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_QSCHU", str_type, vec_qschu);
+    vec_pschu = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_PSCHU", str_type, vec_pschu);
+    vec_s0mag = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_S0MAG", str_type, vec_s0mag);
+    vec_cosav = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_COSAV", str_type, vec_cosav);
+    vec_raprs = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_RAPRS", str_type, vec_raprs);
+    vec_tqek1 = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_TQEK1", str_type, vec_tqek1);
+    vec_tqek2 = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_TQEK2", str_type, vec_tqek2);
+    vec_tqsk1 = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_TQSK1", str_type, vec_tqsk1);
+    vec_tqsk2 = new double[configurations * xi_block_size];
+    status = hdf_file->read("VEC_TQSK2", str_type, vec_tqsk2);
+    if (nsph != 1) {
+      str_type = "FLOAT64_(" + to_string(2 * xi_block_size) + ")";
+      vec_fsat = new dcomplex[xi_block_size];
+      status = hdf_file->read("VEC_FSAT", str_type, vec_fsat);
+      str_type = "FLOAT64_(" + to_string(xi_block_size) + ")";
+      vec_qschut = new double[xi_block_size];
+      status = hdf_file->read("VEC_QSCHUT", str_type, vec_qschut);
+      vec_pschut = new double[xi_block_size];
+      status = hdf_file->read("VEC_PSCHUT", str_type, vec_pschut);
+      vec_s0magt = new double[xi_block_size];
+      status = hdf_file->read("VEC_S0MAGT", str_type, vec_s0magt);
+    } else {
+      vec_fsat = NULL;
+      vec_qschut = NULL;
+      vec_pschut = NULL;
+      vec_s0magt = NULL;
+    }
+    // Initialize directions (they are scale-independent)
+    vec_dir_tidg = new double[_num_theta];
+    vec_dir_tsdg = new double[_num_thetas];
+    vec_dir_pidg = new double[_num_phi];
+    vec_dir_psdg = new double[_num_phis];
+    double cti = th, cpi = ph, cts = ths, cps = phs;
+    for (int di = 0; di < _num_theta; di++) {
+      vec_dir_tidg[di] = cti;
+      cti += thstp;
+    }
+    for (int di = 0; di < _num_thetas; di++) {
+      vec_dir_tsdg[di] = cts;
+      cts += thsstp;
+    }
+    for (int di = 0; di < _num_phi; di++) {
+      vec_dir_pidg[di] = cpi;
+      cpi += phstp;
+    }
+    for (int di = 0; di < _num_phis; di++) {
+      vec_dir_psdg[di] = cps;
+      cps += phsstp;
+    }
+    str_type = "FLOAT64_(" + to_string(ndirs) + ")";
+    vec_dir_scand = new double[ndirs];
+    status = hdf_file->read("VEC_DIR_SCAND", str_type, vec_dir_scand);
+    vec_dir_cfmp = new double[ndirs];
+    status = hdf_file->read("VEC_DIR_CFMP", str_type, vec_dir_cfmp);
+    vec_dir_sfmp = new double[ndirs];
+    status = hdf_file->read("VEC_DIR_SFMP", str_type, vec_dir_sfmp);
+    vec_dir_cfsp = new double[ndirs];
+    status = hdf_file->read("VEC_DIR_CFSP", str_type, vec_dir_cfsp);
+    vec_dir_sfsp = new double[ndirs];
+    status = hdf_file->read("VEC_DIR_SFSP", str_type, vec_dir_sfsp);
+    str_type = "FLOAT64_(" + to_string(3 * ndirs) + ")";
+    vec_dir_un = new double[3 * ndirs];
+    status = hdf_file->read("VEC_DIR_UN", str_type, vec_dir_un);
+    vec_dir_uns = new double[3 * ndirs];
+    status = hdf_file->read("VEC_DIR_UNS", str_type, vec_dir_uns);
+    str_type = "FLOAT64_(" + to_string(2 * nsph * ndirs * xi_block_size) + ")";
+    vec_dir_sas11 = new dcomplex[nsph * ndirs * xi_block_size];
+    status = hdf_file->read("VEC_DIR_SAS11", str_type, vec_dir_sas11);
+    vec_dir_sas21 = new dcomplex[nsph * ndirs * xi_block_size];
+    status = hdf_file->read("VEC_DIR_SAS21", str_type, vec_dir_sas21);
+    vec_dir_sas12 = new dcomplex[nsph * ndirs * xi_block_size];
+    status = hdf_file->read("VEC_DIR_SAS12", str_type, vec_dir_sas12);
+    vec_dir_sas22 = new dcomplex[nsph * ndirs * xi_block_size];
+    status = hdf_file->read("VEC_DIR_SAS22", str_type, vec_dir_sas22);
+    str_type = "FLOAT64_(" + to_string(nsph * _num_theta * _num_phi * xi_block_size) + ")";
+    vec_dir_fx = new double[nsph * _num_theta * _num_phi * xi_block_size];
+    status = hdf_file->read("VEC_DIR_FX", str_type, vec_dir_fx);
+    vec_dir_fy = new double[nsph * _num_theta * _num_phi * xi_block_size];
+    status = hdf_file->read("VEC_DIR_FY", str_type, vec_dir_fy);
+    vec_dir_fz = new double[nsph * _num_theta * _num_phi * xi_block_size];
+    status = hdf_file->read("VEC_DIR_FZ", str_type, vec_dir_fz);
+    str_type = "FLOAT64_(" + to_string(16 * nsph * ndirs * xi_block_size) + ")";
+    vec_dir_muls = new double[16 * nsph * ndirs * xi_block_size];
+    status = hdf_file->read("VEC_DIR_MULS", str_type, vec_dir_muls);
+    vec_dir_mulslr = new double[16 * nsph * ndirs * xi_block_size];
+    status = hdf_file->read("VEC_DIR_MULSLR", str_type, vec_dir_mulslr);
+    status = hdf_file->close();
+    delete hdf_file;
+  } else {
+    if (hdf_file != NULL) delete hdf_file;
+    UnrecognizedFormatException ex("Error: " + hdf5_name + " not recognized as a valid HDF5 file!");
+    throw ex;
+  }
+}
+
+SphereOutputInfo::~SphereOutputInfo() {
+  delete[] vec_jxi;
+  delete[] vec_ier;
+  delete[] vec_vk;
+  delete[] vec_xi;
+  delete[] vec_sphere_sizes;
+  delete[] vec_sphere_ref_indices;
+  delete[] vec_scs;
+  delete[] vec_abs;
+  delete[] vec_exs;
+  delete[] vec_albeds;
+  delete[] vec_scsrt;
+  delete[] vec_absrt;
+  delete[] vec_exsrt;
+  delete[] vec_fsas;
+  delete[] vec_qschu;
+  delete[] vec_pschu;
+  delete[] vec_s0mag;
+  delete[] vec_cosav;
+  delete[] vec_raprs;
+  delete[] vec_tqek1;
+  delete[] vec_tqek2;
+  delete[] vec_tqsk1;
+  delete[] vec_tqsk2;
+  if (nsph != 1) {
+    delete[] vec_fsat;
+    delete[] vec_qschut;
+    delete[] vec_pschut;
+    delete[] vec_s0magt;
+  }
+  delete[] vec_dir_tidg;
+  delete[] vec_dir_pidg;
+  delete[] vec_dir_tsdg;
+  delete[] vec_dir_psdg;
+  delete[] vec_dir_scand;
+  delete[] vec_dir_cfmp;
+  delete[] vec_dir_cfsp;
+  delete[] vec_dir_sfmp;
+  delete[] vec_dir_sfsp;
+  delete[] vec_dir_un;
+  delete[] vec_dir_uns;
+  delete[] vec_dir_sas11;
+  delete[] vec_dir_sas21;
+  delete[] vec_dir_sas12;
+  delete[] vec_dir_sas22;
+  delete[] vec_dir_fx;
+  delete[] vec_dir_fy;
+  delete[] vec_dir_fz;
+  delete[] vec_dir_muls;
+  delete[] vec_dir_mulslr;
+}
+
+long SphereOutputInfo::compute_size(
+  ScattererConfiguration *sc, GeometryConfiguration *gc,
+  int first_xi, int xi_length
+) {
+  // Size of the configuration set
+  long result = 18 * sizeof(int);
+  result += 12 * sizeof(double);
+  result += 47 * sizeof(long);
+  result += sizeof(dcomplex);
+  // Get configuration parameters
+  int _nsph = gc->number_of_spheres;
+  double _th = gc->in_theta_start;
+  double _thstp = gc->in_theta_step;
+  double _thlst = gc->in_theta_end;
+  int num_theta = (_thstp == 0.0) ? 1 : 1 + (int)((_thlst - _th) / _thstp);
+  double _ths = gc->sc_theta_start;
+  double _thsstp = gc->sc_theta_step;
+  double _thslst = gc->sc_theta_end;
+  int num_thetas = (_thsstp == 0.0) ? 1 : 1 + (int)((_thslst - _ths) / _thsstp);
+  double _ph = gc->in_phi_start;
+  double _phstp = gc->in_phi_step;
+  double _phlst = gc->in_phi_end;
+  int num_phi = (_phstp == 0.0) ? 1 : 1 + (int)((_phlst - _ph) / _phstp);
+  double _phs = gc->sc_phi_start;
+  double _phsstp = gc->sc_phi_step;
+  double _phslst = gc->sc_phi_end;
+  int num_phis = (_phsstp == 0.0) ? 1 : 1 + (int)((_phslst - _phs) / _phsstp);
+  int _ndirs = num_theta * num_thetas * num_phi * num_phis;
+  int _nxi = sc->number_of_scales;
+  int _xi_block_size = (xi_length == 0) ? _nxi : xi_length;
+  int _nconf = sc->configurations;
+  // Size of the data set
+  result += _xi_block_size * (sizeof(short) + sizeof(int));
+  result += 2 * _xi_block_size * sizeof(double);
+  result += 16 * _nconf * _xi_block_size * sizeof(double);
+  result += 2 * _nconf * _xi_block_size * sizeof(dcomplex);
+  result += (num_theta + num_thetas + num_phi + num_phis) * sizeof(double);
+  result += 11 * _ndirs * sizeof(double);
+  result += 4 * _nsph * _ndirs * _xi_block_size * sizeof(dcomplex);
+  result += 3 * _nsph * num_theta * num_phi * sizeof(double);
+  result += 32 * _nsph * _ndirs * _xi_block_size * sizeof(double);
+  if (_nsph != 1) {
+    result += _xi_block_size * sizeof(dcomplex);
+    result += 3 * _xi_block_size * sizeof(double);
+  }
+  return result;
+}
+
+long SphereOutputInfo::compute_size() {
+  // Size of the configuration set
+  long result = 18 * sizeof(int);
+  result += 12 * sizeof(double);
+  result += 47 * sizeof(long);
+  result += sizeof(dcomplex);
+  // Size of the data set
+  result += xi_block_size * (sizeof(short) + sizeof(int));
+  result += 2 * xi_block_size * sizeof(double);
+  result += 16 * configurations * xi_block_size * sizeof(double);
+  result += 2 * configurations * xi_block_size * sizeof(dcomplex);
+  result += (_num_theta + _num_thetas + _num_phi + _num_phis) * sizeof(double);
+  result += 11 * ndirs * sizeof(double);
+  result += 4 * nsph * ndirs * xi_block_size * sizeof(dcomplex);
+  result += 3 * nsph * _num_theta * _num_phi * sizeof(double);
+  result += 32 * nsph * ndirs * xi_block_size * sizeof(double);
+  if (nsph != 1) {
+    result += xi_block_size * sizeof(dcomplex);
+    result += 3 * xi_block_size * sizeof(double);
+  }
+  return result;
+}
+
+int SphereOutputInfo::insert(const SphereOutputInfo &rhs) {
+  int result = 0;
+  result += (rhs.nsph == nsph) ? 0 : 1;
+  result += (rhs.inpol == inpol) ? 0 : 1;
+  result += (rhs.isam == isam) ? 0 : 1;
+  result += (rhs._num_theta == _num_theta) ? 0 : 1;
+  result += (rhs._num_thetas == _num_thetas) ? 0 : 1;
+  result += (rhs._num_phi == _num_phi) ? 0 : 1;
+  result += (rhs._num_phis == _num_phis) ? 0 : 1;
+  result += (rhs.ndirs == ndirs) ? 0 : 1;
+  result += (rhs.exri == exri) ? 0 : 1;
+  result += (rhs.idfc == idfc) ? 0 : 1;
+  result += (rhs.configurations == configurations) ? 0 : 1;
+  if (result == 0) {
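+    // All metadata checks passed: copy the data block, offsetting each vector family by the scale shift (xi1 - _first_xi) times its per-scale record size.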
+    int offset, chunk_size, xi1;
+    xi1 = rhs._first_xi;
+    // Insert vectors whose sizes depend on wavelengths
+    offset = xi1 - _first_xi;
+    chunk_size = rhs.xi_block_size;
+    memcpy(vec_jxi + offset, rhs.vec_jxi, chunk_size * sizeof(int));
+    memcpy(vec_ier + offset, rhs.vec_ier, chunk_size * sizeof(short));
+    memcpy(vec_vk + offset, rhs.vec_vk, chunk_size * sizeof(double));
+    memcpy(vec_xi + offset, rhs.vec_xi, chunk_size * sizeof(double));
+    if (nsph != 1) {
+      memcpy(vec_fsat + offset, rhs.vec_fsat, chunk_size * sizeof(dcomplex));
+      memcpy(vec_qschut + offset, rhs.vec_qschut, chunk_size * sizeof(double));
+      memcpy(vec_pschut + offset, rhs.vec_pschut, chunk_size * sizeof(double));
+      memcpy(vec_s0magt + offset, rhs.vec_s0magt, chunk_size * sizeof(double));
+    }
+
+    // Insert vectors whose sizes depend on configurations and wavelengths
+    offset = (xi1 - _first_xi) * configurations;
+    chunk_size = rhs.xi_block_size * configurations;
+    memcpy(vec_sphere_sizes + offset, rhs.vec_sphere_sizes, chunk_size * sizeof(double));
+    memcpy(vec_sphere_ref_indices + offset, rhs.vec_sphere_ref_indices, chunk_size * sizeof(dcomplex));
+    memcpy(vec_scs + offset, rhs.vec_scs, chunk_size * sizeof(double));
+    memcpy(vec_abs + offset, rhs.vec_abs, chunk_size * sizeof(double));
+    memcpy(vec_exs + offset, rhs.vec_exs, chunk_size * sizeof(double));
+    memcpy(vec_albeds + offset, rhs.vec_albeds, chunk_size * sizeof(double));
+    memcpy(vec_scsrt + offset, rhs.vec_scsrt, chunk_size * sizeof(double));
+    memcpy(vec_absrt + offset, rhs.vec_absrt, chunk_size * sizeof(double));
+    memcpy(vec_exsrt + offset, rhs.vec_exsrt, chunk_size * sizeof(double));
+    memcpy(vec_fsas + offset, rhs.vec_fsas, chunk_size * sizeof(dcomplex));
+    memcpy(vec_qschu + offset, rhs.vec_qschu, chunk_size * sizeof(double));
+    memcpy(vec_pschu + offset, rhs.vec_pschu, chunk_size * sizeof(double));
+    memcpy(vec_s0mag + offset, rhs.vec_s0mag, chunk_size * sizeof(double));
+    memcpy(vec_cosav + offset, rhs.vec_cosav, chunk_size * sizeof(double));
+    memcpy(vec_raprs + offset, rhs.vec_raprs, chunk_size * sizeof(double));
+    memcpy(vec_tqek1 + offset, rhs.vec_tqek1, chunk_size * sizeof(double));
+    memcpy(vec_tqek2 + offset, rhs.vec_tqek2, chunk_size * sizeof(double));
+    memcpy(vec_tqsk1 + offset, rhs.vec_tqsk1, chunk_size * sizeof(double));
+    memcpy(vec_tqsk2 + offset, rhs.vec_tqsk2, chunk_size * sizeof(double));
+    
+    // Insert vectors whose sizes depend on NSPH, directions and wavelengths
+    offset = (xi1 - _first_xi) * nsph * ndirs;
+    chunk_size = rhs.xi_block_size * nsph * ndirs;
+    memcpy(vec_dir_sas11 + offset, rhs.vec_dir_sas11, chunk_size * sizeof(dcomplex));
+    memcpy(vec_dir_sas21 + offset, rhs.vec_dir_sas21, chunk_size * sizeof(dcomplex));
+    memcpy(vec_dir_sas12 + offset, rhs.vec_dir_sas12, chunk_size * sizeof(dcomplex));
+    memcpy(vec_dir_sas22 + offset, rhs.vec_dir_sas22, chunk_size * sizeof(dcomplex));
+    memcpy(vec_dir_muls + 16 * offset, rhs.vec_dir_muls, 16 * chunk_size * sizeof(double));
+    memcpy(vec_dir_mulslr + 16 * offset, rhs.vec_dir_mulslr, 16 * chunk_size * sizeof(double));
+
+    // Insert vectors whose sizes depend on NSPH, incidence directions and wavelengths
+    offset = (xi1 - _first_xi) * nsph * _num_theta * _num_phi;
+    chunk_size = rhs.xi_block_size * nsph * _num_theta * _num_phi;
+    memcpy(vec_dir_fx + offset, rhs.vec_dir_fx, chunk_size * sizeof(double));
+    memcpy(vec_dir_fy + offset, rhs.vec_dir_fy, chunk_size * sizeof(double));
+    memcpy(vec_dir_fz + offset, rhs.vec_dir_fz, chunk_size * sizeof(double));
+    // TODO: fix the vector sizes in HDF5 writer and MPI communicators
+  }
+  return result;
+}
+
+int SphereOutputInfo::write(const std::string &output, const std::string &format) {
+  int result = 0;
+  if (format.compare("LEGACY") == 0) {
+    result = write_legacy(output);
+  } else if (format.compare("HDF5") == 0) {
+    result = write_hdf5(output);
+  } else {
+    string message = "Unknown format mode: \"" + format + "\"";
+    throw UnrecognizedConfigurationException(message);
+  }
+  return result;
+}
+
+int SphereOutputInfo::write_hdf5(const std::string &file_name) {
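+  // Collect the output as (name, type, pointer) record triplets, then build a FileSchema and write all records in a single pass at the end.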
+  List<string> *rec_name_list = new List<string>(1);
+  List<string> *rec_type_list = new List<string>(1);
+  List<void *> *rec_ptr_list = new List<void *>(1);
+  string str_type, str_name;
+  rec_name_list->set(0, "NSPH");
+  rec_type_list->set(0, "INT32_(1)");
+  rec_ptr_list->set(0, &nsph);
+  rec_name_list->append("LM");
+  rec_type_list->append("INT32_(1)");
+  rec_ptr_list->append(&lm);
+  rec_name_list->append("INPOL");
+  rec_type_list->append("INT32_(1)");
+  rec_ptr_list->append(&inpol);
+  rec_name_list->append("NPNT");
+  rec_type_list->append("INT32_(1)");
+  rec_ptr_list->append(&npnt);
+  rec_name_list->append("NPNTTS");
+  rec_type_list->append("INT32_(1)");
+  rec_ptr_list->append(&npntts);
+  rec_name_list->append("ISAM");
+  rec_type_list->append("INT32_(1)");
+  rec_ptr_list->append(&isam);
+  rec_name_list->append("JWTM");
+  rec_type_list->append("INT32_(1)");
+  rec_ptr_list->append(&jwtm);
+  rec_name_list->append("TH_START");
+  rec_type_list->append("FLOAT64_(1)");
+  rec_ptr_list->append(&th);
+  rec_name_list->append("TH_STEP");
+  rec_type_list->append("FLOAT64_(1)");
+  rec_ptr_list->append(&thstp);
+  rec_name_list->append("TH_END");
+  rec_type_list->append("FLOAT64_(1)");
+  rec_ptr_list->append(&thlst);
+  rec_name_list->append("THS_START");
+  rec_type_list->append("FLOAT64_(1)");
+  rec_ptr_list->append(&ths);
+  rec_name_list->append("THS_STEP");
+  rec_type_list->append("FLOAT64_(1)");
+  rec_ptr_list->append(&thsstp);
+  rec_name_list->append("THS_END");
+  rec_type_list->append("FLOAT64_(1)");
+  rec_ptr_list->append(&thslst);
+  rec_name_list->append("PH_START");
+  rec_type_list->append("FLOAT64_(1)");
+  rec_ptr_list->append(&ph);
+  rec_name_list->append("PH_STEP");
+  rec_type_list->append("FLOAT64_(1)");
+  rec_ptr_list->append(&phstp);
+  rec_name_list->append("PH_END");
+  rec_type_list->append("FLOAT64_(1)");
+  rec_ptr_list->append(&phlst);
+  rec_name_list->append("PHS_START");
+  rec_type_list->append("FLOAT64_(1)");
+  rec_ptr_list->append(&phs);
+  rec_name_list->append("PHS_STEP");
+  rec_type_list->append("FLOAT64_(1)");
+  rec_ptr_list->append(&phsstp);
+  rec_name_list->append("PHS_END");
+  rec_type_list->append("FLOAT64_(1)");
+  rec_ptr_list->append(&phslst);
+  rec_name_list->append("EXRI");
+  rec_type_list->append("FLOAT64_(1)");
+  rec_ptr_list->append(&exri);
+  rec_name_list->append("IDFC");
+  rec_type_list->append("INT32_(1)");
+  rec_ptr_list->append(&idfc);
+  rec_name_list->append("NUM_CONF");
+  rec_type_list->append("INT32_(1)");
+  rec_ptr_list->append(&configurations);
+  rec_name_list->append("XI1");
+  rec_type_list->append("INT32_(1)");
+  rec_ptr_list->append(&_first_xi);
+  rec_name_list->append("NXI");
+  rec_type_list->append("INT32_(1)");
+  rec_ptr_list->append(&xi_block_size);
+  rec_name_list->append("VEC_JXI");
+  rec_type_list->append("INT32_(" + to_string(xi_block_size) + ")");
+  rec_ptr_list->append(vec_jxi);
+  rec_name_list->append("VEC_IER");
+  rec_type_list->append("INT16_(" + to_string(xi_block_size) + ")");
+  rec_ptr_list->append(vec_ier);
+  rec_name_list->append("VEC_VK");
+  rec_type_list->append("FLOAT64_(" + to_string(xi_block_size) + ")");
+  rec_ptr_list->append(vec_vk);
+  rec_name_list->append("VEC_XI");
+  rec_type_list->append("FLOAT64_(" + to_string(xi_block_size) + ")");
+  rec_ptr_list->append(vec_xi);
+  rec_name_list->append("VEC_SPH_SIZES");
+  rec_type_list->append("FLOAT64_(" + to_string(configurations * xi_block_size) + ")");
+  rec_ptr_list->append(vec_sphere_sizes);
+  rec_name_list->append("VEC_SPH_REFRI");
+  rec_type_list->append("FLOAT64_(" + to_string(2 * configurations * xi_block_size) + ")");
+  rec_ptr_list->append(vec_sphere_ref_indices);
+  str_type = "FLOAT64_(" + to_string(configurations * xi_block_size) + ")";
+  rec_name_list->append("VEC_SCS");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_scs);
+  rec_name_list->append("VEC_ABS");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_abs);
+  rec_name_list->append("VEC_EXS");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_exs);
+  rec_name_list->append("VEC_ALBEDS");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_albeds);
+  rec_name_list->append("VEC_SCSRT");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_scsrt);
+  rec_name_list->append("VEC_ABSRT");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_absrt);
+  rec_name_list->append("VEC_EXSRT");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_exsrt);
+  str_type = "FLOAT64_(" + to_string(2 * configurations * xi_block_size) + ")";
+  rec_name_list->append("VEC_FSAS");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_fsas);
+  str_type = "FLOAT64_(" + to_string(configurations * xi_block_size) + ")";
+  rec_name_list->append("VEC_QSCHU");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_qschu);
+  rec_name_list->append("VEC_PSCHU");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_pschu);
+  rec_name_list->append("VEC_S0MAG");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_s0mag);
+  rec_name_list->append("VEC_COSAV");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_cosav);
+  rec_name_list->append("VEC_RAPRS");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_raprs);
+  rec_name_list->append("VEC_TQEK1");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_tqek1);
+  rec_name_list->append("VEC_TQEK2");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_tqek2);
+  rec_name_list->append("VEC_TQSK1");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_tqsk1);
+  rec_name_list->append("VEC_TQSK2");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_tqsk2);
+  if (nsph != 1) {
+    str_type = "FLOAT64_(" + to_string(2 * xi_block_size) + ")";
+    rec_name_list->append("VEC_FSAT");
+    rec_type_list->append(str_type);
+    rec_ptr_list->append(vec_fsat);
+    str_type = "FLOAT64_(" + to_string(xi_block_size) + ")";
+    rec_name_list->append("VEC_QSCHUT");
+    rec_type_list->append(str_type);
+    rec_ptr_list->append(vec_qschut);
+    rec_name_list->append("VEC_PSCHUT");
+    rec_type_list->append(str_type);
+    rec_ptr_list->append(vec_pschut);
+    rec_name_list->append("VEC_S0MAGT");
+    rec_type_list->append(str_type);
+    rec_ptr_list->append(vec_s0magt);
+  }
+  str_type = "FLOAT64_(" + to_string(ndirs) + ")";
+  rec_name_list->append("VEC_DIR_SCAND");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_scand);
+  rec_name_list->append("VEC_DIR_CFMP");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_cfmp);
+  rec_name_list->append("VEC_DIR_CFSP");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_cfsp);
+  rec_name_list->append("VEC_DIR_SFMP");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_sfmp);
+  rec_name_list->append("VEC_DIR_SFSP");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_sfsp);
+  str_type = "FLOAT64_(" + to_string(3 * ndirs) + ")";
+  rec_name_list->append("VEC_DIR_UN");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_un);
+  rec_name_list->append("VEC_DIR_UNS");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_uns);
+  str_type = "FLOAT64_(" + to_string(2 * nsph * ndirs * xi_block_size) + ")";
+  rec_name_list->append("VEC_DIR_SAS11");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_sas11);
+  rec_name_list->append("VEC_DIR_SAS21");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_sas21);
+  rec_name_list->append("VEC_DIR_SAS12");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_sas12);
+  rec_name_list->append("VEC_DIR_SAS22");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_sas22);
+  str_type = "FLOAT64_(" + to_string(nsph * _num_theta * _num_phi * xi_block_size) + ")";
+  rec_name_list->append("VEC_DIR_FX");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_fx);
+  rec_name_list->append("VEC_DIR_FY");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_fy);
+  rec_name_list->append("VEC_DIR_FZ");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_fz);
+  str_type = "FLOAT64_(" + to_string(16 * nsph * ndirs * xi_block_size) + ")";
+  rec_name_list->append("VEC_DIR_MULS");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_muls);
+  rec_name_list->append("VEC_DIR_MULSLR");
+  rec_type_list->append(str_type);
+  rec_ptr_list->append(vec_dir_mulslr);
+  
+  // Convert the lists to arrays and write them to HDF5
+  string *rec_names = rec_name_list->to_array();
+  string *rec_types = rec_type_list->to_array();
+  void **rec_pointers = rec_ptr_list->to_array();
+  const int rec_num = rec_name_list->length();
+  FileSchema *schema = new FileSchema(rec_num, rec_types, rec_names);
+  HDFFile *hdf_file = HDFFile::from_schema(*schema, file_name, H5F_ACC_TRUNC);
+  for (int ri = 0; ri < rec_num; ri++)
+    hdf_file->write(rec_names[ri], rec_types[ri], rec_pointers[ri]);
+  hdf_file->close();
+  
+  // Clean memory
+  delete rec_name_list;
+  delete rec_type_list;
+  delete rec_ptr_list;
+  delete[] rec_names;
+  delete[] rec_types;
+  delete[] rec_pointers;
+  delete schema;
+  delete hdf_file;
+  return 0;
+}
+
+int SphereOutputInfo::write_legacy(const std::string &file_name) {
+  const dcomplex cc0 = 0.0 + I * 0.0;
+  int result = 0;
+  int nks = _num_thetas * _num_phis;
+  FILE *p_outfile = fopen(file_name.c_str(), "w");
+  if (p_outfile != NULL) {
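+    // Write the legacy file preamble only if this output block starts at the first scale (JXI = 1)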
+    if (vec_jxi[0] == 1) {
+      fprintf(p_outfile, " READ(IR,*)NSPH,LM,INPOL,NPNT,NPNTTS,ISAM\n");
+      fprintf(
+        p_outfile, " %5d%5d%5d%5d%5d%5d\n",
+        nsph, lm, inpol, npnt, npntts, isam
+      );
+      fprintf(p_outfile, " READ(IR,*)TH,THSTP,THLST,THS,THSSTP,THSLST\n");
+      fprintf(
+        p_outfile, "  %9.3lE %9.3lE %9.3lE %9.3lE %9.3lE %9.3lE\n",
+        th, thstp, thlst, ths, thsstp, thslst
+      );
+      fprintf(p_outfile, " READ(IR,*)PH,PHSTP,PHLST,PHS,PHSSTP,PHSLST\n");
+      fprintf(
+        p_outfile, "  %9.3lE %9.3lE %9.3lE %9.3lE %9.3lE %9.3lE\n",
+        ph, phstp, phlst, phs, phsstp, phslst
+      );
+      fprintf(p_outfile, " READ(IR,*)JWTM\n");
+      fprintf(p_outfile, " %5d\n", jwtm);
+      fprintf(p_outfile, "  READ(ITIN)NSPHT\n");
+      fprintf(p_outfile, "  READ(ITIN)(IOG(I),I=1,NSPH)\n");
+      fprintf(p_outfile, "  READ(ITIN)EXDC,WP,XIP,IDFC,NXI\n");
+      fprintf(p_outfile, "  READ(ITIN)(XIV(I),I=1,NXI)\n");
+      fprintf(p_outfile, "  READ(ITIN)NSHL(I),ROS(I)\n");
+      fprintf(p_outfile, "  READ(ITIN)(RCF(I,NS),NS=1,NSH)\n \n");
+      fprintf(p_outfile, "  REFR. INDEX OF EXTERNAL MEDIUM=%15.7lE\n", exri);
+      if (inpol == 0) fprintf(p_outfile, "   LIN\n \n");
+      else fprintf(p_outfile, "  CIRC\n \n");
+      if (idfc < 0) {
+	fprintf(p_outfile, "  VK=%15.7lE, XI IS SCALE FACTOR FOR LENGTHS\n \n", vec_vk[0]);
+      }
+    } // End preamble writing
+    // Wavelength loop
+    for (int jxi = 0; jxi < xi_block_size; jxi++) {
+      int done_dirs = 0;
+      fprintf(p_outfile, "========== JXI =%3d ====================\n", jxi + 1);
+      if (idfc >= 0) {
+	fprintf(p_outfile, "  VK=%15.7lE, XI=%15.7lE\n", vec_xi[jxi], vec_vk[jxi]);
+      } else { // IDFC < 0
+	fprintf(p_outfile, "  XI=%15.7lE\n", vec_xi[jxi]);
+      }
+      if (vec_ier[jxi] == 1) {
+	fprintf(p_outfile, "  STOP IN DME\n");
+	fprintf(
+	  p_outfile, "  AT %1d LCALC=%3d TOO SMALL WITH ARG=%15.7lE+i(%15.7lE)\n",
+	  (int)vec_ier[jxi], lcalc, real(arg), imag(arg)
+	);
+      }
+      for (int ci = 0; ci < configurations; ci++) {
+	int cindex = jxi * configurations + ci;
+	fprintf(p_outfile, "     SPHERE %2d\n", ci + 1);
+	if (vec_sphere_ref_indices[cindex] == cc0) {
+	  fprintf(p_outfile, "  SIZE=%15.7lE\n", vec_sphere_sizes[cindex]);
+	} else {
+	  fprintf(
+		  p_outfile, "  SIZE=%15.7lE, REFRACTIVE INDEX=%15.7lE%15.7lE\n",
+		  vec_sphere_sizes[cindex], real(vec_sphere_ref_indices[cindex]),
+		  imag(vec_sphere_ref_indices[cindex])
+	  );
+	}
+	fprintf(p_outfile, " ----- SCS ----- ABS ----- EXS ----- ALBEDS --\n");
+	fprintf(
+	  p_outfile, " %14.7lE%15.7lE%15.7lE%15.7lE\n",
+	  vec_scs[cindex], vec_abs[cindex], vec_exs[cindex], vec_albeds[cindex]
+        );
+	fprintf(p_outfile, " ---- SCS/GS -- ABS/GS -- EXS/GS ---\n");
+	fprintf(
+	  p_outfile, " %14.7lE%15.7lE%15.7lE\n",
+	  vec_scsrt[cindex], vec_absrt[cindex], vec_exsrt[cindex]
+        );
+	fprintf(
+          p_outfile, "  FSAS=%15.7lE%15.7lE\n",
+	  real(vec_fsas[cindex]), imag(vec_fsas[cindex])
+        );
+	fprintf(
+	  p_outfile, "  QSCHU=%15.7lE, PSCHU=%15.7lE, S0MAG=%15.7lE\n",
+	  vec_qschu[cindex], vec_pschu[cindex], vec_s0mag[cindex]
+        );
+	fprintf(
+	  p_outfile, "  COSAV=%15.7lE, RAPRS=%15.7lE\n",
+	  vec_cosav[cindex], vec_raprs[cindex]
+        );
+	fprintf(
+	  p_outfile, "  IPO= 1, TQEk=%15.7lE, TQSk=%15.7lE\n",
+	  vec_tqek1[cindex], vec_tqsk1[cindex]
+	);
+	fprintf(
+	  p_outfile, "  IPO= 2, TQEk=%15.7lE, TQSk=%15.7lE\n",
+	  vec_tqek2[cindex], vec_tqsk2[cindex]
+        );
+      } // ci configuration loop
+      if (nsph != 1) {
+	fprintf(
+	  p_outfile, "  FSAT=(%15.7lE,%15.7lE)\n",
+	  real(vec_fsat[jxi]), imag(vec_fsat[jxi])
+	);
+	fprintf(
+	  p_outfile, "  QSCHU=%15.7lE, PSCHU=%15.7lE, S0MAG=%15.7lE\n",
+	  vec_qschut[jxi], vec_pschut[jxi], vec_s0magt[jxi]
+	);
+      }
+      for (int jth = 0; jth < _num_theta; jth++) {
+	for (int jph = 0; jph < _num_phi; jph++) {
+	  for (int jths = 0; jths < _num_thetas; jths++) {
+	    for (int jphs = 0; jphs < _num_phis; jphs++) {
+	      int dir_index = ndirs * jxi + done_dirs;
+	      bool goto190 = (nks == 1) && ((jxi > 0) || (jth > 0) || (jph > 0));
+	      fprintf(
+		p_outfile, "********** JTH =%3d, JPH =%3d, JTHS =%3d, JPHS =%3d ********************\n",
+		jth + 1, jph + 1, jths + 1, jphs + 1
+	      );
+	      fprintf(
+		p_outfile, "  TIDG=%10.3lE, PIDG=%10.3lE, TSDG=%10.3lE, PSDG=%10.3lE\n",
+		th + jth * thstp,
+		ph + jph * phstp,
+		ths + jths * thsstp,
+		phs + jphs * phsstp
+	      );
+	      fprintf(p_outfile, "  SCAND=%10.3lE\n", vec_dir_scand[done_dirs]);
+	      fprintf(
+		p_outfile, "  CFMP=%15.7lE, SFMP=%15.7lE\n",
+		vec_dir_cfmp[done_dirs], vec_dir_sfmp[done_dirs]
+	      );
+	      fprintf(
+		p_outfile, "  CFSP=%15.7lE, SFSP=%15.7lE\n",
+		vec_dir_cfsp[done_dirs], vec_dir_sfsp[done_dirs]
+	      );
+	      if (isam >= 0) {
+		fprintf(
+		  p_outfile, "  UNI=(%12.5lE,%12.5lE,%12.5lE)\n",
+		  vec_dir_un[3 * done_dirs],
+		  vec_dir_un[3 * done_dirs + 1],
+		  vec_dir_un[3 * done_dirs + 2]
+		);
+		fprintf(
+		  p_outfile, "  UNS=(%12.5lE,%12.5lE,%12.5lE)\n",
+		  vec_dir_uns[3 * done_dirs],
+		  vec_dir_uns[3 * done_dirs + 1],
+		  vec_dir_uns[3 * done_dirs + 2]
+		);
+	      } else {
+		fprintf(
+		  p_outfile, "  UN=(%12.5lE,%12.5lE,%12.5lE)\n",
+		  vec_dir_un[3 * done_dirs],
+		  vec_dir_un[3 * done_dirs + 1],
+		  vec_dir_un[3 * done_dirs + 2]
+		);
+	      }
+	      for (int i = 0; i < nsph; i++) {
+		int cindex = jxi * nsph * ndirs + nsph * done_dirs + i;
+		fprintf(p_outfile, "     SPHERE %2d\n", i + 1);
+		fprintf(
+		  p_outfile, "  SAS(1,1)=%15.7lE%15.7lE, SAS(2,1)=%15.7lE%15.7lE\n",
+		  real(vec_dir_sas11[cindex]),
+		  imag(vec_dir_sas11[cindex]),
+		  real(vec_dir_sas21[cindex]),
+		  imag(vec_dir_sas21[cindex])
+		);
+		fprintf(
+		  p_outfile, "  SAS(1,2)=%15.7lE%15.7lE, SAS(2,2)=%15.7lE%15.7lE\n",
+		  real(vec_dir_sas12[cindex]),
+		  imag(vec_dir_sas12[cindex]),
+		  real(vec_dir_sas22[cindex]),
+		  imag(vec_dir_sas22[cindex])
+		);
+		if (jths == 0 && jphs == 0) {
+		  fprintf(
+		    p_outfile, "  Fx=%15.7lE, Fy=%15.7lE, Fz=%15.7lE\n",
+		    vec_dir_fx[jxi * nsph * _num_theta * _num_phi + jth * nsph * _num_phi + jph * nsph + i],
+		    vec_dir_fy[jxi * nsph * _num_theta * _num_phi + jth * nsph * _num_phi + jph * nsph + i],
+		    vec_dir_fz[jxi * nsph * _num_theta * _num_phi + jth * nsph * _num_phi + jph * nsph + i]
+		  );
+		}
+		fprintf(p_outfile, "  MULS\n");
+		for (int j = 0; j < 4; j++) {
+		  int muls_index = 16 * cindex + 4 * j;
+		  fprintf(
+		    p_outfile,  "        %15.7lE%15.7lE%15.7lE%15.7lE\n",
+		    vec_dir_muls[muls_index],
+		    vec_dir_muls[muls_index + 1],
+		    vec_dir_muls[muls_index + 2],
+		    vec_dir_muls[muls_index + 3]
+		  );
+		} // j muls loop
+		fprintf(p_outfile, "  MULSLR\n");
+		for (int j = 0; j < 4; j++) {
+		  int muls_index = 16 * cindex + 4 * j;
+		  fprintf(
+		    p_outfile,  "        %15.7lE%15.7lE%15.7lE%15.7lE\n",
+		    vec_dir_mulslr[muls_index],
+		    vec_dir_mulslr[muls_index + 1],
+		    vec_dir_mulslr[muls_index + 2],
+		    vec_dir_mulslr[muls_index + 3]
+		  );
+		} // j mulslr loop
+	      } // i sphere loop
+	      done_dirs++;
+	    } // jphs loop
+	  } // jths loop
+	} // jph loop
+      } // jth loop
+    } // jxi wavelength loop
+    fclose(p_outfile);
+  } else {
+    result = -1;
+  }
+  return result;
+}
+
+#ifdef MPI_VERSION
+int SphereOutputInfo::mpireceive(const mixMPI *mpidata, int pid) {
+  int result = 0;
+  int chk_nsph, chk_inpol, chk_isam, chk_num_theta, chk_num_thetas;
+  int chk_num_phi, chk_num_phis, chk_ndirs, chk_idfc, chk_configs;
+  double chk_exri;
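+  // Receive output metadata from process pid for configuration cross-check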
+  MPI_Recv(&chk_nsph, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  MPI_Recv(&chk_inpol, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  MPI_Recv(&chk_isam, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  MPI_Recv(&chk_num_theta, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  MPI_Recv(&chk_num_thetas, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  MPI_Recv(&chk_num_phi, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  MPI_Recv(&chk_num_phis, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  MPI_Recv(&chk_ndirs, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  MPI_Recv(&chk_exri, 1, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  MPI_Recv(&chk_idfc, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  MPI_Recv(&chk_configs, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  result += (chk_nsph == nsph) ? 0 : 1;
+  result += (chk_inpol == inpol) ? 0 : 1;
+  result += (chk_isam == isam) ? 0 : 1;
+  result += (chk_num_theta == _num_theta) ? 0 : 1;
+  result += (chk_num_thetas == _num_thetas) ? 0 : 1;
+  result += (chk_num_phi == _num_phi) ? 0 : 1;
+  result += (chk_num_phis == _num_phis) ? 0 : 1;
+  result += (chk_ndirs == ndirs) ? 0 : 1;
+  result += (chk_exri == exri) ? 0 : 1;
+  result += (chk_idfc == idfc) ? 0 : 1;
+  result += (chk_configs == configurations) ? 0 : 1;
+  if (result == 0) {
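+    // Metadata is consistent: acknowledge process pid with result = 0, then receive its data block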
+    int xi1, offset, chunk_size;
+    MPI_Send(&result, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD);
+    MPI_Recv(&xi1, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    // Receive vectors of single values per scale
+    MPI_Recv(&chunk_size, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    offset = xi1 - _first_xi;
+    MPI_Recv(vec_jxi + offset, chunk_size, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_ier + offset, chunk_size, MPI_SHORT, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_vk + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_xi + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    if (nsph != 1) {
+      MPI_Recv(vec_fsat + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 
+      MPI_Recv(vec_qschut + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_pschut + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_s0magt + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+
+    // Receive vectors whose sizes depend on configurations and wavelengths
+    MPI_Recv(&chunk_size, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    offset = (xi1 - _first_xi) * configurations;
+    MPI_Recv(vec_sphere_sizes + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_sphere_ref_indices + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_scs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_abs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_exs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_albeds + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_scsrt + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_absrt + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_exsrt + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_fsas + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_qschu + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_pschu + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_s0mag + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_cosav + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_raprs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_tqek1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_tqek2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_tqsk1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_tqsk2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      
+    // Receive vectors whose sizes depend on NSPH, directions and wavelengths
+    MPI_Recv(&chunk_size, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    offset = (xi1 - _first_xi) * nsph * ndirs;
+    MPI_Recv(vec_dir_sas11 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_dir_sas21 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_dir_sas12 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_dir_sas22 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_dir_muls + 16 * offset, 16 * chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_dir_mulslr + 16 * offset, 16 * chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+    // Receive vectors whose sizes depend on NSPH, incidence directions and wavelengths
+    MPI_Recv(&chunk_size, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    offset = (xi1 - _first_xi) * nsph * _num_theta * _num_phi;
+    MPI_Recv(vec_dir_fx + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_dir_fy + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(vec_dir_fz + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  } else {
+    MPI_Send(&result, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD);
+  }
+  return result;
+}
+
+int SphereOutputInfo::mpisend(const mixMPI *mpidata) {
+  int result = 0;
+  int chunk_size;
+  // Send output metadata for configuration cross-check
+  MPI_Send(&nsph, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+  MPI_Send(&inpol, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+  MPI_Send(&isam, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+  MPI_Send(&_num_theta, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+  MPI_Send(&_num_thetas, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+  MPI_Send(&_num_phi, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+  MPI_Send(&_num_phis, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+  MPI_Send(&ndirs, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+  MPI_Send(&exri, 1, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+  MPI_Send(&idfc, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+  MPI_Send(&configurations, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+  // Wait for process 0 to cross-check the configuration
+  MPI_Recv(&result, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  if (result == 0) {
+    // Process 0 confirmed the consistency of configuration. Send the data.
+    // Send vectors of single values per scale
+    MPI_Send(&_first_xi, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(&xi_block_size, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_jxi, xi_block_size, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_ier, xi_block_size, MPI_SHORT, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_vk, xi_block_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_xi, xi_block_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    if (nsph != 1) {
+      MPI_Send(vec_fsat, xi_block_size, MPI_C_DOUBLE_COMPLEX, 0, 10, MPI_COMM_WORLD);
+      MPI_Send(vec_qschut, xi_block_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+      MPI_Send(vec_pschut, xi_block_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+      MPI_Send(vec_s0magt, xi_block_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    }
+
+    // Send vectors whose sizes depend on configurations and scales
+    chunk_size = xi_block_size * configurations;
+    MPI_Send(&chunk_size, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_sphere_sizes, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_sphere_ref_indices, chunk_size, MPI_C_DOUBLE_COMPLEX, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_scs, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_abs, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_exs, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_albeds, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_scsrt, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_absrt, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_exsrt, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_fsas, chunk_size, MPI_C_DOUBLE_COMPLEX, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_qschu, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_pschu, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_s0mag, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_cosav, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_raprs, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_tqek1, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_tqek2, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_tqsk1, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_tqsk2, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+
+    // Send vectors whose sizes depend on NSPH, directions and wavelengths
+    chunk_size = xi_block_size * nsph * ndirs;
+    MPI_Send(&chunk_size, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_dir_sas11, chunk_size, MPI_C_DOUBLE_COMPLEX, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_dir_sas21, chunk_size, MPI_C_DOUBLE_COMPLEX, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_dir_sas12, chunk_size, MPI_C_DOUBLE_COMPLEX, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_dir_sas22, chunk_size, MPI_C_DOUBLE_COMPLEX, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_dir_muls, 16 * chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_dir_mulslr, 16 * chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+
+    // Send vectors whose sizes depend on NSPH, incidence directions and wavelengths
+    chunk_size = xi_block_size * nsph * _num_theta * _num_phi;
+    MPI_Send(&chunk_size, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_dir_fx, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_dir_fy, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+    MPI_Send(vec_dir_fz, chunk_size, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD);
+  }
+  return result;
+}
+#endif // MPI_VERSION
+// >>> END OF SphereOutputInfo CLASS IMPLEMENTATION <<<
diff --git a/src/sphere/np_sphere.cpp b/src/sphere/np_sphere.cpp
index 8c369947763b15011ff243f00e57d39209064d78..e83f1edeedefdc422b30f89540fd1d0b8642ee93 100644
--- a/src/sphere/np_sphere.cpp
+++ b/src/sphere/np_sphere.cpp
@@ -34,6 +34,12 @@
 #include <cstdio>
 #include <string>
 
+#ifdef USE_MPI
+#ifndef MPI_VERSION
+#include <mpi.h>
+#endif
+#endif
+
 #ifndef INCLUDE_TYPES_H_
 #include "../include/types.h"
 #endif
@@ -42,9 +48,13 @@
 #include "../include/Configuration.h"
 #endif
 
+#ifndef INCLUDE_COMMONS_H_
+#include "../include/Commons.h"
+#endif
+
 using namespace std;
 
-extern void sphere(const string& config_file, const string& data_file, const string& output_path);
+extern void sphere(const string& config_file, const string& data_file, const string& output_path, const mixMPI *mpidata);
 
 /*! \brief Main program entry point.
  *
@@ -59,6 +69,15 @@ extern void sphere(const string& config_file, const string& data_file, const str
  * \return result: `int` An exit code passed to the OS (0 for succesful execution).
  */
 int main(int argc, char **argv) {
+  int ierr = 0;
+#ifdef MPI_VERSION
+  ierr = MPI_Init(&argc, &argv);
+  // create and initialise class with essential MPI data
+  mixMPI *mpidata = new mixMPI(MPI_COMM_WORLD);
+#else
+  // create the class with dummy data if we are not using MPI at all
+  mixMPI *mpidata = new mixMPI();
+#endif
   string config_file = "../../test_data/sphere/DEDFB";
   string data_file = "../../test_data/sphere/DSPH";
   string output_path = ".";
@@ -67,6 +86,10 @@ int main(int argc, char **argv) {
     data_file = string(argv[2]);
     output_path = string(argv[3]);
   }
-  sphere(config_file, data_file, output_path);
-  return 0;
+  sphere(config_file, data_file, output_path, mpidata);
+#ifdef MPI_VERSION
+  MPI_Finalize();
+#endif
+  delete mpidata;
+  return ierr;
 }
diff --git a/src/sphere/sphere.cpp b/src/sphere/sphere.cpp
index 9f8c661698188683e17ea41db885f12f1fd658b3..033e111cad90872665c1ccb4fb7e191f000bdd37 100644
--- a/src/sphere/sphere.cpp
+++ b/src/sphere/sphere.cpp
@@ -21,8 +21,19 @@
 #include <cstdio>
 #include <exception>
 #include <fstream>
+#include <hdf5.h>
 #include <string>
 
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#ifdef USE_MPI
+#ifndef MPI_VERSION
+#include <mpi.h>
+#endif
+#endif
+
 #ifndef INCLUDE_TYPES_H_
 #include "../include/types.h"
 #endif
@@ -51,642 +62,1160 @@
 #include "../include/TransitionMatrix.h"
 #endif
 
+#ifndef INCLUDE_LIST_H_
+#include "../include/List.h"
+#endif
+
+#ifndef INCLUDE_FILE_IO_H_
+#include "../include/file_io.h"
+#endif
+
+#ifndef INCLUDE_OUTPUTS_H_
+#include "../include/outputs.h"
+#endif
+
+#ifndef INCLUDE_ITERATION_DATA_H_
+#include "../include/IterationData.h"
+#endif
+
 using namespace std;
 
+/*! \brief Main calculation cycle for a single wavelength scale.
+ *
+ *  \param jxi488: `int` Wavelength loop index.
+ *  \param sconf: `ScattererConfiguration *` Pointer to a `ScattererConfiguration` object.
+ *  \param gconf: `GeometryConfiguration *` Pointer to a `GeometryConfiguration` object.
+ *  \param sa: `ScatteringAngles *` Pointer to a `ScatteringAngles` object.
+ *  \param sid: `SphereIterationData *` Pointer to a `SphereIterationData` object.
+ *  \param oi: `SphereOutputInfo *` Pointer to a `SphereOutputInfo` object.
+ *  \param output_path: `const string &` Path to the output directory.
+ *  \param vtppoanp: `VirtualBinaryFile *` Pointer to a `VirtualBinaryFile` object.
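+ *  \return jer: `int` Error code of the iteration (0 on success).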
+ */
+int sphere_jxi488_cycle(
+  int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf,
+  ScatteringAngles *sa, SphereIterationData *sid, SphereOutputInfo *oi,
+  const string& output_path, VirtualBinaryFile *vtppoanp
+);
+
 /*! \brief C++ implementation of SPH
  *
  *  \param config_file: `string` Name of the configuration file.
  *  \param data_file: `string` Name of the input data file.
  *  \param output_path: `string` Directory to write the output files in.
+ *  \param mpidata: `const mixMPI *` Pointer to a mixMPI data structure.
  */
-void sphere(const string& config_file, const string& data_file, const string& output_path) {
+void sphere(const string& config_file, const string& data_file, const string& output_path, const mixMPI *mpidata) {
   Logger *logger = new Logger(LOG_INFO);
-  dcomplex arg, s0, tfsas;
-  double th, ph;
-  logger->log("INFO: making legacy configuration...\n");
-  ScattererConfiguration *sconf = NULL;
-  try {
-    sconf = ScattererConfiguration::from_dedfb(config_file);
-  } catch(const OpenConfigurationFileException &ex) {
-    logger->err("\nERROR: failed to open scatterer configuration file.\n");
-    string message = ex.what();
-    logger->err("FILE: " + message + "\n");
-    delete logger;
-    exit(1);
-  }
-  sconf->write_formatted(output_path + "/c_OEDFB");
-  sconf->write_binary(output_path + "/c_TEDF");
-  sconf->write_binary(output_path + "/c_TEDF.hd5", "HDF5");
-  GeometryConfiguration *gconf = NULL;
-  try {
-    gconf = GeometryConfiguration::from_legacy(data_file);
-  } catch(const OpenConfigurationFileException &ex) {
-    logger->err("\nERROR: failed to open geometry configuration file.\n");
-    string message = ex.what();
-    logger->err("FILE: " + message + "\n");
-    if (sconf != NULL) delete sconf;
-    delete logger;
-    exit(1);
-  }
-  int s_nsph = sconf->number_of_spheres;
-  int nsph = gconf->number_of_spheres;
-  if (s_nsph == nsph) {
-    int isq, ibf;
-    double *argi, *args, *gaps;
-    double cost, sint, cosp, sinp;
-    double costs, sints, cosps, sinps;
-    double scan;
-    double *duk = new double[3];
-    double *u = new double[3];
-    double *us = new double[3];
-    double *un = new double[3];
-    double *uns = new double[3];
-    double *up = new double[3];
-    double *ups = new double[3];
-    double *upmp = new double[3];
-    double *upsmp = new double[3];
-    double *unmp = new double[3];
-    double *unsmp = new double[3];
-    double **cmul = new double*[4];
-    double **cmullr = new double*[4];
-    for (int i = 0; i < 4; i++) {
-      cmul[i] = new double[4];
-      cmullr[i] = new double[4];
-    }
-    dcomplex **tqspe, **tqsps;
-    double **tqse, **tqss;
-    tqse = new double*[2];
-    tqss = new double*[2];
-    tqspe = new dcomplex*[2];
-    tqsps = new dcomplex*[2];
-    for (int ti = 0; ti < 2; ti++) {
-      tqse[ti] = new double[2]();
-      tqss[ti] = new double[2]();
-      tqspe[ti] = new dcomplex[2]();
-      tqsps[ti] = new dcomplex[2]();
+  int device_count = 0;
+
+  //===========================
+  // the following only happens on MPI process 0
+  //===========================
+  if (mpidata->rank == 0) {
+    logger->log("INFO: making legacy configuration...");
+    ScattererConfiguration *sconf = NULL;
+    try {
+      sconf = ScattererConfiguration::from_dedfb(config_file);
+    } catch(const OpenConfigurationFileException &ex) {
+      logger->err("\nERROR: failed to open scatterer configuration file.\n");
+      string message = ex.what();
+      logger->err("FILE: " + message + "\n");
+      delete logger;
+      return;
     }
-    double frx = 0.0, fry = 0.0, frz = 0.0;
-    double cfmp, cfsp, sfmp, sfsp;
-    int jw;
-    int l_max = gconf->l_max;
-    ParticleDescriptor *c1 = new ParticleDescriptorSphere(gconf, sconf);
-    int npnt = gconf->npnt;
-    int npntts = gconf->npntts;
-    int in_pol = gconf->in_pol;
-    int meridional_type = gconf->iavm;
-    int jwtm = gconf->jwtm;
-    double in_theta_start = gconf->in_theta_start;
-    double in_theta_step = gconf->in_theta_step;
-    double in_theta_end = gconf->in_theta_end;
-    double sc_theta_start = gconf->sc_theta_start;
-    double sc_theta_step = gconf->sc_theta_step;
-    double sc_theta_end = gconf->sc_theta_end;
-    double in_phi_start = gconf->in_phi_start;
-    double in_phi_step = gconf->in_phi_step;
-    double in_phi_end = gconf->in_phi_end;
-    double sc_phi_start = gconf->sc_phi_start;
-    double sc_phi_step = gconf->sc_phi_step;
-    double sc_phi_end = gconf->sc_phi_end;
-    argi = new double[1];
-    args = new double[1];
-    gaps = new double[2];
-    FILE *output = fopen((output_path + "/c_OSPH").c_str(), "w");
-    fprintf(output, " READ(IR,*)NSPH,LM,INPOL,NPNT,NPNTTS,ISAM\n");
-    fprintf(
-	    output,
-	    " %5d%5d%5d%5d%5d%5d\n",
-	    nsph,
-	    l_max,
-	    in_pol,
-	    npnt,
-	    npntts,
-	    meridional_type
-	    );
-    fprintf(output, " READ(IR,*)TH,THSTP,THLST,THS,THSSTP,THSLST\n");
-    fprintf(
-	    output,
-	    "  %9.3lE %9.3lE %9.3lE %9.3lE %9.3lE %9.3lE\n",
-	    in_theta_start,
-	    in_theta_step,
-	    in_theta_end,
-	    sc_theta_start,
-	    sc_theta_step,
-	    sc_theta_end
-	    );
-    fprintf(output, " READ(IR,*)PH,PHSTP,PHLST,PHS,PHSSTP,PHSLST\n");
-    fprintf(
-	    output,
-	    "  %9.3lE %9.3lE %9.3lE %9.3lE %9.3lE %9.3lE\n",
-	    in_phi_start,
-	    in_phi_step,
-	    in_phi_end,
-	    sc_phi_start,
-	    sc_phi_step,
-	    sc_phi_end
-	    );
-    fprintf(output, " READ(IR,*)JWTM\n");
-    fprintf(output, " %5d\n", jwtm);
-    fprintf(output, "  READ(ITIN)NSPHT\n");
-    fprintf(output, "  READ(ITIN)(IOG(I),I=1,NSPH)\n");
-    fprintf(output, "  READ(ITIN)EXDC,WP,XIP,IDFC,NXI\n");
-    fprintf(output, "  READ(ITIN)(XIV(I),I=1,NXI)\n");
-    fprintf(output, "  READ(ITIN)NSHL(I),ROS(I)\n");
-    fprintf(output, "  READ(ITIN)(RCF(I,NS),NS=1,NSH)\n \n");
-    double sml = 1.0e-3;
-    int nth = 0, nph = 0;
-    if (in_theta_step != 0.0)
-      nth = int((in_theta_end - in_theta_start) / in_theta_step + sml);
-    nth += 1;
-    if (in_phi_step != 0.0)
-      nph = int((in_phi_end - in_phi_start) / in_phi_step + sml);
-    nph += 1;
-    int nths = 0, nphs = 0;
-    double thsca;
-    if (meridional_type > 1) { // ISAM > 1, fixed scattering angle
-      nths = 1;
-      thsca = sc_theta_start - in_theta_start;
-    } else { //ISAM <= 1
-      if (in_theta_step != 0.0)
-	nths = int((sc_theta_end - sc_theta_start) / sc_theta_step + sml);
-      nths += 1;
-      if (meridional_type == 1) { // ISAM = 1
-	nphs = 1;
-      } else { // ISAM < 1
-	if (sc_phi_step != 0.0)
-	  nphs = int((sc_phi_end - sc_phi_start) / sc_phi_step + sml);
-	nphs += 1;
-      }
+    sconf->write_formatted(output_path + "/c_OEDFB");
+    sconf->write_binary(output_path + "/c_TEDF");
+    sconf->write_binary(output_path + "/c_TEDF.hd5", "HDF5");
+    GeometryConfiguration *gconf = NULL;
+    try {
+      gconf = GeometryConfiguration::from_legacy(data_file);
+    } catch(const OpenConfigurationFileException &ex) {
+      logger->err("\nERROR: failed to open geometry configuration file.\n");
+      string message = ex.what();
+      logger->err("FILE: " + message + "\n");
+      if (sconf != NULL) delete sconf;
+      delete logger;
+      return;
     }
-    int nk = nth * nph;
-    int nks = nths * nphs;
-    int nkks = nk * nks;
-    double th1 = in_theta_start;
-    double ph1 = in_phi_start;
-    double ths1 = sc_theta_start;
-    double phs1 = sc_phi_start;
-    const double half_pi = acos(0.0);
-    const double pi = 2.0 * half_pi;
-    double gcs = 0.0;
-    for (int i116 = 0; i116 < nsph; i116++) {
-      int i = i116 + 1;
-      int iogi = c1->iog[i116];
-      if (iogi >= i) {
-	double gcss = pi * c1->ros[i116] * c1->ros[i116];
-	c1->gcsv[i116] = gcss;
-	int nsh = c1->nshl[i116];
-	for (int j115 = 0; j115 < nsh; j115++) {
-	  c1->rc[i116][j115] = sconf->get_rcf(i116, j115) * c1->ros[i116];
+    int s_nsph = sconf->number_of_spheres;
+    int nsph = gconf->number_of_spheres;
+    int configurations = sconf->configurations;
+    logger->log(" done.\n");
+    // Sanity check on the number of spheres: geometry and scatterer configurations should always agree
+    if (s_nsph == nsph) {
+      ScatteringAngles *p_sa = new ScatteringAngles(gconf);
+      SphereIterationData *sid = new SphereIterationData(gconf, sconf, mpidata, 0);
+      SphereOutputInfo *p_output = new SphereOutputInfo(sconf, gconf, mpidata);
+      // FILE *output = fopen((output_path + "/c_OSPH").c_str(), "w");
+      const double half_pi = acos(0.0);
+      const double pi = 2.0 * half_pi;
+      sid->c1->gcs = 0.0;
+      for (int i116 = 0; i116 < nsph; i116++) {
+	int i = i116 + 1;
+	int iogi = sid->c1->iog[i116];
+	if (iogi >= i) {
+	  double gcss = pi * sid->c1->ros[i116] * sid->c1->ros[i116];
+	  sid->c1->gcsv[i116] = gcss;
+	  int nsh = sid->c1->nshl[i116];
+	  for (int j115 = 0; j115 < nsh; j115++) {
+	    sid->c1->rc[i116][j115] = sconf->get_rcf(i116, j115) * sid->c1->ros[i116];
+	  }
 	}
+	sid->c1->gcs += sid->c1->gcsv[iogi - 1];
       }
-      gcs += c1->gcsv[iogi - 1];
-    }
-    double ****zpv = new double***[l_max]; //[l_max][3][2][2]; // Matrix: dim[LM x 3 x 2 x 2]
-    for (int zi = 0; zi < l_max; zi++) {
-      zpv[zi] = new double**[3];
-      for (int zj = 0; zj < 3; zj++) {
-	zpv[zi][zj] = new double*[2];
-	for (int zk = 0; zk < 2; zk++) {
-	  zpv[zi][zj][zk] = new double[2]();
-	}
+      thdps(gconf->l_max, sid->zpv);
+      double exdc = sconf->exdc;
+      double exri = sqrt(exdc);
+
+      // Create empty virtual binary file
+      VirtualBinaryFile *vtppoanp = new VirtualBinaryFile();
+      string tppoan_name = output_path + "/c_TPPOAN";
+      int imode = 10, tmpvalue;
+
+      //========================
+      // write a block of info to virtual binary file
+      //========================
+      vtppoanp->append_line(VirtualBinaryLine(imode));
+      tmpvalue = gconf->isam;
+      vtppoanp->append_line(VirtualBinaryLine(tmpvalue));
+      tmpvalue = gconf->in_pol;
+      vtppoanp->append_line(VirtualBinaryLine(tmpvalue));
+      vtppoanp->append_line(VirtualBinaryLine(s_nsph));
+      tmpvalue = p_sa->nth;
+      vtppoanp->append_line(VirtualBinaryLine(tmpvalue));
+      tmpvalue = p_sa->nph;
+      vtppoanp->append_line(VirtualBinaryLine(tmpvalue));
+      tmpvalue = p_sa->nths;
+      vtppoanp->append_line(VirtualBinaryLine(tmpvalue));
+      tmpvalue = p_sa->nphs;
+      vtppoanp->append_line(VirtualBinaryLine(tmpvalue));
+      vtppoanp->append_line(VirtualBinaryLine(nsph));
+      for (int nsi = 0; nsi < nsph; nsi++) {
+	tmpvalue = sid->c1->iog[nsi];
+	vtppoanp->append_line(VirtualBinaryLine(tmpvalue));
       }
-    }
-    thdps(l_max, zpv);
-    double exdc = sconf->exdc;
-    double exri = sqrt(exdc);
-    fprintf(output, "  REFR. INDEX OF EXTERNAL MEDIUM=%15.7lE\n", exri);
-    fstream tppoan;
-    string tppoan_name = output_path + "/c_TPPOAN";
-    tppoan.open(tppoan_name.c_str(), ios::binary|ios::out);
-    if (tppoan.is_open()) {
-      int imode = 10;
-      tppoan.write(reinterpret_cast<char *>(&imode), sizeof(int));
-      tppoan.write(reinterpret_cast<char *>(&(meridional_type)), sizeof(int));
-      tppoan.write(reinterpret_cast<char *>(&(in_pol)), sizeof(int));
-      tppoan.write(reinterpret_cast<char *>(&s_nsph), sizeof(int));
-      tppoan.write(reinterpret_cast<char *>(&nth), sizeof(int));
-      tppoan.write(reinterpret_cast<char *>(&nph), sizeof(int));
-      tppoan.write(reinterpret_cast<char *>(&nths), sizeof(int));
-      tppoan.write(reinterpret_cast<char *>(&nphs), sizeof(int));
-      tppoan.write(reinterpret_cast<char *>(&nsph), sizeof(int));
-
-      for (int nsi = 0; nsi < nsph; nsi++)
-	tppoan.write(reinterpret_cast<char *>(&(c1->iog[nsi])), sizeof(int));
-      if (in_pol == 0) fprintf(output, "   LIN\n");
-      else fprintf(output, "  CIRC\n");
-      fprintf(output, " \n");
-      double wp = sconf->wp;
-      double xip = sconf->xip;
-      double wn = wp / 3.0e8;
-      double sqsfi = 1.0;
-      double vk, vkarg;
-      int idfc = sconf->idfc;
-      int nxi = sconf->number_of_scales;
-      if (idfc < 0) {
-	vk = xip * wn;
-	fprintf(output, "  VK=%15.7lE, XI IS SCALE FACTOR FOR LENGTHS\n", vk);
-	fprintf(output, " \n");
+
+      if (sconf->idfc < 0) {
+	sid->vk = sid->xip * sid->wn;
+	p_output->vec_vk[0] = sid->vk;
       }
-      for (int jxi488 = 0; jxi488 < nxi; jxi488++) {
-	int jxi = jxi488 + 1;
-	logger->log("INFO: running scale iteration " + to_string(jxi) + " of " + to_string(nxi) + ".\n");
-	fprintf(output, "========== JXI =%3d ====================\n", jxi);
-	double xi = sconf->get_scale(jxi488);
-	if (idfc >= 0) {
-	  vk = xi * wn;
-	  vkarg = vk;
-	  fprintf(output, "  VK=%15.7lE, XI=%15.7lE\n", xi, vk);
-	} else { // IDFC < 0
-	  vkarg = xi * vk;
-	  sqsfi = 1.0 / (xi * xi);
-	  fprintf(output, "  XI=%15.7lE\n", xi);
+
+      // Do the first wavelength iteration
+      int jxi488 = 1;
+      // Use pragmas to put OMP parallelism to second level.
+      int jer = 0;
+#pragma omp parallel
+      {
+#pragma omp single
+	{
+	  jer = sphere_jxi488_cycle(jxi488 - 1, sconf, gconf, p_sa, sid, p_output, output_path, vtppoanp);
+	} // OMP single
+      } // OMP parallel
+      if (jer != 0) { // First iteration failed. Halt the calculation.
+	delete vtppoanp;
+	delete p_output;
+	delete p_sa;
+	delete sid;
+	delete logger;
+	delete sconf;
+	delete gconf;
+	return;
+      }
+
+      //==================================================
+      // write the first outputs here, creating the new files; subsequent iterations only append
+      //==================================================
+      vtppoanp->write_to_disk(output_path + "/c_TPPOAN");
+      delete vtppoanp;
+
+      // here go the calls that broadcast the data from MPI process 0 to the other processes, but only if MPI is actually in use
+#ifdef MPI_VERSION
+      if (mpidata->mpirunning) {
+	gconf->mpibcast(mpidata);
+	sconf->mpibcast(mpidata);	    
+	sid->mpibcast(mpidata);
+	p_sa->mpibcast(mpidata);
+      }	
+#endif
+      // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
+      int ompnumthreads = 1;
+      // this is for MPI process 0 (or even if we are not using MPI at all)
+      int myjxi488startoffset = 0;
+      int myMPIstride = ompnumthreads;
+      int myMPIblock = ompnumthreads;
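+      // myMPIblock: scales handled per stride by this process (one per OMP thread);
+      // myMPIstride: total scales handled per stride by all MPI processes (updated below if MPI is in use)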
+      // Define here shared arrays of virtual ascii and binary files, so that thread 0 will be able to access them all later
+      SphereOutputInfo **p_outarray = NULL;
+      VirtualBinaryFile **vtppoanarray = NULL;
+
+      //===========================================
+      // open the OpenMP parallel context, so each thread can initialise its stuff
+      //===========================================
+#pragma omp parallel
+      {
+	// Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defined value anyway
+	int myompthread = 0;
+	
+#ifdef _OPENMP
+	// If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files
+	myompthread = omp_get_thread_num();
+	if (myompthread == 0) ompnumthreads = omp_get_num_threads();
+#endif
+
+	if (myompthread == 0) {
+	  // Initialise some shared variables only on thread 0
+	  p_outarray = new SphereOutputInfo*[ompnumthreads];
+	  vtppoanarray = new VirtualBinaryFile*[ompnumthreads];
+	  myMPIblock = ompnumthreads;
+	  myMPIstride = myMPIblock;
+	}
+
+#ifdef MPI_VERSION
+	if (myompthread == 0) {
+	  if (mpidata->mpirunning) {
+	    // only go through this if MPI has been actually used
+	    for (int rr=1; rr<mpidata->nprocs; rr++) {
+	      // individually send their respective starting points to other MPI processes: they start immediately after the frequencies computed by previous processes so far
+	      int remotejxi488startoffset = myMPIstride;
+	      MPI_Send(&remotejxi488startoffset, 1, MPI_INT, rr, 3, MPI_COMM_WORLD);
+	      int remoteMPIblock;
+	      MPI_Recv(&remoteMPIblock, 1, MPI_INT, rr, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+	      // update myMPIstride to include the ones due to MPI process rr
+	      myMPIstride += remoteMPIblock;
+	    }
+	    // now I know the total myMPIstride, I can send it to all processes
+	    MPI_Bcast(&myMPIstride, 1, MPI_INT, 0, MPI_COMM_WORLD);
+	  }
+	}
+#endif
+	// add an omp barrier to make sure that the global variables defined by thread 0 are known to all threads below this
+#pragma omp barrier
+
+	// To test parallelism, feed the loop with "clean" copies of the parameters, so that they are not changed by previous iterations and each one behaves as the first one. Define all (empty) variables here, so that they have the correct scope; they then get different definitions depending on the thread number
+	SphereIterationData *sid_2 = NULL;
+	SphereOutputInfo *p_output_2 = NULL;
+	VirtualBinaryFile *vtppoanp_2 = NULL;
+	// for threads other than 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
+	if (myompthread == 0) {
+	  sid_2 = sid;
+	  // OMP thread 0 of MPI process 0 holds the pointer to the full output structure
+	  p_output_2 = p_output;
+	  p_outarray[0] = p_output_2;
+	} else {
+	  // this is not thread 0, so do create fresh copies of all local variables
+	  sid_2 = new SphereIterationData(*sid);
+	}
+	// make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
+	if (myompthread==0) {
+	  logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n");
+	  // Thread 0 of process 0 has already allocated all necessary output memory
 	}
-	tppoan.write(reinterpret_cast<char *>(&vk), sizeof(double));
-	for (int i132 = 0; i132 < nsph; i132++) {
-	  int i = i132 + 1;
-	  int iogi = c1->iog[i132];
-	  if (iogi != i) {
-	    for (int l123 = 0; l123 < l_max; l123++) {
-	      c1->rmi[l123][i132] = c1->rmi[l123][iogi - 1];
-	      c1->rei[l123][i132] = c1->rei[l123][iogi - 1];
+#pragma omp barrier
+	// ok, now I can actually start the parallel calculations
+	for (int ixi488=2; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) {
+	  // the parallel loop over MPI processes covers a different set of indices for each thread
+#pragma omp barrier
+	  int myjxi488 = ixi488+myompthread;
+	  // each thread opens new virtual files and stores their pointers in the shared array
+	  vtppoanp_2 = new VirtualBinaryFile();
+	  // each thread puts a copy of the pointers to its virtual files in the shared arrays
+	  vtppoanarray[myompthread] = vtppoanp_2;
+#pragma omp barrier
+
+	  // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism
+	  if (myjxi488 <= sid_2->number_of_scales) {
+	    if (myompthread > 0) {
+	      // UPDATE: non-0 threads need to allocate memory for one scale at a time.
+	      p_output_2 = new SphereOutputInfo(sconf, gconf, mpidata, myjxi488, 1);
+	      p_outarray[myompthread] = p_output_2;
+	    }
+	    int jer = sphere_jxi488_cycle(myjxi488 - 1, sconf, gconf, p_sa, sid_2, p_output_2, output_path, vtppoanp_2);
+	  } else {
+	    if (myompthread > 0) {
+	      // If there is no input for this thread, set output pointer to NULL.
+	      p_outarray[myompthread] = NULL;
 	    }
-	    continue; // i132
 	  }
-	  // label 125
-	  int nsh = c1->nshl[i132];
-	  int ici = (nsh + 1) / 2;
-	  if (idfc == 0) {
-	    for (int ic = 0; ic < ici; ic++)
-	      c1->dc0[ic] = sconf->get_dielectric_constant(ic, i132, jxi488); // WARNING: IDFC=0 is not tested!
-	  } else { // IDFC != 0
-	    if (jxi == 1) {
-	      for (int ic = 0; ic < ici; ic++) {
-		c1->dc0[ic] = sconf->get_dielectric_constant(ic, i132, jxi488);
+#pragma omp barrier
+	  // threads different from 0 append their virtual files to the one of thread 0, and delete them
+	  if (myompthread == 0) {
+	    for (int ti=1; ti<ompnumthreads; ti++) {
+	      if (p_outarray[ti] != NULL) {
+		p_outarray[0]->insert(*(p_outarray[ti]));
+		delete p_outarray[ti];
+		p_outarray[ti] = NULL;
 	      }
+	      vtppoanarray[0]->append(*(vtppoanarray[ti]));
+	      delete vtppoanarray[ti];
 	    }
 	  }
-	  if (nsh % 2 == 0) c1->dc0[ici] = exdc;
-	  int jer = 0;
-	  int lcalc = 0;
-	  dme(l_max, i, npnt, npntts, vkarg, exdc, exri, c1, jer, lcalc, arg);
-	  if (jer != 0) {
-	    fprintf(output, "  STOP IN DME\n");
-	    fprintf(output, "  AT %1d LCALC=%3d TOO SMALL WITH ARG=%15.7lE+i(%15.7lE)\n", jer, lcalc, real(arg), imag(arg));
-	    tppoan.close();
-	    fclose(output);
-	    delete sconf;
-	    delete gconf;
-	    delete c1;
-	    for (int zi = l_max - 1; zi > -1; zi--) {
-	      for (int zj = 0; zj < 3; zj++) {
-		for (int zk = 0; zk < 2; zk++) {
-		  delete[] zpv[zi][zj][zk];
-		}
-		delete[] zpv[zi][zj];
+#pragma omp barrier
+	  //==============================================
+	  // Collect all virtual files on thread 0 of MPI process 0, and append them to disk
+	  //==============================================
+	  if (myompthread == 0) {
+	    // thread 0 writes its virtual files, now including contributions from all threads, to disk, and deletes them
+	    // p_outarray[0]->append_to_disk(output_path + "/c_OCLU");
+	    // delete p_outarray[0];
+	    vtppoanarray[0]->append_to_disk(output_path + "/c_TPPOAN");
+	    delete vtppoanarray[0];
+
+#ifdef MPI_VERSION
+	    if (mpidata->mpirunning) {
+	      // only go through this if MPI has been actually used
+	      for (int rr=1; rr<mpidata->nprocs; rr++) {
+		// get the data from process rr by receiving it in total memory structure
+		p_outarray[0]->mpireceive(mpidata, rr);
+		// get the data from process rr, creating a new virtual ascii file
+		// VirtualAsciiFile *p_output = new VirtualAsciiFile(mpidata, rr);
+		// append to disk and delete virtual ascii file
+		// p_output->append_to_disk(output_path + "/c_OCLU");
+		// delete p_output;
+		
+		// get the data from process rr, creating a new virtual binary file
+		VirtualBinaryFile *vtppoanp = new VirtualBinaryFile(mpidata, rr);
+		// append to disk and delete virtual binary file
+		vtppoanp->append_to_disk(output_path + "/c_TPPOAN");
+		delete vtppoanp;
+		int test = MPI_Barrier(MPI_COMM_WORLD);
 	      }
-	      delete[] zpv[zi];
 	    }
-	    delete[] zpv;
-	    delete[] duk;
-	    delete[] u;
-	    delete[] us;
-	    delete[] un;
-	    delete[] uns;
-	    delete[] up;
-	    delete[] ups;
-	    delete[] upmp;
-	    delete[] upsmp;
-	    delete[] unmp;
-	    delete[] unsmp;
-	    delete[] argi;
-	    delete[] args;
-	    delete[] gaps;
-	    for (int i = 3; i > -1; i--) {
-	      delete[] cmul[i];
-	      delete[] cmullr[i];
+#endif
+	  }
+	  // end block writing to disk
+#pragma omp barrier
+
+	} // ixi488 strided MPI loop
+#pragma omp barrier
+	if (myompthread == 0) {
+	  delete[] p_outarray;
+	  delete[] vtppoanarray;
+	}
+	{
+	  string message = "INFO: Closing thread-local output files of thread " + to_string(myompthread) + " and syncing threads.\n";
+	  logger->log(message);
+	}
+	delete sid_2;
+      } // OMP parallel
+      delete p_sa;
+      p_output->write(output_path + "/c_OSPH.hd5", "HDF5");
+      p_output->write(output_path + "/c_OSPH", "LEGACY");
+      delete p_output;
+      logger->log("Finished. Output written to " + output_path + "/c_OSPH.\n");
+    } else { // NSPH mismatch between geometry and scatterer configurations.
+      throw UnrecognizedConfigurationException(
+        "Inconsistent geometry and scatterer configurations."
+      );
+    }
+    delete sconf;
+    delete gconf;
+  } // end of instruction block for MPI process 0
+  
+    //===============================
+    // instruction block for MPI processes different from 0
+    //===============================
+#ifdef MPI_VERSION
+  else { // Instruction block for MPI processes other than 0.
+    // here goes the code for MPI processes other than 0
+    // copy gconf, sconf, sid and p_sa from MPI process 0
+    GeometryConfiguration *gconf = new GeometryConfiguration(mpidata);
+    ScattererConfiguration *sconf = new ScattererConfiguration(mpidata);
+    SphereIterationData *sid = new SphereIterationData(mpidata, device_count);
+    ScatteringAngles *p_sa = new ScatteringAngles(mpidata);
+    
+    // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
+    int ompnumthreads = 1;
+    SphereOutputInfo **p_outarray = NULL;
+    VirtualBinaryFile **vtppoanarray = NULL;
+    int myjxi488startoffset;
+    int myMPIstride = ompnumthreads;
+    int myMPIblock = ompnumthreads;
+
+#pragma omp parallel
+    {
+      // Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defined value anyway
+      int myompthread = 0;
+#ifdef _OPENMP
+      // If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files
+      myompthread = omp_get_thread_num();
+      if (myompthread == 0) ompnumthreads = omp_get_num_threads();
+#endif
+      if (myompthread == 0) {
+	// receive the start parameter from MPI process 0
+	MPI_Recv(&myjxi488startoffset, 1, MPI_INT, 0, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+	// send my number of omp threads to process 0
+	MPI_Send(&ompnumthreads, 1, MPI_INT, 0, 3, MPI_COMM_WORLD);
+	// receive myMPIstride sent by MPI process 0 to all processes
+	MPI_Bcast(&myMPIstride, 1, MPI_INT, 0, MPI_COMM_WORLD);
+	// allocate virtual files for each thread
+	p_outarray = new SphereOutputInfo*[ompnumthreads];
+	vtppoanarray = new VirtualBinaryFile*[ompnumthreads];
+      }
+#pragma omp barrier
+      // To test parallelism, feed the loop with "clean" copies of the parameters, so that they are not changed by previous iterations and each one behaves as the first one. Define all (empty) variables here, so that they have the correct scope; they then get different definitions depending on the thread number
+      SphereIterationData *sid_2 = NULL;
+      SphereOutputInfo *p_output_2 = NULL;
+      VirtualBinaryFile *vtppoanp_2 = NULL;
+      // PLACEHOLDER
+      // for threads other than 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
+      if (myompthread == 0) {
+	sid_2 = sid;
+      } else {
+	// this is not thread 0, so do create fresh copies of all local variables
+	sid_2 = new SphereIterationData(*sid);
+      }
+      // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
+#pragma omp barrier
+      // ok, now I can actually start the parallel calculations
+      for (int ixi488=2; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) {
+	// the parallel loop over MPI processes covers a different set of indices for each thread
+#pragma omp barrier
+	int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
+	// each thread opens new virtual files and stores their pointers in the shared array
+	vtppoanp_2 = new VirtualBinaryFile();
+	// each thread puts a copy of the pointers to its virtual files in the shared arrays
+	vtppoanarray[myompthread] = vtppoanp_2;
+#pragma omp barrier
+	if (myompthread==0) logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n");
+	// ok, now I can actually start the parallel calculations
+	// each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism
+	if (myjxi488 <= sid_2->number_of_scales) {
+	  if (myompthread > 0) {
+	    // UPDATE: non-0 threads need to allocate memory for one scale at a time.
+	    p_output_2 = new SphereOutputInfo(sconf, gconf, mpidata, myjxi488, 1);
+	    p_outarray[myompthread] = p_output_2;
+	  } else {
+	    // Thread 0 of non-zero MPI processes needs to allocate memory for the
+	    // output of all threads.
+	    p_output_2 = new SphereOutputInfo(sconf, gconf, mpidata, myjxi488, ompnumthreads);
+	    p_outarray[0] = p_output_2;
+	  }
+	  int jer = sphere_jxi488_cycle(myjxi488 - 1, sconf, gconf, p_sa, sid_2, p_output_2, output_path, vtppoanp_2);
+	} else {
+	  if (myompthread > 0) {
+	    // If there is no input for this thread, set the output pointer to NULL.
+	    p_outarray[myompthread] = NULL;
+	  }	  
+	}
+
+#pragma omp barrier
+	// threads different from 0 append their virtual files to the one of thread 0, and delete them
+	if (myompthread == 0) {
+	  for (int ti=1; ti<ompnumthreads; ti++) {
+	    if (p_outarray[ti] != NULL) {
+	      p_outarray[0]->insert(*(p_outarray[ti]));
+	      delete p_outarray[ti];
+	      p_outarray[ti] = NULL;
 	    }
-	    delete[] cmul;
-	    delete[] cmullr;
-	    for (int ti = 1; ti > -1; ti--) {
-	      delete[] tqse[ti];
-	      delete[] tqss[ti];
-	      delete[] tqspe[ti];
-	      delete[] tqsps[ti];
+	    vtppoanarray[0]->append(*(vtppoanarray[ti]));
+	    delete vtppoanarray[ti];
+	  }
+	  // thread 0 sends the collected virtualfiles to thread 0 of MPI process 0, then deletes them
+	  for (int rr=1; rr<mpidata->nprocs; rr++) {
+	    if (rr == mpidata->rank) {
+	      p_outarray[0]->mpisend(mpidata);
+	      delete p_outarray[0];
+	      vtppoanarray[0]->mpisend(mpidata);
+	      delete vtppoanarray[0];
 	    }
-	    delete[] tqse;
-	    delete[] tqss;
-	    delete[] tqspe;
-	    delete[] tqsps;
-	    delete logger;
-	    return;
+	    int test = MPI_Barrier(MPI_COMM_WORLD);
 	  }
-	} // i132
-	if (idfc >= 0 and nsph == 1 and jxi == jwtm) {
-	  // This is the condition that writes the transition matrix to output.
-	  string ttms_name = output_path + "/c_TTMS.hd5";
-	  TransitionMatrix::write_binary(
-					 ttms_name, l_max, vk, exri, c1->rmi, c1->rei,
-					 sconf->get_radius(0), "HDF5"
-					 );
-	  ttms_name = output_path + "/c_TTMS";
-	  TransitionMatrix::write_binary(
-					 ttms_name, l_max, vk, exri, c1->rmi, c1->rei,
-					 sconf->get_radius(0)
-					 );
 	}
-	double cs0 = 0.25 * vk * vk * vk / half_pi;
-	sscr0(tfsas, nsph, l_max, vk, exri, c1);
-	double sqk = vk * vk * exdc;
-	aps(zpv, l_max, nsph, c1, sqk, gaps);
-	rabas(in_pol, l_max, nsph, c1, tqse, tqspe, tqss, tqsps);
-	for (int i170 = 0; i170 < nsph; i170++) {
-	  int i = i170 + 1;
-	  if (c1->iog[i170] >= i) {
-	    double albeds = c1->sscs[i170] / c1->sexs[i170];
-	    c1->sqscs[i170] *= sqsfi;
-	    c1->sqabs[i170] *= sqsfi;
-	    c1->sqexs[i170] *= sqsfi;
-	    fprintf(output, "     SPHERE %2d\n", i);
-	    if (c1->nshl[i170] != 1) {
-	      fprintf(output, "  SIZE=%15.7lE\n", c1->vsz[i170]);
-	    } else {
-	      fprintf(
-		      output,
-		      "  SIZE=%15.7lE, REFRACTIVE INDEX=%15.7lE%15.7lE\n",
-		      c1->vsz[i170],
-		      real(c1->vkt[i170]),
-		      imag(c1->vkt[i170])
-		      );
-	    }
-	    fprintf(output, " ----- SCS ----- ABS ----- EXS ----- ALBEDS --\n");
-	    fprintf(
-		    output,
-		    " %14.7lE%15.7lE%15.7lE%15.7lE\n",
-		    c1->sscs[i170], c1->sabs[i170],
-		    c1->sexs[i170], albeds
-		    );
-	    fprintf(output, " ---- SCS/GS -- ABS/GS -- EXS/GS ---\n");
-	    fprintf(
-		    output,
-		    " %14.7lE%15.7lE%15.7lE\n",
-		    c1->sqscs[i170], c1->sqabs[i170],
-		    c1->sqexs[i170]
-		    );
-	    fprintf(output, "  FSAS=%15.7lE%15.7lE\n", real(c1->fsas[i170]), imag(c1->fsas[i170]));
-	    double csch = 2.0 * vk * sqsfi / c1->gcsv[i170];
-	    s0 = c1->fsas[i170] * exri;
-	    double qschu = csch * imag(s0);
-	    double pschu = csch * real(s0);
-	    double s0mag = cs0 * cabs(s0);
-	    fprintf(
-		    output,
-		    "  QSCHU=%15.7lE, PSCHU=%15.7lE, S0MAG=%15.7lE\n",
-		    qschu, pschu, s0mag
-		    );
-	    double rapr = c1->sexs[i170] - gaps[i170];
-	    double cosav = gaps[i170] / c1->sscs[i170];
-	    fprintf(output, "  COSAV=%15.7lE, RAPRS=%15.7lE\n", cosav, rapr);
-	    fprintf(output, "  IPO=%2d, TQEk=%15.7lE, TQSk=%15.7lE\n", 1, tqse[0][i170], tqss[0][i170]);
-	    fprintf(output, "  IPO=%2d, TQEk=%15.7lE, TQSk=%15.7lE\n", 2, tqse[1][i170], tqss[1][i170]);
-	    tppoan.write(reinterpret_cast<char *>(&(tqse[0][i170])), sizeof(double));
-	    tppoan.write(reinterpret_cast<char *>(&(tqss[0][i170])), sizeof(double));
-	    double val = real(tqspe[0][i170]);
-	    tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
-	    val = imag(tqspe[0][i170]);
-	    tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
-	    val = real(tqsps[0][i170]);
-	    tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
-	    val = imag(tqsps[0][i170]);
-	    tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
-	    tppoan.write(reinterpret_cast<char *>(&(tqse[1][i170])), sizeof(double));
-	    tppoan.write(reinterpret_cast<char *>(&(tqss[1][i170])), sizeof(double));
-	    val = real(tqspe[1][i170]);
-	    tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
-	    val = imag(tqspe[1][i170]);
-	    tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
-	    val = real(tqsps[1][i170]);
-	    tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
-	    val = imag(tqsps[1][i170]);
-	    tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
-	  } // End if iog[i170] >= i
-	} // i170 loop
-	if (nsph != 1) {
-	  fprintf(output, "  FSAT=(%15.7lE,%15.7lE)\n", real(tfsas), imag(tfsas));
-	  double csch = 2.0 * vk * sqsfi / gcs;
-	  s0 = tfsas * exri;
-	  double qschu = csch * imag(s0);
-	  double pschu = csch * real(s0);
-	  double s0mag = cs0 * cabs(s0);
-	  fprintf(
-		  output,
-		  "  QSCHU=%15.7lE, PSCHU=%15.7lE, S0MAG=%15.7lE\n",
-		  qschu, pschu, s0mag
-		  );
+      } // ixi488: close strided loop running on MPI processes
+      // Clean memory
+#pragma omp barrier
+      if (myompthread == 0) {
+	delete[] p_outarray;
+	delete[] vtppoanarray;
+      }
+      delete sid_2;
+    } // OMP parallel
+    delete p_sa;
+    delete sconf;
+    delete gconf;
+  } // End instructions block for MPI non-0 processes.
+#endif // MPI_VERSION
+  delete logger;
+} // sphere()
+
+int sphere_jxi488_cycle(
+  int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf,
+  ScatteringAngles *sa, SphereIterationData *sid, SphereOutputInfo *oi,
+  const string& output_path, VirtualBinaryFile *vtppoanp
+) {
+  const dcomplex cc0 = 0.0 + I * 0.0;
+  const double half_pi = acos(0.0);
+  const double pi = 2.0 * half_pi;
+  int jer = 0;
+  Logger *logger = new Logger(LOG_INFO);
+  int oindex = 0;
+  int jxi = jxi488 + 1;
+  int nsph = gconf->number_of_spheres;
+  int l_max = gconf->l_max;
+  int in_pol = gconf->in_pol;
+  int npnt = gconf->npnt;
+  int npntts = gconf->npntts;
+  int jwtm = gconf->jwtm;
+  double wn = sconf->wp / 3.0e8;
+  double sqsfi = 1.0;
+  int idfc = sconf->idfc;
+  int nxi = sconf->number_of_scales;
+  int configurations = sconf->configurations;
+  int ndirs = sa->nkks;
+  int lcalc;
+  int jw, isq, ibf;
+  logger->log("INFO: running scale iteration " + to_string(jxi) + " of " + to_string(nxi) + ".\n");
+  double vk, vkarg;
+  double xi = sconf->get_scale(jxi488);
+  double exdc = sconf->exdc;
+  double exri = sqrt(exdc);
+  if (idfc >= 0) {
+    vk = xi * wn;
+    vkarg = vk;
+    oi->vec_vk[jxi488] = vk;
+    oi->vec_xi[jxi488] = xi;
+  } else { // IDFC < 0
+    vk = sconf->xip * wn;
+    vkarg = xi * vk;
+    sqsfi = 1.0 / (xi * xi);
+    oi->vec_vk[jxi488] = vk;
+    oi->vec_xi[jxi488] = xi;
+  }
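+  // Record the wavenumber in the in-memory binary buffer; VirtualBinaryFile
+  // lines replace the direct TPPOAN stream writes of the legacy loop.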
+  vtppoanp->append_line(VirtualBinaryLine(vk));
+  double thsca = (gconf->isam > 1) ? sa->ths - sa->th : 0.0;
+  for (int i132 = 0; i132 < nsph; i132++) {
+    int i = i132 + 1;
+    int iogi = sid->c1->iog[i132];
+    if (iogi != i) {
+      for (int l123 = 0; l123 < l_max; l123++) {
+	sid->c1->rmi[l123][i132] = sid->c1->rmi[l123][iogi - 1];
+	sid->c1->rei[l123][i132] = sid->c1->rei[l123][iogi - 1];
+      }
+      continue; // i132
+    }
+    // label 125
+    int nsh = sid->c1->nshl[i132];
+    int ici = (nsh + 1) / 2;
+    if (idfc == 0) {
+      for (int ic = 0; ic < ici; ic++)
+	sid->c1->dc0[ic] = sconf->get_dielectric_constant(ic, i132, jxi488); // WARNING: IDFC=0 is not tested!
+    } else { // IDFC != 0
+      if (jxi == 1) {
+	for (int ic = 0; ic < ici; ic++) {
+	  sid->c1->dc0[ic] = sconf->get_dielectric_constant(ic, i132, jxi488);
+	}
+      }
+    }
+    if (nsh % 2 == 0) sid->c1->dc0[ici] = exdc;
+    dme(l_max, i, npnt, npntts, vkarg, exdc, exri, sid->c1, jer, lcalc, sid->arg);
+    if (jer != 0) {
+      oi->vec_ier[jxi488] = 1;
+      oi->lcalc = lcalc;
+      return jer;
+    }
+  } // i132
+  if (idfc >= 0 and nsph == 1 and jxi == jwtm) {
+    // This is the condition that writes the transition matrix to output.
+    string ttms_name = output_path + "/c_TTMS.hd5";
+    TransitionMatrix::write_binary(
+      ttms_name, l_max, vk, exri, sid->c1->rmi, sid->c1->rei,
+      sconf->get_radius(0), "HDF5"
+    );
+    ttms_name = output_path + "/c_TTMS";
+    TransitionMatrix::write_binary(
+      ttms_name, l_max, vk, exri, sid->c1->rmi, sid->c1->rei,
+      sconf->get_radius(0)
+    );
+  }
+  double cs0 = 0.25 * vk * vk * vk / half_pi;
+  sscr0(sid->tfsas, nsph, l_max, vk, exri, sid->c1);
+  double sqk = vk * vk * exdc;
+  aps(sid->zpv, l_max, nsph, sid->c1, sqk, sid->gaps);
+  rabas(in_pol, l_max, nsph, sid->c1, sid->tqse, sid->tqspe, sid->tqss, sid->tqsps);
+  int last_configuration = 0;
+  for (int i170 = 0; i170 < nsph; i170++) {
+    int i = i170 + 1;
+    if (sid->c1->iog[i170] >= i) {
+      last_configuration++;
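+      // Per-configuration output vectors are flattened as (scale,
+      // configuration), with the configuration index varying fastest.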
+      oindex = jxi488 * configurations + last_configuration - 1;
+      double albeds = sid->c1->sscs[i170] / sid->c1->sexs[i170];
+      sid->c1->sqscs[i170] *= sqsfi;
+      sid->c1->sqabs[i170] *= sqsfi;
+      sid->c1->sqexs[i170] *= sqsfi;
+      if (sid->c1->nshl[i170] != 1) {
+	oi->vec_sphere_ref_indices[oindex] = cc0;
+	oi->vec_sphere_sizes[oindex] = sid->c1->vsz[i170];
+      } else {
+	oi->vec_sphere_ref_indices[oindex] = sid->c1->vkt[i170];
+	oi->vec_sphere_sizes[oindex] = sid->c1->vsz[i170];
+      }
+      oi->vec_scs[oindex] = sid->c1->sscs[i170];
+      oi->vec_abs[oindex] = sid->c1->sabs[i170];
+      oi->vec_exs[oindex] = sid->c1->sexs[i170];
+      oi->vec_albeds[oindex] = albeds;
+      oi->vec_scsrt[oindex] = sid->c1->sqscs[i170];
+      oi->vec_absrt[oindex] = sid->c1->sqabs[i170];
+      oi->vec_exsrt[oindex] = sid->c1->sqexs[i170];
+      oi->vec_fsas[oindex] = sid->c1->fsas[i170];
+      double csch = 2.0 * vk * sqsfi / sid->c1->gcsv[i170];
+      dcomplex s0 = sid->c1->fsas[i170] * exri;
+      double qschu = csch * imag(s0);
+      double pschu = csch * real(s0);
+      double s0mag = cs0 * cabs(s0);
+      oi->vec_qschu[oindex] = qschu;
+      oi->vec_pschu[oindex] = pschu;
+      oi->vec_s0mag[oindex] = s0mag;
+      double rapr = sid->c1->sexs[i170] - sid->gaps[i170];
+      double cosav = sid->gaps[i170] / sid->c1->sscs[i170];
+      oi->vec_cosav[oindex] = cosav;
+      oi->vec_raprs[oindex] = rapr;
+      oi->vec_tqek1[oindex] = sid->tqse[0][i170];
+      oi->vec_tqsk1[oindex] = sid->tqss[0][i170];
+      oi->vec_tqek2[oindex] = sid->tqse[1][i170];
+      oi->vec_tqsk2[oindex] = sid->tqss[1][i170];
+      double value = sid->tqse[0][i170];
+      vtppoanp->append_line(VirtualBinaryLine(value));
+      value = sid->tqss[0][i170];
+      vtppoanp->append_line(VirtualBinaryLine(value));
+      value = real(sid->tqspe[0][i170]);
+      vtppoanp->append_line(VirtualBinaryLine(value));
+      value = imag(sid->tqspe[0][i170]);
+      vtppoanp->append_line(VirtualBinaryLine(value));
+      value = real(sid->tqsps[0][i170]);
+      vtppoanp->append_line(VirtualBinaryLine(value));
+      value = imag(sid->tqsps[0][i170]);
+      vtppoanp->append_line(VirtualBinaryLine(value));
+      value = sid->tqse[1][i170];
+      vtppoanp->append_line(VirtualBinaryLine(value));
+      value = sid->tqss[1][i170];
+      vtppoanp->append_line(VirtualBinaryLine(value));
+      value = real(sid->tqspe[1][i170]);
+      vtppoanp->append_line(VirtualBinaryLine(value));
+      value = imag(sid->tqspe[1][i170]);
+      vtppoanp->append_line(VirtualBinaryLine(value));
+      value = real(sid->tqsps[1][i170]);
+      vtppoanp->append_line(VirtualBinaryLine(value));
+      value = imag(sid->tqsps[1][i170]);
+      vtppoanp->append_line(VirtualBinaryLine(value));
+    } // End if iog[i170] >= i
+  } // i170 loop
+  if (nsph != 1) {
+    oi->vec_fsat[jxi488] = sid->tfsas;
+    double csch = 2.0 * vk * sqsfi / sid->c1->gcs;
+    dcomplex s0 = sid->tfsas * exri;
+    double qschu = csch * imag(s0);
+    double pschu = csch * real(s0);
+    double s0mag = cs0 * cabs(s0);
+    oi->vec_qschut[jxi488] = qschu;
+    oi->vec_pschut[jxi488] = pschu;
+    oi->vec_s0magt[jxi488] = s0mag;
+  }
+  double th = sa->th;
+  int done_dirs = 0;
+  int nks = sa->nths * sa->nphs;
+  int nkks = sa->nth * sa->nph * nks;
+  int nth = sa->nth;
+  int nph = sa->nph;
+  double frx = 0.0, fry = 0.0, frz = 0.0;
+  for (int jth486 = 0; jth486 < sa->nth; jth486++) { // OpenMP parallelizable section
+    int jth = jth486 + 1;
+    double ph = sa->ph;
+    for (int jph484 = 0; jph484 < sa->nph; jph484++) {
+      int jph = jph484 + 1;
+      bool goto182 = (sa->nk == 1) && (jxi > 1);
+      double cost, sint, cosp, sinp;
+      if (!goto182) {
+	upvmp(th, ph, 0, cost, sint, cosp, sinp, sid->u, sid->upmp, sid->unmp);
+      }
+      if (gconf->isam >= 0) {
+	wmamp(0, cost, sint, cosp, sinp, in_pol, l_max, 0, nsph, sid->argi, sid->u, sid->upmp, sid->unmp, sid->c1);
+	for (int i183 = 0; i183 < nsph; i183++) {
+	  double rapr = sid->c1->sexs[i183] - sid->gaps[i183];
+	  frx = rapr * sid->u[0];
+	  fry = rapr * sid->u[1];
+	  frz = rapr * sid->u[2];
+	}
+	jw = 1;
+      }
+      double thsl = sa->ths;
+      double phsph = 0.0;
+      for (int jths482 = 0; jths482 < sa->nths; jths482++) {
+	int jths = jths482 + 1;
+	double ths = thsl;
+	int icspnv = 0;
+	if (gconf->isam > 1) ths = th + thsca;
+	if (gconf->isam >= 1) {
+	  phsph = 0.0;
+	  if ((ths < 0.0) || (ths > 180.0)) phsph = 180.0;
+	  if (ths < 0.0) ths *= -1.0;
+	  if (ths > 180.0) ths = 360.0 - ths;
+	  if (phsph != 0.0) icspnv = 1;
 	}
-	th = th1;
-	for (int jth486 = 0; jth486 < nth; jth486++) { // OpenMP parallelizable section
-	  int jth = jth486 + 1;
-	  ph = ph1;
-	  for (int jph484 = 0; jph484 < nph; jph484++) {
-	    int jph = jph484 + 1;
-	    bool goto182 = (nk == 1) && (jxi > 1);
-	    if (!goto182) {
-	      upvmp(th, ph, 0, cost, sint, cosp, sinp, u, upmp, unmp);
+	double phs = sa->phs;
+	for (int jphs480 = 0; jphs480 < sa->nphs; jphs480++) {
+	  int jphs = jphs480 + 1;
+	  double costs, sints, cosps, sinps;
+	  if (gconf->isam >= 1) {
+	    phs = ph + phsph;
+	    if (phs >= 360.0) phs -= 360.0;
+	  }
+	  bool goto190 = (nks == 1) && ((jxi > 1) || (jth > 1) || (jph > 1));
+	  if (!goto190) {
+	    upvmp(ths, phs, icspnv, costs, sints, cosps, sinps, sid->us, sid->upsmp, sid->unsmp);
+	    if (gconf->isam >= 0) {
+	      wmamp(2, costs, sints, cosps, sinps, in_pol, l_max, 0, nsph, sid->args, sid->us, sid->upsmp, sid->unsmp, sid->c1);
+	    }
+	  }
+	  if (nkks != 0 || jxi == 1) {
+	    upvsp(
+	      sid->u, sid->upmp, sid->unmp, sid->us, sid->upsmp, sid->unsmp,
+	      sid->up, sid->un, sid->ups, sid->uns, sid->duk, isq, ibf,
+	      sid->scan, sid->cfmp, sid->sfmp, sid->cfsp, sid->sfsp
+	    );
+	    if (gconf->isam < 0) {
+	      wmasp(
+		cost, sint, cosp, sinp, costs, sints, cosps, sinps,
+		sid->u, sid->up, sid->un, sid->us, sid->ups, sid->uns,
+		isq, ibf, in_pol, l_max, 0, nsph, sid->argi, sid->args,
+		sid->c1
+	      );
 	    }
-	    if (meridional_type >= 0) {
-	      wmamp(0, cost, sint, cosp, sinp, in_pol, l_max, 0, nsph, argi, u, upmp, unmp, c1);
-	      for (int i183 = 0; i183 < nsph; i183++) {
-		double rapr = c1->sexs[i183] - gaps[i183];
-		frx = rapr * u[0];
-		fry = rapr * u[1];
-		frz = rapr * u[2];
+	    for (int i193 = 0; i193 < 3; i193++) {
+	      sid->un[i193] = sid->unmp[i193];
+	      sid->uns[i193] = sid->unsmp[i193];
+	    }
+	  }
+	  if (gconf->isam < 0) jw = 1;
+	  vtppoanp->append_line(VirtualBinaryLine(th));
+	  vtppoanp->append_line(VirtualBinaryLine(ph));
+	  vtppoanp->append_line(VirtualBinaryLine(ths));
+	  vtppoanp->append_line(VirtualBinaryLine(phs));
+	  double value = sid->scan;
+	  vtppoanp->append_line(VirtualBinaryLine(value));
+	  if (jw != 0) {
+	    jw = 0;
+	    value = sid->u[0];
+	    vtppoanp->append_line(VirtualBinaryLine(value));
+	    value = sid->u[1];
+	    vtppoanp->append_line(VirtualBinaryLine(value));
+	    value = sid->u[2];
+	    vtppoanp->append_line(VirtualBinaryLine(value));
+	  }
+	  oi->vec_dir_scand[done_dirs] = sid->scan;
+	  oi->vec_dir_cfmp[done_dirs] = sid->cfmp;
+	  oi->vec_dir_cfsp[done_dirs] = sid->cfsp;
+	  oi->vec_dir_sfmp[done_dirs] = sid->sfmp;
+	  oi->vec_dir_sfsp[done_dirs] = sid->sfsp;
+	  if (gconf->isam >= 0) {
+	    oi->vec_dir_un[3 * done_dirs] = sid->un[0];
+	    oi->vec_dir_un[3 * done_dirs + 1] = sid->un[1];
+	    oi->vec_dir_un[3 * done_dirs + 2] = sid->un[2];
+	    oi->vec_dir_uns[3 * done_dirs] = sid->uns[0];
+	    oi->vec_dir_uns[3 * done_dirs + 1] = sid->uns[1];
+	    oi->vec_dir_uns[3 * done_dirs + 2] = sid->uns[2];
+	  } else {
+	    oi->vec_dir_un[3 * done_dirs] = sid->un[0];
+	    oi->vec_dir_un[3 * done_dirs + 1] = sid->un[1];
+	    oi->vec_dir_un[3 * done_dirs + 2] = sid->un[2];
+	    oi->vec_dir_uns[3 * done_dirs] = 0.0;
+	    oi->vec_dir_uns[3 * done_dirs + 1] = 0.0;
+	    oi->vec_dir_uns[3 * done_dirs + 2] = 0.0;
+	  }
+	  sscr2(nsph, l_max, vk, exri, sid->c1);
+	  last_configuration = 0;
+	  for (int ns226 = 0; ns226 < nsph; ns226++) {
+	    int ns = ns226 + 1;
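+	    // Per-direction output vectors are flattened as (scale, direction,
+	    // sphere), with the sphere index varying fastest.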
+	    oindex = jxi488 * nsph * ndirs + nsph * done_dirs + ns226;
+	    oi->vec_dir_sas11[oindex] = sid->c1->sas[ns226][0][0];
+	    oi->vec_dir_sas21[oindex] = sid->c1->sas[ns226][1][0];
+	    oi->vec_dir_sas12[oindex] = sid->c1->sas[ns226][0][1];
+	    oi->vec_dir_sas22[oindex] = sid->c1->sas[ns226][1][1];
+	    oi->vec_dir_fx[jxi488 * nsph * nth * nph + nsph * nph * (jth - 1) + nsph * (jph - 1) + ns226] = frx;
+	    oi->vec_dir_fy[jxi488 * nsph * nth * nph + nsph * nph * (jth - 1) + nsph * (jph - 1) + ns226] = fry;
+	    oi->vec_dir_fz[jxi488 * nsph * nth * nph + nsph * nph * (jth - 1) + nsph * (jph - 1) + ns226] = frz;
+	    for (int i225 = 0; i225 < 16; i225++) sid->c1->vint[i225] = sid->c1->vints[ns226][i225];
+	    mmulc(sid->c1->vint, sid->cmullr, sid->cmul);
+	    for (int imul = 0; imul < 4; imul++) {
+	      int muls_index = 16 * jxi488 * nsph * ndirs + 16 * nsph * done_dirs + 4 * imul;
+	      for (int jmul = 0; jmul < 4; jmul++) {
+		oi->vec_dir_muls[muls_index + jmul] = sid->cmul[imul][jmul];
 	      }
-	      jw = 1;
 	    }
-	    double thsl = ths1;
-	    double phsph = 0.0;
-	    for (int jths482 = 0; jths482 < nths; jths482++) {
-	      int jths = jths482 + 1;
-	      double ths = thsl;
-	      int icspnv = 0;
-	      if (meridional_type > 1) ths = th + thsca;
-	      if (meridional_type >= 1) {
-		phsph = 0.0;
-		if ((ths < 0.0) || (ths > 180.0)) phsph = 180.0;
-		if (ths < 0.0) ths *= -1.0;
-		if (ths > 180.0) ths = 360.0 - ths;
-		if (phsph != 0.0) icspnv = 1;
+	    for (int imul = 0; imul < 4; imul++) {
+	      int muls_index = 16 * jxi488 * nsph * ndirs + 16 * nsph * done_dirs + 4 * imul;
+	      for (int jmul = 0; jmul < 4; jmul++) {
+		oi->vec_dir_mulslr[muls_index + jmul] = sid->cmullr[imul][jmul];
 	      }
-	      double phs = phs1;
-	      for (int jphs480 = 0; jphs480 < nphs; jphs480++) {
-		int jphs = jphs480 + 1;
-		if (meridional_type >= 1) {
-		  phs = ph + phsph;
-		  if (phs >= 360.0) phs -= 360.0;
-		}
-		bool goto190 = (nks == 1) && ((jxi > 1) || (jth > 1) || (jph > 1));
-		if (!goto190) {
-		  upvmp(ths, phs, icspnv, costs, sints, cosps, sinps, us, upsmp, unsmp);
-		  if (meridional_type >= 0) {
-		    wmamp(2, costs, sints, cosps, sinps, in_pol, l_max, 0, nsph, args, us, upsmp, unsmp, c1);
-		  }
-		}
-		if (nkks != 0 || jxi == 1) {
-		  upvsp(u, upmp, unmp, us, upsmp, unsmp, up, un, ups, uns, duk, isq, ibf, scan, cfmp, sfmp, cfsp, sfsp);
-		  if (meridional_type < 0) {
-		    wmasp(
-			  cost, sint, cosp, sinp, costs, sints, cosps, sinps,
-			  u, up, un, us, ups, uns, isq, ibf, in_pol,
-			  l_max, 0, nsph, argi, args, c1
-			  );
-		  }
-		  for (int i193 = 0; i193 < 3; i193++) {
-		    un[i193] = unmp[i193];
-		    uns[i193] = unsmp[i193];
-		  }
-		}
-		if (meridional_type < 0) jw = 1;
-		tppoan.write(reinterpret_cast<char *>(&th), sizeof(double));
-		tppoan.write(reinterpret_cast<char *>(&ph), sizeof(double));
-		tppoan.write(reinterpret_cast<char *>(&ths), sizeof(double));
-		tppoan.write(reinterpret_cast<char *>(&phs), sizeof(double));
-		tppoan.write(reinterpret_cast<char *>(&scan), sizeof(double));
-		if (jw != 0) {
-		  jw = 0;
-		  tppoan.write(reinterpret_cast<char *>(&(u[0])), sizeof(double));
-		  tppoan.write(reinterpret_cast<char *>(&(u[1])), sizeof(double));
-		  tppoan.write(reinterpret_cast<char *>(&(u[2])), sizeof(double));
-		}
-		fprintf(
-			output,
-			"********** JTH =%3d, JPH =%3d, JTHS =%3d, JPHS =%3d ********************\n",
-			jth, jph, jths, jphs
-			);
-		fprintf(
-			output,
-			"  TIDG=%10.3lE, PIDG=%10.3lE, TSDG=%10.3lE, PSDG=%10.3lE\n",
-			th, ph, ths, phs
-			);
-		fprintf(output, "  SCAND=%10.3lE\n", scan);
-		fprintf(output, "  CFMP=%15.7lE, SFMP=%15.7lE\n", cfmp, sfmp);
-		fprintf(output, "  CFSP=%15.7lE, SFSP=%15.7lE\n", cfsp, sfsp);
-		if (meridional_type >= 0) {
-		  fprintf(output, "  UNI=(%12.5lE,%12.5lE,%12.5lE)\n", un[0], un[1], un[2]);
-		  fprintf(output, "  UNS=(%12.5lE,%12.5lE,%12.5lE)\n", uns[0], uns[1], uns[2]);
-		} else {
-		  fprintf(output, "  UN=(%12.5lE,%12.5lE,%12.5lE)\n", un[0], un[1], un[2]);
-		}
-		sscr2(nsph, l_max, vk, exri, c1);
-		for (int ns226 = 0; ns226 < nsph; ns226++) {
-		  int ns = ns226 + 1;
-		  fprintf(output, "     SPHERE %2d\n", ns);
-		  fprintf(
-			  output, "  SAS(1,1)=%15.7lE%15.7lE, SAS(2,1)=%15.7lE%15.7lE\n",
-			  real(c1->sas[ns226][0][0]), imag(c1->sas[ns226][0][0]),
-			  real(c1->sas[ns226][1][0]), imag(c1->sas[ns226][1][0])
-			  );
-		  fprintf(
-			  output, "  SAS(1,2)=%15.7lE%15.7lE, SAS(2,2)=%15.7lE%15.7lE\n",
-			  real(c1->sas[ns226][0][1]), imag(c1->sas[ns226][0][1]),
-			  real(c1->sas[ns226][1][1]), imag(c1->sas[ns226][1][1])
-			  );
-		  if (jths == 1 && jphs == 1)
-		    fprintf(
-			    output, "  Fx=%15.7lE, Fy=%15.7lE, Fz=%15.7lE\n",
-			    frx, fry, frz
-			    );
-		  for (int i225 = 0; i225 < 16; i225++) c1->vint[i225] = c1->vints[ns226][i225];
-		  mmulc(c1->vint, cmullr, cmul);
-		  fprintf(output, "  MULS\n        ");
-		  for (int imul = 0; imul < 4; imul++) {
-		    for (int jmul = 0; jmul < 4; jmul++) {
-		      fprintf(output, "%15.7lE", cmul[imul][jmul]);
-		    }
-		    if (imul < 3) fprintf(output, "\n        ");
-		    else fprintf(output, "\n");
-		  }
-		  fprintf(output, "  MULSLR\n        ");
-		  for (int imul = 0; imul < 4; imul++) {
-		    for (int jmul = 0; jmul < 4; jmul++) {
-		      fprintf(output, "%15.7lE", cmullr[imul][jmul]);
-		    }
-		    if (imul < 3) fprintf(output, "\n        ");
-		    else fprintf(output, "\n");
-		  }
-		  for (int vi = 0; vi < 16; vi++) {
-		    double value = real(c1->vint[vi]);
-		    tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
-		    value = imag(c1->vint[vi]);
-		    tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
-		  }
-		  for (int imul = 0; imul < 4; imul++) {
-		    for (int jmul = 0; jmul < 4; jmul++) {
-		      tppoan.write(reinterpret_cast<char *>(&(cmul[imul][jmul])), sizeof(double));
-		    }
-		  }
-		} // ns226 loop
-		if (meridional_type < 1) phs += sc_phi_step;
-	      } // jphs480 loop
-	      if (meridional_type <= 1) thsl += sc_theta_step;
-	    } // jths482 loop
-	    ph += in_phi_step;
-	  } // jph484 loop on elevation
-	  th += in_theta_step;
-	} // jth486 loop on azimuth
-	logger->log("INFO: finished scale iteration " + to_string(jxi) + " of " + to_string(nxi) + ".\n");
-      } //jxi488 loop on scales
-      tppoan.close();
-    } else { // In case TPPOAN could not be opened. Should never happen.
-      logger->err("ERROR: failed to open TPPOAN file.\n");
-    }
-    fclose(output);
-    delete c1;
-    for (int zi = l_max - 1; zi > -1; zi--) {
-      for (int zj = 0; zj < 3; zj++) {
-	for (int zk = 0; zk < 2; zk++) {
-	  delete[] zpv[zi][zj][zk];
-	}
-	delete[] zpv[zi][zj];
-      }
-      delete[] zpv[zi];
+	    }
+	    for (int vi = 0; vi < 16; vi++) {
+	      value = real(sid->c1->vint[vi]);
+	      vtppoanp->append_line(VirtualBinaryLine(value));
+	      value = imag(sid->c1->vint[vi]);
+	      vtppoanp->append_line(VirtualBinaryLine(value));
+	    }
+	    for (int imul = 0; imul < 4; imul++) {
+	      for (int jmul = 0; jmul < 4; jmul++) {
+		value = sid->cmul[imul][jmul];
+		vtppoanp->append_line(VirtualBinaryLine(value));
+	      }
+	    }
+	  } // ns226 loop
+	  if (gconf->isam < 1) phs += sa->phsstp;
+	  done_dirs++;
+	} // jphs480 loop
+	if (gconf->isam <= 1) thsl += sa->thsstp;
+      } // jths482 loop
+      ph += sa->phstp;
+    } // jph484 loop on azimuth (phi)
+    th += sa->thstp;
+  } // jth486 loop on elevation (theta)
+  oi->vec_jxi[jxi488] = jxi;
+  logger->log("INFO: finished scale iteration " + to_string(jxi) + " of " + to_string(nxi) + ".\n");
+  return jer;
+}
+
+// >>> IMPLEMENTATION OF SphereIterationData CLASS <<<
+SphereIterationData::SphereIterationData(
+  GeometryConfiguration *gconf, ScattererConfiguration *sconf,
+  const mixMPI *mpidata, const int device_count
+) {
+  const dcomplex cc0 = 0.0 + I * 0.0;
+  _nsph = gconf->number_of_spheres;
+  _lm = gconf->l_max;
+  arg = cc0;
+  s0 = cc0;
+  tfsas = cc0;
+  c1 = new ParticleDescriptorSphere(gconf, sconf);
+  argi = new double[1]();
+  args = new double[1]();
+  scan = 0.0;
+  cfmp = 0.0;
+  cfsp = 0.0;
+  sfmp = 0.0;
+  sfsp = 0.0;
+  wn = sconf->wp / 3.0e8;
+  xip = sconf->xip;
+  vk = 0.0;
+  // Scale block initialization
+  number_of_scales = sconf->number_of_scales;
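+  // xiblock is the number of scale indices assigned to each MPI rank;
+  // firstxi and lastxi are the 1-based bounds of this rank's contiguous
+  // block, with lastxi clamped to the total number of scales.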
+  xiblock = (int) ceil(((double) (sconf->number_of_scales-1))/((double) mpidata->nprocs));
+  lastxi = ((mpidata->rank+1) * xiblock)+1;
+  firstxi = lastxi-xiblock+1;
+  if (lastxi > sconf->number_of_scales) lastxi = sconf->number_of_scales;
+  // End of scale block initialization
+  gaps = new double[_nsph]();
+  duk = new double[3]();
+  u = new double[3]();
+  us = new double[3]();
+  un = new double[3]();
+  uns = new double[3]();
+  up = new double[3]();
+  ups = new double[3]();
+  upmp = new double[3]();
+  upsmp = new double[3]();
+  unmp = new double[3]();
+  unsmp = new double[3]();
+  vec_cmul = new double[16]();
+  vec_cmullr = new double[16]();
+  cmul = new double*[4];
+  cmullr = new double*[4];
+  for (int ci = 0; ci < 4; ci++) {
+    cmul[ci] = (vec_cmul + 4 * ci);
+    cmullr[ci] = (vec_cmullr + 4 * ci);
+  }
+  vec_tqspe = new dcomplex[2 * _nsph]();
+  vec_tqsps = new dcomplex[2 * _nsph]();
+  vec_tqse = new double[2 * _nsph]();
+  vec_tqss = new double[2 * _nsph]();
+  tqspe = new dcomplex*[2];
+  tqsps = new dcomplex*[2];
+  tqse = new double*[2];
+  tqss = new double*[2];
+  for (int ti = 0; ti < 2; ti++) {
+    tqspe[ti] = (vec_tqspe + _nsph * ti);
+    tqsps[ti] = (vec_tqsps + _nsph * ti);
+    tqse[ti] = (vec_tqse + _nsph * ti);
+    tqss[ti] = (vec_tqss + _nsph * ti);
+  }
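+  // vec_zpv is a single contiguous buffer of 12 * _lm doubles; zpv exposes
+  // it as an [lm][3][2][2] view, so the innermost pointers address slices of
+  // vec_zpv and are never deleted individually.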
+  vec_zpv = new double[_lm * 12]();
+  zpv = new double***[_lm];
+  for (int zi = 0; zi < _lm; zi++) {
+    zpv[zi] = new double**[3];
+    for (int zj = 0; zj < 3; zj++) {
+      int vec_index = 12 * zi + 4 * zj;
+      zpv[zi][zj] = new double*[2];
+      zpv[zi][zj][0] = (vec_zpv + vec_index);
+      zpv[zi][zj][1] = (vec_zpv + vec_index + 2);
     }
-    delete[] zpv;
-    delete[] duk;
-    delete[] u;
-    delete[] us;
-    delete[] un;
-    delete[] uns;
-    delete[] up;
-    delete[] ups;
-    delete[] upmp;
-    delete[] upsmp;
-    delete[] unmp;
-    delete[] unsmp;
-    delete[] argi;
-    delete[] args;
-    delete[] gaps;
-    for (int i = 3; i > -1; i--) {
-      delete[] cmul[i];
-      delete[] cmullr[i];
+  }
+}
+
+SphereIterationData::SphereIterationData(const SphereIterationData &rhs) {
+  _nsph = rhs._nsph;
+  _lm = rhs._lm;
+  arg = rhs.arg;
+  s0 = rhs.s0;
+  tfsas = rhs.tfsas;
+  c1 = new ParticleDescriptorSphere(reinterpret_cast<ParticleDescriptorSphere &>(*(rhs.c1)));
+  argi = new double[1];
+  args = new double[1];
+  argi[0] = rhs.argi[0];
+  args[0] = rhs.args[0];
+  scan = rhs.scan;
+  cfmp = rhs.cfmp;
+  cfsp = rhs.cfsp;
+  sfmp = rhs.sfmp;
+  sfsp = rhs.sfsp;
+  wn = rhs.wn;
+  xip = rhs.xip;
+  vk = rhs.vk;
+  // Scale block initialization
+  number_of_scales = rhs.number_of_scales;
+  xiblock = rhs.xiblock;
+  lastxi = rhs.lastxi;
+  firstxi = rhs.firstxi;
+  // End of scale block initialization
+  gaps = new double[_nsph];
+  for (int si = 0; si < _nsph; si++) gaps[si] = rhs.gaps[si];
+  duk = new double[3];
+  u = new double[3];
+  us = new double[3];
+  un = new double[3];
+  uns = new double[3];
+  up = new double[3];
+  ups = new double[3];
+  upmp = new double[3];
+  upsmp = new double[3];
+  unmp = new double[3];
+  unsmp = new double[3];
+  for (int ui = 0; ui < 3; ui++) {
+    duk[ui] = rhs.duk[ui];
+    u[ui] = rhs.u[ui];
+    us[ui] = rhs.us[ui];
+    un[ui] = rhs.un[ui];
+    uns[ui] = rhs.uns[ui];
+    up[ui] = rhs.up[ui];
+    ups[ui] = rhs.ups[ui];
+    upmp[ui] = rhs.upmp[ui];
+    upsmp[ui] = rhs.upsmp[ui];
+    unmp[ui] = rhs.unmp[ui];
+    unsmp[ui] = rhs.unsmp[ui];
+  }
+  vec_cmul = new double[16];
+  vec_cmullr = new double[16];
+  for (int mi = 0; mi < 16; mi++) {
+    vec_cmul[mi] = rhs.vec_cmul[mi];
+    vec_cmullr[mi] = rhs.vec_cmullr[mi];
+  }
+  cmul = new double*[4];
+  cmullr = new double*[4];
+  for (int ci = 0; ci < 4; ci++) {
+    cmul[ci] = (vec_cmul + 4 * ci);
+    cmullr[ci] = (vec_cmullr + 4 * ci);
+  }
+  vec_tqspe = new dcomplex[2 * _nsph]();
+  vec_tqsps = new dcomplex[2 * _nsph]();
+  vec_tqse = new double[2 * _nsph]();
+  vec_tqss = new double[2 * _nsph]();
+  for (int ti = 0; ti < 2 * _nsph; ti++) {
+    vec_tqspe[ti] = rhs.vec_tqspe[ti];
+    vec_tqsps[ti] = rhs.vec_tqsps[ti];
+    vec_tqse[ti] = rhs.vec_tqse[ti];
+    vec_tqss[ti] = rhs.vec_tqss[ti];
+  }
+  tqspe = new dcomplex*[2];
+  tqsps = new dcomplex*[2];
+  tqse = new double*[2];
+  tqss = new double*[2];
+  for (int ti = 0; ti < 2; ti++) {
+    tqspe[ti] = (vec_tqspe + _nsph * ti);
+    tqsps[ti] = (vec_tqsps + _nsph * ti);
+    tqse[ti] = (vec_tqse + _nsph * ti);
+    tqss[ti] = (vec_tqss + _nsph * ti);
+  }
+  vec_zpv = new double[_lm * 12];
+  for (int zi = 0; zi < _lm * 12; zi++) vec_zpv[zi] = rhs.vec_zpv[zi];
+  zpv = new double***[_lm];
+  for (int zi = 0; zi < _lm; zi++) {
+    zpv[zi] = new double**[3];
+    for (int zj = 0; zj < 3; zj++) {
+      int vec_index = 12 * zi + 4 * zj;
+      zpv[zi][zj] = new double*[2];
+      zpv[zi][zj][0] = (vec_zpv + vec_index);
+      zpv[zi][zj][1] = (vec_zpv + vec_index + 2);
     }
-    delete[] cmul;
-    delete[] cmullr;
-    for (int ti = 1; ti > -1; ti--) {
-      delete[] tqse[ti];
-      delete[] tqss[ti];
-      delete[] tqspe[ti];
-      delete[] tqsps[ti];
+  }
+}
+
+SphereIterationData::~SphereIterationData() {
+  int lm = c1->li;
+  delete c1;
+  delete[] argi;
+  delete[] args;
+  delete[] gaps;
+  delete[] duk;
+  delete[] u;
+  delete[] us;
+  delete[] un;
+  delete[] uns;
+  delete[] up;
+  delete[] ups;
+  delete[] upmp;
+  delete[] upsmp;
+  delete[] unmp;
+  delete[] unsmp;
+  delete[] vec_cmul;
+  delete[] vec_cmullr;
+  delete[] cmul;
+  delete[] cmullr;
+  delete[] vec_tqspe;
+  delete[] vec_tqsps;
+  delete[] vec_tqse;
+  delete[] vec_tqss;
+  delete[] tqspe;
+  delete[] tqsps;
+  delete[] tqse;
+  delete[] tqss;
+  delete[] vec_zpv;
+  for (int zi = 0; zi < lm; zi++) {
+    for (int zj = 0; zj < 3; zj++) {
+      delete[] zpv[zi][zj];
     }
-    delete[] tqse;
-    delete[] tqss;
-    delete[] tqspe;
-    delete[] tqsps;
-    logger->log("Finished: output written to " + output_path + "/c_OSPH.\n");
-  } else { // NSPH mismatch between geometry and scatterer configurations.
-    throw UnrecognizedConfigurationException(
-					     "Inconsistent geometry and scatterer configurations."
-					     );
+    delete[] zpv[zi];
   }
-  delete sconf;
-  delete gconf;
-  delete logger;
+  delete[] zpv;
+}
+
+#ifdef MPI_VERSION
+SphereIterationData::SphereIterationData(const mixMPI *mpidata, const int device_count) {
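+  // Receiving-side constructor: it allocates local storage and must match,
+  // broadcast for broadcast, the sequence issued by mpibcast() on the root
+  // rank (the child ParticleDescriptor handles its own broadcasts).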
+  // Build the child ParticleDescriptor object
+  c1 = new ParticleDescriptorSphere(mpidata);
+
+  // Collect the scalar values
+  MPI_Bcast(&_nsph, 1, MPI_INT32_T, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_lm, 1, MPI_INT32_T, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&s0, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&tfsas, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  lastxi = ((mpidata->rank+1) * xiblock)+1;
+  firstxi = lastxi-xiblock+1;
+  if (lastxi > number_of_scales) lastxi = number_of_scales;
+
+  // Collect length-1 vectors
+  argi = new double[1];
+  args = new double[1];
+  MPI_Bcast(argi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  
+  // Collect vectors whose size depends on NSPH
+  gaps = new double[_nsph];
+  vec_tqspe = new dcomplex[2 * _nsph];
+  vec_tqsps = new dcomplex[2 * _nsph];
+  vec_tqse = new double[2 * _nsph];
+  vec_tqss = new double[2 * _nsph];
+  MPI_Bcast(gaps, _nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqspe, 2 * _nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqsps, 2 * _nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqse, 2 * _nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqss, 2 * _nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  // Collect length-3 vectors
+  duk = new double[3];
+  u = new double[3];
+  us = new double[3];
+  un = new double[3];
+  uns = new double[3];
+  up = new double[3];
+  ups = new double[3];
+  upmp = new double[3];
+  upsmp = new double[3];
+  unmp = new double[3];
+  unsmp = new double[3];
+  MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  
+  // Collect length-16 vectors
+  vec_cmul = new double[16];
+  vec_cmullr = new double[16];
+  MPI_Bcast(vec_cmul, 16, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_cmullr, 16, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  // Collect vectors whose size depends on LM
+  vec_zpv = new double[12 * _lm];
+  MPI_Bcast(vec_zpv, 12 * _lm, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+}
+
+int SphereIterationData::mpibcast(const mixMPI *mpidata) {
+  int result = 0;
+  // Broadcast child object
+  c1->mpibcast(mpidata);
+  
+  // Broadcast scalar values
+  MPI_Bcast(&_nsph, 1, MPI_INT32_T, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_lm, 1, MPI_INT32_T, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&s0, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&tfsas, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+  // Broadcast length-1 vectors
+  MPI_Bcast(argi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  // Broadcast vectors whose size depends on NSPH
+  MPI_Bcast(gaps, _nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqspe, 2 * _nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqsps, 2 * _nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqse, 2 * _nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqss, 2 * _nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  // Broadcast length-3 vectors
+  MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  
+  // Broadcast length-16 vectors
+  MPI_Bcast(vec_cmul, 16, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_cmullr, 16, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  // Broadcast vectors whose size depends on LM
+  MPI_Bcast(vec_zpv, 12 * _lm, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  
+  return 0;
 }
+#endif // MPI_VERSION
+// >>> END OF SphereIterationData CLASS IMPLEMENTATION <<<
diff --git a/src/testing/test_ParticleDescriptor.cpp b/src/testing/test_ParticleDescriptor.cpp
index 9eb221887ba1df88d32297885a479ba3f48e89ac..1cf34ad91e0fb63d9c37ba870bf9bbf9b6bf5fa5 100644
--- a/src/testing/test_ParticleDescriptor.cpp
+++ b/src/testing/test_ParticleDescriptor.cpp
@@ -56,7 +56,7 @@ int test_cluster_devel() {
     const string scat_data_file = "../../test_data/cluster/DEDFB";
     GeometryConfiguration *gconf = GeometryConfiguration::from_legacy(geom_data_file);
     ScattererConfiguration *sconf = ScattererConfiguration::from_dedfb(scat_data_file);
-    ParticleDescriptor *pd = new ParticleDescriptorCluster(gconf, sconf);
+    ParticleDescriptorCluster *pd = new ParticleDescriptorCluster(gconf, sconf);
     delete gconf;
     delete sconf;
     delete pd;
@@ -73,7 +73,7 @@ int test_inclusion() {
     const string scat_data_file = "../../test_data/inclusion/DEDFB";
     GeometryConfiguration *gconf = GeometryConfiguration::from_legacy(geom_data_file);
     ScattererConfiguration *sconf = ScattererConfiguration::from_dedfb(scat_data_file);
-    ParticleDescriptor *pd = new ParticleDescriptorInclusion(gconf, sconf);
+    ParticleDescriptorInclusion *pd = new ParticleDescriptorInclusion(gconf, sconf);
     delete gconf;
     delete sconf;
     delete pd;
@@ -90,7 +90,7 @@ int test_sphere() {
     const string scat_data_file = "../../test_data/sphere/DEDFB";
     GeometryConfiguration *gconf = GeometryConfiguration::from_legacy(geom_data_file);
     ScattererConfiguration *sconf = ScattererConfiguration::from_dedfb(scat_data_file);
-    ParticleDescriptor *pd = new ParticleDescriptorSphere(gconf, sconf);
+    ParticleDescriptorSphere *pd = new ParticleDescriptorSphere(gconf, sconf);
     delete gconf;
     delete sconf;
     delete pd;
diff --git a/src/testing/test_sphere_outputs.cpp b/src/testing/test_sphere_outputs.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5acb13c197768345dfcf846b0e538dcfe2fc7e53
--- /dev/null
+++ b/src/testing/test_sphere_outputs.cpp
@@ -0,0 +1,65 @@
+#include <string>
+
+#ifndef INCLUDE_TYPES_H_
+#include "../include/types.h"
+#endif
+
+#ifndef INCLUDE_ERRORS_H_
+#include "../include/errors.h"
+#endif
+
+#ifndef INCLUDE_CONFIGURATION_H_
+#include "../include/Configuration.h"
+#endif
+
+#ifndef INCLUDE_COMMONS_H_
+#include "../include/Commons.h"
+#endif
+
+#ifndef INCLUDE_OUTPUTS_H_
+#include "../include/outputs.h"
+#endif
+
+using namespace std;
+
+int test_sphere_hdf5_output();
+int test_sphere_devel();
+
+int main() {
+  int result = 0;
+  result += test_sphere_hdf5_output(); // 1 if failed
+  result += test_sphere_devel(); // 10 if failed
+  return result;
+}
+
+int test_sphere_hdf5_output() {
+  int result = 0;
+  try {
+    const string hdf5_file = "../../test_data/sphere/c_OSPH.hd5";
+    SphereOutputInfo *oi = new SphereOutputInfo(hdf5_file);
+    oi->write("c_OSPH", "LEGACY");
+    delete oi;
+  } catch (const exception& ex) {
+    result = 1;
+  }
+  return result;
+}
+
+int test_sphere_devel() {
+  int result = 0;
+  try {
+    const string geom_data_file = "../../test_data/sphere/DSPH";
+    const string scat_data_file = "../../test_data/sphere/DEDFB";
+    mixMPI *mpidata = new mixMPI();
+    GeometryConfiguration *gconf = GeometryConfiguration::from_legacy(geom_data_file);
+    ScattererConfiguration *sconf = ScattererConfiguration::from_dedfb(scat_data_file);
+    SphereOutputInfo *oi = new SphereOutputInfo(sconf, gconf, mpidata);
+    delete gconf;
+    delete sconf;
+    delete oi;
+    delete mpidata;
+  } catch (const exception& ex) {
+    result = 10;
+  }
+  return result;
+}
diff --git a/test_data/inclusion/c_OINCLU.hd5 b/test_data/inclusion/c_OINCLU.hd5
index c32b085708391b2755357c05f1fb32a63f5772a0..7d7371ffd94387790e15c2cda446db9c7891f87a 100644
Binary files a/test_data/inclusion/c_OINCLU.hd5 and b/test_data/inclusion/c_OINCLU.hd5 differ
diff --git a/test_data/sphere/c_OSPH.hd5 b/test_data/sphere/c_OSPH.hd5
new file mode 100644
index 0000000000000000000000000000000000000000..17c682f024d1896edd5db82abff4f10e79b2e75e
Binary files /dev/null and b/test_data/sphere/c_OSPH.hd5 differ