diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp
index 742cc6fdd5e1da4ef92c2a28ec636ceb478985b8..306dafa3ee67dfd2978f2de6399d6c64ec628093 100644
--- a/src/cluster/cluster.cpp
+++ b/src/cluster/cluster.cpp
@@ -12,6 +12,11 @@
 #ifdef _OPENMP
 #include <omp.h>
 #endif
+#ifdef USE_MPI
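+// mpi.h defines the MPI_VERSION macro, so this guard includes it only when MPI support is requested and it has not been pulled in already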
+#ifndef MPI_VERSION
+#include <mpi.h>
+#endif
+#endif
 
 #ifndef INCLUDE_TYPES_H_
 #include "../include/types.h"
@@ -61,242 +66,401 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
  *  \param data_file: `string` Name of the input data file.
  *  \param output_path: `string` Directory to write the output files in.
  */
-void cluster(const string& config_file, const string& data_file, const string& output_path) {
+void cluster(const string& config_file, const string& data_file, const string& output_path, const mixMPI *mpidata) {
   chrono::time_point<chrono::high_resolution_clock> t_start = chrono::high_resolution_clock::now();
   chrono::duration<double> elapsed;
   string message;
-  string timing_name = output_path + "/c_timing.log";
+  string timing_name = output_path + "/c_timing_mpi"+ to_string(mpidata->rank) +".log";
   FILE *timing_file = fopen(timing_name.c_str(), "w");
   Logger *time_logger = new Logger(LOG_DEBG, timing_file);
   Logger *logger = new Logger(LOG_INFO);
-  logger->log("INFO: making legacy configuration...", LOG_INFO);
-  ScattererConfiguration *sconf = NULL;
-  try {
-    sconf = ScattererConfiguration::from_dedfb(config_file);
-  } catch(const OpenConfigurationFileException &ex) {
-    logger->err("\nERROR: failed to open scatterer configuration file.\n");
-    string message = "FILE: " + string(ex.what()) + "\n";
-    logger->err(message);
-    exit(1);
-  }
-  sconf->write_formatted(output_path + "/c_OEDFB");
-  sconf->write_binary(output_path + "/c_TEDF");
-  sconf->write_binary(output_path + "/c_TEDF.hd5", "HDF5");
-  GeometryConfiguration *gconf = NULL;
-  try {
-    gconf = GeometryConfiguration::from_legacy(data_file);
-  } catch (const OpenConfigurationFileException &ex) {
-    logger->err("\nERROR: failed to open geometry configuration file.\n");
-    string message = "FILE: " + string(ex.what()) + "\n";
-    logger->err(message);
-    if (sconf) delete sconf;
-    exit(1);
-  }
-  logger->log(" done.\n", LOG_INFO);
-  int s_nsph = sconf->number_of_spheres;
-  int nsph = gconf->number_of_spheres;
-  if (s_nsph == nsph) {
-    // Shortcuts to variables stored in configuration objects
-    ScatteringAngles *p_scattering_angles = new ScatteringAngles(gconf);
-    double wp = sconf->wp;
-    FILE *output = fopen((output_path + "/c_OCLU").c_str(), "w");
-    ClusterIterationData *cid = new ClusterIterationData(gconf, sconf);
-    const int ndi = cid->c4->nsph * cid->c4->nlim;
-    np_int ndit = 2 * ndi;
-    logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)ndit) + " x " + to_string((int64_t)ndit) +".\n");
-    time_logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)ndit) + " x " + to_string((int64_t)ndit) +".\n");
-    fprintf(output, " READ(IR,*)NSPH,LI,LE,MXNDM,INPOL,NPNT,NPNTTS,IAVM,ISAM\n");
-    fprintf(output, " %5d%5d%5d%5ld%5d%5d%5d%5d%5d\n",
-	    nsph, cid->c4->li, cid->c4->le, gconf->mxndm, gconf->in_pol, gconf->npnt,
-	    gconf->npntts, gconf->iavm, gconf->iavm
-	    );
-    fprintf(output, " READ(IR,*)RXX(I),RYY(I),RZZ(I)\n");
-    for (int ri = 0; ri < nsph; ri++) fprintf(output, "%17.8lE%17.8lE%17.8lE\n",
-					      gconf->get_sph_x(ri), gconf->get_sph_y(ri), gconf->get_sph_z(ri)
-					      );
-    fprintf(output, " READ(IR,*)TH,THSTP,THLST,THS,THSSTP,THSLST\n");
-    fprintf(
-	    output, " %10.3lE%10.3lE%10.3lE%10.3lE%10.3lE%10.3lE\n",
-	    p_scattering_angles->th, p_scattering_angles->thstp,
-	    p_scattering_angles->thlst, p_scattering_angles->ths,
-	    p_scattering_angles->thsstp, p_scattering_angles->thslst
-	    );
-    fprintf(output, " READ(IR,*)PH,PHSTP,PHLST,PHS,PHSSTP,PHSLST\n");
-    fprintf(
-	    output, " %10.3lE%10.3lE%10.3lE%10.3lE%10.3lE%10.3lE\n",
-	    p_scattering_angles->ph, p_scattering_angles->phstp,
-	    p_scattering_angles->phlst, p_scattering_angles->phs,
-	    p_scattering_angles->phsstp, p_scattering_angles->phslst
-	    );
-    fprintf(output, " READ(IR,*)JWTM\n");
-    fprintf(output, " %5d\n", gconf->jwtm);
-    fprintf(output, "  READ(ITIN)NSPHT\n");
-    fprintf(output, "  READ(ITIN)(IOG(I),I=1,NSPH)\n");
-    fprintf(output, "  READ(ITIN)EXDC,WP,XIP,IDFC,NXI\n");
-    fprintf(output, "  READ(ITIN)(XIV(I),I=1,NXI)\n");
-    fprintf(output, "  READ(ITIN)NSHL(I),ROS(I)\n");
-    fprintf(output, "  READ(ITIN)(RCF(I,NS),NS=1,NSH)\n");
-    fprintf(output, " \n");
-    str(sconf, cid->c1, cid->c1ao, cid->c3, cid->c4, cid->c6);
-    thdps(cid->c4->lm, cid->zpv);
-    double exdc = sconf->exdc;
-    double exri = sqrt(exdc);
-    fprintf(output, "  REFR. INDEX OF EXTERNAL MEDIUM=%15.7lE\n", exri);
-    fstream *tppoanp = new fstream;
-    fstream &tppoan = *tppoanp;
-    string tppoan_name = output_path + "/c_TPPOAN";
-    tppoan.open(tppoan_name.c_str(), ios::out | ios::binary);
-    if (tppoan.is_open()) {
+  // the following only happens on MPI process 0
+  if (mpidata->rank == 0) {
+    logger->log("INFO: making legacy configuration...", LOG_INFO);
+    ScattererConfiguration *sconf = NULL;
+    try {
+      sconf = ScattererConfiguration::from_dedfb(config_file);
+    } catch(const OpenConfigurationFileException &ex) {
+      logger->err("\nERROR: failed to open scatterer configuration file.\n");
+      string message = "FILE: " + string(ex.what()) + "\n";
+      logger->err(message);
+      exit(1);
+    }
+    sconf->write_formatted(output_path + "/c_OEDFB");
+    sconf->write_binary(output_path + "/c_TEDF");
+    sconf->write_binary(output_path + "/c_TEDF.hd5", "HDF5");
+    GeometryConfiguration *gconf = NULL;
+    try {
+      gconf = GeometryConfiguration::from_legacy(data_file);
+    } catch (const OpenConfigurationFileException &ex) {
+      logger->err("\nERROR: failed to open geometry configuration file.\n");
+      string message = "FILE: " + string(ex.what()) + "\n";
+      logger->err(message);
+      if (sconf) delete sconf;
+      exit(1);
+    }
+    logger->log(" done.\n", LOG_INFO);
+    int s_nsph = sconf->number_of_spheres;
+    int nsph = gconf->number_of_spheres;
+    if (s_nsph == nsph) {
+      // Shortcuts to variables stored in configuration objects
+      ScatteringAngles *p_scattering_angles = new ScatteringAngles(gconf);
+      double wp = sconf->wp;
+      FILE *output = fopen((output_path + "/c_OCLU").c_str(), "w");
+      ClusterIterationData *cid = new ClusterIterationData(gconf, sconf, mpidata);
+      const int ndi = cid->c4->nsph * cid->c4->nlim;
+      np_int ndit = 2 * ndi;
+      logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)ndit) + " x " + to_string((int64_t)ndit) +".\n");
+      time_logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)ndit) + " x " + to_string((int64_t)ndit) +".\n");
+      fprintf(output, " READ(IR,*)NSPH,LI,LE,MXNDM,INPOL,NPNT,NPNTTS,IAVM,ISAM\n");
+      fprintf(output, " %5d%5d%5d%5ld%5d%5d%5d%5d%5d\n",
+	      nsph, cid->c4->li, cid->c4->le, gconf->mxndm, gconf->in_pol, gconf->npnt,
+	      gconf->npntts, gconf->iavm, gconf->iavm
+	      );
+      fprintf(output, " READ(IR,*)RXX(I),RYY(I),RZZ(I)\n");
+      for (int ri = 0; ri < nsph; ri++) fprintf(output, "%17.8lE%17.8lE%17.8lE\n",
+						gconf->get_sph_x(ri), gconf->get_sph_y(ri), gconf->get_sph_z(ri)
+						);
+      fprintf(output, " READ(IR,*)TH,THSTP,THLST,THS,THSSTP,THSLST\n");
+      fprintf(
+	      output, " %10.3lE%10.3lE%10.3lE%10.3lE%10.3lE%10.3lE\n",
+	      p_scattering_angles->th, p_scattering_angles->thstp,
+	      p_scattering_angles->thlst, p_scattering_angles->ths,
+	      p_scattering_angles->thsstp, p_scattering_angles->thslst
+	      );
+      fprintf(output, " READ(IR,*)PH,PHSTP,PHLST,PHS,PHSSTP,PHSLST\n");
+      fprintf(
+	      output, " %10.3lE%10.3lE%10.3lE%10.3lE%10.3lE%10.3lE\n",
+	      p_scattering_angles->ph, p_scattering_angles->phstp,
+	      p_scattering_angles->phlst, p_scattering_angles->phs,
+	      p_scattering_angles->phsstp, p_scattering_angles->phslst
+	      );
+      fprintf(output, " READ(IR,*)JWTM\n");
+      fprintf(output, " %5d\n", gconf->jwtm);
+      fprintf(output, "  READ(ITIN)NSPHT\n");
+      fprintf(output, "  READ(ITIN)(IOG(I),I=1,NSPH)\n");
+      fprintf(output, "  READ(ITIN)EXDC,WP,XIP,IDFC,NXI\n");
+      fprintf(output, "  READ(ITIN)(XIV(I),I=1,NXI)\n");
+      fprintf(output, "  READ(ITIN)NSHL(I),ROS(I)\n");
+      fprintf(output, "  READ(ITIN)(RCF(I,NS),NS=1,NSH)\n");
+      fprintf(output, " \n");
+      str(sconf, cid->c1, cid->c1ao, cid->c3, cid->c4, cid->c6);
+      thdps(cid->c4->lm, cid->zpv);
+      double exdc = sconf->exdc;
+      double exri = sqrt(exdc);
+      fprintf(output, "  REFR. INDEX OF EXTERNAL MEDIUM=%15.7lE\n", exri);
+      fstream *tppoanp = new fstream;
+      fstream &tppoan = *tppoanp;
+      string tppoan_name = output_path + "/c_TPPOAN";
+      tppoan.open(tppoan_name.c_str(), ios::out | ios::binary);
+      if (tppoan.is_open()) {
 #ifdef USE_LAPACK
-      logger->log("INFO: using LAPACK calls.\n", LOG_INFO);
+	logger->log("INFO: using LAPACK calls.\n", LOG_INFO);
 #else
-      logger->log("INFO: using fall-back lucin() calls.\n", LOG_INFO);
+	logger->log("INFO: using fall-back lucin() calls.\n", LOG_INFO);
 #endif
-      int iavm = gconf->iavm;
-      int isam = gconf->isam;
-      int inpol = gconf->in_pol;
-      int nxi = sconf->number_of_scales;
-      int nth = p_scattering_angles->nth;
-      int nths = p_scattering_angles->nths;
-      int nph = p_scattering_angles->nph;
-      int nphs = p_scattering_angles->nphs;
-      tppoan.write(reinterpret_cast<char *>(&iavm), sizeof(int));
-      tppoan.write(reinterpret_cast<char *>(&isam), sizeof(int));
-      tppoan.write(reinterpret_cast<char *>(&inpol), sizeof(int));
-      tppoan.write(reinterpret_cast<char *>(&nxi), sizeof(int));
-      tppoan.write(reinterpret_cast<char *>(&nth), sizeof(int));
-      tppoan.write(reinterpret_cast<char *>(&nph), sizeof(int));
-      tppoan.write(reinterpret_cast<char *>(&nths), sizeof(int));
-      tppoan.write(reinterpret_cast<char *>(&nphs), sizeof(int));
-      if (sconf->idfc < 0) {
-	cid->vk = cid->xip * cid->wn;
-	fprintf(output, "  VK=%15.7lE, XI IS SCALE FACTOR FOR LENGTHS\n", cid->vk);
-	fprintf(output, " \n");
-      }
-      // do the first iteration on jxi488 separately, since it seems to be different from the others
-      int jxi488 = 1;
-      chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
-      int jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, output, output_path, tppoan, logger);
-      chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now();
-      elapsed = start_iter_1 - t_start;
-      message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n";
-      logger->log(message);
-      time_logger->log(message);
-      elapsed = end_iter_1 - start_iter_1;
-      message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n";
-      logger->log(message);
-      time_logger->log(message);
+	int iavm = gconf->iavm;
+	int isam = gconf->isam;
+	int inpol = gconf->in_pol;
+	int nxi = sconf->number_of_scales;
+	int nth = p_scattering_angles->nth;
+	int nths = p_scattering_angles->nths;
+	int nph = p_scattering_angles->nph;
+	int nphs = p_scattering_angles->nphs;
+	tppoan.write(reinterpret_cast<char *>(&iavm), sizeof(int));
+	tppoan.write(reinterpret_cast<char *>(&isam), sizeof(int));
+	tppoan.write(reinterpret_cast<char *>(&inpol), sizeof(int));
+	tppoan.write(reinterpret_cast<char *>(&nxi), sizeof(int));
+	tppoan.write(reinterpret_cast<char *>(&nth), sizeof(int));
+	tppoan.write(reinterpret_cast<char *>(&nph), sizeof(int));
+	tppoan.write(reinterpret_cast<char *>(&nths), sizeof(int));
+	tppoan.write(reinterpret_cast<char *>(&nphs), sizeof(int));
+	if (sconf->idfc < 0) {
+	  cid->vk = cid->xip * cid->wn;
+	  fprintf(output, "  VK=%15.7lE, XI IS SCALE FACTOR FOR LENGTHS\n", cid->vk);
+	  fprintf(output, " \n");
+	}
+	// do the first iteration on jxi488 separately, since it seems to be different from the others
+	int jxi488 = 1;
+	chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
+	int jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, output, output_path, tppoan, logger);
+	chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now();
+	elapsed = start_iter_1 - t_start;
+	message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n";
+	logger->log(message);
+	time_logger->log(message);
+	elapsed = end_iter_1 - start_iter_1;
+	message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n";
+	logger->log(message);
+	time_logger->log(message);
 
-      // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
-      int ompnumthreads = 1;
+	// here go the calls that send data to be duplicated on other MPI processes from process 0 to others, using MPI broadcasts, but only if MPI is actually used
+#ifdef MPI_VERSION
+	if (mpidata->mpirunning) {
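+	  // each of the mpibcast() calls below must be paired with the matching (const mixMPI *) constructor executed by the other MPI processes (see the else branch near the end of this function)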
+	  gconf->mpibcast(mpidata);
+	  sconf->mpibcast(mpidata);	    
+	  cid->mpibcast(mpidata);
+	  p_scattering_angles->mpibcast(mpidata);
+	}	
+#endif
+	// Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
+	int ompnumthreads = 1;
 
 #pragma omp parallel
-      {
-	// Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defiled value anyway
-	int myompthread = 0;
+	{
+	  // Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defined value anyway
+	  int myompthread = 0;
 #ifdef _OPENMP
-	// If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files
-	myompthread = omp_get_thread_num();
-	if (myompthread == 0) ompnumthreads = omp_get_num_threads();
+	  // If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files
+	  myompthread = omp_get_thread_num();
+	  if (myompthread == 0) ompnumthreads = omp_get_num_threads();
 #endif
-	// To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one. Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number
-	ClusterIterationData *cid_2 = NULL;
-	FILE *output_2 = NULL;
-	fstream *tppoanp_2 = NULL;
-	// for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
-	if (myompthread == 0) {
-	  cid_2 = cid;
-	  output_2 = output;
-	  tppoanp_2 = tppoanp;  
-	} else {
-	  // this is not thread 0, so do create fresh copies of all local variables
-	  cid_2 = new ClusterIterationData(*cid);
-	  output_2 = fopen((output_path + "/c_OCLU_" + to_string(myompthread)).c_str(), "w");
-	  tppoanp_2 = new fstream;
-	  tppoanp_2->open((output_path + "/c_TPPOAN_" + to_string(myompthread)).c_str(), ios::out | ios::binary);
-	}
-	fstream &tppoan_2 = *tppoanp_2;
-	// make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
+	  // To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one. Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number
+	  ClusterIterationData *cid_2 = NULL;
+	  FILE *output_2 = NULL;
+	  fstream *tppoanp_2 = NULL;
+	  // for threads other than thread 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
+	  if (myompthread == 0) {
+	    cid_2 = cid;
+	    output_2 = output;
+	    tppoanp_2 = tppoanp;
+	  } else {
+	    // this is not thread 0, so do create fresh copies of all local variables
+	    cid_2 = new ClusterIterationData(*cid);
+	    output_2 = fopen((output_path + "/c_OCLU_" + to_string(mpidata->rank) + "_" + to_string(myompthread)).c_str(), "w");
+	    tppoanp_2 = new fstream;
+	    tppoanp_2->open((output_path + "/c_TPPOAN_" + to_string(mpidata->rank) + "_" + to_string(myompthread)).c_str(), ios::out | ios::binary);
+	  }
+	  fstream &tppoan_2 = *tppoanp_2;
+	  // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
 #pragma omp barrier
-	if (myompthread==0) logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n");
-	// ok, now I can actually start the parallel calculations
+	  if (myompthread==0) logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n");
+	  // ok, now I can actually start the parallel calculations
 #pragma omp for
-	for (jxi488 = 2; jxi488 <= nxi; jxi488++) {
-	  int jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid_2, output_2, output_path, *tppoanp_2, logger);
-	}
+	  for (jxi488 = cid_2->firstxi; jxi488 <= cid_2->lastxi; jxi488++) {
+	    int jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid_2, output_2, output_path, *tppoanp_2, logger);
+	  }
 
 #pragma omp barrier
-	// only threads different from 0 have to free local copies of variables and close local files
-	if (myompthread != 0) {
-	  delete cid_2;
-	  fclose(output_2);
-	  tppoanp_2->close();
-	  delete tppoanp_2;
-	}
+	  // only threads different from 0 have to free local copies of variables and close local files
+	  if (myompthread != 0) {
+	    delete cid_2;
+	    fclose(output_2);
+	    tppoanp_2->close();
+	    delete tppoanp_2;
+	  }
+#pragma omp barrier
+	  {
+	    message = "INFO: Closing thread-local output files of thread " + to_string(myompthread) + " and syncing threads.\n";
+	    logger->log(message);
+	  }
+	} // closes pragma omp parallel
+#ifdef _OPENMP
 #pragma omp barrier
 	{
-	  message = "INFO: Closing thread-local output files of thread " + to_string(myompthread) + " and syncing threads.\n";
-	  logger->log(message);
+	  // thread 0 already wrote on global files, skip it and take care of appending the others
+	  for (int ri = 1; ri < ompnumthreads; ri++) {
+	    // reopen the thread-local temporary files, append their contents to the global output and tppoan files, then remove them
+	    string partial_file_name = output_path + "/c_OCLU_" + to_string(mpidata->rank) + "_" + to_string(ri);
+	    message = "Copying ASCII output in MPI process " + to_string(mpidata->rank) + " of thread " + to_string(ri) + " of " + to_string(ompnumthreads - 1) + "... ";
+	    logger->log(message, LOG_DEBG);
+	    FILE *partial_output = fopen(partial_file_name.c_str(), "r");
+	    int c = fgetc(partial_output);
+	    while (c != EOF) {
+	      fputc(c, output);
+	      c = fgetc(partial_output);
+	    }
+	    fclose(partial_output);
+	    remove(partial_file_name.c_str());
+	    logger->log("done.\n", LOG_DEBG);
+	    partial_file_name = output_path + "/c_TPPOAN_" + to_string(mpidata->rank) + "_" + to_string(ri);
+	    message = "Copying binary output in MPI process " + to_string(mpidata->rank) + " of thread " + to_string(ri) + " of " + to_string(ompnumthreads - 1) + "... ";
+	    logger->log(message, LOG_DEBG);
+	    fstream partial_tppoan;
+	    partial_tppoan.open(partial_file_name.c_str(), ios::in | ios::binary);
+	    partial_tppoan.seekg(0, ios::end);
+	    long buffer_size = partial_tppoan.tellg();
+	    char *binary_buffer = new char[buffer_size];
+	    partial_tppoan.seekg(0, ios::beg);
+	    partial_tppoan.read(binary_buffer, buffer_size);
+	    tppoan.write(binary_buffer, buffer_size);
+	    partial_tppoan.close();
+	    delete[] binary_buffer;
+	    remove(partial_file_name.c_str());
+	    logger->log("done.\n", LOG_DEBG);
+	  }
 	}
-      } // closes pragma omp parallel
+#endif
+	// here goes the code that appends the files written by MPI processes > 0 to the ones on MPI process 0
+#ifdef MPI_VERSION
+	if (mpidata->mpirunning) {
+	  // only go through this if MPI has been actually used
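+	  // message tags: 1 carries counts and buffer sizes, 0 carries file contents; these receives must match the MPI_Send calls issued by the other processes at the end of this function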
+	  for (int rr=1; rr<mpidata->nprocs; rr++) {
+	    // get the data from process rr
+	    // how many openmp threads did process rr use?
+	    int remotethreads;
+	    MPI_Recv(&remotethreads, 1, MPI_INT, rr, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+	    for (int ri=0; ri<remotethreads; ri++) {
+	      // first get the ASCII local file
+	      int c = 0;
+	      MPI_Recv(&c, 1, MPI_INT, rr, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+	      while (c != EOF) {
+		fputc(c, output);
+		MPI_Recv(&c, 1, MPI_INT, rr, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+	      }
+	      // now get the binary local file
+	      long buffer_size = 0;
+	      // get the size of the buffer
+	      MPI_Recv(&buffer_size, 1, MPI_LONG, rr, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+	      // allocate the buffer
+	      char *binary_buffer = new char[buffer_size];
+	      // actually receive the buffer
+	      MPI_Recv(binary_buffer, buffer_size, MPI_CHAR, rr, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+	      // we can write it to disk
+	      tppoan.write(binary_buffer, buffer_size);
+	      delete[] binary_buffer;
+	    }
+	  }
+	}
+#endif
+	tppoanp->close();
+	delete tppoanp;
+      } else { // In case TPPOAN could not be opened. Should never happen.
+	logger->err("\nERROR: failed to open TPPOAN file.\n");
+      }
+      fclose(output);
+      // Clean memory
+      delete cid;
+      delete p_scattering_angles;
+    } else { // NSPH mismatch between geometry and scatterer configurations.
+      throw UnrecognizedConfigurationException(
+					       "Inconsistent geometry and scatterer configurations."
+					       );
+    }
+    delete sconf;
+    delete gconf;
+    chrono::time_point<chrono::high_resolution_clock> t_end = chrono::high_resolution_clock::now();
+    elapsed = t_end - t_start;
+    message = "INFO: Calculation lasted " + to_string(elapsed.count()) + "s.\n";
+    logger->log(message);
+    logger->log("Finished: output written to " + output_path + "/c_OCLU\n");
+    time_logger->log(message);
+  }
+
+#ifdef MPI_VERSION
+  else {
+    // here goes the code for MPI processes other than 0
+    // copy gconf, sconf, cid and p_scattering_angles from MPI process 0
+    GeometryConfiguration *gconf = new GeometryConfiguration(mpidata);
+    ScattererConfiguration *sconf = new ScattererConfiguration(mpidata);
+    ClusterIterationData *cid = new ClusterIterationData(mpidata);
+    ScatteringAngles *p_scattering_angles = new ScatteringAngles(mpidata);
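+    // each of these constructors performs the MPI_Bcast receives paired with the mpibcast() calls made by MPI process 0 above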
+    // open separate files for other MPI processes
+    // File *output = fopen((output_path + "/c_OCLU_mpi"+ to_string(mpidata->rank)).c_str(), "w");
+    // fstream *tppoanp = new fstream;
+    // fstream &tppoan = *tppoanp;
+    // string tppoan_name = output_path + "/c_TPPOAN_mpi"+ to_string(mpidata->rank);
+    // tppoan.open(tppoan_name.c_str(), ios::out | ios::binary);
+    // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
+    int ompnumthreads = 1;
+
+#pragma omp parallel
+    {
+      // Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defined value anyway
+      int myompthread = 0;
 #ifdef _OPENMP
+      // If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files
+      myompthread = omp_get_thread_num();
+      if (myompthread == 0) ompnumthreads = omp_get_num_threads();
+#endif
+      // To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one. Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number
+      ClusterIterationData *cid_2 = NULL;
+      FILE *output_2 = NULL;
+      fstream *tppoanp_2 = NULL;
+      // for threads other than thread 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
+      if (myompthread == 0) {
+	cid_2 = cid;
+	// output_2 = output;
+	// tppoanp_2 = tppoanp;
+      } else {
+	// this is not thread 0, so do create fresh copies of all local variables
+	cid_2 = new ClusterIterationData(*cid);
+      }
+      output_2 = fopen((output_path + "/c_OCLU_" + to_string(mpidata->rank) + "_" + to_string(myompthread)).c_str(), "w");
+      tppoanp_2 = new fstream;
+      tppoanp_2->open((output_path + "/c_TPPOAN_" + to_string(mpidata->rank) + "_" + to_string(myompthread)).c_str(), ios::out | ios::binary);
+      fstream &tppoan_2 = *tppoanp_2;
+      // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
+#pragma omp barrier
+      if (myompthread==0) logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n");
+      // ok, now I can actually start the parallel calculations
+#pragma omp for
+      for (int jxi488 = cid_2->firstxi; jxi488 <= cid_2->lastxi; jxi488++) {
+	int jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid_2, output_2, output_path, *tppoanp_2, logger);
+      }
+
+#pragma omp barrier
+      // only threads different from 0 have to free local copies of variables
+      if (myompthread != 0) {
+	delete cid_2;
+      }
+      fclose(output_2);
+      tppoanp_2->close();
+      delete tppoanp_2;
 #pragma omp barrier
       {
-	// thread 0 already wrote on global files, skip it and take care of appending the others
-	for (int ri = 1; ri < ompnumthreads; ri++) {
-	  // Giovanni, please add here in this loop code to reopen the temporary files, reread them and append them respectively to the global output and tppoan, before closing them
-	  string partial_file_name = output_path + "/c_OCLU_" + to_string(ri);
-	  message = "Copying ASCII output of thread " + to_string(ri) + " of " + to_string(ompnumthreads - 1) + "... ";
-	  logger->log(message, LOG_DEBG);
-	  FILE *partial_output = fopen(partial_file_name.c_str(), "r");
-	  char c = fgetc(partial_output);
-	  while (c != EOF) {
-	    fputc(c, output);
-	    c = fgetc(partial_output);
-	  }
-	  fclose(partial_output);
-	  remove(partial_file_name.c_str());
-	  logger->log("done.\n", LOG_DEBG);
-	  partial_file_name = output_path + "/c_TPPOAN_" + to_string(ri);
-	  message = "Copying binary output of thread " + to_string(ri) + " of " + to_string(ompnumthreads - 1) + "... ";
-	  logger->log(message, LOG_DEBG);
-	  fstream partial_tppoan;
-	  partial_tppoan.open(partial_file_name.c_str(), ios::in | ios::binary);
-	  partial_tppoan.seekg(0, ios::end);
-	  long buffer_size = partial_tppoan.tellg();
-	  char *binary_buffer = new char[buffer_size];
-	  partial_tppoan.seekg(0, ios::beg);
-	  partial_tppoan.read(binary_buffer, buffer_size);
-	  tppoan.write(binary_buffer, buffer_size);
-	  partial_tppoan.close();
-	  delete[] binary_buffer;
-	  remove(partial_file_name.c_str());
-	  logger->log("done.\n", LOG_DEBG);
+	message = "INFO: Closing thread-local output files of thread " + to_string(myompthread) + " and syncing threads.\n";
+	logger->log(message);
+      }
+    } // closes pragma omp parallel
+#pragma omp barrier
+    {
+      // tell MPI process 0 how many threads we have on this process (not necessarily the same across all processes)
+      MPI_Send(&ompnumthreads, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
+      // reopen local files, send them all to MPI process 0
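+      // the file transfers below must pair one-to-one with the receives performed by MPI process 0 in its post-processing loop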
+      for (int ri = 0; ri < ompnumthreads; ri++) {
+	// reopen the thread-local temporary files and send their contents to MPI process 0, then remove them
+	string partial_file_name = output_path + "/c_OCLU_" + to_string(mpidata->rank) + "_" + to_string(ri);
+	message = "Copying ASCII output in MPI process " + to_string(mpidata->rank) + " of thread " + to_string(ri) + " of " + to_string(ompnumthreads - 1) + "... ";
+	logger->log(message, LOG_DEBG);
+	FILE *partial_output = fopen(partial_file_name.c_str(), "r");
+	int c = 0;
+	while (c != EOF) {
+	  c = fgetc(partial_output);
+	  MPI_Send(&c, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
 	}
+	fclose(partial_output);
+	remove(partial_file_name.c_str());
+	logger->log("done.\n", LOG_DEBG);
+	partial_file_name = output_path + "/c_TPPOAN_" + to_string(mpidata->rank) + "_" + to_string(ri);
+	message = "Copying binary output in MPI process " + to_string(mpidata->rank) + " of thread " + to_string(ri) + " of " + to_string(ompnumthreads - 1) + "... ";
+	logger->log(message, LOG_DEBG);
+	fstream partial_tppoan;
+	partial_tppoan.open(partial_file_name.c_str(), ios::in | ios::binary);
+	partial_tppoan.seekg(0, ios::end);
+	long buffer_size = partial_tppoan.tellg();
+	char *binary_buffer = new char[buffer_size];
+	partial_tppoan.seekg(0, ios::beg);
+	partial_tppoan.read(binary_buffer, buffer_size);
+	// tell MPI process 0 how large is the buffer
+	MPI_Send(&buffer_size, 1, MPI_LONG, 0, 1, MPI_COMM_WORLD);
+	// actually send the buffer
+	MPI_Send(binary_buffer, buffer_size, MPI_CHAR, 0, 0, MPI_COMM_WORLD);
+	// tppoan.write(binary_buffer, buffer_size);
+	partial_tppoan.close();
+	delete[] binary_buffer;
+	remove(partial_file_name.c_str());
+	logger->log("done.\n", LOG_DEBG);
       }
-#endif
-      tppoanp->close();
-      delete tppoanp;
-    } else { // In case TPPOAN could not be opened. Should never happen.
-      logger->err("\nERROR: failed to open TPPOAN file.\n");
     }
-    fclose(output);
     // Clean memory
     delete cid;
     delete p_scattering_angles;
-  } else { // NSPH mismatch between geometry and scatterer configurations.
-    throw UnrecognizedConfigurationException(
-					     "Inconsistent geometry and scatterer configurations."
-					     );
+    delete sconf;
+    delete gconf;
+
   }
-  delete sconf;
-  delete gconf;
-  chrono::time_point<chrono::high_resolution_clock> t_end = chrono::high_resolution_clock::now();
-  elapsed = t_end - t_start;
-  message = "INFO: Calculation lasted " + to_string(elapsed.count()) + "s.\n";
-  logger->log(message);
-  logger->log("Finished: output written to " + output_path + "/c_OCLU\n");
-  time_logger->log(message);
+#endif
   fclose(timing_file);
   delete time_logger;
   delete logger;
diff --git a/src/cluster/np_cluster.cpp b/src/cluster/np_cluster.cpp
index de9c30fea71cf4a06a5e53e5b6989f1a4f2303c9..f6cac7c2dd98cf5b5774b6aea2c9fb9568f5853b 100644
--- a/src/cluster/np_cluster.cpp
+++ b/src/cluster/np_cluster.cpp
@@ -29,9 +29,13 @@
 #include "../include/Configuration.h"
 #endif
 
+#ifndef INCLUDE_COMMONS_H_
+#include "../include/Commons.h"
+#endif
+
 using namespace std;
 
-extern void cluster(const string& config_file, const string& data_file, const string& output_path);
+extern void cluster(const string& config_file, const string& data_file, const string& output_path, const mixMPI *mpidata);
 
 /*! \brief Main program entry point.
  *
@@ -46,6 +50,14 @@ extern void cluster(const string& config_file, const string& data_file, const st
  * \return result: `int` An exit code passed to the OS (0 for succesful execution).
  */
 int main(int argc, char **argv) {
+#ifdef MPI_VERSION
+	int ierr = MPI_Init(&argc, &argv);
+	// create and initialise class with essential MPI data
+	mixMPI *mpidata = new mixMPI(MPI_COMM_WORLD);
+#else
+	// create the class with dummy data if we are not using MPI at all
+	mixMPI *mpidata = new mixMPI();
+#endif
   	string config_file = "../../test_data/cluster/DEDFB";
 	string data_file = "../../test_data/cluster/DCLU";
 	string output_path = ".";
@@ -54,6 +66,10 @@ int main(int argc, char **argv) {
 		data_file = string(argv[2]);
 		output_path = string(argv[3]);
 	}
-	cluster(config_file, data_file, output_path);
+	cluster(config_file, data_file, output_path, mpidata);
+#ifdef MPI_VERSION
+	MPI_Finalize();
+#endif
+	delete mpidata;
 	return 0;
 }
diff --git a/src/include/Commons.h b/src/include/Commons.h
index 8f311f8c1f7647e4e9b67abe36f17bc5785198bb..a9370909ee9a1f79a6a35e408f591a9b8766852b 100644
--- a/src/include/Commons.h
+++ b/src/include/Commons.h
@@ -19,6 +19,12 @@
 #ifndef INCLUDE_COMMONS_H_
 #define INCLUDE_COMMONS_H_
 
+#ifdef USE_MPI
+#include <mpi.h>
+#endif
+
+class mixMPI;
+
 /*! \brief Representation of the FORTRAN C1 common blocks.
  *
  * C1 common blocks are used to store vector field expansions and geometric
@@ -110,6 +116,20 @@ public:
    */
   C1(const C1& rhs);
 
+#ifdef MPI_VERSION
+  /*! \brief C1 instance constructor copying all contents via MPI broadcast from MPI process 0
+   *
+   * \param mpidata: `mixMPI *` pointer to MPI data structure.
+   */
+  C1(const mixMPI *mpidata);
+
+  /*! \brief send C1 instance from MPI process 0 via MPI broadcasts to all other processes
+   *
+   * \param mpidata: `mixMPI *` pointer to MPI data structure.
+   */
+  void mpibcast(const mixMPI *mpidata);
+#endif
+
   //! \brief C1 instance destroyer.
   ~C1();
 };
@@ -153,6 +173,21 @@ public:
 
   //! \brief C2 instance destroyer.
   ~C2();
+
+#ifdef MPI_VERSION
+  /*! \brief C2 instance constructor copying all contents via MPI broadcast from MPI process 0
+   *
+   * \param mpidata: `mixMPI *` pointer to MPI data structure.
+   */
+  C2(const mixMPI *mpidata);
+
+  /*! \brief send C2 instance from MPI process 0 via MPI broadcasts to all other processes
+   *
+   * \param mpidata: `mixMPI *` pointer to MPI data structure.
+   */
+  void mpibcast(const mixMPI *mpidata);
+#endif
+
 };
 
 /*! \brief Representation of the FORTRAN C3 blocks.
@@ -183,6 +218,21 @@ public:
   /*! \brief C3 instance destroyer.
    */
   ~C3();
+
+#ifdef MPI_VERSION
+  /*! \brief C3 instance constructor copying all contents via MPI broadcast from MPI process 0
+   *
+   * \param mpidata: `mixMPI *` pointer to MPI data structure.
+   */
+  C3(const mixMPI *mpidata);
+
+  /*! \brief send C3 instance from MPI process 0 via MPI broadcasts to all other processes
+   *
+   * \param mpidata: `mixMPI *` pointer to MPI data structure.
+   */
+  void mpibcast(const mixMPI *mpidata);
+#endif
+
 };
 
 /*! \brief Representation of the FORTRAN C4 blocks.
@@ -229,6 +279,21 @@ public:
   /*! \brief C4 instance destroyer.
    */
   ~C4();
+
+#ifdef MPI_VERSION
+  /*! \brief C4 instance constructor copying all contents via MPI broadcast from MPI process 0
+   *
+   * \param mpidata: `mixMPI *` pointer to MPI data structure.
+   */
+  C4(const mixMPI *mpidata);
+
+  /*! \brief send C4 instance from MPI process 0 via MPI broadcasts to all other processes
+   *
+   * \param mpidata: `mixMPI *` pointer to MPI data structure.
+   */
+  void mpibcast(const mixMPI *mpidata);
+#endif
+
 };
 
 /*! \brief Vectors and matrices that are specific to cluster C1 blocks.
@@ -317,6 +382,21 @@ public:
 
   //! \brief C1_AddOns instance destroyer.
   ~C1_AddOns();
+
+#ifdef MPI_VERSION
+  /*! \brief C1_AddOns instance constructor copying all contents via MPI broadcast from MPI process 0
+   *
+   * \param mpidata: `mixMPI *` pointer to MPI data structure.
+   */
+  C1_AddOns(const mixMPI *mpidata);
+
+  /*! \brief send C1_AddOns instance from MPI process 0 via MPI broadcasts to all other processes
+   *
+   * \param mpidata: `mixMPI *` pointer to MPI data structure.
+   */
+  void mpibcast(const mixMPI *mpidata);
+#endif
+
 };
 
 /*! \brief Representation of the FORTRAN C6 blocks.
@@ -343,6 +423,21 @@ public:
   /*! \brief C6 instance destroyer.
    */
   ~C6();
+
+#ifdef MPI_VERSION
+  /*! \brief C6 instance constructor copying all contents via MPI broadcast from MPI process 0
+   *
+   * \param mpidata: `mixMPI *` pointer to MPI data structure.
+   */
+  C6(const mixMPI *mpidata);
+
+  /*! \brief send C6 instance from MPI process 0 via MPI broadcasts to all other processes
+   *
+   * \param mpidata: `mixMPI *` pointer to MPI data structure.
+   */
+  void mpibcast(const mixMPI *mpidata);
+#endif
+
 };
 
 /*! \brief Representation of the FORTRAN C9 blocks.
@@ -384,6 +479,51 @@ public:
   /*! \brief C9 instance destroyer.
    */
   ~C9();
+
+#ifdef MPI_VERSION
+  /*! \brief C9 instance constructor copying all contents via MPI broadcast from MPI process 0
+   *
+   * \param mpidata: `mixMPI *` pointer to MPI data structure.
+   */
+  C9(const mixMPI *mpidata);
+
+  /*! \brief send C9 instance from MPI process 0 via MPI broadcasts to all other processes
+   *
+   * \param mpidata: `mixMPI *` pointer to MPI data structure.
+   */
+  void mpibcast(const mixMPI *mpidata);
+#endif
+
+};
+
+/*! \brief A structure with essential MPI data.
+ */
+class mixMPI {
+public:
+  //! \brief was MPI initialised?
+  bool mpirunning;
+  //! \brief MPI rank
+  int rank;
+  //! \brief MPI nprocs
+  int nprocs;
+
+  /*! \brief empty mixMPI instance constructor.
+   */
+  mixMPI();
+
+  /*! \brief mixMPI instance constructor from an actual MPI communicator.
+   */
+#ifdef MPI_VERSION
+  mixMPI(MPI_Comm comm);
+#endif
+  
+  /*! \brief mixMPI instance constructor copying its contents from a preexisting object.
+   */
+  mixMPI(const mixMPI& rhs);
+
+  /*! \brief mixMPI instance destroyer.
+   */
+  ~mixMPI();
 };
 
 /*! \brief A data structure representing the information used for a single scale
@@ -455,12 +595,32 @@ public:
   //! \brief Wave number.
   double wn;
   double xip;
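+  //! \brief Number of scales (wavelengths) to be computed.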
+  int number_of_scales;
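+  //! \brief Size of the block of scale indices assigned to each MPI process.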
+  int xiblock;
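+  //! \brief First scale index assigned to this MPI process (lower bound of its wavelength loop).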
+  int firstxi;
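+  //! \brief Last scale index assigned to this MPI process (upper bound of its wavelength loop).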
+  int lastxi;
 
-  ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf);
-
+  ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata);
+  
   ClusterIterationData(const ClusterIterationData& rhs);
 
+#ifdef MPI_VERSION
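+  /*! \brief Build a ClusterIterationData instance copying its contents via MPI from MPI process 0.
+   *
+   * \param mpidata: `mixMPI *` Pointer to the mpi structure used to do the MPI communications.
+   */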
+  ClusterIterationData(const mixMPI *mpidata);
+
+  /*! \brief Broadcast over MPI the ClusterIterationData instance from MPI process 0 to all others.
+   *
+   * When using MPI, the initial ClusterIterationData instance created by MPI process 0
+   * needs to be replicated on all other processes. This function sends it using
+   * MPI broadcast calls. The MPI broadcast calls in this function must match those
+   * in the constructor using the mixMPI pointer.
+   *
+   * \param mpidata: `mixMPI *` Pointer to the mpi structure used to do the MPI broadcast.
+   */
+  void mpibcast(const mixMPI *mpidata);
+#endif
+
   ~ClusterIterationData();
+
 };
 
 /*! \brief A data structure representing the angles to be evaluated in the problem.
@@ -562,6 +722,27 @@ public:
    * \param rhs: `ScatteringAngles&` Reference to the ScatteringAngles object to be copied.
    */
   ScatteringAngles(const ScatteringAngles &rhs);
+
+#ifdef MPI_VERSION
+  /*! \brief ScatteringAngles copy from MPI broadcast.
+   *
+   * \param mpidata: `mixMPI *` Pointer to the mpidata instance used to copy the data.
+   */
+  ScatteringAngles(const mixMPI *mpidata);
+
+  /*! \brief Broadcast over MPI the ScatteringAngles instance from MPI process 0 to all others.
+   *
+   * When using MPI, the initial ScatteringAngles instance created by MPI process 0
+   * needs to be replicated on all other processes. This function sends it using
+   * MPI broadcast calls. The MPI broadcast calls in this function must match those
+   * in the constructor using the mixMPI pointer.
+   *
+   * \param mpidata: `mixMPI *` Pointer to the mpi structure used to do the MPI broadcast.
+   */
+  void mpibcast(const mixMPI *mpidata);
+#endif
+
 };
 
+
 #endif
diff --git a/src/include/Configuration.h b/src/include/Configuration.h
index 1c131e92e6104c856b8d882075da94f5053e3608..726f7471bdbbc784cb4d370e411d3c1634df9c58 100644
--- a/src/include/Configuration.h
+++ b/src/include/Configuration.h
@@ -30,6 +30,8 @@
 #ifndef INCLUDE_CONFIGURATION_H_
 #define INCLUDE_CONFIGURATION_H_
 
+class mixMPI;
+
 /**
  * \brief A class to represent the configuration of the scattering geometry.
  *
@@ -199,6 +201,25 @@ public:
    */
   GeometryConfiguration(const GeometryConfiguration& rhs);
 
+#ifdef MPI_VERSION
+  /*! \brief Build a scattering geometry configuration structure copying it via MPI from MPI process 0.
+   *
+   * \param mpidata: `mixMPI *` Pointer to the mpidata instance to use for the MPI communications.
+   */
+  GeometryConfiguration(const mixMPI *mpidata);
+
+  /*! \brief Broadcast over MPI the GeometryConfiguration instance from MPI process 0 to all others.
+   *
+   * When using MPI, the initial GeometryConfiguration instance created by MPI process 0
+   * needs to be replicated on all other processes. This function sends it using
+   * MPI broadcast calls. The MPI broadcast calls in this function must match those
+   * in the constructor using the mixMPI pointer.
+   *
+   * \param mpidata: `mixMPI *` Pointer to the mpi structure used to do the MPI broadcast.
+   */
+  void mpibcast(const mixMPI *mpidata);
+#endif
+
   /*! \brief Destroy a GeometryConfiguration instance.
    */
   ~GeometryConfiguration();
@@ -245,6 +266,7 @@ public:
    * \return scale: `double` The Z coordinate of the requested sphere.
    */
   double get_sph_z(int index) { return _sph_z[index]; }
+
 };
 
 /**
@@ -402,6 +424,25 @@ public:
    */
   ScattererConfiguration(const ScattererConfiguration& rhs);
 
+#ifdef MPI_VERSION
+  /*! \brief Build a scatterer configuration structure copying it via MPI from MPI process 0.
+   *
+   * \param mpidata: `mixMPI *` Pointer to the mpidata instance to use for the MPI communications.
+   */
+  ScattererConfiguration(const mixMPI *mpidata);
+
+  /*! \brief Broadcast over MPI the ScattererConfiguration instance from MPI process 0 to all others.
+   *
+   * When using MPI, the initial ScattererConfiguration instance created by MPI process 0
+   * needs to be replicated on all other processes. This function sends it using
+   * MPI broadcast calls. The MPI broadcast calls in this function must match those
+   * in the constructor using the mixMPI pointer.
+   *
+   * \param mpidata: `mixMPI *` Pointer to the mpi structure used to do the MPI broadcast.
+   */
+  void mpibcast(const mixMPI *mpidata);
+#endif
+
   /*! \brief Destroy a scatterer configuration instance.
    */
   ~ScattererConfiguration();
@@ -561,6 +602,7 @@ public:
    * \return result: `bool` True, if the two instances are equal, false otherwise.
    */
   bool operator ==(const ScattererConfiguration &other);
+
 };
 
 #endif
diff --git a/src/libnptm/Commons.cpp b/src/libnptm/Commons.cpp
index cff015719fd91bdb76e8072da99a0fce058fce17..694854ec9589e55470f7baf00e73da801484f392 100644
--- a/src/libnptm/Commons.cpp
+++ b/src/libnptm/Commons.cpp
@@ -16,6 +16,10 @@
 #include "../include/Commons.h"
 #endif
 
+#ifdef USE_MPI
+#include <mpi.h>
+#endif
+
 C1::C1(GeometryConfiguration *gconf, ScattererConfiguration *sconf) {
   lm = gconf->l_max;
   int li = gconf->li;
@@ -92,22 +96,25 @@ C1::C1(const C1& rhs) {
 
   vec_rmi = new dcomplex[nsph * lm]();
   vec_rei = new dcomplex[nsph * lm]();
+  for (long li=0; li<(nsph*lm); li++) {
+    vec_rmi[li] = rhs.vec_rmi[li];
+    vec_rei[li] = rhs.vec_rei[li];
+  }
   rmi = new dcomplex*[lm];
   rei = new dcomplex*[lm];
   for (int ri = 0; ri < lm; ri++) {
     rmi[ri] = &(vec_rmi[nsph * ri]);
     rei[ri] = &(vec_rei[nsph * ri]);
-    /*! Copy the contents from the template */
-    for (int rj=0; rj<nsph; rj++) {
-      rmi[ri][rj] = rhs.rmi[ri][rj];
-      rei[ri][rj] = rhs.rei[ri][rj];
-    }
+    /*! The contents were already copied via vec_rmi and vec_rei */
   }
   vec_w = new dcomplex[4 * nlmmt]();
+  for (long li=0; li<(4*nlmmt); li++) {
+    vec_w[li] = rhs.vec_w[li];
+  }    
   w = new dcomplex*[nlmmt];
   for (int wi = 0; wi < nlmmt; wi++) {
     w[wi] = &(vec_w[4 * wi]);
-    for (int wj=0; wj<4; wj++) w[wi][wj] = rhs.w[wi][wj];
+    /*! The contents were already copied via vec_w */
   }
   configurations = rhs.configurations;
   vint = new dcomplex[16]();
@@ -165,6 +172,117 @@ C1::C1(const C1& rhs) {
   }
 }
 
+#ifdef MPI_VERSION
+C1::C1(const mixMPI *mpidata) {
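+  // NOTE: the sequence and sizes of the MPI_Bcast calls below must mirror C1::mpibcast() exactly; process 0 sends, every other process receives into freshly allocated buffers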
+  MPI_Bcast(&nlmmt, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nsph, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lm, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  vec_rmi = new dcomplex[nsph * lm]();
+  vec_rei = new dcomplex[nsph * lm]();
+  MPI_Bcast(vec_rmi, nsph*lm, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_rei, nsph*lm, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  rmi = new dcomplex*[lm];
+  rei = new dcomplex*[lm];
+  for (int ri = 0; ri < lm; ri++) {
+    rmi[ri] = &(vec_rmi[nsph * ri]);
+    rei[ri] = &(vec_rei[nsph * ri]);
+    /*! The contents were copied already via vec_rmi and vec_rei */
+  }
+  vec_w = new dcomplex[4 * nlmmt]();
+  MPI_Bcast(vec_w, 4*nlmmt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  w = new dcomplex*[nlmmt];
+  for (int wi = 0; wi < nlmmt; wi++) {
+    w[wi] = &(vec_w[4 * wi]);
+    /*! The contents were copied already via vec_w */
+  }
+  MPI_Bcast(&configurations, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  vint = new dcomplex[16]();
+  MPI_Bcast(vint, 16, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  vec_vints = new dcomplex[nsph * 16]();
+  MPI_Bcast(vec_vints, nsph*16, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  vints = new dcomplex*[nsph];
+  rc = new double*[nsph];
+  nshl = new int[nsph]();
+  MPI_Bcast(nshl, nsph, MPI_INT, 0, MPI_COMM_WORLD);
+  iog = new int[nsph]();
+  MPI_Bcast(iog, nsph, MPI_INT, 0, MPI_COMM_WORLD);
+  for (int vi = 0; vi < nsph; vi++) {
+    vints[vi] = &(vec_vints[16 * vi]);
+    // The contents were copied already via vec_vints
+  }
+  for (int ri=0; ri<configurations; ri++) {
+    rc[ri] = new double[nshl[ri]]();
+    MPI_Bcast(rc[ri], nshl[ri], MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  }
+  fsas = new dcomplex[nsph]();
+  sscs = new double[nsph]();
+  sexs = new double[nsph]();
+  sabs = new double[nsph]();
+  sqscs = new double[nsph]();
+  sqexs = new double[nsph]();
+  sqabs = new double[nsph]();
+  gcsv = new double[nsph]();
+  rxx = new double[nsph]();
+  ryy = new double[nsph]();
+  rzz = new double[nsph]();
+  ros = new double[nsph]();
+  MPI_Bcast(fsas, nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(sscs, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(sexs, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(sabs, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(sqscs, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(sqexs, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(sqabs, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(gcsv, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(rxx, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ryy, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(rzz, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ros, configurations, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  sas = new dcomplex**[nsph];
+  for (int si = 0; si < nsph; si++) {
+    sas[si] = new dcomplex*[2];
+    for (int sj=0; sj<2; sj++) {
+      sas[si][sj] = new dcomplex[2]();
+      MPI_Bcast(sas[si][sj], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    }
+  }
+}
+
+void C1::mpibcast(const mixMPI *mpidata) {
+  MPI_Bcast(&nlmmt, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nsph, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lm, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_rmi, nsph*lm, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_rei, nsph*lm, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_w, 4*nlmmt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&configurations, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vint, 16, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_vints, nsph*16, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(nshl, nsph, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(iog, nsph, MPI_INT, 0, MPI_COMM_WORLD);
+  for (int ri=0; ri<configurations; ri++) {
+    MPI_Bcast(rc[ri], nshl[ri], MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(fsas, nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(sscs, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(sexs, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(sabs, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(sqscs, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(sqexs, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(sqabs, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(gcsv, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(rxx, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ryy, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(rzz, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ros, configurations, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  for (int si = 0; si < nsph; si++) {
+    for (int sj=0; sj<2; sj++) {
+      MPI_Bcast(sas[si][sj], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    }
+  }
+}
+#endif
+
 C1::~C1() {
   delete[] vec_rmi;
   delete[] vec_rei;
@@ -358,6 +476,122 @@ C1_AddOns::~C1_AddOns() {
   delete[] ecsc;
 }
 
+#ifdef MPI_VERSION
+C1_AddOns::C1_AddOns(const mixMPI *mpidata) {
+  MPI_Bcast(&nsph, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lmpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nlemt, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&litpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lmtpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&litpos, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lmtpos, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nv3j, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lm, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  int vhsize=(nsph * nsph - 1) * litpo;
+  vh = new dcomplex[vhsize]();
+  MPI_Bcast(vh, vhsize, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  int vj0size=nsph * lmtpo;
+  vj0 = new dcomplex[vj0size]();
+  MPI_Bcast(vj0, vj0size, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  vj = new dcomplex[1](); // QUESTION: is 1 really enough for a general case?
+  MPI_Bcast(vj, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  int vyhjsize=(nsph * nsph - 1) * litpos;
+  vyhj = new dcomplex[vyhjsize]();
+  MPI_Bcast(vyhj, vyhjsize, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  int vyj0size = nsph * lmtpos;
+  vyj0 = new dcomplex[vyj0size]();
+  MPI_Bcast(vyj0, vyj0size, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  am0v = new dcomplex[nlemt * nlemt]();
+  am0m = new dcomplex*[nlemt];
+  MPI_Bcast(am0v, nlemt*nlemt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  for (int ai = 0; ai < nlemt; ai++) {
+    am0m[ai] = (am0v + nlemt * ai);
+  }
+  vintm = new dcomplex[16]();
+  vintt = new dcomplex[16]();
+  MPI_Bcast(vintm, 16, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vintt, 16, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  fsac = new dcomplex*[2];
+  sac = new dcomplex*[2];
+  fsacm = new dcomplex*[2];
+  for (int fi = 0; fi < 2; fi++) {
+    fsac[fi] = new dcomplex[2]();
+    sac[fi] = new dcomplex[2]();
+    fsacm[fi] = new dcomplex[2]();
+    MPI_Bcast(fsac[fi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(sac[fi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(fsacm[fi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  scscp = new dcomplex[2]();
+  ecscp = new dcomplex[2]();
+  scscpm = new dcomplex[2]();
+  ecscpm = new dcomplex[2]();
+  MPI_Bcast(scscp, 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ecscp, 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(scscpm, 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ecscpm, 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  v3j0 = new double[nv3j]();
+  MPI_Bcast(v3j0, nv3j, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  ind3j = new int*[lmpo];
+  for (int ii = 0; ii < lmpo; ii++) {
+    ind3j[ii] = new int[lm]();
+    MPI_Bcast(ind3j[ii], lm, MPI_INT, 0, MPI_COMM_WORLD);
+  }
+  sscs = new double[nsph]();
+  MPI_Bcast(sscs, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  ecscm = new double[2]();
+  scscm = new double[2]();
+  scsc = new double[2]();
+  ecsc = new double[2]();
+  MPI_Bcast(ecscm, 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(scscm, 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(scsc, 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ecsc, 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+}
+
+void C1_AddOns::mpibcast(const mixMPI *mpidata) {
+  MPI_Bcast(&nsph, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lmpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nlemt, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&litpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lmtpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&litpos, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lmtpos, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nv3j, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lm, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  int vhsize=(nsph * nsph - 1) * litpo;
+  MPI_Bcast(vh, vhsize, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  int vj0size=nsph * lmtpo;
+  MPI_Bcast(vj0, vj0size, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vj, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  int vyhjsize=(nsph * nsph - 1) * litpos;
+  MPI_Bcast(vyhj, vyhjsize, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  int vyj0size = nsph * lmtpos;
+  MPI_Bcast(vyj0, vyj0size, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(am0v, nlemt*nlemt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vintm, 16, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vintt, 16, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  for (int fi = 0; fi < 2; fi++) {
+    MPI_Bcast(fsac[fi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(sac[fi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(fsacm[fi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(scscp, 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ecscp, 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(scscpm, 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ecscpm, 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(v3j0, nv3j, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  for (int ii = 0; ii < lmpo; ii++) {
+    MPI_Bcast(ind3j[ii], lm, MPI_INT, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(sscs, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ecscm, 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(scscm, 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(scsc, 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ecsc, 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+}
+#endif
+
 C2::C2(GeometryConfiguration *gconf, ScattererConfiguration *sconf) {
   nsph = gconf->number_of_spheres;
   int npnt = gconf->npnt;
@@ -401,6 +635,35 @@ C2::~C2() {
   delete[] vsz;
 }
 
+#ifdef MPI_VERSION
+C2::C2(const mixMPI *mpidata) {
+  MPI_Bcast(&nsph, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nhspo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nl, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  ris = new dcomplex[nhspo]();
+  dlri = new dcomplex[nhspo]();
+  MPI_Bcast(ris, nhspo, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(dlri, nhspo, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  vkt = new dcomplex[nsph]();
+  vsz = new double[nsph]();
+  MPI_Bcast(vkt, nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vsz, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  dc0 = new dcomplex[nl]();
+  MPI_Bcast(dc0, nl, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+}
+
+void C2::mpibcast(const mixMPI *mpidata) {
+  MPI_Bcast(&nsph, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nhspo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nl, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ris, nhspo, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(dlri, nhspo, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vkt, nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vsz, nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(dc0, nl, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+}
+#endif
+
 C3::C3() {
   tsas = new dcomplex*[2];
   tsas[0] = new dcomplex[2];
@@ -431,6 +694,32 @@ C3::~C3() {
   delete[] tsas;
 }
 
+#ifdef MPI_VERSION
+C3::C3(const mixMPI *mpidata) {
+  tsas = new dcomplex*[2];
+  for (int ti=0; ti<2; ti++) {
+    tsas[ti] = new dcomplex[2];
+    MPI_Bcast(tsas[ti], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(&tfsas, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&gcs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&scs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&ecs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&acs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+}
+
+void C3::mpibcast(const mixMPI *mpidata) {
+  for (int ti = 0; ti < 2; ti++) {
+    MPI_Bcast(tsas[ti], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(&tfsas, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&gcs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&scs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&ecs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&acs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+}
+#endif
+
 C4::C4(GeometryConfiguration *gconf) {
   li = gconf->li;
   le = gconf->le;
@@ -466,6 +755,40 @@ C4::C4(const C4& rhs) {
 C4::~C4() {
 }
 
+#ifdef MPI_VERSION
+C4::C4(const mixMPI *mpidata) {
+  MPI_Bcast(&li, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&le, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lm, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nv3j, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nsph, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  // The following is needed to initialize C1_AddOns
+  MPI_Bcast(&litpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&litpos, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lmtpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lmtpos, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nlim, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nlem, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lmpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+}
+
+void C4::mpibcast(const mixMPI *mpidata) {
+  MPI_Bcast(&li, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&le, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lm, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nv3j, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nsph, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  // The following is needed to initialize C1_AddOns
+  MPI_Bcast(&litpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&litpos, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lmtpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lmtpos, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nlim, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nlem, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&lmpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+}
+#endif
+
 C6::C6(int _lmtpo) {
   lmtpo = _lmtpo;
   rac3j = new double[lmtpo]();
@@ -481,6 +804,19 @@ C6::~C6() {
   delete[] rac3j;
 }
 
+#ifdef MPI_VERSION
+C6::C6(const mixMPI *mpidata) {
+  MPI_Bcast(&lmtpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  rac3j = new double[lmtpo]();
+  MPI_Bcast(rac3j, lmtpo, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+}
+
+void C6::mpibcast(const mixMPI *mpidata) {
+  MPI_Bcast(&lmtpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(rac3j, lmtpo, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+}
+#endif
+
 C9::C9(int ndi, int _nlem, int ndit, int _nlemt) {
   gis_size_0 = ndi;
   sam_size_0 = ndit;
@@ -529,7 +865,68 @@ C9::~C9() {
   delete[] sam;
 }
 
-ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf) {
+#ifdef MPI_VERSION
+C9::C9(const mixMPI *mpidata) {
+  MPI_Bcast(&gis_size_0, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sam_size_0, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nlem, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nlemt, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  gis = new dcomplex*[gis_size_0];
+  gls = new dcomplex*[gis_size_0];
+  for (int gi = 0; gi < gis_size_0; gi++) {
+    gis[gi] = new dcomplex[nlem]();
+    gls[gi] = new dcomplex[nlem]();
+    MPI_Bcast(gis[gi], nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gls[gi], nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  sam = new dcomplex*[sam_size_0];
+  for (int si = 0; si < sam_size_0; si++) {
+    sam[si] = new dcomplex[nlemt]();
+    MPI_Bcast(sam[si], nlemt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+}
+
+void C9::mpibcast(const mixMPI *mpidata) {
+  MPI_Bcast(&gis_size_0, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sam_size_0, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nlem, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&nlemt, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  for (int gi = 0; gi < gis_size_0; gi++) {
+    MPI_Bcast(gis[gi], nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gls[gi], nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  for (int si = 0; si < sam_size_0; si++) {
+    MPI_Bcast(sam[si], nlemt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+}
+#endif
+
+mixMPI::mixMPI() {
+  mpirunning = 0;
+  rank = 0;
+  nprocs = 1;
+}
+
+#ifdef MPI_VERSION
+mixMPI::mixMPI(MPI_Comm comm) {
+  mpirunning = 1;
+  int ierr;
+  // TODO: check ierr and add meaningful MPI error handling here
+  ierr = MPI_Comm_rank(comm, &rank);
+  ierr = MPI_Comm_size(comm, &nprocs);
+}
+#endif
+
+mixMPI::mixMPI(const mixMPI& rhs) {
+  mpirunning = rhs.mpirunning;
+  rank = rhs.rank;
+  nprocs = rhs.nprocs;
+}
+
+mixMPI::~mixMPI() {
+}
+
+ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata) {
   c1 = new C1(gconf, sconf);
   c2 = new C2(gconf, sconf);
   c3 = new C3();
@@ -623,6 +1020,11 @@ ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, Scatter
   xip = sconf->xip;
   sqsfi = 1.0;
   vk = 0.0;
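+  // Partition the xi scales across MPI ranks: each rank handles a contiguous block of at most xiblock scale indices.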
+  number_of_scales = sconf->number_of_scales;
+  xiblock = (int) ceil(((double) (sconf->number_of_scales-1))/((double) mpidata->nprocs));
+  lastxi = ((mpidata->rank+1) * xiblock)+1;
+  firstxi = lastxi-xiblock+1;
+  if (lastxi > sconf->number_of_scales) lastxi = sconf->number_of_scales;
 }
 
 ClusterIterationData::ClusterIterationData(const ClusterIterationData& rhs) {
@@ -767,8 +1169,220 @@ ClusterIterationData::ClusterIterationData(const ClusterIterationData& rhs) {
   xip = rhs.xip;
   sqsfi = rhs.sqsfi;
   vk = rhs.vk;
+  number_of_scales = rhs.number_of_scales;
+  xiblock = rhs.xiblock;
+  firstxi = rhs.firstxi;
+  lastxi = rhs.lastxi;
 }
 
+#ifdef MPI_VERSION
+ClusterIterationData::ClusterIterationData(const mixMPI *mpidata) {
+  c1 = new C1(mpidata);
+  c2 = new C2(mpidata);
+  c3 = new C3(mpidata);
+  c4 = new C4(mpidata);
+  c1ao = new C1_AddOns(mpidata);
+  c6 = new C6(mpidata);
+  const int ndi = c4->nsph * c4->nlim;
+  const np_int ndit = 2 * ndi;
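+  // ndit is the side of the square am matrix, recomputed locally from the C4 scalars just received.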
+  c9 = new C9(mpidata);
+  gaps = new double[c4->nsph]();
+  MPI_Bcast(gaps, c4->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  tqev = new double[3]();
+  tqsv = new double[3]();
+  MPI_Bcast(tqev, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(tqsv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  tqse = new double*[2];
+  tqspe = new dcomplex*[2];
+  tqss = new double*[2];
+  tqsps = new dcomplex*[2];
+  tqce = new double*[2];
+  tqcpe = new dcomplex*[2];
+  tqcs = new double*[2];
+  tqcps = new dcomplex*[2];
+  for (int ti = 0; ti < 2; ti++) {
+    tqse[ti] = new double[c4->nsph]();
+    tqspe[ti] = new dcomplex[c4->nsph]();
+    tqss[ti] = new double[c4->nsph]();
+    tqsps[ti] = new dcomplex[c4->nsph]();
+    MPI_Bcast(tqse[ti], c4->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqspe[ti], c4->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqss[ti], c4->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqsps[ti], c4->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    tqce[ti] = new double[3]();
+    tqcpe[ti] = new dcomplex[3]();
+    tqcs[ti] = new double[3]();
+    tqcps[ti] = new dcomplex[3]();
+    MPI_Bcast(tqce[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcpe[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcs[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcps[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  gapv = new double[3]();
+  gapp = new dcomplex*[3];
+  gappm = new dcomplex*[3];
+  gap = new double*[3];
+  gapm = new double*[3];
+  MPI_Bcast(gapv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  for (int gi = 0; gi < 3; gi++) {
+    gapp[gi] = new dcomplex[2]();
+    gappm[gi] = new dcomplex[2]();
+    gap[gi] = new double[2]();
+    gapm[gi] = new double[2]();
+    MPI_Bcast(gapp[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gappm[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gap[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gapm[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  }
+  u = new double[3]();
+  us = new double[3]();
+  un = new double[3]();
+  uns = new double[3]();
+  up = new double[3]();
+  ups = new double[3]();
+  unmp = new double[3]();
+  unsmp = new double[3]();
+  upmp = new double[3]();
+  upsmp = new double[3]();
+  duk = new double[3]();
+  MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  argi = new double[1]();
+  args = new double[1]();
+  MPI_Bcast(argi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  cextlr = new double*[4];
+  cext = new double*[4];
+  cmullr = new double*[4];
+  cmul = new double*[4];
+  for (int ci = 0; ci < 4; ci++) {
+    cextlr[ci] = new double[4]();
+    cext[ci] = new double[4]();
+    cmullr[ci] = new double[4]();
+    cmul[ci] = new double[4]();
+    MPI_Bcast(cextlr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cext[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cmullr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cmul[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  }
+  zpv = new double***[c4->lm];
+  for (int zi = 0; zi < c4->lm; zi++) {
+    zpv[zi] = new double**[3];
+    for (int zj = 0; zj < 3; zj++) {
+      zpv[zi][zj] = new double*[2];
+      for (int zk = 0; zk < 2; zk++) {
+	zpv[zi][zj][zk] = new double[2]();
+	MPI_Bcast(zpv[zi][zj][zk], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+      }
+    }
+  }
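+  // am is one contiguous ndit x ndit block addressed through row pointers; it is broadcast row by row because MPI_Bcast takes an int element count.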
+  am_vector = new dcomplex[ndit * ndit]();
+  am = new dcomplex*[ndit];
+  for (np_int ai = 0; ai < ndit; ai++) {
+    am[ai] = (am_vector + ai * ndit);
+    MPI_Bcast(am[ai], ndit, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&qsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sqsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
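+  // Recompute this rank's share of the xi range from the broadcast block size, mirroring the main constructor.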
+  lastxi = ((mpidata->rank+1) * xiblock)+1;
+  firstxi = lastxi-xiblock+1;
+  if (lastxi > number_of_scales) lastxi = number_of_scales;
+}
+
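+// Intended usage (a sketch, assuming rank 0 alone reads the configuration files
+// and the other ranks only hold a mixMPI object):
+//   ClusterIterationData *cid;
+//   if (mpidata->rank == 0) {
+//     cid = new ClusterIterationData(gconf, sconf, mpidata);
+//     cid->mpibcast(mpidata); // collective: the other ranks are concurrently in the constructor above
+//   } else {
+//     cid = new ClusterIterationData(mpidata);
+//   }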
+void ClusterIterationData::mpibcast(const mixMPI *mpidata) {
+  c1->mpibcast(mpidata);
+  c2->mpibcast(mpidata);
+  c3->mpibcast(mpidata);
+  c4->mpibcast(mpidata);
+  c1ao->mpibcast(mpidata);
+  c6->mpibcast(mpidata);
+  const int ndi = c4->nsph * c4->nlim;
+  const np_int ndit = 2 * ndi;
+  c9->mpibcast(mpidata);
+  MPI_Bcast(gaps, c4->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(tqev, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(tqsv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  for (int ti = 0; ti < 2; ti++) {
+    MPI_Bcast(tqse[ti], c4->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqspe[ti], c4->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqss[ti], c4->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqsps[ti], c4->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqce[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcpe[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcs[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(tqcps[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(gapv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  for (int gi = 0; gi < 3; gi++) {
+    MPI_Bcast(gapp[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gappm[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gap[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(gapm[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(argi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  for (int ci = 0; ci < 4; ci++) {
+    MPI_Bcast(cextlr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cext[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cmullr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_Bcast(cmul[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  }
+  for (int zi = 0; zi < c4->lm; zi++) {
+    for (int zj = 0; zj < 3; zj++) {
+      for (int zk = 0; zk < 2; zk++) {
+	MPI_Bcast(zpv[zi][zj][zk], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+      }
+    }
+  }
+  // MPI_Bcast takes an int element count, so transfer am one row (ndit elements) at a time
+  for (np_int ai = 0; ai < ndit; ai++) {
+    MPI_Bcast(am[ai], ndit, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&qsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sqsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
+}
+#endif
+
 ClusterIterationData::~ClusterIterationData() {
   const int nsph = c4->nsph;
   delete[] am_vector;
@@ -912,3 +1526,51 @@ ScatteringAngles::ScatteringAngles(const ScatteringAngles &rhs) {
   _nks = rhs._nks;
   _nkks = rhs._nkks;
 }
+
+#ifdef MPI_VERSION
+ScatteringAngles::ScatteringAngles(const mixMPI *mpidata) {
+  MPI_Bcast(&_th, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_thstp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_thlst, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_ths, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_thsstp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_thslst, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_ph, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_phstp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_phlst, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_phs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_phsstp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_phslst, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_nth, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_nph, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_nths, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_nphs, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_thsca, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_nk, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_nks, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_nkks, 1, MPI_INT, 0, MPI_COMM_WORLD);
+}
+
+void ScatteringAngles::mpibcast(const mixMPI *mpidata) {
+  MPI_Bcast(&_th, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_thstp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_thlst, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_ths, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_thsstp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_thslst, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_ph, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_phstp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_phlst, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_phs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_phsstp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_phslst, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_nth, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_nph, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_nths, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_nphs, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_thsca, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_nk, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_nks, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_nkks, 1, MPI_INT, 0, MPI_COMM_WORLD);
+}
+#endif
diff --git a/src/libnptm/Configuration.cpp b/src/libnptm/Configuration.cpp
index 704d4eca13b96e486c332c3982bf18f861f31c2f..a613463cb98ba3ae5af94c479224247607326568 100644
--- a/src/libnptm/Configuration.cpp
+++ b/src/libnptm/Configuration.cpp
@@ -12,6 +12,11 @@
 #include <hdf5.h>
 #include <regex>
 #include <string>
+#ifdef USE_MPI
+#ifndef MPI_VERSION
+#include <mpi.h>
+#endif
+#endif
 
 #ifndef INCLUDE_TYPES_H_
 #include "../include/types.h"
@@ -33,6 +38,10 @@
 #include "../include/Configuration.h"
 #endif
 
+#ifndef INCLUDE_COMMONS_H
+#include "../include/Commons.h"
+#endif
+
 #ifndef INCLUDE_FILE_IO_H_
 #include "../include/file_io.h"
 #endif
@@ -48,7 +57,7 @@ GeometryConfiguration::GeometryConfiguration(
 					     double in_ph_start, double in_ph_step, double in_ph_end,
 					     double sc_ph_start, double sc_ph_step, double sc_ph_end,
 					     int jwtm
-) {
+					     ) {
   _number_of_spheres = nsph;
   _l_max = lm;
   _in_pol = in_pol;
@@ -112,6 +121,73 @@ GeometryConfiguration::GeometryConfiguration(const GeometryConfiguration& rhs)
   }
 }
 
+#ifdef MPI_VERSION
+GeometryConfiguration::GeometryConfiguration(const mixMPI *mpidata) {
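+  // MPI receiver constructor: fields must arrive in exactly the order mpibcast() below sends them.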
+  MPI_Bcast(&_number_of_spheres, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_l_max, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_in_pol, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_npnt, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_npntts, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_isam, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_li, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_le, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  // The width of np_int depends on the build configuration, so _mxndm is broadcast as raw bytes
+  char *byte_mxndm = (char *) &_mxndm;
+  MPI_Bcast(byte_mxndm, sizeof(np_int), MPI_BYTE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_iavm, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_in_theta_start, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_in_theta_step, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_in_theta_end, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_in_phi_start, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_in_phi_step, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_in_phi_end, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_sc_theta_start, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_sc_theta_step, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_sc_theta_end, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_sc_phi_start, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_sc_phi_step, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_sc_phi_end, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_jwtm, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  _sph_x = new double[_number_of_spheres]();
+  _sph_y = new double[_number_of_spheres]();
+  _sph_z = new double[_number_of_spheres]();
+  MPI_Bcast(_sph_x, _number_of_spheres, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(_sph_y, _number_of_spheres, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(_sph_z, _number_of_spheres, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+}
+
+void GeometryConfiguration::mpibcast(const mixMPI *mpidata) {
+  MPI_Bcast(&_number_of_spheres, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_l_max, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_in_pol, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_npnt, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_npntts, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_isam, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_li, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_le, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  // The width of np_int depends on the build configuration, so _mxndm is broadcast as raw bytes
+  char *byte_mxndm = (char *) &_mxndm;
+  MPI_Bcast(byte_mxndm, sizeof(np_int), MPI_BYTE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_iavm, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_in_theta_start, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_in_theta_step, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_in_theta_end, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_in_phi_start, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_in_phi_step, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_in_phi_end, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_sc_theta_start, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_sc_theta_step, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_sc_theta_end, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_sc_phi_start, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_sc_phi_step, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_sc_phi_end, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_jwtm, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(_sph_x, _number_of_spheres, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(_sph_y, _number_of_spheres, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(_sph_z, _number_of_spheres, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+}
+#endif
+
 GeometryConfiguration::~GeometryConfiguration() {
   delete[] _sph_x;
   delete[] _sph_y;
@@ -229,7 +305,7 @@ GeometryConfiguration* GeometryConfiguration::from_legacy(const std::string& fil
 							  in_ph_start, in_ph_step, in_ph_end,
 							  sc_ph_start, sc_ph_step, sc_ph_end,
 							  fjwtm
-  );
+							  );
   delete[] file_lines;
   return conf;
 }
@@ -250,7 +326,7 @@ ScattererConfiguration::ScattererConfiguration(
 					       double ex,
 					       double w,
 					       double x
-) {
+					       ) {
   _number_of_spheres = nsph;
   _configurations = configs;
   _number_of_scales = nxi;
@@ -281,7 +357,7 @@ ScattererConfiguration::ScattererConfiguration(
     }
   }
 }
 
 ScattererConfiguration::ScattererConfiguration(const ScattererConfiguration& rhs)
 {
   _number_of_spheres = rhs._number_of_spheres;
@@ -317,6 +393,77 @@ ScattererConfiguration::ScattererConfiguration(const ScattererConfiguration& rhs
   }
 }
 
+#ifdef MPI_VERSION
+ScattererConfiguration::ScattererConfiguration(const mixMPI *mpidata)
+{
+  MPI_Bcast(&_number_of_spheres, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_configurations, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
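+  // The reference variable name is received as an explicit length followed by its raw characters.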
+  int itemp;
+  MPI_Bcast(&itemp, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  char *ctemp = new char[itemp];
+  MPI_Bcast(ctemp, itemp, MPI_CHAR, 0, MPI_COMM_WORLD);
+  _reference_variable_name = ctemp;
+  delete[] ctemp;
+  MPI_Bcast(&_idfc, 1, MPI_INT, 0, MPI_COMM_WORLD);
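+  // The external-sphere flag is a bool, so it is transferred as sizeof(bool) raw bytes.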
+  itemp = sizeof(bool);
+  char *ptemp = (char *) &_use_external_sphere;
+  MPI_Bcast(ptemp, itemp, MPI_CHAR, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_exdc, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_wp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  _iog_vec = new int[_number_of_spheres]();
+  MPI_Bcast(_iog_vec, _number_of_spheres, MPI_INT, 0, MPI_COMM_WORLD);
+  _radii_of_spheres = new double[_number_of_spheres]();
+  MPI_Bcast(_radii_of_spheres, _configurations, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  _nshl_vec = new int[_configurations]();
+  MPI_Bcast(_nshl_vec, _configurations, MPI_INT, 0, MPI_COMM_WORLD);
+  _rcf = new double*[_configurations];
+  _scale_vec = new double[_number_of_scales]();
+  _dc0_matrix = new dcomplex**[_configurations];
+  MPI_Bcast(_scale_vec, _number_of_scales, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  int dim3 = (_idfc == 0) ? _number_of_scales : 1;
+  for (int si = 0; si < _configurations; si++) {
+    _rcf[si] = new double[_nshl_vec[si]]();
+    MPI_Bcast(_rcf[si], _nshl_vec[si], MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    _dc0_matrix[si] = new dcomplex*[_number_of_spheres];
+    for (int sj = 0; sj < _number_of_spheres; sj++) {
+      _dc0_matrix[si][sj] = new dcomplex[dim3]();
+      MPI_Bcast(_dc0_matrix[si][sj], dim3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    }
+  }
+}
+
+void ScattererConfiguration::mpibcast(const mixMPI *mpidata) {
+  MPI_Bcast(&_number_of_spheres, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_configurations, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  int itemp = _reference_variable_name.length()+1;
+  MPI_Bcast(&itemp, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  char *ctemp = strdup(_reference_variable_name.c_str());
+  MPI_Bcast(ctemp, itemp, MPI_CHAR, 0, MPI_COMM_WORLD);
+  free(ctemp);
+  MPI_Bcast(&_idfc, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  itemp = sizeof(bool);
+  char *ptemp = (char *) &_use_external_sphere;
+  MPI_Bcast(ptemp, itemp, MPI_CHAR, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_exdc, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_wp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(_iog_vec, _number_of_spheres, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(_radii_of_spheres, _configurations, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(_nshl_vec, _configurations, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(_scale_vec, _number_of_scales, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  int dim3 = (_idfc == 0) ? _number_of_scales : 1;
+  for (int si = 0; si < _configurations; si++) {
+    MPI_Bcast(_rcf[si], _nshl_vec[si], MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    for (int sj = 0; sj < _number_of_spheres; sj++) {
+      MPI_Bcast(_dc0_matrix[si][sj], dim3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+    }
+  }
+}
+#endif
+
 ScattererConfiguration::~ScattererConfiguration() {
   for (int i = 0; i < _configurations; i++) {
     for (int j = 0; j < _number_of_spheres; j++) {
@@ -605,7 +752,7 @@ ScattererConfiguration* ScattererConfiguration::from_dedfb(const std::string& de
 							      fexdc,
 							      fwp,
 							      fxip
-  );
+							      );
   delete[] file_lines;
   delete[] variable_vector;
   return config;
@@ -663,7 +810,7 @@ ScattererConfiguration* ScattererConfiguration::from_hdf5(const std::string& fil
     str_name = "XIVEC";
     str_type = "FLOAT64_(" + to_string(nxi) + ")";
     status = hdf_file->read(str_name, str_type, xi_vec);
 
     int dim3 = (_idfc == 0) ? nxi : 1;
     int element_size = 2 * dim3 * nsph * configuration_count;
     double *elements = new double[element_size]();
@@ -702,7 +849,7 @@ ScattererConfiguration* ScattererConfiguration::from_hdf5(const std::string& fil
 				      _exdc,
 				      _wp,
 				      _xip
-    );
+				      );
   }
   
   return conf;
@@ -791,12 +938,12 @@ ScattererConfiguration* ScattererConfiguration::from_legacy(const std::string& f
 							    _exdc,
 							    _wp,
 							    _xip
-  );
+							    );
   return conf;
 }
 
 /*
-double ScattererConfiguration::get_param(const std::string& param_name) {
+  double ScattererConfiguration::get_param(const std::string& param_name) {
   double value;
   if (param_name.compare("number_of_spheres") == 0) value = (double)number_of_spheres;
   else if (param_name.compare("nsph") == 0) value = (double)number_of_spheres;
@@ -808,11 +955,11 @@ double ScattererConfiguration::get_param(const std::string& param_name) {
   else if (param_name.compare("wp") == 0) value = wp;
   else if (param_name.compare("xip") == 0) value = xip;
   else {
-    string message = "unrecognized parameter \"" + param_name + "\"";
-    throw UnrecognizedParameterException(message);
+  string message = "unrecognized parameter \"" + param_name + "\"";
+  throw UnrecognizedParameterException(message);
   }
   return value;
-}
+  }
 */
 
 void ScattererConfiguration::print() {
@@ -960,7 +1107,7 @@ void ScattererConfiguration::write_hdf5(const std::string& file_name) {
   for (int ri = 0; ri < rec_num; ri++)
     hdf_file->write(rec_names[ri], rec_types[ri], rec_pointers[ri]);
   hdf_file->close();
-  
+
   // Clean memory
   delete rec_name_list;
   delete rec_type_list;
@@ -1057,7 +1204,7 @@ void ScattererConfiguration::write_formatted(const std::string& file_name) {
 		wl_vec[i],
 		pu_vec[i],
 		ev_vec[i]
-	);
+		);
       }
       break;
     case 1:
@@ -1077,7 +1224,7 @@ void ScattererConfiguration::write_formatted(const std::string& file_name) {
 		pu_vec[i],
 		ev_vec[i],
 		xi_vec[i]
-	);
+		);
       }
       break;
     case 2:
@@ -1097,7 +1244,7 @@ void ScattererConfiguration::write_formatted(const std::string& file_name) {
 		pu_vec[i],
 		ev_vec[i],
 		xi_vec[i]
-	);
+		);
       }
       break;
     case 3:
@@ -1117,7 +1264,7 @@ void ScattererConfiguration::write_formatted(const std::string& file_name) {
 		wl_vec[i],
 		ev_vec[i],
 		xi_vec[i]
-	);
+		);
       }
       break;
     case 4:
@@ -1137,7 +1284,7 @@ void ScattererConfiguration::write_formatted(const std::string& file_name) {
 		wl_vec[i],
 		pu_vec[i],
 		xi_vec[i]
-	);
+		);
       }
       break;
     default:
@@ -1256,3 +1403,4 @@ bool ScattererConfiguration::operator ==(const ScattererConfiguration &other) {
   } // ri loop
   return true;
 }
+
diff --git a/src/make.inc b/src/make.inc
index 88c109cc067b6e27e5c33ad5b7688dfd9fc9e91c..28cee0656e6208962ef313de67cad48cb6ff9580 100644
--- a/src/make.inc
+++ b/src/make.inc
@@ -30,8 +30,16 @@ endif
 
 # CXX defines the default C++ compiler to use. If undefined, GNU Make tries to use g++
 ifndef CXX
+ifdef USE_MPI
+override CXX=mpicxx
+else
 override CXX=g++
 endif
+endif
+
+ifdef USE_MPI
+override MPI_CXXFLAGS+=-DUSE_MPI
+endif
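+# Enable MPI support by defining USE_MPI, e.g. "make USE_MPI=1"; mpicxx is then used unless CXX is set explicitly.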
 
 # HDF5_INCLUDE defines the default path to the HDF5 include files to use
 ifndef HDF5_INCLUDE
@@ -73,7 +81,7 @@ endif
 
 # CXXFLAGS defines the default compilation options for the C++ compiler
 ifndef CXXFLAGS
-override CXXFLAGS=-O3 -ggdb -pg -coverage -I$(HDF5_INCLUDE)
+override CXXFLAGS=-O3 -ggdb -pg -coverage -I$(HDF5_INCLUDE) $(MPI_CXXFLAGS)
 ifdef USE_OPENMP
 override CXXFLAGS+= -fopenmp
 endif