/* Copyright (C) 2024   INAF - Osservatorio Astronomico di Cagliari

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
   
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   A copy of the GNU General Public License is distributed along with
   this program in the COPYING file. If not, see: <https://www.gnu.org/licenses/>.
 */

/*! \file cluster.cpp
 *
 * \brief Implementation of the calculation for a cluster of spheres.
 */
#include <chrono>
#include <cstdio>
#include <exception>
#include <fstream>
#include <hdf5.h>
#include <string>
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_MPI
#ifndef MPI_VERSION
#include <mpi.h>
#endif
#endif
#ifdef USE_NVTX
#include <nvtx3/nvToolsExt.h>
#endif
#ifdef USE_MAGMA
#include <cuda_runtime.h>
#endif

#ifndef INCLUDE_TYPES_H_
#include "../include/types.h"
#endif

#ifndef INCLUDE_ERRORS_H_
#include "../include/errors.h"
#endif

#ifndef INCLUDE_LOGGING_H_
#include "../include/logging.h"
#endif

#ifndef INCLUDE_CONFIGURATION_H_
#include "../include/Configuration.h"
#endif

#ifndef INCLUDE_COMMONS_H_
#include "../include/Commons.h"
#endif

#ifndef INCLUDE_SPH_SUBS_H_
#include "../include/sph_subs.h"
#endif

#ifndef INCLUDE_CLU_SUBS_H_
#include "../include/clu_subs.h"
#endif

#ifndef INCLUDE_TRANSITIONMATRIX_H_
#include "../include/TransitionMatrix.h"
#endif

#ifndef INCLUDE_ALGEBRAIC_H_
#include "../include/algebraic.h"
#endif

#ifndef INCLUDE_LIST_H_
#include "../include/List.h"
#endif

#ifndef INCLUDE_FILE_IO_H_
#include "../include/file_io.h"
#endif

#ifndef INCLUDE_UTILS_H_
#include "../include/utils.h"
#endif

using namespace std;

// I would like to put it all in a struct, but then I'd have to write a constructor for it, due to members defined as references, creating a worse nightmare than the one I'd like to simplify...

/*! \brief Forward declaration of the calculation for a single scale, defined after cluster() in this file.
 *
 * The function returns an `int` error code. cluster() halts the whole calculation
 * when the call for the first scale returns a non-zero value.
 */
int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, ScatteringAngles *sa, ClusterIterationData *cid, VirtualAsciiFile *output, const string& output_path, VirtualBinaryFile *vtppoanp);

/*! \brief C++ implementation of CLU
 *
 *  MPI process 0 reads the two configuration files, writes the header of the
 *  output files, runs the first scale iteration on its own and then shares the
 *  remaining scales among its OpenMP threads and, if MPI is running, among the
 *  other MPI processes. The other MPI processes obtain their configuration from
 *  process 0, compute their share of scales and send the resulting virtual files
 *  back to process 0, which appends them to `c_OCLU` and `c_TPPOAN` in the
 *  output directory.
 *
 *  The function throws `UnrecognizedConfigurationException` if the number of
 *  spheres declared by the two configuration files differ. On failure to open a
 *  configuration file, failure of MAGMA initialisation or failure of the first
 *  scale iteration, it logs the problem and returns early.
 *
 *  \param config_file: `string` Name of the configuration file.
 *  \param data_file: `string` Name of the input data file.
 *  \param output_path: `string` Directory to write the output files in.
 *  \param mpidata: `const mixMPI *` Pointer to the MPI description of the calling
 *  process (rank, number of processes and whether MPI is running).
 */

void cluster(const string& config_file, const string& data_file, const string& output_path, const mixMPI *mpidata) {
  chrono::time_point<chrono::high_resolution_clock> t_start = chrono::high_resolution_clock::now();
  chrono::duration<double> elapsed;
  string message;
  string timing_name;
  // These two are only used by MPI process 0; keep them NULL elsewhere so that clean-up is always safe.
  FILE *timing_file = NULL;
  Logger *time_logger = NULL;
  Logger *logger = new Logger(LOG_DEBG);
  if (mpidata->rank == 0) {
    timing_name = output_path + "/c_timing_mpi"+ to_string(mpidata->rank) +".log";
    timing_file = fopen(timing_name.c_str(), "w");
    if (timing_file != NULL) {
      time_logger = new Logger(LOG_DEBG, timing_file);
    } else {
      // Do not hand a NULL stream to the logger or to fclose(): report the problem and
      // fall back to a logger built like the general purpose one.
      logger->err("ERROR: failed to open timing log file " + timing_name + ".\n");
      time_logger = new Logger(LOG_DEBG);
    }
  }
  int device_count = 0;

  // Closes the timing file (if any) and frees both loggers. Used by every early exit.
  auto release_loggers = [&]() {
    if (timing_file != NULL) fclose(timing_file);
    delete time_logger; // deleting a NULL pointer is a no-op
    delete logger;
  };
  // Clean-up for early exits taken after MAGMA was successfully initialised.
  // NOTE(review): the early exits of process 0 do not notify the other MPI processes,
  // which presumably keep waiting for data from process 0 - confirm and handle if needed.
  auto abort_cleanup = [&]() {
#ifdef USE_MAGMA
    magma_finalize();
#endif
    release_loggers();
  };

  //===========
  // Initialise MAGMA
  //===========
#ifdef USE_MAGMA
  cudaError_t cuda_status = cudaGetDeviceCount(&device_count);
  if (cuda_status != cudaSuccess) {
    logger->err("ERROR: Process " + to_string(mpidata->rank) + " could not query CUDA devices: " + string(cudaGetErrorString(cuda_status)) + "\n");
    device_count = 0;
  }
  logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " CUDA devices.\n", LOG_DEBG);
  logger->log("INFO: Process " + to_string(mpidata->rank) + " initializes MAGMA.\n");
  magma_int_t magma_result = magma_init();
  if (magma_result != MAGMA_SUCCESS) {
    logger->err("ERROR: Process " + to_string(mpidata->rank) + " failed to initilize MAGMA.\n");
    logger->err("PROC-" + to_string(mpidata->rank) + ": MAGMA error code " + to_string(magma_result) + "\n");
    release_loggers();
    return;
  }
#endif
  // end MAGMA initialisation

  //===========================
  // the following only happens on MPI process 0
  //===========================
  if (mpidata->rank == 0) {
#ifdef USE_NVTX
    nvtxRangePush("Set up");
#endif
    //=======================
    // Initialise sconf from configuration file
    //=======================
    logger->log("INFO: making legacy configuration...", LOG_INFO);
    ScattererConfiguration *sconf = NULL;
    try {
      sconf = ScattererConfiguration::from_dedfb(config_file);
    } catch(const OpenConfigurationFileException &ex) {
      logger->err("\nERROR: failed to open scatterer configuration file.\n");
      message = "FILE: " + string(ex.what()) + "\n";
      logger->err(message);
      abort_cleanup();
      return;
    }
    sconf->write_formatted(output_path + "/c_OEDFB");
    sconf->write_binary(output_path + "/c_TEDF");
    sconf->write_binary(output_path + "/c_TEDF.hd5", "HDF5");
    // end sconf initialisation

    //========================
    // Initialise gconf from configuration files
    //========================
    GeometryConfiguration *gconf = NULL;
    try {
      gconf = GeometryConfiguration::from_legacy(data_file);
    } catch (const OpenConfigurationFileException &ex) {
      logger->err("\nERROR: failed to open geometry configuration file.\n");
      message = "FILE: " + string(ex.what()) + "\n";
      logger->err(message);
      delete sconf;
      abort_cleanup();
      return;
    }
    logger->log(" done.\n", LOG_INFO);
    //end gconf initialisation

#ifdef USE_NVTX
    nvtxRangePop();
#endif
    int s_nsph = sconf->number_of_spheres;
    int nsph = gconf->number_of_spheres;
    // Sanity check on number of sphere consistency, should always be verified
    if (s_nsph == nsph) {
      // Shortcuts to variables stored in configuration objects
      ScatteringAngles *p_scattering_angles = new ScatteringAngles(gconf);
      // Open empty virtual ascii file for output
      VirtualAsciiFile *p_output = new VirtualAsciiFile();
      // for the time being, this is ok. When we can, add some logic in the sprintf calls that checks if a longer buffer would be needed, and in case expand it
      // in any case, replace all sprintf() with snprintf(), to avoid in any case writing more than the available buffer size
      char virtual_line[256];
      // Create and initialise pristine cid for MPI proc 0 and thread 0
      ClusterIterationData *cid = new ClusterIterationData(gconf, sconf, mpidata, device_count);
      const int ndi = cid->c1->nsph * cid->c1->nlim;
      np_int ndit = 2 * ndi;
      logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)ndit) + " x " + to_string((int64_t)ndit) +".\n");
      time_logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)ndit) + " x " + to_string((int64_t)ndit) +".\n");

      //==========================
      // Write a block of info to the ascii output file
      //==========================
      sprintf(virtual_line, " READ(IR,*)NSPH,LI,LE,MXNDM,INPOL,NPNT,NPNTTS,IAVM,ISAM\n");
      p_output->append_line(virtual_line);
      // The last value of this line is ISAM (see the header written just above), not a second copy of IAVM.
#ifdef USE_ILP64
      sprintf(virtual_line, " %5d%5d%5d%5ld%5d%5d%5d%5d%5d\n",
              nsph, cid->c1->li, cid->c1->le, gconf->mxndm, gconf->in_pol, gconf->npnt,
              gconf->npntts, gconf->iavm, gconf->isam
              );
#else
      sprintf(virtual_line, " %5d%5d%5d%5d%5d%5d%5d%5d%5d\n",
              nsph, cid->c1->li, cid->c1->le, gconf->mxndm, gconf->in_pol, gconf->npnt,
              gconf->npntts, gconf->iavm, gconf->isam
              );
#endif
      p_output->append_line(virtual_line);
      sprintf(virtual_line, " READ(IR,*)RXX(I),RYY(I),RZZ(I)\n");
      p_output->append_line(virtual_line);
      for (int ri = 0; ri < nsph; ri++) {
        sprintf(virtual_line, "%17.8lE%17.8lE%17.8lE\n",
                gconf->get_sph_x(ri), gconf->get_sph_y(ri), gconf->get_sph_z(ri)
                );
        p_output->append_line(virtual_line);
      }
      sprintf(virtual_line, " READ(IR,*)TH,THSTP,THLST,THS,THSSTP,THSLST\n");
      p_output->append_line(virtual_line);
      sprintf(
              virtual_line, " %10.3lE%10.3lE%10.3lE%10.3lE%10.3lE%10.3lE\n",
              p_scattering_angles->th, p_scattering_angles->thstp,
              p_scattering_angles->thlst, p_scattering_angles->ths,
              p_scattering_angles->thsstp, p_scattering_angles->thslst
              );
      p_output->append_line(virtual_line);
      sprintf(virtual_line, " READ(IR,*)PH,PHSTP,PHLST,PHS,PHSSTP,PHSLST\n");
      p_output->append_line(virtual_line);
      sprintf(
              virtual_line, " %10.3lE%10.3lE%10.3lE%10.3lE%10.3lE%10.3lE\n",
              p_scattering_angles->ph, p_scattering_angles->phstp,
              p_scattering_angles->phlst, p_scattering_angles->phs,
              p_scattering_angles->phsstp, p_scattering_angles->phslst
              );
      p_output->append_line(virtual_line);
      sprintf(virtual_line, " READ(IR,*)JWTM\n");
      p_output->append_line(virtual_line);
      sprintf(virtual_line, " %5d\n", gconf->jwtm);
      p_output->append_line(virtual_line);
      sprintf(virtual_line, "  READ(ITIN)NSPHT\n");
      p_output->append_line(virtual_line);
      sprintf(virtual_line, "  READ(ITIN)(IOG(I),I=1,NSPH)\n");
      p_output->append_line(virtual_line);
      sprintf(virtual_line, "  READ(ITIN)EXDC,WP,XIP,IDFC,NXI\n");
      p_output->append_line(virtual_line);
      sprintf(virtual_line, "  READ(ITIN)(XIV(I),I=1,NXI)\n");
      p_output->append_line(virtual_line);
      sprintf(virtual_line, "  READ(ITIN)NSHL(I),ROS(I)\n");
      p_output->append_line(virtual_line);
      sprintf(virtual_line, "  READ(ITIN)(RCF(I,NS),NS=1,NSH)\n");
      p_output->append_line(virtual_line);
      sprintf(virtual_line, " \n");
      p_output->append_line(virtual_line);
      str(sconf, cid->c1, cid->c3, cid->c6);
      thdps(cid->c1->lm, cid->zpv);
      double exdc = sconf->exdc;
      double exri = sqrt(exdc);
      sprintf(virtual_line, "  REFR. INDEX OF EXTERNAL MEDIUM=%15.7lE\n", exri);
      p_output->append_line(virtual_line);

      // Create empty virtual binary file
      VirtualBinaryFile *vtppoanp = new VirtualBinaryFile();
#ifdef USE_MAGMA
      logger->log("INFO: using MAGMA calls.\n", LOG_INFO);
#elif defined USE_LAPACK
      logger->log("INFO: using LAPACK calls.\n", LOG_INFO);
#else
      logger->log("INFO: using fall-back lucin() calls.\n", LOG_INFO);
#endif
      int iavm = gconf->iavm;
      int isam = gconf->isam;
      int inpol = gconf->in_pol;
      int nxi = sconf->number_of_scales;
      int nth = p_scattering_angles->nth;
      int nths = p_scattering_angles->nths;
      int nph = p_scattering_angles->nph;
      int nphs = p_scattering_angles->nphs;

      //========================
      // write a block of info to virtual binary file
      //========================
      vtppoanp->append_line(VirtualBinaryLine(iavm));
      vtppoanp->append_line(VirtualBinaryLine(isam));
      vtppoanp->append_line(VirtualBinaryLine(inpol));
      vtppoanp->append_line(VirtualBinaryLine(nxi));
      vtppoanp->append_line(VirtualBinaryLine(nth));
      vtppoanp->append_line(VirtualBinaryLine(nph));
      vtppoanp->append_line(VirtualBinaryLine(nths));
      vtppoanp->append_line(VirtualBinaryLine(nphs));
      if (sconf->idfc < 0) {
        cid->vk = cid->xip * cid->wn;
        sprintf(virtual_line, "  VK=%15.7lE, XI IS SCALE FACTOR FOR LENGTHS\n", cid->vk);
        p_output->append_line(virtual_line);
        sprintf(virtual_line, " \n");
        p_output->append_line(virtual_line);
      }

      // do the first iteration on jxi488 separately, since it seems to be different from the others
      int jxi488 = 1;
      chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
#ifdef USE_NVTX
      nvtxRangePush("First iteration");
#endif
      // use these pragmas, which should have no effect on parallelism, just to push OMP nested levels at the same level also in the first wavelength iteration
      int jer = 0;
#pragma omp parallel
      {
#pragma omp single
        {
          jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp);
        }
      }
#ifdef USE_NVTX
      nvtxRangePop();
#endif
      chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now();
      elapsed = start_iter_1 - t_start;
      message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n";
      logger->log(message);
      time_logger->log(message);
      elapsed = end_iter_1 - start_iter_1;
      message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n";
      logger->log(message);
      time_logger->log(message);
      if (jer != 0) {
        // First loop failed. Halt the calculation, releasing everything allocated so far
        // (including the virtual binary file) before the loggers and MAGMA.
        delete p_output;
        delete vtppoanp;
        delete p_scattering_angles;
        delete cid;
        delete sconf;
        delete gconf;
        abort_cleanup();
        return;
      }

      //==================================================
      // do the first outputs here, so that I open here the new files, afterwards I only append
      //==================================================
      p_output->write_to_disk(output_path + "/c_OCLU");
      delete p_output;
      vtppoanp->write_to_disk(output_path + "/c_TPPOAN");
      delete vtppoanp;

      // here go the calls that send data to be duplicated on other MPI processes from process 0 to others, using MPI broadcasts, but only if MPI is actually used
#ifdef MPI_VERSION
      if (mpidata->mpirunning) {
        gconf->mpibcast(mpidata);
        sconf->mpibcast(mpidata);
        cid->mpibcast(mpidata);
        p_scattering_angles->mpibcast(mpidata);
      }
#endif
      // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
      int ompnumthreads = 1;
      // this is for MPI process 0 (or even if we are not using MPI at all)
      int myMPIstride = ompnumthreads;
      int myMPIblock = ompnumthreads;
      // Define here shared arrays of virtual ascii and binary files, so that thread 0 will be able to access them all later
      VirtualAsciiFile **p_outarray = NULL;
      VirtualBinaryFile **vtppoanarray = NULL;

#ifdef USE_NVTX
      nvtxRangePush("Parallel loop");
#endif

      //===========================================
      // open the OpenMP parallel context, so each thread can initialise its stuff
      //===========================================
#pragma omp parallel
      {
        // Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defined value anyway
        int myompthread = 0;

#ifdef _OPENMP
        // If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files
        myompthread = omp_get_thread_num();
        if (myompthread == 0) ompnumthreads = omp_get_num_threads();
#endif

        if (myompthread == 0) {
          // Initialise some shared variables only on thread 0
          p_outarray = new VirtualAsciiFile*[ompnumthreads];
          vtppoanarray = new VirtualBinaryFile*[ompnumthreads];
          myMPIblock = ompnumthreads;
          myMPIstride = myMPIblock;
        }

#ifdef MPI_VERSION
        if (myompthread == 0) {
          if (mpidata->mpirunning) {
            // only go through this if MPI has been actually used
            for (int rr=1; rr<mpidata->nprocs; rr++) {
              // individually send their respective starting points to other MPI processes: they start immediately after the frequencies computed by previous processes so far
              int remotejxi488startoffset = myMPIstride;
              MPI_Send(&remotejxi488startoffset, 1, MPI_INT, rr, 3, MPI_COMM_WORLD);
              int remoteMPIblock;
              MPI_Recv(&remoteMPIblock, 1, MPI_INT, rr, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
              // update myMPIstride to include the ones due to MPI process rr
              myMPIstride += remoteMPIblock;
            }
            // now I know the total myMPIstride, I can send it to all processes
            MPI_Bcast(&myMPIstride, 1, MPI_INT, 0, MPI_COMM_WORLD);
          }
        }
#endif
        // add an omp barrier to make sure that the global variables defined by thread 0 are known to all threads below this
#pragma omp barrier

        // To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one. Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number
        ClusterIterationData *cid_2 = NULL;
        VirtualAsciiFile *p_output_2 = NULL;
        VirtualBinaryFile *vtppoanp_2 = NULL;
        // for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
        if (myompthread == 0) {
          cid_2 = cid;
        } else {
          // this is not thread 0, so do create fresh copies of all local variables
          cid_2 = new ClusterIterationData(*cid);
        }
        // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
        if (myompthread==0) {
          logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n");
        }
#pragma omp barrier
        // ok, now I can actually start the parallel calculations
        for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
          // the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier
          int myjxi488 = ixi488+myompthread;
          // each thread opens new virtual files and stores their pointers in the shared array
          p_output_2 = new VirtualAsciiFile();
          vtppoanp_2 = new VirtualBinaryFile();
          // each thread puts a copy of the pointers to its virtual files in the shared arrays
          p_outarray[myompthread] = p_output_2;
          vtppoanarray[myompthread] = vtppoanp_2;
#pragma omp barrier

          // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism
          if (myjxi488 <= cid_2->number_of_scales) {
            // thread-local error code: do not reuse the shared jer of the first iteration
            int iter_jer = cluster_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2);
            if (iter_jer != 0) {
              // the loop goes on (all threads must keep reaching the barriers), but the failure is reported
              logger->err("ERROR: scale iteration " + to_string(myjxi488) + " ended with error code " + to_string(iter_jer) + ".\n");
            }
          }
#pragma omp barrier

#ifdef USE_NVTX
          nvtxRangePush("Output concatenation");
#endif
#pragma omp barrier
          // threads different from 0 append their virtual files to the one of thread 0, and delete them
          if (myompthread == 0) {
            for (int ti=1; ti<ompnumthreads; ti++) {
              p_outarray[0]->append(*(p_outarray[ti]));
              delete p_outarray[ti];
              vtppoanarray[0]->append(*(vtppoanarray[ti]));
              delete vtppoanarray[ti];
            }
          }
#pragma omp barrier
          //==============================================
          // Collect all virtual files on thread 0 of MPI process 0, and append them to disk
          //==============================================
          if (myompthread == 0) {
            // thread 0 writes its virtual files, now including contributions from all threads, to disk, and deletes them
            p_outarray[0]->append_to_disk(output_path + "/c_OCLU");
            delete p_outarray[0];
            vtppoanarray[0]->append_to_disk(output_path + "/c_TPPOAN");
            delete vtppoanarray[0];

#ifdef MPI_VERSION
            if (mpidata->mpirunning) {
              // only go through this if MPI has been actually used
              for (int rr=1; rr<mpidata->nprocs; rr++) {
                // get the data from process rr, creating a new virtual ascii file
                VirtualAsciiFile *p_output = new VirtualAsciiFile(mpidata, rr);
                // append to disk and delete virtual ascii file
                p_output->append_to_disk(output_path + "/c_OCLU");
                delete p_output;
                // get the data from process rr, creating a new virtual binary file
                VirtualBinaryFile *vtppoanp = new VirtualBinaryFile(mpidata, rr);
                // append to disk and delete virtual binary file
                vtppoanp->append_to_disk(output_path + "/c_TPPOAN");
                delete vtppoanp;
                MPI_Barrier(MPI_COMM_WORLD);
              }
            }
#endif
          }
          // end block writing to disk
#ifdef USE_NVTX
          nvtxRangePop();
#endif
#pragma omp barrier

        } // close strided loop running on MPI processes, ixi488 loop
        // delete the shared arrays I used to make available to thread 0 the virtual files of other threads
#pragma omp barrier
        if (myompthread == 0) {
          delete[] p_outarray;
          delete[] vtppoanarray;
        }
        {
          // keep this string local to the thread: the function-level message is shared
          string thread_message = "INFO: Closing thread-local output files of thread " + to_string(myompthread) + " and syncing threads.\n";
          logger->log(thread_message);
        }
#ifdef USE_NVTX
        nvtxRangePop();
#endif
        // for thread 0 this releases the original cid, for the other threads their private copy
        delete cid_2;
      }
      delete p_scattering_angles;
    }

    else { // NSPH mismatch between geometry and scatterer configurations.
      // Release everything owned so far: nothing else will do it once the exception leaves this function.
      delete sconf;
      delete gconf;
      abort_cleanup();
      throw UnrecognizedConfigurationException(
                                               "Inconsistent geometry and scatterer configurations."
                                               );
    }

    delete sconf;
    delete gconf;
#ifdef USE_MAGMA
    logger->log("INFO: Process " + to_string(mpidata->rank) + " finalizes MAGMA.\n");
    magma_finalize();
#endif
    chrono::time_point<chrono::high_resolution_clock> t_end = chrono::high_resolution_clock::now();
    elapsed = t_end - t_start;
    message = "INFO: Calculation lasted " + to_string(elapsed.count()) + "s.\n";
    logger->log(message);
    logger->log("Finished: output written to " + output_path + "/c_OCLU\n");
    time_logger->log(message);
    if (timing_file != NULL) fclose(timing_file);
    delete time_logger;
  } // end instructions block of MPI process 0

    //===============================
    // instruction block for MPI processes different from 0
    //===============================
#ifdef MPI_VERSION
  else {
    // here go the code for MPI processes other than 0
    // copy gconf, sconf, cid and p_scattering_angles from MPI process 0
    GeometryConfiguration *gconf = new GeometryConfiguration(mpidata);
    ScattererConfiguration *sconf = new ScattererConfiguration(mpidata);
    ClusterIterationData *cid = new ClusterIterationData(mpidata, device_count);
    ScatteringAngles *p_scattering_angles = new ScatteringAngles(mpidata);

    // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
    int ompnumthreads = 1;
    VirtualAsciiFile **p_outarray = NULL;
    VirtualBinaryFile **vtppoanarray = NULL;
    // overwritten by thread 0 with the value received from MPI process 0
    int myjxi488startoffset = 0;
    int myMPIstride = ompnumthreads;

#pragma omp parallel
    {
      // Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defined value anyway
      int myompthread = 0;
#ifdef _OPENMP
      // If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files
      myompthread = omp_get_thread_num();
      if (myompthread == 0) ompnumthreads = omp_get_num_threads();
#endif
      if (myompthread == 0) {
        // receive the start parameter from MPI process 0
        MPI_Recv(&myjxi488startoffset, 1, MPI_INT, 0, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        // send my number of omp threads to process 0
        MPI_Send(&ompnumthreads, 1, MPI_INT, 0, 3, MPI_COMM_WORLD);
        // receive myMPIstride sent by MPI process 0 to all processes
        MPI_Bcast(&myMPIstride, 1, MPI_INT, 0, MPI_COMM_WORLD);
        // allocate virtual files for each thread
        p_outarray = new VirtualAsciiFile*[ompnumthreads];
        vtppoanarray = new VirtualBinaryFile*[ompnumthreads];
      }
#pragma omp barrier
      // To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one. Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number
      ClusterIterationData *cid_2 = NULL;
      VirtualAsciiFile *p_output_2 = NULL;
      VirtualBinaryFile *vtppoanp_2 = NULL;
      // PLACEHOLDER
      // for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
      if (myompthread == 0) {
        cid_2 = cid;
      } else {
        // this is not thread 0, so do create fresh copies of all local variables
        cid_2 = new ClusterIterationData(*cid);
      }
      // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
#pragma omp barrier
      // ok, now I can actually start the parallel calculations
      for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
        // the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier
        int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
        // each thread opens new virtual files and stores their pointers in the shared array
        p_output_2 = new VirtualAsciiFile();
        vtppoanp_2 = new VirtualBinaryFile();
        // each thread puts a copy of the pointers to its virtual files in the shared arrays
        p_outarray[myompthread] = p_output_2;
        vtppoanarray[myompthread] = vtppoanp_2;
#pragma omp barrier
        if (myompthread==0) logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n");
        // ok, now I can actually start the parallel calculations
        // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism
        if (myjxi488 <= cid_2->number_of_scales) {
          int iter_jer = cluster_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2);
          if (iter_jer != 0) {
            // the loop goes on (all threads must keep reaching the barriers), but the failure is reported
            logger->err("ERROR: scale iteration " + to_string(myjxi488) + " ended with error code " + to_string(iter_jer) + ".\n");
          }
        } // close the check on the scale index handled by this thread

#pragma omp barrier
        // threads different from 0 append their virtual files to the one of thread 0, and delete them
        if (myompthread == 0) {
          for (int ti=1; ti<ompnumthreads; ti++) {
            p_outarray[0]->append(*(p_outarray[ti]));
            delete p_outarray[ti];
            vtppoanarray[0]->append(*(vtppoanarray[ti]));
            delete vtppoanarray[ti];
          }
          // thread 0 sends the collected virtualfiles to thread 0 of MPI process 0, then deletes them
          for (int rr=1; rr<mpidata->nprocs; rr++) {
            if (rr == mpidata->rank) {
              p_outarray[0]->mpisend(mpidata);
              delete p_outarray[0];
              vtppoanarray[0]->mpisend(mpidata);
              delete vtppoanarray[0];
            }
            MPI_Barrier(MPI_COMM_WORLD);
          }
        }
      } // close strided loop running on MPI processes

        // Clean memory
#pragma omp barrier
      if (myompthread == 0) {
        delete[] p_outarray;
        delete[] vtppoanarray;
      }
      // for thread 0 this releases the original cid, for the other threads their private copy
      delete cid_2;

    } // close pragma omp parallel
    delete p_scattering_angles;
    delete sconf;
    delete gconf;
  }
#endif
  // MPI process 0 has already finalized MAGMA inside its own block (before taking the final time stamp),
  // so only the other processes still have to do it here.
#ifdef USE_MAGMA
  if (mpidata->rank != 0) {
    logger->log("INFO: Process " + to_string(mpidata->rank) + " finalizes MAGMA.\n");
    magma_finalize();
  }
#endif
  // the general purpose logger is released exactly once by every process, with or without MPI
  delete logger;
}

int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, ScatteringAngles *sa, ClusterIterationData *cid, VirtualAsciiFile *output, const string& output_path, VirtualBinaryFile *vtppoanp)
{
  int nxi = sconf->number_of_scales;
  char virtual_line[256];
  string message = "INFO: running scale iteration " + to_string(jxi488) + " of " + to_string(nxi) + ".\n";
  Logger *logger = new Logger(LOG_DEBG);
  logger->log(message);
  chrono::duration<double> elapsed;
  chrono::time_point<chrono::high_resolution_clock> interval_start, interval_end;
  int jer = 0;
  int lcalc = 0;
  int jaw = 1;
  int li = gconf->li;
  int le = gconf->le;
  int lm = 0;
  if (le > lm) lm = le;
  if (li > lm) lm = li;
  int nsph = sconf->number_of_spheres;
  np_int mxndm = gconf->mxndm;
  int iavm = gconf->iavm;
  int inpol = gconf->in_pol;
  int npnt = gconf->npnt;
  int npntts = gconf->npntts;
  int isam = gconf->iavm;
  int jwtm = gconf->jwtm;
  np_int ndit = 2 * nsph * cid->c1->nlim;
  int isq, ibf;
  int last_configuration;

#ifdef USE_NVTX
  nvtxRangePush("Prepare matrix calculation");
#endif
  sprintf(virtual_line, "========== JXI =%3d ====================\n", jxi488);
  output->append_line(virtual_line);
  double xi = sconf->get_scale(jxi488 - 1);
  double exdc = sconf->exdc;
  double exri = sqrt(exdc);
  int idfc = (int)sconf->idfc;
  double vkarg = 0.0;
  if (idfc >= 0) {
    cid->vk = xi * cid->wn;
    vkarg = cid->vk;
    sprintf(virtual_line, "  VK=%15.7lE, XI=%15.7lE\n", cid->vk, xi);
    output->append_line(virtual_line);
  } else {
    vkarg = xi * cid->vk;
    cid->sqsfi = 1.0 / (xi * xi);
    sprintf(virtual_line, "  XI=%15.7lE\n", xi);
    output->append_line(virtual_line);
  }
  hjv(exri, vkarg, jer, lcalc, cid->arg, cid->c1);
  if (jer != 0) {
    sprintf(virtual_line, "  STOP IN HJV\n");
    output->append_line(virtual_line);
    return jer;
    // break; // rewrite this to go to the end of the function, to free locally allocated variables and return jer
  }
  last_configuration = 0;
  for (int i132 = 1; i132 <= nsph; i132++) {
    int iogi = cid->c1->iog[i132 - 1];
    if (iogi != i132) {
      for (int l123 = 1; l123 <= li; l123++) {
	cid->c1->rmi[l123 - 1][i132 - 1] = cid->c1->rmi[l123 - 1][iogi - 1];
	cid->c1->rei[l123 - 1][i132 - 1] = cid->c1->rei[l123 - 1][iogi - 1];
      } // l123 loop
    } else {
      last_configuration++;
      int nsh = cid->c1->nshl[last_configuration - 1];
      int ici = (nsh + 1) / 2;
      if (idfc == 0) {
	for (int ic = 0; ic < ici; ic++)
	  cid->c2->dc0[ic] = sconf->get_dielectric_constant(ic, i132 - 1, jxi488 - 1);
      } else {
	if (jxi488 == 1) {
	  for (int ic = 0; ic < ici; ic++)
	    cid->c2->dc0[ic] = sconf->get_dielectric_constant(ic, i132 - 1, 0);
	}
      }
      if (nsh % 2 == 0) cid->c2->dc0[ici] = exdc;
      dme(
	  cid->c1->li, i132, npnt, npntts, vkarg, exdc, exri,
	  cid->c1, cid->c2, jer, lcalc, cid->arg, last_configuration
	  );
      if (jer != 0) {
	sprintf(virtual_line, "  STOP IN DME\n");
	output->append_line(virtual_line);
	return jer;
	//break;
      }
    }
    if (jer != 0) {
      return jer;
      //break;
    }
  } // i132 loop
#ifdef USE_NVTX
  nvtxRangePop();
#endif
  interval_start = chrono::high_resolution_clock::now();
#ifdef USE_NVTX
  nvtxRangePush("Calculate inverted matrix");
#endif
#ifdef DEBUG_AM
  /* now, before cms, output am to p_outam0 */
  VirtualAsciiFile *outam0 = new VirtualAsciiFile();
  string outam0_name = output_path + "/c_AM0_JXI" + to_string(jxi488) + ".txt";
  sprintf(virtual_line, " AM matrix before CMS\n");
  outam0->append_line(virtual_line);
  sprintf(virtual_line, " I1+1   I2+1    Real    Imag\n");
  outam0->append_line(virtual_line);
  write_dcomplex_matrix(outam0, cid->am, ndit, ndit);
  outam0->write_to_disk(outam0_name);
  delete outam0;
#endif
  cms(cid->am, cid->c1, cid->c6);
#ifdef DEBUG_AM
  VirtualAsciiFile *outam1 = new VirtualAsciiFile();
  string outam1_name = output_path + "/c_AM1_JXI" + to_string(jxi488) + ".txt";
  sprintf(virtual_line, " AM matrix after CMS before LUCIN\n");
  outam1->append_line(virtual_line);
  sprintf(virtual_line, " I1+1   I2+1    Real    Imag\n");
  outam1->append_line(virtual_line);
  write_dcomplex_matrix(outam1, cid->am, ndit, ndit, " %5d %5d (%17.8lE,%17.8lE)\n", 1);
  outam1->write_to_disk(outam1_name);
  delete outam1;
#endif
#ifdef USE_NVTX
  nvtxRangePop();
#endif
  interval_end = chrono::high_resolution_clock::now();
  elapsed = interval_end - interval_start;
  message = "INFO: matrix calculation for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n";
  logger->log(message);
  interval_start = chrono::high_resolution_clock::now();
#ifdef USE_NVTX
  nvtxRangePush("Invert the matrix");
#endif
  invert_matrix(cid->am, ndit, jer, mxndm, cid->proc_device);
#ifdef DEBUG_AM
  VirtualAsciiFile *outam2 = new VirtualAsciiFile();
  string outam2_name = output_path + "/c_AM2_JXI" + to_string(jxi488) + ".txt";
  sprintf(virtual_line, " AM matrix after LUCIN before ZTM\n");
  outam2->append_line(virtual_line);
  sprintf(virtual_line, " I1+1   I2+1    Real    Imag\n");
  outam2->append_line(virtual_line);
  write_dcomplex_matrix(outam2, cid->am, ndit, ndit);
  outam2->write_to_disk(outam2_name);
  delete outam2;
#endif
#ifdef USE_NVTX
  nvtxRangePop();
#endif
  interval_end = chrono::high_resolution_clock::now();
  elapsed = interval_end - interval_start;
  message = "INFO: matrix inversion for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n";
  logger->log(message);
  if (jer != 0) {
    message = "ERROR: matrix inversion ended with error code " + to_string(jer) + ".\n";
    logger->err(message);
    return jer;
    // break; // jxi488 loop: goes to memory clean
  }
  interval_start = chrono::high_resolution_clock::now();
#ifdef USE_NVTX
  nvtxRangePush("Average calculation");
#endif
  ztm(cid->am, cid->c1, cid->c6, cid->c9);
#ifdef DEBUG_AM
  VirtualAsciiFile *outam3 = new VirtualAsciiFile();
  string outam3_name = output_path + "/c_AM3_JXI" + to_string(jxi488) + ".txt";
  sprintf(virtual_line, " AM matrix after ZTM\n");
  outam3->append_line(virtual_line);
  sprintf(virtual_line, " I1+1   I2+1    Real    Imag\n");
  outam3->append_line(virtual_line);
  write_dcomplex_matrix(outam3, cid->am, ndit, ndit);
  outam3->write_to_disk(outam3_name);
  delete outam3;
#endif
  if (idfc >= 0) {
    if (jxi488 == jwtm) {
      int nlemt = 2 * cid->c1->nlem;
      string ttms_name = output_path + "/c_TTMS.hd5";
      TransitionMatrix::write_binary(ttms_name, nlemt, lm, cid->vk, exri, cid->c1->am0m, "HDF5");
      ttms_name = output_path + "/c_TTMS";
      TransitionMatrix::write_binary(ttms_name, nlemt, lm, cid->vk, exri, cid->c1->am0m);
    }
  }
  // label 156: continue from here
  if (inpol == 0) {
    sprintf(virtual_line, "   LIN\n");
    output->append_line(virtual_line);
  } else { // label 158
    sprintf(virtual_line, "  CIRC\n");
    output->append_line(virtual_line);
  }
  // label 160
  double cs0 = 0.25 * cid->vk * cid->vk * cid->vk / acos(0.0);
  double csch = 0.0, qschu = 0.0, pschu = 0.0, s0mag = 0.0;
  dcomplex s0 = 0.0 + 0.0 * I;
  scr0(cid->vk, exri, cid->c1, cid->c3);
  double sqk = cid->vk * cid->vk * exdc;
  aps(cid->zpv, cid->c1->li, nsph, cid->c1, sqk, cid->gaps);
  rabas(inpol, cid->c1->li, nsph, cid->c1, cid->tqse, cid->tqspe, cid->tqss, cid->tqsps);
  if (cid->c1->li != cid->c1->le) {
    sprintf(virtual_line, "     SPHERES; LMX=LI\n");
    output->append_line(virtual_line);
  }
  last_configuration = 0;
  for (int i170 = 1; i170 <= nsph; i170++) {
    if (cid->c1->iog[i170 - 1] >= i170) {
      int i = i170 - 1;
      last_configuration++;
      double albeds = cid->c1->sscs[i] / cid->c1->sexs[i];
      cid->c1->sqscs[i] *= cid->sqsfi;
      cid->c1->sqabs[i] *= cid->sqsfi;
      cid->c1->sqexs[i] *= cid->sqsfi;
      sprintf(virtual_line, "     SPHERE %2d\n", i170);
      output->append_line(virtual_line);
      if (cid->c1->nshl[last_configuration - 1] != 1) {
	sprintf(virtual_line, "  SIZE=%15.7lE\n", cid->c2->vsz[i]);
	output->append_line(virtual_line);
      } else { // label 162
	sprintf(virtual_line, "  SIZE=%15.7lE, REFRACTIVE INDEX=%15.7lE%15.7lE\n", cid->c2->vsz[i], real(cid->c2->vkt[i]), imag(cid->c2->vkt[i]));
	output->append_line(virtual_line);
      }
      // label 164
      sprintf(virtual_line, " ----- SCS ----- ABS ----- EXS ----- ALBEDS --\n");
      output->append_line(virtual_line);
      sprintf(virtual_line, " %14.7lE%15.7lE%15.7lE%15.7lE\n", cid->c1->sscs[i], cid->c1->sabs[i], cid->c1->sexs[i], albeds);
      output->append_line(virtual_line);
      sprintf(virtual_line, " ---- SCS/GS -- ABS/GS -- EXS/GS ---\n");
      output->append_line(virtual_line);
      sprintf(virtual_line, " %14.7lE%15.7lE%15.7lE\n", cid->c1->sqscs[i], cid->c1->sqabs[i], cid->c1->sqexs[i]);
      output->append_line(virtual_line);
      sprintf(virtual_line, "  FSAS=%15.7lE%15.7lE\n", real(cid->c1->fsas[i]), imag(cid->c1->fsas[i]));
      output->append_line(virtual_line);
      double alamb = 2.0 * 3.141592653589793 / cid->vk;
      sprintf(virtual_line, "INSERTION: CS_SPHERE  %15.7lE%15.7lE%15.7lE%15.7lE\n", alamb, cid->c1->sscs[i], cid->c1->sabs[i], cid->c1->sexs[i]);
      output->append_line(virtual_line);
      csch = 2.0 * cid->vk * cid->sqsfi / cid->c1->gcsv[i];
      s0 = cid->c1->fsas[i] * exri;
      qschu = imag(s0) * csch;
      pschu = real(s0) * csch;
      s0mag = cabs(s0) * cs0;
      sprintf(virtual_line, "  QSCHU=%15.7lE, PSCHU=%15.7lE, S0MAG=%15.7lE\n", qschu, pschu, s0mag);
      output->append_line(virtual_line);
      double rapr = cid->c1->sexs[i] - cid->gaps[i];
      double cosav = cid->gaps[i] / cid->c1->sscs[i];
      sprintf(virtual_line, "  COSAV=%15.7lE, RAPRS=%15.7lE\n", cosav, rapr);
      output->append_line(virtual_line);
      sprintf(virtual_line, "  IPO= 1, TQEk=%15.7lE, TQSk=%15.7lE\n", cid->tqse[0][i], cid->tqss[0][i]);
      output->append_line(virtual_line);
      sprintf(virtual_line, "  IPO= 2, TQEk=%15.7lE, TQSk=%15.7lE\n", cid->tqse[1][i], cid->tqss[1][i]);
      output->append_line(virtual_line);
    }
  } // i170 loop
  sprintf(virtual_line, "  FSAT=%15.7lE%15.7lE\n", real(cid->c3->tfsas), imag(cid->c3->tfsas));
  output->append_line(virtual_line);
  csch = 2.0 * cid->vk * cid->sqsfi / cid->c3->gcs;
  s0 = cid->c3->tfsas * exri;
  qschu = imag(s0) * csch;
  pschu = real(s0) * csch;
  s0mag = cabs(s0) * cs0;
  sprintf(virtual_line, "  QSCHU=%15.7lE, PSCHU=%15.7lE, S0MAG=%15.7lE\n", qschu, pschu, s0mag);
  output->append_line(virtual_line);
  // tppoan.write(reinterpret_cast<char *>(&(cid->vk)), sizeof(double));
  vtppoanp->append_line(VirtualBinaryLine(cid->vk));
  pcrsm0(cid->vk, exri, inpol, cid->c1);
  apcra(cid->zpv, cid->c1->le, cid->c1->am0m, inpol, sqk, cid->gapm, cid->gappm);
#ifdef USE_NVTX
  nvtxRangePop();
#endif
  interval_end = chrono::high_resolution_clock::now();
  elapsed = interval_end - interval_start;
  message = "INFO: average calculation for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n";
  logger->log(message);
  interval_start = chrono::high_resolution_clock::now();
#ifdef USE_NVTX
  nvtxRangePush("Angle loop");
#endif
  double th = sa->th;
  for (int jth486 = 1; jth486 <= sa->nth; jth486++) { // OpenMP portable?
    double ph = sa->ph;
    double cost = 0.0, sint = 0.0, cosp = 0.0, sinp = 0.0;
    for (int jph484 = 1; jph484 <= sa->nph; jph484++) {
      int jw = 0;
      if (sa->nk != 1 || jxi488 <= 1) {
	upvmp(th, ph, 0, cost, sint, cosp, sinp, cid->u, cid->upmp, cid->unmp);
	if (isam >= 0) {
	  wmamp(
		0, cost, sint, cosp, sinp, inpol, cid->c1->le, 0,
		nsph, cid->argi, cid->u, cid->upmp, cid->unmp, cid->c1
		);
	  // label 182
	  apc(cid->zpv, cid->c1->le, cid->c1->am0m, cid->c1->w, sqk, cid->gap, cid->gapp);
	  raba(cid->c1->le, cid->c1->am0m, cid->c1->w, cid->tqce, cid->tqcpe, cid->tqcs, cid->tqcps);
	  jw = 1;
	}
      } else { // label 180, NK == 1 AND JXI488 == 1
	if (isam >= 0) {
	  // label 182
	  apc(cid->zpv, cid->c1->le, cid->c1->am0m, cid->c1->w, sqk, cid->gap, cid->gapp);
	  raba(cid->c1->le, cid->c1->am0m, cid->c1->w, cid->tqce, cid->tqcpe, cid->tqcs, cid->tqcps);
	  jw = 1;
	}
      }
      // label 184
      double thsl = sa->ths;
      double phsph = 0.0;
      for (int jths = 1; jths <= sa->nths; jths++) {
	double ths = thsl;
	int icspnv = 0;
	if (isam > 1) ths += sa->thsca;
	if (isam >= 1) {
	  phsph = 0.0;
	  if (ths < 0.0 || ths > 180.0) phsph = 180.0;
	  if (ths < 0.0) ths *= -1.0;
	  if (ths > 180.0) ths = 360.0 - ths;
	  if (phsph != 0.0) icspnv = 1;
	}
	// label 186
	double phs = sa->phs;
	for (int jphs = 1; jphs <= sa->nphs; jphs++) {
	  double costs = 0.0, sints = 0.0, cosps = 0.0, sinps = 0.0;
	  if (isam >= 1) {
	    phs = sa->ph + phsph;
	    if (phs > 360.0) phs -= 360.0;
	  }
	  // label 188
	  bool goto190 = (sa->nks == 1 && (jxi488 > 1 || jth486 > 1 || jph484 > 1));
	  if (!goto190) {
	    upvmp(ths, phs, icspnv, costs, sints, cosps, sinps, cid->us, cid->upsmp, cid->unsmp);
	    if (isam >= 0)
	      wmamp(
		    2, costs, sints, cosps, sinps, inpol, cid->c1->le,
		    0, nsph, cid->args, cid->us, cid->upsmp, cid->unsmp, cid->c1
		    );
	  }
	  // label 190
	  if (sa->nkks != 1 || jxi488 <= 1) {
	    upvsp(
		  cid->u, cid->upmp, cid->unmp, cid->us, cid->upsmp, cid->unsmp, cid->up, cid->un, cid->ups, cid->uns,
		  cid->duk, isq, ibf, cid->scan, cid->cfmp, cid->sfmp, cid->cfsp, cid->sfsp
		  );
	    if (isam < 0) {
	      wmasp(
		    cost, sint, cosp, sinp, costs, sints, cosps, sinps,
		    cid->u, cid->up, cid->un, cid->us, cid->ups, cid->uns, isq, ibf, inpol, cid->c1->le,
		    0, nsph, cid->argi, cid->args, cid->c1
		    );
	    } else { // label 192
	      for (int i193 = 0; i193 < 3; i193++) {
		cid->up[i193] = cid->upmp[i193];
		cid->un[i193] = cid->unmp[i193];
		cid->ups[i193] = cid->upsmp[i193];
		cid->uns[i193] = cid->unsmp[i193];
	      }
	    }
	  }
	  // label 194
	  if (iavm == 1) crsm1(cid->vk, exri, cid->c1, cid->c6);
	  if (isam < 0) {
	    apc(cid->zpv, cid->c1->le, cid->c1->am0m, cid->c1->w, sqk, cid->gap, cid->gapp);
	    raba(cid->c1->le, cid->c1->am0m, cid->c1->w, cid->tqce, cid->tqcpe, cid->tqcs, cid->tqcps);
	    jw = 1;
	  }
	  // label 196
	  // tppoan.write(reinterpret_cast<char *>(&th), sizeof(double));
	  vtppoanp->append_line(VirtualBinaryLine(th));
	  // tppoan.write(reinterpret_cast<char *>(&ph), sizeof(double));
	  vtppoanp->append_line(VirtualBinaryLine(ph));
	  // tppoan.write(reinterpret_cast<char *>(&ths), sizeof(double));
	  vtppoanp->append_line(VirtualBinaryLine(ths));
	  // tppoan.write(reinterpret_cast<char *>(&phs), sizeof(double));
	  vtppoanp->append_line(VirtualBinaryLine(phs));
	  // tppoan.write(reinterpret_cast<char *>(&(cid->scan)), sizeof(double));
	  vtppoanp->append_line(VirtualBinaryLine(cid->scan));
	  if (jaw != 0) {
	    jaw = 0;
	    mextc(cid->vk, exri, cid->c1->fsacm, cid->cextlr, cid->cext);
	    // We now have some implicit loops writing to binary
	    for (int i = 0; i < 4; i++) {
	      for (int j = 0; j < 4; j++) {
		double value = cid->cext[i][j];
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    for (int i = 0; i < 2; i++) {
	      double value = cid->c1->scscm[i];
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = real(cid->c1->scscpm[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = imag(cid->c1->scscpm[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = cid->c1->ecscm[i];
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = real(cid->c1->ecscpm[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = imag(cid->c1->ecscpm[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	    }
	    for (int i = 0; i < 3; i++) {
	      for (int j = 0; j < 2; j++) {
		double value = cid->gapm[i][j];
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = real(cid->gappm[i][j]);
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = imag(cid->gappm[i][j]);
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    sprintf(virtual_line, "     CLUSTER (ENSEMBLE AVERAGE, MODE%2d)\n", iavm);
	    output->append_line(virtual_line);
	    int jlr = 2;
	    for (int ilr210 = 1; ilr210 <= 2; ilr210++) {
	      int ipol = (ilr210 % 2 == 0) ? 1 : -1;
	      if (ilr210 == 2) jlr = 1;
	      double extsm = cid->c1->ecscm[ilr210 - 1];
	      double qextm = extsm * cid->sqsfi / cid->c3->gcs;
	      double extrm = extsm / cid->c3->ecs;
	      double scasm = cid->c1->scscm[ilr210 - 1];
	      double albdm = scasm / extsm;
	      double qscam = scasm * cid->sqsfi / cid->c3->gcs;
	      double scarm = scasm / cid->c3->scs;
	      double abssm = extsm - scasm;
	      double qabsm = abssm * cid->sqsfi / cid->c3->gcs;
	      double absrm = abssm / cid->c3->acs;
	      double acsecs = cid->c3->acs / cid->c3->ecs;
	      if (acsecs >= -1.0e-6 && acsecs <= 1.0e-6) absrm = 1.0;
	      dcomplex s0m = cid->c1->fsacm[ilr210 - 1][ilr210 - 1] * exri;
	      double qschum = imag(s0m) * csch;
	      double pschum = real(s0m) * csch;
	      double s0magm = cabs(s0m) * cs0;
	      double rfinrm = real(cid->c1->fsacm[ilr210 - 1][ilr210 - 1]) / real(cid->c3->tfsas);
	      double extcrm = imag(cid->c1->fsacm[ilr210 - 1][ilr210 - 1]) / imag(cid->c3->tfsas);
	      if (inpol == 0) {
		sprintf(virtual_line, "   LIN %2d\n", ipol);
		output->append_line(virtual_line);
	      } else { // label 206
		sprintf(virtual_line, "  CIRC %2d\n", ipol);
		output->append_line(virtual_line);
	      }
	      // label 208
	      sprintf(virtual_line, " ----- SCC ----- ABC ----- EXC ----- ALBEDC --\n");
	      output->append_line(virtual_line);
	      sprintf(virtual_line, " %14.7lE%15.7lE%15.7lE%15.7lE\n", scasm, abssm, extsm, albdm);
	      output->append_line(virtual_line);
	      sprintf(virtual_line, " --- SCC/TGS - ABC/TGS - EXC/TGS ---\n");
	      output->append_line(virtual_line);
	      sprintf(virtual_line, " %14.7lE%15.7lE%15.7lE\n", qscam, qabsm, qextm);
	      output->append_line(virtual_line);
	      sprintf(virtual_line, " ---- SCCRT --- ABCRT --- EXCRT ----\n");
	      output->append_line(virtual_line);
	      sprintf(virtual_line, " %14.7lE%15.7lE%15.7lE\n", scarm, absrm, extrm);
	      output->append_line(virtual_line);
	      sprintf(
		      virtual_line, "  FSAC(%1d,%1d)=%15.7lE%15.7lE   FSAC(%1d,%1d)=%15.7lE%15.7lE\n",
		      ilr210, ilr210, real(cid->c1->fsacm[ilr210 - 1][ilr210 - 1]),
		      imag(cid->c1->fsacm[ilr210 - 1][ilr210 - 1]), jlr, ilr210,
		      real(cid->c1->fsacm[jlr - 1][ilr210 - 1]), imag(cid->c1->fsacm[jlr - 1][ilr210 - 1])
		      );
	      output->append_line(virtual_line);
	      sprintf(
		      virtual_line, "  RE(FSAC(%1d,%1d))/RE(TFSAS)=%15.7lE, IM(FSAC(%1d,%1d))/IM(TFSAS)=%15.7lE\n",
		      ilr210, ilr210, rfinrm, ilr210, ilr210, extcrm
		      );
	      output->append_line(virtual_line);
	      sprintf(virtual_line, "  QSCHU=%15.7lE, PSCHU=%15.7lE, S0MAG=%15.7lE\n", qschum, pschum, s0magm);
	      output->append_line(virtual_line);
	      double rapr = cid->c1->ecscm[ilr210 - 1] - cid->gapm[2][ilr210 - 1];
	      double cosav = cid->gapm[2][ilr210 - 1] / cid->c1->scscm[ilr210 - 1];
	      double fz = rapr;
	      sprintf(virtual_line, "  COSAV=%15.7lE, RAPRS=%15.7lE\n", cosav, rapr);
	      output->append_line(virtual_line);
	      sprintf(virtual_line, "  Fk=%15.7lE\n", fz);
	      output->append_line(virtual_line);
	      double alamb = 2.0 * 3.141592653589793 / cid->vk;
	      if (ilr210 == 1) {
		sprintf(virtual_line, "INSERTION: CSM_CLUSTER  %15.7lE%15.7lE%15.7lE%15.7lE\n", alamb, scasm, abssm, extsm);
		output->append_line(virtual_line);
	      }
	    } // ilr210 loop
	    double rmbrif = (real(cid->c1->fsacm[0][0]) - real(cid->c1->fsacm[1][1])) / real(cid->c1->fsacm[0][0]);
	    double rmdchr = (imag(cid->c1->fsacm[0][0]) - imag(cid->c1->fsacm[1][1])) / imag(cid->c1->fsacm[0][0]);
	    sprintf(virtual_line, "  (RE(FSAC(1,1))-RE(FSAC(2,2)))/RE(FSAC(1,1))=%15.7lE\n", rmbrif);
	    output->append_line(virtual_line);
	    sprintf(virtual_line, "  (IM(FSAC(1,1))-IM(FSAC(2,2)))/IM(FSAC(1,1))=%15.7lE\n", rmdchr);
	    output->append_line(virtual_line);
	  }
	  // label 212
	  sprintf(virtual_line, "********** JTH =%3d, JPH =%3d, JTHS =%3d, JPHS =%3d ********************\n", jth486, jph484, jths, jphs);
	  output->append_line(virtual_line);
	  sprintf(virtual_line, "  TIDG=%10.3lE, PIDG=%10.3lE, TSDG=%10.3lE, PSDG=%10.3lE\n", th, ph, ths, phs);
	  output->append_line(virtual_line);
	  sprintf(virtual_line, "  SCAND=%10.3lE\n", cid->scan);
	  output->append_line(virtual_line);
	  sprintf(virtual_line, "  CFMP=%15.7lE, SFMP=%15.7lE\n", cid->cfmp, cid->sfmp);
	  output->append_line(virtual_line);
	  sprintf(virtual_line, "  CFSP=%15.7lE, SFSP=%15.7lE\n", cid->cfsp, cid->sfsp);
	  output->append_line(virtual_line);
	  if (isam >= 0) {
	    sprintf(virtual_line, "  UNI=(%12.5lE,%12.5lE,%12.5lE)\n", cid->un[0], cid->un[1], cid->un[2]);
	    output->append_line(virtual_line);
	    sprintf(virtual_line, "  UNS=(%12.5lE,%12.5lE,%12.5lE)\n", cid->uns[0], cid->uns[1], cid->uns[2]);
	    output->append_line(virtual_line);
	  } else { // label 214
	    sprintf(virtual_line, "  UN=(%12.5lE,%12.5lE,%12.5lE)\n\n", cid->un[0], cid->un[1], cid->un[2]);
	    output->append_line(virtual_line);
	  }
	  // label 220
	  if (inpol == 0) {
	    sprintf(virtual_line, "   LIN\n");
	    output->append_line(virtual_line);
	  } else { // label 222
	    sprintf(virtual_line, "  CIRC\n");
	    output->append_line(virtual_line);
	  }
	  // label 224
	  scr2(cid->vk, vkarg, exri, cid->duk, cid->c1, cid->c3);
	  if (cid->c1->li != cid->c1->le) {
	    sprintf(virtual_line, "     SPHERES; LMX=MIN0(LI,LE)\n");
	    output->append_line(virtual_line);
	  }
	  for (int i226 = 1; i226 <= nsph; i226++) {
	    if (cid->c1->iog[i226 - 1] >= i226) {
	      sprintf(virtual_line, "     SPHERE %2d\n", i226);
	      output->append_line(virtual_line);
	      sprintf(
		      virtual_line, "  SAS(1,1)=%15.7lE%15.7lE, SAS(2,1)=%15.7lE%15.7lE\n",
		      real(cid->c1->sas[i226 - 1][0][0]), imag(cid->c1->sas[i226 - 1][0][0]),
		      real(cid->c1->sas[i226 - 1][1][0]), imag(cid->c1->sas[i226 - 1][1][0])
		      );
	      output->append_line(virtual_line);
	      sprintf(
		      virtual_line, "  SAS(1,2)=%15.7lE%15.7lE, SAS(2,2)=%15.7lE%15.7lE\n",
		      real(cid->c1->sas[i226 - 1][0][1]), imag(cid->c1->sas[i226 - 1][0][1]),
		      real(cid->c1->sas[i226 - 1][1][1]), imag(cid->c1->sas[i226 - 1][1][1])
		      );
	      output->append_line(virtual_line);
	      for (int j225 = 0; j225 < 16; j225++) {
		cid->c1->vint[j225] = cid->c1->vints[i226 - 1][j225];
	      } // j225 loop
	      mmulc(cid->c1->vint, cid->cmullr, cid->cmul);
	      sprintf(virtual_line, "  MULS\n");
	      output->append_line(virtual_line);
	      for (int i1 = 0; i1 < 4; i1++) {
		sprintf(
			virtual_line, "        %15.7lE%15.7lE%15.7lE%15.7lE\n",
			cid->cmul[i1][0], cid->cmul[i1][1], cid->cmul[i1][2], cid->cmul[i1][3]
			);
		output->append_line(virtual_line);
	      } // i1 loop
	      sprintf(virtual_line, "  MULSLR\n");
	      output->append_line(virtual_line);
	      for (int i1 = 0; i1 < 4; i1++) {
		sprintf(
			virtual_line, "        %15.7lE%15.7lE%15.7lE%15.7lE\n",
			cid->cmullr[i1][0], cid->cmullr[i1][1], cid->cmullr[i1][2], cid->cmullr[i1][3]
			);
		output->append_line(virtual_line);
	      } // i1 loop
	    }
	  } // i226 loop
	  sprintf(
		  virtual_line, "  SAT(1,1)=%15.7lE%15.7lE, SAT(2,1)=%15.7lE%15.7lE\n",
		  real(cid->c3->tsas[0][0]), imag(cid->c3->tsas[0][0]),
		  real(cid->c3->tsas[1][0]), imag(cid->c3->tsas[1][0])
		  );
	  output->append_line(virtual_line);
	  sprintf(
		  virtual_line, "  SAT(1,2)=%15.7lE%15.7lE, SAT(2,2)=%15.7lE%15.7lE\n",
		  real(cid->c3->tsas[0][1]), imag(cid->c3->tsas[0][1]),
		  real(cid->c3->tsas[1][1]), imag(cid->c3->tsas[1][1])
		  );
	  output->append_line(virtual_line);
	  sprintf(virtual_line, "     CLUSTER\n");
	  output->append_line(virtual_line);
	  pcros(cid->vk, exri, cid->c1);
	  mextc(cid->vk, exri, cid->c1->fsac, cid->cextlr, cid->cext);
	  mmulc(cid->c1->vint, cid->cmullr, cid->cmul);
	  if (jw != 0) {
	    jw = 0;
	    // Some implicit loops writing to binary.
	    for (int i = 0; i < 4; i++) {
	      for (int j = 0; j < 4; j++) {
		double value = cid->cext[i][j];
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    for (int i = 0; i < 2; i++) {
	      double value = cid->c1->scsc[i];
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = real(cid->c1->scscp[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = imag(cid->c1->scscp[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = cid->c1->ecsc[i];
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = real(cid->c1->ecscp[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = imag(cid->c1->ecscp[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	    }
	    for (int i = 0; i < 3; i++) {
	      for (int j = 0; j < 2; j++) {
		double value = cid->gap[i][j];
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = real(cid->gapp[i][j]);
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = imag(cid->gapp[i][j]);
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    for (int i = 0; i < 2; i++) {
	      for (int j = 0; j < 3; j++) {
		double value = cid->tqce[i][j];
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = real(cid->tqcpe[i][j]);
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = imag(cid->tqcpe[i][j]);
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    for (int i = 0; i < 2; i++) {
	      for (int j = 0; j < 3; j++) {
		double value = cid->tqcs[i][j];
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = real(cid->tqcps[i][j]);
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = imag(cid->tqcps[i][j]);
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    for (int i = 0; i < 3; i++) {
	      double value = cid->u[i];
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = cid->up[i];
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = cid->un[i];
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	    }
	  }
	  // label 254
	  for (int i = 0; i < 16; i++) {
	    double value = real(cid->c1->vint[i]);
	    // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	    vtppoanp->append_line(VirtualBinaryLine(value));
	    value = imag(cid->c1->vint[i]);
	    // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	    vtppoanp->append_line(VirtualBinaryLine(value));
	  }
	  for (int i = 0; i < 4; i++) {
	    for (int j = 0; j < 4; j++) {
	      double value = cid->cmul[i][j];
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	    }
	  }
	  int jlr = 2;
	  for (int ilr290 = 1; ilr290 <= 2; ilr290++) {
	    int ipol = (ilr290 % 2 == 0) ? 1 : -1;
	    if (ilr290 == 2) jlr = 1;
	    double extsec = cid->c1->ecsc[ilr290 - 1];
	    double qext = extsec * cid->sqsfi / cid->c3->gcs;
	    double extrat = extsec / cid->c3->ecs;
	    double scasec = cid->c1->scsc[ilr290 - 1];
	    double albedc = scasec / extsec;
	    double qsca = scasec * cid->sqsfi / cid->c3->gcs;
	    double scarat = scasec / cid->c3->scs;
	    double abssec = extsec - scasec;
	    double qabs = abssec * cid->sqsfi / cid->c3->gcs;
	    double absrat = 1.0;
	    double ratio = cid->c3->acs / cid->c3->ecs;
	    if (ratio < -1.0e-6 || ratio > 1.0e-6) absrat = abssec / cid->c3->acs;
	    s0 = cid->c1->fsac[ilr290 - 1][ilr290 - 1] * exri;
	    double qschu = imag(s0) * csch;
	    double pschu = real(s0) * csch;
	    s0mag = cabs(s0) * cs0;
	    double refinr = real(cid->c1->fsac[ilr290 - 1][ilr290 - 1]) / real(cid->c3->tfsas);
	    double extcor = imag(cid->c1->fsac[ilr290 - 1][ilr290 - 1]) / imag(cid->c3->tfsas);
	    if (inpol == 0) {
	      sprintf(virtual_line, "   LIN %2d\n", ipol);
	      output->append_line(virtual_line);
	    } else { // label 273
	      sprintf(virtual_line, "  CIRC %2d\n", ipol);
	      output->append_line(virtual_line);
	    }
	    // label 275
	    sprintf(virtual_line, " ----- SCC ----- ABC ----- EXC ----- ALBEDC --\n");
	    output->append_line(virtual_line);
	    sprintf(
		    virtual_line, " %14.7lE%15.7lE%15.7lE%15.7lE\n",
		    scasec, abssec, extsec, albedc
		    );
	    output->append_line(virtual_line);
	    sprintf(virtual_line, " --- SCC/TGS - ABC/TGS - EXC/TGS ---\n");
	    output->append_line(virtual_line);
	    sprintf(
		    virtual_line, " %14.7lE%15.7lE%15.7lE\n",
		    qsca, qabs, qext
		    );
	    output->append_line(virtual_line);
	    sprintf(virtual_line, " ---- SCCRT --- ABCRT --- EXCRT ----\n");
	    output->append_line(virtual_line);
	    sprintf(
		    virtual_line, " %14.7lE%15.7lE%15.7lE\n",
		    scarat, absrat, extrat
		    );
	    output->append_line(virtual_line);
	    sprintf(
		    virtual_line, "  FSAC(%1d,%1d)=%15.7lE%15.7lE   FSAC(%1d,%1d)=%15.7lE%15.7lE\n",
		    ilr290, ilr290, real(cid->c1->fsac[ilr290 - 1][ilr290 - 1]), imag(cid->c1->fsac[ilr290 - 1][ilr290 - 1]),
		    jlr, ilr290, real(cid->c1->fsac[jlr - 1][ilr290 - 1]), imag(cid->c1->fsac[jlr - 1][ilr290 - 1])
		    );
	    output->append_line(virtual_line);
	    sprintf(
		    virtual_line, "   SAC(%1d,%1d)=%15.7lE%15.7lE    SAC(%1d,%1d)=%15.7lE%15.7lE\n",
		    ilr290, ilr290, real(cid->c1->sac[ilr290 - 1][ilr290 - 1]), imag(cid->c1->sac[ilr290 - 1][ilr290 - 1]),
		    jlr, ilr290, real(cid->c1->sac[jlr - 1][ilr290 - 1]), imag(cid->c1->sac[jlr - 1][ilr290 - 1])
		    );
	    output->append_line(virtual_line);
	    sprintf(
		    virtual_line, "  RE(FSAC(%1d,%1d))/RE(TFSAS)=%15.7lE, IM(FSAC(%1d,%1d))/IM(TFSAS)=%15.7lE\n",
		    ilr290, ilr290, refinr, ilr290, ilr290, extcor
		    );
	    output->append_line(virtual_line);
	    sprintf(
		    virtual_line, "  QSCHU=%15.7lE, PSCHU=%15.7lE, S0MAG=%15.7lE\n",
		    qschu, pschu, s0mag
		    );
	    output->append_line(virtual_line);
	    double alamb = 2.0 * 3.141592653589793 / cid->vk;
	    if (ilr290 == 1) {
	      sprintf(virtual_line, "INSERTION: CS1_CLUSTER  %13.5le%10.3le%10.3le%15.7le%15.7le%15.7le\n", alamb, th, ths, scasec, abssec, extsec);
	    } else if (ilr290 == 2) {
	      sprintf(virtual_line, "INSERTION: CS2_CLUSTER  %13.5le%10.3le%10.3le%15.7le%15.7le%15.7le\n", alamb, th, ths, scasec, abssec, extsec);
	    }
	    output->append_line(virtual_line);
	    bool goto190 = isam >= 0 && (jths > 1 || jphs > 1);
	    if (!goto190) {
	      cid->gapv[0] = cid->gap[0][ilr290 - 1];
	      cid->gapv[1] = cid->gap[1][ilr290 - 1];
	      cid->gapv[2] = cid->gap[2][ilr290 - 1];
	      double extins = cid->c1->ecsc[ilr290 - 1];
	      double scatts = cid->c1->scsc[ilr290 - 1];
	      double rapr, cosav, fp, fn, fk, fx, fy, fz;
	      rftr(cid->u, cid->up, cid->un, cid->gapv, extins, scatts, rapr, cosav, fp, fn, fk, fx, fy, fz);
	      sprintf(virtual_line, "  COSAV=%15.7lE, RAPRS=%15.7lE\n", cosav, rapr);
	      output->append_line(virtual_line);
	      sprintf(virtual_line, "  Fl=%15.7lE, Fr=%15.7lE, Fk=%15.7lE\n", fp, fn, fk);
	      output->append_line(virtual_line);
	      sprintf(virtual_line, "  Fx=%15.7lE, Fy=%15.7lE, Fz=%15.7lE\n", fx, fy, fz);
	      output->append_line(virtual_line);
	      cid->tqev[0] = cid->tqce[ilr290 - 1][0];
	      cid->tqev[1] = cid->tqce[ilr290 - 1][1];
	      cid->tqev[2] = cid->tqce[ilr290 - 1][2];
	      cid->tqsv[0] = cid->tqcs[ilr290 - 1][0];
	      cid->tqsv[1] = cid->tqcs[ilr290 - 1][1];
	      cid->tqsv[2] = cid->tqcs[ilr290 - 1][2];
	      double tep, ten, tek, tsp, tsn, tsk;
	      tqr(cid->u, cid->up, cid->un, cid->tqev, cid->tqsv, tep, ten, tek, tsp, tsn, tsk);
	      sprintf(virtual_line, "   TQEl=%15.7lE,  TQEr=%15.7lE,  TQEk=%15.7lE\n", tep, ten, tek);
	      output->append_line(virtual_line);
	      sprintf(virtual_line, "   TQSl=%15.7lE,  TQSr=%15.7lE,  TQSk=%15.7lE\n", tsp, tsn, tsk);
	      output->append_line(virtual_line);
	      sprintf(
		      virtual_line, "   TQEx=%15.7lE,  TQEy=%15.7lE,  TQEz=%15.7lE\n",
		      cid->tqce[ilr290 - 1][0], cid->tqce[ilr290 - 1][1], cid->tqce[ilr290 - 1][2]
		      );
	      output->append_line(virtual_line);
	      sprintf(
		      virtual_line, "   TQSx=%15.7lE,  TQSy=%15.7lE,  TQSz=%15.7lE\n",
		      cid->tqcs[ilr290 - 1][0], cid->tqcs[ilr290 - 1][1], cid->tqcs[ilr290 - 1][2]
		      );
	      output->append_line(virtual_line);
	    }
	  } //ilr290 loop
	  double rbirif = (real(cid->c1->fsac[0][0]) - real(cid->c1->fsac[1][1])) / real(cid->c1->fsac[0][0]);
	  double rdichr = (imag(cid->c1->fsac[0][0]) - imag(cid->c1->fsac[1][1])) / imag(cid->c1->fsac[0][0]);
	  sprintf(virtual_line, "  (RE(FSAC(1,1))-RE(FSAC(2,2)))/RE(FSAC(1,1))=%15.7lE\n", rbirif);
	  output->append_line(virtual_line);
	  sprintf(virtual_line, "  (IM(FSAC(1,1))-IM(FSAC(2,2)))/IM(FSAC(1,1))=%15.7lE\n", rdichr);
	  output->append_line(virtual_line);
	  sprintf(virtual_line, "  MULC\n");
	  output->append_line(virtual_line);
	  for (int i = 0; i < 4; i++) {
	    sprintf(
		    virtual_line, "        %15.7lE%15.7lE%15.7lE%15.7lE\n",
		    cid->cmul[i][0], cid->cmul[i][1], cid->cmul[i][2], cid->cmul[i][3]
		    );
	    output->append_line(virtual_line);
	  }
	  sprintf(virtual_line, "  MULCLR\n");
	  output->append_line(virtual_line);
	  for (int i = 0; i < 4; i++) {
	    sprintf(
		    virtual_line, "        %15.7lE%15.7lE%15.7lE%15.7lE\n",
		    cid->cmullr[i][0], cid->cmullr[i][1], cid->cmullr[i][2], cid->cmullr[i][3]
		    );
	    output->append_line(virtual_line);
	  }
	  if (iavm != 0) {
	    mmulc(cid->c1->vintm, cid->cmullr, cid->cmul);
	    // Some implicit loops writing to binary.
	    for (int i = 0; i < 16; i++) {
	      double value;
	      value = real(cid->c1->vintm[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = imag(cid->c1->vintm[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	    }
	    for (int i = 0; i < 4; i++) {
	      for (int j = 0; j < 4; j++) {
		double value = cid->cmul[i][j];
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    sprintf(virtual_line, "     CLUSTER (ENSEMBLE AVERAGE, MODE%2d)\n", iavm);
	    output->append_line(virtual_line);
	    if (inpol == 0) {
	      sprintf(virtual_line, "   LIN\n");
	      output->append_line(virtual_line);
	    } else { // label 316
	      sprintf(virtual_line, "  CIRC\n");
	      output->append_line(virtual_line);
	    }
	    // label 318
	    sprintf(virtual_line, "  MULC\n");
	    output->append_line(virtual_line);
	    for (int i = 0; i < 4; i++) {
	      sprintf(
		      virtual_line, "        %15.7lE%15.7lE%15.7lE%15.7lE\n",
		      cid->cmul[i][0], cid->cmul[i][1], cid->cmul[i][2], cid->cmul[i][3]
		      );
	      output->append_line(virtual_line);
	    }
	    sprintf(virtual_line, "  MULCLR\n");
	    output->append_line(virtual_line);
	    for (int i = 0; i < 4; i++) {
	      sprintf(
		      virtual_line, "        %15.7lE%15.7lE%15.7lE%15.7lE\n",
		      cid->cmullr[i][0], cid->cmullr[i][1], cid->cmullr[i][2], cid->cmullr[i][3]
		      );
	      output->append_line(virtual_line);
	    }
	  }
	  // label 420, continues jphs loop
	  if (isam < 1) phs += sa->phsstp;
	} // jphs loop, labeled 480
	if (isam <= 1) thsl += sa->thsstp;
      } // jths loop, labeled 482
      ph += sa->phstp;
    } // jph484 loop
    th += sa->thstp;
  } // jth486 loop
#ifdef USE_NVTX
  nvtxRangePop();
#endif
  interval_end = chrono::high_resolution_clock::now();
  elapsed = interval_end - interval_start;
  message = "INFO: angle loop for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n";
  logger->log(message);
  
  logger->log("INFO: finished scale iteration " + to_string(jxi488) + " of " + to_string(nxi) + ".\n");

  delete logger;

  return jer;
}
