diff --git a/src/libnptm/outputs.cpp b/src/libnptm/outputs.cpp index 9cab3e0989517ee21a0c0e8d7294ac88d798f6f4..36abbc3b2a5aad669a10e872a74e68c576061c5d 100644 --- a/src/libnptm/outputs.cpp +++ b/src/libnptm/outputs.cpp @@ -5522,6 +5522,7 @@ int SphereOutputInfo::write_hdf5(const std::string &file_name) { int SphereOutputInfo::write_legacy(const std::string &file_name) { const dcomplex cc0 = 0.0 + I * 0.0; int result = 0; + int nks = _num_thetas * _num_phis; FILE *p_outfile = fopen(file_name.c_str(), "w"); if (p_outfile != NULL) { if (vec_jxi[0] == 1) { @@ -5629,7 +5630,7 @@ int SphereOutputInfo::write_legacy(const std::string &file_name) { for (int jths = 0; jths < _num_thetas; jths++) { for (int jphs = 0; jphs < _num_phis; jphs++) { int dir_index = ndirs * jxi + done_dirs; - bool goto190 = (done_dirs == 0) && ((jxi > 0) || (jth > 0) || (jph > 0)); + bool goto190 = (nks == 1) && ((jxi > 0) || (jth > 0) || (jph > 0)); fprintf( p_outfile, "********** JTH =%3d, JPH =%3d, JTHS =%3d, JPHS =%3d ********************\n", jth + 1, jph + 1, jths + 1, jphs + 1 diff --git a/src/sphere/np_sphere.cpp b/src/sphere/np_sphere.cpp index 8c369947763b15011ff243f00e57d39209064d78..e83f1edeedefdc422b30f89540fd1d0b8642ee93 100644 --- a/src/sphere/np_sphere.cpp +++ b/src/sphere/np_sphere.cpp @@ -34,6 +34,12 @@ #include <cstdio> #include <string> +#ifdef USE_MPI +#ifndef MPI_VERSION +#include <mpi.h> +#endif +#endif + #ifndef INCLUDE_TYPES_H_ #include "../include/types.h" #endif @@ -42,9 +48,13 @@ #include "../include/Configuration.h" #endif +#ifndef INCLUDE_COMMONS_H_ +#include "../include/Commons.h" +#endif + using namespace std; -extern void sphere(const string& config_file, const string& data_file, const string& output_path); +extern void sphere(const string& config_file, const string& data_file, const string& output_path, const mixMPI *mpidata); /*! \brief Main program entry point. * @@ -59,6 +69,15 @@ extern void sphere(const string& config_file, const string& data_file, const str * \return result: `int` An exit code passed to the OS (0 for succesful execution). 
*/ int main(int argc, char **argv) { + int ierr = 0; +#ifdef MPI_VERSION + ierr = MPI_Init(&argc, &argv); + // create and initialise class with essential MPI data + mixMPI *mpidata = new mixMPI(MPI_COMM_WORLD); +#else + // create a the class with dummy data if we are not using MPI at all + mixMPI *mpidata = new mixMPI(); +#endif string config_file = "../../test_data/sphere/DEDFB"; string data_file = "../../test_data/sphere/DSPH"; string output_path = "."; @@ -67,6 +86,10 @@ int main(int argc, char **argv) { data_file = string(argv[2]); output_path = string(argv[3]); } - sphere(config_file, data_file, output_path); - return 0; + sphere(config_file, data_file, output_path, mpidata); +#ifdef MPI_VERSION + MPI_Finalize(); +#endif + delete mpidata; + return ierr; } diff --git a/src/sphere/sphere.cpp b/src/sphere/sphere.cpp index dafa02def09757897e92c1b8ff336f04388ad1b4..033e111cad90872665c1ccb4fb7e191f000bdd37 100644 --- a/src/sphere/sphere.cpp +++ b/src/sphere/sphere.cpp @@ -21,8 +21,19 @@ #include <cstdio> #include <exception> #include <fstream> +#include <hdf5.h> #include <string> +#ifdef _OPENMP +#include <omp.h> +#endif + +#ifdef USE_MPI +#ifndef MPI_VERSION +#include <mpi.h> +#endif +#endif + #ifndef INCLUDE_TYPES_H_ #include "../include/types.h" #endif @@ -51,594 +62,1160 @@ #include "../include/TransitionMatrix.h" #endif +#ifndef INCLUDE_LIST_H_ +#include "../include/List.h" +#endif + +#ifndef INCLUDE_FILE_IO_H_ +#include "../include/file_io.h" +#endif + #ifndef INCLUDE_OUTPUTS_H_ #include "../include/outputs.h" #endif +#ifndef INCLUDE_ITERATION_DATA_H_ +#include "../include/IterationData.h" +#endif + using namespace std; +/*! \brief Main calculation loop. + * + * \param jxi488: `int` Wavelength loop index. + * \param sconf: `ScattererConfiguration *` Pointer to a `ScattererConfiguration` object. + * \param gconf: `GeometryConfiguration *` Pointer to a `GeometryConfiguration` object. + * \param sa: `ScatteringAngles *` Pointer to a `ScatteringAngles` object. + * \param sid: `SphereIterationData *` Pointer to a `SphereIterationData` object. + * \param oi: `SphereOutputInfo *` Pointer to a `SphereOutputInfo` object. + * \param output_path: `const string &` Path to the output directory. + * \param vtppoanp: `VirtualBinaryFile *` Pointer to a `VirtualBinaryFile` object. + */ +int sphere_jxi488_cycle( + int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, + ScatteringAngles *sa, SphereIterationData *sid, SphereOutputInfo *oi, + const string& output_path, VirtualBinaryFile *vtppoanp +); + /*! \brief C++ implementation of SPH * * \param config_file: `string` Name of the configuration file. * \param data_file: `string` Name of the input data file. * \param output_path: `string` Directory to write the output files in. + * \param mpidata: `const mixMPI *` Pointer to a mixMPI data structure. 
*/ -void sphere(const string& config_file, const string& data_file, const string& output_path) { +void sphere(const string& config_file, const string& data_file, const string& output_path, const mixMPI *mpidata) { Logger *logger = new Logger(LOG_INFO); - dcomplex arg, s0, tfsas; - double th, ph; - logger->log("INFO: making legacy configuration...\n"); - ScattererConfiguration *sconf = NULL; - try { - sconf = ScattererConfiguration::from_dedfb(config_file); - } catch(const OpenConfigurationFileException &ex) { - logger->err("\nERROR: failed to open scatterer configuration file.\n"); - string message = ex.what(); - logger->err("FILE: " + message + "\n"); - delete logger; - exit(1); - } - sconf->write_formatted(output_path + "/c_OEDFB"); - sconf->write_binary(output_path + "/c_TEDF"); - sconf->write_binary(output_path + "/c_TEDF.hd5", "HDF5"); - GeometryConfiguration *gconf = NULL; - try { - gconf = GeometryConfiguration::from_legacy(data_file); - } catch(const OpenConfigurationFileException &ex) { - logger->err("\nERROR: failed to open geometry configuration file.\n"); - string message = ex.what(); - logger->err("FILE: " + message + "\n"); - if (sconf != NULL) delete sconf; - delete logger; - exit(1); - } - int s_nsph = sconf->number_of_spheres; - int nsph = gconf->number_of_spheres; - int configurations = sconf->configurations; - if (s_nsph == nsph) { - int isq, ibf; - double *argi, *args, *gaps; - double cost, sint, cosp, sinp; - double costs, sints, cosps, sinps; - double scan; - double *duk = new double[3]; - double *u = new double[3]; - double *us = new double[3]; - double *un = new double[3]; - double *uns = new double[3]; - double *up = new double[3]; - double *ups = new double[3]; - double *upmp = new double[3]; - double *upsmp = new double[3]; - double *unmp = new double[3]; - double *unsmp = new double[3]; - double **cmul = new double*[4]; - double **cmullr = new double*[4]; - for (int i = 0; i < 4; i++) { - cmul[i] = new double[4]; - cmullr[i] = new double[4]; - } - dcomplex **tqspe, **tqsps; - double **tqse, **tqss; - tqse = new double*[2]; - tqss = new double*[2]; - tqspe = new dcomplex*[2]; - tqsps = new dcomplex*[2]; - for (int ti = 0; ti < 2; ti++) { - tqse[ti] = new double[2](); - tqss[ti] = new double[2](); - tqspe[ti] = new dcomplex[2](); - tqsps[ti] = new dcomplex[2](); + int device_count = 0; + + //=========================== + // the following only happens on MPI process 0 + //=========================== + if (mpidata->rank == 0) { + logger->log("INFO: making legacy configuration..."); + ScattererConfiguration *sconf = NULL; + try { + sconf = ScattererConfiguration::from_dedfb(config_file); + } catch(const OpenConfigurationFileException &ex) { + logger->err("\nERROR: failed to open scatterer configuration file.\n"); + string message = ex.what(); + logger->err("FILE: " + message + "\n"); + delete logger; + return; } - double frx = 0.0, fry = 0.0, frz = 0.0; - double cfmp, cfsp, sfmp, sfsp; - const dcomplex cc0 = 0.0 * I + 0.0; - int jw; - int l_max = gconf->l_max; - ParticleDescriptor *c1 = new ParticleDescriptorSphere(gconf, sconf); - int npnt = gconf->npnt; - int npntts = gconf->npntts; - int in_pol = gconf->in_pol; - int meridional_type = gconf->iavm; - int jwtm = gconf->jwtm; - double in_theta_start = gconf->in_theta_start; - double in_theta_step = gconf->in_theta_step; - double in_theta_end = gconf->in_theta_end; - double sc_theta_start = gconf->sc_theta_start; - double sc_theta_step = gconf->sc_theta_step; - double sc_theta_end = gconf->sc_theta_end; - double 
in_phi_start = gconf->in_phi_start; - double in_phi_step = gconf->in_phi_step; - double in_phi_end = gconf->in_phi_end; - double sc_phi_start = gconf->sc_phi_start; - double sc_phi_step = gconf->sc_phi_step; - double sc_phi_end = gconf->sc_phi_end; - argi = new double[1]; - args = new double[1]; - gaps = new double[2]; - // FILE *output = fopen((output_path + "/c_OSPH").c_str(), "w"); - mixMPI *mpidata = new mixMPI(); // Fake MPI configuration - SphereOutputInfo *p_output = new SphereOutputInfo(sconf, gconf, mpidata); - double sml = 1.0e-3; - int nth = 0, nph = 0; - if (in_theta_step != 0.0) - nth = int((in_theta_end - in_theta_start) / in_theta_step + sml); - nth += 1; - if (in_phi_step != 0.0) - nph = int((in_phi_end - in_phi_start) / in_phi_step + sml); - nph += 1; - int nths = 0, nphs = 0; - double thsca; - if (meridional_type > 1) { // ISAM > 1, fixed scattering angle - nths = 1; - thsca = sc_theta_start - in_theta_start; - } else { //ISAM <= 1 - if (in_theta_step != 0.0) - nths = int((sc_theta_end - sc_theta_start) / sc_theta_step + sml); - nths += 1; - if (meridional_type == 1) { // ISAM = 1 - nphs = 1; - } else { // ISAM < 1 - if (sc_phi_step != 0.0) - nphs = int((sc_phi_end - sc_phi_start) / sc_phi_step + sml); - nphs += 1; - } + sconf->write_formatted(output_path + "/c_OEDFB"); + sconf->write_binary(output_path + "/c_TEDF"); + sconf->write_binary(output_path + "/c_TEDF.hd5", "HDF5"); + GeometryConfiguration *gconf = NULL; + try { + gconf = GeometryConfiguration::from_legacy(data_file); + } catch(const OpenConfigurationFileException &ex) { + logger->err("\nERROR: failed to open geometry configuration file.\n"); + string message = ex.what(); + logger->err("FILE: " + message + "\n"); + if (sconf != NULL) delete sconf; + delete logger; + return; } - int nk = nth * nph; - int nks = nths * nphs; - int nkks = nk * nks; - double th1 = in_theta_start; - double ph1 = in_phi_start; - double ths1 = sc_theta_start; - double phs1 = sc_phi_start; - const double half_pi = acos(0.0); - const double pi = 2.0 * half_pi; - double gcs = 0.0; - for (int i116 = 0; i116 < nsph; i116++) { - int i = i116 + 1; - int iogi = c1->iog[i116]; - if (iogi >= i) { - double gcss = pi * c1->ros[i116] * c1->ros[i116]; - c1->gcsv[i116] = gcss; - int nsh = c1->nshl[i116]; - for (int j115 = 0; j115 < nsh; j115++) { - c1->rc[i116][j115] = sconf->get_rcf(i116, j115) * c1->ros[i116]; + int s_nsph = sconf->number_of_spheres; + int nsph = gconf->number_of_spheres; + int configurations = sconf->configurations; + logger->log(" done.\n"); + // Sanity check on number of sphere consistency, should always be verified + if (s_nsph == nsph) { + ScatteringAngles *p_sa = new ScatteringAngles(gconf); + SphereIterationData *sid = new SphereIterationData(gconf, sconf, mpidata, 0); + SphereOutputInfo *p_output = new SphereOutputInfo(sconf, gconf, mpidata); + // FILE *output = fopen((output_path + "/c_OSPH").c_str(), "w"); + const double half_pi = acos(0.0); + const double pi = 2.0 * half_pi; + sid->c1->gcs = 0.0; + for (int i116 = 0; i116 < nsph; i116++) { + int i = i116 + 1; + int iogi = sid->c1->iog[i116]; + if (iogi >= i) { + double gcss = pi * sid->c1->ros[i116] * sid->c1->ros[i116]; + sid->c1->gcsv[i116] = gcss; + int nsh = sid->c1->nshl[i116]; + for (int j115 = 0; j115 < nsh; j115++) { + sid->c1->rc[i116][j115] = sconf->get_rcf(i116, j115) * sid->c1->ros[i116]; + } } + sid->c1->gcs += sid->c1->gcsv[iogi - 1]; } - gcs += c1->gcsv[iogi - 1]; - } - double ****zpv = new double***[l_max]; //[l_max][3][2][2]; // Matrix: dim[LM x 3 x 2 x 
2] - for (int zi = 0; zi < l_max; zi++) { - zpv[zi] = new double**[3]; - for (int zj = 0; zj < 3; zj++) { - zpv[zi][zj] = new double*[2]; - for (int zk = 0; zk < 2; zk++) { - zpv[zi][zj][zk] = new double[2](); - } + thdps(gconf->l_max, sid->zpv); + double exdc = sconf->exdc; + double exri = sqrt(exdc); + + // Create empty virtual binary file + VirtualBinaryFile *vtppoanp = new VirtualBinaryFile(); + string tppoan_name = output_path + "/c_TPPOAN"; + int imode = 10, tmpvalue; + + //======================== + // write a block of info to virtual binary file + //======================== + vtppoanp->append_line(VirtualBinaryLine(imode)); + tmpvalue = gconf->isam; + vtppoanp->append_line(VirtualBinaryLine(tmpvalue)); + tmpvalue = gconf->in_pol; + vtppoanp->append_line(VirtualBinaryLine(tmpvalue)); + vtppoanp->append_line(VirtualBinaryLine(s_nsph)); + tmpvalue = p_sa->nth; + vtppoanp->append_line(VirtualBinaryLine(tmpvalue)); + tmpvalue = p_sa->nph; + vtppoanp->append_line(VirtualBinaryLine(tmpvalue)); + tmpvalue = p_sa->nths; + vtppoanp->append_line(VirtualBinaryLine(tmpvalue)); + tmpvalue = p_sa->nphs; + vtppoanp->append_line(VirtualBinaryLine(tmpvalue)); + vtppoanp->append_line(VirtualBinaryLine(nsph)); + for (int nsi = 0; nsi < nsph; nsi++) { + tmpvalue = sid->c1->iog[nsi]; + vtppoanp->append_line(VirtualBinaryLine(tmpvalue)); } - } - thdps(l_max, zpv); - double exdc = sconf->exdc; - double exri = sqrt(exdc); - fstream tppoan; - string tppoan_name = output_path + "/c_TPPOAN"; - tppoan.open(tppoan_name.c_str(), ios::binary|ios::out); - if (tppoan.is_open()) { - int imode = 10; - tppoan.write(reinterpret_cast<char *>(&imode), sizeof(int)); - tppoan.write(reinterpret_cast<char *>(&(meridional_type)), sizeof(int)); - tppoan.write(reinterpret_cast<char *>(&(in_pol)), sizeof(int)); - tppoan.write(reinterpret_cast<char *>(&s_nsph), sizeof(int)); - tppoan.write(reinterpret_cast<char *>(&nth), sizeof(int)); - tppoan.write(reinterpret_cast<char *>(&nph), sizeof(int)); - tppoan.write(reinterpret_cast<char *>(&nths), sizeof(int)); - tppoan.write(reinterpret_cast<char *>(&nphs), sizeof(int)); - tppoan.write(reinterpret_cast<char *>(&nsph), sizeof(int)); - - for (int nsi = 0; nsi < nsph; nsi++) - tppoan.write(reinterpret_cast<char *>(&(c1->iog[nsi])), sizeof(int)); - double wp = sconf->wp; - double xip = sconf->xip; - double wn = wp / 3.0e8; - double sqsfi = 1.0; - double vk, vkarg; - int idfc = sconf->idfc; - int nxi = sconf->number_of_scales; - if (idfc < 0) { - vk = xip * wn; - p_output->vec_vk[0] = vk; + + if (sconf->idfc < 0) { + sid->vk = sid->xip * sid->wn; + p_output->vec_vk[0] = sid->vk; } - int ndirs = nkks; - for (int jxi488 = 0; jxi488 < nxi; jxi488++) { - int oindex = 0; - int jxi = jxi488 + 1; - logger->log("INFO: running scale iteration " + to_string(jxi) + " of " + to_string(nxi) + ".\n"); - double xi = sconf->get_scale(jxi488); - if (idfc >= 0) { - vk = xi * wn; - vkarg = vk; - p_output->vec_vk[jxi488] = vk; - p_output->vec_xi[jxi488] = xi; - } else { // IDFC < 0 - vkarg = xi * vk; - sqsfi = 1.0 / (xi * xi); - p_output->vec_vk[jxi488] = vk; - p_output->vec_xi[jxi488] = xi; + + // Do the first wavelength iteration + int jxi488 = 1; + // Use pragmas to put OMP parallelism to second level. + int jer = 0; +#pragma omp parallel + { +#pragma omp single + { + jer = sphere_jxi488_cycle(jxi488 - 1, sconf, gconf, p_sa, sid, p_output, output_path, vtppoanp); + } // OMP single + } // OMP parallel + if (jer != 0) { // First iteration failed. Halt the calculation. 
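// --- Editor's note (illustrative sketch, not part of the patch) ------------
// The first scale is run inside "#pragma omp parallel" + "#pragma omp single"
// so that a single thread executes sphere_jxi488_cycle() while the thread
// team already exists; any OpenMP parallelism inside the cycle then sits at
// the second level, as the comment above says.  A minimal sketch of that
// idiom follows, assuming <omp.h> is available; run_first_scale() and
// do_first_cycle() are hypothetical names.
static int run_first_scale() {
  int jer = 0;                 // declared outside the region, hence shared
#pragma omp parallel
  {
#pragma omp single
    {
      // exactly one thread runs this block; the others wait at the implicit
      // barrier that closes the single construct
      jer = 0;                 // placeholder for do_first_cycle(...)
    }
  }
  return jer;                  // inspected after the region, as in the patch
}
// ---------------------------------------------------------------------------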
+        delete p_output;
+        delete p_sa;
+        delete sid;
+        delete logger;
+        delete sconf;
+        delete gconf;
+        return;
+      }
+
+      //==================================================
+      // do the first outputs here, so that the new files are opened here; afterwards we only append
+      //==================================================
+      vtppoanp->write_to_disk(output_path + "/c_TPPOAN");
+      delete vtppoanp;
+
+      // here go the calls that send data to be duplicated on other MPI processes from process 0 to others, using MPI broadcasts, but only if MPI is actually used
+#ifdef MPI_VERSION
+      if (mpidata->mpirunning) {
+        gconf->mpibcast(mpidata);
+        sconf->mpibcast(mpidata);
+        sid->mpibcast(mpidata);
+        p_sa->mpibcast(mpidata);
+      }
+#endif
+      // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
+      int ompnumthreads = 1;
+      // this is for MPI process 0 (or even if we are not using MPI at all)
+      int myjxi488startoffset = 0;
+      int myMPIstride = ompnumthreads;
+      int myMPIblock = ompnumthreads;
+      // Define here shared arrays of virtual ascii and binary files, so that thread 0 will be able to access them all later
+      SphereOutputInfo **p_outarray = NULL;
+      VirtualBinaryFile **vtppoanarray = NULL;
+
+      //===========================================
+      // open the OpenMP parallel context, so each thread can initialise its stuff
+      //===========================================
+#pragma omp parallel
+      {
+        // Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defined value anyway
+        int myompthread = 0;
+
+#ifdef _OPENMP
+        // If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files
+        myompthread = omp_get_thread_num();
+        if (myompthread == 0) ompnumthreads = omp_get_num_threads();
+#endif
+
+        if (myompthread == 0) {
+          // Initialise some shared variables only on thread 0
+          p_outarray = new SphereOutputInfo*[ompnumthreads];
+          vtppoanarray = new VirtualBinaryFile*[ompnumthreads];
+          myMPIblock = ompnumthreads;
+          myMPIstride = myMPIblock;
+        }
+
+#ifdef MPI_VERSION
+        if (myompthread == 0) {
+          if (mpidata->mpirunning) {
+            // only go through this if MPI has been actually used
+            for (int rr=1; rr<mpidata->nprocs; rr++) {
+              // individually send their respective starting points to other MPI processes: they start immediately after the frequencies computed by previous processes so far
+              int remotejxi488startoffset = myMPIstride;
+              MPI_Send(&remotejxi488startoffset, 1, MPI_INT, rr, 3, MPI_COMM_WORLD);
+              int remoteMPIblock;
+              MPI_Recv(&remoteMPIblock, 1, MPI_INT, rr, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+              // update myMPIstride to include the ones due to MPI process rr
+              myMPIstride += remoteMPIblock;
+            }
+            // now I know the total myMPIstride, I can send it to all processes
+            MPI_Bcast(&myMPIstride, 1, MPI_INT, 0, MPI_COMM_WORLD);
+          }
+        }
+#endif
+        // add an omp barrier to make sure that the global variables defined by thread 0 are known to all threads below this
+#pragma omp barrier
+
+        // To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one.
Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number + SphereIterationData *sid_2 = NULL; + SphereOutputInfo *p_output_2 = NULL; + VirtualBinaryFile *vtppoanp_2 = NULL; + // for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones + if (myompthread == 0) { + sid_2 = sid; + // OMP thread 0 of MPI process 0 holds the pointer to the full output structure + p_output_2 = p_output; + p_outarray[0] = p_output_2; + } else { + // this is not thread 0, so do create fresh copies of all local variables + sid_2 = new SphereIterationData(*sid); } - tppoan.write(reinterpret_cast<char *>(&vk), sizeof(double)); - for (int i132 = 0; i132 < nsph; i132++) { - int i = i132 + 1; - int iogi = c1->iog[i132]; - if (iogi != i) { - for (int l123 = 0; l123 < l_max; l123++) { - c1->rmi[l123][i132] = c1->rmi[l123][iogi - 1]; - c1->rei[l123][i132] = c1->rei[l123][iogi - 1]; + // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads + if (myompthread==0) { + logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n"); + // Thread 0 of process 0 has already allocated all necessary output memory + } +#pragma omp barrier + // ok, now I can actually start the parallel calculations + for (int ixi488=2; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) { + // the parallel loop over MPI processes covers a different set of indices for each thread +#pragma omp barrier + int myjxi488 = ixi488+myompthread; + // each thread opens new virtual files and stores their pointers in the shared array + vtppoanp_2 = new VirtualBinaryFile(); + // each thread puts a copy of the pointers to its virtual files in the shared arrays + vtppoanarray[myompthread] = vtppoanp_2; +#pragma omp barrier + + // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism + if (myjxi488 <= sid_2->number_of_scales) { + if (myompthread > 0) { + // UPDATE: non-0 threads need to allocate memory for one scale at a time. + p_output_2 = new SphereOutputInfo(sconf, gconf, mpidata, myjxi488, 1); + p_outarray[myompthread] = p_output_2; + } + int jer = sphere_jxi488_cycle(myjxi488 - 1, sconf, gconf, p_sa, sid_2, p_output_2, output_path, vtppoanp_2); + } else { + if (myompthread > 0) { + // If there is no input for this thread, set output pointer to NULL. + p_outarray[myompthread] = NULL; } - continue; // i132 } - // label 125 - int nsh = c1->nshl[i132]; - int ici = (nsh + 1) / 2; - if (idfc == 0) { - for (int ic = 0; ic < ici; ic++) - c1->dc0[ic] = sconf->get_dielectric_constant(ic, i132, jxi488); // WARNING: IDFC=0 is not tested! 
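// --- Editor's note (illustrative sketch, not part of the patch) ------------
// The comments above describe how scale (wavelength) indices are divided
// among workers: scale 1 is done up front by rank 0, myMPIstride ends up
// equal to the total number of OpenMP threads over all MPI ranks, and each
// thread then owns the indices ixi488 + startoffset + myompthread for
// ixi488 = 2, 2 + myMPIstride, 2 + 2*myMPIstride, ...  The helper below only
// illustrates that indexing; scales_for_worker() is a hypothetical name.
#include <vector>
static std::vector<int> scales_for_worker(int start_offset, int thread_id,
                                          int stride, int number_of_scales) {
  std::vector<int> mine;
  for (int ixi = 2; ixi <= number_of_scales; ixi += stride) {
    int jxi = ixi + start_offset + thread_id;  // same formula as the loops in the patch
    if (jxi <= number_of_scales) mine.push_back(jxi);
  }
  return mine;
}
// e.g. with 2 ranks x 2 threads (stride 4), rank 1 / thread 1 handles
// scales_for_worker(2, 1, 4, 10) = {5, 9}.
// ---------------------------------------------------------------------------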
- } else { // IDFC != 0 - if (jxi == 1) { - for (int ic = 0; ic < ici; ic++) { - c1->dc0[ic] = sconf->get_dielectric_constant(ic, i132, jxi488); +#pragma omp barrier + // threads different from 0 append their virtual files to the one of thread 0, and delete them + if (myompthread == 0) { + for (int ti=1; ti<ompnumthreads; ti++) { + if (p_outarray[ti] != NULL) { + p_outarray[0]->insert(*(p_outarray[ti])); + delete p_outarray[ti]; + p_outarray[ti] = NULL; } + vtppoanarray[0]->append(*(vtppoanarray[ti])); + delete vtppoanarray[ti]; } } - if (nsh % 2 == 0) c1->dc0[ici] = exdc; - int jer = 0; - int lcalc = 0; - dme(l_max, i, npnt, npntts, vkarg, exdc, exri, c1, jer, lcalc, arg); - if (jer != 0) { - p_output->vec_ier[jxi] = 1; - p_output->lcalc = lcalc; - p_output->arg = arg; - tppoan.close(); - delete sconf; - delete gconf; - delete c1; - for (int zi = l_max - 1; zi > -1; zi--) { - for (int zj = 0; zj < 3; zj++) { - for (int zk = 0; zk < 2; zk++) { - delete[] zpv[zi][zj][zk]; - } - delete[] zpv[zi][zj]; +#pragma omp barrier + //============================================== + // Collect all virtual files on thread 0 of MPI process 0, and append them to disk + //============================================== + if (myompthread == 0) { + // thread 0 writes its virtual files, now including contributions from all threads, to disk, and deletes them + // p_outarray[0]->append_to_disk(output_path + "/c_OCLU"); + // delete p_outarray[0]; + vtppoanarray[0]->append_to_disk(output_path + "/c_TPPOAN"); + delete vtppoanarray[0]; + +#ifdef MPI_VERSION + if (mpidata->mpirunning) { + // only go through this if MPI has been actually used + for (int rr=1; rr<mpidata->nprocs; rr++) { + // get the data from process rr by receiving it in total memory structure + p_outarray[0]->mpireceive(mpidata, rr); + // get the data from process rr, creating a new virtual ascii file + // VirtualAsciiFile *p_output = new VirtualAsciiFile(mpidata, rr); + // append to disk and delete virtual ascii file + // p_output->append_to_disk(output_path + "/c_OCLU"); + // delete p_output; + + // get the data from process rr, creating a new virtual binary file + VirtualBinaryFile *vtppoanp = new VirtualBinaryFile(mpidata, rr); + // append to disk and delete virtual binary file + vtppoanp->append_to_disk(output_path + "/c_TPPOAN"); + delete vtppoanp; + int test = MPI_Barrier(MPI_COMM_WORLD); } - delete[] zpv[zi]; } - delete[] zpv; - delete[] duk; - delete[] u; - delete[] us; - delete[] un; - delete[] uns; - delete[] up; - delete[] ups; - delete[] upmp; - delete[] upsmp; - delete[] unmp; - delete[] unsmp; - delete[] argi; - delete[] args; - delete[] gaps; - for (int i = 3; i > -1; i--) { - delete[] cmul[i]; - delete[] cmullr[i]; +#endif + } + // end block writing to disk +#pragma omp barrier + + } // ixi488 strided MPI loop +#pragma omp barrier + if (myompthread == 0) { + delete[] p_outarray; + delete[] vtppoanarray; + } + { + string message = "INFO: Closing thread-local output files of thread " + to_string(myompthread) + " and syncing threads.\n"; + logger->log(message); + } + delete sid_2; + } // OMP parallel + delete p_sa; + p_output->write(output_path + "/c_OSPH.hd5", "HDF5"); + p_output->write(output_path + "/c_OSPH", "LEGACY"); + delete p_output; + logger->log("Finished. Output written to " + output_path + "/c_OSPH.\n"); + } else { // NSPH mismatch between geometry and scatterer configurations. + throw UnrecognizedConfigurationException( + "Inconsistent geometry and scatterer configurations." 
+      );
+    }
+    delete sconf;
+    delete gconf;
+  } // end of instruction block for MPI process 0
+
+  //===============================
+  // instruction block for MPI processes different from 0
+  //===============================
+#ifdef MPI_VERSION
+  else { // Instruction block for MPI processes other than 0.
+    // here goes the code for MPI processes other than 0
+    // copy gconf, sconf, sid and p_sa from MPI process 0
+    GeometryConfiguration *gconf = new GeometryConfiguration(mpidata);
+    ScattererConfiguration *sconf = new ScattererConfiguration(mpidata);
+    SphereIterationData *sid = new SphereIterationData(mpidata, device_count);
+    ScatteringAngles *p_sa = new ScatteringAngles(mpidata);
+
+    // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
+    int ompnumthreads = 1;
+    SphereOutputInfo **p_outarray = NULL;
+    VirtualBinaryFile **vtppoanarray = NULL;
+    int myjxi488startoffset;
+    int myMPIstride = ompnumthreads;
+    int myMPIblock = ompnumthreads;
+
+#pragma omp parallel
+    {
+      // Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defined value anyway
+      int myompthread = 0;
+#ifdef _OPENMP
+      // If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files
+      myompthread = omp_get_thread_num();
+      if (myompthread == 0) ompnumthreads = omp_get_num_threads();
+#endif
+      if (myompthread == 0) {
+        // receive the start parameter from MPI process 0
+        MPI_Recv(&myjxi488startoffset, 1, MPI_INT, 0, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        // send my number of omp threads to process 0
+        MPI_Send(&ompnumthreads, 1, MPI_INT, 0, 3, MPI_COMM_WORLD);
+        // receive myMPIstride sent by MPI process 0 to all processes
+        MPI_Bcast(&myMPIstride, 1, MPI_INT, 0, MPI_COMM_WORLD);
+        // allocate virtual files for each thread
+        p_outarray = new SphereOutputInfo*[ompnumthreads];
+        vtppoanarray = new VirtualBinaryFile*[ompnumthreads];
+      }
+#pragma omp barrier
+      // To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one.
Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number + SphereIterationData *sid_2 = NULL; + SphereOutputInfo *p_output_2 = NULL; + VirtualBinaryFile *vtppoanp_2 = NULL; + // PLACEHOLDER + // for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones + if (myompthread == 0) { + sid_2 = sid; + } else { + // this is not thread 0, so do create fresh copies of all local variables + sid_2 = new SphereIterationData(*sid); + } + // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads +#pragma omp barrier + // ok, now I can actually start the parallel calculations + for (int ixi488=2; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) { + // the parallel loop over MPI processes covers a different set of indices for each thread +#pragma omp barrier + int myjxi488 = ixi488 + myjxi488startoffset + myompthread; + // each thread opens new virtual files and stores their pointers in the shared array + vtppoanp_2 = new VirtualBinaryFile(); + // each thread puts a copy of the pointers to its virtual files in the shared arrays + vtppoanarray[myompthread] = vtppoanp_2; +#pragma omp barrier + if (myompthread==0) logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n"); + // ok, now I can actually start the parallel calculations + // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism + if (myjxi488 <= sid_2->number_of_scales) { + if (myompthread > 0) { + // UPDATE: non-0 threads need to allocate memory for one scale at a time. + p_output_2 = new SphereOutputInfo(sconf, gconf, mpidata, myjxi488, 1); + p_outarray[myompthread] = p_output_2; + } else { + // Thread 0 of non-zero MPI processes needs to allocate memory for the + // output of all threads. + p_output_2 = new SphereOutputInfo(sconf, gconf, mpidata, myjxi488, ompnumthreads); + p_outarray[0] = p_output_2; + } + int jer = sphere_jxi488_cycle(myjxi488 - 1, sconf, gconf, p_sa, sid_2, p_output_2, output_path, vtppoanp_2); + } else { + if (myompthread > 0) { + // If there is no input for this thread, set the output pointer to NULL. 
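// --- Editor's note (illustrative sketch, not part of the patch) ------------
// Collection pattern used in this block: every OpenMP thread fills its own
// slot of p_outarray, thread 0 folds slots 1..N-1 into slot 0 with insert(),
// and the MPI ranks then take turns (serialised by MPI_Barrier) shipping the
// merged block to rank 0, which appends it to the full output.  A minimal
// sketch of the turn-taking loop, assuming MPI is initialised and <mpi.h> is
// included; send_merged_output() and receive_output_from() stand in for the
// mpisend()/mpireceive() methods and are hypothetical names.
static void collect_in_rank_order(int rank, int nprocs) {
  for (int rr = 1; rr < nprocs; rr++) {
    if (rank == rr) {
      // send_merged_output();      // this rank's turn to send its results
    } else if (rank == 0) {
      // receive_output_from(rr);   // rank 0 appends rank rr's results
    }
    MPI_Barrier(MPI_COMM_WORLD);    // keep the hand-offs in rank order
  }
}
// ---------------------------------------------------------------------------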
+ p_outarray[myompthread] = NULL; + } + } + +#pragma omp barrier + // threads different from 0 append their virtual files to the one of thread 0, and delete them + if (myompthread == 0) { + for (int ti=1; ti<ompnumthreads; ti++) { + if (p_outarray[ti] != NULL) { + p_outarray[0]->insert(*(p_outarray[ti])); + delete p_outarray[ti]; + p_outarray[ti] = NULL; } - delete[] cmul; - delete[] cmullr; - for (int ti = 1; ti > -1; ti--) { - delete[] tqse[ti]; - delete[] tqss[ti]; - delete[] tqspe[ti]; - delete[] tqsps[ti]; + vtppoanarray[0]->append(*(vtppoanarray[ti])); + delete vtppoanarray[ti]; + } + // thread 0 sends the collected virtualfiles to thread 0 of MPI process 0, then deletes them + for (int rr=1; rr<mpidata->nprocs; rr++) { + if (rr == mpidata->rank) { + p_outarray[0]->mpisend(mpidata); + delete p_outarray[0]; + vtppoanarray[0]->mpisend(mpidata); + delete vtppoanarray[0]; } - delete[] tqse; - delete[] tqss; - delete[] tqspe; - delete[] tqsps; - delete logger; - return; + int test = MPI_Barrier(MPI_COMM_WORLD); } - } // i132 - if (idfc >= 0 and nsph == 1 and jxi == jwtm) { - // This is the condition that writes the transition matrix to output. - string ttms_name = output_path + "/c_TTMS.hd5"; - TransitionMatrix::write_binary( - ttms_name, l_max, vk, exri, c1->rmi, c1->rei, - sconf->get_radius(0), "HDF5" - ); - ttms_name = output_path + "/c_TTMS"; - TransitionMatrix::write_binary( - ttms_name, l_max, vk, exri, c1->rmi, c1->rei, - sconf->get_radius(0) - ); } - double cs0 = 0.25 * vk * vk * vk / half_pi; - sscr0(tfsas, nsph, l_max, vk, exri, c1); - double sqk = vk * vk * exdc; - aps(zpv, l_max, nsph, c1, sqk, gaps); - rabas(in_pol, l_max, nsph, c1, tqse, tqspe, tqss, tqsps); - int last_configuration = 0; - for (int i170 = 0; i170 < nsph; i170++) { - int i = i170 + 1; - if (c1->iog[i170] >= i) { - last_configuration++; - oindex = jxi488 * configurations + last_configuration - 1; - double albeds = c1->sscs[i170] / c1->sexs[i170]; - c1->sqscs[i170] *= sqsfi; - c1->sqabs[i170] *= sqsfi; - c1->sqexs[i170] *= sqsfi; - if (c1->nshl[i170] != 1) { - p_output->vec_sphere_ref_indices[oindex] = cc0; - p_output->vec_sphere_sizes[oindex] = c1->vsz[i170]; - } else { - p_output->vec_sphere_ref_indices[oindex] = c1->vkt[i170]; - p_output->vec_sphere_sizes[oindex] = c1->vsz[i170]; - } - p_output->vec_scs[oindex] = c1->sscs[i170]; - p_output->vec_abs[oindex] = c1->sabs[i170]; - p_output->vec_exs[oindex] = c1->sexs[i170]; - p_output->vec_albeds[oindex] = albeds; - p_output->vec_scsrt[oindex] = c1->sqscs[i170]; - p_output->vec_absrt[oindex] = c1->sqabs[i170]; - p_output->vec_exsrt[oindex] = c1->sqexs[i170]; - p_output->vec_fsas[oindex] = c1->fsas[i170]; - double csch = 2.0 * vk * sqsfi / c1->gcsv[i170]; - s0 = c1->fsas[i170] * exri; - double qschu = csch * imag(s0); - double pschu = csch * real(s0); - double s0mag = cs0 * cabs(s0); - p_output->vec_qschu[oindex] = qschu; - p_output->vec_pschu[oindex] = pschu; - p_output->vec_s0mag[oindex] = s0mag; - double rapr = c1->sexs[i170] - gaps[i170]; - double cosav = gaps[i170] / c1->sscs[i170]; - p_output->vec_cosav[oindex] = cosav; - p_output->vec_raprs[oindex] = rapr; - p_output->vec_tqek1[oindex] = tqse[0][i170]; - p_output->vec_tqsk1[oindex] = tqss[0][i170]; - p_output->vec_tqek2[oindex] = tqse[1][i170]; - p_output->vec_tqsk2[oindex] = tqss[1][i170]; - tppoan.write(reinterpret_cast<char *>(&(tqse[0][i170])), sizeof(double)); - tppoan.write(reinterpret_cast<char *>(&(tqss[0][i170])), sizeof(double)); - double val = real(tqspe[0][i170]); - 
tppoan.write(reinterpret_cast<char *>(&val), sizeof(double)); - val = imag(tqspe[0][i170]); - tppoan.write(reinterpret_cast<char *>(&val), sizeof(double)); - val = real(tqsps[0][i170]); - tppoan.write(reinterpret_cast<char *>(&val), sizeof(double)); - val = imag(tqsps[0][i170]); - tppoan.write(reinterpret_cast<char *>(&val), sizeof(double)); - tppoan.write(reinterpret_cast<char *>(&(tqse[1][i170])), sizeof(double)); - tppoan.write(reinterpret_cast<char *>(&(tqss[1][i170])), sizeof(double)); - val = real(tqspe[1][i170]); - tppoan.write(reinterpret_cast<char *>(&val), sizeof(double)); - val = imag(tqspe[1][i170]); - tppoan.write(reinterpret_cast<char *>(&val), sizeof(double)); - val = real(tqsps[1][i170]); - tppoan.write(reinterpret_cast<char *>(&val), sizeof(double)); - val = imag(tqsps[1][i170]); - tppoan.write(reinterpret_cast<char *>(&val), sizeof(double)); - } // End if iog[i170] >= i - } // i170 loop - if (nsph != 1) { - p_output->vec_fsat[jxi488] = tfsas; - double csch = 2.0 * vk * sqsfi / gcs; - s0 = tfsas * exri; - double qschu = csch * imag(s0); - double pschu = csch * real(s0); - double s0mag = cs0 * cabs(s0); - p_output->vec_qschut[jxi488] = qschu; - p_output->vec_pschut[jxi488] = pschu; - p_output->vec_s0magt[jxi488] = s0mag; + } // ixi488: close strided loop running on MPI processes + // Clean memory +#pragma omp barrier + if (myompthread == 0) { + delete[] p_outarray; + delete[] vtppoanarray; + } + delete sid_2; + } // OMP parallel + delete p_sa; + delete sconf; + delete gconf; + } // End instructions block for MPI non-0 processes. +#endif // MPI_VERSION + delete logger; +} // sphere() + +int sphere_jxi488_cycle( + int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, + ScatteringAngles *sa, SphereIterationData *sid, SphereOutputInfo *oi, + const string& output_path, VirtualBinaryFile *vtppoanp +) { + const dcomplex cc0 = 0.0 + I * 0.0; + const double half_pi = acos(0.0); + const double pi = 2.0 * half_pi; + int jer = 0; + Logger *logger = new Logger(LOG_INFO); + int oindex = 0; + int jxi = jxi488 + 1; + int nsph = gconf->number_of_spheres; + int l_max = gconf->l_max; + int in_pol = gconf->in_pol; + int npnt = gconf->npnt; + int npntts = gconf->npntts; + int jwtm = gconf->jwtm; + double wn = sconf->wp / 3.0e8; + double sqsfi = 1.0; + int idfc = sconf->idfc; + int nxi = sconf->number_of_scales; + int configurations = sconf->configurations; + int ndirs = sa->nkks; + int lcalc; + int jw, isq, ibf; + logger->log("INFO: running scale iteration " + to_string(jxi) + " of " + to_string(nxi) + ".\n"); + double vk, vkarg; + double xi = sconf->get_scale(jxi488); + double exdc = sconf->exdc; + double exri = sqrt(exdc); + if (idfc >= 0) { + vk = xi * wn; + vkarg = vk; + oi->vec_vk[jxi488] = vk; + oi->vec_xi[jxi488] = xi; + } else { // IDFC < 0 + vk = sconf->xip * wn; + vkarg = xi * vk; + sqsfi = 1.0 / (xi * xi); + oi->vec_vk[jxi488] = vk; + oi->vec_xi[jxi488] = xi; + } + vtppoanp->append_line(VirtualBinaryLine(vk)); + double thsca = (gconf->isam > 1) ? 
sa->ths - sa->th : 0.0; + for (int i132 = 0; i132 < nsph; i132++) { + int i = i132 + 1; + int iogi = sid->c1->iog[i132]; + if (iogi != i) { + for (int l123 = 0; l123 < l_max; l123++) { + sid->c1->rmi[l123][i132] = sid->c1->rmi[l123][iogi - 1]; + sid->c1->rei[l123][i132] = sid->c1->rei[l123][iogi - 1]; + } + continue; // i132 + } + // label 125 + int nsh = sid->c1->nshl[i132]; + int ici = (nsh + 1) / 2; + if (idfc == 0) { + for (int ic = 0; ic < ici; ic++) + sid->c1->dc0[ic] = sconf->get_dielectric_constant(ic, i132, jxi488); // WARNING: IDFC=0 is not tested! + } else { // IDFC != 0 + if (jxi == 1) { + for (int ic = 0; ic < ici; ic++) { + sid->c1->dc0[ic] = sconf->get_dielectric_constant(ic, i132, jxi488); } - th = th1; - int done_dirs = 0; - for (int jth486 = 0; jth486 < nth; jth486++) { // OpenMP parallelizable section - int jth = jth486 + 1; - ph = ph1; - for (int jph484 = 0; jph484 < nph; jph484++) { - int jph = jph484 + 1; - bool goto182 = (nk == 1) && (jxi > 1); - if (!goto182) { - upvmp(th, ph, 0, cost, sint, cosp, sinp, u, upmp, unmp); + } + } + if (nsh % 2 == 0) sid->c1->dc0[ici] = exdc; + dme(l_max, i, npnt, npntts, vkarg, exdc, exri, sid->c1, jer, lcalc, sid->arg); + if (jer != 0) { + oi->vec_ier[jxi] = 1; + oi->lcalc = lcalc; + return jer; + } + } // i132 + if (idfc >= 0 and nsph == 1 and jxi == jwtm) { + // This is the condition that writes the transition matrix to output. + string ttms_name = output_path + "/c_TTMS.hd5"; + TransitionMatrix::write_binary( + ttms_name, l_max, vk, exri, sid->c1->rmi, sid->c1->rei, + sconf->get_radius(0), "HDF5" + ); + ttms_name = output_path + "/c_TTMS"; + TransitionMatrix::write_binary( + ttms_name, l_max, vk, exri, sid->c1->rmi, sid->c1->rei, + sconf->get_radius(0) + ); + } + double cs0 = 0.25 * vk * vk * vk / half_pi; + sscr0(sid->tfsas, nsph, l_max, vk, exri, sid->c1); + double sqk = vk * vk * exdc; + aps(sid->zpv, l_max, nsph, sid->c1, sqk, sid->gaps); + rabas(in_pol, l_max, nsph, sid->c1, sid->tqse, sid->tqspe, sid->tqss, sid->tqsps); + int last_configuration = 0; + for (int i170 = 0; i170 < nsph; i170++) { + int i = i170 + 1; + if (sid->c1->iog[i170] >= i) { + last_configuration++; + oindex = jxi488 * configurations + last_configuration - 1; + double albeds = sid->c1->sscs[i170] / sid->c1->sexs[i170]; + sid->c1->sqscs[i170] *= sqsfi; + sid->c1->sqabs[i170] *= sqsfi; + sid->c1->sqexs[i170] *= sqsfi; + if (sid->c1->nshl[i170] != 1) { + oi->vec_sphere_ref_indices[oindex] = cc0; + oi->vec_sphere_sizes[oindex] = sid->c1->vsz[i170]; + } else { + oi->vec_sphere_ref_indices[oindex] = sid->c1->vkt[i170]; + oi->vec_sphere_sizes[oindex] = sid->c1->vsz[i170]; + } + oi->vec_scs[oindex] = sid->c1->sscs[i170]; + oi->vec_abs[oindex] = sid->c1->sabs[i170]; + oi->vec_exs[oindex] = sid->c1->sexs[i170]; + oi->vec_albeds[oindex] = albeds; + oi->vec_scsrt[oindex] = sid->c1->sqscs[i170]; + oi->vec_absrt[oindex] = sid->c1->sqabs[i170]; + oi->vec_exsrt[oindex] = sid->c1->sqexs[i170]; + oi->vec_fsas[oindex] = sid->c1->fsas[i170]; + double csch = 2.0 * vk * sqsfi / sid->c1->gcsv[i170]; + dcomplex s0 = sid->c1->fsas[i170] * exri; + double qschu = csch * imag(s0); + double pschu = csch * real(s0); + double s0mag = cs0 * cabs(s0); + oi->vec_qschu[oindex] = qschu; + oi->vec_pschu[oindex] = pschu; + oi->vec_s0mag[oindex] = s0mag; + double rapr = sid->c1->sexs[i170] - sid->gaps[i170]; + double cosav = sid->gaps[i170] / sid->c1->sscs[i170]; + oi->vec_cosav[oindex] = cosav; + oi->vec_raprs[oindex] = rapr; + oi->vec_tqek1[oindex] = sid->tqse[0][i170]; + oi->vec_tqsk1[oindex] 
= sid->tqss[0][i170]; + oi->vec_tqek2[oindex] = sid->tqse[1][i170]; + oi->vec_tqsk2[oindex] = sid->tqss[1][i170]; + double value = sid->tqse[0][i170]; + vtppoanp->append_line(VirtualBinaryLine(value)); + value = sid->tqss[0][i170]; + vtppoanp->append_line(VirtualBinaryLine(value)); + value = real(sid->tqspe[0][i170]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = imag(sid->tqspe[0][i170]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = real(sid->tqsps[0][i170]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = imag(sid->tqsps[0][i170]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = sid->tqse[1][i170]; + vtppoanp->append_line(VirtualBinaryLine(value)); + value = sid->tqss[1][i170]; + vtppoanp->append_line(VirtualBinaryLine(value)); + value = real(sid->tqspe[1][i170]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = imag(sid->tqspe[1][i170]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = real(sid->tqsps[1][i170]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = imag(sid->tqsps[1][i170]); + vtppoanp->append_line(VirtualBinaryLine(value)); + } // End if iog[i170] >= i + } // i170 loop + if (nsph != 1) { + oi->vec_fsat[jxi488] = sid->tfsas; + double csch = 2.0 * vk * sqsfi / sid->c1->gcs; + dcomplex s0 = sid->tfsas * exri; + double qschu = csch * imag(s0); + double pschu = csch * real(s0); + double s0mag = cs0 * cabs(s0); + oi->vec_qschut[jxi488] = qschu; + oi->vec_pschut[jxi488] = pschu; + oi->vec_s0magt[jxi488] = s0mag; + } + double th = sa->th; + int done_dirs = 0; + int nks = sa->nths * sa->nphs; + int nkks = sa->nth * sa->nph * nks; + int nth = sa->nth; + int nph = sa->nph; + double frx, fry, frz; + for (int jth486 = 0; jth486 < sa->nth; jth486++) { // OpenMP parallelizable section + int jth = jth486 + 1; + double ph = sa->ph; + for (int jph484 = 0; jph484 < sa->nph; jph484++) { + int jph = jph484 + 1; + bool goto182 = (sa->nk == 1) && (jxi > 1); + double cost, sint, cosp, sinp; + if (!goto182) { + upvmp(th, ph, 0, cost, sint, cosp, sinp, sid->u, sid->upmp, sid->unmp); + } + if (gconf->isam >= 0) { + wmamp(0, cost, sint, cosp, sinp, in_pol, l_max, 0, nsph, sid->argi, sid->u, sid->upmp, sid->unmp, sid->c1); + for (int i183 = 0; i183 < nsph; i183++) { + double rapr = sid->c1->sexs[i183] - sid->gaps[i183]; + frx = rapr * sid->u[0]; + fry = rapr * sid->u[1]; + frz = rapr * sid->u[2]; + } + jw = 1; + } + double thsl = sa->ths; + double phsph = 0.0; + for (int jths482 = 0; jths482 < sa->nths; jths482++) { + int jths = jths482 + 1; + double ths = thsl; + int icspnv = 0; + if (gconf->isam > 1) ths = th + thsca; + if (gconf->isam >= 1) { + phsph = 0.0; + if ((ths < 0.0) || (ths > 180.0)) phsph = 180.0; + if (ths < 0.0) ths *= -1.0; + if (ths > 180.0) ths = 360.0 - ths; + if (phsph != 0.0) icspnv = 1; + } + double phs = sa->phs; + for (int jphs480 = 0; jphs480 < sa->nphs; jphs480++) { + int jphs = jphs480 + 1; + double costs, sints, cosps, sinps; + if (gconf->isam >= 1) { + phs = ph + phsph; + if (phs >= 360.0) phs -= 360.0; + } + bool goto190 = (nks == 1) && ((jxi > 1) || (jth > 1) || (jph > 1)); + if (!goto190) { + upvmp(ths, phs, icspnv, costs, sints, cosps, sinps, sid->us, sid->upsmp, sid->unsmp); + if (gconf->isam >= 0) { + wmamp(2, costs, sints, cosps, sinps, in_pol, l_max, 0, nsph, sid->args, sid->us, sid->upsmp, sid->unsmp, sid->c1); } - if (meridional_type >= 0) { - wmamp(0, cost, sint, cosp, sinp, in_pol, l_max, 0, nsph, argi, u, upmp, unmp, c1); - for (int i183 = 0; i183 < nsph; i183++) { - 
double rapr = c1->sexs[i183] - gaps[i183]; - frx = rapr * u[0]; - fry = rapr * u[1]; - frz = rapr * u[2]; + } + if (nkks != 0 || jxi == 1) { + upvsp( + sid->u, sid->upmp, sid->unmp, sid->us, sid->upsmp, sid->unsmp, + sid->up, sid->un, sid->ups, sid->uns, sid->duk, isq, ibf, + sid->scan, sid->cfmp, sid->sfmp, sid->cfsp, sid->sfsp + ); + if (gconf->isam < 0) { + wmasp( + cost, sint, cosp, sinp, costs, sints, cosps, sinps, + sid->u, sid->up, sid->un, sid->us, sid->ups, sid->uns, + isq, ibf, in_pol, l_max, 0, nsph, sid->argi, sid->args, + sid->c1 + ); + } + for (int i193 = 0; i193 < 3; i193++) { + sid->un[i193] = sid->unmp[i193]; + sid->uns[i193] = sid->unsmp[i193]; + } + } + if (gconf->isam < 0) jw = 1; + vtppoanp->append_line(VirtualBinaryLine(th)); + vtppoanp->append_line(VirtualBinaryLine(ph)); + vtppoanp->append_line(VirtualBinaryLine(ths)); + vtppoanp->append_line(VirtualBinaryLine(phs)); + double value = sid->scan; + vtppoanp->append_line(VirtualBinaryLine(value)); + if (jw != 0) { + jw = 0; + value = sid->u[0]; + vtppoanp->append_line(VirtualBinaryLine(value)); + value = sid->u[1]; + vtppoanp->append_line(VirtualBinaryLine(value)); + value = sid->u[2]; + vtppoanp->append_line(VirtualBinaryLine(value)); + } + oi->vec_dir_scand[done_dirs] = sid->scan; + oi->vec_dir_cfmp[done_dirs] = sid->cfmp; + oi->vec_dir_cfsp[done_dirs] = sid->cfsp; + oi->vec_dir_sfmp[done_dirs] = sid->sfmp; + oi->vec_dir_sfsp[done_dirs] = sid->sfsp; + if (gconf->isam >= 0) { + oi->vec_dir_un[3 * done_dirs] = sid->un[0]; + oi->vec_dir_un[3 * done_dirs + 1] = sid->un[1]; + oi->vec_dir_un[3 * done_dirs + 2] = sid->un[2]; + oi->vec_dir_uns[3 * done_dirs] = sid->uns[0]; + oi->vec_dir_uns[3 * done_dirs + 1] = sid->uns[1]; + oi->vec_dir_uns[3 * done_dirs + 2] = sid->uns[2]; + } else { + oi->vec_dir_un[3 * done_dirs] = sid->un[0]; + oi->vec_dir_un[3 * done_dirs + 1] = sid->un[1]; + oi->vec_dir_un[3 * done_dirs + 2] = sid->un[2]; + oi->vec_dir_uns[3 * done_dirs] = 0.0; + oi->vec_dir_uns[3 * done_dirs + 1] = 0.0; + oi->vec_dir_uns[3 * done_dirs + 2] = 0.0; + } + sscr2(nsph, l_max, vk, exri, sid->c1); + last_configuration = 0; + for (int ns226 = 0; ns226 < nsph; ns226++) { + int ns = ns226 + 1; + oindex = jxi488 * nsph * ndirs + nsph * done_dirs + ns226; + oi->vec_dir_sas11[oindex] = sid->c1->sas[ns226][0][0]; + oi->vec_dir_sas21[oindex] = sid->c1->sas[ns226][1][0]; + oi->vec_dir_sas12[oindex] = sid->c1->sas[ns226][0][1]; + oi->vec_dir_sas22[oindex] = sid->c1->sas[ns226][1][1]; + oi->vec_dir_fx[jxi488 * nsph * nth * nph + nsph * nph * (jth - 1) + nsph * (jph - 1) + ns226] = frx; + oi->vec_dir_fy[jxi488 * nsph * nth * nph + nsph * nph * (jth - 1) + nsph * (jph - 1) + ns226] = fry; + oi->vec_dir_fz[jxi488 * nsph * nth * nph + nsph * nph * (jth - 1) + nsph * (jph - 1) + ns226] = frz; + for (int i225 = 0; i225 < 16; i225++) sid->c1->vint[i225] = sid->c1->vints[ns226][i225]; + mmulc(sid->c1->vint, sid->cmullr, sid->cmul); + for (int imul = 0; imul < 4; imul++) { + int muls_index = 16 * jxi488 * nsph * ndirs + 16 * nsph * done_dirs + 4 * imul; + for (int jmul = 0; jmul < 4; jmul++) { + oi->vec_dir_muls[muls_index + jmul] = sid->cmul[imul][jmul]; } - jw = 1; } - double thsl = ths1; - double phsph = 0.0; - for (int jths482 = 0; jths482 < nths; jths482++) { - int jths = jths482 + 1; - double ths = thsl; - int icspnv = 0; - if (meridional_type > 1) ths = th + thsca; - if (meridional_type >= 1) { - phsph = 0.0; - if ((ths < 0.0) || (ths > 180.0)) phsph = 180.0; - if (ths < 0.0) ths *= -1.0; - if (ths > 180.0) ths = 360.0 - ths; - if 
(phsph != 0.0) icspnv = 1; + for (int imul = 0; imul < 4; imul++) { + int muls_index = 16 * jxi488 * nsph * ndirs + 16 * nsph * done_dirs + 4 * imul; + for (int jmul = 0; jmul < 4; jmul++) { + oi->vec_dir_mulslr[muls_index + jmul] = sid->cmullr[imul][jmul]; } - double phs = phs1; - for (int jphs480 = 0; jphs480 < nphs; jphs480++) { - int jphs = jphs480 + 1; - if (meridional_type >= 1) { - phs = ph + phsph; - if (phs >= 360.0) phs -= 360.0; - } - bool goto190 = (nks == 1) && ((jxi > 1) || (jth > 1) || (jph > 1)); - if (!goto190) { - upvmp(ths, phs, icspnv, costs, sints, cosps, sinps, us, upsmp, unsmp); - if (meridional_type >= 0) { - wmamp(2, costs, sints, cosps, sinps, in_pol, l_max, 0, nsph, args, us, upsmp, unsmp, c1); - } - } - if (nkks != 0 || jxi == 1) { - upvsp(u, upmp, unmp, us, upsmp, unsmp, up, un, ups, uns, duk, isq, ibf, scan, cfmp, sfmp, cfsp, sfsp); - if (meridional_type < 0) { - wmasp( - cost, sint, cosp, sinp, costs, sints, cosps, sinps, - u, up, un, us, ups, uns, isq, ibf, in_pol, - l_max, 0, nsph, argi, args, c1 - ); - } - for (int i193 = 0; i193 < 3; i193++) { - un[i193] = unmp[i193]; - uns[i193] = unsmp[i193]; - } - } - if (meridional_type < 0) jw = 1; - tppoan.write(reinterpret_cast<char *>(&th), sizeof(double)); - tppoan.write(reinterpret_cast<char *>(&ph), sizeof(double)); - tppoan.write(reinterpret_cast<char *>(&ths), sizeof(double)); - tppoan.write(reinterpret_cast<char *>(&phs), sizeof(double)); - tppoan.write(reinterpret_cast<char *>(&scan), sizeof(double)); - if (jw != 0) { - jw = 0; - tppoan.write(reinterpret_cast<char *>(&(u[0])), sizeof(double)); - tppoan.write(reinterpret_cast<char *>(&(u[1])), sizeof(double)); - tppoan.write(reinterpret_cast<char *>(&(u[2])), sizeof(double)); - } - p_output->vec_dir_scand[done_dirs] = scan; - p_output->vec_dir_cfmp[done_dirs] = cfmp; - p_output->vec_dir_cfsp[done_dirs] = cfsp; - p_output->vec_dir_sfmp[done_dirs] = sfmp; - p_output->vec_dir_sfsp[done_dirs] = sfsp; - if (meridional_type >= 0) { - p_output->vec_dir_un[3 * done_dirs] = un[0]; - p_output->vec_dir_un[3 * done_dirs + 1] = un[1]; - p_output->vec_dir_un[3 * done_dirs + 2] = un[2]; - p_output->vec_dir_uns[3 * done_dirs] = uns[0]; - p_output->vec_dir_uns[3 * done_dirs + 1] = uns[1]; - p_output->vec_dir_uns[3 * done_dirs + 2] = uns[2]; - } else { - p_output->vec_dir_un[3 * done_dirs] = un[0]; - p_output->vec_dir_un[3 * done_dirs + 1] = un[1]; - p_output->vec_dir_un[3 * done_dirs + 2] = un[2]; - p_output->vec_dir_uns[3 * done_dirs] = 0.0; - p_output->vec_dir_uns[3 * done_dirs + 1] = 0.0; - p_output->vec_dir_uns[3 * done_dirs + 2] = 0.0; - } - sscr2(nsph, l_max, vk, exri, c1); - last_configuration = 0; - for (int ns226 = 0; ns226 < nsph; ns226++) { - int ns = ns226 + 1; - oindex = jxi488 * nsph * ndirs + nsph * done_dirs + ns226; - p_output->vec_dir_sas11[oindex] = c1->sas[ns226][0][0]; - p_output->vec_dir_sas21[oindex] = c1->sas[ns226][1][0]; - p_output->vec_dir_sas12[oindex] = c1->sas[ns226][0][1]; - p_output->vec_dir_sas22[oindex] = c1->sas[ns226][1][1]; - p_output->vec_dir_fx[jxi488 * nsph * nth * nph + nsph * nph * (jth - 1) + nsph * (jph - 1) + ns226] = frx; - p_output->vec_dir_fy[jxi488 * nsph * nth * nph + nsph * nph * (jth - 1) + nsph * (jph - 1) + ns226] = fry; - p_output->vec_dir_fz[jxi488 * nsph * nth * nph + nsph * nph * (jth - 1) + nsph * (jph - 1) + ns226] = frz; - for (int i225 = 0; i225 < 16; i225++) c1->vint[i225] = c1->vints[ns226][i225]; - mmulc(c1->vint, cmullr, cmul); - for (int imul = 0; imul < 4; imul++) { - int muls_index = 16 * jxi488 * nsph * 
ndirs + 16 * nsph * done_dirs + 4 * imul; - for (int jmul = 0; jmul < 4; jmul++) { - p_output->vec_dir_muls[muls_index + jmul] = cmul[imul][jmul]; - } - } - for (int imul = 0; imul < 4; imul++) { - int muls_index = 16 * jxi488 * nsph * ndirs + 16 * nsph * done_dirs + 4 * imul; - for (int jmul = 0; jmul < 4; jmul++) { - p_output->vec_dir_mulslr[muls_index + jmul] = cmullr[imul][jmul]; - } - } - for (int vi = 0; vi < 16; vi++) { - double value = real(c1->vint[vi]); - tppoan.write(reinterpret_cast<char *>(&value), sizeof(double)); - value = imag(c1->vint[vi]); - tppoan.write(reinterpret_cast<char *>(&value), sizeof(double)); - } - for (int imul = 0; imul < 4; imul++) { - for (int jmul = 0; jmul < 4; jmul++) { - tppoan.write(reinterpret_cast<char *>(&(cmul[imul][jmul])), sizeof(double)); - } - } - } // ns226 loop - if (meridional_type < 1) phs += sc_phi_step; - done_dirs++; - } // jphs480 loop - if (meridional_type <= 1) thsl += sc_theta_step; - } // jths482 loop - ph += in_phi_step; - } // jph484 loop on elevation - th += in_theta_step; - } // jth486 loop on azimuth - p_output->vec_jxi[jxi488] = jxi488 + 1; - logger->log("INFO: finished scale iteration " + to_string(jxi) + " of " + to_string(nxi) + ".\n"); - } //jxi488 loop on scales - tppoan.close(); - } else { // In case TPPOAN could not be opened. Should never happen. - logger->err("ERROR: failed to open TPPOAN file.\n"); - } - // fclose(output); - p_output->write(output_path + "/c_OSPH.hd5", "HDF5"); - p_output->write(output_path + "/c_OSPH", "LEGACY"); - delete p_output; - delete mpidata; - delete c1; - for (int zi = l_max - 1; zi > -1; zi--) { - for (int zj = 0; zj < 3; zj++) { - for (int zk = 0; zk < 2; zk++) { - delete[] zpv[zi][zj][zk]; - } - delete[] zpv[zi][zj]; - } - delete[] zpv[zi]; + } + for (int vi = 0; vi < 16; vi++) { + value = real(sid->c1->vint[vi]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = imag(sid->c1->vint[vi]); + vtppoanp->append_line(VirtualBinaryLine(value)); + } + for (int imul = 0; imul < 4; imul++) { + for (int jmul = 0; jmul < 4; jmul++) { + value = sid->cmul[imul][jmul]; + vtppoanp->append_line(VirtualBinaryLine(value)); + } + } + } // ns226 loop + if (gconf->isam < 1) phs += sa->phsstp; + done_dirs++; + } // jphs480 loop + if (gconf->isam <= 1) thsl += sa->thsstp; + } // jths482 loop + ph += sa->phstp; + } // jph484 loop on elevation + th += sa->thstp; + } // jth486 loop on azimuth + oi->vec_jxi[jxi488] = jxi; + logger->log("INFO: finished scale iteration " + to_string(jxi) + " of " + to_string(nxi) + ".\n"); + return jer; +} + +// >>> IMPLEMENTATION OF SphereIterationData CLASS <<< +SphereIterationData::SphereIterationData( + GeometryConfiguration *gconf, ScattererConfiguration *sconf, + const mixMPI *mpidata, const int device_count +) { + const dcomplex cc0 = 0.0 + I * 0.0; + _nsph = gconf->number_of_spheres; + _lm = gconf->l_max; + arg = cc0; + s0 = cc0; + tfsas = cc0; + c1 = new ParticleDescriptorSphere(gconf, sconf); + argi = new double[1](); + args = new double[1](); + scan = 0.0; + cfmp = 0.0; + cfsp = 0.0; + sfmp = 0.0; + sfsp = 0.0; + wn = sconf->wp / 3.0e8; + xip = sconf->xip; + vk = 0.0; + // Scale block initialization + number_of_scales = sconf->number_of_scales; + xiblock = (int) ceil(((double) (sconf->number_of_scales-1))/((double) mpidata->nprocs)); + lastxi = ((mpidata->rank+1) * xiblock)+1; + firstxi = lastxi-xiblock+1; + if (lastxi > sconf->number_of_scales) lastxi = sconf->number_of_scales; + // End of scale block initialization + gaps = new double[_nsph](); + duk = new 
double[3]();
+  u = new double[3]();
+  us = new double[3]();
+  un = new double[3]();
+  uns = new double[3]();
+  up = new double[3]();
+  ups = new double[3]();
+  upmp = new double[3]();
+  upsmp = new double[3]();
+  unmp = new double[3]();
+  unsmp = new double[3]();
+  vec_cmul = new double[16]();
+  vec_cmullr = new double[16]();
+  cmul = new double*[4];
+  cmullr = new double*[4];
+  for (int ci = 0; ci < 4; ci++) {
+    cmul[ci] = (vec_cmul + 4 * ci);
+    cmullr[ci] = (vec_cmullr + 4 * ci);
+  }
+  vec_tqspe = new dcomplex[2 * _nsph]();
+  vec_tqsps = new dcomplex[2 * _nsph]();
+  vec_tqse = new double[2 * _nsph]();
+  vec_tqss = new double[2 * _nsph]();
+  tqspe = new dcomplex*[2];
+  tqsps = new dcomplex*[2];
+  tqse = new double*[2];
+  tqss = new double*[2];
+  for (int ti = 0; ti < 2; ti++) {
+    tqspe[ti] = (vec_tqspe + _nsph * ti);
+    tqsps[ti] = (vec_tqsps + _nsph * ti);
+    tqse[ti] = (vec_tqse + _nsph * ti);
+    tqss[ti] = (vec_tqss + _nsph * ti);
+  }
+  vec_zpv = new double[_lm * 12]();
+  zpv = new double***[_lm];
+  for (int zi = 0; zi < _lm; zi++) {
+    zpv[zi] = new double**[3];
+    for (int zj = 0; zj < 3; zj++) {
+      int vec_index = 12 * zi + 4 * zj;
+      zpv[zi][zj] = new double*[2];
+      zpv[zi][zj][0] = (vec_zpv + vec_index);
+      zpv[zi][zj][1] = (vec_zpv + vec_index + 2);
+    }
+  }
+}
+
+SphereIterationData::SphereIterationData(const SphereIterationData &rhs) {
+  _nsph = rhs._nsph;
+  _lm = rhs._lm;
+  arg = rhs.arg;
+  s0 = rhs.s0;
+  tfsas = rhs.tfsas;
+  c1 = new ParticleDescriptorSphere(reinterpret_cast<ParticleDescriptorSphere &>(*(rhs.c1)));
+  argi = new double[1];
+  args = new double[1];
+  argi[0] = rhs.argi[0];
+  args[0] = rhs.args[0];
+  scan = rhs.scan;
+  cfmp = rhs.cfmp;
+  cfsp = rhs.cfsp;
+  sfmp = rhs.sfmp;
+  sfsp = rhs.sfsp;
+  wn = rhs.wn;
+  xip = rhs.xip;
+  vk = rhs.vk;
+  // Scale block initialization
+  number_of_scales = rhs.number_of_scales;
+  xiblock = rhs.xiblock;
+  lastxi = rhs.lastxi;
+  firstxi = rhs.firstxi;
+  // End of scale block initialization
+  gaps = new double[_nsph];
+  for (int si = 0; si < _nsph; si++) gaps[si] = rhs.gaps[si];
+  duk = new double[3];
+  u = new double[3];
+  us = new double[3];
+  un = new double[3];
+  uns = new double[3];
+  up = new double[3];
+  ups = new double[3];
+  upmp = new double[3];
+  upsmp = new double[3];
+  unmp = new double[3];
+  unsmp = new double[3];
+  for (int ui = 0; ui < 3; ui++) {
+    duk[ui] = rhs.duk[ui];
+    u[ui] = rhs.u[ui];
+    us[ui] = rhs.us[ui];
+    un[ui] = rhs.un[ui];
+    uns[ui] = rhs.uns[ui];
+    up[ui] = rhs.up[ui];
+    ups[ui] = rhs.ups[ui];
+    upmp[ui] = rhs.upmp[ui];
+    upsmp[ui] = rhs.upsmp[ui];
+    unmp[ui] = rhs.unmp[ui];
+    unsmp[ui] = rhs.unsmp[ui];
+  }
+  vec_cmul = new double[16];
+  vec_cmullr = new double[16];
+  for (int mi = 0; mi < 16; mi++) {
+    vec_cmul[mi] = rhs.vec_cmul[mi];
+    vec_cmullr[mi] = rhs.vec_cmullr[mi];
+  }
+  cmul = new double*[4];
+  cmullr = new double*[4];
+  for (int ci = 0; ci < 4; ci++) {
+    cmul[ci] = (vec_cmul + 4 * ci);
+    cmullr[ci] = (vec_cmullr + 4 * ci);
+  }
+  vec_tqspe = new dcomplex[2 * _nsph]();
+  vec_tqsps = new dcomplex[2 * _nsph]();
+  vec_tqse = new double[2 * _nsph]();
+  vec_tqss = new double[2 * _nsph]();
+  for (int ti = 0; ti < 2 * _nsph; ti++) {
+    vec_tqspe[ti] = rhs.vec_tqspe[ti];
+    vec_tqsps[ti] =
rhs.vec_tqsps[ti]; + vec_tqse[ti] = rhs.vec_tqse[ti]; + vec_tqss[ti] = rhs.vec_tqss[ti]; + } + tqspe = new dcomplex*[2]; + tqsps = new dcomplex*[2]; + tqse = new double*[2]; + tqss = new double*[2]; + for (int ti = 0; ti < 2; ti++) { + tqspe[ti] = (vec_tqspe + _nsph * ti); + tqsps[ti] = (vec_tqsps + _nsph * ti); + tqse[ti] = (vec_tqse + _nsph * ti); + tqss[ti] = (vec_tqss + _nsph * ti); + } + vec_zpv = new double[_lm * 12]; + for (int zi = 0; zi < _lm * 12; zi++) vec_zpv[zi] = rhs.vec_zpv[zi]; + zpv = new double***[_lm]; + for (int zi = 0; zi < _lm; zi++) { + zpv[zi] = new double**[3]; + for (int zj = 0; zj < 3; zj++) { + int vec_index = 12 * zi + 4 * zj; + zpv[zi][zj] = new double*[2]; + zpv[zi][zj][0] = (vec_zpv + vec_index); + zpv[zi][zj][1] = (vec_zpv + vec_index + 2); } - delete[] cmul; - delete[] cmullr; - for (int ti = 1; ti > -1; ti--) { - delete[] tqse[ti]; - delete[] tqss[ti]; - delete[] tqspe[ti]; - delete[] tqsps[ti]; + } +} + +SphereIterationData::~SphereIterationData() { + int lm = c1->li; + delete c1; + delete[] argi; + delete[] args; + delete[] gaps; + delete[] duk; + delete[] u; + delete[] us; + delete[] un; + delete[] uns; + delete[] up; + delete[] ups; + delete[] upmp; + delete[] upsmp; + delete[] unmp; + delete[] unsmp; + delete[] vec_cmul; + delete[] vec_cmullr; + delete[] cmul; + delete[] cmullr; + delete[] vec_tqspe; + delete[] vec_tqsps; + delete[] vec_tqse; + delete[] vec_tqss; + delete[] tqspe; + delete[] tqsps; + delete[] tqse; + delete[] tqss; + delete[] vec_zpv; + for (int zi = 0; zi < lm; zi++) { + for (int zj = 0; zj < 3; zj++) { + delete[] zpv[zi][zj]; } - delete[] tqse; - delete[] tqss; - delete[] tqspe; - delete[] tqsps; - logger->log("Finished: output written to " + output_path + "/c_OSPH.\n"); - } else { // NSPH mismatch between geometry and scatterer configurations. - throw UnrecognizedConfigurationException( - "Inconsistent geometry and scatterer configurations." 
-    );
+    delete[] zpv[zi];
   }
-  delete sconf;
-  delete gconf;
-  delete logger;
+  delete[] zpv;
+}
+
+#ifdef MPI_VERSION
+SphereIterationData::SphereIterationData(const mixMPI *mpidata, const int device_count) {
+  // Build child object
+  c1 = new ParticleDescriptorSphere(mpidata);
+
+  // Collect the scalar values
+  MPI_Bcast(&_nsph, 1, MPI_INT32_T, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_lm, 1, MPI_INT32_T, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&s0, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&tfsas, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  lastxi = ((mpidata->rank+1) * xiblock)+1;
+  firstxi = lastxi-xiblock+1;
+  if (lastxi > number_of_scales) lastxi = number_of_scales;
+
+  // Collect length-1 vectors
+  argi = new double[1];
+  args = new double[1];
+  MPI_Bcast(argi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  // Collect vectors whose size depends on NSPH
+  gaps = new double[_nsph];
+  vec_tqspe = new dcomplex[2 * _nsph];
+  vec_tqsps = new dcomplex[2 * _nsph];
+  vec_tqse = new double[2 * _nsph];
+  vec_tqss = new double[2 * _nsph];
+  MPI_Bcast(gaps, _nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqspe, 2 * _nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqsps, 2 * _nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqse, 2 * _nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqss, 2 * _nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  // Rebuild the torque pointer tables over the received buffers
+  tqspe = new dcomplex*[2];
+  tqsps = new dcomplex*[2];
+  tqse = new double*[2];
+  tqss = new double*[2];
+  for (int ti = 0; ti < 2; ti++) {
+    tqspe[ti] = (vec_tqspe + _nsph * ti);
+    tqsps[ti] = (vec_tqsps + _nsph * ti);
+    tqse[ti] = (vec_tqse + _nsph * ti);
+    tqss[ti] = (vec_tqss + _nsph * ti);
+  }
+
+  // Collect length-3 vectors
+  duk = new double[3];
+  u = new double[3];
+  us = new double[3];
+  un = new double[3];
+  uns = new double[3];
+  up = new double[3];
+  ups = new double[3];
+  upmp = new double[3];
+  upsmp = new double[3];
+  unmp = new double[3];
+  unsmp = new double[3];
+  MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  // Collect length-16 vectors
+  vec_cmul = new double[16];
+  vec_cmullr = new double[16];
+  MPI_Bcast(vec_cmul, 16, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_cmullr, 16, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  // Rebuild the 4x4 accessor tables over the received buffers
+  cmul = new double*[4];
+  cmullr = new double*[4];
+  for (int ci = 0; ci < 4; ci++) {
+    cmul[ci] = (vec_cmul + 4 * ci);
+    cmullr[ci] = (vec_cmullr + 4 * ci);
+  }
+
+  // Collect vectors whose size depends on LM
+  vec_zpv = new double[12 * _lm];
+  MPI_Bcast(vec_zpv, 12 * _lm, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  // Rebuild the zpv pointer table over the received buffer
+  zpv = new double***[_lm];
+  for (int zi = 0; zi < _lm; zi++) {
+    zpv[zi] = new double**[3];
+    for (int zj = 0; zj < 3; zj++) {
+      int vec_index = 12 * zi + 4 * zj;
+      zpv[zi][zj] = new double*[2];
+      zpv[zi][zj][0] = (vec_zpv + vec_index);
+      zpv[zi][zj][1] = (vec_zpv + vec_index + 2);
+    }
+  }
+}
+
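The reason vec_cmul, vec_cmullr and vec_zpv can each be moved with a single MPI_Bcast is that the multi-dimensional accessors are only pointer tables over those contiguous buffers. A minimal self-contained illustration of the aliasing (standalone example, not project code):

// Standalone illustration of the flat-buffer layout used above.
#include <cassert>

int main() {
  // A 4x4 matrix viewed over one contiguous 16-element buffer.
  double *vec_cmul = new double[16]();
  double **cmul = new double*[4];
  for (int ci = 0; ci < 4; ci++) cmul[ci] = vec_cmul + 4 * ci;
  cmul[2][3] = 1.0;
  assert(vec_cmul[4 * 2 + 3] == 1.0); // same storage, so one broadcast moves it all
  // The zpv table follows the same idea with a 12-double stride per order:
  // zpv[zi][zj][zk][zl] aliases vec_zpv[12 * zi + 4 * zj + 2 * zk + zl].
  delete[] cmul;
  delete[] vec_cmul;
  return 0;
}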
+int SphereIterationData::mpibcast(const mixMPI *mpidata) {
+  int result = 0;
+  // Broadcast child object
+  c1->mpibcast(mpidata);
+
+  // Broadcast scalar values
+  MPI_Bcast(&_nsph, 1, MPI_INT32_T, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&_lm, 1, MPI_INT32_T, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&s0, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&tfsas, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+  // Broadcast length-1 vectors
+  MPI_Bcast(argi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  // Broadcast vectors whose size depends on NSPH
+  MPI_Bcast(gaps, _nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqspe, 2 * _nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqsps, 2 * _nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqse, 2 * _nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_tqss, 2 * _nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  // Broadcast length-3 vectors
+  MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  // Broadcast length-16 vectors
+  MPI_Bcast(vec_cmul, 16, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(vec_cmullr, 16, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  // Broadcast vectors whose size depends on LM
+  MPI_Bcast(vec_zpv, 12 * _lm, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  return result;
 }
+#endif // MPI_VERSION
+// >>> END OF SphereIterationData CLASS IMPLEMENTATION <<<
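A sketch of the calling pattern these two members are designed for, using illustrative names (sid, gconf, sconf) and assuming rank 0 owns the parsed configuration objects: every MPI_Bcast issued by mpibcast() on rank 0 must be matched, in the same order, by the MPI_Bcast calls posted in the receiving constructor on the other ranks.

// Hypothetical usage sketch, not the actual sphere() driver:
SphereIterationData *sid = NULL;
if (mpidata->rank == 0) {
  // rank 0 builds the full state from the configuration files...
  sid = new SphereIterationData(gconf, sconf, mpidata, 0);
#ifdef MPI_VERSION
  sid->mpibcast(mpidata);                     // ...and sends every field
#endif
}
#ifdef MPI_VERSION
else {
  sid = new SphereIterationData(mpidata, 0);  // workers receive the same fields
}
#endif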