Skip to content
Snippets Groups Projects
Commit 52f66298 authored by Mulas, Giacomo's avatar Mulas, Giacomo
Browse files

- update cluster to treat first wavelength iteration on the same footing as others

parent f2ec3daa
Branches
No related tags found
No related merge requests found
...@@ -288,53 +288,55 @@ void cluster(const string& config_file, const string& data_file, const string& o ...@@ -288,53 +288,55 @@ void cluster(const string& config_file, const string& data_file, const string& o
} }
// do the first iteration on jxi488 separately, since it seems to be different from the others // do the first iteration on jxi488 separately, since it seems to be different from the others
int jxi488 = 1; // not anymore, now this iteration is at the same level as others
int jxi488;
int initialmaxrefiters = cid->maxrefiters; int initialmaxrefiters = cid->maxrefiters;
chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now(); //chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
#ifdef USE_NVTX // #ifdef USE_NVTX
nvtxRangePush("First iteration"); // nvtxRangePush("First iteration");
#endif // #endif
// use these pragmas, which should have no effect on parallelism, just to push OMP nested levels at the same level also in the first wavelength iteration // use these pragmas, which should have no effect on parallelism, just to push OMP nested levels at the same level also in the first wavelength iteration
int jer = 0; int jer = 0;
#pragma omp parallel // #pragma omp parallel
{ // {
#pragma omp single // #pragma omp single
{ // {
jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp); // jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp);
} // OMP sinlge // } // OMP single
} // OMP parallel // } // OMP parallel
#ifdef USE_NVTX // #ifdef USE_NVTX
nvtxRangePop(); // nvtxRangePop();
#endif // #endif
chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now(); //chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now();
elapsed = start_iter_1 - t_start; //elapsed = start_iter_1 - t_start;
string message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n"; // string message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n";
logger->log(message); // logger->log(message);
time_logger->log(message); // time_logger->log(message);
elapsed = end_iter_1 - start_iter_1; // elapsed = end_iter_1 - start_iter_1;
message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n"; // message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n";
logger->log(message); // logger->log(message);
time_logger->log(message); // time_logger->log(message);
/* for the next iterations, just always do maxiter iterations, assuming the accuracy is good enough */ /* for the next iterations, just always do maxiter iterations, assuming the accuracy is good enough */
cid->refinemode = 0; // cid->refinemode = 0;
/* add an extra iteration for margin, if this does not exceed initialmaxrefiters */ /* add an extra iteration for margin, if this does not exceed initialmaxrefiters */
// if (cid->maxrefiters < initialmaxrefiters) cid->maxrefiters++; // if (cid->maxrefiters < initialmaxrefiters) cid->maxrefiters++;
if (jer != 0) { // if (jer != 0) {
// First loop failed. Halt the calculation. // // First loop failed. Halt the calculation.
fclose(timing_file); // fclose(timing_file);
delete time_logger; // delete time_logger;
delete p_output; // delete p_output;
delete p_scattering_angles; // delete p_scattering_angles;
delete cid; // delete cid;
delete logger; // delete logger;
delete sconf; // delete sconf;
delete gconf; // delete gconf;
return; // return;
} // }
//================================================== //==================================================
// do the first outputs here, so that I open here the new files, afterwards I only append // do the first outputs here, so that I open here the new files, afterwards I only append
//================================================== //==================================================
// How should we handle this, when first iteration is not treated specially anymore? This should be ok, just write what was put in vtppoanp on initialisation, even if no actual calc was done yet. This creates the file nonetheless,
vtppoanp->write_to_disk(output_path + "/c_TPPOAN"); vtppoanp->write_to_disk(output_path + "/c_TPPOAN");
delete vtppoanp; delete vtppoanp;
...@@ -425,7 +427,8 @@ void cluster(const string& config_file, const string& data_file, const string& o ...@@ -425,7 +427,8 @@ void cluster(const string& config_file, const string& data_file, const string& o
} }
#pragma omp barrier #pragma omp barrier
// ok, now I can actually start the parallel calculations // ok, now I can actually start the parallel calculations
for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) { // we now start the loop from 1, so that the first iteration is run exactly as the others
for (int ixi488=1; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
// the parallel loop over MPI processes covers a different set of indices for each thread // the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier #pragma omp barrier
int myjxi488 = ixi488+myompthread; int myjxi488 = ixi488+myompthread;
...@@ -473,6 +476,10 @@ void cluster(const string& config_file, const string& data_file, const string& o ...@@ -473,6 +476,10 @@ void cluster(const string& config_file, const string& data_file, const string& o
// thread 0 writes its virtual files, now including contributions from all threads, to disk, and deletes them // thread 0 writes its virtual files, now including contributions from all threads, to disk, and deletes them
// p_outarray[0]->append_to_disk(output_path + "/c_OCLU"); // p_outarray[0]->append_to_disk(output_path + "/c_OCLU");
// delete p_outarray[0]; // delete p_outarray[0];
// ******************************************************
// if this is the very first time, we should actually use
// ->write_to_disk, not ->append_to_disk
// ******************************************************
vtppoanarray[0]->append_to_disk(output_path + "/c_TPPOAN"); vtppoanarray[0]->append_to_disk(output_path + "/c_TPPOAN");
delete vtppoanarray[0]; delete vtppoanarray[0];
...@@ -604,7 +611,8 @@ void cluster(const string& config_file, const string& data_file, const string& o ...@@ -604,7 +611,8 @@ void cluster(const string& config_file, const string& data_file, const string& o
// make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
#pragma omp barrier #pragma omp barrier
// ok, now I can actually start the parallel calculations // ok, now I can actually start the parallel calculations
for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) { // we now start the loop from 1, so that the first iteration is treated exactly on equal footing as others
for (int ixi488=1; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
// the parallel loop over MPI processes covers a different set of indices for each thread // the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier #pragma omp barrier
int myjxi488 = ixi488 + myjxi488startoffset + myompthread; int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
...@@ -698,7 +706,8 @@ int cluster_jxi488_cycle( ...@@ -698,7 +706,8 @@ int cluster_jxi488_cycle(
int jer = 0; int jer = 0;
int lcalc = 0; int lcalc = 0;
int jaw = 1; int jaw = 1;
bool is_first_scale = (jxi488 == 1); // this is now part of cid, so don't mess with it here, just copy it by reference
bool &is_first_scale = cid->is_first_scale;
const int nsph = sconf->number_of_spheres; const int nsph = sconf->number_of_spheres;
const np_int mxndm = gconf->mxndm; const np_int mxndm = gconf->mxndm;
const int iavm = gconf->iavm; const int iavm = gconf->iavm;
...@@ -1563,6 +1572,8 @@ int cluster_jxi488_cycle( ...@@ -1563,6 +1572,8 @@ int cluster_jxi488_cycle(
} // jph484 loop } // jph484 loop
th += sa->thstp; th += sa->thstp;
} // jth486 loop } // jth486 loop
// at the end of this iteration make sure to set is_first_scale to false, the next iteration will reinitialise _only_ if the order changes
is_first_scale = 0;
#ifdef USE_NVTX #ifdef USE_NVTX
nvtxRangePop(); nvtxRangePop();
#endif #endif
...@@ -1679,6 +1690,8 @@ ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, Scatter ...@@ -1679,6 +1690,8 @@ ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, Scatter
proc_device = 0; proc_device = 0;
#endif #endif
// the first time the object is used, it will be a "first iteration", to ensure data structures are properly initialised.
is_first_scale = 1;
// In the first iteration, if refinement is enabled, determine the number of refinement iterations required to arrive at the target accuracy (if achievable in a reasonable number of iterations) // In the first iteration, if refinement is enabled, determine the number of refinement iterations required to arrive at the target accuracy (if achievable in a reasonable number of iterations)
refinemode = 2; refinemode = 2;
// maxrefiters and accuracygoal should be configurable and preferably set somewhere else // maxrefiters and accuracygoal should be configurable and preferably set somewhere else
...@@ -1827,6 +1840,7 @@ ClusterIterationData::ClusterIterationData(const ClusterIterationData& rhs) { ...@@ -1827,6 +1840,7 @@ ClusterIterationData::ClusterIterationData(const ClusterIterationData& rhs) {
number_of_scales = rhs.number_of_scales; number_of_scales = rhs.number_of_scales;
proc_device = rhs.proc_device; proc_device = rhs.proc_device;
is_first_scale = rhs.is_first_scale;
refinemode = rhs.refinemode; refinemode = rhs.refinemode;
maxrefiters = rhs.maxrefiters; maxrefiters = rhs.maxrefiters;
accuracygoal = rhs.accuracygoal; accuracygoal = rhs.accuracygoal;
...@@ -1964,6 +1978,7 @@ ClusterIterationData::ClusterIterationData(const mixMPI *mpidata, const int devi ...@@ -1964,6 +1978,7 @@ ClusterIterationData::ClusterIterationData(const mixMPI *mpidata, const int devi
proc_device = 0; proc_device = 0;
#endif #endif
MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
} }
...@@ -2035,6 +2050,7 @@ void ClusterIterationData::mpibcast(const mixMPI *mpidata) { ...@@ -2035,6 +2050,7 @@ void ClusterIterationData::mpibcast(const mixMPI *mpidata) {
MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
} }
......
...@@ -135,6 +135,8 @@ public: ...@@ -135,6 +135,8 @@ public:
int proc_device; int proc_device;
//! \brief Refinement mode selction flag. //! \brief Refinement mode selction flag.
int refinemode; int refinemode;
//! \brief flag defining a first iteration
bool is_first_scale;
//! \brief Maximum number of refinement iterations. //! \brief Maximum number of refinement iterations.
int maxrefiters; int maxrefiters;
//! \brief Required accuracy level. //! \brief Required accuracy level.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment