diff --git a/src/include/IterationData.h b/src/include/IterationData.h
index 6ab981db1616318f194dfb3d614c3a9efca4d265..a8d82dbf9e2d5a02d583fc94cf3f5f80442437dd 100644
--- a/src/include/IterationData.h
+++ b/src/include/IterationData.h
@@ -313,6 +313,8 @@ public:
   int proc_device;
   //! \brief Refinement mode selction flag.
   int refinemode;
+  //! \brief flag defining a first iteration
+  bool is_first_scale;
   //! \brief Maximum number of refinement iterations.
   int maxrefiters;
   //! \brief Required accuracy level.
@@ -484,6 +486,8 @@ public:
   double **tqss;
   //! \brief Scattering coefficients tensor.
   double ****zpv;
+  //! \brief flag for first time initialisation
+  bool is_first_scale;
 
   /*! \brief `SphereIterationData` default instance constructor.
    *
diff --git a/src/inclusion/inclusion.cpp b/src/inclusion/inclusion.cpp
index 7d6d4ae38b3403862747b80dc9ac3e310e05d101..b70306c044b424f3d719d8df501b98f605964d44 100644
--- a/src/inclusion/inclusion.cpp
+++ b/src/inclusion/inclusion.cpp
@@ -278,50 +278,50 @@ void inclusion(const string& config_file, const string& data_file, const string&
     }
 
     // do the first iteration on jxi488 separately, since it seems to be different from the others
-    int jxi488 = 1;
+    int jxi488;
     int initialmaxrefiters = cid->maxrefiters;
-    chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
-#ifdef USE_NVTX
-    nvtxRangePush("First iteration");
-#endif
+// chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
+// #ifdef USE_NVTX
+// nvtxRangePush("First iteration");
+// #endif
     // use these pragmas, which should have no effect on parallelism, just to push OMP nested levels at the same level also in the first wavelength iteration
     int jer = 0;
-#pragma omp parallel
-    {
-#pragma omp single
-      {
-        jer = inclusion_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp);
-      }
-    }
-#ifdef USE_NVTX
-    nvtxRangePop();
-#endif
-    chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now();
-    elapsed = start_iter_1 - t_start;
-    string message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n";
-    logger->log(message);
-    time_logger->log(message);
-    elapsed = end_iter_1 - start_iter_1;
-    message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n";
-    logger->log(message);
-    time_logger->log(message);
+// #pragma omp parallel
+// {
+// #pragma omp single
+// {
+// jer = inclusion_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp);
+// }
+// }
+// #ifdef USE_NVTX
+// nvtxRangePop();
+// #endif
+// chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now();
+// elapsed = start_iter_1 - t_start;
+// string message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n";
+// logger->log(message);
+// time_logger->log(message);
+// elapsed = end_iter_1 - start_iter_1;
+// message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n";
+// logger->log(message);
+// time_logger->log(message);
     /* for the next iterations, just always do maxiter iterations, assuming the accuracy is good enough */
-    cid->refinemode = 0;
-    /* add an extra iteration for margin, if this does not exceed initialmaxrefiters */
-    // if (cid->maxrefiters < initialmaxrefiters) cid->maxrefiters++;
-    if (jer != 0) {
-      // First loop failed. Halt the calculation.
-      fclose(timing_file);
-      delete time_logger;
-      delete p_output;
-      delete p_scattering_angles;
-      delete cid;
-      delete logger;
-      delete sconf;
-      delete gconf;
-      return;
-    }
+    // cid->refinemode = 0;
+    // /* add an extra iteration for margin, if this does not exceed initialmaxrefiters */
+    // // if (cid->maxrefiters < initialmaxrefiters) cid->maxrefiters++;
+    // if (jer != 0) {
+    // // First loop failed. Halt the calculation.
+    // fclose(timing_file);
+    // delete time_logger;
+    // delete p_output;
+    // delete p_scattering_angles;
+    // delete cid;
+    // delete logger;
+    // delete sconf;
+    // delete gconf;
+    // return;
+    // }
 
     //==================================================
     // do the first outputs here, so that I open here the new files, afterwards I only append
@@ -415,7 +415,7 @@ void inclusion(const string& config_file, const string& data_file, const string&
     }
 #pragma omp barrier
     // ok, now I can actually start the parallel calculations
-    for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
+    for (int ixi488=1; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
       // the parallel loop over MPI processes covers a different set of indices for each thread
 #pragma omp barrier
       int myjxi488 = ixi488+myompthread;
@@ -595,7 +595,7 @@ void inclusion(const string& config_file, const string& data_file, const string&
     // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
 #pragma omp barrier
     // ok, now I can actually start the parallel calculations
-    for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
+    for (int ixi488=1; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
      // the parallel loop over MPI processes covers a different set of indices for each thread
 #pragma omp barrier
       int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
@@ -685,7 +685,9 @@ int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryCo
   int jer = 0;
   int lcalc = 0;
   int jaw = 1;
-  bool is_first_scale = (jxi488 == 1);
+  // this is now part of cid, so don't mess with it here, just copy it by reference
+  bool &is_first_scale = cid->is_first_scale;
+  //bool is_first_scale = (jxi488 == 1);
   // int li = cid->c1->li;
   // int le = cid->c1->le;
   // int lm = cid->c1->lm;
@@ -1414,6 +1416,8 @@ int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryCo
     } // jph484 loop
     th += sa->thstp;
   } // jth486 loop
+  // at the end of this iteration make sure to set is_first_scale to false, the next iteration will reinitialise _only_ if the order changes
+  is_first_scale = 0;
 #ifdef USE_NVTX
   nvtxRangePop();
 #endif
@@ -1534,6 +1538,8 @@ InclusionIterationData::InclusionIterationData(GeometryConfiguration *gconf, Sca
   proc_device = 0;
 #endif
 
+  // the first time the object is used, it will be a "first iteration", to ensure data structures are properly initialised.
+  is_first_scale = 1;
   // In the first iteration, if refinement is enabled, determine the number of refinement iterations required to arrive at the target accuracy (if achievable in a reasonable number of iterations)
   refinemode = 2;
   // maxrefiters and accuracygoal should be configurable and preferably set somewhere else
@@ -1685,6 +1691,7 @@ InclusionIterationData::InclusionIterationData(const InclusionIterationData& rhs
   extr = rhs.extr;
 
   proc_device = rhs.proc_device;
+  is_first_scale = rhs.is_first_scale;
   refinemode = rhs.refinemode;
   maxrefiters = rhs.maxrefiters;
   accuracygoal = rhs.accuracygoal;
@@ -1823,6 +1830,7 @@ InclusionIterationData::InclusionIterationData(const mixMPI *mpidata, const int
   proc_device = 0;
 #endif
   MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
   MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
   MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
 }
@@ -1888,6 +1896,7 @@ void InclusionIterationData::mpibcast(const mixMPI *mpidata) {
   MPI_Bcast(&nimd, 1, MPI_INT, 0, MPI_COMM_WORLD);
   MPI_Bcast(&extr, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
   MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
   MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
   MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
 }
diff --git a/src/sphere/sphere.cpp b/src/sphere/sphere.cpp
index d123b0ff3927e89da061c1c946190575d19afa9c..7d26091f21762d4f1edf166e1a65cf6dd7c92163 100644
--- a/src/sphere/sphere.cpp
+++ b/src/sphere/sphere.cpp
@@ -201,25 +201,25 @@ void sphere(const string& config_file, const string& data_file, const string& ou
     }
 
     // Do the first wavelength iteration
-    int jxi488 = 1;
+    // int jxi488 = 1;
     // Use pragmas to put OMP parallelism to second level.
     int jer = 0;
-#pragma omp parallel
-    {
-#pragma omp single
-      {
-        jer = sphere_jxi488_cycle(jxi488 - 1, sconf, gconf, p_sa, sid, p_output, output_path, vtppoanp);
-      } // OMP single
-    } // OMP parallel
-    if (jer != 0) { // First iteration failed. Halt the calculation.
-      delete p_output;
-      delete p_sa;
-      delete sid;
-      delete logger;
-      delete sconf;
-      delete gconf;
-      return;
-    }
+// #pragma omp parallel
+// {
+// #pragma omp single
+// {
+// jer = sphere_jxi488_cycle(jxi488 - 1, sconf, gconf, p_sa, sid, p_output, output_path, vtppoanp);
+// } // OMP single
+// } // OMP parallel
+// if (jer != 0) { // First iteration failed. Halt the calculation.
+// delete p_output;
+// delete p_sa;
+// delete sid;
+// delete logger;
+// delete sconf;
+// delete gconf;
+// return;
+// }
 
     //==================================================
     // do the first outputs here, so that I open here the new files, afterwards I only append
@@ -310,7 +310,7 @@ void sphere(const string& config_file, const string& data_file, const string& ou
     }
 #pragma omp barrier
     // ok, now I can actually start the parallel calculations
-    for (int ixi488=2; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) {
+    for (int ixi488=1; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) {
      // the parallel loop over MPI processes covers a different set of indices for each thread
 #pragma omp barrier
      int myjxi488 = ixi488+myompthread;
@@ -463,7 +463,7 @@ void sphere(const string& config_file, const string& data_file, const string& ou
     // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
 #pragma omp barrier
     // ok, now I can actually start the parallel calculations
-    for (int ixi488=2; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) {
+    for (int ixi488=1; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) {
      // the parallel loop over MPI processes covers a different set of indices for each thread
 #pragma omp barrier
      int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
@@ -546,7 +546,9 @@ int sphere_jxi488_cycle(
   int oindex = 0;
   int jxi = jxi488 + 1;
   int jxindex = jxi - oi->first_xi;
-  bool is_first_scale = (jxi == 1);
+  // this is now part of sid, so don't mess with it here, just copy it by reference
+  bool &is_first_scale = sid->is_first_scale;
+  //bool is_first_scale = (jxi == 1);
   int nsph = gconf->number_of_spheres;
   //int l_max = gconf->l_max;
   int in_pol = gconf->in_pol;
@@ -889,6 +891,8 @@ int sphere_jxi488_cycle(
     } // jph484 loop on elevation
     th += sa->thstp;
   } // jth486 loop on azimuth
+  // at the end of this iteration make sure to set is_first_scale to false, the next iteration will reinitialise _only_ if the order changes
+  is_first_scale = 0;
   oi->vec_jxi[jxindex] = jxi;
   logger->log("INFO: finished scale iteration " + to_string(jxi) + " of " + to_string(nxi) + ".\n");
   delete logger;
@@ -977,6 +981,7 @@ SphereIterationData::SphereIterationData(
       zpv[zi][zj][1] = (vec_zpv + vec_index + 2);
     }
   }
+  is_first_scale = 1;
 }
 
 SphereIterationData::SphereIterationData(const SphereIterationData &rhs) {
@@ -1082,6 +1087,7 @@ SphereIterationData::SphereIterationData(const SphereIterationData &rhs) {
       zpv[zi][zj][1] = (vec_zpv + vec_index + 2);
     }
   }
+  is_first_scale = rhs.is_first_scale;
 }
 
 SphereIterationData::~SphereIterationData() {
@@ -1207,6 +1213,9 @@ SphereIterationData::SphereIterationData(const mixMPI *mpidata, const int device
   // Collect vectors whose size depend on LM
   vec_zpv = new double[12 * _lm];
   MPI_Bcast(vec_zpv, 12 * _lm, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
+
 }
 
 int SphereIterationData::mpibcast(const mixMPI *mpidata) {
@@ -1269,7 +1278,9 @@ int SphereIterationData::mpibcast(const mixMPI *mpidata) {
 
   // Broadcast vectors whose size depend on LM
   MPI_Bcast(vec_zpv, 12 * _lm, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-
+
+  MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
+
   return 0;
 }
 #endif // MPI_VERSION
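
Note on the pattern this patch introduces: the "first iteration" flag is moved out of the per-call check (jxi488 == 1) and into the shared iteration-data object, where the constructor sets it and the cycle routine binds it by reference and clears it after the first use. A minimal, self-contained C++ sketch of that idea follows; IterData and run_scale_iteration are hypothetical stand-ins for illustration only, not the project's InclusionIterationData/SphereIterationData API.

    #include <cstdio>

    // Hypothetical per-process iteration state, standing in for the real
    // *IterationData classes: the flag starts true when the object is built.
    struct IterData {
      bool is_first_scale = true;
    };

    void run_scale_iteration(int jxi, IterData *id) {
      // Bind by reference so the update below persists in the shared object.
      bool &is_first_scale = id->is_first_scale;
      if (is_first_scale) {
        std::printf("scale %d: one-time initialisation\n", jxi);
      }
      std::printf("scale %d: regular work\n", jxi);
      // Clear the flag at the end of the iteration; later iterations would
      // reinitialise only if something else (e.g. the expansion order) changes.
      is_first_scale = false;
    }

    int main() {
      IterData id;
      // A rank whose loop happens to start from scale 3 still performs the
      // one-time initialisation exactly once, because the flag travels with
      // the state object rather than with the scale index.
      for (int jxi = 3; jxi <= 5; ++jxi) run_scale_iteration(jxi, &id);
      return 0;
    }

The reference binding mirrors the patched cycle functions, which is why the loops above can now start at ixi488 = 1 on every rank without a separately hand-coded first iteration.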