diff --git a/build/configure.sh b/build/configure.sh index 191dd7c2f91d75422b578361f9923c162fec5e67..161ddb933460e7f32952856381a652c2a5c86c25 100755 --- a/build/configure.sh +++ b/build/configure.sh @@ -826,7 +826,7 @@ else fi # End of offload checks if [ "x$CXXFLAGS" = "x" ]; then - CXXFLAGS="-O${CXX_OPT}${CXX_DBG}${CLANGFLAGS}${INCLUDEFLAGS}${HDF5FLAGS}${OMPFLAGS}${MPIFLAGS}${LAPACKFLAGS}${CUBLASFLAGS}${MAGMAFLAGS}${REFINEFLAGS}${DEBUGFLAGS}${OFFLOADFLAGS}" + CXXFLAGS="-O${CXX_OPT}${CXX_DBG}${CLANGFLAGS}${INCLUDEFLAGS}${HDF5FLAGS}${OMPFLAGS}${MPIFLAGS}${LAPACKFLAGS}${CUBLASFLAGS}${MAGMAFLAGS}${REFINEFLAGS}${DEBUGFLAGS}${OFFLOADFLAGS}${NVTXFLAGS}" fi if [ "x$CXXLDFLAGS" = "x" ]; then if [ "x$LIBMODE" = "xstatic" ]; then diff --git a/src/include/tfrfme.h b/src/include/tfrfme.h index be6ac56252d6f7b63241af2b8a60119fe0b00cbf..ded5377a19f0a4879e57e3733601648b4db12913 100644 --- a/src/include/tfrfme.h +++ b/src/include/tfrfme.h @@ -63,6 +63,9 @@ protected: void write_legacy(const std::string& file_name); public: + //! \brief Read only view on WK. + const dcomplex *vec_wk; + /*! \brief Swap1 instance constructor. * * \param lm: `int` Maximum field expansion order. @@ -97,12 +100,6 @@ public: */ static long get_memory_requirement(int lm, int _nkv); - /*! \brief Get the pointer to the WK vector. - * - * \return value: `complex double *` Memory address of the WK vector. - */ - dcomplex *get_vector() { return wk; } - /*! \brief Bring the pointer to the next element at the start of vector. 
*/ void reset() { last_index = 0; } diff --git a/src/libnptm/tfrfme.cpp b/src/libnptm/tfrfme.cpp index df999983f83f4b1e855fa23b9990c2500998cf33..2b81215780af3610fd3c535a49326a5663780823 100644 --- a/src/libnptm/tfrfme.cpp +++ b/src/libnptm/tfrfme.cpp @@ -52,6 +52,7 @@ Swap1::Swap1(int lm, int _nkv) { nlmmt = 2 * lm * (lm + 2); const int size = nkv * nkv * nlmmt; wk = new dcomplex[size](); + vec_wk = wk; last_index = 0; } @@ -77,21 +78,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) { string str_type; int _nlmmt, _nkv, lm, num_elements, index; dcomplex value; - dcomplex *_wk = NULL; if (status == 0) { status = hdf_file->read("NLMMT", "INT32", &_nlmmt); status = hdf_file->read("NKV", "INT32", &_nkv); - lm = (int)((-2.0 + sqrt(4.0 + 2.0 * _nlmmt)) / 2.0); + lm = (int)(sqrt(4.0 + 2.0 * _nlmmt) / 2.0) - 1; num_elements = 2 * _nlmmt * _nkv * _nkv; instance = new Swap1(lm, _nkv); - _wk = instance->get_vector(); elements = new double[num_elements](); str_type = "FLOAT64_(" + to_string(num_elements) + ")"; status = hdf_file->read("WK", str_type, elements); for (int wi = 0; wi < num_elements / 2; wi++) { index = 2 * wi; value = elements[index] + elements[index + 1] * I; - _wk[wi] = value; + instance->wk[wi] = value; } // wi loop delete[] elements; status = hdf_file->close(); @@ -103,21 +102,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) { Swap1* Swap1::from_legacy(const std::string& file_name) { fstream input; Swap1 *instance = NULL; - dcomplex *_wk = NULL; int _nlmmt, _nkv, lm; double rval, ival; input.open(file_name.c_str(), ios::in | ios::binary); if (input.is_open()) { input.read(reinterpret_cast<char *>(&_nlmmt), sizeof(int)); - lm = (int)((-2.0 + sqrt(4.0 + 2.0 * _nlmmt)) / 2.0); + lm = (int)(sqrt(4.0 + 2.0 * _nlmmt) / 2.0) - 1; input.read(reinterpret_cast<char *>(&_nkv), sizeof(int)); instance = new Swap1(lm, _nkv); - _wk = instance->get_vector(); int num_elements = _nlmmt * _nkv * _nkv; for (int j = 0; j < num_elements; j++) { 
input.read(reinterpret_cast<char *>(&rval), sizeof(double)); input.read(reinterpret_cast<char *>(&ival), sizeof(double)); - _wk[j] = rval + ival * I; + instance->wk[j] = rval + ival * I; } input.close(); } else { diff --git a/src/libnptm/tra_subs.cpp b/src/libnptm/tra_subs.cpp index 8d04f4479c86d72407a7f3b836864425e3d0abdd..59496de9b3d642b013c36d394db50e5989ce9017 100644 --- a/src/libnptm/tra_subs.cpp +++ b/src/libnptm/tra_subs.cpp @@ -269,9 +269,9 @@ void ffrt(dcomplex *ac, dcomplex *ws, double *ffte, double *ffts, CIL *cil) { } dcomplex *frfmer( - int nkv, double vkm, double vknmx, double apfafa, double tra, - double spd, double rir, double ftcn, int le, int lmode, double pmf, - Swap1 *tt1, Swap2 *tt2 + int nkv, double vkm, double vknmx, double apfafa, double tra, + double spd, double rir, double ftcn, int le, int lmode, double pmf, + Swap1 *tt1, Swap2 *tt2 ) { const int nlemt = le * (le + 2) * 2; const dcomplex cc0 = 0.0 + 0.0 * I; diff --git a/src/sphere/sphere.cpp b/src/sphere/sphere.cpp index 288c259f4aee6b48efe3b6980af1e87f01e6dbd8..d9a1fd3449ae4fe4153619f63301f2ee5f43384c 100644 --- a/src/sphere/sphere.cpp +++ b/src/sphere/sphere.cpp @@ -576,6 +576,12 @@ int sphere_jxi488_cycle( oi->vec_vk[jxindex] = vk; oi->vec_xi[jxindex] = xi; } + // Adaptive definition of L_MAX + double wavelength = 2.0 * pi / vk; + double size_param = 2.0 * pi * sconf->get_radius(0) / wavelength; + int N = int(size_param + 4.05 * pow(size_param, 1.0 / 3.0)) + 2; + if (N < l_max) l_max = N; + // End of adaptive definition of L_MAX vtppoanp->append_line(VirtualBinaryLine(vk)); double thsca = (gconf->isam > 1) ? 
sa->ths - sa->th : 0.0; for (int i132 = 0; i132 < nsph; i132++) { diff --git a/src/trapping/cfrfme.cpp b/src/trapping/cfrfme.cpp index d2c19f3cec6abf5358e09d9f91975d0739019bd4..5901be17f7a617910bc6b3e4413a5bc8eaa879cb 100644 --- a/src/trapping/cfrfme.cpp +++ b/src/trapping/cfrfme.cpp @@ -56,6 +56,10 @@ #include "../include/tra_subs.h" #endif +#ifdef USE_NVTX +#include <nvtx3/nvToolsExt.h> +#endif + using namespace std; /*! \brief C++ implementation of FRFME @@ -64,13 +68,15 @@ using namespace std; * \param output_path: `string` Directory to write the output files in. */ void frfme(string data_file, string output_path) { +#ifdef USE_NVTX + nvtxRangePush("Running frfme()"); +#endif string tfrfme_name = output_path + "/c_TFRFME.hd5"; TFRFME *tfrfme = NULL; Swap1 *tt1 = NULL; Swap2 *tt2 = NULL; char namef[7]; char more; - dcomplex **w = NULL; dcomplex *wk = NULL; const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex uim = 0.0 + 1.0 * I; @@ -98,6 +104,9 @@ void frfme(string data_file, string output_path) { int wsum_size; // End of vector size variables if (jlmf != 1) { +#ifdef USE_NVTX + nvtxRangePush("frfme() with jlmf != 1"); +#endif int nxv, nyv, nzv; if (tfrfme == NULL) tfrfme = TFRFME::from_binary(tfrfme_name, "HDF5"); if (tfrfme != NULL) { @@ -140,7 +149,16 @@ void frfme(string data_file, string output_path) { printf("ERROR: could not open TFRFME file.\n"); } nks = nkv - 1; - } else { // label 16 +#ifdef USE_NVTX + nvtxRangePop(); +#endif + } else { // label 16; jlmf == 1 +#ifdef USE_NVTX + nvtxRangePush("frfme() with jlmf == 1"); +#endif +#ifdef USE_NVTX + nvtxRangePush("Setup operations"); +#endif int nksh, nrsh, nxsh, nysh, nzsh; str_target = file_lines[last_read_line++]; for (int cli = 0; cli < 7; cli++) { @@ -176,6 +194,9 @@ void frfme(string data_file, string output_path) { } str_target = file_lines[last_read_line++]; re = regex("[eEmM]"); +#ifdef USE_NVTX + nvtxRangePop(); +#endif if (regex_search(str_target, m, re)) { more = m.str().at(0); if (more == 'm' || 
more == 'M') { @@ -193,6 +214,9 @@ void frfme(string data_file, string output_path) { string tedf_name = output_path + "/" + namef + ".hd5"; ScattererConfiguration *tedf = ScattererConfiguration::from_binary(tedf_name, "HDF5"); if (tedf != NULL) { +#ifdef USE_NVTX + nvtxRangePush("TEDF data import"); +#endif int iduml, idum; iduml = tedf->number_of_spheres; idum = tedf->get_iog(iduml - 1); @@ -216,6 +240,9 @@ void frfme(string data_file, string output_path) { xi = xip; } // label 20 +#ifdef USE_NVTX + nvtxRangePop(); +#endif delete tedf; double wn = wp / 3.0e8; vk = xi * wn; @@ -236,6 +263,9 @@ void frfme(string data_file, string output_path) { fshmx = spd * (rir * (sqrt(uy - sthmx * sthmx) / sqrt(uy - sthlmx * sthlmx)) - uy); } // label 22 +#ifdef USE_NVTX + nvtxRangePush("Memory data loading"); +#endif nlmmt = lm * (lm + 2) * 2; nks = nksh * 2; nkv = nks + 1; @@ -279,6 +309,12 @@ void frfme(string data_file, string output_path) { double *_yv = tfrfme->get_y(); double *_zv = tfrfme->get_z(); dcomplex **_wsum = tfrfme->get_matrix(); +#ifdef USE_NVTX + nvtxRangePop(); +#endif +#ifdef USE_NVTX + nvtxRangePush("Looped vector initialization"); +#endif for (int i24 = nxshpo; i24 <= nxs; i24++) { _xv[i24] = _xv[i24 - 1] + delxyz; _xv[nxv - i24 - 1] = -_xv[i24]; @@ -297,7 +333,13 @@ void frfme(string data_file, string output_path) { vkv[i28] = vkv[i28 - 1] + delk; vkv[nkv - i28 - 1] = -vkv[i28]; } // i28 loop +#ifdef USE_NVTX + nvtxRangePop(); +#endif if (tfrfme != NULL) { +#ifdef USE_NVTX + nvtxRangePush("TFRFME initialization"); +#endif tfrfme->set_param("vk", vk); tfrfme->set_param("exri", exri); tfrfme->set_param("an", an); @@ -329,19 +371,20 @@ void frfme(string data_file, string output_path) { tt2->set_param("nlmmt", 1.0 * nlmmt); tt2->set_param("nrvc", 1.0 * nrvc); tt2->write_binary(temp_name2, "HDF5"); +#ifdef USE_NVTX + nvtxRangePop(); +#endif + dcomplex *vec_w = new dcomplex[nkv * nkv](); + dcomplex **w = new dcomplex*[nkv]; + for (int wi = 0; wi < nkv; wi++) 
w[wi] = vec_w + wi * nkv; +#ifdef USE_NVTX + nvtxRangePush("j80 loop"); +#endif for (int j80 = jlmf; j80 <= jlml; j80++) { - dcomplex *tt1_wk = tt1->get_vector(); int wk_index = 0; - // w matrix - if (w != NULL) { - for (int wi = nkv - 1; wi > -1; wi--) delete[] w[wi]; - delete[] w; - } - w = new dcomplex*[nkv]; - for (int wi = 0; wi < nkv; wi++) w[wi] = new dcomplex[nkv](); for (int jy50 = 0; jy50 < nkv; jy50++) { for (int jx50 = 0; jx50 < nkv; jx50++) { - for (int wi = 0; wi < nlmmt; wi++) wk[wi] = tt1_wk[wk_index++]; + for (int wi = 0; wi < nlmmt; wi++) wk[wi] = tt1->vec_wk[wk_index++]; w[jx50][jy50] = wk[j80 - 1]; } // jx50 } // jy50 loop @@ -377,7 +420,15 @@ void frfme(string data_file, string output_path) { } // iy70 loop } // iz75 loop } // j80 loop + delete[] vec_w; + delete[] w; +#ifdef USE_NVTX + nvtxRangePop(); +#endif // label 88 +#ifdef USE_NVTX + nvtxRangePush("Closing operations"); +#endif tfrfme->write_binary(tfrfme_name, "HDF5"); string output_name = output_path + "/c_OFRFME"; FILE *output = fopen(output_name.c_str(), "w"); @@ -386,6 +437,9 @@ void frfme(string data_file, string output_path) { if (spd > 0.0) fprintf(output, " FSHMX =%15.7lE\n", fshmx); fprintf(output, " FRSH =%15.7lE\n", frsh); fclose(output); +#ifdef USE_NVTX + nvtxRangePop(); +#endif } else { // Should never happen. 
printf("ERROR: could not open TFRFME file for output.\n"); } @@ -398,16 +452,24 @@ void frfme(string data_file, string output_path) { fprintf(output, " WRONG INPUT TAPE\n"); fclose(output); } +#ifdef USE_NVTX + nvtxRangePop(); +#endif } // label 45 +#ifdef USE_NVTX + nvtxRangePush("frfme() memory clean"); +#endif if (tfrfme != NULL) delete tfrfme; delete[] file_lines; if (tt2 != NULL) delete tt2; - if (w != NULL) { - for (int wi = nkv - 1; wi > -1; wi--) delete[] w[wi]; - delete[] w; - } if (wk != NULL) delete[] wk; if (tt1 != NULL) delete tt1; +#ifdef USE_NVTX + nvtxRangePop(); +#endif printf("FRFME: Done.\n"); +#ifdef USE_NVTX + nvtxRangePop(); +#endif } diff --git a/src/trapping/clffft.cpp b/src/trapping/clffft.cpp index 46828b2ddfb1d5de61249f6a570fa704dfc1f9e5..4d66d8bad883686c89c169c7afb90641735f1729 100644 --- a/src/trapping/clffft.cpp +++ b/src/trapping/clffft.cpp @@ -56,6 +56,10 @@ #include "../include/tra_subs.h" #endif +#ifdef USE_NVTX +#include <nvtx3/nvToolsExt.h> +#endif + using namespace std; /*! \brief C++ implementation of LFFFT @@ -64,6 +68,9 @@ using namespace std; * \param output_path: `string` Directory to write the output files in. */ void lffft(string data_file, string output_path) { +#ifdef USE_NVTX + nvtxRangePush("Running lffft()"); +#endif const dcomplex uim = 0.0 + 1.0 * I; const double sq2i = 1.0 / sqrt(2.0); const dcomplex sq2iti = sq2i * uim; @@ -476,4 +483,7 @@ void lffft(string data_file, string output_path) { delete ccr; delete[] file_lines; printf("LFFT: Done.\n"); +#ifdef USE_NVTX + nvtxRangePop(); +#endif }