diff --git a/src/trapping/cfrfme.cpp b/src/trapping/cfrfme.cpp index 9415909a6e06d7f7481152cdf738346cd961871e..f7ccbaff69065bc3f1512d9c85bb99b0dbd201bb 100644 --- a/src/trapping/cfrfme.cpp +++ b/src/trapping/cfrfme.cpp @@ -105,6 +105,9 @@ void frfme(string data_file, string output_path) { int wsum_size; // End of vector size variables if (jlmf != 1) { +#ifdef USE_NVTX + nvtxRangePush("frfme() with jlmf != 1"); +#endif int nxv, nyv, nzv; if (tfrfme == NULL) tfrfme = TFRFME::from_binary(tfrfme_name, "HDF5"); if (tfrfme != NULL) { @@ -147,7 +150,16 @@ void frfme(string data_file, string output_path) { printf("ERROR: could not open TFRFME file.\n"); } nks = nkv - 1; - } else { // label 16 +#ifdef USE_NVTX + nvtxRangePop(); +#endif + } else { // label 16; jlmf == 1 +#ifdef USE_NVTX + nvtxRangePush("frfme() with jlmf == 1"); +#endif +#ifdef USE_NVTX + nvtxRangePush("Setup operations"); +#endif int nksh, nrsh, nxsh, nysh, nzsh; str_target = file_lines[last_read_line++]; for (int cli = 0; cli < 7; cli++) { @@ -183,6 +195,9 @@ void frfme(string data_file, string output_path) { } str_target = file_lines[last_read_line++]; re = regex("[eEmM]"); +#ifdef USE_NVTX + nvtxRangePop(); +#endif if (regex_search(str_target, m, re)) { more = m.str().at(0); if (more == 'm' || more == 'M') { @@ -200,6 +215,9 @@ void frfme(string data_file, string output_path) { string tedf_name = output_path + "/" + namef + ".hd5"; ScattererConfiguration *tedf = ScattererConfiguration::from_binary(tedf_name, "HDF5"); if (tedf != NULL) { +#ifdef USE_NVTX + nvtxRangePush("TEDF data import"); +#endif int iduml, idum; iduml = tedf->number_of_spheres; idum = tedf->get_iog(iduml - 1); @@ -223,6 +241,9 @@ void frfme(string data_file, string output_path) { xi = xip; } // label 20 +#ifdef USE_NVTX + nvtxRangePop(); +#endif delete tedf; double wn = wp / 3.0e8; vk = xi * wn; @@ -243,6 +264,9 @@ void frfme(string data_file, string output_path) { fshmx = spd * (rir * (sqrt(uy - sthmx * sthmx) / sqrt(uy - sthlmx * 
sthlmx)) - uy); } // label 22 +#ifdef USE_NVTX + nvtxRangePush("Memory data loading"); +#endif nlmmt = lm * (lm + 2) * 2; nks = nksh * 2; nkv = nks + 1; @@ -286,6 +310,12 @@ void frfme(string data_file, string output_path) { double *_yv = tfrfme->get_y(); double *_zv = tfrfme->get_z(); dcomplex **_wsum = tfrfme->get_matrix(); +#ifdef USE_NVTX + nvtxRangePop(); +#endif +#ifdef USE_NVTX + nvtxRangePush("Looped vector initialization"); +#endif for (int i24 = nxshpo; i24 <= nxs; i24++) { _xv[i24] = _xv[i24 - 1] + delxyz; _xv[nxv - i24 - 1] = -_xv[i24]; @@ -304,7 +334,13 @@ void frfme(string data_file, string output_path) { vkv[i28] = vkv[i28 - 1] + delk; vkv[nkv - i28 - 1] = -vkv[i28]; } // i28 loop +#ifdef USE_NVTX + nvtxRangePop(); +#endif if (tfrfme != NULL) { +#ifdef USE_NVTX + nvtxRangePush("TFRFME initialization"); +#endif tfrfme->set_param("vk", vk); tfrfme->set_param("exri", exri); tfrfme->set_param("an", an); @@ -336,6 +372,12 @@ void frfme(string data_file, string output_path) { tt2->set_param("nlmmt", 1.0 * nlmmt); tt2->set_param("nrvc", 1.0 * nrvc); tt2->write_binary(temp_name2, "HDF5"); +#ifdef USE_NVTX + nvtxRangePop(); +#endif +#ifdef USE_NVTX + nvtxRangePush("j80 loop"); +#endif for (int j80 = jlmf; j80 <= jlml; j80++) { dcomplex *tt1_wk = tt1->get_vector(); int wk_index = 0; @@ -384,7 +426,13 @@ void frfme(string data_file, string output_path) { } // iy70 loop } // iz75 loop } // j80 loop +#ifdef USE_NVTX + nvtxRangePop(); +#endif // label 88 +#ifdef USE_NVTX + nvtxRangePush("Closing operations"); +#endif tfrfme->write_binary(tfrfme_name, "HDF5"); string output_name = output_path + "/c_OFRFME"; FILE *output = fopen(output_name.c_str(), "w"); @@ -393,6 +441,9 @@ void frfme(string data_file, string output_path) { if (spd > 0.0) fprintf(output, " FSHMX =%15.7lE\n", fshmx); fprintf(output, " FRSH =%15.7lE\n", frsh); fclose(output); +#ifdef USE_NVTX + nvtxRangePop(); +#endif } else { // Should never happen. 
printf("ERROR: could not open TFRFME file for output.\n"); } @@ -405,8 +456,14 @@ void frfme(string data_file, string output_path) { fprintf(output, " WRONG INPUT TAPE\n"); fclose(output); } +#ifdef USE_NVTX + nvtxRangePop(); +#endif } // label 45 +#ifdef USE_NVTX + nvtxRangePush("frfme() memory clean"); +#endif if (tfrfme != NULL) delete tfrfme; delete[] file_lines; if (tt2 != NULL) delete tt2; @@ -416,6 +473,9 @@ void frfme(string data_file, string output_path) { } if (wk != NULL) delete[] wk; if (tt1 != NULL) delete tt1; +#ifdef USE_NVTX + nvtxRangePop(); +#endif printf("FRFME: Done.\n"); #ifdef USE_NVTX nvtxRangePop(); diff --git a/src/trapping/clffft.cpp b/src/trapping/clffft.cpp index 46828b2ddfb1d5de61249f6a570fa704dfc1f9e5..4d66d8bad883686c89c169c7afb90641735f1729 100644 --- a/src/trapping/clffft.cpp +++ b/src/trapping/clffft.cpp @@ -56,6 +56,10 @@ #include "../include/tra_subs.h" #endif +#ifdef USE_NVTX +#include <nvtx3/nvToolsExt.h> +#endif + using namespace std; /*! \brief C++ implementation of LFFFT @@ -64,6 +68,9 @@ using namespace std; * \param output_path: `string` Directory to write the output files in. */ void lffft(string data_file, string output_path) { +#ifdef USE_NVTX + nvtxRangePush("Running lffft()"); +#endif const dcomplex uim = 0.0 + 1.0 * I; const double sq2i = 1.0 / sqrt(2.0); const dcomplex sq2iti = sq2i * uim; @@ -476,4 +483,7 @@ void lffft(string data_file, string output_path) { delete ccr; delete[] file_lines; printf("LFFT: Done.\n"); +#ifdef USE_NVTX + nvtxRangePop(); +#endif }