/* Copyright (C) 2024 INAF - Osservatorio Astronomico di Cagliari This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. A copy of the GNU General Public License is distributed along with this program in the COPYING file. If not, see: <https://www.gnu.org/licenses/>. */ /*! \file cfrfme.cpp * * C++ implementation of FRFME functions. */ #include <cstdio> #include <exception> #include <fstream> #include <regex> #include <string> #ifndef INCLUDE_TYPES_H_ #include "../include/types.h" #endif #ifndef INCLUDE_PARSERS_H_ #include "../include/Parsers.h" #endif #ifndef INCLUDE_LOGGING_H_ #include "../include/logging.h" #endif #ifndef INCLUDE_CONFIGURATION_H_ #include "../include/Configuration.h" #endif #ifndef INCLUDE_COMMONS_H_ #include "../include/Commons.h" #endif #ifndef INCLUDE_SPH_SUBS_H_ #include "../include/sph_subs.h" #endif #ifndef INCLUDE_TFRFME_H_ #include "../include/tfrfme.h" #endif #ifndef INCLUDE_TRA_SUBS_H_ #include "../include/tra_subs.h" #endif #ifdef USE_NVTX #include <nvtx3/nvToolsExt.h> #endif #ifdef USE_TARGET_OFFLOAD #pragma omp requires unified_shared_memory #endif using namespace std; /*! \brief C++ implementation of FRFME * * \param data_file: `string` Name of the input data file. * \param output_path: `string` Directory to write the output files in. */ void frfme(string data_file, string output_path) { #ifdef USE_NVTX nvtxRangePush("Running frfme()"); #endif string tfrfme_name = output_path + "/c_TFRFME.hd5"; TFRFME *tfrfme = NULL; Swap1 *tt1 = NULL; Swap2 *tt2 = NULL; char namef[7]; char more; dcomplex *wk = NULL; const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex uim = 0.0 + 1.0 * I; int line_count = 0, last_read_line = 0; regex re = regex("-?[0-9]+"); string *file_lines = load_file(data_file, &line_count); smatch m; string str_target = file_lines[last_read_line++]; regex_search(str_target, m, re); int jlmf = stoi(m.str()); str_target = m.suffix().str(); regex_search(str_target, m, re); int jlml = stoi(m.str()); int lmode = 0, lm = 0, nks = 0, nkv = 0; double vk = 0.0, exri = 0.0, an = 0.0, ff = 0.0, tra = 0.0; double exdc = 0.0, wp = 0.0, xip = 0.0, xi = 0.0; int idfc = 0, nxi = 0; double apfafa = 0.0, pmf = 0.0, spd = 0.0, rir = 0.0, ftcn = 0.0, fshmx = 0.0; double vxyzmx = 0.0, delxyz = 0.0, vknmx = 0.0, delk = 0.0, delks = 0.0; double frsh = 0.0, exril = 0.0; double **vkzm = NULL; double *vkv = NULL; int nlmmt = 0, nrvc = 0; // Vector size variables int wsum_size; // End of vector size variables if (jlmf != 1) { #ifdef USE_NVTX nvtxRangePush("frfme() with jlmf != 1"); #endif int nxv, nyv, nzv; if (tfrfme == NULL) tfrfme = TFRFME::from_binary(tfrfme_name, "HDF5"); if (tfrfme != NULL) { lmode = (int)tfrfme->get_param("lmode"); lm = (int)tfrfme->get_param("lm"); nkv = (int)tfrfme->get_param("nkv"); nxv = (int)tfrfme->get_param("nxv"); nyv = (int)tfrfme->get_param("nyv"); nzv = (int)tfrfme->get_param("nzv"); vk = tfrfme->get_param("vk"); exri = tfrfme->get_param("exri"); an = tfrfme->get_param("an"); ff = tfrfme->get_param("ff"); tra = tfrfme->get_param("tra"); spd = tfrfme->get_param("spd"); frsh = tfrfme->get_param("frsh"); exril = tfrfme->get_param("exril"); string tempname2 = output_path + "/c_TEMPTAPE2.hd5"; if (tt2 == NULL) tt2 = Swap2::from_binary(tempname2, "HDF5"); if (tt2 != NULL) { vkv = tt2->get_vector(); vkzm = tt2->get_matrix(); apfafa = tt2->apfafa; pmf = tt2->pmf; spd = tt2->spd; rir = tt2->rir; ftcn = tt2->ftcn; fshmx = tt2->fshmx; vxyzmx = tt2->vxyzmx; delxyz = tt2->delxyz; vknmx = tt2->vknmx; delk = tt2->delk; delks = tt2->delks; nlmmt = tt2->nlmmt; nrvc = tt2->nrvc; } else { printf("ERROR: could not open TEMPTAPE2 file.\n"); } } else { printf("ERROR: could not open TFRFME file.\n"); } nks = nkv - 1; #ifdef USE_NVTX nvtxRangePop(); #endif } else { // label 16; jlfm = 1 #ifdef USE_NVTX nvtxRangePush("frfme() with jlmf == 1"); #endif #ifdef USE_NVTX nvtxRangePush("Setup operations"); #endif int nksh, nrsh, nxsh, nysh, nzsh; str_target = file_lines[last_read_line++]; for (int cli = 0; cli < 7; cli++) { regex_search(str_target, m, re); if (cli == 0) lmode = stoi(m.str()); else if (cli == 1) lm = stoi(m.str()); else if (cli == 2) nksh = stoi(m.str()); else if (cli == 3) nrsh = stoi(m.str()); else if (cli == 4) nxsh = stoi(m.str()); else if (cli == 5) nysh = stoi(m.str()); else if (cli == 6) nzsh = stoi(m.str()); str_target = m.suffix().str(); } re = regex("-?[0-9]\\.[0-9]+([dDeE][-+]?[0-9]+)?"); regex_search(str_target, m, re); double wlenfr = stod(m.str()); str_target = file_lines[last_read_line++]; for (int cli = 0; cli < 3; cli++) { regex_search(str_target, m, re); if (cli == 0) an = stod(m.str()); else if (cli == 1) ff = stod(m.str()); else if (cli == 2) tra = stod(m.str()); str_target = m.suffix().str(); } double spdfr, exdcl; str_target = file_lines[last_read_line++]; for (int cli = 0; cli < 3; cli++) { regex_search(str_target, m, re); if (cli == 0) spd = stod(m.str()); else if (cli == 1) spdfr = stod(m.str()); else if (cli == 2) exdcl = stod(m.str()); str_target = m.suffix().str(); } str_target = file_lines[last_read_line++]; re = regex("[eEmM]"); #ifdef USE_NVTX nvtxRangePop(); #endif if (regex_search(str_target, m, re)) { more = m.str().at(0); if (more == 'm' || more == 'M') { more = 'M'; sprintf(namef, "c_TMDF"); } else if (more == 'e' || more == 'E') { more = 'E'; sprintf(namef, "c_TEDF"); } str_target = m.suffix().str(); re = regex("[0-9]+"); regex_search(str_target, m, re); int ixi = stoi(m.str()); string tedf_name = output_path + "/" + namef + ".hd5"; ScattererConfiguration *tedf = ScattererConfiguration::from_binary(tedf_name, "HDF5"); if (tedf != NULL) { #ifdef USE_NVTX nvtxRangePush("TEDF data import"); #endif int iduml, idum; iduml = tedf->number_of_spheres; idum = tedf->get_iog(iduml - 1); exdc = tedf->exdc; wp = tedf->wp; xip = tedf->xip; idfc = tedf->idfc; nxi = tedf->number_of_scales; if (idfc >= 0) { if (ixi <= nxi) { xi = tedf->get_scale(ixi - 1); } else { // label 96 delete tedf; // label 98 string output_name = output_path + "/c_OFRFME"; FILE *output = fopen(output_name.c_str(), "w"); fprintf(output, " WRONG INPUT TAPE\n"); fclose(output); } } else { // label 18 xi = xip; } // label 20 #ifdef USE_NVTX nvtxRangePop(); #endif delete tedf; double wn = wp / 3.0e8; vk = xi * wn; exri = sqrt(exdc); frsh = 0.0; exril = 0.0; fshmx = 0.0; apfafa = exri / (an * ff); if (lmode != 0) pmf = 2.0 * apfafa; if (spd > 0.0) { exril = sqrt(exdcl); rir = exri / exril; ftcn = 2.0 / (1.0 + rir); frsh = -spd * spdfr; double sthmx = an / exri; double sthlmx = sthmx * rir; double uy = 1.0; fshmx = spd * (rir * (sqrt(uy - sthmx * sthmx) / sqrt(uy - sthlmx * sthlmx)) - uy); } // label 22 #ifdef USE_NVTX nvtxRangePush("Memory data loading"); #endif nlmmt = lm * (lm + 2) * 2; nks = nksh * 2; nkv = nks + 1; // Array initialization long swap1_size, swap2_size, tfrfme_size; double size_mb; printf("INFO: calculating memory requirements\n"); swap1_size = Swap1::get_size(lm, nkv); size_mb = 1.0 * swap1_size / 1024.0 / 1024.0; printf("Swap 1: %.2lg MB\n", size_mb); swap2_size = Swap2::get_size(nkv); size_mb = 1.0 * swap2_size / 1024.0 / 1024.0; printf("Swap 2: %.2lg MB\n", size_mb); tt2 = new Swap2(nkv); vkv = tt2->get_vector(); vkzm = tt2->get_matrix(); // End of array initialization double vkm = vk * exri; vknmx = vk * an; delk = vknmx / nksh; delks = delk / vkm; delks = delks * delks; vxyzmx = acos(0.0) * 4.0 / vkm * wlenfr; delxyz = vxyzmx / nrsh; int nxs = nxsh * 2; int nxv = nxs + 1; int nxshpo = nxsh + 1; int nys = nysh * 2; int nyv = nys + 1; int nyshpo = nysh + 1; int nzs = nzsh * 2; int nzv = nzs + 1; int nzshpo = nzsh + 1; tfrfme_size = TFRFME::get_memory_requirement(lmode, lm, nkv, nxv, nyv, nzv); size_mb = 1.0 * tfrfme_size / 1024.0 / 1024.0; printf("TFRFME: %.2lg MB\n", size_mb); size_mb = 1.0 * (swap1_size + swap2_size + tfrfme_size) / 1024.0 / 1024.0; printf("TOTAL: %.2lg MB\n", size_mb); tfrfme = new TFRFME(lmode, lm, nkv, nxv, nyv, nzv); double *_xv = tfrfme->get_x(); double *_yv = tfrfme->get_y(); double *_zv = tfrfme->get_z(); dcomplex **_wsum = tfrfme->get_matrix(); #ifdef USE_NVTX nvtxRangePop(); #endif #ifdef USE_NVTX nvtxRangePush("Looped vector initialization"); #endif for (int i24 = nxshpo; i24 <= nxs; i24++) { _xv[i24] = _xv[i24 - 1] + delxyz; _xv[nxv - i24 - 1] = -_xv[i24]; } // i24 loop for (int i25 = nyshpo; i25 <= nys; i25++) { _yv[i25] = _yv[i25 - 1] + delxyz; _yv[nyv - i25 - 1] = -_yv[i25]; } // i25 loop for (int i27 = nzshpo; i27 <= nzs; i27++) { _zv[i27] = _zv[i27 - 1] + delxyz; _zv[nzv - i27 - 1] = -_zv[i27]; } // i27 loop int nrvc = nxv * nyv * nzv; int nkshpo = nksh + 1; for (int i28 = nkshpo; i28 <= nks; i28++) { vkv[i28] = vkv[i28 - 1] + delk; vkv[nkv - i28 - 1] = -vkv[i28]; } // i28 loop #ifdef USE_NVTX nvtxRangePop(); #endif if (tfrfme != NULL) { #ifdef USE_NVTX nvtxRangePush("TFRFME initialization"); #endif tfrfme->set_param("vk", vk); tfrfme->set_param("exri", exri); tfrfme->set_param("an", an); tfrfme->set_param("ff", ff); tfrfme->set_param("tra", tra); tfrfme->set_param("spd", spd); tfrfme->set_param("frsh", frsh); tfrfme->set_param("exril", exril); //fstream temptape1, temptape2; string temp_name1 = output_path + "/c_TEMPTAPE1.hd5"; string temp_name2 = output_path + "/c_TEMPTAPE2.hd5"; //temptape1.open(temp_name1.c_str(), ios::out | ios::binary); tt1 = new Swap1(lm, nkv); if (wk != NULL) delete[] wk; wk = frfmer(nkv, vkm, vknmx, apfafa, tra, spd, rir, ftcn, lm, lmode, pmf, tt1, tt2); tt1->write_binary(temp_name1, "HDF5"); //delete tt1; tt2->set_param("apfafa", apfafa); tt2->set_param("pmf", pmf); tt2->set_param("spd", spd); tt2->set_param("rir", rir); tt2->set_param("ftcn", ftcn); tt2->set_param("fshmx", fshmx); tt2->set_param("vxyzmx", vxyzmx); tt2->set_param("delxyz", delxyz); tt2->set_param("vknmx", vknmx); tt2->set_param("delk", delk); tt2->set_param("delks", delks); tt2->set_param("nlmmt", 1.0 * nlmmt); tt2->set_param("nrvc", 1.0 * nrvc); tt2->write_binary(temp_name2, "HDF5"); #ifdef USE_NVTX nvtxRangePop(); #endif #ifdef USE_NVTX nvtxRangePush("j80 loop"); #endif #pragma omp target parallel for for (int j80 = jlmf; j80 <= jlml; j80++) { // dcomplex *vec_w = new dcomplex[nkv * nkv](); // dcomplex **w = new dcomplex*[nkv]; // dcomplex *wk_local = new dcomplex[nlmmt](); dcomplex *vec_w = (dcomplex *) calloc(nkv * nkv, sizeof(dcomplex)); dcomplex **w = (dcomplex **) calloc(nkv, sizeof(dcomplex *)); dcomplex *wk_local = (dcomplex *) calloc(nlmmt, sizeof(dcomplex)); for (int wi = 0; wi < nkv; wi++) w[wi] = vec_w + wi * nkv; int wk_index = 0; for (int jy50 = 0; jy50 < nkv; jy50++) { for (int jx50 = 0; jx50 < nkv; jx50++) { for (int wi = 0; wi < nlmmt; wi++) wk_local[wi] = tt1->wk[wk_index++]; w[jx50][jy50] = wk_local[j80 - 1]; } // jx50 } // jy50 loop int ixyz = 0; for (int wj = 0; wj < nrvc; wj++) _wsum[j80 - 1][wj] = cc0; for (int iz75 = 0; iz75 < nzv; iz75++) { double z = _zv[iz75] + frsh; for (int iy70 = 0; iy70 < nyv; iy70++) { double y = _yv[iy70]; for (int ix65 = 0; ix65 < nxv; ix65++) { double x = _xv[ix65]; ixyz++; dcomplex sumy = cc0; for (int jy60 = 0; jy60 < nkv; jy60++) { double vky = vkv[jy60]; double vkx = vkv[nkv - 1]; double vkzf = vkzm[0][jy60]; dcomplex phasf = cexp(uim * (-vkx * x + vky * y + vkzf * z)); double vkzl = vkzm[nkv - 1][jy60]; dcomplex phasl = cexp(uim * (vkx * x + vky * y + vkzl * z)); dcomplex sumx = 0.5 * (w[0][jy60] * phasf + w[nkv - 1][jy60] * phasl); for (int jx55 = 2; jx55 <= nks; jx55++) { vkx = vkv[jx55 - 1]; double vkz = vkzm[jx55 - 1][jy60]; dcomplex phas = cexp(uim * (vkx * x + vky * y + vkz * z)); sumx += (w[jx55 - 1][jy60] * phas); } // jx55 loop if (jy60 == 0 || jy60 == nkv - 1) sumx *= 0.5; sumy += sumx; } // jy60 loop _wsum[j80 - 1][ixyz - 1] = sumy * delks; } // ix65 loop } // iy70 loop } // iz75 loop // delete[] vec_w; // delete[] w; // delete[] wk_local; free(vec_w); free(w); free(wk_local); } // j80 loop #ifdef USE_NVTX nvtxRangePop(); #endif // label 88 #ifdef USE_NVTX nvtxRangePush("Closing operations"); #endif tfrfme->write_binary(tfrfme_name, "HDF5"); string output_name = output_path + "/c_OFRFME"; FILE *output = fopen(output_name.c_str(), "w"); fprintf(output, " IF JLML < NLMMT, PRESERVE TEMPTAPE1, TEMPTAPE2, AND TFRFRME,\n"); fprintf(output, " AND RESTART LM RUN WITH JLMF = JLML+1\n"); if (spd > 0.0) fprintf(output, " FSHMX =%15.7lE\n", fshmx); fprintf(output, " FRSH =%15.7lE\n", frsh); fclose(output); #ifdef USE_NVTX nvtxRangePop(); #endif } else { // Should never happen. printf("ERROR: could not open TFRFME file for output.\n"); } } else { printf("ERROR: could not open TEDF file.\n"); } } else { // label 98 string output_name = output_path + "/c_OFRFME"; FILE *output = fopen(output_name.c_str(), "w"); fprintf(output, " WRONG INPUT TAPE\n"); fclose(output); } #ifdef USE_NVTX nvtxRangePop(); #endif } // label 45 #ifdef USE_NVTX nvtxRangePush("frfme() memory clean"); #endif if (tfrfme != NULL) delete tfrfme; delete[] file_lines; if (tt2 != NULL) delete tt2; if (wk != NULL) delete[] wk; if (tt1 != NULL) delete tt1; #ifdef USE_NVTX nvtxRangePop(); #endif printf("FRFME: Done.\n"); #ifdef USE_NVTX nvtxRangePop(); #endif }