Select Git revision
cfrfme.cpp 15.54 KiB
/* Copyright (C) 2025 INAF - Osservatorio Astronomico di Cagliari
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
A copy of the GNU General Public License is distributed along with
this program in the COPYING file. If not, see: <https://www.gnu.org/licenses/>.
*/
/*! \file cfrfme.cpp
*
* C++ implementation of FRFME functions.
*/
#include <chrono>
#include <cstdio>
#include <exception>
#include <fstream>
#include <regex>
#include <string>
#ifndef INCLUDE_TYPES_H_
#include "../include/types.h"
#endif
#ifndef INCLUDE_PARSERS_H_
#include "../include/Parsers.h"
#endif
#ifndef INCLUDE_LOGGING_H_
#include "../include/logging.h"
#endif
#ifndef INCLUDE_CONFIGURATION_H_
#include "../include/Configuration.h"
#endif
#ifndef INCLUDE_COMMONS_H_
#include "../include/Commons.h"
#endif
#ifndef INCLUDE_SPH_SUBS_H_
#include "../include/sph_subs.h"
#endif
#ifndef INCLUDE_TFRFME_H_
#include "../include/tfrfme.h"
#endif
#ifndef INCLUDE_TRA_SUBS_H_
#include "../include/tra_subs.h"
#endif
#ifdef USE_NVTX
#include <nvtx3/nvToolsExt.h>
#endif
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_TARGET_OFFLOAD
#pragma omp requires unified_shared_memory
#endif
using namespace std;
/*! \brief C++ implementation of FRFME
*
* \param data_file: `string` Name of the input data file.
* \param output_path: `string` Directory to write the output files in.
*/
void frfme(string data_file, string output_path) {
#ifdef USE_NVTX
nvtxRangePush("Running frfme()");
#endif
chrono::time_point<chrono::high_resolution_clock> t_start = chrono::high_resolution_clock::now();
chrono::duration<double> elapsed;
char buffer[256];
string message = "INIT";
Logger logger(LOG_INFO);
string tfrfme_name = output_path + "/c_TFRFME.hd5";
TFRFME *tfrfme = NULL;
Swap1 *tt1 = NULL;
Swap2 *tt2 = NULL;
char namef[7];
char more;
dcomplex *wk = NULL;
const dcomplex cc0 = 0.0 + 0.0 * I;
const dcomplex uim = 0.0 + 1.0 * I;
int line_count = 0, last_read_line = 0;
regex re = regex("-?[0-9]+");
string *file_lines = load_file(data_file, &line_count);
smatch m;
string str_target = file_lines[last_read_line++];
regex_search(str_target, m, re);
int jlmf = stoi(m.str());
str_target = m.suffix().str();
regex_search(str_target, m, re);
int jlml = stoi(m.str());
int lmode = 0, lm = 0, nks = 0, nkv = 0;
double vk = 0.0, exri = 0.0, an = 0.0, ff = 0.0, tra = 0.0;
double exdc = 0.0, wp = 0.0, xip = 0.0, xi = 0.0;
int idfc = 0, nxi = 0;
double apfafa = 0.0, pmf = 0.0, spd = 0.0, rir = 0.0, ftcn = 0.0, fshmx = 0.0;
double vxyzmx = 0.0, delxyz = 0.0, vknmx = 0.0, delk = 0.0, delks = 0.0;
double frsh = 0.0, exril = 0.0;
double **vkzm = NULL;
double *vkv = NULL;
int nlmmt = 0, nrvc = 0;
// Vector size variables
int wsum_size;
// End of vector size variables
if (jlmf != 1) {
#ifdef USE_NVTX
nvtxRangePush("frfme() with jlmf != 1");
#endif
int nxv, nyv, nzv;
if (tfrfme == NULL) tfrfme = TFRFME::from_binary(tfrfme_name, "HDF5");
if (tfrfme != NULL) {
lmode = tfrfme->lmode;
lm = tfrfme->lm;
nkv = tfrfme->nkv;
nxv = tfrfme->nxv;
nyv = tfrfme->nyv;
nzv = tfrfme->nzv;
vk = tfrfme->vk;
exri = tfrfme->exri;
an = tfrfme->an;
ff = tfrfme->ff;
tra = tfrfme->tra;
spd = tfrfme->spd;
frsh = tfrfme->frsh;
exril = tfrfme->exril;
string tempname2 = output_path + "/c_TEMPTAPE2.hd5";
if (tt2 == NULL) tt2 = Swap2::from_binary(tempname2, "HDF5");
if (tt2 != NULL) {
vkv = tt2->get_vector();
vkzm = tt2->get_matrix();
apfafa = tt2->apfafa;
pmf = tt2->pmf;
spd = tt2->spd;
rir = tt2->rir;
ftcn = tt2->ftcn;
fshmx = tt2->fshmx;
vxyzmx = tt2->vxyzmx;
delxyz = tt2->delxyz;
vknmx = tt2->vknmx;
delk = tt2->delk;
delks = tt2->delks;
nlmmt = tt2->nlmmt;
nrvc = tt2->nrvc;
} else {
message = "ERROR: could not open TEMPTAPE2 file.\n";
logger.err(message);
}
} else {
message = "ERROR: could not open TFRFME file.\n";
logger.err(message);
}
nks = nkv - 1;
#ifdef USE_NVTX
nvtxRangePop();
#endif
} else { // label 16; jlfm = 1
#ifdef USE_NVTX
nvtxRangePush("frfme() with jlmf == 1");
#endif
#ifdef USE_NVTX
nvtxRangePush("Setup operations");
#endif
int nksh, nrsh, nxsh, nysh, nzsh;
str_target = file_lines[last_read_line++];
for (int cli = 0; cli < 7; cli++) {
regex_search(str_target, m, re);
if (cli == 0) lmode = stoi(m.str());
else if (cli == 1) lm = stoi(m.str());
else if (cli == 2) nksh = stoi(m.str());
else if (cli == 3) nrsh = stoi(m.str());
else if (cli == 4) nxsh = stoi(m.str());
else if (cli == 5) nysh = stoi(m.str());
else if (cli == 6) nzsh = stoi(m.str());
str_target = m.suffix().str();
}
re = regex("-?[0-9]\\.[0-9]+([dDeE][-+]?[0-9]+)?");
regex_search(str_target, m, re);
double wlenfr = stod(m.str());
str_target = file_lines[last_read_line++];
for (int cli = 0; cli < 3; cli++) {
regex_search(str_target, m, re);
if (cli == 0) an = stod(m.str());
else if (cli == 1) ff = stod(m.str());
else if (cli == 2) tra = stod(m.str());
str_target = m.suffix().str();
}
double spdfr, exdcl;
str_target = file_lines[last_read_line++];
for (int cli = 0; cli < 3; cli++) {
regex_search(str_target, m, re);
if (cli == 0) spd = stod(m.str());
else if (cli == 1) spdfr = stod(m.str());
else if (cli == 2) exdcl = stod(m.str());
str_target = m.suffix().str();
}
str_target = file_lines[last_read_line++];
re = regex("[eEmM]");
#ifdef USE_NVTX
nvtxRangePop();
#endif
if (regex_search(str_target, m, re)) {
more = m.str().at(0);
if (more == 'm' || more == 'M') {
more = 'M';
sprintf(namef, "c_TMDF");
}
else if (more == 'e' || more == 'E') {
more = 'E';
sprintf(namef, "c_TEDF");
}
str_target = m.suffix().str();
re = regex("[0-9]+");
regex_search(str_target, m, re);
int ixi = stoi(m.str());
string tedf_name = output_path + "/" + namef + ".hd5";
ScattererConfiguration *tedf = ScattererConfiguration::from_binary(tedf_name, "HDF5");
if (tedf != NULL) {
#ifdef USE_NVTX
nvtxRangePush("TEDF data import");
#endif
int iduml, idum;
iduml = tedf->number_of_spheres;
idum = tedf->get_iog(iduml - 1);
exdc = tedf->exdc;
wp = tedf->wp;
xip = tedf->xip;
idfc = tedf->idfc;
nxi = tedf->number_of_scales;
if (idfc >= 0) {
if (ixi <= nxi) {
xi = tedf->get_scale(ixi - 1);
} else { // label 96
delete tedf;
// label 98
string output_name = output_path + "/c_OFRFME";
FILE *output = fopen(output_name.c_str(), "w");
fprintf(output, " WRONG INPUT TAPE\n");
fclose(output);
}
} else { // label 18
xi = xip;
}
// label 20
#ifdef USE_NVTX
nvtxRangePop();
#endif
delete tedf;
double wn = wp / 3.0e8;
vk = xi * wn;
exri = sqrt(exdc);
frsh = 0.0;
exril = 0.0;
fshmx = 0.0;
apfafa = exri / (an * ff);
if (lmode != 0) pmf = 2.0 * apfafa;
if (spd > 0.0) {
exril = sqrt(exdcl);
rir = exri / exril;
ftcn = 2.0 / (1.0 + rir);
frsh = -spd * spdfr;
double sthmx = an / exri;
double sthlmx = sthmx * rir;
double uy = 1.0;
fshmx = spd * (rir * (sqrt(uy - sthmx * sthmx) / sqrt(uy - sthlmx * sthlmx)) - uy);
}
// label 22
#ifdef USE_NVTX
nvtxRangePush("Memory data loading");
#endif
nlmmt = lm * (lm + 2) * 2;
nks = nksh * 2;
nkv = nks + 1;
// Array initialization
long swap1_size, swap2_size, tfrfme_size;
double size_mb;
message = "INFO: calculating memory requirements...\n";
logger.log(message);
swap1_size = Swap1::get_size(lm, nkv);
size_mb = 1.0 * swap1_size / 1024.0 / 1024.0;
printf("Swap 1: %.2lg MB\n", size_mb);
swap2_size = Swap2::get_size(nkv);
size_mb = 1.0 * swap2_size / 1024.0 / 1024.0;
sprintf(buffer, "Swap 2: %.2lg MB\n", size_mb);
message = string(buffer);
logger.log(message);
tt2 = new Swap2(nkv);
vkv = tt2->get_vector();
vkzm = tt2->get_matrix();
// End of array initialization
double vkm = vk * exri;
vknmx = vk * an;
delk = vknmx / nksh;
delks = delk / vkm;
delks = delks * delks;
vxyzmx = acos(0.0) * 4.0 / vkm * wlenfr;
delxyz = vxyzmx / nrsh;
int nxs = nxsh * 2;
int nxv = nxs + 1;
int nxshpo = nxsh + 1;
int nys = nysh * 2;
int nyv = nys + 1;
int nyshpo = nysh + 1;
int nzs = nzsh * 2;
int nzv = nzs + 1;
int nzshpo = nzsh + 1;
tfrfme_size = TFRFME::get_size(lm, nkv, nxv, nyv, nzv);
size_mb = 1.0 * tfrfme_size / 1024.0 / 1024.0;
sprintf(buffer, "TFRFME: %.2lg MB\n", size_mb);
message = string(buffer);
logger.log(message);
size_mb = 1.0 * (swap1_size + swap2_size + tfrfme_size) / 1024.0 / 1024.0;
sprintf(buffer, "TOTAL: %.2lg MB\n", size_mb);
message = string(buffer);
logger.log(message);
tfrfme = new TFRFME(lmode, lm, nkv, nxv, nyv, nzv);
double *_xv = tfrfme->get_x();
double *_yv = tfrfme->get_y();
double *_zv = tfrfme->get_z();
#ifdef USE_NVTX
nvtxRangePop();
#endif
#ifdef USE_NVTX
nvtxRangePush("Looped vector initialization");
#endif
for (int i24 = nxshpo; i24 <= nxs; i24++) {
_xv[i24] = _xv[i24 - 1] + delxyz;
_xv[nxv - i24 - 1] = -_xv[i24];
} // i24 loop
for (int i25 = nyshpo; i25 <= nys; i25++) {
_yv[i25] = _yv[i25 - 1] + delxyz;
_yv[nyv - i25 - 1] = -_yv[i25];
} // i25 loop
for (int i27 = nzshpo; i27 <= nzs; i27++) {
_zv[i27] = _zv[i27 - 1] + delxyz;
_zv[nzv - i27 - 1] = -_zv[i27];
} // i27 loop
int nrvc = nxv * nyv * nzv;
int nkshpo = nksh + 1;
for (int i28 = nkshpo; i28 <= nks; i28++) {
vkv[i28] = vkv[i28 - 1] + delk;
vkv[nkv - i28 - 1] = -vkv[i28];
} // i28 loop
#ifdef USE_NVTX
nvtxRangePop();
#endif
if (tfrfme != NULL) {
#ifdef USE_NVTX
nvtxRangePush("TFRFME initialization");
#endif
tfrfme->set_param("vk", vk);
tfrfme->set_param("exri", exri);
tfrfme->set_param("an", an);
tfrfme->set_param("ff", ff);
tfrfme->set_param("tra", tra);
tfrfme->set_param("spd", spd);
tfrfme->set_param("frsh", frsh);
tfrfme->set_param("exril", exril);
//fstream temptape1, temptape2;
string temp_name1 = output_path + "/c_TEMPTAPE1.hd5";
string temp_name2 = output_path + "/c_TEMPTAPE2.hd5";
//temptape1.open(temp_name1.c_str(), ios::out | ios::binary);
tt1 = new Swap1(lm, nkv);
if (wk != NULL) delete[] wk;
wk = frfmer(nkv, vkm, vknmx, apfafa, tra, spd, rir, ftcn, lm, lmode, pmf, tt1, tt2);
tt1->write_binary(temp_name1, "HDF5");
//delete tt1;
tt2->set_param("apfafa", apfafa);
tt2->set_param("pmf", pmf);
tt2->set_param("spd", spd);
tt2->set_param("rir", rir);
tt2->set_param("ftcn", ftcn);
tt2->set_param("fshmx", fshmx);
tt2->set_param("vxyzmx", vxyzmx);
tt2->set_param("delxyz", delxyz);
tt2->set_param("vknmx", vknmx);
tt2->set_param("delk", delk);
tt2->set_param("delks", delks);
tt2->set_param("nlmmt", 1.0 * nlmmt);
tt2->set_param("nrvc", 1.0 * nrvc);
tt2->write_binary(temp_name2, "HDF5");
#ifdef USE_NVTX
nvtxRangePop();
#endif
#ifdef USE_NVTX
nvtxRangePush("j80 loop");
#endif
dcomplex *vec_wsum = tfrfme->wsum[0];
double *vec_vkzm = vkzm[0];
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd
#endif
// #pragma omp parallel for
for (int j80 = jlmf-1; j80 < jlml; j80++) {
int nkvs = nkv * nkv;
dcomplex *vec_w = (dcomplex *) calloc(nkvs, sizeof(dcomplex));
dcomplex **w = (dcomplex **) calloc(nkv, sizeof(dcomplex *));
// dcomplex *wk_local = new dcomplex[nlmmt]();
for (int wi = 0; wi < nkv; wi++) w[wi] = vec_w + wi * nkv;
dcomplex wk_value;
int wk_index = 0;
// for (int jy50 = 0; jy50 < nkv; jy50++) {
// for (int jx50 = 0; jx50 < nkv; jx50++) {
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd
// #endif
// #pragma omp parallel for
for (int jxy50 = 0; jxy50 < nkvs; jxy50++) {
// for (int wi = 0; wi < nlmmt; wi++) wk_local[wi] = tt1->wk[wk_index++];
// w[jx50][jy50] = wk_local[j80];
wk_index = nlmmt*jxy50;
wk_value = tt1->wk[wk_index + j80];
// wk_index += nlmmt;
int jy50 = jxy50 / nkv;
int jx50 = jxy50 % nkv;
vec_w[(nkv*jx50) + jy50] = wk_value;
// w[jx50][jy50] = wk_value;
} // jxy50 loop
// } // jx50
// } // jy50 loop
int ixyz = 0;
for (int wj = 0; wj < nrvc; wj++) vec_wsum[(j80*nrvc)+wj] = cc0;
int nvtot = nxv*nyv*nzv;
int nvxy = nxv *nyv;
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for
// #endif
// #pragma omp parallel for
for (int ixyz = 0; ixyz < nvtot; ixyz++) {
int iz75 = ixyz / nvxy;
int iy70 = (ixyz % nvxy) / nxv;
int ix65 = ixyz % nxv;
// for (int iz75 = 0; iz75 < nzv; iz75++) {
double z = _zv[iz75] + frsh;
// for (int iy70 = 0; iy70 < nyv; iy70++) {
double y = _yv[iy70];
// for (int ix65 = 0; ix65 < nxv; ix65++) {
double x = _xv[ix65];
// ixyz++;
dcomplex sumy = cc0;
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target parallel for simd reduction(+:sumy)
// #endif
for (int jy60x55 = 0; jy60x55 < nkvs ; jy60x55++) {
int jy60 = jy60x55 / nkv;
int jx55 = jy60x55 % nkv;
double vky = vkv[jy60];
double vkx = (jx55==0) ? vkv[nkv - 1] : vkv[jx55];
double vkz = vec_vkzm[(jx55*nkv)+jy60];
dcomplex phas = (jx55==0) ?
cexp(uim * (-vkx * x + vky * y + vkz * z)):
cexp(uim * (vkx * x + vky * y + vkz * z));
dcomplex sumx = vec_w[(jx55*nkv)+jy60] * phas;
double factor1 = ((jx55==0)||(jx55==(nkv-1))) ? 0.5 : 1.0;
double factor2 = ((jy60==0)||(jy60==(nkv-1))) ? 0.5 : 1.0;
sumx *= factor1*factor2;
sumy += sumx;
} // jy60x55 loop
vec_wsum[((j80)*nrvc)+ixyz] = sumy * delks;
// } // ix65 loop
// } // iy70 loop
// } // iz75 loop
} // ixyz loop
free(vec_w);
free(w);
// delete[] wk_local;
} // j80 loop
#ifdef USE_NVTX
nvtxRangePop();
#endif
// label 88
#ifdef USE_NVTX
nvtxRangePush("Closing operations");
#endif
tfrfme->write_binary(tfrfme_name, "HDF5");
string output_name = output_path + "/c_OFRFME";
FILE *output = fopen(output_name.c_str(), "w");
fprintf(output, " IF JLML < NLMMT, PRESERVE TEMPTAPE1, TEMPTAPE2, AND TFRFRME,\n");
fprintf(output, " AND RESTART LM RUN WITH JLMF = JLML+1\n");
if (spd > 0.0) fprintf(output, " FSHMX =%15.7lE\n", fshmx);
fprintf(output, " FRSH =%15.7lE\n", frsh);
fclose(output);
#ifdef USE_NVTX
nvtxRangePop();
#endif
} else { // Should never happen.
message = "ERROR: could not open TFRFME file for output.\n";
logger.err(message);
}
} else {
message = "ERROR: could not open TEDF file.\n";
logger.err(message);
}
} else { // label 98
string output_name = output_path + "/c_OFRFME";
FILE *output = fopen(output_name.c_str(), "w");
fprintf(output, " WRONG INPUT TAPE\n");
fclose(output);
}
#ifdef USE_NVTX
nvtxRangePop();
#endif
}
// label 45
#ifdef USE_NVTX
nvtxRangePush("frfme() memory clean");
#endif
if (tfrfme != NULL) delete tfrfme;
delete[] file_lines;
if (tt2 != NULL) delete tt2;
if (wk != NULL) delete[] wk;
if (tt1 != NULL) delete tt1;
#ifdef USE_NVTX
nvtxRangePop();
#endif
elapsed = chrono::high_resolution_clock::now() - t_start;
message = "INFO: FRFME took " + to_string(elapsed.count()) + "s.\n";
logger.log(message);
#ifdef USE_NVTX
nvtxRangePop();
#endif
}