Skip to content
Snippets Groups Projects
Select Git revision
  • 858e3eb65a4eedc936072f23aa390155e019e9d9
  • master default protected
  • parallel_trapping
  • offload_trapping
  • script_devel
  • unify_iterations
  • containers-m10
  • magma_refinement
  • release9
  • enable_svd
  • parallel_angles_gmu
  • containers-m8
  • parallel_angles
  • profile_omp_leonardo
  • test_nvidia_profiler
  • containers
  • shaditest
  • test1
  • main
  • 3-error-in-run-the-program
  • experiment
  • NP_TMcode-M10a.03
  • NP_TMcode-M10a.02
  • NP_TMcode-M10a.01
  • NP_TMcode-M10a.00
  • NP_TMcode-M9.01
  • NP_TMcode-M9.00
  • NP_TMcode-M8.03
  • NP_TMcode-M8.02
  • NP_TMcode-M8.01
  • NP_TMcode-M8.00
  • NP_TMcode-M7.00
  • v0.0
33 results

cfrfme.cpp

Blame
  • cfrfme.cpp 14.41 KiB
    /* Copyright (C) 2024   INAF - Osservatorio Astronomico di Cagliari
    
       This program is free software: you can redistribute it and/or modify
       it under the terms of the GNU General Public License as published by
       the Free Software Foundation, either version 3 of the License, or
       (at your option) any later version.
       
       This program is distributed in the hope that it will be useful,
       but WITHOUT ANY WARRANTY; without even the implied warranty of
       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
       GNU General Public License for more details.
       
       A copy of the GNU General Public License is distributed along with
       this program in the COPYING file. If not, see: <https://www.gnu.org/licenses/>.
     */
    
    /*! \file cfrfme.cpp
     *
     * C++ implementation of FRFME functions.
     */
    #include <cstdio>
    #include <exception>
    #include <fstream>
    #include <regex>
    #include <string>
    
    #ifndef INCLUDE_TYPES_H_
    #include "../include/types.h"
    #endif
    
    #ifndef INCLUDE_PARSERS_H_
    #include "../include/Parsers.h"
    #endif
    
    #ifndef INCLUDE_LOGGING_H_
    #include "../include/logging.h"
    #endif
    
    #ifndef INCLUDE_CONFIGURATION_H_
    #include "../include/Configuration.h"
    #endif
    
    #ifndef INCLUDE_COMMONS_H_
    #include "../include/Commons.h"
    #endif
    
    #ifndef INCLUDE_SPH_SUBS_H_
    #include "../include/sph_subs.h"
    #endif
    
    #ifndef INCLUDE_TFRFME_H_
    #include "../include/tfrfme.h"
    #endif
    
    #ifndef INCLUDE_TRA_SUBS_H_
    #include "../include/tra_subs.h"
    #endif
    
    #ifdef USE_NVTX
    #include <nvtx3/nvToolsExt.h>
    #endif
    
    #ifdef USE_TARGET_OFFLOAD
    #pragma omp requires unified_shared_memory
    #endif
    
    using namespace std;
    
    /*! \brief C++ implementation of FRFME
     *
     *  \param data_file: `string` Name of the input data file.
     *  \param output_path: `string` Directory to write the output files in.
     */
    void frfme(string data_file, string output_path) {
    #ifdef USE_NVTX
      nvtxRangePush("Running frfme()");
    #endif
      string tfrfme_name = output_path + "/c_TFRFME.hd5";
      TFRFME *tfrfme = NULL;
      Swap1 *tt1 = NULL;
      Swap2 *tt2 = NULL;
      char namef[7];
      char more;
      dcomplex *wk = NULL;
      const dcomplex cc0 = 0.0 + 0.0 * I;
      const dcomplex uim = 0.0 + 1.0 * I;
      int line_count = 0, last_read_line = 0;
      regex re = regex("-?[0-9]+");
      string *file_lines = load_file(data_file, &line_count);
      smatch m;
      string str_target = file_lines[last_read_line++];
      regex_search(str_target, m, re);
      int jlmf = stoi(m.str());
      str_target = m.suffix().str();
      regex_search(str_target, m, re);
      int jlml = stoi(m.str());
      int lmode = 0, lm = 0, nks = 0, nkv = 0;
      double vk = 0.0, exri = 0.0, an = 0.0, ff = 0.0, tra = 0.0;
      double exdc = 0.0, wp = 0.0, xip = 0.0, xi = 0.0;
      int idfc = 0, nxi = 0;
      double apfafa = 0.0, pmf = 0.0, spd = 0.0, rir = 0.0, ftcn = 0.0, fshmx = 0.0;
      double vxyzmx = 0.0, delxyz = 0.0, vknmx = 0.0, delk = 0.0, delks = 0.0;
      double frsh = 0.0, exril = 0.0;
      double **vkzm = NULL;
      double *vkv = NULL;
      int nlmmt = 0, nrvc = 0;
      // Vector size variables
      int wsum_size;
      // End of vector size variables
      if (jlmf != 1) {
    #ifdef USE_NVTX
        nvtxRangePush("frfme() with jlmf != 1");
    #endif
        int nxv, nyv, nzv;
        if (tfrfme == NULL) tfrfme = TFRFME::from_binary(tfrfme_name, "HDF5");
        if (tfrfme != NULL) {
          lmode = (int)tfrfme->get_param("lmode");
          lm = (int)tfrfme->get_param("lm");
          nkv = (int)tfrfme->get_param("nkv");
          nxv = (int)tfrfme->get_param("nxv");
          nyv = (int)tfrfme->get_param("nyv");
          nzv = (int)tfrfme->get_param("nzv");
          vk = tfrfme->get_param("vk");
          exri = tfrfme->get_param("exri");
          an = tfrfme->get_param("an");
          ff = tfrfme->get_param("ff");
          tra = tfrfme->get_param("tra");
          spd = tfrfme->get_param("spd");
          frsh = tfrfme->get_param("frsh");
          exril = tfrfme->get_param("exril");
          string tempname2 = output_path + "/c_TEMPTAPE2.hd5";
          if (tt2 == NULL) tt2 = Swap2::from_binary(tempname2, "HDF5");
          if (tt2 != NULL) {
    	vkv = tt2->get_vector();
    	vkzm = tt2->get_matrix();
    	apfafa = tt2->apfafa;
    	pmf = tt2->pmf;
    	spd = tt2->spd;
    	rir = tt2->rir;
    	ftcn = tt2->ftcn;
    	fshmx = tt2->fshmx;
    	vxyzmx = tt2->vxyzmx;
    	delxyz = tt2->delxyz;
    	vknmx = tt2->vknmx;
    	delk = tt2->delk;
    	delks = tt2->delks;
    	nlmmt = tt2->nlmmt;
    	nrvc = tt2->nrvc;
          } else {
    	printf("ERROR: could not open TEMPTAPE2 file.\n");
          }
        } else {
          printf("ERROR: could not open TFRFME file.\n");
        }
        nks = nkv - 1;
    #ifdef USE_NVTX
        nvtxRangePop();
    #endif
      } else { // label 16; jlfm = 1
    #ifdef USE_NVTX
        nvtxRangePush("frfme() with jlmf == 1");
    #endif
    #ifdef USE_NVTX
        nvtxRangePush("Setup operations");
    #endif
        int nksh, nrsh, nxsh, nysh, nzsh;
        str_target = file_lines[last_read_line++];
        for (int cli = 0; cli < 7; cli++) {
          regex_search(str_target, m, re);
          if (cli == 0) lmode = stoi(m.str());
          else if (cli == 1) lm = stoi(m.str());
          else if (cli == 2) nksh = stoi(m.str());
          else if (cli == 3) nrsh = stoi(m.str());
          else if (cli == 4) nxsh = stoi(m.str());
          else if (cli == 5) nysh = stoi(m.str());
          else if (cli == 6) nzsh = stoi(m.str());
          str_target = m.suffix().str();
        }
        re = regex("-?[0-9]\\.[0-9]+([dDeE][-+]?[0-9]+)?");
        regex_search(str_target, m, re);
        double wlenfr = stod(m.str());
        str_target = file_lines[last_read_line++];
        for (int cli = 0; cli < 3; cli++) {
          regex_search(str_target, m, re);
          if (cli == 0) an = stod(m.str());
          else if (cli == 1) ff = stod(m.str());
          else if (cli == 2) tra = stod(m.str());
          str_target = m.suffix().str();
        }
        double spdfr, exdcl;
        str_target = file_lines[last_read_line++];
        for (int cli = 0; cli < 3; cli++) {
          regex_search(str_target, m, re);
          if (cli == 0) spd = stod(m.str());
          else if (cli == 1) spdfr = stod(m.str());
          else if (cli == 2) exdcl = stod(m.str());
          str_target = m.suffix().str();
        }
        str_target = file_lines[last_read_line++];
        re = regex("[eEmM]");
    #ifdef USE_NVTX
        nvtxRangePop();
    #endif
        if (regex_search(str_target, m, re)) {
          more = m.str().at(0);
          if (more == 'm' || more == 'M') {
    	more = 'M';
    	sprintf(namef, "c_TMDF");
          }
          else if (more == 'e' || more == 'E') {
    	more = 'E';
    	sprintf(namef, "c_TEDF");
          }
          str_target = m.suffix().str();
          re = regex("[0-9]+");
          regex_search(str_target, m, re);
          int ixi = stoi(m.str());
          string tedf_name = output_path + "/" + namef + ".hd5";
          ScattererConfiguration *tedf = ScattererConfiguration::from_binary(tedf_name, "HDF5");
          if (tedf != NULL) {
    #ifdef USE_NVTX
    	nvtxRangePush("TEDF data import");
    #endif
    	int iduml, idum;
    	iduml = tedf->number_of_spheres;
    	idum = tedf->get_iog(iduml - 1);
    	exdc = tedf->exdc;
    	wp = tedf->wp;
    	xip = tedf->xip;
    	idfc = tedf->idfc;
    	nxi = tedf->number_of_scales;
    	if (idfc >= 0) {
    	  if (ixi <= nxi) {
    	    xi = tedf->get_scale(ixi - 1);
    	  } else { // label 96
    	    delete tedf;
    	    // label 98
    	    string output_name = output_path + "/c_OFRFME";
    	    FILE *output = fopen(output_name.c_str(), "w");
    	    fprintf(output, "  WRONG INPUT TAPE\n");
    	    fclose(output);
    	  }
    	} else { // label 18
    	  xi = xip;
    	}
    	// label 20
    #ifdef USE_NVTX
    	nvtxRangePop();
    #endif
    	delete tedf;
    	double wn = wp / 3.0e8;
    	vk = xi * wn;
    	exri = sqrt(exdc);
    	frsh = 0.0;
    	exril = 0.0;
    	fshmx = 0.0;
    	apfafa = exri / (an * ff);
    	if (lmode != 0) pmf = 2.0 * apfafa;
    	if (spd > 0.0) {
    	  exril = sqrt(exdcl);
    	  rir = exri / exril;
    	  ftcn = 2.0 / (1.0 + rir);
    	  frsh = -spd * spdfr;
    	  double sthmx = an / exri;
    	  double sthlmx = sthmx * rir;
    	  double uy = 1.0;
    	  fshmx = spd * (rir * (sqrt(uy - sthmx * sthmx) / sqrt(uy - sthlmx * sthlmx)) - uy);
    	}
    	// label 22
    #ifdef USE_NVTX
    	nvtxRangePush("Memory data loading");
    #endif
    	nlmmt = lm * (lm + 2) * 2;
    	nks = nksh * 2;
    	nkv = nks + 1;
    	// Array initialization
    	long swap1_size, swap2_size, tfrfme_size;
    	double size_mb;
    	printf("INFO: calculating memory requirements\n");
    	swap1_size = Swap1::get_size(lm, nkv);
    	size_mb = 1.0 * swap1_size / 1024.0 / 1024.0;
    	printf("Swap 1: %.2lg MB\n", size_mb);
    	swap2_size = Swap2::get_size(nkv);
    	size_mb = 1.0 * swap2_size / 1024.0 / 1024.0;
    	printf("Swap 2: %.2lg MB\n", size_mb);
    	tt2 = new Swap2(nkv);
    	vkv = tt2->get_vector();
    	vkzm = tt2->get_matrix();
    	// End of array initialization
    	double vkm = vk * exri;
    	vknmx = vk * an;
    	delk = vknmx / nksh;
    	delks = delk / vkm;
    	delks = delks * delks;
    	vxyzmx = acos(0.0) * 4.0 / vkm * wlenfr;
    	delxyz = vxyzmx / nrsh;
    	int nxs = nxsh * 2;
    	int nxv = nxs + 1;
    	int nxshpo = nxsh + 1;
    	int nys = nysh * 2;
    	int nyv = nys + 1;
    	int nyshpo = nysh + 1;
    	int nzs = nzsh * 2;
    	int nzv = nzs + 1;
    	int nzshpo = nzsh + 1;
    	tfrfme_size = TFRFME::get_memory_requirement(lmode, lm, nkv, nxv, nyv, nzv);
    	size_mb = 1.0 * tfrfme_size / 1024.0 / 1024.0;
    	printf("TFRFME: %.2lg MB\n", size_mb);
    	size_mb = 1.0 * (swap1_size + swap2_size + tfrfme_size) / 1024.0 / 1024.0;
    	printf("TOTAL: %.2lg MB\n", size_mb);
    	tfrfme = new TFRFME(lmode, lm, nkv, nxv, nyv, nzv);
    	double *_xv = tfrfme->get_x();
    	double *_yv = tfrfme->get_y();
    	double *_zv = tfrfme->get_z();
    	dcomplex **_wsum = tfrfme->get_matrix();
    #ifdef USE_NVTX
    	nvtxRangePop();
    #endif
    #ifdef USE_NVTX
    	nvtxRangePush("Looped vector initialization");
    #endif
    	for (int i24 = nxshpo; i24 <= nxs; i24++) {
    	  _xv[i24] = _xv[i24 - 1] + delxyz;
    	  _xv[nxv - i24 - 1] = -_xv[i24];
    	} // i24 loop
    	for (int i25 = nyshpo; i25 <= nys; i25++) {
    	  _yv[i25] = _yv[i25 - 1] + delxyz;
    	  _yv[nyv - i25 - 1] = -_yv[i25];
    	} // i25 loop
    	for (int i27 = nzshpo; i27 <= nzs; i27++) {
    	  _zv[i27] = _zv[i27 - 1] + delxyz;
    	  _zv[nzv - i27 - 1] = -_zv[i27];
    	} // i27 loop
    	int nrvc = nxv * nyv * nzv;
    	int nkshpo = nksh + 1;
    	for (int i28 = nkshpo; i28 <= nks; i28++) {
    	  vkv[i28] = vkv[i28 - 1] + delk;
    	  vkv[nkv - i28 - 1] = -vkv[i28];
    	} // i28 loop
    #ifdef USE_NVTX
    	nvtxRangePop();
    #endif
    	if (tfrfme != NULL) {
    #ifdef USE_NVTX
    	  nvtxRangePush("TFRFME initialization");
    #endif
    	  tfrfme->set_param("vk", vk);
    	  tfrfme->set_param("exri", exri);
    	  tfrfme->set_param("an", an);
    	  tfrfme->set_param("ff", ff);
    	  tfrfme->set_param("tra", tra);
    	  tfrfme->set_param("spd", spd);
    	  tfrfme->set_param("frsh", frsh);
    	  tfrfme->set_param("exril", exril);
    	  //fstream temptape1, temptape2;
    	  string temp_name1 = output_path + "/c_TEMPTAPE1.hd5";
    	  string temp_name2 = output_path + "/c_TEMPTAPE2.hd5";
    	  //temptape1.open(temp_name1.c_str(), ios::out | ios::binary);
    	  tt1 = new Swap1(lm, nkv);
    	  if (wk != NULL) delete[] wk;
    	  wk = frfmer(nkv, vkm, vknmx, apfafa, tra, spd, rir, ftcn, lm, lmode, pmf, tt1, tt2);
    	  tt1->write_binary(temp_name1, "HDF5");
    	  //delete tt1;
    	  tt2->set_param("apfafa", apfafa);
    	  tt2->set_param("pmf", pmf);
    	  tt2->set_param("spd", spd);
    	  tt2->set_param("rir", rir);
    	  tt2->set_param("ftcn", ftcn);
    	  tt2->set_param("fshmx", fshmx);
    	  tt2->set_param("vxyzmx", vxyzmx);
    	  tt2->set_param("delxyz", delxyz);
    	  tt2->set_param("vknmx", vknmx);
    	  tt2->set_param("delk", delk);
    	  tt2->set_param("delks", delks);
    	  tt2->set_param("nlmmt", 1.0 * nlmmt);
    	  tt2->set_param("nrvc", 1.0 * nrvc);
    	  tt2->write_binary(temp_name2, "HDF5");
    #ifdef USE_NVTX
    	  nvtxRangePop();
    #endif
    #ifdef USE_NVTX
    	  nvtxRangePush("j80 loop");
    #endif
    #pragma omp target parallel for
    	  for (int j80 = jlmf; j80 <= jlml; j80++) {
    	    // dcomplex *vec_w = new dcomplex[nkv * nkv]();
    	    // dcomplex **w = new dcomplex*[nkv];
    	    // dcomplex *wk_local = new dcomplex[nlmmt]();
    	    dcomplex *vec_w = (dcomplex *) calloc(nkv * nkv, sizeof(dcomplex));
    	    dcomplex **w = (dcomplex **) calloc(nkv, sizeof(dcomplex *));
    	    dcomplex *wk_local = (dcomplex *) calloc(nlmmt, sizeof(dcomplex));
    	    for (int wi = 0; wi < nkv; wi++) w[wi] = vec_w + wi * nkv;
    	    int wk_index = 0;
    	    for (int jy50 = 0; jy50 < nkv; jy50++) {
    	      for (int jx50 = 0; jx50 < nkv; jx50++) {
    		for (int wi = 0; wi < nlmmt; wi++) wk_local[wi] = tt1->wk[wk_index++];
    		w[jx50][jy50] = wk_local[j80 - 1];
    	      } // jx50
    	    } // jy50 loop
    	    int ixyz = 0;
    	    for (int wj = 0; wj < nrvc; wj++) _wsum[j80 - 1][wj] = cc0;
    	    for (int iz75 = 0; iz75 < nzv; iz75++) {
    	      double z = _zv[iz75] + frsh;
    	      for (int iy70 = 0; iy70 < nyv; iy70++) {
    		double y = _yv[iy70];
    		for (int ix65 = 0; ix65 < nxv; ix65++) {
    		  double x = _xv[ix65];
    		  ixyz++;
    		  dcomplex sumy = cc0;
    		  for (int jy60 = 0; jy60 < nkv; jy60++) {
    		    double vky = vkv[jy60];
    		    double vkx = vkv[nkv - 1];
    		    double vkzf = vkzm[0][jy60];
    		    dcomplex phasf = cexp(uim * (-vkx * x + vky * y + vkzf * z));
    		    double vkzl = vkzm[nkv - 1][jy60];
    		    dcomplex phasl = cexp(uim * (vkx * x + vky * y + vkzl * z));
    		    dcomplex sumx = 0.5 * (w[0][jy60] * phasf + w[nkv - 1][jy60] * phasl);
    		    for (int jx55 = 2; jx55 <= nks; jx55++) {
    		      vkx = vkv[jx55 - 1];
    		      double vkz = vkzm[jx55 - 1][jy60];
    		      dcomplex phas = cexp(uim * (vkx * x + vky * y + vkz * z));
    		      sumx += (w[jx55 - 1][jy60] * phas);
    		    } // jx55 loop
    		    if (jy60 == 0 || jy60 == nkv - 1) sumx *= 0.5;
    		    sumy += sumx;
    		  } // jy60 loop
    		  _wsum[j80 - 1][ixyz - 1] = sumy * delks;
    		} // ix65 loop
    	      } // iy70 loop
    	    } // iz75 loop
    	    // delete[] vec_w;
    	    // delete[] w;
    	    // delete[] wk_local;
    	    free(vec_w);
    	    free(w);
    	    free(wk_local);
    	  } // j80 loop
    #ifdef USE_NVTX
    	  nvtxRangePop();
    #endif
    	  // label 88
    #ifdef USE_NVTX
    	  nvtxRangePush("Closing operations");
    #endif
    	  tfrfme->write_binary(tfrfme_name, "HDF5");
    	  string output_name = output_path + "/c_OFRFME";
    	  FILE *output = fopen(output_name.c_str(), "w");
    	  fprintf(output, " IF JLML < NLMMT, PRESERVE TEMPTAPE1, TEMPTAPE2, AND TFRFRME,\n");
    	  fprintf(output, " AND RESTART LM RUN WITH JLMF = JLML+1\n");
    	  if (spd > 0.0) fprintf(output, "  FSHMX =%15.7lE\n", fshmx);
    	  fprintf(output, "  FRSH =%15.7lE\n", frsh);
    	  fclose(output);
    #ifdef USE_NVTX
    	  nvtxRangePop();
    #endif
    	} else { // Should never happen.
    	  printf("ERROR: could not open TFRFME file for output.\n");
    	}
          } else {
    	printf("ERROR: could not open TEDF file.\n");
          }
        } else { // label 98
          string output_name = output_path + "/c_OFRFME";
          FILE *output = fopen(output_name.c_str(), "w");
          fprintf(output, "  WRONG INPUT TAPE\n");
          fclose(output);
        }
    #ifdef USE_NVTX
        nvtxRangePop();
    #endif
      }
      // label 45
    #ifdef USE_NVTX
      nvtxRangePush("frfme() memory clean");
    #endif
      if (tfrfme != NULL) delete tfrfme;
      delete[] file_lines;
      if (tt2 != NULL) delete tt2;
      if (wk != NULL) delete[] wk;
      if (tt1 != NULL) delete tt1;
    #ifdef USE_NVTX
      nvtxRangePop();
    #endif
      printf("FRFME: Done.\n");
    #ifdef USE_NVTX
      nvtxRangePop();
    #endif
    }