Compare revisions

Giovanni La Mura · Giovanni La Mura · Giovanni La Mura · Giovanni La Mura · Giovanni La Mura · Giovanni La Mura
--- a/src/include/tfrfme.h
+++ b/src/include/tfrfme.h
@@ -63,6 +63,9 @@ protected:
  void write_legacy(const std::string& file_name);
 public:
+  //! \brief Read only view on WK.
+  const dcomplex *vec_wk;
  /*! \brief Swap1 instance constructor.
   *
   * \param lm: `int` Maximum field expansion order.
@@ -97,12 +100,6 @@ public:
   */
  static long get_memory_requirement(int lm, int _nkv);
-  /*! \brief Get the pointer to the WK vector.
-   *
-   * \return value: `complex double *` Memory address of the WK vector.
-   */
-  dcomplex *get_vector() { return wk; }
  /*! \brief Bring the pointer to the next element at the start of vector.
   */
  void reset() { last_index = 0; }

--- a/src/libnptm/tfrfme.cpp
+++ b/src/libnptm/tfrfme.cpp
@@ -52,6 +52,7 @@ Swap1::Swap1(int lm, int _nkv) {
  nlmmt = 2 * lm * (lm + 2);
  const int size = nkv * nkv * nlmmt;
  wk = new dcomplex[size]();
+  vec_wk = wk;
  last_index = 0;
 }
@@ -77,21 +78,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) {
  string str_type;
  int _nlmmt, _nkv, lm, num_elements, index;
  dcomplex value;
-  dcomplex *_wk = NULL;
  if (status == 0) {
    status = hdf_file->read("NLMMT", "INT32", &_nlmmt);
    status = hdf_file->read("NKV", "INT32", &_nkv);
-    lm = (int)((-2.0 + sqrt(4.0 + 2.0 * _nlmmt)) / 2.0);
+    lm = (int)(sqrt(4.0 + 2.0 * _nlmmt) / 2.0) - 1;
    num_elements = 2 * _nlmmt * _nkv * _nkv;
    instance = new Swap1(lm, _nkv);
-    _wk = instance->get_vector();
    elements = new double[num_elements]();
    str_type = "FLOAT64_(" + to_string(num_elements) + ")";
    status = hdf_file->read("WK", str_type, elements);
    for (int wi = 0; wi < num_elements / 2; wi++) {
      index = 2 * wi;
      value = elements[index] + elements[index + 1] * I;
-      _wk[wi] = value;
+      instance->wk[wi] = value;
    } // wi loop
    delete[] elements;
    status = hdf_file->close();
@@ -103,21 +102,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) {
 Swap1* Swap1::from_legacy(const std::string& file_name) {
  fstream input;
  Swap1 *instance = NULL;
-  dcomplex *_wk = NULL;
  int _nlmmt, _nkv, lm;
  double rval, ival;
  input.open(file_name.c_str(), ios::in | ios::binary);
  if (input.is_open()) {
    input.read(reinterpret_cast<char *>(&_nlmmt), sizeof(int));
-    lm = (int)((-2.0 + sqrt(4.0 + 2.0 * _nlmmt)) / 2.0);
+    lm = (int)(sqrt(4.0 + 2.0 * _nlmmt) / 2.0) - 1;
    input.read(reinterpret_cast<char *>(&_nkv), sizeof(int));
    instance = new Swap1(lm, _nkv);
-    _wk = instance->get_vector();
    int num_elements = _nlmmt * _nkv * _nkv;
    for (int j = 0; j < num_elements; j++) {
      input.read(reinterpret_cast<char *>(&rval), sizeof(double));
      input.read(reinterpret_cast<char *>(&ival), sizeof(double));
-      _wk[j] = rval + ival * I;
+      instance->wk[j] = rval + ival * I;
    }
    input.close();
  } else {

--- a/src/libnptm/tra_subs.cpp
+++ b/src/libnptm/tra_subs.cpp
--- a/src/scripts/model_maker.py
+++ b/src/scripts/model_maker.py
@@ -385,17 +385,25 @@ def load_model(model_file):
                    rnd_engine = "COMPACT"
                if (rnd_engine == "COMPACT"):
                    check = random_compact(sconf, gconf, rnd_seed, max_rad)
-                    if (check == 1):
+                    if (check == -1):
                        print("ERROR: compact random generator works only when all sphere types have the same radius.")
                        return (None, None)
+                    elif (check == -2):
+                        print("ERROR: sub-particle radius larger than particle radius.")
+                        return (None, None)
+                    elif (check == -3):
+                        print("ERROR: requested number of spheres cannot fit in allowed volume.")
+                        return (None, None)
                elif (rnd_engine == "LOOSE"):
                    # random_aggregate() checks internally whether application is INCLUSION
                    check = random_aggregate(sconf, gconf, rnd_seed, max_rad)
                else:
                    print("ERROR: unrecognized random generator engine.")
                    return (None, None)
-                if (check != 0):
+                if (check != sconf['nsph']):
-                    print("WARNING: %d sphere(s) could not be placed."%check)
+                    print("WARNING: placed only %d out of %d requested spheres."%(check, sconf['nsph']))
+                    sconf['nsph'] = check
+                    gconf['nsph'] = check
            else:
                if (len(model['geometry_settings']['x_coords']) != gconf['nsph']):
                    print("ERROR: coordinate vectors do not match the number of spheres!")
@@ -588,15 +596,19 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100):
    vec_thetas = [0.0 for i in range(nsph)]
    vec_phis = [0.0 for i in range(nsph)]
    vec_rads = [0.0 for i in range(nsph)]
+    vec_types = []
    n_types = scatterer['configurations']
    if (0 in scatterer['vec_types']):
        tincrement = 1 if scatterer['application'] != "INCLUSION" else 2
        for ti in range(nsph):
            itype = tincrement + int(n_types * random.random())
            scatterer['vec_types'][ti] = itype
+        if (scatterer['application'] == "INCLUSION"):
+            scatterer['vec_types'][0] = 1
    sph_type_index = scatterer['vec_types'][0] - 1
    vec_spheres = [{'itype': sph_type_index + 1, 'x': 0.0, 'y': 0.0, 'z': 0.0}]
    vec_rads[0] = scatterer['ros'][sph_type_index]
+    vec_types.append(sph_type_index + 1)
    placed_spheres = 1
    attempts = 0
    for i in range(1, nsph):
@@ -670,7 +682,9 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100):
            })
            is_placed = True
            placed_spheres += 1
+            vec_types.append(sph_type_index + 1)
            attempts = 0
+    scatterer['vec_types'] = vec_types
    sph_index = 0
    for sphere in sorted(vec_spheres, key=lambda item: item['itype']):
        scatterer['vec_types'][sph_index] = sphere['itype']
@@ -678,6 +692,7 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100):
        geometry['vec_sph_y'][sph_index] = sphere['y']
        geometry['vec_sph_z'][sph_index] = sphere['z']
        sph_index += 1
+    result = placed_spheres
    return result
 ## \brief Generate a random compact cluster from YAML configuration options.
@@ -700,45 +715,30 @@ def random_compact(scatterer, geometry, seed, max_rad):
    random.seed(seed)
    nsph = scatterer['nsph']
    n_types = scatterer['configurations']
-    if (0 in scatterer['vec_types']):
+    radius = scatterer['ros'][0]
-        tincrement = 1 if scatterer['application'] != "INCLUSION" else 2
+    # Return an error code if types have different radii
-        for ti in range(nsph):
-            itype = tincrement + int(n_types * random.random())
-            scatterer['vec_types'][ti] = itype
    if (max(scatterer['ros']) != min(scatterer['ros'])):
-        result = 1
+        result = -1
+    elif (radius > max_rad):
+        # Requested spheres are larger than the maximum allowed volume.
+        # End function with error code -2.
+        result = -2
    else:
-        radius = scatterer['ros'][0]
+        x_centers = np.arange(-1.0 * max_rad + 2.0 * radius, max_rad, 2.0 * radius)
-        x_centers = np.arange(-1.0 * max_rad + radius, max_rad, 2.0 * radius)
+        x_size = len(x_centers)
-        y_centers = np.arange(-1.0 * max_rad + radius, max_rad, math.sqrt(3.0) * radius)
+        y_size = int(2.0 * max_rad / ((1.0 + math.sqrt(3.0) / 3.0) * radius))
-        z_centers = np.arange(-1.0 * max_rad + radius, max_rad, math.sqrt(3.0) * radius)
+        z_size = int(2.0 * max_rad / ((1.0 + 2.0 * math.sqrt(6.0) / 3.0) * radius))
-        x_offset = radius
-        y_offset = radius
-        x_layer_offset = radius
-        y_layer_offset = radius / math.sqrt(3.0)
        tmp_spheres = []
-        n_cells = len(x_centers) * len(y_centers) * len(z_centers)
+        n_cells = x_size * y_size * z_size
        print("INFO: the cubic space would contain %d spheres."%n_cells)
-        n_max_spheres = int((max_rad / radius) * (max_rad / radius) * (max_rad / radius) * 0.74)
+        k = 0
-        print("INFO: the maximum radius allows for %d spheres."%n_max_spheres)
+        z = -max_rad + radius
-        for zi in range(len(z_centers)):
+        while (z < max_rad - radius):
-            if (x_layer_offset == 0.0):
+            j = 0
-                x_layer_offset = radius
+            y = -max_rad + radius
-            else:
+            while (y < max_rad - radius):
-                x_layer_offset = 0.0
+                for i in range(len(x_centers)):
-            if (y_offset == 0.0):
+                    x = (2 * (i + 1) + (j + k) % 2) * radius - max_rad
-                y_offset = radius
-            else:
-                y_offset = 0.0
-            for yi in range(len(y_centers)):
-                if (x_offset == 0.0):
-                    x_offset = radius
-                else:
-                    x_offset = 0.0
-                for xi in range(len(x_centers)):
-                    x = x_centers[xi] + x_offset + x_layer_offset
-                    y = y_centers[yi] + y_offset
-                    z = z_centers[zi]
                    extent = radius + math.sqrt(x * x + y * y + z * z)
                    if (extent < max_rad):
                        tmp_spheres.append({
@@ -747,6 +747,11 @@ def random_compact(scatterer, geometry, seed, max_rad):
                            'y': y,
                            'z': z
                        })
+                #
+                j += 1
+                y = math.sqrt(3.0) * (j + (k % 2) / 3.0) * radius - max_rad + radius
+            k += 1
+            z = 2.0 / 3.0 * math.sqrt(6.0) * k * radius - max_rad + radius
        #tmp_spheres = [{'itype': 1, 'x': 0.0, 'y': 0.0, 'z': 0.0}]
        current_n = len(tmp_spheres)
        print("INFO: before erosion there are %d spheres in use."%current_n)
@@ -778,20 +783,20 @@ def random_compact(scatterer, geometry, seed, max_rad):
            current_n -= 1
        vec_spheres = []
        sph_index = 0
+        # Generate a vector of types if none is given
+        if (0 in scatterer['vec_types']):
+            tincrement = 1 if scatterer['application'] != "INCLUSION" else 2
+            for ti in range(current_n):
+                itype = tincrement + int(n_types * random.random())
+                scatterer['vec_types'][ti] = itype
+            if (scatterer['application'] == "INCLUSION"):
+                scatterer['vec_types'][0] = 1
        for ti in range(len(tmp_spheres)):
            sphere = tmp_spheres[ti]
            if (sphere['x'] < max_rad):
                sphere['itype'] = scatterer['vec_types'][sph_index]
                sph_index += 1
                vec_spheres.append(sphere)
-        #pl = pv.Plotter()
-        #for si in range(len(vec_spheres)):
-        #    x = vec_spheres[si]['x'] / max_rad
-        #    y = vec_spheres[si]['y'] / max_rad
-        #    z = vec_spheres[si]['z'] / max_rad
-        #    mesh = pv.Sphere(radius / max_rad, (x, y, z))
-        #    pl.add_mesh(mesh)
-        #pl.export_obj("scene.obj")
        sph_index = 0
        for sphere in sorted(vec_spheres, key=lambda item: item['itype']):
            scatterer['vec_types'][sph_index] = sphere['itype']
@@ -799,7 +804,7 @@ def random_compact(scatterer, geometry, seed, max_rad):
            geometry['vec_sph_y'][sph_index] = sphere['y']
            geometry['vec_sph_z'][sph_index] = sphere['z']
            sph_index += 1
-    return result
+    return current_n
 ## \brief Write the geometry configuration dictionary to legacy format.
 #

--- a/src/sphere/sphere.cpp
+++ b/src/sphere/sphere.cpp
@@ -576,6 +576,12 @@ int sphere_jxi488_cycle(
    oi->vec_vk[jxindex] = vk;
    oi->vec_xi[jxindex] = xi;
  }
+  // Adaptive definition of L_MAX
+  double wavelength = 2.0 * pi / vk;
+  double size_param = 2.0 * pi * sconf->get_radius(0) / wavelength;
+  int N = int(size_param + 4.05 * pow(size_param, 1.0 / 3.0)) + 2;
+  if (N < l_max) l_max = N;
+  // End of adaptive definition of L_MAX
  vtppoanp->append_line(VirtualBinaryLine(vk));
  double thsca = (gconf->isam > 1) ? sa->ths - sa->th : 0.0;
  for (int i132 = 0; i132 < nsph; i132++) {

--- a/src/trapping/cfrfme.cpp
+++ b/src/trapping/cfrfme.cpp
@@ -77,7 +77,6 @@ void frfme(string data_file, string output_path) {
  Swap2 *tt2 = NULL;
  char namef[7];
  char more;
-  dcomplex **w = NULL;
  dcomplex *wk = NULL;
  const dcomplex cc0 = 0.0 + 0.0 * I;
  const dcomplex uim = 0.0 + 1.0 * I;
@@ -105,6 +104,9 @@ void frfme(string data_file, string output_path) {
  int wsum_size;
  // End of vector size variables
  if (jlmf != 1) {
+#ifdef USE_NVTX
+    nvtxRangePush("frfme() with jlmf != 1");
+#endif
    int nxv, nyv, nzv;
    if (tfrfme == NULL) tfrfme = TFRFME::from_binary(tfrfme_name, "HDF5");
    if (tfrfme != NULL) {
@@ -147,7 +149,16 @@ void frfme(string data_file, string output_path) {
      printf("ERROR: could not open TFRFME file.\n");
    }
    nks = nkv - 1;
-  } else { // label 16
+#ifdef USE_NVTX
+    nvtxRangePop();
+#endif
+  } else { // label 16; jlfm = 1
+#ifdef USE_NVTX
+    nvtxRangePush("frfme() with jlmf == 1");
+#endif
+#ifdef USE_NVTX
+    nvtxRangePush("Setup operations");
+#endif
    int nksh, nrsh, nxsh, nysh, nzsh;
    str_target = file_lines[last_read_line++];
    for (int cli = 0; cli < 7; cli++) {
@@ -183,6 +194,9 @@ void frfme(string data_file, string output_path) {
    }
    str_target = file_lines[last_read_line++];
    re = regex("[eEmM]");
+#ifdef USE_NVTX
+    nvtxRangePop();
+#endif
    if (regex_search(str_target, m, re)) {
      more = m.str().at(0);
      if (more == 'm' || more == 'M') {
@@ -200,6 +214,9 @@ void frfme(string data_file, string output_path) {
      string tedf_name = output_path + "/" + namef + ".hd5";
      ScattererConfiguration *tedf = ScattererConfiguration::from_binary(tedf_name, "HDF5");
      if (tedf != NULL) {
+#ifdef USE_NVTX
+	nvtxRangePush("TEDF data import");
+#endif
 	int iduml, idum;
 	iduml = tedf->number_of_spheres;
 	idum = tedf->get_iog(iduml - 1);
@@ -223,6 +240,9 @@ void frfme(string data_file, string output_path) {
 	  xi = xip;
 	}
 	// label 20
+#ifdef USE_NVTX
+	nvtxRangePop();
+#endif
 	delete tedf;
 	double wn = wp / 3.0e8;
 	vk = xi * wn;
@@ -243,6 +263,9 @@ void frfme(string data_file, string output_path) {
 	  fshmx = spd * (rir * (sqrt(uy - sthmx * sthmx) / sqrt(uy - sthlmx * sthlmx)) - uy);
 	}
 	// label 22
+#ifdef USE_NVTX
+	nvtxRangePush("Memory data loading");
+#endif
 	nlmmt = lm * (lm + 2) * 2;
 	nks = nksh * 2;
 	nkv = nks + 1;
@@ -286,6 +309,12 @@ void frfme(string data_file, string output_path) {
 	double *_yv = tfrfme->get_y();
 	double *_zv = tfrfme->get_z();
 	dcomplex **_wsum = tfrfme->get_matrix();
+#ifdef USE_NVTX
+	nvtxRangePop();
+#endif
+#ifdef USE_NVTX
+	nvtxRangePush("Looped vector initialization");
+#endif
 	for (int i24 = nxshpo; i24 <= nxs; i24++) {
 	  _xv[i24] = _xv[i24 - 1] + delxyz;
 	  _xv[nxv - i24 - 1] = -_xv[i24];
@@ -304,7 +333,13 @@ void frfme(string data_file, string output_path) {
 	  vkv[i28] = vkv[i28 - 1] + delk;
 	  vkv[nkv - i28 - 1] = -vkv[i28];
 	} // i28 loop
+#ifdef USE_NVTX
+	nvtxRangePop();
+#endif
 	if (tfrfme != NULL) {
+#ifdef USE_NVTX
+	  nvtxRangePush("TFRFME initialization");
+#endif
 	  tfrfme->set_param("vk", vk);
 	  tfrfme->set_param("exri", exri);
 	  tfrfme->set_param("an", an);
@@ -336,19 +371,20 @@ void frfme(string data_file, string output_path) {
 	  tt2->set_param("nlmmt", 1.0 * nlmmt);
 	  tt2->set_param("nrvc", 1.0 * nrvc);
 	  tt2->write_binary(temp_name2, "HDF5");
+#ifdef USE_NVTX
+	  nvtxRangePop();
+#endif
+	  dcomplex *vec_w = new dcomplex[nkv * nkv]();
+	  dcomplex **w = new dcomplex*[nkv];
+	  for (int wi = 0; wi < nkv; wi++) w[wi] = vec_w + wi * nkv;
+#ifdef USE_NVTX
+	  nvtxRangePush("j80 loop");
+#endif
 	  for (int j80 = jlmf; j80 <= jlml; j80++) {
-	    dcomplex *tt1_wk = tt1->get_vector();
 	    int wk_index = 0;
-	    // w matrix
-	    if (w != NULL) {
-	      for (int wi = nkv - 1; wi > -1; wi--) delete[] w[wi];
-	      delete[] w;
-	    }
-	    w = new dcomplex*[nkv];
-	    for (int wi = 0; wi < nkv; wi++) w[wi] = new dcomplex[nkv]();
 	    for (int jy50 = 0; jy50 < nkv; jy50++) {
 	      for (int jx50 = 0; jx50 < nkv; jx50++) {
-		for (int wi = 0; wi < nlmmt; wi++) wk[wi] = tt1_wk[wk_index++];
+		for (int wi = 0; wi < nlmmt; wi++) wk[wi] = tt1->vec_wk[wk_index++];
 		w[jx50][jy50] = wk[j80 - 1];
 	      } // jx50
 	    } // jy50 loop
@@ -384,7 +420,15 @@ void frfme(string data_file, string output_path) {
 	      } // iy70 loop
 	    } // iz75 loop
 	  } // j80 loop
+	  delete[] vec_w;
+	  delete[] w;
+#ifdef USE_NVTX
+	  nvtxRangePop();
+#endif
 	  // label 88
+#ifdef USE_NVTX
+	  nvtxRangePush("Closing operations");
+#endif
 	  tfrfme->write_binary(tfrfme_name, "HDF5");
 	  string output_name = output_path + "/c_OFRFME";
 	  FILE *output = fopen(output_name.c_str(), "w");
@@ -393,6 +437,9 @@ void frfme(string data_file, string output_path) {
 	  if (spd > 0.0) fprintf(output, "  FSHMX =%15.7lE\n", fshmx);
 	  fprintf(output, "  FRSH =%15.7lE\n", frsh);
 	  fclose(output);
+#ifdef USE_NVTX
+	  nvtxRangePop();
+#endif
 	} else { // Should never happen.
 	  printf("ERROR: could not open TFRFME file for output.\n");
 	}
@@ -405,17 +452,22 @@ void frfme(string data_file, string output_path) {
      fprintf(output, "  WRONG INPUT TAPE\n");
      fclose(output);
    }
+#ifdef USE_NVTX
+    nvtxRangePop();
+#endif
  }
  // label 45
+#ifdef USE_NVTX
+  nvtxRangePush("frfme() memory clean");
+#endif
  if (tfrfme != NULL) delete tfrfme;
  delete[] file_lines;
  if (tt2 != NULL) delete tt2;
-  if (w != NULL) {
-    for (int wi = nkv - 1; wi > -1; wi--) delete[] w[wi];
-    delete[] w;
-  }
  if (wk != NULL) delete[] wk;
  if (tt1 != NULL) delete tt1;
+#ifdef USE_NVTX
+  nvtxRangePop();
+#endif
  printf("FRFME: Done.\n");
 #ifdef USE_NVTX
  nvtxRangePop();

--- a/src/trapping/clffft.cpp
+++ b/src/trapping/clffft.cpp
@@ -56,6 +56,10 @@
 #include "../include/tra_subs.h"
 #endif
+#ifdef USE_NVTX
+#include <nvtx3/nvToolsExt.h>
+#endif
 using namespace std;
 /*! \brief C++ implementation of LFFFT
@@ -64,6 +68,9 @@ using namespace std;
 *  \param output_path: `string` Directory to write the output files in.
 */
 void lffft(string data_file, string output_path) {
+#ifdef USE_NVTX
+  nvtxRangePush("Running lffft()");
+#endif
  const dcomplex uim = 0.0 + 1.0 * I;
  const double sq2i = 1.0 / sqrt(2.0);
  const dcomplex sq2iti = sq2i * uim;
@@ -476,4 +483,7 @@ void lffft(string data_file, string output_path) {
  delete ccr;
  delete[] file_lines;
  printf("LFFT: Done.\n");
+#ifdef USE_NVTX
+  nvtxRangePop();
+#endif
 }
No results found