Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • 3-error-in-run-the-program
  • containers
  • containers-m10
  • containers-m8
  • enable_svd
  • experiment
  • magma_refinement
  • main
  • master
  • offload_trapping
  • original
  • parallel_angles
  • parallel_angles_gmu
  • parallel_trapping
  • profile_omp_leonardo
  • profile_trapping
  • release9
  • script_devel
  • shaditest
  • test1
  • test_nvidia_profiler
  • unify_iterations
  • NP_TMcode-M10a.00
  • NP_TMcode-M10a.01
  • NP_TMcode-M10a.02
  • NP_TMcode-M10a.03
  • NP_TMcode-M7.00
  • NP_TMcode-M8.00
  • NP_TMcode-M8.01
  • NP_TMcode-M8.02
  • NP_TMcode-M8.03
  • NP_TMcode-M9.00
  • NP_TMcode-M9.01
  • v0.0
34 results

Target

Select target project
  • giacomo.mulas/np_tmcode
1 result
Select Git revision
  • 3-error-in-run-the-program
  • containers
  • containers-m10
  • containers-m8
  • enable_svd
  • experiment
  • magma_refinement
  • main
  • master
  • offload_trapping
  • original
  • parallel_angles
  • parallel_angles_gmu
  • parallel_trapping
  • profile_omp_leonardo
  • profile_trapping
  • release9
  • script_devel
  • shaditest
  • test1
  • test_nvidia_profiler
  • unify_iterations
  • NP_TMcode-M10a.00
  • NP_TMcode-M10a.01
  • NP_TMcode-M10a.02
  • NP_TMcode-M10a.03
  • NP_TMcode-M7.00
  • NP_TMcode-M8.00
  • NP_TMcode-M8.01
  • NP_TMcode-M8.02
  • NP_TMcode-M8.03
  • NP_TMcode-M9.00
  • NP_TMcode-M9.01
  • v0.0
34 results
Show changes
Commits on Source (6)
...@@ -63,6 +63,9 @@ protected: ...@@ -63,6 +63,9 @@ protected:
void write_legacy(const std::string& file_name); void write_legacy(const std::string& file_name);
public: public:
//! \brief Read only view on WK.
const dcomplex *vec_wk;
/*! \brief Swap1 instance constructor. /*! \brief Swap1 instance constructor.
* *
* \param lm: `int` Maximum field expansion order. * \param lm: `int` Maximum field expansion order.
...@@ -97,12 +100,6 @@ public: ...@@ -97,12 +100,6 @@ public:
*/ */
static long get_memory_requirement(int lm, int _nkv); static long get_memory_requirement(int lm, int _nkv);
/*! \brief Get the pointer to the WK vector.
*
* \return value: `complex double *` Memory address of the WK vector.
*/
dcomplex *get_vector() { return wk; }
/*! \brief Bring the pointer to the next element at the start of vector. /*! \brief Bring the pointer to the next element at the start of vector.
*/ */
void reset() { last_index = 0; } void reset() { last_index = 0; }
......
...@@ -52,6 +52,7 @@ Swap1::Swap1(int lm, int _nkv) { ...@@ -52,6 +52,7 @@ Swap1::Swap1(int lm, int _nkv) {
nlmmt = 2 * lm * (lm + 2); nlmmt = 2 * lm * (lm + 2);
const int size = nkv * nkv * nlmmt; const int size = nkv * nkv * nlmmt;
wk = new dcomplex[size](); wk = new dcomplex[size]();
vec_wk = wk;
last_index = 0; last_index = 0;
} }
...@@ -77,21 +78,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) { ...@@ -77,21 +78,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) {
string str_type; string str_type;
int _nlmmt, _nkv, lm, num_elements, index; int _nlmmt, _nkv, lm, num_elements, index;
dcomplex value; dcomplex value;
dcomplex *_wk = NULL;
if (status == 0) { if (status == 0) {
status = hdf_file->read("NLMMT", "INT32", &_nlmmt); status = hdf_file->read("NLMMT", "INT32", &_nlmmt);
status = hdf_file->read("NKV", "INT32", &_nkv); status = hdf_file->read("NKV", "INT32", &_nkv);
lm = (int)((-2.0 + sqrt(4.0 + 2.0 * _nlmmt)) / 2.0); lm = (int)(sqrt(4.0 + 2.0 * _nlmmt) / 2.0) - 1;
num_elements = 2 * _nlmmt * _nkv * _nkv; num_elements = 2 * _nlmmt * _nkv * _nkv;
instance = new Swap1(lm, _nkv); instance = new Swap1(lm, _nkv);
_wk = instance->get_vector();
elements = new double[num_elements](); elements = new double[num_elements]();
str_type = "FLOAT64_(" + to_string(num_elements) + ")"; str_type = "FLOAT64_(" + to_string(num_elements) + ")";
status = hdf_file->read("WK", str_type, elements); status = hdf_file->read("WK", str_type, elements);
for (int wi = 0; wi < num_elements / 2; wi++) { for (int wi = 0; wi < num_elements / 2; wi++) {
index = 2 * wi; index = 2 * wi;
value = elements[index] + elements[index + 1] * I; value = elements[index] + elements[index + 1] * I;
_wk[wi] = value; instance->wk[wi] = value;
} // wi loop } // wi loop
delete[] elements; delete[] elements;
status = hdf_file->close(); status = hdf_file->close();
...@@ -103,21 +102,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) { ...@@ -103,21 +102,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) {
Swap1* Swap1::from_legacy(const std::string& file_name) { Swap1* Swap1::from_legacy(const std::string& file_name) {
fstream input; fstream input;
Swap1 *instance = NULL; Swap1 *instance = NULL;
dcomplex *_wk = NULL;
int _nlmmt, _nkv, lm; int _nlmmt, _nkv, lm;
double rval, ival; double rval, ival;
input.open(file_name.c_str(), ios::in | ios::binary); input.open(file_name.c_str(), ios::in | ios::binary);
if (input.is_open()) { if (input.is_open()) {
input.read(reinterpret_cast<char *>(&_nlmmt), sizeof(int)); input.read(reinterpret_cast<char *>(&_nlmmt), sizeof(int));
lm = (int)((-2.0 + sqrt(4.0 + 2.0 * _nlmmt)) / 2.0); lm = (int)(sqrt(4.0 + 2.0 * _nlmmt) / 2.0) - 1;
input.read(reinterpret_cast<char *>(&_nkv), sizeof(int)); input.read(reinterpret_cast<char *>(&_nkv), sizeof(int));
instance = new Swap1(lm, _nkv); instance = new Swap1(lm, _nkv);
_wk = instance->get_vector();
int num_elements = _nlmmt * _nkv * _nkv; int num_elements = _nlmmt * _nkv * _nkv;
for (int j = 0; j < num_elements; j++) { for (int j = 0; j < num_elements; j++) {
input.read(reinterpret_cast<char *>(&rval), sizeof(double)); input.read(reinterpret_cast<char *>(&rval), sizeof(double));
input.read(reinterpret_cast<char *>(&ival), sizeof(double)); input.read(reinterpret_cast<char *>(&ival), sizeof(double));
_wk[j] = rval + ival * I; instance->wk[j] = rval + ival * I;
} }
input.close(); input.close();
} else { } else {
......
...@@ -385,17 +385,25 @@ def load_model(model_file): ...@@ -385,17 +385,25 @@ def load_model(model_file):
rnd_engine = "COMPACT" rnd_engine = "COMPACT"
if (rnd_engine == "COMPACT"): if (rnd_engine == "COMPACT"):
check = random_compact(sconf, gconf, rnd_seed, max_rad) check = random_compact(sconf, gconf, rnd_seed, max_rad)
if (check == 1): if (check == -1):
print("ERROR: compact random generator works only when all sphere types have the same radius.") print("ERROR: compact random generator works only when all sphere types have the same radius.")
return (None, None) return (None, None)
elif (check == -2):
print("ERROR: sub-particle radius larger than particle radius.")
return (None, None)
elif (check == -3):
print("ERROR: requested number of spheres cannot fit in allowed volume.")
return (None, None)
elif (rnd_engine == "LOOSE"): elif (rnd_engine == "LOOSE"):
# random_aggregate() checks internally whether application is INCLUSION # random_aggregate() checks internally whether application is INCLUSION
check = random_aggregate(sconf, gconf, rnd_seed, max_rad) check = random_aggregate(sconf, gconf, rnd_seed, max_rad)
else: else:
print("ERROR: unrecognized random generator engine.") print("ERROR: unrecognized random generator engine.")
return (None, None) return (None, None)
if (check != 0): if (check != sconf['nsph']):
print("WARNING: %d sphere(s) could not be placed."%check) print("WARNING: placed only %d out of %d requested spheres."%(check, sconf['nsph']))
sconf['nsph'] = check
gconf['nsph'] = check
else: else:
if (len(model['geometry_settings']['x_coords']) != gconf['nsph']): if (len(model['geometry_settings']['x_coords']) != gconf['nsph']):
print("ERROR: coordinate vectors do not match the number of spheres!") print("ERROR: coordinate vectors do not match the number of spheres!")
...@@ -588,15 +596,19 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100): ...@@ -588,15 +596,19 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100):
vec_thetas = [0.0 for i in range(nsph)] vec_thetas = [0.0 for i in range(nsph)]
vec_phis = [0.0 for i in range(nsph)] vec_phis = [0.0 for i in range(nsph)]
vec_rads = [0.0 for i in range(nsph)] vec_rads = [0.0 for i in range(nsph)]
vec_types = []
n_types = scatterer['configurations'] n_types = scatterer['configurations']
if (0 in scatterer['vec_types']): if (0 in scatterer['vec_types']):
tincrement = 1 if scatterer['application'] != "INCLUSION" else 2 tincrement = 1 if scatterer['application'] != "INCLUSION" else 2
for ti in range(nsph): for ti in range(nsph):
itype = tincrement + int(n_types * random.random()) itype = tincrement + int(n_types * random.random())
scatterer['vec_types'][ti] = itype scatterer['vec_types'][ti] = itype
if (scatterer['application'] == "INCLUSION"):
scatterer['vec_types'][0] = 1
sph_type_index = scatterer['vec_types'][0] - 1 sph_type_index = scatterer['vec_types'][0] - 1
vec_spheres = [{'itype': sph_type_index + 1, 'x': 0.0, 'y': 0.0, 'z': 0.0}] vec_spheres = [{'itype': sph_type_index + 1, 'x': 0.0, 'y': 0.0, 'z': 0.0}]
vec_rads[0] = scatterer['ros'][sph_type_index] vec_rads[0] = scatterer['ros'][sph_type_index]
vec_types.append(sph_type_index + 1)
placed_spheres = 1 placed_spheres = 1
attempts = 0 attempts = 0
for i in range(1, nsph): for i in range(1, nsph):
...@@ -670,7 +682,9 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100): ...@@ -670,7 +682,9 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100):
}) })
is_placed = True is_placed = True
placed_spheres += 1 placed_spheres += 1
vec_types.append(sph_type_index + 1)
attempts = 0 attempts = 0
scatterer['vec_types'] = vec_types
sph_index = 0 sph_index = 0
for sphere in sorted(vec_spheres, key=lambda item: item['itype']): for sphere in sorted(vec_spheres, key=lambda item: item['itype']):
scatterer['vec_types'][sph_index] = sphere['itype'] scatterer['vec_types'][sph_index] = sphere['itype']
...@@ -678,6 +692,7 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100): ...@@ -678,6 +692,7 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100):
geometry['vec_sph_y'][sph_index] = sphere['y'] geometry['vec_sph_y'][sph_index] = sphere['y']
geometry['vec_sph_z'][sph_index] = sphere['z'] geometry['vec_sph_z'][sph_index] = sphere['z']
sph_index += 1 sph_index += 1
result = placed_spheres
return result return result
## \brief Generate a random compact cluster from YAML configuration options. ## \brief Generate a random compact cluster from YAML configuration options.
...@@ -700,45 +715,30 @@ def random_compact(scatterer, geometry, seed, max_rad): ...@@ -700,45 +715,30 @@ def random_compact(scatterer, geometry, seed, max_rad):
random.seed(seed) random.seed(seed)
nsph = scatterer['nsph'] nsph = scatterer['nsph']
n_types = scatterer['configurations'] n_types = scatterer['configurations']
if (0 in scatterer['vec_types']): radius = scatterer['ros'][0]
tincrement = 1 if scatterer['application'] != "INCLUSION" else 2 # Return an error code if types have different radii
for ti in range(nsph):
itype = tincrement + int(n_types * random.random())
scatterer['vec_types'][ti] = itype
if (max(scatterer['ros']) != min(scatterer['ros'])): if (max(scatterer['ros']) != min(scatterer['ros'])):
result = 1 result = -1
elif (radius > max_rad):
# Requested spheres are larger than the maximum allowed volume.
# End function with error code -2.
result = -2
else: else:
radius = scatterer['ros'][0] x_centers = np.arange(-1.0 * max_rad + 2.0 * radius, max_rad, 2.0 * radius)
x_centers = np.arange(-1.0 * max_rad + radius, max_rad, 2.0 * radius) x_size = len(x_centers)
y_centers = np.arange(-1.0 * max_rad + radius, max_rad, math.sqrt(3.0) * radius) y_size = int(2.0 * max_rad / ((1.0 + math.sqrt(3.0) / 3.0) * radius))
z_centers = np.arange(-1.0 * max_rad + radius, max_rad, math.sqrt(3.0) * radius) z_size = int(2.0 * max_rad / ((1.0 + 2.0 * math.sqrt(6.0) / 3.0) * radius))
x_offset = radius
y_offset = radius
x_layer_offset = radius
y_layer_offset = radius / math.sqrt(3.0)
tmp_spheres = [] tmp_spheres = []
n_cells = len(x_centers) * len(y_centers) * len(z_centers) n_cells = x_size * y_size * z_size
print("INFO: the cubic space would contain %d spheres."%n_cells) print("INFO: the cubic space would contain %d spheres."%n_cells)
n_max_spheres = int((max_rad / radius) * (max_rad / radius) * (max_rad / radius) * 0.74) k = 0
print("INFO: the maximum radius allows for %d spheres."%n_max_spheres) z = -max_rad + radius
for zi in range(len(z_centers)): while (z < max_rad - radius):
if (x_layer_offset == 0.0): j = 0
x_layer_offset = radius y = -max_rad + radius
else: while (y < max_rad - radius):
x_layer_offset = 0.0 for i in range(len(x_centers)):
if (y_offset == 0.0): x = (2 * (i + 1) + (j + k) % 2) * radius - max_rad
y_offset = radius
else:
y_offset = 0.0
for yi in range(len(y_centers)):
if (x_offset == 0.0):
x_offset = radius
else:
x_offset = 0.0
for xi in range(len(x_centers)):
x = x_centers[xi] + x_offset + x_layer_offset
y = y_centers[yi] + y_offset
z = z_centers[zi]
extent = radius + math.sqrt(x * x + y * y + z * z) extent = radius + math.sqrt(x * x + y * y + z * z)
if (extent < max_rad): if (extent < max_rad):
tmp_spheres.append({ tmp_spheres.append({
...@@ -747,6 +747,11 @@ def random_compact(scatterer, geometry, seed, max_rad): ...@@ -747,6 +747,11 @@ def random_compact(scatterer, geometry, seed, max_rad):
'y': y, 'y': y,
'z': z 'z': z
}) })
#
j += 1
y = math.sqrt(3.0) * (j + (k % 2) / 3.0) * radius - max_rad + radius
k += 1
z = 2.0 / 3.0 * math.sqrt(6.0) * k * radius - max_rad + radius
#tmp_spheres = [{'itype': 1, 'x': 0.0, 'y': 0.0, 'z': 0.0}] #tmp_spheres = [{'itype': 1, 'x': 0.0, 'y': 0.0, 'z': 0.0}]
current_n = len(tmp_spheres) current_n = len(tmp_spheres)
print("INFO: before erosion there are %d spheres in use."%current_n) print("INFO: before erosion there are %d spheres in use."%current_n)
...@@ -778,20 +783,20 @@ def random_compact(scatterer, geometry, seed, max_rad): ...@@ -778,20 +783,20 @@ def random_compact(scatterer, geometry, seed, max_rad):
current_n -= 1 current_n -= 1
vec_spheres = [] vec_spheres = []
sph_index = 0 sph_index = 0
# Generate a vector of types if none is given
if (0 in scatterer['vec_types']):
tincrement = 1 if scatterer['application'] != "INCLUSION" else 2
for ti in range(current_n):
itype = tincrement + int(n_types * random.random())
scatterer['vec_types'][ti] = itype
if (scatterer['application'] == "INCLUSION"):
scatterer['vec_types'][0] = 1
for ti in range(len(tmp_spheres)): for ti in range(len(tmp_spheres)):
sphere = tmp_spheres[ti] sphere = tmp_spheres[ti]
if (sphere['x'] < max_rad): if (sphere['x'] < max_rad):
sphere['itype'] = scatterer['vec_types'][sph_index] sphere['itype'] = scatterer['vec_types'][sph_index]
sph_index += 1 sph_index += 1
vec_spheres.append(sphere) vec_spheres.append(sphere)
#pl = pv.Plotter()
#for si in range(len(vec_spheres)):
# x = vec_spheres[si]['x'] / max_rad
# y = vec_spheres[si]['y'] / max_rad
# z = vec_spheres[si]['z'] / max_rad
# mesh = pv.Sphere(radius / max_rad, (x, y, z))
# pl.add_mesh(mesh)
#pl.export_obj("scene.obj")
sph_index = 0 sph_index = 0
for sphere in sorted(vec_spheres, key=lambda item: item['itype']): for sphere in sorted(vec_spheres, key=lambda item: item['itype']):
scatterer['vec_types'][sph_index] = sphere['itype'] scatterer['vec_types'][sph_index] = sphere['itype']
...@@ -799,7 +804,7 @@ def random_compact(scatterer, geometry, seed, max_rad): ...@@ -799,7 +804,7 @@ def random_compact(scatterer, geometry, seed, max_rad):
geometry['vec_sph_y'][sph_index] = sphere['y'] geometry['vec_sph_y'][sph_index] = sphere['y']
geometry['vec_sph_z'][sph_index] = sphere['z'] geometry['vec_sph_z'][sph_index] = sphere['z']
sph_index += 1 sph_index += 1
return result return current_n
## \brief Write the geometry configuration dictionary to legacy format. ## \brief Write the geometry configuration dictionary to legacy format.
# #
......
...@@ -576,6 +576,12 @@ int sphere_jxi488_cycle( ...@@ -576,6 +576,12 @@ int sphere_jxi488_cycle(
oi->vec_vk[jxindex] = vk; oi->vec_vk[jxindex] = vk;
oi->vec_xi[jxindex] = xi; oi->vec_xi[jxindex] = xi;
} }
// Adaptive definition of L_MAX
double wavelength = 2.0 * pi / vk;
double size_param = 2.0 * pi * sconf->get_radius(0) / wavelength;
int N = int(size_param + 4.05 * pow(size_param, 1.0 / 3.0)) + 2;
if (N < l_max) l_max = N;
// End of adaptive definition of L_MAX
vtppoanp->append_line(VirtualBinaryLine(vk)); vtppoanp->append_line(VirtualBinaryLine(vk));
double thsca = (gconf->isam > 1) ? sa->ths - sa->th : 0.0; double thsca = (gconf->isam > 1) ? sa->ths - sa->th : 0.0;
for (int i132 = 0; i132 < nsph; i132++) { for (int i132 = 0; i132 < nsph; i132++) {
......
...@@ -77,7 +77,6 @@ void frfme(string data_file, string output_path) { ...@@ -77,7 +77,6 @@ void frfme(string data_file, string output_path) {
Swap2 *tt2 = NULL; Swap2 *tt2 = NULL;
char namef[7]; char namef[7];
char more; char more;
dcomplex **w = NULL;
dcomplex *wk = NULL; dcomplex *wk = NULL;
const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex cc0 = 0.0 + 0.0 * I;
const dcomplex uim = 0.0 + 1.0 * I; const dcomplex uim = 0.0 + 1.0 * I;
...@@ -105,6 +104,9 @@ void frfme(string data_file, string output_path) { ...@@ -105,6 +104,9 @@ void frfme(string data_file, string output_path) {
int wsum_size; int wsum_size;
// End of vector size variables // End of vector size variables
if (jlmf != 1) { if (jlmf != 1) {
#ifdef USE_NVTX
nvtxRangePush("frfme() with jlmf != 1");
#endif
int nxv, nyv, nzv; int nxv, nyv, nzv;
if (tfrfme == NULL) tfrfme = TFRFME::from_binary(tfrfme_name, "HDF5"); if (tfrfme == NULL) tfrfme = TFRFME::from_binary(tfrfme_name, "HDF5");
if (tfrfme != NULL) { if (tfrfme != NULL) {
...@@ -147,7 +149,16 @@ void frfme(string data_file, string output_path) { ...@@ -147,7 +149,16 @@ void frfme(string data_file, string output_path) {
printf("ERROR: could not open TFRFME file.\n"); printf("ERROR: could not open TFRFME file.\n");
} }
nks = nkv - 1; nks = nkv - 1;
} else { // label 16 #ifdef USE_NVTX
nvtxRangePop();
#endif
} else { // label 16; jlfm = 1
#ifdef USE_NVTX
nvtxRangePush("frfme() with jlmf == 1");
#endif
#ifdef USE_NVTX
nvtxRangePush("Setup operations");
#endif
int nksh, nrsh, nxsh, nysh, nzsh; int nksh, nrsh, nxsh, nysh, nzsh;
str_target = file_lines[last_read_line++]; str_target = file_lines[last_read_line++];
for (int cli = 0; cli < 7; cli++) { for (int cli = 0; cli < 7; cli++) {
...@@ -183,6 +194,9 @@ void frfme(string data_file, string output_path) { ...@@ -183,6 +194,9 @@ void frfme(string data_file, string output_path) {
} }
str_target = file_lines[last_read_line++]; str_target = file_lines[last_read_line++];
re = regex("[eEmM]"); re = regex("[eEmM]");
#ifdef USE_NVTX
nvtxRangePop();
#endif
if (regex_search(str_target, m, re)) { if (regex_search(str_target, m, re)) {
more = m.str().at(0); more = m.str().at(0);
if (more == 'm' || more == 'M') { if (more == 'm' || more == 'M') {
...@@ -200,6 +214,9 @@ void frfme(string data_file, string output_path) { ...@@ -200,6 +214,9 @@ void frfme(string data_file, string output_path) {
string tedf_name = output_path + "/" + namef + ".hd5"; string tedf_name = output_path + "/" + namef + ".hd5";
ScattererConfiguration *tedf = ScattererConfiguration::from_binary(tedf_name, "HDF5"); ScattererConfiguration *tedf = ScattererConfiguration::from_binary(tedf_name, "HDF5");
if (tedf != NULL) { if (tedf != NULL) {
#ifdef USE_NVTX
nvtxRangePush("TEDF data import");
#endif
int iduml, idum; int iduml, idum;
iduml = tedf->number_of_spheres; iduml = tedf->number_of_spheres;
idum = tedf->get_iog(iduml - 1); idum = tedf->get_iog(iduml - 1);
...@@ -223,6 +240,9 @@ void frfme(string data_file, string output_path) { ...@@ -223,6 +240,9 @@ void frfme(string data_file, string output_path) {
xi = xip; xi = xip;
} }
// label 20 // label 20
#ifdef USE_NVTX
nvtxRangePop();
#endif
delete tedf; delete tedf;
double wn = wp / 3.0e8; double wn = wp / 3.0e8;
vk = xi * wn; vk = xi * wn;
...@@ -243,6 +263,9 @@ void frfme(string data_file, string output_path) { ...@@ -243,6 +263,9 @@ void frfme(string data_file, string output_path) {
fshmx = spd * (rir * (sqrt(uy - sthmx * sthmx) / sqrt(uy - sthlmx * sthlmx)) - uy); fshmx = spd * (rir * (sqrt(uy - sthmx * sthmx) / sqrt(uy - sthlmx * sthlmx)) - uy);
} }
// label 22 // label 22
#ifdef USE_NVTX
nvtxRangePush("Memory data loading");
#endif
nlmmt = lm * (lm + 2) * 2; nlmmt = lm * (lm + 2) * 2;
nks = nksh * 2; nks = nksh * 2;
nkv = nks + 1; nkv = nks + 1;
...@@ -286,6 +309,12 @@ void frfme(string data_file, string output_path) { ...@@ -286,6 +309,12 @@ void frfme(string data_file, string output_path) {
double *_yv = tfrfme->get_y(); double *_yv = tfrfme->get_y();
double *_zv = tfrfme->get_z(); double *_zv = tfrfme->get_z();
dcomplex **_wsum = tfrfme->get_matrix(); dcomplex **_wsum = tfrfme->get_matrix();
#ifdef USE_NVTX
nvtxRangePop();
#endif
#ifdef USE_NVTX
nvtxRangePush("Looped vector initialization");
#endif
for (int i24 = nxshpo; i24 <= nxs; i24++) { for (int i24 = nxshpo; i24 <= nxs; i24++) {
_xv[i24] = _xv[i24 - 1] + delxyz; _xv[i24] = _xv[i24 - 1] + delxyz;
_xv[nxv - i24 - 1] = -_xv[i24]; _xv[nxv - i24 - 1] = -_xv[i24];
...@@ -304,7 +333,13 @@ void frfme(string data_file, string output_path) { ...@@ -304,7 +333,13 @@ void frfme(string data_file, string output_path) {
vkv[i28] = vkv[i28 - 1] + delk; vkv[i28] = vkv[i28 - 1] + delk;
vkv[nkv - i28 - 1] = -vkv[i28]; vkv[nkv - i28 - 1] = -vkv[i28];
} // i28 loop } // i28 loop
#ifdef USE_NVTX
nvtxRangePop();
#endif
if (tfrfme != NULL) { if (tfrfme != NULL) {
#ifdef USE_NVTX
nvtxRangePush("TFRFME initialization");
#endif
tfrfme->set_param("vk", vk); tfrfme->set_param("vk", vk);
tfrfme->set_param("exri", exri); tfrfme->set_param("exri", exri);
tfrfme->set_param("an", an); tfrfme->set_param("an", an);
...@@ -336,19 +371,20 @@ void frfme(string data_file, string output_path) { ...@@ -336,19 +371,20 @@ void frfme(string data_file, string output_path) {
tt2->set_param("nlmmt", 1.0 * nlmmt); tt2->set_param("nlmmt", 1.0 * nlmmt);
tt2->set_param("nrvc", 1.0 * nrvc); tt2->set_param("nrvc", 1.0 * nrvc);
tt2->write_binary(temp_name2, "HDF5"); tt2->write_binary(temp_name2, "HDF5");
#ifdef USE_NVTX
nvtxRangePop();
#endif
dcomplex *vec_w = new dcomplex[nkv * nkv]();
dcomplex **w = new dcomplex*[nkv];
for (int wi = 0; wi < nkv; wi++) w[wi] = vec_w + wi * nkv;
#ifdef USE_NVTX
nvtxRangePush("j80 loop");
#endif
for (int j80 = jlmf; j80 <= jlml; j80++) { for (int j80 = jlmf; j80 <= jlml; j80++) {
dcomplex *tt1_wk = tt1->get_vector();
int wk_index = 0; int wk_index = 0;
// w matrix
if (w != NULL) {
for (int wi = nkv - 1; wi > -1; wi--) delete[] w[wi];
delete[] w;
}
w = new dcomplex*[nkv];
for (int wi = 0; wi < nkv; wi++) w[wi] = new dcomplex[nkv]();
for (int jy50 = 0; jy50 < nkv; jy50++) { for (int jy50 = 0; jy50 < nkv; jy50++) {
for (int jx50 = 0; jx50 < nkv; jx50++) { for (int jx50 = 0; jx50 < nkv; jx50++) {
for (int wi = 0; wi < nlmmt; wi++) wk[wi] = tt1_wk[wk_index++]; for (int wi = 0; wi < nlmmt; wi++) wk[wi] = tt1->vec_wk[wk_index++];
w[jx50][jy50] = wk[j80 - 1]; w[jx50][jy50] = wk[j80 - 1];
} // jx50 } // jx50
} // jy50 loop } // jy50 loop
...@@ -384,7 +420,15 @@ void frfme(string data_file, string output_path) { ...@@ -384,7 +420,15 @@ void frfme(string data_file, string output_path) {
} // iy70 loop } // iy70 loop
} // iz75 loop } // iz75 loop
} // j80 loop } // j80 loop
delete[] vec_w;
delete[] w;
#ifdef USE_NVTX
nvtxRangePop();
#endif
// label 88 // label 88
#ifdef USE_NVTX
nvtxRangePush("Closing operations");
#endif
tfrfme->write_binary(tfrfme_name, "HDF5"); tfrfme->write_binary(tfrfme_name, "HDF5");
string output_name = output_path + "/c_OFRFME"; string output_name = output_path + "/c_OFRFME";
FILE *output = fopen(output_name.c_str(), "w"); FILE *output = fopen(output_name.c_str(), "w");
...@@ -393,6 +437,9 @@ void frfme(string data_file, string output_path) { ...@@ -393,6 +437,9 @@ void frfme(string data_file, string output_path) {
if (spd > 0.0) fprintf(output, " FSHMX =%15.7lE\n", fshmx); if (spd > 0.0) fprintf(output, " FSHMX =%15.7lE\n", fshmx);
fprintf(output, " FRSH =%15.7lE\n", frsh); fprintf(output, " FRSH =%15.7lE\n", frsh);
fclose(output); fclose(output);
#ifdef USE_NVTX
nvtxRangePop();
#endif
} else { // Should never happen. } else { // Should never happen.
printf("ERROR: could not open TFRFME file for output.\n"); printf("ERROR: could not open TFRFME file for output.\n");
} }
...@@ -405,17 +452,22 @@ void frfme(string data_file, string output_path) { ...@@ -405,17 +452,22 @@ void frfme(string data_file, string output_path) {
fprintf(output, " WRONG INPUT TAPE\n"); fprintf(output, " WRONG INPUT TAPE\n");
fclose(output); fclose(output);
} }
#ifdef USE_NVTX
nvtxRangePop();
#endif
} }
// label 45 // label 45
#ifdef USE_NVTX
nvtxRangePush("frfme() memory clean");
#endif
if (tfrfme != NULL) delete tfrfme; if (tfrfme != NULL) delete tfrfme;
delete[] file_lines; delete[] file_lines;
if (tt2 != NULL) delete tt2; if (tt2 != NULL) delete tt2;
if (w != NULL) {
for (int wi = nkv - 1; wi > -1; wi--) delete[] w[wi];
delete[] w;
}
if (wk != NULL) delete[] wk; if (wk != NULL) delete[] wk;
if (tt1 != NULL) delete tt1; if (tt1 != NULL) delete tt1;
#ifdef USE_NVTX
nvtxRangePop();
#endif
printf("FRFME: Done.\n"); printf("FRFME: Done.\n");
#ifdef USE_NVTX #ifdef USE_NVTX
nvtxRangePop(); nvtxRangePop();
......
...@@ -56,6 +56,10 @@ ...@@ -56,6 +56,10 @@
#include "../include/tra_subs.h" #include "../include/tra_subs.h"
#endif #endif
#ifdef USE_NVTX
#include <nvtx3/nvToolsExt.h>
#endif
using namespace std; using namespace std;
/*! \brief C++ implementation of LFFFT /*! \brief C++ implementation of LFFFT
...@@ -64,6 +68,9 @@ using namespace std; ...@@ -64,6 +68,9 @@ using namespace std;
* \param output_path: `string` Directory to write the output files in. * \param output_path: `string` Directory to write the output files in.
*/ */
void lffft(string data_file, string output_path) { void lffft(string data_file, string output_path) {
#ifdef USE_NVTX
nvtxRangePush("Running lffft()");
#endif
const dcomplex uim = 0.0 + 1.0 * I; const dcomplex uim = 0.0 + 1.0 * I;
const double sq2i = 1.0 / sqrt(2.0); const double sq2i = 1.0 / sqrt(2.0);
const dcomplex sq2iti = sq2i * uim; const dcomplex sq2iti = sq2i * uim;
...@@ -476,4 +483,7 @@ void lffft(string data_file, string output_path) { ...@@ -476,4 +483,7 @@ void lffft(string data_file, string output_path) {
delete ccr; delete ccr;
delete[] file_lines; delete[] file_lines;
printf("LFFT: Done.\n"); printf("LFFT: Done.\n");
#ifdef USE_NVTX
nvtxRangePop();
#endif
} }