diff --git a/build/configure.sh b/build/configure.sh index 191dd7c2f91d75422b578361f9923c162fec5e67..161ddb933460e7f32952856381a652c2a5c86c25 100755 --- a/build/configure.sh +++ b/build/configure.sh @@ -826,7 +826,7 @@ else fi # End of offload checks if [ "x$CXXFLAGS" = "x" ]; then - CXXFLAGS="-O${CXX_OPT}${CXX_DBG}${CLANGFLAGS}${INCLUDEFLAGS}${HDF5FLAGS}${OMPFLAGS}${MPIFLAGS}${LAPACKFLAGS}${CUBLASFLAGS}${MAGMAFLAGS}${REFINEFLAGS}${DEBUGFLAGS}${OFFLOADFLAGS}" + CXXFLAGS="-O${CXX_OPT}${CXX_DBG}${CLANGFLAGS}${INCLUDEFLAGS}${HDF5FLAGS}${OMPFLAGS}${MPIFLAGS}${LAPACKFLAGS}${CUBLASFLAGS}${MAGMAFLAGS}${REFINEFLAGS}${DEBUGFLAGS}${OFFLOADFLAGS}${NVTXFLAGS}" fi if [ "x$CXXLDFLAGS" = "x" ]; then if [ "x$LIBMODE" = "xstatic" ]; then diff --git a/src/include/tfrfme.h b/src/include/tfrfme.h index be6ac56252d6f7b63241af2b8a60119fe0b00cbf..ded5377a19f0a4879e57e3733601648b4db12913 100644 --- a/src/include/tfrfme.h +++ b/src/include/tfrfme.h @@ -63,6 +63,9 @@ protected: void write_legacy(const std::string& file_name); public: + //! \brief Read only view on WK. + const dcomplex *vec_wk; + /*! \brief Swap1 instance constructor. * * \param lm: `int` Maximum field expansion order. @@ -97,12 +100,6 @@ public: */ static long get_memory_requirement(int lm, int _nkv); - /*! \brief Get the pointer to the WK vector. - * - * \return value: `complex double *` Memory address of the WK vector. - */ - dcomplex *get_vector() { return wk; } - /*! \brief Bring the pointer to the next element at the start of vector. */ void reset() { last_index = 0; } diff --git a/src/libnptm/tfrfme.cpp b/src/libnptm/tfrfme.cpp index df999983f83f4b1e855fa23b9990c2500998cf33..2b81215780af3610fd3c535a49326a5663780823 100644 --- a/src/libnptm/tfrfme.cpp +++ b/src/libnptm/tfrfme.cpp @@ -52,6 +52,7 @@ Swap1::Swap1(int lm, int _nkv) { nlmmt = 2 * lm * (lm + 2); const int size = nkv * nkv * nlmmt; wk = new dcomplex[size](); + vec_wk = wk; last_index = 0; } @@ -77,21 +78,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) { string str_type; int _nlmmt, _nkv, lm, num_elements, index; dcomplex value; - dcomplex *_wk = NULL; if (status == 0) { status = hdf_file->read("NLMMT", "INT32", &_nlmmt); status = hdf_file->read("NKV", "INT32", &_nkv); - lm = (int)((-2.0 + sqrt(4.0 + 2.0 * _nlmmt)) / 2.0); + lm = (int)(sqrt(4.0 + 2.0 * _nlmmt) / 2.0) - 1; num_elements = 2 * _nlmmt * _nkv * _nkv; instance = new Swap1(lm, _nkv); - _wk = instance->get_vector(); elements = new double[num_elements](); str_type = "FLOAT64_(" + to_string(num_elements) + ")"; status = hdf_file->read("WK", str_type, elements); for (int wi = 0; wi < num_elements / 2; wi++) { index = 2 * wi; value = elements[index] + elements[index + 1] * I; - _wk[wi] = value; + instance->wk[wi] = value; } // wi loop delete[] elements; status = hdf_file->close(); @@ -103,21 +102,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) { Swap1* Swap1::from_legacy(const std::string& file_name) { fstream input; Swap1 *instance = NULL; - dcomplex *_wk = NULL; int _nlmmt, _nkv, lm; double rval, ival; input.open(file_name.c_str(), ios::in | ios::binary); if (input.is_open()) { input.read(reinterpret_cast<char *>(&_nlmmt), sizeof(int)); - lm = (int)((-2.0 + sqrt(4.0 + 2.0 * _nlmmt)) / 2.0); + lm = (int)(sqrt(4.0 + 2.0 * _nlmmt) / 2.0) - 1; input.read(reinterpret_cast<char *>(&_nkv), sizeof(int)); instance = new Swap1(lm, _nkv); - _wk = instance->get_vector(); int num_elements = _nlmmt * _nkv * _nkv; for (int j = 0; j < num_elements; j++) { input.read(reinterpret_cast<char *>(&rval), sizeof(double)); input.read(reinterpret_cast<char *>(&ival), sizeof(double)); - _wk[j] = rval + ival * I; + instance->wk[j] = rval + ival * I; } input.close(); } else { diff --git a/src/libnptm/tra_subs.cpp b/src/libnptm/tra_subs.cpp index 8d04f4479c86d72407a7f3b836864425e3d0abdd..59496de9b3d642b013c36d394db50e5989ce9017 100644 --- a/src/libnptm/tra_subs.cpp +++ b/src/libnptm/tra_subs.cpp @@ -269,9 +269,9 @@ void ffrt(dcomplex *ac, dcomplex *ws, double *ffte, double *ffts, CIL *cil) { } dcomplex *frfmer( - int nkv, double vkm, double vknmx, double apfafa, double tra, - double spd, double rir, double ftcn, int le, int lmode, double pmf, - Swap1 *tt1, Swap2 *tt2 + int nkv, double vkm, double vknmx, double apfafa, double tra, + double spd, double rir, double ftcn, int le, int lmode, double pmf, + Swap1 *tt1, Swap2 *tt2 ) { const int nlemt = le * (le + 2) * 2; const dcomplex cc0 = 0.0 + 0.0 * I; diff --git a/src/scripts/model_maker.py b/src/scripts/model_maker.py index 6e2d3d50e20f538a528ba64cf1850ebda9ab21b2..4475a826aafa9a2cf053ac9c73ce94c59a7c7996 100755 --- a/src/scripts/model_maker.py +++ b/src/scripts/model_maker.py @@ -31,6 +31,7 @@ import pdb import random import yaml +## \brief 3D software generation capability flag. allow_3d = True try: import pyvista as pv @@ -46,7 +47,7 @@ from sys import argv # `main()` is the function that handles the creation of the code configuration. # It returns an integer value as exit code, using 0 to signal successful execution. # -# \returns result: `int` Number of detected error-level inconsistencies. +# \returns result: `int` Exit code (0 = SUCCESS). def main(): result = 0 config = parse_arguments() @@ -68,63 +69,69 @@ def main(): # \return result: `int` An exit code (0 if successful). def interpolate_constants(sconf): result = 0 - for i in range(sconf['configurations']): - for j in range(sconf['nshl'][i]): - file_idx = sconf['dielec_id'][i][j] - dielec_path = Path(sconf['dielec_path'], sconf['dielec_file'][int(file_idx) - 1]) - file_name = str(dielec_path) - dielec_file = open(file_name, 'r') - wavelengths = [] - rpart = [] - ipart = [] - str_line = dielec_file.readline() - while (str_line != ""): - if (not str_line.startswith('#')): - split_line = str_line.split(',') - if (len(split_line) == 3): - wavelengths.append(float(split_line[0])) - rpart.append(float(split_line[1])) - ipart.append(float(split_line[2])) + err_arg = "" + try: + for i in range(sconf['configurations']): + for j in range(sconf['nshl'][i]): + file_idx = sconf['dielec_id'][i][j] + dielec_path = Path(sconf['dielec_path'], sconf['dielec_file'][int(file_idx) - 1]) + err_arg = str(dielec_path) + file_name = err_arg + dielec_file = open(file_name, 'r') + wavelengths = [] + rpart = [] + ipart = [] str_line = dielec_file.readline() - dielec_file.close() - wi = 0 - x0 = 0.0 - x1 = 0.0 - ry0 = 0.0 - iy0 = 0.0 - ry1 = 0.0 - iy1 = 0.0 - for dci in range(sconf['nxi']): - w = sconf['vec_xi'][dci] - while (w > x1): - x0 = wavelengths[wi] - ry0 = rpart[wi] - iy0 = ipart[wi] - if (wi == len(wavelengths)): - print("ERROR: file %s does not cover requested wavelengths!"%file_name) - return 1 - wi += 1 - x1 = wavelengths[wi] - ry1 = rpart[wi] - iy1 = ipart[wi] - if (wi > 0): - x0 = wavelengths[wi - 1] - ry0 = rpart[wi - 1] - iy0 = ipart[wi - 1] - dx = w - x0 - dry = (ry1 - ry0) / (x1 - x0) * dx - diy = (iy1 - iy0) / (x1 - x0) * dx - ry = ry0 + dry - iy = iy0 + diy - sconf['rdc0'][j][i][dci] = ry - sconf['idc0'][j][i][dci] = iy - else: - if (wavelengths[wi] == w): - sconf['rdc0'][j][i][dci] = rpart[0] - sconf['idc0'][j][i][dci] = ipart[0] + while (str_line != ""): + if (not str_line.startswith('#')): + split_line = str_line.split(',') + if (len(split_line) == 3): + wavelengths.append(float(split_line[0])) + rpart.append(float(split_line[1])) + ipart.append(float(split_line[2])) + str_line = dielec_file.readline() + dielec_file.close() + wi = 0 + x0 = 0.0 + x1 = 0.0 + ry0 = 0.0 + iy0 = 0.0 + ry1 = 0.0 + iy1 = 0.0 + for dci in range(sconf['nxi']): + w = sconf['vec_xi'][dci] + while (w > x1): + x0 = wavelengths[wi] + ry0 = rpart[wi] + iy0 = ipart[wi] + if (wi == len(wavelengths)): + print("ERROR: file %s does not cover requested wavelengths!"%file_name) + return 1 + wi += 1 + x1 = wavelengths[wi] + ry1 = rpart[wi] + iy1 = ipart[wi] + if (wi > 0): + x0 = wavelengths[wi - 1] + ry0 = rpart[wi - 1] + iy0 = ipart[wi - 1] + dx = w - x0 + dry = (ry1 - ry0) / (x1 - x0) * dx + diy = (iy1 - iy0) / (x1 - x0) * dx + ry = ry0 + dry + iy = iy0 + diy + sconf['rdc0'][j][i][dci] = ry + sconf['idc0'][j][i][dci] = iy else: - print("ERROR: file %s does not cover requested wavelengths!"%file_name) - return 2 + if (wavelengths[wi] == w): + sconf['rdc0'][j][i][dci] = rpart[0] + sconf['idc0'][j][i][dci] = ipart[0] + else: + print("ERROR: file %s does not cover requested wavelengths!"%file_name) + return 2 + except FileNotFoundError as ex: + print("ERROR: file not found %s!"%err_arg) + return 3 return result ## \brief Create tha calculation configuration structure from YAML input. @@ -261,12 +268,16 @@ def load_model(model_file): [0.0 for k in range(sconf['nxi'])] for j in range(sconf['configurations']) ] for i in range(max_layers) ] - interpolate_constants(sconf) + check = interpolate_constants(sconf) + if (check != 0): + return (None, None) else: # sconf[idfc] != 0 and scaling on wavelength print("ERROR: for wavelength scaling, optical constants must be tabulated!") return (None, None) elif (model['material_settings']['match_mode'] == "GRID"): - match_grid(sconf) + check = match_grid(sconf) + if (check != 0): + return(None, None) else: print("ERROR: %s is not a recognized match mode!"%(model['material_settings']['match_mode'])) return (None, None) @@ -384,17 +395,25 @@ def load_model(model_file): rnd_engine = "COMPACT" if (rnd_engine == "COMPACT"): check = random_compact(sconf, gconf, rnd_seed, max_rad) - if (check == 1): + if (check == -1): print("ERROR: compact random generator works only when all sphere types have the same radius.") return (None, None) + elif (check == -2): + print("ERROR: sub-particle radius larger than particle radius.") + return (None, None) + elif (check == -3): + print("ERROR: requested number of spheres cannot fit in allowed volume.") + return (None, None) elif (rnd_engine == "LOOSE"): # random_aggregate() checks internally whether application is INCLUSION check = random_aggregate(sconf, gconf, rnd_seed, max_rad) else: print("ERROR: unrecognized random generator engine.") return (None, None) - if (check != 0): - print("WARNING: %d sphere(s) could not be placed."%check) + if (check != sconf['nsph']): + print("WARNING: placed only %d out of %d requested spheres."%(check, sconf['nsph'])) + sconf['nsph'] = check + gconf['nsph'] = check else: if (len(model['geometry_settings']['x_coords']) != gconf['nsph']): print("ERROR: coordinate vectors do not match the number of spheres!") @@ -409,13 +428,16 @@ def load_model(model_file): write_obj(sconf, gconf, max_rad) try: max_gpu_ram = int(model['system_settings']['max_gpu_ram']) + matrix_dim = 2 * gconf['nsph'] * gconf['li'] * (gconf['li'] + 2) + matrix_size_bytes = 16 * matrix_dim * matrix_dim + matrix_size_Gb = float(matrix_size_bytes) / 1024.0 / 1024.0 / 1024.0 + print("INFO: estimated matrix size is {0:.3g} Gb.".format(matrix_size_Gb)) if (max_gpu_ram > 0): max_gpu_ram_bytes = max_gpu_ram * 1024 * 1024 * 1024 - matrix_dim = 2 * gconf['nsph'] * gconf['li'] * (gconf['li'] + 2) - matrix_size_bytes = 16 * matrix_dim * matrix_dim if (matrix_size_bytes < max_gpu_ram_bytes): max_gpu_processes = int(max_gpu_ram_bytes / matrix_size_bytes) print("INFO: system supports up to %d simultaneous processes on GPU."%max_gpu_processes) + print("INFO: only %d GPU processes allowed, if using refinement."%(max_gpu_processes / 3)) else: print("WARNING: estimated matrix size is larger than available GPU memory!") else: @@ -454,72 +476,78 @@ def match_grid(sconf): max_layers = 0 nxi = 0 sconf['vec_xi'] = [] - for i in range(sconf['configurations']): - layers = sconf['nshl'][i] - if (sconf['application'] == "INCLUSION" and i == 0): - layers += 1 - for j in range(layers): - file_idx = sconf['dielec_id'][i][j] - dielec_path = Path(sconf['dielec_path'], sconf['dielec_file'][int(file_idx) - 1]) - file_name = str(dielec_path) - dielec_file = open(file_name, 'r') - wavelengths = [] - rpart = [] - ipart = [] - str_line = dielec_file.readline() - while (str_line != ""): - if (not str_line.startswith('#')): - split_line = str_line.split(',') - if (len(split_line) == 3): - wavelengths.append(float(split_line[0])) - rpart.append(float(split_line[1])) - ipart.append(float(split_line[2])) + err_arg = "" + try: + for i in range(sconf['configurations']): + layers = sconf['nshl'][i] + if (sconf['application'] == "INCLUSION" and i == 0): + layers += 1 + for j in range(layers): + file_idx = sconf['dielec_id'][i][j] + dielec_path = Path(sconf['dielec_path'], sconf['dielec_file'][int(file_idx) - 1]) + err_arg = str(dielec_path) + file_name = err_arg + dielec_file = open(file_name, 'r') + wavelengths = [] + rpart = [] + ipart = [] str_line = dielec_file.readline() - dielec_file.close() - if (max_layers == 0): - # This is executed only once - max_layers = max(sconf['nshl']) - if (sconf['application'] == "INCLUSION" and max_layers < sconf['nshl'][0] + 1): - max_layers = sconf['nshl'][0] + 1 - w_start = sconf['xi_start'] - w_end = sconf['xi_end'] - for wi in range(len(wavelengths)): - w = wavelengths[wi] - if (w >= w_start and w <= w_end): - sconf['vec_xi'].append(w) - nxi += 1 - sconf['rdc0'] = [ - [ + while (str_line != ""): + if (not str_line.startswith('#')): + split_line = str_line.split(',') + if (len(split_line) == 3): + wavelengths.append(float(split_line[0])) + rpart.append(float(split_line[1])) + ipart.append(float(split_line[2])) + str_line = dielec_file.readline() + dielec_file.close() + if (max_layers == 0): + # This is executed only once + max_layers = max(sconf['nshl']) + if (sconf['application'] == "INCLUSION" and max_layers < sconf['nshl'][0] + 1): + max_layers = sconf['nshl'][0] + 1 + w_start = sconf['xi_start'] + w_end = sconf['xi_end'] + for wi in range(len(wavelengths)): + w = wavelengths[wi] + if (w >= w_start and w <= w_end): + sconf['vec_xi'].append(w) + nxi += 1 + sconf['rdc0'] = [ [ - 0.0 for dk in range(nxi) - ] for dj in range(sconf['configurations']) - ] for di in range(max_layers) - ] - sconf['idc0'] = [ - [ + [ + 0.0 for dk in range(nxi) + ] for dj in range(sconf['configurations']) + ] for di in range(max_layers) + ] + sconf['idc0'] = [ [ - 0.0 for dk in range(nxi) - ] for dj in range(sconf['configurations']) - ] for di in range(max_layers) - ] - sconf['nxi'] = nxi - # This is executed for all layers in all configurations - wi = 0 - x = wavelengths[wi] - ry = rpart[wi] - iy = ipart[wi] - for dci in range(sconf['nxi']): - w = sconf['vec_xi'][dci] - while (w > x): - x = wavelengths[wi] - ry = rpart[wi] - iy = ipart[wi] - if (wi == len(wavelengths)): - print("ERROR: file %s does not cover requested wavelengths!"%file_name) - return 1 - wi += 1 - sconf['rdc0'][j][i][dci] = ry - sconf['idc0'][j][i][dci] = iy + [ + 0.0 for dk in range(nxi) + ] for dj in range(sconf['configurations']) + ] for di in range(max_layers) + ] + sconf['nxi'] = nxi + # This is executed for all layers in all configurations + wi = 0 + x = wavelengths[wi] + ry = rpart[wi] + iy = ipart[wi] + for dci in range(sconf['nxi']): + w = sconf['vec_xi'][dci] + while (w > x): + x = wavelengths[wi] + ry = rpart[wi] + iy = ipart[wi] + if (wi == len(wavelengths)): + print("ERROR: file %s does not cover requested wavelengths!"%file_name) + return 1 + wi += 1 + sconf['rdc0'][j][i][dci] = ry + sconf['idc0'][j][i][dci] = iy + except FileNotFoundError as ex: + print("ERROR: file not found %s!"%err_arg) + return 3 return result ## \brief Parse the command line arguments. @@ -587,15 +615,19 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100): vec_thetas = [0.0 for i in range(nsph)] vec_phis = [0.0 for i in range(nsph)] vec_rads = [0.0 for i in range(nsph)] + vec_types = [] n_types = scatterer['configurations'] if (0 in scatterer['vec_types']): tincrement = 1 if scatterer['application'] != "INCLUSION" else 2 for ti in range(nsph): itype = tincrement + int(n_types * random.random()) scatterer['vec_types'][ti] = itype + if (scatterer['application'] == "INCLUSION"): + scatterer['vec_types'][0] = 1 sph_type_index = scatterer['vec_types'][0] - 1 vec_spheres = [{'itype': sph_type_index + 1, 'x': 0.0, 'y': 0.0, 'z': 0.0}] vec_rads[0] = scatterer['ros'][sph_type_index] + vec_types.append(sph_type_index + 1) placed_spheres = 1 attempts = 0 for i in range(1, nsph): @@ -669,7 +701,9 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100): }) is_placed = True placed_spheres += 1 + vec_types.append(sph_type_index + 1) attempts = 0 + scatterer['vec_types'] = vec_types sph_index = 0 for sphere in sorted(vec_spheres, key=lambda item: item['itype']): scatterer['vec_types'][sph_index] = sphere['itype'] @@ -677,6 +711,7 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100): geometry['vec_sph_y'][sph_index] = sphere['y'] geometry['vec_sph_z'][sph_index] = sphere['z'] sph_index += 1 + result = placed_spheres return result ## \brief Generate a random compact cluster from YAML configuration options. @@ -699,45 +734,30 @@ def random_compact(scatterer, geometry, seed, max_rad): random.seed(seed) nsph = scatterer['nsph'] n_types = scatterer['configurations'] - if (0 in scatterer['vec_types']): - tincrement = 1 if scatterer['application'] != "INCLUSION" else 2 - for ti in range(nsph): - itype = tincrement + int(n_types * random.random()) - scatterer['vec_types'][ti] = itype + radius = scatterer['ros'][0] + # Return an error code if types have different radii if (max(scatterer['ros']) != min(scatterer['ros'])): - result = 1 + result = -1 + elif (radius > max_rad): + # Requested spheres are larger than the maximum allowed volume. + # End function with error code -2. + result = -2 else: - radius = scatterer['ros'][0] - x_centers = np.arange(-1.0 * max_rad + radius, max_rad, 2.0 * radius) - y_centers = np.arange(-1.0 * max_rad + radius, max_rad, math.sqrt(3.0) * radius) - z_centers = np.arange(-1.0 * max_rad + radius, max_rad, math.sqrt(3.0) * radius) - x_offset = radius - y_offset = radius - x_layer_offset = radius - y_layer_offset = radius / math.sqrt(3.0) + x_centers = np.arange(-1.0 * max_rad + 2.0 * radius, max_rad, 2.0 * radius) + x_size = len(x_centers) + y_size = int(2.0 * max_rad / ((1.0 + math.sqrt(3.0) / 3.0) * radius)) + z_size = int(2.0 * max_rad / ((1.0 + 2.0 * math.sqrt(6.0) / 3.0) * radius)) tmp_spheres = [] - n_cells = len(x_centers) * len(y_centers) * len(z_centers) + n_cells = x_size * y_size * z_size print("INFO: the cubic space would contain %d spheres."%n_cells) - n_max_spheres = int((max_rad / radius) * (max_rad / radius) * (max_rad / radius) * 0.74) - print("INFO: the maximum radius allows for %d spheres."%n_max_spheres) - for zi in range(len(z_centers)): - if (x_layer_offset == 0.0): - x_layer_offset = radius - else: - x_layer_offset = 0.0 - if (y_offset == 0.0): - y_offset = radius - else: - y_offset = 0.0 - for yi in range(len(y_centers)): - if (x_offset == 0.0): - x_offset = radius - else: - x_offset = 0.0 - for xi in range(len(x_centers)): - x = x_centers[xi] + x_offset + x_layer_offset - y = y_centers[yi] + y_offset - z = z_centers[zi] + k = 0 + z = -max_rad + radius + while (z < max_rad - radius): + j = 0 + y = -max_rad + radius + while (y < max_rad - radius): + for i in range(len(x_centers)): + x = (2 * (i + 1) + (j + k) % 2) * radius - max_rad extent = radius + math.sqrt(x * x + y * y + z * z) if (extent < max_rad): tmp_spheres.append({ @@ -746,6 +766,11 @@ def random_compact(scatterer, geometry, seed, max_rad): 'y': y, 'z': z }) + # + j += 1 + y = math.sqrt(3.0) * (j + (k % 2) / 3.0) * radius - max_rad + radius + k += 1 + z = 2.0 / 3.0 * math.sqrt(6.0) * k * radius - max_rad + radius #tmp_spheres = [{'itype': 1, 'x': 0.0, 'y': 0.0, 'z': 0.0}] current_n = len(tmp_spheres) print("INFO: before erosion there are %d spheres in use."%current_n) @@ -777,20 +802,20 @@ def random_compact(scatterer, geometry, seed, max_rad): current_n -= 1 vec_spheres = [] sph_index = 0 + # Generate a vector of types if none is given + if (0 in scatterer['vec_types']): + tincrement = 1 if scatterer['application'] != "INCLUSION" else 2 + for ti in range(current_n): + itype = tincrement + int(n_types * random.random()) + scatterer['vec_types'][ti] = itype + if (scatterer['application'] == "INCLUSION"): + scatterer['vec_types'][0] = 1 for ti in range(len(tmp_spheres)): sphere = tmp_spheres[ti] if (sphere['x'] < max_rad): sphere['itype'] = scatterer['vec_types'][sph_index] sph_index += 1 vec_spheres.append(sphere) - #pl = pv.Plotter() - #for si in range(len(vec_spheres)): - # x = vec_spheres[si]['x'] / max_rad - # y = vec_spheres[si]['y'] / max_rad - # z = vec_spheres[si]['z'] / max_rad - # mesh = pv.Sphere(radius / max_rad, (x, y, z)) - # pl.add_mesh(mesh) - #pl.export_obj("scene.obj") sph_index = 0 for sphere in sorted(vec_spheres, key=lambda item: item['itype']): scatterer['vec_types'][sph_index] = sphere['itype'] @@ -798,7 +823,7 @@ def random_compact(scatterer, geometry, seed, max_rad): geometry['vec_sph_y'][sph_index] = sphere['y'] geometry['vec_sph_z'][sph_index] = sphere['z'] sph_index += 1 - return result + return current_n ## \brief Write the geometry configuration dictionary to legacy format. # @@ -952,9 +977,10 @@ def write_legacy_sconf(conf): ## \brief Export the model to a set of OBJ files for 3D visualization. # -# This function exports the model as a set of OBJ files (one for every -# spherical unit, plus a single scene file) to allow for model visualization -# with 3D software tools. +# This function exports the model as a single OBJ file, containing the +# information to visualize the particle with 3D software tools. The model +# file is associated with a MTL material libray file, used to assign colors +# to spheres of different type. # # \param scatterer: `dict` Scatterer configuration dictionary (gets modified) # \param geometry: `dict` Geometry configuration dictionary (gets modified) diff --git a/src/trapping/cfrfme.cpp b/src/trapping/cfrfme.cpp index d2c19f3cec6abf5358e09d9f91975d0739019bd4..5901be17f7a617910bc6b3e4413a5bc8eaa879cb 100644 --- a/src/trapping/cfrfme.cpp +++ b/src/trapping/cfrfme.cpp @@ -56,6 +56,10 @@ #include "../include/tra_subs.h" #endif +#ifdef USE_NVTX +#include <nvtx3/nvToolsExt.h> +#endif + using namespace std; /*! \brief C++ implementation of FRFME @@ -64,13 +68,15 @@ using namespace std; * \param output_path: `string` Directory to write the output files in. */ void frfme(string data_file, string output_path) { +#ifdef USE_NVTX + nvtxRangePush("Running frfme()"); +#endif string tfrfme_name = output_path + "/c_TFRFME.hd5"; TFRFME *tfrfme = NULL; Swap1 *tt1 = NULL; Swap2 *tt2 = NULL; char namef[7]; char more; - dcomplex **w = NULL; dcomplex *wk = NULL; const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex uim = 0.0 + 1.0 * I; @@ -98,6 +104,9 @@ void frfme(string data_file, string output_path) { int wsum_size; // End of vector size variables if (jlmf != 1) { +#ifdef USE_NVTX + nvtxRangePush("frfme() with jlmf != 1"); +#endif int nxv, nyv, nzv; if (tfrfme == NULL) tfrfme = TFRFME::from_binary(tfrfme_name, "HDF5"); if (tfrfme != NULL) { @@ -140,7 +149,16 @@ void frfme(string data_file, string output_path) { printf("ERROR: could not open TFRFME file.\n"); } nks = nkv - 1; - } else { // label 16 +#ifdef USE_NVTX + nvtxRangePop(); +#endif + } else { // label 16; jlfm = 1 +#ifdef USE_NVTX + nvtxRangePush("frfme() with jlmf == 1"); +#endif +#ifdef USE_NVTX + nvtxRangePush("Setup operations"); +#endif int nksh, nrsh, nxsh, nysh, nzsh; str_target = file_lines[last_read_line++]; for (int cli = 0; cli < 7; cli++) { @@ -176,6 +194,9 @@ void frfme(string data_file, string output_path) { } str_target = file_lines[last_read_line++]; re = regex("[eEmM]"); +#ifdef USE_NVTX + nvtxRangePop(); +#endif if (regex_search(str_target, m, re)) { more = m.str().at(0); if (more == 'm' || more == 'M') { @@ -193,6 +214,9 @@ void frfme(string data_file, string output_path) { string tedf_name = output_path + "/" + namef + ".hd5"; ScattererConfiguration *tedf = ScattererConfiguration::from_binary(tedf_name, "HDF5"); if (tedf != NULL) { +#ifdef USE_NVTX + nvtxRangePush("TEDF data import"); +#endif int iduml, idum; iduml = tedf->number_of_spheres; idum = tedf->get_iog(iduml - 1); @@ -216,6 +240,9 @@ void frfme(string data_file, string output_path) { xi = xip; } // label 20 +#ifdef USE_NVTX + nvtxRangePop(); +#endif delete tedf; double wn = wp / 3.0e8; vk = xi * wn; @@ -236,6 +263,9 @@ void frfme(string data_file, string output_path) { fshmx = spd * (rir * (sqrt(uy - sthmx * sthmx) / sqrt(uy - sthlmx * sthlmx)) - uy); } // label 22 +#ifdef USE_NVTX + nvtxRangePush("Memory data loading"); +#endif nlmmt = lm * (lm + 2) * 2; nks = nksh * 2; nkv = nks + 1; @@ -279,6 +309,12 @@ void frfme(string data_file, string output_path) { double *_yv = tfrfme->get_y(); double *_zv = tfrfme->get_z(); dcomplex **_wsum = tfrfme->get_matrix(); +#ifdef USE_NVTX + nvtxRangePop(); +#endif +#ifdef USE_NVTX + nvtxRangePush("Looped vector initialization"); +#endif for (int i24 = nxshpo; i24 <= nxs; i24++) { _xv[i24] = _xv[i24 - 1] + delxyz; _xv[nxv - i24 - 1] = -_xv[i24]; @@ -297,7 +333,13 @@ void frfme(string data_file, string output_path) { vkv[i28] = vkv[i28 - 1] + delk; vkv[nkv - i28 - 1] = -vkv[i28]; } // i28 loop +#ifdef USE_NVTX + nvtxRangePop(); +#endif if (tfrfme != NULL) { +#ifdef USE_NVTX + nvtxRangePush("TFRFME initialization"); +#endif tfrfme->set_param("vk", vk); tfrfme->set_param("exri", exri); tfrfme->set_param("an", an); @@ -329,19 +371,20 @@ void frfme(string data_file, string output_path) { tt2->set_param("nlmmt", 1.0 * nlmmt); tt2->set_param("nrvc", 1.0 * nrvc); tt2->write_binary(temp_name2, "HDF5"); +#ifdef USE_NVTX + nvtxRangePop(); +#endif + dcomplex *vec_w = new dcomplex[nkv * nkv](); + dcomplex **w = new dcomplex*[nkv]; + for (int wi = 0; wi < nkv; wi++) w[wi] = vec_w + wi * nkv; +#ifdef USE_NVTX + nvtxRangePush("j80 loop"); +#endif for (int j80 = jlmf; j80 <= jlml; j80++) { - dcomplex *tt1_wk = tt1->get_vector(); int wk_index = 0; - // w matrix - if (w != NULL) { - for (int wi = nkv - 1; wi > -1; wi--) delete[] w[wi]; - delete[] w; - } - w = new dcomplex*[nkv]; - for (int wi = 0; wi < nkv; wi++) w[wi] = new dcomplex[nkv](); for (int jy50 = 0; jy50 < nkv; jy50++) { for (int jx50 = 0; jx50 < nkv; jx50++) { - for (int wi = 0; wi < nlmmt; wi++) wk[wi] = tt1_wk[wk_index++]; + for (int wi = 0; wi < nlmmt; wi++) wk[wi] = tt1->vec_wk[wk_index++]; w[jx50][jy50] = wk[j80 - 1]; } // jx50 } // jy50 loop @@ -377,7 +420,15 @@ void frfme(string data_file, string output_path) { } // iy70 loop } // iz75 loop } // j80 loop + delete[] vec_w; + delete[] w; +#ifdef USE_NVTX + nvtxRangePop(); +#endif // label 88 +#ifdef USE_NVTX + nvtxRangePush("Closing operations"); +#endif tfrfme->write_binary(tfrfme_name, "HDF5"); string output_name = output_path + "/c_OFRFME"; FILE *output = fopen(output_name.c_str(), "w"); @@ -386,6 +437,9 @@ void frfme(string data_file, string output_path) { if (spd > 0.0) fprintf(output, " FSHMX =%15.7lE\n", fshmx); fprintf(output, " FRSH =%15.7lE\n", frsh); fclose(output); +#ifdef USE_NVTX + nvtxRangePop(); +#endif } else { // Should never happen. printf("ERROR: could not open TFRFME file for output.\n"); } @@ -398,16 +452,24 @@ void frfme(string data_file, string output_path) { fprintf(output, " WRONG INPUT TAPE\n"); fclose(output); } +#ifdef USE_NVTX + nvtxRangePop(); +#endif } // label 45 +#ifdef USE_NVTX + nvtxRangePush("frfme() memory clean"); +#endif if (tfrfme != NULL) delete tfrfme; delete[] file_lines; if (tt2 != NULL) delete tt2; - if (w != NULL) { - for (int wi = nkv - 1; wi > -1; wi--) delete[] w[wi]; - delete[] w; - } if (wk != NULL) delete[] wk; if (tt1 != NULL) delete tt1; +#ifdef USE_NVTX + nvtxRangePop(); +#endif printf("FRFME: Done.\n"); +#ifdef USE_NVTX + nvtxRangePop(); +#endif } diff --git a/src/trapping/clffft.cpp b/src/trapping/clffft.cpp index 46828b2ddfb1d5de61249f6a570fa704dfc1f9e5..4d66d8bad883686c89c169c7afb90641735f1729 100644 --- a/src/trapping/clffft.cpp +++ b/src/trapping/clffft.cpp @@ -56,6 +56,10 @@ #include "../include/tra_subs.h" #endif +#ifdef USE_NVTX +#include <nvtx3/nvToolsExt.h> +#endif + using namespace std; /*! \brief C++ implementation of LFFFT @@ -64,6 +68,9 @@ using namespace std; * \param output_path: `string` Directory to write the output files in. */ void lffft(string data_file, string output_path) { +#ifdef USE_NVTX + nvtxRangePush("Running lffft()"); +#endif const dcomplex uim = 0.0 + 1.0 * I; const double sq2i = 1.0 / sqrt(2.0); const dcomplex sq2iti = sq2i * uim; @@ -476,4 +483,7 @@ void lffft(string data_file, string output_path) { delete ccr; delete[] file_lines; printf("LFFT: Done.\n"); +#ifdef USE_NVTX + nvtxRangePop(); +#endif }