From b1ef1121af924a7646cc30721b2329549b21a6e4 Mon Sep 17 00:00:00 2001 From: Giovanni La Mura <giovanni.lamura@inaf.it> Date: Wed, 6 Mar 2024 16:44:49 +0100 Subject: [PATCH] Implement optimization fall-back for algebraic functions --- src/cluster/cluster.cpp | 20 +++++++++++++++---- src/include/algebraic.h | 30 ++++++++++++++++++++++++++++ src/include/clu_subs.h | 6 +++--- src/libnptm/Makefile | 6 +++--- src/libnptm/algebraic.cpp | 30 ++++++++++++++++++++++++++++ src/libnptm/clu_subs.cpp | 38 ++++++++++++++++++------------------ src/libnptm/lapack_calls.cpp | 4 ++-- 7 files changed, 103 insertions(+), 31 deletions(-) create mode 100644 src/include/algebraic.h create mode 100644 src/libnptm/algebraic.cpp diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp index a31b2b71..a0fc6642 100644 --- a/src/cluster/cluster.cpp +++ b/src/cluster/cluster.cpp @@ -37,6 +37,14 @@ #include "../include/lapack_calls.h" #endif +#ifndef INCLUDE_ALGEBRAIC_H_ +#include "../include/algebraic.h" +#endif + +#ifdef LAPACK_ILP64 +#define USE_LAPACK +#endif + using namespace std; /*! \brief C++ implementation of CLU @@ -71,7 +79,7 @@ void cluster(string config_file, string data_file, string output_path) { if (sconf->number_of_spheres == gconf->number_of_spheres) { // Shortcuts to variables stored in configuration objects int nsph = gconf->number_of_spheres; - int mxndm = gconf->mxndm; + lapack_int mxndm = (lapack_int)gconf->mxndm; int inpol = gconf->in_pol; int npnt = gconf->npnt; int npntts = gconf->npntts; @@ -273,6 +281,11 @@ void cluster(string config_file, string data_file, string output_path) { string tppoan_name = output_path + "/c_TPPOAN"; tppoan.open(tppoan_name.c_str(), ios::out | ios::binary); if (tppoan.is_open()) { +#ifdef USE_LAPACK + printf("INFO: should use LAPACK calls.\n"); +#else + printf("INFO: should use fall-back lucin() calls.\n"); +#endif tppoan.write(reinterpret_cast<char *>(&iavm), sizeof(int)); tppoan.write(reinterpret_cast<char *>(&isam), sizeof(int)); tppoan.write(reinterpret_cast<char *>(&inpol), sizeof(int)); @@ -340,10 +353,9 @@ void cluster(string config_file, string data_file, string output_path) { if (jer != 0) break; } // i132 loop cms(am, c1, c1ao, c4, c6); - //cms(tam, c1, c1ao, c4, c6); lapack_int ndit = 2 * nsph * c4->nlim; - //lucin(am, mxndm, ndit, jer); - zinvert(am, ndit, jer); + invert_matrix(am, ndit, jer, mxndm); + //zinvert(am, ndit, jer); if (jer != 0) break; // jxi488 loop: goes to memory clean ztm(am, c1, c1ao, c4, c6, c9); if (sconf->idfc >= 0) { diff --git a/src/include/algebraic.h b/src/include/algebraic.h new file mode 100644 index 00000000..8c0cf124 --- /dev/null +++ b/src/include/algebraic.h @@ -0,0 +1,30 @@ +/*! \file algebraic.h + * + * \brief Declaration of algebraic functions with different call-backs. + * + * In principle, the system that runs NP_TMcode may offer various types of + * optimized features, such as multi-core or multi-node scaling, GPU offload, + * or external libraries. This header collects a set of functions that can + * perform standard algebraic operations choosing the most optimized available + * system as a call-back. If no optimization is detected, eventually the + * legacy serial function implementation is used as a fall-back. + */ + +#ifndef lapack_int +#define lapack_int int64_t +#endif + +#ifndef INCLUDE_ALGEBRAIC_H_ +#define INCLUDE_ALGEBRAIC_H_ + +/*! \brief Perform in-place matrix inversion. + * + * \param mat: Matrix of complex. The matrix to be inverted (must be a square matrix). + * \param size: `lapack_int` The size of the matrix (i.e. the number of its rows or columns). + * \param ier: `int &` Reference to an integer variable for returning a result flag. + * \param max_size: `lapack_int` The maximum expected size (required by some call-backs, + * optional, defaults to 0). + */ +void invert_matrix(std::complex<double> **mat, lapack_int size, int &ier, lapack_int max_size=0); + +#endif diff --git a/src/include/clu_subs.h b/src/include/clu_subs.h index 49b89e20..6420e619 100644 --- a/src/include/clu_subs.h +++ b/src/include/clu_subs.h @@ -154,11 +154,11 @@ void hjv( * LU decomposition. See Eq. (5.29) in Borghese, Denti & Saija (2007). * * \param am: Matrix of complex. - * \param nddmst: `const int` - * \param n: `int` + * \param nddmst: `const int64_t` + * \param n: `int64_t` * \param ier: `int &` */ -void lucin(std::complex<double> **am, const int nddmst, int n, int &ier); +void lucin(std::complex<double> **am, const int64_t nddmst, int64_t n, int &ier); /*! \brief Compute the average extinction cross-section. * diff --git a/src/libnptm/Makefile b/src/libnptm/Makefile index 7fedc1fb..61e447c4 100644 --- a/src/libnptm/Makefile +++ b/src/libnptm/Makefile @@ -19,11 +19,11 @@ endif include ../make.inc -CXX_NPTM_OBJS=$(OBJDIR)/Commons.o $(OBJDIR)/Configuration.o $(OBJDIR)/file_io.o $(OBJDIR)/Parsers.o $(OBJDIR)/sph_subs.o $(OBJDIR)/clu_subs.o $(OBJDIR)/tfrfme.o $(OBJDIR)/tra_subs.o $(OBJDIR)/TransitionMatrix.o $(OBJDIR)/lapack_calls.o +CXX_NPTM_OBJS=$(OBJDIR)/Commons.o $(OBJDIR)/Configuration.o $(OBJDIR)/file_io.o $(OBJDIR)/Parsers.o $(OBJDIR)/sph_subs.o $(OBJDIR)/clu_subs.o $(OBJDIR)/tfrfme.o $(OBJDIR)/tra_subs.o $(OBJDIR)/TransitionMatrix.o $(OBJDIR)/lapack_calls.o $(OBJDIR)/algebraic.o -CXX_NPTM_DYNOBJS=$(DYNOBJDIR)/Commons.o $(DYNOBJDIR)/Configuration.o $(DYNOBJDIR)/file_io.o $(DYNOBJDIR)/Parsers.o $(DYNOBJDIR)/sph_subs.o $(DYNOBJDIR)/clu_subs.o $(DYNOBJDIR)/tfrfme.o $(DYNOBJDIR)/tra_subs.o $(DYNOBJDIR)/TransitionMatrix.o $(DYNOBJDIR)/lapack_calls.o +CXX_NPTM_DYNOBJS=$(DYNOBJDIR)/Commons.o $(DYNOBJDIR)/Configuration.o $(DYNOBJDIR)/file_io.o $(DYNOBJDIR)/Parsers.o $(DYNOBJDIR)/sph_subs.o $(DYNOBJDIR)/clu_subs.o $(DYNOBJDIR)/tfrfme.o $(DYNOBJDIR)/tra_subs.o $(DYNOBJDIR)/TransitionMatrix.o $(DYNOBJDIR)/lapack_calls.o $(DYNOBJDIR)/algebraic.o -CXX_NPTM_DEBUG=$(OBJDIR)/Commons.g* $(OBJDIR)/Configuration.g* $(OBJDIR)/file_io.g* $(OBJDIR)/Parsers.g* $(OBJDIR)/sph_subs.g* $(OBJDIR)/clu_subs.g* $(OBJDIR)/tfrfme.g* $(OBJDIR)/tra_subs.g* $(OBJDIR)/TransitionMatrix.g* $(OBJDIR)/lapack_calls.g* +CXX_NPTM_DEBUG=$(OBJDIR)/Commons.g* $(OBJDIR)/Configuration.g* $(OBJDIR)/file_io.g* $(OBJDIR)/Parsers.g* $(OBJDIR)/sph_subs.g* $(OBJDIR)/clu_subs.g* $(OBJDIR)/tfrfme.g* $(OBJDIR)/tra_subs.g* $(OBJDIR)/TransitionMatrix.g* $(OBJDIR)/lapack_calls.g* $(OBJDIR)/algebraic.g* all: $(BUILDDIR_NPTM)/libnptm.a $(BUILDDIR_NPTM)/libnptm.so diff --git a/src/libnptm/algebraic.cpp b/src/libnptm/algebraic.cpp new file mode 100644 index 00000000..a1c42031 --- /dev/null +++ b/src/libnptm/algebraic.cpp @@ -0,0 +1,30 @@ +/*! \file algebraic.cpp + * + * \brief Implementation of algebraic functions with different call-backs. + */ + +#include <complex> + +#ifndef INCLUDE_ALGEBRAIC_H_ +#include "../include/algebraic.h" +#endif + +#ifndef INCLUDE_LAPACK_CALLS_H_ +#include "lapacke.h" +#include "../include/lapack_calls.h" +#endif + +// >>> FALL-BACK FUNCTIONS DECLARATION <<< // +extern void lucin(std::complex<double> **mat, int64_t max_size, int64_t size, int &ier); +// >>> END OF FALL-BACK FUNCTIONS <<< // + +using namespace std; + +void invert_matrix(std::complex<double> **mat, lapack_int size, int &ier, lapack_int max_size) { + ier = 0; +#ifdef USE_LAPACK + zinvert(mat, size, ier); +#else + lucin(mat, (int64_t)max_size, (int64_t)size, ier); +#endif +} diff --git a/src/libnptm/clu_subs.cpp b/src/libnptm/clu_subs.cpp index 22287bb7..48f087b9 100644 --- a/src/libnptm/clu_subs.cpp +++ b/src/libnptm/clu_subs.cpp @@ -892,7 +892,7 @@ void hjv( delete[] rfn; } -void lucin(std::complex<double> **am, const int nddmst, int n, int &ier) { +void lucin(std::complex<double> **am, const int64_t nddmst, int64_t n, int &ier) { /* NDDMST FIRST DIMENSION OF AM AS DECLARED IN DIMENSION * STATEMENT. * N NUMBER OF ROWS IN AM. @@ -903,9 +903,9 @@ void lucin(std::complex<double> **am, const int nddmst, int n, int &ier) { complex<double> cc0 = complex<double>(0.0, 0.0); ier = 0; int nminus = n - 1; - for (int i = 1; i <= n; i++) { + for (int64_t i = 1; i <= n; i++) { double sum = 0.0; - for (int j = 1; j <= n; j++) { + for (int64_t j = 1; j <= n; j++) { sum += ( am[i - 1][j - 1].real() * am[i - 1][j - 1].real() + am[i - 1][j - 1].imag() * am[i - 1][j - 1].imag() @@ -918,12 +918,12 @@ void lucin(std::complex<double> **am, const int nddmst, int n, int &ier) { // (ROW INTERCHANGES TAKE PLACE, AND THE INDICES OF THE PIVOTAL ROWS // ARE PLACED IN V.) /* >>> THERE APPEARS TO BE A BUG IN THE FOLLOWING LOOP <<< */ - for (int k = 1; k <= n; k++) { - int kplus = k + 1; - int kminus = k - 1; - int l = k; + for (int64_t k = 1; k <= n; k++) { + int64_t kplus = k + 1; + int64_t kminus = k - 1; + int64_t l = k; double psqmax = 0.0; - for (int i = k; i <= n; i++) { + for (int64_t i = k; i <= n; i++) { cfun = cdtp(-am[i - 1][k - 1], am, i, 1, k, kminus); ctemp = -cfun; am[i - 1][k - 1] = ctemp; @@ -934,7 +934,7 @@ void lucin(std::complex<double> **am, const int nddmst, int n, int &ier) { } } // i2029 loop if (l != k) { - for (int j = 1; j <= n; j++) { + for (int64_t j = 1; j <= n; j++) { ctemp = am[k - 1][j - 1]; am[k - 1][j - 1] = am[l - 1][j - 1]; am[l - 1][j - 1] = ctemp; @@ -951,7 +951,7 @@ void lucin(std::complex<double> **am, const int nddmst, int n, int &ier) { ctemp = 1.0 / am[k - 1][k - 1]; am[k - 1][k - 1] = ctemp; if (kplus <= n) { - for (int j = kplus; j <= n; j++) { + for (int64_t j = kplus; j <= n; j++) { cfun = cdtp(-am[k - 1][j - 1], am, k, 1, j, kminus); am[k - 1][j - 1] = -ctemp * cfun; } // j2059 loop @@ -959,9 +959,9 @@ void lucin(std::complex<double> **am, const int nddmst, int n, int &ier) { } // k2019 loop // 4. REPLACE AM BY ITS INVERSE AMINV. // 4.1 REPLACE L AND U BY THEIR INVERSE LINV AND UINV. - for (int k = 1; k <= nminus; k++) { - int kplus = k + 1; - for (int i = kplus; i <= n; i++) { + for (int64_t k = 1; k <= nminus; k++) { + int64_t kplus = k + 1; + for (int64_t i = kplus; i <= n; i++) { cfun = cdtp(cc0, am, i, k, k, i - k); am[i - 1][k - 1] = -am[i - 1][i - 1] * cfun; cfun = cdtp(am[k - 1][i - 1], am, k, kplus, i, i - k - 1); @@ -969,8 +969,8 @@ void lucin(std::complex<double> **am, const int nddmst, int n, int &ier) { } // i4119 loop } // k4109 loop // 4.2 FORM AMINV=UINV*LINV. - for (int k = 1; k <= n; k++) { - for (int i = 1; i <= n; i++) { + for (int64_t k = 1; k <= n; k++) { + for (int64_t i = 1; i <= n; i++) { if (i < k) { cfun = cdtp(cc0, am, i, k, k, n - k + 1); am[i - 1][k -1] = cfun; @@ -983,11 +983,11 @@ void lucin(std::complex<double> **am, const int nddmst, int n, int &ier) { } // k4209 loop // 4.3 INTERCHANGE COLUMNS OF AMINV AS SPECIFIED BY V, BUT IN REVERSE // ORDER. - for (int l = 1; l <= n; l++) { - int k = n - l + 1; - int kcol = (int)(v[k - 1]); + for (int64_t l = 1; l <= n; l++) { + int64_t k = n - l + 1; + int64_t kcol = (int64_t)(v[k - 1]); if (kcol != k) { - for (int i = 1; i <= n; i++) { + for (int64_t i = 1; i <= n; i++) { ctemp = am[i - 1][k - 1]; am[i - 1][k - 1] = am[i - 1][kcol - 1]; am[i - 1][kcol - 1] = ctemp; diff --git a/src/libnptm/lapack_calls.cpp b/src/libnptm/lapack_calls.cpp index 0b43c046..462bdaec 100644 --- a/src/libnptm/lapack_calls.cpp +++ b/src/libnptm/lapack_calls.cpp @@ -27,6 +27,6 @@ void zinvert(std::complex<double> **mat, lapack_int n, int &jer) { mat[j][i] = std::complex<double>(creal(arr[idx]), cimag(arr[idx])); } } - delete [] IPIV; - delete [] arr; + delete[] IPIV; + delete[] arr; } -- GitLab