From ffbff0ca7ad734bfc999ad8c4da62a6251a6732e Mon Sep 17 00:00:00 2001 From: Giovanni La Mura <giovanni.lamura@inaf.it> Date: Mon, 4 Mar 2024 17:13:06 +0100 Subject: [PATCH] Use LAPACK zgetri instead of lucin() --- src/cluster/Makefile | 2 +- src/cluster/cluster.cpp | 20 +++++++++++++++++--- src/include/lapack_calls.h | 3 ++- src/libnptm/Makefile | 6 +++--- src/libnptm/lapack_calls.cpp | 16 ++++++++-------- 5 files changed, 31 insertions(+), 16 deletions(-) diff --git a/src/cluster/Makefile b/src/cluster/Makefile index 22bc637a..3fc31183 100644 --- a/src/cluster/Makefile +++ b/src/cluster/Makefile @@ -46,7 +46,7 @@ $(BUILDDIR_CLU)/edfb_clu: $(OBJDIR) $(OBJDIR)/edfb_clu.o $(BUILDDIR_CLU) # We put $(LIBNPTM) as an object to link in directly, so that it will be found at runtime even if it is a shared object library. May change in the future when we have an install: target $(BUILDDIR_CLU)/np_cluster: $(OBJDIR) $(CXX_CLU_OBJS) $(BUILDDIR_CLU) $(LIBNPTM) - $(CXX) $(CXXFLAGS) -o $(BUILDDIR_CLU)/np_cluster $(CXX_CLU_OBJS) $(LIBNPTM) $(CXXLDFLAGS) + $(CXX) $(CXXFLAGS) -o $(BUILDDIR_CLU)/np_cluster $(CXX_CLU_OBJS) $(LIBNPTM) $(CXXLDFLAGS) -llapacke clean: rm -f $(F_CLU_OBJS) $(CXX_CLU_OBJS) $(CXX_CLU_DEBUG) diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp index 4591b28a..ea86b739 100644 --- a/src/cluster/cluster.cpp +++ b/src/cluster/cluster.cpp @@ -32,6 +32,10 @@ #include "../include/TransitionMatrix.h" #endif +#ifndef INCLUDE_LAPACK_CALLS_H_ +#include "../include/lapack_calls.h" +#endif + using namespace std; /*! \brief C++ implementation of CLU @@ -124,7 +128,11 @@ void cluster(string config_file, string data_file, string output_path) { } C2 *c2 = new C2(nsph, configurations, npnt, npntts); complex<double> **am = new complex<double>*[mxndm]; - for (int ai = 0; ai < mxndm; ai++) am[ai] = new complex<double>[mxndm](); + //complex<double> **tam = new complex<double>*[mxndm]; + for (int ai = 0; ai < mxndm; ai++) { + am[ai] = new complex<double>[mxndm](); + //tam[ai] = new complex<double>[mxndm](); + } const int ndi = c4->nsph * c4->nlim; C9 *c9 = new C9(ndi, c4->nlem, 2 * ndi, 2 * c4->nlem); double *gaps = new double[nsph](); @@ -331,8 +339,10 @@ void cluster(string config_file, string data_file, string output_path) { if (jer != 0) break; } // i132 loop cms(am, c1, c1ao, c4, c6); + //cms(tam, c1, c1ao, c4, c6); int ndit = 2 * nsph * c4->nlim; - lucin(am, mxndm, ndit, jer); + //lucin(am, mxndm, ndit, jer); + zinvert(am, ndit, jer); if (jer != 0) break; // jxi488 loop: goes to memory clean ztm(am, c1, c1ao, c4, c6, c9); if (sconf->idfc >= 0) { @@ -910,8 +920,12 @@ void cluster(string config_file, string data_file, string output_path) { delete[] zpv[zi]; } delete[] zpv; - for (int ai = mxndm - 1; ai > -1; ai--) delete[] am[ai]; + for (int ai = mxndm - 1; ai > -1; ai--) { + delete[] am[ai]; + //delete[] tam[ai]; + } delete[] am; + //delete[] tam; delete[] gaps; for (int ti = 1; ti > -1; ti--) { delete[] tqse[ti]; diff --git a/src/include/lapack_calls.h b/src/include/lapack_calls.h index 0270a05c..52996a53 100644 --- a/src/include/lapack_calls.h +++ b/src/include/lapack_calls.h @@ -14,7 +14,8 @@ * * \param mat: Matrix of complex. The matrix to be inverted. * \param n: `int` The number of rows and columns of the [n x n] matrix. + * \param jer: `int &` Reference to an integer return flag. */ -void zinvert(std::complex<double> **mat, int n); +void zinvert(std::complex<double> **mat, int n, int &jer); #endif diff --git a/src/libnptm/Makefile b/src/libnptm/Makefile index b2fbc322..7fedc1fb 100644 --- a/src/libnptm/Makefile +++ b/src/libnptm/Makefile @@ -19,11 +19,11 @@ endif include ../make.inc -CXX_NPTM_OBJS=$(OBJDIR)/Commons.o $(OBJDIR)/Configuration.o $(OBJDIR)/file_io.o $(OBJDIR)/Parsers.o $(OBJDIR)/sph_subs.o $(OBJDIR)/clu_subs.o $(OBJDIR)/tfrfme.o $(OBJDIR)/tra_subs.o $(OBJDIR)/TransitionMatrix.o +CXX_NPTM_OBJS=$(OBJDIR)/Commons.o $(OBJDIR)/Configuration.o $(OBJDIR)/file_io.o $(OBJDIR)/Parsers.o $(OBJDIR)/sph_subs.o $(OBJDIR)/clu_subs.o $(OBJDIR)/tfrfme.o $(OBJDIR)/tra_subs.o $(OBJDIR)/TransitionMatrix.o $(OBJDIR)/lapack_calls.o -CXX_NPTM_DYNOBJS=$(DYNOBJDIR)/Commons.o $(DYNOBJDIR)/Configuration.o $(DYNOBJDIR)/file_io.o $(DYNOBJDIR)/Parsers.o $(DYNOBJDIR)/sph_subs.o $(DYNOBJDIR)/clu_subs.o $(DYNOBJDIR)/tfrfme.o $(DYNOBJDIR)/tra_subs.o $(DYNOBJDIR)/TransitionMatrix.o +CXX_NPTM_DYNOBJS=$(DYNOBJDIR)/Commons.o $(DYNOBJDIR)/Configuration.o $(DYNOBJDIR)/file_io.o $(DYNOBJDIR)/Parsers.o $(DYNOBJDIR)/sph_subs.o $(DYNOBJDIR)/clu_subs.o $(DYNOBJDIR)/tfrfme.o $(DYNOBJDIR)/tra_subs.o $(DYNOBJDIR)/TransitionMatrix.o $(DYNOBJDIR)/lapack_calls.o -CXX_NPTM_DEBUG=$(OBJDIR)/Commons.g* $(OBJDIR)/Configuration.g* $(OBJDIR)/file_io.g* $(OBJDIR)/Parsers.g* $(OBJDIR)/sph_subs.g* $(OBJDIR)/clu_subs.g* $(OBJDIR)/tfrfme.g* $(OBJDIR)/tra_subs.g* $(OBJDIR)/TransitionMatrix.g* +CXX_NPTM_DEBUG=$(OBJDIR)/Commons.g* $(OBJDIR)/Configuration.g* $(OBJDIR)/file_io.g* $(OBJDIR)/Parsers.g* $(OBJDIR)/sph_subs.g* $(OBJDIR)/clu_subs.g* $(OBJDIR)/tfrfme.g* $(OBJDIR)/tra_subs.g* $(OBJDIR)/TransitionMatrix.g* $(OBJDIR)/lapack_calls.g* all: $(BUILDDIR_NPTM)/libnptm.a $(BUILDDIR_NPTM)/libnptm.so diff --git a/src/libnptm/lapack_calls.cpp b/src/libnptm/lapack_calls.cpp index 0a6eaf34..f57e31da 100644 --- a/src/libnptm/lapack_calls.cpp +++ b/src/libnptm/lapack_calls.cpp @@ -3,30 +3,30 @@ #include "lapacke.h" #ifndef INCLUDE_LAPACK_CALLS_H_ -#define INCLUDE_LAPACK_CALLS_H_ #include "../include/lapack_calls.h" #endif using namespace std; -void zinvert(std::complex<double> **mat, int n){ +void zinvert(std::complex<double> **mat, int n, int &jer) { + jer = 0; __complex__ double *arr = new __complex__ double[n * n]; const __complex__ double uim = 1.0di; for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { - int idx = i * n + j; - arr[idx] = mat[i][j].real() + uim * mat[i][j].imag(); + int idx = i + n * j; + arr[idx] = mat[j][i].real() + uim * mat[j][i].imag(); } } int* IPIV = new int[n]; - LAPACKE_zgetrf(LAPACK_ROW_MAJOR, N, N, arr, N, IPIV); - LAPACKE_zgetri(LAPACK_ROW_MAJOR, N, arr, N, IPIV); + LAPACKE_zgetrf(LAPACK_ROW_MAJOR, n, n, arr, n, IPIV); + LAPACKE_zgetri(LAPACK_ROW_MAJOR, n, arr, n, IPIV); for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { - int idx = i * n + j; - mat[i][j] = complex<double>(__real__ arr[idx], __imag__ arr[idx]); + int idx = i + n * j; + mat[j][i] = complex<double>(__real__ arr[idx], __imag__ arr[idx]); } } delete [] IPIV; -- GitLab