From ffbff0ca7ad734bfc999ad8c4da62a6251a6732e Mon Sep 17 00:00:00 2001
From: Giovanni La Mura <giovanni.lamura@inaf.it>
Date: Mon, 4 Mar 2024 17:13:06 +0100
Subject: [PATCH] Use LAPACK zgetri instead of lucin()

---
 src/cluster/Makefile         |  2 +-
 src/cluster/cluster.cpp      | 20 +++++++++++++++++---
 src/include/lapack_calls.h   |  3 ++-
 src/libnptm/Makefile         |  6 +++---
 src/libnptm/lapack_calls.cpp | 16 ++++++++--------
 5 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/src/cluster/Makefile b/src/cluster/Makefile
index 22bc637a..3fc31183 100644
--- a/src/cluster/Makefile
+++ b/src/cluster/Makefile
@@ -46,7 +46,7 @@ $(BUILDDIR_CLU)/edfb_clu: $(OBJDIR) $(OBJDIR)/edfb_clu.o $(BUILDDIR_CLU)
 
 # We put $(LIBNPTM) as an object to link in directly, so that it will be found at runtime even if it is a shared object library. May change in the future when we have an install: target
 $(BUILDDIR_CLU)/np_cluster: $(OBJDIR) $(CXX_CLU_OBJS) $(BUILDDIR_CLU) $(LIBNPTM)
-	$(CXX) $(CXXFLAGS) -o $(BUILDDIR_CLU)/np_cluster $(CXX_CLU_OBJS) $(LIBNPTM) $(CXXLDFLAGS) 
+	$(CXX) $(CXXFLAGS) -o $(BUILDDIR_CLU)/np_cluster $(CXX_CLU_OBJS) $(LIBNPTM) $(CXXLDFLAGS) -llapacke
 
 clean:
 	rm -f $(F_CLU_OBJS) $(CXX_CLU_OBJS) $(CXX_CLU_DEBUG)
diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp
index 4591b28a..ea86b739 100644
--- a/src/cluster/cluster.cpp
+++ b/src/cluster/cluster.cpp
@@ -32,6 +32,10 @@
 #include "../include/TransitionMatrix.h"
 #endif
 
+#ifndef INCLUDE_LAPACK_CALLS_H_
+#include "../include/lapack_calls.h"
+#endif
+
 using namespace std;
 
 /*! \brief C++ implementation of CLU
@@ -124,7 +128,11 @@ void cluster(string config_file, string data_file, string output_path) {
     }
     C2 *c2 = new C2(nsph, configurations, npnt, npntts);
     complex<double> **am = new complex<double>*[mxndm];
-    for (int ai = 0; ai < mxndm; ai++) am[ai] = new complex<double>[mxndm]();
+    //complex<double> **tam = new complex<double>*[mxndm];
+    for (int ai = 0; ai < mxndm; ai++) {
+      am[ai] = new complex<double>[mxndm](); 
+      //tam[ai] = new complex<double>[mxndm]();
+    }
     const int ndi = c4->nsph * c4->nlim;
     C9 *c9 = new C9(ndi, c4->nlem, 2 * ndi, 2 * c4->nlem);
     double *gaps = new double[nsph]();
@@ -331,8 +339,10 @@ void cluster(string config_file, string data_file, string output_path) {
 	  if (jer != 0) break;
 	} // i132 loop
 	cms(am, c1, c1ao, c4, c6);
+	//cms(tam, c1, c1ao, c4, c6);
 	int ndit = 2 * nsph * c4->nlim;
-	lucin(am, mxndm, ndit, jer);
+	//lucin(am, mxndm, ndit, jer);
+	zinvert(am, ndit, jer);
 	if (jer != 0) break; // jxi488 loop: goes to memory clean
 	ztm(am, c1, c1ao, c4, c6, c9);
 	if (sconf->idfc >= 0) {
@@ -910,8 +920,12 @@ void cluster(string config_file, string data_file, string output_path) {
       delete[] zpv[zi];
     }
     delete[] zpv;
-    for (int ai = mxndm - 1; ai > -1; ai--) delete[] am[ai];
+    for (int ai = mxndm - 1; ai > -1; ai--) {
+      delete[] am[ai];
+      //delete[] tam[ai];
+    }
     delete[] am;
+    //delete[] tam;
     delete[] gaps;
     for (int ti = 1; ti > -1; ti--) {
       delete[] tqse[ti];
diff --git a/src/include/lapack_calls.h b/src/include/lapack_calls.h
index 0270a05c..52996a53 100644
--- a/src/include/lapack_calls.h
+++ b/src/include/lapack_calls.h
@@ -14,7 +14,8 @@
  *
  * \param mat: Matrix of complex. The matrix to be inverted.
  * \param n: `int` The number of rows and columns of the [n x n] matrix.
+ * \param jer: `int &` Reference to an integer return flag.
  */
-void zinvert(std::complex<double> **mat, int n);
+void zinvert(std::complex<double> **mat, int n, int &jer);
 
 #endif
diff --git a/src/libnptm/Makefile b/src/libnptm/Makefile
index b2fbc322..7fedc1fb 100644
--- a/src/libnptm/Makefile
+++ b/src/libnptm/Makefile
@@ -19,11 +19,11 @@ endif
 include ../make.inc
 
 
-CXX_NPTM_OBJS=$(OBJDIR)/Commons.o $(OBJDIR)/Configuration.o $(OBJDIR)/file_io.o $(OBJDIR)/Parsers.o $(OBJDIR)/sph_subs.o $(OBJDIR)/clu_subs.o $(OBJDIR)/tfrfme.o $(OBJDIR)/tra_subs.o $(OBJDIR)/TransitionMatrix.o
+CXX_NPTM_OBJS=$(OBJDIR)/Commons.o $(OBJDIR)/Configuration.o $(OBJDIR)/file_io.o $(OBJDIR)/Parsers.o $(OBJDIR)/sph_subs.o $(OBJDIR)/clu_subs.o $(OBJDIR)/tfrfme.o $(OBJDIR)/tra_subs.o $(OBJDIR)/TransitionMatrix.o $(OBJDIR)/lapack_calls.o
 
-CXX_NPTM_DYNOBJS=$(DYNOBJDIR)/Commons.o $(DYNOBJDIR)/Configuration.o $(DYNOBJDIR)/file_io.o $(DYNOBJDIR)/Parsers.o $(DYNOBJDIR)/sph_subs.o $(DYNOBJDIR)/clu_subs.o $(DYNOBJDIR)/tfrfme.o $(DYNOBJDIR)/tra_subs.o $(DYNOBJDIR)/TransitionMatrix.o
+CXX_NPTM_DYNOBJS=$(DYNOBJDIR)/Commons.o $(DYNOBJDIR)/Configuration.o $(DYNOBJDIR)/file_io.o $(DYNOBJDIR)/Parsers.o $(DYNOBJDIR)/sph_subs.o $(DYNOBJDIR)/clu_subs.o $(DYNOBJDIR)/tfrfme.o $(DYNOBJDIR)/tra_subs.o $(DYNOBJDIR)/TransitionMatrix.o $(DYNOBJDIR)/lapack_calls.o
 
-CXX_NPTM_DEBUG=$(OBJDIR)/Commons.g* $(OBJDIR)/Configuration.g* $(OBJDIR)/file_io.g* $(OBJDIR)/Parsers.g* $(OBJDIR)/sph_subs.g* $(OBJDIR)/clu_subs.g* $(OBJDIR)/tfrfme.g* $(OBJDIR)/tra_subs.g* $(OBJDIR)/TransitionMatrix.g*
+CXX_NPTM_DEBUG=$(OBJDIR)/Commons.g* $(OBJDIR)/Configuration.g* $(OBJDIR)/file_io.g* $(OBJDIR)/Parsers.g* $(OBJDIR)/sph_subs.g* $(OBJDIR)/clu_subs.g* $(OBJDIR)/tfrfme.g* $(OBJDIR)/tra_subs.g* $(OBJDIR)/TransitionMatrix.g* $(OBJDIR)/lapack_calls.g*
 
 all: $(BUILDDIR_NPTM)/libnptm.a $(BUILDDIR_NPTM)/libnptm.so
 
diff --git a/src/libnptm/lapack_calls.cpp b/src/libnptm/lapack_calls.cpp
index 0a6eaf34..f57e31da 100644
--- a/src/libnptm/lapack_calls.cpp
+++ b/src/libnptm/lapack_calls.cpp
@@ -3,30 +3,30 @@
 #include "lapacke.h"
 
 #ifndef INCLUDE_LAPACK_CALLS_H_
-#define INCLUDE_LAPACK_CALLS_H_
 #include "../include/lapack_calls.h"
 #endif
 
 using namespace std;
 
-void zinvert(std::complex<double> **mat, int n){
+void zinvert(std::complex<double> **mat, int n, int &jer) {
+  jer = 0;
   __complex__ double *arr = new __complex__ double[n * n];
   const __complex__ double uim = 1.0di;
   for (int i = 0; i < n; i++) {
     for (int j = 0; j < n; j++) {
-      int idx = i * n + j;
-      arr[idx] = mat[i][j].real() + uim * mat[i][j].imag();
+      int idx = i + n * j;
+      arr[idx] = mat[j][i].real() + uim * mat[j][i].imag();
     }
   }
   
   int* IPIV = new int[n];
   
-  LAPACKE_zgetrf(LAPACK_ROW_MAJOR, N, N, arr, N, IPIV);
-  LAPACKE_zgetri(LAPACK_ROW_MAJOR, N, arr, N, IPIV);
+  LAPACKE_zgetrf(LAPACK_ROW_MAJOR, n, n, arr, n, IPIV);
+  LAPACKE_zgetri(LAPACK_ROW_MAJOR, n, arr, n, IPIV);
   for (int i = 0; i < n; i++) {
     for (int j = 0; j < n; j++) {
-      int idx = i * n + j;
-      mat[i][j] = complex<double>(__real__ arr[idx], __imag__ arr[idx]);
+      int idx = i + n * j;
+      mat[j][i] = complex<double>(__real__ arr[idx], __imag__ arr[idx]);
     }
   }
   delete [] IPIV;
-- 
GitLab