From 6293080ad7751afc615a0e60314e1debb4d126c9 Mon Sep 17 00:00:00 2001
From: Giovanni La Mura <giovanni.lamura@inaf.it>
Date: Fri, 8 Mar 2024 18:02:36 +0100
Subject: [PATCH] Transform the AM matrix into a contiguous memory object and
 skip copying before LAPCK call

---
 src/cluster/cluster.cpp      | 14 ++++++--------
 src/libnptm/lapack_calls.cpp | 15 +--------------
 2 files changed, 7 insertions(+), 22 deletions(-)

diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp
index bcdb133f..e2debd90 100644
--- a/src/cluster/cluster.cpp
+++ b/src/cluster/cluster.cpp
@@ -144,9 +144,11 @@ void cluster(string config_file, string data_file, string output_path) {
       if (sconf->iog_vec[ci -1] >= ci) configurations++;
     }
     C2 *c2 = new C2(nsph, configurations, npnt, npntts);
-    dcomplex **am = new dcomplex*[mxndm];
-    for (int ai = 0; ai < mxndm; ai++) {
-      am[ai] = new dcomplex[mxndm](); 
+    lapack_int ndit = 2 * nsph * c4->nlim;
+    dcomplex *am_vector = new dcomplex[ndit * ndit]();
+    dcomplex **am = new dcomplex*[ndit];
+    for (int ai = 0; ai < ndit; ai++) {
+      am[ai] = (am_vector + ai * ndit);
     }
     const int ndi = c4->nsph * c4->nlim;
     C9 *c9 = new C9(ndi, c4->nlem, 2 * ndi, 2 * c4->nlem);
@@ -359,7 +361,6 @@ void cluster(string config_file, string data_file, string output_path) {
 	  if (jer != 0) break;
 	} // i132 loop
 	cms(am, c1, c1ao, c4, c6);
-	lapack_int ndit = 2 * nsph * c4->nlim;
 	invert_matrix(am, ndit, jer, mxndm);
 	if (jer != 0) break; // jxi488 loop: goes to memory clean
 	ztm(am, c1, c1ao, c4, c6, c9);
@@ -937,10 +938,7 @@ void cluster(string config_file, string data_file, string output_path) {
       delete[] zpv[zi];
     }
     delete[] zpv;
-    for (int ai = mxndm - 1; ai > -1; ai--) {
-      delete[] am[ai];
-      //delete[] tam[ai];
-    }
+    delete[] am_vector;
     delete[] am;
     //delete[] tam;
     delete[] gaps;
diff --git a/src/libnptm/lapack_calls.cpp b/src/libnptm/lapack_calls.cpp
index ddf8ddd4..0b1dac5a 100644
--- a/src/libnptm/lapack_calls.cpp
+++ b/src/libnptm/lapack_calls.cpp
@@ -23,18 +23,12 @@
 #ifdef USE_LAPACK
 void zinvert(dcomplex **mat, lapack_int n, int &jer) {
   jer = 0;
-  dcomplex *arr = new dcomplex[n * n];
+  dcomplex *arr = &(mat[0][0]);
   const dcomplex uim = 0.0 + 1.0 * I;
 
 #ifdef USE_MKL
   MKL_Complex16 *arr2 = (MKL_Complex16 *) arr;
 #endif
-  for (lapack_int i = 0; i < n; i++) {
-    for (lapack_int j = 0; j < n; j++) {
-      lapack_int idx = i + n * j;
-      arr[idx] = mat[j][i];
-    }
-  }
   
   lapack_int* IPIV = new lapack_int[n]();
   
@@ -46,13 +40,6 @@ void zinvert(dcomplex **mat, lapack_int n, int &jer) {
   LAPACKE_zgetri(LAPACK_ROW_MAJOR, n, arr, n, IPIV);
 #endif
 
-  for (lapack_int i = 0; i < n; i++) {
-    for (lapack_int j = 0; j < n; j++) {
-      lapack_int idx = i + n * j;
-      mat[j][i] = arr[idx];
-    }
-  }
   delete[] IPIV;
-  delete[] arr;
 }
 #endif
-- 
GitLab