From 6293080ad7751afc615a0e60314e1debb4d126c9 Mon Sep 17 00:00:00 2001 From: Giovanni La Mura <giovanni.lamura@inaf.it> Date: Fri, 8 Mar 2024 18:02:36 +0100 Subject: [PATCH] Transform the AM matrix into a contiguous memory object and skip copying before LAPCK call --- src/cluster/cluster.cpp | 14 ++++++-------- src/libnptm/lapack_calls.cpp | 15 +-------------- 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp index bcdb133f..e2debd90 100644 --- a/src/cluster/cluster.cpp +++ b/src/cluster/cluster.cpp @@ -144,9 +144,11 @@ void cluster(string config_file, string data_file, string output_path) { if (sconf->iog_vec[ci -1] >= ci) configurations++; } C2 *c2 = new C2(nsph, configurations, npnt, npntts); - dcomplex **am = new dcomplex*[mxndm]; - for (int ai = 0; ai < mxndm; ai++) { - am[ai] = new dcomplex[mxndm](); + lapack_int ndit = 2 * nsph * c4->nlim; + dcomplex *am_vector = new dcomplex[ndit * ndit](); + dcomplex **am = new dcomplex*[ndit]; + for (int ai = 0; ai < ndit; ai++) { + am[ai] = (am_vector + ai * ndit); } const int ndi = c4->nsph * c4->nlim; C9 *c9 = new C9(ndi, c4->nlem, 2 * ndi, 2 * c4->nlem); @@ -359,7 +361,6 @@ void cluster(string config_file, string data_file, string output_path) { if (jer != 0) break; } // i132 loop cms(am, c1, c1ao, c4, c6); - lapack_int ndit = 2 * nsph * c4->nlim; invert_matrix(am, ndit, jer, mxndm); if (jer != 0) break; // jxi488 loop: goes to memory clean ztm(am, c1, c1ao, c4, c6, c9); @@ -937,10 +938,7 @@ void cluster(string config_file, string data_file, string output_path) { delete[] zpv[zi]; } delete[] zpv; - for (int ai = mxndm - 1; ai > -1; ai--) { - delete[] am[ai]; - //delete[] tam[ai]; - } + delete[] am_vector; delete[] am; //delete[] tam; delete[] gaps; diff --git a/src/libnptm/lapack_calls.cpp b/src/libnptm/lapack_calls.cpp index ddf8ddd4..0b1dac5a 100644 --- a/src/libnptm/lapack_calls.cpp +++ b/src/libnptm/lapack_calls.cpp @@ -23,18 +23,12 @@ #ifdef USE_LAPACK void zinvert(dcomplex **mat, lapack_int n, int &jer) { jer = 0; - dcomplex *arr = new dcomplex[n * n]; + dcomplex *arr = &(mat[0][0]); const dcomplex uim = 0.0 + 1.0 * I; #ifdef USE_MKL MKL_Complex16 *arr2 = (MKL_Complex16 *) arr; #endif - for (lapack_int i = 0; i < n; i++) { - for (lapack_int j = 0; j < n; j++) { - lapack_int idx = i + n * j; - arr[idx] = mat[j][i]; - } - } lapack_int* IPIV = new lapack_int[n](); @@ -46,13 +40,6 @@ void zinvert(dcomplex **mat, lapack_int n, int &jer) { LAPACKE_zgetri(LAPACK_ROW_MAJOR, n, arr, n, IPIV); #endif - for (lapack_int i = 0; i < n; i++) { - for (lapack_int j = 0; j < n; j++) { - lapack_int idx = i + n * j; - mat[j][i] = arr[idx]; - } - } delete[] IPIV; - delete[] arr; } #endif -- GitLab