From 1b49f1f09f3f97cdaac559f1b60253911ea343c9 Mon Sep 17 00:00:00 2001
From: Giovanni La Mura <giovanni.lamura@inaf.it>
Date: Wed, 17 Apr 2024 17:08:41 +0200
Subject: [PATCH] Bring am matrix definition within thread loop and delete
 immediately after use

---
 src/cluster/cluster.cpp | 40 ++++++++++++++--------------------------
 1 file changed, 14 insertions(+), 26 deletions(-)

diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp
index 4c6cbeaa..79b967a2 100644
--- a/src/cluster/cluster.cpp
+++ b/src/cluster/cluster.cpp
@@ -52,7 +52,7 @@ using namespace std;
 
 // I would like to put it all in a struct, but then I'd have to write a constructor for it, due to members defined as references, creating a worse nightmare than the one I'd like to simplify...
 
-int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, C1 *c1, C1_AddOns *c1ao, C2 *c2, C3 *c3, C4 *c4, C6 *c6, C9 *c9, FILE *output, string output_path, double *gaps, double **tqse, dcomplex **tqspe, double **tqss, dcomplex **tqsps, double ****zpv, double **gapm, dcomplex **gappm, int nth, int nths, int nph, int nphs, int nk, int nks, int nkks, double *argi, double *args, double **gap, dcomplex **gapp, double **tqce, dcomplex **tqcpe, double **tqcs, dcomplex **tqcps, double *duk, fstream &tppoan, double **cextlr, double **cext, double **cmullr, double **cmul, double *gapv, double *tqev, double *tqsv, int nxi, int nsph, np_int mxndm, int inpol, int iavm, int npnt, int npntts, int isam, int lm, double th, double thstp, double thlst, double ths, double thsstp, double thslst, double ph, double phstp, double phlst, double phs, double phsstp, double phslst, double th1, double ph1, double ths1, double phs1, double thsca, double *u, double *us, double *un, double *uns, double *up, double *ups, double *unmp, double *unsmp, double *upmp, double *upsmp, double &scan, double &cfmp, double &sfmp, double &cfsp, double &sfsp, double sqsfi, double exri, int lcalc, dcomplex arg, double wn, double vk, np_int ndit, dcomplex **am, int isq, int ibf, Logger *logger);
+int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, C1 *c1, C1_AddOns *c1ao, C2 *c2, C3 *c3, C4 *c4, C6 *c6, C9 *c9, FILE *output, string output_path, double *gaps, double **tqse, dcomplex **tqspe, double **tqss, dcomplex **tqsps, double ****zpv, double **gapm, dcomplex **gappm, int nth, int nths, int nph, int nphs, int nk, int nks, int nkks, double *argi, double *args, double **gap, dcomplex **gapp, double **tqce, dcomplex **tqcpe, double **tqcs, dcomplex **tqcps, double *duk, fstream &tppoan, double **cextlr, double **cext, double **cmullr, double **cmul, double *gapv, double *tqev, double *tqsv, int nxi, int nsph, np_int mxndm, int inpol, int iavm, int npnt, int npntts, int isam, int lm, double th, double thstp, double thlst, double ths, double thsstp, double thslst, double ph, double phstp, double phlst, double phs, double phsstp, double phslst, double th1, double ph1, double ths1, double phs1, double thsca, double *u, double *us, double *un, double *uns, double *up, double *ups, double *unmp, double *unsmp, double *upmp, double *upsmp, double &scan, double &cfmp, double &sfmp, double &cfsp, double &sfsp, double sqsfi, double exri, int lcalc, dcomplex arg, double wn, double vk, np_int ndit, int isq, int ibf, Logger *logger);
 
 /*! \brief C++ implementation of CLU
  *
@@ -132,11 +132,6 @@ void cluster(string config_file, string data_file, string output_path) {
     C2 *c2 = new C2(nsph, configurations, npnt, npntts);
     np_int ndit = 2 * nsph * c4->nlim;
     logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)ndit) + " x " + to_string((int64_t)ndit) +".\n");
-    dcomplex *am_vector = new dcomplex[ndit * ndit]();
-    dcomplex **am = new dcomplex*[ndit];
-    for (int ai = 0; ai < ndit; ai++) {
-      am[ai] = (am_vector + ai * ndit);
-    }
     const int ndi = c4->nsph * c4->nlim;
     C9 *c9 = new C9(ndi, c4->nlem, 2 * ndi, 2 * c4->nlem);
     double *gaps = new double[nsph]();
@@ -302,7 +297,7 @@ void cluster(string config_file, string data_file, string output_path) {
       }
       // do the first iteration on jxi488 separately, since it seems to be different from the others
       int jxi488 = 1;
-      jer = cluster_jxi488_cycle(jxi488, sconf, gconf, c1, c1ao, c2, c3, c4, c6, c9, output, output_path, gaps, tqse, tqspe, tqss, tqsps, zpv, gapm, gappm, nth, nths, nph, nphs, nk, nks, nkks, argi, args, gap, gapp, tqce, tqcpe, tqcs, tqcps, duk, tppoan, cextlr, cext, cmullr, cmul, gapv, tqev, tqsv, nxi, nsph, mxndm, inpol, iavm, npnt, npntts, isam, lm, th, thstp, thlst, ths, thsstp, thslst, ph, phstp, phlst, phs, phsstp, phslst, th1, ph1, ths1, phs1, thsca, u, us, un, uns, up, ups, unmp, unsmp, upmp, upsmp, scan, cfmp, sfmp, cfsp, sfsp, sqsfi, exri, lcalc, arg, wn, vk, ndit, am, isq, ibf, logger);
+      jer = cluster_jxi488_cycle(jxi488, sconf, gconf, c1, c1ao, c2, c3, c4, c6, c9, output, output_path, gaps, tqse, tqspe, tqss, tqsps, zpv, gapm, gappm, nth, nths, nph, nphs, nk, nks, nkks, argi, args, gap, gapp, tqce, tqcpe, tqcs, tqcps, duk, tppoan, cextlr, cext, cmullr, cmul, gapv, tqev, tqsv, nxi, nsph, mxndm, inpol, iavm, npnt, npntts, isam, lm, th, thstp, thlst, ths, thsstp, thslst, ph, phstp, phlst, phs, phsstp, phslst, th1, ph1, ths1, phs1, thsca, u, us, un, uns, up, ups, unmp, unsmp, upmp, upsmp, scan, cfmp, sfmp, cfsp, sfsp, sqsfi, exri, lcalc, arg, wn, vk, ndit, isq, ibf, logger);
 
       // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
       int ompnumthreads = 1;
@@ -400,8 +395,6 @@ void cluster(string config_file, string data_file, string output_path) {
 	double wn_2 = wn;
 	double vk_2 = vk;
 	np_int ndit_2 = ndit;
-	dcomplex **am_2 = NULL;
-	dcomplex *am_vector_2 = NULL;
 	int isq_2 = isq;
 	int ibf_2 = ibf;
 	int nth_2 = nth;
@@ -458,7 +451,6 @@ void cluster(string config_file, string data_file, string output_path) {
 	  unsmp_2 = unsmp;
 	  upmp_2 = upmp;
 	  upsmp_2 = upsmp;
-	  am_2 = am;
 	}
 	else {
 	  // this is not thread 0, so do create fresh copies of all local variables
@@ -584,20 +576,15 @@ void cluster(string config_file, string data_file, string output_path) {
 	    upmp_2[ti] = upmp[ti];
 	    upsmp_2[ti] = upsmp[ti];
 	  }
-	  am_2 = new dcomplex*[ndit];
-	  am_vector_2 = new dcomplex[ndit * ndit]();
-	  for (int ai = 0; ai < ndit; ai++) {
-	    am_2[ai] = (am_vector_2 + ai * ndit);
-	  }
 	}
 	fstream &tppoan_2 = *tppoanp_2;
 	// make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
 #pragma omp barrier
-	if (myompthread==0) logger->log("Syncing OpenMP threads and starting the loop on wavelengthsn\n");
+	if (myompthread==0) logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n");
 	// ok, now I can actually start the parallel calculations
 #pragma omp for
 	for (jxi488 = 2; jxi488 <= nxi; jxi488++) {
-	  jer = cluster_jxi488_cycle(jxi488, sconf_2, gconf_2, c1_2, c1ao_2, c2_2, c3_2, c4_2, c6_2, c9_2, output_2, output_path, gaps_2, tqse_2, tqspe_2, tqss_2, tqsps_2, zpv_2, gapm_2, gappm_2, nth_2, nths_2, nph_2, nphs_2, nk_2, nks_2, nkks_2, argi_2, args_2, gap_2, gapp_2, tqce_2, tqcpe_2, tqcs_2, tqcps_2, duk_2, tppoan_2, cextlr_2, cext_2, cmullr_2, cmul_2, gapv_2, tqev_2, tqsv_2, nxi_2, nsph_2, mxndm_2, inpol_2, iavm_2, npnt_2, npntts_2, isam_2, lm_2, th_2, thstp_2, thlst_2, ths_2, thsstp_2, thslst_2, ph_2, phstp_2, phlst_2, phs_2, phsstp_2, phslst_2, th1_2, ph1_2, ths1_2, phs1_2, thsca_2, u_2, us_2, un_2, uns_2, up_2, ups_2, unmp_2, unsmp_2, upmp_2, upsmp_2, scan_2, cfmp_2, sfmp_2, cfsp_2, sfsp_2, sqsfi_2, exri_2, lcalc_2, arg_2, wn_2, vk_2, ndit_2, am_2, isq_2, ibf_2, logger);
+	  jer = cluster_jxi488_cycle(jxi488, sconf_2, gconf_2, c1_2, c1ao_2, c2_2, c3_2, c4_2, c6_2, c9_2, output_2, output_path, gaps_2, tqse_2, tqspe_2, tqss_2, tqsps_2, zpv_2, gapm_2, gappm_2, nth_2, nths_2, nph_2, nphs_2, nk_2, nks_2, nkks_2, argi_2, args_2, gap_2, gapp_2, tqce_2, tqcpe_2, tqcs_2, tqcps_2, duk_2, tppoan_2, cextlr_2, cext_2, cmullr_2, cmul_2, gapv_2, tqev_2, tqsv_2, nxi_2, nsph_2, mxndm_2, inpol_2, iavm_2, npnt_2, npntts_2, isam_2, lm_2, th_2, thstp_2, thlst_2, ths_2, thsstp_2, thslst_2, ph_2, phstp_2, phlst_2, phs_2, phsstp_2, phslst_2, th1_2, ph1_2, ths1_2, phs1_2, thsca_2, u_2, us_2, un_2, uns_2, up_2, ups_2, unmp_2, unsmp_2, upmp_2, upsmp_2, scan_2, cfmp_2, sfmp_2, cfsp_2, sfsp_2, sqsfi_2, exri_2, lcalc_2, arg_2, wn_2, vk_2, ndit_2, isq_2, ibf_2, logger);
 	}
 
 #pragma omp barrier
@@ -680,8 +667,6 @@ void cluster(string config_file, string data_file, string output_path) {
 	  delete[] unsmp_2;
 	  delete[] upmp_2;
 	  delete[] upsmp_2;
-	  delete[] am_vector_2;
-	  delete[] am_2;
 	}
 #pragma omp barrier
 	{
@@ -748,8 +733,6 @@ void cluster(string config_file, string data_file, string output_path) {
     delete c4;
     delete c6;
     delete c9;
-    delete[] am_vector;
-    delete[] am;
     delete[] gaps;
     for (int ti = 1; ti > -1; ti--) {
       delete[] tqse[ti];
@@ -817,7 +800,7 @@ void cluster(string config_file, string data_file, string output_path) {
 }
 
 
-int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, C1 *c1, C1_AddOns *c1ao, C2 *c2, C3 *c3, C4 *c4, C6 *c6, C9 *c9, FILE *output, string output_path, double *gaps, double **tqse, dcomplex **tqspe, double **tqss, dcomplex **tqsps, double ****zpv, double **gapm, dcomplex **gappm, int nth, int nths, int nph, int nphs, int nk, int nks, int nkks, double *argi, double *args, double **gap, dcomplex **gapp, double **tqce, dcomplex **tqcpe, double **tqcs, dcomplex **tqcps, double *duk, fstream &tppoan, double **cextlr, double **cext, double **cmullr, double **cmul, double *gapv, double *tqev, double *tqsv, int nxi, int nsph, np_int mxndm, int inpol, int iavm, int npnt, int npntts, int isam, int lm, double th, double thstp, double thlst, double ths, double thsstp, double thslst, double ph, double phstp, double phlst, double phs, double phsstp, double phslst, double th1, double ph1, double ths1, double phs1, double thsca, double *u, double *us, double *un, double *uns, double *up, double *ups, double *unmp, double *unsmp, double *upmp, double *upsmp, double &scan, double &cfmp, double &sfmp, double &cfsp, double &sfsp, double sqsfi, double exri, int lcalc, dcomplex arg, double wn, double vk, np_int ndit, dcomplex **am, int isq, int ibf, Logger *logger)
+int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, C1 *c1, C1_AddOns *c1ao, C2 *c2, C3 *c3, C4 *c4, C6 *c6, C9 *c9, FILE *output, string output_path, double *gaps, double **tqse, dcomplex **tqspe, double **tqss, dcomplex **tqsps, double ****zpv, double **gapm, dcomplex **gappm, int nth, int nths, int nph, int nphs, int nk, int nks, int nkks, double *argi, double *args, double **gap, dcomplex **gapp, double **tqce, dcomplex **tqcpe, double **tqcs, dcomplex **tqcps, double *duk, fstream &tppoan, double **cextlr, double **cext, double **cmullr, double **cmul, double *gapv, double *tqev, double *tqsv, int nxi, int nsph, np_int mxndm, int inpol, int iavm, int npnt, int npntts, int isam, int lm, double th, double thstp, double thlst, double ths, double thsstp, double thslst, double ph, double phstp, double phlst, double phs, double phsstp, double phslst, double th1, double ph1, double ths1, double phs1, double thsca, double *u, double *us, double *un, double *uns, double *up, double *ups, double *unmp, double *unsmp, double *upmp, double *upsmp, double &scan, double &cfmp, double &sfmp, double &cfsp, double &sfsp, double sqsfi, double exri, int lcalc, dcomplex arg, double wn, double vk, np_int ndit, int isq, int ibf, Logger *logger)
 {
   logger->log("INFO: running scale iteration " + to_string(jxi488) + " of " + to_string(nxi) + ".\n");
   int jer = 0;
@@ -904,6 +887,11 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
       //break;
     }
   } // i132 loop
+  dcomplex *am_vector = new dcomplex[ndit * ndit]();
+  dcomplex **am = new dcomplex*[ndit];
+  for (int ai = 0; ai < ndit; ai++) {
+    am[ai] = (am_vector + ai * ndit);
+  }
   cms(am, c1, c1ao, c4, c6);
   invert_matrix(am, ndit, jer, mxndm);
   if (jer != 0) {
@@ -917,12 +905,14 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
     // delete[] unsmp;
     // delete[] upmp;
     // delete[] upsmp;
-    // delete[] am_vector;
-    // delete[] am;
+    delete[] am_vector;
+    delete[] am;
     return jer;
     // break; // jxi488 loop: goes to memory clean
   }
   ztm(am, c1, c1ao, c4, c6, c9);
+  delete[] am_vector;
+  delete[] am;
   if (idfc >= 0) {
     if (jxi488 == jwtm) {
       int nlemt = 2 * c4->nlem;
@@ -1485,8 +1475,6 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
   // delete[] unsmp;
   // delete[] upmp;
   // delete[] upsmp;
-  // delete[] am_vector;
-  // delete[] am;
 
   logger->log("INFO: finished scale iteration " + to_string(jxi488) + " of " + to_string(nxi) + ".\n");
 
-- 
GitLab