diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp index 6f1b9aaf87c64924fa5355059a8bb44b5307b970..93737f22beedbeb90e70dbe8f0ce92094abb2cbf 100644 --- a/src/cluster/cluster.cpp +++ b/src/cluster/cluster.cpp @@ -46,54 +46,7 @@ using namespace std; -// I would like to put it all in a struct, but then I'd have to write a constructor for it, due to members defined as -// references, creating a worse nightmare than the one I'd like to simplify... -// -// struct clu_jxi488_cycle_args { -// int jxi488; -// ScattererConfiguration *sconf; -// GeometryConfiguration *gconf; -// C1 *c1; -// C1_AddOns *c1ao; -// C2 *c2; -// C3 *c3; -// C4 *c4; -// C6 *c6; -// C9 *c9; -// FILE *output; -// string output_path; -// double *gaps; -// double **tqse; -// dcomplex **tqspe; -// double **tqss; -// dcomplex **tqsps; -// double ****zpv; -// double **gapm; -// dcomplex **gappm; -// int nth, int nths; -// int nph; -// int nphs; -// int nk; -// int nks; -// int nkks; -// double *argi; -// double *args; -// double **gap; -// dcomplex **gapp; -// double **tqce; -// dcomplex **tqcpe; -// double **tqcs; -// dcomplex **tqcps; -// double *duk; -// fstream &tppoan; -// double **cextlr; -// double **cext; -// double **cmullr; -// double **cmul; -// double *gapv; -// double *tqev; -// double *tqsv; -// } +// I would like to put it all in a struct, but then I'd have to write a constructor for it, due to members defined as references, creating a worse nightmare than the one I'd like to simplify... int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, C1 *c1, C1_AddOns *c1ao, C2 *c2, C3 *c3, C4 *c4, C6 *c6, C9 *c9, FILE *output, string output_path, double *gaps, double **tqse, dcomplex **tqspe, double **tqss, dcomplex **tqsps, double ****zpv, double **gapm, dcomplex **gappm, int nth, int nths, int nph, int nphs, int nk, int nks, int nkks, double *argi, double *args, double **gap, dcomplex **gapp, double **tqce, dcomplex **tqcpe, double **tqcs, dcomplex **tqcps, double *duk, fstream &tppoan, double **cextlr, double **cext, double **cmullr, double **cmul, double *gapv, double *tqev, double *tqsv, int nxi, int nsph, np_int mxndm, int inpol, int iavm, int npnt, int npntts, int isam, int lm, double th, double thstp, double thlst, double ths, double thsstp, double thslst, double ph, double phstp, double phlst, double phs, double phsstp, double phslst, double th1, double ph1, double ths1, double phs1, double thsca, double *u, double *us, double *un, double *uns, double *up, double *ups, double *unmp, double *unsmp, double *upmp, double *upsmp, double &scan, double &cfmp, double &sfmp, double &cfsp, double &sfsp, double sqsfi, double exri, int lcalc, dcomplex arg, double wn, double vk, np_int ndit, dcomplex **am, int isq, int ibf); @@ -178,6 +131,7 @@ void cluster(string config_file, string data_file, string output_path) { } C2 *c2 = new C2(nsph, configurations, npnt, npntts); np_int ndit = 2 * nsph * c4->nlim; + printf("Size of matrices to invert: %ld x %ld\n", (int64_t) ndit, (int64_t) ndit); dcomplex *am_vector = new dcomplex[ndit * ndit](); dcomplex **am = new dcomplex*[ndit]; for (int ai = 0; ai < ndit; ai++) { @@ -318,7 +272,8 @@ void cluster(string config_file, string data_file, string output_path) { double exri = sqrt(sconf->exdc); double vk = 0.0; // NOTE: Not really sure it should be initialized at 0 fprintf(output, " REFR. INDEX OF EXTERNAL MEDIUM=%15.7lE\n", exri); - fstream tppoan; + fstream *tppoanp = new fstream; + fstream &tppoan = *tppoanp; string tppoan_name = output_path + "/c_TPPOAN"; tppoan.open(tppoan_name.c_str(), ios::out | ios::binary); if (tppoan.is_open()) { @@ -351,118 +306,49 @@ void cluster(string config_file, string data_file, string output_path) { #pragma omp parallel { - // To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one - ScattererConfiguration *sconf_2 = new ScattererConfiguration(*sconf); - GeometryConfiguration *gconf_2 = new GeometryConfiguration(*gconf); - C1 *c1_2 = new C1(*c1); - C1_AddOns *c1ao_2 = new C1_AddOns(*c1ao); - C2 *c2_2 = new C2(*c2); - C3 *c3_2 = new C3(*c3); - C4 *c4_2 = new C4(*c4); - C6 *c6_2 = new C6(*c6); - C9 *c9_2 = new C9(*c9); // Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defiled value anyway int myompthread = 0; #ifdef _OPENMP // If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files myompthread = omp_get_thread_num(); if (myompthread == 0) ompnumthreads = omp_get_num_threads(); - FILE *output_2 = fopen((output_path + "/c_OCLU_" + to_string(myompthread)).c_str(), "w"); - fstream tppoan_2; - tppoan_2.open((output_path + "/c_TPPOAN_" + to_string(myompthread)).c_str(), ios::out | ios::binary); -#else - // If OpenMP is not enabled, just keep doing the output to the global files - FILE *output_2 = output; - fstream &tppoan_2 = tppoan; #endif - double *gaps_2 = new double[nsph](); - double **tqse_2 = new double*[2]; - double **tqce_2 = new double*[2]; - double **tqcs_2 = new double*[2]; - dcomplex **tqspe_2 = new dcomplex*[2]; - dcomplex **tqcpe_2 = new dcomplex*[2]; - dcomplex **tqcps_2 = new dcomplex*[2]; - double **tqss_2 = new double*[2]; - dcomplex **tqsps_2 = new dcomplex*[2]; - for (int ti = 0; ti < 2; ti++) { - tqse_2[ti] = new double[nsph](); - tqce_2[ti] = new double[3](); - tqcs_2[ti] = new double[3](); - tqspe_2[ti] = new dcomplex[nsph](); - tqcpe_2[ti] = new dcomplex[3](); - tqcps_2[ti] = new dcomplex[3](); - tqss_2[ti] = new double[nsph](); - tqsps_2[ti] = new dcomplex[nsph](); - for (int tj=0; tj<nsph; tj++) { - tqse_2[ti][tj] = tqse[ti][tj]; - tqspe_2[ti][tj] = tqspe[ti][tj]; - tqss_2[ti][tj] = tqss[ti][tj]; - tqsps_2[ti][tj] = tqsps[ti][tj]; - } - for (int tj=0; tj<3; tj++) { - tqce_2[ti][tj] = tqce[ti][tj]; - tqcs_2[ti][tj] = tqcs[ti][tj]; - tqcpe_2[ti][tj] = tqcpe[ti][tj]; - tqcps_2[ti][tj] = tqcps[ti][tj]; - } - } - double ****zpv_2 = new double***[c4->lm]; //[gconf->l_max][3][2][2]; // Matrix: dim[LM x 3 x 2 x 2] - for (int zi = 0; zi < c4->lm; zi++) { - zpv_2[zi] = new double**[3]; - for (int zj = 0; zj < 3; zj++) { - zpv_2[zi][zj] = new double*[2]; - for (int zk = 0; zk < 2; zk++) { - zpv_2[zi][zj][zk] = new double[2](); - for (int zl = 0; zl < 2; zl++) zpv_2[zi][zj][zk][zl] = zpv[zi][zj][zk][zl]; - } - } - } - double **gapm_2 = new double*[3]; - dcomplex **gappm_2 = new dcomplex*[3]; - double **gap_2 = new double*[3]; - dcomplex **gapp_2 = new dcomplex*[3]; - for (int gi = 0; gi < 3; gi++) { - gap_2[gi] = new double[2](); - gapm_2[gi] = new double[2](); - gapp_2[gi] = new dcomplex[2](); - gappm_2[gi] = new dcomplex[2](); - for (int gj=0; gj<2; gj++) { - gap_2[gi][gj] = gap[gi][gj]; - gapp_2[gi][gj] = gapp[gi][gj]; - gapm_2[gi][gj] = gapm[gi][gj]; - gappm_2[gi][gj] = gappm[gi][gj]; - } - } - double *argi_2 = new double[1](); - argi_2[0] = argi[0]; - double *args_2 = new double[1](); - args_2[0] = args[0]; - double *duk_2 = new double[3](); - for (int di=0; di<3; di++) duk_2[di] = duk[di]; - double **cextlr_2 = new double*[4]; - double **cext_2 = new double*[4]; - double **cmullr_2 = new double*[4]; - double **cmul_2 = new double*[4]; - for (int ci = 0; ci < 4; ci++) { - cextlr_2[ci] = new double[4](); - cext_2[ci] = new double[4](); - cmullr_2[ci] = new double[4](); - cmul_2[ci] = new double[4](); - for (int cj=0; cj<4; cj++) { - cextlr_2[ci][cj] = cextlr[ci][cj]; - cext_2[ci][cj] = cext[ci][cj]; - cmullr_2[ci][cj] = cmullr[ci][cj]; - cmul_2[ci][cj] = cmul[ci][cj]; - } - } - double *gapv_2 = new double[3](); - for (int gi=0; gi<3; gi++) gapv_2[gi] = gapv[gi]; - double *tqev_2 = new double[3](); - double *tqsv_2 = new double[3](); - for (int ti=0; ti<3; ti++) { - tqev_2[ti] = tqev[ti]; - tqsv_2[ti] = tqsv[ti]; - } + // To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one. Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number + ScattererConfiguration *sconf_2 = NULL; + GeometryConfiguration *gconf_2 = NULL; + C1 *c1_2 = NULL; + C1_AddOns *c1ao_2 = NULL; + C2 *c2_2 = NULL; + C3 *c3_2 = NULL; + C4 *c4_2 = NULL; + C6 *c6_2 = NULL; + C9 *c9_2 = NULL; + FILE *output_2 = NULL; + fstream *tppoanp_2 = NULL; + double *gaps_2 = NULL; + double **tqse_2 = NULL; + double **tqce_2 = NULL; + double **tqcs_2 = NULL; + dcomplex **tqspe_2 = NULL; + dcomplex **tqcpe_2 = NULL; + dcomplex **tqcps_2 = NULL; + double **tqss_2 = NULL; + dcomplex **tqsps_2 = NULL; + double ****zpv_2 = NULL; + double **gapm_2 = NULL; + dcomplex **gappm_2 = NULL; + double **gap_2 = NULL; + dcomplex **gapp_2 = NULL; + double *argi_2 = NULL; + double *args_2 = NULL; + double *duk_2 = NULL; + double **cextlr_2 = NULL; + double **cext_2 = NULL; + double **cmullr_2 = NULL; + double **cmul_2 = NULL; + double *gapv_2 = NULL; + double *tqev_2 = NULL; + double *tqsv_2 = NULL; int nxi_2 = nxi; int nsph_2 = nsph; np_int mxndm_2 = mxndm; @@ -489,28 +375,16 @@ void cluster(string config_file, string data_file, string output_path) { double ths1_2 = ths1; double phs1_2 = phs1; double thsca_2 = thsca; - double *u_2 = new double[3](); - double *us_2 = new double[3](); - double *un_2 = new double[3](); - double *uns_2 = new double[3](); - double *up_2 = new double[3](); - double *ups_2 = new double[3](); - double *unmp_2 = new double[3](); - double *unsmp_2 = new double[3](); - double *upmp_2 = new double[3](); - double *upsmp_2 = new double[3](); - for (int ti=0; ti<3; ti++) { - u_2[ti] = u[ti]; - us_2[ti] = us[ti]; - un_2[ti] = un[ti]; - uns_2[ti] = uns[ti]; - up_2[ti] = up[ti]; - ups_2[ti] = ups[ti]; - unmp_2[ti] = unmp[ti]; - unsmp_2[ti] = unsmp[ti]; - upmp_2[ti] = upmp[ti]; - upsmp_2[ti] = upsmp[ti]; - } + double *u_2 = NULL; + double *us_2 = NULL; + double *un_2 = NULL; + double *uns_2 = NULL; + double *up_2 = NULL; + double *ups_2 = NULL; + double *unmp_2 = NULL; + double *unsmp_2 = NULL; + double *upmp_2 = NULL; + double *upsmp_2 = NULL; double scan_2 = scan; double cfmp_2 = cfmp; double sfmp_2 = sfmp; @@ -523,11 +397,8 @@ void cluster(string config_file, string data_file, string output_path) { double wn_2 = wn; double vk_2 = vk; np_int ndit_2 = ndit; - dcomplex **am_2 = new dcomplex*[ndit]; - dcomplex *am_vector_2 = new dcomplex[ndit * ndit](); - for (int ai = 0; ai < ndit; ai++) { - am_2[ai] = (am_vector_2 + ai * ndit); - } + dcomplex **am_2 = NULL; + dcomplex *am_vector_2 = NULL; int isq_2 = isq; int ibf_2 = ibf; int nth_2 = nth; @@ -537,104 +408,288 @@ void cluster(string config_file, string data_file, string output_path) { int nk_2 = nk; int nks_2 = nks; int nkks_2 = nkks; - - + // for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones + if (myompthread == 0) { + sconf_2 = sconf; + gconf_2 = gconf; + c1_2 = c1; + c1ao_2 = c1ao; + c2_2 = c2; + c3_2 = c3; + c4_2 = c4; + c6_2 = c6; + c9_2 = c9; + output_2 = output; + tppoanp_2 = tppoanp; + gaps_2 = gaps; + tqse_2 = tqse; + tqce_2 = tqce; + tqcs_2 = tqcs; + tqspe_2 = tqspe; + tqcpe_2 = tqcpe; + tqcps_2 = tqcps; + tqss_2 = tqss; + tqsps_2 = tqsps; + zpv_2 = zpv; + gapm_2 = gapm; + gappm_2 = gappm; + gap_2 = gap; + gapp_2 = gapp; + argi_2 = argi; + args_2 = args; + duk_2 = duk; + cextlr_2 = cextlr; + cext_2 = cext; + cmullr_2 = cmullr; + cmul_2 = cmul; + gapv_2 = gapv; + tqev_2 = tqev; + tqsv_2 = tqsv; + u_2 = u; + us_2 = us; + un_2 = un; + uns_2 = uns; + up_2 = up; + ups_2 = ups; + unmp_2 = unmp; + unsmp_2 = unsmp; + upmp_2 = upmp; + upsmp_2 = upsmp; + am_2 = am; + } + else { + // this is not thread 0, so do create fresh copies of all local variables + sconf_2 = new ScattererConfiguration(*sconf); + gconf_2 = new GeometryConfiguration(*gconf); + c1_2 = new C1(*c1); + c1ao_2 = new C1_AddOns(*c1ao); + c2_2 = new C2(*c2); + c3_2 = new C3(*c3); + c4_2 = new C4(*c4); + c6_2 = new C6(*c6); + c9_2 = new C9(*c9); + output_2 = fopen((output_path + "/c_OCLU_" + to_string(myompthread)).c_str(), "w"); + tppoanp_2 = new fstream; + tppoanp_2->open((output_path + "/c_TPPOAN_" + to_string(myompthread)).c_str(), ios::out | ios::binary); + gaps_2 = new double[nsph](); + tqse_2 = new double*[2]; + tqce_2 = new double*[2]; + tqcs_2 = new double*[2]; + tqspe_2 = new dcomplex*[2]; + tqcpe_2 = new dcomplex*[2]; + tqcps_2 = new dcomplex*[2]; + tqss_2 = new double*[2]; + tqsps_2 = new dcomplex*[2]; + for (int ti = 0; ti < 2; ti++) { + tqse_2[ti] = new double[nsph](); + tqce_2[ti] = new double[3](); + tqcs_2[ti] = new double[3](); + tqspe_2[ti] = new dcomplex[nsph](); + tqcpe_2[ti] = new dcomplex[3](); + tqcps_2[ti] = new dcomplex[3](); + tqss_2[ti] = new double[nsph](); + tqsps_2[ti] = new dcomplex[nsph](); + for (int tj=0; tj<nsph; tj++) { + tqse_2[ti][tj] = tqse[ti][tj]; + tqspe_2[ti][tj] = tqspe[ti][tj]; + tqss_2[ti][tj] = tqss[ti][tj]; + tqsps_2[ti][tj] = tqsps[ti][tj]; + } + for (int tj=0; tj<3; tj++) { + tqce_2[ti][tj] = tqce[ti][tj]; + tqcs_2[ti][tj] = tqcs[ti][tj]; + tqcpe_2[ti][tj] = tqcpe[ti][tj]; + tqcps_2[ti][tj] = tqcps[ti][tj]; + } + } + zpv_2 = new double***[c4->lm]; //[gconf->l_max][3][2][2]; // Matrix: dim[LM x 3 x 2 x 2] + for (int zi = 0; zi < c4->lm; zi++) { + zpv_2[zi] = new double**[3]; + for (int zj = 0; zj < 3; zj++) { + zpv_2[zi][zj] = new double*[2]; + for (int zk = 0; zk < 2; zk++) { + zpv_2[zi][zj][zk] = new double[2](); + for (int zl = 0; zl < 2; zl++) zpv_2[zi][zj][zk][zl] = zpv[zi][zj][zk][zl]; + } + } + } + gapm_2 = new double*[3]; + gappm_2 = new dcomplex*[3]; + gap_2 = new double*[3]; + gapp_2 = new dcomplex*[3]; + for (int gi = 0; gi < 3; gi++) { + gap_2[gi] = new double[2](); + gapm_2[gi] = new double[2](); + gapp_2[gi] = new dcomplex[2](); + gappm_2[gi] = new dcomplex[2](); + for (int gj=0; gj<2; gj++) { + gap_2[gi][gj] = gap[gi][gj]; + gapp_2[gi][gj] = gapp[gi][gj]; + gapm_2[gi][gj] = gapm[gi][gj]; + gappm_2[gi][gj] = gappm[gi][gj]; + } + } + argi_2 = new double[1](); + argi_2[0] = argi[0]; + args_2 = new double[1](); + args_2[0] = args[0]; + duk_2 = new double[3](); + for (int di=0; di<3; di++) duk_2[di] = duk[di]; + cextlr_2 = new double*[4]; + cext_2 = new double*[4]; + cmullr_2 = new double*[4]; + cmul_2 = new double*[4]; + for (int ci = 0; ci < 4; ci++) { + cextlr_2[ci] = new double[4](); + cext_2[ci] = new double[4](); + cmullr_2[ci] = new double[4](); + cmul_2[ci] = new double[4](); + for (int cj=0; cj<4; cj++) { + cextlr_2[ci][cj] = cextlr[ci][cj]; + cext_2[ci][cj] = cext[ci][cj]; + cmullr_2[ci][cj] = cmullr[ci][cj]; + cmul_2[ci][cj] = cmul[ci][cj]; + } + } + gapv_2 = new double[3](); + for (int gi=0; gi<3; gi++) gapv_2[gi] = gapv[gi]; + tqev_2 = new double[3](); + tqsv_2 = new double[3](); + for (int ti=0; ti<3; ti++) { + tqev_2[ti] = tqev[ti]; + tqsv_2[ti] = tqsv[ti]; + } + u_2 = new double[3](); + us_2 = new double[3](); + un_2 = new double[3](); + uns_2 = new double[3](); + up_2 = new double[3](); + ups_2 = new double[3](); + unmp_2 = new double[3](); + unsmp_2 = new double[3](); + upmp_2 = new double[3](); + upsmp_2 = new double[3](); + for (int ti=0; ti<3; ti++) { + u_2[ti] = u[ti]; + us_2[ti] = us[ti]; + un_2[ti] = un[ti]; + uns_2[ti] = uns[ti]; + up_2[ti] = up[ti]; + ups_2[ti] = ups[ti]; + unmp_2[ti] = unmp[ti]; + unsmp_2[ti] = unsmp[ti]; + upmp_2[ti] = upmp[ti]; + upsmp_2[ti] = upsmp[ti]; + } + am_2 = new dcomplex*[ndit]; + am_vector_2 = new dcomplex[ndit * ndit](); + for (int ai = 0; ai < ndit; ai++) { + am_2[ai] = (am_vector_2 + ai * ndit); + } + } + fstream &tppoan_2 = *tppoanp_2; + // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads #pragma omp barrier + if (myompthread==0) printf("Syncing OpenMP threads and starting the loop on wavelengths\n"); + // ok, now I can actually start the parallel calculations #pragma omp for for (jxi488 = 2; jxi488 <= nxi; jxi488++) { - jer = cluster_jxi488_cycle(jxi488, sconf_2, gconf_2, c1_2, c1ao_2, c2_2, c3_2, c4_2, c6_2, c9_2, output_2, output_path, gaps_2, tqse_2, tqspe_2, tqss_2, tqsps_2, zpv_2, gapm_2, gappm_2, nth_2, nths_2, nph_2, nphs_2, nk_2, nks_2, nkks_2, argi_2, args_2, gap_2, gapp_2, tqce_2, tqcpe_2, tqcs_2, tqcps_2, duk_2, tppoan_2, cextlr_2, cext_2, cmullr_2, cmul_2, gapv_2, tqev_2, tqsv_2, nxi_2, nsph_2, mxndm_2, inpol_2, iavm_2, npnt_2, npntts_2, isam_2, lm_2, th_2, thstp_2, thlst_2, ths_2, thsstp_2, thslst_2, ph_2, phstp_2, phlst_2, phs_2, phsstp_2, phslst_2, th1_2, ph1_2, ths1_2, phs1_2, thsca_2, u_2, us_2, un_2, uns_2, up_2, ups_2, unmp_2, unsmp_2, upmp_2, upsmp_2, scan_2, cfmp_2, sfmp_2, cfsp_2, sfsp_2, sqsfi_2, exri_2, lcalc_2, arg_2, wn_2, vk_2, ndit_2, am_2, isq_2, ibf_2); + jer = cluster_jxi488_cycle(jxi488, sconf_2, gconf_2, c1_2, c1ao_2, c2_2, c3_2, c4_2, c6_2, c9_2, output_2, output_path, gaps_2, tqse_2, tqspe_2, tqss_2, tqsps_2, zpv_2, gapm_2, gappm_2, nth_2, nths_2, nph_2, nphs_2, nk_2, nks_2, nkks_2, argi_2, args_2, gap_2, gapp_2, tqce_2, tqcpe_2, tqcs_2, tqcps_2, duk_2, tppoan_2, cextlr_2, cext_2, cmullr_2, cmul_2, gapv_2, tqev_2, tqsv_2, nxi_2, nsph_2, mxndm_2, inpol_2, iavm_2, npnt_2, npntts_2, isam_2, lm_2, th_2, thstp_2, thlst_2, ths_2, thsstp_2, thslst_2, ph_2, phstp_2, phlst_2, phs_2, phsstp_2, phslst_2, th1_2, ph1_2, ths1_2, phs1_2, thsca_2, u_2, us_2, un_2, uns_2, up_2, ups_2, unmp_2, unsmp_2, upmp_2, upsmp_2, scan_2, cfmp_2, sfmp_2, cfsp_2, sfsp_2, sqsfi_2, exri_2, lcalc_2, arg_2, wn_2, vk_2, ndit_2, am_2, isq_2, ibf_2); } #pragma omp barrier - delete sconf_2; - delete gconf_2; - delete c1_2; - delete c1ao_2; - delete c2_2; - delete c3_2; - delete c4_2; - delete c6_2; - delete c9_2; -#ifdef _OPENMP - fclose(output_2); - tppoan_2.close(); -#pragma omp barrier - { - printf("Closing thread-local output files of thread %d and syncing threads\n", myompthread); - } -#endif - delete[] gaps_2; - for (int ti = 0; ti <2 -1; ti++) { - delete[] tqse_2[ti]; - delete[] tqce_2[ti]; - delete[] tqcs_2[ti]; - delete[] tqspe_2[ti]; - delete[] tqcpe_2[ti]; - delete[] tqcps_2[ti]; - delete[] tqss_2[ti]; - delete[] tqsps_2[ti]; - } - delete[] tqse_2; - delete[] tqce_2; - delete[] tqcs_2; - delete[] tqspe_2; - delete[] tqcpe_2; - delete[] tqcps_2; - delete[] tqss_2; - delete[] tqsps_2; - for (int zi = 0; zi < c4->lm; zi++) { - for (int zj = 0; zj < 3; zj++) { - for (int zk = 0; zk < 2; zk++) { - delete[] zpv_2[zi][zj][zk]; + // only threads different from 0 have to free local copies of variables and close local files + if (myompthread != 0) { + delete sconf_2; + delete gconf_2; + delete c1_2; + delete c1ao_2; + delete c2_2; + delete c3_2; + delete c4_2; + delete c6_2; + delete c9_2; + fclose(output_2); + tppoanp_2->close(); + delete tppoanp_2; + delete[] gaps_2; + for (int ti = 0; ti <2 -1; ti++) { + delete[] tqse_2[ti]; + delete[] tqce_2[ti]; + delete[] tqcs_2[ti]; + delete[] tqspe_2[ti]; + delete[] tqcpe_2[ti]; + delete[] tqcps_2[ti]; + delete[] tqss_2[ti]; + delete[] tqsps_2[ti]; + } + delete[] tqse_2; + delete[] tqce_2; + delete[] tqcs_2; + delete[] tqspe_2; + delete[] tqcpe_2; + delete[] tqcps_2; + delete[] tqss_2; + delete[] tqsps_2; + for (int zi = 0; zi < c4->lm; zi++) { + for (int zj = 0; zj < 3; zj++) { + for (int zk = 0; zk < 2; zk++) { + delete[] zpv_2[zi][zj][zk]; + } + delete[] zpv_2[zi][zj]; } - delete[] zpv_2[zi][zj]; + delete[] zpv_2[zi]; } - delete[] zpv_2[zi]; - } - delete[] zpv_2; - for (int gi = 0; gi < 3; gi++) { - delete[] gap_2[gi]; - delete[] gapp_2[gi]; - delete[] gapm_2[gi]; - delete[] gappm_2[gi]; + delete[] zpv_2; + for (int gi = 0; gi < 3; gi++) { + delete[] gap_2[gi]; + delete[] gapp_2[gi]; + delete[] gapm_2[gi]; + delete[] gappm_2[gi]; + } + delete[] gap_2; + delete[] gapp_2; + delete[] gapm_2; + delete[] gappm_2; + delete[] argi_2; + delete[] args_2; + delete[] duk_2; + for (int ci = 0; ci < 4; ci++) { + delete[] cextlr_2[ci]; + delete[] cext_2[ci]; + delete[] cmullr_2[ci]; + delete[] cmul_2[ci]; + } + delete[] cextlr_2; + delete[] cext_2; + delete[] cmullr_2; + delete[] cmul_2; + delete[] gapv_2; + delete[] tqev_2; + delete[] tqsv_2; + delete[] u_2; + delete[] us_2; + delete[] un_2; + delete[] uns_2; + delete[] up_2; + delete[] ups_2; + delete[] unmp_2; + delete[] unsmp_2; + delete[] upmp_2; + delete[] upsmp_2; + delete[] am_vector_2; + delete[] am_2; } - delete[] gap_2; - delete[] gapp_2; - delete[] gapm_2; - delete[] gappm_2; - delete[] argi_2; - delete[] args_2; - delete[] duk_2; - for (int ci = 0; ci < 4; ci++) { - delete[] cextlr_2[ci]; - delete[] cext_2[ci]; - delete[] cmullr_2[ci]; - delete[] cmul_2[ci]; +#pragma omp barrier + { + printf("Closed thread-local output files of thread %d, syncing threads\n", myompthread); } - delete[] cextlr_2; - delete[] cext_2; - delete[] cmullr_2; - delete[] cmul_2; - delete[] gapv_2; - delete[] tqev_2; - delete[] tqsv_2; - delete[] u_2; - delete[] us_2; - delete[] un_2; - delete[] uns_2; - delete[] up_2; - delete[] ups_2; - delete[] unmp_2; - delete[] unsmp_2; - delete[] upmp_2; - delete[] upsmp_2; - delete[] am_vector_2; - delete[] am_2; } // closes pragma omp parallel #ifdef _OPENMP #pragma omp barrier { - for (int ri = 0; ri < ompnumthreads; ri++) { + // thread 0 already wrote on global files, skip it and take care of appending the others + for (int ri = 1; ri < ompnumthreads; ri++) { // Giovanni, please add here in this loop code to reopen the temporary files, reread them and append them respectively to the global output and tppoan, before closing them string partial_file_name = output_path + "/c_OCLU_" + to_string(ri); printf("Copying ASCII output of thread %d of %d... ", ri, ompnumthreads - 1); @@ -647,9 +702,10 @@ void cluster(string config_file, string data_file, string output_path) { fclose(partial_output); remove(partial_file_name.c_str()); printf("done.\n"); - } - for (int ri = 0; ri < ompnumthreads; ri++) { - string partial_file_name = output_path + "/c_TPPOAN_" + to_string(ri); + // } + //for (int ri = 1; ri < ompnumthreads; ri++) { + //string partial_file_name = output_path + "/c_TPPOAN_" + to_string(ri); + partial_file_name = output_path + "/c_TPPOAN_" + to_string(ri); printf("Copying binary output of thread %d of %d... ", ri, ompnumthreads - 1); fstream partial_tppoan; partial_tppoan.open(partial_file_name.c_str(), ios::in | ios::binary); @@ -666,7 +722,8 @@ void cluster(string config_file, string data_file, string output_path) { } } #endif - tppoan.close(); + tppoanp->close(); + delete tppoanp; } else { // In case TPPOAN could not be opened. Should never happen. printf("\nERROR: failed to open TPPOAN file.\n"); } diff --git a/src/include/types.h b/src/include/types.h index 6eda961a429ebaa1a149ebb700cdea2d5ef69e4e..8c6ab4b214a0e8cfe10c07f3d8b7823897b73533 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -14,6 +14,9 @@ typedef __complex__ double dcomplex; #ifdef USE_LAPACK #ifdef USE_MKL +#ifndef MKL_INT +#define MKL_INT int64_t +#endif #include <mkl_lapacke.h> #else #include <lapacke.h> diff --git a/src/libnptm/Commons.cpp b/src/libnptm/Commons.cpp index 4fad87d23d69b55eaef8e5c71a7e8a34fd49bf95..da4b62360dd72b2b2cac1355b762f4ab70d20d57 100644 --- a/src/libnptm/Commons.cpp +++ b/src/libnptm/Commons.cpp @@ -325,6 +325,7 @@ C1_AddOns::~C1_AddOns() { delete[] v3j0; delete[] vh; delete[] vj0; + delete[] vj; for (int ai = nlemt - 1; ai > -1; ai--) { delete[] am0m[ai]; }