Skip to content
Snippets Groups Projects
Commit ee43703f authored by Giovanni La Mura
Browse files

Place NVIDIA markers at the same code block level and add them to the parallel loop function

parent 73c5bf01
No related branches found
No related tags found
No related merge requests found
...@@ -85,7 +85,6 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf ...@@ -85,7 +85,6 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
*/ */
void cluster(const string& config_file, const string& data_file, const string& output_path, const mixMPI *mpidata) { void cluster(const string& config_file, const string& data_file, const string& output_path, const mixMPI *mpidata) {
chrono::time_point<chrono::high_resolution_clock> t_start = chrono::high_resolution_clock::now(); chrono::time_point<chrono::high_resolution_clock> t_start = chrono::high_resolution_clock::now();
nvtxRangePush("Set up starts");
chrono::duration<double> elapsed; chrono::duration<double> elapsed;
string message; string message;
string timing_name = output_path + "/c_timing_mpi"+ to_string(mpidata->rank) +".log"; string timing_name = output_path + "/c_timing_mpi"+ to_string(mpidata->rank) +".log";
...@@ -109,6 +108,7 @@ void cluster(const string& config_file, const string& data_file, const string& o ...@@ -109,6 +108,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
#endif #endif
// the following only happens on MPI process 0 // the following only happens on MPI process 0
if (mpidata->rank == 0) { if (mpidata->rank == 0) {
nvtxRangePush("Set up");
logger->log("INFO: making legacy configuration...", LOG_INFO); logger->log("INFO: making legacy configuration...", LOG_INFO);
ScattererConfiguration *sconf = NULL; ScattererConfiguration *sconf = NULL;
try { try {
...@@ -139,6 +139,7 @@ void cluster(const string& config_file, const string& data_file, const string& o ...@@ -139,6 +139,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
return; return;
} }
logger->log(" done.\n", LOG_INFO); logger->log(" done.\n", LOG_INFO);
nvtxRangePop();
int s_nsph = sconf->number_of_spheres; int s_nsph = sconf->number_of_spheres;
int nsph = gconf->number_of_spheres; int nsph = gconf->number_of_spheres;
if (s_nsph == nsph) { if (s_nsph == nsph) {
...@@ -221,11 +222,10 @@ void cluster(const string& config_file, const string& data_file, const string& o ...@@ -221,11 +222,10 @@ void cluster(const string& config_file, const string& data_file, const string& o
fprintf(output, " VK=%15.7lE, XI IS SCALE FACTOR FOR LENGTHS\n", cid->vk); fprintf(output, " VK=%15.7lE, XI IS SCALE FACTOR FOR LENGTHS\n", cid->vk);
fprintf(output, " \n"); fprintf(output, " \n");
} }
nvtxRangePop();
// do the first iteration on jxi488 separately, since it seems to be different from the others // do the first iteration on jxi488 separately, since it seems to be different from the others
int jxi488 = 1; int jxi488 = 1;
chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now(); chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
nvtxRangePush("First iteration starts"); nvtxRangePush("First iteration");
int jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, output, output_path, tppoan); int jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, output, output_path, tppoan);
nvtxRangePop(); nvtxRangePop();
chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now(); chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now();
...@@ -263,7 +263,7 @@ void cluster(const string& config_file, const string& data_file, const string& o ...@@ -263,7 +263,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
// Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
int ompnumthreads = 1; int ompnumthreads = 1;
nvtxRangePush("Parallel loop starts"); nvtxRangePush("Parallel loop");
#pragma omp parallel #pragma omp parallel
{ {
// Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defined value anyway // Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defined value anyway
...@@ -317,7 +317,7 @@ void cluster(const string& config_file, const string& data_file, const string& o ...@@ -317,7 +317,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
} // closes pragma omp parallel } // closes pragma omp parallel
nvtxRangePop(); nvtxRangePop();
nvtxRangePush("Output concatenation starts"); nvtxRangePush("Output concatenation");
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp barrier #pragma omp barrier
{ {
...@@ -597,7 +597,8 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf ...@@ -597,7 +597,8 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
int jwtm = gconf->jwtm; int jwtm = gconf->jwtm;
np_int ndit = 2 * nsph * cid->c4->nlim; np_int ndit = 2 * nsph * cid->c4->nlim;
int isq, ibf; int isq, ibf;
nvtxRangePush("Prepare matrix calculation");
fprintf(output, "========== JXI =%3d ====================\n", jxi488); fprintf(output, "========== JXI =%3d ====================\n", jxi488);
double xi = sconf->get_scale(jxi488 - 1); double xi = sconf->get_scale(jxi488 - 1);
double exdc = sconf->exdc; double exdc = sconf->exdc;
...@@ -654,14 +655,19 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf ...@@ -654,14 +655,19 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
//break; //break;
} }
} // i132 loop } // i132 loop
nvtxRangePop();
interval_start = chrono::high_resolution_clock::now(); interval_start = chrono::high_resolution_clock::now();
nvtxRangePush("Calculate inverted matrix");
cms(cid->am, cid->c1, cid->c1ao, cid->c4, cid->c6); cms(cid->am, cid->c1, cid->c1ao, cid->c4, cid->c6);
nvtxRangePop();
interval_end = chrono::high_resolution_clock::now(); interval_end = chrono::high_resolution_clock::now();
elapsed = interval_end - interval_start; elapsed = interval_end - interval_start;
message = "INFO: matrix calculation for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n"; message = "INFO: matrix calculation for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n";
logger->log(message); logger->log(message);
interval_start = chrono::high_resolution_clock::now(); interval_start = chrono::high_resolution_clock::now();
nvtxRangePush("Invert the matrix");
invert_matrix(cid->am, ndit, jer, mxndm); invert_matrix(cid->am, ndit, jer, mxndm);
nvtxRangePop();
interval_end = chrono::high_resolution_clock::now(); interval_end = chrono::high_resolution_clock::now();
elapsed = interval_end - interval_start; elapsed = interval_end - interval_start;
message = "INFO: matrix inversion for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n"; message = "INFO: matrix inversion for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n";
...@@ -673,6 +679,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf ...@@ -673,6 +679,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
// break; // jxi488 loop: goes to memory clean // break; // jxi488 loop: goes to memory clean
} }
interval_start = chrono::high_resolution_clock::now(); interval_start = chrono::high_resolution_clock::now();
nvtxRangePush("Average calculation");
ztm(cid->am, cid->c1, cid->c1ao, cid->c4, cid->c6, cid->c9); ztm(cid->am, cid->c1, cid->c1ao, cid->c4, cid->c6, cid->c9);
if (idfc >= 0) { if (idfc >= 0) {
if (jxi488 == jwtm) { if (jxi488 == jwtm) {
...@@ -740,11 +747,13 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf ...@@ -740,11 +747,13 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
tppoan.write(reinterpret_cast<char *>(&(cid->vk)), sizeof(double)); tppoan.write(reinterpret_cast<char *>(&(cid->vk)), sizeof(double));
pcrsm0(cid->vk, exri, inpol, cid->c1, cid->c1ao, cid->c4); pcrsm0(cid->vk, exri, inpol, cid->c1, cid->c1ao, cid->c4);
apcra(cid->zpv, cid->c4->le, cid->c1ao->am0m, inpol, sqk, cid->gapm, cid->gappm); apcra(cid->zpv, cid->c4->le, cid->c1ao->am0m, inpol, sqk, cid->gapm, cid->gappm);
nvtxRangePop();
interval_end = chrono::high_resolution_clock::now(); interval_end = chrono::high_resolution_clock::now();
elapsed = interval_end - interval_start; elapsed = interval_end - interval_start;
message = "INFO: average calculation for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n"; message = "INFO: average calculation for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n";
logger->log(message); logger->log(message);
interval_start = chrono::high_resolution_clock::now(); interval_start = chrono::high_resolution_clock::now();
nvtxRangePush("Angle loop");
double th = sa->th; double th = sa->th;
for (int jth486 = 1; jth486 <= sa->nth; jth486++) { // OpenMP portable? for (int jth486 = 1; jth486 <= sa->nth; jth486++) { // OpenMP portable?
double ph = sa->ph; double ph = sa->ph;
...@@ -1229,6 +1238,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf ...@@ -1229,6 +1238,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
} // jph484 loop } // jph484 loop
th += sa->thstp; th += sa->thstp;
} // jth486 loop } // jth486 loop
nvtxRangePop();
interval_end = chrono::high_resolution_clock::now(); interval_end = chrono::high_resolution_clock::now();
elapsed = interval_end - interval_start; elapsed = interval_end - interval_start;
message = "INFO: angle loop for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n"; message = "INFO: angle loop for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n";
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment