Skip to content
Snippets Groups Projects
Commit 73c5bf01 authored by Giovanni La Mura
Browse files

Enable NVIDIA markers

parent a603bb7c
No related branches found
No related tags found
No related merge requests found
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <exception> #include <exception>
#include <fstream> #include <fstream>
#include <string> #include <string>
#include <nvtx3/nvToolsExt.h>
#ifdef _OPENMP #ifdef _OPENMP
#include <omp.h> #include <omp.h>
#endif #endif
...@@ -84,6 +85,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf ...@@ -84,6 +85,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
*/ */
void cluster(const string& config_file, const string& data_file, const string& output_path, const mixMPI *mpidata) { void cluster(const string& config_file, const string& data_file, const string& output_path, const mixMPI *mpidata) {
chrono::time_point<chrono::high_resolution_clock> t_start = chrono::high_resolution_clock::now(); chrono::time_point<chrono::high_resolution_clock> t_start = chrono::high_resolution_clock::now();
nvtxRangePush("Set up starts");
chrono::duration<double> elapsed; chrono::duration<double> elapsed;
string message; string message;
string timing_name = output_path + "/c_timing_mpi"+ to_string(mpidata->rank) +".log"; string timing_name = output_path + "/c_timing_mpi"+ to_string(mpidata->rank) +".log";
...@@ -219,10 +221,13 @@ void cluster(const string& config_file, const string& data_file, const string& o ...@@ -219,10 +221,13 @@ void cluster(const string& config_file, const string& data_file, const string& o
fprintf(output, " VK=%15.7lE, XI IS SCALE FACTOR FOR LENGTHS\n", cid->vk); fprintf(output, " VK=%15.7lE, XI IS SCALE FACTOR FOR LENGTHS\n", cid->vk);
fprintf(output, " \n"); fprintf(output, " \n");
} }
nvtxRangePop();
// do the first iteration on jxi488 separately, since it seems to be different from the others // do the first iteration on jxi488 separately, since it seems to be different from the others
int jxi488 = 1; int jxi488 = 1;
chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now(); chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
nvtxRangePush("First iteration starts");
int jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, output, output_path, tppoan); int jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, output, output_path, tppoan);
nvtxRangePop();
chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now(); chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now();
elapsed = start_iter_1 - t_start; elapsed = start_iter_1 - t_start;
string message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n"; string message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n";
...@@ -258,6 +263,7 @@ void cluster(const string& config_file, const string& data_file, const string& o ...@@ -258,6 +263,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
// Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
int ompnumthreads = 1; int ompnumthreads = 1;
nvtxRangePush("Parallel loop starts");
#pragma omp parallel #pragma omp parallel
{ {
// Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defined value anyway // Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defined value anyway
...@@ -309,7 +315,9 @@ void cluster(const string& config_file, const string& data_file, const string& o ...@@ -309,7 +315,9 @@ void cluster(const string& config_file, const string& data_file, const string& o
logger->log(message); logger->log(message);
} }
} // closes pragma omp parallel } // closes pragma omp parallel
nvtxRangePop();
nvtxRangePush("Output concatenation starts");
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp barrier #pragma omp barrier
{ {
...@@ -383,6 +391,7 @@ void cluster(const string& config_file, const string& data_file, const string& o ...@@ -383,6 +391,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
} }
} }
#endif #endif
nvtxRangePop();
tppoanp->close(); tppoanp->close();
delete tppoanp; delete tppoanp;
} else { // In case TPPOAN could not be opened. Should never happen. } else { // In case TPPOAN could not be opened. Should never happen.
......
...@@ -102,7 +102,7 @@ endif ...@@ -102,7 +102,7 @@ endif
ifdef CUDA_HOME ifdef CUDA_HOME
override MAGMA_LDFLAGS+= -L$(CUDA_HOME)/lib64 override MAGMA_LDFLAGS+= -L$(CUDA_HOME)/lib64
endif endif
override MAGMA_LDFLAGS+= -lmagma -lcudart override MAGMA_LDFLAGS+= -lmagma -lcudart -lnvToolsExt
#the next endif is for USE_MAGMA #the next endif is for USE_MAGMA
endif endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment