From 4ade768e728fe6fc5ce0143204a865ba54d6acf5 Mon Sep 17 00:00:00 2001 From: "Mulas, Giacomo" Date: Sun, 8 Dec 2024 08:51:41 +0100 Subject: [PATCH] - fix configure to properly detect cublas even when no additional compilation flags are included - accept even an accuracy as bad as 0.1 in refinement, if no better can be achieved --- build/configure | 14 ++++---------- build/configure.ac | 7 ++----- src/cluster/cluster.cpp | 31 ++++++++++++++----------------- src/libnptm/algebraic.cpp | 14 +++++++------- 4 files changed, 27 insertions(+), 39 deletions(-) diff --git a/build/configure b/build/configure index f05c7ac0..49a0060f 100755 --- a/build/configure +++ b/build/configure @@ -25505,13 +25505,10 @@ then : fi fi # end of pkg-config decision tree fi # end of CUDAFLAGS user override protection - if test "x$CUDAFLAGS" != "x"; then - # somehow CUDAFLAGS was defined + if test "x $CUDAFLAGS $CUDALDFLAGS" != "x"; then + # somehow CUDAFLAGS or CUDALDFLAGS was defined export CUDAFLAGS export CUBLASFLAGS="-DUSE_CUBLAS ${CUDAFLAGS}" - fi - if test "x$CUDALDFLAGS" != "x"; then - # somehow CUDALDFLAGS was defined export CUDALDFLAGS export CUBLASLDFLAGS="${CUDALDFLAGS}" fi @@ -25583,13 +25580,10 @@ else case e in #( fi fi # end of pkg-config decision tree fi # end of CUDAFLAGS user override protection - if test "x$CUDAFLAGS" != "x"; then - # somehow CUDAFLAGS was defined + if test "x $CUDAFLAGS $CUDALDFLAGS" != "x"; then + # somehow CUDAFLAGS or CUDALDFLAGS was defined export CUDAFLAGS export CUBLASFLAGS="-DUSE_CUBLAS ${CUDAFLAGS}" - fi - if test "x$CUDALDFLAGS" != "x"; then - # somehow CUDALDFLAGS was defined export CUDALDFLAGS export CUBLASLDFLAGS="${CUDALDFLAGS}" fi diff --git a/build/configure.ac b/build/configure.ac index 6b6a85c2..50c600be 100644 --- a/build/configure.ac +++ b/build/configure.ac @@ -213,13 +213,10 @@ m4_define( fi fi # end of pkg-config decision tree fi # end of CUDAFLAGS user override protection - if test "x$CUDAFLAGS" != "x"; then - # somehow CUDAFLAGS was defined + if test "x $CUDAFLAGS $CUDALDFLAGS" != "x"; then + # somehow CUDAFLAGS or CUDALDFLAGS was defined export CUDAFLAGS export CUBLASFLAGS="-DUSE_CUBLAS ${CUDAFLAGS}" - fi - if test "x$CUDALDFLAGS" != "x"; then - # somehow CUDALDFLAGS was defined export CUDALDFLAGS export CUBLASLDFLAGS="${CUDALDFLAGS}" fi diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp index 5ecea15e..8950265a 100644 --- a/src/cluster/cluster.cpp +++ b/src/cluster/cluster.cpp @@ -126,10 +126,7 @@ void cluster(const string& config_file, const string& data_file, const string& o Logger *logger = new Logger(LOG_DEBG); int device_count = 0; -#ifdef USE_CUBLAS - cudaGetDeviceCount(&device_count); - logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " CUDA devices.\n", LOG_DEBG); -#elif defined USE_MAGMA +#ifdef USE_MAGMA //=========== // Initialise MAGMA //=========== @@ -155,7 +152,11 @@ void cluster(const string& config_file, const string& data_file, const string& o delete logger; return; } -#endif // end MAGMA initialisation +// end MAGMA initialisation +#elif defined USE_CUBLAS + cudaGetDeviceCount(&device_count); + logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " CUDA devices.\n", LOG_DEBG); +#endif //=========================== // the following only happens on MPI process 0 @@ -297,10 +298,10 @@ void cluster(const string& config_file, const string& data_file, const string& o // Create empty virtual binary file VirtualBinaryFile *vtppoanp = new VirtualBinaryFile(); string tppoan_name = output_path + "/c_TPPOAN"; -#ifdef USE_CUBLAS - logger->log("INFO: using CUBLAS calls.\n", LOG_INFO); -#elif defined USE_MAGMA +#ifdef USE_MAGMA logger->log("INFO: using MAGMA calls.\n", LOG_INFO); +#elif defined USE_CUBLAS + logger->log("INFO: using CUBLAS calls.\n", LOG_INFO); #elif defined USE_LAPACK logger->log("INFO: using LAPACK calls.\n", LOG_INFO); #else @@ -564,9 +565,7 @@ void cluster(const string& config_file, const string& data_file, const string& o delete sconf; delete gconf; -#ifdef USE_CUBLAS - // just a placeholder to skip magma finalisation if we are using cublas -#elif defined USE_MAGMA +#ifdef USE_MAGMA logger->log("INFO: Process " + to_string(mpidata->rank) + " finalizes MAGMA.\n"); magma_finalize(); #endif @@ -689,9 +688,7 @@ void cluster(const string& config_file, const string& data_file, const string& o delete sconf; delete gconf; #endif -#ifdef USE_CUBLAS - // placeholder to avoid magma if using cublas -#elif defined USE_MAGMA +#ifdef USE_MAGMA logger->log("INFO: Process " + to_string(mpidata->rank) + " finalizes MAGMA.\n"); magma_finalize(); #endif @@ -842,7 +839,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf #ifdef USE_NVTX nvtxRangePush("Invert the matrix"); #endif - // we the accuracygoal in, get the actual accuracy back out + // we put the accuracygoal in, get the actual accuracy back out double actualaccuracy = cid->accuracygoal; invert_matrix(cid->am, ndit, jer, cid->maxrefiters, actualaccuracy, cid->refinemode, mxndm, cid->proc_device); // in principle, we should check whether the returned actualaccuracy is indeed lower than the accuracygoal, and do something about it if not @@ -850,8 +847,8 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf if (cid->refinemode==2) { message = "INFO: calibration obtained accuracy " + to_string(actualaccuracy) + " (" + to_string(cid->accuracygoal) + " requested) in " + to_string(cid->maxrefiters) + " refinement iterations\n"; logger->log(message); - if (actualaccuracy > 1e-2) { - printf("Accuracy worse than 0.01, stopping"); + if (actualaccuracy > 1e-1) { + printf("Accuracy worse than 0.1, stopping"); exit(1); } } diff --git a/src/libnptm/algebraic.cpp b/src/libnptm/algebraic.cpp index 516c20c3..1df84050 100644 --- a/src/libnptm/algebraic.cpp +++ b/src/libnptm/algebraic.cpp @@ -60,13 +60,7 @@ using namespace std; void invert_matrix(dcomplex **mat, np_int size, int &ier, int &maxrefiters, double &accuracygoal, int refinemode, np_int max_size, int target_device) { ier = 0; -#ifdef USE_CUBLAS -#ifdef USE_REFINEMENT - cublas_zinvert_and_refine(mat, size, maxrefiters, accuracygoal, refinemode, target_device); -#else - cublas_zinvert(mat, size, target_device); -#endif -#elif defined USE_MAGMA +#ifdef USE_MAGMA #ifdef USE_REFINEMENT // try using the iterative refinement to obtain a more accurate solution // we pass to magma_zinvert_and_refine() the accuracygoal in, get the actual @@ -75,6 +69,12 @@ void invert_matrix(dcomplex **mat, np_int size, int &ier, int &maxrefiters, doub #else magma_zinvert(mat, size, ier, target_device); #endif +#elif defined USE_CUBLAS +#ifdef USE_REFINEMENT + cublas_zinvert_and_refine(mat, size, maxrefiters, accuracygoal, refinemode, target_device); +#else + cublas_zinvert(mat, size, target_device); +#endif #elif defined USE_LAPACK #ifdef USE_REFINEMENT zinvert_and_refine(mat, size, ier, maxrefiters, accuracygoal, refinemode); -- GitLab