From d4c1387f9c697ef7315ea7b51e355cba4db572b0 Mon Sep 17 00:00:00 2001 From: "Mulas, Giacomo" <gmulas@oa-cagliari.inaf.it> Date: Thu, 16 May 2024 12:52:14 +0200 Subject: [PATCH] correct inconsistent devices between cuda and magma in case of ilp64 --- src/cluster/cluster.cpp | 28 ++++++++++++++++++---------- src/make.inc | 13 +++++++++---- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp index 17889133..831242bc 100644 --- a/src/cluster/cluster.cpp +++ b/src/cluster/cluster.cpp @@ -95,24 +95,28 @@ void cluster(const string& config_file, const string& data_file, const string& o cudaGetDeviceCount(&device_count); logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " CUDA devices.\n", LOG_DEBG); logger->log("INFO: Process " + to_string(mpidata->rank) + " initializes MAGMA.\n"); - magma_device_t *devices = new magma_device_t[device_count]; - cudaSetValidDevices(devices, device_count); + magma_device_t *magmadevices = new magma_device_t[device_count]; + int *cudadevices = new int[device_count]; + cudaSetValidDevices(cudadevices, device_count); + for (int ci=0; ci<device_count; ci++) magmadevices[ci] = (magma_device_t) cudadevices[ci]; magma_int_t num_devices; - magma_getdevices(devices, device_count, &num_devices); + magma_getdevices(magmadevices, device_count, &num_devices); logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(num_devices) + " MAGMA devices.\n", LOG_DEBG); magma_int_t magma_result = magma_init(); if (magma_result != MAGMA_SUCCESS) { logger->err("ERROR: Process " + to_string(mpidata->rank) + " failed to initilize MAGMA.\n"); logger->err("PROC-" + to_string(mpidata->rank) + ": MAGMA error code " + to_string(magma_result) + "\n"); fclose(timing_file); - delete[] devices; + delete[] magmadevices; + delete[] cudadevices; delete time_logger; delete logger; return; } #else - int *devices = new int[1]; - devices[0] = -1; + int *cudadevices = new int[1]; + int *magmadevices = new int[1]; + cudadevices[0] = magmadevices[0] = -1; #endif // the following only happens on MPI process 0 if (mpidata->rank == 0) { @@ -125,7 +129,8 @@ void cluster(const string& config_file, const string& data_file, const string& o string message = "FILE: " + string(ex.what()) + "\n"; logger->err(message); fclose(timing_file); - delete[] devices; + delete[] cudadevices; + delete[] magmadevices; delete time_logger; delete logger; return; @@ -142,7 +147,8 @@ void cluster(const string& config_file, const string& data_file, const string& o logger->err(message); if (sconf) delete sconf; fclose(timing_file); - delete[] devices; + delete[] cudadevices; + delete[] magmadevices; delete time_logger; delete logger; return; @@ -248,7 +254,8 @@ void cluster(const string& config_file, const string& data_file, const string& o tppoan.close(); fclose(timing_file); fclose(output); - delete[] devices; + delete[] magmadevices; + delete[] cudadevices; delete p_scattering_angles; delete cid; delete logger; @@ -558,7 +565,8 @@ void cluster(const string& config_file, const string& data_file, const string& o } } // Clean memory - delete[] devices; + delete[] magmadevices; + delete[] cudadevices; delete cid; delete p_scattering_angles; delete sconf; diff --git a/src/make.inc b/src/make.inc index a619faab..d6acdf1a 100644 --- a/src/make.inc +++ b/src/make.inc @@ -97,10 +97,12 @@ endif # define (outside) USE_MAGMA for magma support ifdef USE_MAGMA ifdef MAGMA_LIB -MAGMA_LDFLAGS= -L$MAGMA_LIB -lmagma -lcudart -else -MAGMA_LDFLAGS= -lmagma -lcudart +override MAGMA_LDFLAGS= -L$(MAGMA_LIB) +endif +ifdef CUDA_HOME +override MAGMA_LDFLAGS+= -L$(CUDA_HOME)/lib64 endif +override MAGMA_LDFLAGS+= -lmagma -lcudart #the next endif is for USE_MAGMA endif @@ -139,8 +141,11 @@ endif ifdef USE_MAGMA override CXXFLAGS+= -DUSE_MAGMA +ifdef CUDA_HOME +override CXXFLAGS+= -I$(CUDA_HOME)/include +endif ifdef MAGMA_INCLUDE -override CXXFLAGS+= -I$MAGMA_INCLUDE +override CXXFLAGS+= -I$(MAGMA_INCLUDE) endif ifdef USE_ILP64 override CXXFLAGS+= -DMAGMA_ILP64 -- GitLab