From d4c1387f9c697ef7315ea7b51e355cba4db572b0 Mon Sep 17 00:00:00 2001
From: "Mulas, Giacomo" <gmulas@oa-cagliari.inaf.it>
Date: Thu, 16 May 2024 12:52:14 +0200
Subject: [PATCH] Fix inconsistent device array types between CUDA and MAGMA
 in case of ILP64

---
 src/cluster/cluster.cpp | 28 ++++++++++++++++++----------
 src/make.inc            | 13 +++++++++----
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp
index 17889133..831242bc 100644
--- a/src/cluster/cluster.cpp
+++ b/src/cluster/cluster.cpp
@@ -95,24 +95,28 @@ void cluster(const string& config_file, const string& data_file, const string& o
   cudaGetDeviceCount(&device_count);
   logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " CUDA devices.\n", LOG_DEBG);
   logger->log("INFO: Process " + to_string(mpidata->rank) + " initializes MAGMA.\n");
-  magma_device_t *devices = new magma_device_t[device_count];
-  cudaSetValidDevices(devices, device_count);
+  magma_device_t *magmadevices = new magma_device_t[device_count];
+  int *cudadevices = new int[device_count];
+  cudaSetValidDevices(cudadevices, device_count);
+  for (int ci=0; ci<device_count; ci++) magmadevices[ci] = (magma_device_t) cudadevices[ci];
   magma_int_t num_devices;
-  magma_getdevices(devices, device_count, &num_devices);
+  magma_getdevices(magmadevices, device_count, &num_devices);
   logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(num_devices) + " MAGMA devices.\n", LOG_DEBG);
   magma_int_t magma_result = magma_init();
   if (magma_result != MAGMA_SUCCESS) {
     logger->err("ERROR: Process " + to_string(mpidata->rank) + " failed to initilize MAGMA.\n");
     logger->err("PROC-" + to_string(mpidata->rank) + ": MAGMA error code " + to_string(magma_result) + "\n");
     fclose(timing_file);
-    delete[] devices;
+    delete[] magmadevices;
+    delete[] cudadevices;
     delete time_logger;
     delete logger;
     return;
   }
 #else
-  int *devices = new int[1];
-  devices[0] = -1;
+  int *cudadevices = new int[1];
+  int *magmadevices = new int[1];
+  cudadevices[0] = magmadevices[0] = -1;
 #endif
   // the following only happens on MPI process 0
   if (mpidata->rank == 0) {
@@ -125,7 +129,8 @@ void cluster(const string& config_file, const string& data_file, const string& o
       string message = "FILE: " + string(ex.what()) + "\n";
       logger->err(message);
       fclose(timing_file);
-      delete[] devices;
+      delete[] cudadevices;
+      delete[] magmadevices;
       delete time_logger;
       delete logger;
       return;
@@ -142,7 +147,8 @@ void cluster(const string& config_file, const string& data_file, const string& o
       logger->err(message);
       if (sconf) delete sconf;
       fclose(timing_file);
-      delete[] devices;
+      delete[] cudadevices;
+      delete[] magmadevices;
       delete time_logger;
       delete logger;
       return;
@@ -248,7 +254,8 @@ void cluster(const string& config_file, const string& data_file, const string& o
 	  tppoan.close();
 	  fclose(timing_file);
 	  fclose(output);
-	  delete[] devices;
+	  delete[] magmadevices;
+	  delete[] cudadevices;
 	  delete p_scattering_angles;
 	  delete cid;
 	  delete logger;
@@ -558,7 +565,8 @@ void cluster(const string& config_file, const string& data_file, const string& o
       }
     }
     // Clean memory
-    delete[] devices;
+    delete[] magmadevices;
+    delete[] cudadevices;
     delete cid;
     delete p_scattering_angles;
     delete sconf;
diff --git a/src/make.inc b/src/make.inc
index a619faab..d6acdf1a 100644
--- a/src/make.inc
+++ b/src/make.inc
@@ -97,10 +97,12 @@ endif
 # define (outside) USE_MAGMA for magma support
 ifdef USE_MAGMA
 ifdef MAGMA_LIB
-MAGMA_LDFLAGS= -L$MAGMA_LIB -lmagma -lcudart
-else
-MAGMA_LDFLAGS= -lmagma -lcudart
+override MAGMA_LDFLAGS= -L$(MAGMA_LIB)
+endif
+ifdef CUDA_HOME
+override MAGMA_LDFLAGS+= -L$(CUDA_HOME)/lib64
 endif
+override MAGMA_LDFLAGS+= -lmagma -lcudart
 #the next endif is for USE_MAGMA
 endif
 
@@ -139,8 +141,11 @@ endif
 
 ifdef USE_MAGMA
 override CXXFLAGS+= -DUSE_MAGMA
+ifdef CUDA_HOME
+override CXXFLAGS+= -I$(CUDA_HOME)/include
+endif
 ifdef MAGMA_INCLUDE
-override CXXFLAGS+= -I$MAGMA_INCLUDE
+override CXXFLAGS+= -I$(MAGMA_INCLUDE)
 endif
 ifdef USE_ILP64
 override CXXFLAGS+= -DMAGMA_ILP64
-- 
GitLab