diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp index 17889133143a53587fdfcdccc63dfaa8563529de..831242bc67b85d8e2d24b905eaf5a814d5b9390c 100644 --- a/src/cluster/cluster.cpp +++ b/src/cluster/cluster.cpp @@ -95,24 +95,28 @@ void cluster(const string& config_file, const string& data_file, const string& o cudaGetDeviceCount(&device_count); logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " CUDA devices.\n", LOG_DEBG); logger->log("INFO: Process " + to_string(mpidata->rank) + " initializes MAGMA.\n"); - magma_device_t *devices = new magma_device_t[device_count]; - cudaSetValidDevices(devices, device_count); + magma_device_t *magmadevices = new magma_device_t[device_count]; + int *cudadevices = new int[device_count]; + cudaSetValidDevices(cudadevices, device_count); + for (int ci=0; ci<device_count; ci++) magmadevices[ci] = (magma_device_t) cudadevices[ci]; magma_int_t num_devices; - magma_getdevices(devices, device_count, &num_devices); + magma_getdevices(magmadevices, device_count, &num_devices); logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(num_devices) + " MAGMA devices.\n", LOG_DEBG); magma_int_t magma_result = magma_init(); if (magma_result != MAGMA_SUCCESS) { logger->err("ERROR: Process " + to_string(mpidata->rank) + " failed to initilize MAGMA.\n"); logger->err("PROC-" + to_string(mpidata->rank) + ": MAGMA error code " + to_string(magma_result) + "\n"); fclose(timing_file); - delete[] devices; + delete[] magmadevices; + delete[] cudadevices; delete time_logger; delete logger; return; } #else - int *devices = new int[1]; - devices[0] = -1; + int *cudadevices = new int[1]; + int *magmadevices = new int[1]; + cudadevices[0] = magmadevices[0] = -1; #endif // the following only happens on MPI process 0 if (mpidata->rank == 0) { @@ -125,7 +129,8 @@ void cluster(const string& config_file, const string& data_file, const string& o string message = "FILE: " + string(ex.what()) + "\n"; logger->err(message); fclose(timing_file); - delete[] devices; + delete[] cudadevices; + delete[] magmadevices; delete time_logger; delete logger; return; @@ -142,7 +147,8 @@ void cluster(const string& config_file, const string& data_file, const string& o logger->err(message); if (sconf) delete sconf; fclose(timing_file); - delete[] devices; + delete[] cudadevices; + delete[] magmadevices; delete time_logger; delete logger; return; @@ -248,7 +254,8 @@ void cluster(const string& config_file, const string& data_file, const string& o tppoan.close(); fclose(timing_file); fclose(output); - delete[] devices; + delete[] magmadevices; + delete[] cudadevices; delete p_scattering_angles; delete cid; delete logger; @@ -558,7 +565,8 @@ void cluster(const string& config_file, const string& data_file, const string& o } } // Clean memory - delete[] devices; + delete[] magmadevices; + delete[] cudadevices; delete cid; delete p_scattering_angles; delete sconf; diff --git a/src/include/types.h b/src/include/types.h index 4ed9a11c7f0b8a088ab1e22382eb427fdeb43cd4..f6fb68ab31e7683ea18256860eb9db9a82c1631f 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -27,9 +27,15 @@ typedef __complex__ double dcomplex; #ifdef USE_LAPACK #ifdef USE_MKL +#ifdef USE_ILP64 #ifndef MKL_INT #define MKL_INT int64_t #endif // MKL_INT +#else +#ifndef MKL_INT +#define MKL_INT int32_t +#endif // MKL_INT +#endif #include <mkl_lapacke.h> #else #include <lapacke.h> @@ -44,7 +50,11 @@ typedef __complex__ double dcomplex; #ifdef lapack_int #define np_int lapack_int #else +#ifdef USE_ILP64 #define np_int int64_t +#else +#define np_int int32_t +#endif // USE_ILP64 #endif // lapack_int #endif // np_int diff --git a/src/make.inc b/src/make.inc index 3fadc9a8162964204c11d1dac12d801fd473faf2..d6acdf1adc670531ae582b82bc29a7a854fd3b69 100644 --- a/src/make.inc +++ b/src/make.inc @@ -48,32 +48,47 @@ endif # define (outside) USE_LAPACK for lapacke support, LAPACK_ILP64 for ilp64 interface, MKL_ILP64 the same if using MKL implementation ifdef USE_LAPACK +# define (outside) USE_ILP64 for long long int support in lapack/mkl/magma interfaces +ifdef USE_ILP64 ifndef LAPACK_ILP64 override LAPACK_ILP64=1 +endif #LAPACK_ILP64 endif # define (outside) USE_MKL to use the MKL implementation of lapacke ifdef USE_MKL +# define (outside) USE_ILP64 for long long int support in lapack/mkl/magma interfaces +ifdef USE_ILP64 ifndef MKL_ILP64 override MKL_ILP64=1 +endif #MKL_ILP64 endif ifndef LAPACK_INCLUDE # this is for the MKL implementation override LAPACK_INCLUDE=$(MKLROOT)/include -endif +endif #LAPACK_INCLUDE ifndef LAPACK_LDFLAGS # this is for the MKL implementation +# define (outside) USE_ILP64 for long long int support in lapack/mkl/magma interfaces +ifdef USE_ILP64 override LAPACK_LDFLAGS=-L$(MKLROOT)/lib -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl +else +override LAPACK_LDFLAGS=-L$(MKLROOT)/lib -Wl,--no-as-needed -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl endif +endif #LAPACK_LDFLAGS # the next else refers to USE_MKL -else +else #this is for when USE_MKL is _not_ defined ifndef LAPACK_INCLUDE # this is for standard "vanilla" lapacke64 override LAPACK_INCLUDE=/usr/include -endif +endif # LAPACK_INCLUDE ifndef LAPACK_LDFLAGS +ifdef USE_ILP64 # this is for standard "vanilla" lapacke64 override LAPACK_LDFLAGS=-llapacke64 +else +override LAPACK_LDFLAGS=-llapacke endif +endif #LAPACK_LDFLAGS # the next endif is for USE_MKL endif #the next endif is for USE_LAPACK @@ -81,7 +96,13 @@ endif # define (outside) USE_MAGMA for magma support ifdef USE_MAGMA -MAGMA_LDFLAGS= -lmagma -lcudart +ifdef MAGMA_LIB +override MAGMA_LDFLAGS= -L$(MAGMA_LIB) +endif +ifdef CUDA_HOME +override MAGMA_LDFLAGS+= -L$(CUDA_HOME)/lib64 +endif +override MAGMA_LDFLAGS+= -lmagma -lcudart #the next endif is for USE_MAGMA endif @@ -92,25 +113,50 @@ ifdef USE_OPENMP override CXXFLAGS+= -fopenmp # closes USE_OPENMP endif + +ifdef USE_ILP64 +override CXXFLAGS+= -DUSE_ILP64 +endif + ifdef USE_LAPACK -override CXXFLAGS+= -DUSE_LAPACK -DLAPACK_ILP64 +override CXXFLAGS+= -DUSE_LAPACK +ifdef USE_ILP64 +override CXXFLAGS+= -DLAPACK_ILP64 +endif +# closes USE_LAPACK +endif + ifdef USE_MKL -override CXXFLAGS+= -DMKL_ILP64 -DUSE_MKL -I$(MKLROOT)/include +override CXXFLAGS+= -DUSE_MKL -I$(MKLROOT)/include +ifdef USE_ILP64 +override CXXFLAGS+= -DMKL_ILP64 +endif # closes USE_MKL endif + ifdef USE_OPENMP override CXXFLAGS+= -fopenmp # closes USE_OPENMP endif -# closes USE_LAPACK -endif + ifdef USE_MAGMA override CXXFLAGS+= -DUSE_MAGMA +ifdef CUDA_HOME +override CXXFLAGS+= -I$(CUDA_HOME)/include +endif +ifdef MAGMA_INCLUDE +override CXXFLAGS+= -I$(MAGMA_INCLUDE) +endif +ifdef USE_ILP64 +override CXXFLAGS+= -DMAGMA_ILP64 +endif # closes USE_MAGMA endif + # closes CXXFLAGS endif + # HDF5_LIB defines the default path to the HDF5 libraries to use # CXXLDFLAGS defines the default linker flags to use for C++ codes ifndef CXXLDFLAGS