diff --git a/src/include/algebraic.h b/src/include/algebraic.h index accdf8188139ea955e2409c4b0fe067413c49859..f052bb2d43727b8a3dc22e59d3a96bb570e2dafa 100644 --- a/src/include/algebraic.h +++ b/src/include/algebraic.h @@ -36,7 +36,8 @@ * \param ier: `int &` Reference to an integer variable for returning a result flag. * \param max_size: `np_int` The maximum expected size (required by some call-backs, * optional, defaults to 0). + * \param target_device: `int` ID of target GPU, if available (defaults to 0). */ -void invert_matrix(dcomplex **mat, np_int size, int &ier, np_int max_size=0); +void invert_matrix(dcomplex **mat, np_int size, int &ier, np_int max_size=0, int target_device=0); #endif diff --git a/src/include/magma_calls.h b/src/include/magma_calls.h index bf39c3a5c736b859d5311b16a2d6509f8238eafd..1002d351dac8cdbc41c875e9bfdc818b1b06f54f 100644 --- a/src/include/magma_calls.h +++ b/src/include/magma_calls.h @@ -31,7 +31,8 @@ * \param mat: Matrix of complex. The matrix to be inverted. * \param n: `np_int` The number of rows and columns of the [n x n] matrix. * \param jer: `int &` Reference to an integer return flag. + * \param device_id: `int` ID of the device for matrix inversion offloading. */ -void magma_zinvert(dcomplex **mat, np_int n, int &jer); +void magma_zinvert(dcomplex **mat, np_int n, int &jer, int device_id=0); #endif diff --git a/src/libnptm/algebraic.cpp b/src/libnptm/algebraic.cpp index c942a1fdd3a4f242fd409a6390975da344d93cee..c25ea0aa92dd1d7f9f9d27b13f6be50e82f7506d 100644 --- a/src/libnptm/algebraic.cpp +++ b/src/libnptm/algebraic.cpp @@ -44,10 +44,10 @@ extern void lucin(dcomplex **mat, np_int max_size, np_int size, int &ier); using namespace std; -void invert_matrix(dcomplex **mat, np_int size, int &ier, np_int max_size) { +void invert_matrix(dcomplex **mat, np_int size, int &ier, np_int max_size, int target_device) { ier = 0; #ifdef USE_MAGMA - magma_zinvert(mat, size, ier); + magma_zinvert(mat, size, ier, target_device); #elif defined USE_LAPACK zinvert(mat, size, ier); #else diff --git a/src/libnptm/magma_calls.cpp b/src/libnptm/magma_calls.cpp index d9875d9bae54e25b974e337a2f11c3152fd410c4..4cbae602b473d145325c9c529f91fd1e550b1453 100644 --- a/src/libnptm/magma_calls.cpp +++ b/src/libnptm/magma_calls.cpp @@ -27,19 +27,19 @@ #include "../include/magma_calls.h" #endif -void magma_zinvert(dcomplex **mat, np_int n, int &jer) { +void magma_zinvert(dcomplex **mat, np_int n, int &jer, int device_id) { // magma_int_t result = magma_init(); magma_int_t err = MAGMA_SUCCESS; magma_queue_t queue = NULL; - magma_device_t dev = 0; + magma_device_t dev = (magma_device_t)device_id; magma_queue_create(dev, &queue); - magmaDoubleComplex *dwork; // dwork - workspace + magmaDoubleComplex *dwork; // workspace magma_int_t ldwork; // size of dwork - magma_int_t *piv , info; // piv - array of indices of inter - + magma_int_t *piv , info; // array of pivot indices magma_int_t m = (magma_int_t)n; // changed rows; a - mxm matrix - magma_int_t mm = m * m; // size of a, r, c - magmaDoubleComplex *a = (magmaDoubleComplex *)&(mat[0][0]); // a - mxm matrix on the host - magmaDoubleComplex *d_a; // d_a - mxm matrix a on the device + magma_int_t mm = m * m; // size of a + magmaDoubleComplex *a = (magmaDoubleComplex *)&(mat[0][0]); // pointer to first element on host + magmaDoubleComplex *d_a; // pointer to first element on device ldwork = m * magma_get_zgetri_nb(m); // optimal block size // allocate matrices err = magma_zmalloc(&d_a, mm); // device memory for a @@ -50,7 +50,7 @@ void magma_zinvert(dcomplex **mat, np_int n, int &jer) { magma_zgetrf_gpu(m, m, d_a, m, piv, &info); magma_zgetri_gpu(m, d_a, m, piv, dwork, ldwork, &info); - magma_zgetmatrix( m, m, d_a , m, a, m, queue); // copy d_a -> a + magma_zgetmatrix(m, m, d_a , m, a, m, queue); // copy d_a -> a delete[] piv; // free host memory magma_free(d_a); // free device memory magma_queue_destroy(queue); // destroy queue