diff --git a/src/include/magma_calls.h b/src/include/magma_calls.h index 1002d351dac8cdbc41c875e9bfdc818b1b06f54f..fb680484cf792ae9ee2d914bf82cc77423a52bfa 100644 --- a/src/include/magma_calls.h +++ b/src/include/magma_calls.h @@ -23,6 +23,8 @@ #ifndef INCLUDE_MAGMA_CALLS_H_ #define INCLUDE_MAGMA_CALLS_H_ +#define MIN(a,b) ((a) > (b) ? (b) : (a)) + /*! \brief Invert a complex matrix with double precision elements. * * Use LAPACKE64 to perform an in-place matrix inversion for a complex @@ -35,4 +37,16 @@ */ void magma_zinvert(dcomplex **mat, np_int n, int &jer, int device_id=0); -#endif +#ifdef USE_MAGMA_SVD +/*! \brief Invert a complex matrix using MAGMA zgesdd implementation. + * + * Use LAPACKE64 to perform an in-place matrix inversion for a complex + * matrix with double precision elements. + * + * \param mat: Matrix of complex. The matrix to be inverted. + * \param n: `np_int` The number of rows and columns of the [n x n] matrix. + * \param jer: `int &` Reference to an integer return flag. + */ +void magma_svd_zinvert(dcomplex **mat, np_int n, int &jer, int device_id=0); +#endif //USE_MAGMA_SVD +#endif // INCLUDE_MAGMA_CALLS_H_ diff --git a/src/libnptm/algebraic.cpp b/src/libnptm/algebraic.cpp index c25ea0aa92dd1d7f9f9d27b13f6be50e82f7506d..c7b4a2d6dfa2f7217f7a80e3a0b56751a1307ab5 100644 --- a/src/libnptm/algebraic.cpp +++ b/src/libnptm/algebraic.cpp @@ -47,10 +47,14 @@ using namespace std; void invert_matrix(dcomplex **mat, np_int size, int &ier, np_int max_size, int target_device) { ier = 0; #ifdef USE_MAGMA +#ifdef USE_MAGMA_SVD + magma_svd_zinvert(mat, size, ier, target_device); +#else magma_zinvert(mat, size, ier, target_device); +#endif // USE_MAGMA_SVD #elif defined USE_LAPACK zinvert(mat, size, ier); #else lucin(mat, max_size, size, ier); -#endif +#endif // USE_MAGMA } diff --git a/src/libnptm/magma_calls.cpp b/src/libnptm/magma_calls.cpp index 4cbae602b473d145325c9c529f91fd1e550b1453..c566a6496002f6318509534d4bd9892a5dc06574 100644 --- a/src/libnptm/magma_calls.cpp +++ b/src/libnptm/magma_calls.cpp @@ -27,15 +27,18 @@ #include "../include/magma_calls.h" #endif +#ifdef USE_MAGMA_SVD +#include "magma_operators.h" +#endif + void magma_zinvert(dcomplex **mat, np_int n, int &jer, int device_id) { - // magma_int_t result = magma_init(); magma_int_t err = MAGMA_SUCCESS; magma_queue_t queue = NULL; magma_device_t dev = (magma_device_t)device_id; magma_queue_create(dev, &queue); magmaDoubleComplex *dwork; // workspace magma_int_t ldwork; // size of dwork - magma_int_t *piv , info; // array of pivot indices + magma_int_t *piv; // array of pivot indices magma_int_t m = (magma_int_t)n; // changed rows; a - mxm matrix magma_int_t mm = m * m; // size of a magmaDoubleComplex *a = (magmaDoubleComplex *)&(mat[0][0]); // pointer to first element on host @@ -47,8 +50,10 @@ void magma_zinvert(dcomplex **mat, np_int n, int &jer, int device_id) { piv = new magma_int_t[m]; // host mem. magma_zsetmatrix(m, m, a, m, d_a , m, queue); // copy a -> d_a - magma_zgetrf_gpu(m, m, d_a, m, piv, &info); - magma_zgetri_gpu(m, d_a, m, piv, dwork, ldwork, &info); + magma_zgetrf_gpu(m, m, d_a, m, piv, &err); + if (err == MAGMA_SUCCESS) { + magma_zgetri_gpu(m, d_a, m, piv, dwork, ldwork, &err); + } magma_zgetmatrix(m, m, d_a , m, a, m, queue); // copy d_a -> a delete[] piv; // free host memory @@ -57,4 +62,81 @@ void magma_zinvert(dcomplex **mat, np_int n, int &jer, int device_id) { // result = magma_finalize(); jer = (int)err; } -#endif + +#ifdef USE_MAGMA_SVD +void magma_svd_zinvert(dcomplex **mat, np_int n, int &jer, int device_id) { + magma_int_t err = MAGMA_SUCCESS; + magma_queue_t queue = NULL; + magma_device_t dev = (magma_device_t)device_id; + magma_queue_create(dev, &queue); + magmaDoubleComplex *a = reinterpret_cast<magmaDoubleComplex *>(&mat[0][0]); + magmaDoubleComplex* guess_work; + magma_zmalloc_cpu(&guess_work, n * n); + double *s; + magma_dmalloc_cpu(&s, n); + magmaDoubleComplex *u; + magma_zmalloc_cpu(&u, n * n); + magmaDoubleComplex *vt; + magma_zmalloc_cpu(&vt, n * n); + double *rwork; + magma_dmalloc_cpu(&rwork, 5 * n); + magma_int_t *iwork; + magma_imalloc_cpu(&iwork, 8 * n); + + magma_zgesvd( + MagmaAllVec, MagmaAllVec, n, n, a, n, s, u, n, vt, n, guess_work, -1, rwork, &err + ); + if (err == MAGMA_SUCCESS) { + const magmaDoubleComplex cc0 = MAGMA_Z_MAKE(0.0, 0.0); + const magmaDoubleComplex cc1 = MAGMA_Z_MAKE(1.0, 0.0); + magma_int_t lwork = (magma_int_t)real(real(guess_work[0])); + magmaDoubleComplex *work; + magma_zmalloc_cpu(&work, lwork); + magma_zgesvd( + MagmaAllVec, MagmaAllVec, n, n, a, n, s, u, n, vt, n, work, lwork, rwork, &err + ); + + magma_free_cpu(work); + for (magma_int_t si = 0; si < n; si++) + s[si] = (s[si] == 0.0) ? 0.0 : 1.0 / s[si]; + + for (magma_int_t ri = 0; ri < n; ri++) { + for (magma_int_t rj = 0; rj < n; rj++) { + u[n * ri + rj] = MAGMA_Z_MAKE(s[ri] * real(u[n * ri + rj]), -s[ri] * imag(u[n * ri + rj])); + vt[n * ri + rj] = MAGMA_Z_MAKE(real(vt[n * ri + rj]), -imag(vt[n * ri + rj])); + } + } + + magmaDoubleComplex value; + for (magma_int_t mi = 0; mi < n; mi++) { + for (magma_int_t mj = 0; mj < n; mj++) { + value = MAGMA_Z_MAKE(0.0, 0.0); + for (magma_int_t mk = 0; mk < n; mk++) { + magmaDoubleComplex elem1 = vt[n * mi + mk]; + magmaDoubleComplex elem2 = u[n * mk + mj]; + value = MAGMA_Z_ADD(value, MAGMA_Z_MUL(elem1, elem2)); + } + a[n * mi + mj] = value; + } + } + + magmaDoubleComplex *d_a; + magma_zmalloc(&d_a, n * n); + magma_zsetmatrix(n, n, a, n, d_a , n, queue); + magmablas_ztranspose_inplace(n, d_a, n, queue); + magma_zgetmatrix(n, n, d_a, n, a , n, queue); + magma_free(d_a); + } else { + jer = (int)err; + } + magma_free_cpu(guess_work); + magma_free_cpu(iwork); + magma_free_cpu(rwork); + magma_free_cpu(s); + magma_free_cpu(u); + magma_free_cpu(vt); + magma_queue_destroy(queue); + jer = (int)err; +} +#endif // USE_MAGMA_SVD +#endif // USE_MAGMA