Skip to content
Snippets Groups Projects
Commit 35cd635f authored by Mulas, Giacomo's avatar Mulas, Giacomo
Browse files

use less memory for identity subtraction in iterative refinement

parent 7b044c50
No related branches found
No related tags found
No related merge requests found
......@@ -143,12 +143,12 @@ void cublas_zinvert_and_refine(dcomplex **mat, np_int n, int &maxiters, double &
cudacall(cudaMalloc<cuDoubleComplex>(&d_a_residual, m*m*sizeof(cuDoubleComplex)));
cudacall(cudaMalloc<cuDoubleComplex>(&d_a_refine, m*m*sizeof(cuDoubleComplex)));
// allocate memory for the temporary matrix products
dcomplex *native_id = new dcomplex[m];
for (np_int i=0; i<m; i++) native_id[i] = 1;
dcomplex *native_id = new dcomplex[1];
native_id[0] = 1;
cuDoubleComplex *m_id = (cuDoubleComplex *) &(native_id[0]);
// fill it with 1
cudacall(cudaMalloc<cuDoubleComplex>(&d_id, m*sizeof(cuDoubleComplex)));
cudacall(cudaMemcpy(d_id, m_id, m*sizeof(cuDoubleComplex),cudaMemcpyHostToDevice)); // copy identity to device vector
cudacall(cudaMalloc<cuDoubleComplex>(&d_id, sizeof(cuDoubleComplex)));
cudacall(cudaMemcpy(d_id, m_id, sizeof(cuDoubleComplex),cudaMemcpyHostToDevice)); // copy identity to device vector
delete[] native_id; // free identity vector on host
}
bool iteraterefine = true;
......@@ -167,7 +167,7 @@ void cublas_zinvert_and_refine(dcomplex **mat, np_int n, int &maxiters, double &
// Fortran-style arrays, whereas our arrays are C-style row-major.
cublascall(CUZGEMM(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, m, m, &cu_mone, d_c, m, d_a, m, &cu_zero, d_a_residual, m));
// add the identity to the product
cublascall(CUZAXPY(handle, m, &cu_one, d_id, 1, d_a_residual, m+1));
cublascall(CUZAXPY(handle, m, &cu_one, d_id, 0, d_a_residual, m+1));
double oldmax=0;
if (refinemode >0) {
np_int maxindex;
......@@ -191,7 +191,7 @@ void cublas_zinvert_and_refine(dcomplex **mat, np_int n, int &maxiters, double &
// multiply minus the original matrix times the new inverse matrix
cublascall(CUZGEMM(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, m, m, &cu_mone, d_c, m, d_a, m, &cu_zero, d_a_residual, m));
// add the identity to the product
cublascall(CUZAXPY(handle, m, &cu_one, d_id, 1, d_a_residual, m+1));
cublascall(CUZAXPY(handle, m, &cu_one, d_id, 0, d_a_residual, m+1));
if ((refinemode==2) || ((refinemode==1) && (iter == (maxiters-1)))) {
// find the maximum absolute value of the residual
np_int maxindex;
......
......@@ -92,6 +92,7 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl
#endif
np_int nn = n*n;
np_int incx = 1;
np_int incx0 = 0;
#ifdef USE_MKL
MKL_Complex16 *arr_orig = NULL;
MKL_Complex16 *arr_residual = NULL;
......@@ -108,17 +109,15 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl
arr_orig = new MKL_Complex16[nn];
arr_residual = new MKL_Complex16[nn];
arr_refine = new MKL_Complex16[nn];
id = new MKL_Complex16[n];
for (np_int i=0; i<n ; i++) {
id[i].real = 1;
id[i].imag = 0;
}
id = new MKL_Complex16[1];
id[0].real = 1;
id[0].imag = 0;
#else
arr_orig = new dcomplex[nn];
arr_residual = new dcomplex[nn];
arr_refine = new dcomplex[nn];
id = new dcomplex[n];
for (np_int i=0; i<n ; i++) id[i] = (dcomplex) 1;
id = new dcomplex[1];
id[0] = (dcomplex) 1;
#endif
zcopy_(&nn, arr, &incx, arr_orig, &incx);
}
......@@ -153,7 +152,7 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl
// Fortran-style arrays, whereas our arrays are C-style row-major.
zgemm_(&transa, &transa, &n, &n, &n, &dcmone, arr, &n, arr_orig, &n, &dczero, arr_residual, &n);
np_int incy = n+1;
zaxpy_(&n, &dcone, id, &incx, arr_residual, &incy);
zaxpy_(&n, &dcone, id, &incx0, arr_residual, &incy);
double oldmax = 0;
if (refinemode >0) {
np_int maxindex = izamax_(&nn, arr_residual, &incx);
......@@ -171,7 +170,7 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl
zaxpy_(&nn, &dcone, arr_refine, &incx, arr, &incx);
// zcopy_(&nn, arr_refine, &incx, arr, &incx);
zgemm_(&transa, &transa, &n, &n, &n, &dcmone, arr, &n, arr_orig, &n, &dczero, arr_residual, &n);
zaxpy_(&n, &dcone, id, &incx, arr_residual, &incy);
zaxpy_(&n, &dcone, id, &incx0, arr_residual, &incy);
if ((refinemode==2) || ((refinemode==1) && (iter == (maxiters-1)))) {
np_int maxindex = izamax_(&nn, arr_residual, &incx);
#ifdef USE_MKL
......
......@@ -132,16 +132,16 @@ void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters,
}
// allocate memory for the identity vector on the host
{
dcomplex *native_id = new dcomplex[m];
for (magma_int_t i=0; i<m; i++) native_id[i] = 1;
dcomplex *native_id = new dcomplex[1];
native_id[0] = 1;
magmaDoubleComplex *id = (magmaDoubleComplex *) &(native_id[0]);
// fill it with 1
err = magma_zmalloc(&d_id, m);
err = magma_zmalloc(&d_id, 1);
if (err != MAGMA_SUCCESS) {
printf("Error allocating d_id\n");
exit(1);
}
magma_zsetvector(m, id, 1, d_id, 1, queue); // copy identity to device vector
magma_zsetvector(1, id, 1, d_id, 1, queue); // copy identity to device vector
delete[] native_id; // free identity vector on host
}
}
......@@ -161,7 +161,7 @@ void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters,
// Fortran-style arrays, whereas our arrays are C-style row-major.
magma_zgemm(MagmaNoTrans, MagmaNoTrans, m, m, m, magma_mone, d_a, m, d_a_orig, m, magma_zero, d_a_residual, m, queue);
// add the identity to the product
magma_zaxpy (m, magma_one, d_id, 1, d_a_residual, m+1, queue);
magma_zaxpy (m, magma_one, d_id, 0, d_a_residual, m+1, queue);
double oldmax=0;
if (refinemode >0) {
// find the maximum absolute value of the residual
......@@ -184,7 +184,7 @@ void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters,
// multiply minus the original matrix times the new inverse matrix
magma_zgemm(MagmaNoTrans, MagmaNoTrans, m, m, m, magma_mone, d_a, m, d_a_orig, m, magma_zero, d_a_residual, m, queue);
// add the identity to the product
magma_zaxpy (m, magma_one, d_id, 1, d_a_residual, m+1, queue);
magma_zaxpy (m, magma_one, d_id, 0, d_a_residual, m+1, queue);
if ((refinemode==2) || ((refinemode==1) && (iter == (maxiters-1)))) {
// find the maximum absolute value of the residual
magma_int_t maxindex = magma_izamax(mm, d_a_residual, 1, queue);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment