Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
N
NP_TMcode
Manage
Activity
Members
Plan
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container registry
Analyze
Contributor analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Giacomo Mulas
NP_TMcode
Commits
35cd635f
Commit
35cd635f
authored
8 months ago
by
Mulas, Giacomo
Browse files
Options
Downloads
Patches
Plain Diff
use less memory for identity subtraction in iterative refinement
parent
7b044c50
No related branches found
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/libnptm/cublas_calls.cpp
+6
-6
6 additions, 6 deletions
src/libnptm/cublas_calls.cpp
src/libnptm/lapack_calls.cpp
+8
-9
8 additions, 9 deletions
src/libnptm/lapack_calls.cpp
src/libnptm/magma_calls.cpp
+6
-6
6 additions, 6 deletions
src/libnptm/magma_calls.cpp
with
20 additions
and
21 deletions
src/libnptm/cublas_calls.cpp
+
6
−
6
View file @
35cd635f
...
...
@@ -143,12 +143,12 @@ void cublas_zinvert_and_refine(dcomplex **mat, np_int n, int &maxiters, double &
cudacall
(
cudaMalloc
<
cuDoubleComplex
>
(
&
d_a_residual
,
m
*
m
*
sizeof
(
cuDoubleComplex
)));
cudacall
(
cudaMalloc
<
cuDoubleComplex
>
(
&
d_a_refine
,
m
*
m
*
sizeof
(
cuDoubleComplex
)));
// allocate memory for the temporary matrix products
dcomplex
*
native_id
=
new
dcomplex
[
m
];
for
(
np_int
i
=
0
;
i
<
m
;
i
++
)
native_id
[
i
]
=
1
;
dcomplex
*
native_id
=
new
dcomplex
[
1
];
native_id
[
0
]
=
1
;
cuDoubleComplex
*
m_id
=
(
cuDoubleComplex
*
)
&
(
native_id
[
0
]);
// fill it with 1
cudacall
(
cudaMalloc
<
cuDoubleComplex
>
(
&
d_id
,
m
*
sizeof
(
cuDoubleComplex
)));
cudacall
(
cudaMemcpy
(
d_id
,
m_id
,
m
*
sizeof
(
cuDoubleComplex
),
cudaMemcpyHostToDevice
));
// copy identity to device vector
cudacall
(
cudaMalloc
<
cuDoubleComplex
>
(
&
d_id
,
sizeof
(
cuDoubleComplex
)));
cudacall
(
cudaMemcpy
(
d_id
,
m_id
,
sizeof
(
cuDoubleComplex
),
cudaMemcpyHostToDevice
));
// copy identity to device vector
delete
[]
native_id
;
// free identity vector on host
}
bool
iteraterefine
=
true
;
...
...
@@ -167,7 +167,7 @@ void cublas_zinvert_and_refine(dcomplex **mat, np_int n, int &maxiters, double &
// Fortran-style arrays, whereas our arrays are C-style row-major.
cublascall
(
CUZGEMM
(
handle
,
CUBLAS_OP_N
,
CUBLAS_OP_N
,
m
,
m
,
m
,
&
cu_mone
,
d_c
,
m
,
d_a
,
m
,
&
cu_zero
,
d_a_residual
,
m
));
// add the identity to the product
cublascall
(
CUZAXPY
(
handle
,
m
,
&
cu_one
,
d_id
,
1
,
d_a_residual
,
m
+
1
));
cublascall
(
CUZAXPY
(
handle
,
m
,
&
cu_one
,
d_id
,
0
,
d_a_residual
,
m
+
1
));
double
oldmax
=
0
;
if
(
refinemode
>
0
)
{
np_int
maxindex
;
...
...
@@ -191,7 +191,7 @@ void cublas_zinvert_and_refine(dcomplex **mat, np_int n, int &maxiters, double &
// multiply minus the original matrix times the new inverse matrix
cublascall
(
CUZGEMM
(
handle
,
CUBLAS_OP_N
,
CUBLAS_OP_N
,
m
,
m
,
m
,
&
cu_mone
,
d_c
,
m
,
d_a
,
m
,
&
cu_zero
,
d_a_residual
,
m
));
// add the identity to the product
cublascall
(
CUZAXPY
(
handle
,
m
,
&
cu_one
,
d_id
,
1
,
d_a_residual
,
m
+
1
));
cublascall
(
CUZAXPY
(
handle
,
m
,
&
cu_one
,
d_id
,
0
,
d_a_residual
,
m
+
1
));
if
((
refinemode
==
2
)
||
((
refinemode
==
1
)
&&
(
iter
==
(
maxiters
-
1
))))
{
// find the maximum absolute value of the residual
np_int
maxindex
;
...
...
This diff is collapsed.
Click to expand it.
src/libnptm/lapack_calls.cpp
+
8
−
9
View file @
35cd635f
...
...
@@ -92,6 +92,7 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl
#endif
np_int
nn
=
n
*
n
;
np_int
incx
=
1
;
np_int
incx0
=
0
;
#ifdef USE_MKL
MKL_Complex16
*
arr_orig
=
NULL
;
MKL_Complex16
*
arr_residual
=
NULL
;
...
...
@@ -108,17 +109,15 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl
arr_orig
=
new
MKL_Complex16
[
nn
];
arr_residual
=
new
MKL_Complex16
[
nn
];
arr_refine
=
new
MKL_Complex16
[
nn
];
id
=
new
MKL_Complex16
[
n
];
for
(
np_int
i
=
0
;
i
<
n
;
i
++
)
{
id
[
i
].
real
=
1
;
id
[
i
].
imag
=
0
;
}
id
=
new
MKL_Complex16
[
1
];
id
[
0
].
real
=
1
;
id
[
0
].
imag
=
0
;
#else
arr_orig
=
new
dcomplex
[
nn
];
arr_residual
=
new
dcomplex
[
nn
];
arr_refine
=
new
dcomplex
[
nn
];
id
=
new
dcomplex
[
n
];
for
(
np_int
i
=
0
;
i
<
n
;
i
++
)
id
[
i
]
=
(
dcomplex
)
1
;
id
=
new
dcomplex
[
1
];
id
[
0
]
=
(
dcomplex
)
1
;
#endif
zcopy_
(
&
nn
,
arr
,
&
incx
,
arr_orig
,
&
incx
);
}
...
...
@@ -153,7 +152,7 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl
// Fortran-style arrays, whereas our arrays are C-style row-major.
zgemm_
(
&
transa
,
&
transa
,
&
n
,
&
n
,
&
n
,
&
dcmone
,
arr
,
&
n
,
arr_orig
,
&
n
,
&
dczero
,
arr_residual
,
&
n
);
np_int
incy
=
n
+
1
;
zaxpy_
(
&
n
,
&
dcone
,
id
,
&
incx
,
arr_residual
,
&
incy
);
zaxpy_
(
&
n
,
&
dcone
,
id
,
&
incx
0
,
arr_residual
,
&
incy
);
double
oldmax
=
0
;
if
(
refinemode
>
0
)
{
np_int
maxindex
=
izamax_
(
&
nn
,
arr_residual
,
&
incx
);
...
...
@@ -171,7 +170,7 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl
zaxpy_
(
&
nn
,
&
dcone
,
arr_refine
,
&
incx
,
arr
,
&
incx
);
// zcopy_(&nn, arr_refine, &incx, arr, &incx);
zgemm_
(
&
transa
,
&
transa
,
&
n
,
&
n
,
&
n
,
&
dcmone
,
arr
,
&
n
,
arr_orig
,
&
n
,
&
dczero
,
arr_residual
,
&
n
);
zaxpy_
(
&
n
,
&
dcone
,
id
,
&
incx
,
arr_residual
,
&
incy
);
zaxpy_
(
&
n
,
&
dcone
,
id
,
&
incx
0
,
arr_residual
,
&
incy
);
if
((
refinemode
==
2
)
||
((
refinemode
==
1
)
&&
(
iter
==
(
maxiters
-
1
))))
{
np_int
maxindex
=
izamax_
(
&
nn
,
arr_residual
,
&
incx
);
#ifdef USE_MKL
...
...
This diff is collapsed.
Click to expand it.
src/libnptm/magma_calls.cpp
+
6
−
6
View file @
35cd635f
...
...
@@ -132,16 +132,16 @@ void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters,
}
// allocate memory for the identity vector on the host
{
dcomplex
*
native_id
=
new
dcomplex
[
m
];
for
(
magma_int_t
i
=
0
;
i
<
m
;
i
++
)
native_id
[
i
]
=
1
;
dcomplex
*
native_id
=
new
dcomplex
[
1
];
native_id
[
0
]
=
1
;
magmaDoubleComplex
*
id
=
(
magmaDoubleComplex
*
)
&
(
native_id
[
0
]);
// fill it with 1
err
=
magma_zmalloc
(
&
d_id
,
m
);
err
=
magma_zmalloc
(
&
d_id
,
1
);
if
(
err
!=
MAGMA_SUCCESS
)
{
printf
(
"Error allocating d_id
\n
"
);
exit
(
1
);
}
magma_zsetvector
(
m
,
id
,
1
,
d_id
,
1
,
queue
);
// copy identity to device vector
magma_zsetvector
(
1
,
id
,
1
,
d_id
,
1
,
queue
);
// copy identity to device vector
delete
[]
native_id
;
// free identity vector on host
}
}
...
...
@@ -161,7 +161,7 @@ void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters,
// Fortran-style arrays, whereas our arrays are C-style row-major.
magma_zgemm
(
MagmaNoTrans
,
MagmaNoTrans
,
m
,
m
,
m
,
magma_mone
,
d_a
,
m
,
d_a_orig
,
m
,
magma_zero
,
d_a_residual
,
m
,
queue
);
// add the identity to the product
magma_zaxpy
(
m
,
magma_one
,
d_id
,
1
,
d_a_residual
,
m
+
1
,
queue
);
magma_zaxpy
(
m
,
magma_one
,
d_id
,
0
,
d_a_residual
,
m
+
1
,
queue
);
double
oldmax
=
0
;
if
(
refinemode
>
0
)
{
// find the maximum absolute value of the residual
...
...
@@ -184,7 +184,7 @@ void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters,
// multiply minus the original matrix times the new inverse matrix
magma_zgemm
(
MagmaNoTrans
,
MagmaNoTrans
,
m
,
m
,
m
,
magma_mone
,
d_a
,
m
,
d_a_orig
,
m
,
magma_zero
,
d_a_residual
,
m
,
queue
);
// add the identity to the product
magma_zaxpy
(
m
,
magma_one
,
d_id
,
1
,
d_a_residual
,
m
+
1
,
queue
);
magma_zaxpy
(
m
,
magma_one
,
d_id
,
0
,
d_a_residual
,
m
+
1
,
queue
);
if
((
refinemode
==
2
)
||
((
refinemode
==
1
)
&&
(
iter
==
(
maxiters
-
1
))))
{
// find the maximum absolute value of the residual
magma_int_t
maxindex
=
magma_izamax
(
mm
,
d_a_residual
,
1
,
queue
);
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment