Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
N
NP_TMcode
Manage
Activity
Members
Plan
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container registry
Analyze
Contributor analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Giacomo Mulas
NP_TMcode
Commits
e279e64d
Commit
e279e64d
authored
1 month ago
by
Giovanni La Mura
Browse files
Options
Downloads
Patches
Plain Diff
Prepare nested OpenMP parallelism in TRAPPING
parent
7dd24b46
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/trapping/cfrfme.cpp
+23
-21
23 additions, 21 deletions
src/trapping/cfrfme.cpp
with
23 additions
and
21 deletions
src/trapping/cfrfme.cpp
+
23
−
21
View file @
e279e64d
...
...
@@ -398,7 +398,8 @@ void frfme(string data_file, string output_path) {
dcomplex
*
vec_wsum
=
tfrfme
->
wsum
[
0
];
int
size_wsum
=
nlmmt
*
nrvc
;
double
*
vec_vkzm
=
vkzm
[
0
];
int
size_vkzm
=
nkv
*
nkv
;
int
nkvs
=
nkv
*
nkv
;
int
size_vkzm
=
nkvs
;
const
dcomplex
*
vec_tt1_wk
=
tt1
->
wk
;
int
size_tt1_wk
=
nkv
*
nkv
*
nlmmt
;
int
size_global_vec_w
=
nkv
*
nkv
*
(
jlml
-
jlmf
+
1
);
...
...
@@ -412,34 +413,34 @@ void frfme(string data_file, string output_path) {
device_id
=
omp_get_default_device
();
global_vec_w
=
(
dcomplex
*
)
omp_target_alloc
(
size_global_vec_w
*
sizeof
(
dcomplex
),
device_id
);
global_w
=
(
dcomplex
**
)
omp_target_alloc
(
size_global_w
*
sizeof
(
dcomplex
),
device_id
);
#pragma omp target teams distribute parallel for
simd map(tofrom: vec_wsum[0:size_wsum])
\
map(to
:vec_vkzm[0:size_vkzm], vkv[0:nkv], vec_tt1_wk[0:size_tt1_wk], _xv[0:nxv], _yv[0:nyv], _zv[0:nzv
]) \
map(to:
global_vec_w, global_w
) \
firstprivate(jlmf, jlml, nkv, nlmmt, nrvc, nxv,
nyv, nzv,
frsh, uim, delks
)
#pragma omp target teams distribute parallel for \
map(to
from: vec_wsum[0:size_wsum
]) \
map(to:
vec_vkzm[0:size_vkzm], vkv[0:nkv], vec_tt1_wk[0:size_tt1_wk], _xv[0:nxv]
) \
map(to: _yv[0:
nyv
]
,
_zv[0:
nzv
]
,
global_vec_w, global_w
)
#else
// Fall-back host work-space allocation
global_vec_w
=
=
new
dcomplex
[
size_global_vec_w
]();
global_w
=
new
dcomplex
*
[
size_global_w
];
#pragma omp parallel for
simd
#endif
#pragma omp parallel for
#endif
// USE_TARGET_OFFLOAD
for
(
int
j80
=
jlmf
-
1
;
j80
<
jlml
;
j80
++
)
{
int
nkvs
=
nkv
*
nkv
;
dcomplex
*
vec_w
=
global_vec_w
+
nkvs
*
(
j80
-
jlmf
+
1
);
dcomplex
**
w
=
global_w
+
nkv
*
(
j80
-
jlmf
+
1
);
#pragma omp parallel for simd
for
(
int
wi
=
0
;
wi
<
nkv
;
wi
++
)
w
[
wi
]
=
vec_w
+
wi
*
nkv
;
dcomplex
wk_value
;
int
wk_index
=
0
;
#pragma omp parallel for simd
for
(
int
jxy50
=
0
;
jxy50
<
nkvs
;
jxy50
++
)
{
wk_index
=
nlmmt
*
jxy50
;
wk_value
=
vec_tt1_wk
[
wk_index
+
j80
];
int
wk_index
=
nlmmt
*
jxy50
;
dcomplex
wk_value
=
vec_tt1_wk
[
wk_index
+
j80
];
int
jy50
=
jxy50
/
nkv
;
int
jx50
=
jxy50
%
nkv
;
vec_w
[(
nkv
*
jx50
)
+
jy50
]
=
wk_value
;
}
// jxy50 loop
int
ixyz
=
0
;
#pragma omp parallel for simd
for
(
int
wj
=
0
;
wj
<
nrvc
;
wj
++
)
vec_wsum
[(
j80
*
nrvc
)
+
wj
]
=
cc0
;
int
nvtot
=
nxv
*
nyv
*
nzv
;
int
nvxy
=
nxv
*
nyv
;
#pragma omp parallel for
for
(
int
ixyz
=
0
;
ixyz
<
nvtot
;
ixyz
++
)
{
int
iz75
=
ixyz
/
nvxy
;
int
iy70
=
(
ixyz
%
nvxy
)
/
nxv
;
...
...
@@ -448,6 +449,7 @@ void frfme(string data_file, string output_path) {
double
y
=
_yv
[
iy70
];
double
x
=
_xv
[
ix65
];
dcomplex
sumy
=
cc0
;
#pragma omp parallel for simd
for
(
int
jy60x55
=
0
;
jy60x55
<
nkvs
;
jy60x55
++
)
{
int
jy60
=
jy60x55
/
nkv
;
int
jx55
=
jy60x55
%
nkv
;
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment