Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
N
NP_TMcode
Manage
Activity
Members
Plan
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container registry
Analyze
Contributor analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Giacomo Mulas
NP_TMcode
Commits
704e038d
Commit
704e038d
authored
1 month ago
by
Mulas, Giacomo
Browse files
Options
Downloads
Patches
Plain Diff
linearise loops in cfrfme.cpp, to prepare for omp target offload
parent
4a8cfbd6
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/trapping/cfrfme.cpp
+80
-44
80 additions, 44 deletions
src/trapping/cfrfme.cpp
with
80 additions
and
44 deletions
src/trapping/cfrfme.cpp
+
80
−
44
View file @
704e038d
...
...
@@ -61,6 +61,14 @@
#include
<nvtx3/nvToolsExt.h>
#endif
#ifdef _OPENMP
#include
<omp.h>
#endif
#ifdef USE_TARGET_OFFLOAD
#pragma omp requires unified_shared_memory
#endif
using
namespace
std
;
/*! \brief C++ implementation of FRFME
...
...
@@ -391,57 +399,84 @@ void frfme(string data_file, string output_path) {
#ifdef USE_NVTX
nvtxRangePush
(
"j80 loop"
);
#endif
#pragma omp parallel for
for
(
int
j80
=
jlmf
;
j80
<=
jlml
;
j80
++
)
{
dcomplex
*
vec_w
=
new
dcomplex
[
nkv
*
nkv
]();
dcomplex
**
w
=
new
dcomplex
*
[
nkv
];
dcomplex
*
vec_wsum
=
tfrfme
->
wsum
[
0
];
double
*
vec_vkzm
=
vkzm
[
0
];
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd
#endif
// #pragma omp parallel for
for
(
int
j80
=
jlmf
-
1
;
j80
<
jlml
;
j80
++
)
{
int
nkvs
=
nkv
*
nkv
;
dcomplex
*
vec_w
=
(
dcomplex
*
)
calloc
(
nkvs
,
sizeof
(
dcomplex
));
dcomplex
**
w
=
(
dcomplex
**
)
calloc
(
nkv
,
sizeof
(
dcomplex
*
));
// dcomplex *wk_local = new dcomplex[nlmmt]();
for
(
int
wi
=
0
;
wi
<
nkv
;
wi
++
)
w
[
wi
]
=
vec_w
+
wi
*
nkv
;
dcomplex
wk_value
;
int
wk_index
=
0
;
int
nkvs
=
nkv
*
nkv
;
for
(
int
jy50
=
0
;
jy50
<
nkv
;
jy50
++
)
{
for
(
int
jx50
=
0
;
jx50
<
nkv
;
jx50
++
)
{
// for (int jy50 = 0; jy50 < nkv; jy50++) {
// for (int jx50 = 0; jx50 < nkv; jx50++) {
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd
// #endif
// #pragma omp parallel for
for
(
int
jxy50
=
0
;
jxy50
<
nkvs
;
jxy50
++
)
{
// for (int wi = 0; wi < nlmmt; wi++) wk_local[wi] = tt1->wk[wk_index++];
// w[jx50][jy50] = wk_local[j80 - 1];
wk_value
=
tt1
->
wk
[
wk_index
+
j80
-
1
];
wk_index
+=
nlmmt
;
w
[
jx50
][
jy50
]
=
wk_value
;
}
// jx50
}
// jy50 loop
// w[jx50][jy50] = wk_local[j80];
wk_index
=
nlmmt
*
jxy50
;
wk_value
=
tt1
->
wk
[
wk_index
+
j80
];
// wk_index += nlmmt;
int
jy50
=
jxy50
/
nkv
;
int
jx50
=
jxy50
%
nkv
;
vec_w
[(
nkv
*
jx50
)
+
jy50
]
=
wk_value
;
// w[jx50][jy50] = wk_value;
}
// jxy50 loop
// } // jx50
// } // jy50 loop
int
ixyz
=
0
;
for
(
int
wj
=
0
;
wj
<
nrvc
;
wj
++
)
tfrfme
->
wsum
[
j80
-
1
][
wj
]
=
cc0
;
for
(
int
iz75
=
0
;
iz75
<
nzv
;
iz75
++
)
{
for
(
int
wj
=
0
;
wj
<
nrvc
;
wj
++
)
vec_wsum
[(
j80
*
nrvc
)
+
wj
]
=
cc0
;
int
nvtot
=
nxv
*
nyv
*
nzv
;
int
nvxy
=
nxv
*
nyv
;
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for
// #endif
// #pragma omp parallel for
for
(
int
ixyz
=
0
;
ixyz
<
nvtot
;
ixyz
++
)
{
int
iz75
=
ixyz
/
nvxy
;
int
iy70
=
(
ixyz
%
nvxy
)
/
nxv
;
int
ix65
=
ixyz
%
nxv
;
// for (int iz75 = 0; iz75 < nzv; iz75++) {
double
z
=
_zv
[
iz75
]
+
frsh
;
for
(
int
iy70
=
0
;
iy70
<
nyv
;
iy70
++
)
{
// for (int iy70 = 0; iy70 < nyv; iy70++) {
double
y
=
_yv
[
iy70
];
for
(
int
ix65
=
0
;
ix65
<
nxv
;
ix65
++
)
{
// for (int ix65 = 0; ix65 < nxv; ix65++) {
double
x
=
_xv
[
ix65
];
ixyz
++
;
// ixyz++;
dcomplex
sumy
=
cc0
;
for
(
int
jy60
=
0
;
jy60
<
nkv
;
jy60
++
)
{
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target parallel for simd reduction(+:sumy)
// #endif
for
(
int
jy60x55
=
0
;
jy60x55
<
nkvs
;
jy60x55
++
)
{
int
jy60
=
jy60x55
/
nkv
;
int
jx55
=
jy60x55
%
nkv
;
double
vky
=
vkv
[
jy60
];
double
vkx
=
vkv
[
nkv
-
1
];
double
vkzf
=
vkzm
[
0
][
jy60
];
dcomplex
phasf
=
cexp
(
uim
*
(
-
vkx
*
x
+
vky
*
y
+
vkzf
*
z
));
double
vkzl
=
vkzm
[
nkv
-
1
][
jy60
];
dcomplex
phasl
=
cexp
(
uim
*
(
vkx
*
x
+
vky
*
y
+
vkzl
*
z
));
dcomplex
sumx
=
0.5
*
(
w
[
0
][
jy60
]
*
phasf
+
w
[
nkv
-
1
][
jy60
]
*
phasl
);
for
(
int
jx55
=
2
;
jx55
<=
nks
;
jx55
++
)
{
vkx
=
vkv
[
jx55
-
1
];
double
vkz
=
vkzm
[
jx55
-
1
][
jy60
];
dcomplex
phas
=
cexp
(
uim
*
(
vkx
*
x
+
vky
*
y
+
vkz
*
z
));
sumx
+=
(
w
[
jx55
-
1
][
jy60
]
*
phas
);
}
// jx55 loop
if
(
jy60
==
0
||
jy60
==
nkv
-
1
)
sumx
*=
0.5
;
double
vkx
=
(
jx55
==
0
)
?
vkv
[
nkv
-
1
]
:
vkv
[
jx55
];
double
vkz
=
vec_vkzm
[(
jx55
*
nkv
)
+
jy60
];
dcomplex
phas
=
(
jx55
==
0
)
?
cexp
(
uim
*
(
-
vkx
*
x
+
vky
*
y
+
vkz
*
z
))
:
cexp
(
uim
*
(
vkx
*
x
+
vky
*
y
+
vkz
*
z
));
dcomplex
sumx
=
vec_w
[(
jx55
*
nkv
)
+
jy60
]
*
phas
;
double
factor1
=
((
jx55
==
0
)
||
(
jx55
==
(
nkv
-
1
)))
?
0.5
:
1.0
;
double
factor2
=
((
jy60
==
0
)
||
(
jy60
==
(
nkv
-
1
)))
?
0.5
:
1.0
;
sumx
*=
factor1
*
factor2
;
sumy
+=
sumx
;
}
// jy60 loop
tfrfme
->
wsum
[
j80
-
1
][
ixyz
-
1
]
=
sumy
*
delks
;
}
// ix65 loop
}
// iy70 loop
}
// iz75 loop
delete
[]
vec_w
;
delete
[]
w
;
}
// jy60x55 loop
vec_wsum
[((
j80
)
*
nrvc
)
+
ixyz
]
=
sumy
*
delks
;
// } // ix65 loop
// } // iy70 loop
// } // iz75 loop
}
// ixyz loop
free
(
vec_w
);
free
(
w
);
// delete[] wk_local;
}
// j80 loop
#ifdef USE_NVTX
...
...
@@ -499,3 +534,4 @@ void frfme(string data_file, string output_path) {
nvtxRangePop
();
#endif
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment