Skip to content
Snippets Groups Projects
Commit e279e64d authored by Giovanni La Mura
Browse files

Prepare nested OpenMP parallelism in TRAPPING

parent 7dd24b46
No related branches found
No related tags found
No related merge requests found
......@@ -398,7 +398,8 @@ void frfme(string data_file, string output_path) {
dcomplex *vec_wsum = tfrfme->wsum[0];
int size_wsum = nlmmt * nrvc;
double *vec_vkzm = vkzm[0];
int size_vkzm = nkv * nkv;
int nkvs = nkv * nkv;
int size_vkzm = nkvs;
const dcomplex *vec_tt1_wk = tt1->wk;
int size_tt1_wk = nkv * nkv * nlmmt;
int size_global_vec_w = nkv * nkv * (jlml - jlmf + 1);
......@@ -412,34 +413,34 @@ void frfme(string data_file, string output_path) {
device_id = omp_get_default_device();
global_vec_w = (dcomplex *)omp_target_alloc(size_global_vec_w * sizeof(dcomplex), device_id);
global_w = (dcomplex **)omp_target_alloc(size_global_w * sizeof(dcomplex), device_id);
#pragma omp target teams distribute parallel for simd map(tofrom: vec_wsum[0:size_wsum]) \
map(to:vec_vkzm[0:size_vkzm], vkv[0:nkv], vec_tt1_wk[0:size_tt1_wk], _xv[0:nxv], _yv[0:nyv], _zv[0:nzv]) \
map(to: global_vec_w, global_w) \
firstprivate(jlmf, jlml, nkv, nlmmt, nrvc, nxv, nyv, nzv, frsh, uim, delks)
#pragma omp target teams distribute parallel for \
map(tofrom: vec_wsum[0:size_wsum]) \
map(to: vec_vkzm[0:size_vkzm], vkv[0:nkv], vec_tt1_wk[0:size_tt1_wk], _xv[0:nxv]) \
map(to: _yv[0:nyv], _zv[0:nzv], global_vec_w, global_w)
#else
// Fall-back host work-space allocation
global_vec_w = = new dcomplex[size_global_vec_w]();
global_w = new dcomplex*[size_global_w];
#pragma omp parallel for simd
#endif
#pragma omp parallel for
#endif // USE_TARGET_OFFLOAD
for (int j80 = jlmf - 1; j80 < jlml; j80++) {
int nkvs = nkv * nkv;
dcomplex *vec_w = global_vec_w + nkvs * (j80 - jlmf + 1);
dcomplex **w = global_w + nkv * (j80 - jlmf + 1);
#pragma omp parallel for simd
for (int wi = 0; wi < nkv; wi++) w[wi] = vec_w + wi * nkv;
dcomplex wk_value;
int wk_index = 0;
#pragma omp parallel for simd
for (int jxy50 = 0; jxy50 < nkvs; jxy50++) {
wk_index = nlmmt * jxy50;
wk_value = vec_tt1_wk[wk_index + j80];
int wk_index = nlmmt * jxy50;
dcomplex wk_value = vec_tt1_wk[wk_index + j80];
int jy50 = jxy50 / nkv;
int jx50 = jxy50 % nkv;
vec_w[(nkv * jx50) + jy50] = wk_value;
} // jxy50 loop
int ixyz = 0;
#pragma omp parallel for simd
for (int wj = 0; wj < nrvc; wj++) vec_wsum[(j80 * nrvc) + wj] = cc0;
int nvtot = nxv * nyv * nzv;
int nvxy = nxv * nyv;
#pragma omp parallel for
for (int ixyz = 0; ixyz < nvtot; ixyz++) {
int iz75 = ixyz / nvxy;
int iy70 = (ixyz % nvxy) / nxv;
......@@ -448,6 +449,7 @@ void frfme(string data_file, string output_path) {
double y = _yv[iy70];
double x = _xv[ix65];
dcomplex sumy = cc0;
#pragma omp parallel for simd
for (int jy60x55 = 0; jy60x55 < nkvs ; jy60x55++) {
int jy60 = jy60x55 / nkv;
int jx55 = jy60x55 % nkv;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment