diff --git a/src/trapping/cfrfme.cpp b/src/trapping/cfrfme.cpp index 14a55fed4205eb37df3f19c4c0eb829b63a4899a..51ace8e1db39680cd482a4917e049f96a361d3f4 100644 --- a/src/trapping/cfrfme.cpp +++ b/src/trapping/cfrfme.cpp @@ -401,15 +401,22 @@ void frfme(string data_file, string output_path) { int size_vkzm = nkv * nkv; const dcomplex *vec_tt1_wk = tt1->wk; int size_tt1_wk = nkv * nkv * nlmmt; + dcomplex *global_vec_w = new dcomplex[nkv * nkv * (jlml - jlmf + 1)](); + dcomplex **global_w = new dcomplex*[nkv * (jlml - jlmf + 1)]; + int size_global_vec_w = nkv * nkv * (jlml - jlmf + 1); + int size_global_w = nkv * (jlml - jlmf + 1); #ifdef USE_TARGET_OFFLOAD -#pragma omp target teams distribute parallel for simd map(tofrom: vec_wsum[0:size_wsum]) map(to:vec_vkzm[0:size_vkzm], vkv[0:nkv], vec_tt1_wk[0:size_tt1_wk], _xv[0:nxv], _yv[0:nyv], _zv[0:nzv], jlmf, jlml, nkv, nlmmt, nrvc, nxv, nyv, nzv, frsh, uim, delks) +#pragma omp target teams distribute parallel for simd map(tofrom: vec_wsum[0:size_wsum]) \ + map(to:vec_vkzm[0:size_vkzm], vkv[0:nkv], vec_tt1_wk[0:size_tt1_wk], _xv[0:nxv], _yv[0:nyv], _zv[0:nzv]) \ + map(alloc:global_w[0:size_global_w], global_vec_w[0:size_global_vec_w]) \ + firstprivate(jlmf, jlml, nkv, nlmmt, nrvc, nxv, nyv, nzv, frsh, uim, delks) #else #pragma omp parallel for simd #endif for (int j80 = jlmf-1; j80 < jlml; j80++) { int nkvs = nkv * nkv; - dcomplex *vec_w = (dcomplex *) calloc(nkvs, sizeof(dcomplex)); - dcomplex **w = (dcomplex **) calloc(nkv, sizeof(dcomplex *)); + dcomplex *vec_w = global_vec_w + nkvs * (j80 - jlmf + 1); + dcomplex **w = global_w + nkv * (j80 - jlmf + 1); for (int wi = 0; wi < nkv; wi++) w[wi] = vec_w + wi * nkv; dcomplex wk_value; int wk_index = 0; @@ -449,9 +456,9 @@ void frfme(string data_file, string output_path) { } // jy60x55 loop vec_wsum[((j80) * nrvc) + ixyz] = sumy * delks; } // ixyz loop - free(vec_w); - free(w); } // j80 loop + delete[] global_w; + delete[] global_vec_w; #ifdef USE_NVTX nvtxRangePop(); nvtxRangePush("Closing operations");