diff --git a/timing.h b/timing.h index b74634c9f2ee213fc32bb054ed189f985cecea58..eb2570b2bdd410f00f794da8d399846745158df4 100644 --- a/timing.h +++ b/timing.h @@ -72,7 +72,8 @@ typedef struct { double compose; // double phase; // double write; // - double total; } timing_t; + double total; + double offload;} timing_t; extern timing_t timing_wt; // wall-clock process timing, at Task 0 diff --git a/w-stacking.cu b/w-stacking.cu index 36d704c98b8175d5cfed47c79c01a9cd691993a0..309fd6edf6daba1ac9850fa777ce332942d43aec 100644 --- a/w-stacking.cu +++ b/w-stacking.cu @@ -328,7 +328,7 @@ void wstack( #ifdef ACCOMP omp_set_default_device(rank % omp_get_num_devices()); uint Nvis = num_points*freq_per_chan*polarizations; - #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom: grid[0:2*num_w_planes*grid_size_x*grid_size_y]) + #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y]) #else #pragma omp parallel for private(visindex) #endif @@ -420,8 +420,9 @@ void wstack( } } - - #pragma omp target exit data map(delete:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan],grid[0:2*num_w_planes*grid_size_x*grid_size_y]) + #ifdef ACCOMP + #pragma omp target exit data map(delete:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan], grid[0:2*num_w_planes*grid_size_x*grid_size_y]) + #endif // End switch between CUDA and CPU versions #endif //for (int i=0; i<100000; i++)printf("%f\n",grid[i]); diff --git a/w-stacking.h b/w-stacking.h index f06bbbc0ae51dbea747afdb4d805d0c1cb6b8ece..c953ee6f7ef59f57138a6932afcdc81c4e54b423 100644 --- a/w-stacking.h +++ b/w-stacking.h @@ -82,6 +82,9 @@ void phase_correction( int); +#ifdef ACCOMP +#pragma omp declare target (gauss_kernel_norm) +#endif #ifdef __CUDACC__ extern "C"