From 5a864f5c7b5476e86df83ba2b4562b865c470a49 Mon Sep 17 00:00:00 2001 From: David Goz <david.goz@inaf.it> Date: Wed, 7 May 2025 17:38:31 +0200 Subject: [PATCH] cuda-omp/omp/miscellaneous/globals update --- cuda-omp/omp/miscellaneous/globals/Makefile | 4 +- .../omp/miscellaneous/globals/allvars.hpp | 2 +- .../omp/miscellaneous/globals/globals.cpp | 69 +++++++++---------- 3 files changed, 35 insertions(+), 40 deletions(-) diff --git a/cuda-omp/omp/miscellaneous/globals/Makefile b/cuda-omp/omp/miscellaneous/globals/Makefile index 6c73464..c9f0777 100644 --- a/cuda-omp/omp/miscellaneous/globals/Makefile +++ b/cuda-omp/omp/miscellaneous/globals/Makefile @@ -1,6 +1,6 @@ -COMPILER_CXX ?= clang++-18 +COMPILER_CXX ?= clang++ DEBUG ?= YES -FLAGS ?= -fopenmp --offload-arch=native -fopenmp-targets=nvptx64-nvidia-cuda +FLAGS ?= # -fopenmp # --offload-arch=native -fopenmp-targets=nvptx64-nvidia-cuda # executable name EXEC ?= globals diff --git a/cuda-omp/omp/miscellaneous/globals/allvars.hpp b/cuda-omp/omp/miscellaneous/globals/allvars.hpp index 50907cb..b7945aa 100644 --- a/cuda-omp/omp/miscellaneous/globals/allvars.hpp +++ b/cuda-omp/omp/miscellaneous/globals/allvars.hpp @@ -5,7 +5,7 @@ constexpr std::size_t X = 3; constexpr std::size_t Y = 6; constexpr std::size_t Z = 65536; -using MyData = double; +using MyData = int; // Global pointer declared in target region #pragma omp declare target diff --git a/cuda-omp/omp/miscellaneous/globals/globals.cpp b/cuda-omp/omp/miscellaneous/globals/globals.cpp index 287a09d..059e1d9 100644 --- a/cuda-omp/omp/miscellaneous/globals/globals.cpp +++ b/cuda-omp/omp/miscellaneous/globals/globals.cpp @@ -3,6 +3,7 @@ #include <omp.h> #include <cassert> #include <new> +#include <algorithm> #include "allvars.hpp" @@ -23,54 +24,46 @@ int main() for (std::size_t z=0 ; z<Z ; z++) { - global_ptr[x][y][z] = static_cast<MyData>(1); + global_ptr[x][y][z] = static_cast<MyData>(z + 1); } // loop over Z } // loop over Y } // loop over X std::cout << "\n\t global_ptr allocated on the host \n" << std::endl; -// // Allocate memory on the device and set the global pointer -// #pragma omp target enter data map(alloc: global_ptr[0:1][0:6][0:SIZE]) + // Allocate memory on the device and set the global pointer + #pragma omp target enter data map(to: global_ptr[0:X][0:Y][0:Z]) -// for - + #pragma omp target teams distribute parallel for + for (std::size_t index=0 ; index<Z ; index++) + { + MyData diff = 0; + for (std::size_t x=0 ; x<X ; x++) + for (std::size_t y=0 ; y<Y ; y++) + diff += global_ptr[x][y][index]; + + for (std::size_t x=0 ; x<X ; x++) + for (std::size_t y=0 ; y<Y ; y++) + global_ptr[x][y][index] = (diff / (X * Y * (index + 1))); + } // kernel -// // Copy data from host to device -// #pragma omp target data map(to: host_data[0: SIZE]) -// { -// #pragma omp target teams distribute parallel for -// for (int index=0 ; index<SIZE ; index++) -// { -// const int tid = omp_get_thread_num(); -// const int team = omp_get_team_num(); -// const int nthr = omp_get_num_threads(); -// const int whoAmI = tid + (team * nthr); - -// MyData diff[6]; -// for (std::size_t i=0 ; i<6 : i++) -// { -// diff[i] = global_ptr[0][i][index] * ; -// } - -// { -// global_ptr[i] = (host_data[i] * 2); -// } -// } // kernel - -// // Copy data back from device to host using the global pointer -// #pragma omp target update from(global_ptr[0: SIZE]) -// } + // Device-host synchronization + #pragma omp target update from(global_ptr[0:X][0:Y][0:Z]) -// std::cout << "\n\t Result after device computation:" << std::endl; -// for (std::size_t i=0 ; i<SIZE ; i++) -// { -// std::cout << global_ptr[i] << " "; -// } -// std::cout << std::endl; + // Check if any element along Z is equal to 1 + for (std::size_t x=0 ; x<X ; x++) + for (std::size_t y=0 ; y<Y ; y++) + { + const bool One = std::all_of(&global_ptr[x][y][0], &global_ptr[x][y][Z], [](const MyData x) {return (x == 1);}); + if (One == false) + { + std::cout << "\n\t Test failed \n" << std::endl; + return -1; + } + } // Deallocate memory on the device - //#pragma omp target exit data map(delete: global_ptr) + #pragma omp target exit data map(delete: global_ptr[0:X][0:Y][0:Z]) // deallocate host memory for (std::size_t x=0 ; x<X ; x++) @@ -83,5 +76,7 @@ int main() } delete[] global_ptr; + std::cout << "\n\t Test OK! \n" << std::endl; + return 0; } -- GitLab