From dbc6ef2d8a486428f77d0f28578e9aebea27a676 Mon Sep 17 00:00:00 2001
From: Giovanni La Mura <giovanni.lamura@inaf.it>
Date: Wed, 17 Jul 2024 09:50:48 +0200
Subject: [PATCH] Test for target offloading capability

---
Review notes (free-form text, not part of the commit message):

* Adds an --enable-offload configure option, documented as [default=auto].
  configure writes np_test_offload.cpp (an OpenMP "target teams" kernel that
  requires unified_shared_memory) and compiles it with $CXX using the nvptx
  offload flags. CXX_SUPPORTS_OFFLOAD holds the exit status of that compile.
    - option given and not "no": a successful probe sets OFFLOADFLAGS to the
      offload flags; a failed probe aborts configure with an error.
    - option given as "no": OFFLOADFLAGS is set to the empty string and the
      probe is not run.
    - option not given: the probe decides silently between the offload flags
      and the empty string.
* OFFLOADFLAGS is substituted into Makefile.in and placed in CXXFLAGS right
  after "-O3 -ggdb".
* OMPFLAGS now also carries -DUSE_OPENMP.
* Change made during review: the probe compile used "> /dev/null #2>&1",
  which left the stderr redirect commented out, so compiler errors reached
  the terminal while stdout was thrown away. Both streams now go to the
  configure log (AS_MESSAGE_LOG_FD in configure.ac, descriptor 5 in the
  generated configure), keeping the terminal clean and the diagnostics
  available in config.log.
* build/configure is generated from build/configure.ac; re-run autoconf to
  confirm the generated text if in doubt.

 build/Makefile.in  |   1 +
 build/configure    | 109 +++++++++++++++++++++++++++++++++++++++++++--
 build/configure.ac |  68 ++++++++++++++++++++++++++--
 3 files changed, 172 insertions(+), 6 deletions(-)

diff --git a/build/Makefile.in b/build/Makefile.in
index 85cae453..98ff73a9 100644
--- a/build/Makefile.in
+++ b/build/Makefile.in
@@ -445,6 +445,7 @@ NMEDIT = @NMEDIT@
 NVTXFLAGS = @NVTXFLAGS@
 OBJDUMP = @OBJDUMP@
 OBJEXT = @OBJEXT@
+OFFLOADFLAGS = @OFFLOADFLAGS@
 OMPFLAGS = @OMPFLAGS@
 OTOOL = @OTOOL@
 OTOOL64 = @OTOOL64@
diff --git a/build/configure b/build/configure
index c5db0810..7dee52ae 100755
--- a/build/configure
+++ b/build/configure
@@ -664,6 +664,7 @@ MAGMAFLAGS
 LAPACKLDFLAGS
 LAPACKFLAGS
 OMPFLAGS
+OFFLOADFLAGS
 HDF5_LDFLAGS
 HDF5_LIB
 HDF5_INCLUDE
@@ -810,6 +811,7 @@ with_aix_soname
 with_gnu_ld
 with_sysroot
 enable_libtool_lock
+enable_offload
 enable_openmp
 with_lapack
 with_magma
@@ -1472,6 +1474,8 @@ Optional Features:
   --enable-fast-install[=PKGS]
                           optimize for fast installation [default=yes]
   --disable-libtool-lock  avoid locking (might break parallel builds)
+  --enable-offload        enable target offloading (requires g++ version >=
+                          13) [default=auto]
   --enable-openmp         enable OpneMP multi-threading [default=yes]
   --enable-nvtx           use NVTX profiling [default=no]
 
@@ -24883,18 +24887,117 @@ esac
 fi
 
 # Configure the optional features
+# Check whether --enable-offload was given.
+if test ${enable_offload+y}
+then :
+  enableval=$enable_offload;
+    if test "x$enableval" != "xno"; then
+
+    cat > np_test_offload.cpp <<EOF
+#include <omp.h>
+#pragma omp requires unified_shared_memory
+
+#pragma omp begin declare target device_type(any)
+void fill_with_ones(int *array) {
+#pragma omp target teams distribute parallel for
+  for (int i = 0; i < 1000; i++) {
+    for (int j = 0; j < 1000; j++) {
+      array[(1000 * i) + j] = 1;
+    }
+  }
+}
+#pragma omp end declare target
+int main(int argc, char** argv) {
+  int *numbers = new int[1000000]();
+  fill_with_ones(numbers);
+  delete[] numbers;
+  return 0;
+}
+EOF
+    $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp >&5 2>&1
+    export CXX_SUPPORTS_OFFLOAD=$?
+    rm np_test_offload.cpp
+    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
+      rm np_test_offload.o
+    fi
+
+
+      if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
+        { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Enabling offload." >&5
+printf "%s\n" "$as_me: Enabling offload." >&6;}
+        OFFLOADFLAGS="-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp"
+
+      else
+        as_fn_error $? "Target offload was requested, but it is not supported!" "$LINENO" 5
+      fi
+    else
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Disabling offload." >&5
+printf "%s\n" "$as_me: Disabling offload." >&6;}
+      OFFLOADFLAGS=""
+
+    fi
+
+else case e in #(
+  e)
+
+    cat > np_test_offload.cpp <<EOF
+#include <omp.h>
+#pragma omp requires unified_shared_memory
+
+#pragma omp begin declare target device_type(any)
+void fill_with_ones(int *array) {
+#pragma omp target teams distribute parallel for
+  for (int i = 0; i < 1000; i++) {
+    for (int j = 0; j < 1000; j++) {
+      array[(1000 * i) + j] = 1;
+    }
+  }
+}
+#pragma omp end declare target
+int main(int argc, char** argv) {
+  int *numbers = new int[1000000]();
+  fill_with_ones(numbers);
+  delete[] numbers;
+  return 0;
+}
+EOF
+    $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp >&5 2>&1
+    export CXX_SUPPORTS_OFFLOAD=$?
+    rm np_test_offload.cpp
+    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
+      rm np_test_offload.o
+    fi
+
+
+    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Enabling offload." >&5
+printf "%s\n" "$as_me: Enabling offload." >&6;}
+      OFFLOADFLAGS="-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp"
+
+    else
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Disabling offload." >&5
+printf "%s\n" "$as_me: Disabling offload." >&6;}
+      OFFLOADFLAGS=""
+
+    fi
+
+   ;;
+esac
+fi
+
+
 # Check whether --enable-openmp was given.
 if test ${enable_openmp+y}
 then :
   enableval=$enable_openmp;
     if test "x$enableval" != "xno"; then
-      OMPFLAGS="-fopenmp"
+      OMPFLAGS="-DUSE_OPENMP -fopenmp"
 
     fi
 
 else case e in #(
   e)
-    OMPFLAGS="-fopenmp"
+    OMPFLAGS="-DUSE_OPENMP -fopenmp"
 
 
    ;;
@@ -25175,7 +25278,7 @@ esac
 fi
 
 
-CXXFLAGS="$CLANGFLAGS -O3 -ggdb $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
+CXXFLAGS="$CLANGFLAGS -O3 -ggdb $OFFLOADFLAGS $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
 SUBDIRS="cluster libnptm sphere testing trapping"
 
 # Generate the output
diff --git a/build/configure.ac b/build/configure.ac
index e7db1944..bd1141e1 100644
--- a/build/configure.ac
+++ b/build/configure.ac
@@ -93,6 +93,39 @@ EOF
     fi
   ]
 )
+
+m4_define(
+  [M4_TEST_OFFLOAD],
+  [
+    cat > np_test_offload.cpp <<EOF
+#include <omp.h>
+#pragma omp requires unified_shared_memory
+
+#pragma omp begin declare target device_type(any)
+void fill_with_ones(int *array) {
+#pragma omp target teams distribute parallel for
+  for (int i = 0; i < 1000; i++) {
+    for (int j = 0; j < 1000; j++) {
+      array[[(1000 * i) + j]] = 1;
+    }
+  }
+}
+#pragma omp end declare target
+int main(int argc, char** argv) {
+  int *numbers = new int[[1000000]]();
+  fill_with_ones(numbers);
+  delete[[]] numbers;
+  return 0;
+}
+EOF
+    $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp >&AS_MESSAGE_LOG_FD 2>&1
+    export CXX_SUPPORTS_OFFLOAD=$?
+    rm np_test_offload.cpp
+    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
+      rm np_test_offload.o
+    fi
+  ]
+)
 # END CAPABILITY TESTING MACROS
 
 # autoconf setup initialization
@@ -219,16 +252,45 @@ AS_IF(
 )
 
 # Configure the optional features
+AC_ARG_ENABLE(
+  [offload],
+  [AS_HELP_STRING([--enable-offload], [enable target offloading (requires g++ version >= 13) [default=auto]])],
+  [
+    if test "x$enableval" != "xno"; then
+      M4_TEST_OFFLOAD
+      if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
+        AC_MSG_NOTICE([Enabling offload.])
+        AC_SUBST([OFFLOADFLAGS], ["-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp"])
+      else
+        AC_MSG_ERROR([Target offload was requested, but it is not supported!])
+      fi
+    else
+      AC_MSG_NOTICE([Disabling offload.])
+      AC_SUBST([OFFLOADFLAGS], [""])
+    fi
+  ],
+  [
+    M4_TEST_OFFLOAD
+    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
+      AC_MSG_NOTICE([Enabling offload.])
+      AC_SUBST([OFFLOADFLAGS], ["-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp"])
+    else
+      AC_MSG_NOTICE([Disabling offload.])
+      AC_SUBST([OFFLOADFLAGS], [""])
+    fi
+  ]
+)
+
 AC_ARG_ENABLE(
   [openmp],
   [AS_HELP_STRING([--enable-openmp], [enable OpneMP multi-threading [default=yes]])],
   [
     if test "x$enableval" != "xno"; then
-      AC_SUBST([OMPFLAGS], ["-fopenmp"])
+      AC_SUBST([OMPFLAGS], ["-DUSE_OPENMP -fopenmp"])
     fi
   ],
   [
-    AC_SUBST([OMPFLAGS], ["-fopenmp"])
+    AC_SUBST([OMPFLAGS], ["-DUSE_OPENMP -fopenmp"])
   ]
 )
 
@@ -346,7 +408,7 @@ AC_ARG_WITH(
   ]
 )
 
-CXXFLAGS="$CLANGFLAGS -O3 -ggdb $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
+CXXFLAGS="$CLANGFLAGS -O3 -ggdb $OFFLOADFLAGS $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
 SUBDIRS="cluster libnptm sphere testing trapping"
 
 # Generate the output
-- 
GitLab