From dbc6ef2d8a486428f77d0f28578e9aebea27a676 Mon Sep 17 00:00:00 2001
From: Giovanni La Mura <giovanni.lamura@inaf.it>
Date: Wed, 17 Jul 2024 09:50:48 +0200
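Subject: [PATCH] Test for target offloading capability

Add an --enable-offload configure option (default: auto). configure
compiles a small OpenMP target-offload test program with $CXX using the
nvptx offload flags; on success those flags are stored in OFFLOADFLAGS
and added to CXXFLAGS. If offload was explicitly requested but the test
compilation fails, configure aborts with an error; in auto mode offload
is simply disabled.

Also define USE_OPENMP (-DUSE_OPENMP) in OMPFLAGS whenever OpenMP is
enabled.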

---
 build/Makefile.in  |   1 +
 build/configure    | 109 +++++++++++++++++++++++++++++++++++++++++++--
 build/configure.ac |  68 ++++++++++++++++++++++++++--
 3 files changed, 172 insertions(+), 6 deletions(-)

diff --git a/build/Makefile.in b/build/Makefile.in
index 85cae453..98ff73a9 100644
--- a/build/Makefile.in
+++ b/build/Makefile.in
@@ -445,6 +445,7 @@ NMEDIT = @NMEDIT@
 NVTXFLAGS = @NVTXFLAGS@
 OBJDUMP = @OBJDUMP@
 OBJEXT = @OBJEXT@
+OFFLOADFLAGS = @OFFLOADFLAGS@
 OMPFLAGS = @OMPFLAGS@
 OTOOL = @OTOOL@
 OTOOL64 = @OTOOL64@
diff --git a/build/configure b/build/configure
index c5db0810..7dee52ae 100755
--- a/build/configure
+++ b/build/configure
@@ -664,6 +664,7 @@ MAGMAFLAGS
 LAPACKLDFLAGS
 LAPACKFLAGS
 OMPFLAGS
+OFFLOADFLAGS
 HDF5_LDFLAGS
 HDF5_LIB
 HDF5_INCLUDE
@@ -810,6 +811,7 @@ with_aix_soname
 with_gnu_ld
 with_sysroot
 enable_libtool_lock
+enable_offload
 enable_openmp
 with_lapack
 with_magma
@@ -1472,6 +1474,8 @@ Optional Features:
   --enable-fast-install[=PKGS]
                           optimize for fast installation [default=yes]
   --disable-libtool-lock  avoid locking (might break parallel builds)
+  --enable-offload        enable target offloading (requires g++ version >=
+                          13) [default=auto]
   --enable-openmp         enable OpneMP multi-threading [default=yes]
   --enable-nvtx           use NVTX profiling [default=no]
 
@@ -24883,18 +24887,117 @@ esac
 fi
 
 # Configure the optional features
+# Check whether --enable-offload was given.
+if test ${enable_offload+y}
+then :
+  enableval=$enable_offload;
+    if test "x$enableval" != "xno"; then
+
+    cat > np_test_offload.cpp <<EOF
+#include <omp.h>
+#pragma omp requires unified_shared_memory
+
+#pragma omp begin declare target device_type(any)
+void fill_with_ones(int *array) {
+#pragma omp target teams distribute parallel for
+  for (int i = 0; i < 1000; i++) {
+    for (int j = 0; j < 1000; j++) {
+      array[(1000 * i) + j] = 1;
+    }
+  }
+}
+#pragma omp end declare target
+int main(int argc, char** argv) {
+  int *numbers = new int[1000000]();
+  fill_with_ones(numbers);
+  delete[] numbers;
+  return 0;
+}
+EOF
+    $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp > /dev/null #2>&1
+    export CXX_SUPPORTS_OFFLOAD=$?
+    rm np_test_offload.cpp
+    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
+      rm np_test_offload.o
+    fi
+
+
+      if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
+        { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Enabling offload." >&5
+printf "%s\n" "$as_me: Enabling offload." >&6;}
+        OFFLOADFLAGS="-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp"
+
+      else
+        as_fn_error $? "Target offload was requested, but it is not supported!" "$LINENO" 5
+      fi
+    else
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Disabling offload." >&5
+printf "%s\n" "$as_me: Disabling offload." >&6;}
+      OFFLOADFLAGS=""
+
+    fi
+
+else case e in #(
+  e)
+
+    cat > np_test_offload.cpp <<EOF
+#include <omp.h>
+#pragma omp requires unified_shared_memory
+
+#pragma omp begin declare target device_type(any)
+void fill_with_ones(int *array) {
+#pragma omp target teams distribute parallel for
+  for (int i = 0; i < 1000; i++) {
+    for (int j = 0; j < 1000; j++) {
+      array[(1000 * i) + j] = 1;
+    }
+  }
+}
+#pragma omp end declare target
+int main(int argc, char** argv) {
+  int *numbers = new int[1000000]();
+  fill_with_ones(numbers);
+  delete[] numbers;
+  return 0;
+}
+EOF
+    $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp > /dev/null #2>&1
+    export CXX_SUPPORTS_OFFLOAD=$?
+    rm np_test_offload.cpp
+    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
+      rm np_test_offload.o
+    fi
+
+
+    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Enabling offload." >&5
+printf "%s\n" "$as_me: Enabling offload." >&6;}
+      OFFLOADFLAGS="-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp"
+
+    else
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Disabling offload." >&5
+printf "%s\n" "$as_me: Disabling offload." >&6;}
+      OFFLOADFLAGS=""
+
+    fi
+
+ ;;
+esac
+fi
+
+
 # Check whether --enable-openmp was given.
 if test ${enable_openmp+y}
 then :
   enableval=$enable_openmp;
     if test "x$enableval" != "xno"; then
-      OMPFLAGS="-fopenmp"
+      OMPFLAGS="-DUSE_OPENMP -fopenmp"
 
     fi
 
 else case e in #(
   e)
-    OMPFLAGS="-fopenmp"
+    OMPFLAGS="-DUSE_OPENMP -fopenmp"
 
 
  ;;
@@ -25175,7 +25278,7 @@ esac
 fi
 
 
-CXXFLAGS="$CLANGFLAGS -O3 -ggdb $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
+CXXFLAGS="$CLANGFLAGS -O3 -ggdb $OFFLOADFLAGS $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
 SUBDIRS="cluster libnptm sphere testing trapping"
 
 # Generate the output
diff --git a/build/configure.ac b/build/configure.ac
index e7db1944..bd1141e1 100644
--- a/build/configure.ac
+++ b/build/configure.ac
@@ -93,6 +93,39 @@ EOF
     fi
   ]
 )
+
+m4_define(
+  [M4_TEST_OFFLOAD],
+  [
+    cat > np_test_offload.cpp <<EOF
+#include <omp.h>
+#pragma omp requires unified_shared_memory
+
+#pragma omp begin declare target device_type(any)
+void fill_with_ones(int *array) {
+#pragma omp target teams distribute parallel for
+  for (int i = 0; i < 1000; i++) {
+    for (int j = 0; j < 1000; j++) {
+      array[[(1000 * i) + j]] = 1;
+    }
+  }
+}
+#pragma omp end declare target
+int main(int argc, char** argv) {
+  int *numbers = new int[[1000000]]();
+  fill_with_ones(numbers);
+  delete[[]] numbers;
+  return 0;
+}
+EOF
+    $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp > /dev/null #2>&1
+    export CXX_SUPPORTS_OFFLOAD=$?
+    rm np_test_offload.cpp
+    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
+      rm np_test_offload.o
+    fi
+  ]
+)
 # END CAPABILITY TESTING MACROS
 
 # autoconf setup initialization
@@ -219,16 +252,45 @@ AS_IF(
 )
 
 # Configure the optional features
+AC_ARG_ENABLE(
+  [offload],
+  [AS_HELP_STRING([--enable-offload], [enable target offloading (requires g++ version >= 13) [default=auto]])],
+  [
+    if test "x$enableval" != "xno"; then
+      M4_TEST_OFFLOAD
+      if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
+        AC_MSG_NOTICE([Enabling offload.])
+        AC_SUBST([OFFLOADFLAGS], ["-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp"])
+      else
+        AC_MSG_ERROR([Target offload was requested, but it is not supported!])
+      fi
+    else
+      AC_MSG_NOTICE([Disabling offload.])
+      AC_SUBST([OFFLOADFLAGS], [""])
+    fi
+  ],
+  [
+    M4_TEST_OFFLOAD
+    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
+      AC_MSG_NOTICE([Enabling offload.])
+      AC_SUBST([OFFLOADFLAGS], ["-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp"])
+    else
+      AC_MSG_NOTICE([Disabling offload.])
+      AC_SUBST([OFFLOADFLAGS], [""])
+    fi
+  ]
+)
+
 AC_ARG_ENABLE(
   [openmp],
   [AS_HELP_STRING([--enable-openmp], [enable OpneMP multi-threading [default=yes]])],
   [
     if test "x$enableval" != "xno"; then
-      AC_SUBST([OMPFLAGS], ["-fopenmp"])
+      AC_SUBST([OMPFLAGS], ["-DUSE_OPENMP -fopenmp"])
     fi
   ],
   [
-    AC_SUBST([OMPFLAGS], ["-fopenmp"])
+    AC_SUBST([OMPFLAGS], ["-DUSE_OPENMP -fopenmp"])
   ]
 )
 
@@ -346,7 +408,7 @@ AC_ARG_WITH(
   ]
 )
 
-CXXFLAGS="$CLANGFLAGS -O3 -ggdb $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
+CXXFLAGS="$CLANGFLAGS -O3 -ggdb $OFFLOADFLAGS $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
 SUBDIRS="cluster libnptm sphere testing trapping"
 
 # Generate the output
-- 
GitLab