From b317d509f8ec1a8ebb64bbd919bef1c9252e58ce Mon Sep 17 00:00:00 2001
From: David Goz <david.goz@inaf.it>
Date: Sat, 6 Jul 2024 16:18:48 +0200
Subject: [PATCH] cuda-ompgpu examples

---
 cuda-omp/cuda/1/classwork_1.cu | 62 ++++++++++++++++++++++++++
 cuda-omp/cuda/1/classwork_2.cu | 79 ++++++++++++++++++++++++++++++++++
 cuda-omp/omp/1/classwork_1.c   | 62 ++++++++++++++++++++++++++
 3 files changed, 203 insertions(+)
 create mode 100644 cuda-omp/cuda/1/classwork_1.cu
 create mode 100644 cuda-omp/cuda/1/classwork_2.cu
 create mode 100644 cuda-omp/omp/1/classwork_1.c

diff --git a/cuda-omp/cuda/1/classwork_1.cu b/cuda-omp/cuda/1/classwork_1.cu
new file mode 100644
index 0000000..446b300
--- /dev/null
+++ b/cuda-omp/cuda/1/classwork_1.cu
@@ -0,0 +1,62 @@
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Assignment : write a CUDA code corresponding to the
+//              following sequential C code
+//
+// #include <stdio.h>
+// #define N 100
+// int main()
+// {
+//   for (int i=0 ; i<N ; i++)
+//     printf("%d\n", (i * i));
+
+//   return 0;
+// }
+//////////////////////////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Author: David Goz
+// mail  : david.goz@inaf.it
+// date  : 06.07.2024
+//
+// - Compile the code:
+//   $ nvcc classwork_1.cu -o classwork_1_cuda
+// - Run the code:
+//   $ ./classwork_1_cuda
+// - Check the result:
+//   $ ./classwork_1_cuda | tail -n 100 | sort -nk 5
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include <stdio.h>
+#include <cuda.h>
+
+#define N 100
+#define NThreads 1024
+
+__global__ void GPUkernel(const int size)
+{
+  const int myID = threadIdx.x + (blockIdx.x * blockDim.x);
+
+  if (myID >= size)
+    return;
+
+  // C printf is supported on CUDA
+  // C++ cout class is not supported in CUDA
+  printf("Hello from CUDA thread: %d - result %d\n", myID, (myID * myID));
+
+  return;
+}
+
+int main()
+{
+  printf("\n\t The host issues the kernel on the GPU \n");
+
+  // kernel launch
+  GPUkernel<<<1, NThreads>>>(N);
+
+  printf("\n\t cudaDeviceSynchronize \n");
+  // GPU synchronization
+  cudaDeviceSynchronize();
+
+  return 0;
+}
diff --git a/cuda-omp/cuda/1/classwork_2.cu b/cuda-omp/cuda/1/classwork_2.cu
new file mode 100644
index 0000000..cae8098
--- /dev/null
+++ b/cuda-omp/cuda/1/classwork_2.cu
@@ -0,0 +1,79 @@
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Assignment : write a CUDA code corresponding to the
+//              following sequential C code
+//
+// #include <stdio.h>
+// #define N 100
+// int main()
+// {
+//   int A[N];
+//
+//   for (int i=0 ; i<N ; i++)
+//     A[i] = (i * i);
+//
+//   return 0;
+// }
+//////////////////////////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Author: David Goz
+// mail  : david.goz@inaf.it
+// date  : 17.11.2022
+//
+// - Compile the code:
+//   $ nvcc classwork_2.cu -o classwork_2
+// - Run the code:
+//   $ ./classwork_2
+// - Check the result:
+//   $ ./classwork_2 | tail -n 100 | sort -nk 5
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include <iostream>
+#include <stdlib.h>
+#include <cuda.h>
+
+#define N 100
+#define NThreads 1024
+
+__global__ void GPUkernel( int *A,
+                           const int size)
+{
+  const int myID = threadIdx.x + (blockIdx.x * blockDim.x);
+
+  if (myID < size)
+    A[myID] = (myID * myID);
+
+  return;
+}
+
+int main()
+{
+  // allocate array that allows direct access of both host and device
+  // CUDA is responsible
+  int *A;
+  const size_t size = (N * sizeof(int));
+  cudaError_t error = cudaMallocManaged(&A, size);
+  if (!error)
+    std::cout << "Memory allocated for the host/device" << std::endl;
+  else
+    {
+      std::cout << "Cannot allocate memory for the host/device. CUDA error : " << error << " ... aborting" << std::endl;
+      exit(EXIT_FAILURE);
+    }
+
+  // kernel launch
+  GPUkernel<<<1, NThreads>>>(A, N);
+
+  // device synchronization
+  cudaDeviceSynchronize();
+
+  // check the result
+  for (size_t i=0 ; i<N ; i++)
+    std::cout << "A[" << i << "] - Result: " << A[i] << std::endl;
+
+  // free the memory
+  cudaFree(A);
+
+  return 0;
+}
diff --git a/cuda-omp/omp/1/classwork_1.c b/cuda-omp/omp/1/classwork_1.c
new file mode 100644
index 0000000..6ff9cdf
--- /dev/null
+++ b/cuda-omp/omp/1/classwork_1.c
@@ -0,0 +1,62 @@
+//////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// OpenMP GPU Offload is available only on systems with NVIDIA GPUs with compute capability '>= cc70'
+//
+// Assignment : write an OMP-GPU code corresponding to the
+//              following sequential C code
+//
+// #include <stdio.h>
+// #define N 100
+// int main()
+// {
+//   for (int i=0 ; i<N ; i++)
+//     printf("%d\n", (i * i));
+
+//   return 0;
+// }
+//////////////////////////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Author: David Goz
+// mail  : david.goz@inaf.it
+// date  : 06.07.2024
+//
+// - Compile the code:
+//   $ nvc -mp=gpu classwork_1.c -o classwork_1_omp
+// - Run the code:
+//   $ ./classwork_1_omp
+// - Check the result:
+//   $ ./classwork_1_omp | tail -n 100 | sort -nk 5
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include <stdio.h>
+#include <omp.h>
+
+#define N 100
+#define NThreads 1024
+
+void GPUkernelSerial(const int size)
+{
+#pragma omp target
+  {
+    if (!omp_is_initial_device())
+      printf("\n\t GPU is executing GPUkernelSerial\n" );
+    else
+      printf("\n\t CPU is executing GPUkernelSerial\n" );
+
+    for (int i=0 ; i<size ; i++)
+      printf("Hello from OMP-GPU thread: %d - result %d\n", i, (i * i));
+  }
+
+  return;
+}
+
+int main()
+{
+  printf("\n\t The host issues the kernel on the GPU \n");
+
+  /* kernel launch using one GPU thread */
+  GPUkernelSerial(N);
+
+  return 0;
+}
-- 
GitLab