cuda-ompgpu examples

b317d509 · David Goz · 46b8b006 · b317d509 · b317d509 · b317d509
Commit b317d509 authored 1 year ago by David Goz
--- a/cuda-omp/cuda/1/classwork_1.cu
+++ b/cuda-omp/cuda/1/classwork_1.cu
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Assignment: write CUDA code equivalent to the
+// following sequential C code
+//
+// #include <stdio.h>
+// #define N 100
+// int main()
+// {
+//   for (int i=0 ; i<N ; i++)
+//     printf("%d\n", (i * i));
+//   return 0;
+// }
+//////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Author: David Goz
+// mail  : david.goz@inaf.it
+// date  : 06.07.2024
+//
+// - Compile the code:
+//   $ nvcc classwork_1.cu -o classwork_1_cuda
+// - Run the code:
+//   $ ./classwork_1_cuda
+// - Check the result:
+//   $ ./classwork_1_cuda | tail -n 100 | sort -nk 5
+//////////////////////////////////////////////////////////////////////////////////////////////////
+#include <stdio.h>
+#include <cuda.h>
+#define N        100
+#define NThreads 1024
+// Kernel: every thread whose global index is smaller than `size` prints
+// that index together with its square; all other threads do nothing.
+//   size : number of indices to print (threads with index >= size are idle)
+__global__ void GPUkernel(const int size)
+{
+  // global index of this thread in the 1D grid
+  const int tid = (blockDim.x * blockIdx.x) + threadIdx.x;
+  // device code can call C printf (std::cout is not usable in a kernel)
+  if (tid < size)
+    printf("Hello from CUDA thread: %d - result %d\n", tid, (tid * tid));
+}
+// Host entry point: launches GPUkernel with a single block of NThreads
+// threads (only the first N of them print) and waits for its completion.
+// Returns 0 on success, 1 if the kernel launch or its execution fails.
+int main()
+{
+  printf("\n\t The host issues the kernel on the GPU \n");
+  // kernel launch
+  GPUkernel<<<1, NThreads>>>(N);
+  // a kernel launch returns no status: an invalid launch configuration
+  // is only reported through cudaGetLastError()
+  const cudaError_t launchStatus = cudaGetLastError();
+  if (launchStatus != cudaSuccess)
+    {
+      fprintf(stderr, "\n\t Kernel launch failed: %s \n", cudaGetErrorString(launchStatus));
+      return 1;
+    }
+  printf("\n\t cudaDeviceSynchronize \n");
+  // GPU synchronization: the host waits for the kernel to finish;
+  // errors raised while the kernel was running are reported here
+  const cudaError_t syncStatus = cudaDeviceSynchronize();
+  if (syncStatus != cudaSuccess)
+    {
+      fprintf(stderr, "\n\t Kernel execution failed: %s \n", cudaGetErrorString(syncStatus));
+      return 1;
+    }
+  return 0;
+}
--- a/cuda-omp/cuda/1/classwork_2.cu
+++ b/cuda-omp/cuda/1/classwork_2.cu
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Assignment: write CUDA code equivalent to the
+// following sequential C code
+//
+// #include <stdio.h>
+// #define N 100
+// int main()
+// {
+//   int A[N];
+//
+//   for (int i=0 ; i<N ; i++)
+//     A[i] = (i * i);
+//
+//   return 0;
+// }
+//////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Author: David Goz
+// mail  : david.goz@inaf.it
+// date  : 17.11.2022
+//
+// - Compile the code:
+//   $ nvcc classwork_2.cu -o classwork_2
+// - Run the code:
+//   $ ./classwork_2
+// - Check the result:
+//   $ ./classwork_2 | tail -n 100 | sort -nk 4
+//////////////////////////////////////////////////////////////////////////////////////////////////
+#include <iostream>
+#include <stdlib.h>
+#include <cuda.h>
+#define N        100
+#define NThreads 1024
+// Kernel: the thread with global index i stores i*i into A[i].
+//   A    : output array, written at positions [0, size)
+//   size : number of elements to fill; threads with index >= size do nothing
+__global__ void GPUkernel(int *A, const int size)
+{
+  // global index of this thread in the 1D grid
+  const int idx = (blockDim.x * blockIdx.x) + threadIdx.x;
+  // surplus threads must not touch the array
+  if (idx >= size)
+    return;
+  A[idx] = (idx * idx);
+}
+// Host entry point: allocates an N-element managed array, fills it on the
+// GPU with GPUkernel (one block of NThreads threads), prints every element
+// from the host and releases the memory.
+// Returns 0 on success; exits with EXIT_FAILURE on any CUDA error.
+int main()
+{
+  // allocate an array with cudaMallocManaged: the same pointer is
+  // passed to the kernel and dereferenced by the host below
+  int *A = NULL;
+  const size_t size = (N * sizeof(int));
+  cudaError_t error = cudaMallocManaged(&A, size);
+  if (error != cudaSuccess)
+    {
+      std::cerr << "Cannot allocate memory for the host/device. CUDA error : " << cudaGetErrorString(error) << " ... aborting" << std::endl;
+      exit(EXIT_FAILURE);
+    }
+  std::cout << "Memory allocated for the host/device" << std::endl;
+  // kernel launch
+  GPUkernel<<<1, NThreads>>>(A, N);
+  // a kernel launch returns no status: query it explicitly
+  error = cudaGetLastError();
+  // device synchronization: the host must wait for the kernel before
+  // reading A; errors raised during execution are reported here
+  if (error == cudaSuccess)
+    error = cudaDeviceSynchronize();
+  if (error != cudaSuccess)
+    {
+      std::cerr << "Kernel failed. CUDA error : " << cudaGetErrorString(error) << " ... aborting" << std::endl;
+      // do not print A: its content is not valid if the kernel failed
+      cudaFree(A);
+      exit(EXIT_FAILURE);
+    }
+  // check the result
+  for (size_t i=0 ; i<N ; i++)
+    std::cout << "A[" << i << "] - Result: " << A[i] << std::endl;
+  // free the memory
+  error = cudaFree(A);
+  if (error != cudaSuccess)
+    {
+      std::cerr << "Cannot free memory. CUDA error : " << cudaGetErrorString(error) << std::endl;
+      exit(EXIT_FAILURE);
+    }
+  return 0;
+}
--- a/cuda-omp/omp/1/classwork_1.c
+++ b/cuda-omp/omp/1/classwork_1.c
+//////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// OpenMP GPU Offload is available only on systems with NVIDIA GPUs with compute capability '>= cc70'
+//
+// Assignment: write OMP-GPU code equivalent to the
+// following sequential C code
+//
+// #include <stdio.h>
+// #define N 100
+// int main()
+// {
+//   for (int i=0 ; i<N ; i++)
+//     printf("%d\n", (i * i));
+//   return 0;
+// }
+//////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Author: David Goz
+// mail  : david.goz@inaf.it
+// date  : 06.07.2024
+//
+// - Compile the code to run on the GPU (NVIDIA HPC SDK compiler):
+//   $ nvc -mp=gpu classwork_1.c -o classwork_1_omp
+// - Run the code:
+//   $ ./classwork_1_omp
+// - Check the result:
+//   $ ./classwork_1_omp | tail -n 100 | sort -nk 5
+//////////////////////////////////////////////////////////////////////////////////////////////////
+#include <stdio.h>
+#include <omp.h>
+#define N        100
+#define NThreads 1024
+// Runs an OpenMP target region that first reports whether it is executing
+// on the host or on an offload device, then prints i and i*i for every
+// i in [0, size). The region contains no teams/parallel construct, so the
+// loop is not work-shared.
+//   size : number of iterations of the printing loop
+void GPUkernelSerial(const int size)
+{
+#pragma omp target
+  {
+    // omp_is_initial_device() is non-zero when the region runs on the host
+    if (omp_is_initial_device())
+      printf("\n\t CPU is executing GPUkernelSerial\n" );
+    else
+      printf("\n\t GPU is executing GPUkernelSerial\n" );
+    int idx = 0;
+    while (idx < size)
+      {
+        printf("Hello from OMP-GPU thread: %d - result %d\n", idx, (idx * idx));
+        idx++;
+      }
+  }
+}
+// Host entry point: announces the offload, then runs GPUkernelSerial
+// over N iterations.
+int main()
+{
+  // number of iterations handed to the kernel
+  const int nIterations = N;
+  printf("\n\t The host issues the kernel on the GPU \n");
+  // kernel launch using one GPU thread
+  GPUkernelSerial(nIterations);
+  return 0;
+}