From b317d509f8ec1a8ebb64bbd919bef1c9252e58ce Mon Sep 17 00:00:00 2001
From: David Goz <david.goz@inaf.it>
Date: Sat, 6 Jul 2024 16:18:48 +0200
Subject: [PATCH] cuda-ompgpu examples

---
 cuda-omp/cuda/1/classwork_1.cu | 62 ++++++++++++++++++++++++++
 cuda-omp/cuda/1/classwork_2.cu | 79 ++++++++++++++++++++++++++++++++++
 cuda-omp/omp/1/classwork_1.c   | 62 ++++++++++++++++++++++++++
 3 files changed, 203 insertions(+)
 create mode 100644 cuda-omp/cuda/1/classwork_1.cu
 create mode 100644 cuda-omp/cuda/1/classwork_2.cu
 create mode 100644 cuda-omp/omp/1/classwork_1.c

diff --git a/cuda-omp/cuda/1/classwork_1.cu b/cuda-omp/cuda/1/classwork_1.cu
new file mode 100644
index 0000000..446b300
--- /dev/null
+++ b/cuda-omp/cuda/1/classwork_1.cu
@@ -0,0 +1,62 @@
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Assignment : write a CUDA code corresponding to the
+//              following sequential C code
+//
+// #include <stdio.h>
+// #define N 100
+// int main()
+// {
+//   for (int i=0 ; i<N ; i++)
+//     printf("%d\n", (i * i));
+
+//   return 0;
+// }
+//////////////////////////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Author: David Goz
+// mail  : david.goz@inaf.it
+// date  : 06.07.2024
+//
+// - Compile the code:
+//   $ nvcc classwork_1.cu -o classwork_1_cuda
+// - Run the code:
+//   $ ./classwork_1_cuda
+// - Check the result:
+//   $ ./classwork_1_cuda | tail -n 100 | sort -nk 5
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include <stdio.h>
+#include <cuda.h>
+
+#define N 100
+#define NThreads 1024
+
+__global__ void GPUkernel(const int size)
+{
+  const int myID = threadIdx.x + (blockIdx.x * blockDim.x);
+
+  if (myID >= size)
+    return;
+
+  // C printf is supported on CUDA
+  // C++ cout class is not supported in CUDA
+  printf("Hello from CUDA thread: %d - result %d\n", myID, (myID * myID));
+
+  return;
+}
+
+int main()
+{
+  printf("\n\t The host issues the kernel on the GPU \n");
+
+  // kernel launch
+  GPUkernel<<<1, NThreads>>>(N);
+
+  printf("\n\t cudaDeviceSynchronize \n");
+  // GPU synchronization
+  cudaDeviceSynchronize();
+
+  return 0;
+}
diff --git a/cuda-omp/cuda/1/classwork_2.cu b/cuda-omp/cuda/1/classwork_2.cu
new file mode 100644
index 0000000..cae8098
--- /dev/null
+++ b/cuda-omp/cuda/1/classwork_2.cu
@@ -0,0 +1,79 @@
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Assignment : write a CUDA code corresponding to the
+//              following sequential C code
+//
+// #include <stdio.h>
+// #define N 100
+// int main()
+// {
+//   int A[N];
+//
+//   for (int i=0 ; i<N ; i++)
+//     A[i] = (i * i);
+//
+//   return 0;
+// }
+//////////////////////////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Author: David Goz
+// mail  : david.goz@inaf.it
+// date  : 17.11.2022
+//
+// - Compile the code:
+//   $ nvcc classwork_2.cu -o classwork_2
+// - Run the code:
+//   $ ./classwork_2
+// - Check the result:
+//   $ ./classwork_2 | tail -n 100 | sort -nk 5
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include <iostream>
+#include <stdlib.h>
+#include <cuda.h>
+
+#define N 100
+#define NThreads 1024
+
+__global__ void GPUkernel( int *A,
+                           const int size)
+{
+  const int myID = threadIdx.x + (blockIdx.x * blockDim.x);
+
+  if (myID < size)
+    A[myID] = (myID * myID);
+
+  return;
+}
+
+int main()
+{
+  // allocate array that allows direct access of both host and device
+  // CUDA is responsible
+  int *A;
+  const size_t size = (N * sizeof(int));
+  cudaError_t error = cudaMallocManaged(&A, size);
+  if (!error)
+    std::cout << "Memory allocated for the host/device" << std::endl;
+  else
+    {
+      std::cout << "Cannot allocate memory for the host/device. CUDA error : " << error << " ... aborting" << std::endl;
+      exit(EXIT_FAILURE);
+    }
+
+  // kernel launch
+  GPUkernel<<<1, NThreads>>>(A, N);
+
+  // device synchronization
+  cudaDeviceSynchronize();
+
+  // check the result
+  for (size_t i=0 ; i<N ; i++)
+    std::cout << "A[" << i << "] - Result: " << A[i] << std::endl;
+
+  // free the memory
+  cudaFree(A);
+
+  return 0;
+}
diff --git a/cuda-omp/omp/1/classwork_1.c b/cuda-omp/omp/1/classwork_1.c
new file mode 100644
index 0000000..6ff9cdf
--- /dev/null
+++ b/cuda-omp/omp/1/classwork_1.c
@@ -0,0 +1,62 @@
+//////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// OpenMP GPU Offload is available only on systems with NVIDIA GPUs with compute capability '>= cc70'
+//
+// Assignment : write an OMP-GPU code corresponding to the
+//              following sequential C code
+//
+// #include <stdio.h>
+// #define N 100
+// int main()
+// {
+//   for (int i=0 ; i<N ; i++)
+//     printf("%d\n", (i * i));
+
+//   return 0;
+// }
+//////////////////////////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Author: David Goz
+// mail  : david.goz@inaf.it
+// date  : 06.07.2024
+//
+// - Compile the code:
+//   $ nvc -mp=gpu classwork_1.c -o classwork_1_omp
+// - Run the code:
+//   $ ./classwork_1_omp
+// - Check the result:
+//   $ ./classwork_1_omp | tail -n 100 | sort -nk 5
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include <stdio.h>
+#include <omp.h>
+
+#define N 100
+#define NThreads 1024
+
+void GPUkernelSerial(const int size)
+{
+#pragma omp target
+  {
+    if (!omp_is_initial_device())
+      printf("\n\t GPU is executing GPUkernelSerial\n" );
+    else
+      printf("\n\t CPU is executing GPUkernelSerial\n" );
+
+    for (int i=0 ; i<size ; i++)
+      printf("Hello from OMP-GPU thread: %d - result %d\n", i, (i * i));
+  }
+
+  return;
+}
+
+int main()
+{
+  printf("\n\t The host issues the kernel on the GPU \n");
+
+  /* kernel launch using one GPU thread */
+  GPUkernelSerial(N);
+
+  return 0;
+}
-- 
GitLab