Skip to content
Snippets Groups Projects
Commit b317d509 authored by David Goz's avatar David Goz :sleeping:
Browse files

cuda-ompgpu examples

parent 46b8b006
Branches
No related tags found
No related merge requests found
//////////////////////////////////////////////////////////////////////////////////////////////////
// Assignment : write a CUDA code corresponding to the
// following sequential C code
//
// #include <stdio.h>
// #define N 100
// int main()
// {
// for (int i=0 ; i<N ; i++)
// printf("%d\n", (i * i));
// return 0;
// }
//////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
// Author: David Goz
// mail : david.goz@inaf.it
// date : 06.07.2024
//
// - Compile the code:
// $ nvcc classwork_1.cu -o classwork_1_cuda
// - Run the code:
// $ ./classwork_1_cuda
// - Check the result:
// $ ./classwork_1_cuda | tail -n 100 | sort -nk 5
//////////////////////////////////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include <cuda.h>
#define N 100
#define NThreads 1024
// Kernel: every thread prints its global ID together with its square.
// Expects a 1-D launch; threads whose global ID is outside [0, size)
// do nothing, so the grid may be larger than the problem.
__global__ void GPUkernel(const int size)
{
  // Flat global thread index over the 1-D grid.
  const int tid = (blockIdx.x * blockDim.x) + threadIdx.x;

  // Guard: only in-range threads produce output.
  if (tid < size)
    {
      // C printf is supported in CUDA device code;
      // the C++ cout class is not.
      printf("Hello from CUDA thread: %d - result %d\n", tid, (tid * tid));
    }
}
// Host entry point: launches GPUkernel with one block of NThreads threads
// and waits for completion, checking both launch and execution status.
int main()
{
  printf("\n\t The host issues the kernel on the GPU \n");

  // Kernel launch: a single block of NThreads threads; the kernel
  // itself discards threads whose global ID is >= N.
  GPUkernel<<<1, NThreads>>>(N);

  // A kernel launch returns no status directly: configuration errors
  // (bad grid/block size, no device, ...) must be fetched explicitly.
  cudaError_t error = cudaGetLastError();
  if (error != cudaSuccess)
    {
      printf("Kernel launch failed: %s\n", cudaGetErrorString(error));
      return 1;
    }

  printf("\n\t cudaDeviceSynchronize \n");

  // Block the host until the kernel finishes; asynchronous execution
  // errors (if any) are reported here.
  error = cudaDeviceSynchronize();
  if (error != cudaSuccess)
    {
      printf("Kernel execution failed: %s\n", cudaGetErrorString(error));
      return 1;
    }

  return 0;
}
//////////////////////////////////////////////////////////////////////////////////////////////////
// Assignment : write a CUDA code corresponding to the
// following sequential C code
//
// #include <stdio.h>
// #define N 100
// int main()
// {
// int A[N];
//
// for (int i=0 ; i<N ; i++)
// A[i] = (i * i);
//
// return 0;
// }
//////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
// Author: David Goz
// mail : david.goz@inaf.it
// date : 17.11.2022
//
// - Compile the code:
// $ nvcc classwork_2.cu -o classwork_2
// - Run the code:
// $ ./classwork_2
// - Check the result:
// $ ./classwork_2 | tail -n 100 | sort -nk 5
//////////////////////////////////////////////////////////////////////////////////////////////////
#include <iostream>
#include <stdlib.h>
#include <cuda.h>
#define N 100
#define NThreads 1024
// Kernel: store the square of each global thread ID into A,
// one element per thread: A[i] = i * i for i in [0, size).
// A must point to device-accessible memory holding at least 'size' ints.
__global__ void GPUkernel(int *A, const int size)
{
  // Flat 1-D global thread index.
  const int tid = threadIdx.x + (blockDim.x * blockIdx.x);

  // Out-of-range threads have no element to write.
  if (tid >= size)
    return;

  A[tid] = (tid * tid);
}
// Host entry point: allocates a managed (unified-memory) array, fills it
// on the GPU with i*i, then verifies the result on the host.
int main()
{
  // Unified (managed) memory: one allocation directly addressable from
  // both host and device; the CUDA runtime handles the migration.
  int *A = nullptr;
  const size_t size = (N * sizeof(int));

  cudaError_t error = cudaMallocManaged(&A, size);
  if (error == cudaSuccess)
    std::cout << "Memory allocated for the host/device" << std::endl;
  else
    {
      std::cout << "Cannot allocate memory for the host/device. CUDA error : " << error << " ... aborting" << std::endl;
      exit(EXIT_FAILURE);
    }

  // Kernel launch: one block of NThreads threads; the kernel guards i < N.
  GPUkernel<<<1, NThreads>>>(A, N);

  // A launch returns no status directly: configuration errors must be
  // fetched explicitly, otherwise a failed launch goes unnoticed.
  error = cudaGetLastError();
  if (error != cudaSuccess)
    {
      std::cout << "Kernel launch failed. CUDA error : " << error << " ... aborting" << std::endl;
      cudaFree(A);
      exit(EXIT_FAILURE);
    }

  // The host must not read the managed array while the GPU may still be
  // writing it; synchronize and check for execution errors first.
  error = cudaDeviceSynchronize();
  if (error != cudaSuccess)
    {
      std::cout << "Kernel execution failed. CUDA error : " << error << " ... aborting" << std::endl;
      cudaFree(A);
      exit(EXIT_FAILURE);
    }

  // Check the result on the host.
  for (size_t i = 0; i < N; i++)
    std::cout << "A[" << i << "] - Result: " << A[i] << std::endl;

  // Release the managed allocation.
  cudaFree(A);

  return 0;
}
//////////////////////////////////////////////////////////////////////////////////////////////////
//
// OpenMP GPU Offload is available only on systems with NVIDIA GPUs with compute capability '>= cc70'
//
// Assignment : write an OMP-GPU code corresponding to the
// following sequential C code
//
// #include <stdio.h>
// #define N 100
// int main()
// {
// for (int i=0 ; i<N ; i++)
// printf("%d\n", (i * i));
// return 0;
// }
//////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
// Author: David Goz
// mail : david.goz@inaf.it
// date : 06.07.2024
//
// - Compile the code to run on the GPU (requires an OpenMP-offload-capable compiler):
// $ nvcc classwork_1.c -o classwork_1_omp
// - Run the code:
// $ ./classwork_1_omp
// - Check the result:
// $ ./classwork_1_omp | tail -n 100 | sort -nk 5
//////////////////////////////////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include <omp.h>
#define N 100
#define NThreads 1024
// Offload a serial loop to the default target device: report whether the
// region runs on the host or on the GPU, then print i*i for i in [0, size).
void GPUkernelSerial(const int size)
{
#pragma omp target
  {
    // omp_is_initial_device() returns true when the target region
    // fell back to executing on the host.
    if (omp_is_initial_device())
      printf("\n\t CPU is executing GPUkernelSerial\n");
    else
      printf("\n\t GPU is executing GPUkernelSerial\n");

    // No worksharing construct: a single thread runs the whole loop.
    for (int idx = 0; idx < size; idx++)
      printf("Hello from OMP-GPU thread: %d - result %d\n", idx, (idx * idx));
  }

  return;
}
// Entry point: announce the offload, then run the serial target region
// over N iterations.
int main()
{
  printf("\n\t The host issues the kernel on the GPU \n");

  /* kernel launch using one GPU thread */
  GPUkernelSerial(N);

  return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment