//////////////////////////////////////////////////////////////////////////////////////////////////
// Assignment : write a CUDA code corresponding to the
// following sequential C code
//
// #include <stdio.h>
// #define N 100
// int main()
// {
//   for (int i=0 ; i<N ; i++)
//     printf("%d\n", (i * i));

//   return 0;
// }
//////////////////////////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////////////////////////////////
// Author: David Goz
// mail  : david.goz@inaf.it
// date  : 06.07.2024
// code tested using nvhpc
//
// - Compile the code:
//   $ nvc++ classwork_1.cu -o classwork_1_cuda
// - Run the code:
//   $ ./classwork_1_cuda
// - Check the result:
//   $ ./classwork_1_cuda | tail -n 100 | sort -nk 5

//////////////////////////////////////////////////////////////////////////////////////////////////

#include <stdio.h>
#include <cuda.h>

#define N        100
#define NThreads 1024

// Serial variant: every thread that executes this kernel prints the squares
// of all integers in [0, size) on its own, one line per value.
// - size: number of consecutive integers (starting from 0) to square and print.
// The host code in this file launches it with a single block of a single thread.
__global__ void GPUkernelSerial(const int size)
{
  // global index of the calling thread (1D grid of 1D blocks)
  const int globalThread = (blockDim.x * blockIdx.x) + threadIdx.x;

  // device-side printf is available in CUDA, whereas std::cout is not
  int value = 0;
  while (value < size)
    {
      printf("Hello from CUDA thread: %d - result %d\n", globalThread, (value * value));
      ++value;
    }
}

// Parallel variant: each thread prints the square of its own global index,
// so the range [0, size) is covered by 'size' distinct threads.
// - size: number of values to print; threads whose global index is not
//         smaller than size do nothing.
// Expects a 1D grid of 1D blocks (only the .x components are used).
__global__ void GPUkernelParallel(const int size)
{
  // global index of the calling thread (1D grid of 1D blocks)
  const int globalThread = (blockDim.x * blockIdx.x) + threadIdx.x;

  // only threads mapped onto a valid index do any work: the launch may
  // provide more threads than there are values to print
  const bool inRange = (globalThread < size);

  // device-side printf is available in CUDA, whereas std::cout is not
  if (inRange)
    printf("Hello from CUDA thread: %d - result %d\n", globalThread, (globalThread * globalThread));
}

// Report a failed CUDA runtime call on stderr.
// - status: value returned by the CUDA runtime call being checked
// - what  : short description of the checked operation, used in the message
// Returns true when status is cudaSuccess, false otherwise.
static bool cudaCallSucceeded(const cudaError_t status, const char *const what)
{
  if (status == cudaSuccess)
    return true;

  fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
  return false;
}

// Host entry point: runs the serial kernel with a single thread, then the
// parallel kernel with one thread per value, waiting for the GPU after each.
// Returns 0 on success, 1 if any kernel launch or synchronization fails.
int main()
{
  printf("\n\t The host issues the kernel on the GPU in serial \n");
  // kernel launch: one block made of one thread
  GPUkernelSerial<<<1, 1>>>(N);
  // a kernel launch returns nothing: configuration errors must be queried explicitly
  if (!cudaCallSucceeded(cudaGetLastError(), "GPUkernelSerial launch"))
    return 1;
  // GPU synchronization (also reports errors raised while the kernel was running)
  if (!cudaCallSucceeded(cudaDeviceSynchronize(), "GPUkernelSerial execution"))
    return 1;

  printf("\n\t The host issues the kernel on the GPU in parallel \n");
  // enough blocks of NThreads threads to cover all N values (ceiling division),
  // so the launch stays correct if N is raised above NThreads
  const int nBlocks = (N + NThreads - 1) / NThreads;
  // kernel launch
  GPUkernelParallel<<<nBlocks, NThreads>>>(N);
  if (!cudaCallSucceeded(cudaGetLastError(), "GPUkernelParallel launch"))
    return 1;
  // GPU synchronization
  if (!cudaCallSucceeded(cudaDeviceSynchronize(), "GPUkernelParallel execution"))
    return 1;

  return 0;
}
