////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Task dependencies:
// example of synchronization by defining explicit dependencies between the tasks.
//
// Author: David Goz
// mail  : david.goz@inaf.it
// date  : 28.08.2024
// code tested using nvhpc
//
// - Compile the code:
//   $ nvc -mp=gpu -gpu=ccnative,debug,lineinfo -target=gpu -Minfo=all -v dependencies.c -o dependencies_omp
// - Run the code:
//   $ export OMP_TARGET_OFFLOAD=mandatory
//   $ ./dependencies_omp
////////////////////////////////////////////////////////////////////////////////////////////////////


#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include <assert.h>

typedef int MyData;

#define NDEBUG

/*
 * Verify the outcome of the vector addition.
 *
 * C    : host array holding the result to be validated
 * size : number of elements of C to inspect
 *
 * Every element is expected to be zero. The whole array is scanned and a
 * single verdict is printed on stdout: "Result wrong" if at least one
 * element is non-zero, "Result OK" otherwise.
 */
void check(const MyData *const C, const size_t size)
{
  int mismatch = 0;

  size_t idx = 0;
  while (idx < size)
    {
      if (C[idx] != 0)
        mismatch = 1;

      idx++;
    }

  fputs(mismatch ? "\n\t Result wrong \n" : "\n\t Result OK \n", stdout);
}

/*
 * Example of synchronization between target tasks by means of explicit
 * data dependencies.
 *
 * Workflow:
 *   1. allocate the host array C and fill it with random numbers;
 *   2. allocate one device buffer with omp_target_alloc and split it into
 *      the two halves A and B;
 *   3. map C to the device;
 *   4. launch three asynchronous (nowait) target tasks:
 *        - A[i] =  i
 *        - B[i] = -i
 *        - C[i] = A[i] + B[i]   (depends on the two tasks above)
 *   5. copy C back to the host once the vector add has completed and
 *      verify that every element is zero;
 *   6. release device and host resources.
 *
 * Returns 0 on success, EXIT_FAILURE if a host or device allocation fails.
 */
int main(void)
{
  const int size = 1000000;

  // Allocation failures are handled explicitly (not through assert), so the
  // checks remain in place regardless of whether NDEBUG is in effect.
  MyData *C = malloc((size_t)size * sizeof *C);
  if (C == NULL)
    {
      fprintf(stderr, "\n\t Cannot allocate the host buffer \n");
      return EXIT_FAILURE;
    }

  // alloc data on the device: a single buffer hosting both A and B
  const int dev_gpu = omp_get_default_device();
  MyData *gpu_buffer = omp_target_alloc(2 * (size_t)size * sizeof *gpu_buffer, dev_gpu);
  if (gpu_buffer == NULL)
    {
      fprintf(stderr, "\n\t Cannot allocate the device buffer \n");
      free(C);
      return EXIT_FAILURE;
    }
  MyData *A = gpu_buffer;
  MyData *B = A + size;

  // init C with random numbers, so that a vector add that did not run
  // (or did not complete) is detected by check()
  for (int i=0 ; i<size ; i++)
    C[i] = rand() % size;

  #pragma omp target enter data map(to: C[0:size])

  // init A (independent of the initialization of B)
  #pragma omp target nowait depend(out: A[0:size]) is_device_ptr(A)
  {
    #pragma omp loop
    for (int i=0 ; i<size; i++)
      A[i] = i;
  }

  // init B (independent of the initialization of A)
  #pragma omp target nowait depend(out: B[0:size]) is_device_ptr(B)
  {
    #pragma omp loop
    for (int i=0 ; i<size; i++)
      B[i] = -i;
  }

  // vector add: starts only after both A and B have been initialized
  #pragma omp target nowait depend(in: A[0:size], B[0:size]) depend(out: C[0:size]) is_device_ptr(A, B)
  {
    #pragma omp loop
    for (int i=0 ; i<size; i++)
      C[i] = A[i] + B[i];
  }

  // No nowait here: the update is ordered after the vector add through the
  // dependence on C and has completed before check() reads the host copy.
  #pragma omp target update from (C[0:size]) depend(in: C[0:size])

  check(C, size);

  // Unmap C while the host storage is still valid, then free the buffers.
  #pragma omp target exit data map(release: C[0:size])
  omp_target_free(gpu_buffer, dev_gpu);
  free(C);

  return 0;
}
