////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Author: David Goz
// mail  : david.goz@inaf.it
// date  : 31.07.2024
// code tested using nvhpc
//
// - Compile the code:
//   $ nvc -mp=gpu -gpu=ccnative,debug,lineinfo -target=gpu -Minfo=all -v structure_routines.c -o structure_routines_omp
// - Run the code:
//   $ export OMP_TARGET_OFFLOAD=mandatory
//   $ ./structure_routines_omp
////////////////////////////////////////////////////////////////////////////////////////////////////

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <omp.h>

/* Number of elements stored in each of the two arrays of a span. */
#define SIZE   8
/* Half of SIZE; not referenced anywhere in the visible part of this file. */
#define SIZE_2 (SIZE / 2)
/* Element type of the arrays handled by this program. */
typedef double MyData;

/* Two arrays of MyData together with their common element count. */
typedef struct my_span
{
  /* number of elements in A and in B */
  size_t N;
  /* first array (filled with 3*i by allocate()) */
  MyData *A;
  /* second array (filled with 2*i by allocate()) */
  MyData *B;
} span;

/* File-scope span named in the declare target directive below, so that it
   can be referenced inside target regions. main() assigns its members from
   within a target region, making A and B point into a buffer obtained from
   omp_target_alloc(). */
span d_S;
#pragma omp declare target(d_S)

/*
 * Allocate the two host arrays of a span and fill them with a known pattern.
 *
 * my_struct : span to initialise. On return A and B each hold `size`
 *             elements (A[i] = 3*i, B[i] = 2*i) and N equals `size`.
 *             The caller owns both arrays and releases them with free().
 * size      : number of elements of each array.
 *
 * If an allocation fails the program is aborted after printing a message
 * on stderr. The check is explicit (not an assert) so that it is still
 * performed when the file is compiled with -DNDEBUG.
 */
void allocate(      span  *my_struct,
              const size_t size)
{
  /* allocate the (zero-initialised) buffers on the host memory */
  my_struct->A = (MyData *)calloc(size, sizeof(MyData));
  my_struct->B = (MyData *)calloc(size, sizeof(MyData));

  /* calloc() may legitimately return NULL for a zero-sized request,
     so a NULL pointer is an error only when elements were asked for */
  if ((size != 0) && ((my_struct->A == NULL) || (my_struct->B == NULL)))
    {
      fprintf(stderr, "\n\t allocate: cannot allocate 2 x %zu elements \n\n", size);
      abort();
    }

  my_struct->N = size;

  /* reference pattern that the GPU kernel is expected to cancel out */
  for (size_t i=0 ; i<size; i++)
    {
      my_struct->A[i] = (MyData)(3 * i);
      my_struct->B[i] = (MyData)(2 * i);
    }

  return;
}

/*
 * Print every element of both arrays of a span and report whether all of
 * them are zero.
 *
 * ptr    : span to dump; ptr->A and ptr->B are each read for ptr->N elements.
 * string : label written in front of every element index.
 *
 * After the dump "Result OK" is printed when every element of A and B is
 * equal to 0, otherwise "Result wrong" is printed.
 */
void print(const span *const ptr,
           const char *const string)
{
  int flag = 0;

  printf("\n");
  /* the index has the same type as ptr->N: no signed/unsigned comparison
     and no overflow of the counter for element counts above INT_MAX */
  for (size_t i=0 ; i<ptr->N ; i++)
    {
      printf("\n\t %s[%zu] = %lg", string, i, ptr->A[i]);
      printf("\n\t %s[%zu] = %lg", string, i, ptr->B[i]);

      /* any non-zero element marks the whole result as wrong */
      if ((ptr->A[i] != 0) || (ptr->B[i] != 0))
        flag = 1;
    }
  printf("\n");

  if (flag)
    printf("\n\t Result wrong \n\n");
  else
    printf("\n\t Result OK \n\n");

  return;
}

int main()
{
  /* host allocation */
  span h_S;
  allocate(&h_S, SIZE);
  
  /* allocating GPU memory using OMP routines */
  const int dev  = omp_get_default_device();
  const int host = omp_get_initial_device();
  MyData *d_buffer = (double *)omp_target_alloc(2 * SIZE * sizeof(MyData), dev);
  assert(d_buffer != NULL);

  /* set the pointers within the GPU */
#pragma omp target is_device_ptr(d_buffer) device(dev)
  {
    d_S.N = SIZE;
    d_S.A = d_buffer;
    d_S.B = d_buffer + SIZE;
  }

  /* copy data to the GPU */
  omp_target_memcpy(d_buffer, h_S.A, (SIZE * sizeof(MyData)), 0, 0, dev, host);
  omp_target_memcpy(d_buffer, h_S.B, (SIZE * sizeof(MyData)), (SIZE * sizeof(MyData)), 0, dev, host);
  
  /* perform the calculation on the GPU */
#pragma omp target device(dev)
  {
    #pragma omp loop
    for (size_t i=0 ; i<d_S.N ; i++)
      {
	d_S.A[i] -= (MyData)(3 * i);
	d_S.B[i] -= (MyData)(2 * i);
      }
  }

  /* copy data from the GPU */
  omp_target_memcpy(h_S.A, d_buffer, (SIZE * sizeof(MyData)),
		    0, 0, host, dev);
  omp_target_memcpy(h_S.B, d_buffer, (SIZE * sizeof(MyData)),
  		    0, (SIZE * sizeof(MyData)), host, dev);

  /* check the data */
  print(&h_S, "d_S");

  /* free GPU memory */
  omp_target_free(d_buffer, dev);

  /* free host memory */
  free(h_S.A);
  free(h_S.B);
  
  return 0;
}
