#pragma once
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
#include <stdint.h>
#include <time.h>
#include "../tree/heap.h"
//#include <stdarg.h>

/*
 * Per-point record.
 *
 * NOTE(review): the field descriptions are inferred from the field names
 * only; confirm them against the code that fills this structure in.
 *
 * NOTE(review): float_t is used here before this header's own
 * `#define float_t ...`, so it resolves to whatever an earlier include
 * provides (presumably ../tree/heap.h; otherwise the <math.h> typedef).
 * Confirm both agree with the type selected by USE_FLOAT32.
 */
typedef struct datapoint_info_t {
    idx_t   array_idx;      /* presumably the index of this point in its array */
    heap    ngbh;           /* heap from ../tree/heap.h; presumably the neighbour list */
    float_t g;
    float_t log_rho;        /* presumably the log-density estimate */
    float_t log_rho_c;      /* presumably a corrected log-density -- TODO confirm */
    float_t log_rho_err;    /* presumably the error on log_rho */
    idx_t   kstar;
    int     is_center;      /* presumably a boolean flag (see MY_TRUE / MY_FALSE) */
    int     cluster_idx;    /* presumably the index of the assigned cluster */
} datapoint_info_t;

/*
 * Two-argument maximum / minimum.
 * These are plain function-like macros: the selected argument is evaluated
 * twice, so do not pass expressions with side effects (e.g. i++).
 */
#define MAX(lhs,rhs) ((lhs) > (rhs) ? (lhs) : (rhs))
#define MIN(lhs,rhs) ((lhs) < (rhs) ? (lhs) : (rhs))

/*
 * Working floating-point type, chosen at compile time:
 * double by default, float when USE_FLOAT32 is defined.
 * Note that this macro reuses the name of the float_t typedef from <math.h>.
 */
#ifndef USE_FLOAT32
	#define float_t double
#else
	#define float_t float
#endif

/* Integer boolean constants. */
#define MY_FALSE 0
#define MY_TRUE  1

/*
 * Debug-only printf wrapper.
 * Forwards its arguments to printf() in debug builds and expands to nothing
 * when NDEBUG is defined.
 * Defined once per configuration so that no #undef is needed: #undef takes a
 * bare macro name, and the former `#undef DB_PRINT(...)` was malformed.
 */
#ifdef NDEBUG
	#define DB_PRINT(...)
#else
	#define DB_PRINT(...) printf(__VA_ARGS__)
#endif

/*
 * Debug-only wrapper around mpi_printf().
 * Requires a variable named `ctx` (passed as the first argument of
 * mpi_printf) to be visible at the point of use. Expands to nothing when
 * NDEBUG is defined.
 * Defined once per configuration so that no #undef is needed: #undef takes a
 * bare macro name, and the former `#undef MPI_DB_PRINT(...)` was malformed.
 */
#ifdef NDEBUG
	#define MPI_DB_PRINT(...)
#else
	#define MPI_DB_PRINT(...) mpi_printf(ctx,__VA_ARGS__)
#endif

/*
 * Always-enabled wrapper around mpi_printf() (not affected by NDEBUG).
 * Requires a variable named `ctx` to be visible at the point of use.
 */
#define MPI_PRINT(...) mpi_printf(ctx, __VA_ARGS__)

/*
 * Section timing helpers.
 *
 *   TIME_DEF    declares the two struct timespec variables used below.
 *   TIME_START  synchronises on ctx->mpi_communicator, then records the
 *               start time with CLOCK_MONOTONIC.
 *   TIME_STOP   expression: records the end time and evaluates to the
 *               elapsed seconds as a double.
 *   LOG_WRITE(sec_name, time)
 *               if time > 0, reduces it over ctx->mpi_communicator (sum,
 *               min, max to rank 0) and prints avg/min/max through
 *               MPI_DB_PRINT; otherwise prints only sec_name.
 *               `time` must be an lvalue of type double because its address
 *               is handed to MPI_Reduce. MPI_Reduce is a collective call, so
 *               every rank of the communicator has to take the same branch.
 *
 * All macros expect a variable named `ctx` in scope. The debug forms keep
 * their original brace-block shape so existing call sites (with or without
 * a trailing semicolon) keep compiling.
 *
 * With NDEBUG the macros keep the same shape as in debug builds: TIME_STOP
 * is still an expression (0.0), so `t = TIME_STOP;` compiles, and LOG_WRITE
 * still takes two arguments, which it discards.
 */
#ifdef NDEBUG
    #define TIME_DEF
    #define TIME_START
    #define TIME_STOP (0.0)
    #define LOG_WRITE(sec_name,time)
#else
    #define TIME_DEF struct timespec __start, __end;
    #define TIME_START { \
        MPI_Barrier(ctx -> mpi_communicator); \
        clock_gettime(CLOCK_MONOTONIC,&__start); \
    }
    #define TIME_STOP \
        (clock_gettime(CLOCK_MONOTONIC,&__end), \
        (double)(__end.tv_sec - __start.tv_sec) + (__end.tv_nsec - __start.tv_nsec)/1e9)
    #define LOG_WRITE(sec_name,time) { \
        if((time) > 0) \
        { \
            double lw_max_, lw_min_, lw_avg_; \
            MPI_Reduce(&(time), &lw_avg_, 1, MPI_DOUBLE, MPI_SUM, 0, ctx -> mpi_communicator); \
            MPI_Reduce(&(time), &lw_min_, 1, MPI_DOUBLE, MPI_MIN, 0, ctx -> mpi_communicator); \
            MPI_Reduce(&(time), &lw_max_, 1, MPI_DOUBLE, MPI_MAX, 0, ctx -> mpi_communicator); \
            MPI_DB_PRINT("%50.50s -> [avg: %.2lfs, min: %.2lfs, max: %.2lfs]\n", sec_name, lw_avg_/((double)ctx -> world_size), lw_min_, lw_max_); \
        } \
        else \
        { \
            MPI_DB_PRINT("%s\n", sec_name);\
        }\
    }

#endif

/*
 * From Springel's GADGET-4 code
 */

/*
 * Stand-ins for the <stdarg.h> argument-access macros, mapped directly onto
 * the compiler builtins.
 * Each macro is defined only if it does not exist yet, so including
 * <stdarg.h> before this header does not cause an incompatible macro
 * redefinition.
 */
#ifndef va_start
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
/* C2x does not require the second parameter for va_start. */
#define va_start(ap, ...) __builtin_va_start(ap, 0)
#else
/* Versions before C2x do require the second parameter. */
#define va_start(ap, param) __builtin_va_start(ap, param)
#endif
#endif

#ifndef va_end
#define va_end(ap)          __builtin_va_end(ap)
#endif

#ifndef va_arg
#define va_arg(ap, type)    __builtin_va_arg(ap, type)
#endif

/*
 * File-logging hooks. LOG and LOG_END are empty in every configuration;
 * LOG_START opens the log file only when NDEBUG is defined.
 *
 * The file handle has internal linkage: a non-static object definition in a
 * header is emitted in every translation unit that includes it, and clashes
 * at link time.
 *
 * NOTE(review): the path passed to fopen() is an empty string, which cannot
 * name a file, so the call is expected to fail and return NULL; nothing
 * closes the handle either, because LOG_END is empty.
 * NOTE(review): logging is enabled under NDEBUG while the other debug macros
 * in this header are disabled under NDEBUG -- confirm the condition is not
 * inverted.
 */
#if defined(NDEBUG)
    static FILE* __log_file;
    #define LOG_START __log_file = fopen("","w"); 
    #define LOG
    #define LOG_END
#else 
    #define LOG_START 
    #define LOG
    #define LOG_END
#endif


/*
 * Per-process state shared by the MPI routines.
 *
 * The macros in this header read `world_size` (divisor used to average the
 * reduced timings) and `mpi_communicator` (barrier and reductions).
 *
 * NOTE(review): every other description is inferred from the field name
 * only; confirm against the code that fills the structure in.
 */
struct global_context_t
{
    /* process identity */
    int world_size;                 /* presumably the number of ranks in mpi_communicator */
    int mpi_rank;                   /* presumably this process's rank */
    int __processor_name_len;       /* presumably the used length of processor_mame */

    /* local data and its bounding box */
    float_t* local_data;
    float_t* lb_box;                /* presumably the per-dimension lower bounds */
    float_t* ub_box;                /* presumably the per-dimension upper bounds */

    /* halo exchange bookkeeping */
    int* n_halo_points_recv;
    int* n_halo_points_send;
    idx_t** idx_halo_points_recv;
    idx_t** idx_halo_points_send;

    /* point counts */
    size_t n_points;                /* presumably the global number of points */
    size_t idx_start;               /* presumably the global index of the first local point */
    size_t local_n_points;          /* presumably the number of points held by this rank */

    /* per-point records */
    datapoint_info_t*  local_datapoints;
    datapoint_info_t** halo_datapoints;
    heap_node* __recieved_heap_data;

    uint32_t dims;                  /* presumably the dimensionality of the data */

    /* per-rank layout */
    int* rank_idx_start;
    int* rank_n_points;

    char processor_mame[MPI_MAX_PROCESSOR_NAME];
    MPI_Comm mpi_communicator;      /* communicator used by TIME_START and LOG_WRITE */
};

/*
 * A set of points together with its bounding box.
 *
 * NOTE(review): the field descriptions are inferred from the field names
 * only; confirm against the code that uses this structure.
 */
struct pointset_t
{
    size_t   n_points;      /* presumably the number of points currently stored */
    size_t   __capacity;    /* presumably the number of points `data` has room for */
    uint32_t dims;          /* presumably the number of coordinates per point */
    float_t* data;          /* presumably the coordinate storage */
    float_t* lb_box;        /* presumably the per-dimension lower bounds */
    float_t* ub_box;        /* presumably the per-dimension upper bounds */
};

/* Short names for the structures declared in this header. */
typedef struct global_context_t global_context_t;
typedef struct pointset_t pointset_t;

/*
 * Function declarations; the definitions live outside this header.
 * NOTE(review): the one-line descriptions are inferred from the function
 * names -- confirm against the implementations.
 */

/* printf-style output taking the context as first argument (used by the MPI_*PRINT macros). */
void mpi_printf(global_context_t* ctx, const char *fmt, ...);

/* Presumably initialises the given context. */
void get_context(global_context_t* ctx);

/* Presumably prints the contents of the given context. */
void print_global_context(global_context_t* ctx);

/* Presumably releases the resources owned by the context. */
void free_context(global_context_t* ctx);

/* Presumably releases the resources owned by the point set. */
void free_pointset(pointset_t* ps);

/* Presumably fills a newly created matrix with random values; parameter meanings not visible here. */
void generate_random_matrix(float_t** ,int ,size_t ,global_context_t* ctx);


