diff --git a/Makefile b/Makefile index f2c818021bdd2202eace0978faa5b49ebe87896f..69838e47a21f84f57dac797f47291eca911b7ddf 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ CC=mpicc -CFLAGS=-O0 -g +CFLAGS=-O3 -g LDFLAGS=-lm all: main -obj=src/main/main.c src/tree/tree.c src/common/common.c +obj=src/main/main.c src/tree/tree.c src/common/common.c src/tree/kdtreeV2.c src/tree/heap.c main: ${obj} ${CC} ${CFLAGS} ${LDFLAGS} ${obj} -o $@ diff --git a/src/common/common.h b/src/common/common.h index a4956c557ef8513443ef9ca2dbd36d5fdfe5809a..7352445a898bd5b2a3e60f96500f28502f8ed01c 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -49,16 +49,17 @@ struct global_context_t { size_t n_points; - MPI_Comm mpi_communicator; - int world_size; - char processor_mame[MPI_MAX_PROCESSOR_NAME]; - int __processor_name_len; - int mpi_rank; + size_t idx_start; size_t local_n_points; uint32_t dims; float_t* local_data; float_t* lb_box; float_t* ub_box; + int world_size; + int mpi_rank; + int __processor_name_len; + char processor_mame[MPI_MAX_PROCESSOR_NAME]; + MPI_Comm mpi_communicator; }; struct pointset_t diff --git a/src/tree/heap.c b/src/tree/heap.c new file mode 100644 index 0000000000000000000000000000000000000000..57afac7960cbb3b56cc1b3d0d5a5a51e9c331631 --- /dev/null +++ b/src/tree/heap.c @@ -0,0 +1,213 @@ +#include "heap.h" + +void swap_heap_node(heap_node* a, heap_node* b){ + heap_node tmp; + memcpy(&tmp,a,sizeof(heap_node)); + memcpy(a,b,sizeof(heap_node)); + memcpy(b,&tmp,sizeof(heap_node)); + return; +} + +void allocate_heap(heap* H, idx_t n){ + H -> data = (heap_node*)malloc(n*sizeof(heap_node)); + H -> N = n; + H -> count = 0; + return; +} + +void init_heap(heap* H){ + for(idx_t i = 0; i < H -> N; ++i) + { + H -> data[i].value = 0.; + H -> data[i].array_idx = MY_SIZE_MAX; + } + return; +} + +void free_heap(heap * H){ free(H -> data);} + + +void heapify_max_heap(heap* H, idx_t node){ + idx_t nn = node; + idx_t largest = nn; + /* + Found gratest between children of node and boundcheck if the node is a leaf + */ + while(1) + { + largest = (HEAP_LCH(nn) < H -> N) && + (H -> data[HEAP_LCH(nn)].value > H -> data[largest].value ) ? HEAP_LCH(nn) : largest; + + largest = (HEAP_RCH(nn) < H -> N) && + (H -> data[HEAP_RCH(nn)].value > H -> data[largest].value ) ? HEAP_RCH(nn) : largest; + if(largest != nn) + { + swap_heap_node(H -> data + nn, H -> data + largest); + nn = largest; + } + else + { + break; + } + } + + //if(HEAP_LCH(node) < H -> N){ + // //if(H -> data[HEAP_LCH(node)].value > H -> data[largest].value ) largest = HEAP_LCH(node); + // largest = (H -> data[HEAP_LCH(nn)].value > H -> data[largest].value ) ? HEAP_LCH(nn) : largest; + //} + //if(HEAP_RCH(node) < H -> N){ + // //if(H -> data[HEAP_RCH(node)].value > H -> data[largest].value ) largest = HEAP_RCH(node); + // largest = (H -> data[HEAP_RCH(nn)].value > H -> data[largest].value ) ? HEAP_RCH(nn) : largest; + //} + //if(largest == node){ + // return; + //} + //else{ + // swap_heap_node(H -> data + node, H -> data + largest); + // heapify_max_heap(H, largest); + //} +} + + +void set_root_max_heap(heap * H, FLOAT_TYPE val, idx_t array_idx){ + H -> data[0].value = val; + H -> data[0].array_idx = array_idx; + heapify_max_heap(H,0); + return; +} + +void insert_max_heap_insertion_sort(heap * H,const FLOAT_TYPE val,const idx_t array_idx){ + heap_node tmpNode = {.value = val, .array_idx = array_idx}; + if(H -> count < H -> N) + { + idx_t idx = H -> count; + H -> data[idx] = tmpNode; + ++(H -> count); + while(idx >= 1) + { + if(H -> data[idx].value < H -> data[idx - 1].value) + { + swap_heap_node((H -> data) + idx, (H -> data) + idx - 1); + idx--; + } + else + { + break; + } + } + + } + else + { + if(H -> data[H -> count - 1].value > val) + { + idx_t idx = H -> count - 1; + H -> data[idx] = tmpNode; + while(idx >= 1) + { + if(H -> data[idx].value < H -> data[idx - 1].value) + { + swap_heap_node(H -> data + idx, H -> data + idx - 1); + idx--; + } + else + { + break; + } + } + } + } + return; +} + +void insert_max_heap(heap * H,const FLOAT_TYPE val,const idx_t array_idx){ + int c1 = H -> count < H -> N; + int c2 = (val < H -> data[0].value) && (!c1); + int ctot = c1 + 2*c2; + switch (ctot) { + case 1: + { + idx_t node = H->count; + ++(H -> count); + H -> data[node].value = val; + H -> data[node].array_idx = array_idx; + /* + * Push up the node through the heap + */ + while(node && H -> data[node].value > H -> data[HEAP_PARENT(node)].value) + { + swap_heap_node(H -> data + node, H -> data + HEAP_PARENT(node)); + node = HEAP_PARENT(node); + //if(node == 0) break; + } + } + break; + + case 2: + { + set_root_max_heap(H,val,array_idx); + } + break; + default: + break; + } + /* alternative solution, left here if something breaks*/ + + //if(H -> count < H -> N){ + // idx_t node = H->count; + // ++(H -> count); + // H -> data[node].value = val; + // H -> data[node].array_idx = array_idx; + // /* + // * Push up the node through the heap + // */ + // while(node && H -> data[node].value > H -> data[HEAP_PARENT(node)].value) + // { + // swap_heap_node(H -> data + node, H -> data + HEAP_PARENT(node)); + // node = HEAP_PARENT(node); + // //if(node == 0) break; + // } + // return; + //} + //else if (val < H -> data[0].value) + //{ + // set_root_max_heap(H,val,array_idx); + // return; + //} + //else + //{ + // return; + //} + + +} + +#ifdef USE_FLOAT32 + #define EPS 5.96e-08 +#else + #define EPS 2.11e-16 +#endif + +int cmp_heap_nodes(const void* a, const void* b) +{ + const heap_node* aa = (const heap_node*)a; + const heap_node* bb = (const heap_node*)b; + int val = (aa -> value > bb -> value) - (aa -> value < bb -> value); + //return vv; + return val; + + +} + + +void heap_sort(heap* H){ + idx_t n = H -> N; + qsort(H -> data, n, sizeof(heap_node),cmp_heap_nodes); + //for(idx_t i= (H -> N) - 1; i > 0; --i) + //{ + // swap_heap_node(H -> data, H -> data + i); + // H -> N = i; + // heapify_max_heap(H,0); + //} + //H -> N = n; +} diff --git a/src/tree/heap.h b/src/tree/heap.h new file mode 100644 index 0000000000000000000000000000000000000000..9982a796d1718c6b19d5238a44994253e93101c4 --- /dev/null +++ b/src/tree/heap.h @@ -0,0 +1,74 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#define T double +#define DATA_DIMS 0 + +#ifdef USE_FLOAT32 + #define FLOAT_TYPE float +#else + #define FLOAT_TYPE double +#endif + +#ifdef USE_INT32 + #define MY_SIZE_MAX UINT32_MAX + #define idx_t uint32_t +#else + #define MY_SIZE_MAX UINT64_MAX + #define idx_t uint64_t +#endif + +#define HEAP_LCH(x) (2*x + 1) +#define HEAP_RCH(x) (2*x + 2) +#define HEAP_PARENT(x) (x-1)/2 + +#define HP_LEFT_SIDE 0 +#define HP_RIGHT_SIDE 1 + + +struct heap_node +{ + FLOAT_TYPE value; + idx_t array_idx; +} ; + +struct heap +{ + idx_t N; + idx_t count; + struct heap_node* data; + +} ; + +struct SimpleHeap +{ + idx_t N; + T * data; +} ; + +typedef struct SimpleHeap SimpleHeap; +typedef struct heap heap; +typedef struct heap_node heap_node; + + +void allocate_heap(heap* H, idx_t n); + +void init_heap(heap* H); + +void free_heap(heap * H); + +void heapify_max_heap(heap* H, idx_t node); + +void set_root_max_heap(heap * H, FLOAT_TYPE val, idx_t array_idx); + +void insert_max_heap(heap * H, FLOAT_TYPE val, idx_t array_idx); + +void heap_sort(heap* H); +void insert_max_heap_insertion_sort(heap * H,const FLOAT_TYPE val,const idx_t array_idx); + + +int cmp_heap_nodes(const void* a, const void* b); diff --git a/src/tree/kdtreeV2.c b/src/tree/kdtreeV2.c new file mode 100644 index 0000000000000000000000000000000000000000..a097c23819dd99f346a5df825d53f7d8ccfe46a9 --- /dev/null +++ b/src/tree/kdtreeV2.c @@ -0,0 +1,395 @@ +#include "kdtreeV2.h" +#include "heap.h" +#include +#include +#include + +#define DEFAULT_LEAF_SIZE 30 + + +extern unsigned int data_dims; + +FLOAT_TYPE eud_kdtree_v2(FLOAT_TYPE* restrict p1, FLOAT_TYPE* restrict p2){ + register FLOAT_TYPE d = 0; + for(unsigned int i = 0; i data, sizeof(FLOAT_TYPE)*data_dims); + memcpy(x -> data, y -> data, sizeof(FLOAT_TYPE)*data_dims); + memcpy(y -> data, swapMem_kdv2, sizeof(FLOAT_TYPE)*data_dims); + FLOAT_TYPE* tmpPtr = x -> data; + x -> data = y -> data; + y -> data = tmpPtr; + #endif +} + + +/** + * + * KDtree implementation + * + * +*/ + +void initialize_kdnodes_v2(kdnode_v2* node_array, FLOAT_TYPE* d, idx_t n ) +{ + for(idx_t i = 0; i < n; ++i) + { + node_array[i].data = d + (i*data_dims); + node_array[i].array_idx = i; + node_array[i].lch = NULL; + node_array[i].rch = NULL; + node_array[i].parent = NULL; + node_array[i].level = -1; + node_array[i].is_leaf = 0; + node_array[i].split_var = -1; + node_array[i].node_list.data = NULL; + node_array[i].node_list.count = 0; + } +} +/* +void initializePTRS(kdnode_v2** node_ptr_array, kdnode_v2* node_array, idx_t n ) +{ + for(idx_t i = 0; i < n; ++i) + { + node_ptr_array[i] = node_array + i; + } +} +*/ + +int cmpKDnodesV2(kdnode_v2* a, kdnode_v2* b, int var){ + + FLOAT_TYPE res = a->data[var] - b->data[var]; + return (res > 0); +} + +void printKDnodeV2(kdnode_v2* node) +{ + printf("Node %p:\n",node); + printf("\t array_idx: %lu\n", (uint64_t)(node -> array_idx)); + printf("\t data: "); + for(unsigned int i=0; idata[i]); + printf("\n"); + printf("\t parent: %p\n",node -> parent); + printf("\t lch: %p\n",node -> lch); + printf("\t rch: %p\n",node -> rch); + printf("\t level: %d\n", node -> level); + printf("\t split_var: %d\n", node -> split_var); + printf("\n"); +} + +// Standard Lomuto partition function + +int partition_kdnode_v2(kdnode_v2* arr, int low, int high, int split_var) +{ + kdnode_v2 pivot = arr[high]; + + int i = (low - 1); + for (int j = low; j <= high - 1; j++) { + if (!cmpKDnodesV2(arr + j,&pivot,split_var)) { + i++; + swap_kdnode_v2(arr + i, arr + j); + } + } + swap_kdnode_v2(arr + i + 1, arr + high); + return (i + 1); +} + +// Implementation of QuickSelect +int median_of_nodes_kdnode_v2(kdnode_v2* a, int left, int right, int split_var) +{ + //printf("----------\n"); + int k = left + ((right - left + 1)/2); + + if(left == right) return left; + if(left == (right - 1)){ + if(cmpKDnodesV2(a + left,a + right,split_var)) {swap_kdnode_v2(a + left, a + right);} + return right; + } + while (left <= right) { + + // Partition a[left..right] around a pivot + // and find the position of the pivot + int pivotIndex = partition_kdnode_v2(a, left, right,split_var); + //printf("%d %d %d %d\n",left, right, k, pivotIndex); + + // If pivot itself is the k-th smallest element + if (pivotIndex == k) + return pivotIndex; + + // If there are more than k-1 elements on + // left of pivot, then k-th smallest must be + // on left side. + + else if (pivotIndex > k) + right = pivotIndex - 1; + + // Else k-th smallest is on right side. + else + left = pivotIndex + 1; + } + return -1; +} + +kdnode_v2* make_tree_kdnode_v2(kdnode_v2* t, int start, int end, kdnode_v2* parent, int level) +{ + kdnode_v2 *n = NULL; + int split_var = level % data_dims; + FLOAT_TYPE max_diff = -999999.; + for(unsigned int v = 0; v < data_dims; ++v) + { + FLOAT_TYPE max_v = -9999999.; + FLOAT_TYPE min_v = 9999999.; + for(int i = start; i <= end; ++i) + { + max_v = t[i].data[v] > max_v ? t[i].data[v] : max_v; + min_v = t[i].data[v] < min_v ? t[i].data[v] : min_v; + } + if((max_v - min_v) > max_diff) + { + max_diff = max_v - min_v; + split_var = v; + } + } + + #ifdef SWMEM + if(parent == NULL) + { + swapMem_kdv2 = (FLOAT_TYPE*)malloc(sizeof(FLOAT_TYPE)*data_dims); + } + #endif + + + + if(end - start < DEFAULT_LEAF_SIZE) + { + n = t + start; + n -> is_leaf = 1; + n -> parent = parent; + n -> lch = NULL; + n -> rch = NULL; + size_t j = 0; + n -> node_list.count = (size_t)(end - start + 1); + n -> node_list.data = (kdnode_v2**)malloc(n -> node_list.count * sizeof(kdnode_v2*)); + for(int i = start; i <= end; ++i){ + t[i].parent = n; + t[i].is_leaf = 1; + t[i].lch = NULL; + t[i].rch = NULL; + n -> node_list.data[j] = t + i; + ++j; + } + return n; + + } + + + + + int median_idx = -1; + + //if ((end - start) < 0) return 0; + //if (end == start) { + // n = t + start; + // n -> split_var = split_var; + // n->parent = parent; + // n->level = level; + // n -> lch = NULL; + // n -> rch = NULL; + // return n; + //} + + median_idx = median_of_nodes_kdnode_v2(t, start, end, split_var); + //printf("%d median idx\n", median_idx); + if(median_idx > -1){ + swap_kdnode_v2(t + start, t + median_idx); + //n = t + median_idx; + n = t + start; + //n->lch = make_tree_kdnode_v2(t, start, median_idx - 1, n, level + 1); + n->lch = make_tree_kdnode_v2(t, start + 1, median_idx, n, level + 1); + //n->rch = make_tree_kdnode_v2(t, median_idx + 1, end, n, level + 1); + n->rch = make_tree_kdnode_v2(t, median_idx + 1, end, n, level + 1); + n -> split_var = split_var; + n->parent = parent; + n->level = level; + } + + #ifdef SWMEM + if(parent == NULL) + { + swapMem_kdv2 = malloc(sizeof(FLOAT_TYPE)*data_dims); + } + #endif + + return n; +} + +static inline FLOAT_TYPE hyper_plane_dist(FLOAT_TYPE* p1, FLOAT_TYPE* p2, int var) +{ + return p1[var] - p2[var]; +} + +static inline int hyper_plane_side(FLOAT_TYPE* p1, FLOAT_TYPE* p2, int var) +{ + return p1[var] > p2[var]; +} + +void knn_sub_tree_search_kdtree_v2(FLOAT_TYPE* point, kdnode_v2* root, heap * H) +{ + if(root -> is_leaf) + { + for(size_t i = 0; i < root -> node_list.count; ++i) + { + kdnode_v2* n = root -> node_list.data[i]; + __builtin_prefetch(root -> node_list.data + i + 1, 0, 3); + FLOAT_TYPE distance = eud_kdtree_v2(point, n -> data); + insert_max_heap(H, distance,n -> array_idx); + } + return; + } + + FLOAT_TYPE current_distance = eud_kdtree_v2(point, root -> data); + FLOAT_TYPE hp_distance = hyper_plane_dist(point, root -> data, root -> split_var); + insert_max_heap(H, current_distance, root -> array_idx); + __builtin_prefetch(root -> lch, 0, 3); + __builtin_prefetch(root -> rch, 0, 3); + + int side = hp_distance > 0.f; + + switch (side) + { + case HP_LEFT_SIDE: + if(root -> lch) + { + knn_sub_tree_search_kdtree_v2(point, root -> lch, H); + } + break; + + case HP_RIGHT_SIDE: + if(root -> rch) + { + knn_sub_tree_search_kdtree_v2(point, root -> rch, H); + } + break; + + default: + break; + } + FLOAT_TYPE max_d = H -> data[0].value; + int c = max_d > (hp_distance * hp_distance); + + //if(c || (H -> count) < (H -> N)) + if(c) + { + + switch (side) + { + case HP_LEFT_SIDE: + if(root -> rch) + { + knn_sub_tree_search_kdtree_v2(point, root -> rch, H); + } + break; + + case HP_RIGHT_SIDE: + if(root -> lch) + { + knn_sub_tree_search_kdtree_v2(point, root -> lch, H); + } + break; + + default: + break; + } + } + return; +} + +void kdtree_v2_init(kdtree_v2* tree, FLOAT_TYPE* data, size_t n_nodes, unsigned int dims) +{ + data_dims = dims; + tree -> n_nodes = n_nodes; + tree -> _nodes = (kdnode_v2*)malloc(n_nodes * sizeof(kdnode_v2)); + initialize_kdnodes_v2(tree -> _nodes, data, n_nodes); + tree -> root = NULL; +} + +void kdtree_v2_free(kdtree_v2* tree) +{ + free(tree -> _nodes); +} + + +heap knn_kdtree_v2(FLOAT_TYPE* point, kdnode_v2* kdtree_root, int maxk) +{ + heap H; + allocate_heap(&H,maxk); + init_heap(&H); + knn_sub_tree_search_kdtree_v2(point, kdtree_root,&H); + heap_sort(&H); + return H; +} + +heap knn_kdtree_v2_no_heapsort(FLOAT_TYPE* point, kdnode_v2* kdtree_root, int maxk) +{ + heap H; + allocate_heap(&H,maxk); + init_heap(&H); + knn_sub_tree_search_kdtree_v2(point, kdtree_root,&H); + return H; +} + +kdnode_v2 * build_tree_kdtree_v2(kdnode_v2* kd_ptrs, size_t n, size_t dimensions ) +{ + /************************************************* + * Wrapper for make_tree function. * + * Simplifies interfaces and takes time measures * + *************************************************/ + data_dims = dimensions; + kdnode_v2* root = make_tree_kdnode_v2(kd_ptrs, 0, n-1, NULL ,0); + return root; + +} diff --git a/src/tree/kdtreeV2.h b/src/tree/kdtreeV2.h new file mode 100644 index 0000000000000000000000000000000000000000..26cfa1d247651cb2f5dc80a79c82898f73b4118c --- /dev/null +++ b/src/tree/kdtreeV2.h @@ -0,0 +1,64 @@ +#pragma once +#include "heap.h" +#include +#include +#include +#include +#include +#include +#define T double +#define DATA_DIMS 0 + +#ifdef USE_FLOAT32 + #define FLOAT_TYPE float +#else + #define FLOAT_TYPE double +#endif + +#ifdef USE_INT32 + #define MY_SIZE_MAX UINT32_MAX + #define idx_t uint32_t +#else + #define MY_SIZE_MAX UINT64_MAX + #define idx_t uint64_t +#endif + +struct kdnode_v2_list +{ + size_t count; + struct kdnode_v2** data; +}; + + +struct kdnode_v2 +{ + FLOAT_TYPE * data; + int level; + int split_var; + int is_leaf; + idx_t array_idx; + struct kdnode_v2* parent; + struct kdnode_v2* lch; + struct kdnode_v2* rch; + struct kdnode_v2_list node_list; +}; + +struct kdtree_v2 +{ + size_t n_nodes; + struct kdnode_v2* _nodes; + struct kdnode_v2* root; +}; + + +typedef struct kdnode_v2 kdnode_v2; +typedef struct kdtree_v2 kdtree_v2; + +void initialize_kdnodes_v2(kdnode_v2* node_array, FLOAT_TYPE* d, idx_t n ); + +heap knn_kdtree_v2(FLOAT_TYPE* point, kdnode_v2* kdtree_root, int maxk); +heap knn_kdtree_v2_no_heapsort(FLOAT_TYPE* point, kdnode_v2* kdtree_root, int maxk); + +kdnode_v2 * build_tree_kdtree_v2(kdnode_v2* kd_ptrs, size_t n, size_t dimensions); +void kdtree_v2_init(kdtree_v2* tree, FLOAT_TYPE* data, size_t n_nodes, unsigned int dims); +void kdtree_v2_free(kdtree_v2* tree); diff --git a/src/tree/tree.c b/src/tree/tree.c index 509a69aff881d051976611cf704dd9d9f8d41953..1f6e17a57f6c34736306fa8c09416144547a659e 100644 --- a/src/tree/tree.c +++ b/src/tree/tree.c @@ -8,6 +8,7 @@ * neighbors */ #include "tree.h" +#include "kdtreeV2.h" #include "mpi.h" #include #include @@ -27,6 +28,8 @@ #define TOP_TREE_LCH 0 #define NO_CHILD -1 +unsigned int data_dims; + float_t *read_data_file(global_context_t *ctx, const char *fname, const int file_in_float32) { @@ -1029,6 +1032,8 @@ int partition_data_around_key(int* key, float_t *val, int vec_len, int ref_key , return store_index; // maybe, update, it works :) } + + void exchange_points(global_context_t* ctx, top_kdtree_t* tree) { int* points_per_proc = (int*)malloc(ctx -> world_size * sizeof(int)); @@ -1062,27 +1067,6 @@ void exchange_points(global_context_t* ctx, top_kdtree_t* tree) points_per_proc[i] = points_per_proc[i] - points_per_proc[i - 1]; } - - /* - int* points_per_proc_all = (int*)malloc(ctx -> world_size * sizeof(int)); - MPI_Allreduce(MPI_IN_PLACE, points_per_proc_all, ctx -> world_size, MPI_INT,MPI_SUM, ctx -> mpi_communicator); - - size_t test_num = 0; - for(int i = 0; i < ctx -> world_size; ++i) test_num += points_per_proc_all[i]; - MPI_DB_PRINT("Master has n_points %lu and in node population has %lu points\n", ctx -> n_points, test_num); - - - - MPI_DB_PRINT("Points per proc after: "); - for(int i = 0; i < ctx -> world_size; ++i) - { - MPI_DB_PRINT("%lu ", points_per_proc_all[i]); - } - MPI_DB_PRINT("\n"); - - free(points_per_proc_all); - */ - int* rcvcount = (int*)malloc(ctx -> world_size * sizeof(int)); int* displs = (int*)malloc(ctx -> world_size * sizeof(int)); float_t* rcvbuffer = NULL; @@ -1110,17 +1094,28 @@ void exchange_points(global_context_t* ctx, top_kdtree_t* tree) //DB_PRINT("[RANK %d] is recieving %d elements %d points\n", rcv, tot_count, tot_count / ctx -> dims); rcvbuffer = (float_t*)malloc(tot_count * sizeof(float_t)); } - MPI_Gatherv(send_buffer, points_per_proc[rcv], MPI_MY_FLOAT, rcvbuffer, rcvcount, displs, MPI_MY_FLOAT, rcv, ctx -> mpi_communicator); + MPI_Gatherv(send_buffer, ctx -> dims * points_per_proc[rcv], MPI_MY_FLOAT, rcvbuffer, rcvcount, displs, MPI_MY_FLOAT, rcv, ctx -> mpi_communicator); + } + + ctx -> local_n_points = tot_count / ctx -> dims; + ctx -> idx_start = 0; + for(int i = 0; i < ctx -> mpi_rank - 1; ++i) + { + ctx -> idx_start += points_per_proc[i]; } - ctx -> local_n_points = tot_count; /* free prv pointer */ free(ctx -> local_data); ctx -> local_data = rcvbuffer; - - + /* check exchange */ + for(size_t i = 0; i < ctx -> local_n_points; ++i) + { + /* tree walk */ + int o = compute_point_owner(ctx, tree, ctx -> local_data + (i * ctx -> dims)); + if(o != ctx -> mpi_rank) DB_PRINT("rank %d got an error\n",ctx -> mpi_rank); + } free(points_owners); free(points_per_proc); free(partition_offset); @@ -1128,6 +1123,222 @@ void exchange_points(global_context_t* ctx, top_kdtree_t* tree) free(displs); } +static inline size_t local_to_global_idx(global_context_t* ctx, size_t local_idx) +{ + return local_idx + ctx -> idx_start; +} + +void translate_tree_idx_to_global(global_context_t* ctx, kdtree_v2* local_tree) +{ + for(size_t i = 0; i < ctx -> local_n_points; ++i) + { + local_tree -> _nodes[i].array_idx = local_to_global_idx(ctx, local_tree -> _nodes[i].array_idx); + } +} + +heap ngbh_search_kdtree(global_context_t* ctx, kdtree_v2* local_tree, float_t* data, int k) +{ + data_dims = ctx -> dims; + return knn_kdtree_v2(data, local_tree -> root, k); +} + +void tree_walk( + global_context_t* ctx, + top_kdtree_node_t* root, + int point_idx, + float_t max_dist, + float_t* point, + float_t** data_to_send_per_proc, + int** local_idx_of_the_point, + int* point_to_send_count, + int* point_to_send_capacity) +{ + + if(root -> owner != -1 && root -> owner != ctx -> mpi_rank) + { + /* put the leaf on the requests array */ + int owner = root -> owner; + int idx = point_to_send_count[owner]; + int capacity = point_to_send_capacity[owner]; + int len = 1 + ctx -> dims; + if(idx == capacity) + { + data_to_send_per_proc[owner] = realloc(data_to_send_per_proc[owner], (capacity * 1.1) * (1 + ctx -> dims) * sizeof(float_t)); + local_idx_of_the_point[owner] = realloc(local_idx_of_the_point[owner], (capacity * 1.1) * sizeof(int)); + point_to_send_capacity[owner] = capacity * 1.1; + } + + float_t* base = data_to_send_per_proc[owner] + (len * idx); + base[0] = max_dist; + memcpy(base + 1, point, ctx -> dims * sizeof(float_t)); + local_idx_of_the_point[owner][idx] = point_idx; + + point_to_send_count[owner]++; + + } + else + { + /* tree walk */ + int split_var = root -> split_dim; + float_t hp_distance = point[split_var] - root -> split_val; + __builtin_prefetch(root -> lch, 0, 3); + __builtin_prefetch(root -> rch, 0, 3); + + int side = hp_distance > 0.f; + + switch (side) + { + case TOP_TREE_LCH: + if(root -> lch) + { + /* walk on the left */ + tree_walk(ctx, root -> lch, point_idx, max_dist, point, + data_to_send_per_proc, local_idx_of_the_point, + point_to_send_count, point_to_send_capacity); + } + break; + + case TOP_TREE_RCH: + if(root -> rch) + { + /* walk on the right */ + tree_walk(ctx, root -> rch, point_idx, max_dist, point, + data_to_send_per_proc, local_idx_of_the_point, + point_to_send_count, point_to_send_capacity); + } + break; + + default: + break; + } + + int c = max_dist > (hp_distance * hp_distance); + + //if(c || (H -> count) < (H -> N)) + if(c) + { + + switch (side) + { + case HP_LEFT_SIDE: + if(root -> rch) + { + /* walk on the right */ + tree_walk(ctx, root -> rch, point_idx, max_dist, point, + data_to_send_per_proc, local_idx_of_the_point, + point_to_send_count, point_to_send_capacity); + } + break; + + case HP_RIGHT_SIDE: + if(root -> lch) + { + /* walk on the left */ + tree_walk(ctx, root -> lch, point_idx, max_dist, point, + data_to_send_per_proc, local_idx_of_the_point, + point_to_send_count, point_to_send_capacity); + } + break; + + default: + break; + } + } + } + +} + +void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtree_t* top_tree, kdtree_v2* local_tree, float_t* data, int k) +{ + /* local search */ + MPI_DB_PRINT("Ngbh search\n"); + for(int p = 0; p < ctx -> local_n_points; ++p) + { + idx_t idx = local_tree -> _nodes[p].array_idx; + /* actually we want to preserve the heap to then insert guesses from other nodes */ + dp_info[idx].ngbh = knn_kdtree_v2_no_heapsort(local_tree -> _nodes[p].data, local_tree -> root, k); + dp_info[idx].cluster_idx = -1; + dp_info[idx].is_center = 0; + dp_info[idx].array_idx = idx; + } + MPI_DB_PRINT("Done\n"); + + /* find if a points needs a refine on the global tree */ + float_t** data_to_send_per_proc = (float_t**)malloc(ctx -> world_size * sizeof(float_t*)); + int** local_idx_of_the_point = (int**)malloc(ctx -> world_size * sizeof(int*)); + int* point_to_send_count = (int*)malloc(ctx -> world_size * sizeof(int)); + int* point_to_send_capacity = (int*)malloc(ctx -> world_size * sizeof(int)); + + for(int i = 0; i < ctx -> world_size; ++i) + { + data_to_send_per_proc[i] = (float_t*)malloc(100 * (1 + ctx -> dims) * sizeof(float_t)); + local_idx_of_the_point[i] = (int*)malloc(100 * sizeof(int)); + point_to_send_capacity[i] = 100; + point_to_send_count[i] = 0; + } + + /* for each point walk the tree and find to which proc send data */ + /* actually compute intersection of ngbh radius of each point to node box */ + + /* tree walk for each point */ + for(int i = 0; i < ctx -> local_n_points; ++i) + { + /* + MPI_DB_PRINT("%lu\n",dp_info[i].array_idx); + if(i > 10) break; + */ + float_t max_dist = dp_info[i].ngbh.data[0].value; + float_t* point = ctx -> local_data + (i * ctx -> dims); + + tree_walk(ctx, top_tree -> root, i, max_dist, + point, data_to_send_per_proc, local_idx_of_the_point, + point_to_send_count, point_to_send_capacity); + } + + for(int i = 0; i < ctx -> world_size; ++i) + { + free(data_to_send_per_proc[i]); + free(local_idx_of_the_point[i]); + } + + free(point_to_send_count); + free(point_to_send_capacity); +} + +void test_the_idea(global_context_t* ctx) +{ + int* num = (int*)malloc(ctx -> world_size * sizeof(int)); + for(int i = 0; i < ctx -> world_size; ++i) num[i] = i; + + for(int i = 0; i < ctx -> world_size; ++i) + { + MPI_Request request; + MPI_Isend(num + i, 1, MPI_INT, i, 0, ctx -> mpi_communicator, &request); + } + MPI_Barrier(ctx -> mpi_communicator); + MPI_Status status; + int flag; + int cc = 0; + MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_SOURCE, ctx -> mpi_communicator, &flag, &status); + while(flag) + { + cc++; + MPI_Request request; + MPI_Recv(num + status.MPI_SOURCE, 1, MPI_INT, status.MPI_SOURCE, MPI_ANY_TAG, ctx -> mpi_communicator, &status); + MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_SOURCE, ctx -> mpi_communicator, &flag, &status); + + } + MPI_DB_PRINT("Recieved %d msgs\n",cc); + free(num); + +} + +void build_local_tree(global_context_t* ctx, kdtree_v2* local_tree) +{ + local_tree -> root = build_tree_kdtree_v2(local_tree -> _nodes, local_tree -> n_nodes, ctx -> dims); +} + + void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) { float_t *data; @@ -1223,14 +1434,27 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) top_tree_init(ctx, &tree); build_top_kdtree(ctx, &original_ps, &tree, k_global, tol); exchange_points(ctx, &tree); + //test_the_idea(ctx); - top_tree_free(ctx, &tree); + kdtree_v2 local_tree; + kdtree_v2_init( &local_tree, ctx -> local_data, ctx -> local_n_points, (unsigned int)ctx -> dims); + int k = 100; + MPI_DB_PRINT("uu\n"); + datapoint_info_t* dp_info = (datapoint_info_t*)malloc(ctx -> local_n_points * sizeof(datapoint_info_t)); + build_local_tree(ctx, &local_tree); + MPI_DB_PRINT("Mi pianto qua\n"); + mpi_ngbh_search(ctx, dp_info, &local_tree, ctx -> local_data, k); + + + top_tree_free(ctx, &tree); + kdtree_v2_free(&local_tree); free(send_counts); free(displacements); + free(dp_info); if (ctx->mpi_rank == 0) free(data); diff --git a/src/tree/tree.h b/src/tree/tree.h index b8142ddfe089d61fe704aa9122293dd11a0278ce..7439b1594c558e5caffd118a5a6df26b593e7cb5 100644 --- a/src/tree/tree.h +++ b/src/tree/tree.h @@ -1,9 +1,27 @@ #pragma once +#include "../common/common.h" +#include "heap.h" +#include "kdtreeV2.h" #include #include #include #include -#include "../common/common.h" +#include +#include +#include +#include +#include +#include +#include + +#define DTHR 23.92812698 +#define PI_F 3.1415926f +#define ARRAY_INCREMENT 500 +#define DA_DTYPE idx_t +#define NOBORDER MY_SIZE_MAX + +#define VERBOSE_TRUE 1 +#define VERBOSE_FALSE 0 typedef struct mpi_double_int{ @@ -63,6 +81,21 @@ typedef struct top_kdtree_t } top_kdtree_t; + +typedef struct datapoint_info_t { + float_t g; + heap ngbh; + idx_t array_idx; + float_t log_rho; + float_t log_rho_c; + float_t log_rho_err; + idx_t kstar; + int is_center; + int cluster_idx; +} datapoint_info_t; + + + void simulate_master_read_and_scatter(int, size_t, global_context_t* );