diff --git a/.gitignore b/.gitignore index 91f70de24db5bb5940c9816e892d4272f96ed2f4..6cf8f6c3c7c26eea530764668c5943e8362f94a6 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ main sync.sh bb .ipy* +scalability_results diff --git a/src/tree/heap.c b/src/tree/heap.c index 57afac7960cbb3b56cc1b3d0d5a5a51e9c331631..abe8e8945f7b0728375240ef284516105d927105 100644 --- a/src/tree/heap.c +++ b/src/tree/heap.c @@ -33,6 +33,7 @@ void heapify_max_heap(heap* H, idx_t node){ /* Found gratest between children of node and boundcheck if the node is a leaf */ + while(1) { largest = (HEAP_LCH(nn) < H -> N) && @@ -50,22 +51,25 @@ void heapify_max_heap(heap* H, idx_t node){ break; } } + - //if(HEAP_LCH(node) < H -> N){ - // //if(H -> data[HEAP_LCH(node)].value > H -> data[largest].value ) largest = HEAP_LCH(node); - // largest = (H -> data[HEAP_LCH(nn)].value > H -> data[largest].value ) ? HEAP_LCH(nn) : largest; - //} - //if(HEAP_RCH(node) < H -> N){ - // //if(H -> data[HEAP_RCH(node)].value > H -> data[largest].value ) largest = HEAP_RCH(node); - // largest = (H -> data[HEAP_RCH(nn)].value > H -> data[largest].value ) ? HEAP_RCH(nn) : largest; - //} - //if(largest == node){ - // return; - //} - //else{ - // swap_heap_node(H -> data + node, H -> data + largest); - // heapify_max_heap(H, largest); - //} + /* + if(HEAP_LCH(node) < H -> N){ + //if(H -> data[HEAP_LCH(node)].value > H -> data[largest].value ) largest = HEAP_LCH(node); + largest = (H -> data[HEAP_LCH(nn)].value > H -> data[largest].value ) ? HEAP_LCH(nn) : largest; + } + if(HEAP_RCH(node) < H -> N){ + //if(H -> data[HEAP_RCH(node)].value > H -> data[largest].value ) largest = HEAP_RCH(node); + largest = (H -> data[HEAP_RCH(nn)].value > H -> data[largest].value ) ? HEAP_RCH(nn) : largest; + } + if(largest == node){ + return; + } + else{ + swap_heap_node(H -> data + node, H -> data + largest); + heapify_max_heap(H, largest); + } + */ } @@ -126,27 +130,27 @@ void insert_max_heap(heap * H,const FLOAT_TYPE val,const idx_t array_idx){ int ctot = c1 + 2*c2; switch (ctot) { case 1: - { - idx_t node = H->count; - ++(H -> count); - H -> data[node].value = val; - H -> data[node].array_idx = array_idx; - /* - * Push up the node through the heap - */ - while(node && H -> data[node].value > H -> data[HEAP_PARENT(node)].value) - { - swap_heap_node(H -> data + node, H -> data + HEAP_PARENT(node)); - node = HEAP_PARENT(node); - //if(node == 0) break; - } - } - break; + { + idx_t node = H->count; + ++(H -> count); + H -> data[node].value = val; + H -> data[node].array_idx = array_idx; + /* + * Push up the node through the heap + */ + while(node && H -> data[node].value > H -> data[HEAP_PARENT(node)].value) + { + swap_heap_node(H -> data + node, H -> data + HEAP_PARENT(node)); + node = HEAP_PARENT(node); + //if(node == 0) break; + } + } + break; case 2: - { - set_root_max_heap(H,val,array_idx); - } + { + set_root_max_heap(H,val,array_idx); + } break; default: break; diff --git a/src/tree/kdtreeV2.c b/src/tree/kdtreeV2.c index aa0f21ee052dd09a87a1fb177598f8ac2508aa21..51a88c375488038d2a57460ec6018463e7fa4922 100644 --- a/src/tree/kdtreeV2.c +++ b/src/tree/kdtreeV2.c @@ -195,12 +195,14 @@ kdnode_v2* make_tree_kdnode_v2(kdnode_v2* t, int start, int end, kdnode_v2* pare } } + /* #ifdef SWMEM if(parent == NULL) { swapMem_kdv2 = (FLOAT_TYPE*)malloc(sizeof(FLOAT_TYPE)*data_dims); } #endif + */ @@ -257,12 +259,14 @@ kdnode_v2* make_tree_kdnode_v2(kdnode_v2* t, int start, int end, kdnode_v2* pare n->level = level; } + /* #ifdef SWMEM if(parent == NULL) { swapMem_kdv2 = malloc(sizeof(FLOAT_TYPE)*data_dims); } #endif + */ return n; } @@ -359,6 +363,9 @@ void kdtree_v2_init(kdtree_v2* tree, FLOAT_TYPE* data, size_t n_nodes, unsigned void kdtree_v2_free(kdtree_v2* tree) { + for(uint64_t i = 0; i < tree->n_nodes; ++i) + if(tree -> _nodes[i].node_list.data) free(tree -> _nodes[i].node_list.data); + free(tree -> _nodes); } diff --git a/src/tree/tree.c b/src/tree/tree.c index 4af8fa8d2d89cf1bf4ffd6687c7db06b90475760..9ce69440ad6bf2e1a7ee6bb2f7b0a1653b2b5bcd 100644 --- a/src/tree/tree.c +++ b/src/tree/tree.c @@ -692,10 +692,24 @@ int compute_n_nodes(int n) void top_tree_init(global_context_t *ctx, top_kdtree_t *tree) { /* we want procs leaves */ - int l = (int)(ceil(log2((float_t)ctx -> world_size))); - int tree_nodes = (1 << (l + 1)) - 1; + int l = (int)(ceil(log2((float_t)ctx -> world_size))); + int tree_nodes = (1 << (l + 1)) - 1; + //int tree_nodes = compute_n_nodes(ctx -> world_size); //MPI_DB_PRINT("Tree nodes %d %d %d %d\n", ctx -> world_size,l, tree_nodes, compute_n_nodes(ctx -> world_size)); tree->_nodes = (top_kdtree_node_t*)malloc(tree_nodes * sizeof(top_kdtree_node_t)); + for(int i = 0; i < tree_nodes; ++i) + { + tree -> _nodes[i].lch = NULL; + tree -> _nodes[i].rch = NULL; + tree -> _nodes[i].parent = NULL; + tree -> _nodes[i].owner = -1; + tree -> _nodes[i].n_points = 0; + tree -> _nodes[i].split_dim = -1; + tree -> _nodes[i].split_val = 0.f; + tree -> _nodes[i].lb_node_box = NULL; + tree -> _nodes[i].ub_node_box = NULL; + + } tree->_capacity = tree_nodes; tree->dims = ctx->dims; tree->count = 0; @@ -718,6 +732,7 @@ top_kdtree_node_t* top_tree_generate_node(global_context_t* ctx, top_kdtree_t* t top_kdtree_node_t* ptr = tree -> _nodes + tree -> count; ptr -> lch = NULL; ptr -> rch = NULL; + ptr -> parent = NULL; ptr -> lb_node_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); ptr -> ub_node_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); ptr -> owner = -1; @@ -1131,13 +1146,13 @@ void exchange_points(global_context_t* ctx, top_kdtree_t* tree) ctx -> local_data = rcvbuffer; /* check exchange */ - /* + for(size_t i = 0; i < ctx -> local_n_points; ++i) { int o = compute_point_owner(ctx, tree, ctx -> local_data + (i * ctx -> dims)); if(o != ctx -> mpi_rank) DB_PRINT("rank %d got an error\n",ctx -> mpi_rank); } - */ + free(points_owners); free(points_per_proc); free(partition_offset); @@ -1176,7 +1191,6 @@ void tree_walk( int* point_to_send_count, int* point_to_send_capacity) { - if(root -> owner != -1 && root -> owner != ctx -> mpi_rank) { @@ -1675,6 +1689,13 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre if(local_idx_of_the_point[i]) free(local_idx_of_the_point[i]); } + for(int i = 0; i < ctx -> local_n_points; ++i) + { + free(dp_info[i].ngbh.data); + } + + free(data_to_send_per_proc); + free(local_idx_of_the_point); free(heap_batches_per_node); free(rcv_heap_batches); free(rcv_work_batches); @@ -1786,10 +1807,10 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) // 190M points // std_g2980844_091_0000 - data = read_data_file(ctx,"../norm_data/std_g2980844_091_0000",MY_TRUE); + data = read_data_file(ctx,"../norm_data/std_g2980844_091_0000",MY_TRUE); /* 1M points ca.*/ - //data = read_data_file(ctx,"../norm_data/std_LR_091_0001",MY_TRUE); + // data = read_data_file(ctx,"../norm_data/std_LR_091_0001",MY_TRUE); /* 8M points */ @@ -1805,8 +1826,8 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) // ctx -> n_points = 48*5*2000; ctx->n_points = ctx->n_points / ctx->dims; - ctx->n_points = (ctx->n_points * 10) / 10; - //ctx -> n_points = ctx -> world_size * 1000; + ctx->n_points = (ctx->n_points * 10 ) / 10; + // ctx -> n_points = ctx -> world_size * 1000; mpi_printf(ctx, "Read %lu points in %u dims\n", ctx->n_points, ctx->dims); } @@ -1870,8 +1891,24 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) kdtree_v2 local_tree; kdtree_v2_init( &local_tree, ctx -> local_data, ctx -> local_n_points, (unsigned int)ctx -> dims); int k = 300; + //int k = 30; datapoint_info_t* dp_info = (datapoint_info_t*)malloc(ctx -> local_n_points * sizeof(datapoint_info_t)); + /* initialize, to cope with valgrind */ + for(uint64_t i = 0; i < ctx -> local_n_points; ++i) + { + dp_info[i].ngbh.data = NULL; + dp_info[i].ngbh.N = 0; + dp_info[i].ngbh.count = 0; + dp_info[i].g = 0.f; + dp_info[i].log_rho = 0.f; + dp_info[i].log_rho_c = 0.f; + dp_info[i].log_rho_err = 0.f; + dp_info[i].array_idx = -1; + dp_info[i].kstar = -1; + dp_info[i].is_center = -1; + dp_info[i].cluster_idx = -1; + } TIME_START build_local_tree(ctx, &local_tree);