From fe30144acebcbce03bae2fcffde757d05d827f61 Mon Sep 17 00:00:00 2001 From: lykos98 Date: Thu, 29 Feb 2024 14:29:41 +0100 Subject: [PATCH] tree build complete, working on diagnostics --- src/tree/tree.c | 195 ++++++++++++++++++++++++++++++++++-------------- src/tree/tree.h | 8 +- 2 files changed, 143 insertions(+), 60 deletions(-) diff --git a/src/tree/tree.c b/src/tree/tree.c index c8c9035..ac507a4 100644 --- a/src/tree/tree.c +++ b/src/tree/tree.c @@ -378,8 +378,6 @@ float_t check_pc_pointset_parallel(global_context_t *ctx, pointset_t *ps, guess_ } void compute_bounding_box_pointset(global_context_t *ctx, pointset_t *ps) { - ps->lb_box = (float_t *)malloc(ps->dims * sizeof(float_t)); - ps->ub_box = (float_t *)malloc(ps->dims * sizeof(float_t)); for (size_t d = 0; d < ps->dims; ++d) { @@ -432,6 +430,8 @@ void compute_bounding_box_pointset(global_context_t *ctx, pointset_t *ps) { lb[d], ub[d]); MPI_DB_PRINT("\n"); */ + + #undef local_data #undef lb #undef ub @@ -466,6 +466,7 @@ void compute_adaptive_binning_pointset(global_context_t *ctx, pointset_t *ps, qsort(ps->data, ps->n_points, ps->dims * sizeof(float_t), compare_data_element_sort); } + /* * Now what is more convenient? 
We have to also * keep track of who owns the most "median thing" @@ -781,7 +782,9 @@ int retrieve_guess_adaptive(global_context_t *ctx, pointset_t *ps, float_t x_guess = (pc - y0) / (y1 - y0) * (x1 - x0) + x0; + /* MPI_DB_PRINT("[MASTER] best guess @ %lf is %lf on bin %d on dimension %d --- x0 %lf x1 %lf y0 %lf y1 %lf\n", pc, x_guess, idx, d, x0, x1, y0, y1); + */ /* find nearest point btw guess */ @@ -862,9 +865,11 @@ guess_t retrieve_guess_pure(global_context_t *ctx, pointset_t *ps, float_t x_guess = (pc - y0) / (y1 - y0) * (x1 - x0) + x0; - /* + + /* MPI_DB_PRINT("[MASTER] best guess @ %lf is %lf on bin %d on dimension %d --- x0 %lf x1 %lf y0 %lf y1 %lf\n",pc, x_guess,idx, d, x0, x1, y0, y1); */ + guess_t g = {.bin_idx = idx, .x_guess = x_guess}; @@ -979,22 +984,42 @@ void compute_pure_global_binning(global_context_t *ctx, pointset_t *ps, { /* compute binning of data along dimension d */ uint64_t *local_bin_count = (uint64_t *)malloc(k_global * sizeof(uint64_t)); + //MPI_DB_PRINT("%p %p %p %p %p\n", local_bin_count, global_bin_counts, ps -> data, ps -> lb_box, ps -> ub_box); + //DB_PRINT("rank %d npoints %lu %p %p %p %p %p\n",ctx -> mpi_rank, ps -> n_points, local_bin_count, global_bin_counts, ps -> data, ps -> lb_box, ps -> ub_box); for (size_t k = 0; k < k_global; ++k) { local_bin_count[k] = 0; global_bin_counts[k] = 0; } - float_t bin_w = (ps->ub_box[d] - ps->lb_box[d]) / k_global; + /* + MPI_DB_PRINT("[PS BOUNDING BOX %d]: ", ctx -> mpi_rank); + for(size_t d = 0; d < ps -> dims; ++d) MPI_DB_PRINT("d%d:[%lf, %lf] ",(int)d, ps -> lb_box[d], ps -> ub_box[d]); MPI_DB_PRINT("\n"); + MPI_DB_PRINT("\n"); + */ + + + + float_t bin_w = (ps-> ub_box[d] - ps->lb_box[d]) / (float_t)k_global; for (size_t i = 0; i < ps->n_points; ++i) { float_t p = ps->data[i * ps->dims + d]; int bin_idx = (int)((p - ps->lb_box[d]) / bin_w); + /* + if(bin_idx < 0) + { + DB_PRINT("rank %d qua %lf %lf %d %lf\n",ctx -> mpi_rank, (p - ps->lb_box[d]), (p - ps->lb_box[d]) / bin_w, bin_idx, 
bin_w); + DB_PRINT("[PS BOUNDING BOX %d i have %d]: ", ctx -> mpi_rank,d); + for(size_t d = 0; d < ps -> dims; ++d) DB_PRINT("d%d:[%lf, %lf] ",(int)d, ps -> lb_box[d], ps -> ub_box[d]); MPI_DB_PRINT("\n"); + DB_PRINT("\n"); + } + */ local_bin_count[bin_idx]++; } MPI_Allreduce(local_bin_count, global_bin_counts, k_global, MPI_UNSIGNED_LONG, MPI_SUM, ctx->mpi_communicator); + free(local_bin_count); } int partition_data_around_value(float_t *array, int vec_len, int compare_dim, @@ -1093,8 +1118,8 @@ guess_t refine_pure_binning(global_context_t *ctx, pointset_t *ps, tmp_ps.n_points = end_idx - start_idx; tmp_ps.data = ps->data + start_idx * ps->dims; tmp_ps.dims = ps->dims; - tmp_ps.lb_box = NULL; - tmp_ps.ub_box = NULL; + tmp_ps.lb_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); + tmp_ps.ub_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); compute_bounding_box_pointset(ctx, &tmp_ps); @@ -1146,10 +1171,8 @@ void free_queue(partition_queue_t *pq) { free(pq->data); } void get_pointset_from_partition(pointset_t *ps, partition_t *part) { - ps->lb_box = NULL; - ps->ub_box = NULL; ps->n_points = part->n_points; - ps->data = part->base_ptr; + ps->data = part->base_ptr; ps->n_points = part->n_points; } @@ -1184,6 +1207,11 @@ void top_tree_init(global_context_t *ctx, top_kdtree_t *tree) void top_tree_free(global_context_t *ctx, top_kdtree_t *tree) { + for(int i = 0; i < tree -> count; ++i) + { + if(tree -> _nodes[i].node_box_lb) free(tree -> _nodes[i].node_box_lb); + if(tree -> _nodes[i].node_box_ub) free(tree -> _nodes[i].node_box_ub); + } free(tree->_nodes); return; } @@ -1197,11 +1225,18 @@ top_kdtree_node_t* top_tree_generate_node(global_context_t* ctx, top_kdtree_t* t tree->_capacity = new_cap; } top_kdtree_node_t* ptr = tree -> _nodes + tree -> count; + ptr -> node_box_lb = (float_t*)malloc(ctx -> dims * sizeof(float_t)); + ptr -> node_box_ub = (float_t*)malloc(ctx -> dims * sizeof(float_t)); ++tree -> count; return ptr; } +void compute_boxes(global_context_t* 
ctx, top_kdtree_t* tree) +{ + return; +} + void build_top_kdtree(global_context_t *ctx, pointset_t *og_pointset, top_kdtree_t *tree, int n_bins, float_t tolerance) { size_t tot_n_points = 0; @@ -1231,60 +1266,107 @@ void build_top_kdtree(global_context_t *ctx, pointset_t *og_pointset, top_kdtree enqueue_partition(&queue, current_partition); pointset_t current_pointset; + current_pointset.lb_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); + current_pointset.ub_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); while (queue.count) { + /*dequeue the partition to process */ current_partition = dequeue_partition(&queue); + + /* generate e pointset for that partition */ + get_pointset_from_partition(&current_pointset, &current_partition); current_pointset.dims = ctx->dims; - /* handle partition */ - compute_bounding_box_pointset(ctx, &current_pointset); - float_t fraction = (current_partition.n_procs / 2) / (float_t)current_partition.n_procs; - guess_t g = compute_median_pure_binning(ctx, &current_pointset, fraction, selected_dim, n_bins, tolerance); - int pv = partition_data_around_value(current_pointset.data, ctx->dims, selected_dim, 0, current_pointset.n_points, g.x_guess); + /*generate a tree node */ + + top_kdtree_node_t* current_node = current_node = top_tree_generate_node(ctx, tree); /* insert node */ + MPI_DB_PRINT("Handling partition: \n\tcurrent_node %p, \n\tdim %d, \n\tn_points %d, \n\tstart_proc %d, \n\tn_procs %d, \n\tparent %p\n", + current_node, + current_partition.d, + current_partition.n_points, + current_partition.start_proc, + current_partition.n_procs, + current_partition.parent); + MPI_DB_PRINT("-------------------\n\n"); + + switch (current_partition.lr) { + case TOP_TREE_LCH: + if(current_partition.parent) + { + current_node -> parent = current_partition.parent; + current_node -> parent -> lch = current_node; + } + break; - /* - * - * if points in the pointset is less than exp point per node then create a - * leaf actually better to have a margin of x percent
around ppn - * - */ + case TOP_TREE_RCH: + if(current_partition.parent) + { + current_node -> parent = current_partition.parent; + current_node -> parent -> rch = current_node; + } + break; + default: + break; + } - top_kdtree_node_t* current_node; - current_node -> data = g.x_guess; current_node -> split_dim = selected_dim; current_node -> parent = current_partition.parent; + current_node -> lch = NULL; + current_node -> rch = NULL; - size_t points_left = current_partition.n_points * fraction; - size_t points_right = current_partition.n_points - points_left; - - int procs_left = current_partition.n_procs * fraction; - int procs_right = current_partition.n_procs - procs_left; - - int next_dimension = (++selected_dim) % (ctx->dims); - partition_t left_partition = { - .n_points = points_left, - .n_procs = procs_left, - .parent = current_node, - .lr = TOP_TREE_LCH, - .base_ptr = current_pointset.data, - .d = next_dimension, - }; - - partition_t right_partition = { - .n_points = points_right, - .n_procs = procs_right, - .parent = current_node, - .lr = TOP_TREE_RCH, - .base_ptr = current_pointset.data + pv, - .d = next_dimension - }; - - /* get left and right pointset */ + /* handle partition */ + if(current_partition.n_procs > 1) + { + float_t fraction = (current_partition.n_procs / 2) / (float_t)current_partition.n_procs; + guess_t g = compute_median_pure_binning(ctx, &current_pointset, fraction, current_partition.d, n_bins, tolerance); + int pv = partition_data_around_value(current_pointset.data, ctx->dims, current_partition.d, 0, current_pointset.n_points, g.x_guess); + + current_node -> data = g.x_guess; + + size_t points_left = (size_t)pv; + size_t points_right = current_partition.n_points - points_left; + + int procs_left = current_partition.n_procs * fraction; + int procs_right = current_partition.n_procs - procs_left; + + int next_dimension = (++selected_dim) % (ctx->dims); + partition_t left_partition = { + .n_points = points_left, + .n_procs = procs_left, +
.start_proc = current_partition.start_proc, + .parent = current_node, + .lr = TOP_TREE_LCH, + .base_ptr = current_pointset.data, + .d = next_dimension, + }; + + partition_t right_partition = { + .n_points = points_right, + .n_procs = procs_right, + .start_proc = current_partition.start_proc + procs_left, + .parent = current_node, + .lr = TOP_TREE_RCH, + .base_ptr = current_pointset.data + pv * current_pointset.dims, + .d = next_dimension + }; + + enqueue_partition(&queue, left_partition); + enqueue_partition(&queue, right_partition); + MPI_Barrier(ctx -> mpi_communicator); + } + else + { + current_node -> owner = current_partition.start_proc; + } + /* set the root */ + if(current_node -> parent == NULL) tree -> root = current_node; } + MPI_DB_PRINT("Root is %p\n", tree -> root); free_queue(&queue); } @@ -1365,11 +1447,12 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) original_ps.data = ctx->local_data; original_ps.dims = ctx->dims; original_ps.n_points = ctx->local_n_points; - original_ps.lb_box = NULL; - original_ps.ub_box = NULL; + original_ps.lb_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); + original_ps.ub_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); float_t incr = 0.05; float_t tol = 0.001; + /* for (int d = 0; d < ctx->dims; ++d) { for (float_t f = incr; f < 1; f += incr) @@ -1390,13 +1473,15 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) } MPI_DB_PRINT("--------------------------------------\n\n"); } + */ + + top_kdtree_t tree; + top_tree_init(ctx, &tree); + build_top_kdtree(ctx, &original_ps, &tree, k_global, tol); + + top_tree_free(ctx, &tree); - // compute_bounding_box(ctx); - // global_binning_check(ctx, data,d, k_global); - // retrieve_pc(ctx, global_bin_counts, best_guess, k_global, d, f); - // check_pc(ctx, best_guess, data, d, f); - // compute_medians_and_check(ctx,data); free(send_counts); free(displacements); diff --git a/src/tree/tree.h b/src/tree/tree.h 
index 2e0ae32..f066ba8 100644 --- a/src/tree/tree.h +++ b/src/tree/tree.h @@ -24,6 +24,7 @@ typedef struct partition_t { int d; int n_procs; + int start_proc; size_t n_points; float_t* base_ptr; int lr; @@ -41,14 +42,11 @@ typedef struct partition_queue_t typedef struct top_kdtree_node_t { float_t data; - //float_t* node_box_lb; //Needed? - //float_t* node_box_ub; //Needed? + float_t* node_box_lb; //Needed? + float_t* node_box_ub; //Needed? int owner; int split_dim; - int is_leaf; size_t n_points; - float_t* lb_box; - float_t* ub_box; struct top_kdtree_node_t* lch; struct top_kdtree_node_t* rch; struct top_kdtree_node_t* parent; -- GitLab