diff --git a/README.md b/README.md index 8ee631970a9df371db758a7b30f93093331bcf66..a08809c09453dd5aea5bbe47b7d2081913ffa290 100644 --- a/README.md +++ b/README.md @@ -17,9 +17,9 @@ The suggestion is to run it with one mpi task per socket. - [x] ~~arugment parser~~ - [x] ~~H2: graph reduction~~ + - [x] ~~kdtree: implement slim heap~~ - [ ] prettify overall stdout - [ ] H1: implementation of lock free centers elimination - - [ ] kdtree: implement slim heap - [ ] kdtree: optimization an profiling - [ ] io: curation of IO using mpi IO or other solutions diff --git a/src/adp/adp.c b/src/adp/adp.c index 8544b57a6c45f1c74a42bced150aaa9806384041..737bf8f8e1ade09d40387eae525fcee558221f4c 100644 --- a/src/adp/adp.c +++ b/src/adp/adp.c @@ -55,7 +55,7 @@ float_t compute_ID_two_NN_ML(global_context_t* ctx, datapoint_info_t* dp_info, i float_t log_mus = 0.; for(idx_t i = 0; i < n; ++i) { - log_mus += 0.5 * log(dp_info[i].ngbh.data[2].value/dp_info[i].ngbh.data[1].value); + log_mus += 0.5 * log(dp_info[i].ngbh[2].value/dp_info[i].ngbh[1].value); } float_t d = 0; @@ -82,7 +82,7 @@ float_t get_j_ksel_dist(global_context_t* ctx, idx_t j, idx_t ksel, MPI_Win expo if(owner == ctx -> mpi_rank) { idx_t pos = j - ctx -> idx_start; - return ctx -> local_datapoints[pos].ngbh.data[ksel].value; + return ctx -> local_datapoints[pos].ngbh[ksel].value; } else { @@ -103,7 +103,7 @@ idx_t get_j_ksel_idx(global_context_t* ctx, idx_t j, idx_t ksel, MPI_Win exposed if(owner == ctx -> mpi_rank) { idx_t pos = j - ctx -> idx_start; - return ctx -> local_datapoints[pos].ngbh.data[ksel].array_idx; + return ctx -> local_datapoints[pos].ngbh[ksel].array_idx; } else { @@ -132,7 +132,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int MPI_Barrier(ctx -> mpi_communicator); - idx_t k = ctx -> local_datapoints[0].ngbh.N; + idx_t k = ctx -> k; struct timespec start_tot, finish_tot; double elapsed_tot; @@ -145,7 +145,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int clock_gettime(CLOCK_MONOTONIC, &start_tot); } - idx_t kMAX = ctx -> local_datapoints[0].ngbh.N - 1; + idx_t kMAX = ctx -> k - 1; float_t omega = 0.; if(sizeof(float_t) == sizeof(float)){ omega = powf(PI_F,d/2)/tgammaf(d/2.0f + 1.0f);} @@ -174,7 +174,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int { for(idx_t k = 0; k <= kMAX; ++k) { - local_datapoints[i].ngbh.data[k].value = omega * pow(local_datapoints[i].ngbh.data[k].value, d/2.); + local_datapoints[i].ngbh[k].value = omega * pow(local_datapoints[i].ngbh[k].value, d/2.); } //initialize kstar at 0 local_datapoints[i].kstar = 0; @@ -191,8 +191,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int //request data idx_t ksel = j - 1; -#if defined(THREAD_FUNNELED) -#else +#if !defined(THREAD_FUNNELED) #pragma omp parallel for #endif @@ -203,7 +202,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int { //vvi = omega * pow(local_datapoints[i].ngbh.data[ksel].value,d/2.); - idx_t jj = local_datapoints[i].ngbh.data[j].array_idx; + idx_t jj = local_datapoints[i].ngbh[j].array_idx; /* * note jj can be an halo point @@ -215,7 +214,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int if(owner == ctx -> mpi_rank) { - scratch_heap_nodes[i] = ctx -> local_datapoints[pos].ngbh.data[ksel]; + scratch_heap_nodes[i] = ctx -> local_datapoints[pos].ngbh[ksel]; } else { @@ -234,8 +233,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int MPI_Win_fence(MPI_MODE_NOPUT,exposed_ngbh); //process data -#if defined(THREAD_FUNNELED) -#else +#if !defined(THREAD_FUNNELED) #pragma omp parallel for #endif for(idx_t i = 0; i < ctx -> local_n_points; ++i) @@ -243,7 +241,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int if(ctx -> local_datapoints[i].kstar == 0) { float_t vvi, vvj, vp, dL; - vvi = local_datapoints[i].ngbh.data[ksel].value; + vvi = local_datapoints[i].ngbh[ksel].value; vvj = scratch_heap_nodes[i].value; vp = (vvi + vvj)*(vvi + vvj); dL = -2.0 * ksel * log(4.*vvi*vvj/vp); @@ -278,7 +276,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int { idx_t k = kMAX - 1; - float_t vvi = ctx -> local_datapoints[i].ngbh.data[k].value; + float_t vvi = ctx -> local_datapoints[i].ngbh[k].value; local_datapoints[i].kstar = k; local_datapoints[i].log_rho = log((float_t)(k)/vvi/((float_t)(ctx -> n_points))); @@ -326,7 +324,7 @@ float_t get_j_ksel_dist_v2(global_context_t* ctx, idx_t i, idx_t j, idx_t ksel, if(owner == ctx -> mpi_rank) { idx_t pos = j - ctx -> idx_start; - return ctx -> local_datapoints[pos].ngbh.data[ksel].value; + return ctx -> local_datapoints[pos].ngbh[ksel].value; } else { @@ -646,10 +644,9 @@ clusters_t Heuristic1(global_context_t *ctx) dp_info[i].is_center = 1; dp_info[i].cluster_idx = -1; //printf("%lf\n",p -> g); - heap i_ngbh = dp_info[i].ngbh; for(idx_t k = 1; k < maxk; ++k) { - idx_t ngbh_index = i_ngbh.data[k].array_idx; + idx_t ngbh_index = dp_info[i].ngbh[k].array_idx; datapoint_info_t dj = find_possibly_halo_datapoint_rma(ctx, ngbh_index, win_datapoints); float_t gj = dj.g; if(gj > gi){ @@ -697,7 +694,7 @@ clusters_t Heuristic1(global_context_t *ctx) for(idx_t j = 1; j < i_point.kstar + 1; ++j) { - idx_t jidx = i_point.ngbh.data[j].array_idx; + idx_t jidx = i_point.ngbh[j].array_idx; datapoint_info_t j_point = find_possibly_halo_datapoint_rma(ctx, jidx, win_datapoints); @@ -863,7 +860,7 @@ clusters_t Heuristic1(global_context_t *ctx) int cluster = -1; idx_t k = 0; idx_t p_idx; - idx_t max_k = p -> ngbh.N; + idx_t max_k = ctx -> k; /*assign each particle at the same cluster as the nearest particle of higher density*/ datapoint_info_t p_retrieved; while( k < max_k - 1 && cluster == -1) @@ -871,7 +868,7 @@ clusters_t Heuristic1(global_context_t *ctx) ++k; - p_idx = p -> ngbh.data[k].array_idx; + p_idx = p -> ngbh[k].array_idx; p_retrieved = find_possibly_halo_datapoint_rma(ctx, p_idx, win_datapoints); int flag = p_retrieved.g > p -> g; @@ -890,7 +887,7 @@ clusters_t Heuristic1(global_context_t *ctx) idx_t gm_index = SIZE_MAX; for(idx_t k = 0; k < max_k; ++k) { - idx_t ngbh_index = p -> ngbh.data[k].array_idx; + idx_t ngbh_index = p -> ngbh[k].array_idx; for(idx_t m = 0; m < removed_centers.count; ++m) { idx_t max_rho_idx = max_rho.data[m]; @@ -1006,7 +1003,7 @@ void Heuristic2(global_context_t* ctx, clusters_t* cluster) } idx_t nclus = cluster->centers.count; - idx_t max_k = dp_info[0].ngbh.N; + idx_t max_k = ctx -> k; for(idx_t i = 0; i < n; ++i) { @@ -1018,7 +1015,7 @@ void Heuristic2(global_context_t* ctx, clusters_t* cluster) for(idx_t k = 1; k < dp_info[i].kstar + 1; ++k) { /*index of the kth ngbh of n*/ - idx_t j = dp_info[i].ngbh.data[k].array_idx; + idx_t j = dp_info[i].ngbh[k].array_idx; pp = NOBORDER; /*Loop over kn neigbhours to find if n is the nearest*/ /*if cluster of the particle in nbhg is c then check is neighborhood*/ diff --git a/src/common/common.h b/src/common/common.h index c1df3b6d6a66b521fa574e853d70739a30879d1a..d9bdc7e799df5bcc60a1a8488d7a05002e168beb 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -125,7 +125,7 @@ typedef struct datapoint_info_t { /* * datapoint object */ - heap ngbh; //heap object stores nearest neighbors of each point + heap_node* ngbh; //heap object stores nearest neighbors of each point int is_center; //flag signaling if a point is a cluster center int cluster_idx; //cluster index idx_t array_idx; //global index of the point in the dataset diff --git a/src/main/main.c b/src/main/main.c index 2c09e47f2652b25ea2b6e184a091c5eb4ed8addb..a4d797b93f1b8d6848cc8b46f5a2b0ebe87b6bea 100644 --- a/src/main/main.c +++ b/src/main/main.c @@ -384,9 +384,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) datapoint_info_t* dp_info = (datapoint_info_t*)MY_MALLOC(ctx -> local_n_points * sizeof(datapoint_info_t)); for(uint64_t i = 0; i < ctx -> local_n_points; ++i) { - dp_info[i].ngbh.data = NULL; - dp_info[i].ngbh.N = 0; - dp_info[i].ngbh.count = 0; + dp_info[i].ngbh = NULL; dp_info[i].g = 0.f; dp_info[i].log_rho = 0.f; dp_info[i].log_rho_c = 0.f; diff --git a/src/tree/tree.c b/src/tree/tree.c index f66c6f2687c3e04d3e65d7e4fcff40e98114f404..12f181e3ec0ba4068f08e027c594c145df53622a 100644 --- a/src/tree/tree.c +++ b/src/tree/tree.c @@ -1483,17 +1483,19 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre { idx_t idx = local_tree -> _nodes[p].array_idx; /* actually we want to preserve the heap to then insert guesses from other nodes */ - dp_info[idx].ngbh.data = ctx -> __local_heap_buffers + k * idx; - dp_info[idx].ngbh.count = 0; - dp_info[idx].ngbh.N = k; + heap tmp_heap; + tmp_heap.data = ctx -> __local_heap_buffers + k * idx; + tmp_heap.count = 0; + tmp_heap.N = k; //dp_info[idx].ngbh = knn_kdtree_v2_no_heapsort(local_tree -> _nodes[p].data, local_tree -> root, k); - knn_sub_tree_search_kdtree_v2(local_tree -> _nodes[p].data, local_tree -> root, &(dp_info[idx].ngbh)); + knn_sub_tree_search_kdtree_v2(local_tree -> _nodes[p].data, local_tree -> root, &tmp_heap); - convert_heap_idx_to_global(ctx, &(dp_info[idx].ngbh)); + convert_heap_idx_to_global(ctx, &tmp_heap); dp_info[idx].cluster_idx = -1; dp_info[idx].is_center = 0; dp_info[idx].array_idx = idx + ctx -> idx_start; + dp_info[idx].ngbh = tmp_heap.data; } elapsed_time = TIME_STOP; LOG_WRITE("Local neighborhood search", elapsed_time); @@ -1517,7 +1519,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre #pragma omp parallel for for(int i = 0; i < ctx -> local_n_points; ++i) { - float_t max_dist = dp_info[i].ngbh.data[0].value; + float_t max_dist = dp_info[i].ngbh[0].value; float_t* point = ctx -> local_data + (i * ctx -> dims); tree_walk_v2_find_n_points(ctx, top_tree -> root, i, max_dist, point, point_to_snd_capacity); @@ -1537,7 +1539,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre #pragma omp parallel for for(int i = 0; i < ctx -> local_n_points; ++i) { - float_t max_dist = dp_info[i].ngbh.data[0].value; + float_t max_dist = dp_info[i].ngbh[0].value; float_t* point = ctx -> local_data + (i * ctx -> dims); tree_walk_v2_append_points(ctx, top_tree -> root, i, max_dist, point, data_to_send_per_proc, local_idx_of_the_point, point_to_snd_count); @@ -1832,7 +1834,11 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre /* insert the points into the heap */ for(int j = 0; j < k; ++j) { - insert_max_heap(&(dp_info[idx].ngbh), H.data[j].value, H.data[j].array_idx); + heap tmp_heap; + tmp_heap.N = k; + tmp_heap.count = k; + tmp_heap.data = dp_info[idx].ngbh; + insert_max_heap(&(tmp_heap), H.data[j].value, H.data[j].array_idx); } } @@ -1858,7 +1864,12 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre #pragma omp parallel for for(int i = 0; i < ctx -> local_n_points; ++i) { - heap_sort(&(dp_info[i].ngbh)); + heap tmp_heap; + tmp_heap.N = k; + tmp_heap.count = k; + tmp_heap.data = dp_info[i].ngbh; + + heap_sort(&(tmp_heap)); } elapsed_time = TIME_STOP;