From 04c3417bab2964c168c289bfb38b1960c34537f0 Mon Sep 17 00:00:00 2001 From: lykos98 <francy273998@gmail.com> Date: Tue, 29 Apr 2025 09:45:23 +0200 Subject: [PATCH] =?UTF-8?q?added=20working=20implementation=20of=20h1=20op?= =?UTF-8?q?tim=C2=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/adp/adp.c | 55 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/src/adp/adp.c b/src/adp/adp.c index 014f3d0..1f1c229 100644 --- a/src/adp/adp.c +++ b/src/adp/adp.c @@ -510,17 +510,13 @@ datapoint_info_t find_possibly_halo_datapoint_rma(global_context_t* ctx, idx_t i else { datapoint_info_t tmp_dp; - #pragma omp critical - { - idx_t i = idx - ctx -> rank_idx_start[owner]; - MPI_Request request; - MPI_Status status; - - MPI_Rget(&tmp_dp, sizeof(datapoint_info_t), MPI_BYTE, owner, - i * sizeof(datapoint_info_t), sizeof(datapoint_info_t), MPI_BYTE, win_datapoints, &request); - MPI_Wait(&request, MPI_STATUS_IGNORE); + idx_t i = idx - ctx -> rank_idx_start[owner]; + MPI_Request request; + MPI_Status status; - } + MPI_Rget(&tmp_dp, sizeof(datapoint_info_t), MPI_BYTE, owner, + i * sizeof(datapoint_info_t), sizeof(datapoint_info_t), MPI_BYTE, win_datapoints, &request); + MPI_Wait(&request, MPI_STATUS_IGNORE); return tmp_dp; } @@ -680,9 +676,11 @@ clusters_t Heuristic1(global_context_t *ctx) struct timespec start_tot, finish_tot; double elapsed_tot; + double elapsed_time; TIME_DEF; + TIME_START; lu_dynamic_array_t all_centers, removed_centers, actual_centers, max_rho; lu_dynamic_array_allocate(&all_centers); @@ -698,7 +696,7 @@ clusters_t Heuristic1(global_context_t *ctx) MPI_Win_create(ctx -> local_datapoints, ctx -> local_n_points * sizeof(datapoint_info_t), 1, MPI_INFO_NULL, ctx -> mpi_communicator, &win_datapoints); MPI_Win_fence(0, win_datapoints); - MPI_Win_lock_all(0, win_datapoints); + //MPI_Win_lock_all(0, win_datapoints); #if !defined(THREAD_FUNNELED) #pragma omp parallel for @@ -744,7 +742,6 @@ clusters_t Heuristic1(global_context_t *ctx) * * optimized v2 use a queue of center removal and then exchange them */ - heap_node* to_remove_mask = (heap_node*)MY_MALLOC(n*sizeof(heap_node)); for(idx_t p = 0; p < n; ++p) @@ -752,6 +749,9 @@ clusters_t Heuristic1(global_context_t *ctx) to_remove_mask[p].array_idx = MY_SIZE_MAX; to_remove_mask[p].value = -9999999; } + + // sort by density + qsort(dp_info_ptrs, n, sizeof(datapoint_info_t*), cmpPP); /** @@ -774,6 +774,13 @@ clusters_t Heuristic1(global_context_t *ctx) omp_init_lock(lock_array + i); } + elapsed_time = TIME_STOP; + LOG_WRITE("Putative centers", elapsed_time); + + TIME_START; + + MPI_Win_fence(MPI_MODE_NOPUT, win_datapoints); + #if !defined(THREAD_FUNNELED) #pragma omp parallel for schedule(dynamic) #endif @@ -793,9 +800,10 @@ clusters_t Heuristic1(global_context_t *ctx) // actually is the p-th point int owner = foreign_owner(ctx, jidx); //if local process it + idx_t jpos = jidx - ctx -> idx_start; if(owner == ctx -> mpi_rank) { - idx_t jpos = jidx - ctx -> idx_start; + //acquire the lock omp_set_lock(lock_array + jpos); if(i_point.g > to_remove_mask[jpos].value) { @@ -815,7 +823,13 @@ clusters_t Heuristic1(global_context_t *ctx) } } + MPI_Win_fence(MPI_MODE_NOPUT, win_datapoints); + //assemble arrays into a single buffer + + elapsed_time = TIME_STOP; + LOG_WRITE("Finding centers to prune", elapsed_time); + TIME_START; idx_t tot_removal = 0; for(idx_t p = 0; p < n; ++p) @@ -963,8 +977,12 @@ clusters_t Heuristic1(global_context_t *ctx) recv_removals, recv_counts, recv_displs, MPI_BYTE, ctx -> mpi_communicator); // merge into the mask + + elapsed_time = TIME_STOP; + LOG_WRITE("Communicating eliminations", elapsed_time); + TIME_START; - #pragma omp parallel for + #pragma omp parallel for schedule(dynamic) for(idx_t i = 0; i < tot_recv_counts; ++i) { idx_t el_pos = recv_removals[i].target_id - ctx -> idx_start; @@ -1046,6 +1064,11 @@ clusters_t Heuristic1(global_context_t *ctx) free(lock_array); free(recv_removals); + elapsed_time = TIME_STOP; + LOG_WRITE("Merging", elapsed_time); + + TIME_START; + int n_centers = (int)actual_centers.count; int tot_centers; MPI_Allreduce(&n_centers, &tot_centers, 1, MPI_INT, MPI_SUM, ctx -> mpi_communicator); @@ -1188,7 +1211,7 @@ clusters_t Heuristic1(global_context_t *ctx) } - MPI_Win_unlock_all(win_datapoints); + //MPI_Win_unlock_all(win_datapoints); MPI_Win_fence(0, win_datapoints); MPI_Win_free(&win_datapoints); @@ -1209,6 +1232,8 @@ clusters_t Heuristic1(global_context_t *ctx) free(ks); #endif + elapsed_time = TIME_STOP; + LOG_WRITE("Cluster assign", elapsed_time); free(actual_centers.data); actual_centers.size = tot_centers; -- GitLab