From 04c3417bab2964c168c289bfb38b1960c34537f0 Mon Sep 17 00:00:00 2001
From: lykos98 <francy273998@gmail.com>
Date: Tue, 29 Apr 2025 09:45:23 +0200
Subject: [PATCH] added working implementation of h1 optimization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

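- find_possibly_halo_datapoint_rma: drop the #pragma omp critical
  section around the MPI_Rget/MPI_Wait pair, so threads can issue
  remote fetches concurrently.
- Heuristic1: replace passive-target synchronization of win_datapoints
  (MPI_Win_lock_all / MPI_Win_unlock_all, left commented out) with an
  active-target access epoch opened and closed by
  MPI_Win_fence(MPI_MODE_NOPUT, ...) around the center-pruning loop.
- Heuristic1: add per-phase timing (TIME_START / TIME_STOP + LOG_WRITE)
  for the putative-center search, center pruning, elimination exchange,
  mask merging and cluster assignment; run the elimination-merge loop
  with schedule(dynamic).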
---
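Note (not part of the commit message): a minimal, self-contained sketch
of the fence-based (active-target) RMA pattern this patch moves the
window synchronization to. The rank layout, variable names, and the use
of plain MPI_Get in place of the request-based MPI_Rget are illustrative
only, not project code.

/* Each rank exposes one int in a window, opens an access epoch with a
 * fence, reads its right neighbour's value with MPI_Get, and closes the
 * epoch with a second fence. MPI_MODE_NOPUT asserts that no put or
 * accumulate targets the local window before the next synchronization,
 * which holds here because only gets are issued. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int local  = 100 + rank;   /* datum exposed to the other ranks   */
    int remote = -1;           /* destination for the fetched value  */

    MPI_Win win;
    MPI_Win_create(&local, sizeof(int), sizeof(int),
                   MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    MPI_Win_fence(MPI_MODE_NOPUT, win);   /* open the access epoch    */
    int owner = (rank + 1) % size;        /* right neighbour          */
    MPI_Get(&remote, 1, MPI_INT, owner, 0, 1, MPI_INT, win);
    MPI_Win_fence(MPI_MODE_NOPUT, win);   /* close it: gets complete  */

    printf("rank %d read %d from rank %d\n", rank, remote, owner);

    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}
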
 src/adp/adp.c | 55 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 40 insertions(+), 15 deletions(-)

diff --git a/src/adp/adp.c b/src/adp/adp.c
index 014f3d0..1f1c229 100644
--- a/src/adp/adp.c
+++ b/src/adp/adp.c
@@ -510,17 +510,13 @@ datapoint_info_t find_possibly_halo_datapoint_rma(global_context_t* ctx, idx_t i
     else
     {
         datapoint_info_t tmp_dp;
-        #pragma omp critical
-        {
-            idx_t i = idx - ctx -> rank_idx_start[owner];
-            MPI_Request request;
-            MPI_Status status;
-
-            MPI_Rget(&tmp_dp, sizeof(datapoint_info_t), MPI_BYTE, owner,
-                    i * sizeof(datapoint_info_t), sizeof(datapoint_info_t), MPI_BYTE, win_datapoints, &request);
-            MPI_Wait(&request, MPI_STATUS_IGNORE);
+        idx_t i = idx - ctx -> rank_idx_start[owner];
+        MPI_Request request;
+        // no separate MPI_Status needed: MPI_Wait uses MPI_STATUS_IGNORE
 
-        }
+        MPI_Rget(&tmp_dp, sizeof(datapoint_info_t), MPI_BYTE, owner,
+                i * sizeof(datapoint_info_t), sizeof(datapoint_info_t), MPI_BYTE, win_datapoints, &request);
+        MPI_Wait(&request, MPI_STATUS_IGNORE);
 
         return tmp_dp;         
     }                 
@@ -680,9 +676,11 @@ clusters_t Heuristic1(global_context_t *ctx)
 
     struct timespec start_tot, finish_tot;
     double elapsed_tot;
+    double elapsed_time;
 
     TIME_DEF;
 
+    TIME_START;
     lu_dynamic_array_t all_centers, removed_centers, actual_centers, max_rho;
 
     lu_dynamic_array_allocate(&all_centers);
@@ -698,7 +696,7 @@ clusters_t Heuristic1(global_context_t *ctx)
     MPI_Win_create(ctx -> local_datapoints, ctx -> local_n_points * sizeof(datapoint_info_t), 
                    1, MPI_INFO_NULL, ctx -> mpi_communicator, &win_datapoints);
     MPI_Win_fence(0, win_datapoints);
-    MPI_Win_lock_all(0,  win_datapoints);
+    //MPI_Win_lock_all(0,  win_datapoints);
 
 #if !defined(THREAD_FUNNELED)
     #pragma omp parallel for
@@ -744,7 +742,6 @@ clusters_t Heuristic1(global_context_t *ctx)
      *
      * optimized v2 use a queue of center removal and then exchange them
 	 */
-		
 	heap_node* to_remove_mask = (heap_node*)MY_MALLOC(n*sizeof(heap_node));
 
     for(idx_t p = 0; p < n; ++p) 
@@ -752,6 +749,9 @@ clusters_t Heuristic1(global_context_t *ctx)
         to_remove_mask[p].array_idx = MY_SIZE_MAX;
         to_remove_mask[p].value = -9999999;
     }
+
+    // sort by density
+
     qsort(dp_info_ptrs, n, sizeof(datapoint_info_t*), cmpPP);
 
     /**
@@ -774,6 +774,13 @@ clusters_t Heuristic1(global_context_t *ctx)
         omp_init_lock(lock_array + i);
     }
 
+    elapsed_time = TIME_STOP;
+    LOG_WRITE("Putative centers", elapsed_time);
+
+    TIME_START;
+
+    MPI_Win_fence(MPI_MODE_NOPUT, win_datapoints);
+
 #if !defined(THREAD_FUNNELED)
     #pragma omp parallel for schedule(dynamic)
 #endif
@@ -793,9 +800,10 @@ clusters_t Heuristic1(global_context_t *ctx)
                 // actually is the p-th point
                 int owner = foreign_owner(ctx, jidx);
                 //if local process it
+                idx_t jpos = jidx - ctx -> idx_start;
                 if(owner == ctx -> mpi_rank)
                 {
-                    idx_t jpos = jidx - ctx -> idx_start;
+                    // acquire the per-point lock before updating to_remove_mask[jpos]
                     omp_set_lock(lock_array + jpos);
                     if(i_point.g > to_remove_mask[jpos].value)
                     {
@@ -815,7 +823,13 @@ clusters_t Heuristic1(global_context_t *ctx)
         }
     }
 
+    MPI_Win_fence(MPI_MODE_NOPUT, win_datapoints);
+
     //assemble arrays into a single buffer
+
+    elapsed_time = TIME_STOP;
+    LOG_WRITE("Finding centers to prune", elapsed_time);
+    TIME_START;
     
     idx_t tot_removal = 0;
     for(idx_t p = 0; p < n; ++p)
@@ -963,8 +977,12 @@ clusters_t Heuristic1(global_context_t *ctx)
                    recv_removals, recv_counts, recv_displs, MPI_BYTE, ctx -> mpi_communicator);
 
     // merge into the mask
+
+    elapsed_time = TIME_STOP;
+    LOG_WRITE("Communicating eliminations", elapsed_time);
+    TIME_START;
     
-    #pragma omp parallel for
+    #pragma omp parallel for schedule(dynamic)
     for(idx_t i = 0; i < tot_recv_counts; ++i)
     {
         idx_t el_pos = recv_removals[i].target_id - ctx -> idx_start;
@@ -1046,6 +1064,11 @@ clusters_t Heuristic1(global_context_t *ctx)
     free(lock_array);
     free(recv_removals);
 
+    elapsed_time = TIME_STOP;
+    LOG_WRITE("Merging", elapsed_time);
+
+    TIME_START;
+
     int n_centers = (int)actual_centers.count;
     int tot_centers;
     MPI_Allreduce(&n_centers, &tot_centers, 1, MPI_INT, MPI_SUM, ctx -> mpi_communicator);
@@ -1188,7 +1211,7 @@ clusters_t Heuristic1(global_context_t *ctx)
 
     }
 
-    MPI_Win_unlock_all(win_datapoints);
+    //MPI_Win_unlock_all(win_datapoints);
     MPI_Win_fence(0, win_datapoints);
     MPI_Win_free(&win_datapoints);
 
@@ -1209,6 +1232,8 @@ clusters_t Heuristic1(global_context_t *ctx)
         free(ks);
     #endif
 
+    elapsed_time = TIME_STOP;
+    LOG_WRITE("Cluster assign", elapsed_time);
 
     free(actual_centers.data);
     actual_centers.size  = tot_centers;
-- 
GitLab