diff --git a/README.md b/README.md
index a08809c09453dd5aea5bbe47b7d2081913ffa290..7949127e45470faf53f2d4eb06bc4cd3a601eef5 100644
--- a/README.md
+++ b/README.md
@@ -15,11 +15,12 @@ The suggestion is to run it with one mpi task per socket.
 
 # Todo
 
+ - [ ] H1: implementation of lock-free centers elimination
+ - [ ] context: open all windows in a single shot, close them all together
+ - [ ] io: curation of IO using MPI-IO or other solutions
+ - [ ] kdtree: optimization and profiling
+ - [ ] prettify overall stdout
  - [x] ~~arugment parser~~
  - [x] ~~H2: graph reduction~~
  - [x] ~~kdtree: implement slim heap~~
- - [ ] prettify overall stdout
- - [ ] H1: implementation of lock free centers elimination
- - [ ] kdtree: optimization an profiling
- - [ ] io: curation of IO using mpi IO or other solutions
 
diff --git a/src/adp/adp.c b/src/adp/adp.c
index 737bf8f8e1ade09d40387eae525fcee558221f4c..13f1c47675f2e6e521ea32123cc7e0f5158a5a90 100644
--- a/src/adp/adp.c
+++ b/src/adp/adp.c
@@ -595,6 +595,56 @@ void compute_correction(global_context_t* ctx, float_t Z)
 }
 
 
+/* maybe this should return an error? */
+
+#define LOCK_ACQUIRED 1
+#define LOCK_FREE     0
+
+#define lock_t int
+#define MPI_LOCK_T MPI_INT
+
+lock_t h1_lock_acquire(global_context_t* ctx, MPI_Win lock_window, int owner, idx_t pos, lock_t state)
+{
+    if(state == LOCK_FREE)
+    {
+        state = LOCK_ACQUIRED;
+
+        lock_t compare = LOCK_FREE;
+        lock_t result  = LOCK_ACQUIRED;
+
+        int err = MPI_SUCCESS;
+
+        while(result == LOCK_ACQUIRED && err == MPI_SUCCESS)
+        {
+            err = MPI_Compare_and_swap(&state, &compare, &result, MPI_LOCK_T, owner, pos, lock_window);
+        }
+
+        if(err != MPI_SUCCESS)
+        {
+            printf("/!\\ Rank %d at line %d encountered an error while using MPI_RMA, aborting\n", ctx -> mpi_rank, __LINE__);
+            print_error_code(err);
+            exit(1);
+        }
+
+    }
+    return state;
+
+}
+
+lock_t h1_lock_free(global_context_t* ctx, MPI_Win lock_window, int owner, idx_t pos, lock_t state)
+{
+    if(state == LOCK_ACQUIRED)
+    {
+        state = LOCK_FREE;
+
+        MPI_Accumulate(&state, 1, MPI_LOCK_T, owner,
+                pos, 1, MPI_LOCK_T, MPI_REPLACE, lock_window);
+
+    }
+    return state;
+
+}
+
 clusters_t Heuristic1(global_context_t *ctx)
 {
     /*
@@ -656,7 +706,7 @@ clusters_t Heuristic1(global_context_t *ctx)
         }
         if(dp_info[i].is_center)
         {
-            #pragma omp critical
+            #pragma omp critical (push_candidate_center)
             {
                 lu_dynamic_array_pushBack(&all_centers, i);
             }
@@ -671,11 +721,14 @@ clusters_t Heuristic1(global_context_t *ctx)
      * ends, center, removed centers, and max_rho arrays are populated
      */
 
+    lock_t* lock_array = (lock_t*)MY_MALLOC(n * sizeof(lock_t));
     heap_node* to_remove_mask = (heap_node*)MY_MALLOC(n*sizeof(heap_node));
+
     for(idx_t p = 0; p < n; ++p)
     {
         to_remove_mask[p].array_idx = MY_SIZE_MAX;
         to_remove_mask[p].value = 9999999;
+        lock_array[p] = LOCK_FREE;
     }
     qsort(dp_info_ptrs, n, sizeof(datapoint_info_t*), cmpPP);
 
@@ -684,6 +737,12 @@ clusters_t Heuristic1(global_context_t *ctx)
     MPI_Win_create(to_remove_mask, n * sizeof(heap_node), 1, MPI_INFO_NULL, ctx -> mpi_communicator, &win_to_remove_mask);
     MPI_Win_fence(0, win_to_remove_mask);
 
+    MPI_Win win_locks;
+    MPI_Win_create(lock_array, n * sizeof(lock_t), sizeof(lock_t), MPI_INFO_NULL, ctx -> mpi_communicator, &win_locks);
+    MPI_Win_fence(0, win_locks);
+
+
+
     #if defined(THREAD_FUNNELED)
     #else
     #pragma omp parallel for
@@ -700,22 +759,49 @@ clusters_t Heuristic1(global_context_t *ctx)
             if(j_point.is_center && i_point.g > j_point.g)
             {
-                #pragma omp critical
+                /*
+                 *
+                 * TODO: Implement it without this but using private locks
+                 *       use an array of locks, and compare and swap to actually gain control of the thing
+                 *
+                 * */
+                int owner = foreign_owner(ctx, jidx);
+                idx_t jpos = jidx - ctx -> rank_idx_start[owner];
+
+                lock_t state = LOCK_FREE;
+
+                state = h1_lock_acquire(ctx, win_locks, owner, jpos, state);
+
+                heap_node mask_element;
+                MPI_Request request;
+                MPI_Rget(&mask_element, sizeof(heap_node), MPI_BYTE,
+                        owner, jpos * sizeof(heap_node), sizeof(heap_node), MPI_BYTE, win_to_remove_mask, &request);
+                MPI_Wait(&request, MPI_STATUS_IGNORE);
+
+                int flag = mask_element.array_idx == MY_SIZE_MAX;
+                if(flag || i_point.g > mask_element.value)
+                {
+                    heap_node tmp_mask_element = {.array_idx = i_point.array_idx, .value = i_point.g};
+                    MPI_Request request;
+                    MPI_Rput(&tmp_mask_element, sizeof(heap_node), MPI_BYTE, owner,
+                            jpos*sizeof(heap_node), sizeof(heap_node), MPI_BYTE, win_to_remove_mask, &request);
+                    MPI_Wait(&request, MPI_STATUS_IGNORE);
+
+                }
+
+                state = h1_lock_free(ctx, win_locks, owner, jpos, state);
+
+                /*
+                #pragma omp critical (h1_centers_elimination)
                 {
-                    /*
-                     *
-                     * TODO: Implement it without this but using private locks
-                     *       use an array of locks, and compare and swap to actually gain control of the thing
-                     *
-                     * */
                     int owner = foreign_owner(ctx, jidx);
                     idx_t jpos = jidx - ctx -> rank_idx_start[owner];
 
                     MPI_Win_lock(MPI_LOCK_EXCLUSIVE, owner, 0, win_to_remove_mask);
 
                     heap_node mask_element;
                     MPI_Request request;
-                    MPI_Rget( &mask_element, sizeof(heap_node), MPI_BYTE,
-                             owner, jpos * sizeof(heap_node) , sizeof(heap_node), MPI_BYTE, win_to_remove_mask, &request);
+                    MPI_Rget(&mask_element, sizeof(heap_node), MPI_BYTE,
+                            owner, jpos * sizeof(heap_node), sizeof(heap_node), MPI_BYTE, win_to_remove_mask, &request);
                     MPI_Wait(&request, MPI_STATUS_IGNORE);
 
                     int flag = mask_element.array_idx == MY_SIZE_MAX;
@@ -731,11 +817,13 @@ clusters_t Heuristic1(global_context_t *ctx)
                     MPI_Win_unlock(owner, win_to_remove_mask);
                 }
+                */
             }
         }
     }
 
     MPI_Win_fence(0, win_to_remove_mask);
+    MPI_Win_fence(0, win_locks);
 
     MPI_Barrier(ctx -> mpi_communicator);
 
     /* populate the usual arrays */
@@ -784,6 +872,9 @@ clusters_t Heuristic1(global_context_t *ctx)
 
     MPI_Win_free(&win_to_remove_mask);
     free(to_remove_mask);
+
+    MPI_Win_free(&win_locks);
+    free(lock_array);
 
     int n_centers = (int)actual_centers.count;
     int tot_centers;
diff --git a/src/main/main.c b/src/main/main.c
index a4d797b93f1b8d6848cc8b46f5a2b0ebe87b6bea..8c90cfee2667a35126dd0b26b865511c4dd3f6d5 100644
--- a/src/main/main.c
+++ b/src/main/main.c
@@ -183,8 +183,6 @@ int main(int argc, char** argv) {
     //parse command line
     parse_args(&ctx, argc, argv);
 
-    printf("DIO\n");
-
     /*
      * Generate a random matrix of lenght of some kind
      */
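
Reviewer note, not part of the patch: h1_lock_acquire spins on the result buffer of MPI_Compare_and_swap inside a fence-based (active-target) epoch. The MPI standard only guarantees that the result of an RMA atomic is defined once the operation has completed at a synchronization call, so the spin loop may re-test a value that has not yet been delivered. Below is a minimal sketch of the same per-element lock under passive-target synchronization, where MPI_Win_flush completes each CAS before its result is checked. It assumes the lock window is opened once with MPI_Win_lock_all and closed with MPI_Win_unlock_all (replacing the MPI_Win_fence pair); the names rma_lock_acquire and rma_lock_release are illustrative and are not part of the repository.

    /* Sketch only: per-element spin lock on an MPI RMA window.
     * Assumes the caller has opened a passive-target epoch with
     * MPI_Win_lock_all(0, win) and will close it with MPI_Win_unlock_all(win). */
    #include <mpi.h>

    #define LOCK_FREE     0
    #define LOCK_ACQUIRED 1

    /* Spin until the CAS observes LOCK_FREE in element `pos` of the
     * lock window on rank `owner`, then the lock is held. */
    static void rma_lock_acquire(MPI_Win win, int owner, MPI_Aint pos)
    {
        const int acquired = LOCK_ACQUIRED;
        const int expected = LOCK_FREE;
        int       observed = LOCK_ACQUIRED;

        do {
            MPI_Compare_and_swap(&acquired, &expected, &observed,
                                 MPI_INT, owner, pos, win);
            MPI_Win_flush(owner, win);   /* complete the CAS so `observed` is defined */
        } while (observed != LOCK_FREE);
    }

    /* Release: atomically write LOCK_FREE back.  MPI_Accumulate with
     * MPI_REPLACE stays atomic with respect to concurrent CAS calls
     * on the same element. */
    static void rma_lock_release(MPI_Win win, int owner, MPI_Aint pos)
    {
        const int released = LOCK_FREE;
        MPI_Accumulate(&released, 1, MPI_INT, owner, pos, 1, MPI_INT,
                       MPI_REPLACE, win);
        MPI_Win_flush(owner, win);       /* make the release visible before returning */
    }

As a possible follow-up, if heap_node can be mapped onto one of MPI's predefined value/index pair types, the read-compare-write on win_to_remove_mask could be expressed as a single MPI_Accumulate with MPI_MAXLOC, which would remove the need for a lock altogether.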