diff --git a/src/tree/tree.c b/src/tree/tree.c
index 10807c1f0f6b5f54a5bd82af837a5e1ca160e13c..167791e35541cce1c6dafcdf7137a8a0c0883130 100644
--- a/src/tree/tree.c
+++ b/src/tree/tree.c
@@ -1788,8 +1788,6 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
         }
     }
     
-    HERE
-
     /* sendout results */
 
     /* 
@@ -2665,14 +2663,18 @@ datapoint_info_t find_possibly_halo_datapoint_rma(global_context_t* ctx, idx_t i
     }
     else
     {
-        idx_t i = idx - ctx -> rank_idx_start[owner];
         datapoint_info_t tmp_dp;
-        MPI_Request request;
-        MPI_Status status;
+        #pragma omp critical
+        {
+            idx_t i = idx - ctx -> rank_idx_start[owner];
+            MPI_Request request;
+            MPI_Status status;
 
-        MPI_Rget(&tmp_dp, sizeof(datapoint_info_t), MPI_BYTE, owner,
-                i * sizeof(datapoint_info_t), sizeof(datapoint_info_t), MPI_BYTE, win_datapoints, &request);
-        MPI_Wait(&request, MPI_STATUS_IGNORE);
+            MPI_Rget(&tmp_dp, sizeof(datapoint_info_t), MPI_BYTE, owner,
+                    i * sizeof(datapoint_info_t), sizeof(datapoint_info_t), MPI_BYTE, win_datapoints, &request);
+            MPI_Wait(&request, MPI_STATUS_IGNORE);
+
+        }
 
         return tmp_dp;         
     }                 
@@ -3453,6 +3455,8 @@ clusters_t Heuristic1_rma(global_context_t *ctx, int verbose)
      *                                                            
      * args:                                                      
      */
+
+    HERE
     datapoint_info_t* dp_info = ctx -> local_datapoints;
     idx_t n = ctx -> local_n_points; 
 
@@ -3494,6 +3498,7 @@ clusters_t Heuristic1_rma(global_context_t *ctx, int verbose)
                    1, MPI_INFO_NULL, ctx -> mpi_communicator, &win_datapoints);
     MPI_Win_fence(0, win_datapoints);
     MPI_Win_lock_all(0,  win_datapoints);
+    HERE
 
 #if defined(THREAD_FUNNELED)
 #else
@@ -3527,10 +3532,15 @@ clusters_t Heuristic1_rma(global_context_t *ctx, int verbose)
         if(dp_info[i].is_center)
         {
             //lu_dynamic_array_pushBack(&all_centers, dp_info[i].array_idx);
-            lu_dynamic_array_pushBack(&all_centers, i);
+            #pragma omp critical
+            {
+                lu_dynamic_array_pushBack(&all_centers, i);
+            }
         }
     }
 
+    HERE
+
     if(verbose)
 	{
 		clock_gettime(CLOCK_MONOTONIC, &finish);
@@ -3596,7 +3606,7 @@ clusters_t Heuristic1_rma(global_context_t *ctx, int verbose)
 
             if(j_point.is_center && i_point.g > j_point.g)
             {
-                //#pragma omp critical
+                #pragma omp critical
                 {
                     int owner = foreign_owner(ctx, jidx);
                     idx_t jpos = jidx - ctx -> rank_idx_start[owner];
@@ -3626,6 +3636,8 @@ clusters_t Heuristic1_rma(global_context_t *ctx, int verbose)
     }
     
     MPI_Win_fence(0, win_to_remove_mask);
+    MPI_Barrier(ctx -> mpi_communicator);
+    HERE
 
 	/* populate the usual arrays */
     for(idx_t p = 0; p < all_centers.count; ++p)
@@ -3668,6 +3680,9 @@ clusters_t Heuristic1_rma(global_context_t *ctx, int verbose)
                 break;
         }
     }
+
+    HERE
+    MPI_Win_free(&win_to_remove_mask);
 	free(to_remove_mask);
 
     if(verbose)
@@ -3834,6 +3849,8 @@ clusters_t Heuristic1_rma(global_context_t *ctx, int verbose)
     MPI_Win_fence(0, win_datapoints);
     MPI_Win_free(&win_datapoints);
 
+    MPI_Barrier(ctx -> mpi_communicator);
+
     if(verbose)
 	{
 		clock_gettime(CLOCK_MONOTONIC, &finish);
@@ -4743,7 +4760,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
         // data = read_data_file(ctx,"../norm_data/std_g2980844_091_0000",MY_TRUE);
         
         /* 1M points ca.*/
-        data = read_data_file(ctx,"../norm_data/std_LR_091_0001",MY_TRUE);
+        // data = read_data_file(ctx,"../norm_data/std_LR_091_0001",MY_TRUE);
 
         /* BOX */
         // data = read_data_file(ctx,"../norm_data/std_Box_256_30_092_0000",MY_TRUE);
@@ -4753,15 +4770,15 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
         // data = read_data_file(ctx,"../norm_data/std_g0144846_Me14_091_0001",MY_TRUE);
 
         //88M 
-        // data = read_data_file(ctx,"../norm_data/std_g5503149_091_0000",MY_TRUE);
+        data = read_data_file(ctx,"../norm_data/std_g5503149_091_0000",MY_TRUE);
 
         //
         //34 M
         // data = read_data_file(ctx,"../norm_data/std_g1212639_091_0001",MY_TRUE);
         ctx->dims = 5;
 
-        ctx -> n_points = 5 * 100000;
-        //ctx->n_points = ctx->n_points / ctx->dims;
+        //ctx -> n_points = 5 * 100000;
+        ctx->n_points = ctx->n_points / ctx->dims;
         //ctx->n_points = (ctx->n_points * 5) / 10;
         // ctx -> n_points = ctx -> world_size * 1000;