diff --git a/README.md b/README.md
index 8ee631970a9df371db758a7b30f93093331bcf66..a08809c09453dd5aea5bbe47b7d2081913ffa290 100644
--- a/README.md
+++ b/README.md
@@ -17,9 +17,9 @@ The suggestion is to run it with one mpi task per socket.
 
  - [x] ~~arugment parser~~
  - [x] ~~H2: graph reduction~~
+ - [x] ~~kdtree: implement slim heap~~
  - [ ] prettify overall stdout
  - [ ] H1: implementation of lock free centers elimination
- - [ ] kdtree: implement slim heap
  - [ ] kdtree: optimization an profiling
  - [ ] io: curation of IO using mpi IO or other solutions 
 
diff --git a/src/adp/adp.c b/src/adp/adp.c
index 8544b57a6c45f1c74a42bced150aaa9806384041..737bf8f8e1ade09d40387eae525fcee558221f4c 100644
--- a/src/adp/adp.c
+++ b/src/adp/adp.c
@@ -55,7 +55,7 @@ float_t compute_ID_two_NN_ML(global_context_t* ctx, datapoint_info_t* dp_info, i
     float_t log_mus = 0.;
     for(idx_t i = 0; i < n; ++i)
     {
-        log_mus += 0.5 * log(dp_info[i].ngbh.data[2].value/dp_info[i].ngbh.data[1].value);
+        log_mus += 0.5 * log(dp_info[i].ngbh[2].value/dp_info[i].ngbh[1].value);
     }
 
     float_t d = 0;
@@ -82,7 +82,7 @@ float_t get_j_ksel_dist(global_context_t* ctx, idx_t j, idx_t ksel, MPI_Win expo
     if(owner == ctx -> mpi_rank)
     {
         idx_t pos = j - ctx -> idx_start;
-        return ctx -> local_datapoints[pos].ngbh.data[ksel].value;
+        return ctx -> local_datapoints[pos].ngbh[ksel].value;
     }
     else
     {
@@ -103,7 +103,7 @@ idx_t get_j_ksel_idx(global_context_t* ctx, idx_t j, idx_t ksel, MPI_Win exposed
     if(owner == ctx -> mpi_rank)
     {
         idx_t pos = j - ctx -> idx_start;
-        return ctx -> local_datapoints[pos].ngbh.data[ksel].array_idx;
+        return ctx -> local_datapoints[pos].ngbh[ksel].array_idx;
     }
     else
     {
@@ -132,7 +132,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
 
 
     MPI_Barrier(ctx -> mpi_communicator);
-    idx_t k = ctx -> local_datapoints[0].ngbh.N;
+    idx_t k = ctx -> k;
 
     struct timespec start_tot, finish_tot;
     double elapsed_tot;
@@ -145,7 +145,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
 		clock_gettime(CLOCK_MONOTONIC, &start_tot);
 	}
 
-    idx_t kMAX = ctx -> local_datapoints[0].ngbh.N - 1;   
+    idx_t kMAX = ctx -> k - 1;   
 
     float_t omega = 0.;  
     if(sizeof(float_t) == sizeof(float)){ omega = powf(PI_F,d/2)/tgammaf(d/2.0f + 1.0f);}  
@@ -174,7 +174,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
     {
         for(idx_t k = 0; k <= kMAX; ++k)
         {
-            local_datapoints[i].ngbh.data[k].value = omega * pow(local_datapoints[i].ngbh.data[k].value, d/2.);  
+            local_datapoints[i].ngbh[k].value = omega * pow(local_datapoints[i].ngbh[k].value, d/2.);  
         }
         //initialize kstar at 0
         local_datapoints[i].kstar = 0;
@@ -191,8 +191,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
             //request data
             idx_t ksel = j - 1;
 
-#if defined(THREAD_FUNNELED)
-#else
+#if !defined(THREAD_FUNNELED)
             #pragma omp parallel for
 #endif
 
@@ -203,7 +202,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
                 {
                     //vvi = omega * pow(local_datapoints[i].ngbh.data[ksel].value,d/2.);
 
-                    idx_t jj = local_datapoints[i].ngbh.data[j].array_idx;
+                    idx_t jj = local_datapoints[i].ngbh[j].array_idx;
 
                     /* 
                      * note jj can be an halo point 
@@ -215,7 +214,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
 
                     if(owner == ctx -> mpi_rank)
                     {
-                        scratch_heap_nodes[i] = ctx -> local_datapoints[pos].ngbh.data[ksel];
+                        scratch_heap_nodes[i] = ctx -> local_datapoints[pos].ngbh[ksel];
                     }
                     else
                     {
@@ -234,8 +233,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
 
             MPI_Win_fence(MPI_MODE_NOPUT,exposed_ngbh); 
             //process data
-#if defined(THREAD_FUNNELED)
-#else
+#if !defined(THREAD_FUNNELED)
             #pragma omp parallel for
 #endif
             for(idx_t i = 0; i < ctx -> local_n_points; ++i)
@@ -243,7 +241,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
                 if(ctx -> local_datapoints[i].kstar == 0)
                 {
                     float_t vvi, vvj, vp, dL;
-                    vvi = local_datapoints[i].ngbh.data[ksel].value;
+                    vvi = local_datapoints[i].ngbh[ksel].value;
                     vvj = scratch_heap_nodes[i].value;
                     vp = (vvi + vvj)*(vvi + vvj);
                     dL = -2.0 * ksel * log(4.*vvi*vvj/vp);
@@ -278,7 +276,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
             {
                 idx_t k = kMAX - 1;
 
-                float_t vvi = ctx -> local_datapoints[i].ngbh.data[k].value;
+                float_t vvi = ctx -> local_datapoints[i].ngbh[k].value;
 
                 local_datapoints[i].kstar = k;
                 local_datapoints[i].log_rho = log((float_t)(k)/vvi/((float_t)(ctx -> n_points)));
@@ -326,7 +324,7 @@ float_t get_j_ksel_dist_v2(global_context_t* ctx, idx_t i, idx_t j, idx_t ksel,
         if(owner == ctx -> mpi_rank)
         {
             idx_t pos = j - ctx -> idx_start;
-            return ctx -> local_datapoints[pos].ngbh.data[ksel].value;
+            return ctx -> local_datapoints[pos].ngbh[ksel].value;
         }
         else
         {
@@ -646,10 +644,9 @@ clusters_t Heuristic1(global_context_t *ctx)
         dp_info[i].is_center = 1;
         dp_info[i].cluster_idx = -1;
         //printf("%lf\n",p -> g);
-        heap i_ngbh = dp_info[i].ngbh;
         for(idx_t k = 1; k < maxk; ++k)
         {
-            idx_t ngbh_index = i_ngbh.data[k].array_idx;
+            idx_t ngbh_index = dp_info[i].ngbh[k].array_idx;
             datapoint_info_t dj = find_possibly_halo_datapoint_rma(ctx, ngbh_index, win_datapoints);
             float_t gj = dj.g;
             if(gj > gi){
@@ -697,7 +694,7 @@ clusters_t Heuristic1(global_context_t *ctx)
 
         for(idx_t j = 1; j < i_point.kstar + 1; ++j)
         {
-            idx_t jidx = i_point.ngbh.data[j].array_idx; 
+            idx_t jidx = i_point.ngbh[j].array_idx; 
 
             datapoint_info_t j_point = find_possibly_halo_datapoint_rma(ctx, jidx,  win_datapoints);
 
@@ -863,7 +860,7 @@ clusters_t Heuristic1(global_context_t *ctx)
                 int cluster = -1;
                 idx_t k = 0;
                 idx_t p_idx;
-                idx_t max_k = p -> ngbh.N;
+                idx_t max_k = ctx -> k;
                 /*assign each particle at the same cluster as the nearest particle of higher density*/
                 datapoint_info_t p_retrieved;
                 while( k < max_k - 1 && cluster == -1)
@@ -871,7 +868,7 @@ clusters_t Heuristic1(global_context_t *ctx)
 
 
                     ++k;
-                    p_idx = p -> ngbh.data[k].array_idx;
+                    p_idx = p -> ngbh[k].array_idx;
                     p_retrieved = find_possibly_halo_datapoint_rma(ctx, p_idx, win_datapoints);
 
                     int flag = p_retrieved.g > p -> g;
@@ -890,7 +887,7 @@ clusters_t Heuristic1(global_context_t *ctx)
                     idx_t gm_index = SIZE_MAX;
                     for(idx_t k = 0; k < max_k; ++k)
                     {
-                        idx_t ngbh_index = p -> ngbh.data[k].array_idx;
+                        idx_t ngbh_index = p -> ngbh[k].array_idx;
                         for(idx_t m = 0; m < removed_centers.count; ++m)
                         {
                             idx_t max_rho_idx = max_rho.data[m];
@@ -1006,7 +1003,7 @@ void Heuristic2(global_context_t* ctx, clusters_t* cluster)
 	}
 
     idx_t nclus = cluster->centers.count; 
-    idx_t max_k = dp_info[0].ngbh.N;
+    idx_t max_k = ctx -> k;
 
     for(idx_t i = 0; i < n; ++i)
     {
@@ -1018,7 +1015,7 @@ void Heuristic2(global_context_t* ctx, clusters_t* cluster)
             for(idx_t k = 1; k < dp_info[i].kstar + 1; ++k)
             {
                 /*index of the kth ngbh of n*/
-                idx_t j = dp_info[i].ngbh.data[k].array_idx;
+                idx_t j = dp_info[i].ngbh[k].array_idx;
                 pp = NOBORDER;
                 /*Loop over kn neigbhours to find if n is the nearest*/
                 /*if cluster of the particle in nbhg is c then check is neighborhood*/                                                
diff --git a/src/common/common.h b/src/common/common.h
index c1df3b6d6a66b521fa574e853d70739a30879d1a..d9bdc7e799df5bcc60a1a8488d7a05002e168beb 100644
--- a/src/common/common.h
+++ b/src/common/common.h
@@ -125,7 +125,7 @@ typedef struct datapoint_info_t {
     /*
      * datapoint object 
      */
-    heap ngbh;              //heap object stores nearest neighbors of each point
+    heap_node* ngbh;        //heap object stores nearest neighbors of each point
     int is_center;          //flag signaling if a point is a cluster center
     int cluster_idx;        //cluster index
     idx_t array_idx;        //global index of the point in the dataset
diff --git a/src/main/main.c b/src/main/main.c
index 2c09e47f2652b25ea2b6e184a091c5eb4ed8addb..a4d797b93f1b8d6848cc8b46f5a2b0ebe87b6bea 100644
--- a/src/main/main.c
+++ b/src/main/main.c
@@ -384,9 +384,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
     datapoint_info_t* dp_info = (datapoint_info_t*)MY_MALLOC(ctx -> local_n_points * sizeof(datapoint_info_t));            
     for(uint64_t i = 0; i < ctx -> local_n_points; ++i)
     {
-        dp_info[i].ngbh.data = NULL;
-        dp_info[i].ngbh.N = 0;
-        dp_info[i].ngbh.count = 0;
+        dp_info[i].ngbh = NULL;
         dp_info[i].g = 0.f;
         dp_info[i].log_rho = 0.f;
         dp_info[i].log_rho_c = 0.f;
diff --git a/src/tree/tree.c b/src/tree/tree.c
index f66c6f2687c3e04d3e65d7e4fcff40e98114f404..12f181e3ec0ba4068f08e027c594c145df53622a 100644
--- a/src/tree/tree.c
+++ b/src/tree/tree.c
@@ -1483,17 +1483,19 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
     {
         idx_t idx = local_tree -> _nodes[p].array_idx;
         /* actually we want to preserve the heap to then insert guesses from other nodes */
-        dp_info[idx].ngbh.data  = ctx -> __local_heap_buffers + k * idx;
-        dp_info[idx].ngbh.count = 0;
-        dp_info[idx].ngbh.N     = k;
+        heap tmp_heap;
+        tmp_heap.data  = ctx -> __local_heap_buffers + k * idx;
+        tmp_heap.count = 0;
+        tmp_heap.N     = k;
 
         //dp_info[idx].ngbh = knn_kdtree_v2_no_heapsort(local_tree -> _nodes[p].data, local_tree -> root, k);
-        knn_sub_tree_search_kdtree_v2(local_tree -> _nodes[p].data, local_tree -> root, &(dp_info[idx].ngbh));
+        knn_sub_tree_search_kdtree_v2(local_tree -> _nodes[p].data, local_tree -> root, &tmp_heap);
 
-        convert_heap_idx_to_global(ctx, &(dp_info[idx].ngbh));
+        convert_heap_idx_to_global(ctx, &tmp_heap);
         dp_info[idx].cluster_idx = -1;
         dp_info[idx].is_center = 0;
         dp_info[idx].array_idx = idx + ctx -> idx_start;
+        dp_info[idx].ngbh = tmp_heap.data;
     }
     elapsed_time = TIME_STOP;
     LOG_WRITE("Local neighborhood search", elapsed_time);
@@ -1517,7 +1519,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
     #pragma omp parallel for
     for(int i = 0; i < ctx -> local_n_points; ++i)
     {
-        float_t max_dist = dp_info[i].ngbh.data[0].value;
+        float_t max_dist = dp_info[i].ngbh[0].value;
         float_t* point   = ctx -> local_data + (i * ctx -> dims);
         
         tree_walk_v2_find_n_points(ctx, top_tree -> root, i, max_dist, point, point_to_snd_capacity);
@@ -1537,7 +1539,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
     #pragma omp parallel for
     for(int i = 0; i < ctx -> local_n_points; ++i)
     {
-        float_t max_dist = dp_info[i].ngbh.data[0].value;
+        float_t max_dist = dp_info[i].ngbh[0].value;
         float_t* point   = ctx -> local_data + (i * ctx -> dims);
 
         tree_walk_v2_append_points(ctx, top_tree -> root, i, max_dist, point, data_to_send_per_proc, local_idx_of_the_point, point_to_snd_count);
@@ -1832,7 +1834,11 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
             /* insert the points into the heap */
             for(int j = 0; j < k; ++j)
             {
-                insert_max_heap(&(dp_info[idx].ngbh), H.data[j].value, H.data[j].array_idx);
+                heap tmp_heap;
+                tmp_heap.N     = k;
+                tmp_heap.count = k;
+                tmp_heap.data  = dp_info[idx].ngbh;
+                insert_max_heap(&(tmp_heap), H.data[j].value, H.data[j].array_idx);
             }
         }
 
@@ -1858,7 +1864,12 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
     #pragma omp parallel for
     for(int i = 0; i < ctx -> local_n_points; ++i)
     {
-        heap_sort(&(dp_info[i].ngbh));
+        heap tmp_heap;
+        tmp_heap.N     = k;
+        tmp_heap.count = k;
+        tmp_heap.data  = dp_info[i].ngbh;
+
+        heap_sort(&(tmp_heap));
     }
 
     elapsed_time = TIME_STOP;