diff --git a/.gitignore b/.gitignore
index 91f70de24db5bb5940c9816e892d4272f96ed2f4..6cf8f6c3c7c26eea530764668c5943e8362f94a6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ main
 sync.sh
 bb
 .ipy*
+scalability_results
diff --git a/src/tree/heap.c b/src/tree/heap.c
index 57afac7960cbb3b56cc1b3d0d5a5a51e9c331631..abe8e8945f7b0728375240ef284516105d927105 100644
--- a/src/tree/heap.c
+++ b/src/tree/heap.c
@@ -33,6 +33,7 @@ void heapify_max_heap(heap* H, idx_t node){
     /*
     Found gratest between children of node and boundcheck if the node is a leaf 
     */
+    
 	while(1)
 	{
 		largest = 	(HEAP_LCH(nn) < H -> N) && 
@@ -50,22 +51,25 @@ void heapify_max_heap(heap* H, idx_t node){
 			break;
 		}
 	}
+    
 
-    //if(HEAP_LCH(node) < H -> N){
-    //    //if(H -> data[HEAP_LCH(node)].value > H -> data[largest].value ) largest = HEAP_LCH(node);
-	//	largest = (H -> data[HEAP_LCH(nn)].value > H -> data[largest].value ) ? HEAP_LCH(nn) : largest;
-    //}
-    //if(HEAP_RCH(node) < H -> N){
-    //    //if(H -> data[HEAP_RCH(node)].value > H -> data[largest].value ) largest = HEAP_RCH(node);
-	//	largest = (H -> data[HEAP_RCH(nn)].value > H -> data[largest].value ) ? HEAP_RCH(nn) : largest;
-    //}
-    //if(largest == node){
-    //    return;
-    //}
-    //else{
-    //    swap_heap_node(H -> data + node, H -> data + largest);
-    //    heapify_max_heap(H, largest);
-    //}
+    /*
+    if(HEAP_LCH(node) < H -> N){
+        //if(H -> data[HEAP_LCH(node)].value > H -> data[largest].value ) largest = HEAP_LCH(node);
+		largest = (H -> data[HEAP_LCH(nn)].value > H -> data[largest].value ) ? HEAP_LCH(nn) : largest;
+    }
+    if(HEAP_RCH(node) < H -> N){
+        //if(H -> data[HEAP_RCH(node)].value > H -> data[largest].value ) largest = HEAP_RCH(node);
+		largest = (H -> data[HEAP_RCH(nn)].value > H -> data[largest].value ) ? HEAP_RCH(nn) : largest;
+    }
+    if(largest == node){
+        return;
+    }
+    else{
+        swap_heap_node(H -> data + node, H -> data + largest);
+        heapify_max_heap(H, largest);
+    }
+    */
 }
 
 
@@ -126,27 +130,27 @@ void insert_max_heap(heap * H,const FLOAT_TYPE val,const idx_t array_idx){
 	int ctot = c1 + 2*c2;
 	switch (ctot) {
 		case 1:
-		{
-				idx_t node = H->count;
-				++(H -> count);
-				H -> data[node].value = val;
-				H -> data[node].array_idx = array_idx;
-				/*
-				* Push up the node through the heap 
-				*/
-				while(node && H -> data[node].value > H -> data[HEAP_PARENT(node)].value)
-				{
-					swap_heap_node(H -> data + node, H -> data + HEAP_PARENT(node));
-					node = HEAP_PARENT(node);
-					//if(node == 0) break;
-			}
-		}
-		break;
+            {
+                idx_t node = H->count;
+                ++(H -> count);
+                H -> data[node].value = val;
+                H -> data[node].array_idx = array_idx;
+                /*
+                * Push up the node through the heap 
+                */
+                while(node && H -> data[node].value > H -> data[HEAP_PARENT(node)].value)
+                {
+                    swap_heap_node(H -> data + node, H -> data + HEAP_PARENT(node));
+                    node = HEAP_PARENT(node);
+                    //if(node == 0) break;
+                }
+            }
+	    	break;
 
 		case 2: 
-		{
-			set_root_max_heap(H,val,array_idx);
-		}
+            {
+                set_root_max_heap(H,val,array_idx);
+            }
 			break;
 		default:
 			break;
diff --git a/src/tree/kdtreeV2.c b/src/tree/kdtreeV2.c
index aa0f21ee052dd09a87a1fb177598f8ac2508aa21..51a88c375488038d2a57460ec6018463e7fa4922 100644
--- a/src/tree/kdtreeV2.c
+++ b/src/tree/kdtreeV2.c
@@ -195,12 +195,14 @@ kdnode_v2* make_tree_kdnode_v2(kdnode_v2* t, int start, int end, kdnode_v2* pare
 		}
 	}
 	
+    /*
 	#ifdef SWMEM	
 		if(parent == NULL)
 		{
 			swapMem_kdv2 = (FLOAT_TYPE*)malloc(sizeof(FLOAT_TYPE)*data_dims);
 		}
 	#endif
+    */
 	
 	
 	
@@ -257,12 +259,14 @@ kdnode_v2* make_tree_kdnode_v2(kdnode_v2* t, int start, int end, kdnode_v2* pare
         n->level = level;
     }
 	
+    /*
 	#ifdef SWMEM
 		if(parent == NULL)
 		{
 			swapMem_kdv2 = malloc(sizeof(FLOAT_TYPE)*data_dims);
 		}
 	#endif
+    */
 
     return n;
 }
@@ -359,6 +363,9 @@ void kdtree_v2_init(kdtree_v2* tree, FLOAT_TYPE* data, size_t n_nodes, unsigned
 
 void kdtree_v2_free(kdtree_v2* tree)
 {
+    for(uint64_t i = 0; i < tree->n_nodes; ++i) /* walk all n_nodes entries and release each node's node_list.data buffer when it is non-NULL */
+        if(tree -> _nodes[i].node_list.data) free(tree -> _nodes[i].node_list.data);
+    /* the per-node buffers are released above; now release the node array itself */
 	free(tree -> _nodes);
 }
 
diff --git a/src/tree/tree.c b/src/tree/tree.c
index 4af8fa8d2d89cf1bf4ffd6687c7db06b90475760..9ce69440ad6bf2e1a7ee6bb2f7b0a1653b2b5bcd 100644
--- a/src/tree/tree.c
+++ b/src/tree/tree.c
@@ -692,10 +692,24 @@ int compute_n_nodes(int n)
 void top_tree_init(global_context_t *ctx, top_kdtree_t *tree) 
 {
 	/* we want procs leaves */
-	int l = (int)(ceil(log2((float_t)ctx -> world_size)));	
-	int tree_nodes = (1 << (l + 1)) - 1;
+    int l = (int)(ceil(log2((float_t)ctx -> world_size)));	
+    int tree_nodes = (1 << (l + 1)) - 1;
+	//int tree_nodes = compute_n_nodes(ctx -> world_size);	
 	//MPI_DB_PRINT("Tree nodes %d %d %d %d\n", ctx -> world_size,l, tree_nodes, compute_n_nodes(ctx -> world_size));
 	tree->_nodes  	= (top_kdtree_node_t*)malloc(tree_nodes * sizeof(top_kdtree_node_t));
+    for(int i = 0; i < tree_nodes; ++i)
+    {
+        tree -> _nodes[i].lch = NULL;
+        tree -> _nodes[i].rch = NULL;
+        tree -> _nodes[i].parent = NULL;
+        tree -> _nodes[i].owner = -1;
+        tree -> _nodes[i].n_points = 0;
+        tree -> _nodes[i].split_dim = -1;
+        tree -> _nodes[i].split_val = 0.f;
+        tree -> _nodes[i].lb_node_box = NULL;
+        tree -> _nodes[i].ub_node_box = NULL;
+
+    }
 	tree->_capacity = tree_nodes;
 	tree->dims 		= ctx->dims;
 	tree->count 	= 0;
@@ -718,6 +732,7 @@ top_kdtree_node_t* top_tree_generate_node(global_context_t* ctx, top_kdtree_t* t
 	top_kdtree_node_t* ptr = tree -> _nodes + tree -> count;
 	ptr -> lch = NULL;
 	ptr -> rch = NULL;
+    ptr -> parent = NULL;
 	ptr -> lb_node_box = (float_t*)malloc(ctx -> dims * sizeof(float_t));
 	ptr -> ub_node_box = (float_t*)malloc(ctx -> dims * sizeof(float_t));
 	ptr -> owner 	   = -1;
@@ -1131,13 +1146,13 @@ void exchange_points(global_context_t* ctx, top_kdtree_t* tree)
 	ctx -> local_data = rcvbuffer;
 
 	/* check exchange */
-    /*
+    
 	for(size_t i = 0; i < ctx -> local_n_points; ++i)
 	{
 		int o = compute_point_owner(ctx, tree, ctx -> local_data + (i * ctx -> dims));
 		if(o != ctx -> mpi_rank) DB_PRINT("rank %d got an error\n",ctx -> mpi_rank);
 	}
-    */
+
 	free(points_owners);
 	free(points_per_proc);
 	free(partition_offset);
@@ -1176,7 +1191,6 @@ void tree_walk(
 		int* point_to_send_count, 
 		int* point_to_send_capacity)
 {
-
 	if(root -> owner != -1 && root -> owner != ctx -> mpi_rank)
 	{
         
@@ -1675,6 +1689,13 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
 		if(local_idx_of_the_point[i]) free(local_idx_of_the_point[i]);
 	}
 
+    for(int i = 0; i < ctx -> local_n_points; ++i)
+    {
+        free(dp_info[i].ngbh.data);
+    }
+
+    free(data_to_send_per_proc);
+    free(local_idx_of_the_point);
     free(heap_batches_per_node);
     free(rcv_heap_batches);
     free(rcv_work_batches);
@@ -1786,10 +1807,10 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
     
         // 190M points
         // std_g2980844_091_0000
-		data = read_data_file(ctx,"../norm_data/std_g2980844_091_0000",MY_TRUE);
+		 data = read_data_file(ctx,"../norm_data/std_g2980844_091_0000",MY_TRUE);
         
         /* 1M points ca.*/
-		//data = read_data_file(ctx,"../norm_data/std_LR_091_0001",MY_TRUE);
+		// data = read_data_file(ctx,"../norm_data/std_LR_091_0001",MY_TRUE);
 
         /* 8M points */
         
@@ -1805,8 +1826,8 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
 
 		// ctx -> n_points = 48*5*2000;
 		ctx->n_points = ctx->n_points / ctx->dims;
-		ctx->n_points = (ctx->n_points * 10) / 10;
-        //ctx -> n_points = ctx -> world_size * 1000;
+		ctx->n_points = (ctx->n_points * 10 ) / 10;
+        // ctx -> n_points = ctx -> world_size * 1000;
 		mpi_printf(ctx, "Read %lu points in %u dims\n", ctx->n_points, ctx->dims);
 	}
 	
@@ -1870,8 +1891,24 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
 	kdtree_v2 local_tree;
 	kdtree_v2_init( &local_tree, ctx -> local_data, ctx -> local_n_points, (unsigned int)ctx -> dims);
 	int k = 300;
+	//int k = 30;
 
 	datapoint_info_t* dp_info = (datapoint_info_t*)malloc(ctx -> local_n_points * sizeof(datapoint_info_t));			
+    /* initialize, to cope with valgrind */
+    for(uint64_t i = 0; i < ctx -> local_n_points; ++i)
+    {
+        dp_info[i].ngbh.data = NULL;
+        dp_info[i].ngbh.N = 0;
+        dp_info[i].ngbh.count = 0;
+        dp_info[i].g = 0.f;
+        dp_info[i].log_rho = 0.f;
+        dp_info[i].log_rho_c = 0.f;
+        dp_info[i].log_rho_err = 0.f;
+        dp_info[i].array_idx = -1;
+        dp_info[i].kstar = -1;
+        dp_info[i].is_center = -1;
+        dp_info[i].cluster_idx = -1;
+    }
 
     TIME_START
 	build_local_tree(ctx, &local_tree);