diff --git a/check.py b/check.py
index cd2e2f7b21e551c6e3562ca862636fef93c57e44..11072fc430b763c80c0af955c519355d7355191f 100644
--- a/check.py
+++ b/check.py
@@ -7,7 +7,7 @@ from sklearn.neighbors import NearestNeighbors
 
 ndims = 5
 k = 500
-p = 2
+p = 10
 
 with open("bb/top_nodes.csv","r") as f:
     l = f.readlines()
diff --git a/src/tree/tree.c b/src/tree/tree.c
index 6183f7e1b1c684e1c8e3b6f05dfedc148a2cb8d3..061007441c1058e43bc1710b6b7a0300897d8d20 100644
--- a/src/tree/tree.c
+++ b/src/tree/tree.c
@@ -1381,6 +1381,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
 
     float_t* __snd_points = (float_t*)malloc(tot_points_snd * (1 + ctx -> dims) * sizeof(float_t));
 
+    /* copy data to send in contiguous memory */
     for(int i = 0; i < ctx -> world_size; ++i)
     {
@@ -1404,25 +1405,43 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
 
     int work_batch_stride = 1 + ctx -> dims;
 
-    heap_node* __heap_batches_to_snd = (heap_node*)malloc(k * tot_points_rcv * sizeof(heap_node));
-    heap_node* __heap_batches_to_rcv = (heap_node*)malloc(k * tot_points_snd * sizeof(heap_node));
+    heap_node* __heap_batches_to_snd = (heap_node*)malloc((uint64_t)k * (uint64_t)tot_points_rcv * sizeof(heap_node));
+    heap_node* __heap_batches_to_rcv = (heap_node*)malloc((uint64_t)k * (uint64_t)tot_points_snd * sizeof(heap_node));
 
-    rcv_displ[0] = 0;
-    snd_displ[0] = 0;
     /*
-    rcv_count[0] = point_to_rcv_count[0] * k * sizeof(heap_node);
-    snd_count[0] = point_to_snd_count[0] * k * sizeof(heap_node);
+    for(int i = 0; i < ctx -> world_size; ++i)
+    {
+        if(i == ctx -> mpi_rank)
+        {
+            DB_PRINT("[RANK %d]\t", ctx -> mpi_rank);
+            DB_PRINT("tot point rcv %d tot point send %d \t",tot_points_rcv, tot_points_snd);
+            DB_PRINT("bytes rcv %lu snd %lu", (uint64_t)k * (uint64_t)tot_points_rcv * sizeof(heap_node), (uint64_t)k * (uint64_t)tot_points_snd * sizeof(heap_node));
+            DB_PRINT("\n");
+        }
+    }
     */
+
+    if( __heap_batches_to_rcv == NULL)
+    {
+        DB_PRINT("Rank %d failed to allocate rcv_heaps %luB required\n",ctx -> mpi_rank, (uint64_t)k * (uint64_t)tot_points_rcv * sizeof(heap_node));
+    }
+
+    if( __heap_batches_to_snd == NULL)
+    {
+        DB_PRINT("Rank %d failed to allocate snd_heaps %luB required\n",ctx -> mpi_rank, (uint64_t)k * (uint64_t)tot_points_snd * sizeof(heap_node));
+    }
+
+    MPI_Barrier(ctx -> mpi_communicator);
+
+
+    rcv_displ[0] = 0;
+    snd_displ[0] = 0;
 
     rcv_count[0] = point_to_rcv_count[0];
     snd_count[0] = point_to_snd_count[0];
 
     for(int i = 1; i < ctx -> world_size; ++i)
     {
-        /*
-        rcv_count[i] = point_to_rcv_count[i] * k * sizeof(heap_node);
-        snd_count[i] = point_to_snd_count[i] * k * sizeof(heap_node);
-        */
 
         rcv_count[i] = point_to_rcv_count[i];
         snd_count[i] = point_to_snd_count[i];
@@ -1434,14 +1453,13 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
 
     heap_node** heap_batches_per_node = (heap_node**)malloc(ctx -> world_size * sizeof(heap_node*));
     for(int p = 0; p < ctx -> world_size; ++p)
     {
-        //heap_batches_per_node[p] = NULL;
-        //heap_batches_per_node[p] = __heap_batches_to_snd + rcv_displ[p] / sizeof(heap_node);
         heap_batches_per_node[p] = __heap_batches_to_snd + rcv_displ[p] * k;
     }
 
     /* compute everything */
     MPI_DB_PRINT("[MASTER] Working on recieved points\n");
+
     for(int p = 0; p < ctx -> world_size; ++p)
     {
         if(point_to_rcv_count[p] > 0)
@@ -1467,7 +1485,9 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
      * counts are inverted since I have to recieve as many batches as points I
      * Have originally sended */
+
     MPI_DB_PRINT("[MASTER] Sending out results\n");
+    /*
     for(int i = 0; i < ctx -> world_size; ++i)
     {
         if(i == ctx -> mpi_rank)
         {
@@ -1479,6 +1499,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
             DB_PRINT("\n");
         }
     }
+    */
 
 
     MPI_Datatype MPI_my_heap;
@@ -1821,16 +1842,23 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
     // std_g0163178_Me14_091_0000
 
     /* 10^6 points ca.*/
-    //data = read_data_file(ctx,"../norm_data/std_LR_091_0001",MY_TRUE);
+    data = read_data_file(ctx,"../norm_data/std_LR_091_0001",MY_TRUE);
 
     /* 10^7 ~ 8M points */
-    //data = read_data_file(ctx,"../norm_data/std_g0163178_Me14_091_0001",MY_TRUE);
-    data = read_data_file(ctx,"../norm_data/std_g5503149_091_0001",MY_TRUE);
+
+    //data = read_data_file(ctx,"../norm_data/std_g0144846_Me14_091_0001",MY_TRUE);
+
+    //88M BREAKS
+    //data = read_data_file(ctx,"../norm_data/std_g5503149_091_0000",MY_TRUE);
+
+    //
+    //34 M
+    //data = read_data_file(ctx,"../norm_data/std_g1212639_091_0001",MY_TRUE);
 
     ctx->dims = 5;
 
     // ctx -> n_points = 48*5*2000;
     ctx->n_points = ctx->n_points / ctx->dims;
-    ctx->n_points = ctx->n_points / 2;
+    //ctx->n_points = ctx->n_points / 2;
     //ctx -> n_points = ctx -> world_size * 1000;
     mpi_printf(ctx, "Read %lu points in %u dims\n", ctx->n_points, ctx->dims);
 }
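Note on the tree.c allocation change: in the old expression k * tot_points_rcv * sizeof(heap_node) the int-by-int product is evaluated first, so with k = 500 and several million received points it overflows 32-bit arithmetic before sizeof() widens anything, and malloc is handed a wrong size. Casting both factors to uint64_t first keeps the byte count exact, and the added NULL checks plus the MPI_Barrier make a failed allocation visible per rank instead of surfacing as a crash later. Below is a minimal standalone sketch of the same idea; the heap_node layout and the counts are illustrative assumptions, not values taken from the project.

    /* overflow_demo.c - why the (uint64_t) casts in the malloc sizes matter.
     * heap_node layout and the counts below are assumptions for this demo. */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { double value; int array_idx; } heap_node; /* stand-in layout */

    int main(void)
    {
        int k = 500;                    /* neighbours kept per point        */
        int tot_points_rcv = 10000000;  /* ~10M points received by one rank */

        /* In k * tot_points_rcv * sizeof(heap_node) the int-by-int product is
         * evaluated first and exceeds 32 bits before sizeof() widens anything;
         * shown here with unsigned arithmetic so the wrap is well-defined. */
        uint32_t wrapped = (uint32_t)k * (uint32_t)tot_points_rcv;

        /* Widening both factors first, as the patch does, keeps the product exact. */
        uint64_t n_nodes = (uint64_t)k * (uint64_t)tot_points_rcv;
        uint64_t n_bytes = n_nodes * sizeof(heap_node);

        printf("32-bit product: %u nodes (wrong)\n", (unsigned)wrapped);
        printf("64-bit product: %llu nodes, %llu B\n",
               (unsigned long long)n_nodes, (unsigned long long)n_bytes);

        /* Same pattern as the patch: check the allocation and report the size. */
        heap_node* heaps = (heap_node*)malloc(n_bytes);
        if(heaps == NULL)
        {
            fprintf(stderr, "failed to allocate %llu B\n", (unsigned long long)n_bytes);
            return 1;
        }
        free(heaps);
        return 0;
    }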