diff --git a/src/common/common.c b/src/common/common.c index 452e8df912ab2a4b1f50f8cc9dc47136b749feb3..5da59fa0e040c87f336ab7e253cd91397411d52d 100644 --- a/src/common/common.c +++ b/src/common/common.c @@ -10,6 +10,8 @@ void get_context(global_context_t* ctx) ctx -> local_data = NULL; ctx -> lb_box = NULL; ctx -> ub_box = NULL; + ctx -> rank_n_points = (int*)malloc(ctx -> world_size * sizeof(int)); + ctx -> rank_idx_start = (int*)malloc(ctx -> world_size * sizeof(int)); } void free_context(global_context_t* ctx) @@ -31,7 +33,8 @@ void free_context(global_context_t* ctx) free(ctx -> lb_box); ctx -> lb_box = NULL; } - + free(ctx -> rank_n_points); + free(ctx -> rank_idx_start); } void free_pointset(pointset_t* ps) diff --git a/src/common/common.h b/src/common/common.h index ded8368aa0d8d1c38eb1c2fdd92e9484b5c894aa..6b84cde46d1f180879fdd2fcdd6f921f4ffdd282 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -92,16 +92,18 @@ struct global_context_t { - size_t n_points; - size_t idx_start; - size_t local_n_points; - uint32_t dims; + int world_size; + int mpi_rank; + int __processor_name_len; float_t* local_data; float_t* lb_box; float_t* ub_box; - int world_size; - int mpi_rank; - int __processor_name_len; + size_t n_points; + size_t idx_start; + size_t local_n_points; + uint32_t dims; + int* rank_idx_start; + int* rank_n_points; char processor_mame[MPI_MAX_PROCESSOR_NAME]; MPI_Comm mpi_communicator; }; diff --git a/src/tree/tree.c b/src/tree/tree.c index bca429f4891b3fe6b914243ccc683a70707da4a8..bd1b165b7a36b76006393a07f179b850ee9fb0e1 100644 --- a/src/tree/tree.c +++ b/src/tree/tree.c @@ -1152,7 +1152,12 @@ void exchange_points(global_context_t* ctx, top_kdtree_t* tree) { ctx -> idx_start += ppp[i]; } - //DB_PRINT("rank %d start %lu\n", ctx -> mpi_rank, ctx -> idx_start); + + /* find slices of indices */ + for(int i = 0; i < ctx -> world_size; ++i) ctx -> rank_n_points[i] = ppp[i]; + + ctx -> rank_idx_start[0] = 0; + for(int i = 1; i < ctx -> world_size; ++i) ctx -> rank_idx_start[i] = ppp[i - 1] + ctx -> rank_idx_start[i - 1]; /* free prv pointer */ free(ppp); @@ -1167,6 +1172,8 @@ void exchange_points(global_context_t* ctx, top_kdtree_t* tree) if(o != ctx -> mpi_rank) DB_PRINT("rank %d got an error\n",ctx -> mpi_rank); } + + free(points_owners); free(points_per_proc); free(partition_offset); @@ -1718,20 +1725,12 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre MPI_Barrier(ctx -> mpi_communicator); - - - - for(int i = 0; i < ctx -> world_size; ++i) { if(data_to_send_per_proc[i]) free(data_to_send_per_proc[i]); if(local_idx_of_the_point[i]) free(local_idx_of_the_point[i]); } - for(int i = 0; i < ctx -> local_n_points; ++i) - { - free(dp_info[i].ngbh.data); - } free(data_to_send_per_proc); free(local_idx_of_the_point); @@ -1826,10 +1825,104 @@ void ordered_data_to_file(global_context_t* ctx) } MPI_Barrier(ctx -> mpi_communicator); - gg } +static inline int foreign_owner(global_context_t* ctx, idx_t idx) +{ + int owner; + if( idx >= ctx -> idx_start && idx < ctx -> idx_start + ctx -> local_n_points) + { + return ctx -> mpi_rank; + } + + for(int i = 0; i < ctx -> world_size; ++i) + { + owner = i; + if( idx >= ctx -> rank_idx_start[i] && idx < ctx -> rank_idx_start[i] + ctx -> rank_n_points[i]) break; + } + return owner; +} + +static inline void push_on_foreign_idx_list(idx_t element, int owner, int* counts, int* capacities, idx_t** lists) +{ + + if(capacities[owner] == counts[owner]) + { + int new_cap = capacities[owner] * 1.1; + lists[owner] = (idx_t*)realloc(lists[owner], new_cap * sizeof(idx_t)); + capacities[owner] = new_cap; + } + + + /* find the plausible place */ + int idx_to_insert = counts[owner]; + + int flag = 1; + /* check if point is already inserted */ + for(int i = 0; i < idx_to_insert; ++i) + { + flag = flag && lists[owner][i] != element; + } + /* if this is the case insert it */ + + if(flag) + { + counts[owner]++; + lists[owner][idx_to_insert] = element; + } + +} + +void find_foreign_nodes(global_context_t* ctx, datapoint_info_t* dp) +{ + int k = dp[0].ngbh.count; + + idx_t** array_indexes_to_request = (idx_t**)malloc(ctx -> world_size * sizeof(idx_t*)); + int* count_to_request = (int*)malloc(ctx -> world_size * sizeof(int)); + int* capacities = (int*)malloc(ctx -> world_size * sizeof(int)); + + for(int i = 0; i < ctx -> world_size; ++i) + { + array_indexes_to_request[i] = (idx_t*)malloc(100 * sizeof(idx_t)); + capacities[i] = 100; + count_to_request[i] = 0; + } + + #pragma omp parallel for + for(uint32_t i = 0; i < ctx -> local_n_points; ++i) + { + for(int j = 0; j < k; ++j) + { + idx_t element = dp[i].ngbh.data[j].array_idx; + int owner = foreign_owner(ctx, element); + //DB_PRINT("%lu %d\n", element, owner); + + if(owner != ctx -> mpi_rank) + { + //DB_PRINT("Hehe"); + #pragma omp critical + { + push_on_foreign_idx_list(element, owner, count_to_request, capacities, array_indexes_to_request); + } + } + } + } + + DB_PRINT("[RANK %d elements to request]", ctx -> mpi_rank); + for(int i = 0; i < ctx -> world_size; ++i) + { + DB_PRINT("%d\t", count_to_request[i]); + } + DB_PRINT("\n"); + + + + for(int i = 0; i < ctx -> world_size; ++i) free(array_indexes_to_request[i]); + free(array_indexes_to_request); + free(count_to_request); + free(capacities); +} void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) { @@ -1846,10 +1939,10 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) // 190M points // std_g2980844_091_0000 - data = read_data_file(ctx,"../norm_data/std_g2980844_091_0000",MY_TRUE); + // data = read_data_file(ctx,"../norm_data/std_g2980844_091_0000",MY_TRUE); /* 1M points ca.*/ - // data = read_data_file(ctx,"../norm_data/std_LR_091_0001",MY_TRUE); + data = read_data_file(ctx,"../norm_data/std_LR_091_0001",MY_TRUE); /* 8M points */ @@ -1865,7 +1958,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) //ctx -> n_points = 48*5*2000; ctx->n_points = ctx->n_points / ctx->dims; - ctx->n_points = (ctx->n_points * 6) / 10; + ctx->n_points = (ctx->n_points * 5) / 10; // ctx -> n_points = ctx -> world_size * 1000; //ctx -> n_points = 10000000 * ctx -> world_size; @@ -1967,11 +2060,18 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) MPI_Barrier(ctx -> mpi_communicator); elapsed_time = TIME_STOP; LOG_WRITE("Total time for all knn search", elapsed_time) - + + find_foreign_nodes(ctx, dp_info); + #if defined (WRITE_NGBH) ordered_data_to_file(ctx); #endif + + for(int i = 0; i < ctx -> local_n_points; ++i) + { + free(dp_info[i].ngbh.data); + } top_tree_free(ctx, &tree); @@ -1986,4 +2086,5 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) original_ps.data = NULL; free_pointset(&original_ps); free(global_bin_counts_int); + } diff --git a/src/tree/tree.h b/src/tree/tree.h index 6b38c79bc63669c35cef78c7c0e9bcbfe8f4eef8..39086a27ac49a21375c2042c21cd5a6575c75565 100644 --- a/src/tree/tree.h +++ b/src/tree/tree.h @@ -83,8 +83,8 @@ typedef struct top_kdtree_t typedef struct datapoint_info_t { + idx_t array_idx; heap ngbh; - idx_t array_idx; float_t g; float_t log_rho; float_t log_rho_c;