From 6ff072dac6cbff3c158b67082994fe523d123c1f Mon Sep 17 00:00:00 2001 From: lykos98 Date: Wed, 20 Mar 2024 12:54:24 +0100 Subject: [PATCH] added diagnostics and time print --- src/common/common.h | 16 ++++++++++++++++ src/tree/tree.c | 37 +++++++++++++++++++++++++++++++------ 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/src/common/common.h b/src/common/common.h index 7352445..9772b9b 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -4,6 +4,8 @@ #include #include #include +#include + //#include #ifdef USE_FLOAT32 @@ -29,6 +31,20 @@ #define MPI_PRINT(...) mpi_printf(ctx,__VA_ARGS__) +#ifdef NDEBUG + #define TIME_DEF + #define TIME_START + #define TIME_STOP +#else + #define TIME_DEF struct timespec __start,__end; + #define TIME_START clock_gettime(CLOCK_MONOTONIC,&__start); + #define TIME_STOP { \ + clock_gettime(CLOCK_MONOTONIC,&__end); \ + MPI_DB_PRINT("[TIME] elapsed time %.2lfs\n", (double)(__end.tv_sec - __start.tv_sec) \ + + (__end.tv_nsec - __start.tv_nsec)/1e9); \ + } +#endif + #define MAX(A,B) ((A) > (B) ? (A) : (B)) #define MIN(A,B) ((A) < (B) ? (A) : (B)) diff --git a/src/tree/tree.c b/src/tree/tree.c index 060d07a..7d35b6a 100644 --- a/src/tree/tree.c +++ b/src/tree/tree.c @@ -19,8 +19,8 @@ #include #include -#define WRITE_NGBH -#define WRITE_TOP_NODES +//#define WRITE_NGBH +//#define WRITE_TOP_NODES #ifdef USE_FLOAT32 #define MPI_MY_FLOAT MPI_FLOAT @@ -1038,6 +1038,7 @@ int partition_data_around_key(int* key, float_t *val, int vec_len, int ref_key , void exchange_points(global_context_t* ctx, top_kdtree_t* tree) { + MPI_DB_PRINT("[MASTER] Domain decomposition\n"); int* points_per_proc = (int*)malloc(ctx -> world_size * sizeof(int)); int* points_owners = (int*)malloc(ctx -> dims * ctx -> local_n_points * sizeof(float_t)); int* partition_offset = (int*)malloc(ctx -> world_size * sizeof(int)); @@ -1324,7 +1325,10 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre print_diagnositcs(ctx, k); + TIME_DEF + MPI_DB_PRINT("[MASTER] Local ngbh search "); + TIME_START MPI_Barrier(ctx -> mpi_communicator); #pragma omp parallel for for(int p = 0; p < ctx -> local_n_points; ++p) @@ -1339,8 +1343,11 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre } MPI_DB_PRINT("---> Done\n"); + TIME_STOP + MPI_DB_PRINT("[MASTER] Finding point to send to other procs\n"); + TIME_START /* find if a points needs a refine on the global tree */ float_t** data_to_send_per_proc = (float_t**)malloc(ctx -> world_size * sizeof(float_t*)); int** local_idx_of_the_point = (int**)malloc(ctx -> world_size * sizeof(int*)); @@ -1374,9 +1381,12 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre point_to_snd_count, point_to_snd_capacity); } + TIME_STOP + MPI_DB_PRINT("[MASTER] Sending points to send to other procs\n"); + TIME_START int* point_to_rcv_count = (int*)malloc(ctx -> world_size * sizeof(int)); /* exchange points to work on*/ MPI_Alltoall(point_to_snd_count, 1, MPI_INT, point_to_rcv_count, 1, MPI_INT, ctx -> mpi_communicator); @@ -1487,11 +1497,13 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre { heap_batches_per_node[p] = __heap_batches_to_snd + rcv_displ[p] * k; } + TIME_STOP /* compute everything */ MPI_DB_PRINT("[MASTER] Working on recieved points\n"); + MPI_Barrier(ctx -> mpi_communicator); - + TIME_START for(int p = 0; p < ctx -> world_size; ++p) { if(point_to_rcv_count[p] > 0) @@ -1517,8 +1529,11 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre * counts are inverted since I have to recieve as many batches as points I * Have originally sended */ + TIME_STOP MPI_DB_PRINT("[MASTER] Sending out results\n"); + MPI_Barrier(ctx -> mpi_communicator); + TIME_START /* for(int i = 0; i < ctx -> world_size; ++i) { @@ -1684,10 +1699,12 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre free(already_sent_points); free(already_rcvd_points); + TIME_STOP /* merge old with new heaps */ MPI_DB_PRINT("[MASTER] Merging resutls\n"); + TIME_START for(int i = 0; i < ctx -> world_size; ++i) { @@ -1738,6 +1755,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre #endif MPI_Barrier(ctx -> mpi_communicator); + TIME_STOP @@ -1798,6 +1816,7 @@ void test_the_idea(global_context_t* ctx) void build_local_tree(global_context_t* ctx, kdtree_v2* local_tree) { + MPI_DB_PRINT("[MASTER] Building local trees\n"); local_tree -> root = build_tree_kdtree_v2(local_tree -> _nodes, local_tree -> n_nodes, ctx -> dims); } @@ -1878,14 +1897,14 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) /* 10^7 ~ 8M points */ - //data = read_data_file(ctx,"../norm_data/std_g0144846_Me14_091_0001",MY_TRUE); + data = read_data_file(ctx,"../norm_data/std_g0144846_Me14_091_0001",MY_TRUE); //88M BREAKS //data = read_data_file(ctx,"../norm_data/std_g5503149_091_0000",MY_TRUE); // //34 M - data = read_data_file(ctx,"../norm_data/std_g1212639_091_0001",MY_TRUE); + //data = read_data_file(ctx,"../norm_data/std_g1212639_091_0001",MY_TRUE); ctx->dims = 5; // ctx -> n_points = 48*5*2000; @@ -1953,14 +1972,20 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) float_t tol = 0.002; top_kdtree_t tree; + TIME_DEF + TIME_START top_tree_init(ctx, &tree); + TIME_STOP + + TIME_START build_top_kdtree(ctx, &original_ps, &tree, k_global, tol); exchange_points(ctx, &tree); + TIME_STOP //test_the_idea(ctx); kdtree_v2 local_tree; kdtree_v2_init( &local_tree, ctx -> local_data, ctx -> local_n_points, (unsigned int)ctx -> dims); - int k = 500; + int k = 400; datapoint_info_t* dp_info = (datapoint_info_t*)malloc(ctx -> local_n_points * sizeof(datapoint_info_t)); build_local_tree(ctx, &local_tree); -- GitLab