diff --git a/README.md b/README.md
index a08809c09453dd5aea5bbe47b7d2081913ffa290..7949127e45470faf53f2d4eb06bc4cd3a601eef5 100644
--- a/README.md
+++ b/README.md
@@ -15,11 +15,12 @@ The suggestion is to run it with one mpi task per socket.
 
 # Todo
 
+ - [ ] H1: implementation of lock-free centers elimination
+ - [ ] context: open all windows in a single shot, close them all together
+ - [ ] io: curation of IO using MPI-IO or other solutions
+ - [ ] kdtree: optimization and profiling
+ - [ ] prettify overall stdout
  - [x] ~~arugment parser~~
  - [x] ~~H2: graph reduction~~
  - [x] ~~kdtree: implement slim heap~~
- - [ ] prettify overall stdout
- - [ ] H1: implementation of lock free centers elimination
- - [ ] kdtree: optimization an profiling
- - [ ] io: curation of IO using mpi IO or other solutions
 
diff --git a/src/adp/adp.c b/src/adp/adp.c
index 737bf8f8e1ade09d40387eae525fcee558221f4c..13f1c47675f2e6e521ea32123cc7e0f5158a5a90 100644
--- a/src/adp/adp.c
+++ b/src/adp/adp.c
@@ -595,6 +595,56 @@ void compute_correction(global_context_t* ctx, float_t Z)
 }
 
 
+/* maybe this should return an error? */
+
+#define LOCK_ACQUIRED 1
+#define LOCK_FREE     0
+
+#define lock_t int
+#define MPI_LOCK_T MPI_INT
+
+lock_t h1_lock_acquire(global_context_t* ctx, MPI_Win lock_window, int owner, idx_t pos, lock_t state)
+{
+    if(state == LOCK_FREE)
+    {
+        state = LOCK_ACQUIRED;
+
+        lock_t compare = LOCK_FREE;
+        lock_t result  = LOCK_ACQUIRED;
+
+        int err = MPI_SUCCESS;
+
+        while(result == LOCK_ACQUIRED && err == MPI_SUCCESS)
+        {
+            err = MPI_Compare_and_swap(&state, &compare, &result, MPI_LOCK_T, owner, pos, lock_window);
+        }
+
+        if(err != MPI_SUCCESS)
+        {
+            printf("/!\\ Rank %d at line %d encountered an error while using MPI_RMA, aborting\n", ctx -> mpi_rank, __LINE__);
+            print_error_code(err);
+            exit(1);
+        }
+
+    }
+    return state;
+
+}
+
+lock_t h1_lock_free(global_context_t* ctx, MPI_Win lock_window, int owner, idx_t pos, lock_t state)
+{
+    if(state == LOCK_ACQUIRED)
+    {
+        state = LOCK_FREE;
+
+        MPI_Accumulate(&state, 1, MPI_LOCK_T, owner,
+                pos, 1, MPI_LOCK_T, MPI_REPLACE, lock_window);
+
+    }
+    return state;
+
+}
+
 clusters_t Heuristic1(global_context_t *ctx)
 {
     /*
@@ -656,7 +706,7 @@ clusters_t Heuristic1(global_context_t *ctx)
         }
         if(dp_info[i].is_center)
         {
-            #pragma omp critical
+            #pragma omp critical (push_candidate_center)
             {
                 lu_dynamic_array_pushBack(&all_centers, i);
             }
@@ -671,11 +721,14 @@ clusters_t Heuristic1(global_context_t *ctx)
      * ends, center, removed centers, and max_rho arrays are populated
      */
 
+    lock_t* lock_array = (lock_t*)MY_MALLOC(n * sizeof(lock_t));
     heap_node* to_remove_mask = (heap_node*)MY_MALLOC(n*sizeof(heap_node));
+
     for(idx_t p = 0; p < n; ++p)
     {
         to_remove_mask[p].array_idx = MY_SIZE_MAX;
         to_remove_mask[p].value = 9999999;
+        lock_array[p] = LOCK_FREE;
     }
     qsort(dp_info_ptrs, n, sizeof(datapoint_info_t*), cmpPP);
 
@@ -684,6 +737,12 @@ clusters_t Heuristic1(global_context_t *ctx)
     MPI_Win_create(to_remove_mask, n * sizeof(heap_node), 1, MPI_INFO_NULL, ctx -> mpi_communicator, &win_to_remove_mask);
     MPI_Win_fence(0, win_to_remove_mask);
 
+    MPI_Win win_locks;
+    MPI_Win_create(lock_array, n * sizeof(lock_t), sizeof(lock_t), MPI_INFO_NULL, ctx -> mpi_communicator, &win_locks);
+    MPI_Win_fence(0, win_locks);
+
+
+
     #if defined(THREAD_FUNNELED)
     #else
     #pragma omp parallel for
@@ -700,22 +759,49 @@ clusters_t Heuristic1(global_context_t *ctx)
             if(j_point.is_center && i_point.g > j_point.g)
             {
-                #pragma omp critical
+                /*
+                 *
+                 * TODO: Implement it without this but using private locks
+                 *       use an array of locks, and compare and swap to actually gain control of the thing
+                 *
+                 * */
+                int owner = foreign_owner(ctx, jidx);
+                idx_t jpos = jidx - ctx -> rank_idx_start[owner];
+
+                lock_t state = LOCK_FREE;
+
+                state = h1_lock_acquire(ctx, win_locks, owner, jpos, state);
+
+                heap_node mask_element;
+                MPI_Request request;
+                MPI_Rget(&mask_element, sizeof(heap_node), MPI_BYTE,
+                        owner, jpos * sizeof(heap_node), sizeof(heap_node), MPI_BYTE, win_to_remove_mask, &request);
+                MPI_Wait(&request, MPI_STATUS_IGNORE);
+
+                int flag = mask_element.array_idx == MY_SIZE_MAX;
+                if(flag || i_point.g > mask_element.value)
+                {
+                    heap_node tmp_mask_element = {.array_idx = i_point.array_idx, .value = i_point.g};
+                    MPI_Request request;
+                    MPI_Rput(&tmp_mask_element, sizeof(heap_node), MPI_BYTE, owner,
+                            jpos*sizeof(heap_node), sizeof(heap_node), MPI_BYTE, win_to_remove_mask, &request);
+                    MPI_Wait(&request, MPI_STATUS_IGNORE);
+
+                }
+
+                state = h1_lock_free(ctx, win_locks, owner, jpos, state);
+
+                /*
+                #pragma omp critical (h1_centers_elimination)
                 {
-                    /*
-                     *
-                     * TODO: Implement it without this but using private locks
-                     *       use an array of locks, and compare and swap to actually gain control of the thing
-                     *
-                     * */
                     int owner = foreign_owner(ctx, jidx);
                     idx_t jpos = jidx - ctx -> rank_idx_start[owner];
 
                     MPI_Win_lock(MPI_LOCK_EXCLUSIVE, owner, 0, win_to_remove_mask);
 
                     heap_node mask_element;
                     MPI_Request request;
-                    MPI_Rget( &mask_element, sizeof(heap_node), MPI_BYTE,
-                             owner, jpos * sizeof(heap_node) , sizeof(heap_node), MPI_BYTE, win_to_remove_mask, &request);
+                    MPI_Rget(&mask_element, sizeof(heap_node), MPI_BYTE,
+                            owner, jpos * sizeof(heap_node), sizeof(heap_node), MPI_BYTE, win_to_remove_mask, &request);
                     MPI_Wait(&request, MPI_STATUS_IGNORE);
 
                     int flag = mask_element.array_idx == MY_SIZE_MAX;
@@ -731,11 +817,13 @@ clusters_t Heuristic1(global_context_t *ctx)
                     MPI_Win_unlock(owner, win_to_remove_mask);
                 }
+                */
             }
         }
     }
 
     MPI_Win_fence(0, win_to_remove_mask);
+    MPI_Win_fence(0, win_locks);
 
     MPI_Barrier(ctx -> mpi_communicator);
 
     /* populate the usual arrays */
@@ -784,6 +872,9 @@ clusters_t Heuristic1(global_context_t *ctx)
 
     MPI_Win_free(&win_to_remove_mask);
     free(to_remove_mask);
+
+    MPI_Win_free(&win_locks);
+    free(lock_array);
 
     int n_centers = (int)actual_centers.count;
     int tot_centers;
diff --git a/src/main/main.c b/src/main/main.c
index a4d797b93f1b8d6848cc8b46f5a2b0ebe87b6bea..8c90cfee2667a35126dd0b26b865511c4dd3f6d5 100644
--- a/src/main/main.c
+++ b/src/main/main.c
@@ -183,8 +183,6 @@ int main(int argc, char** argv) {
     //parse command line
     parse_args(&ctx, argc, argv);
 
-    printf("DIO\n");
-
     /*
      * Generate a random matrix of lenght of some kind
      */
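
Reviewer note, not part of the patch: h1_lock_acquire spins on the result buffer of MPI_Compare_and_swap inside a fence-based (active-target) epoch. The MPI standard only guarantees that the result of an RMA atomic is defined once the operation has completed at a synchronization call, so the spin loop may re-test a value that has not yet been delivered. Below is a minimal sketch of the same per-element lock under passive-target synchronization, where MPI_Win_flush completes each CAS before its result is checked. It assumes the lock window is opened once with MPI_Win_lock_all and closed with MPI_Win_unlock_all (replacing the MPI_Win_fence pair); the names rma_lock_acquire and rma_lock_release are illustrative and are not part of the repository.

    /* Sketch only: per-element spin lock on an MPI RMA window.
     * Assumes the caller has opened a passive-target epoch with
     * MPI_Win_lock_all(0, win) and will close it with MPI_Win_unlock_all(win). */
    #include <mpi.h>

    #define LOCK_FREE     0
    #define LOCK_ACQUIRED 1

    /* Spin until the CAS observes LOCK_FREE in element `pos` of the
     * lock window on rank `owner`, then the lock is held. */
    static void rma_lock_acquire(MPI_Win win, int owner, MPI_Aint pos)
    {
        const int acquired = LOCK_ACQUIRED;
        const int expected = LOCK_FREE;
        int       observed = LOCK_ACQUIRED;

        do {
            MPI_Compare_and_swap(&acquired, &expected, &observed,
                                 MPI_INT, owner, pos, win);
            MPI_Win_flush(owner, win);   /* complete the CAS so `observed` is defined */
        } while (observed != LOCK_FREE);
    }

    /* Release: atomically write LOCK_FREE back.  MPI_Accumulate with
     * MPI_REPLACE stays atomic with respect to concurrent CAS calls
     * on the same element. */
    static void rma_lock_release(MPI_Win win, int owner, MPI_Aint pos)
    {
        const int released = LOCK_FREE;
        MPI_Accumulate(&released, 1, MPI_INT, owner, pos, 1, MPI_INT,
                       MPI_REPLACE, win);
        MPI_Win_flush(owner, win);       /* make the release visible before returning */
    }

As a possible follow-up, if heap_node can be mapped onto one of MPI's predefined value/index pair types, the read-compare-write on win_to_remove_mask could be expressed as a single MPI_Accumulate with MPI_MAXLOC, which would remove the need for a lock altogether.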