Skip to content
Snippets Groups Projects
Commit 6a7d8d5f authored by lykos98's avatar lykos98
Browse files

Replaced the critical region in the neighbour (ngbh) search with atomic counters and a two-pass tree walk when finding the points to send to other processes

parent 128583d4
No related branches found
No related tags found
No related merge requests found
......@@ -19,8 +19,8 @@
#include <omp.h>
#include <sys/sysinfo.h>
//#define WRITE_NGBH
//#define WRITE_TOP_NODES
#define WRITE_NGBH
#define WRITE_TOP_NODES
/*
* Maximum bytes to send with a single mpi send/recv, used
......@@ -1315,6 +1315,180 @@ void tree_walk(
}
/*
 * Counting pass of the top-tree walk.
 *
 * Walks the top kd-tree starting at `root` for a single point and, every time
 * a node assigned to another rank is reached (owner != -1 and owner different
 * from ctx -> mpi_rank), atomically increments that rank's slot in
 * `point_to_send_capacity`. Nothing is copied here, only counters are updated.
 *
 * ctx                     context; only ctx -> mpi_rank is read
 * root                    node to visit, must not be NULL
 * point_idx               index of the point; only forwarded to recursive calls
 * max_dist                search radius, compared against the SQUARED distance
 *                         from the splitting plane (presumably a squared
 *                         distance itself -- TODO confirm with the caller)
 * point                   coordinates of the point, indexed by root -> split_dim
 * point_to_send_capacity  per-rank counters, indexed by node owner
 *
 * The counter update is an OpenMP atomic, so the function can be called
 * concurrently on the same counter array.
 */
void tree_walk_v2_find_n_points(
global_context_t* ctx,
top_kdtree_node_t* root,
int point_idx,
float_t max_dist,
float_t* point,
int* point_to_send_capacity)
{
    /* node assigned to another rank: count one more point for it and stop */
    if(root -> owner != -1 && root -> owner != ctx -> mpi_rank)
    {
        #pragma omp atomic update
        point_to_send_capacity[root -> owner]++;
        return;
    }

    /* signed offset of the point from the splitting plane of this node */
    float_t plane_offset = point[root -> split_dim] - root -> split_val;

    __builtin_prefetch(root -> lch, 0, 3);
    __builtin_prefetch(root -> rch, 0, 3);

    /*
     * 1 when the point lies strictly above split_val, 0 otherwise; it is
     * matched against the TOP_TREE_* and HP_* constants defined elsewhere
     */
    int which_side = plane_offset > 0.f;

    /* first descent, selected by the side flag */
    if(which_side == TOP_TREE_LCH)
    {
        if(root -> lch)
        {
            tree_walk_v2_find_n_points(ctx, root -> lch, point_idx, max_dist, point, point_to_send_capacity);
        }
    }
    else if(which_side == TOP_TREE_RCH)
    {
        if(root -> rch)
        {
            tree_walk_v2_find_n_points(ctx, root -> rch, point_idx, max_dist, point, point_to_send_capacity);
        }
    }

    /* the radius does not reach across the splitting plane: nothing else to visit */
    if(!(max_dist > (plane_offset * plane_offset)))
    {
        return;
    }

    /* the radius crosses the plane: visit the opposite child as well */
    if(which_side == HP_LEFT_SIDE)
    {
        if(root -> rch)
        {
            tree_walk_v2_find_n_points(ctx, root -> rch, point_idx, max_dist, point, point_to_send_capacity);
        }
    }
    else if(which_side == HP_RIGHT_SIDE)
    {
        if(root -> lch)
        {
            tree_walk_v2_find_n_points(ctx, root -> lch, point_idx, max_dist, point, point_to_send_capacity);
        }
    }
}
/*
 * Append pass of the top-tree walk.
 *
 * Walks the top kd-tree starting at `root` for a single point and, every time
 * a node assigned to another rank is reached (owner != -1 and owner different
 * from ctx -> mpi_rank), reserves the next free slot of that rank with an
 * atomic capture on `point_to_send_count[owner]`, copies the ctx -> dims
 * coordinates of the point into the slot and records `point_idx` next to it.
 *
 * ctx                     context; ctx -> mpi_rank and ctx -> dims are read
 * root                    node to visit, must not be NULL
 * point_idx               index stored in local_idx_of_the_point for each copy
 * max_dist                search radius, compared against the SQUARED distance
 *                         from the splitting plane (presumably a squared
 *                         distance itself -- TODO confirm with the caller)
 * point                   coordinates of the point (ctx -> dims values)
 * data_to_send_per_proc   per-rank coordinate buffers, slot i starts at
 *                         element i * ctx -> dims
 * local_idx_of_the_point  per-rank index buffers, one int per slot
 * point_to_send_count     per-rank number of slots already taken
 *
 * No bounds check is done on the destination buffers: the caller has to size
 * them beforehand so that every slot reserved here exists.
 *
 * The slot reservation is an OpenMP atomic, so concurrent calls on the same
 * buffers write to distinct slots.
 */
void tree_walk_v2_append_points(
global_context_t* ctx,
top_kdtree_node_t* root,
int point_idx,
float_t max_dist,
float_t* point,
float_t** data_to_send_per_proc,
int** local_idx_of_the_point,
int* point_to_send_count)
{
    if(root -> owner != -1 && root -> owner != ctx -> mpi_rank)
    {
        /* put the point on the requests array of the owning rank */
        int owner = root -> owner;
        int idx;

        /* reserve a slot: idx gets the old count, the count is bumped */
        #pragma omp atomic capture
        idx = point_to_send_count[owner]++;

        /*
         * offset computed in size_t: the product slot * dims overflows int
         * (undefined behaviour) as soon as it reaches 2^31 elements
         */
        size_t n_dims = (size_t)(ctx -> dims);
        float_t* base = data_to_send_per_proc[owner] + (n_dims * (size_t)idx);

        memcpy(base, point, n_dims * sizeof(float_t));
        local_idx_of_the_point[owner][idx] = point_idx;
        return;
    }

    /* tree walk */

    /* signed offset of the point from the splitting plane of this node */
    int split_var = root -> split_dim;
    float_t hp_distance = point[split_var] - root -> split_val;

    __builtin_prefetch(root -> lch, 0, 3);
    __builtin_prefetch(root -> rch, 0, 3);

    /*
     * 1 when the point lies strictly above split_val, 0 otherwise; it is
     * matched against the TOP_TREE_* and HP_* constants defined elsewhere
     */
    int side = hp_distance > 0.f;

    /* first descent, selected by the side flag */
    switch (side)
    {
        case TOP_TREE_LCH:
            if(root -> lch)
            {
                /* walk on the left */
                tree_walk_v2_append_points(ctx, root -> lch, point_idx, max_dist, point,
                        data_to_send_per_proc, local_idx_of_the_point, point_to_send_count);
            }
            break;
        case TOP_TREE_RCH:
            if(root -> rch)
            {
                /* walk on the right */
                tree_walk_v2_append_points(ctx, root -> rch, point_idx, max_dist, point,
                        data_to_send_per_proc, local_idx_of_the_point, point_to_send_count);
            }
            break;
        default:
            break;
    }

    /* visit the opposite child only when the radius crosses the splitting plane */
    int c = max_dist > (hp_distance * hp_distance);
    if(c)
    {
        switch (side)
        {
            case HP_LEFT_SIDE:
                if(root -> rch)
                {
                    /* walk on the right */
                    tree_walk_v2_append_points(ctx, root -> rch, point_idx, max_dist, point,
                            data_to_send_per_proc, local_idx_of_the_point, point_to_send_count);
                }
                break;
            case HP_RIGHT_SIDE:
                if(root -> lch)
                {
                    /* walk on the left */
                    tree_walk_v2_append_points(ctx, root -> lch, point_idx, max_dist, point,
                            data_to_send_per_proc, local_idx_of_the_point, point_to_send_count);
                }
                break;
            default:
                break;
        }
    }
}
void convert_heap_idx_to_global(global_context_t* ctx, heap* H)
{
for(uint64_t i = 0; i < H -> count; ++i)
......@@ -1387,23 +1561,27 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
for(int i = 0; i < ctx -> world_size; ++i)
{
//data_to_send_per_proc[i] = (float_t*)malloc(100 * (1 + ctx -> dims) * sizeof(float_t));
/* allocate it afterwards */
/* OLD VERSION
data_to_send_per_proc[i] = (float_t*)malloc(100 * (ctx -> dims) * sizeof(float_t));
local_idx_of_the_point[i] = (int*)malloc(100 * sizeof(int));
point_to_snd_capacity[i] = 100;
*/
/* NEW VERSION with double tree walk */
point_to_snd_capacity[i] = 0;
point_to_snd_count[i] = 0;
}
/* for each point walk the tree and find to which proc send data */
/* actually compute intersection of ngbh radius of each point to node box */
/* OLD VERSION SINGLE TREE WALK */
/*
#pragma omp parallel for
for(int i = 0; i < ctx -> local_n_points; ++i)
{
/*
MPI_DB_PRINT("%lu\n",dp_info[i].array_idx);
if(i > 10) break;
*/
float_t max_dist = dp_info[i].ngbh.data[0].value;
float_t* point = ctx -> local_data + (i * ctx -> dims);
......@@ -1411,6 +1589,38 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
point, data_to_send_per_proc, local_idx_of_the_point,
point_to_snd_count, point_to_snd_capacity);
}
*/
/* NEW VERSION double tree walk */
#pragma omp parallel for
for(int i = 0; i < ctx -> local_n_points; ++i)
{
float_t max_dist = dp_info[i].ngbh.data[0].value;
float_t* point = ctx -> local_data + (i * ctx -> dims);
tree_walk_v2_find_n_points(ctx, top_tree -> root, i, max_dist, point, point_to_snd_capacity);
}
/* allocate needed space */
for(int i = 0; i < ctx -> world_size; ++i)
{
int np = point_to_snd_capacity[i];
data_to_send_per_proc[i] = (float_t*)malloc(np * (ctx -> dims) * sizeof(float_t));
local_idx_of_the_point[i] = (int*)malloc(np * sizeof(int));
}
#pragma omp parallel for
for(int i = 0; i < ctx -> local_n_points; ++i)
{
float_t max_dist = dp_info[i].ngbh.data[0].value;
float_t* point = ctx -> local_data + (i * ctx -> dims);
tree_walk_v2_append_points(ctx, top_tree -> root, i, max_dist, point, data_to_send_per_proc, local_idx_of_the_point, point_to_snd_count);
}
elapsed_time = TIME_STOP;
LOG_WRITE("Finding points to refine", elapsed_time);
......@@ -1958,7 +2168,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
//ctx -> n_points = 48*5*2000;
ctx->n_points = ctx->n_points / ctx->dims;
ctx->n_points = (ctx->n_points * 5) / 10;
ctx->n_points = (ctx->n_points * 6) / 10;
// ctx -> n_points = ctx -> world_size * 1000;
//ctx -> n_points = 10000000 * ctx -> world_size;
......@@ -2061,7 +2271,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
elapsed_time = TIME_STOP;
LOG_WRITE("Total time for all knn search", elapsed_time)
find_foreign_nodes(ctx, dp_info);
//find_foreign_nodes(ctx, dp_info);
#if defined (WRITE_NGBH)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment