diff --git a/coherency_tests.ipynb b/coherency_tests.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b2251501f7f3b5d7057b8657927987de41af761b --- /dev/null +++ b/coherency_tests.ipynb @@ -0,0 +1,278 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "d1f5f17d-9949-4707-b553-e03c15bc59ee", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import dadac \n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "aeb8a19b-be6d-48d8-86e0-479b4d6d99cf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1842842, 5)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = np.fromfile(\"bb/ordered_data.npy\", dtype = np.float64)\n", + "x = x.reshape((x.shape[0]//5, 5))\n", + "x.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "550c24bc-062c-4e9a-83ab-5fb51aa44dc4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "You are running in a notebook maybe the timing output will break, but everything should be fine \n" + ] + } + ], + "source": [ + "data = dadac.Data(x) " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "83e48b9f-1739-4b4c-aeea-87fb732c02e2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Building the KDtree v2:\n", + "\tTotal time: 1.741s\n", + "\n", + "knn search:\n", + "\tTotal time: 18.676s\n", + "\n", + "ID estimation:\n", + "\tID value: 3.920865\n", + "\tTotal time: 0.391s\n", + "\n", + "Density and k* estimation:\n", + "\tTotal time: 5.015s\n", + "\n" + ] + } + ], + "source": [ + "data.compute_distances(299)\n", + "data.compute_id_2NN()\n", + "#data.id = 4\n", + "data.compute_density_kstarNN()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "314b516a-02ec-412a-bc7e-1de8f790e429", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.920865231328582\n" + ] + } + ], + "source": [ + "den_gt = data.log_den\n", + "den_comp = np.fromfile(\"bb/ordered_density.npy\", np.float64)\n", + "print(data.id)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "92225324-e798-4388-a022-b6bb8906768c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0001499206205206319" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.average(np.abs(den_comp - den_gt))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "67a8bfdf-f1c1-421a-ab06-0ed9dc57b1b5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-3.64725383, -4.53602916, -4.30573383, ..., 7.18148125,\n", + " -6.25416517, -3.54944958])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "den_gt" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a7093d5f-d40b-43d9-a4d0-1f70638d9bbd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-3.64714384, -4.53592253, -4.30561972, ..., 7.18179655,\n", + " -6.25407934, -3.54934192])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "den_comp" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f02c64d5-18b8-4b88-a756-e00fa9295e64", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.5825421810150146 -1.2760780561214355\n" + ] + }, + { + "data": { + "text/plain": [ + "1789174" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "a = np.argmax(np.abs(den_comp - den_gt))\n", + "print(den_comp[a], den_gt[a])\n", + "a" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "10dcc6ee-af0f-412d-8be9-d7500be6cd08", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.6935358751064209" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.abs(den_comp - den_gt)[a]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "19b1c9e6-bf00-44f9-822f-f13412263ce5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "130" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.kstar[a]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd441b5f-f7d8-476c-9b8f-2c1a7f92380f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nope", + "language": "python", + "name": "nope" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/common/common.c b/src/common/common.c index d678abad5479c2e4f0bb9dc9c2f953ffaced2f4f..1df7043f4ab5d953a2e3ed70d84f1b634a486b8f 100644 --- a/src/common/common.c +++ b/src/common/common.c @@ -18,7 +18,8 @@ void get_context(global_context_t* ctx) ctx -> idx_halo_points_send = NULL; ctx -> n_halo_points_recv = NULL; ctx -> n_halo_points_send = NULL; - ctx -> halo_datapoints = NULL; + ctx -> halo_datapoints = NULL; + ctx -> local_datapoints = NULL; } void free_context(global_context_t* ctx) @@ -28,9 +29,22 @@ void free_context(global_context_t* ctx) FREE_NOT_NULL(ctx -> ub_box); FREE_NOT_NULL(ctx -> lb_box); + if(ctx -> local_datapoints) + { + for(int i = 0; i < ctx -> local_n_points; ++i) FREE_NOT_NULL(ctx -> local_datapoints[i].ngbh.data); + } + + FREE_NOT_NULL(ctx -> local_datapoints); if(ctx -> halo_datapoints) { - for(int i = 0; i < ctx -> world_size; ++i) FREE_NOT_NULL(ctx -> halo_datapoints[i]); + for(int i = 0; i < ctx -> world_size; ++i) + { + for(int j = 0; j < ctx -> n_halo_points_recv[i]; ++j) + { + FREE_NOT_NULL(ctx -> halo_datapoints[i][j].ngbh.data); + } + FREE_NOT_NULL(ctx -> halo_datapoints[i]); + } } FREE_NOT_NULL(ctx -> halo_datapoints); diff --git a/src/tree/tree.c b/src/tree/tree.c index 7c7d94b5619d09171ee6b3619d54d5d82dc731c4..adfd107b54985e03503c49d1bb5936ab6e29ebea 100644 --- a/src/tree/tree.c +++ b/src/tree/tree.c @@ -21,6 +21,7 @@ //#define WRITE_NGBH //#define WRITE_TOP_NODES +#define WRITE_DENSITY /* * Maximum bytes to send with a single mpi send/recv, used @@ -49,6 +50,15 @@ unsigned int data_dims; + +int cmp_float_t(const void* a, const void* b) +{ + float_t aa = *((float_t*)a); + float_t bb = *((float_t*)b); + return (aa > bb) - (aa < bb); +} + + float_t *read_data_file(global_context_t *ctx, const char *fname, const int file_in_float32) { @@ -1546,7 +1556,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre convert_heap_idx_to_global(ctx, &(dp_info[idx].ngbh)); dp_info[idx].cluster_idx = -1; dp_info[idx].is_center = 0; - dp_info[idx].array_idx = idx; + dp_info[idx].array_idx = idx + ctx -> idx_start; } elapsed_time = TIME_STOP; LOG_WRITE("Local neighborhood search", elapsed_time); @@ -2032,6 +2042,53 @@ void ordered_data_to_file(global_context_t* ctx) free(tmp_data); free(ppp); free(displs); + } + MPI_Barrier(ctx -> mpi_communicator); +} + +void ordered_buffer_to_file(global_context_t* ctx, void* buffer, size_t el_size, uint64_t n, const char* fname) +{ + //MPI_Barrier(ctx -> mpi_communicator); + MPI_DB_PRINT("[MASTER] writing to file\n"); + void* tmp_data; + int* ppp; + int* displs; + + MPI_Barrier(ctx -> mpi_communicator); + + uint64_t tot_n = 0; + MPI_Reduce(&n, &tot_n, 1, MPI_UINT64_T , MPI_SUM, 0, ctx -> mpi_communicator); + + if(I_AM_MASTER) + { + tmp_data = (void*)malloc(el_size * tot_n ); + ppp = (int*)malloc(ctx -> world_size * sizeof(int)); + displs = (int*)malloc(ctx -> world_size * sizeof(int)); + + } + + int nn = (int)n; + MPI_Gather(&nn, 1, MPI_INT, ppp, 1, MPI_INT, 0, ctx -> mpi_communicator); + + if(I_AM_MASTER) + { + displs[0] = 0; + for(int i = 0; i < ctx -> world_size; ++i) ppp[i] = el_size * ppp[i]; + for(int i = 1; i < ctx -> world_size; ++i) displs[i] = displs[i - 1] + ppp[i - 1]; + + } + + MPI_Gatherv(buffer, (int)(el_size * n), + MPI_CHAR, tmp_data, ppp, displs, MPI_CHAR, 0, ctx -> mpi_communicator); + + if(I_AM_MASTER) + { + FILE* file = fopen(fname,"w"); + fwrite(tmp_data, 1, el_size * tot_n, file); + fclose(file); + free(tmp_data); + free(ppp); + free(displs); } MPI_Barrier(ctx -> mpi_communicator); @@ -2101,8 +2158,6 @@ void find_foreign_nodes(global_context_t* ctx, datapoint_info_t* dp, datapoint_i if(owner != ctx -> mpi_rank) { - //DB_PRINT("Hehe"); - /* TODO: change this to a series of locks */ #pragma omp atomic update capacities[owner]++; } @@ -2129,8 +2184,6 @@ void find_foreign_nodes(global_context_t* ctx, datapoint_info_t* dp, datapoint_i if(owner != ctx -> mpi_rank) { - //DB_PRINT("Hehe"); - /* TODO: change this to a series of locks */ append_foreign_idx_list(element, owner, count_to_request, array_indexes_to_request); } } @@ -2175,24 +2228,6 @@ void find_foreign_nodes(global_context_t* ctx, datapoint_info_t* dp, datapoint_i array_indexes_to_request[i] = (idx_t*)realloc(array_indexes_to_request[i], unique_count[i] * sizeof(idx_t)); } - - - DB_PRINT("[RANK %d elements to request]", ctx -> mpi_rank); - for(int i = 0; i < ctx -> world_size; ++i) - { - DB_PRINT("%d\t", unique_count[i]); - } - DB_PRINT("\n"); - - /* - if(I_AM_MASTER) - { - FILE* f = fopen("uniq2","w"); - fwrite(array_indexes_to_request[1], sizeof(idx_t), unique_count[1],f); - fclose(f); - } - */ - for(int i = 0; i < ctx -> world_size; ++i) { foreign_dp[i] = (datapoint_info_t*)calloc(sizeof(datapoint_info_t), unique_count[i]); @@ -2340,7 +2375,7 @@ void find_foreign_nodes(global_context_t* ctx, datapoint_info_t* dp, datapoint_i if(foreign_dp[i][j].ngbh.data[0].array_idx != array_indexes_to_request[i][j]) { - printf("Chahah %lu\n",array_indexes_to_request[i][j]); + printf("Error on %lu\n",array_indexes_to_request[i][j]); } } } @@ -2361,6 +2396,9 @@ void find_foreign_nodes(global_context_t* ctx, datapoint_info_t* dp, datapoint_i memcpy(ctx -> idx_halo_points_send[i], idx_buffer_to_send + sdispls[i], n_heap_to_send[i] * sizeof(idx_t) ); } + ctx -> halo_datapoints = foreign_dp; + ctx -> local_datapoints = dp; + free(count_to_request); free(capacities); @@ -2368,6 +2406,265 @@ void find_foreign_nodes(global_context_t* ctx, datapoint_info_t* dp, datapoint_i free(heap_buffer_to_send); free(idx_buffer_to_send); free(idx_buffer_to_recv); + + free(sdispls); + free(rdispls); +} + +float_t mEst2(float_t * x, float_t *y, idx_t n) +{ + + /* + * Estimate the m coefficient of a straight + * line passing through the origin + * params: + * - x: x values of the points + * - y: y values of the points + * - n: size of the arrays + */ + + float_t num = 0; + float_t den = 0; + float_t dd; + for(idx_t i = 0; i < n; ++i) + { + float_t xx = x[i]; + float_t yy = y[i]; + + dd = xx; + num += dd*yy; + den += dd*dd; + + } + + return num/den; +} + + + +float_t id_estimate(global_context_t* ctx, datapoint_info_t* dp_info, idx_t n, float_t fraction, int verbose) +{ + + /* + * Estimation of the intrinsic dimension of a dataset + * args: + * - dp_info: array of structs + * - n: number of dp_info + * Estimates the id via 2NN method. Computation of the log ratio of the + * distances of the first 2 neighbors of each point. Then compute the empirical distribution + * of these log ratii + * The intrinsic dimension is found by fitting with a straight line passing through the + * origin + */ + + struct timespec start_tot, finish_tot; + double elapsed_tot; + + if(verbose) + { + printf("ID estimation:\n"); + clock_gettime(CLOCK_MONOTONIC, &start_tot); + } + + //float_t fraction = 0.7; + float_t* r = (float_t*)malloc(n*sizeof(float_t)); + float_t* Pemp = (float_t*)malloc(n*sizeof(float_t)); + + for(idx_t i = 0; i < n; ++i) + { + r[i] = 0.5 * log(dp_info[i].ngbh.data[2].value/dp_info[i].ngbh.data[1].value); + Pemp[i] = -log(1 - (float_t)(i + 1)/(float_t)n); + } + qsort(r,n,sizeof(float_t),cmp_float_t); + + idx_t Neff = (idx_t)(n*fraction); + + float_t d = mEst2(r,Pemp,Neff); + free(r); + free(Pemp); + + if(verbose) + { + clock_gettime(CLOCK_MONOTONIC, &finish_tot); + elapsed_tot = (finish_tot.tv_sec - start_tot.tv_sec); + elapsed_tot += (finish_tot.tv_nsec - start_tot.tv_nsec) / 1000000000.0; + printf("\tID value: %.6lf\n", d); + printf("\tTotal time: %.3lfs\n\n", elapsed_tot); + } + + float_t rd = 0; + MPI_Allreduce(&d, &rd, 1, MPI_MY_FLOAT, MPI_SUM, ctx -> mpi_communicator); + rd = rd / ctx -> world_size; + + return rd; + +} + +int binary_search_on_idxs(idx_t* idxs, idx_t key, int n) +{ + #define LEFT 1 + #define RIGHT 0 + + int l = 0; + int r = n - 1; + int center = (r - l)/2; + while(idxs[center] != key && l < r) + { + int lr = key < idxs[center]; + /* if key < place */ + switch (lr) + { + case LEFT: + { + l = l; + r = center - 1; + center = l + (r - l) / 2; + } + break; + + case RIGHT: + { + l = center + 1; + r = r; + center = l + (r - l) / 2; + } + break; + + default: + break; + } + + } + + return center; + + #undef LEFT + #undef RIGHT +} + +datapoint_info_t find_possibly_halo_datapoint(global_context_t* ctx, idx_t idx) +{ + int owner = foreign_owner(ctx, idx); + /* find if datapoint is halo or not */ + if(owner == ctx -> mpi_rank) + { + idx_t i = idx - ctx -> idx_start; + return ctx -> local_datapoints[i]; + } + else + { + datapoint_info_t* halo_dp = ctx -> halo_datapoints[owner]; + idx_t* halo_idxs = ctx -> idx_halo_points_recv[owner]; + int n = ctx -> n_halo_points_recv[owner]; + int i = binary_search_on_idxs(halo_idxs, idx, n); + + if( idx != halo_dp[i].ngbh.data[0].array_idx) + // if( idx != halo_idxs[i]) + { + printf("Osti %lu\n", idx); + } + return halo_dp[i]; + } +} + +void compute_density_kstarnn(global_context_t* ctx, const float_t d, int verbose){ + + /* + * Point density computation: + * args: + * - paricles: array of structs + * - d : intrinsic dimension of the dataset + * - points : number of points in the dataset + */ + + struct timespec start_tot, finish_tot; + double elapsed_tot; + + datapoint_info_t* local_datapoints = ctx -> local_datapoints; + + if(verbose) + { + printf("Density and k* estimation:\n"); + clock_gettime(CLOCK_MONOTONIC, &start_tot); + } + + idx_t kMAX = ctx -> local_datapoints[0].ngbh.N - 1; + + float_t omega = 0.; + if(sizeof(float_t) == sizeof(float)){ omega = powf(PI_F,d/2)/tgammaf(d/2.0f + 1.0f);} + else{omega = pow(M_PI,d/2.)/tgamma(d/2.0 + 1.0);} + + #pragma omp parallel for + for(idx_t i = 0; i < ctx -> local_n_points; ++i) + { + + idx_t j = 4; + idx_t k; + float_t dL = 0.; + float_t vvi = 0.; + float_t vvj = 0.; + float_t vp = 0.; + while(j < kMAX && dL < DTHR) + { + idx_t ksel = j - 1; + vvi = omega * pow(local_datapoints[i].ngbh.data[ksel].value,d/2.); + + idx_t jj = local_datapoints[i].ngbh.data[j].array_idx; + + /* + * note jj can be an halo point + * need to search maybe for it in foreign nodes + * */ + + datapoint_info_t tmp_dp = find_possibly_halo_datapoint(ctx, jj); + + vvj = omega * pow(tmp_dp.ngbh.data[ksel].value,d/2.); + + /* TO REMOVE + if(local_datapoints[i].array_idx == 17734) printf("%lu ksel i %lu j %lu tmp_dp %lu di %lf fj %lf vvi %lf vvj %lf\n", ksel, i, jj, tmp_dp.array_idx, + sqrt(local_datapoints[i].ngbh.data[ksel].value), sqrt(tmp_dp.ngbh.data[ksel].value), vvi, vvj); + */ + + vp = (vvi + vvj)*(vvi + vvj); + dL = -2.0 * ksel * log(4.*vvi*vvj/vp); + j = j + 1; + } + if(j == kMAX) + { + k = j - 1; + vvi = omega * pow(ctx -> local_datapoints[i].ngbh.data[k].value,d/2.); + } + else + { + k = j - 2; + } + local_datapoints[i].kstar = k; + local_datapoints[i].log_rho = log((float_t)(k)/vvi/((float_t)(ctx -> n_points))); + //dp_info[i].log_rho = log((float_t)(k)) - log(vvi) -log((float_t)(points)); + local_datapoints[i].log_rho_err = 1.0/sqrt((float_t)k); //(float_t)(-Q_rsqrt((float)k)); + local_datapoints[i].g = local_datapoints[i].log_rho - local_datapoints[i].log_rho_err; + } + + if(verbose) + { + clock_gettime(CLOCK_MONOTONIC, &finish_tot); + elapsed_tot = (finish_tot.tv_sec - start_tot.tv_sec); + elapsed_tot += (finish_tot.tv_nsec - start_tot.tv_nsec) / 1000000000.0; + printf("\tTotal time: %.3lfs\n\n", elapsed_tot); + } + + #if defined(WRITE_DENSITY) + /* densities */ + float_t* den = (float_t*)malloc(ctx -> local_n_points * sizeof(float_t)); + for(int i = 0; i < ctx -> local_n_points; ++i) den[i] = ctx -> local_datapoints[i].log_rho; + + ordered_buffer_to_file(ctx, den, sizeof(float_t), ctx -> local_n_points, "bb/ordered_density.npy"); + ordered_data_to_file(ctx); + free(den); + #endif + return; + + } void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) @@ -2404,7 +2701,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) //ctx -> n_points = 48*5*2000; ctx->n_points = ctx->n_points / ctx->dims; - ctx->n_points = (ctx->n_points * 0.1) / 10; + // ctx->n_points = (ctx->n_points * 0.5) / 10; // ctx -> n_points = ctx -> world_size * 1000; //ctx -> n_points = 10000000 * ctx -> world_size; @@ -2507,6 +2804,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) elapsed_time = TIME_STOP; LOG_WRITE("Total time for all knn search", elapsed_time) + TIME_START; datapoint_info_t** foreign_dp_info = (datapoint_info_t**)malloc(ctx -> world_size * sizeof(datapoint_info_t*)); @@ -2514,11 +2812,33 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) elapsed_time = TIME_STOP; LOG_WRITE("Finding points to request the ngbh", elapsed_time) + TIME_START; + float_t id = id_estimate(ctx, dp_info, ctx -> local_n_points, 0.9, MY_FALSE); + elapsed_time = TIME_STOP; + //id = 3.920865231328582; + //id = 4.008350298212649; + //id = 4.; + LOG_WRITE("ID estimate", elapsed_time) + + MPI_DB_PRINT("ID %lf \n",id); + + TIME_START; + compute_density_kstarnn(ctx, id, MY_FALSE); + elapsed_time = TIME_STOP; + LOG_WRITE("Density estimate", elapsed_time) + + + + /* find density */ + + + #if defined (WRITE_NGBH) ordered_data_to_file(ctx); #endif + /* for(int i = 0; i < ctx -> local_n_points; ++i) { free(dp_info[i].ngbh.data); @@ -2534,6 +2854,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) } free(foreign_dp_info); + */ top_tree_free(ctx, &tree); @@ -2541,7 +2862,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) free(send_counts); free(displacements); - free(dp_info); + //free(dp_info); if (ctx->mpi_rank == 0) free(data);