diff --git a/allvars_rccl.h b/allvars_rccl.h index 5e9691effa40db742c0422d5bfa1a09e4ae67e2c..227952545d46971d9b3d9839d9582470e4962c57 100755 --- a/allvars_rccl.h +++ b/allvars_rccl.h @@ -121,7 +121,7 @@ extern struct meta { myuint Nmeasures; - myuint Nvis; + myull Nvis; myuint Nweights; myuint freq_per_chan; myuint polarisations; diff --git a/allvars_rccl.hip.hpp b/allvars_rccl.hip.hpp index 04bb24c3681b3b28b60b7e9968ff280ae0f0d94b..a22cb5be13c333ecf034bfd1a194068efdb4b180 100755 --- a/allvars_rccl.hip.hpp +++ b/allvars_rccl.hip.hpp @@ -71,8 +71,8 @@ typedef double float_t; typedef float float_t; #endif -typedef unsigned int uint; -typedef unsigned long long ull; +typedef unsigned int myuint; +typedef unsigned long long myull; extern struct io @@ -115,15 +115,15 @@ extern struct op extern struct meta { - uint Nmeasures; - uint Nvis; - uint Nweights; - uint freq_per_chan; - uint polarisations; - uint Ntimes; + myuint Nmeasures; + myull Nvis; + myuint Nweights; + myuint freq_per_chan; + myuint polarisations; + myuint Ntimes; double dt; double thours; - uint baselines; + myuint baselines; double uvmin; double uvmax; double wmin; @@ -159,16 +159,16 @@ extern char datapath[LONGNAME_LEN]; extern int xaxis, yaxis; extern int rank; extern int size; -extern uint nsectors; -extern uint startrow; +extern myuint nsectors; +extern myuint startrow; extern double_t resolution, dx, dw, w_supporth; -extern uint **sectorarray; -extern uint *histo_send; +extern myuint **sectorarray; +extern myuint *histo_send; extern int verbose_level; -extern uint size_of_grid; +extern myuint size_of_grid; extern double_t *grid_pointers, *grid, *gridss, *gridss_real, *gridss_img, *gridss_w, *grid_gpu, *gridss_gpu; extern MPI_Comm MYMPI_COMM_WORLD; diff --git a/w-stacking.cu b/w-stacking.cu index d7f1c36690948a0103ef9251e2a24e97ce9475eb..3983c0f41dd32652263d7790345437d04093eb6f 100755 --- a/w-stacking.cu +++ b/w-stacking.cu @@ -134,7 +134,7 @@ __global__ void convolve_g( if(gid < num_points) { myuint i = gid; - unsigned long visindex = i*freq_per_chan*polarizations; + myull visindex = i*freq_per_chan*polarizations; double norm = std22/PI; int j, k; @@ -182,7 +182,7 @@ __global__ void convolve_g( // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0; - unsigned long ifine = visindex; + myull ifine = visindex; for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) { myuint iweight = visindex/freq_per_chan; @@ -245,7 +245,7 @@ void wstack( { myuint i; //myuint index; - unsigned long visindex; + myull visindex; // initialize the convolution kernel // gaussian: @@ -274,7 +274,7 @@ void wstack( int Nth = NTHREADS; myuint Nbl = (myuint)(num_points/Nth) + 1; if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; - unsigned long Nvis = num_points*freq_per_chan*polarizations; + myull Nvis = num_points*freq_per_chan*polarizations; int ndevices; cudaGetDeviceCount(&ndevices); @@ -441,7 +441,7 @@ void wstack( #if defined(ACCOMP) && (GPU_STACKING) omp_set_default_device(rank % omp_get_num_devices()); - myuint Nvis = num_points*freq_per_chan*polarizations; + myull Nvis = num_points*freq_per_chan*polarizations; #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y]) #else #pragma omp parallel for private(visindex) @@ -507,7 +507,7 @@ void wstack( // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0; - unsigned long ifine = visindex; + myull ifine = visindex; // DAV: the following two loops are performend by each thread separately: no problems of race conditions for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) { diff --git a/w-stacking.hip.cpp b/w-stacking.hip.cpp index bc5b7301b7f039b1bf6495b08e535757a6d5204a..809986ecc51efc688e0baa4b9484ca4b492f8339 100755 --- a/w-stacking.hip.cpp +++ b/w-stacking.hip.cpp @@ -130,7 +130,7 @@ __global__ void convolve_g( if(gid < num_points) { myuint i = gid; - myuint visindex = i*freq_per_chan*polarizations; + myull visindex = i*freq_per_chan*polarizations; double norm = std22/PI; int j, k; @@ -178,7 +178,7 @@ __global__ void convolve_g( // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0; - myuint ifine = visindex; + myull ifine = visindex; for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) { myuint iweight = visindex/freq_per_chan; @@ -233,7 +233,7 @@ void wstack( { myuint i; //myuint index; - myuint visindex; + myull visindex; // initialize the convolution kernel // gaussian: @@ -262,7 +262,7 @@ void wstack( int Nth = NTHREADS; myuint Nbl = (myuint)(num_points/Nth) + 1; if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; - myuint Nvis = num_points*freq_per_chan*polarizations; + myull Nvis = num_points*freq_per_chan*polarizations; int ndevices; int num = hipGetDeviceCount(&ndevices); @@ -404,7 +404,7 @@ void wstack( #if defined(ACCOMP) && (GPU_STACKING) omp_set_default_device(rank % omp_get_num_devices()); - myuint Nvis = num_points*freq_per_chan*polarizations; + myull Nvis = num_points*freq_per_chan*polarizations; #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y]) #else #pragma omp parallel for private(visindex) @@ -470,7 +470,7 @@ void wstack( // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0; - myuint ifine = visindex; + myull ifine = visindex; // DAV: the following two loops are performend by each thread separately: no problems of race conditions for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) {