Skip to content
Snippets Groups Projects
Commit cd33eaa6 authored by Emanuele De Rubeis
Browse files

Fixed data types and print intermediate data (AMD & CUDA version)

parent a2a03413
No related branches found
No related tags found
No related merge requests found
...@@ -121,7 +121,7 @@ extern struct meta ...@@ -121,7 +121,7 @@ extern struct meta
{ {
myuint Nmeasures; myuint Nmeasures;
myuint Nvis; myull Nvis;
myuint Nweights; myuint Nweights;
myuint freq_per_chan; myuint freq_per_chan;
myuint polarisations; myuint polarisations;
......
...@@ -71,8 +71,8 @@ typedef double float_t; ...@@ -71,8 +71,8 @@ typedef double float_t;
typedef float float_t; typedef float float_t;
#endif #endif
typedef unsigned int uint; typedef unsigned int myuint;
typedef unsigned long long ull; typedef unsigned long long myull;
extern struct io extern struct io
...@@ -115,15 +115,15 @@ extern struct op ...@@ -115,15 +115,15 @@ extern struct op
extern struct meta extern struct meta
{ {
uint Nmeasures; myuint Nmeasures;
uint Nvis; myull Nvis;
uint Nweights; myuint Nweights;
uint freq_per_chan; myuint freq_per_chan;
uint polarisations; myuint polarisations;
uint Ntimes; myuint Ntimes;
double dt; double dt;
double thours; double thours;
uint baselines; myuint baselines;
double uvmin; double uvmin;
double uvmax; double uvmax;
double wmin; double wmin;
...@@ -159,16 +159,16 @@ extern char datapath[LONGNAME_LEN]; ...@@ -159,16 +159,16 @@ extern char datapath[LONGNAME_LEN];
extern int xaxis, yaxis; extern int xaxis, yaxis;
extern int rank; extern int rank;
extern int size; extern int size;
extern uint nsectors; extern myuint nsectors;
extern uint startrow; extern myuint startrow;
extern double_t resolution, dx, dw, w_supporth; extern double_t resolution, dx, dw, w_supporth;
extern uint **sectorarray; extern myuint **sectorarray;
extern uint *histo_send; extern myuint *histo_send;
extern int verbose_level; extern int verbose_level;
extern uint size_of_grid; extern myuint size_of_grid;
extern double_t *grid_pointers, *grid, *gridss, *gridss_real, *gridss_img, *gridss_w, *grid_gpu, *gridss_gpu; extern double_t *grid_pointers, *grid, *gridss, *gridss_real, *gridss_img, *gridss_w, *grid_gpu, *gridss_gpu;
extern MPI_Comm MYMPI_COMM_WORLD; extern MPI_Comm MYMPI_COMM_WORLD;
......
...@@ -134,7 +134,7 @@ __global__ void convolve_g( ...@@ -134,7 +134,7 @@ __global__ void convolve_g(
if(gid < num_points) if(gid < num_points)
{ {
myuint i = gid; myuint i = gid;
unsigned long visindex = i*freq_per_chan*polarizations; myull visindex = i*freq_per_chan*polarizations;
double norm = std22/PI; double norm = std22/PI;
int j, k; int j, k;
...@@ -182,7 +182,7 @@ __global__ void convolve_g( ...@@ -182,7 +182,7 @@ __global__ void convolve_g(
// Loops over frequencies and polarizations // Loops over frequencies and polarizations
double add_term_real = 0.0; double add_term_real = 0.0;
double add_term_img = 0.0; double add_term_img = 0.0;
unsigned long ifine = visindex; myull ifine = visindex;
for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
{ {
myuint iweight = visindex/freq_per_chan; myuint iweight = visindex/freq_per_chan;
...@@ -245,7 +245,7 @@ void wstack( ...@@ -245,7 +245,7 @@ void wstack(
{ {
myuint i; myuint i;
//myuint index; //myuint index;
unsigned long visindex; myull visindex;
// initialize the convolution kernel // initialize the convolution kernel
// gaussian: // gaussian:
...@@ -274,7 +274,7 @@ void wstack( ...@@ -274,7 +274,7 @@ void wstack(
int Nth = NTHREADS; int Nth = NTHREADS;
myuint Nbl = (myuint)(num_points/Nth) + 1; myuint Nbl = (myuint)(num_points/Nth) + 1;
if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
unsigned long Nvis = num_points*freq_per_chan*polarizations; myull Nvis = num_points*freq_per_chan*polarizations;
int ndevices; int ndevices;
cudaGetDeviceCount(&ndevices); cudaGetDeviceCount(&ndevices);
...@@ -441,7 +441,7 @@ void wstack( ...@@ -441,7 +441,7 @@ void wstack(
#if defined(ACCOMP) && (GPU_STACKING) #if defined(ACCOMP) && (GPU_STACKING)
omp_set_default_device(rank % omp_get_num_devices()); omp_set_default_device(rank % omp_get_num_devices());
myuint Nvis = num_points*freq_per_chan*polarizations; myull Nvis = num_points*freq_per_chan*polarizations;
#pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y]) #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
#else #else
#pragma omp parallel for private(visindex) #pragma omp parallel for private(visindex)
...@@ -507,7 +507,7 @@ void wstack( ...@@ -507,7 +507,7 @@ void wstack(
// Loops over frequencies and polarizations // Loops over frequencies and polarizations
double add_term_real = 0.0; double add_term_real = 0.0;
double add_term_img = 0.0; double add_term_img = 0.0;
unsigned long ifine = visindex; myull ifine = visindex;
// DAV: the following two loops are performed by each thread separately: no problems of race conditions // DAV: the following two loops are performed by each thread separately: no problems of race conditions
for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
{ {
......
...@@ -130,7 +130,7 @@ __global__ void convolve_g( ...@@ -130,7 +130,7 @@ __global__ void convolve_g(
if(gid < num_points) if(gid < num_points)
{ {
myuint i = gid; myuint i = gid;
myuint visindex = i*freq_per_chan*polarizations; myull visindex = i*freq_per_chan*polarizations;
double norm = std22/PI; double norm = std22/PI;
int j, k; int j, k;
...@@ -178,7 +178,7 @@ __global__ void convolve_g( ...@@ -178,7 +178,7 @@ __global__ void convolve_g(
// Loops over frequencies and polarizations // Loops over frequencies and polarizations
double add_term_real = 0.0; double add_term_real = 0.0;
double add_term_img = 0.0; double add_term_img = 0.0;
myuint ifine = visindex; myull ifine = visindex;
for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
{ {
myuint iweight = visindex/freq_per_chan; myuint iweight = visindex/freq_per_chan;
...@@ -233,7 +233,7 @@ void wstack( ...@@ -233,7 +233,7 @@ void wstack(
{ {
myuint i; myuint i;
//myuint index; //myuint index;
myuint visindex; myull visindex;
// initialize the convolution kernel // initialize the convolution kernel
// gaussian: // gaussian:
...@@ -262,7 +262,7 @@ void wstack( ...@@ -262,7 +262,7 @@ void wstack(
int Nth = NTHREADS; int Nth = NTHREADS;
myuint Nbl = (myuint)(num_points/Nth) + 1; myuint Nbl = (myuint)(num_points/Nth) + 1;
if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
myuint Nvis = num_points*freq_per_chan*polarizations; myull Nvis = num_points*freq_per_chan*polarizations;
int ndevices; int ndevices;
int num = hipGetDeviceCount(&ndevices); int num = hipGetDeviceCount(&ndevices);
...@@ -404,7 +404,7 @@ void wstack( ...@@ -404,7 +404,7 @@ void wstack(
#if defined(ACCOMP) && (GPU_STACKING) #if defined(ACCOMP) && (GPU_STACKING)
omp_set_default_device(rank % omp_get_num_devices()); omp_set_default_device(rank % omp_get_num_devices());
myuint Nvis = num_points*freq_per_chan*polarizations; myull Nvis = num_points*freq_per_chan*polarizations;
#pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y]) #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
#else #else
#pragma omp parallel for private(visindex) #pragma omp parallel for private(visindex)
...@@ -470,7 +470,7 @@ void wstack( ...@@ -470,7 +470,7 @@ void wstack(
// Loops over frequencies and polarizations // Loops over frequencies and polarizations
double add_term_real = 0.0; double add_term_real = 0.0;
double add_term_img = 0.0; double add_term_img = 0.0;
myuint ifine = visindex; myull ifine = visindex;
// DAV: the following two loops are performed by each thread separately: no problems of race conditions // DAV: the following two loops are performed by each thread separately: no problems of race conditions
for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
{ {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment