Fixed data types and print intermediate data (AMD & CUDA version)

cd33eaa6 · Emanuele De Rubeis · a2a03413 · cd33eaa6 · cd33eaa6 · cd33eaa6
Commit cd33eaa6 authored 1 year ago by Emanuele De Rubeis
--- a/allvars_rccl.h
+++ b/allvars_rccl.h
@@ -121,7 +121,7 @@ extern struct meta
 {
  myuint   Nmeasures;
-  myuint   Nvis;
+  myull   Nvis;
  myuint   Nweights;
  myuint   freq_per_chan;
  myuint   polarisations;

--- a/allvars_rccl.hip.hpp
+++ b/allvars_rccl.hip.hpp
@@ -71,8 +71,8 @@ typedef double float_t;
 typedef float float_t;
 #endif
-typedef unsigned int       uint;
+typedef unsigned int       myuint;
-typedef unsigned long long ull;
+typedef unsigned long long myull;
 extern struct io
@@ -115,15 +115,15 @@ extern struct op
 extern struct meta
 {
-  uint   Nmeasures;
+  myuint   Nmeasures;
-  uint   Nvis;
+  myull   Nvis;
-  uint   Nweights;
+  myuint   Nweights;
-  uint   freq_per_chan;
+  myuint   freq_per_chan;
-  uint   polarisations;
+  myuint   polarisations;
-  uint   Ntimes;
+  myuint   Ntimes;
  double dt;
  double thours;
-  uint   baselines;
+  myuint   baselines;
  double uvmin;
  double uvmax;
  double wmin;
@@ -159,16 +159,16 @@ extern char datapath[LONGNAME_LEN];
 extern int  xaxis, yaxis;
 extern int  rank;
 extern int  size;
-extern uint nsectors;
+extern myuint nsectors;
-extern uint startrow;
+extern myuint startrow;
 extern double_t resolution, dx, dw, w_supporth;
-extern uint **sectorarray;
+extern myuint **sectorarray;
-extern uint  *histo_send;
+extern myuint  *histo_send;
 extern int    verbose_level; 
-extern uint    size_of_grid;
+extern myuint    size_of_grid;
 extern double_t *grid_pointers, *grid, *gridss, *gridss_real, *gridss_img, *gridss_w, *grid_gpu, *gridss_gpu;
 extern MPI_Comm MYMPI_COMM_WORLD;

--- a/w-stacking.cu
+++ b/w-stacking.cu
@@ -134,7 +134,7 @@ __global__ void convolve_g(
  if(gid < num_points)
    {
      myuint i = gid;
-      unsigned long visindex = i*freq_per_chan*polarizations;
+      myull visindex = i*freq_per_chan*polarizations;
      double norm = std22/PI;
      int j, k;
@@ -182,7 +182,7 @@ __global__ void convolve_g(
 	      // Loops over frequencies and polarizations
 	      double add_term_real = 0.0;
 	      double add_term_img = 0.0;
-	      unsigned long ifine = visindex;
+	      myull ifine = visindex;
 	      for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
 		{
 		  myuint iweight = visindex/freq_per_chan;
@@ -245,7 +245,7 @@ void wstack(
 {
  myuint i;
  //myuint index;
-  unsigned long visindex;
+  myull visindex;
  // initialize the convolution kernel
  // gaussian:
@@ -274,7 +274,7 @@ void wstack(
  int Nth = NTHREADS;
  myuint Nbl = (myuint)(num_points/Nth) + 1;
  if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
-  unsigned long Nvis = num_points*freq_per_chan*polarizations;
+  myull Nvis = num_points*freq_per_chan*polarizations;
  int ndevices;
  cudaGetDeviceCount(&ndevices);
@@ -441,7 +441,7 @@ void wstack(
 #if defined(ACCOMP) && (GPU_STACKING)
  omp_set_default_device(rank % omp_get_num_devices());
-  myuint Nvis = num_points*freq_per_chan*polarizations;
+  myull Nvis = num_points*freq_per_chan*polarizations;
 #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
 #else
 #pragma omp parallel for private(visindex)
@@ -507,7 +507,7 @@ void wstack(
 	      // Loops over frequencies and polarizations
 	      double add_term_real = 0.0;
 	      double add_term_img = 0.0;
-	      unsigned long ifine = visindex;
+	      myull ifine = visindex;
 	      // DAV: the following two loops are performed by each thread separately: no problems of race conditions
 	      for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
 		{

--- a/w-stacking.hip.cpp
+++ b/w-stacking.hip.cpp
@@ -130,7 +130,7 @@ __global__ void convolve_g(
  if(gid < num_points)
    {
      myuint i = gid;
-      myuint visindex = i*freq_per_chan*polarizations;
+      myull visindex = i*freq_per_chan*polarizations;
      double norm = std22/PI;
      int j, k;
@@ -178,7 +178,7 @@ __global__ void convolve_g(
 	      // Loops over frequencies and polarizations
 	      double add_term_real = 0.0;
 	      double add_term_img = 0.0;
-	      myuint ifine = visindex;
+	      myull ifine = visindex;
 	      for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
 		{
 		  myuint iweight = visindex/freq_per_chan;
@@ -233,7 +233,7 @@ void wstack(
 {
    myuint i;
    //myuint index;
-    myuint visindex;
+    myull visindex;
    // initialize the convolution kernel
    // gaussian:
@@ -262,7 +262,7 @@ void wstack(
    int Nth = NTHREADS;
    myuint Nbl = (myuint)(num_points/Nth) + 1;
    if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
-    myuint Nvis = num_points*freq_per_chan*polarizations;
+    myull Nvis = num_points*freq_per_chan*polarizations;
    int ndevices;
    int num = hipGetDeviceCount(&ndevices);
@@ -404,7 +404,7 @@ void wstack(
   #if defined(ACCOMP) && (GPU_STACKING)
    omp_set_default_device(rank % omp_get_num_devices());
-    myuint Nvis = num_points*freq_per_chan*polarizations;
+    myull Nvis = num_points*freq_per_chan*polarizations;
   #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
 #else
    #pragma omp parallel for private(visindex)
@@ -470,7 +470,7 @@ void wstack(
 		// Loops over frequencies and polarizations
 		double add_term_real = 0.0;
 		double add_term_img = 0.0;
-		myuint ifine = visindex;
+		myull ifine = visindex;
 		// DAV: the following two loops are performed by each thread separately: no problems of race conditions
 		for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
 		{