From cd33eaa66a90426100b9f99468963b1e48e2a8e0 Mon Sep 17 00:00:00 2001
From: Emanuele De Rubeis <ederubei@login07.leonardo.local>
Date: Fri, 14 Jun 2024 15:54:18 +0200
Subject: [PATCH] Fixed data types and print intermediate data (AMD & CUDA
 version)

---
 allvars_rccl.h       |  2 +-
 allvars_rccl.hip.hpp | 28 ++++++++++++++--------------
 w-stacking.cu        | 12 ++++++------
 w-stacking.hip.cpp   | 12 ++++++------
 4 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/allvars_rccl.h b/allvars_rccl.h
index 5e9691e..2279525 100755
--- a/allvars_rccl.h
+++ b/allvars_rccl.h
@@ -121,7 +121,7 @@ extern struct meta
 {
 
   myuint   Nmeasures;
-  myuint   Nvis;
+  myull   Nvis;
   myuint   Nweights;
   myuint   freq_per_chan;
   myuint   polarisations;
diff --git a/allvars_rccl.hip.hpp b/allvars_rccl.hip.hpp
index 04bb24c..a22cb5b 100755
--- a/allvars_rccl.hip.hpp
+++ b/allvars_rccl.hip.hpp
@@ -71,8 +71,8 @@ typedef double float_t;
 typedef float float_t;
 #endif
 
-typedef unsigned int       uint;
-typedef unsigned long long ull;
+typedef unsigned int       myuint;
+typedef unsigned long long myull;
 
 
 extern struct io
@@ -115,15 +115,15 @@ extern struct op
 extern struct meta
 {
 
-  uint   Nmeasures;
-  uint   Nvis;
-  uint   Nweights;
-  uint   freq_per_chan;
-  uint   polarisations;
-  uint   Ntimes;
+  myuint   Nmeasures;
+  myull   Nvis;
+  myuint   Nweights;
+  myuint   freq_per_chan;
+  myuint   polarisations;
+  myuint   Ntimes;
   double dt;
   double thours;
-  uint   baselines;
+  myuint   baselines;
   double uvmin;
   double uvmax;
   double wmin;
@@ -159,16 +159,16 @@ extern char datapath[LONGNAME_LEN];
 extern int  xaxis, yaxis;
 extern int  rank;
 extern int  size;
-extern uint nsectors;
-extern uint startrow;
+extern myuint nsectors;
+extern myuint startrow;
 extern double_t resolution, dx, dw, w_supporth;
 
-extern uint **sectorarray;
-extern uint  *histo_send;
+extern myuint **sectorarray;
+extern myuint  *histo_send;
 extern int    verbose_level; 
 
 
-extern uint    size_of_grid;
+extern myuint    size_of_grid;
 extern double_t *grid_pointers, *grid, *gridss, *gridss_real, *gridss_img, *gridss_w, *grid_gpu, *gridss_gpu;
 
 extern MPI_Comm MYMPI_COMM_WORLD;
diff --git a/w-stacking.cu b/w-stacking.cu
index d7f1c36..3983c0f 100755
--- a/w-stacking.cu
+++ b/w-stacking.cu
@@ -134,7 +134,7 @@ __global__ void convolve_g(
   if(gid < num_points)
     {
       myuint i = gid;
-      unsigned long visindex = i*freq_per_chan*polarizations;
+      myull visindex = (myull)i*freq_per_chan*polarizations; // widen i (myuint) first: the product must be formed in 64 bits, not wrapped then widened
       double norm = std22/PI;
 
       int j, k;
@@ -182,7 +182,7 @@ __global__ void convolve_g(
 	      // Loops over frequencies and polarizations
 	      double add_term_real = 0.0;
 	      double add_term_img = 0.0;
-	      unsigned long ifine = visindex;
+	      myull ifine = visindex;
 	      for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
 		{
 		  myuint iweight = visindex/freq_per_chan;
@@ -245,7 +245,7 @@ void wstack(
 {
   myuint i;
   //myuint index;
-  unsigned long visindex;
+  myull visindex;
   
   // initialize the convolution kernel
   // gaussian:
@@ -274,7 +274,7 @@ void wstack(
   int Nth = NTHREADS;
   myuint Nbl = (myuint)(num_points/Nth) + 1;
   if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
-  unsigned long Nvis = num_points*freq_per_chan*polarizations;
+  myull Nvis = (myull)num_points*freq_per_chan*polarizations; // cast before multiplying so the product is formed in 64 bits (no-op if operands are already 64-bit)
   
   int ndevices;
   cudaGetDeviceCount(&ndevices);
@@ -441,7 +441,7 @@ void wstack(
 
 #if defined(ACCOMP) && (GPU_STACKING)
   omp_set_default_device(rank % omp_get_num_devices());
-  myuint Nvis = num_points*freq_per_chan*polarizations;
+  myull Nvis = (myull)num_points*freq_per_chan*polarizations; // cast before multiplying: Nvis sizes the map() clauses below and must not wrap
 #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
 #else
 #pragma omp parallel for private(visindex)
@@ -507,7 +507,7 @@ void wstack(
 	      // Loops over frequencies and polarizations
 	      double add_term_real = 0.0;
 	      double add_term_img = 0.0;
-	      unsigned long ifine = visindex;
+	      myull ifine = visindex;
 	      // DAV: the following two loops are performend by each thread separately: no problems of race conditions
 	      for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
 		{
diff --git a/w-stacking.hip.cpp b/w-stacking.hip.cpp
index bc5b730..809986e 100755
--- a/w-stacking.hip.cpp
+++ b/w-stacking.hip.cpp
@@ -130,7 +130,7 @@ __global__ void convolve_g(
   if(gid < num_points)
     {
       myuint i = gid;
-      myuint visindex = i*freq_per_chan*polarizations;
+      myull visindex = (myull)i*freq_per_chan*polarizations; // widen i (myuint) first: the product must be formed in 64 bits, not wrapped then widened
       double norm = std22/PI;
 
       int j, k;
@@ -178,7 +178,7 @@ __global__ void convolve_g(
 	      // Loops over frequencies and polarizations
 	      double add_term_real = 0.0;
 	      double add_term_img = 0.0;
-	      myuint ifine = visindex;
+	      myull ifine = visindex;
 	      for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
 		{
 		  myuint iweight = visindex/freq_per_chan;
@@ -233,7 +233,7 @@ void wstack(
 {
     myuint i;
     //myuint index;
-    myuint visindex;
+    myull visindex;
 
     // initialize the convolution kernel
     // gaussian:
@@ -262,7 +262,7 @@ void wstack(
     int Nth = NTHREADS;
     myuint Nbl = (myuint)(num_points/Nth) + 1;
     if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
-    myuint Nvis = num_points*freq_per_chan*polarizations;
+    myull Nvis = (myull)num_points*freq_per_chan*polarizations; // cast before multiplying so the product is formed in 64 bits (no-op if operands are already 64-bit)
 
     int ndevices;
     int num = hipGetDeviceCount(&ndevices);
@@ -404,7 +404,7 @@ void wstack(
 
    #if defined(ACCOMP) && (GPU_STACKING)
     omp_set_default_device(rank % omp_get_num_devices());
-    myuint Nvis = num_points*freq_per_chan*polarizations;
+    myull Nvis = (myull)num_points*freq_per_chan*polarizations; // cast before multiplying: Nvis sizes the map() clauses below and must not wrap
    #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
 #else
     #pragma omp parallel for private(visindex)
@@ -470,7 +470,7 @@ void wstack(
 		// Loops over frequencies and polarizations
 		double add_term_real = 0.0;
 		double add_term_img = 0.0;
-		myuint ifine = visindex;
+		myull ifine = visindex;
 		// DAV: the following two loops are performend by each thread separately: no problems of race conditions
 		for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
 		{
-- 
GitLab