Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
H
HPC_Imaging
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Claudio Gheller
HPC_Imaging
Commits
cd33eaa6
Commit
cd33eaa6
authored
1 year ago
by
Emanuele De Rubeis
Browse files
Options
Downloads
Patches
Plain Diff
Fixed data types and print intermediate data (AMD & CUDA version)
parent
a2a03413
No related branches found
No related tags found
No related merge requests found
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
allvars_rccl.h
+1
-1
1 addition, 1 deletion
allvars_rccl.h
allvars_rccl.hip.hpp
+14
-14
14 additions, 14 deletions
allvars_rccl.hip.hpp
w-stacking.cu
+6
-6
6 additions, 6 deletions
w-stacking.cu
w-stacking.hip.cpp
+6
-6
6 additions, 6 deletions
w-stacking.hip.cpp
with
27 additions
and
27 deletions
allvars_rccl.h
+
1
−
1
View file @
cd33eaa6
...
@@ -121,7 +121,7 @@ extern struct meta
...
@@ -121,7 +121,7 @@ extern struct meta
{
{
myuint
Nmeasures
;
myuint
Nmeasures
;
myu
int
Nvis
;
myu
ll
Nvis
;
myuint
Nweights
;
myuint
Nweights
;
myuint
freq_per_chan
;
myuint
freq_per_chan
;
myuint
polarisations
;
myuint
polarisations
;
...
...
This diff is collapsed.
Click to expand it.
allvars_rccl.hip.hpp
+
14
−
14
View file @
cd33eaa6
...
@@ -71,8 +71,8 @@ typedef double float_t;
...
@@ -71,8 +71,8 @@ typedef double float_t;
typedef
float
float_t
;
typedef
float
float_t
;
#endif
#endif
typedef
unsigned
int
uint
;
typedef
unsigned
int
my
uint
;
typedef
unsigned
long
long
ull
;
typedef
unsigned
long
long
my
ull
;
extern
struct
io
extern
struct
io
...
@@ -115,15 +115,15 @@ extern struct op
...
@@ -115,15 +115,15 @@ extern struct op
extern
struct
meta
extern
struct
meta
{
{
uint
Nmeasures
;
my
uint
Nmeasures
;
uint
Nvis
;
myull
Nvis
;
uint
Nweights
;
my
uint
Nweights
;
uint
freq_per_chan
;
my
uint
freq_per_chan
;
uint
polarisations
;
my
uint
polarisations
;
uint
Ntimes
;
my
uint
Ntimes
;
double
dt
;
double
dt
;
double
thours
;
double
thours
;
uint
baselines
;
my
uint
baselines
;
double
uvmin
;
double
uvmin
;
double
uvmax
;
double
uvmax
;
double
wmin
;
double
wmin
;
...
@@ -159,16 +159,16 @@ extern char datapath[LONGNAME_LEN];
...
@@ -159,16 +159,16 @@ extern char datapath[LONGNAME_LEN];
extern
int
xaxis
,
yaxis
;
extern
int
xaxis
,
yaxis
;
extern
int
rank
;
extern
int
rank
;
extern
int
size
;
extern
int
size
;
extern
uint
nsectors
;
extern
my
uint
nsectors
;
extern
uint
startrow
;
extern
my
uint
startrow
;
extern
double_t
resolution
,
dx
,
dw
,
w_supporth
;
extern
double_t
resolution
,
dx
,
dw
,
w_supporth
;
extern
uint
**
sectorarray
;
extern
my
uint
**
sectorarray
;
extern
uint
*
histo_send
;
extern
my
uint
*
histo_send
;
extern
int
verbose_level
;
extern
int
verbose_level
;
extern
uint
size_of_grid
;
extern
my
uint
size_of_grid
;
extern
double_t
*
grid_pointers
,
*
grid
,
*
gridss
,
*
gridss_real
,
*
gridss_img
,
*
gridss_w
,
*
grid_gpu
,
*
gridss_gpu
;
extern
double_t
*
grid_pointers
,
*
grid
,
*
gridss
,
*
gridss_real
,
*
gridss_img
,
*
gridss_w
,
*
grid_gpu
,
*
gridss_gpu
;
extern
MPI_Comm
MYMPI_COMM_WORLD
;
extern
MPI_Comm
MYMPI_COMM_WORLD
;
...
...
This diff is collapsed.
Click to expand it.
w-stacking.cu
+
6
−
6
View file @
cd33eaa6
...
@@ -134,7 +134,7 @@ __global__ void convolve_g(
...
@@ -134,7 +134,7 @@ __global__ void convolve_g(
if
(
gid
<
num_points
)
if
(
gid
<
num_points
)
{
{
myuint
i
=
gid
;
myuint
i
=
gid
;
unsigned
long
visindex
=
i
*
freq_per_chan
*
polarizations
;
myull
visindex
=
i
*
freq_per_chan
*
polarizations
;
double
norm
=
std22
/
PI
;
double
norm
=
std22
/
PI
;
int
j
,
k
;
int
j
,
k
;
...
@@ -182,7 +182,7 @@ __global__ void convolve_g(
...
@@ -182,7 +182,7 @@ __global__ void convolve_g(
// Loops over frequencies and polarizations
// Loops over frequencies and polarizations
double
add_term_real
=
0.0
;
double
add_term_real
=
0.0
;
double
add_term_img
=
0.0
;
double
add_term_img
=
0.0
;
unsigned
long
ifine
=
visindex
;
myull
ifine
=
visindex
;
for
(
myuint
ifreq
=
0
;
ifreq
<
freq_per_chan
;
ifreq
++
)
for
(
myuint
ifreq
=
0
;
ifreq
<
freq_per_chan
;
ifreq
++
)
{
{
myuint
iweight
=
visindex
/
freq_per_chan
;
myuint
iweight
=
visindex
/
freq_per_chan
;
...
@@ -245,7 +245,7 @@ void wstack(
...
@@ -245,7 +245,7 @@ void wstack(
{
{
myuint
i
;
myuint
i
;
//myuint index;
//myuint index;
unsigned
long
visindex
;
myull
visindex
;
// initialize the convolution kernel
// initialize the convolution kernel
// gaussian:
// gaussian:
...
@@ -274,7 +274,7 @@ void wstack(
...
@@ -274,7 +274,7 @@ void wstack(
int
Nth
=
NTHREADS
;
int
Nth
=
NTHREADS
;
myuint
Nbl
=
(
myuint
)(
num_points
/
Nth
)
+
1
;
myuint
Nbl
=
(
myuint
)(
num_points
/
Nth
)
+
1
;
if
(
NWORKERS
==
1
)
{
Nbl
=
1
;
Nth
=
1
;};
if
(
NWORKERS
==
1
)
{
Nbl
=
1
;
Nth
=
1
;};
unsigned
long
Nvis
=
num_points
*
freq_per_chan
*
polarizations
;
myull
Nvis
=
num_points
*
freq_per_chan
*
polarizations
;
int
ndevices
;
int
ndevices
;
cudaGetDeviceCount
(
&
ndevices
);
cudaGetDeviceCount
(
&
ndevices
);
...
@@ -441,7 +441,7 @@ void wstack(
...
@@ -441,7 +441,7 @@ void wstack(
#if defined(ACCOMP) && (GPU_STACKING)
#if defined(ACCOMP) && (GPU_STACKING)
omp_set_default_device
(
rank
%
omp_get_num_devices
());
omp_set_default_device
(
rank
%
omp_get_num_devices
());
myu
int
Nvis
=
num_points
*
freq_per_chan
*
polarizations
;
myu
ll
Nvis
=
num_points
*
freq_per_chan
*
polarizations
;
#pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
#pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
#else
#else
#pragma omp parallel for private(visindex)
#pragma omp parallel for private(visindex)
...
@@ -507,7 +507,7 @@ void wstack(
...
@@ -507,7 +507,7 @@ void wstack(
// Loops over frequencies and polarizations
// Loops over frequencies and polarizations
double
add_term_real
=
0.0
;
double
add_term_real
=
0.0
;
double
add_term_img
=
0.0
;
double
add_term_img
=
0.0
;
unsigned
long
ifine
=
visindex
;
myull
ifine
=
visindex
;
// DAV: the following two loops are performend by each thread separately: no problems of race conditions
// DAV: the following two loops are performend by each thread separately: no problems of race conditions
for
(
myuint
ifreq
=
0
;
ifreq
<
freq_per_chan
;
ifreq
++
)
for
(
myuint
ifreq
=
0
;
ifreq
<
freq_per_chan
;
ifreq
++
)
{
{
...
...
This diff is collapsed.
Click to expand it.
w-stacking.hip.cpp
+
6
−
6
View file @
cd33eaa6
...
@@ -130,7 +130,7 @@ __global__ void convolve_g(
...
@@ -130,7 +130,7 @@ __global__ void convolve_g(
if
(
gid
<
num_points
)
if
(
gid
<
num_points
)
{
{
myuint
i
=
gid
;
myuint
i
=
gid
;
myu
int
visindex
=
i
*
freq_per_chan
*
polarizations
;
myu
ll
visindex
=
i
*
freq_per_chan
*
polarizations
;
double
norm
=
std22
/
PI
;
double
norm
=
std22
/
PI
;
int
j
,
k
;
int
j
,
k
;
...
@@ -178,7 +178,7 @@ __global__ void convolve_g(
...
@@ -178,7 +178,7 @@ __global__ void convolve_g(
// Loops over frequencies and polarizations
// Loops over frequencies and polarizations
double
add_term_real
=
0.0
;
double
add_term_real
=
0.0
;
double
add_term_img
=
0.0
;
double
add_term_img
=
0.0
;
myu
int
ifine
=
visindex
;
myu
ll
ifine
=
visindex
;
for
(
myuint
ifreq
=
0
;
ifreq
<
freq_per_chan
;
ifreq
++
)
for
(
myuint
ifreq
=
0
;
ifreq
<
freq_per_chan
;
ifreq
++
)
{
{
myuint
iweight
=
visindex
/
freq_per_chan
;
myuint
iweight
=
visindex
/
freq_per_chan
;
...
@@ -233,7 +233,7 @@ void wstack(
...
@@ -233,7 +233,7 @@ void wstack(
{
{
myuint
i
;
myuint
i
;
//myuint index;
//myuint index;
myu
int
visindex
;
myu
ll
visindex
;
// initialize the convolution kernel
// initialize the convolution kernel
// gaussian:
// gaussian:
...
@@ -262,7 +262,7 @@ void wstack(
...
@@ -262,7 +262,7 @@ void wstack(
int
Nth
=
NTHREADS
;
int
Nth
=
NTHREADS
;
myuint
Nbl
=
(
myuint
)(
num_points
/
Nth
)
+
1
;
myuint
Nbl
=
(
myuint
)(
num_points
/
Nth
)
+
1
;
if
(
NWORKERS
==
1
)
{
Nbl
=
1
;
Nth
=
1
;};
if
(
NWORKERS
==
1
)
{
Nbl
=
1
;
Nth
=
1
;};
myu
int
Nvis
=
num_points
*
freq_per_chan
*
polarizations
;
myu
ll
Nvis
=
num_points
*
freq_per_chan
*
polarizations
;
int
ndevices
;
int
ndevices
;
int
num
=
hipGetDeviceCount
(
&
ndevices
);
int
num
=
hipGetDeviceCount
(
&
ndevices
);
...
@@ -404,7 +404,7 @@ void wstack(
...
@@ -404,7 +404,7 @@ void wstack(
#if defined(ACCOMP) && (GPU_STACKING)
#if defined(ACCOMP) && (GPU_STACKING)
omp_set_default_device
(
rank
%
omp_get_num_devices
());
omp_set_default_device
(
rank
%
omp_get_num_devices
());
myu
int
Nvis
=
num_points
*
freq_per_chan
*
polarizations
;
myu
ll
Nvis
=
num_points
*
freq_per_chan
*
polarizations
;
#pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
#pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
#else
#else
#pragma omp parallel for private(visindex)
#pragma omp parallel for private(visindex)
...
@@ -470,7 +470,7 @@ void wstack(
...
@@ -470,7 +470,7 @@ void wstack(
// Loops over frequencies and polarizations
// Loops over frequencies and polarizations
double
add_term_real
=
0.0
;
double
add_term_real
=
0.0
;
double
add_term_img
=
0.0
;
double
add_term_img
=
0.0
;
myu
int
ifine
=
visindex
;
myu
ll
ifine
=
visindex
;
// DAV: the following two loops are performend by each thread separately: no problems of race conditions
// DAV: the following two loops are performend by each thread separately: no problems of race conditions
for
(
myuint
ifreq
=
0
;
ifreq
<
freq_per_chan
;
ifreq
++
)
for
(
myuint
ifreq
=
0
;
ifreq
<
freq_per_chan
;
ifreq
++
)
{
{
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment