HPC_Imaging, commit 4760ede6
authored Jan 24, 2023 by Emanuele De Rubeis
parent ef5ea6f4
1 merge request: !3 CFITSIO and parallel images writing implementation

CFITSIO implemented

Showing 1 changed file: w-stacking-fftw-cfitsio.c (new file, mode 100644, +1089 -0)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef FITSIO
#include "/m100/home/userexternal/ederubei/cfitsio-3.49/fitsio.h"
#endif
#ifdef USE_MPI
#include <mpi.h>
#ifdef USE_FFTW
#include <fftw3-mpi.h>
#endif
#endif
#include <omp.h>
#include <math.h>
#include <time.h>
#include <unistd.h>
#ifdef ACCOMP
#include "w-stacking_omp.h"
#else
#include "w-stacking.h"
#endif

#define PI 3.14159265359
#define NUM_OF_SECTORS -1
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
#define NOVERBOSE
#define NFILES 100
#define FILENAMELENGTH 30
// Linked List set-up
struct sectorlist {
    long index;
    struct sectorlist *next;
};

void Push(struct sectorlist **headRef, long data)
{
    struct sectorlist *newNode = malloc(sizeof(struct sectorlist));
    newNode->index = data;
    newNode->next = *headRef;
    *headRef = newNode;
}
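
/* Each sector keeps a singly linked list of the measurement indices that fall
 * into it. Push() prepends in O(1); every list ends in a sentinel node with
 * index == -1, which is what the traversal loops below test for. */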
// Main Code
int main(int argc, char *argv[])
{
    // Main MPI parameters
    int rank;
    int size;

    // Define main filenames
    FILE *pFile;
    FILE *pFile1;
    FILE *pFilereal;
    FILE *pFileimg;

    // Global filename to be composed
    char filename[1000];

    // MS paths
    char datapath[900];
    char datapath_multi[NFILES][900];
    char ufile[FILENAMELENGTH] = "ucoord.bin";
    char vfile[FILENAMELENGTH] = "vcoord.bin";
    char wfile[FILENAMELENGTH] = "wcoord.bin";
    char weightsfile[FILENAMELENGTH] = "weights.bin";
    char visrealfile[FILENAMELENGTH] = "visibilities_real.bin";
    char visimgfile[FILENAMELENGTH] = "visibilities_img.bin";
    char metafile[FILENAMELENGTH] = "meta.txt";

    // Mesh related files
    char outfile[FILENAMELENGTH] = "grid.txt";
    char outfile1[FILENAMELENGTH] = "coords.txt";
    char outfile2[FILENAMELENGTH] = "grid_real.bin";
    char outfile3[FILENAMELENGTH] = "grid_img.bin";
    char fftfile[FILENAMELENGTH] = "fft.txt";
    char fftfile2[FILENAMELENGTH] = "fft_real.bin";
    char fftfile3[FILENAMELENGTH] = "fft_img.bin";
    char logfile[FILENAMELENGTH] = "run.log";
    char extension[FILENAMELENGTH] = ".txt";
    char srank[4];
    char timingfile[FILENAMELENGTH] = "timings.dat";

    // Visibilities related variables
    double *uu;
    double *vv;
    double *ww;
    float *weights;
    float *visreal;
    float *visimg;
    long Nmeasures, Nmeasures0;
    long Nvis, Nvis0;
    long Nweights, Nweights0;
    long freq_per_chan, freq_per_chan0;
    long polarisations, polarisations0;
    long Ntimes, Ntimes0;
    double dt, dt0;
    double thours, thours0;
    long baselines, baselines0;
    double uvmin, uvmin0;
    double uvmax, uvmax0;
    double wmin, wmin0;
    double wmax, wmax0;
    double resolution;

    // Mesh related parameters: global size
    int grid_size_x = 2048;
    int grid_size_y = 2048;
    // Split mesh size (auto-calculated)
    int local_grid_size_x;
    int local_grid_size_y;
    int xaxis;
    int yaxis;
    // Number of planes in the w direction
    int num_w_planes = 8;
    // Size of the convolutional kernel support
    int w_support = 7;
    // Number of OpenMP threads (input parameter)
    int num_threads;
    // Resolution
    double dx = 0.5 / (double)grid_size_x;
    double dw = 0.5 / (double)num_w_planes;
    // Half support size
    double w_supporth = (double)((w_support - 1) / 2) * dx;

    // Initialize FITS image parameters
    fitsfile *fptreal;
    fitsfile *fptrimg;
    int status;
    long nelements;
    // long fpixel, lpixel;
    char testfitsreal[FILENAMELENGTH] = "parallel_np2_real.fits";
    char testfitsimag[FILENAMELENGTH] = "parallel_np2_img.fits";
    long naxis = 2;
    long naxes[2] = { grid_size_x, grid_size_y };
    nelements = naxes[0] * naxes[1];

    // Internal profiling parameters
    clock_t start, end, start0, startk, endk;
    double setup_time, process_time, mpi_time, fftw_time, tot_time,
           kernel_time, reduce_time, compose_time, phase_time;
    double setup_time1, process_time1, mpi_time1, fftw_time1, tot_time1,
           kernel_time1, reduce_time1, compose_time1, phase_time1;
    double writetime, writetime1;
    struct timespec begin, finish, begin0, begink, finishk;
    double elapsed;

    // Number of sectors in which the mesh is divided along the v direction.
    // If nsectors < 0, nsectors = number of MPI ranks.
    long nsectors = NUM_OF_SECTORS;

    /* Get number of threads, exit if not given */
    if (argc == 1) {
        fprintf(stderr, "Usage: %s number_of_OMP_Threads\n", argv[0]);
        exit(1);
    }
    // Set the number of OpenMP threads
    num_threads = atoi(argv[1]);
    // A number of threads <= 0 is not acceptable
    if (num_threads <= 0) {
        fprintf(stderr, "Wrong parameter: %s\n\n", argv[1]);
        fprintf(stderr, "Usage: %s number_of_OMP_Threads\n", argv[0]);
        exit(1);
    }

    // Internal profiling
    clock_gettime(CLOCK_MONOTONIC, &begin0);
    start0 = clock();

#ifdef USE_MPI
    // Initialize MPI environment
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (rank == 0) printf("Running with %d MPI tasks\n", size);
#ifdef USE_FFTW
    // Initialize FFTW environment
    fftw_mpi_init();
#endif
#else
    // If MPI is not used the two parameters are set for consistency
    rank = 0;
    size = 1;
#endif

    if (rank == 0) printf("Running with %d threads\n", num_threads);

#ifdef ACCOMP
    if (rank == 0) {
        if (0 == omp_get_num_devices()) {
            printf("No accelerator found ... exit\n");
            exit(255);
        }
        printf("Number of available GPUs %d\n", omp_get_num_devices());
#ifdef NVIDIA
        prtAccelInfo();
#endif
    }
#endif

    // Set the local size of the image
    local_grid_size_x = grid_size_x;
    if (nsectors < 0) nsectors = size;
    local_grid_size_y = grid_size_y / nsectors;
    //nsectors = size;
    // LOCAL grid size
    xaxis = local_grid_size_x;
    yaxis = local_grid_size_y;

    clock_gettime(CLOCK_MONOTONIC, &begin);
    start = clock();

    // INPUT FILES (only the first ndatasets entries are used)
    int ndatasets = 1;
    strcpy(datapath_multi[0], "/m100_scratch/userexternal/ederubei/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
    //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/");
    //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
    //strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/");
    strcpy(datapath, datapath_multi[0]);

    // Read metadata
    strcpy(filename, datapath);
    strcat(filename, metafile);
    pFile = fopen(filename, "r");
    fscanf(pFile, "%ld", &Nmeasures);
    fscanf(pFile, "%ld", &Nvis);
    fscanf(pFile, "%ld", &freq_per_chan);
    fscanf(pFile, "%ld", &polarisations);
    fscanf(pFile, "%ld", &Ntimes);
    fscanf(pFile, "%lf", &dt);
    fscanf(pFile, "%lf", &thours);
    fscanf(pFile, "%ld", &baselines);
    fscanf(pFile, "%lf", &uvmin);
    fscanf(pFile, "%lf", &uvmax);
    fscanf(pFile, "%lf", &wmin);
    fscanf(pFile, "%lf", &wmax);
    fclose(pFile);
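
    /* meta.txt layout (one value per line, in the order read above):
     * Nmeasures, Nvis, freq_per_chan, polarisations, Ntimes, dt, thours,
     * baselines, uvmin, uvmax, wmin, wmax. */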
    // WATCH THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    int nsub = 1000;
    //int nsub = 10;
    printf("Subtracting last %d measurements\n", nsub);
    Nmeasures = Nmeasures - nsub;
    Nvis = Nmeasures * freq_per_chan * polarisations;

    // Calculate the coordinates of the center
    double uvshift = uvmin / (uvmax - uvmin);
    //printf("UVSHIFT %f %f %f %f %f\n",uvmin, uvmax, wmin, wmax, uvshift);

    if (rank == 0) {
        printf("N. measurements %ld\n", Nmeasures);
        printf("N. visibilities %ld\n", Nvis);
    }

    // Set temporary local size of points
    long nm_pe = (long)(Nmeasures / size);
    long remaining = Nmeasures % size;
    long startrow = rank * nm_pe;
    if (rank == size - 1) nm_pe = nm_pe + remaining;
    long Nmeasures_tot = Nmeasures;
    Nmeasures = nm_pe;
    long Nvis_tot = Nvis;
    Nvis = Nmeasures * freq_per_chan * polarisations;
    Nweights = Nmeasures * polarisations;

#ifdef VERBOSE
    printf("N. measurements on %d %ld\n", rank, Nmeasures);
    printf("N. visibilities on %d %ld\n", rank, Nvis);
#endif
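
    /* Example of the decomposition above (illustrative numbers, not from the
     * code): with Nmeasures = 1000003 and size = 4, nm_pe = 250000 and
     * remaining = 3, so ranks 0..2 each read 250000 rows and rank 3 reads
     * 250003 rows starting at startrow = 750000. */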
    // DAV: all these arrays can be allocated statically for the sake of optimization.
    // However, be careful that if MPI is used all the sizes are rescaled by the number of MPI tasks.
    // Allocate arrays
    uu = (double *) calloc(Nmeasures, sizeof(double));
    vv = (double *) calloc(Nmeasures, sizeof(double));
    ww = (double *) calloc(Nmeasures, sizeof(double));
    weights = (float *) calloc(Nweights, sizeof(float));
    visreal = (float *) calloc(Nvis, sizeof(float));
    visimg = (float *) calloc(Nvis, sizeof(float));

    if (rank == 0) printf("READING DATA\n");

    // Read data
    strcpy(filename, datapath);
    strcat(filename, ufile);
    //printf("Reading %s\n",filename);
    pFile = fopen(filename, "rb");
    fseek(pFile, startrow * sizeof(double), SEEK_SET);
    fread(uu, Nmeasures * sizeof(double), 1, pFile);
    fclose(pFile);

    strcpy(filename, datapath);
    strcat(filename, vfile);
    //printf("Reading %s\n",filename);
    pFile = fopen(filename, "rb");
    fseek(pFile, startrow * sizeof(double), SEEK_SET);
    fread(vv, Nmeasures * sizeof(double), 1, pFile);
    fclose(pFile);

    strcpy(filename, datapath);
    strcat(filename, wfile);
    //printf("Reading %s\n",filename);
    pFile = fopen(filename, "rb");
    fseek(pFile, startrow * sizeof(double), SEEK_SET);
    fread(ww, Nmeasures * sizeof(double), 1, pFile);
    fclose(pFile);

#ifdef USE_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif

    clock_gettime(CLOCK_MONOTONIC, &finish);
    end = clock();
    setup_time = ((double)(end - start)) / CLOCKS_PER_SEC;
    setup_time1 = (finish.tv_sec - begin.tv_sec);
    setup_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;

    if (rank == 0) printf("GRIDDING DATA\n");

    // Create histograms and linked lists
    clock_gettime(CLOCK_MONOTONIC, &begin);
    start = clock();

    // Initialize linked list
    struct sectorlist **sectorhead;
    sectorhead = (struct sectorlist **) malloc((nsectors + 1) * sizeof(struct sectorlist));
    for (int isec = 0; isec <= nsectors; isec++) {
        sectorhead[isec] = malloc(sizeof(struct sectorlist));
        sectorhead[isec]->index = -1;
        sectorhead[isec]->next = NULL;
    }

    long *histo_send = (long *) calloc(nsectors + 1, sizeof(long));
    int *boundary = (int *) calloc(Nmeasures, sizeof(int));
    double uuh, vvh;

    for (long iphi = 0; iphi < Nmeasures; iphi++) {
        boundary[iphi] = -1;
        uuh = uu[iphi];
        vvh = vv[iphi];
        int binphi = (int)(vvh * nsectors);
        // Check whether the point also influences the neighbouring slabs
        double updist = (double)((binphi + 1) * yaxis) * dx - vvh;
        double downdist = vvh - (double)(binphi * yaxis) * dx;
        //
        histo_send[binphi]++;
        Push(&sectorhead[binphi], iphi);
        if (updist < w_supporth && updist >= 0.0) {
            histo_send[binphi + 1]++;
            boundary[iphi] = binphi + 1;
            Push(&sectorhead[binphi + 1], iphi);
        };
        if (downdist < w_supporth && binphi > 0 && downdist >= 0.0) {
            histo_send[binphi - 1]++;
            boundary[iphi] = binphi - 1;
            Push(&sectorhead[binphi - 1], iphi);
        };
    }
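
    /* Sector binning sketch: v coordinates are normalised, so a point with
     * vvh = 0.30 and nsectors = 4 lands in binphi = 1. If it lies closer than
     * w_supporth (half the kernel support, in normalised units) to a slab
     * edge, it is also pushed onto the neighbouring sector's list, so the
     * convolution near the boundary sees every contributing point. */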
#ifdef PIPPO
    struct sectorlist *current;
    long iiii = 0;
    for (int j = 0; j < nsectors; j++) {
        current = sectorhead[j];
        iiii = 0;
        while (current->index != -1) {
            printf("%d %d %ld %ld %ld\n", rank, j, iiii, histo_send[j], current->index);
            current = current->next;
            iiii++;
        }
    }
#endif

#ifdef VERBOSE
    for (int iii = 0; iii < nsectors + 1; iii++)
        printf("HISTO %d %d %ld\n", rank, iii, histo_send[iii]);
#endif

    // Create sector grid
    double *gridss;
    double *gridss_w;
    double *gridss_real;
    double *gridss_img;
    double *grid;
    long size_of_grid;
    size_of_grid = 2 * num_w_planes * xaxis * yaxis;
    gridss = (double *) calloc(size_of_grid, sizeof(double));
    gridss_w = (double *) calloc(size_of_grid, sizeof(double));
    gridss_real = (double *) calloc(size_of_grid / 2, sizeof(double));
    gridss_img = (double *) calloc(size_of_grid / 2, sizeof(double));
    // Create destination slab
    grid = (double *) calloc(size_of_grid, sizeof(double));

    // Create temporary global grid
#ifndef USE_MPI
    double *gridtot = (double *) calloc(2 * grid_size_x * grid_size_y * num_w_planes, sizeof(double));
#endif

    double shift = (double)(dx * yaxis);
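
    /* Grid memory layout: size_of_grid = 2 * num_w_planes * xaxis * yaxis
     * doubles, with real and imaginary parts interleaved, i.e. cell (iu, iv)
     * of w-plane iw occupies elements 2*(iu + iv*xaxis + iw*xaxis*yaxis) and
     * the one after it. The same convention is used by gridss, grid and
     * gridtot below. */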
    // Open the MPI memory window for the slab
#ifdef USE_MPI
    MPI_Win slabwin;
    MPI_Win_create(grid, size_of_grid * sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &slabwin);
    MPI_Win_fence(0, slabwin);
#endif
#ifndef USE_MPI
    pFile1 = fopen(outfile1, "w");
#endif

    // Loop over files
    //
    kernel_time = 0.0;
    kernel_time1 = 0.0;
    reduce_time = 0.0;
    reduce_time1 = 0.0;
    compose_time = 0.0;
    compose_time1 = 0.0;

    // MAIN LOOP OVER FILES
    //
    for (int ifiles = 0; ifiles < ndatasets; ifiles++) {

        strcpy(filename, datapath_multi[ifiles]);
        printf("Processing %s, %d of %d\n", filename, ifiles + 1, ndatasets);

        // Read metadata
        strcpy(filename, datapath);
        strcat(filename, metafile);
        pFile = fopen(filename, "r");
        fscanf(pFile, "%ld", &Nmeasures0);
        fscanf(pFile, "%ld", &Nvis0);
        fscanf(pFile, "%ld", &freq_per_chan0);
        fscanf(pFile, "%ld", &polarisations0);
        fscanf(pFile, "%ld", &Ntimes0);
        fscanf(pFile, "%lf", &dt0);
        fscanf(pFile, "%lf", &thours0);
        fscanf(pFile, "%ld", &baselines0);
        fscanf(pFile, "%lf", &uvmin);
        fscanf(pFile, "%lf", &uvmax);
        fscanf(pFile, "%lf", &wmin0);
        fscanf(pFile, "%lf", &wmax0);
        fclose(pFile);

        // Calculate the resolution in radians
        // (fabs, not the integer abs, is required here for double arguments)
        resolution = 1.0 / MAX(fabs(uvmin), fabs(uvmax));
        // Calculate the resolution in arcsec
        double resolution_asec = (3600.0 * 180.0) / MAX(fabs(uvmin), fabs(uvmax)) / PI;
        printf("RESOLUTION = %f rad, %f arcsec\n", resolution, resolution_asec);

        strcpy(filename, datapath);
        strcat(filename, weightsfile);
        pFile = fopen(filename, "rb");
        fseek(pFile, startrow * polarisations * sizeof(float), SEEK_SET);
        fread(weights, (Nweights) * sizeof(float), 1, pFile);
        fclose(pFile);

        strcpy(filename, datapath);
        strcat(filename, visrealfile);
        pFile = fopen(filename, "rb");
        fseek(pFile, startrow * freq_per_chan * polarisations * sizeof(float), SEEK_SET);
        fread(visreal, Nvis * sizeof(float), 1, pFile);
        fclose(pFile);

        strcpy(filename, datapath);
        strcat(filename, visimgfile);
#ifdef VERBOSE
        printf("Reading %s\n", filename);
#endif
        pFile = fopen(filename, "rb");
        fseek(pFile, startrow * freq_per_chan * polarisations * sizeof(float), SEEK_SET);
        fread(visimg, Nvis * sizeof(float), 1, pFile);
        fclose(pFile);

#ifdef USE_MPI
        MPI_Barrier(MPI_COMM_WORLD);
#endif

        // Declare temporary arrays for the masking
        double *uus;
        double *vvs;
        double *wws;
        float *visreals;
        float *visimgs;
        float *weightss;
        long isector;

        for (long isector_count = 0; isector_count < nsectors; isector_count++) {

            clock_gettime(CLOCK_MONOTONIC, &begink);
            startk = clock();

            // Define local destination sector
            //isector = (isector_count+rank)%size;
            isector = isector_count;

            // Allocate sector arrays
            long Nsec = histo_send[isector];
            uus = (double *) malloc(Nsec * sizeof(double));
            vvs = (double *) malloc(Nsec * sizeof(double));
            wws = (double *) malloc(Nsec * sizeof(double));
            long Nweightss = Nsec * polarisations;
            long Nvissec = Nweightss * freq_per_chan;
            weightss = (float *) malloc(Nweightss * sizeof(float));
            visreals = (float *) malloc(Nvissec * sizeof(float));
            visimgs = (float *) malloc(Nvissec * sizeof(float));

            // Select data for this sector
            long icount = 0;
            long ip = 0;
            long inu = 0;
            //CLAAAA
            struct sectorlist *current;
            current = sectorhead[isector];

            while (current->index != -1) {
                long ilocal = current->index;
                //double vvh = vv[ilocal];
                //int binphi = (int)(vvh*nsectors);
                //if (binphi == isector || boundary[ilocal] == isector) {
                uus[icount] = uu[ilocal];
                vvs[icount] = vv[ilocal] - isector * shift;
                wws[icount] = ww[ilocal];
                for (long ipol = 0; ipol < polarisations; ipol++) {
                    weightss[ip] = weights[ilocal * polarisations + ipol];
                    ip++;
                }
                for (long ifreq = 0; ifreq < polarisations * freq_per_chan; ifreq++) {
                    visreals[inu] = visreal[ilocal * polarisations * freq_per_chan + ifreq];
                    visimgs[inu] = visimg[ilocal * polarisations * freq_per_chan + ifreq];
                    //if(visimgs[inu]>1e10 || visimgs[inu]<-1e10)printf("%f %f %ld %ld %d %ld %ld\n",visreals[inu],visimgs[inu],inu,Nvissec,rank,ilocal*polarisations*freq_per_chan+ifreq,Nvis);
                    inu++;
                }
                icount++;
                //}
                current = current->next;
            }
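
            /* Note on the shift above: vvs is moved into sector-local
             * coordinates by subtracting isector*shift, where shift = dx*yaxis
             * is the v-extent of one slab. For instance (illustrative
             * numbers), with yaxis = 512 and dx = 0.5/2048, shift = 0.125, so
             * a point with vv = 0.30 in sector 2 gets vvs = 0.05. */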
            clock_gettime(CLOCK_MONOTONIC, &finishk);
            endk = clock();
            compose_time += ((double)(endk - startk)) / CLOCKS_PER_SEC;
            compose_time1 += (finishk.tv_sec - begink.tv_sec);
            compose_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;

#ifndef USE_MPI
            double uumin = 1e20;
            double vvmin = 1e20;
            double uumax = -1e20;
            double vvmax = -1e20;
            for (long ipart = 0; ipart < Nsec; ipart++) {
                uumin = MIN(uumin, uus[ipart]);
                uumax = MAX(uumax, uus[ipart]);
                vvmin = MIN(vvmin, vvs[ipart]);
                vvmax = MAX(vvmax, vvs[ipart]);
                // Coordinates go to pFile1 (coords.txt); pFile was closed above
                if (ipart % 10 == 0)
                    fprintf(pFile1, "%ld %f %f %f\n", isector, uus[ipart], vvs[ipart] + isector * shift, wws[ipart]);
            }
            printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax);
#endif

            // Make convolution on the grid
#ifdef VERBOSE
            printf("Processing sector %ld\n", isector);
#endif
            clock_gettime(CLOCK_MONOTONIC, &begink);
            startk = clock();

            wstack(num_w_planes,
                   Nsec,
                   freq_per_chan,
                   polarisations,
                   uus,
                   vvs,
                   wws,
                   visreals,
                   visimgs,
                   weightss,
                   dx,
                   dw,
                   w_support,
                   xaxis,
                   yaxis,
                   gridss,
                   num_threads);

            /* int z =0 ;
            #pragma omp target map(to:test_i_gpu) map(from:z)
            {
              int x; // only accessible from accelerator
              x = 2;
              z = x + test_i_gpu;
            }*/

            clock_gettime(CLOCK_MONOTONIC, &finishk);
            endk = clock();
            kernel_time += ((double)(endk - startk)) / CLOCKS_PER_SEC;
            kernel_time1 += (finishk.tv_sec - begink.tv_sec);
            kernel_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;

#ifdef VERBOSE
            printf("Processed sector %ld\n", isector);
#endif
            clock_gettime(CLOCK_MONOTONIC, &begink);
            startk = clock();

            //for (long iii=0; iii<2*xaxis*yaxis*num_w_planes; iii++)printf("--> %f\n",gridss[iii]);
#ifndef USE_MPI
            long stride = isector * 2 * xaxis * yaxis * num_w_planes;
            for (long iii = 0; iii < 2 * xaxis * yaxis * num_w_planes; iii++)
                gridtot[stride + iii] = gridss[iii];
#endif

            // Write the grid in the corresponding remote slab
#ifdef USE_MPI
            int target_rank = (int)isector;
            //int target_rank = (int)(size-isector-1);
#ifdef ONE_SIDE
            printf("One Side communication active\n");
            MPI_Win_lock(MPI_LOCK_SHARED, target_rank, 0, slabwin);
            MPI_Accumulate(gridss, size_of_grid, MPI_DOUBLE, target_rank, 0, size_of_grid, MPI_DOUBLE, MPI_SUM, slabwin);
            MPI_Win_unlock(target_rank, slabwin);
            //MPI_Put(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,slabwin);
#else
            MPI_Reduce(gridss, grid, size_of_grid, MPI_DOUBLE, MPI_SUM, target_rank, MPI_COMM_WORLD);
#endif //ONE_SIDE
#endif //USE_MPI
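
            /* Two reduction strategies are compiled in alternatively: with
             * ONE_SIDE each rank sums its sector grid into the owner's slab
             * through passive-target RMA (lock / MPI_Accumulate / unlock),
             * avoiding a synchronisation of all ranks; otherwise a collective
             * MPI_Reduce with root = target_rank performs the same sum. */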
            clock_gettime(CLOCK_MONOTONIC, &finishk);
            endk = clock();
            reduce_time += ((double)(endk - startk)) / CLOCKS_PER_SEC;
            reduce_time1 += (finishk.tv_sec - begink.tv_sec);
            reduce_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;

            // Go to the next sector
            for (long inull = 0; inull < 2 * num_w_planes * xaxis * yaxis; inull++)
                gridss[inull] = 0.0;

            // Deallocate all sector arrays
            free(uus);
            free(vvs);
            free(wws);
            free(weightss);
            free(visreals);
            free(visimgs);
            // End of loop over sectors
        }
        // End of loop over input files
    }

    // Finalize MPI communication
#ifdef USE_MPI
    MPI_Win_fence(0, slabwin);
#endif
// Swap left and right parts APPARENTLY NOT NECESSARY
/*
for (long kswap=0; kswap<num_w_planes; kswap++)
for (long jswap=0; jswap<yaxis; jswap++)
for (long iswap=0; iswap<xaxis; iswap++)
{
long index_origin = 2*(iswap + jswap*xaxis + kswap*yaxis*xaxis);
gridss[index_origin] = grid[index_origin];
gridss[index_origin+1] = grid[index_origin+1];
}
for (long kswap=0; kswap<num_w_planes; kswap++)
for (long jswap=0; jswap<yaxis; jswap++)
for (long iswap=0; iswap<xaxis/2; iswap++)
{
long index_origin = 2*(iswap + jswap*xaxis + kswap*yaxis*xaxis);
long index_destination = 2*(iswap+xaxis/2 + jswap*xaxis + kswap*yaxis*xaxis);
grid[index_destination] = gridss[index_origin];
grid[index_destination+1] = gridss[index_origin+1];
}
for (long kswap=0; kswap<num_w_planes; kswap++)
for (long jswap=0; jswap<yaxis; jswap++)
for (long iswap=xaxis/2; iswap<xaxis; iswap++)
{
long index_origin = 2*(iswap + jswap*xaxis + kswap*yaxis*xaxis);
long index_destination = 2*(iswap-xaxis/2 + jswap*xaxis + kswap*yaxis*xaxis);
grid[index_destination] = gridss[index_origin];
grid[index_destination+1] = gridss[index_origin+1];
}
*/
#ifndef USE_MPI
    fclose(pFile1);
#endif
#ifdef USE_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif

    end = clock();
    clock_gettime(CLOCK_MONOTONIC, &finish);
    process_time = ((double)(end - start)) / CLOCKS_PER_SEC;
    process_time1 = (finish.tv_sec - begin.tv_sec);
    process_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
    clock_gettime(CLOCK_MONOTONIC, &begin);

#ifdef WRITE_DATA
    // Write results
    if (rank == 0) {
        printf("WRITING GRIDDED DATA\n");
        pFilereal = fopen(outfile2, "wb");
        pFileimg = fopen(outfile3, "wb");
#ifdef USE_MPI
        for (int isector = 0; isector < nsectors; isector++) {
            MPI_Win_lock(MPI_LOCK_SHARED, isector, 0, slabwin);
            MPI_Get(gridss, size_of_grid, MPI_DOUBLE, isector, 0, size_of_grid, MPI_DOUBLE, slabwin);
            MPI_Win_unlock(isector, slabwin);
            for (long i = 0; i < size_of_grid / 2; i++) {
                gridss_real[i] = gridss[2 * i];
                gridss_img[i] = gridss[2 * i + 1];
            }
            if (num_w_planes > 1) {
                for (int iw = 0; iw < num_w_planes; iw++)
                    for (int iv = 0; iv < yaxis; iv++)
                        for (int iu = 0; iu < xaxis; iu++) {
                            long global_index = (iu + (iv + isector * yaxis) * xaxis + iw * grid_size_x * grid_size_y) * sizeof(double);
                            long index = iu + iv * xaxis + iw * xaxis * yaxis;
                            fseek(pFilereal, global_index, SEEK_SET);
                            fwrite(&gridss_real[index], 1, sizeof(double), pFilereal);
                        }
                for (int iw = 0; iw < num_w_planes; iw++)
                    for (int iv = 0; iv < yaxis; iv++)
                        for (int iu = 0; iu < xaxis; iu++) {
                            long global_index = (iu + (iv + isector * yaxis) * xaxis + iw * grid_size_x * grid_size_y) * sizeof(double);
                            long index = iu + iv * xaxis + iw * xaxis * yaxis;
                            fseek(pFileimg, global_index, SEEK_SET);
                            fwrite(&gridss_img[index], 1, sizeof(double), pFileimg);
                            //double v_norm = sqrt(gridss[index]*gridss[index]+gridss[index+1]*gridss[index+1]);
                            //fprintf (pFile, "%d %d %d %f %f %f\n", iu,isector*yaxis+iv,iw,gridss[index],gridss[index+1],v_norm);
                        }
            }
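            /* global_index is a byte offset (hence the final * sizeof(double))
             * into the output file, which stores the whole grid_size_x by
             * grid_size_y by num_w_planes cube; each fseek/fwrite pair places
             * one local cell at its global position, so the sectors can be
             * written in any order. */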
            else {
                for (int iw = 0; iw < num_w_planes; iw++) {
                    long global_index = (xaxis * isector * yaxis + iw * grid_size_x * grid_size_y) * sizeof(double);
                    long index = iw * xaxis * yaxis;
                    fseek(pFilereal, global_index, SEEK_SET);
                    fwrite(&gridss_real[index], xaxis * yaxis, sizeof(double), pFilereal);
                    fseek(pFileimg, global_index, SEEK_SET);
                    fwrite(&gridss_img[index], xaxis * yaxis, sizeof(double), pFileimg);
                }
            }
        }
#else
        for (int iw = 0; iw < num_w_planes; iw++)
            for (int iv = 0; iv < grid_size_y; iv++)
                for (int iu = 0; iu < grid_size_x; iu++) {
                    long index = 2 * (iu + iv * grid_size_x + iw * grid_size_x * grid_size_y);
                    fwrite(&gridtot[index], 1, sizeof(double), pFilereal);
                    fwrite(&gridtot[index + 1], 1, sizeof(double), pFileimg);
                    //double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]);
                    //fprintf (pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm);
                }
#endif
        fclose(pFilereal);
        fclose(pFileimg);
    }
#ifdef USE_MPI
    MPI_Win_fence(0, slabwin);
#endif
#endif //WRITE_DATA

#ifdef USE_FFTW
    // FFT transform the data (using distributed FFTW)
    if (rank == 0) printf("PERFORMING FFT\n");
    clock_gettime(CLOCK_MONOTONIC, &begin);
    start = clock();

    fftw_plan plan;
    fftw_complex *fftwgrid;
    ptrdiff_t alloc_local, local_n0, local_0_start;
    double norm = 1.0 / (double)(grid_size_x * grid_size_y);

    // Map the 1D array of complex visibilities to the 2D array required by FFTW (complex[*][2]):
    // x is the direction of contiguous data and maps to the second parameter,
    // y is the parallelized direction and corresponds to the first parameter (--> n0);
    // then perform the FFT per w plane.
    alloc_local = fftw_mpi_local_size_2d(grid_size_y, grid_size_x, MPI_COMM_WORLD, &local_n0, &local_0_start);
    fftwgrid = fftw_alloc_complex(alloc_local);
    plan = fftw_mpi_plan_dft_2d(grid_size_y, grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE);

    long fftwindex = 0;
    long fftwindex2D = 0;
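
    /* Note (assumption about the run configuration): the per-plane copies
     * below treat the local FFTW slab as exactly yaxis rows starting at this
     * rank's sector, i.e. they rely on local_n0 == yaxis and
     * local_0_start == rank*yaxis, which holds when nsectors equals the
     * number of MPI tasks and grid_size_y divides evenly among them. */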
    for (int iw = 0; iw < num_w_planes; iw++) {
        //printf("FFTing plan %d\n",iw);
        // Select the w-plane to transform
        for (int iv = 0; iv < yaxis; iv++) {
            for (int iu = 0; iu < xaxis; iu++) {
                fftwindex2D = iu + iv * xaxis;
                fftwindex = 2 * (fftwindex2D + iw * xaxis * yaxis);
                fftwgrid[fftwindex2D][0] = grid[fftwindex];
                fftwgrid[fftwindex2D][1] = grid[fftwindex + 1];
            }
        }
        // Do the transform for each w-plane
        fftw_execute(plan);
        // Save the transformed w-plane
        for (int iv = 0; iv < yaxis; iv++) {
            for (int iu = 0; iu < xaxis; iu++) {
                fftwindex2D = iu + iv * xaxis;
                fftwindex = 2 * (fftwindex2D + iw * xaxis * yaxis);
                gridss[fftwindex] = norm * fftwgrid[fftwindex2D][0];
                gridss[fftwindex + 1] = norm * fftwgrid[fftwindex2D][1];
            }
        }
    }

    fftw_destroy_plan(plan);
#ifdef USE_MPI
    MPI_Win_fence(0, slabwin);
    MPI_Barrier(MPI_COMM_WORLD);
#endif

    end = clock();
    clock_gettime(CLOCK_MONOTONIC, &finish);
    fftw_time = ((double)(end - start)) / CLOCKS_PER_SEC;
    fftw_time1 = (finish.tv_sec - begin.tv_sec);
    fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
    clock_gettime(CLOCK_MONOTONIC, &begin);

#ifdef WRITE_DATA
    // Write results
#ifdef USE_MPI
    MPI_Win writewin;
    MPI_Win_create(gridss, size_of_grid * sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin);
    MPI_Win_fence(0, writewin);
#endif
    if (rank == 0) {
        printf("WRITING FFT TRANSFORMED DATA\n");
        pFilereal = fopen(fftfile2, "wb");
        pFileimg = fopen(fftfile3, "wb");
#ifdef USE_MPI
        for (int isector = 0; isector < nsectors; isector++) {
            MPI_Win_lock(MPI_LOCK_SHARED, isector, 0, writewin);
            MPI_Get(gridss_w, size_of_grid, MPI_DOUBLE, isector, 0, size_of_grid, MPI_DOUBLE, writewin);
            MPI_Win_unlock(isector, writewin);
            for (long i = 0; i < size_of_grid / 2; i++) {
                gridss_real[i] = gridss_w[2 * i];
                gridss_img[i] = gridss_w[2 * i + 1];
            }
            if (num_w_planes > 1) {
                for (int iw = 0; iw < num_w_planes; iw++)
                    for (int iv = 0; iv < yaxis; iv++)
                        for (int iu = 0; iu < xaxis; iu++) {
                            long global_index = (iu + (iv + isector * yaxis) * xaxis + iw * grid_size_x * grid_size_y) * sizeof(double);
                            long index = iu + iv * xaxis + iw * xaxis * yaxis;
                            fseek(pFilereal, global_index, SEEK_SET);
                            fwrite(&gridss_real[index], 1, sizeof(double), pFilereal);
                        }
                for (int iw = 0; iw < num_w_planes; iw++)
                    for (int iv = 0; iv < yaxis; iv++)
                        for (int iu = 0; iu < xaxis; iu++) {
                            long global_index = (iu + (iv + isector * yaxis) * xaxis + iw * grid_size_x * grid_size_y) * sizeof(double);
                            long index = iu + iv * xaxis + iw * xaxis * yaxis;
                            fseek(pFileimg, global_index, SEEK_SET);
                            fwrite(&gridss_img[index], 1, sizeof(double), pFileimg);
                        }
            }
            else {
                fwrite(gridss_real, size_of_grid / 2, sizeof(double), pFilereal);
                fwrite(gridss_img, size_of_grid / 2, sizeof(double), pFileimg);
            }
        }
#else
/*
for (int iw=0; iw<num_w_planes; iw++)
for (int iv=0; iv<grid_size_y; iv++)
for (int iu=0; iu<grid_size_x; iu++)
{
int isector = 0;
long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y);
double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]);
fprintf (pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm);
}
*/
#endif
        fclose(pFilereal);
        fclose(pFileimg);
    }
#ifdef USE_MPI
    //MPI_Win_fence(0,writewin);
    MPI_Win_fence(0, writewin);
    MPI_Win_free(&writewin);
    MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif //WRITE_DATA

    fftw_free(fftwgrid);

    // Phase correction
    clock_gettime(CLOCK_MONOTONIC, &begin);
    start = clock();
    if (rank == 0) printf("PHASE CORRECTION\n");
    double *image_real = (double *) calloc(xaxis * yaxis, sizeof(double));
    double *image_imag = (double *) calloc(xaxis * yaxis, sizeof(double));

    phase_correction(gridss, image_real, image_imag, xaxis, yaxis, num_w_planes, grid_size_x, grid_size_y, resolution, wmin, wmax, num_threads);

    end = clock();
    clock_gettime(CLOCK_MONOTONIC, &finish);
    phase_time = ((double)(end - start)) / CLOCKS_PER_SEC;
    phase_time1 = (finish.tv_sec - begin.tv_sec);
    phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;

#ifdef WRITE_IMAGE
    if (rank == 0) {
#ifdef FITSIO
        printf("REMOVING RESIDUAL FITS FILE\n");
        remove(testfitsreal);
        remove(testfitsimag);

        printf("FITS CREATING\n");
        status = 0;
        fits_create_file(&fptrimg, testfitsimag, &status);
        fits_create_img(fptrimg, DOUBLE_IMG, naxis, naxes, &status);
        // fits_write_img(fptrimg, TDOUBLE, fpixel, nelements, image_imag, &status);
        fits_close_file(fptrimg, &status);

        status = 0;
        fits_create_file(&fptreal, testfitsreal, &status);
        fits_create_img(fptreal, DOUBLE_IMG, naxis, naxes, &status);
        // fits_write_img(fptreal, TDOUBLE, fpixel, nelements, image_real, &status);
        fits_close_file(fptreal, &status);
#endif
        pFilereal = fopen(fftfile2, "wb");
        pFileimg = fopen(fftfile3, "wb");
        fclose(pFilereal);
        fclose(pFileimg);
    }
#ifdef USE_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif
    if (rank == 0) printf("WRITING IMAGE\n");
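
    /* Parallel FITS writing pattern (the point of this commit): rank 0 first
     * creates the two empty images, then the loop below serialises the ranks
     * with barriers; on its turn each rank reopens the files and writes only
     * its own slab of rows with fits_write_subset(), so every rank
     * contributes without concurrent access to the same file. */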
    for (int isector = 0; isector < size; isector++) {
#ifdef USE_MPI
        MPI_Barrier(MPI_COMM_WORLD);
#endif
        if (isector == rank)
        //if(rank == 0)
        {
#ifdef FITSIO
            printf("%d writing\n", isector);
            long *fpixel = (long *) malloc(sizeof(long) * naxis);
            long *lpixel = (long *) malloc(sizeof(long) * naxis);
            fpixel[0] = 1;
            fpixel[1] = isector * yaxis + 1;
            lpixel[0] = xaxis;
            lpixel[1] = (isector + 1) * yaxis;
            //printf("fpixel %d, %d\n", fpixel[0], fpixel[1]);
            //printf("lpixel %d, %d\n", lpixel[0], lpixel[1]);
            status = 0;
            fits_open_image(&fptreal, testfitsreal, READWRITE, &status);
            fits_write_subset(fptreal, TDOUBLE, fpixel, lpixel, image_real, &status);
            fits_close_file(fptreal, &status);
            status = 0;
            fits_open_image(&fptrimg, testfitsimag, READWRITE, &status);
            fits_write_subset(fptrimg, TDOUBLE, fpixel, lpixel, image_imag, &status);
            fits_close_file(fptrimg, &status);
#endif
            pFilereal = fopen(fftfile2, "ab");
            pFileimg = fopen(fftfile3, "ab");
            long global_index = isector * (xaxis * yaxis) * sizeof(double);
            fseek(pFilereal, global_index, SEEK_SET);
            fwrite(image_real, xaxis * yaxis, sizeof(double), pFilereal);
            fseek(pFileimg, global_index, SEEK_SET);
            fwrite(image_imag, xaxis * yaxis, sizeof(double), pFileimg);
            fclose(pFilereal);
            fclose(pFileimg);
        }
    }
#ifdef USE_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif //WRITE_IMAGE
#endif //USE_FFTW

    end = clock();
    clock_gettime(CLOCK_MONOTONIC, &finish);
    tot_time = ((double)(end - start0)) / CLOCKS_PER_SEC;
    tot_time1 = (finish.tv_sec - begin0.tv_sec);
    tot_time1 += (finish.tv_nsec - begin0.tv_nsec) / 1000000000.0;

    if (rank == 0) {
        printf("Setup time: %f sec\n", setup_time);
        printf("Process time: %f sec\n", process_time);
        printf("Kernel time = %f, Array Composition time %f, Reduce time: %f sec\n", kernel_time, compose_time, reduce_time);
#ifdef USE_FFTW
        printf("FFTW time: %f sec\n", fftw_time);
        printf("Phase time: %f sec\n", phase_time);
#endif
        printf("TOT time: %f sec\n", tot_time);
        if (num_threads > 1) {
            printf("PSetup time: %f sec\n", setup_time1);
            printf("PProcess time: %f sec\n", process_time1);
            printf("PKernel time = %f, PArray Composition time %f, PReduce time: %f sec\n", kernel_time1, compose_time1, reduce_time1);
#ifdef USE_FFTW
            printf("PFFTW time: %f sec\n", fftw_time1);
            printf("PPhase time: %f sec\n", phase_time1);
#endif
            printf("PTOT time: %f sec\n", tot_time1);
        }
    }

    if (rank == 0) {
        pFile = fopen(timingfile, "w");
        if (num_threads == 1) {
            fprintf(pFile, "%f %f %f %f %f %f %f\n", setup_time, kernel_time, compose_time, reduce_time, fftw_time, phase_time, tot_time);
        } else {
            fprintf(pFile, "%f %f %f %f %f %f %f\n", setup_time1, kernel_time1, compose_time1, reduce_time1, fftw_time1, phase_time1, tot_time1);
        }
        fclose(pFile);
    }

    // Close MPI environment
#ifdef USE_MPI
    MPI_Win_fence(0, slabwin);
    MPI_Win_free(&slabwin);
    MPI_Finalize();
#endif
}