Skip to content
Snippets Groups Projects
Commit e0a23993 authored by David Goz's avatar David Goz :sleeping:
Browse files

jacobi/gpu added

parent 61df9364
Branches
No related tags found
No related merge requests found
#######################################################################
# Author: David Goz (david.goz@inaf.it) #
# June 2024 #
#######################################################################
#
# To see all the compilation options
# $ make info
#######################################################################
# make.def defines how the application is compiled
include make.def
#######################################################################
.PHONY: info serial valgrind_memcheck valgrind_callgrind valgrind_cachegrind debug clean
info:
@echo ' '
@echo '-----------------------------------------------------------------------------------------------'
@echo '$$ make serial ---> compile the serial application '
@echo '$$ make debug ---> debug the serial application '
@echo '$$ make valgrind_memcheck ---> run the serial application using Valgrind under Memcheck '
@echo '$$ make valgrind_callgrind ---> run the serial application using Valgrind under Callgrind '
@echo '$$ make valgrind_cachegrind ---> run the serial application using Valgrind under Cachegrind '
@echo '$$ make clean ---> clean up all '
@echo '$$ make info ---> get make info '
@echo '-----------------------------------------------------------------------------------------------'
@echo ' '
serial: $(PROG)
debug: $(PROG_DEBUG)
@echo 'OOOooo... debugging ...oooOOO'
gdb --args ./$<
@echo 'OOOooo... debugging ... oooOOO'
valgrind_memcheck: $(PROG_MEMCHECK)
@echo 'oooOOO... valgrind_memcheck ...OOOooo'
valgrind --tool=memcheck -s --leak-check=full --show-leak-kinds=all --track-origins=yes --read-var-info=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 10 10
@echo 'oooOOO... valgrind_memcheck ...OOOooo'
valgrind_callgrind: $(PROG_CALLGRIND)
@echo 'oooOOO... valgrind_callgrind ...OOOooo'
valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128
@echo ' '
@echo 'To generate a function-by-function summary from the profile data file:'
@echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less'
@echo '(kcachegrind is required in order to visualize the output using the GUI)'
valgrind_cachegrind: $(PROG_CACHEGRIND)
@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
valgrind --tool=cachegrind --cache-sim=yes --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128
@echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less'
@echo '(kcachegrind is required in order to visualize the output using the GUI)'
@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
clean:
rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~
rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND)
rm -f valgrind_*.txt
rm -f cachegrind.out.*
rm -f callgrind.*
rm -f *bin
rm -f jacobi_serial_opt_*
#pragma once
#define NGHOST 1
#define NDIM 2 /* 2D */
#define X 0
#define Y 1
#define TOL 1.e-5
typedef double MyData;
#pragma once
#include "allvars.h"
#include <assert.h>
#include <sys/time.h>
#include <time.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
/* function prototypes */
MyData **Allocate_2DdblArray(const int nx, const int ny);
void Show_2DdblArray(const MyData **const A,
const int nx,
const int ny,
const char *const string);
double seconds(void);
CC = gcc
CFLAGS = -Wall -Wextra -march=native
LIBS = -lm
SYSTYPE = $(strip $(shell uname -n))
PROG = jacobi_serial_opt_$(SYSTYPE)
PROG_DEBUG = $(PROG)_DEBUG
PROG_MEMCHECK = $(PROG)_MEMCHECK
PROG_CALLGRIND = $(PROG)_CALLGRIND
PROG_CACHEGRIND = $(PROG)_CACHEGRIND
HEADERS = $(wildcard ./include/*.h)
SOURCES = $(wildcard ./src/*.c)
DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile
$(PROG): $(DEPENDENCIES)
$(CC) $(CFLAGS) -O3 -I./include $(SOURCES) -o $@ $(LIBS)
@echo ' '
@echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine'
@echo ' '
$(PROG_DEBUG): $(DEPENDENCIES)
$(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include $(SOURCES) -o $@ $(LIBS)
@echo ' '
@echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine'
@echo ' '
$(PROG_MEMCHECK): $(DEPENDENCIES)
$(CC) $(CFLAGS) -Og -I./include $(SOURCES) -o $@ $(LIBS)
@echo ' '
@echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine'
@echo ' '
$(PROG_CALLGRIND): $(DEPENDENCIES)
$(CC) $(CFLAGS) -g -O3 -I./include $(SOURCES) -o $@ $(LIBS)
@echo ' '
@echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine'
@echo ' '
$(PROG_CACHEGRIND): $(DEPENDENCIES)
$(CC) $(CFLAGS) -g -O3 -I./include $(SOURCES) -o $@ $(LIBS)
@echo ' '
@echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine'
@echo ' '
/* ///////////////////////////////////////////////////////////////////// */
/* Authors: A. Mignone (mignone@to.infn.it) */
/* V. Cesare (valentina.cesare@inaf.it) */
/* D. Goz (david.goz@inaf.it) */
/* */
/* Date : June 2024 */
/* */
/* ///////////////////////////////////////////////////////////////////// */
#include "allvars.h"
#include "tools.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/* function prototypes */
void BoundaryConditions(MyData **const restrict,
MyData *const restrict,
MyData *const restrict,
const int,
const int);
void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict,
const MyData *const restrict, const int, const int, const int, const int);
void WriteSolution(MyData **const phi, const int nx, const int ny);
int main(int argc, char **argv)
{
if (argc <= 2)
{
printf("\n\t Usage: <executable> <x_grid_size> <y_grid_size> \n\n");
exit(EXIT_FAILURE);
}
/* global X and Y grid size */
const int NX_GLOB = (int) strtol(argv[1], NULL, 10);
const int NY_GLOB = (int) strtol(argv[2], NULL, 10);
const MyData xbeg = 0.0;
const MyData xend = 1.0;
const MyData ybeg = 0.0;
const MyData yend = 1.0;
const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1),
(yend - ybeg)/(NY_GLOB + 1)};
/* --------------------------------------------------------
1. Set grid indices
-------------------------------------------------------- */
const int ibeg = NGHOST;
const int iend = ibeg + NX_GLOB - 1;
const int nx = iend - ibeg + 1;
const int nx_tot = nx + 2 * NGHOST;
const int jbeg = NGHOST;
const int jend = jbeg + NY_GLOB - 1;
const int ny = jend - jbeg + 1;
const int ny_tot = ny + 2 * NGHOST;
printf("\n\t Grid indices:");
printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d" ,ibeg, iend, nx_tot);
printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot);
/* --------------------------------------------------------
2. Generate grid, allocate memory
-------------------------------------------------------- */
/* memory allocation */
MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData));
MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData));
assert((xg != NULL) && (yg != NULL));
/* initial conditions */
for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X];
for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y];
MyData *x = xg; /* Global and local grids are the same */
MyData *y = yg; /* for serial version of the code */
/* grids memory allocation */
MyData **phi = Allocate_2DdblArray(ny_tot, nx_tot);
MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot);
/* --------------------------------------------------------
3. Initialize solution array to 0
-------------------------------------------------------- */
for (int j=jbeg ; j<=jend ; j++)
for (int i=ibeg ; i<=iend ; i++)
phi0[j][i] = 0.0;
/* --------------------------------------------------------
4. Main iteration cycle
-------------------------------------------------------- */
const double time_start = seconds();
/* -- 4a. Set boundary conditions first -- */
BoundaryConditions(phi0, x, y, nx, ny);
BoundaryConditions(phi, x, y, nx, ny);
MyData err = 1.0;
/* iterations */
int k = 0;
/* - allocate phi0 and phi on GPU */
/* - copy phi0 from host to gpu */
#pragma acc data copy(phi0[0:nx_tot][0:ny_tot]), create(phi[0:nx_tot][0:ny_tot])
while (1)
{
/* -- 4b. Jacobi's method and residual (interior points) -- */
/* core algorithm */
err = 0.0;
JacobiAlgorithm(phi, phi0, &err, delta,
ibeg, iend, jbeg, jend);
printf ("\n\t Iteration = %d - err = %lg\n",k, err);
/* increase the counter of loop iterations */
k++;
/* check convergence */
if (err <= TOL)
break;
/* swap the pointers */
MyData **tmp = phi;
phi = phi0;
phi0 = tmp;
}
WriteSolution(phi, nx, ny);
printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB);
printf("\n\t Time = %lf [s]\n\n", seconds() - time_start);
// free memory
if (phi0)
{
free(phi0[0]);
free(phi0);
}
if (phi)
{
free(phi[0]);
free(phi);
}
if (yg)
free(yg);
if (xg)
free(xg);
return 0;
}
/* ********************************************************************* */
void BoundaryConditions(MyData **const restrict phi,
MyData *const restrict x,
MyData *const restrict y,
const int nx,
const int ny)
/*
*********************************************************************** */
{
const int ibeg = NGHOST;
const int iend = ibeg + nx - 1;
const int jbeg = NGHOST;
const int jend = jbeg + ny - 1;
int i,j;
/* -- Left -- */
i = ibeg - 1;
for (int j=jbeg ; j<=jend ; j++)
phi[j][i] = (1.0 - y[j]);
/* -- Right -- */
i = jend + 1;
for (int j=jbeg ; j<=jend ; j++)
phi[j][i] = (y[j] * y[j]);
/* -- Bottom -- */
j = jbeg - 1;
for (int i=ibeg ; i<=iend ; i++)
phi[j][i] = (1.0 - x[i]);
/* -- Top -- */
j = jend + 1;
for (int i=ibeg ; i<=iend ; i++)
phi[j][i] = x[i];
return;
}
/* ********************************************************************* */
void JacobiAlgorithm(MyData **const restrict Phi,
MyData **const restrict Phi0,
MyData *const restrict error,
const MyData *const restrict delta,
const int ibeg,
const int iend,
const int jbeg,
const int jend)
{
double gpu_err=0.0;
#pragma acc parallel loop reduction(+: err)
for (int j=jbeg ; j<=jend ; j++)
{
for (int i=ibeg ; i<=iend ; i++)
{
Phi[j][i] = 0.25 * (Phi0[j ][i-1] +
Phi0[j ][i+1] +
Phi0[j-1][i ] +
Phi0[j+1][i ]);
/* avoid fabs from math library */
const MyData diff = (Phi[j][i] - Phi0[j][i]);
*error += ((diff > 0) ? diff : -diff);
} /* loop over columns */
} /* loop over rows */
gpu_err *= (delta[X] * delta[Y]);
*error = gpu_err;
return;
}
/* ********************************************************************* */
/* ********************************************************************* */
void WriteSolution (MyData **const phi,
const int nx,
const int ny)
/*
*********************************************************************** */
{
const int ibeg = NGHOST;
const int jbeg = NGHOST;
const int jend = jbeg + ny - 1;
static int nfile = 0; /* File counter */
char fname[32];
sprintf(fname,"jacobi2D_serial_opt_%02d.bin", nfile);
FILE *fp;
printf ("> Writing %s\n",fname);
fp = fopen(fname, "wb");
for (int j=jbeg ; j<=jend ; j++)
{
fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp);
}
nfile++;
fclose(fp);
}
/* ///////////////////////////////////////////////////////////////////// */
/* Authors: A. Mignone (mignone@to.infn.it) */
/* V. Cesare (valentina.cesare@inaf.it) */
/* D. Goz (david.goz@inaf.it) */
/* */
/* Date : June 2024 */
/* */
/* ///////////////////////////////////////////////////////////////////// */
#include "tools.h"
/* ********************************************************************* */
MyData **Allocate_2DdblArray(const int nx, const int ny)
/*
* Allocate memory for a double precision array with
* nx rows and ny columns
*********************************************************************** */
{
MyData **buf = malloc(nx * sizeof(MyData *));
assert(buf != NULL);
buf[0] = (MyData *) malloc(nx * ny * sizeof(MyData));
assert(buf[0] != NULL);
for (int j=1 ; j<nx ; j++)
buf[j] = buf[j-1] + ny;
return buf;
}
/* ********************************************************************* */
void Show_2DdblArray(const MyData **const A,
const int nx,
const int ny,
const char *const string)
/* *********************************************************************** */
{
printf ("%s\n",string);
printf ("------------------------------\n");
for (int i=0 ; i<nx ; i++)
{
for (int j=0 ; j<ny ; j++)
{
printf ("%8.2f ", A[i][j]);
}
printf ("\n");
}
printf ("------------------------------\n");
}
/* ********************************************************************* */
double seconds()
{
struct timespec ts;
return (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ),
(double)ts.tv_sec +
(double)ts.tv_nsec * 1e-9);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment