# Build options: comment/uncomment the lines below depending on how you want to build the program.
# Build the MPI code (also makes CC/CXX default to the MPI compiler wrappers)
OPT += -DUSE_MPI
# Use FFTW (it can be switched on ONLY if MPI is active)
OPT += -DUSE_FFTW
# Perform one-sided communication (suggested) instead of a reduce (only if MPI is active)
OPT += -DONE_SIDE
# Write the full 3D cube of gridded visibilities and its FFT transform
#OPT += -DWRITE_DATA
# Write the final image
OPT += -DWRITE_IMAGE
# Perform the w-stacking phase correction
#OPT += -DPHASE_ON

# Compiler selection: use the MPI wrappers when OPT mentions USE_MPI,
# otherwise fall back to the plain GNU compilers.
ifneq (,$(findstring USE_MPI,$(OPT)))
  CC = mpicc
  CXX = mpiCC
else
  CC = gcc
  CXX = g++
endif

# OpenMP switch for the CPU compile and link steps. Use the empty definition
# below (or run `make OMP=`) to build without OpenMP.
OMP = -fopenmp
#OMP =

# Optimisation level and include path; += appends to any CFLAGS already
# defined (e.g. in the environment).
CFLAGS += -O3 -mcpu=native
CFLAGS += -I.

# FFTW link line. The -L option is emitted only when FFTW_LIB is non-empty:
# a bare "-L" would take the next word (-lfftw3_mpi) as its directory
# argument, silently dropping that library from the link.
LIBS = $(addprefix -L,$(FFTW_LIB)) -lfftw3_mpi -lfftw3 -lm

# CUDA toolchain settings used by the *_cuda targets.
NVCC = nvcc

# Target GPU architecture; override on the command line or in the
# environment, e.g. `make GPU_ARCH=sm_80 serial_cuda`.
GPU_ARCH ?= sm_70

# Directory searched for libcudart/libcuda. The default is the original
# hard-coded CUDA 10.1 location; set CUDA_LIBDIR to build elsewhere.
CUDA_LIBDIR ?= /cineca/prod/opt/compilers/cuda/10.1/none/lib64/

# -Xcompiler forwards -mno-float128 to the host compiler invoked by nvcc.
NVFLAGS = -arch=$(GPU_ARCH) -Xcompiler -mno-float128 -std=c++11
# addprefix drops the -L option entirely if CUDA_LIBDIR is set to empty.
NVLIB = $(addprefix -L,$(CUDA_LIBDIR)) -lcudart -lcuda

# Extra prerequisites of every object file: the shared header plus the
# hand-written sources.
DEPS = w-stacking.h \
       w-stacking-fftw.c \
       w-stacking.cu \
       phase_correction.cu

# Objects linked into the CPU (serial / mpi) executables.
COBJ = w-stacking.o \
       w-stacking-fftw.o \
       phase_correction.o

# The CPU builds compile the CUDA sources as plain C: each .c file listed
# here is a verbatim copy of the .cu file with the same stem.
# NOTE(review): as far as this file shows, this is the first rule, so a bare
# `make` only produces w-stacking.c -- name the target you want explicitly.
w-stacking.c phase_correction.c: %.c: %.cu
	cp $< $@

# Compile one C source into its object file with $(CC), passing the OpenMP
# switch, $(CFLAGS) and the -D options collected in $(OPT).
# Besides its own .c file, every object also depends on $(DEPS), so touching
# any file listed there rebuilds all objects.
%.o: %.c $(DEPS)
	$(CC) $(OMP) -c -o $@ $< $(CFLAGS) $(OPT)

# CPU executable linked against libm only (no FFTW libraries).
# `serial` is a phony alias so that a stray file called "serial" cannot mask
# it; the executable itself is the real target, which lets make skip the
# link step when no object has changed.
.PHONY: serial
serial: w-stackingCfftw_serial

w-stackingCfftw_serial: $(COBJ)
	$(CC) $(OMP) -o $@ $(CFLAGS) $^ -lm

# GPU executable linked against the CUDA libraries and libm (no FFTW).
# The target has no prerequisites and is declared phony, so all three
# sources are recompiled on every invocation:
#   1. nvcc compiles the two .cu files into w-stacking.o / phase_correction.o
#   2. $(CC) compiles the FFTW driver
#   3. $(CXX) links everything into w-stackingfftw_serial
# The objects written here have the same names as those in $(COBJ); remove
# them before switching back to a CPU target so stale nvcc objects are not
# linked by mistake.
.PHONY: serial_cuda
serial_cuda:
	$(NVCC) $(OPT) $(NVFLAGS) -c w-stacking.cu phase_correction.cu $(NVLIB)
	$(CC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c
	$(CXX) $(CFLAGS) $(OPT) -o w-stackingfftw_serial w-stacking-fftw.o w-stacking.o phase_correction.o $(NVLIB) -lm

# CPU executable linked against the FFTW libraries listed in $(LIBS).
# `mpi` is a phony alias so that a stray file called "mpi" cannot mask it;
# the executable itself is the real target, which lets make skip the link
# step when no object has changed.
.PHONY: mpi
mpi: w-stackingCfftw

w-stackingCfftw: $(COBJ)
	$(CC) $(OMP) -o $@ $(CFLAGS) $^ $(LIBS)

# GPU executable linked against the CUDA libraries and the FFTW libraries
# in $(LIBS).
# The target has no prerequisites and is declared phony, so all three
# sources are recompiled on every invocation:
#   1. nvcc compiles the two .cu files into w-stacking.o / phase_correction.o
#   2. $(CC) compiles the FFTW driver
#   3. $(CXX) links everything into w-stackingfftw
# The objects written here have the same names as those in $(COBJ); remove
# them before switching back to a CPU target so stale nvcc objects are not
# linked by mistake.
.PHONY: mpi_cuda
mpi_cuda:
	$(NVCC) $(NVFLAGS) $(OPT) -c w-stacking.cu phase_correction.cu $(NVLIB)
	$(CC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c
	$(CXX) $(CFLAGS) $(OPT) -o w-stackingfftw w-stacking-fftw.o w-stacking.o phase_correction.o $(NVLIB) $(LIBS) -lm

# Remove the object files and the generated .c copies of the CUDA sources.
# Declared phony so a file named "clean" cannot mask the target. $(RM) is
# `rm -f`, so files that are already missing are not an error and the
# remaining ones are still removed on a partially built tree.
.PHONY: clean
clean:
	$(RM) *.o w-stacking.c phase_correction.c

