diff --git a/containers/docker/Dockerfile b/containers/docker/Dockerfile
index 3abcd3d1eba8df6083911cc61821cecf99eddb80..cd921b412d7f5bf796f14fa952a54fa5990249b5 100644
--- a/containers/docker/Dockerfile
+++ b/containers/docker/Dockerfile
@@ -57,15 +57,15 @@ ADD doc /root/np-tmcode/doc
 ADD build /root/np-tmcode/build
 ADD test_data /root/np-tmcode/test_data
 #RUN cd np-tmcode/src && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_MAGMA=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make wipe && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_MAGMA=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make -j && mv ../build/cluster/np_cluster ../build/cluster/np_cluster_magma_mpi
-RUN cd np-tmcode/build && CXX=mpicxx FC=gfortran ./configure --enable-openmp --with-lapack --with-magma --without-cublas && make clean && make -j && mv cluster/np_cluster cluster/np_cluster_magma_mpi && mv sphere/np_sphere sphere/np_sphere_magma_mpi && mv inclusion/np_inclusion inclusion/np_inclusion_magma_mpi && mv trapping/np_trapping trapping/np_trapping_magma_mpi
+RUN cd np-tmcode/build && CXX=mpicxx FC=gfortran ./configure --enable-openmp --with-lapack --with-magma --enable-refinement --without-cublas && make clean && make -j && mv cluster/np_cluster cluster/np_cluster_magma_mpi && mv sphere/np_sphere sphere/np_sphere_magma_mpi && mv inclusion/np_inclusion inclusion/np_inclusion_magma_mpi && mv trapping/np_trapping trapping/np_trapping_magma_mpi
 #RUN cd np-tmcode/src && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_MAGMA=1 USE_OPENMP=1 CXX=g++ FC=gfortran make wipe && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_MAGMA=1 USE_OPENMP=1 CXX=g++ FC=gfortran make -j && mv ../build/cluster/np_cluster ../build/cluster/np_cluster_magma_serial
-RUN cd np-tmcode/build && CXX=g++ FC=gfortran ./configure --enable-openmp --with-lapack --with-magma --without-cublas && make clean && make -j && mv cluster/np_cluster cluster/np_cluster_magma_serial && mv sphere/np_sphere sphere/np_sphere_magma_serial && mv inclusion/np_inclusion inclusion/np_inclusion_magma_serial && mv trapping/np_trapping trapping/np_trapping_magma_serial
-RUN cd np-tmcode/build && CXX=mpicxx FC=gfortran ./configure --enable-openmp --with-lapack --without-magma --with-cublas && make clean && make -j && mv cluster/np_cluster cluster/np_cluster_cublas_mpi && mv sphere/np_sphere sphere/np_sphere_cublas_mpi && mv inclusion/np_inclusion inclusion/np_inclusion_cublas_mpi && mv trapping/np_trapping trapping/np_trapping_cublas_mpi
-RUN cd np-tmcode/build && CXX=g++ FC=gfortran ./configure --enable-openmp --with-lapack --without-magma --with-cublas && make clean && make -j && mv cluster/np_cluster cluster/np_cluster_cublas_serial && mv sphere/np_sphere sphere/np_sphere_cublas_serial && mv inclusion/np_inclusion inclusion/np_inclusion_cublas_serial && mv trapping/np_trapping trapping/np_trapping_cublas_serial
+RUN cd np-tmcode/build && CXX=g++ FC=gfortran ./configure --enable-openmp --with-lapack --with-magma --enable-refinement --without-cublas && make clean && make -j && mv cluster/np_cluster cluster/np_cluster_magma_serial && mv sphere/np_sphere sphere/np_sphere_magma_serial && mv inclusion/np_inclusion inclusion/np_inclusion_magma_serial && mv trapping/np_trapping trapping/np_trapping_magma_serial
+RUN cd np-tmcode/build && CXX=mpicxx FC=gfortran ./configure --enable-openmp --with-lapack --without-magma --enable-refinement --with-cublas && make clean && make -j && mv cluster/np_cluster cluster/np_cluster_cublas_mpi && mv sphere/np_sphere sphere/np_sphere_cublas_mpi && mv inclusion/np_inclusion inclusion/np_inclusion_cublas_mpi && mv trapping/np_trapping trapping/np_trapping_cublas_mpi
+RUN cd np-tmcode/build && CXX=g++ FC=gfortran ./configure --enable-openmp --with-lapack --without-magma --enable-refinement --with-cublas && make clean && make -j && mv cluster/np_cluster cluster/np_cluster_cublas_serial && mv sphere/np_sphere sphere/np_sphere_cublas_serial && mv inclusion/np_inclusion inclusion/np_inclusion_cublas_serial && mv trapping/np_trapping trapping/np_trapping_cublas_serial
 #RUN cd np-tmcode/src && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make wipe && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make -j && mv ../build/cluster/np_cluster ../build/cluster/np_cluster_lapack_mpi && cd ../build/cluster && ln -s np_cluster_lapack_mpi np_cluster
-RUN cd np-tmcode/build && CXX=mpicxx FC=gfortran ./configure --enable-openmp --with-lapack --without-magma --without-cublas && make clean && make -j && mv cluster/np_cluster cluster/np_cluster_lapack_mpi && mv sphere/np_sphere sphere/np_sphere_lapack_mpi && mv inclusion/np_inclusion inclusion/np_inclusion_lapack_mpi && mv trapping/np_trapping trapping/np_trapping_lapack_mpi && cd cluster && ln -s np_cluster_lapack_mpi np_cluster && cd ../sphere && ln -s np_sphere_lapack_mpi np_sphere && cd ../inclusion && ln -s np_inclusion_lapack_mpi np_inclusion && cd ../trapping && ln -s np_trapping_lapack_mpi np_trapping
+RUN cd np-tmcode/build && CXX=mpicxx FC=gfortran ./configure --enable-openmp --with-lapack --without-magma --enable-refinement --without-cublas && make clean && make -j && mv cluster/np_cluster cluster/np_cluster_lapack_mpi && mv sphere/np_sphere sphere/np_sphere_lapack_mpi && mv inclusion/np_inclusion inclusion/np_inclusion_lapack_mpi && mv trapping/np_trapping trapping/np_trapping_lapack_mpi && cd cluster && ln -s np_cluster_lapack_mpi np_cluster && cd ../sphere && ln -s np_sphere_lapack_mpi np_sphere && cd ../inclusion && ln -s np_inclusion_lapack_mpi np_inclusion && cd ../trapping && ln -s np_trapping_lapack_mpi np_trapping
 #RUN cd np-tmcode/src && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_OPENMP=1 CXX=g++ FC=gfortran make wipe && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_OPENMP=1 CXX=g++ FC=gfortran make -j && mv ../build/cluster/np_cluster ../build/cluster/np_cluster_lapack_serial
-RUN cd np-tmcode/build && CXX=g++ FC=gfortran ./configure --enable-openmp --with-lapack --without-magma --without-cublas && make clean && make -j && mv cluster/np_cluster cluster/np_cluster_lapack_serial && mv sphere/np_sphere sphere/np_sphere_lapack_serial && mv inclusion/np_inclusion inclusion/np_inclusion_lapack_serial && mv trapping/np_trapping trapping/np_trapping_lapack_serial
+RUN cd np-tmcode/build && CXX=g++ FC=gfortran ./configure --enable-openmp --with-lapack --without-magma --enable-refinement --without-cublas && make clean && make -j && mv cluster/np_cluster cluster/np_cluster_lapack_serial && mv sphere/np_sphere sphere/np_sphere_lapack_serial && mv inclusion/np_inclusion inclusion/np_inclusion_lapack_serial && mv trapping/np_trapping trapping/np_trapping_lapack_serial
 #RUN cd np-tmcode/src && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make wipe && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make -j && mv ../build/cluster/np_cluster ../build/cluster/np_cluster_legacy_mpi
 RUN cd np-tmcode/build && CXX=mpicxx FC=gfortran ./configure --enable-openmp --without-lapack --without-magma --without-cublas && make clean && make -j && mv cluster/np_cluster cluster/np_cluster_legacy_mpi && mv sphere/np_sphere sphere/np_sphere_legacy_mpi && mv inclusion/np_inclusion inclusion/np_inclusion_legacy_mpi && mv trapping/np_trapping trapping/np_trapping_legacy_mpi
 #RUN cd np-tmcode/src && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_OPENMP=1 CXX=g++ FC=gfortran make wipe && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_OPENMP=1 CXX=g++ FC=gfortran make -j && mv ../build/cluster/np_cluster ../build/cluster/np_cluster_legacy_serial
diff --git a/containers/singularity/np-tmcode-run.def b/containers/singularity/np-tmcode-run.def
index 34adab72f97c5e6e25de3b4a93d34ef979cab150..a9f336788acc5126a7b0a68d1ecfd1961e9e970e 100644
--- a/containers/singularity/np-tmcode-run.def
+++ b/containers/singularity/np-tmcode-run.def
@@ -4,9 +4,7 @@ Stage: np-tmcode-run-dev
 
 %files
     ../../src /usr/local/np-tmcode/src
-    ../../doc /usr/local/np-tmcode/doc
     ../../build /usr/local/np-tmcode/build
-    ../../test_data /usr/local/np-tmcode/test_data
     toinstall/magma-compiled /usr/local/magma-compiled
     #toinstall/magma-compiled/include /usr/local/magma-compiled/include
     toinstall/apt/debian.sources /etc/apt/sources.list.d/debian.sources
@@ -16,9 +14,9 @@ Stage: np-tmcode-run-dev
     apt -y upgrade
     apt -y install g++ gfortran make gcc-offload-nvptx libhdf5-dev liblapacke-dev liblapacke64-dev libopenblas-openmp-dev libopenblas64-openmp-dev libquadmath0 libgcc-s1 libgomp1 nvidia-cuda-dev libnvjitlink12 libcublaslt12 libcudart12 libcusparse12 libcublas12 mpi-default-dev mpi-default-bin
     #apt -y install nvidia-cuda-toolkit-gcc
-    apt -y install python3 python-is-python3 python3-regex
-    apt -y install doxygen
-    apt -y install texlive-latex-base texlive-latex-recommended texlive-latex-extra texlive-font-utils
+    apt -y install python3 python-is-python3 python3-regex python3-yaml python3-matplotlib python3-numpy
+    #apt -y install doxygen
+    #apt -y install texlive-latex-base texlive-latex-recommended texlive-latex-extra texlive-font-utils
     cd /usr/local/magma-compiled
     chown -R root: usr
     mv usr/include/* /usr/include/
@@ -31,47 +29,89 @@ Stage: np-tmcode-run-dev
     # repeat for every "flavour" we want to compile
     # with magma, ilp64, mpi, openmp
     #BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_MAGMA=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make wipe
-    CXX=mpicxx FC=gfortran ./configure --enable-static --enable-mpi --enable-ilp64 --enable-openmp --with-lapack --with-magma
+    CXX=mpicxx FC=gfortran ./configure --disable-shared --enable-refinement --enable-openmp --with-lapack --with-magma --without-cublas
     make clean
     make -j
     mv cluster/np_cluster cluster/np_cluster_magma_mpi
+    mv inclusion/np_inclusion inclusion/np_inclusion_magma_mpi
+    mv sphere/np_sphere sphere/np_sphere_magma_mpi
+    mv trapping/np_trapping trapping/np_trapping_magma_mpi
     # with magma, ilp64, no mpi, openmp
     #BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_MAGMA=1 USE_OPENMP=1 CXX=g++ FC=gfortran make wipe
-    CXX=g++ FC=gfortran ./configure --enable-static --disable-mpi --enable-ilp64 --enable-openmp --with-lapack --with-magma
+    CXX=g++ FC=gfortran ./configure --disable-shared --enable-ilp64 --enable-openmp --with-lapack --with-magma --enable-refinement --without-cublas
     make clean
     make -j
     mv cluster/np_cluster cluster/np_cluster_magma_serial
+    mv inclusion/np_inclusion inclusion/np_inclusion_magma_serial
+    mv sphere/np_sphere sphere/np_sphere_magma_serial
+    mv trapping/np_trapping trapping/np_trapping_magma_serial
+    # with cublas, ilp64, mpi, openmp
+    #BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_MAGMA=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make wipe
+    CXX=mpicxx FC=gfortran ./configure --disable-shared --enable-refinement --enable-openmp --with-lapack --without-magma --with-cublas
+    make clean
+    make -j
+    mv cluster/np_cluster cluster/np_cluster_cublas_mpi
+    mv inclusion/np_inclusion inclusion/np_inclusion_cublas_mpi
+    mv sphere/np_sphere sphere/np_sphere_cublas_mpi
+    mv trapping/np_trapping trapping/np_trapping_cublas_mpi
+    # with cublas, ilp64, no mpi, openmp
+    #BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_MAGMA=1 USE_OPENMP=1 CXX=g++ FC=gfortran make wipe
+    CXX=g++ FC=gfortran ./configure --disable-shared --enable-ilp64 --enable-openmp --with-lapack --without-magma --enable-refinement --with-cublas
+    make clean
+    make -j
+    mv cluster/np_cluster cluster/np_cluster_cublas_serial
+    mv inclusion/np_inclusion inclusion/np_inclusion_cublas_serial
+    mv sphere/np_sphere sphere/np_sphere_cublas_serial
+    mv trapping/np_trapping trapping/np_trapping_cublas_serial
     # with lapack, ilp64, mpi, openmp
     #BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make wipe
-    CXX=mpicxx FC=gfortran ./configure --enable-static --enable-mpi --enable-ilp64 --enable-openmp --with-lapack --without-magma
+    CXX=mpicxx FC=gfortran ./configure --disable-shared --enable-ilp64 --enable-openmp --with-lapack --without-magma --enable-refinement --without-cublas
     make clean
     make -j
     mv cluster/np_cluster cluster/np_cluster_lapack_mpi
+    mv inclusion/np_inclusion inclusion/np_inclusion_lapack_mpi
+    mv sphere/np_sphere sphere/np_sphere_lapack_mpi
+    mv trapping/np_trapping trapping/np_trapping_lapack_mpi
     # with lapack, ilp64, no mpi, openmp
     #BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_OPENMP=1 CXX=g++ FC=gfortran make wipe
-    CXX=g++ FC=gfortran ./configure --enable-static --disable-mpi --enable-ilp64 --enable-openmp --with-lapack --without-magma
+    CXX=g++ FC=gfortran ./configure --enable-ilp64 --enable-openmp --with-lapack --without-magma --disable-shared --enable-refinement --without-cublas
     make clean
     make -j
     mv cluster/np_cluster cluster/np_cluster_lapack_serial
+    mv inclusion/np_inclusion inclusion/np_inclusion_lapack_serial
+    mv sphere/np_sphere sphere/np_sphere_lapack_serial
+    mv trapping/np_trapping trapping/np_trapping_lapack_serial
     # with lucin, ilp64, mpi, openmp
     #BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make wipe
-    CXX=mpicxx FC=gfortran ./configure --enable-static --enable-mpi --enable-ilp64 --enable-openmp --without-lapack --without-magma
+    CXX=mpicxx FC=gfortran ./configure --enable-ilp64 --enable-openmp --without-lapack --without-magma --disable-shared --without-cublas
     make clean
     make -j
     mv cluster/np_cluster cluster/np_cluster_legacy_mpi
+    mv inclusion/np_inclusion inclusion/np_inclusion_legacy_mpi
+    mv sphere/np_sphere sphere/np_sphere_legacy_mpi
+    mv trapping/np_trapping trapping/np_trapping_legacy_mpi
     # with lucin, ilp64, no mpi, openmp
     #BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_OPENMP=1 CXX=g++ FC=gfortran make wipe
-    CXX=g++ FC=gfortran ./configure --enable-static --disable-mpi --enable-ilp64 --enable-openmp --without-lapack --without-magma
+    CXX=g++ FC=gfortran ./configure --enable-ilp64 --enable-openmp --without-lapack --without-magma --disable-shared --without-cublas
     make clean
     make -j
     mv cluster/np_cluster cluster/np_cluster_legacy_serial
-    cd ../doc/src
-    doxygen config.dox
-    cd ../build/latex
-    make -j
-    cd ../../../build/cluster
+    mv inclusion/np_inclusion inclusion/np_inclusion_legacy_serial
+    mv sphere/np_sphere sphere/np_sphere_legacy_serial
+    mv trapping/np_trapping trapping/np_trapping_legacy_serial
+    #cd ../doc/src
+    #doxygen config.dox
+    #cd ../build/latex
+    #make -j
     # this is the default
+    cd cluster
     ln -s np_cluster_lapack_mpi np_cluster
+    cd ../inclusion
+    ln -s np_inclusion_lapack_mpi np_inclusion
+    cd ../sphere
+    ln -s np_sphere_lapack_mpi np_sphere
+    cd ../trapping
+    ln -s np_trapping_lapack_mpi np_trapping
 
 Bootstrap: docker
@@ -87,19 +127,20 @@ Stage: np-tmcode-run-minimal
 %post
     apt update
     apt -y upgrade
-    apt -y install libgfortran5 libgcc-s1 libhdf5-103-1t64 libstdc++6 libssl3t64 libcurl4t64 libsz2 zlib1g libnghttp2-14 libidn2-0 librtmp1 libssh2-1t64 libpsl5t64 libgssapi-krb5-2 libldap-2.5-0 libzstd1 libbrotli1 libaec0 libunistring5 libgmp10 libkrb5-3 libk5crypto3 libcom-err2 libkrb5support0 libsasl2-2 libp11-kit0 libtasn1-6 libkeyutils1 libffi8 liblapacke64 libopenblas64-0-openmp python3 python-is-python3 python3-regex hdf5-tools libquadmath0 libgcc-s1 libgomp1 libnvjitlink12 libcublaslt12 libcudart12 libcusparse12 libcublas12 mpi-default-bin
+    apt -y install libgfortran5 libgcc-s1 libhdf5-310 libstdc++6 libssl3t64 libcurl4t64 libsz2 zlib1g libnghttp2-14 libidn2-0 librtmp1 libssh2-1t64 libpsl5t64 libgssapi-krb5-2 libldap2 libzstd1 libbrotli1 libaec0 libunistring5 libgmp10 libkrb5-3 libk5crypto3 libcom-err2 libkrb5support0 libsasl2-2 libp11-kit0 libtasn1-6 libkeyutils1 libffi8 liblapacke64 libopenblas64-0-openmp python3 python-is-python3 python3-regex python3-yaml python3-matplotlib python3-numpy hdf5-tools libquadmath0 libgcc-s1 libgomp1 libnvjitlink12 libcublaslt12 libcudart12 libcusparse12 libcublas12 mpi-default-bin
     rm -rf /var/lib/apt/lists/*
     cd /usr/local/np-tmcode
     find build -name "*.o" -exec rm -v \{\} \;
     find build -name "*.gcno" -exec rm -v \{\} \;
+    find build -name ".git*" -exec rm -v \{\} \;
+    find build -name "configure*" -exec rm -v \{\} \;
+    find build -name "Makefile*" -exec rm -v \{\} \;
+    find build -name "error.log" -exec rm -v \{\} \;
+    rm -rfv build/libnptm
     cd src
-    rm -rvf cluster libnptm trapping include sphere Makefile make.inc README.md
-    cd ..
+    rm -rvf cluster include inclusion libnptm make.bak Makefile.bak README.md sphere trapping testing
     rm -rvf containers
-    cd doc
-    rm -rvf src/cluster /src/include /src/libntpm /src/sphere /src/trapping /src/Makefile /src/make.inc
-    cd build/latex
-    rm -rvf *.tex *.out *.sty *.ind *.log *.toc *.ilg *.idx *.aux *.eps Makefile class*.pdf
+    rm -rvf doc
     cp -a /usr/bin/ld.so /usr/bin/sh /usr/bin/dash /usr/bin/mkdir /usr/bin/rm /usr/bin/cp /usr/bin/python* /usr/bin/h5* /usr/local/bin/
     rm -rvf /bin/* /usr/bin/* /sbin /usr/sbin /usr/games /usr/local/games
     /usr/local/bin/mkdir -p /bin
@@ -114,4 +155,4 @@ Stage: np-tmcode-run
     / /
 
 %runscript
-    PATH=/bin:/usr/local/np-tmcode/src/scripts:/usr/local/np-tmcode/build/trapping:/usr/local/np-tmcode/build/cluster:/usr/local/np-tmcode/build/sphere $*
+    PATH=/bin:/usr/local/np-tmcode/src/scripts:/usr/local/np-tmcode/build/trapping:/usr/local/np-tmcode/build/cluster:/usr/local/np-tmcode/build/inclusion:/usr/local/np-tmcode/build/sphere $*
diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp
index 3b50640f5c4ab253c5a40e8b8b0d1217cdaf4f32..46de0e4970fd32402cd1b9b96d1b9ae902131263 100644
--- a/src/cluster/cluster.cpp
+++ b/src/cluster/cluster.cpp
@@ -610,7 +610,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
     // each thread puts a copy of the pointers to its virtual files in the shared arrays
     vtppoanarray[myompthread] = vtppoanp_2;
 #pragma omp barrier
-    if (myompthread==0) logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n");
+    if (myompthread==0) logger->log("Syncing OpenMP threads and starting one iteration block on wavelengths\n");
     // ok, now I can actually start the parallel calculations
     // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism
     if (myjxi488 <= cid_2->number_of_scales) {
@@ -620,16 +620,18 @@ void cluster(const string& config_file, const string& data_file, const string& o
         p_outarray[myompthread] = p_output_2;
       } else {
         // Thread 0 of non-zero MPI processes needs to allocate memory for the
-        // output of all threads.
-        p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, ompnumthreads);
+        // output of all threads _doing something_.
+        int iterstodo = cid_2->number_of_scales - myjxi488 + 1;
+        if (iterstodo > ompnumthreads) iterstodo = ompnumthreads;
+        p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, iterstodo);
         p_outarray[0] = p_output_2;
       }
       int jer = cluster_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2);
     } else {
-      if (myompthread > 0) {
+      // if (myompthread > 0) {
        // If there is no input for this thread, set the output pointer to NULL.
       p_outarray[myompthread] = NULL;
-      }
+      //}
     }
 
 #pragma omp barrier
@@ -647,8 +649,18 @@ void cluster(const string& config_file, const string& data_file, const string& o
       // thread 0 sends the collected virtualfiles to thread 0 of MPI process 0, then deletes them
       for (int rr=1; rr<mpidata->nprocs; rr++) {
         if (rr == mpidata->rank) {
-          p_outarray[0]->mpisend(mpidata);
-          delete p_outarray[0];
+          if (p_outarray[0] == NULL) {
+            // signal that we are not sending anything
+            int skip_flag = 1;
+            MPI_Send(&skip_flag, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+          }
+          else {
+            // signal that we are sending something
+            int skip_flag = 0;
+            MPI_Send(&skip_flag, 1, MPI_INT32_T, 0, 10, MPI_COMM_WORLD);
+            p_outarray[0]->mpisend(mpidata);
+            delete p_outarray[0];
+          }
           vtppoanarray[0]->mpisend(mpidata);
           delete vtppoanarray[0];
         }
diff --git a/src/libnptm/magma_calls.cpp b/src/libnptm/magma_calls.cpp
index 22e32c8900c536ea1893b1fa3703061d91d41a3b..88a5b811088659bfd213b4d56dcb8406d48a322a 100644
--- a/src/libnptm/magma_calls.cpp
+++ b/src/libnptm/magma_calls.cpp
@@ -137,6 +137,7 @@ void magma_zinvert1(dcomplex * &inva, np_int n, int &jer, int device_id) {
   magma_zgetri_gpu(m, d_a, m, piv, dwork, ldwork, &info);
   magma_zgetmatrix(m, m, d_a , m, a, m, queue); // copy d_a -> a
   delete[] piv; // free host memory
+  magma_free(d_a);
   magma_free(dwork);
 #endif
   magma_queue_destroy(queue); // destroy queue
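The magma_calls.cpp hunk above plugs a device-memory leak in magma_zinvert1: the matrix d_a is allocated on the GPU alongside the zgetri workspace dwork, but only dwork was freed before this patch. A minimal sketch of the allocate/use/free pairing that the added magma_free(d_a) completes, assuming MAGMA's standard v2 API (the helper name invert_lifecycle_sketch, the hard-coded device 0, and the omitted error checks are illustrative, not from the codebase):

#include "magma_v2.h"

// Sketch only: mirrors the device-memory lifecycle inside magma_zinvert1.
void invert_lifecycle_sketch(magma_int_t m) {
  magma_queue_t queue;
  magma_queue_create(0, &queue);                    // work queue on device 0
  magmaDoubleComplex *d_a = NULL, *dwork = NULL;
  magma_zmalloc(&d_a, m * m);                       // device copy of the matrix
  magma_int_t ldwork = m * magma_get_zgetri_nb(m);  // zgetri workspace size
  magma_zmalloc(&dwork, ldwork);
  // ... magma_zgetrf_gpu / magma_zgetri_gpu / magma_zgetmatrix happen here ...
  magma_free(d_a);    // the call this patch adds; without it every inversion leaks an m*m device matrix
  magma_free(dwork);
  magma_queue_destroy(queue);
}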
diff --git a/src/libnptm/outputs.cpp b/src/libnptm/outputs.cpp
index 36abbc3b2a5aad669a10e872a74e68c576061c5d..3cf5c1b474dcd3eeca1d71677abbd882b54532e9 100644
--- a/src/libnptm/outputs.cpp
+++ b/src/libnptm/outputs.cpp
@@ -2356,203 +2356,208 @@ int ClusterOutputInfo::write_legacy(const std::string &output) {
 #ifdef MPI_VERSION
 int ClusterOutputInfo::mpireceive(const mixMPI *mpidata, int pid) {
   int result = 0;
+  int skip_flag;
   int chk_nsph, chk_inpol, chk_iavm, chk_isam, chk_num_theta, chk_num_thetas;
   int chk_num_phi, chk_num_phis, chk_ndirs, chk_idfc, chk_configs;
   double chk_exri;
-  MPI_Recv(&chk_nsph, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-  MPI_Recv(&chk_inpol, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-  MPI_Recv(&chk_iavm, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-  MPI_Recv(&chk_isam, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-  MPI_Recv(&chk_num_theta, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-  MPI_Recv(&chk_num_thetas, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-  MPI_Recv(&chk_num_phi, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-  MPI_Recv(&chk_num_phis, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-  MPI_Recv(&chk_ndirs, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-  MPI_Recv(&chk_exri, 1, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-  MPI_Recv(&chk_idfc, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-  MPI_Recv(&chk_configs, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-  result += (chk_nsph == nsph) ? 0 : 1;
-  result += (chk_inpol == inpol) ? 0 : 1;
-  result += (chk_iavm == iavm) ? 0 : 1;
-  result += (chk_isam == isam) ? 0 : 1;
-  result += (chk_num_theta == _num_theta) ? 0 : 1;
-  result += (chk_num_thetas == _num_thetas) ? 0 : 1;
-  result += (chk_num_phi == _num_phi) ? 0 : 1;
-  result += (chk_num_phis == _num_phis) ? 0 : 1;
-  result += (chk_ndirs == ndirs) ? 0 : 1;
-  result += (chk_exri == exri) ? 0 : 1;
-  result += (chk_idfc == idfc) ? 0 : 1;
-  result += (chk_configs == configurations) ? 0 : 1;
-  if (result == 0) {
-    int xi1, offset, chunk_size;
-    MPI_Send(&result, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD);
-    MPI_Recv(&xi1, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(&chunk_size, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    // Receive vectors of single values per scale
-    offset = xi1 - _first_xi;
-    MPI_Recv(vec_jxi + offset, chunk_size, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_ier + offset, chunk_size, MPI_SHORT, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_vk + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_xi + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_fsat + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_qschut + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_pschut + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_s0magt + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_scc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_scc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_abc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_abc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_exc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_exc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_albedc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_albedc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_qscamc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_qscamc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_qabsmc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_qabsmc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_qextmc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_qextmc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_sccrt1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_sccrt2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_abcrt1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_abcrt2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_excrt1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_excrt2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_fsac11 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_fsac21 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_fsac22 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_fsac12 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_qschuc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_qschuc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_pschuc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_pschuc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_s0magc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_s0magc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_cosavc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_cosavc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_raprc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_raprc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_fkc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_fkc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  MPI_Recv(&skip_flag, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  // Proceed with the rest _only if_ skip_flag==0, else nothing is to be received
+  if (skip_flag == 0) {
+    MPI_Recv(&chk_nsph, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(&chk_inpol, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(&chk_iavm, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(&chk_isam, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(&chk_num_theta, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(&chk_num_thetas, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(&chk_num_phi, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(&chk_num_phis, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(&chk_ndirs, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(&chk_exri, 1, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(&chk_idfc, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    MPI_Recv(&chk_configs, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    result += (chk_nsph == nsph) ? 0 : 1;
+    result += (chk_inpol == inpol) ? 0 : 1;
+    result += (chk_iavm == iavm) ? 0 : 1;
+    result += (chk_isam == isam) ? 0 : 1;
+    result += (chk_num_theta == _num_theta) ? 0 : 1;
+    result += (chk_num_thetas == _num_thetas) ? 0 : 1;
+    result += (chk_num_phi == _num_phi) ? 0 : 1;
+    result += (chk_num_phis == _num_phis) ? 0 : 1;
+    result += (chk_ndirs == ndirs) ? 0 : 1;
+    result += (chk_exri == exri) ? 0 : 1;
+    result += (chk_idfc == idfc) ? 0 : 1;
+    result += (chk_configs == configurations) ? 0 : 1;
+    if (result == 0) {
+      int xi1, offset, chunk_size;
+      MPI_Send(&result, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD);
+      MPI_Recv(&xi1, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(&chunk_size, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      // Receive vectors of single values per scale
+      offset = xi1 - _first_xi;
+      MPI_Recv(vec_jxi + offset, chunk_size, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_ier + offset, chunk_size, MPI_SHORT, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_vk + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_xi + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_fsat + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_qschut + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_pschut + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_s0magt + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_scc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_scc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_abc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_abc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_exc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_exc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_albedc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_albedc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_qscamc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_qscamc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_qabsmc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_qabsmc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_qextmc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_qextmc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_sccrt1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_sccrt2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_abcrt1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_abcrt2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_excrt1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_excrt2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_fsac11 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_fsac21 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_fsac22 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_fsac12 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_qschuc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_qschuc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_pschuc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_pschuc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_s0magc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_s0magc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_cosavc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_cosavc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_raprc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_raprc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_fkc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_fkc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
 
-    // Receive vectors of multiple configuration values per scale
-    MPI_Recv(&chunk_size, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    offset = (xi1 - _first_xi) * configurations;
-    MPI_Recv(vec_sphere_sizes + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_sphere_ref_indices + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_sphere_scs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_sphere_abs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_sphere_exs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_sphere_albs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_sphere_sqscs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_sphere_sqabs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_sphere_sqexs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_fsas + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_qschus + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_pschus + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_s0mags + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_cosavs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_raprs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_tqek1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_tqsk1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_tqek2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_tqsk2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      // Receive vectors of multiple configuration values per scale
+      MPI_Recv(&chunk_size, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      offset = (xi1 - _first_xi) * configurations;
+      MPI_Recv(vec_sphere_sizes + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_sphere_ref_indices + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_sphere_scs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_sphere_abs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_sphere_exs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_sphere_albs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_sphere_sqscs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_sphere_sqabs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_sphere_sqexs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_fsas + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_qschus + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_pschus + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_s0mags + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_cosavs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_raprs + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_tqek1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_tqsk1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_tqek2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_tqsk2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
 
-    // Receive vectors whose sizes depend on directions and configurations.
-    MPI_Recv(&chunk_size, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    offset = (xi1 - _first_xi) * ndirs * configurations;
-    MPI_Recv(vec_dir_sas11 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_sas21 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_sas12 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_sas22 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_muls + 16 * offset, 16 * chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_mulslr + 16 * offset, 16 * chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      // Receive vectors whose sizes depend on directions and configurations.
+      MPI_Recv(&chunk_size, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      offset = (xi1 - _first_xi) * ndirs * configurations;
+      MPI_Recv(vec_dir_sas11 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_sas21 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_sas12 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_sas22 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_muls + 16 * offset, 16 * chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_mulslr + 16 * offset, 16 * chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
 
-    // Receive vectors whose sizes depend on directions and scales.
-    MPI_Recv(&chunk_size, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    offset = (xi1 - _first_xi) * ndirs;
-    MPI_Recv(vec_dir_sat11 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_sat21 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_sat12 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_sat22 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_scc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_scc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_abc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_abc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_exc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_exc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_albedc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_albedc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_qscc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_qscc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_qabc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_qabc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_qexc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_qexc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_sccrt1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_sccrt2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_abcrt1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_abcrt2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_excrt1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_excrt2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_fsac11 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_fsac21 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_fsac12 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_fsac22 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_sac11 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_sac21 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_sac12 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_sac22 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_qschuc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_qschuc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_pschuc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_pschuc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_s0magc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_s0magc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_cosavc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_cosavc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_raprc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_raprc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_flc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_flc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_frc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_frc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_fkc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_fkc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_fxc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_fxc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_fyc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_fyc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_fzc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_fzc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqelc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqelc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqerc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqerc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqekc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqekc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqexc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqexc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqeyc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqeyc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqezc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqezc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqslc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqslc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqsrc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqsrc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqskc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqskc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqsxc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqsxc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqsyc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqsyc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqszc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_tqszc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_mulc + 16 * offset, 16 * chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-    MPI_Recv(vec_dir_mulclr + 16 * offset, 16 * chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-  } else {
-    MPI_Send(&result, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD);
+      // Receive vectors whose sizes depend on directions and scales.
+      MPI_Recv(&chunk_size, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      offset = (xi1 - _first_xi) * ndirs;
+      MPI_Recv(vec_dir_sat11 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_sat21 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_sat12 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_sat22 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_scc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_scc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_abc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_abc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_exc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_exc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_albedc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_albedc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_qscc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_qscc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_qabc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_qabc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_qexc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_qexc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_sccrt1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_sccrt2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_abcrt1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_abcrt2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_excrt1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_excrt2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_fsac11 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_fsac21 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_fsac12 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_fsac22 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_sac11 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_sac21 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_sac12 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_sac22 + offset, chunk_size, MPI_C_DOUBLE_COMPLEX, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_qschuc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_qschuc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_pschuc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_pschuc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_s0magc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_s0magc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_cosavc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_cosavc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_raprc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_raprc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_flc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_flc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_frc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_frc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_fkc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_fkc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_fxc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_fxc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_fyc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_fyc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_fzc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_fzc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqelc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqelc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqerc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqerc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqekc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqekc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqexc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqexc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqeyc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqeyc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqezc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqezc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqslc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqslc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqsrc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqsrc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqskc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqskc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqsxc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqsxc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqsyc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqsyc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqszc1 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_tqszc2 + offset, chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_mulc + 16 * offset, 16 * chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_Recv(vec_dir_mulclr + 16 * offset, 16 * chunk_size, MPI_DOUBLE, pid, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    } else {
+      MPI_Send(&result, 1, MPI_INT32_T, pid, 10, MPI_COMM_WORLD);
+    }
   }
   return result;
 }
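Taken together, the cluster.cpp and outputs.cpp hunks define a small handshake: the sending rank first announces with skip_flag whether it has a ClusterOutputInfo payload at all (skip_flag = 1 when its p_outarray[0] is NULL because it had no scales left to process), and rank 0 posts the matching flag receive before any payload receives, so neither side can block waiting on an output block that was never allocated. A minimal, self-contained sketch of that handshake outside the real classes (have_output and payload are illustrative stand-ins, not names from the codebase; build with mpicxx and run with at least two ranks):

#include <mpi.h>
#include <cstdint>
#include <cstdio>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int rank = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  const int tag = 10;                        // same tag as the patch
  if (rank == 1) {                           // worker side, as in cluster()
    bool have_output = true;                 // false when p_outarray[0] == NULL
    std::int32_t skip_flag = have_output ? 0 : 1;
    MPI_Send(&skip_flag, 1, MPI_INT32_T, 0, tag, MPI_COMM_WORLD);
    if (skip_flag == 0) {
      double payload = 42.0;                 // stands in for mpisend()'s many vectors
      MPI_Send(&payload, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
    }
  } else if (rank == 0) {                    // collector side, as in mpireceive()
    std::int32_t skip_flag = 1;
    MPI_Recv(&skip_flag, 1, MPI_INT32_T, 1, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    if (skip_flag == 0) {                    // receive the payload only if it was announced
      double payload = 0.0;
      MPI_Recv(&payload, 1, MPI_DOUBLE, 1, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
      std::printf("received payload %g\n", payload);
    } else {
      std::printf("peer had nothing to send\n");
    }
  }
  MPI_Finalize();
  return 0;
}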