diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e33e36d8794e763a95051a8bfc9441fe606538b2..24a87265b60e3188af7c525164f1e0e87d9f0d66 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -83,7 +83,7 @@ compatibility_stage: - mkdir build_gnu14_refine - cd build_gnu14_refine - cp -r ../build/* . - - CXX=g++-14 F77=gfortran-14 ./configure --with-refinement + - CXX=g++-14 F77=gfortran-14 ./configure --enable-refinement - make clean - make -j - echo "Running make with flang version 16 and clang version 16..." @@ -134,6 +134,7 @@ building_stage: artifacts: paths: - build/cluster/* + - build/inclusion/* - build/sphere/* - build/testing/* - build/trapping/* @@ -153,7 +154,7 @@ building_stage: - cat /etc/os-release - cd build - echo "Configuring with default compilers (MAGMA disabled)..." - - ./configure --without-magma --disable-offload --with-refinement + - ./configure --without-magma --disable-offload --enable-refinement - make clean - echo "Building the default configuration..." - make -j @@ -201,6 +202,7 @@ running_stage: artifacts: paths: - build/cluster/c_* + - build/inclusion/c_* - build/sphere/c_* #- build/trapping/* exclude: @@ -221,6 +223,10 @@ running_stage: - echo "Running np_cluster" - chmod +x np_cluster - OMP_NUM_THREADS=1 ./np_cluster + - cd ../inclusion + - echo "Running np_inclusion" + - chmod +x np_inclusion + - OMP_NUM_THREADS=1 ./np_inclusion testing_stage: stage: test @@ -235,6 +241,7 @@ testing_stage: artifacts: paths: - build/cluster/pycompare.html + - build/inclusion/pycompare.html - build/sphere/pycompare.html exclude: - ".git*" @@ -252,6 +259,10 @@ testing_stage: - python3 ../../src/scripts/pycompare.py --no-progress --ffile $FFILE --cfile c_OSPH --html - echo "Checking consistency among legacy and HDF5 configuration files" - ../testing/test_TEDF ../../test_data/sphere/DEDFB c_TEDF c_TEDF.hd5 + - cd ../inclusion + - echo "Comparing output of INCLUSION" + - export FFILE=../../test_data/inclusion/OINCLU + - python3 ../../src/scripts/pycompare.py 
--no-progress --ffile $FFILE --cfile c_OINCLU --html - cd ../cluster - echo "Comparing output of CLUSTER" - export FFILE=../../test_data/cluster/OCLU diff --git a/build/Makefile.am b/build/Makefile.am index e7975de764c19ed3c4703afe54eb8f924b4a1c30..cbfcd574ce12fce5a71b66fdd9f46ca7a5636f2d 100644 --- a/build/Makefile.am +++ b/build/Makefile.am @@ -1,6 +1,6 @@ LDADD=libnptm/libnptm.la -L/usr/lib64 ${USER_LDFLAGS} ${HDF5_LDFLAGS} ${LAPACKLDFLAGS} ${BLASLDFLAGS} ${MAGMALDFLAGS} lib_LTLIBRARIES=libnptm/libnptm.la -libnptm_libnptm_la_SOURCES=../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/cublas_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/utils.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp +libnptm_libnptm_la_SOURCES=../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/inclu_subs.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/cublas_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/utils.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp if BUILDFORTRAN PROGS=cluster/edfb_clu cluster/clu cluster/np_cluster inclusion/edfb_inclu inclusion/inclu inclusion/np_inclusion sphere/edfb_sph sphere/sph sphere/np_sphere trapping/frfme trapping/lffft trapping/np_trapping testing/test_ParticleDescriptor testing/test_TEDF testing/test_TTMS bin_PROGRAMS=$(PROGS) diff --git a/build/Makefile.in b/build/Makefile.in index e13dfc21dcdf9a25144fdad022a3a3f23ab27ac0..bdddb965366c8a04c4b81e0a1b71f99a84029dfb 100644 --- a/build/Makefile.in +++ b/build/Makefile.in @@ -160,11 
+160,11 @@ am__dirstamp = $(am__leading_dot)dirstamp am_libnptm_libnptm_la_OBJECTS = ../src/libnptm/algebraic.lo \ ../src/libnptm/clu_subs.lo ../src/libnptm/Commons.lo \ ../src/libnptm/Configuration.lo ../src/libnptm/file_io.lo \ - ../src/libnptm/lapack_calls.lo ../src/libnptm/logging.lo \ - ../src/libnptm/magma_calls.lo ../src/libnptm/Parsers.lo \ - ../src/libnptm/sph_subs.lo ../src/libnptm/utils.lo \ - ../src/libnptm/tfrfme.lo ../src/libnptm/TransitionMatrix.lo \ - ../src/libnptm/tra_subs.lo + ../src/libnptm/inclu_subs.lo ../src/libnptm/lapack_calls.lo \ + ../src/libnptm/logging.lo ../src/libnptm/magma_calls.lo \ + ../src/libnptm/Parsers.lo ../src/libnptm/sph_subs.lo \ + ../src/libnptm/utils.lo ../src/libnptm/tfrfme.lo \ + ../src/libnptm/TransitionMatrix.lo ../src/libnptm/tra_subs.lo libnptm_libnptm_la_OBJECTS = $(am_libnptm_libnptm_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) @@ -359,6 +359,7 @@ am__depfiles_remade = ../src/cluster/$(DEPDIR)/cluster.Po \ ../src/libnptm/$(DEPDIR)/algebraic.Plo \ ../src/libnptm/$(DEPDIR)/clu_subs.Plo \ ../src/libnptm/$(DEPDIR)/file_io.Plo \ + ../src/libnptm/$(DEPDIR)/inclu_subs.Plo \ ../src/libnptm/$(DEPDIR)/lapack_calls.Plo \ ../src/libnptm/$(DEPDIR)/logging.Plo \ ../src/libnptm/$(DEPDIR)/magma_calls.Plo \ @@ -651,7 +652,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LDADD = libnptm/libnptm.la -L/usr/lib64 ${USER_LDFLAGS} ${HDF5_LDFLAGS} ${LAPACKLDFLAGS} ${BLASLDFLAGS} ${MAGMALDFLAGS} lib_LTLIBRARIES = libnptm/libnptm.la -libnptm_libnptm_la_SOURCES = ../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/utils.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp +libnptm_libnptm_la_SOURCES = 
../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/inclu_subs.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/utils.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp @BUILDFORTRAN_FALSE@PROGS = cluster/np_cluster inclusion/np_inclusion sphere/np_sphere trapping/np_trapping testing/test_ParticleDescriptor testing/test_TEDF testing/test_TTMS @BUILDFORTRAN_TRUE@PROGS = cluster/edfb_clu cluster/clu cluster/np_cluster inclusion/edfb_inclu inclusion/inclu inclusion/np_inclusion sphere/edfb_sph sphere/sph sphere/np_sphere trapping/frfme trapping/lffft trapping/np_trapping testing/test_ParticleDescriptor testing/test_TEDF testing/test_TTMS @BUILDFORTRAN_TRUE@EDFBCLUSOURCES = ../src/cluster/edfb_clu.f @@ -821,6 +822,8 @@ clean-libLTLIBRARIES: ../src/libnptm/$(DEPDIR)/$(am__dirstamp) ../src/libnptm/file_io.lo: ../src/libnptm/$(am__dirstamp) \ ../src/libnptm/$(DEPDIR)/$(am__dirstamp) +../src/libnptm/inclu_subs.lo: ../src/libnptm/$(am__dirstamp) \ + ../src/libnptm/$(DEPDIR)/$(am__dirstamp) ../src/libnptm/lapack_calls.lo: ../src/libnptm/$(am__dirstamp) \ ../src/libnptm/$(DEPDIR)/$(am__dirstamp) ../src/libnptm/logging.lo: ../src/libnptm/$(am__dirstamp) \ @@ -1020,6 +1023,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/algebraic.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/clu_subs.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/file_io.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/inclu_subs.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/lapack_calls.Plo@am__quote@ # 
am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/logging.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/magma_calls.Plo@am__quote@ # am--include-marker @@ -1396,6 +1400,7 @@ distclean: distclean-am -rm -f ../src/libnptm/$(DEPDIR)/algebraic.Plo -rm -f ../src/libnptm/$(DEPDIR)/clu_subs.Plo -rm -f ../src/libnptm/$(DEPDIR)/file_io.Plo + -rm -f ../src/libnptm/$(DEPDIR)/inclu_subs.Plo -rm -f ../src/libnptm/$(DEPDIR)/lapack_calls.Plo -rm -f ../src/libnptm/$(DEPDIR)/logging.Plo -rm -f ../src/libnptm/$(DEPDIR)/magma_calls.Plo @@ -1469,6 +1474,7 @@ maintainer-clean: maintainer-clean-am -rm -f ../src/libnptm/$(DEPDIR)/algebraic.Plo -rm -f ../src/libnptm/$(DEPDIR)/clu_subs.Plo -rm -f ../src/libnptm/$(DEPDIR)/file_io.Plo + -rm -f ../src/libnptm/$(DEPDIR)/inclu_subs.Plo -rm -f ../src/libnptm/$(DEPDIR)/lapack_calls.Plo -rm -f ../src/libnptm/$(DEPDIR)/logging.Plo -rm -f ../src/libnptm/$(DEPDIR)/magma_calls.Plo diff --git a/build/build_aux/config.guess b/build/build_aux/config.guess index 7f76b6228f73d674f58cfcc3523f99e253ee5515..cdfc4392047ce3843a7a98f5451bbe97cb8200ea 100755 --- a/build/build_aux/config.guess +++ b/build/build_aux/config.guess @@ -1,10 +1,10 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2022 Free Software Foundation, Inc. +# Copyright 1992-2023 Free Software Foundation, Inc. # shellcheck disable=SC2006,SC2268 # see below for rationale -timestamp='2022-01-09' +timestamp='2023-08-22' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -47,7 +47,7 @@ me=`echo "$0" | sed -e 's,.*/,,'` usage="\ Usage: $0 [OPTION] -Output the configuration name of the system \`$me' is run on. +Output the configuration name of the system '$me' is run on. 
Options: -h, --help print this help, then exit @@ -60,13 +60,13 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2022 Free Software Foundation, Inc. +Copyright 1992-2023 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." help=" -Try \`$me --help' for more information." +Try '$me --help' for more information." # Parse command line while test $# -gt 0 ; do @@ -102,8 +102,8 @@ GUESS= # temporary files to be created and, as you can see below, it is a # headache to deal with in a portable fashion. -# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still -# use `HOST_CC' if defined, but it is deprecated. +# Historically, 'CC_FOR_BUILD' used to be named 'HOST_CC'. We still +# use 'HOST_CC' if defined, but it is deprecated. # Portable tmp directory creation inspired by the Autoconf team. @@ -155,6 +155,9 @@ Linux|GNU|GNU/*) set_cc_for_build cat <<-EOF > "$dummy.c" + #if defined(__ANDROID__) + LIBC=android + #else #include #if defined(__UCLIBC__) LIBC=uclibc @@ -169,6 +172,7 @@ Linux|GNU|GNU/*) LIBC=musl #endif #endif + #endif EOF cc_set_libc=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'` eval "$cc_set_libc" @@ -459,7 +463,7 @@ case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in UNAME_RELEASE=`uname -v` ;; esac - # Japanese Language versions have a version number like `4.1.3-JL'. + # Japanese Language versions have a version number like '4.1.3-JL'. 
SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/'` GUESS=sparc-sun-sunos$SUN_REL ;; @@ -904,7 +908,7 @@ EOF fi ;; *:FreeBSD:*:*) - UNAME_PROCESSOR=`/usr/bin/uname -p` + UNAME_PROCESSOR=`uname -p` case $UNAME_PROCESSOR in amd64) UNAME_PROCESSOR=x86_64 ;; @@ -966,11 +970,37 @@ EOF GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` GUESS=$UNAME_MACHINE-unknown-$GNU_SYS$GNU_REL-$LIBC ;; + x86_64:[Mm]anagarm:*:*|i?86:[Mm]anagarm:*:*) + GUESS="$UNAME_MACHINE-pc-managarm-mlibc" + ;; + *:[Mm]anagarm:*:*) + GUESS="$UNAME_MACHINE-unknown-managarm-mlibc" + ;; *:Minix:*:*) GUESS=$UNAME_MACHINE-unknown-minix ;; aarch64:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + set_cc_for_build + CPU=$UNAME_MACHINE + LIBCABI=$LIBC + if test "$CC_FOR_BUILD" != no_compiler_found; then + ABI=64 + sed 's/^ //' << EOF > "$dummy.c" + #ifdef __ARM_EABI__ + #ifdef __ARM_PCS_VFP + ABI=eabihf + #else + ABI=eabi + #endif + #endif +EOF + cc_set_abi=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^ABI' | sed 's, ,,g'` + eval "$cc_set_abi" + case $ABI in + eabi | eabihf) CPU=armv8l; LIBCABI=$LIBC$ABI ;; + esac + fi + GUESS=$CPU-unknown-linux-$LIBCABI ;; aarch64_be:Linux:*:*) UNAME_MACHINE=aarch64_be @@ -1036,7 +1066,16 @@ EOF k1om:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; - loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*) + kvx:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + kvx:cos:*:*) + GUESS=$UNAME_MACHINE-unknown-cos + ;; + kvx:mbr:*:*) + GUESS=$UNAME_MACHINE-unknown-mbr + ;; + loongarch32:Linux:*:* | loongarch64:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; m32r*:Linux:*:*) @@ -1151,16 +1190,27 @@ EOF ;; x86_64:Linux:*:*) set_cc_for_build + CPU=$UNAME_MACHINE LIBCABI=$LIBC if test "$CC_FOR_BUILD" != no_compiler_found; then - if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_X32 >/dev/null - then - LIBCABI=${LIBC}x32 - fi + ABI=64 + sed 's/^ //' << EOF > 
"$dummy.c" + #ifdef __i386__ + ABI=x86 + #else + #ifdef __ILP32__ + ABI=x32 + #endif + #endif +EOF + cc_set_abi=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^ABI' | sed 's, ,,g'` + eval "$cc_set_abi" + case $ABI in + x86) CPU=i686 ;; + x32) LIBCABI=${LIBC}x32 ;; + esac fi - GUESS=$UNAME_MACHINE-pc-linux-$LIBCABI + GUESS=$CPU-pc-linux-$LIBCABI ;; xtensa*:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC @@ -1180,7 +1230,7 @@ EOF GUESS=$UNAME_MACHINE-pc-sysv4.2uw$UNAME_VERSION ;; i*86:OS/2:*:*) - # If we were able to find `uname', then EMX Unix compatibility + # If we were able to find 'uname', then EMX Unix compatibility # is probably installed. GUESS=$UNAME_MACHINE-pc-os2-emx ;; @@ -1321,7 +1371,7 @@ EOF GUESS=ns32k-sni-sysv fi ;; - PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + PENTIUM:*:4.0*:*) # Unisys 'ClearPath HMP IX 4000' SVR4/MP effort # says GUESS=i586-unisys-sysv4 ;; @@ -1367,8 +1417,11 @@ EOF BePC:Haiku:*:*) # Haiku running on Intel PC compatible. GUESS=i586-pc-haiku ;; - x86_64:Haiku:*:*) - GUESS=x86_64-unknown-haiku + ppc:Haiku:*:*) # Haiku running on Apple PowerPC + GUESS=powerpc-apple-haiku + ;; + *:Haiku:*:*) # Haiku modern gcc (not bound by BeOS compat) + GUESS=$UNAME_MACHINE-unknown-haiku ;; SX-4:SUPER-UX:*:*) GUESS=sx4-nec-superux$UNAME_RELEASE diff --git a/build/build_aux/config.sub b/build/build_aux/config.sub index dba16e84c77c7d25871d80c24deff717faf4c094..defe52c0c874baa521e591c2b520f15de8a5f024 100755 --- a/build/build_aux/config.sub +++ b/build/build_aux/config.sub @@ -1,10 +1,10 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2022 Free Software Foundation, Inc. +# Copyright 1992-2023 Free Software Foundation, Inc. 
# shellcheck disable=SC2006,SC2268 # see below for rationale -timestamp='2022-01-03' +timestamp='2023-09-19' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -76,13 +76,13 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright 1992-2022 Free Software Foundation, Inc. +Copyright 1992-2023 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." help=" -Try \`$me --help' for more information." +Try '$me --help' for more information." # Parse command line while test $# -gt 0 ; do @@ -130,7 +130,7 @@ IFS=$saved_IFS # Separate into logical components for further validation case $1 in *-*-*-*-*) - echo Invalid configuration \`"$1"\': more than four components >&2 + echo "Invalid configuration '$1': more than four components" >&2 exit 1 ;; *-*-*-*) @@ -145,7 +145,8 @@ case $1 in nto-qnx* | linux-* | uclinux-uclibc* \ | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ - | storm-chaos* | os2-emx* | rtmk-nova*) + | storm-chaos* | os2-emx* | rtmk-nova* | managarm-* \ + | windows-* ) basic_machine=$field1 basic_os=$maybe_os ;; @@ -943,7 +944,7 @@ $basic_machine EOF IFS=$saved_IFS ;; - # We use `pc' rather than `unknown' + # We use 'pc' rather than 'unknown' # because (1) that's what they normally are, and # (2) the word "unknown" tends to confuse beginning users. 
i*86 | x86_64) @@ -1075,7 +1076,7 @@ case $cpu-$vendor in pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) cpu=i586 ;; - pentiumpro-* | p6-* | 6x86-* | athlon-* | athalon_*-*) + pentiumpro-* | p6-* | 6x86-* | athlon-* | athlon_*-*) cpu=i686 ;; pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) @@ -1180,7 +1181,7 @@ case $cpu-$vendor in case $cpu in 1750a | 580 \ | a29k \ - | aarch64 | aarch64_be \ + | aarch64 | aarch64_be | aarch64c | arm64ec \ | abacus \ | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] \ @@ -1199,45 +1200,23 @@ case $cpu-$vendor in | d10v | d30v | dlx | dsp16xx \ | e2k | elxsi | epiphany \ | f30[01] | f700 | fido | fr30 | frv | ft32 | fx80 \ + | javascript \ | h8300 | h8500 \ | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | hexagon \ | i370 | i*86 | i860 | i960 | ia16 | ia64 \ | ip2k | iq2000 \ | k1om \ + | kvx \ | le32 | le64 \ | lm32 \ - | loongarch32 | loongarch64 | loongarchx32 \ + | loongarch32 | loongarch64 \ | m32c | m32r | m32rle \ | m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \ | m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \ | m88110 | m88k | maxq | mb | mcore | mep | metag \ | microblaze | microblazeel \ - | mips | mipsbe | mipseb | mipsel | mipsle \ - | mips16 \ - | mips64 | mips64eb | mips64el \ - | mips64octeon | mips64octeonel \ - | mips64orion | mips64orionel \ - | mips64r5900 | mips64r5900el \ - | mips64vr | mips64vrel \ - | mips64vr4100 | mips64vr4100el \ - | mips64vr4300 | mips64vr4300el \ - | mips64vr5000 | mips64vr5000el \ - | mips64vr5900 | mips64vr5900el \ - | mipsisa32 | mipsisa32el \ - | mipsisa32r2 | mipsisa32r2el \ - | mipsisa32r3 | mipsisa32r3el \ - | mipsisa32r5 | mipsisa32r5el \ - | mipsisa32r6 | mipsisa32r6el \ - | mipsisa64 | mipsisa64el \ - | mipsisa64r2 | mipsisa64r2el \ - | mipsisa64r3 | mipsisa64r3el \ - | mipsisa64r5 | mipsisa64r5el \ - | mipsisa64r6 | mipsisa64r6el \ - | mipsisa64sb1 | mipsisa64sb1el \ - | mipsisa64sr71k | 
mipsisa64sr71kel \ - | mipsr5900 | mipsr5900el \ - | mipstx39 | mipstx39el \ + | mips* \ | mmix \ | mn10200 | mn10300 \ | moxie \ @@ -1285,7 +1264,7 @@ case $cpu-$vendor in ;; *) - echo Invalid configuration \`"$1"\': machine \`"$cpu-$vendor"\' not recognized 1>&2 + echo "Invalid configuration '$1': machine '$cpu-$vendor' not recognized" 1>&2 exit 1 ;; esac @@ -1306,11 +1285,12 @@ esac # Decode manufacturer-specific aliases for certain operating systems. -if test x$basic_os != x +if test x"$basic_os" != x then # First recognize some ad-hoc cases, or perhaps split kernel-os, or else just # set os. +obj= case $basic_os in gnu/linux*) kernel=linux @@ -1341,6 +1321,10 @@ EOF kernel=linux os=`echo "$basic_os" | sed -e 's|linux|gnu|'` ;; + managarm*) + kernel=managarm + os=`echo "$basic_os" | sed -e 's|managarm|mlibc|'` + ;; *) kernel= os=$basic_os @@ -1506,10 +1490,16 @@ case $os in os=eabi ;; *) - os=elf + os= + obj=elf ;; esac ;; + aout* | coff* | elf* | pe*) + # These are machine code file formats, not OSes + obj=$os + os= + ;; *) # No normalization, but not necessarily accepted, that comes below. ;; @@ -1528,12 +1518,15 @@ else # system, and we'll never get to this point. kernel= +obj= case $cpu-$vendor in score-*) - os=elf + os= + obj=elf ;; spu-*) - os=elf + os= + obj=elf ;; *-acorn) os=riscix1.2 @@ -1543,28 +1536,35 @@ case $cpu-$vendor in os=gnu ;; arm*-semi) - os=aout + os= + obj=aout ;; c4x-* | tic4x-*) - os=coff + os= + obj=coff ;; c8051-*) - os=elf + os= + obj=elf ;; clipper-intergraph) os=clix ;; hexagon-*) - os=elf + os= + obj=elf ;; tic54x-*) - os=coff + os= + obj=coff ;; tic55x-*) - os=coff + os= + obj=coff ;; tic6x-*) - os=coff + os= + obj=coff ;; # This must come before the *-dec entry. 
pdp10-*) @@ -1586,19 +1586,24 @@ case $cpu-$vendor in os=sunos3 ;; m68*-cisco) - os=aout + os= + obj=aout ;; mep-*) - os=elf + os= + obj=elf ;; mips*-cisco) - os=elf + os= + obj=elf ;; mips*-*) - os=elf + os= + obj=elf ;; or32-*) - os=coff + os= + obj=coff ;; *-tti) # must be before sparc entry or we get the wrong os. os=sysv3 @@ -1607,7 +1612,8 @@ case $cpu-$vendor in os=sunos4.1.1 ;; pru-*) - os=elf + os= + obj=elf ;; *-be) os=beos @@ -1688,10 +1694,12 @@ case $cpu-$vendor in os=uxpv ;; *-rom68k) - os=coff + os= + obj=coff ;; *-*bug) - os=coff + os= + obj=coff ;; *-apple) os=macos @@ -1709,7 +1717,8 @@ esac fi -# Now, validate our (potentially fixed-up) OS. +# Now, validate our (potentially fixed-up) individual pieces (OS, OBJ). + case $os in # Sometimes we do "kernel-libc", so those need to count as OSes. musl* | newlib* | relibc* | uclibc*) @@ -1720,6 +1729,9 @@ case $os in # VxWorks passes extra cpu info in the 4th filed. simlinux | simwindows | spe) ;; + # See `case $cpu-$os` validation below + ghcjs) + ;; # Now accept the basic system types. # The portable systems comes first. # Each alternative MUST end in a * to match a version number. 
@@ -1728,7 +1740,7 @@ case $os in | hpux* | unos* | osf* | luna* | dgux* | auroraux* | solaris* \ | sym* | plan9* | psp* | sim* | xray* | os68k* | v88r* \ | hiux* | abug | nacl* | netware* | windows* \ - | os9* | macos* | osx* | ios* \ + | os9* | macos* | osx* | ios* | tvos* | watchos* \ | mpw* | magic* | mmixware* | mon960* | lnews* \ | amigaos* | amigados* | msdos* | newsos* | unicos* | aof* \ | aos* | aros* | cloudabi* | sortix* | twizzler* \ @@ -1737,11 +1749,11 @@ case $os in | mirbsd* | netbsd* | dicos* | openedition* | ose* \ | bitrig* | openbsd* | secbsd* | solidbsd* | libertybsd* | os108* \ | ekkobsd* | freebsd* | riscix* | lynxos* | os400* \ - | bosx* | nextstep* | cxux* | aout* | elf* | oabi* \ - | ptx* | coff* | ecoff* | winnt* | domain* | vsta* \ + | bosx* | nextstep* | cxux* | oabi* \ + | ptx* | ecoff* | winnt* | domain* | vsta* \ | udi* | lites* | ieee* | go32* | aux* | hcos* \ | chorusrdb* | cegcc* | glidix* | serenity* \ - | cygwin* | msys* | pe* | moss* | proelf* | rtems* \ + | cygwin* | msys* | moss* | proelf* | rtems* \ | midipix* | mingw32* | mingw64* | mint* \ | uxpv* | beos* | mpeix* | udk* | moxiebox* \ | interix* | uwin* | mks* | rhapsody* | darwin* \ @@ -1754,7 +1766,7 @@ case $os in | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \ | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \ | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx* | zephyr* \ - | fiwix* ) + | fiwix* | mlibc* | cos* | mbr* ) ;; # This one is extra strict with allowed versions sco3.2v2 | sco3.2v[4-9]* | sco5v6*) @@ -1762,41 +1774,99 @@ case $os in ;; none) ;; + kernel* | msvc* ) + # Restricted further below + ;; + '') + if test x"$obj" = x + then + echo "Invalid configuration '$1': Blank OS only allowed with explicit machine code file format" 1>&2 + fi + ;; + *) + echo "Invalid configuration '$1': OS '$os' not recognized" 1>&2 + exit 1 + ;; +esac + +case $obj in + aout* | coff* | elf* | pe*) + ;; + '') + # empty is fine + ;; *) - echo Invalid 
configuration \`"$1"\': OS \`"$os"\' not recognized 1>&2 + echo "Invalid configuration '$1': Machine code format '$obj' not recognized" 1>&2 + exit 1 + ;; +esac + +# Here we handle the constraint that a (synthetic) cpu and os are +# valid only in combination with each other and nowhere else. +case $cpu-$os in + # The "javascript-unknown-ghcjs" triple is used by GHC; we + # accept it here in order to tolerate that, but reject any + # variations. + javascript-ghcjs) + ;; + javascript-* | *-ghcjs) + echo "Invalid configuration '$1': cpu '$cpu' is not valid with os '$os$obj'" 1>&2 exit 1 ;; esac # As a final step for OS-related things, validate the OS-kernel combination # (given a valid OS), if there is a kernel. -case $kernel-$os in - linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* \ - | linux-musl* | linux-relibc* | linux-uclibc* ) +case $kernel-$os-$obj in + linux-gnu*- | linux-dietlibc*- | linux-android*- | linux-newlib*- \ + | linux-musl*- | linux-relibc*- | linux-uclibc*- | linux-mlibc*- ) + ;; + uclinux-uclibc*- ) ;; - uclinux-uclibc* ) + managarm-mlibc*- | managarm-kernel*- ) ;; - -dietlibc* | -newlib* | -musl* | -relibc* | -uclibc* ) + windows*-msvc*-) + ;; + -dietlibc*- | -newlib*- | -musl*- | -relibc*- | -uclibc*- | -mlibc*- ) # These are just libc implementations, not actual OSes, and thus # require a kernel. - echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2 + echo "Invalid configuration '$1': libc '$os' needs explicit kernel." 1>&2 exit 1 ;; - kfreebsd*-gnu* | kopensolaris*-gnu*) + -kernel*- ) + echo "Invalid configuration '$1': '$os' needs explicit kernel." 1>&2 + exit 1 ;; - vxworks-simlinux | vxworks-simwindows | vxworks-spe) + *-kernel*- ) + echo "Invalid configuration '$1': '$kernel' does not support '$os'." 1>&2 + exit 1 ;; - nto-qnx*) + *-msvc*- ) + echo "Invalid configuration '$1': '$os' needs 'windows'." 
1>&2 + exit 1 ;; - os2-emx) + kfreebsd*-gnu*- | kopensolaris*-gnu*-) ;; - *-eabi* | *-gnueabi*) + vxworks-simlinux- | vxworks-simwindows- | vxworks-spe-) ;; - -*) + nto-qnx*-) + ;; + os2-emx-) + ;; + *-eabi*- | *-gnueabi*-) + ;; + none--*) + # None (no kernel, i.e. freestanding / bare metal), + # can be paired with an machine code file format + ;; + -*-) # Blank kernel with real OS is always fine. ;; - *-*) - echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2 + --*) + # Blank kernel and OS with real machine code file format is always fine. + ;; + *-*-*) + echo "Invalid configuration '$1': Kernel '$kernel' not known to work with OS '$os'." 1>&2 exit 1 ;; esac @@ -1879,7 +1949,7 @@ case $vendor in ;; esac -echo "$cpu-$vendor-${kernel:+$kernel-}$os" +echo "$cpu-$vendor${kernel:+-$kernel}${os:+-$os}${obj:+-$obj}" exit # Local variables: diff --git a/build/build_aux/install-sh b/build/build_aux/install-sh index ec298b53740270ce82b326c4c2deaa5dcdec4596..7c56c9c015103600a06f59ab1183eb3966a513ab 100755 --- a/build/build_aux/install-sh +++ b/build/build_aux/install-sh @@ -1,7 +1,7 @@ #!/bin/sh # install - install a program, script, or datafile -scriptversion=2020-11-14.01; # UTC +scriptversion=2023-11-23.18; # UTC # This originates from X11R5 (mit/util/scripts/install.sh), which was # later released in X11R6 (xc/config/util/install.sh) with the @@ -124,9 +124,9 @@ it's up to you to specify -f if you want it. If -S is not specified, no backups are attempted. -Email bug reports to bug-automake@gnu.org. -Automake home page: https://www.gnu.org/software/automake/ -" +Report bugs to . +GNU Automake home page: . +General help using GNU software: ." 
while test $# -ne 0; do case $1 in diff --git a/build/build_aux/ltmain.sh b/build/build_aux/ltmain.sh index 4fdde9a05ebdb2d232252f41f233b4986659fffb..977e5237bb01a985aed489ff49b3d1d885cf75d5 100755 --- a/build/build_aux/ltmain.sh +++ b/build/build_aux/ltmain.sh @@ -31,7 +31,7 @@ PROGRAM=libtool PACKAGE=libtool -VERSION="2.4.7 Debian-2.4.7-8" +VERSION="2.4.7 Debian-2.4.7-7build1" package_revision=2.4.7 @@ -2296,7 +2296,7 @@ include the following information: compiler: $LTCC compiler flags: $LTCFLAGS linker: $LD (gnu? $with_gnu_ld) - version: $progname $scriptversion Debian-2.4.7-8 + version: $progname $scriptversion Debian-2.4.7-7build1 automake: `($AUTOMAKE --version) 2>/dev/null |$SED 1q` autoconf: `($AUTOCONF --version) 2>/dev/null |$SED 1q` diff --git a/build/configure b/build/configure index 35ffddf26d6650dec008a696bd5ee022ead53202..a19d45e9152087491d48c71e630c9b9796356682 100755 --- a/build/configure +++ b/build/configure @@ -824,7 +824,7 @@ enable_offload enable_openmp enable_optimize with_lapack -with_refinement +enable_refinement with_magma enable_nvtx with_include @@ -1492,6 +1492,8 @@ Optional Features: 13) [default=auto] --enable-openmp enable OpneMP multi-threading [default=yes] --enable-optimize=LEVEL use optimization level LEVEL [default=3] + --enable-refinement use iterative refinement for matrix inversion + [default=no] --enable-nvtx use NVTX profiling [default=no] Optional Packages: @@ -1506,8 +1508,6 @@ Optional Packages: --with-sysroot[=DIR] Search for dependent libraries within DIR (or the compiler's sysroot if not specified). --with-lapack use LAPACK [default=auto] - --with-refinement use iterative refinement for matrix inversion - [default=no] --with-magma[=MAGMA_DIR] use MAGMA [default=auto] --with-include additional include folders [default=none] @@ -25404,12 +25404,11 @@ esac fi - -# Check whether --with-refinement was given. -if test ${with_refinement+y} +# Check whether --enable-refinement was given. 
+if test ${enable_refinement+y} then : - withval=$with_refinement; - if test "x$withval" = "xno"; then + enableval=$enable_refinement; + if test "x$enableval" = "xno"; then REFINEFLAGS="" else @@ -25455,7 +25454,7 @@ then : fi # end of 64-bit decision tree pkg-config --version > /dev/null use_pkg_config=$? - if test "x${CUDAFLAGS}${CUDALDFLAGS}" = "x"; then + if test "x${CUDAFLAGS}" = "x"; then if test "x$use_pkg_config" = "x0"; then # pkg-config is available declare -a pkg_array=$(pkg-config --list-all | grep cudart) @@ -25465,24 +25464,15 @@ then : # CUDA runtime detected cuda_pkg=$(for i in "${pkg_array[@]}"; do echo "$i" | cut --delimiter=" " -f1; done | grep cudart) CUDAFLAGS=$(pkg-config --cflags ${cuda_pkg}) - CUDALDFLAGS=$(pkg-config --libs ${cuda_pkg}) fi # end of CUDA runtime decision tree - echo $CUDALDFLAGS | grep cudart > /dev/null - cudart_check=$? - if test "x${cudart_check}" != "x0"; then - CUDALDFLAGS="$CUDALDFLAGS -lcudart" - fi else # pkg-config is not available if test -f /usr/local/cuda/include/cuda.h; then CUDAFLAGS="-I/usr/local/cuda/include" - CUDALDFLAGS="-L/usr/local/cuda/lib64 -lcudart" elif test -f /usr/include/cuda.h; then CUDAFLAGS="-I/usr/include" - CUDALDFLAGS="-lcudart" elif test "x$CUDA_HOME" != "x"; then CUDAFLAGS="-I${CUDA_HOME}/include" - CUDALDFLAGS="-L${CUDA_HOME}/lib64 -lcudart" fi fi # end of pkg-config decision tree fi # end of CUDAFLAGS user override protection @@ -25499,17 +25489,17 @@ then : MAGMA_INCLUDE=$(pkg-config --cflags-only-I ${magma_pkg}) MAGMA_LIBS_DIR=$(pkg-config --libs-only-L ${magma_pkg}) export MAGMAFLAGS="-DUSE_MAGMA ${MAGMA_ILP64_FLAG} $CUDAFLAGS ${MAGMA_INCLUDE}" - export MAGMALDFLAGS="$CUDALDFLAGS ${MAGMA_LIBS_DIR} -lmagma" + export MAGMALDFLAGS="${MAGMA_LIBS_DIR} -lmagma" fi # end of MAGMA decision tree else # search for MAGMA in some standard folders if test "x$CUDAFLAGS" != "x"; then if test -f /usr/include/magma_v2.h; then export MAGMAFLAGS="-DUSE_MAGMA ${MAGMA_ILP64_FLAG} $CUDAFLAGS 
-I/usr/include" - export MAGMALDFLAGS="$CUDALDFLAGS -lmagma" + export MAGMALDFLAGS="-lmagma" elif test -f /usr/local/include/magma_v2.h; then export MAGMAFLAGS="-DUSE_MAGMA ${MAGMA_ILP64_FLAG} $CUDAFLAGS -I/usr/local/include" - export MAGMALDFLAGS="$CUDALDFLAGS -lmagma" + export MAGMALDFLAGS="-lmagma" fi fi fi # end of pkg-config decision tree @@ -25521,7 +25511,7 @@ then : MAGMA_ROOT="${MAGMA_DIR}" fi export MAGMAFLAGS="-DUSE_MAGMA -DMAGMA_ILP64 $CUDAFLAGS -I${MAGMA_ROOT}/include" - export MAGMALDFLAGS="$CUDALDFLAGS -L${MAGMA_ROOT}/lib -lmagma" + export MAGMALDFLAGS="-L${MAGMA_ROOT}/lib -lmagma" fi @@ -25555,7 +25545,7 @@ else case e in #( fi # end of 64-bit decision tree pkg-config --version > /dev/null use_pkg_config=$? - if test "x${CUDAFLAGS}${CUDALDFLAGS}" = "x"; then + if test "x${CUDAFLAGS}" = "x"; then if test "x$use_pkg_config" = "x0"; then # pkg-config is available declare -a pkg_array=$(pkg-config --list-all | grep cudart) @@ -25565,24 +25555,15 @@ else case e in #( # CUDA runtime detected cuda_pkg=$(for i in "${pkg_array[@]}"; do echo "$i" | cut --delimiter=" " -f1; done | grep cudart) CUDAFLAGS=$(pkg-config --cflags ${cuda_pkg}) - CUDALDFLAGS=$(pkg-config --libs ${cuda_pkg}) fi # end of CUDA runtime decision tree - echo $CUDALDFLAGS | grep cudart > /dev/null - cudart_check=$? 
- if test "x${cudart_check}" != "x0"; then - CUDALDFLAGS="$CUDALDFLAGS -lcudart" - fi else # pkg-config is not available if test -f /usr/local/cuda/include/cuda.h; then CUDAFLAGS="-I/usr/local/cuda/include" - CUDALDFLAGS="-L/usr/local/cuda/lib64 -lcudart" elif test -f /usr/include/cuda.h; then CUDAFLAGS="-I/usr/include" - CUDALDFLAGS="-lcudart" elif test "x$CUDA_HOME" != "x"; then CUDAFLAGS="-I${CUDA_HOME}/include" - CUDALDFLAGS="-L${CUDA_HOME}/lib64 -lcudart" fi fi # end of pkg-config decision tree fi # end of CUDAFLAGS user override protection @@ -25599,17 +25580,17 @@ else case e in #( MAGMA_INCLUDE=$(pkg-config --cflags-only-I ${magma_pkg}) MAGMA_LIBS_DIR=$(pkg-config --libs-only-L ${magma_pkg}) export MAGMAFLAGS="-DUSE_MAGMA ${MAGMA_ILP64_FLAG} $CUDAFLAGS ${MAGMA_INCLUDE}" - export MAGMALDFLAGS="$CUDALDFLAGS ${MAGMA_LIBS_DIR} -lmagma" + export MAGMALDFLAGS="${MAGMA_LIBS_DIR} -lmagma" fi # end of MAGMA decision tree else # search for MAGMA in some standard folders if test "x$CUDAFLAGS" != "x"; then if test -f /usr/include/magma_v2.h; then export MAGMAFLAGS="-DUSE_MAGMA ${MAGMA_ILP64_FLAG} $CUDAFLAGS -I/usr/include" - export MAGMALDFLAGS="$CUDALDFLAGS -lmagma" + export MAGMALDFLAGS="-lmagma" elif test -f /usr/local/include/magma_v2.h; then export MAGMAFLAGS="-DUSE_MAGMA ${MAGMA_ILP64_FLAG} $CUDAFLAGS -I/usr/local/include" - export MAGMALDFLAGS="$CUDALDFLAGS -lmagma" + export MAGMALDFLAGS="-lmagma" fi fi fi # end of pkg-config decision tree @@ -25621,7 +25602,7 @@ else case e in #( MAGMA_ROOT="${MAGMA_DIR}" fi export MAGMAFLAGS="-DUSE_MAGMA -DMAGMA_ILP64 $CUDAFLAGS -I${MAGMA_ROOT}/include" - export MAGMALDFLAGS="$CUDALDFLAGS -L${MAGMA_ROOT}/lib -lmagma" + export MAGMALDFLAGS="-L${MAGMA_ROOT}/lib -lmagma" fi diff --git a/build/configure.ac b/build/configure.ac index f5b1b99f8a0785eee4ff717c37a5eb4633d9628f..b4b639535dc09a326723faadd1d183aa69803dcb 100644 --- a/build/configure.ac +++ b/build/configure.ac @@ -169,7 +169,7 @@ m4_define( fi # end of 64-bit decision 
tree pkg-config --version > /dev/null use_pkg_config=$? - if test "x${CUDAFLAGS}${CUDALDFLAGS}" = "x"; then + if test "x${CUDAFLAGS}" = "x"; then if test "x$use_pkg_config" = "x0"; then # pkg-config is available declare -a pkg_array=$(pkg-config --list-all | grep cudart) @@ -179,24 +179,15 @@ m4_define( # CUDA runtime detected cuda_pkg=$(for i in "${pkg_array[[@]]}"; do echo "$i" | cut --delimiter=" " -f1; done | grep cudart) CUDAFLAGS=$(pkg-config --cflags ${cuda_pkg}) - CUDALDFLAGS=$(pkg-config --libs ${cuda_pkg}) fi # end of CUDA runtime decision tree - echo $CUDALDFLAGS | grep cudart > /dev/null - cudart_check=$? - if test "x${cudart_check}" != "x0"; then - CUDALDFLAGS="$CUDALDFLAGS -lcudart" - fi else # pkg-config is not available if test -f /usr/local/cuda/include/cuda.h; then CUDAFLAGS="-I/usr/local/cuda/include" - CUDALDFLAGS="-L/usr/local/cuda/lib64 -lcudart" elif test -f /usr/include/cuda.h; then CUDAFLAGS="-I/usr/include" - CUDALDFLAGS="-lcudart" elif test "x$CUDA_HOME" != "x"; then CUDAFLAGS="-I${CUDA_HOME}/include" - CUDALDFLAGS="-L${CUDA_HOME}/lib64 -lcudart" fi fi # end of pkg-config decision tree fi # end of CUDAFLAGS user override protection @@ -213,17 +204,17 @@ m4_define( MAGMA_INCLUDE=$(pkg-config --cflags-only-I ${magma_pkg}) MAGMA_LIBS_DIR=$(pkg-config --libs-only-L ${magma_pkg}) export MAGMAFLAGS="-DUSE_MAGMA ${MAGMA_ILP64_FLAG} $CUDAFLAGS ${MAGMA_INCLUDE}" - export MAGMALDFLAGS="$CUDALDFLAGS ${MAGMA_LIBS_DIR} -lmagma" + export MAGMALDFLAGS="${MAGMA_LIBS_DIR} -lmagma" fi # end of MAGMA decision tree else # search for MAGMA in some standard folders if test "x$CUDAFLAGS" != "x"; then if test -f /usr/include/magma_v2.h; then export MAGMAFLAGS="-DUSE_MAGMA ${MAGMA_ILP64_FLAG} $CUDAFLAGS -I/usr/include" - export MAGMALDFLAGS="$CUDALDFLAGS -lmagma" + export MAGMALDFLAGS="-lmagma" elif test -f /usr/local/include/magma_v2.h; then export MAGMAFLAGS="-DUSE_MAGMA ${MAGMA_ILP64_FLAG} $CUDAFLAGS -I/usr/local/include" - export MAGMALDFLAGS="$CUDALDFLAGS 
-lmagma" + export MAGMALDFLAGS="-lmagma" fi fi fi # end of pkg-config decision tree @@ -235,7 +226,7 @@ m4_define( MAGMA_ROOT="${MAGMA_DIR}" fi export MAGMAFLAGS="-DUSE_MAGMA -DMAGMA_ILP64 $CUDAFLAGS -I${MAGMA_ROOT}/include" - export MAGMALDFLAGS="$CUDALDFLAGS -L${MAGMA_ROOT}/lib -lmagma" + export MAGMALDFLAGS="-L${MAGMA_ROOT}/lib -lmagma" fi ] ) @@ -580,11 +571,11 @@ AC_ARG_WITH( ] ) -AC_ARG_WITH( +AC_ARG_ENABLE( [refinement], - [AS_HELP_STRING([--with-refinement], [use iterative refinement for matrix inversion @<:@default=no@:>@])], + [AS_HELP_STRING([--enable-refinement], [use iterative refinement for matrix inversion @<:@default=no@:>@])], [ - if test "x$withval" = "xno"; then + if test "x$enableval" = "xno"; then AC_SUBST([REFINEFLAGS], [""]) else AC_SUBST([REFINEFLAGS], ["-DUSE_REFINEMENT"]) diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp index f78465a7534419f0aceadf1304e73215d68ee7d3..1b320af2e5c07ccfe03b09d620c706f59062add1 100644 --- a/src/cluster/cluster.cpp +++ b/src/cluster/cluster.cpp @@ -35,11 +35,6 @@ #ifdef USE_NVTX #include #endif -#ifdef USE_MAGMA -#include -#endif -// define by hand for a first test -//#define USE_REFINEMENT 1 #ifndef INCLUDE_TYPES_H_ #include "../include/types.h" @@ -121,9 +116,16 @@ void cluster(const string& config_file, const string& data_file, const string& o // Initialise MAGMA //=========== #ifdef USE_MAGMA - // GMu note: MAGMA does not necessarily rely on CUDA, it may just as well run on openCL or HIP, we should consider alternative ways to detect the number of devices if MAGMA is not using CUDA but something else - cudaGetDeviceCount(&device_count); - logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " CUDA devices.\n", LOG_DEBG); + const magma_int_t d_array_max_size = 32; // TEMPORARY: can become configurable parameter + magma_device_t *device_array = new magma_device_t[d_array_max_size]; + magma_int_t num_devices; + magma_getdevices(device_array, 
d_array_max_size, &num_devices); + device_count = (int)num_devices; + delete[] device_array; + message = "DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " GPU "; + if (device_count > 1) message += "devices.\n"; + else message += "device.\n"; + logger->log(message, LOG_DEBG); logger->log("INFO: Process " + to_string(mpidata->rank) + " initializes MAGMA.\n"); magma_int_t magma_result = magma_init(); if (magma_result != MAGMA_SUCCESS) { @@ -136,8 +138,7 @@ void cluster(const string& config_file, const string& data_file, const string& o delete logger; return; } -#endif - // end MAGMA initialisation +#endif // end MAGMA initialisation //=========================== // the following only happens on MPI process 0 @@ -165,7 +166,7 @@ void cluster(const string& config_file, const string& data_file, const string& o sconf->write_formatted(output_path + "/c_OEDFB"); sconf->write_binary(output_path + "/c_TEDF"); sconf->write_binary(output_path + "/c_TEDF.hd5", "HDF5"); - // end logger initialisation + // end scatterer initialisation //======================== // Initialise gconf from configuration files @@ -223,7 +224,7 @@ void cluster(const string& config_file, const string& data_file, const string& o nsph, cid->c1->li, cid->c1->le, gconf->mxndm, gconf->in_pol, gconf->npnt, gconf->npntts, gconf->iavm, gconf->iavm ); -#endif +#endif // USE_ILP64 p_output->append_line(virtual_line); sprintf(virtual_line, " READ(IR,*)RXX(I),RYY(I),RZZ(I)\n"); p_output->append_line(virtual_line); @@ -269,7 +270,7 @@ void cluster(const string& config_file, const string& data_file, const string& o p_output->append_line(virtual_line); sprintf(virtual_line, " \n"); p_output->append_line(virtual_line); - str(sconf, cid->c1, cid->c3, cid->c6); + str(sconf, cid->c1); thdps(cid->c1->lm, cid->zpv); double exdc = sconf->exdc; double exri = sqrt(exdc); @@ -316,12 +317,7 @@ void cluster(const string& config_file, const string& data_file, const string& o // do the first 
iteration on jxi488 separately, since it seems to be different from the others int jxi488 = 1; - // In the first iteration, if refinement is enabled, determine the number of refinement iterations required to arrive at the target accuracy (if achievable in a reasonable number of iterations) - cid->refinemode = 2; - // maxrefiters and accuracygoal should be configurable and preferably set somewhere else - cid->maxrefiters = 20; int initialmaxrefiters = cid->maxrefiters; - cid->accuracygoal = 1e-6; chrono::time_point start_iter_1 = chrono::high_resolution_clock::now(); #ifdef USE_NVTX nvtxRangePush("First iteration"); @@ -753,17 +749,17 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf int ici = (nsh + 1) / 2; if (idfc == 0) { for (int ic = 0; ic < ici; ic++) - cid->c2->dc0[ic] = sconf->get_dielectric_constant(ic, i132 - 1, jxi488 - 1); + cid->c1->dc0[ic] = sconf->get_dielectric_constant(ic, i132 - 1, jxi488 - 1); } else { if (jxi488 == 1) { for (int ic = 0; ic < ici; ic++) - cid->c2->dc0[ic] = sconf->get_dielectric_constant(ic, i132 - 1, 0); + cid->c1->dc0[ic] = sconf->get_dielectric_constant(ic, i132 - 1, 0); } } - if (nsh % 2 == 0) cid->c2->dc0[ici] = exdc; + if (nsh % 2 == 0) cid->c1->dc0[ici] = exdc; dme( cid->c1->li, i132, npnt, npntts, vkarg, exdc, exri, - cid->c1, cid->c2, jer, lcalc, cid->arg, last_configuration + cid->c1, jer, lcalc, cid->arg, last_configuration ); if (jer != 0) { sprintf(virtual_line, " STOP IN DME\n"); @@ -796,7 +792,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf outam0->write_to_disk(outam0_name); delete outam0; #endif - cms(cid->am, cid->c1, cid->c6); + cms(cid->am, cid->c1); #ifdef DEBUG_AM VirtualAsciiFile *outam1 = new VirtualAsciiFile(); string outam1_name = output_path + "/c_AM1_JXI" + to_string(jxi488) + ".txt"; @@ -861,7 +857,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf #ifdef USE_NVTX nvtxRangePush("Average 
calculation"); #endif - ztm(cid->am, cid->c1, cid->c6, cid->c9); + ztm(cid->am, cid->c1); #ifdef DEBUG_AM VirtualAsciiFile *outam3 = new VirtualAsciiFile(); string outam3_name = output_path + "/c_AM3_JXI" + to_string(jxi488) + ".txt"; @@ -894,7 +890,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf double cs0 = 0.25 * cid->vk * cid->vk * cid->vk / acos(0.0); double csch = 0.0, qschu = 0.0, pschu = 0.0, s0mag = 0.0; dcomplex s0 = 0.0 + 0.0 * I; - scr0(cid->vk, exri, cid->c1, cid->c3); + scr0(cid->vk, exri, cid->c1); double sqk = cid->vk * cid->vk * exdc; aps(cid->zpv, cid->c1->li, nsph, cid->c1, sqk, cid->gaps); rabas(inpol, cid->c1->li, nsph, cid->c1, cid->tqse, cid->tqspe, cid->tqss, cid->tqsps); @@ -914,10 +910,10 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf sprintf(virtual_line, " SPHERE %2d\n", i170); output->append_line(virtual_line); if (cid->c1->nshl[last_configuration - 1] != 1) { - sprintf(virtual_line, " SIZE=%15.7lE\n", cid->c2->vsz[i]); + sprintf(virtual_line, " SIZE=%15.7lE\n", cid->c1->vsz[i]); output->append_line(virtual_line); } else { // label 162 - sprintf(virtual_line, " SIZE=%15.7lE, REFRACTIVE INDEX=%15.7lE%15.7lE\n", cid->c2->vsz[i], real(cid->c2->vkt[i]), imag(cid->c2->vkt[i])); + sprintf(virtual_line, " SIZE=%15.7lE, REFRACTIVE INDEX=%15.7lE%15.7lE\n", cid->c1->vsz[i], real(cid->c1->vkt[i]), imag(cid->c1->vkt[i])); output->append_line(virtual_line); } // label 164 @@ -951,10 +947,10 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf output->append_line(virtual_line); } } // i170 loop - sprintf(virtual_line, " FSAT=%15.7lE%15.7lE\n", real(cid->c3->tfsas), imag(cid->c3->tfsas)); + sprintf(virtual_line, " FSAT=%15.7lE%15.7lE\n", real(cid->c1->tfsas), imag(cid->c1->tfsas)); output->append_line(virtual_line); - csch = 2.0 * cid->vk * cid->sqsfi / cid->c3->gcs; - s0 = cid->c3->tfsas * exri; + csch = 2.0 * cid->vk * cid->sqsfi / cid->c1->gcs; 
+ s0 = cid->c1->tfsas * exri; qschu = imag(s0) * csch; pschu = real(s0) * csch; s0mag = cabs(s0) * cs0; @@ -1055,7 +1051,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf } } // label 194 - if (iavm == 1) crsm1(cid->vk, exri, cid->c1, cid->c6); + if (iavm == 1) crsm1(cid->vk, exri, cid->c1); if (isam < 0) { apc(cid->zpv, cid->c1->le, cid->c1->am0m, cid->c1->w, sqk, cid->gap, cid->gapp); raba(cid->c1->le, cid->c1->am0m, cid->c1->w, cid->tqce, cid->tqcpe, cid->tqcs, cid->tqcps); @@ -1123,23 +1119,23 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf int ipol = (ilr210 % 2 == 0) ? 1 : -1; if (ilr210 == 2) jlr = 1; double extsm = cid->c1->ecscm[ilr210 - 1]; - double qextm = extsm * cid->sqsfi / cid->c3->gcs; - double extrm = extsm / cid->c3->ecs; + double qextm = extsm * cid->sqsfi / cid->c1->gcs; + double extrm = extsm / cid->c1->ecs; double scasm = cid->c1->scscm[ilr210 - 1]; double albdm = scasm / extsm; - double qscam = scasm * cid->sqsfi / cid->c3->gcs; - double scarm = scasm / cid->c3->scs; + double qscam = scasm * cid->sqsfi / cid->c1->gcs; + double scarm = scasm / cid->c1->scs; double abssm = extsm - scasm; - double qabsm = abssm * cid->sqsfi / cid->c3->gcs; - double absrm = abssm / cid->c3->acs; - double acsecs = cid->c3->acs / cid->c3->ecs; + double qabsm = abssm * cid->sqsfi / cid->c1->gcs; + double absrm = abssm / cid->c1->acs; + double acsecs = cid->c1->acs / cid->c1->ecs; if (acsecs >= -1.0e-6 && acsecs <= 1.0e-6) absrm = 1.0; dcomplex s0m = cid->c1->fsacm[ilr210 - 1][ilr210 - 1] * exri; double qschum = imag(s0m) * csch; double pschum = real(s0m) * csch; double s0magm = cabs(s0m) * cs0; - double rfinrm = real(cid->c1->fsacm[ilr210 - 1][ilr210 - 1]) / real(cid->c3->tfsas); - double extcrm = imag(cid->c1->fsacm[ilr210 - 1][ilr210 - 1]) / imag(cid->c3->tfsas); + double rfinrm = real(cid->c1->fsacm[ilr210 - 1][ilr210 - 1]) / real(cid->c1->tfsas); + double extcrm = imag(cid->c1->fsacm[ilr210 
- 1][ilr210 - 1]) / imag(cid->c1->tfsas); if (inpol == 0) { sprintf(virtual_line, " LIN %2d\n", ipol); output->append_line(virtual_line); @@ -1223,7 +1219,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf output->append_line(virtual_line); } // label 224 - scr2(cid->vk, vkarg, exri, cid->duk, cid->c1, cid->c3); + scr2(cid->vk, vkarg, exri, cid->duk, cid->c1); if (cid->c1->li != cid->c1->le) { sprintf(virtual_line, " SPHERES; LMX=MIN0(LI,LE)\n"); output->append_line(virtual_line); @@ -1270,14 +1266,14 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf } // i226 loop sprintf( virtual_line, " SAT(1,1)=%15.7lE%15.7lE, SAT(2,1)=%15.7lE%15.7lE\n", - real(cid->c3->tsas[0][0]), imag(cid->c3->tsas[0][0]), - real(cid->c3->tsas[1][0]), imag(cid->c3->tsas[1][0]) + real(cid->c1->tsas[0][0]), imag(cid->c1->tsas[0][0]), + real(cid->c1->tsas[1][0]), imag(cid->c1->tsas[1][0]) ); output->append_line(virtual_line); sprintf( virtual_line, " SAT(1,2)=%15.7lE%15.7lE, SAT(2,2)=%15.7lE%15.7lE\n", - real(cid->c3->tsas[0][1]), imag(cid->c3->tsas[0][1]), - real(cid->c3->tsas[1][1]), imag(cid->c3->tsas[1][1]) + real(cid->c1->tsas[0][1]), imag(cid->c1->tsas[0][1]), + real(cid->c1->tsas[1][1]), imag(cid->c1->tsas[1][1]) ); output->append_line(virtual_line); sprintf(virtual_line, " CLUSTER\n"); @@ -1387,23 +1383,23 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf int ipol = (ilr290 % 2 == 0) ? 
1 : -1; if (ilr290 == 2) jlr = 1; double extsec = cid->c1->ecsc[ilr290 - 1]; - double qext = extsec * cid->sqsfi / cid->c3->gcs; - double extrat = extsec / cid->c3->ecs; + double qext = extsec * cid->sqsfi / cid->c1->gcs; + double extrat = extsec / cid->c1->ecs; double scasec = cid->c1->scsc[ilr290 - 1]; double albedc = scasec / extsec; - double qsca = scasec * cid->sqsfi / cid->c3->gcs; - double scarat = scasec / cid->c3->scs; + double qsca = scasec * cid->sqsfi / cid->c1->gcs; + double scarat = scasec / cid->c1->scs; double abssec = extsec - scasec; - double qabs = abssec * cid->sqsfi / cid->c3->gcs; + double qabs = abssec * cid->sqsfi / cid->c1->gcs; double absrat = 1.0; - double ratio = cid->c3->acs / cid->c3->ecs; - if (ratio < -1.0e-6 || ratio > 1.0e-6) absrat = abssec / cid->c3->acs; + double ratio = cid->c1->acs / cid->c1->ecs; + if (ratio < -1.0e-6 || ratio > 1.0e-6) absrat = abssec / cid->c1->acs; s0 = cid->c1->fsac[ilr290 - 1][ilr290 - 1] * exri; double qschu = imag(s0) * csch; double pschu = real(s0) * csch; s0mag = cabs(s0) * cs0; - double refinr = real(cid->c1->fsac[ilr290 - 1][ilr290 - 1]) / real(cid->c3->tfsas); - double extcor = imag(cid->c1->fsac[ilr290 - 1][ilr290 - 1]) / imag(cid->c3->tfsas); + double refinr = real(cid->c1->fsac[ilr290 - 1][ilr290 - 1]) / real(cid->c1->tfsas); + double extcor = imag(cid->c1->fsac[ilr290 - 1][ilr290 - 1]) / imag(cid->c1->tfsas); if (inpol == 0) { sprintf(virtual_line, " LIN %2d\n", ipol); output->append_line(virtual_line); diff --git a/src/include/Commons.h b/src/include/Commons.h index b5ba46139573dd1a217a4dc254e0692a7aa0c3d3..88958c8d8ff15b9b45f38b5f2f3d717c276f22d8 100644 --- a/src/include/Commons.h +++ b/src/include/Commons.h @@ -38,208 +38,8 @@ #endif class ParticleDescriptor; -class mixMPI; -/*! \brief Representation of the FORTRAN C2 blocks. - * - */ -class C2 { -protected: - //! \brief Number of spheres. - int nsph; - //! \brief Number of required orders. - int nhspo; - //! \brief QUESTION: what is nl? 
- int nl; - -public: - //! \brief QUESTION: definition? - dcomplex *ris; - //! \brief QUESTION: definition? - dcomplex *dlri; - //! \brief Vector of dielectric constants. - dcomplex *dc0; - //! \brief QUESTION: definition? - dcomplex *vkt; - //! Vector of scaled sizes. QUESTION: correct? - double *vsz; - - /*! \brief C2 instance constructor. - * - * \param gconf: `GeometryConfiguration*` Pointer to a GeometryConfiguration instance. - * \param sconf: `ScattererConfiguration*` Pointer to a ScattererConfiguration instance. - */ - C2(GeometryConfiguration *gconf, ScattererConfiguration *sconf); - - /*! \brief C2 instance constructor copying its contents from preexisting instance. - * - * \param rhs: `C2` object to copy contents from - */ - C2(const C2& rhs); - - //! \brief C2 instance destroyer. - ~C2(); - -#ifdef MPI_VERSION - /*! \brief C2 instance constructor copying all contents off MPI broadcast from MPI process 0 - * - * \param mpidata: `mixMPI *` pointer to MPI data structure. - */ - C2(const mixMPI *mpidata); - - /*! \brief send C2 instance from MPI process 0 via MPI broadcasts to all other processes - * - * \param mpidata: `mixMPI *` pointer to MPI data structure. - */ - void mpibcast(const mixMPI *mpidata); -#endif - -}; - -/*! \brief Representation of the FORTRAN C3 blocks. - */ -class C3 { -public: - //! \brief QUESTION: definition? - dcomplex tfsas; - //! \brief QUESTION: definition? - dcomplex **tsas; - //! \brief Total geometric cross-section. - double gcs; - //! \brief Total scattering cross-section. - double scs; - //! \brief Total extinction cross-section. - double ecs; - //! \brief Total absorption cross-section. - double acs; - - /*! \brief C3 instance constructor. - */ - C3(); - - /*! \brief C3 instance constructor copying its contents from a preexisting object. - */ - C3(const C3& rhs); - - /*! \brief C3 instance destroyer. - */ - ~C3(); - -#ifdef MPI_VERSION - /*! 
\brief C3 instance constructor copying all contents off MPI broadcast from MPI process 0 - * - * \param mpidata: `mixMPI *` pointer to MPI data structure. - */ - C3(const mixMPI *mpidata); - - /*! \brief send C3 instance from MPI process 0 via MPI broadcasts to all other processes - * - * \param mpidata: `mixMPI *` pointer to MPI data structure. - */ - void mpibcast(const mixMPI *mpidata); -#endif - -}; - -/*! \brief Representation of the FORTRAN C6 blocks. - */ -class C6 { -public: - //! \brief LMTPO = 2 * LM + 1. - int lmtpo; - //! \brief QUESTION: definition? - double *rac3j; - - /*! \brief C6 instance constructor. - * - * \param lmtpo: `int` QUESTION: definition? - */ - C6(int lmtpo); - - /*! \brief C6 instance constructor copying contents from preexisting object. - * - * \param lmtpo: `int` QUESTION: definition? - */ - C6(const C6& rhs); - - /*! \brief C6 instance destroyer. - */ - ~C6(); - -#ifdef MPI_VERSION - /*! \brief C6 instance constructor copying all contents off MPI broadcast from MPI process 0 - * - * \param mpidata: `mixMPI *` pointer to MPI data structure. - */ - C6(const mixMPI *mpidata); - - /*! \brief send C6 instance from MPI process 0 via MPI broadcasts to all other processes - * - * \param mpidata: `mixMPI *` pointer to MPI data structure. - */ - void mpibcast(const mixMPI *mpidata); -#endif - -}; - - -/*! \brief Representation of the FORTRAN C9 blocks. - */ -class C9 { -protected: - //! \brief Number of rows in the GIS and GLS matrices - int gis_size_0; - //! \brief Number of rows in the SAM matrix - int sam_size_0; - -public: - //! \brief NLEM = LE * (LE + 2) - int nlem; - //! \brief NLEMT = 2 * LE * (LE + 2) - int nlemt; - //! \brief QUESTION: definition? - dcomplex **gis; - //! \brief QUESTION: definition? - dcomplex **gls; - //! \brief QUESTION: definition? - dcomplex **sam; - - /*! \brief C9 instance constructor. 
- * - * \param ndi: `int` NDI = NSPH * LI * (LI + 2) - * \param nlem: `int` NLEM = LE * (LE + 2) - * \param ndit: `int` NDIT = 2 * NSPH * LI * (LI + 2) - * \param nlemt: `int` NLEMT = 2 * LE * (LE + 2) - */ - C9(int ndi, int nlem, int ndit, int nlemt); - - /*! \brief C9 instance constructor copying contents from preexisting object. - * - * \param rhs: `C9` preexisting object to copy from - */ - C9(const C9& rhs); - - /*! \brief C9 instance destroyer. - */ - ~C9(); - -#ifdef MPI_VERSION - /*! \brief C9 instance constructor copying all contents off MPI broadcast from MPI process 0 - * - * \param mpidata: `mixMPI *` pointer to MPI data structure. - */ - C9(const mixMPI *mpidata); - - /*! \brief send C9 instance from MPI process 0 via MPI broadcasts to all other processes - * - * \param mpidata: `mixMPI *` pointer to MPI data structure. - */ - void mpibcast(const mixMPI *mpidata); -#endif - -}; - -/*! \brief structure with essential MPI data. +/*! \brief Structure with essential MPI data. */ class mixMPI { public: @@ -274,16 +74,8 @@ public: */ class ClusterIterationData { public: - //! \brief Pointer to a C1 structure. + //! \brief Pointer to a ParticleDescriptor structure. ParticleDescriptor *c1; - //! \brief Pointer to a C2 structure. - C2 *c2; - //! \brief Pointer to a C3 structure. - C3 *c3; - //! \brief Pointer to a C6 structure. - C6 *c6; - //! \brief Pointer to a C9 structure. - C9 *c9; //! \brief Vector of geometric asymmetry factors. double *gaps; double **tqse; @@ -341,8 +133,11 @@ public: int lastxi; //! \brief ID of the GPU used by one MPI process. int proc_device; + //! \brief Refinement mode selection flag. int refinemode; + //! \brief Maximum number of refinement iterations. int maxrefiters; + //! \brief Required accuracy level. double accuracygoal; ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count); @@ -387,6 +182,14 @@ protected: int _num_configurations; //! 
\brief Total number of layers from all sphere types. int _num_layers; + //! \brief Space for different sphere types. + int _nl; + //! \brief NHSPO = 2 * MAX(NPNT,NPNTTS) - 1 + int _nhspo; + //! \brief Number of points for numerical integration in layered spheres. + int _npnt; + //! \brief Number of points for numerical integration in transition layer. + int _npntts; //! \brief Contiguous space for RMI. dcomplex *vec_rmi; //! \brief Contiguous space for REI. @@ -404,14 +207,25 @@ protected: dcomplex *vec_vints; // >>> END OF SECTION NEEDED BY SPHERE AND CLUSTER <<< // + // >>> NEEDED BY CLUSTER <<< // + //! \brief Contiguous space for TSAS. + dcomplex *vec_tsas; + //! \brief Contiguous space for GIS. + dcomplex *vec_gis; + //! \brief Contiguous space for GLS. + dcomplex *vec_gls; + //! \brief Contiguous space for SAM. + dcomplex *vec_sam; + // >>> END OF SECTION NEEDED BY CLUSTER <<< // + // >>> NEEDED BY CLUSTER AND INCLU <<< //! \brief Maximum external field expansion order. int _le; //! \brief Maximum field expansion order. int _lm; - //! \brief NLIM = NSPH * LI * (LI + 2) + //! \brief NLIM = LI * (LI + 2) int _nlim; - //! \brief NLEM = NSPH * LE * (LE + 2) + //! \brief NLEM = LE * (LE + 2) int _nlem; //! \brief NLEMT = 2 * NLEM int _nlemt; @@ -429,18 +243,28 @@ protected: int _lmtpos; //! \brief NV3J = (LM * (LM + 1) * (2 * LM + 7)) / 6 int _nv3j; - //! \brief Contiguous space for AM0M + //! \brief NDI = NSPH * NLIM + int _ndi; + //! \brief NDIT = 2 * NSPH * NLIM + int _ndit; + //! \brief Contiguous space for AM0M. dcomplex *vec_am0m; - //! \brief Contiguous space for FSAC + //! \brief Contiguous space for FSAC. dcomplex *vec_fsac; - //! \brief Contiguous space for SAC + //! \brief Contiguous space for SAC. dcomplex *vec_sac; - //! \brief Contiguous space for FSACM + //! \brief Contiguous space for FSACM. dcomplex *vec_fsacm; - //! \brief Contiguous space for IND3J + //! \brief Contiguous space for IND3J. 
int *vec_ind3j; // >>> END OF SECTION NEEDED BY CLUSTER AND INCLU <<< // + // >>> NEEDED BY INCLU <<< // + //! \brief NDM = NDIT + NLEMT + int _ndm; + //! \brief Contiguous space for AT. + dcomplex *vec_at; + // >>> END OF SECTION NEEDED BY INCLU <<< // public: // >>> COMMON TO ALL DESCRIPTOR TYPES <<< // //! \brief Base sub-class identification code. @@ -462,11 +286,19 @@ public: const int &num_configurations = _num_configurations; //! \brief Read-only view of total number of layers from all sphere types. const int &num_layers = _num_layers; - //! \brief Matrix of inverse radial M coefficients. + //! \brief Read-only view on the space for different sphere configurations. + const int &nl = _nl; + //! \brief Read-only view of NHSPO. + const int &nhspo = _nhspo; + //! \brief Read-only view on number of points for numerical integration in layered spheres. + const int &npnt = _npnt; + //! \brief Read-only view on number of points for numerical integration in transition layer. + const int &npntts = _npntts; + //! \brief Matrix of Mie scattering b-coefficients for different orders and spheres. dcomplex **rmi; - //! \brief Matrix of inverse radial E coefficients. + //! \brief Matrix of Mie scattering a-coefficients for different orders and spheres. dcomplex **rei; - //! \brief Matrix of W coefficients. + //! \brief Matrix of multipole amplitudes for incident and scattered fields. dcomplex **w; //! \brief Vector of intensity components. dcomplex *vint; @@ -484,6 +316,16 @@ public: int *iog; //! \brief Vector of number of layers in sphere type. int *nshl; + //! \brief TBD + dcomplex *ris; + //! \brief TBD + dcomplex *dlri; + //! \brief Vector of dielectric constants. + dcomplex *dc0; + //! \brief TBD + dcomplex *vkt; + //! 
\brief Vector of sizes in units of 2*PI/LAMBDA + double *vsz; // >>> END OF SECTION COMMON TO ALL DESCRIPTOR TYPES <<< // // >>> NEEDED BY SPHERE AND CLUSTER <<< // @@ -510,7 +352,24 @@ public: // >>> END OF SECTION NEEDED BY SPHERE AND CLUSTER <<< // // >>> NEEDED BY CLUSTER <<< + //! \brief Vector of field intensity components. dcomplex *vintt; + //! \brief Total forward scattering amplitude. + dcomplex tfsas; + //! \brief Total scattering amplitude. + dcomplex **tsas; + //! \brief Total scattering cross-section. + double scs; + //! \brief Total extinction cross-section. + double ecs; + //! \brief Total absorption cross-section. + double acs; + //! \brief TBD. + dcomplex **gis; + //! \brief TBD. + dcomplex **gls; + //! \brief TBD. + dcomplex **sam; // >>> END OF SECTION NEEDED BY CLUSTER <<< // // >>> NEEDED BY CLUSTER AND INCLU <<< @@ -538,6 +397,14 @@ public: const int& lmtpos = _lmtpos; //! \brief Read-only view of NV3J. const int& nv3j = _nv3j; + //! \brief Read-only view of NDI. + const int& ndi = _ndi; + //! \brief Read-only view of NDIT. + const int& ndit = _ndit; + //! \brief Read-only view of NDM. + const int& ndm = _ndm; + //! \brief Total geometric cross-section. + double gcs; //! \brief TBD dcomplex *vh; @@ -579,6 +446,8 @@ public: double *ecscm; //! \brief J-vector components index matrix. int **ind3j; + //! \brief J-vector boundary values. QUESTION: correct? + double *rac3j; // >>> END OF SECTION NEEDED BY CLUSTER AND INCLU <<< // // >>> NEEDED BY INCLU <<< // @@ -598,6 +467,8 @@ public: dcomplex *tm0; //! \brief TBD dcomplex *te0; + //! \brief TBD + dcomplex **at; // >>> END OF SECTION NEEDED BY INCLU <<< // /*! \brief ParticleDescriptor instance constructor. 
diff --git a/src/include/clu_subs.h b/src/include/clu_subs.h index 87475bf72e59154266b46fdae617eaba8e1d51fb..32994085c4dfc5e3a4cf2ccaf0cb1dffab3b7d2e 100644 --- a/src/include/clu_subs.h +++ b/src/include/clu_subs.h @@ -99,9 +99,8 @@ double cgev(int ipamo, int mu, int l, int m); * * \param am: `complex double **` * \param c1: `ParticleDescriptor *` - * \param c6: `C6 *` */ -void cms(dcomplex **am, ParticleDescriptor *c1, C6 *c6); +void cms(dcomplex **am, ParticleDescriptor *c1); /*! \brief Compute orientation-averaged scattered field intensity. * @@ -112,9 +111,8 @@ void cms(dcomplex **am, ParticleDescriptor *c1, C6 *c6); * \param vk: `double` Wave number. * \param exri: `double` External medium refractive index. * \param c1: `ParticleDescriptor *` - * \param c6: `C6 *` */ -void crsm1(double vk, double exri, ParticleDescriptor *c1, C6 *c6); +void crsm1(double vk, double exri, ParticleDescriptor *c1); /*! \brief Compute the transfer vector from N2 to N1. * @@ -129,11 +127,10 @@ void crsm1(double vk, double exri, ParticleDescriptor *c1, C6 *c6); * \param l2: `int` * \param m2: `int` * \param c1: `ParticleDescriptor *` - * \param rac3j: `double *` */ dcomplex ghit_d( int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2, - ParticleDescriptor *c1, double *rac3j + ParticleDescriptor *c1 ); /*! \brief Compute the transfer vector from N2 to N1. @@ -148,12 +145,11 @@ dcomplex ghit_d( * \param m1: `int` * \param l2: `int` * \param m2: `int` - * \param c1: `C1 *` - * \param c6: `C6 *` + * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance. */ dcomplex ghit( int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2, - ParticleDescriptor *c1, C6 *c6 + ParticleDescriptor *c1 ); /*! \brief Compute Hankel funtion and Bessel functions. @@ -249,9 +245,9 @@ void polar( * * \param j2: `int` * \param j3: `int` - * \param c6: `C6 *` Pointer to a C6 instance. + * \param rac3j: `double *` Vector of 3j symbols. 
*/ -void r3j000(int j2, int j3, C6 *c6); +void r3j000(int j2, int j3, double *rac3j); /*! \brief Compute the 3j symbol for Clebsch-Gordan coefficients for JJ transitions. * @@ -262,9 +258,9 @@ void r3j000(int j2, int j3, C6 *c6); * \param j3: `int` * \param m2: `int` * \param m3: `int` - * \param c6: `C6 *` + * \param rac3j: `double *` Vector of 3j symbols. */ -void r3jjr(int j2, int j3, int m2, int m3, C6 *c6); +void r3jjr(int j2, int j3, int m2, int m3, double *rac3j); /*! \brief Compute the 3j symbol for Clebsch-Gordan coefficients for JJ transitions. * @@ -288,9 +284,9 @@ void r3jjr_d(int j2, int j3, int m2, int m3, double *rac3j); * \param j2: `int` * \param j3: `int` * \param m1: `int` - * \param c6: `C6 *` + * \param rac3j: `double *` Vector of 3j symbols. */ -void r3jmr(int j1, int j2, int j3, int m1, C6 *c6); +void r3jmr(int j1, int j2, int j3, int m1, double *rac3j); /*! \brief Compute radiation torques on a particle in Cartesian coordinates. * @@ -348,9 +344,8 @@ void rftr( * \param vk: `double` Wave number * \param exri: `double` External medium refractive index. * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance. - * \param c3: `C3 *` Pointer to a C3 instance. */ -void scr0(double vk, double exri, ParticleDescriptor *c1, C3 *c3); +void scr0(double vk, double exri, ParticleDescriptor *c1); /*! \brief Compute the scattering amplitude for a single sphere in an aggregate. * @@ -362,11 +357,9 @@ void scr0(double vk, double exri, ParticleDescriptor *c1, C3 *c3); * \param exri: `double` External medium refractive index. * \param duk: `double *` QUESTION: definition? * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance. - * \param c3: `C3 *` Pointer to a C3 instance. */ void scr2( - double vk, double vkarg, double exri, double *duk, ParticleDescriptor *c1, - C3 *c3 + double vk, double vkarg, double exri, double *duk, ParticleDescriptor *c1 ); /*! 
\brief Transform sphere Cartesian coordinates to spherical coordinates. @@ -377,10 +370,8 @@ void scr2( * * \param sconf: `ScattererConfiguration *` Pointer to scatterer configuration object. * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance. - * \param c3: `C3 *` Pointer to a C3 instance. - * \param c6: `C6 *` Pointer to a C6 instance. */ -void str(ScattererConfiguration *sconf, ParticleDescriptor *c1, C3 *c3, C6 *c6); +void str(ScattererConfiguration *sconf, ParticleDescriptor *c1); /*! \brief Compute radiation torques on particles on a k-vector oriented system. * @@ -413,9 +404,7 @@ void tqr( * * \param am: `complex double **` * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance. - * \param c6: `C6 *` Pointer to a C6 instance. - * \param c9: `C9 *` Pointer to a C9 instance. */ -void ztm(dcomplex **am, ParticleDescriptor *c1, C6 *c6, C9 * c9); +void ztm(dcomplex **am, ParticleDescriptor *c1); #endif diff --git a/src/include/inclu_subs.h b/src/include/inclu_subs.h new file mode 100644 index 0000000000000000000000000000000000000000..d8696d96103ffaccd76cd1ab5efdd36ee7856f4a --- /dev/null +++ b/src/include/inclu_subs.h @@ -0,0 +1,98 @@ +/* Copyright (C) 2024 INAF - Osservatorio Astronomico di Cagliari + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + A copy of the GNU General Public License is distributed along with + this program in the COPYING file. If not, see: <https://www.gnu.org/licenses/>. + */ + +/*! \file inclu_subs.h + * + * \brief C++ porting of INCLU functions and subroutines. 
+ * + * This library includes a collection of functions that are used to solve the + * scattering problem in the case of a sphere with a cluster of inclusions. Like + * the other use cases, many of the functions declared here execute various + * calculations on different data structures. In order to manage access to such + * variety of calculations, most functions are declared as `void` and they operate + * on output arguments passed by reference. + */ + +#ifndef INCLUDE_INCLU_SUBS_H_ +#define INCLUDE_INCLU_SUBS_H_ + +/*! \brief C++ porting of CNF. + * + * \param n: `int` Bessel y function order. + * \param z: `dcomplex` Argument of Bessel y function. + * \param nm: `int` Maximum computed order. + * \param csj: `dcomplex *` TBD. + * \param csy: `dcomplex *` Complex spherical Bessel functions up to desired order. + */ +void cnf(int n, dcomplex z, int nm, dcomplex *csj, dcomplex *csy); + +/*! \brief C++ porting of EXMA. + * + * \param am: `dcomplex **` Field transition coefficients matrix. + * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance. + */ +void exma(dcomplex **am, ParticleDescriptor *c1); + +/*! \brief C++ porting of INCMS. + * + * \param am: `dcomplex **` Field transition coefficients matrix. + * \param enti: `double` TBD. + * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance. + */ +void incms(dcomplex **am, double enti, ParticleDescriptor *c1); + +/*! \brief C++ porting of INDME. + * + * NOTE(review): a parameter `li` (`int`, maximum internal field expansion order) was documented here, but the declaration below takes no such argument; confirm and drop this line. + * \param i: `int` 1-based sphere configuration index. + * \param npnt: `int` TBD. + * \param npntts: `int` TBD. + * \param vk: `double` Vacuum wave vector magnitude. + * \param ent: `dcomplex` TBD. + * \param enti: `double` TBD. + * \param entn: `dcomplex` TBD. + * \param jer: `int &` Error code flag. + * \param lcalc: `int &` Maximum order achieved in calculation. + * \param arg: `dcomplex &` TBD. 
+ * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance. + */ +void indme( + int i, int npnt, int npntts, double vk, dcomplex ent, double enti, + dcomplex entn, int &jer, int &lcalc, dcomplex &arg, ParticleDescriptor *c1 +); + +/*! \brief C++ porting of INSTR. + * + * \param sconf: `ScattererConfiguration *` Pointer to a ScattererConfiguration instance. + * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance. + */ +void instr(ScattererConfiguration *sconf, ParticleDescriptor *c1); + +/*! \brief C++ porting of OSPV. + * + * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance. + * \param vk: `double` TBD. + * \param sze: `double` TBD. + * \param exri: `double` External medium refractive index. + * \param entn: `dcomplex` Outer sphere refractive index. + * \param enti: `double` Imaginary part of the outer medium refractive index. + * \param jer: `int &` Reference to an integer error flag. + * \param lcalc: `int &` Maximum order achieved in calculation. + * \param arg: `dcomplex &` Complex Bessel function argument, passed by reference. + */ +void ospv(ParticleDescriptor *c1, double vk, double sze, double exri, dcomplex entn, double enti, int &jer, int &lcalc, dcomplex &arg); + +#endif // INCLUDE_INCLU_SUBS_H_ diff --git a/src/include/sph_subs.h b/src/include/sph_subs.h index 0ae5b2809f8216534785082b857c28568df37973..d4d9bc7bf03fabd198674c3abe96c95c7f1d2dd9 100644 --- a/src/include/sph_subs.h +++ b/src/include/sph_subs.h @@ -89,9 +89,8 @@ double cg1(int lmpml, int mu, int l, int m); * \param ic: `int` * \param vk: `double` * \param c1: `ParticleDescriptor *` Pointer to `ParticleDescriptor` data structure. - * \param c2: `C2 *` Pointer to `C2` data structure. */ -void diel(int npntmo, int ns, int i, int ic, double vk, ParticleDescriptor *c1, C2 *c2); +void diel(int npntmo, int ns, int i, int ic, double vk, ParticleDescriptor *c1); /*! \brief Compute Mie scattering coefficients. 
* @@ -108,7 +107,6 @@ void diel(int npntmo, int ns, int i, int ic, double vk, ParticleDescriptor *c1, * \param exdc: `double` External medium dielectric constant. * \param exri: `double` External medium refractive index. * \param c1: `ParticleDescriptor *` Pointer to a `ParticleDescriptor` data structure. - * \param c2: `C2 *` Pointer to a `C2` data structure. * \param jer: `int &` Reference to integer error code variable. * \param lcalc: `int &` Reference to integer variable recording the maximum expansion order accounted for. * \param arg: `complex double &` @@ -116,7 +114,7 @@ void diel(int npntmo, int ns, int i, int ic, double vk, ParticleDescriptor *c1, */ void dme( int li, int i, int npnt, int npntts, double vk, double exdc, double exri, - ParticleDescriptor *c1, C2 *c2, int &jer, int &lcalc, dcomplex &arg, int last_conf=0 + ParticleDescriptor *c1, int &jer, int &lcalc, dcomplex &arg, int last_conf=0 ); /*! \brief Bessel function calculation control parameters. @@ -262,11 +260,11 @@ void rkc( * \param y2: `complex double &` * \param dy1: `complex double &` * \param dy2: `complex double &` - * \param c2: `C2 *` Pointer to a `C2` data structure. + * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance. */ void rkt( int npntmo, double step, double &x, int lpo, dcomplex &y1, - dcomplex &y2, dcomplex &dy1, dcomplex &dy2, C2 *c2 + dcomplex &y2, dcomplex &dy1, dcomplex &dy2, ParticleDescriptor *c1 ); /*! \brief Spherical Bessel functions. 
diff --git a/src/inclusion/inclusion.cpp b/src/inclusion/inclusion.cpp index c75bbd76d78dfc5ad8ba0e7591d956e4160f8078..8b01327cfc027683d4680e77303a0f9a3e48f889 100644 --- a/src/inclusion/inclusion.cpp +++ b/src/inclusion/inclusion.cpp @@ -67,6 +67,10 @@ #include "../include/clu_subs.h" #endif +#ifndef INCLUDE_INCLU_SUBS_H_ +#include "../include/inclu_subs.h" +#endif + #ifndef INCLUDE_TRANSITIONMATRIX_H_ #include "../include/TransitionMatrix.h" #endif @@ -89,6 +93,657 @@ using namespace std; +// >>> InclusionIterationData header <<< // +/*! \brief A data structure representing the information used for a single scale + * of the INCLUSION case. + */ +class InclusionIterationData { +protected: + double *vec_zpv; + +public: + int nimd; + double extr; + + //! \brief Pointer to a ParticleDescriptor structure. + ParticleDescriptor *c1; + //! \brief Vector of geometric asymmetry factors. + double *gaps; + double **tqse; + dcomplex **tqspe; + double **tqss; + dcomplex **tqsps; + double ****zpv; + double **gapm; + dcomplex **gappm; + double *argi; + double *args; + double **gap; + dcomplex **gapp; + double **tqce; + dcomplex **tqcpe; + double **tqcs; + dcomplex **tqcps; + double *duk; + double **cextlr; + double **cext; + double **cmullr; + double **cmul; + double *gapv; + double *tqev; + double *tqsv; + double *u; + double *us; + double *un; + double *uns; + double *up; + double *ups; + double *unmp; + double *unsmp; + double *upmp; + double *upsmp; + //! \brief Scattering angle. + double scan; + double cfmp; + double sfmp; + double cfsp; + double sfsp; + double qsfi; + double sqsfi; + dcomplex *am_vector; + dcomplex **am; + dcomplex arg; + //! \brief Vacuum magnitude of wave vector. + double vk; + //! \brief Wave number. + double wn; + double xip; + int number_of_scales; + int xiblock; + int firstxi; + int lastxi; + //! \brief ID of the GPU used by one MPI process. + int proc_device; + //! \brief Refinement mode selection flag. + int refinemode; + //! 
\brief Maximum number of refinement iterations. + int maxrefiters; + //! \brief Required accuracy level. + double accuracygoal; + + InclusionIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count); + + InclusionIterationData(const InclusionIterationData& rhs); + +#ifdef MPI_VERSION + InclusionIterationData(const mixMPI *mpidata, const int device_count); + + /*! \brief Broadcast over MPI the InclusionIterationData instance from MPI process 0 to all others. + * + * When using MPI, the initial InclusionIterationData instance created by MPI process 0 + * needs to be replicated on all other processes. This function sends it using + * MPI broadcast calls. The MPI broadcast calls in this function must match those + * in the constructor using the mixMPI pointer. + * + * \param mpidata: `mixMPI *` Pointer to the mpi structure used to do the MPI broadcast. + */ + void mpibcast(const mixMPI *mpidata); +#endif + + ~InclusionIterationData(); + +}; + +// >>> End of InclusionIterationData header <<< // + +// >>> InclusionIterationData implementation <<< // +InclusionIterationData::InclusionIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count) { + c1 = new ParticleDescriptorInclusion(gconf, sconf); + const int ndi = c1->nsph * c1->nlim; + const np_int ndit = 2 * ndi; + gaps = new double[c1->nsph](); + tqev = new double[3](); + tqsv = new double[3](); + tqse = new double*[2]; + tqspe = new dcomplex*[2]; + tqss = new double*[2]; + tqsps = new dcomplex*[2]; + tqce = new double*[2]; + tqcpe = new dcomplex*[2]; + tqcs = new double*[2]; + tqcps = new dcomplex*[2]; + for (int ti = 0; ti < 2; ti++) { + tqse[ti] = new double[c1->nsph](); + tqspe[ti] = new dcomplex[c1->nsph](); + tqss[ti] = new double[c1->nsph](); + tqsps[ti] = new dcomplex[c1->nsph](); + tqce[ti] = new double[3](); + tqcpe[ti] = new dcomplex[3](); + tqcs[ti] = new double[3](); + tqcps[ti] = 
new dcomplex[3](); + } + gapv = new double[3](); + gapp = new dcomplex*[3]; + gappm = new dcomplex*[3]; + gap = new double*[3]; + gapm = new double*[3]; + for (int gi = 0; gi < 3; gi++) { + gapp[gi] = new dcomplex[2](); + gappm[gi] = new dcomplex[2](); + gap[gi] = new double[2](); + gapm[gi] = new double[2](); + } + u = new double[3](); + us = new double[3](); + un = new double[3](); + uns = new double[3](); + up = new double[3](); + ups = new double[3](); + unmp = new double[3](); + unsmp = new double[3](); + upmp = new double[3](); + upsmp = new double[3](); + argi = new double[1](); + args = new double[1](); + duk = new double[3](); + cextlr = new double*[4]; + cext = new double*[4]; + cmullr = new double*[4];; + cmul = new double*[4]; + for (int ci = 0; ci < 4; ci++) { + cextlr[ci] = new double[4](); + cext[ci] = new double[4](); + cmullr[ci] = new double[4](); + cmul[ci] = new double[4](); + } + vec_zpv = new double[c1->lm * 12](); + zpv = new double***[c1->lm]; + for (int zi = 0; zi < c1->lm; zi++) { + zpv[zi] = new double**[12]; + for (int zj = 0; zj < 3; zj++) { + zpv[zi][zj] = new double*[4]; + zpv[zi][zj][0] = vec_zpv + (zi * 12) + (zj * 4); + zpv[zi][zj][1] = vec_zpv + (zi * 12) + (zj * 4) + 2; + } + } + am_vector = new dcomplex[c1->ndm * c1->ndm](); + am = new dcomplex*[c1->ndm]; + for (int ai = 0; ai < c1->ndm; ai++) { + am[ai] = (am_vector + ai * c1->ndm); + } + + arg = 0.0 + 0.0 * I; + // These are suspect initializations + scan = 0.0; + cfmp = 0.0; + sfmp = 0.0; + cfsp = 0.0; + sfsp = 0.0; + qsfi = 0.0; + // End of suspect initializations + wn = sconf->wp / 3.0e8; + xip = sconf->xip; + sqsfi = 1.0; + vk = 0.0; + number_of_scales = sconf->number_of_scales; + xiblock = (int) ceil(((double) (sconf->number_of_scales-1))/((double) mpidata->nprocs)); + lastxi = ((mpidata->rank+1) * xiblock)+1; + firstxi = lastxi-xiblock+1; + if (lastxi > sconf->number_of_scales) lastxi = sconf->number_of_scales; + + nimd = c1->nshl[0] + 1; + c1->rc[0][nimd - 1] = 
c1->ros[0] * sconf->get_rcf(0, nimd - 1); + extr = c1->rc[0][nimd - 1]; + const double pig = acos(0.0) * 2.0; + c1->gcs = pig * extr * extr; + +#ifdef USE_MAGMA + proc_device = mpidata->rank % device_count; +#else + proc_device = 0; +#endif + + // In the first iteration, if refinement is enabled, determine the number of refinement iterations required to arrive at the target accuracy (if achievable in a reasonable number of iterations) + refinemode = 2; + // maxrefiters and accuracygoal should be configurable and preferably set somewhere else + maxrefiters = 20; + accuracygoal = 1e-6; +} + +InclusionIterationData::InclusionIterationData(const InclusionIterationData& rhs) { /* Deep copy: each buffer is freshly allocated and then filled element by element from rhs. */ + c1 = new ParticleDescriptorInclusion(reinterpret_cast<ParticleDescriptorInclusion &>(*(rhs.c1))); /* every constructor of this class creates c1 as a ParticleDescriptorInclusion, so the descriptor is copied through that type */ + const int ndi = c1->nsph * c1->nlim; + const np_int ndit = 2 * ndi; /* NOTE(review): ndi and ndit are not used anywhere else in this constructor */ + gaps = new double[c1->nsph](); + for (int gi = 0; gi < c1->nsph; gi++) gaps[gi] = rhs.gaps[gi]; + tqev = new double[3](); + tqsv = new double[3](); + for (int ti = 0; ti < 3; ti++) { + tqev[ti] = rhs.tqev[ti]; + tqsv[ti] = rhs.tqsv[ti]; + } + tqse = new double*[2]; + tqspe = new dcomplex*[2]; + tqss = new double*[2]; + tqsps = new dcomplex*[2]; + tqce = new double*[2]; + tqcpe = new dcomplex*[2]; + tqcs = new double*[2]; + tqcps = new dcomplex*[2]; + for (int ti = 0; ti < 2; ti++) { + tqse[ti] = new double[c1->nsph](); + tqspe[ti] = new dcomplex[c1->nsph](); + tqss[ti] = new double[c1->nsph](); + tqsps[ti] = new dcomplex[c1->nsph](); + for (int tj = 0; tj < c1->nsph; tj++) { + tqse[ti][tj] = rhs.tqse[ti][tj]; + tqspe[ti][tj] = rhs.tqspe[ti][tj]; + tqss[ti][tj] = rhs.tqss[ti][tj]; + tqsps[ti][tj] = rhs.tqsps[ti][tj]; + } + tqce[ti] = new double[3](); + tqcpe[ti] = new dcomplex[3](); + tqcs[ti] = new double[3](); + tqcps[ti] = new dcomplex[3](); + for (int tj = 0; tj < 3; tj++) { + tqce[ti][tj] = rhs.tqce[ti][tj]; + tqcpe[ti][tj] = rhs.tqcpe[ti][tj]; + tqcs[ti][tj] = rhs.tqcs[ti][tj]; + tqcps[ti][tj] = rhs.tqcps[ti][tj]; + } + } + gapv = new double[3](); + gapp 
= new dcomplex*[3]; + gappm = new dcomplex*[3]; + gap = new double*[3]; + gapm = new double*[3]; + for (int gi = 0; gi < 3; gi++) { + gapv[gi] = rhs.gapv[gi]; + gapp[gi] = new dcomplex[2](); + gappm[gi] = new dcomplex[2](); + gap[gi] = new double[2](); + gapm[gi] = new double[2](); + for (int gj = 0; gj < 2; gj++) { + gapp[gi][gj] = rhs.gapp[gi][gj]; + gappm[gi][gj] = rhs.gappm[gi][gj]; + gap[gi][gj] = rhs.gap[gi][gj]; + gapm[gi][gj] = rhs.gapm[gi][gj]; + } + } + u = new double[3](); + us = new double[3](); + un = new double[3](); + uns = new double[3](); + up = new double[3](); + ups = new double[3](); + unmp = new double[3](); + unsmp = new double[3](); + upmp = new double[3](); + upsmp = new double[3](); + duk = new double[3](); + for (int ui = 0; ui < 3; ui++) { + u[ui] = rhs.u[ui]; + us[ui] = rhs.us[ui]; + un[ui] = rhs.un[ui]; + uns[ui] = rhs.uns[ui]; + up[ui] = rhs.up[ui]; + ups[ui] = rhs.ups[ui]; + unmp[ui] = rhs.unmp[ui]; + unsmp[ui] = rhs.unsmp[ui]; + upmp[ui] = rhs.upmp[ui]; + upsmp[ui] = rhs.upsmp[ui]; + duk[ui] = rhs.duk[ui]; + } + argi = new double[1](); + args = new double[1](); + argi[0] = rhs.argi[0]; + args[0] = rhs.args[0]; + cextlr = new double*[4]; + cext = new double*[4]; + cmullr = new double*[4];; + cmul = new double*[4]; + for (int ci = 0; ci < 4; ci++) { + cextlr[ci] = new double[4](); + cext[ci] = new double[4](); + cmullr[ci] = new double[4](); + cmul[ci] = new double[4](); + for (int cj = 0; cj < 4; cj++) { + cextlr[ci][cj] = rhs.cextlr[ci][cj]; + cext[ci][cj] = rhs.cext[ci][cj]; + cmullr[ci][cj] = rhs.cmullr[ci][cj]; + cmul[ci][cj] = rhs.cmul[ci][cj]; + } + } + vec_zpv = new double[c1->lm * 12]; + zpv = new double***[c1->lm]; + for (int zi = 0; zi < c1->lm; zi++) { + zpv[zi] = new double **[12]; + for (int zj = 0; zj < 3; zj++) { + zpv[zi][zj] = new double*[4]; + zpv[zi][zj][0] = vec_zpv + (zi * 12) + (zj * 4); + zpv[zi][zj][1] = vec_zpv + (zi * 12) + (zj * 4) + 2; + zpv[zi][zj][0][0] = rhs.zpv[zi][zj][0][0]; + zpv[zi][zj][0][1] = 
rhs.zpv[zi][zj][0][1]; + zpv[zi][zj][1][0] = rhs.zpv[zi][zj][1][0]; + zpv[zi][zj][1][1] = rhs.zpv[zi][zj][1][1]; + } + } + am_vector = new dcomplex[c1->ndm * c1->ndm]; + for (int ai = 0; ai < c1->ndm * c1->ndm; ai++) am_vector[ai] = rhs.am_vector[ai]; + am = new dcomplex*[c1->ndm]; + for (int ai = 0; ai < c1->ndm; ai++) { + am[ai] = (am_vector + ai * c1->ndm); + } + + arg = rhs.arg; + // These are suspect initializations + scan = rhs.scan; + cfmp = rhs.cfmp; + sfmp = rhs.sfmp; + cfsp = rhs.cfsp; + sfsp = rhs.sfsp; + qsfi = rhs.qsfi; + // End of suspect initializations + wn = rhs.wn; + xip = rhs.xip; + sqsfi = rhs.sqsfi; + vk = rhs.vk; + firstxi = rhs.firstxi; + lastxi = rhs.lastxi; + xiblock = rhs.xiblock; + number_of_scales = rhs.number_of_scales; + + nimd = rhs.nimd; + extr = rhs.extr; + + proc_device = rhs.proc_device; + refinemode = rhs.refinemode; + maxrefiters = rhs.maxrefiters; + accuracygoal = rhs.accuracygoal; +} + +#ifdef MPI_VERSION +InclusionIterationData::InclusionIterationData(const mixMPI *mpidata, const int device_count) { + c1 = new ParticleDescriptorInclusion(mpidata); + const int ndi = c1->nsph * c1->nlim; + const np_int ndit = 2 * ndi; + gaps = new double[c1->nsph](); + MPI_Bcast(gaps, c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD); + tqev = new double[3](); + tqsv = new double[3](); + MPI_Bcast(tqev, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(tqsv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + tqse = new double*[2]; + tqspe = new dcomplex*[2]; + tqss = new double*[2]; + tqsps = new dcomplex*[2]; + tqce = new double*[2]; + tqcpe = new dcomplex*[2]; + tqcs = new double*[2]; + tqcps = new dcomplex*[2]; + for (int ti = 0; ti < 2; ti++) { + tqse[ti] = new double[c1->nsph](); + tqspe[ti] = new dcomplex[c1->nsph](); + tqss[ti] = new double[c1->nsph](); + tqsps[ti] = new dcomplex[c1->nsph](); + MPI_Bcast(tqse[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(tqspe[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(tqss[ti], c1->nsph, 
MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(tqsps[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + tqce[ti] = new double[3](); + tqcpe[ti] = new dcomplex[3](); + tqcs[ti] = new double[3](); + tqcps[ti] = new dcomplex[3](); + MPI_Bcast(tqce[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(tqcpe[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(tqcs[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(tqcps[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + } + gapv = new double[3](); + gapp = new dcomplex*[3]; + gappm = new dcomplex*[3]; + gap = new double*[3]; + gapm = new double*[3]; + MPI_Bcast(gapv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + for (int gi = 0; gi < 3; gi++) { + gapp[gi] = new dcomplex[2](); + gappm[gi] = new dcomplex[2](); + gap[gi] = new double[2](); + gapm[gi] = new double[2](); + MPI_Bcast(gapp[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(gappm[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(gap[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(gapm[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD); + } + u = new double[3](); + us = new double[3](); + un = new double[3](); + uns = new double[3](); + up = new double[3](); + ups = new double[3](); + unmp = new double[3](); + unsmp = new double[3](); + upmp = new double[3](); + upsmp = new double[3](); + duk = new double[3](); + MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + argi = new double[1](); + args = new double[1](); + MPI_Bcast(argi, 
1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + cextlr = new double*[4]; + cext = new double*[4]; + cmullr = new double*[4];; + cmul = new double*[4]; + for (int ci = 0; ci < 4; ci++) { + cextlr[ci] = new double[4](); + cext[ci] = new double[4](); + cmullr[ci] = new double[4](); + cmul[ci] = new double[4](); + MPI_Bcast(cextlr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(cext[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(cmullr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(cmul[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD); + } + vec_zpv = new double[c1->lm * 12]; + MPI_Bcast(vec_zpv, c1->lm * 12, MPI_DOUBLE, 0, MPI_COMM_WORLD); + zpv = new double***[c1->lm]; + for (int zi = 0; zi < c1->lm; zi++) { + zpv[zi] = new double **[12]; + for (int zj = 0; zj < 3; zj++) { + zpv[zi][zj] = new double*[4]; + zpv[zi][zj][0] = vec_zpv + (zi * 12) + (zj * 4); + zpv[zi][zj][1] = vec_zpv + (zi * 12) + (zj * 4) + 2; + } + } + am_vector = new dcomplex[c1->ndm * c1->ndm]; + am = new dcomplex*[c1->ndm]; + for (int ai = 0; ai < c1->ndm; ai++) { + am[ai] = (am_vector + ai * c1->ndm); + MPI_Bcast(am[ai], c1->ndm, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + } + MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&qsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&sqsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD); + lastxi = ((mpidata->rank+1) * xiblock)+1; + firstxi = lastxi-xiblock+1; + 
if (lastxi > number_of_scales) lastxi = number_of_scales; + + MPI_Bcast(&nimd, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&extr, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + +#ifdef USE_MAGMA + proc_device = mpidata->rank % device_count; +#else + proc_device = 0; +#endif + MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); +} + +void InclusionIterationData::mpibcast(const mixMPI *mpidata) { + c1->mpibcast(mpidata); + const int ndi = c1->nsph * c1->nlim; + const np_int ndit = 2 * ndi; + MPI_Bcast(gaps, c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(tqev, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(tqsv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + for (int ti = 0; ti < 2; ti++) { + MPI_Bcast(tqse[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(tqspe[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(tqss[ti], c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(tqsps[ti], c1->nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(tqce[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(tqcpe[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(tqcs[ti], 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(tqcps[ti], 3, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + } + MPI_Bcast(gapv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + for (int gi = 0; gi < 3; gi++) { + MPI_Bcast(gapp[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(gappm[gi], 2, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(gap[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(gapm[gi], 2, MPI_DOUBLE, 0, MPI_COMM_WORLD); + } + MPI_Bcast(u, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(us, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(un, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(uns, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(up, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(ups, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + 
MPI_Bcast(unmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(unsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(upmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(upsmp, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(duk, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(argi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(args, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + for (int ci = 0; ci < 4; ci++) { + MPI_Bcast(cextlr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(cext[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(cmullr[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(cmul[ci], 4, MPI_DOUBLE, 0, MPI_COMM_WORLD); + } + MPI_Bcast(vec_zpv, c1->lm * 12, MPI_DOUBLE, 0, MPI_COMM_WORLD); + // since MPI expects an int argument for the number of elements to transfer in one go, transfer am one row at a time + for (int ai = 0; ai < c1->ndm; ai++) { + MPI_Bcast(am[ai], c1->ndm, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + } + MPI_Bcast(&arg, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(&scan, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&cfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&sfmp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&cfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&sfsp, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&qsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&wn, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&xip, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&sqsfi, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&nimd, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&extr, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); +} +#endif + 
+InclusionIterationData::~InclusionIterationData() { /* Releases the descriptor and every array allocated by the constructors. */ + const int nsph = c1->nsph; /* NOTE(review): nsph is never read in this destructor */ + delete[] am_vector; + delete[] am; + for (int zi = 0; zi < c1->lm; zi++) { + for (int zj = 0; zj < 3; zj++) { /* only slots 0..2 of each zpv[zi] are allocated by the constructors */ + delete[] zpv[zi][zj]; + } + delete[] zpv[zi]; + } + delete[] zpv; + delete[] vec_zpv; + delete c1; /* freed only after the zpv loop above, which reads c1->lm */ + delete[] gaps; + for (int ti = 1; ti > -1; ti--) { + delete[] tqse[ti]; + delete[] tqss[ti]; + delete[] tqspe[ti]; + delete[] tqsps[ti]; + delete[] tqce[ti]; + delete[] tqcpe[ti]; + delete[] tqcs[ti]; + delete[] tqcps[ti]; + } + delete[] tqse; + delete[] tqss; + delete[] tqspe; + delete[] tqsps; + delete[] tqce; + delete[] tqcpe; + delete[] tqcs; + delete[] tqcps; + delete[] tqev; + delete[] tqsv; + for (int gi = 2; gi > -1; gi--) { + delete[] gapp[gi]; + delete[] gappm[gi]; + delete[] gap[gi]; + delete[] gapm[gi]; + } + delete[] gapp; + delete[] gappm; + delete[] gap; + delete[] gapm; + delete[] gapv; + delete[] u; + delete[] us; + delete[] un; + delete[] uns; + delete[] up; + delete[] ups; + delete[] unmp; + delete[] unsmp; + delete[] upmp; + delete[] upsmp; + delete[] argi; + delete[] args; + delete[] duk; + for (int ci = 3; ci > -1; ci--) { + delete[] cextlr[ci]; + delete[] cext[ci]; + delete[] cmullr[ci]; + delete[] cmul[ci]; + } + delete[] cextlr; + delete[] cext; + delete[] cmullr; + delete[] cmul; +} +// >>> End of InclusionIterationData implementation <<< // + +int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, ScatteringAngles *sa, InclusionIterationData *cid, VirtualAsciiFile *output, const string& output_path, VirtualBinaryFile *vtppoanp); + /*! \brief C++ implementation of INCLU * * \param config_file: `string` Name of the configuration file. @@ -97,22 +752,1294 @@ using namespace std; * \param mpidata: `mixMPI *` Pointer to an instance of MPI data settings. 
*/ void inclusion(const string& config_file, const string& data_file, const string& output_path, const mixMPI *mpidata) { - ScattererConfiguration *sconf = ScattererConfiguration::from_dedfb(config_file); - GeometryConfiguration *gconf = GeometryConfiguration::from_legacy(data_file); - ParticleDescriptorSphere *pds = NULL; - ParticleDescriptorCluster *pdc = NULL; - ParticleDescriptorInclusion *pdi = NULL; - if (gconf->number_of_spheres == 1) pds = new ParticleDescriptorSphere(gconf, sconf); + chrono::time_point t_start = chrono::high_resolution_clock::now(); + chrono::duration elapsed; + string message; + string timing_name; + FILE *timing_file; + Logger *time_logger; + if (mpidata->rank == 0) { + timing_name = output_path + "/c_timing_mpi"+ to_string(mpidata->rank) +".log"; + timing_file = fopen(timing_name.c_str(), "w"); + time_logger = new Logger(LOG_DEBG, timing_file); + } + Logger *logger = new Logger(LOG_DEBG); + int device_count = 0; + //=========== + // Initialise MAGMA + //=========== +#ifdef USE_MAGMA + const magma_int_t d_array_max_size = 32; // TEMPORARY: can become configurable parameter + magma_device_t *device_array = new magma_device_t[d_array_max_size]; + magma_int_t num_devices; + magma_getdevices(device_array, d_array_max_size, &num_devices); + device_count = (int)num_devices; + delete[] device_array; + message = "DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " GPU "; + if (device_count > 1) message += "devices.\n"; + else message += "device.\n"; + logger->log(message, LOG_DEBG); + logger->log("INFO: Process " + to_string(mpidata->rank) + " initializes MAGMA.\n"); + magma_int_t magma_result = magma_init(); + if (magma_result != MAGMA_SUCCESS) { + logger->err("ERROR: Process " + to_string(mpidata->rank) + " failed to initilize MAGMA.\n"); + logger->err("PROC-" + to_string(mpidata->rank) + ": MAGMA error code " + to_string(magma_result) + "\n"); + if (mpidata->rank == 0) { + fclose(timing_file); + delete 
time_logger; + } + delete logger; + return; + } +#endif // end MAGMA initialisation + + //=========================== + // the following only happens on MPI process 0 + //=========================== + if (mpidata->rank == 0) { +#ifdef USE_NVTX + nvtxRangePush("Set up"); +#endif + //======================= + // Initialise sconf from configuration file + //======================= + logger->log("INFO: making legacy configuration...", LOG_INFO); + ScattererConfiguration *sconf = NULL; + try { + sconf = ScattererConfiguration::from_dedfb(config_file); + } catch(const OpenConfigurationFileException &ex) { + logger->err("\nERROR: failed to open scatterer configuration file.\n"); + string message = "FILE: " + string(ex.what()) + "\n"; + logger->err(message); + fclose(timing_file); + delete time_logger; + delete logger; + return; + } + sconf->write_formatted(output_path + "/c_OEDFB"); + sconf->write_binary(output_path + "/c_TEDF"); + sconf->write_binary(output_path + "/c_TEDF.hd5", "HDF5"); + // end scatterer initialisation + + //======================== + // Initialise gconf from configuration files + //======================== + GeometryConfiguration *gconf = NULL; + try { + gconf = GeometryConfiguration::from_legacy(data_file); + } catch (const OpenConfigurationFileException &ex) { + logger->err("\nERROR: failed to open geometry configuration file.\n"); + string message = "FILE: " + string(ex.what()) + "\n"; + logger->err(message); + if (sconf) delete sconf; + fclose(timing_file); + delete time_logger; + delete logger; + return; + } + logger->log(" done.\n", LOG_INFO); + //end gconf initialisation + +#ifdef USE_NVTX + nvtxRangePop(); +#endif + int s_nsph = sconf->number_of_spheres; + int nsph = gconf->number_of_spheres; + // Sanity check on number of sphere consistency, should always be verified + if (s_nsph == nsph) { + // Shortcuts to variables stored in configuration objects + ScatteringAngles *p_scattering_angles = new ScatteringAngles(gconf); + double wp = 
sconf->wp; + // Open empty virtual ascii file for output + VirtualAsciiFile *p_output = new VirtualAsciiFile(); + char virtual_line[256]; + InclusionIterationData *cid = new InclusionIterationData(gconf, sconf, mpidata, device_count); + const np_int ndi = cid->c1->nsph * cid->c1->nlim; + const np_int ndit = 2 * ndi; + logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)cid->c1->ndm) + " x " + to_string((int64_t)cid->c1->ndm) +".\n"); + time_logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)cid->c1->ndm) + " x " + to_string((int64_t)cid->c1->ndm) +".\n"); + + //========================== + // Write a block of info to the ascii output file + //========================== + sprintf(virtual_line, " READ(IR,*)NSPH,LI,LE,MXNDM,INPOL,NPNT,NPNTTS,IAVM,ISAM\n"); + p_output->append_line(virtual_line); +#ifdef USE_ILP64 + sprintf(virtual_line, " %5d%5d%5d%5ld%5d%5d%5d%5d%5d\n", + nsph, cid->c1->li, cid->c1->le, gconf->mxndm, gconf->in_pol, gconf->npnt, + gconf->npntts, gconf->iavm, gconf->iavm + ); +#else + sprintf(virtual_line, " %5d%5d%5d%5d%5d%5d%5d%5d%5d\n", + nsph, cid->c1->li, cid->c1->le, gconf->mxndm, gconf->in_pol, gconf->npnt, + gconf->npntts, gconf->iavm, gconf->iavm + ); +#endif // USE_ILP64 + p_output->append_line(virtual_line); + sprintf(virtual_line, " READ(IR,*)RXX(I),RYY(I),RZZ(I)\n"); + p_output->append_line(virtual_line); + for (int ri = 0; ri < nsph; ri++) { + sprintf(virtual_line, "%17.8lE%17.8lE%17.8lE\n", + gconf->get_sph_x(ri), gconf->get_sph_y(ri), gconf->get_sph_z(ri) + ); + p_output->append_line(virtual_line); + } + sprintf(virtual_line, " READ(IR,*)TH,THSTP,THLST,THS,THSSTP,THSLST\n"); + p_output->append_line(virtual_line); + sprintf( + virtual_line, " %10.3lE%10.3lE%10.3lE%10.3lE%10.3lE%10.3lE\n", + p_scattering_angles->th, p_scattering_angles->thstp, + p_scattering_angles->thlst, p_scattering_angles->ths, + p_scattering_angles->thsstp, p_scattering_angles->thslst + ); + p_output->append_line(virtual_line); + 
sprintf(virtual_line, " READ(IR,*)PH,PHSTP,PHLST,PHS,PHSSTP,PHSLST\n"); + p_output->append_line(virtual_line); + sprintf( + virtual_line, " %10.3lE%10.3lE%10.3lE%10.3lE%10.3lE%10.3lE\n", + p_scattering_angles->ph, p_scattering_angles->phstp, + p_scattering_angles->phlst, p_scattering_angles->phs, + p_scattering_angles->phsstp, p_scattering_angles->phslst + ); + p_output->append_line(virtual_line); + sprintf(virtual_line, " READ(IR,*)JWTM\n"); + p_output->append_line(virtual_line); + sprintf(virtual_line, " %5d\n", gconf->jwtm); + p_output->append_line(virtual_line); + sprintf(virtual_line, " READ(ITIN)NSPHT\n"); + p_output->append_line(virtual_line); + sprintf(virtual_line, " READ(ITIN)(IOG(I),I=1,NSPH)\n"); + p_output->append_line(virtual_line); + sprintf(virtual_line, " READ(ITIN)EXDC,WP,XIP,IDFC,NXI\n"); + p_output->append_line(virtual_line); + sprintf(virtual_line, " READ(ITIN)(XIV(I),I=1,NXI)\n"); + p_output->append_line(virtual_line); + sprintf(virtual_line, " READ(ITIN)NSHL(I),ROS(I)\n"); + p_output->append_line(virtual_line); + sprintf(virtual_line, " READ(ITIN)(RCF(I,NS),NS=1,NSH)\n"); + p_output->append_line(virtual_line); + sprintf(virtual_line, " \n"); + p_output->append_line(virtual_line); + instr(sconf, cid->c1); + thdps(cid->c1->lm, cid->zpv); + double exdc = sconf->exdc; + double exri = sqrt(exdc); + sprintf(virtual_line, " REFR. 
INDEX OF EXTERNAL MEDIUM=%15.7lE\n", exri); + p_output->append_line(virtual_line); + + // Create an empty bynary file + VirtualBinaryFile *vtppoanp = new VirtualBinaryFile(); + string tppoan_name = output_path + "/c_TPPOAN"; +#ifdef USE_MAGMA + logger->log("INFO: using MAGMA calls.\n", LOG_INFO); +#elif defined USE_LAPACK + logger->log("INFO: using LAPACK calls.\n", LOG_INFO); +#else + logger->log("INFO: using fall-back lucin() calls.\n", LOG_INFO); +#endif + int iavm = gconf->iavm; + int isam = gconf->isam; + int inpol = gconf->in_pol; + int nxi = sconf->number_of_scales; + int nth = p_scattering_angles->nth; + int nths = p_scattering_angles->nths; + int nph = p_scattering_angles->nph; + int nphs = p_scattering_angles->nphs; + + //======================== + // write a block of info to virtual binary file + //======================== + vtppoanp->append_line(VirtualBinaryLine(iavm)); + vtppoanp->append_line(VirtualBinaryLine(isam)); + vtppoanp->append_line(VirtualBinaryLine(inpol)); + vtppoanp->append_line(VirtualBinaryLine(nxi)); + vtppoanp->append_line(VirtualBinaryLine(nth)); + vtppoanp->append_line(VirtualBinaryLine(nph)); + vtppoanp->append_line(VirtualBinaryLine(nths)); + vtppoanp->append_line(VirtualBinaryLine(nphs)); + if (sconf->idfc < 0) { + cid->vk = cid->xip * cid->wn; + sprintf(virtual_line, " VK=%15.7lE, XI IS SCALE FACTOR FOR LENGTHS\n", cid->vk); + p_output->append_line(virtual_line); + sprintf(virtual_line, " \n"); + p_output->append_line(virtual_line); + } + + // do the first iteration on jxi488 separately, since it seems to be different from the others + int jxi488 = 1; + int initialmaxrefiters = cid->maxrefiters; + + chrono::time_point start_iter_1 = chrono::high_resolution_clock::now(); +#ifdef USE_NVTX + nvtxRangePush("First iteration"); +#endif + // use these pragmas, which should have no effect on parallelism, just to push OMP nested levels at the same level also in the first wavelength iteration + int jer = 0; +#pragma omp parallel + { 
+#pragma omp single + { + jer = inclusion_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp); + } + } +#ifdef USE_NVTX + nvtxRangePop(); +#endif + chrono::time_point end_iter_1 = chrono::high_resolution_clock::now(); + elapsed = start_iter_1 - t_start; + string message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n"; + logger->log(message); + time_logger->log(message); + elapsed = end_iter_1 - start_iter_1; + message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n"; + logger->log(message); + time_logger->log(message); + /* for the next iterations, just always do maxiter iterations, assuming the accuracy is good enough */ + cid->refinemode = 0; + /* add an extra iteration for margin, if this does not exceed initialmaxrefiters */ + // if (cid->maxrefiters < initialmaxrefiters) cid->maxrefiters++; + if (jer != 0) { + // First loop failed. Halt the calculation. + fclose(timing_file); + delete time_logger; + delete p_output; + delete p_scattering_angles; + delete cid; + delete logger; + delete sconf; + delete gconf; + return; + } + + //================================================== + // do the first outputs here, so that I open here the new files, afterwards I only append + //================================================== + p_output->write_to_disk(output_path + "/c_OINCLU"); + delete p_output; + vtppoanp->write_to_disk(output_path + "/c_TPPOAN"); + delete vtppoanp; + + // here go the calls that send data to be duplicated on other MPI processes from process 0 to others, using MPI broadcasts, but only if MPI is actually used +#ifdef MPI_VERSION + if (mpidata->mpirunning) { + gconf->mpibcast(mpidata); + sconf->mpibcast(mpidata); + cid->mpibcast(mpidata); + p_scattering_angles->mpibcast(mpidata); + } +#endif + // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled + int ompnumthreads = 1; + // this is 
for MPI process 0 (or even if we are not using MPI at all) + int myjxi488startoffset = 0; + int myMPIstride = ompnumthreads; + int myMPIblock = ompnumthreads; + // Define here shared arrays of virtual ascii and binary files, so that thread 0 will be able to access them all later + VirtualAsciiFile **p_outarray = NULL; + VirtualBinaryFile **vtppoanarray = NULL; + +#ifdef USE_NVTX + nvtxRangePush("Parallel loop"); +#endif + + //=========================================== + // open the OpenMP parallel context, so each thread can initialise its stuff + //=========================================== +#pragma omp parallel + { + // Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defiled value anyway + int myompthread = 0; + +#ifdef _OPENMP + // If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files + myompthread = omp_get_thread_num(); + if (myompthread == 0) ompnumthreads = omp_get_num_threads(); +#endif + + if (myompthread == 0) { + // Initialise some shared variables only on thread 0 + p_outarray = new VirtualAsciiFile*[ompnumthreads]; + vtppoanarray = new VirtualBinaryFile*[ompnumthreads]; + myMPIblock = ompnumthreads; + myMPIstride = myMPIblock; + } + +#ifdef MPI_VERSION + if (myompthread == 0) { + if (mpidata->mpirunning) { + // only go through this if MPI has been actually used + for (int rr=1; rrnprocs; rr++) { + // individually send their respective starting points to other MPI processes: they start immediately after the frequencies computed by previous processes so far + int remotejxi488startoffset = myMPIstride; + MPI_Send(&remotejxi488startoffset, 1, MPI_INT, rr, 3, MPI_COMM_WORLD); + int remoteMPIblock; + MPI_Recv(&remoteMPIblock, 1, MPI_INT, rr, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + // update myMPIstride to include the ones due to MPI process rr + myMPIstride += remoteMPIblock; + } + // now I know the total 
myMPIstride, I can send it to all processes + MPI_Bcast(&myMPIstride, 1, MPI_INT, 0, MPI_COMM_WORLD); + } + } +#endif + // add an omp barrier to make sure that the global variables defined by thread 0 are known to all threads below this +#pragma omp barrier + + // To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one. Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number + InclusionIterationData *cid_2 = NULL; + VirtualAsciiFile *p_output_2 = NULL; + VirtualBinaryFile *vtppoanp_2 = NULL; + // for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones + if (myompthread == 0) { + cid_2 = cid; + } else { + // this is not thread 0, so do create fresh copies of all local variables + cid_2 = new InclusionIterationData(*cid); + } + // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads + if (myompthread==0) { + logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n"); + } +#pragma omp barrier + // ok, now I can actually start the parallel calculations + for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) { + // the parallel loop over MPI processes covers a different set of indices for each thread +#pragma omp barrier + int myjxi488 = ixi488+myompthread; + // each thread opens new virtual files and stores their pointers in the shared array + p_output_2 = new VirtualAsciiFile(); + vtppoanp_2 = new VirtualBinaryFile(); + // each thread puts a copy of the pointers to its virtual files in the shared arrays + p_outarray[myompthread] = p_output_2; + vtppoanarray[myompthread] = vtppoanp_2; +#pragma omp 
barrier + + // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism + if (myjxi488 <= cid_2->number_of_scales) { + int jer = inclusion_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2); + } +#pragma omp barrier + +#ifdef USE_NVTX + nvtxRangePush("Output concatenation"); +#endif +#pragma omp barrier + // threads different from 0 append their virtual files to the one of thread 0, and delete them + if (myompthread == 0) { + for (int ti=1; tiappend(*(p_outarray[ti])); + delete p_outarray[ti]; + vtppoanarray[0]->append(*(vtppoanarray[ti])); + delete vtppoanarray[ti]; + } + } +#pragma omp barrier + //============================================== + // Collect all virtual files on thread 0 of MPI process 0, and append them to disk + //============================================== + if (myompthread == 0) { + // thread 0 writes its virtual files, now including contributions from all threads, to disk, and deletes them + p_outarray[0]->append_to_disk(output_path + "/c_OINCLU"); + delete p_outarray[0]; + vtppoanarray[0]->append_to_disk(output_path + "/c_TPPOAN"); + delete vtppoanarray[0]; + +#ifdef MPI_VERSION + if (mpidata->mpirunning) { + // only go through this if MPI has been actually used + for (int rr=1; rrnprocs; rr++) { + // get the data from process rr, creating a new virtual ascii file + VirtualAsciiFile *p_output = new VirtualAsciiFile(mpidata, rr); + // append to disk and delete virtual ascii file + p_output->append_to_disk(output_path + "/c_OINCLU"); + delete p_output; + // get the data from process rr, creating a new virtual binary file + VirtualBinaryFile *vtppoanp = new VirtualBinaryFile(mpidata, rr); + // append to disk and delete virtual binary file + vtppoanp->append_to_disk(output_path + "/c_TPPOAN"); + delete vtppoanp; + int test = MPI_Barrier(MPI_COMM_WORLD); + } + } +#endif + } + // end block writing to disk +#ifdef USE_NVTX 
+ nvtxRangePop(); +#endif +#pragma omp barrier + + } // close strided loop running on MPI processes, ixi488 loop + // delete the shared arrays I used to make available to thread 0 the virtual files of other threads +#pragma omp barrier + if (myompthread == 0) { + delete[] p_outarray; + delete[] vtppoanarray; + } + { + string message = "INFO: Closing thread-local output files of thread " + to_string(myompthread) + " and syncing threads.\n"; + logger->log(message); + } +#ifdef USE_NVTX + nvtxRangePop(); +#endif + delete cid_2; + } + delete p_scattering_angles; + } + + else { // Sphere number inconsistency error. + throw UnrecognizedConfigurationException( + "Inconsistent geometry and scatterer configurations." + ); + } + + delete sconf; + delete gconf; +#ifdef USE_MAGMA + logger->log("INFO: Process " + to_string(mpidata->rank) + " finalizes MAGMA.\n"); + magma_finalize(); +#endif + chrono::time_point t_end = chrono::high_resolution_clock::now(); + elapsed = t_end - t_start; + string message = "INFO: Calculation lasted " + to_string(elapsed.count()) + "s.\n"; + logger->log(message); + logger->log("Finished: output written to " + output_path + "/c_OINCLU\n"); + time_logger->log(message); + fclose(timing_file); + delete time_logger; + } // end instructions block of MPI process 0 + + //=============================== + // instruction block for MPI processes different from 0 + //=============================== +#ifdef MPI_VERSION else { - if (sconf->use_external_sphere) pdi = new ParticleDescriptorInclusion(gconf, sconf); - else pdc = new ParticleDescriptorCluster(gconf, sconf); - } - ParticleDescriptor *pd; - if (pds != NULL) pd = pds; - else if (pdc != NULL) pd = pdc; - else pd = pdi; - printf("INFO: %s\n", (pd->get_descriptor_type()).c_str()); - delete pd; - delete gconf; - delete sconf; + // here go the code for MPI processes other than 0 + // copy gconf, sconf, cid and p_scattering_angles from MPI process 0 + GeometryConfiguration *gconf = new 
GeometryConfiguration(mpidata); + ScattererConfiguration *sconf = new ScattererConfiguration(mpidata); + InclusionIterationData *cid = new InclusionIterationData(mpidata, device_count); + ScatteringAngles *p_scattering_angles = new ScatteringAngles(mpidata); + + // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled + int ompnumthreads = 1; + VirtualAsciiFile **p_outarray = NULL; + VirtualBinaryFile **vtppoanarray = NULL; + int myjxi488startoffset; + int myMPIstride = ompnumthreads; + int myMPIblock = ompnumthreads; + +#pragma omp parallel + { + // Create and initialise this variable here, so that if OpenMP is enabled it is local to the thread, and if OpenMP is not enabled it has a well-defiled value anyway + int myompthread = 0; +#ifdef _OPENMP + // If OpenMP is enabled, give actual values to myompthread and ompnumthreads, and open thread-local output files + myompthread = omp_get_thread_num(); + if (myompthread == 0) ompnumthreads = omp_get_num_threads(); +#endif + if (myompthread == 0) { + // receive the start parameter from MPI process 0 + MPI_Recv(&myjxi488startoffset, 1, MPI_INT, 0, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + // send my number of omp threads to process 0 + MPI_Send(&ompnumthreads, 1, MPI_INT, 0, 3, MPI_COMM_WORLD); + // receive myMPIstride sent by MPI process 0 to all processes + MPI_Bcast(&myMPIstride, 1, MPI_INT, 0, MPI_COMM_WORLD); + // allocate virtual files for each thread + p_outarray = new VirtualAsciiFile*[ompnumthreads]; + vtppoanarray = new VirtualBinaryFile*[ompnumthreads]; + } +#pragma omp barrier + // To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one. 
Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number + InclusionIterationData *cid_2 = NULL; + VirtualAsciiFile *p_output_2 = NULL; + VirtualBinaryFile *vtppoanp_2 = NULL; + // PLACEHOLDER + // for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones + if (myompthread == 0) { + cid_2 = cid; + } else { + // this is not thread 0, so do create fresh copies of all local variables + cid_2 = new InclusionIterationData(*cid); + } + // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads +#pragma omp barrier + // ok, now I can actually start the parallel calculations + for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) { + // the parallel loop over MPI processes covers a different set of indices for each thread +#pragma omp barrier + int myjxi488 = ixi488 + myjxi488startoffset + myompthread; + // each thread opens new virtual files and stores their pointers in the shared array + p_output_2 = new VirtualAsciiFile(); + vtppoanp_2 = new VirtualBinaryFile(); + // each thread puts a copy of the pointers to its virtual files in the shared arrays + p_outarray[myompthread] = p_output_2; + vtppoanarray[myompthread] = vtppoanp_2; +#pragma omp barrier + if (myompthread==0) logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n"); + // ok, now I can actually start the parallel calculations + // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism + if (myjxi488 <= cid_2->number_of_scales) { + int jer = inclusion_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2); + } // close the OMP parallel for loop + +#pragma omp 
barrier + // threads different from 0 append their virtual files to the one of thread 0, and delete them + if (myompthread == 0) { + for (int ti=1; tiappend(*(p_outarray[ti])); + delete p_outarray[ti]; + vtppoanarray[0]->append(*(vtppoanarray[ti])); + delete vtppoanarray[ti]; + } + // thread 0 sends the collected virtualfiles to thread 0 of MPI process 0, then deletes them + for (int rr=1; rrnprocs; rr++) { + if (rr == mpidata->rank) { + p_outarray[0]->mpisend(mpidata); + delete p_outarray[0]; + vtppoanarray[0]->mpisend(mpidata); + delete vtppoanarray[0]; + } + int test = MPI_Barrier(MPI_COMM_WORLD); + } + } + } // close strided loop running on MPI processes + + // Clean memory +#pragma omp barrier + if (myompthread == 0) { + delete[] p_outarray; + delete[] vtppoanarray; + } + delete cid_2; + + } // close pragma omp parallel + delete p_scattering_angles; + delete sconf; + delete gconf; +#endif +#ifdef USE_MAGMA + logger->log("INFO: Process " + to_string(mpidata->rank) + " finalizes MAGMA.\n"); + magma_finalize(); +#endif + delete logger; +#ifdef MPI_VERSION + } +#endif +} + +int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, ScatteringAngles *sa, InclusionIterationData *cid, VirtualAsciiFile *output, const string& output_path, VirtualBinaryFile *vtppoanp) { + int nxi = sconf->number_of_scales; + char virtual_line[256]; + string message = "INFO: running scale iteration " + to_string(jxi488) + " of " + to_string(nxi) + ".\n"; + Logger *logger = new Logger(LOG_DEBG); + logger->log(message); + chrono::duration elapsed; + chrono::time_point interval_start, interval_end; + int jer = 0; + int lcalc = 0; + int jaw = 1; + int li = cid->c1->li; + int le = cid->c1->le; + int lm = cid->c1->lm; + int nsph = cid->c1->nsph; + np_int mxndm = gconf->mxndm; + int iavm = gconf->iavm; + int inpol = gconf->in_pol; + int npnt = cid->c1->npnt; + int npntts = cid->c1->npntts; + int isam = gconf->iavm; + int jwtm = gconf->jwtm; + np_int ndit 
= cid->c1->ndit; + int isq, ibf; + int last_configuration; + dcomplex ent, entn; + double enti; + +#ifdef USE_NVTX + nvtxRangePush("Prepare matrix calculation"); +#endif + sprintf(virtual_line, "========== JXI =%3d ====================\n", jxi488); + output->append_line(virtual_line); + double xi = sconf->get_scale(jxi488 - 1); + double exdc = sconf->exdc; + double exri = sqrt(exdc); + int idfc = (int)sconf->idfc; + double vkarg = 0.0; + if (idfc >= 0) { + cid->vk = xi * cid->wn; + vkarg = cid->vk; + sprintf(virtual_line, " VK=%15.7lE, XI=%15.7lE\n", cid->vk, xi); + output->append_line(virtual_line); + // goes to 120 + } else { // label 119 + vkarg = xi * cid->vk; + cid->sqsfi = 1.0 / (xi * xi); + sprintf(virtual_line, " XI=%15.7lE\n", xi); + output->append_line(virtual_line); + } + // label 120 + double sze = vkarg * cid->extr; + last_configuration = 0; + for (int i133 = 1; i133 <= cid->c1->nsph; i133++) { + int iogi = cid->c1->iog[i133 - 1]; + if (iogi != i133) { + for (int l123 = 1; l123 <= cid->c1->li; l123++) { + cid->c1->rmi[l123 - 1][i133 - 1] = cid->c1->rmi[l123 - 1][iogi - 1]; + cid->c1->rei[l123 - 1][i133 - 1] = cid->c1->rei[l123 - 1][iogi - 1]; + } // l123 loop + } else { // label 125 + last_configuration++; + int nsh = cid->c1->nshl[last_configuration - 1]; + int ici = (nsh + 1) / 2; + if (i133 == 1) ici++; + if (idfc == 0) { + for (int ic = 0; ic < ici; ic++) + cid->c1->dc0[ic] = sconf->get_dielectric_constant(ic, i133 - 1, jxi488 - 1); + // goes to 129 + } else { // label 127 + if (jxi488 == 1) { + for (int ic = 0; ic < ici; ic++) { + cid->c1->dc0[ic] = sconf->get_dielectric_constant(ic, i133 - 1, 0); + } + } + } + // label 129 + if (i133 == 1) { + ent = cid->c1->dc0[ici - 1]; + enti = imag(ent); + entn = csqrt(ent); + // goes to 131 + } else { // label 130 + if (nsh % 2 == 0) cid->c1->dc0[ici] = ent; + } + indme(i133, npnt, npntts, vkarg, ent, enti, entn, jer, lcalc, cid->arg, cid->c1); + if (jer != 0) { + sprintf(virtual_line, " STOP IN INDME\n"); + 
output->append_line(virtual_line); + message = "ERROR: indme failed with error code " + to_string(jer) + ".\n"; + logger->log(message, LOG_ERRO); + delete logger; + return jer; + //break; + } + } + } // i133 loop + ospv(cid->c1, vkarg, sze, exri, entn, enti, jer, lcalc, cid->arg); + if (jer != 0) { + sprintf(virtual_line, " STOP IN OSPV\n"); + output->append_line(virtual_line); + message = "ERROR: ospv failed with error code " + to_string(jer) + ".\n"; + logger->log(message, LOG_ERRO); + delete logger; + return jer; + // break; + } // i133 loop +#ifdef USE_NVTX + nvtxRangePop(); +#endif + interval_start = chrono::high_resolution_clock::now(); +#ifdef USE_NVTX + nvtxRangePush("Calculate inverted matrix"); +#endif +#ifdef DEBUG_AM + /* now, before cms, output am to p_outam0 */ + VirtualAsciiFile *outam0 = new VirtualAsciiFile(); + string outam0_name = output_path + "/c_AM0_JXI" + to_string(jxi488) + ".txt"; + sprintf(virtual_line, " AM matrix before CMS\n"); + outam0->append_line(virtual_line); + sprintf(virtual_line, " I1+1 I2+1 Real Imag\n"); + outam0->append_line(virtual_line); + write_dcomplex_matrix(outam0, cid->am, cid->c1->ndm, cid->c1->ndm); + outam0->write_to_disk(outam0_name); + delete outam0; +#endif + incms(cid->am, enti, cid->c1); +#ifdef DEBUG_AM + VirtualAsciiFile *outam1 = new VirtualAsciiFile(); + string outam1_name = output_path + "/c_AM1_JXI" + to_string(jxi488) + ".txt"; + sprintf(virtual_line, " AM matrix after CMS before LUCIN\n"); + outam1->append_line(virtual_line); + sprintf(virtual_line, " I1+1 I2+1 Real Imag\n"); + outam1->append_line(virtual_line); + write_dcomplex_matrix(outam1, cid->am, cid->c1->ndm, cid->c1->ndm, " %5d %5d (%17.8lE,%17.8lE)\n", 1); + outam1->write_to_disk(outam1_name); + delete outam1; +#endif +#ifdef USE_NVTX + nvtxRangePop(); +#endif + interval_end = chrono::high_resolution_clock::now(); + elapsed = interval_end - interval_start; + message = "INFO: matrix calculation for scale " + to_string(jxi488) + " took " + 
to_string(elapsed.count()) + "s.\n"; + logger->log(message); + interval_start = chrono::high_resolution_clock::now(); +#ifdef USE_NVTX + nvtxRangePush("Invert the matrix"); +#endif + // we the accuracygoal in, get the actual accuracy back out + double actualaccuracy = cid->accuracygoal; + invert_matrix(cid->am, cid->c1->ndm, jer, cid->maxrefiters, actualaccuracy, cid->refinemode, mxndm, cid->proc_device); + // in principle, we should check whether the returned actualaccuracy is indeed lower than the accuracygoal, and do something about it if not +#ifdef USE_REFINEMENT + if (cid->refinemode==2) { + message = "INFO: calibration obtained accuracy " + to_string(actualaccuracy) + " (" + to_string(cid->accuracygoal) + " requested) in " + to_string(cid->maxrefiters) + " refinement iterations\n"; + logger->log(message); + if (actualaccuracy > 1e-2) { + printf("Accuracy worse than 0.01, stopping"); + exit(1); + } + } +#endif // USE_REFINEMENT +#ifdef USE_NVTX + nvtxRangePop(); +#endif + interval_end = chrono::high_resolution_clock::now(); + elapsed = interval_end - interval_start; + message = "INFO: matrix inversion for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n"; + logger->log(message); + if (jer != 0) { + message = "ERROR: matrix inversion ended with error code " + to_string(jer) + ".\n"; + logger->err(message); + delete logger; + return jer; + // break; // jxi488 loop: goes to memory clean + } + interval_start = chrono::high_resolution_clock::now(); +#ifdef USE_NVTX + nvtxRangePush("Average calculation"); +#endif + exma(cid->am, cid->c1); +#ifdef DEBUG_AM + VirtualAsciiFile *outam3 = new VirtualAsciiFile(); + string outam3_name = output_path + "/c_AM3_JXI" + to_string(jxi488) + ".txt"; + sprintf(virtual_line, " AM matrix after EXMA\n"); + outam3->append_line(virtual_line); + sprintf(virtual_line, " I1+1 I2+1 Real Imag\n"); + outam3->append_line(virtual_line); + write_dcomplex_matrix(outam3, cid->am, cid->c1->ndm, cid->c1->ndm); + 
outam3->write_to_disk(outam3_name); + delete outam3; +#endif + if (idfc >= 0) { + if (jxi488 == jwtm) { + int nlemt = 2 * cid->c1->nlem; + string ttms_name = output_path + "/c_TTMS.hd5"; + TransitionMatrix::write_binary(ttms_name, nlemt, lm, cid->vk, exri, cid->c1->am0m, "HDF5"); + ttms_name = output_path + "/c_TTMS"; + TransitionMatrix::write_binary(ttms_name, nlemt, lm, cid->vk, exri, cid->c1->am0m); + } + } + // label 156: continue from here + for (int i168 = 1; i168 <= nsph; i168++) { + if (cid->c1->iog[i168 - 1] >= i168) { + if (cid->c1->nshl[i168 - 1] != 1) { + sprintf(virtual_line, " SPHERE N.%2d: SIZE=%15.7lE\n", i168, cid->c1->vsz[i168 - 1]); + output->append_line(virtual_line); + } else { + sprintf(virtual_line, " SPHERE N.%2d: SIZE=%15.7lE, REFRACTIVE INDEX=%15.7lE%15.7lE\n", i168, cid->c1->vsz[i168 - 1], real(cid->c1->vkt[i168 - 1]), imag(cid->c1->vkt[i168 - 1])); + output->append_line(virtual_line); + } + } + } // i168 loop + sprintf(virtual_line, " EXT. SPHERE: SIZE=%15.7lE, REFRACTIVE INDEX=%15.7lE%15.7lE\n", sze, real(entn), imag(entn)); + output->append_line(virtual_line); + // label 160 + double cs0 = 0.25 * cid->vk * cid->vk * cid->vk / acos(0.0); + double csch = 2.0 * cid->vk * cid->sqsfi / cid->c1->gcs; + double sqk = cid->vk * cid->vk * exdc; + vtppoanp->append_line(VirtualBinaryLine(cid->vk)); + pcrsm0(cid->vk, exri, inpol, cid->c1); + apcra(cid->zpv, cid->c1->le, cid->c1->am0m, inpol, sqk, cid->gapm, cid->gappm); +#ifdef USE_NVTX + nvtxRangePop(); +#endif + interval_end = chrono::high_resolution_clock::now(); + elapsed = interval_end - interval_start; + message = "INFO: average calculation for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n"; + logger->log(message); + interval_start = chrono::high_resolution_clock::now(); +#ifdef USE_NVTX + nvtxRangePush("Angle loop"); +#endif + double th = sa->th; + for (int jth486 = 1; jth486 <= sa->nth; jth486++) { // OpenMP portable? 
+ double ph = sa->ph; + double cost = 0.0, sint = 0.0, cosp = 0.0, sinp = 0.0; + for (int jph484 = 1; jph484 <= sa->nph; jph484++) { + int jw = 0; + if (sa->nk != 1 || jxi488 <= 1) { + upvmp(th, ph, 0, cost, sint, cosp, sinp, cid->u, cid->upmp, cid->unmp); + if (isam >= 0) { + wmamp( + 0, cost, sint, cosp, sinp, inpol, cid->c1->le, 0, + nsph, cid->argi, cid->u, cid->upmp, cid->unmp, cid->c1 + ); + // label 182 + apc(cid->zpv, cid->c1->le, cid->c1->am0m, cid->c1->w, sqk, cid->gap, cid->gapp); + raba(cid->c1->le, cid->c1->am0m, cid->c1->w, cid->tqce, cid->tqcpe, cid->tqcs, cid->tqcps); + jw = 1; + } + } else { // label 180, NK == 1 AND JXI488 == 1 + if (isam >= 0) { + // label 182 + apc(cid->zpv, cid->c1->le, cid->c1->am0m, cid->c1->w, sqk, cid->gap, cid->gapp); + raba(cid->c1->le, cid->c1->am0m, cid->c1->w, cid->tqce, cid->tqcpe, cid->tqcs, cid->tqcps); + jw = 1; + } + } + // label 184 + double thsl = sa->ths; + double phsph = 0.0; + for (int jths = 1; jths <= sa->nths; jths++) { + double ths = thsl; + int icspnv = 0; + if (isam > 1) ths += sa->thsca; + if (isam >= 1) { + phsph = 0.0; + if (ths < 0.0 || ths > 180.0) phsph = 180.0; + if (ths < 0.0) ths *= -1.0; + if (ths > 180.0) ths = 360.0 - ths; + if (phsph != 0.0) icspnv = 1; + } + // label 186 + double phs = sa->phs; + for (int jphs = 1; jphs <= sa->nphs; jphs++) { + double costs = 0.0, sints = 0.0, cosps = 0.0, sinps = 0.0; + if (isam >= 1) { + phs = sa->ph + phsph; + if (phs > 360.0) phs -= 360.0; + } + // label 188 + bool goto190 = (sa->nks == 1 && (jxi488 > 1 || jth486 > 1 || jph484 > 1)); + if (!goto190) { + upvmp(ths, phs, icspnv, costs, sints, cosps, sinps, cid->us, cid->upsmp, cid->unsmp); + if (isam >= 0) + wmamp( + 2, costs, sints, cosps, sinps, inpol, cid->c1->le, + 0, nsph, cid->args, cid->us, cid->upsmp, cid->unsmp, cid->c1 + ); + } + // label 190 + if (sa->nkks != 1 || jxi488 <= 1) { + upvsp( + cid->u, cid->upmp, cid->unmp, cid->us, cid->upsmp, cid->unsmp, cid->up, cid->un, cid->ups, cid->uns, + 
cid->duk, isq, ibf, cid->scan, cid->cfmp, cid->sfmp, cid->cfsp, cid->sfsp + ); + if (isam < 0) { + wmasp( + cost, sint, cosp, sinp, costs, sints, cosps, sinps, + cid->u, cid->up, cid->un, cid->us, cid->ups, cid->uns, isq, ibf, inpol, cid->c1->le, + 0, nsph, cid->argi, cid->args, cid->c1 + ); + } else { // label 192 + for (int i193 = 0; i193 < 3; i193++) { + cid->up[i193] = cid->upmp[i193]; + cid->un[i193] = cid->unmp[i193]; + cid->ups[i193] = cid->upsmp[i193]; + cid->uns[i193] = cid->unsmp[i193]; + } + } + } + // label 194 + if (iavm == 1) crsm1(cid->vk, exri, cid->c1); + if (isam < 0) { + apc(cid->zpv, cid->c1->le, cid->c1->am0m, cid->c1->w, sqk, cid->gap, cid->gapp); + raba(cid->c1->le, cid->c1->am0m, cid->c1->w, cid->tqce, cid->tqcpe, cid->tqcs, cid->tqcps); + jw = 1; + } + // label 196 + vtppoanp->append_line(VirtualBinaryLine(th)); + vtppoanp->append_line(VirtualBinaryLine(ph)); + vtppoanp->append_line(VirtualBinaryLine(ths)); + vtppoanp->append_line(VirtualBinaryLine(phs)); + vtppoanp->append_line(VirtualBinaryLine(cid->scan)); + if (jaw != 0) { + jaw = 0; + mextc(cid->vk, exri, cid->c1->fsacm, cid->cextlr, cid->cext); + // We now have some implicit loops writing to binary + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + double value = cid->cext[i][j]; + vtppoanp->append_line(VirtualBinaryLine(value)); + } + } + for (int i = 0; i < 2; i++) { + double value = cid->c1->scscm[i]; + vtppoanp->append_line(VirtualBinaryLine(value)); + value = real(cid->c1->scscpm[i]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = imag(cid->c1->scscpm[i]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = cid->c1->ecscm[i]; + vtppoanp->append_line(VirtualBinaryLine(value)); + value = real(cid->c1->ecscpm[i]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = imag(cid->c1->ecscpm[i]); + vtppoanp->append_line(VirtualBinaryLine(value)); + } + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + double value = 
cid->gapm[i][j]; + vtppoanp->append_line(VirtualBinaryLine(value)); + value = real(cid->gappm[i][j]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = imag(cid->gappm[i][j]); + vtppoanp->append_line(VirtualBinaryLine(value)); + } + } + sprintf(virtual_line, " ENSEMBLE AVERAGE, MODE%2d\n", iavm); + output->append_line(virtual_line); + int jlr = 2; + for (int ilr210 = 1; ilr210 <= 2; ilr210++) { + int ipol = (ilr210 % 2 == 0) ? 1 : -1; + if (ilr210 == 2) jlr = 1; + double extsm = cid->c1->ecscm[ilr210 - 1]; + double qextm = extsm * cid->sqsfi / cid->c1->gcs; + double scasm = cid->c1->scscm[ilr210 - 1]; + double albdm = scasm / extsm; + double qscam = scasm * cid->sqsfi / cid->c1->gcs; + double abssm = extsm - scasm; + double qabsm = abssm * cid->sqsfi / cid->c1->gcs; + dcomplex s0m = cid->c1->fsacm[ilr210 - 1][ilr210 - 1] * exri; + double qschum = imag(s0m) * csch; + double pschum = real(s0m) * csch; + double s0magm = cabs(s0m) * cs0; + if (inpol == 0) { + sprintf(virtual_line, " LIN %2d\n", ipol); + output->append_line(virtual_line); + } else { // label 206 + sprintf(virtual_line, " CIRC %2d\n", ipol); + output->append_line(virtual_line); + } + // label 208 + sprintf(virtual_line, " ----- SCS ----- ABS ----- EXS ----- ALBEDS --\n"); + output->append_line(virtual_line); + sprintf(virtual_line, " %14.7lE%15.7lE%15.7lE%15.7lE\n", scasm, abssm, extsm, albdm); + output->append_line(virtual_line); + double alamb = 2.0 * 3.141592653589793238 / cid->vk; + sprintf(virtual_line, "INSERTION: SCASECM %5d%15.7E%15.7E%15.7E%15.7E\n", ipol, alamb, scasm, abssm, extsm); + output->append_line(virtual_line); + sprintf(virtual_line, " ---- SCS/GS -- ABC/GS -- EXS/GS ---\n"); + output->append_line(virtual_line); + sprintf(virtual_line, " %14.7lE%15.7lE%15.7lE\n", qscam, qabsm, qextm); + output->append_line(virtual_line); + sprintf( + virtual_line, " FSAS(%1d,%1d)=%15.7lE%15.7lE FSAS(%1d,%1d)=%15.7lE%15.7lE\n", + ilr210, ilr210, real(cid->c1->fsacm[ilr210 - 1][ilr210 - 1]), 
+ imag(cid->c1->fsacm[ilr210 - 1][ilr210 - 1]), jlr, ilr210, + real(cid->c1->fsacm[jlr - 1][ilr210 - 1]), imag(cid->c1->fsacm[jlr - 1][ilr210 - 1]) + ); + output->append_line(virtual_line); + sprintf(virtual_line, " QSCHU=%15.7lE, PSCHU=%15.7lE, S0MAG=%15.7lE\n", qschum, pschum, s0magm); + output->append_line(virtual_line); + double rapr = cid->c1->ecscm[ilr210 - 1] - cid->gapm[2][ilr210 - 1]; + double cosav = cid->gapm[2][ilr210 - 1] / cid->c1->scscm[ilr210 - 1]; + double fz = rapr; + sprintf(virtual_line, " COSAV=%15.7lE, RAPRS=%15.7lE\n", cosav, rapr); + output->append_line(virtual_line); + sprintf(virtual_line, " Fk=%15.7lE\n", fz); + output->append_line(virtual_line); + } // ilr210 loop + double rmbrif = (real(cid->c1->fsacm[0][0]) - real(cid->c1->fsacm[1][1])) / real(cid->c1->fsacm[0][0]); + double rmdchr = (imag(cid->c1->fsacm[0][0]) - imag(cid->c1->fsacm[1][1])) / imag(cid->c1->fsacm[0][0]); + sprintf(virtual_line, " (RE(FSAS(1,1))-RE(FSAS(2,2)))/RE(FSAS(1,1))=%15.7lE\n", rmbrif); + output->append_line(virtual_line); + sprintf(virtual_line, " (IM(FSAS(1,1))-IM(FSAS(2,2)))/IM(FSAS(1,1))=%15.7lE\n", rmdchr); + output->append_line(virtual_line); + } + // label 212 + sprintf(virtual_line, "********** JTH =%3d, JPH =%3d, JTHS =%3d, JPHS =%3d ********************\n", jth486, jph484, jths, jphs); + output->append_line(virtual_line); + sprintf(virtual_line, " TIDG=%10.3lE, PIDG=%10.3lE, TSDG=%10.3lE, PSDG=%10.3lE\n", th, ph, ths, phs); + output->append_line(virtual_line); + sprintf(virtual_line, " SCAND=%10.3lE\n", cid->scan); + output->append_line(virtual_line); + sprintf(virtual_line, " CFMP=%15.7lE, SFMP=%15.7lE\n", cid->cfmp, cid->sfmp); + output->append_line(virtual_line); + sprintf(virtual_line, " CFSP=%15.7lE, SFSP=%15.7lE\n", cid->cfsp, cid->sfsp); + output->append_line(virtual_line); + if (isam >= 0) { + sprintf(virtual_line, " UNI=(%12.5lE,%12.5lE,%12.5lE)\n", cid->un[0], cid->un[1], cid->un[2]); + output->append_line(virtual_line); + 
sprintf(virtual_line, " UNS=(%12.5lE,%12.5lE,%12.5lE)\n", cid->uns[0], cid->uns[1], cid->uns[2]); + output->append_line(virtual_line); + } else { // label 214 + sprintf(virtual_line, " UN=(%12.5lE,%12.5lE,%12.5lE)\n\n", cid->un[0], cid->un[1], cid->un[2]); + output->append_line(virtual_line); + } + // label 220 + pcros(cid->vk, exri, cid->c1); + mextc(cid->vk, exri, cid->c1->fsac, cid->cextlr, cid->cext); + mmulc(cid->c1->vint, cid->cmullr, cid->cmul); + if (jw != 0) { + jw = 0; + // Some implicit loops writing to binary. + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + double value = cid->cext[i][j]; + vtppoanp->append_line(VirtualBinaryLine(value)); + } + } + for (int i = 0; i < 2; i++) { + double value = cid->c1->scsc[i]; + vtppoanp->append_line(VirtualBinaryLine(value)); + value = real(cid->c1->scscp[i]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = imag(cid->c1->scscp[i]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = cid->c1->ecsc[i]; + vtppoanp->append_line(VirtualBinaryLine(value)); + value = real(cid->c1->ecscp[i]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = imag(cid->c1->ecscp[i]); + vtppoanp->append_line(VirtualBinaryLine(value)); + } + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + double value = cid->gap[i][j]; + vtppoanp->append_line(VirtualBinaryLine(value)); + value = real(cid->gapp[i][j]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = imag(cid->gapp[i][j]); + vtppoanp->append_line(VirtualBinaryLine(value)); + } + } + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 3; j++) { + double value = cid->tqce[i][j]; + vtppoanp->append_line(VirtualBinaryLine(value)); + value = real(cid->tqcpe[i][j]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = imag(cid->tqcpe[i][j]); + vtppoanp->append_line(VirtualBinaryLine(value)); + } + } + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 3; j++) { + double value = cid->tqcs[i][j]; + 
vtppoanp->append_line(VirtualBinaryLine(value)); + value = real(cid->tqcps[i][j]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = imag(cid->tqcps[i][j]); + vtppoanp->append_line(VirtualBinaryLine(value)); + } + } + for (int i = 0; i < 3; i++) { + double value = cid->u[i]; + vtppoanp->append_line(VirtualBinaryLine(value)); + value = cid->up[i]; + vtppoanp->append_line(VirtualBinaryLine(value)); + value = cid->un[i]; + vtppoanp->append_line(VirtualBinaryLine(value)); + } + } + // label 254 + for (int i = 0; i < 16; i++) { + double value = real(cid->c1->vint[i]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = imag(cid->c1->vint[i]); + vtppoanp->append_line(VirtualBinaryLine(value)); + } + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + double value = cid->cmul[i][j]; + vtppoanp->append_line(VirtualBinaryLine(value)); + } + } + sprintf(virtual_line, " SINGLE SCATTERER\n"); + output->append_line(virtual_line); + int jlr = 2; + for (int ilr290 = 1; ilr290 <= 2; ilr290++) { + int ipol = (ilr290 % 2 == 0) ? 
1 : -1; + if (ilr290 == 2) jlr = 1; + double extsec = cid->c1->ecsc[ilr290 - 1]; + double qext = extsec * cid->sqsfi / cid->c1->gcs; + double scasec = cid->c1->scsc[ilr290 - 1]; + double albedc = scasec / extsec; + double qsca = scasec * cid->sqsfi / cid->c1->gcs; + double abssec = extsec - scasec; + double qabs = abssec * cid->sqsfi / cid->c1->gcs; + dcomplex s0 = cid->c1->fsac[ilr290 - 1][ilr290 - 1] * exri; + double qschu = imag(s0) * csch; + double pschu = real(s0) * csch; + double s0mag = cabs(s0) * cs0; + if (inpol == 0) { + sprintf(virtual_line, " LIN %2d\n", ipol); + output->append_line(virtual_line); + } else { // label 273 + sprintf(virtual_line, " CIRC %2d\n", ipol); + output->append_line(virtual_line); + } + // label 275 + sprintf(virtual_line, " ----- SCS ----- ABS ----- EXS ----- ALBEDS --\n"); + output->append_line(virtual_line); + sprintf( + virtual_line, " %14.7lE%15.7lE%15.7lE%15.7lE\n", + scasec, abssec, extsec, albedc + ); + output->append_line(virtual_line); + double alam = 2.0 * 3.141592653589793238 / cid->vk; + sprintf(virtual_line, "INSERTION: SCASEC %5d%14.7lE%14.7lE%14.7lE%14.7lE\n", ipol, alam, scasec, abssec, extsec); + sprintf(virtual_line, " ---- SCS/GS -- ABS/GS -- EXS/GS ---\n"); + output->append_line(virtual_line); + sprintf( + virtual_line, " %14.7lE%15.7lE%15.7lE\n", + qsca, qabs, qext + ); + output->append_line(virtual_line); + sprintf( + virtual_line, + " FSAS(%1d,%1d)=%15.7lE%15.7lE FSAS(%1d,%1d)=%15.7lE%15.7lE\n", + ilr290, ilr290, real(cid->c1->fsac[ilr290 - 1][ilr290 - 1]), + imag(cid->c1->fsac[ilr290 - 1][ilr290 - 1]), jlr, ilr290, + real(cid->c1->fsac[jlr - 1][ilr290 - 1]), + imag(cid->c1->fsac[jlr - 1][ilr290 - 1]) + ); + output->append_line(virtual_line); + sprintf( + virtual_line, + " SAS(%1d,%1d)=%15.7lE%15.7lE SAS(%1d,%1d)=%15.7lE%15.7lE\n", + ilr290, ilr290, real(cid->c1->sac[ilr290 - 1][ilr290 - 1]), + imag(cid->c1->sac[ilr290 - 1][ilr290 - 1]), jlr, ilr290, + real(cid->c1->sac[jlr - 1][ilr290 - 1]), + 
imag(cid->c1->sac[jlr - 1][ilr290 - 1]) + ); + output->append_line(virtual_line); + sprintf( + virtual_line, " QSCHU=%15.7lE, PSCHU=%15.7lE, S0MAG=%15.7lE\n", + qschu, pschu, s0mag + ); + output->append_line(virtual_line); + bool goto290 = isam >= 0 && (jths > 1 || jphs > 1); + if (!goto290) { + cid->gapv[0] = cid->gap[0][ilr290 - 1]; + cid->gapv[1] = cid->gap[1][ilr290 - 1]; + cid->gapv[2] = cid->gap[2][ilr290 - 1]; + double extins = cid->c1->ecsc[ilr290 - 1]; + double scatts = cid->c1->scsc[ilr290 - 1]; + double rapr, cosav, fp, fn, fk, fx, fy, fz; + rftr(cid->u, cid->up, cid->un, cid->gapv, extins, scatts, rapr, cosav, fp, fn, fk, fx, fy, fz); + sprintf(virtual_line, " COSAV=%15.7lE, RAPRS=%15.7lE\n", cosav, rapr); + output->append_line(virtual_line); + sprintf(virtual_line, " Fl=%15.7lE, Fr=%15.7lE, Fk=%15.7lE\n", fp, fn, fk); + output->append_line(virtual_line); + sprintf(virtual_line, " Fx=%15.7lE, Fy=%15.7lE, Fz=%15.7lE\n", fx, fy, fz); + output->append_line(virtual_line); + cid->tqev[0] = cid->tqce[ilr290 - 1][0]; + cid->tqev[1] = cid->tqce[ilr290 - 1][1]; + cid->tqev[2] = cid->tqce[ilr290 - 1][2]; + cid->tqsv[0] = cid->tqcs[ilr290 - 1][0]; + cid->tqsv[1] = cid->tqcs[ilr290 - 1][1]; + cid->tqsv[2] = cid->tqcs[ilr290 - 1][2]; + double tep, ten, tek, tsp, tsn, tsk; + tqr(cid->u, cid->up, cid->un, cid->tqev, cid->tqsv, tep, ten, tek, tsp, tsn, tsk); + sprintf(virtual_line, " TQEl=%15.7lE, TQEr=%15.7lE, TQEk=%15.7lE\n", tep, ten, tek); + output->append_line(virtual_line); + sprintf(virtual_line, " TQSl=%15.7lE, TQSr=%15.7lE, TQSk=%15.7lE\n", tsp, tsn, tsk); + output->append_line(virtual_line); + sprintf( + virtual_line, " TQEx=%15.7lE, TQEy=%15.7lE, TQEz=%15.7lE\n", + cid->tqce[ilr290 - 1][0], cid->tqce[ilr290 - 1][1], cid->tqce[ilr290 - 1][2] + ); + output->append_line(virtual_line); + sprintf( + virtual_line, " TQSx=%15.7lE, TQSy=%15.7lE, TQSz=%15.7lE\n", + cid->tqcs[ilr290 - 1][0], cid->tqcs[ilr290 - 1][1], cid->tqcs[ilr290 - 1][2] + ); + 
output->append_line(virtual_line); + } + } //ilr290 loop + double rbirif = (real(cid->c1->fsac[0][0]) - real(cid->c1->fsac[1][1])) / real(cid->c1->fsac[0][0]); + double rdichr = (imag(cid->c1->fsac[0][0]) - imag(cid->c1->fsac[1][1])) / imag(cid->c1->fsac[0][0]); + sprintf(virtual_line, " (RE(FSAS(1,1))-RE(FSAS(2,2)))/RE(FSAS(1,1))=%15.7lE\n", rbirif); + output->append_line(virtual_line); + sprintf(virtual_line, " (IM(FSAS(1,1))-IM(FSAS(2,2)))/IM(FSAS(1,1))=%15.7lE\n", rdichr); + output->append_line(virtual_line); + sprintf(virtual_line, " MULL\n"); + output->append_line(virtual_line); + for (int i = 0; i < 4; i++) { + sprintf( + virtual_line, " %15.7lE%15.7lE%15.7lE%15.7lE\n", + cid->cmul[i][0], cid->cmul[i][1], cid->cmul[i][2], cid->cmul[i][3] + ); + output->append_line(virtual_line); + } + sprintf(virtual_line, " MULLLR\n"); + output->append_line(virtual_line); + for (int i = 0; i < 4; i++) { + sprintf( + virtual_line, " %15.7lE%15.7lE%15.7lE%15.7lE\n", + cid->cmullr[i][0], cid->cmullr[i][1], cid->cmullr[i][2], cid->cmullr[i][3] + ); + output->append_line(virtual_line); + } + if (iavm != 0) { + mmulc(cid->c1->vintm, cid->cmullr, cid->cmul); + // Some implicit loops writing to binary. 
+ for (int i = 0; i < 16; i++) { + double value; + value = real(cid->c1->vintm[i]); + vtppoanp->append_line(VirtualBinaryLine(value)); + value = imag(cid->c1->vintm[i]); + vtppoanp->append_line(VirtualBinaryLine(value)); + } + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + double value = cid->cmul[i][j]; + vtppoanp->append_line(VirtualBinaryLine(value)); + } + } + sprintf(virtual_line, " CLUSTER (ENSEMBLE AVERAGE, MODE%2d)\n", iavm); + output->append_line(virtual_line); + if (inpol == 0) { + sprintf(virtual_line, " LIN\n"); + output->append_line(virtual_line); + } else { // label 316 + sprintf(virtual_line, " CIRC\n"); + output->append_line(virtual_line); + } + // label 318 + sprintf(virtual_line, " MULC\n"); + output->append_line(virtual_line); + for (int i = 0; i < 4; i++) { + sprintf( + virtual_line, " %15.7lE%15.7lE%15.7lE%15.7lE\n", + cid->cmul[i][0], cid->cmul[i][1], cid->cmul[i][2], cid->cmul[i][3] + ); + output->append_line(virtual_line); + } + sprintf(virtual_line, " MULCLR\n"); + output->append_line(virtual_line); + for (int i = 0; i < 4; i++) { + sprintf( + virtual_line, " %15.7lE%15.7lE%15.7lE%15.7lE\n", + cid->cmullr[i][0], cid->cmullr[i][1], cid->cmullr[i][2], cid->cmullr[i][3] + ); + output->append_line(virtual_line); + } + } + // label 420, continues jphs loop + if (isam < 1) phs += sa->phsstp; + } // jphs loop, labeled 480 + if (isam <= 1) thsl += sa->thsstp; + } // jths loop, labeled 482 + ph += sa->phstp; + } // jph484 loop + th += sa->thstp; + } // jth486 loop +#ifdef USE_NVTX + nvtxRangePop(); +#endif + interval_end = chrono::high_resolution_clock::now(); + elapsed = interval_end - interval_start; + message = "INFO: angle loop for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n"; + logger->log(message); + + logger->log("INFO: finished scale iteration " + to_string(jxi488) + " of " + to_string(nxi) + ".\n"); + + delete logger; + + return jer; } diff --git a/src/inclusion/np_inclusion.cpp 
b/src/inclusion/np_inclusion.cpp index a3fe12b5765c9022473980862ec8f08e51c3456e..fde9371b53744de005bfdd6302226ab84a28a948 100644 --- a/src/inclusion/np_inclusion.cpp +++ b/src/inclusion/np_inclusion.cpp @@ -64,8 +64,14 @@ extern void inclusion(const string& config_file, const string& data_file, const * \return result: `int` An exit code passed to the OS (0 for succesful execution). */ int main(int argc, char **argv) { +#ifdef MPI_VERSION + int ierr = MPI_Init(&argc, &argv); + // Create and initialise class with essential MPI data + mixMPI *mpidata = new mixMPI(MPI_COMM_WORLD); +#else // create a the class with dummy data if we are not using MPI at all mixMPI *mpidata = new mixMPI(); +#endif string config_file = "../../test_data/inclusion/DEDFB"; string data_file = "../../test_data/inclusion/DINCLU"; string output_path = "."; @@ -75,6 +81,9 @@ int main(int argc, char **argv) { output_path = string(argv[3]); } inclusion(config_file, data_file, output_path, mpidata); +#ifdef MPI_VERSION + MPI_Finalize(); +#endif delete mpidata; return 0; } diff --git a/src/libnptm/Commons.cpp b/src/libnptm/Commons.cpp index df6d43e5cb9b6d97b29e72fec41ad8b413cacaf1..126ca007bacfdb2acbafaf3c1d2e32855b0a5da9 100644 --- a/src/libnptm/Commons.cpp +++ b/src/libnptm/Commons.cpp @@ -36,249 +36,6 @@ #include #endif -C2::C2(GeometryConfiguration *gconf, ScattererConfiguration *sconf) { - nsph = gconf->number_of_spheres; - int npnt = gconf->npnt; - int npntts = gconf->npntts; - int max_n = (npnt > npntts) ? 
npnt : npntts; - nhspo = 2 * max_n - 1; - nl = sconf->configurations; - if (nsph == 1 && nl == 1) nl = 5; - ris = new dcomplex[nhspo](); - dlri = new dcomplex[nhspo](); - vkt = new dcomplex[nsph](); - dc0 = new dcomplex[nl](); - vsz = new double[nsph](); -} - -C2::C2(const C2& rhs) { - nsph = rhs.nsph; - nhspo = rhs.nhspo; - nl = rhs.nl; - ris = new dcomplex[nhspo](); - dlri = new dcomplex[nhspo](); - for (int ind=0; indlmtpo); const int ndi = c1->nsph * c1->nlim; const np_int ndit = 2 * ndi; - c9 = new C9(ndi, c1->nlem, 2 * ndi, 2 * c1->nlem); gaps = new double[c1->nsph](); tqev = new double[3](); tqsv = new double[3](); @@ -407,16 +160,18 @@ ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, Scatter #else proc_device = 0; #endif + + // In the first iteration, if refinement is enabled, determine the number of refinement iterations required to arrive at the target accuracy (if achievable in a reasonable number of iterations) + refinemode = 2; + // maxrefiters and accuracygoal should be configurable and preferably set somewhere else + maxrefiters = 20; + accuracygoal = 1e-6; } ClusterIterationData::ClusterIterationData(const ClusterIterationData& rhs) { c1 = new ParticleDescriptorCluster(reinterpret_cast(*(rhs.c1))); - c2 = new C2(*(rhs.c2)); - c3 = new C3(*(rhs.c3)); - c6 = new C6(*(rhs.c6)); const int ndi = c1->nsph * c1->nlim; const np_int ndit = 2 * ndi; - c9 = new C9(*(rhs.c9)); gaps = new double[c1->nsph](); for (int gi = 0; gi < c1->nsph; gi++) gaps[gi] = rhs.gaps[gi]; tqev = new double[3](); @@ -555,17 +310,16 @@ ClusterIterationData::ClusterIterationData(const ClusterIterationData& rhs) { number_of_scales = rhs.number_of_scales; proc_device = rhs.proc_device; + refinemode = rhs.refinemode; + maxrefiters = rhs.maxrefiters; + accuracygoal = rhs.accuracygoal; } #ifdef MPI_VERSION ClusterIterationData::ClusterIterationData(const mixMPI *mpidata, const int device_count) { c1 = new ParticleDescriptorCluster(mpidata); - c2 = new C2(mpidata); 
- c3 = new C3(mpidata); - c6 = new C6(mpidata); const int ndi = c1->nsph * c1->nlim; const np_int ndit = 2 * ndi; - c9 = new C9(mpidata); gaps = new double[c1->nsph](); MPI_Bcast(gaps, c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD); tqev = new double[3](); @@ -693,16 +447,15 @@ ClusterIterationData::ClusterIterationData(const mixMPI *mpidata, const int devi #else proc_device = 0; #endif + MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); } void ClusterIterationData::mpibcast(const mixMPI *mpidata) { c1->mpibcast(mpidata); - c2->mpibcast(mpidata); - c3->mpibcast(mpidata); - c6->mpibcast(mpidata); const int ndi = c1->nsph * c1->nlim; const np_int ndit = 2 * ndi; - c9->mpibcast(mpidata); MPI_Bcast(gaps, c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Bcast(tqev, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Bcast(tqsv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); @@ -766,6 +519,9 @@ void ClusterIterationData::mpibcast(const mixMPI *mpidata) { MPI_Bcast(&vk, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Bcast(&xiblock, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&number_of_scales, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); } #endif @@ -783,10 +539,6 @@ ClusterIterationData::~ClusterIterationData() { } delete[] zpv; delete c1; - delete c2; - delete c3; - delete c6; - delete c9; delete[] gaps; for (int ti = 1; ti > -1; ti--) { delete[] tqse[ti]; @@ -861,6 +613,10 @@ ParticleDescriptor::ParticleDescriptor(GeometryConfiguration *gconf, ScattererCo _lmtpo = 0; _lmtpos = 0; _nv3j = 0; + _ndi = 0; + _ndit = 0; + _ndm = 0; + gcs = 0.0; _num_configurations = sconf->configurations; _num_layers = (sconf->use_external_sphere) ? 
1 : 0; for (int nli = 0; nli < num_configurations; nli++) _num_layers += sconf->get_nshl(nli); @@ -906,6 +662,18 @@ ParticleDescriptor::ParticleDescriptor(GeometryConfiguration *gconf, ScattererCo ros[ci] = sconf->get_radius(ci); nshl[ci] = sconf->get_nshl(ci); } + _npnt = gconf->npnt; + _npntts = gconf->npntts; + int max_n = (npnt > npntts) ? npnt : npntts; + _nhspo = 2 * max_n - 1; + _nl = sconf->configurations; + if (_nsph == 1 && _nl == 1) _nl = 5; + ris = new dcomplex[_nhspo](); + dlri = new dcomplex[_nhspo](); + vkt = new dcomplex[_nsph](); + dc0 = new dcomplex[_nl](); + vsz = new double[_nsph](); + // >>> NEEDED BY SPHERE AND CLUSTER <<< sas = NULL; vints = NULL; @@ -918,7 +686,14 @@ ParticleDescriptor::ParticleDescriptor(GeometryConfiguration *gconf, ScattererCo sqabs = NULL; gcsv = NULL; // >>> NEEDED BY CLUSTER <<< + vec_tsas = NULL; vintt = NULL; + tfsas = 0.0 + I * 0.0; + tsas = NULL; + gcs = 0.0; + scs = 0.0; + ecs = 0.0; + acs = 0.0; // >>> NEEDED BY CLUSTER AND INCLU <<< vec_am0m = NULL; vec_fsac = NULL; @@ -945,6 +720,7 @@ ParticleDescriptor::ParticleDescriptor(GeometryConfiguration *gconf, ScattererCo scscm = NULL; ecscm = NULL; ind3j = NULL; + rac3j = NULL; // >>> NEEDED BY INCLU <<< rm0 = NULL; re0 = NULL; @@ -954,6 +730,8 @@ ParticleDescriptor::ParticleDescriptor(GeometryConfiguration *gconf, ScattererCo te = NULL; tm0 = NULL; te0 = NULL; + vec_at = NULL; + at = NULL; } ParticleDescriptor::ParticleDescriptor(const ParticleDescriptor &rhs) { @@ -972,6 +750,10 @@ ParticleDescriptor::ParticleDescriptor(const ParticleDescriptor &rhs) { _lmtpo = rhs._lmtpo; _lmtpos = rhs._lmtpos; _nv3j = rhs._nv3j; + _ndi = rhs._ndi; + _ndit = rhs._ndit; + _ndm = rhs._ndm; + gcs = rhs.gcs; _num_configurations = rhs._num_configurations; _num_layers = rhs._num_layers; @@ -1021,6 +803,26 @@ ParticleDescriptor::ParticleDescriptor(const ParticleDescriptor &rhs) { ros[ci] = rhs.ros[ci]; nshl[ci] = rhs.nshl[ci]; } + _npnt = rhs._npnt; + _npntts = rhs._npntts; + _nhspo = 
rhs._nhspo; + _nl = rhs._nl; + ris = new dcomplex[_nhspo](); + dlri = new dcomplex[_nhspo](); + for (int ri = 0; ri < _nhspo; ri++) { + ris[ri] = rhs.ris[ri]; + dlri[ri] = rhs.dlri[ri]; + } + vkt = new dcomplex[_nsph](); + vsz = new double[_nsph](); + for (int vi = 0; vi < _nsph; vi++) { + vkt[vi] = rhs.vkt[vi]; + vsz[vi] = rhs.vsz[vi]; + } + dc0 = new dcomplex[_nl](); + for (int di = 0; di < _nl; di++) { + dc0[di] = rhs.dc0[di]; + } // >>> NEEDED BY SPHERE AND CLUSTER <<< sas = NULL; vints = NULL; @@ -1034,6 +836,16 @@ ParticleDescriptor::ParticleDescriptor(const ParticleDescriptor &rhs) { gcsv = NULL; // >>> NEEDED BY CLUSTER <<< vintt = NULL; + tfsas = 0.0 + I * 0.0; + vec_tsas = NULL; + tsas = NULL; + gcs = 0.0; + scs = 0.0; + ecs = 0.0; + acs = 0.0; + vec_gis = NULL; + vec_gls = NULL; + vec_sam = NULL; // >>> NEEDED BY CLUSTER AND INCLU <<< vec_am0m = NULL; vec_fsac = NULL; @@ -1060,6 +872,7 @@ ParticleDescriptor::ParticleDescriptor(const ParticleDescriptor &rhs) { scscm = NULL; ecscm = NULL; ind3j = NULL; + rac3j = NULL; // >>> NEEDED BY INCLU <<< rm0 = NULL; re0 = NULL; @@ -1088,6 +901,10 @@ ParticleDescriptor::ParticleDescriptor(const mixMPI *mpidata) { MPI_Bcast(&_lmtpo, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&_lmtpos, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&_nv3j, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&_ndi, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&_ndit, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&_ndm, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&gcs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Bcast(&_num_configurations, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&_num_layers, 1, MPI_INT, 0, MPI_COMM_WORLD); @@ -1131,6 +948,20 @@ ParticleDescriptor::ParticleDescriptor(const mixMPI *mpidata) { MPI_Bcast(iog, _nsph, MPI_INT, 0, MPI_COMM_WORLD); ros = new double[_num_configurations]; MPI_Bcast(ros, _num_configurations, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&_npnt, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&_npntts, 1, MPI_INT, 0, 
MPI_COMM_WORLD); + MPI_Bcast(&_nhspo, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&_nl, 1, MPI_INT, 0, MPI_COMM_WORLD); + ris = new dcomplex[_nhspo]; + MPI_Bcast(ris, _nhspo, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + dlri = new dcomplex[_nhspo]; + MPI_Bcast(dlri, _nhspo, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + vkt = new dcomplex[_nsph]; + MPI_Bcast(vkt, _nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + vsz = new double[_nsph]; + MPI_Bcast(vsz, _nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD); + dc0 = new dcomplex[_nl]; + MPI_Bcast(dc0, _nl, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); } void ParticleDescriptor::mpibcast(const mixMPI *mpidata) { @@ -1149,6 +980,10 @@ void ParticleDescriptor::mpibcast(const mixMPI *mpidata) { MPI_Bcast(&_lmtpo, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&_lmtpos, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&_nv3j, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&_ndi, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&_ndit, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&_ndm, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&gcs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Bcast(&_num_configurations, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&_num_layers, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(vec_rmi, _nsph * _li, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); @@ -1163,6 +998,15 @@ void ParticleDescriptor::mpibcast(const mixMPI *mpidata) { MPI_Bcast(rzz, _nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Bcast(iog, _nsph, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(ros, _num_configurations, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&_npnt, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&_npntts, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&_nhspo, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&_nl, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(ris, _nhspo, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(dlri, _nhspo, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(vkt, _nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(vsz, _nsph, MPI_DOUBLE, 0, 
MPI_COMM_WORLD); + MPI_Bcast(dc0, _nl, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); // >>> NEEDED BY SPHERE AND CLUSTER <<< // if (_class_type == SPHERE_TYPE || _class_type == CLUSTER_TYPE) { MPI_Bcast(vec_sas, 4 * _nsph, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); @@ -1180,6 +1024,14 @@ void ParticleDescriptor::mpibcast(const mixMPI *mpidata) { // >>> NEEDED BY CLUSTER <<< // if (_class_type == CLUSTER_TYPE) { MPI_Bcast(vintt, 16, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(vec_tsas, 4, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(&tfsas, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(&scs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&ecs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&acs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(vec_gis, _ndi * _nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(vec_gls, _ndi * _nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(vec_sam, _ndit * _nlemt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); } // >>> NEEDED BY CLUSTER AND INCLU <<< // if (_class_type == CLUSTER_TYPE || _class_type == INCLUSION_TYPE) { @@ -1205,6 +1057,7 @@ void ParticleDescriptor::mpibcast(const mixMPI *mpidata) { MPI_Bcast(scscm, 2, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Bcast(ecscm, 2, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Bcast(v3j0, _nv3j, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(rac3j, _lmtpo, MPI_DOUBLE, 0, MPI_COMM_WORLD); } // >>> NEEDED BY INCLU <<< // if (_class_type == INCLUSION_TYPE) { @@ -1216,6 +1069,7 @@ void ParticleDescriptor::mpibcast(const mixMPI *mpidata) { MPI_Bcast(te, _le, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); MPI_Bcast(tm0, _le, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); MPI_Bcast(te0, _le, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(vec_at, _nlemt * _ndm, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); } } #endif // MPI_VERSION @@ -1237,6 +1091,11 @@ ParticleDescriptor::~ParticleDescriptor() { delete[] vec_rei; delete[] rmi; delete[] vec_rmi; + delete[] ris; + delete[] dlri; + 
delete[] vkt; + delete[] dc0; + delete[] vsz; // Inclusion class members, destroyed only if sub-class is INCLUSION if (_class_type == INCLUSION_TYPE) { delete[] rm0; @@ -1247,6 +1106,8 @@ ParticleDescriptor::~ParticleDescriptor() { delete[] te; delete[] tm0; delete[] te0; + delete[] at; + delete[] vec_at; } // Inclusion/cluster class members, destroyed if sub-class is INCLUSION or CLUSTER if (_class_type == INCLUSION_TYPE || _class_type == CLUSTER_TYPE) { @@ -1275,6 +1136,7 @@ ParticleDescriptor::~ParticleDescriptor() { delete[] scscm; delete[] ecscm; delete[] ind3j; + delete[] rac3j; } // Cluster/sphere class members, destroyed if sub-class is CLUSTER or SPHERE if (_class_type == CLUSTER_TYPE || _class_type == SPHERE_TYPE) { @@ -1297,6 +1159,14 @@ ParticleDescriptor::~ParticleDescriptor() { // Cluster class members, destroyed only if sub-class is CLUSTER if (_class_type == CLUSTER_TYPE) { delete[] vintt; + delete[] vec_tsas; + delete[] tsas; + delete[] vec_gis; + delete[] vec_gls; + delete[] vec_sam; + delete[] gis; + delete[] gls; + delete[] sam; } } @@ -1328,6 +1198,10 @@ ParticleDescriptorCluster::ParticleDescriptorCluster(GeometryConfiguration *gcon // Needed by CLUSTER vintt = new dcomplex[16](); + vec_tsas = new dcomplex[4](); + tsas = new dcomplex*[2]; + tsas[0] = vec_tsas; + tsas[1] = vec_tsas + 2; // Needed by CLUSTER and INCLU _le = gconf->le; @@ -1342,6 +1216,8 @@ ParticleDescriptorCluster::ParticleDescriptorCluster(GeometryConfiguration *gcon _lmtpo = _li + _le + 1; _lmtpos = _lmtpo * _lmtpo; _nv3j = (_lm * (_lm + 1) * (2 * _lm + 7)) / 6; + _ndi = _nsph * _nlim; + _ndit = 2 * _nsph * _nlim; vec_am0m = new dcomplex[_nlemt * _nlemt](); vec_fsac = new dcomplex[4](); vec_sac = new dcomplex[4](); @@ -1374,6 +1250,20 @@ ParticleDescriptorCluster::ParticleDescriptorCluster(GeometryConfiguration *gcon ecscm = new double[2](); ind3j = new int*[_lm + 1]; for (int ii = 0; ii <= _lm; ii++) ind3j[ii] = vec_ind3j + (_lm * ii); + rac3j = new double[_lmtpo](); + + // 
Needed by CLUSTER + vec_gis = new dcomplex[_ndi * _nlem](); + gis = new dcomplex*[_ndi]; + vec_gls = new dcomplex[_ndi * _nlem](); + gls = new dcomplex*[_ndi]; + for (int gi = 0; gi < _ndi; gi++) { + gis[gi] = vec_gis + (gi * _nlem); + gls[gi] = vec_gls + (gi * _nlem); + } + vec_sam = new dcomplex[_ndit * _nlemt](); + sam = new dcomplex*[_ndit]; + for (int si = 0; si < _ndit; si++) sam[si] = vec_sam + (si * _nlemt); } ParticleDescriptorCluster::ParticleDescriptorCluster(const ParticleDescriptorCluster &rhs) : ParticleDescriptor(rhs) { @@ -1412,6 +1302,16 @@ ParticleDescriptorCluster::ParticleDescriptorCluster(const ParticleDescriptorClu // >>> NEEDED BY CLUSTER <<< vintt = new dcomplex[16]; for (int ti = 0; ti < 16; ti++) vintt[ti] = rhs.vintt[ti]; + vec_tsas = new dcomplex[4]; + for (int si = 0; si < 4; si++) vec_tsas[si] = rhs.vec_tsas[si]; + tsas = new dcomplex*[2]; + tsas[0] = vec_tsas; + tsas[1] = vec_tsas + 2; + tfsas = rhs.tfsas; + gcs = rhs.gcs; + scs = rhs.scs; + ecs = rhs.ecs; + acs = rhs.acs; // >>> NEEDED BY CLUSTER AND INCLU <<< vec_am0m = new dcomplex[_nlemt * _nlemt]; np_int nlemts = _nlemt * _nlemt; @@ -1470,6 +1370,26 @@ ParticleDescriptorCluster::ParticleDescriptorCluster(const ParticleDescriptorClu for (int vj = 0; vj < _nv3j; vj++) v3j0[vj] = rhs.v3j0[_nv3j]; ind3j = new int*[_lm + 1]; for (int ii = 0; ii <= _lm; ii++) ind3j[ii] = vec_ind3j + (_lm * ii); + rac3j = new double[_lmtpo]; + for (int ri = 0; ri < _lmtpo; ri++) rac3j[ri] = rhs.rac3j[ri]; + + // Needed by CLUSTER + vec_gis = new dcomplex[_ndi * _nlem]; + vec_gls = new dcomplex[_ndi * _nlem]; + for (int vi = 0; vi < _ndi * _nlem; vi++) { + vec_gis[vi] = rhs.vec_gis[vi]; + vec_gls[vi] = rhs.vec_gls[vi]; + } + gis = new dcomplex*[_ndi]; + gls = new dcomplex*[_ndi]; + for (int gi = 0; gi < _ndi; gi++) { + gis[gi] = vec_gis + (gi * _nlem); + gls[gi] = vec_gls + (gi * _nlem); + } + vec_sam = new dcomplex[_ndit * _nlemt]; + for (int vi = 0; vi < _ndit * _nlemt; vi++) vec_sam[vi] = 
rhs.vec_sam[vi]; + sam = new dcomplex*[_ndit]; + for (int si = 0; si < _ndit; si++) sam[si] = vec_sam + (si * _nlemt); } #ifdef MPI_VERSION @@ -1507,6 +1427,31 @@ ParticleDescriptorCluster::ParticleDescriptorCluster(const mixMPI *mpidata) : Pa // >>> NEEDED BY CLUSTER <<< // vintt = new dcomplex[16]; MPI_Bcast(vintt, 16, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + vec_tsas = new dcomplex[4]; + MPI_Bcast(vec_tsas, 4, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + tsas = new dcomplex*[2]; + tsas[0] = vec_tsas; + tsas[1] = vec_tsas + 2; + MPI_Bcast(&tfsas, 1, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + MPI_Bcast(&scs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&ecs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&acs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + vec_gis = new dcomplex[_ndi * _nlem]; + MPI_Bcast(vec_gis, _ndi * _nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + vec_gls = new dcomplex[_ndi * _nlem]; + MPI_Bcast(vec_gls, _ndi * _nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + vec_sam = new dcomplex[_ndit * _nlemt]; + MPI_Bcast(vec_sam, _ndit * _nlemt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + gis = new dcomplex*[_ndi]; + gls = new dcomplex*[_ndi]; + for (int gi = 0; gi < _ndi; gi++) { + gis[gi] = vec_gis + (gi * _nlem); + gls[gi] = vec_gls + (gi * _nlem); + } + sam = new dcomplex*[_ndit]; + for (int si = 0; si < _ndit; si++) { + sam[si] = vec_sam + (si * _nlemt); + } // >>> NEEDED BY CLUSTER AND INCLU <<< vec_am0m = new dcomplex[_nlemt * _nlemt]; int nlemts = _nlemt * _nlemt; @@ -1561,6 +1506,8 @@ ParticleDescriptorCluster::ParticleDescriptorCluster(const mixMPI *mpidata) : Pa MPI_Bcast(v3j0, _nv3j, MPI_DOUBLE, 0, MPI_COMM_WORLD); ind3j = new int*[_lm + 1]; for (int ii = 0; ii <= _lm; ii++) ind3j[ii] = vec_ind3j + (_lm * ii); + rac3j = new double[_lmtpo]; + MPI_Bcast(rac3j, _lmtpo, MPI_DOUBLE, 0, MPI_COMM_WORLD); } #endif // MPI_VERSION // >>> End of ParticleDescriptorCluster class implementation. 
<<< // @@ -1577,9 +1524,12 @@ ParticleDescriptorInclusion::ParticleDescriptorInclusion(GeometryConfiguration * _ncou = _nsph * _nsph - 1; _litpo = _li + _li + 1; _litpos = _litpo * _litpo; + _lmpo = _lm + 1; _lmtpo = _li + _le + 1; _lmtpos = _lmtpo * _lmtpo; _nv3j = (_lm * (_lm + 1) * (2 * _lm + 7)) / 6; + _ndi = _nsph * _nlim; + _ndit = 2 * _nsph * _nlim; vec_am0m = new dcomplex[_nlemt * _nlemt](); vec_fsac = new dcomplex[4](); vec_sac = new dcomplex[4](); @@ -1612,7 +1562,9 @@ ParticleDescriptorInclusion::ParticleDescriptorInclusion(GeometryConfiguration * ecscm = new double[2](); ind3j = new int*[_lm + 1]; for (int ii = 0; ii <= _lm; ii++) ind3j[ii] = vec_ind3j + (_lm * ii); + rac3j = new double[_lmtpo](); // Needed by INCLU + _ndm = 2 * (_nsph * _nlim + _nlem); rm0 = new dcomplex[_le](); re0 = new dcomplex[_le](); rmw = new dcomplex[_le](); @@ -1621,6 +1573,9 @@ ParticleDescriptorInclusion::ParticleDescriptorInclusion(GeometryConfiguration * te = new dcomplex[_le](); tm0 = new dcomplex[_le](); te0 = new dcomplex[_le](); + vec_at = new dcomplex[_nlemt * _ndm](); + at = new dcomplex*[_nlemt]; + for (int ai = 0; ai < _nlemt; ai++) at[ai] = vec_at + (ai * _ndm); } ParticleDescriptorInclusion::ParticleDescriptorInclusion(const ParticleDescriptorInclusion &rhs) : ParticleDescriptor(rhs) { @@ -1682,6 +1637,8 @@ ParticleDescriptorInclusion::ParticleDescriptorInclusion(const ParticleDescripto for (int vj = 0; vj < _nv3j; vj++) v3j0[vj] = rhs.v3j0[_nv3j]; ind3j = new int*[_lm + 1]; for (int ii = 0; ii <= _lm; ii++) ind3j[ii] = vec_ind3j + (_lm * ii); + rac3j = new double[_lmtpo]; + for (int ri = 0; ri < _lmtpo; ri++) rac3j[ri] = rhs.rac3j[ri]; // >>> NEEDED BY INCLU <<< // rm0 = new dcomplex[_le]; re0 = new dcomplex[_le]; @@ -1701,6 +1658,10 @@ ParticleDescriptorInclusion::ParticleDescriptorInclusion(const ParticleDescripto tm0[ti] = rhs.tm0[ti]; te0[ti] = rhs.te0[ti]; } + vec_at = new dcomplex[_nlemt * _ndm]; + for (int vi = 0; vi < _nlemt * _ndm; vi++) vec_at[vi] = 
rhs.vec_at[vi]; + at = new dcomplex*[_nlemt]; + for (int ai = 0; ai < _nlemt; ai++) at[ai] = vec_at + (ai * _ndm); } #ifdef MPI_VERSION @@ -1759,6 +1720,8 @@ ParticleDescriptorInclusion::ParticleDescriptorInclusion(const mixMPI *mpidata) MPI_Bcast(v3j0, _nv3j, MPI_DOUBLE, 0, MPI_COMM_WORLD); ind3j = new int*[_lm + 1]; for (int ii = 0; ii <= _lm; ii++) ind3j[ii] = vec_ind3j + (_lm * ii); + rac3j = new double[_lmtpo]; + MPI_Bcast(rac3j, _lmtpo, MPI_DOUBLE, 0, MPI_COMM_WORLD); // >>> NEEDED BY INCLU <<< // rm0 = new dcomplex[_le]; MPI_Bcast(rm0, _le, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); @@ -1776,6 +1739,10 @@ ParticleDescriptorInclusion::ParticleDescriptorInclusion(const mixMPI *mpidata) MPI_Bcast(tm0, _le, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); te0 = new dcomplex[_le]; MPI_Bcast(te0, _le, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + vec_at = new dcomplex[_nlemt * _ndm]; + MPI_Bcast(vec_at, _nlemt * _ndm, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); + at = new dcomplex*[_nlemt]; + for (int ai = 0; ai < _nlemt; ai++) at[ai] = vec_at + (ai * _ndm); } #endif // MPI_VERSION // >>> End of ParticleDescriptorInclusion class implementation. 
<<< // diff --git a/src/libnptm/clu_subs.cpp b/src/libnptm/clu_subs.cpp index 0b96517898900ec64a2a36be4e39639da15458b7..0199f31a96dd9d759be40f90c07d70cef76c7e0b 100644 --- a/src/libnptm/clu_subs.cpp +++ b/src/libnptm/clu_subs.cpp @@ -447,7 +447,7 @@ double cgev(int ipamo, int mu, int l, int m) { #pragma omp end declare target #endif -void cms(dcomplex **am, ParticleDescriptor *c1, C6 *c6) { +void cms(dcomplex **am, ParticleDescriptor *c1) { dcomplex dm, de, cgh, cgk; const dcomplex cc0 = 0.0 + 0.0 * I; int ndi = c1->nsph * c1->nlim; @@ -485,8 +485,8 @@ void cms(dcomplex **am, ParticleDescriptor *c1, C6 *c6) { int i2e = in2 + ilm2e; int j2 = in1 + ilm2; int j2e = in1 + ilm2e; - cgh = ghit(0, 0, nbl, l1, m1, l2, m2, c1, c6); - cgk = ghit(0, 1, nbl, l1, m1, l2, m2, c1, c6); + cgh = ghit(0, 0, nbl, l1, m1, l2, m2, c1); + cgk = ghit(0, 1, nbl, l1, m1, l2, m2, c1); am[i1 - 1][i2 - 1] = cgh; am[i1 - 1][i2e - 1] = cgk; am[i1e - 1][i2 - 1] = cgk; @@ -529,7 +529,7 @@ void cms(dcomplex **am, ParticleDescriptor *c1, C6 *c6) { } // n1 loop } -void crsm1(double vk, double exri, ParticleDescriptor *c1, C6 *c6) { +void crsm1(double vk, double exri, ParticleDescriptor *c1) { dcomplex ***svf, ***svw, **svs; const dcomplex cc0 = 0.0 + 0.0 * I; dcomplex cam = cc0; @@ -578,14 +578,14 @@ void crsm1(double vk, double exri, ParticleDescriptor *c1, C6 *c6) { } // im loop for (int im = immn; im <= immx; im++) { int m = im - letpo; - r3jmr(l, l1, l2, m, c6); + r3jmr(l, l1, l2, m, c1->rac3j); int m1mnmo = (-l1 > -l2 - m) ? -(l1 + 1) : -(l2 + m + 1); int nm1 = (l1 < l2 - m) ? 
(l1 - m1mnmo) : (l2 - m - m1mnmo); for (int im1 = 1; im1 <= nm1; im1++) { int m1 = -im1 - m1mnmo; int isn = 1; if (m1 % 2 != 0) isn = -1; - double cg3j = c6->rac3j[im1 - 1] * isn; + double cg3j = c1->rac3j[im1 - 1] * isn; int ilm1 = il1 + m1; int ilm2 = il2 + m1 - m; int ipa = 0; @@ -871,7 +871,7 @@ dcomplex ghit_d( #endif dcomplex ghit( int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2, - ParticleDescriptor *c1, C6 *c6 + ParticleDescriptor *c1 ) { /* NBL identifies transfer vector going from N2 to N1; * IHI=0 for Hankel, IHI=1 for Bessel, IHI=2 for Bessel from origin; @@ -936,7 +936,7 @@ dcomplex ghit( } // goes to 22 } else { // label 16 - r3jjr(l1mp, l2, -mupm1, mupm2, c6); + r3jjr(l1mp, l2, -mupm1, mupm2, c1->rac3j); int il = ilin; int lt20 = lminpo; while (lt20 <= lmaxpo) { @@ -946,7 +946,7 @@ dcomplex ghit( int l3 = lt20 - 1; int ny = l3 * l3 + lt20 + m1mm2; double aors = 1.0 * (l3 + lt20); - double f3j = (c6->rac3j[il - 1] * c1->v3j0[i3j0 - 1] * sqrt(aors)) * jsn; + double f3j = (c1->rac3j[il - 1] * c1->v3j0[i3j0 - 1] * sqrt(aors)) * jsn; cfun = (c1->vh[nbhj + lt20 - 1] * c1->vyhj[nby + ny - 1]) * f3j; csum += cfun; } @@ -987,7 +987,7 @@ dcomplex ghit( } // goes to 42 } else { // label 36 - r3jjr(l1mp, l2, -mupm1, mupm2, c6); + r3jjr(l1mp, l2, -mupm1, mupm2, c1->rac3j); int il = ilin; int lt40 = lminpo; while (lt40 <= lmaxpo) { @@ -997,7 +997,7 @@ dcomplex ghit( int l3 = lt40 - 1; int ny = l3 * l3 + lt40 + m1mm2; double aors = 1.0 * (l3 + lt40); - double f3j = (c6->rac3j[il - 1] * c1->v3j0[i3j0 - 1] * sqrt(aors)) * jsn; + double f3j = (c1->rac3j[il - 1] * c1->v3j0[i3j0 - 1] * sqrt(aors)) * jsn; cfun = (c1->vj * c1->vyhj[nby + ny - 1]) * f3j; csum += cfun; } @@ -1042,7 +1042,7 @@ dcomplex ghit( } // goes to 62 } else { // label 56 - r3jjr(l1mp, l2, -mupm1, mupm2, c6); + r3jjr(l1mp, l2, -mupm1, mupm2, c1->rac3j); int il = ilin; int lt60 = lminpo; while (lt60 <= lmaxpo) { @@ -1052,7 +1052,7 @@ dcomplex ghit( int l3 = lt60 - 1; int ny = l3 * l3 + lt60 
+ m1mm2; double aors = 1.0 * (l3 + lt60); - double f3j = (c6->rac3j[il - 1] * c1->v3j0[i3j0 - 1] * sqrt(aors)) * jsn; + double f3j = (c1->rac3j[il - 1] * c1->v3j0[i3j0 - 1] * sqrt(aors)) * jsn; cfun = (c1->vj0[nbhj + lt60 - 1] * c1->vyj0[nby + ny - 1]) * f3j; csum += cfun; } @@ -1530,10 +1530,10 @@ void polar( } } -void r3j000(int j2, int j3, C6 *c6) { +void r3j000(int j2, int j3, double *rac3j) { int jmx = j3 + j2; if (jmx <= 0) { - c6->rac3j[0] = 1.0; + rac3j[0] = 1.0; return; } int jmn = j3 - j2; @@ -1545,7 +1545,7 @@ void r3j000(int j2, int j3, C6 *c6) { if (njmo <= 0) { double sj = 1.0 * jf; double cnr = (1 / sqrt(sj)) * isn; - c6->rac3j[0] = cnr; + rac3j[0] = cnr; return; } double sjr = 1.0 * jf; @@ -1557,17 +1557,17 @@ void r3j000(int j2, int j3, C6 *c6) { int j1mos = j1mo * j1mo; double cjmo = sqrt(1.0 * (jmxpos - j1mos) * (j1mos - jmns)); if (njmo <= 1) { - c6->rac3j[0] = -cj / cjmo; - double sj = sjr + (c6->rac3j[0] * c6->rac3j[0]) * (jf - 4); + rac3j[0] = -cj / cjmo; + double sj = sjr + (rac3j[0] * rac3j[0]) * (jf - 4); double cnr = (1.0 / sqrt(sj)) * isn; - c6->rac3j[1] = cnr; - c6->rac3j[0] *= cnr; + rac3j[1] = cnr; + rac3j[0] *= cnr; return; } int nj = njmo + 1; int nmat = (nj + 1) / 2; - c6->rac3j[nj - 1] = 1.0; - c6->rac3j[njmo - 1] = -cj / cjmo; + rac3j[nj - 1] = 1.0; + rac3j[njmo - 1] = -cj / cjmo; if (nmat != njmo) { int nbr = njmo - nmat; for (int ibr45 = 1; ibr45 <= nbr; ibr45++) { @@ -1578,14 +1578,14 @@ void r3j000(int j2, int j3, C6 *c6) { cj = sqrt(1.0 * (jmxpos - j1s) * (j1s - jmns)); j1mos = j1mo * j1mo; cjmo = sqrt(1.0 * (jmxpos - j1mos) * (j1mos - jmns)); - c6->rac3j[irr - 2] = c6->rac3j[irr - 1] * (-cj / cjmo); - sjr = sjr + (c6->rac3j[irr - 1] * c6->rac3j[irr - 1]) * jf; + rac3j[irr - 2] = rac3j[irr - 1] * (-cj / cjmo); + sjr = sjr + (rac3j[irr - 1] * rac3j[irr - 1]) * jf; } } // label 50 - double racmat = c6->rac3j[nmat - 1]; + double racmat = rac3j[nmat - 1]; sjr = sjr + (racmat * racmat) * (jf - 4); - c6->rac3j[0] = 1.0; + rac3j[0] 
= 1.0; jf = jmn + jmn + 1; double sjl = 1.0 * jf; int j1pt = jmn + 2; @@ -1593,7 +1593,7 @@ void r3j000(int j2, int j3, C6 *c6) { double cjpo = sqrt(1.0 * (jmxpos - j1pos) * (j1pos - jmns)); int j1pts = j1pt * j1pt; double cjpt = sqrt(1.0 * (jmxpos - j1pts) * (j1pts - jmns)); - c6->rac3j[1] = -cjpo / cjpt; + rac3j[1] = -cjpo / cjpt; int nmatmo = nmat - 1; if (nmatmo >= 2) { for (int irl70 = 2; irl70 <= nmatmo; irl70++) { @@ -1603,29 +1603,29 @@ void r3j000(int j2, int j3, C6 *c6) { cjpo = sqrt(1.0 * (jmxpos - j1pos) * (j1pos - jmns)); j1pts = j1pt * j1pt; cjpt = sqrt(1.0 * (jmxpos - j1pts) * (j1pts - jmns)); - c6->rac3j[irl70] = c6->rac3j[irl70 - 1] * (-cjpo / cjpt); - sjl = sjl + (c6->rac3j[irl70 - 1] * c6->rac3j[irl70 - 1]) * jf; + rac3j[irl70] = rac3j[irl70 - 1] * (-cjpo / cjpt); + sjl = sjl + (rac3j[irl70 - 1] * rac3j[irl70 - 1]) * jf; } } // label 75 - double ratrac = racmat / c6->rac3j[nmat - 1]; + double ratrac = racmat / rac3j[nmat - 1]; double rats = ratrac * ratrac; double sj = sjr + sjl * rats; - c6->rac3j[nmat - 1] = racmat; + rac3j[nmat - 1] = racmat; double cnr = (1.0 / sqrt(sj)) * isn; for (int irr80 = nmat; irr80 <= nj; irr80++) { - c6->rac3j[irr80 - 1] *= cnr; + rac3j[irr80 - 1] *= cnr; } double cnl = cnr * ratrac; for (int irl85 = 1; irl85 <= nmatmo; irl85++) { - c6->rac3j[irl85 - 1] *= cnl; + rac3j[irl85 - 1] *= cnl; } } #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif -void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) { +void r3jjr(int j2, int j3, int m2, int m3, double *rac3j) { int jmx = j3 + j2; int jdf = j3 - j2; int m1 = -m2 - m3; @@ -1639,7 +1639,7 @@ void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) { if (njmo <= 0) { double sj = 1.0 * jf; double cnr = (1.0 / sqrt(sj)) * isn; - c6->rac3j[0] = cnr; + rac3j[0] = cnr; } else { // label 15 double sjt = 1.0; double sjr = 1.0 * jf; @@ -1655,17 +1655,17 @@ void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) { double cj = sqrt(ccj * (jsmpos - j1s)); double dj = 
1.0 * jf * (j1 * j1po * mdf + idjc); if (njmo <= 1) { - c6->rac3j[0] = -dj / (cj * j1po); - double sj = sjr + (c6->rac3j[0] * c6->rac3j[0]) * (jf - 2); + rac3j[0] = -dj / (cj * j1po); + double sj = sjr + (rac3j[0] * rac3j[0]) * (jf - 2); double cnr = (1.0 / sqrt(sj)) * isn; - c6->rac3j[1] = cnr; - c6->rac3j[0] *= cnr; + rac3j[1] = cnr; + rac3j[0] *= cnr; } else { // label 20 double cjp = 0.0; int nj = njmo + 1; int nmat = (nj + 1) / 2; - c6->rac3j[nj - 1] = 1.0; - c6->rac3j[njmo - 1] = -dj / (cj * j1po); + rac3j[nj - 1] = 1.0; + rac3j[njmo - 1] = -dj / (cj * j1po); if (nmat != njmo) { int nbr = njmo - nmat; for (int ibr45 = 1; ibr45 <= nbr; ibr45++) { @@ -1677,24 +1677,24 @@ void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) { cjp = cj; ccj = 1.0 * (j1s - jdfs) * (j1s - m1s); cj = sqrt(ccj * (jsmpos - j1s)); - sjt = c6->rac3j[irr - 1] * c6->rac3j[irr - 1]; + sjt = rac3j[irr - 1] * rac3j[irr - 1]; dj = 1.0 * jf * (j1 * j1po * mdf + idjc); - c6->rac3j[irr - 2] = -(c6->rac3j[irr - 1] * dj - + c6->rac3j[irr] * cjp * j1) / (cj * j1po); + rac3j[irr - 2] = -(rac3j[irr - 1] * dj + + rac3j[irr] * cjp * j1) / (cj * j1po); sjr += (sjt * jf); } // ibr45 loop } // label 50 double osjt = sjt; - sjt = c6->rac3j[nmat - 1] * c6->rac3j[nmat - 1]; + sjt = rac3j[nmat - 1] * rac3j[nmat - 1]; if (sjt >= osjt) { sjr += (sjt * (jf - 2)); } else { // label 55 nmat++; } // label 60 - double racmat = c6->rac3j[nmat - 1]; - c6->rac3j[0] = 1.0; + double racmat = rac3j[nmat - 1]; + rac3j[0] = 1.0; jf = jmn + jmn + 1; double sjl = 1.0 * jf; j1 = jmn; @@ -1704,11 +1704,11 @@ void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) { double ccjp = 1.0 * (j1pos - jdfs) * (j1pos - m1s); cjp = sqrt(ccjp * (jsmpos - j1pos)); dj = 1.0 * jf * (j1 * j1po * mdf + idjc); - c6->rac3j[1] = - dj / (cjp * j1); + rac3j[1] = - dj / (cjp * j1); } else { // label 62 cjp = sqrt(1.0 * (jsmpos - 1)); dj = 1.0 * mdf; - c6->rac3j[1] = -dj / cjp; + rac3j[1] = -dj / cjp; } // label 63 int nmatmo = nmat - 1; @@ -1721,24 
+1721,24 @@ void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) { cj = cjp; double ccjp = 1.0 * (j1pos - jdfs) * (j1pos - m1s); cjp = sqrt(ccjp * (jsmpos - j1pos)); - sjt = c6->rac3j[irl70 - 1] * c6->rac3j[irl70 - 1]; + sjt = rac3j[irl70 - 1] * rac3j[irl70 - 1]; dj = 1.0 * jf * (j1 * j1po * mdf + idjc); - c6->rac3j[irl70] = -( - c6->rac3j[irl70 - 1] * dj - + c6->rac3j[irl70 - 2] * cj * j1po - ) / (cjp * j1); + rac3j[irl70] = -( + rac3j[irl70 - 1] * dj + + rac3j[irl70 - 2] * cj * j1po + ) / (cjp * j1); sjl += (sjt * jf); } } // label 75 - double ratrac = racmat / c6->rac3j[nmat - 1]; + double ratrac = racmat / rac3j[nmat - 1]; double rats = ratrac * ratrac; double sj = sjr + sjl * rats; - c6->rac3j[nmat - 1] = racmat; + rac3j[nmat - 1] = racmat; double cnr = (1.0 / sqrt(sj)) * isn; - for (int irr80 = nmat; irr80 <= nj; irr80++) c6->rac3j[irr80 - 1] *= cnr; + for (int irr80 = nmat; irr80 <= nj; irr80++) rac3j[irr80 - 1] *= cnr; double cnl = cnr * ratrac; - for (int irl85 = 1; irl85 <= nmatmo; irl85++) c6->rac3j[irl85 - 1] *= cnl; + for (int irl85 = 1; irl85 <= nmatmo; irl85++) rac3j[irl85 - 1] *= cnl; } } } @@ -1870,7 +1870,7 @@ void r3jjr_d(int j2, int j3, int m2, int m3, double *rac3j) { #pragma omp end declare target #endif -void r3jmr(int j1, int j2, int j3, int m1, C6 *c6) { +void r3jmr(int j1, int j2, int j3, int m1, double *rac3j) { int mmx = (j2 < j3 - m1) ? j2 : j3 - m1; int mmn = (-j2 > -(j3 + m1)) ? 
-j2 : -(j3 + m1); int nmmo = mmx - mmn; @@ -1881,7 +1881,7 @@ void r3jmr(int j1, int j2, int j3, int m1, C6 *c6) { if (nmmo <= 0) { double sj = 1.0 * j1tpo; double cnr = (1.0 / sqrt(sj)) * isn; - c6->rac3j[0] = cnr; + rac3j[0] = cnr; // returns } else { // label 15 int j1s = j1 * j1po; @@ -1895,17 +1895,17 @@ void r3jmr(int j1, int j2, int j3, int m1, C6 *c6) { double cm = sqrt(1.0 * (j2po - m2) * (j2 + m2) * (j3po - m3) * (j3 + m3)); double dm = 1.0 * (id + m2 * m3 * 2); if (nmmo <= 1) { - c6->rac3j[0] = dm / cm; - double sj = (1.0 + c6->rac3j[0] * c6->rac3j[0]) * j1tpo; + rac3j[0] = dm / cm; + double sj = (1.0 + rac3j[0] * rac3j[0]) * j1tpo; double cnr = 1.0 / sqrt(sj) * isn; - c6->rac3j[1] = cnr; - c6->rac3j[0] *= cnr; + rac3j[1] = cnr; + rac3j[0] *= cnr; // returns } else { // label 20 int nm = nmmo + 1; int nmat = (nm + 1) / 2; - c6->rac3j[nm - 1] = 1.0; - c6->rac3j[nmmo - 1] = dm / cm; + rac3j[nm - 1] = 1.0; + rac3j[nmmo - 1] = dm / cm; double sjt = 1.0; double sjr = 1.0; if (nmat != nmmo) { @@ -1916,28 +1916,28 @@ void r3jmr(int j1, int j2, int j3, int m1, C6 *c6) { m3 = m1 + m2; double cmp = cm; cm = sqrt(1.0 * (j2po - m2) * (j2 + m2) * (j3po - m3) * (j3 + m3)); - sjt = c6->rac3j[irr - 1] * c6->rac3j[irr - 1]; + sjt = rac3j[irr - 1] * rac3j[irr - 1]; dm = 1.0 * (id + m2 * m3 * 2); - c6->rac3j[irr - 1] *= ((dm - c6->rac3j[irr] * cmp) / cm); + rac3j[irr - 1] *= ((dm - rac3j[irr] * cmp) / cm); sjr += sjt; } // ibr45 loop } // label 50 double osjt = sjt; - sjt = c6->rac3j[nmat - 1] * c6->rac3j[nmat - 1]; + sjt = rac3j[nmat - 1] * rac3j[nmat - 1]; if (sjt >= osjt) { sjr += sjt; } else { // label 55 nmat++; } // label 60 - double racmat = c6->rac3j[nmat - 1]; - c6->rac3j[0] = 1.0; + double racmat = rac3j[nmat - 1]; + rac3j[0] = 1.0; m2 = mmn; m3 = m1 + m2; double cmp = sqrt(1.0 * (j2 - m2) * (j2po + m2) * (j3 - m3) * (j3po + m3)); dm = 1.0 * (id + m2 * m3 * 2); - c6->rac3j[1] = dm / cmp; + rac3j[1] = dm / cmp; double sjl = 1.0; int nmatmo = nmat - 1; if (nmatmo > 
1) { @@ -1946,20 +1946,20 @@ void r3jmr(int j1, int j2, int j3, int m1, C6 *c6) { m3 = m1 + m2; cm = cmp; cmp = sqrt(1.0 * (j2 - m2) * (j2po + m2) * (j3 - m3) * (j3po + m3)); - sjt = c6->rac3j[irl70 - 1] * c6->rac3j[irl70 - 1]; + sjt = rac3j[irl70 - 1] * rac3j[irl70 - 1]; dm = 1.0 * (id + m2 * m3 * 2); - c6->rac3j[irl70] = (c6->rac3j[irl70 - 1] * dm - c6->rac3j[irl70 - 2] * cm) / cmp; + rac3j[irl70] = (rac3j[irl70 - 1] * dm - rac3j[irl70 - 2] * cm) / cmp; sjl += sjt; } } // label 75 - double ratrac = racmat / c6->rac3j[nmat - 1]; + double ratrac = racmat / rac3j[nmat - 1]; double rats = ratrac * ratrac; double sj = (sjr + sjl * rats) * j1tpo; - c6->rac3j[nmat - 1] = racmat; + rac3j[nmat - 1] = racmat; double cnr = 1.0 / sqrt(sj) * isn; - for (int irr80 = nmat; irr80 <= nm; irr80++) c6->rac3j[irr80 - 1] *= cnr; + for (int irr80 = nmat; irr80 <= nm; irr80++) rac3j[irr80 - 1] *= cnr; double cnl = cnr * ratrac; - for (int irl85 = 1; irl85 <= nmatmo; irl85++) c6->rac3j[irl85 - 1] *= cnl; + for (int irl85 = 1; irl85 <= nmatmo; irl85++) rac3j[irl85 - 1] *= cnl; // returns } } @@ -2179,7 +2179,7 @@ void rftr( fz = u[2] * extins - gapv[2]; } -void scr0(double vk, double exri, ParticleDescriptor *c1, C3 *c3) { +void scr0(double vk, double exri, ParticleDescriptor *c1) { const dcomplex cc0 = 0.0 + 0.0 * I; double exdc = exri * exri; double ccs = 4.0 * acos(0.0) / (vk * vk); @@ -2256,10 +2256,10 @@ void scr0(double vk, double exri, ParticleDescriptor *c1, C3 *c3) { acs += c1->sabs[iogi - 1]; tfsas += c1->fsas[iogi - 1]; } - c3->scs = scs; - c3->ecs = ecs; - c3->acs = acs; - c3->tfsas = tfsas; + c1->scs = scs; + c1->ecs = ecs; + c1->acs = acs; + c1->tfsas = tfsas; #ifdef USE_NVTX nvtxRangePop(); #endif @@ -2267,7 +2267,7 @@ void scr0(double vk, double exri, ParticleDescriptor *c1, C3 *c3) { void scr2( double vk, double vkarg, double exri, double *duk, - ParticleDescriptor *c1, C3 *c3 + ParticleDescriptor *c1 ) { #ifdef USE_NVTX nvtxRangePush("scr2 starts"); @@ -2377,10 +2377,10 
@@ void scr2( tsas01 += (c1->sas[iogi - 1][0][1] * phas); tsas11 += (c1->sas[iogi - 1][1][1] * phas); } // i14 loop - c3->tsas[0][0] = tsas00; - c3->tsas[1][0] = tsas10; - c3->tsas[0][1] = tsas01; - c3->tsas[1][1] = tsas11; + c1->tsas[0][0] = tsas00; + c1->tsas[1][0] = tsas10; + c1->tsas[0][1] = tsas01; + c1->tsas[1][1] = tsas11; #ifdef USE_NVTX nvtxRangePop(); //#endif @@ -2429,7 +2429,7 @@ void scr2( for (int ipo2 = 1; ipo2 <= 2; ipo2++) { for (int jpo2 = 1; jpo2 <= 2; jpo2++) { int j = jpo2-1 + (ipo2-1)*2 + (jpo1-1)*4 + (ipo1-1)*8; - c1->vintt[j] = c3->tsas[jpo2 - 1][ipo2 - 1] * dconjg(c3->tsas[jpo1 - 1][ipo1 - 1]) * cfsq; + c1->vintt[j] = c1->tsas[jpo2 - 1][ipo2 - 1] * dconjg(c1->tsas[jpo1 - 1][ipo1 - 1]) * cfsq; } // jpo2 loop } // ipo2 loop } // jpo1 loop @@ -2442,11 +2442,11 @@ void scr2( #endif } -void str(ScattererConfiguration *sconf, ParticleDescriptor *c1, C3 *c3, C6 *c6) { +void str(ScattererConfiguration *sconf, ParticleDescriptor *c1) { dcomplex *ylm; const double pi = acos(-1.0); int last_configuration; - c3->gcs = 0.0; + c1->gcs = 0.0; double gcss = 0.0; last_configuration = 0; for (int i18 = 1; i18 <= c1->nsph; i18++) { @@ -2460,7 +2460,7 @@ void str(ScattererConfiguration *sconf, ParticleDescriptor *c1, C3 *c3, C6 *c6) c1->rc[last_configuration - 1][j16 - 1] = sconf->get_rcf(last_configuration - 1, j16 - 1) * c1->ros[last_configuration - 1]; } // j16 loop } - c3->gcs += gcss; + c1->gcs += gcss; } // i18 loop int ylm_size = (c1->litpos > c1->lmtpos) ? c1->litpos : c1->lmtpos; ylm = new dcomplex[ylm_size](); @@ -2468,7 +2468,7 @@ void str(ScattererConfiguration *sconf, ParticleDescriptor *c1, C3 *c3, C6 *c6) for (int l1po28 = 1; l1po28 <= c1->lmpo; l1po28++) { int l1 = l1po28 - 1; for (int l2 = 1; l2 <= c1->lm; l2++) { - r3j000(l1, l2, c6); + r3j000(l1, l2, c1->rac3j); c1->ind3j[l1po28 - 1][l2 - 1] = i; int lmnpo = (l2 > l1) ? 
l2 - l1 + 1 : l1 - l2 + 1; int lmxpo = l2 + l1 + 1; @@ -2477,7 +2477,7 @@ void str(ScattererConfiguration *sconf, ParticleDescriptor *c1, C3 *c3, C6 *c6) while (lpo28 <= lmxpo) { i++; il++; - c1->v3j0[i - 1] = c6->rac3j[il - 1]; + c1->v3j0[i - 1] = c1->rac3j[il - 1]; lpo28 += 2; } } // l2 loop @@ -2533,11 +2533,9 @@ void tqr( tsk = u[0] * tqsv[0] + u[1] * tqsv[1] + u[2] * tqsv[2]; } -void ztm(dcomplex **am, ParticleDescriptor *c1, C6 *c6, C9 * c9) { +void ztm(dcomplex **am, ParticleDescriptor *c1) { dcomplex gie, gle, a1, a2, a3, a4, sum1, sum2, sum3, sum4; const dcomplex cc0 = 0.0 + 0.0 * I; - const np_int ndi = c1->nsph * c1->nlim; - np_int ndit = 2 * ndi; // int i2 = 0; // old implementation #ifdef USE_NVTX nvtxRangePush("ZTM starts"); @@ -2546,9 +2544,9 @@ void ztm(dcomplex **am, ParticleDescriptor *c1, C6 *c6, C9 * c9) { nvtxRangePush("ZTM parallel loop 1"); #endif // C9 *c9_para = new C9(*c9); - dcomplex *gis_v = c9->gis[0]; - dcomplex *gls_v = c9->gls[0]; - double *rac3j_local = (double *) malloc(c6->lmtpo*sizeof(double)); + dcomplex *gis_v = c1->gis[0]; + dcomplex *gls_v = c1->gls[0]; + double *rac3j_local = (double *) malloc(c1->lmtpo*sizeof(double)); int k2max = c1->li*(c1->li+2); int k3max = c1->le*(c1->le+2); // To parallelise, I run a linearised loop directly over k @@ -2591,7 +2589,7 @@ void ztm(dcomplex **am, ParticleDescriptor *c1, C6 *c6, C9 * c9) { int m2 = -l2 - 1 + im2; int i3 = l3 * l3 + im3 - 1; int m3 = -l3 - 1 + im3; - int vecindex = (i2 - 1)*c9->nlem + i3 - 1; + int vecindex = (i2 - 1) * c1->nlem + i3 - 1; gis_v[vecindex] = ghit_d(2, 0, n2, l2, m2, l3, m3, c1, rac3j_local); gls_v[vecindex] = ghit_d(2, 1, n2, l2, m2, l3, m3, c1, rac3j_local); } // close k3 loop, former l3 + im3 loops @@ -2605,26 +2603,26 @@ void ztm(dcomplex **am, ParticleDescriptor *c1, C6 *c6, C9 * c9) { nvtxRangePush("ZTM loop 2"); #endif dcomplex *am_v = am[0]; - dcomplex *sam_v = c9->sam[0]; + dcomplex *sam_v = c1->sam[0]; #ifdef USE_TARGET_OFFLOAD #pragma omp target 
teams distribute parallel for simd collapse(2) #endif - for (int i1 = 1; i1 <= ndi; i1++) { // GPU portable? + for (int i1 = 1; i1 <= c1->ndi; i1++) { // GPU portable? for (int i3 = 1; i3 <= c1->nlem; i3++) { dcomplex sum1 = cc0; dcomplex sum2 = cc0; dcomplex sum3 = cc0; dcomplex sum4 = cc0; - int i1e = i1 + ndi; + int i1e = i1 + c1->ndi; int i3e = i3 + c1->nlem; #pragma parallel for simd reduction(+:sum1,sum2,sum3,sum4) - for (int i2 = 1; i2 <= ndi; i2++) { - int i2e = i2 + ndi; - int vecindg_23 = (i2 - 1)*c9->nlem + i3 - 1; + for (int i2 = 1; i2 <= c1->ndi; i2++) { + int i2e = i2 + c1->ndi; + int vecindg_23 = (i2 - 1) * c1->nlem + i3 - 1; dcomplex gie = gis_v[vecindg_23]; dcomplex gle = gls_v[vecindg_23]; - np_int vecinda_1 = (i1 - 1)*ndit; - np_int vecinda_1e = (i1 - 1 + ndi)*ndit; + np_int vecinda_1 = (i1 - 1) * c1->ndit; + np_int vecinda_1e = (i1 - 1 + c1->ndi) * c1->ndit; dcomplex a1 = am_v[vecinda_1 + i2 - 1]; dcomplex a2 = am_v[vecinda_1 + i2e - 1]; dcomplex a3 = am_v[vecinda_1e + i2 - 1]; @@ -2634,8 +2632,8 @@ void ztm(dcomplex **am, ParticleDescriptor *c1, C6 *c6, C9 * c9) { sum3 += (a3 * gie + a4 * gle); sum4 += (a3 * gle + a4 * gie); } // i2 loop - int vecind1 = (i1 - 1)*c9->nlemt; - int vecind1e = (i1e - 1)*c9->nlemt; + int vecind1 = (i1 - 1) * c1->nlemt; + int vecind1e = (i1e - 1) * c1->nlemt; sam_v[vecind1 + i3 - 1] = sum1; sam_v[vecind1 + i3e - 1] = sum2; sam_v[vecind1e + i3 - 1] = sum3; @@ -2643,39 +2641,36 @@ void ztm(dcomplex **am, ParticleDescriptor *c1, C6 *c6, C9 * c9) { } // i3 loop } // i1 loop #pragma omp parallel for collapse(2) - for (int i1 = 1; i1 <= ndi; i1++) { + for (int i1 = 1; i1 <= c1->ndi; i1++) { for (int i0 = 1; i0 <= c1->nlem; i0++) { - int vecindex = (i1 - 1)*c9->nlem + i0 - 1; + int vecindex = (i1 - 1) * c1->nlem + i0 - 1; gis_v[vecindex] = dconjg(gis_v[vecindex]); gls_v[vecindex] = dconjg(gls_v[vecindex]); - // c9->gis[i1 - 1][i0 - 1] = dconjg(c9->gis[i1 - 1][i0 - 1]); - // c9->gls[i1 - 1][i0 - 1] = dconjg(c9->gls[i1 - 
1][i0 - 1]); } // i0 loop } // i1 loop - int nlemt = c1->nlem + c1->nlem; dcomplex *vec_am0m = c1->am0m[0]; #ifdef USE_TARGET_OFFLOAD #pragma omp target parallel for collapse(2) #endif for (int i0 = 1; i0 <= c1->nlem; i0++) { - for (int i3 = 1; i3 <= nlemt; i3++) { + for (int i3 = 1; i3 <= c1->nlemt; i3++) { int i0e = i0 + c1->nlem; dcomplex sum1 = cc0; dcomplex sum2 = cc0; - for (int i1 = 1; i1 <= ndi; i1 ++) { - int i1e = i1 + ndi; - int vecind1 = (i1 - 1)*c9->nlemt; - int vecind1e = (i1e - 1)*c9->nlemt; + for (int i1 = 1; i1 <= c1->ndi; i1 ++) { + int i1e = i1 + c1->ndi; + int vecind1 = (i1 - 1) * c1->nlemt; + int vecind1e = (i1e - 1) * c1->nlemt; a1 = sam_v[vecind1 + i3 - 1]; a2 = sam_v[vecind1e + i3 - 1]; - int vecindex = (i1 - 1)*c9->nlem + i0 - 1; + int vecindex = (i1 - 1) * c1->nlem + i0 - 1; gie = gis_v[vecindex]; gle = gls_v[vecindex]; sum1 += (a1 * gie + a2 * gle); sum2 += (a1 * gle + a2 * gie); } // i1 loop - int vecind0 = (i0 - 1)*nlemt; - int vecind0e = (i0e - 1)*nlemt; + int vecind0 = (i0 - 1) * c1->nlemt; + int vecind0e = (i0e - 1) * c1->nlemt; vec_am0m[vecind0 + i3 - 1] = -sum1; vec_am0m[vecind0e + i3 - 1] = -sum2; // c1->am0m[i0 - 1][i3 - 1] = -sum1; diff --git a/src/libnptm/inclu_subs.cpp b/src/libnptm/inclu_subs.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e2eede6927f1701ca34656ba8cd1a6051baeecdc --- /dev/null +++ b/src/libnptm/inclu_subs.cpp @@ -0,0 +1,779 @@ +/* Copyright (C) 2024 INAF - Osservatorio Astronomico di Cagliari + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + A copy of the GNU General Public License is distributed along with + this program in the COPYING file. If not, see: . + */ + +/*! \file inclu_subs.cpp + * + * \brief C++ implementation of INCLUSION subroutines. + */ + +#ifndef INCLUDE_TYPES_H_ +#include "../include/types.h" +#endif + +#ifndef INCLUDE_CONFIGURATION_H_ +#include "../include/Configuration.h" +#endif + +#ifndef INCLUDE_COMMONS_H_ +#include "../include/Commons.h" +#endif + +#ifndef INCLUDE_SPH_SUBS_H_ +#include "../include/sph_subs.h" +#endif + +#ifndef INCLUDE_CLU_SUBS_H_ +#include "../include/clu_subs.h" +#endif + +using namespace std; + +void cnf(int n, dcomplex z, int nm, dcomplex *csj, dcomplex *csy) { + /* + FROM CSPHJY OF LIBRARY specfun + + ========================================================== + Purpose: Compute spherical Bessel functions y + Input : Z --- Complex argument of y + N --- Order of y ( N = 0,1,2,... ) + CSJ(N+1) --- j + NM --- Highest order computed + Output: CSY(N+1) --- y + ========================================================== + */ + double a0 = cabs(z); + if (a0 < 1.0e-60) { + for (int k = 0; k <= n; k++) csy[k] = -1.0e300; + } else { + csy[0] = -ccos(z) / z; + if (n > 0) { + csy[1] = (csy[0] - csin(z)) / z; + if (n > 1) { + for (int k = 2; k <= nm; k++) { + double absjk = cabs(csj[k - 1]); + double absjkmo = cabs(csj[k - 2]); + csy[k] = (absjk > absjkmo) ? 
+ (csj[k] * csy[k - 1] - 1.0 / (z * z)) / csj[k - 1] : + (csj[k] * csy[k - 2] - (2.0 * k - 1.0) / (z * z * z)) / csj[k - 2]; + } + } + } + } +} + +void exma(dcomplex **am, ParticleDescriptor *c1) { + const dcomplex cc0 = 0.0 + I * 0.0; + dcomplex **at = c1->at; + const int ndit = c1->ndit; + const int ndm = c1->ndm; + for (int j20 = 1; j20 <= c1->nlemt; j20++) { + int j0 = j20 + ndit; + for (int i20 = 1; i20 <= c1->nlemt; i20++) { + dcomplex sum = cc0; + for (int k = 0; k < ndm; k++) sum += at[i20 - 1][k] * am[k][j0 - 1]; + c1->am0m[i20 - 1][j20 - 1] = sum; + } + } // j20 loop +} + +void incms(dcomplex **am, double enti, ParticleDescriptor *c1) { + const dcomplex cc0 = 0.0 + I * 0.0; + dcomplex **at = c1->at; + int nbl, i1; + const int ndi = c1->ndi; + const int ndit = ndi + ndi; + const int ndm = c1->ndm; + nbl = 0; + for (int n1 = 1; n1 < c1->nsph; n1++) { + int in1 = (n1 - 1) * c1->nlim; + int n1po = n1 + 1; + for (int n2 = n1po; n2 <= c1->nsph; n2++) { + int in2 = (n2 - 1) * c1->nlim; + nbl++; + for (int l1 = 1; l1 <= c1->li; l1++) { + int l1po = l1 + 1; + int il1 = l1po * l1; + int l1tpo = l1po + l1; + for (int im1 = 1; im1 <= l1tpo; im1++) { + int m1 = im1 - l1po; + int ilm1 = il1 + m1; + int ilm1e = ilm1 + ndi; + i1 = in1 + ilm1; + int i1e = in1 + ilm1e; + int j1 = in2 + ilm1; + int j1e = in2 + ilm1e; + for (int l2 = 1; l2 <= c1->li; l2++) { + int l2po = l2 + 1; + int il2 = l2po * l2; + int l2tpo = l2po + l2; + int ish = ((l2 + l1) % 2 == 0) ? 
1 : -1; + int isk = -ish; + for (int im2 = 1; im2 <= l2tpo; im2++) { + int m2 = im2 - l2po; + int ilm2 = il2 + m2; + int ilm2e = ilm2 + ndi; + int i2 = in2 + ilm2; + int i2e = in2 + ilm2e; + int j2 = in1 + ilm2; + int j2e = in1 + ilm2e; + dcomplex cgh = ghit(0, 0, nbl, l1, m1, l2, m2, c1); + dcomplex cgk = ghit(0, 1, nbl, l1, m1, l2, m2, c1); + am[i1 - 1][i2 - 1] = cgh; + am[i1 - 1][i2e - 1] = cgk; + am[i1e - 1][i2 - 1] = cgk; + am[i1e - 1][i2e - 1] = cgh; + am[j1 - 1][j2 - 1] = cgh * ish; + am[j1 - 1][j2e - 1] = cgk * isk; + am[j1e - 1][j2 - 1] = cgk * isk; + am[j1e - 1][j2e - 1] = cgh * ish; + } // im2 loop 24 + } // l2 loop 24 + } // im1 loop 24 + } // l1 loop 24 + } // n2 loop 26 + } // n1 loop 26 + for (int n1 = 1; n1 <= c1->nsph; n1++) { + int in1 = (n1 - 1) * c1->nlim; + for (int l1 = 1; l1 <= c1->li; l1++) { + dcomplex frm = c1->rmi[l1 - 1][n1 - 1]; + dcomplex fre = c1->rei[l1 - 1][n1 - 1]; + int l1po = l1 + 1; + int il1 = l1po * l1; + int l1tpo = l1po + l1; + for (int im1 = 1; im1 <= l1tpo; im1++) { + int m1 = im1 - l1po; + int ilm1 = il1 + m1; + i1 = in1 + ilm1; + int i1e = i1 + ndi; + for (int ilm2 = 1; ilm2 <= c1->nlim; ilm2++) { + int i2 = in1 + ilm2; + int i2e = i2 + ndi; + am[i1 - 1][i2 - 1] = cc0; + am[i1 - 1][i2e - 1] = cc0; + am[i1e - 1][i2 - 1] = cc0; + am[i1e - 1][i2e - 1] = cc0; + } // ilm2 loop 28 + am[i1 - 1][i1 - 1] = frm; + am[i1e - 1][i1e - 1]= fre; + } // im1 loop 30 + } // l1 loop 30 + } // n1 loop 30 + int nditpo = ndit + 1; + for (i1 = 1; i1 <= c1->nlemt; i1++) { + int i3 = i1 + ndit; + for (int i2 = nditpo; i2 <= ndm; i2++) { + am[i3 - 1][i2 - 1] = cc0; + at[i1 - 1][i2 - 1] = cc0; + } // i2 loop 40 + } // i1 loop 40 + i1 = 0; + for (int l1 = 1; l1 <= c1->le; l1++) { + dcomplex frm = c1->rm0[l1 - 1]; + dcomplex fre = c1->re0[l1 - 1]; + dcomplex ftm = c1->tm0[l1 - 1]; + dcomplex fte = c1->te0[l1 - 1]; + int l1tpo = l1 + l1 + 1; + for (int im1 = 1; im1 <= l1tpo; im1 ++) { + i1++; + int i1e = i1 + c1->nlem; + int i3 = i1 + ndit; + int i3e 
= i3 + c1->nlem; + am[i3 - 1][i3 - 1] = frm; + am[i3e - 1][i3e - 1] = fre; + at[i1 - 1][i3 - 1] = ftm; + at[i1e - 1][i3e - 1] = fte; + } // im1 loop 45 + } // l1 loop 45 + if (enti != 0.0) { + for (int l2 = 1; l2 <= c1->le; l2++) { + dcomplex frm = c1->rmw[l2 - 1]; + dcomplex fre = c1->rew[l2 - 1]; + dcomplex ftm = c1->tm[l2 - 1]; + dcomplex fte = c1->te[l2 - 1]; + int l2po = l2 + 1; + int il2 = l2po * l2; + int l2tpo = l2po + l2; + for (int im2 = 1; im2 <= l2tpo; im2++) { + int m2 = im2 - l2po; + int i2 = il2 + m2; + int j2 = il2 - m2; + int i2e = i2 + c1->nlem; + int j2e = j2 + c1->nlem; + int i3 = i2 + ndit; + int j3 = j2 + ndit; + int i3e = i3 + c1->nlem; + int j3e = j3 + c1->nlem; + for (int n1 = 1; n1 <= c1->nsph; n1++) { + int in1 = (n1 - 1) * c1->nlim; + for (int l1 = 1; l1 <= c1->li; l1 ++) { + int l1po = l1 + 1; + int il1 = l1po * l1; + int l1tpo = l1po + l1; + for (int im1 = 1; im1 <= l1tpo; im1++) { + int m1 = im1 - l1po; + int ilm1 = il1 + m1; + int jlm1 = il1 - m1; + i1 = in1 + ilm1; + int i1e = i1 + ndi; + int j1 = in1 + jlm1; + int j1e = j1 + ndi; + int isil = ((m2 + m1) % 2 == 0) ? 
1 : -1; + dcomplex cgi = ghit(2, 0, n1, l1, m1, l2, m2, c1); + dcomplex cgl = ghit(2, 1, n1, l1, m1, l2, m2, c1); + am[i1 - 1][i3 - 1] = cgi; + am[i1 - 1][i3e - 1] = cgl; + am[i1e - 1][i3 - 1] = cgl; + am[i1e - 1][i3e - 1] = cgi; + am[j3 - 1][j1 - 1] = cgi * frm * isil; + am[j3 - 1][j1e - 1] = cgl * frm * isil; + am[j3e - 1][j1 - 1] = cgl * fre * isil; + am[j3e - 1][j1e - 1] = cgi * fre * isil; + at[j2 - 1][j1 - 1] = cgi * ftm * isil; + at[j2 - 1][j1e - 1] = cgl * ftm * isil; + at[j2e - 1][j1 - 1] = cgl * fte * isil; + at[j2e - 1][j1e - 1] = cgi * fte * isil; + // returns + } // im1 loop 50 + } // l1 loop 50 + } // n1 loop 50 + } // im2 loop 50 + } // l2 loop 50 + } else { + // label 55 + int i2 = 0; + for (int l2 = 1; l2 <= c1->le; l2++) { + dcomplex frm = c1->rmw[l2 - 1]; + dcomplex fre = c1->rew[l2 - 1]; + dcomplex ftm = c1->tm[l2 - 1]; + dcomplex fte = c1->te[l2 - 1]; + int l2tpo = l2 + l2 + 1; + int m2 = -l2 - 1; + for (int im2 = 1; im2 <= l2tpo; im2++) { + m2++; + i2++; + int i2e = i2 + c1->nlem; + int i3 = i2 + ndit; + int i3e = i3 + c1->nlem; + i1 = 0; + for (int n1 = 1; n1 <= c1->nsph; n1++) { + for (int l1 = 1; l1 <= c1->li; l1++) { + int l1tpo = l1 + l1 + 1; + int m1 = -l1 - 1; + for (int im1 = 1; im1 <= l1tpo; im1++) { + m1++; + i1++; + int i1e = i1 + ndi; + dcomplex cgi = ghit(2, 0, n1, l1, m1, l2, m2, c1); + dcomplex cgl = ghit(2, 1, n1, l1, m1, l2, m2, c1); + am[i1 - 1][i3 - 1] = cgi; + am[i1 - 1][i3e - 1] = cgl; + am[i1e - 1][i3 - 1] = cgl; + am[i1e - 1][i3e - 1] = cgi; + cgi = dconjg(cgi); + cgl = dconjg(cgl); + am[i3 - 1][i1 - 1] = cgi * frm; + am[i3 - 1][i1e - 1] = cgl * frm; + am[i3e - 1][i1 - 1] = cgl * fre; + am[i3e - 1][i1e - 1] = cgi * fre; + at[i2 - 1][i1 - 1] = cgi * ftm; + at[i2 - 1][i1e - 1] = cgl * ftm; + at[i2e - 1][i1 - 1] = cgl * fte; + at[i2e - 1][i1e - 1] = cgi * fte; + } // im1 loop 60 + } // l1 loop 60 + } // n1 loop 60 + } // im2 loop 60 + } // l2 loop 60 + } // END OF enti = 0.0 CASE +} + +void indme( + int i, int npnt, int 
npntts, double vk, dcomplex ent, double enti, + dcomplex entn, int &jer, int &lcalc, dcomplex &arg, ParticleDescriptor *c1) { + const dcomplex uim = 0.0 + I * 1.0; + const int nstp = npnt - 1; + const int nstpts = npntts - 1; + const int lipo = c1->li + 1; + const int lipt = c1->li + 2; + dcomplex *cfj = new dcomplex[lipt](); + dcomplex *cfn = new dcomplex[lipt](); + dcomplex *fb = new dcomplex[lipt](); + dcomplex *fbi = new dcomplex[lipt](); + dcomplex *fn = new dcomplex[lipt](); + double *rfj = new double[lipt](); + double *rfn = new double[lipt](); + jer = 0; + double sz = vk * c1->ros[i - 1]; + c1->vsz[i - 1] = sz; + double vkr1 = vk * c1->rc[i - 1][0]; + int nsh = c1->nshl[i - 1]; + c1->vkt[i - 1] = csqrt(c1->dc0[0]); + arg = vkr1 * c1->vkt[i - 1]; + dcomplex arin = arg; + if (imag(arg) != 0.0) { + cbf(lipo, arg, lcalc, cfj); + if (lcalc < lipo) { + jer = 5; + delete[] cfj; + delete[] cfn; + delete[] fb; + delete[] fbi; + delete[] fn; + delete[] rfj; + delete[] rfn; + return; + } + // label 122 + for (int j124 = 0; j124 < lipt; j124++) fbi[j124] = cfj[j124]; + } else { // label 126 + double rarg = real(arg); + rbf(lipo, rarg, lcalc, rfj); + if (lcalc < lipo) { + jer = 5; + delete[] cfj; + delete[] cfn; + delete[] fb; + delete[] fbi; + delete[] fn; + delete[] rfj; + delete[] rfn; + return; + } + // label 128 + for (int j130 = 0; j130 < lipt; j130++) fbi[j130] = rfj[j130]; + } + // label 132 + dcomplex aris = sz * entn; + arg = aris; + if (enti != 0.0) { + cbf(lipo, arg, lcalc, cfj); + if (lcalc < lipo) { + jer = 11; + delete[] cfj; + delete[] cfn; + delete[] fb; + delete[] fbi; + delete[] fn; + delete[] rfj; + delete[] rfn; + return; + } + cnf(lipo, arg, lcalc, cfj, cfn); + // QUESTION: should we check for lcalc and throw JER=12 if failing? + // see lines 2492 -2505 in INCLU.F (test done in REAL case but not in COMPLEX case). 
+ for (int j143 = 0; j143 < lipt; j143++) { + fb[j143] = cfj[j143]; + fn[j143] = cfn[j143]; + } + } else { // label 145 + double rarg = real(aris); + rbf(lipo, rarg, lcalc, rfj); + if (lcalc < lipo) { + jer = 11; + delete[] cfj; + delete[] cfn; + delete[] fb; + delete[] fbi; + delete[] fn; + delete[] rfj; + delete[] rfn; + return; + } + rnf(lipo, rarg, lcalc, rfn); + if (lcalc < lipo) { + jer = 12; + delete[] cfj; + delete[] cfn; + delete[] fb; + delete[] fbi; + delete[] fn; + delete[] rfj; + delete[] rfn; + return; + } + for (int j150 = 0; j150 < lipt; j150++) { + fb[j150] = rfj[j150]; + fn[j150] = rfn[j150]; + } + } + // label 152 + dcomplex *rmf = new dcomplex[c1->li](); + dcomplex *drmf = new dcomplex[c1->li](); + dcomplex *ref = new dcomplex[c1->li](); + dcomplex *dref = new dcomplex[c1->li](); + int ic = 0; + if (nsh < 2) { // nsh == 1 + dcomplex cri = c1->dc0[0] / ent; + for (int l160 = 1; l160 <= c1->li; l160++) { + int lpo = l160 + 1; + int ltpo = lpo + l160; + int lpt = lpo + 1; + dcomplex dfbi = (l160 * fbi[l160 - 1] - lpo * fbi[lpt - 1]) * arin + fbi[lpo - 1] * ltpo; + dcomplex dfb = (l160 * fb[l160 - 1] - lpo * fb[lpt - 1]) * aris + fb[lpo - 1] * ltpo; + dcomplex dfn = (l160 * fn[l160 - 1] - lpo * fn[lpt - 1]) * aris + fn[lpo - 1] * ltpo; + dcomplex ccna = fbi[lpo - 1] * dfn; + dcomplex ccnb = fn[lpo - 1] * dfbi; + dcomplex ccnc = fbi[lpo - 1] * dfb; + dcomplex ccnd = fb[lpo - 1] * dfbi; + c1->rmi[l160 - 1][i - 1] = 1.0 + uim * (ccna - ccnb) / (ccnc - ccnd); + c1->rei[l160 - 1][i - 1] = 1.0 + uim * (cri * ccna - ccnb) / (cri * ccnc - ccnd); + } // l160 loop + } else { // label 165: nsh > 1 + for (int l180 = 1; l180 <= c1->li; l180++) { + int lpo = l180 + 1; + int ltpo = lpo + l180; + int lpt = lpo + 1; + double dltpo = 1.0 * ltpo; + dcomplex y1 = fbi[lpo - 1]; + dcomplex dy1 = (l180 * fbi[l180 - 1] - lpo * fbi[lpt - 1]) * c1->vkt[i - 1] / dltpo; + dcomplex y2 = y1; + dcomplex dy2 = dy1; + ic = 0; + for (int ns = 2; ns <= nsh; ns++) { + int nsmo = ns - 
1; + double vkr = vk * c1->rc[i - 1][nsmo - 1]; + if (ns % 2 != 0) { + // ic is incremented before being read in this loop. + int step = vk * (c1->rc[i - 1][ns - 1] - c1->rc[i - 1][nsmo - 1]) / nstp; + arg = c1->dc0[++ic]; + rkc(nstp, step, arg, vkr, lpo, y1, y2, dy1, dy2); + } else { // label 170 + diel(nstpts, nsmo, i, ic, vk, c1); + double stepts = vk * (c1->rc[i - 1][ns - 1] - c1->rc[i - 1][nsmo - 1]) / nstpts; + rkt(nstpts, stepts, vkr, lpo, y1, y2, dy1, dy2, c1); + } + } // ns loop 176 + rmf[l180 - 1] = y1 * sz; + drmf[l180 - 1] = dy1 * sz + y1; + ref[l180 - 1] = y2 * sz; + dref[l180 - 1] = dy2 * sz + y2; + } // l180 loop + dcomplex cri = (nsh % 2 == 0) ? 1.0 + I * 0.0 : c1->dc0[ic - 1] / ent; + for (int l190 = 1; l190 <= c1->li; l190++) { + int lpo = l190 + 1; + int ltpo = lpo + l190; + int lpt = lpo + 1; + dcomplex dfb = (l190 * fb[l190 - 1] - lpo * fb[lpt - 1]) * aris + fb[lpo - 1] * ltpo; + dcomplex dfn = (l190 * fn[l190 - 1] - lpo * fn[lpt - 1]) * aris + fn[lpo - 1] * ltpo; + dcomplex ccna = rmf[l190 - 1] * dfn; + dcomplex ccnb = drmf[l190 - 1] * fn[lpo - 1] * sz * ltpo; + dcomplex ccnc = rmf[l190 - 1] * dfb; + dcomplex ccnd = drmf[l190 - 1] * fb[lpo - 1]* sz * ltpo; + c1->rmi[l190 - 1][i -1] = 1.0 + uim * (ccna - ccnb) / (ccnc - ccnd); + ccna = ref[l190 - 1] * dfn; + ccnb = dref[l190 - 1] * fn[lpo] * sz * ltpo; + ccnc = ref[l190 - 1] * dfb; + ccnd = dref[l190 - 1] * fb[lpo - 1] * sz * ltpo; + c1->rei[l190 - 1][i - 1] =1.0 + uim * (cri * ccna - ccnb) / (cri * ccnc - ccnd); + } // l190 loop + } // nsh if + delete[] cfj; + delete[] cfn; + delete[] fb; + delete[] fbi; + delete[] fn; + delete[] rfj; + delete[] rfn; + delete[] rmf; + delete[] drmf; + delete[] ref; + delete[] dref; +} + +void instr(ScattererConfiguration *sconf, ParticleDescriptor *c1) { + const int ylm_size = (c1->litpos > c1->lmtpos) ? 
c1->litpos : c1->lmtpos; + dcomplex *ylm = new dcomplex[ylm_size](); + double rx, ry, rz, rr, crth, srth, crph, srph; + int ivy; + for (int i18 = 0; i18 < c1->nsph; i18++) { + int i = i18 + 1; + if (c1->iog[i18] >= i) { + int nsh = c1->nshl[i18]; + for (int j = 0; j < nsh; j++) + c1->rc[i18][j] = sconf->get_rcf(i18, j) * c1->ros[i18]; + } + } // i18 loop + int i = 0; + for (int l1po = 1; l1po <= c1->lmpo; l1po++) { + int l1 = l1po - 1; + for (int l2 = 1; l2 <= c1->lm; l2++) { + r3j000(l1, l2, c1->rac3j); + c1->ind3j[l1po - 1][l2 - 1] = i; + int lmnpo = 1 + ((l2 - l1 > 0) ? l2 - l1 : l1 - l2); + int lmxpo = l2 + l1 + 1; + int il = 0; + int lpo = lmnpo; + while (lpo <= lmxpo) { + c1->v3j0[i++] = c1->rac3j[il++]; + lpo += 2; + } + } // l2 loop + } // l1po loop 28 + int nsphmo = c1->nsph - 1; + int lit = c1->li + c1->li; + ivy = 0; + for (int nf40 = 0; nf40 < nsphmo; nf40++) { + int nf = nf40 + 1; + for (int ns = nf; ns < c1->nsph; ns++) { + rx = c1->rxx[nf40] - c1->rxx[ns]; + ry = c1->ryy[nf40] - c1->ryy[ns]; + rz = c1->rzz[nf40] - c1->rzz[ns]; + polar(rx, ry, rz, rr, crth, srth, crph, srph); + sphar(crth, srth, crph, srph, lit, ylm); + for (int iv38 = 0; iv38 < c1->litpos; iv38++) + c1->vyhj[iv38 + ivy] = dconjg(ylm[iv38]); + ivy += c1->litpos; + } // ns loop + } // nf40 loop + int lmt = c1->li + c1->le; + ivy = 0; + for (int nf50 = 0; nf50 < c1->nsph; nf50++) { + rx = c1->rxx[nf50]; + ry = c1->ryy[nf50]; + rz = c1->rzz[nf50]; + if (rx != 0.0 || ry != 0.0 || rz != 0.0) { + polar(rx, ry, rz, rr, crth, srth, crph, srph); + sphar(crth, srth, crph, srph, lmt, ylm); + for (int iv48 = 0; iv48 < c1->lmtpos; iv48++) + c1->vyj0[iv48 + ivy] = dconjg(ylm[iv48]); + ivy += c1->lmtpos; + } + } // nf50 loop + delete[] ylm; +} + +void ospv(ParticleDescriptor *c1, double vk, double sze, double exri, dcomplex entn, double enti, int &jer, int &lcalc, dcomplex &arg) { + const dcomplex uim = 0.0 + I * 1.0; + const int nsph = c1->nsph; + const int nsphmo = c1->nsph - 1; + const int lit = 
c1->li + c1->li; + const int litpo = lit + 1; + const int array_size = (c1->litpo > c1->lmtpo) ? c1->litpo : c1->lmtpo; + dcomplex *cfj = new dcomplex[array_size](); + dcomplex *cfn = new dcomplex[array_size](); + double *rfj = new double[array_size](); + double *rfn = new double[array_size](); + + int ivhb = 0; + for (int i130 = 1; i130 <= nsphmo; i130++) { + int ipo = i130 + 1; + for (int j130 = ipo; j130 <= nsph; j130++) { + double rx = c1->rxx[j130 - 1] - c1->rxx[i130 - 1]; + double ry = c1->ryy[j130 - 1] - c1->ryy[i130 - 1]; + double rz = c1->rzz[j130 - 1] - c1->rzz[i130 - 1]; + double rr = sqrt(rx * rx + ry * ry + rz * rz); + arg = rr * vk * entn; + if (enti != 0.0) { + cbf(lit, arg, lcalc, cfj); + if (lcalc < lit) { + jer = 11; + delete[] cfj; + delete[] cfn; + delete[] rfj; + delete[] rfn; + return; + } + cnf(lit, arg, lcalc, cfj, cfn); + for (int lpo = 0; lpo < litpo; lpo++) c1->vh[lpo + ivhb] = cfj[lpo] + uim * cfn[lpo]; + // goes to 130 + } else { // label 123 + double rarg = real(arg); + rbf(lit, rarg, lcalc, rfj); + if (lcalc < lit) { + jer = 11; + delete[] cfj; + delete[] cfn; + delete[] rfj; + delete[] rfn; + return; + } + rnf(lit, rarg, lcalc, rfn); + if (lcalc < lit) { + jer = 12; + delete[] cfj; + delete[] cfn; + delete[] rfj; + delete[] rfn; + return; + } + for (int lpo = 0; lpo < litpo; lpo++) c1->vh[lpo + ivhb] = rfj[lpo] + uim * rfn[lpo]; + } + // label 130 + ivhb += litpo; + } // j130 loop + } // i130 loop + const int lmt = c1->li + c1->le; + const int lmtpo = lmt + 1; + ivhb = 0; + for (int i155 = 1; i155 <= nsph; i155++) { + double rx = c1->rxx[i155 - 1]; + double ry = c1->ryy[i155 - 1]; + double rz = c1->rzz[i155 - 1]; + if (rx != 0.0 || ry != 0.0 || rz != 0.0) { + double rr = sqrt(rx * rx + ry * ry + rz * rz); + arg = rr * vk * entn; + if (enti != 0.0) { + cbf(lmt, arg, lcalc, cfj); + if (lcalc < lmt) { + jer = 11; + delete[] cfj; + delete[] cfn; + delete[] rfj; + delete[] rfn; + return; + } + for (int lpo = 0; lpo < lmtpo; lpo++) 
c1->vj0[lpo + ivhb] = cfj[lpo]; + // goes to 155 + } else { // label 150 + double rarg = real(arg); + rbf(lmt, rarg, lcalc, rfj); + if (lcalc < lmt) { + jer = 11; + delete[] cfj; + delete[] cfn; + delete[] rfj; + delete[] rfn; + return; + } + for (int lpo = 0; lpo < lmtpo; lpo++) c1->vj0[lpo + ivhb] = rfj[lpo]; + } + } + // label 155 + ivhb += lmtpo; + } // i155 loop + + const int lepo = c1->le + 1; + const int lept = c1->le + 2; + dcomplex *fb0 = new dcomplex[lept](); + dcomplex *fh0 = new dcomplex[lept](); + dcomplex aris0 = sze * entn; + arg = aris0; + if (enti != 0.0) { + cbf(lepo, arg, lcalc, cfj); + if (lcalc < lepo) { + jer = 11; + delete[] cfj; + delete[] cfn; + delete[] rfj; + delete[] rfn; + delete[] fb0; + delete[] fh0; + return; + } + cnf(lepo, arg, lcalc, cfj, cfn); + for (int j162 = 0; j162 < lept; j162++) { + fb0[j162] = cfj[j162]; + fh0[j162] = cfj[j162] + uim * cfn[j162]; + } // j162 loop + // goes to 170 + } else { // label 163 + double rarg = real(arg); + rbf(lepo, rarg, lcalc, rfj); + if (lcalc < lepo) { + jer = 11; + delete[] cfj; + delete[] cfn; + delete[] rfj; + delete[] rfn; + delete[] fb0; + delete[] fh0; + return; + } + rnf(lepo, rarg, lcalc, rfn); + if (lcalc < lepo) { + jer = 12; + delete[] cfj; + delete[] cfn; + delete[] rfj; + delete[] rfn; + delete[] fb0; + delete[] fh0; + return; + } + for (int j168 = 0; j168 < lept; j168++) { + fb0[j168] = rfj[j168]; + fh0[j168] = rfj[j168] + uim * rfn[j168]; + } // j168 loop + } + // label 170 + double arex = sze * exri; + arg = arex; + double rarg = arex; + rbf(lepo, rarg, lcalc, rfj); + if (lcalc < lepo) { + jer = 1; + delete[] cfj; + delete[] cfn; + delete[] rfj; + delete[] rfn; + delete[] fb0; + delete[] fh0; + return; + } + rnf(lepo, rarg, lcalc, rfn); + if (lcalc < lepo) { + jer = 2; + delete[] cfj; + delete[] cfn; + delete[] rfj; + delete[] rfn; + delete[] fb0; + delete[] fh0; + return; + } + dcomplex *fbe = new dcomplex[lept](); + dcomplex *fhe = new dcomplex[lept](); + for (int j175 = 0; 
j175 < lept; j175++) { + fbe[j175] = rfj[j175]; + fhe[j175] = rfj[j175] + uim * rfn[j175]; + } // j175 loop + dcomplex cri = exri / entn; + for (int l184 = 1; l184 <= c1->le; l184++) { + int lpo = l184 + 1; + int lpt = lpo + 1; + double dltpo = 1.0 / (lpo + l184); + dcomplex dfb0 = fb0[lpo - 1] + (l184 * fb0[l184 - 1] - lpo * fb0[lpt - 1]) * aris0 * dltpo; + dcomplex dfh0 = fh0[lpo - 1] + (l184 * fh0[l184 - 1] - lpo * fh0[lpt - 1]) * aris0 * dltpo; + dcomplex dfbe = fbe[lpo - 1] + (l184 * fbe[l184 - 1] - lpo * fbe[lpt - 1]) * arex * dltpo; + dcomplex dfhe = fhe[lpo - 1] + (l184 * fhe[l184 - 1] - lpo * fhe[lpt - 1]) * arex * dltpo; + dcomplex ccna = aris0 * fb0[lpo - 1] * dfhe; + dcomplex ccnb = arex * fhe[lpo - 1] * dfb0; + c1->rm0[l184 - 1] = -(ccna * cri - ccnb) * uim; + c1->re0[l184 - 1] = -(ccna - ccnb * cri) * uim; + ccna = aris0 * fh0[lpo - 1] * dfhe; + ccnb = arex * fhe[lpo - 1] * dfh0; + c1->rmw[l184 - 1] = -(ccna * cri - ccnb) * uim; + c1->rew[l184 - 1] = -(ccna - ccnb * cri) * uim; + ccna = aris0 * fh0[lpo - 1] * dfbe; + ccnb = arex * fbe[lpo - 1] * dfh0; + c1->tm[l184 - 1] = (ccna * cri - ccnb) * uim; + c1->te[l184 - 1] = (ccna - ccnb * cri) * uim; + ccna = aris0 * fb0[lpo - 1] * dfbe; + ccnb = arex * fbe[lpo - 1] * dfb0; + c1->tm0[l184 - 1] = (ccna * cri - ccnb) * uim; + c1->te0[l184 - 1] = (ccna - ccnb * cri) * uim; + } // l184 loop + + // Clean up memory. 
+ delete[] cfj; + delete[] cfn; + delete[] rfj; + delete[] rfn; + delete[] fb0; + delete[] fh0; + delete[] fbe; + delete[] fhe; +} diff --git a/src/libnptm/sph_subs.cpp b/src/libnptm/sph_subs.cpp index d24a0fb6c09bd4e78801666965a6c2af64a1928e..a67d2adda340d6ccb6ff78b36297784fbe0d6d7f 100644 --- a/src/libnptm/sph_subs.cpp +++ b/src/libnptm/sph_subs.cpp @@ -99,8 +99,7 @@ void cbf(int n, dcomplex z, int &nm, dcomplex *csj) { * point for backward recurrence * ========================================================== */ - double zz = real(z) * real(z) + imag(z) * imag(z); - double a0 = sqrt(zz); + double a0 = cabs(z); nm = n; if (a0 < 1.0e-60) { for (int k = 2; k <= n + 1; k++) { @@ -113,7 +112,7 @@ void cbf(int n, dcomplex z, int &nm, dcomplex *csj) { if (n == 0) { return; } - csj[1] = (csj[0] -ccos(z)) / z; + csj[1] = (csj[0] - ccos(z)) / z; if (n == 1) { return; } @@ -197,28 +196,28 @@ double cg1(int lmpml, int mu, int l, int m) { return result; } -void diel(int npntmo, int ns, int i, int ic, double vk, ParticleDescriptor *c1, C2 *c2) { +void diel(int npntmo, int ns, int i, int ic, double vk, ParticleDescriptor *c1) { const double dif = c1->rc[i - 1][ns] - c1->rc[i - 1][ns - 1]; const double half_step = 0.5 * dif / npntmo; double rr = c1->rc[i - 1][ns - 1]; - const dcomplex delta = c2->dc0[ic] - c2->dc0[ic - 1]; + const dcomplex delta = c1->dc0[ic] - c1->dc0[ic - 1]; const int kpnt = npntmo + npntmo; - c2->ris[kpnt] = c2->dc0[ic]; - c2->dlri[kpnt] = 0.0 + 0.0 * I; + c1->ris[kpnt] = c1->dc0[ic]; + c1->dlri[kpnt] = 0.0 + 0.0 * I; const int i90 = i - 1; const int ns90 = ns - 1; const int ic90 = ic - 1; for (int np90 = 0; np90 < kpnt; np90++) { double ff = (rr - c1->rc[i90][ns90]) / dif; - c2->ris[np90] = delta * ff * ff * (-2.0 * ff + 3.0) + c2->dc0[ic90]; - c2->dlri[np90] = 3.0 * delta * ff * (1.0 - ff) / (dif * vk * c2->ris[np90]); + c1->ris[np90] = delta * ff * ff * (-2.0 * ff + 3.0) + c1->dc0[ic90]; + c1->dlri[np90] = 3.0 * delta * ff * (1.0 - ff) / (dif * vk * 
c1->ris[np90]); rr += half_step; } } void dme( int li, int i, int npnt, int npntts, double vk, double exdc, double exri, - ParticleDescriptor *c1, C2 *c2, int &jer, int &lcalc, dcomplex &arg, int last_conf + ParticleDescriptor *c1, int &jer, int &lcalc, dcomplex &arg, int last_conf ) { const int lipo = li + 1; const int lipt = li + 2; @@ -234,11 +233,11 @@ void dme( int nstp = npnt - 1; int nstpts = npntts - 1; double sz = vk * c1->ros[sph_index - 1]; - c2->vsz[i - 1] = sz; + c1->vsz[i - 1] = sz; double vkr1 = vk * c1->rc[sph_index - 1][0]; int nsh = c1->nshl[sph_index - 1]; - c2->vkt[i - 1] = csqrt(c2->dc0[0]); - arg = vkr1 * c2->vkt[i - 1]; + c1->vkt[i - 1] = csqrt(c1->dc0[0]); + arg = vkr1 * c1->vkt[i - 1]; arin = arg; bool goto32 = false; if (imag(arg) != 0.0) { @@ -283,7 +282,7 @@ void dme( fn[j43 - 1] = rfn[j43 - 1]; } if (nsh <= 1) { - cri = c2->dc0[0] / exdc; + cri = c1->dc0[0] / exdc; for (int l60 = 1; l60 <= li; l60++) { int lpo = l60 + 1; int ltpo = lpo + l60; @@ -306,7 +305,7 @@ void dme( int lpt = lpo + 1; int dltpo = ltpo; y1 = fbi[lpo - 1]; - dy1 = ((1.0 * l80) * fbi[l80 - 1] - (1.0 * lpo) * fbi[lpt - 1]) * c2->vkt[i - 1] / (1.0 * dltpo); + dy1 = ((1.0 * l80) * fbi[l80 - 1] - (1.0 * lpo) * fbi[lpt - 1]) * c1->vkt[i - 1] / (1.0 * dltpo); y2 = y1; dy2 = dy1; ic = 1; @@ -317,13 +316,13 @@ void dme( ic += 1; double step = 1.0 * nstp; step = vk * (c1->rc[i - 1][ns76 - 1] - c1->rc[i - 1][nsmo - 1]) / step; - arg = c2->dc0[ic - 1]; + arg = c1->dc0[ic - 1]; rkc(nstp, step, arg, vkr, lpo, y1, y2, dy1, dy2); } else { - diel(nstpts, nsmo, i, ic, vk, c1, c2); + diel(nstpts, nsmo, i, ic, vk, c1); double stepts = 1.0 * nstpts; stepts = vk * (c1->rc[i - 1][ns76 - 1] - c1->rc[i - 1][nsmo - 1]) / stepts; - rkt(nstpts, stepts, vkr, lpo, y1, y2, dy1, dy2, c2); + rkt(nstpts, stepts, vkr, lpo, y1, y2, dy1, dy2, c1); } } rmf[l80 - 1] = y1 * sz; @@ -332,7 +331,7 @@ void dme( dref[l80 - 1] = dy2 * sz + y2; } cri = 1.0 + uim * 0.0; - if (nsh % 2 != 0) cri = c2->dc0[ic - 1] / 
exdc; + if (nsh % 2 != 0) cri = c1->dc0[ic - 1] / exdc; for (int l90 = 1; l90 <= li; l90++) { int lpo = l90 + 1; int ltpo = lpo + l90; @@ -347,7 +346,7 @@ void dme( ccna = ref[l90 - 1] * dfn; ccnb = dref[l90 - 1] * fn[lpo - 1] * (1.0 * sz * ltpo); ccnc = ref[l90 - 1] * dfb; - ccnd = dref[l90 - 1] *fb[lpo - 1] * (1.0 * sz * ltpo); + ccnd = dref[l90 - 1] * fb[lpo - 1] * (1.0 * sz * ltpo); c1->rei[l90 - 1][i - 1] = 1.0 + uim * (cri * ccna - ccnb) / (cri * ccnc - ccnd); } } // nsh <= 1 ? @@ -725,7 +724,7 @@ void rkc( void rkt( int npntmo, double step, double &x, int lpo, dcomplex &y1, - dcomplex &y2, dcomplex &dy1, dcomplex &dy2, C2 *c2 + dcomplex &y2, dcomplex &dy1, dcomplex &dy2, ParticleDescriptor *c1 ) { dcomplex cy1, cdy1, c11, cy23, cdy23, yc2, c12, c13; dcomplex cy4, cdy4, yy, c14, c21, c22, c23, c24; @@ -735,34 +734,34 @@ void rkt( int ipnt = ipnt60 + 1; int jpnt = ipnt + ipnt - 1; int jpnt60 = jpnt - 1; - cy1 = cl / (x * x) - c2->ris[jpnt60]; + cy1 = cl / (x * x) - c1->ris[jpnt60]; cdy1 = -2.0 / x; c11 = (cy1 * y1 + cdy1 * dy1) * step; double xh = x + half_step; int jpntpo = jpnt + 1; - cy23 = cl / (xh * xh) - c2->ris[jpnt]; + cy23 = cl / (xh * xh) - c1->ris[jpnt]; cdy23 = -2.0 / xh; yc2 = y1 + dy1 * half_step; c12 = (cy23 * yc2 + cdy23 * (dy1 + 0.5 * c11)) * step; c13= (cy23 * (yc2 + 0.25 * c11 *step) + cdy23 * (dy1 + 0.5 * c12)) * step; double xn = x + step; //int jpntpt = jpnt + 2; - cy4 = cl / (xn * xn) - c2->ris[jpntpo]; + cy4 = cl / (xn * xn) - c1->ris[jpntpo]; cdy4 = -2.0 / xn; yy = y1 + dy1 * step; c14 = (cy4 * (yy + 0.5 * c12 * step) + cdy4 * (dy1 + c13)) * step; y1= yy + (c11 + c12 + c13) * step / 6.0; dy1 += (0.5 * c11 + c12 + c13 + 0.5 * c14) /3.0; - cy1 -= cdy1 * c2->dlri[jpnt60]; - cdy1 += 2.0 * c2->dlri[jpnt60]; + cy1 -= cdy1 * c1->dlri[jpnt60]; + cdy1 += 2.0 * c1->dlri[jpnt60]; c21 = (cy1 * y2 + cdy1 * dy2) * step; - cy23 -= cdy23 * c2->dlri[jpnt]; - cdy23 += 2.0 * c2->dlri[jpnt]; + cy23 -= cdy23 * c1->dlri[jpnt]; + cdy23 += 2.0 * 
c1->dlri[jpnt]; yc2 = y2 + dy2 * half_step; c22 = (cy23 * yc2 + cdy23 * (dy2 + 0.5 * c21)) * step; c23 = (cy23 * (yc2 + 0.25 * c21 * step) + cdy23 * (dy2 + 0.5 * c22)) * step; - cy4 -= cdy4 * c2->dlri[jpntpo]; - cdy4 += 2.0 * c2->dlri[jpntpo]; + cy4 -= cdy4 * c1->dlri[jpntpo]; + cdy4 += 2.0 * c1->dlri[jpntpo]; yy = y2 + dy2 * step; c24 = (cy4 * (yc2 + 0.5 * c22 * step) + cdy4 * (dy2 + c23)) * step; y2 = yy + (c21 + c22 + c23) * step / 6.0; diff --git a/src/sphere/sphere.cpp b/src/sphere/sphere.cpp index e90fcae87699b3ed032b67a871390708da72de3d..9f8c661698188683e17ea41db885f12f1fd658b3 100644 --- a/src/sphere/sphere.cpp +++ b/src/sphere/sphere.cpp @@ -147,7 +147,6 @@ void sphere(const string& config_file, const string& data_file, const string& ou double sc_phi_start = gconf->sc_phi_start; double sc_phi_step = gconf->sc_phi_step; double sc_phi_end = gconf->sc_phi_end; - C2 *c2 = new C2(gconf, sconf); argi = new double[1]; args = new double[1]; gaps = new double[2]; @@ -317,18 +316,18 @@ void sphere(const string& config_file, const string& data_file, const string& ou int ici = (nsh + 1) / 2; if (idfc == 0) { for (int ic = 0; ic < ici; ic++) - c2->dc0[ic] = sconf->get_dielectric_constant(ic, i132, jxi488); // WARNING: IDFC=0 is not tested! + c1->dc0[ic] = sconf->get_dielectric_constant(ic, i132, jxi488); // WARNING: IDFC=0 is not tested! 
} else { // IDFC != 0 if (jxi == 1) { for (int ic = 0; ic < ici; ic++) { - c2->dc0[ic] = sconf->get_dielectric_constant(ic, i132, jxi488); + c1->dc0[ic] = sconf->get_dielectric_constant(ic, i132, jxi488); } } } - if (nsh % 2 == 0) c2->dc0[ici] = exdc; + if (nsh % 2 == 0) c1->dc0[ici] = exdc; int jer = 0; int lcalc = 0; - dme(l_max, i, npnt, npntts, vkarg, exdc, exri, c1, c2, jer, lcalc, arg); + dme(l_max, i, npnt, npntts, vkarg, exdc, exri, c1, jer, lcalc, arg); if (jer != 0) { fprintf(output, " STOP IN DME\n"); fprintf(output, " AT %1d LCALC=%3d TOO SMALL WITH ARG=%15.7lE+i(%15.7lE)\n", jer, lcalc, real(arg), imag(arg)); @@ -337,7 +336,6 @@ void sphere(const string& config_file, const string& data_file, const string& ou delete sconf; delete gconf; delete c1; - delete c2; for (int zi = l_max - 1; zi > -1; zi--) { for (int zj = 0; zj < 3; zj++) { for (int zk = 0; zk < 2; zk++) { @@ -409,14 +407,14 @@ void sphere(const string& config_file, const string& data_file, const string& ou c1->sqexs[i170] *= sqsfi; fprintf(output, " SPHERE %2d\n", i); if (c1->nshl[i170] != 1) { - fprintf(output, " SIZE=%15.7lE\n", c2->vsz[i170]); + fprintf(output, " SIZE=%15.7lE\n", c1->vsz[i170]); } else { fprintf( output, " SIZE=%15.7lE, REFRACTIVE INDEX=%15.7lE%15.7lE\n", - c2->vsz[i170], - real(c2->vkt[i170]), - imag(c2->vkt[i170]) + c1->vsz[i170], + real(c1->vkt[i170]), + imag(c1->vkt[i170]) ); } fprintf(output, " ----- SCS ----- ABS ----- EXS ----- ALBEDS --\n"); @@ -642,7 +640,6 @@ void sphere(const string& config_file, const string& data_file, const string& ou } fclose(output); delete c1; - delete c2; for (int zi = l_max - 1; zi > -1; zi--) { for (int zj = 0; zj < 3; zj++) { for (int zk = 0; zk < 2; zk++) { diff --git a/src/testing/test_file_io.cpp b/src/testing/test_file_io.cpp new file mode 100644 index 0000000000000000000000000000000000000000..549866af0286ffc2919224754a0cdf7e48999d2e --- /dev/null +++ b/src/testing/test_file_io.cpp @@ -0,0 +1,53 @@ +/* Copyright (C) 2024 INAF - 
/*! \brief Main program execution body.
 *
 * Smoke test of the List container: a list is created on the heap, its first
 * element is assigned with set(), three further elements are added with
 * append(), and the list is destroyed. Nothing is printed or verified, so the
 * program only detects crashes (or leaks, under a memory checker).
 *
 * NOTE(review): although the file is named test_file_io.cpp, no file_io
 * function is called here - confirm whether more tests are meant to follow.
 *
 * \return result: `int` Always 0.
 */
int main() {
  // Constructor argument 1 is presumably the initial length - confirm against List.h.
  List *parameter_names = new List(1);
  // Overwrite the element at index 0 ...
  parameter_names->set(0, "Param. zero");
  // ... then add three more elements through append().
  parameter_names->append("Param. one");
  parameter_names->append("Param. two");
  parameter_names->append("Param. three");

  delete parameter_names;
  return 0;
}
define the vector of scales (either explicitly, with one element per row, or in steps, with only first element and step declared) @@ -55,7 +55,7 @@ were the different lines have the following roles: OUTPUT_T-MATRIX_SCALE_NUMBER EOF_CODE ``` -were the different lines have the following roles: +where the different lines have the following roles: 1. general configuration of the scattering problem, with some specification of the transition between materials 2. The vectors of spherical component Cartesian coordinates `xyz` 3. definition of the elevation angle arrays for the incident and scattered radiation fields diff --git a/test_data/inclusion/README.md b/test_data/inclusion/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8da761c37ea89e7c523c68a2d7738f465cb5a023 --- /dev/null +++ b/test_data/inclusion/README.md @@ -0,0 +1,20 @@ +# Folder instructions + +This directory contains test data for models made up of a sphere with inclusions. + +# License + + Copyright (C) 2024 INAF - Osservatorio Astronomico di Cagliari + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + A copy of the GNU General Public License is distributed along with + this program in the COPYING file. If not, see: <https://www.gnu.org/licenses/>. diff --git a/test_data/sphere/README.md b/test_data/sphere/README.md index cab016904bddac9d3b2a470d78d7bcb6e995ea34..baff04bce0dd69504435e4d9929191bea110176d 100644 --- a/test_data/sphere/README.md +++ b/test_data/sphere/README.md @@ -23,7 +23,7 @@ This directory contains test data for the single sphere case. 
v EOF_CODE ``` -were the different lines have the following roles: +where the different lines have the following roles: 1. declare the number of spheres and whether to add an external layer 2. define the external dielectric constant and the scaling configuration 3. define the vector of scales (either explicitly, with one element per row, or in steps, with only first element and step declared) @@ -41,7 +41,7 @@ were the different lines have the following roles: OUTPUT_T-MATRIX_SCALE_NUMBER EOF_CODE ``` -were the different lines have the following roles: +where the different lines have the following roles: 1. general configuration of the scattering problem, with some specification of the transition between materials 2. definition of the elevation angle arrays for the incident and scattered radiation fields 3. definition of the azimuth angle arrays for the incident and scattered radiation fields