diff --git a/src/cluster/Makefile b/src/cluster/Makefile
index f884a43e5634e1486ce9178047e45e91373caacf..48f4547116b322f770182eecd2caf85fd5de512c 100644
--- a/src/cluster/Makefile
+++ b/src/cluster/Makefile
@@ -2,8 +2,12 @@ BUILDDIR=../../build/cluster
 FC=gfortran
 FCFLAGS=-std=legacy -O3
 LFLAGS=
+LFLAGS=
+CXX=g++
+CXXFLAGS=-O0 -ggdb -pg -coverage
+CXXLFLAGS=
 
-all: clu edfb
+all: clu edfb np_cluster
 
 clu: clu.o
 	$(FC) $(FCFLAGS) -o $(BUILDDIR)/clu $(BUILDDIR)/clu.o $(LFLAGS)
@@ -11,6 +15,27 @@ clu: clu.o
 edfb: edfb.o
 	$(FC) $(FCFLAGS) -o $(BUILDDIR)/edfb $(BUILDDIR)/edfb.o $(LFLAGS)
 
+np_cluster: $(BUILDDIR)/np_cluster.o $(BUILDDIR)/Commons.o $(BUILDDIR)/Configuration.o $(BUILDDIR)/Parsers.o $(BUILDDIR)/sphere.o $(BUILDDIR)/cluster.o
+	$(CXX) $(CXXFLAGS) $(CXXLFLAGS) -o $(BUILDDIR)/np_cluster $(BUILDDIR)/np_cluster.o $(BUILDDIR)/Commons.o $(BUILDDIR)/Configuration.o $(BUILDDIR)/Parsers.o $(BUILDDIR)/sphere.o $(BUILDDIR)/cluster.o
+
+$(BUILDDIR)/np_cluster.o:
+	$(CXX) $(CXXFLAGS) -c ../np_cluster.cpp -o $(BUILDDIR)/np_cluster.o
+
+$(BUILDDIR)/Commons.o:
+	$(CXX) $(CXXFLAGS) -c ../libnptm/Commons.cpp -o $(BUILDDIR)/Commons.o
+
+$(BUILDDIR)/Configuration.o:
+	$(CXX) $(CXXFLAGS) -c ../libnptm/Configuration.cpp -o $(BUILDDIR)/Configuration.o
+
+$(BUILDDIR)/Parsers.o:
+	$(CXX) $(CXXFLAGS) -c ../libnptm/Parsers.cpp -o $(BUILDDIR)/Parsers.o
+
+$(BUILDDIR)/cluster.o:
+	$(CXX) $(CXXFLAGS) -c cluster.cpp -o $(BUILDDIR)/cluster.o
+
+$(BUILDDIR)/sphere.o:
+	$(CXX) $(CXXFLAGS) -c ../sphere/sphere.cpp -o $(BUILDDIR)/sphere.o
+
 clean:
 	rm -f $(BUILDDIR)/*.o
 
diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp
index e57added3ad108c00998cf37fa57770cba1a9d91..03038fc2e4bb59cc1d13f3b5bb3e667804c694d7 100644
--- a/src/cluster/cluster.cpp
+++ b/src/cluster/cluster.cpp
@@ -19,13 +19,13 @@ using namespace std;
 //! \brief C++ implementation of CLU
 void cluster() {
 	printf("INFO: making legacy configuration ...\n");
-	ScattererConfiguration *conf = ScattererConfiguration::from_dedfb("DEDFB_clu");
+	ScattererConfiguration *conf = ScattererConfiguration::from_dedfb("../../test_data/cluster/DEDFB");
 	conf->write_formatted("c_OEDFB_clu");
 	conf->write_binary("c_TEDF_clu");
 	delete conf;
 	printf("INFO: reading binary configuration ...\n");
 	ScattererConfiguration *sconf = ScattererConfiguration::from_binary("c_TEDF_clu");
-	GeometryConfiguration *gconf = GeometryConfiguration::from_legacy("DCLU");
+	GeometryConfiguration *gconf = GeometryConfiguration::from_legacy("../../test_data/cluster/DCLU");
 	if (sconf->number_of_spheres == gconf->number_of_spheres) {
 		// Shortcuts to variables stored in configuration objects
 		int nsph = gconf->number_of_spheres;
@@ -61,43 +61,27 @@ void cluster() {
 			c1->ryy[c1i] = gconf->sph_y[c1i];
 			c1->rzz[c1i] = gconf->sph_z[c1i];
 			c1->ros[c1i] = sconf->radii_of_spheres[c1i];
-			int iogi = c1->iog[c1i];
-			if (iogi >= c1i + 1) {
-				double gcss = pi * c1->ros[c1i] * c1->ros[c1i];
-				c1->gcsv[c1i] = gcss;
-				int nsh = c1->nshl[c1i];
-				for (int j16 = 1; j16 <= nsh; j16++) {
-					c1->rc[c1i][j16- 1] = sconf->rcf[c1i][j16] * c1->ros[c1i];
-				}
-				c3->gcs += c1->gcsv[c1i - 1];
-			}
 		}
 		C4 *c4 = new C4;
 		c4->li = gconf->li;
 		c4->le = gconf->le;
-		c4->lm = lm;
-		c4->lmpo = c4->lm + 1;
-		c4->litpo = 2 * gconf->li + 1;
+		c4->lm = (c4->li > c4-> le) ? c4->li : c4->le;
+		c4->nv3j = (c4->lm * (c4->lm  + 1) * (2 * c4->lm + 7)) / 6;
+		c4->nsph = nsph;
+		// The following is needed to initialize C1_AddOns
+		c4->litpo = c4->li + c4->li + 1;
 		c4->litpos = c4->litpo * c4->litpo;
-		c4->lmtpo = gconf->li + gconf->le + 1;
+		c4->lmtpo = c4->li + c4->le + 1;
 		c4->lmtpos = c4->lmtpo * c4->lmtpo;
 		c4->nlim = c4->li * (c4->li + 2);
 		c4->nlem = c4->le * (c4->le + 2);
-		c4->nsph = nsph;
-		C6 *c6 = new C6(c4->lmtpo);
+		c4->lmpo = c4->lm + 1;
 		C1_AddOns *c1ao = new C1_AddOns(c4);
+		// End of add-ons initialization
+		C6 *c6 = new C6(c4->lmtpo);
 		FILE *output = fopen("c_OCLU", "w");
-		double ****zpv = new double***[c4->lm];
-		for (int zi = 0; zi < c4->lm; zi++) {
-			zpv[zi] = new double**[3];
-			for (int zj = 0; zj < 3; zj++) {
-				zpv[zi][zj] = new double*[2];
-				zpv[zi][zj][0] = new double[2];
-				zpv[zi][zj][1] = new double[2];
-			}
-		}
 		int jer = 0, lcalc = 0;
-		complex<double> arg = complex<double>(0.0, 0.0);
+		complex<double> arg(0.0, 0.0), ccsam(0.0, 0.0);
 		int max_ici = 0;
 		for (int insh = 0; insh < nsph; insh++) {
 			int nsh = sconf->nshl_vec[insh];
@@ -162,7 +146,19 @@ void cluster() {
 		int nks = nths * nphs;
 		int nkks = nk * nks;
 		double th1 = th, ph1 = ph, ths1 = ths, phs1 = phs;
-		str(c1, c1ao, c3, c4, c6);
+		str(sconf->rcf, c1, c1ao, c3, c4, c6);
+		double ****zpv = new double***[c4->lm]; //[gconf->l_max][3][2][2]; // Matrix: dim[LM x 3 x 2 x 2]
+		for (int zi = 0; zi < c4->lm; zi++) {
+			zpv[zi] = new double**[3];
+			for (int zj = 0; zj < 3; zj++) {
+				zpv[zi][zj] = new double*[2];
+				for (int zk = 0; zk < 2; zk++) {
+					zpv[zi][zj][zk] = new double[2];
+					zpv[zi][zj][zk][0] = 0.0;
+					zpv[zi][zj][zk][1] = 0.0;
+				}
+			}
+		}
 		thdps(c4->lm, zpv);
 		double exri = sqrt(sconf->exdc);
 		double vk = 0.0; // NOTE: Not really sure it should be initialized at 0
@@ -200,10 +196,7 @@ void cluster() {
 					sqsfi = 1.0 / (xi * xi);
 					fprintf(output, "  XI=%15.7lE\n", xi);
 				}
-				// Would call HJV(EXRI, VKARG, JER, LCALC, ARG)
 				hjv(exri, vkarg, jer, lcalc, arg, c1, c1ao, c4);
-				//printf("INFO: calculation went up to %d and jer = %d\n", lcalc, jer);
-				//printf("INFO: arg = (%lE,%lE)\n", arg.real(), arg.imag());
 				if (jer != 0) {
 					fprintf(output, "  STOP IN HJV\n");
 					break; // jxi488 loop: goes to memory cleaning and  return
@@ -218,7 +211,6 @@ void cluster() {
 					} else {
 						int nsh = sconf->nshl_vec[i132 - 1];
 						int ici = (nsh + 1) / 2;
-						int size_dc0 = (nsh % 2 == 0) ? ici + 1 : ici;
 						if (sconf->idfc == 0) {
 							for (int ic = 0; ic < ici; ic++)
 								c2->dc0[ic] = sconf->dc0_matrix[ic][i132 - 1][jxi488 - 1];
@@ -233,8 +225,11 @@ void cluster() {
 								c4->li, i132, npnt, npntts, vkarg, sconf->exdc, exri,
 								c1, c2, jer, lcalc, arg
 						);
-						//printf("INFO: DME returned jer = %d , lcalc = %d and arg = (%lE, %lE)\n",
-						//		jer, lcalc, arg.real(), arg.imag());
+						//for (int idl = 1; idl <= 8; idl++) {
+						//	printf("DEBUG: RMI( %d , %d ) = (%lE, %lE)\n", idl, i132, c1->rmi[idl - 1][i132 - 1].real(), c1->rmi[idl - 1][i132 - 1].imag());
+						//	printf("DEBUG: REI( %d , %d ) = (%lE, %lE)\n", idl, i132, c1->rei[idl - 1][i132 - 1].real(), c1->rei[idl - 1][i132 - 1].imag());
+						//}
+						//printf("DEBUG: DME - jer = %d, lcalc = %d, arg = (%lE,%lE)\n", jer, lcalc, arg.real(), arg.imag());
 						if (jer != 0) {
 							fprintf(output, "  STOP IN DME\n");
 							break;
@@ -242,7 +237,18 @@ void cluster() {
 					}
 					if (jer != 0) break;
 				} // i132 loop
-				// Would call CMS(AM)
+				cms(am, c1, c1ao, c4, c6);
+				if (jxi488 == 1) logmat(am, 960, 960);
+				ccsam = summat(am, 960, 960);
+				printf("DEBUG: after CMS CCSAM = (%lE,%lE)\n", ccsam.real(), ccsam.imag());
+				double srac3j = 0.0;
+				for (int di = 0; di < c4->lmtpo; di++) srac3j += c6->rac3j[di];
+				printf("DEBUG: after CMS SRAC3J = %lE\n", srac3j);
+				//int ndit = 2 * nsph * c4->nlim;
+				//lucin(am, mxndm, ndit, jer);
+				//ccsam = summat(am, 960, 960);
+				//printf("DEBUG: after LUCIN CCSAM = (%lE,%lE)\n", ccsam.real(), ccsam.imag());
+				//printf("DEBUG: after LUCIN, JER = %d\n", jer);
 
 				if (jer != 0) break;
 			} // jxi488 loop
diff --git a/src/include/Commons.h b/src/include/Commons.h
index 4aeb5ef024cfd61b3d6c038d2b02a8b358629d41..01571dfb8af7e0a5b9c0dec92c86719de498dc8b 100644
--- a/src/include/Commons.h
+++ b/src/include/Commons.h
@@ -163,11 +163,11 @@ struct C4 {
 	int litpo;
 	//! \brief QUESTION: definition?
 	int litpos;
-	//! \brief QUESTION: definition?
+	//! \brief Maximum field expansion order plus one. QUESTION: correct?
 	int lmpo;
-	//! \brief QUESTION: definition?
+	//! \brief Twice maximum field expansion order plus one. QUESTION: correct?
 	int lmtpo;
-	//! \brief QUESTION: definition?
+	//! \brief Square of `lmtpo`.
 	int lmtpos;
 	//! \brief QUESTION: definition?
 	int li;
@@ -181,6 +181,8 @@ struct C4 {
 	int lm;
 	//! \brief Number of spheres.
 	int nsph;
+	//! \brief QUESTION: definition?
+	int nv3j;
 };
 
 /*! \brief Vectors and matrices that are specific to cluster C1 blocks.
@@ -192,7 +194,7 @@ protected:
 	int nsph;
 	//! \brief QUESTION: definition?
 	int nlemt;
-	//! \brief QUESTION: definition?
+	//! \brief Maximum expansion order plus one. QUESTION: correct?
 	int lmpo;
 
 	/*! \brief Allocate the necessary common vectors depending on configuration.
diff --git a/src/include/clu_subs.h b/src/include/clu_subs.h
index a0bfc929c50b553219dbfef453ac29ffde298f57..dd1ee920acba36f5bc7c752a422b133655f562a9 100644
--- a/src/include/clu_subs.h
+++ b/src/include/clu_subs.h
@@ -31,6 +31,34 @@ extern void rbf(int n, double x, int &nm, double sj[]);
 extern void rnf(int n, double x, int &nm, double sy[]);
 extern void thdps(int lm, double ****zpv);
 // >>> END OF SPH_SUBS DECLARATION <<<
+void logmat(std::complex<double> **mat, int rows, int cols);
+
+/*! \brief C++ porting of CDTP
+ *
+ * \param z: `complex<double>`
+ * \param am: Matrix of complex.
+ * \param i: `int`
+ * \param jf: `int`
+ * \param k: `int`
+ * \param nj: `int`
+ */
+std::complex<double> cdtp(
+		std::complex<double> z, std::complex<double> **am, int i, int jf,
+		int k, int nj
+) {
+	/* NOTE: the original FORTRAN code treats the AM matrix as a
+	 * vector. This is not directly allowed in C++ and it requires
+	 * accounting for the different dimensions.
+	 */
+	std::complex<double> result = z;
+	if (nj > 0) {
+		int jl = jf + nj - 1;
+		for (int j = jf; j <= jl; j++) {
+			result += (am[i - 1][j - 1] * am[j - 1][k - 1]);
+		}
+	}
+	return result;
+}
 
 /*! \brief C++ porting of CGEV
  *
@@ -38,6 +66,7 @@ extern void thdps(int lm, double ****zpv);
  * \param mu: `int`
  * \param l: `int`
  * \param m: `int`
+ * \return result: `double`
  */
 double cgev(int ipamo, int mu, int l, int m) {
 	double result = 0.0;
@@ -61,7 +90,7 @@ double cgev(int ipamo, int mu, int l, int m) {
 		}
 	} else { // label 30
 		xd = 2.0 * l * (l * 2 - 1);
-		if (mu < 0) { // label 35. CHECK: is clu.f code line 2458 a switch?
+		if (mu < 0) { // label 35
 			xn = 1.0 * (l - 1 + m) * (l + m);
 		} else if (mu == 0) { // label 40
 			xn = 2.0 * (l - m) * (l + m);
@@ -84,7 +113,7 @@ double cgev(int ipamo, int mu, int l, int m) {
 void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) {
 	int jmx = j3 + j2;
 	int jdf = j3 - j2;
-	int m1 = m2 - m3;
+	int m1 = -m2 - m3;
 	int abs_jdf = (jdf >= 0) ? jdf : -jdf;
 	int abs_m1 = (m1 >= 0) ? m1 : -m1;
 	int jmn = (abs_jdf > abs_m1) ? abs_jdf : abs_m1;
@@ -96,7 +125,6 @@ void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) {
 		double sj = 1.0 * jf;
 		double cnr = (1.0 / sqrt(sj)) * isn;
 		c6->rac3j[0] = cnr;
-		return;
 	} else { // label 15
 		double sjt = 1.0;
 		double sjr = 1.0 * jf;
@@ -111,15 +139,16 @@ void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) {
 		double ccj = 1.0 * (j1s - jdfs) * (j1s - m1s);
 		double cj = sqrt(ccj * (jsmpos - j1s));
 		double dj = 1.0 * jf * (j1 * j1po * mdf + idjc);
-		double cjp = 0.0; // WARNING: not sure it should be really defined here.
+		// In old version, CJP was defined here. Did not work.
+		// double cjp = 0.0
 		if (njmo <= 1) {
-			c6->rac3j[0] = -dj /(cj * j1po);
+			c6->rac3j[0] = -dj / (cj * j1po);
 			double sj = sjr + (c6->rac3j[0] * c6->rac3j[0]) * (jf - 2);
 			double cnr = (1.0 / sqrt(sj)) * isn;
 			c6->rac3j[1] = cnr;
 			c6->rac3j[0] *= cnr;
-			return;
 		} else { // label 20
+			double cjp = 0.0;
 			int nj = njmo + 1;
 			int nmat = (nj + 1) / 2;
 			c6->rac3j[nj - 1] = 1.0;
@@ -137,8 +166,8 @@ void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) {
 					cj = sqrt(ccj * (jsmpos - j1s));
 					sjt = c6->rac3j[irr - 1] * c6->rac3j[irr - 1];
 					dj = 1.0 * jf * (j1 * j1po * mdf + idjc);
-					c6->rac3j[irr - 2] = -c6->rac3j[irr - 1] * dj
-							+ c6->rac3j[irr] * cjp * j1 / (cj * j1po);
+					c6->rac3j[irr - 2] = -(c6->rac3j[irr - 1] * dj
+							+ c6->rac3j[irr] * cjp * j1) / (cj * j1po);
 					sjr += (sjt * jf);
 				} // ibr45 loop
 			}
@@ -223,20 +252,24 @@ std::complex<double> ghit(
 	/* NBL identifies transfer vector going from N2 to N1;
 	 * IHI=0 for Hankel, IHI=1 for Bessel, IHI=2 for Bessel from origin;
 	 * depending on IHI, IPAM=0 gives H or I, IPAM= 1 gives K or L. */
-	const std::complex<double> cc0 = std::complex<double>(0.0, 0.0);
-	const std::complex<double> uim = std::complex<double>(0.0, 1.0);
+	const std::complex<double> cc0(0.0, 0.0);
+	const std::complex<double> uim(0.0, 1.0);
+	std::complex<double> csum(0.0, 0.0), cfun(0.0, 0.0);
 	std::complex<double> result = cc0;
+
 	if (ihi == 2) {
-		if (!(c1->rxx[nbl - 1] != 0.0 || c1->ryy[nbl - 1] != 0.0 || c1->rzz[nbl - 1] != 0.0)) {
-			if (ipamo != 0) return result;
-			if (l1 == l2 && m1 == m2) result = std::complex<double>(1.0, 0.0);
+		if (c1->rxx[nbl - 1] == 0.0 && c1->ryy[nbl - 1] == 0.0 && c1->rzz[nbl - 1] == 0.0) {
+			if (ipamo == 0) {
+				if (l1 == l2 && m1 == m2) result = std::complex(1.0, 0.0);
+			}
 			return result;
 		}
 	}
+	// label 10
 	int l1mp = l1 - ipamo;
 	int l1po = l1 + 1;
 	int m1mm2 = m1 - m2;
-	int m1mm2m = (m1mm2 > 0) ? m1mm2 + 1 : -m1mm2 + 1;
+	int m1mm2m = (m1mm2 > 0) ? m1mm2 + 1 : 1 - m1mm2;
 	int lminpo = (l2 - l1mp > 0) ? l2 - l1mp + 1 : l1mp - l2 + 1;
 	int lmaxpo = l2 + l1mp + 1;
 	int i3j0in = c1ao->ind3j[l1mp][l2 - 1];
@@ -248,74 +281,72 @@ std::complex<double> ghit(
 		isn *= -1;
 		if (l2 > l1mp) isn *= -1;
 	}
+	// label 12
 	int nblmo = nbl - 1;
 	if (ihi != 2) {
 		int nbhj = nblmo * c4->litpo;
 		int nby = nblmo * c4->litpos;
 		if (ihi != 1) {
 			for (int jm24 = 1; jm24 <= 3; jm24++) {
-				std::complex<double> csum = cc0;
+				csum = cc0;
 				int mu = jm24 - 2;
 				int mupm1 = mu + m1;
-				int mupm2 = mu  + m2;
-				if (mupm1 < -l1mp || mupm1 > l1mp || mupm2 < -l2 || mupm2 > l2)
-					continue; //jm24 loop
-				int jsn = -isn;
-				if (mu == 0) jsn = isn;
-				double cr = cgev(ipamo, mu, l1, m1) * cgev(0, mu, l2, m2);
-				int i3j0 = i3j0in;
-				if (mupm1 == 0 && mupm2 == 0) {
-					int lt14 = lminpo;
-					while (lt14 <= lmaxpo) {
-						i3j0 += 1;
-						int l3 = lt14 - 1;
-						int ny = l3 * l3 + lt14;
-						double aors = 1.0 * (l3 + lt14);
-						double f3j = (
-								c1ao->v3j0[i3j0 - 1] * c1ao->v3j0[i3j0 - 1] *
-								sqrt(aors)
-						) * jsn;
-						std::complex<double> cfun = (
-								c1ao->vh[nbhj + lt14 - 1] * c1ao->vyhj[nby + ny - 1]
-						) * f3j;
-						csum += cfun;
-						jsn *= - 1;
-						lt14 += 2;
-					}
-				} else { // label 16
-					r3jjr(l1mp, l2, -mupm1, mupm2, c6);
-					int il = ilin;
-					int lt20 = lminpo;
-					while (lt20 <= lmaxpo) {
-						i3j0 += 1;
-						if (m1mm2m <= lt20) {
-							il = il + 2;
-							int l3 = lt20 - 1;
-							int ny = l3 * l3 + lt20 + m1mm2;
-							double aors = 1.0 * (l3 + lt20);
-							double f3j = (
-									c6->rac3j[il - 1] * c1ao->v3j0[i3j0 - 1] * sqrt(aors)
-							) * jsn;
-							std::complex<double> cfun = (
-									c1ao->vh[nbhj + lt20 - 1] * c1ao->vyhj[nby + ny - 1]
-							) * f3j;
+				int mupm2 = mu + m2;
+				if (mupm1 >= -l1mp && mupm1 <= l1mp && mupm2 >= - l2 && mupm2 <= l2) {
+					int jsn = -isn;
+					if (mu == 0) jsn = isn;
+					double cr = cgev(ipamo, mu, l1, m1) * cgev(0, mu, l2, m2);
+					int i3j0 = i3j0in;
+					if (mupm1 == 0 && mupm2 == 0) {
+						int lt14 = lminpo;
+						while (lt14 <= lmaxpo) {
+							i3j0++;
+							int l3 = lt14 - 1;
+							int ny = l3 * l3 + lt14;
+							double aors = 1.0 * (l3 + lt14);
+							double f3j = (c1ao->v3j0[i3j0 - 1] * c1ao->v3j0[i3j0 - 1] * sqrt(aors)) * jsn;
+							cfun = (c1ao->vh[nbhj + lt14 - 1] * c1ao->vyhj[nby + ny - 1]) * f3j;
 							csum += cfun;
+							jsn *= -1;
+							lt14 += 2;
+						}
+						// goes to 22
+					} else { // label 16
+						r3jjr(l1mp, l2, -mupm1, mupm2, c6);
+						int il = ilin;
+						int lt20 = lminpo;
+						while (lt20 <= lmaxpo) {
+							i3j0++;
+							if (m1mm2m <= lt20) {
+								il += 2;
+								int l3 = lt20 - 1;
+								int ny = l3 * l3  + lt20 + m1mm2;
+								double aors = 1.0 * (l3 + lt20);
+								double f3j = (c6->rac3j[il - 1] * c1ao->v3j0[i3j0 - 1] * sqrt(aors)) * jsn;
+								//printf("DEBUG: VH( %d ) = (%lE, %lE)\n", (nbhj + lt20), c1ao->vh[nbhj + lt20 - 1].real(), c1ao->vh[nbhj + lt20 - 1].imag());
+								//printf("DEBUG: VYHJ( %d ) = (%lE, %lE)\n", (nby + ny), c1ao->vyhj[nby + ny - 1].real(), c1ao->vyhj[nby + ny - 1].imag());
+								cfun = (c1ao->vh[nbhj + lt20 - 1] * c1ao->vyhj[nby + ny - 1]) * f3j;
+								csum += cfun; // we were here
+							}
+							// label 20
+							jsn *= -1;
+							lt20 += 2;
 						}
-						jsn *= -1;
-						lt20 += 2;
 					}
+					// label 22
+					csum *= cr;
+					result += csum;
 				}
-				csum *= cr;
-				result += csum;
-			} // jm24 loop
-		} else { // label 30, IHI = 1
-			for (int jm44 = 1; jm44 <= 3; jm44 ++) {
-				std::complex<double> csum = cc0;
+				// Otherwise there is nothing to add
+			} // jm24 loop. Should go to 70
+		} else { // label 30, IHI == 1
+			for (int jm44 = 1; jm44 <= 3; jm44++) {
+				csum = cc0;
 				int mu = jm44 - 2;
 				int mupm1 = mu + m1;
 				int mupm2 = mu + m2;
-				if (mupm1 >= -l1mp && mupm1 <= l1mp && mupm2 >= -l2 && mupm2 >= l2) {
-					int jsn = -isn;
+				if (mupm1 >= -l1mp && mupm1 <= l1mp && mupm2 >= - l2 && mupm2 <= l2) {
+					int jsn = - isn;
 					if (mu == 0) jsn = isn;
 					double cr = cgev(ipamo, mu, l1, m1) * cgev(0, mu, l2, m2);
 					int i3j0 = i3j0in;
@@ -326,16 +357,13 @@ std::complex<double> ghit(
 							int l3 = lt34 - 1;
 							int ny = l3 * l3 + lt34;
 							double aors = 1.0 * (l3 + lt34);
-							std::complex<double> f3j = (
-									c1ao->v3j0[i3j0 - 1] * c1ao->v3j0[i3j0 - 1] * sqrt(aors)
-							) * jsn;
-							std::complex<double> cfun = (
-									c1ao->vj[nbhj + lt34 - 1] * c1ao->vyhj[nby + ny - 1]
-							) * f3j;
+							double f3j = (c1ao->v3j0[i3j0 - 1] * c1ao->v3j0[i3j0 - 1] * sqrt(aors)) * jsn;
+							cfun = (c1ao->vh[nbhj + lt34 - 1] * c1ao->vyhj[nby + ny - 1]) * f3j;
 							csum += cfun;
 							jsn *= -1;
 							lt34 += 2;
 						}
+						// goes to 42
 					} else { // label 36
 						r3jjr(l1mp, l2, -mupm1, mupm2, c6);
 						int il = ilin;
@@ -345,36 +373,35 @@ std::complex<double> ghit(
 							if (m1mm2m <= lt40) {
 								il += 2;
 								int l3 = lt40 - 1;
-								int ny = l3 * l3 + lt40 + m1mm2;
+								int ny = l3 * l3  + lt40 + m1mm2;
 								double aors = 1.0 * (l3 + lt40);
-								std::complex<double> f3j = (
-										c6->rac3j[il - 1] * c1ao->v3j0[i3j0 - 1] * sqrt(aors)
-								) * jsn;
-								std::complex<double> cfun = (
-										c1ao->vj[nbhj + lt40 - 1] * c1ao->vyhj[nby + ny - 1]
-								) * f3j;
+								double f3j = (c6->rac3j[il - 1] * c1ao->v3j0[i3j0 - 1] * sqrt(aors)) * jsn;
+								cfun = (c1ao->vh[nbhj + lt40 - 1] * c1ao->vyhj[nby + ny - 1]) * f3j;
 								csum += cfun;
 							}
+							// label 40
 							jsn *= -1;
 							lt40 += 2;
 						}
 					}
 					// label 42
 					csum *= cr;
+					result += csum;
 				}
-				result += csum;
-			} // jm44 loop
+				// Otherwise there is nothing to add
+			} // jm44 loop. Should go to 70
 		}
-	} else { // label 50, IHI = 2
+		// goes to 70
+	} else { // label 50, IHI == 2
 		int nbhj = nblmo * c4->lmtpo;
 		int nby = nblmo * c4->lmtpos;
-		for (int jm64 =1; jm64 <= 3; jm64++) {
-			std::complex<double> csum = cc0;
+		for (int jm64 = 1; jm64 <= 3; jm64++) {
+			csum = cc0;
 			int mu = jm64 - 2;
 			int mupm1 = mu + m1;
 			int mupm2 = mu + m2;
-			if (mupm1 >= -l1mp && mupm1 < l1mp && mupm2 >= -l2 && mupm2 <= l2) {
-				int jsn = - isn;
+			if (mupm1 >= -l1mp && mupm1 <= l1mp && mupm2 >= - l2 && mupm2 <= l2) {
+				int jsn = -isn;
 				if (mu == 0) jsn = isn;
 				double cr = cgev(ipamo, mu, l1, m1) * cgev(0, mu, l2, m2);
 				int i3j0 = i3j0in;
@@ -385,16 +412,13 @@ std::complex<double> ghit(
 						int l3 = lt54 - 1;
 						int ny = l3 * l3 + lt54;
 						double aors = 1.0 * (l3 + lt54);
-						std::complex<double> f3j = (
-								c1ao->v3j0[i3j0 - 1] * c1ao->v3j0[i3j0 - 1] * sqrt(aors)
-						) * jsn;
-						std::complex<double> cfun = (
-								c1ao->vj0[nbhj + lt54 - 1] * c1ao->vyj0[nby + ny - 1]
-						) * f3j;
+						double f3j = (c1ao->v3j0[i3j0 - 1] * c1ao->v3j0[i3j0 - 1] * sqrt(aors)) * jsn;
+						cfun = (c1ao->vh[nbhj + lt54 - 1] * c1ao->vyhj[nby + ny - 1]) * f3j;
 						csum += cfun;
 						jsn *= -1;
 						lt54 += 2;
 					}
+					// goes to 62
 				} else { // label 56
 					r3jjr(l1mp, l2, -mupm1, mupm2, c6);
 					int il = ilin;
@@ -404,25 +428,23 @@ std::complex<double> ghit(
 						if (m1mm2m <= lt60) {
 							il += 2;
 							int l3 = lt60 - 1;
-							int ny = l3 * l3 + lt60 + m1mm2;
+							int ny = l3 * l3  + lt60 + m1mm2;
 							double aors = 1.0 * (l3 + lt60);
-							std::complex<double> f3j = (
-									c6->rac3j[il - 1] * c1ao->v3j0[i3j0 - 1] * sqrt(aors)
-							) * jsn;
-							std::complex<double> cfun = (
-									c1ao->vj0[nbhj + lt60 - 1] * c1ao->vyj0[nby + ny - 1]
-							) * f3j;
+							double f3j = (c6->rac3j[il - 1] * c1ao->v3j0[i3j0 - 1] * sqrt(aors)) * jsn;
+							cfun = (c1ao->vh[nbhj + lt60 - 1] * c1ao->vyhj[nby + ny - 1]) * f3j;
 							csum += cfun;
 						}
+						// label 60
 						jsn *= -1;
 						lt60 += 2;
 					}
 				}
 				// label 62
 				csum *= cr;
+				result += csum;
 			}
-			result += csum;
-		} // jm64 loop
+			// Otherwise there is nothing to add
+		} // jm64 loop. Should go to 70
 	}
 	// label 70
 	const double four_pi = acos(0.0) * 8.0;
@@ -446,7 +468,34 @@ std::complex<double> ghit(
  */
 void cms(std::complex<double> **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6) {
 	std::complex<double> dm, de, cgh, cgk;
-	const std::complex<double> cc0 = std::complex<double>(0.0, 0.0);
+	const std::complex<double> cc0(0.0, 0.0);
+	// DEBUG CODE
+	/*double srxx = 0.0, sryy = 0.0, srzz = 0.0;
+	for (int di = 0; di < c4->nsph; di++) {
+		srxx += c1->rxx[di]; sryy += c1->ryy[di]; srzz += c1->rzz[di];
+	}
+	printf("DEBUG: in CMS srxx = %lE, sryy = %lE, srzz = %lE\n", srxx, sryy, srzz);
+	int sind3j = 0;
+	for (int di = 0; di < 9; di++) {
+		for (int dj = 0; dj < 8; dj++) sind3j += c1ao->ind3j[di][dj];
+	}
+	printf("DEBUG: in CMS sind3j = %d\n", sind3j);
+	std::complex<double> sv3j0(0.0, 0.0);
+	for (int di = 0; di < c4->nv3j; di++) sv3j0 += c1ao->v3j0[di];
+	printf("DEBUG: in CMS sv3j0 = (%lE,%lE)\n", sv3j0.real(), sv3j0.imag());
+	std::complex<double> svh(0.0, 0.0);
+	int dsize = (c4->nsph * c4->nsph - 1) * c4->litpo;
+	for (int di = 0; di < dsize; di++) svh += c1ao->vh[di];
+	printf("DEBUG: in CMS svh = (%lE,%lE)\n", svh.real(), svh.imag());
+	std::complex<double> svyhj(0.0, 0.0);
+	dsize = (c4->nsph * c4->nsph - 1) * c4->litpos;
+	for (int di = 0; di < dsize; di++) svyhj += c1ao->vyhj[di];
+	printf("DEBUG: in CMS svyhj = (%lE,%lE)\n", svyhj.real(), svyhj.imag());
+	std::complex<double> svyj0(0.0, 0.0);
+	dsize = c4->nsph * c4->lmtpos;
+	for (int di = 0; di < dsize; di++) svyj0 += c1ao->vyj0[di];
+	printf("DEBUG: in CMS svyj0 = (%lE,%lE)\n", svyj0.real(), svyj0.imag());
+	// END DEBUG */
 	int ndi = c4->nsph * c4->nlim;
 	int nbl = 0;
 	int nsphmo = c4->nsph - 1;
@@ -460,7 +509,7 @@ void cms(std::complex<double> **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6) {
 				int l1po = l1 + 1;
 				int il1 = l1po * l1;
 				int l1tpo = l1po + l1;
-				for (int im1 =1; im1 <= l1tpo; il1++) {
+				for (int im1 = 1; im1 <= l1tpo; im1++) {
 					int m1 = im1 - l1po;
 					int ilm1 = il1 + m1;
 					int ilm1e = ilm1 + ndi;
@@ -468,7 +517,7 @@ void cms(std::complex<double> **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6) {
 					int i1e = in1 + ilm1e;
 					int j1 = in2 + ilm1;
 					int j1e = in2 + ilm1e;
-					for (int l2 =1; l2 <= c4->li; l2++) {
+					for (int l2 = 1; l2 <= c4->li; l2++) {
 						int l2po = l2 + 1;
 						int il2 = l2po * l2;
 						int l2tpo = l2po + l2;
@@ -482,6 +531,18 @@ void cms(std::complex<double> **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6) {
 							int i2e = in2 + ilm2e;
 							int j2 = in1 + ilm2;
 							int j2e = in1 + ilm2e;
+							bool make_break = false;
+							if (i1 == 5 && i2 == 487) make_break = true;
+							if (i1 == 5 && i2e == 487) make_break = true;
+							if (i1e == 5 && i2 == 487) make_break = true;
+							if (i1e == 5 && i2e == 487) make_break = true;
+							if (j1 == 5 && j2 == 487) make_break = true;
+							if (j1 == 5 && j2e == 487) make_break = true;
+							if (j1e == 5 && j2 == 487) make_break = true;
+							if (j1e == 5 && j2e == 487) make_break = true;
+							if (make_break) {
+								printf("DEBUG: this is a diverging place.\n");
+							}
 							cgh = ghit(0, 0, nbl, l1, m1, l2, m2, c1, c1ao, c4, c6);
 							cgk = ghit(0, 1, nbl, l1, m1, l2, m2, c1, c1ao, c4, c6);
 							am[i1 - 1][i2 - 1] = cgh;
@@ -524,6 +585,9 @@ void cms(std::complex<double> **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6) {
 			} // im1 loop
 		} // l1 loop
 	} // n1 loop
+	double srac3j = 0.0;
+	for (int di = 0; di < c4->lmtpo; di++) srac3j += c6->rac3j[di];
+	printf("DEBUG: in CMS srac3j = %lE\n", srac3j);
 }
 
 /*! \brief C++ porting of HJV
@@ -543,7 +607,7 @@ void hjv(
 ) {
 	int nsphmo = c4->nsph - 1;
 	int lit = c4->li + c4->li;
-	int lmt = c4->lm + c4->lm;
+	int lmt = c4->li + c4->le;
 	const int rfj_size = (lit > lmt) ? lit : lmt;
 	const int rfn_size = c4->litpo;
 	double *rfj, *rfn;
@@ -575,7 +639,9 @@ void hjv(
 				return;
 			}
 			for (int lpo38 = 1; lpo38 <= c4->litpo; lpo38++) {
-				c1ao->vh[lpo38 + ivhb - 1] = std::complex<double>(rfj[lpo38 - 1], rfn[lpo38 - 1]);
+				double rpart = rfj[lpo38 - 1];
+				double ipart = rfn[lpo38 - 1];
+				c1ao->vh[lpo38 + ivhb - 1] = std::complex<double>(rpart, ipart);
 			}
 			ivhb += c4->litpo;
 		} // ns40 loop
@@ -605,6 +671,118 @@ void hjv(
 	delete[] rfn;
 }
 
+/*! \brief C++ porting of LUCIN
+ *
+ * \param am: Matrix of complex.
+ * \param nddmst: `const int`
+ * \param n: `int`
+ * \param ier: `int &`
+ */
+void lucin(std::complex<double> **am, const int nddmst, int n, int &ier) {
+	/* NDDMST  FIRST DIMENSION OF AM AS DECLARED IN DIMENSION
+	 *         STATEMENT.
+	 * N       NUMBER OF ROWS IN AM.
+	 * IER     IS REPLACED BY 1 FOR SINGULARITY.
+	 */
+	double *v = new double[nddmst];
+	std::complex<double> ctemp, cfun;
+	std::complex<double> cc0 = std::complex<double>(0.0, 0.0);
+	ier = 0;
+	int nminus = n - 1;
+	for (int i = 1; i <= n; i++) {
+		double sum = 0.0;
+		for (int j = 1; j <= n; j++) {
+			sum += (
+					am[i - 1][j - 1].real() * am[i - 1][j - 1].real()
+					+ am[i - 1][j - 1].imag() * am[i - 1][j - 1].imag()
+			);
+		} // j1319 loop
+		v[i - 1] = 1.0 / sum;
+	} // i1309 loop
+	// 2. REPLACE AM BY TRIANGULAR MATRICES (L,U) WHERE AM=L*U.
+	//    REPLACE L(I,I) BY 1/L(I,I), READY FOR SECTION 4.
+	//    (ROW INTERCHANGES TAKE PLACE, AND THE INDICES OF THE PIVOTAL ROWS
+	//    ARE PLACED IN V.)
+	/* >>> THERE APPEARS TO BE A BUG IN THE FOLLOWING LOOP <<< */
+	for (int k = 1; k <= n; k++) {
+		int kplus = k + 1;
+		int kminus = k - 1;
+		int l = k;
+		double psqmax = 0.0;
+		for (int i = k; i <= n; i++) {
+			cfun = cdtp(-am[i - 1][k - 1], am, i, 1, k, kminus);
+			ctemp = -cfun;
+			am[i - 1][k - 1] = ctemp;
+			double psq = v[i - 1] * (ctemp.real() * ctemp.real() + ctemp.imag() * ctemp.imag());
+			if (psq > psqmax) {
+				psqmax = psq;
+				l = i;
+			}
+		} // i2029 loop
+		if (l != k) {
+			for (int j = 1; j <= n; j++) {
+				ctemp = am[k - 1][j - 1];
+				am[k - 1][j - 1] = am[l - 1][j - 1];
+				am[l - 1][j - 1] = ctemp;
+			} // j2049 loop
+			v[l - 1] = v[k - 1];
+		}
+		// label 2011
+		v[k - 1] = 1.0 * l;
+		if (psqmax == 0.0) {
+			ier = 1;
+			delete[] v;
+			return;
+		}
+		ctemp = 1.0 / am[k - 1][k - 1];
+		am[k - 1][k - 1] = ctemp;
+		if (kplus <= n) {
+			for (int j = kplus; j <= n; j++) {
+				cfun = cdtp(-am[k - 1][j - 1], am, k, 1, j, kminus);
+				am[k - 1][j - 1] = -ctemp * cfun;
+			} // j2059 loop
+		}
+	} // k2019 loop
+	// 4.  REPLACE AM BY ITS INVERSE AMINV.
+	// 4.1 REPLACE L AND U BY THEIR INVERSE LINV AND UINV.
+	for (int k = 1; k <= nminus; k++) {
+		int kplus = k + 1;
+		for (int i = kplus; i <= n; i++) {
+			cfun = cdtp(cc0, am, i, k, k, i - k);
+			am[i - 1][k - 1] = -am[i - 1][i - 1] * cfun;
+			cfun = cdtp(am[k - 1][i - 1], am, k, kplus, i, i - k - 1);
+			am[k - 1][i - 1] = -cfun;
+		} // i4119 loop
+	} // k4109 loop
+	// 4.2 FORM AMINV=UINV*LINV.
+	for (int k = 1; k <= n; k++) {
+		for (int i = 1; i <= n; i++) {
+			if (i < k) {
+				cfun = cdtp(cc0, am, i, k, k, n - k + 1);
+				am[i - 1][k -1] = cfun;
+			}
+			else {
+				cfun = cdtp(am[i - 1][k - 1], am, i, i + 1, k, n - i);
+				am[i - 1][k - 1] = cfun;
+			}
+		} // i4119 loop
+	} // k4209 loop
+	// 4.3 INTERCHANGE COLUMNS OF AMINV AS SPECIFIED BY V, BUT IN REVERSE
+	//     ORDER.
+	for (int l = 1; l <= n; l++) {
+		int k = n - l + 1;
+		int kcol = (int)(v[k - 1]);
+		if (kcol != k) {
+			for (int i = 1; i <= n; i++) {
+				ctemp = am[i - 1][k - 1];
+				am[i - 1][k - 1] = am[i - 1][kcol - 1];
+				am[i - 1][kcol - 1] = ctemp;
+			} // i4319 loop
+		}
+	} // l4309 loop
+	delete[] v;
+}
+
 /*! \brief C++ porting of POLAR
  *
  * \param x: `double`
@@ -623,58 +801,55 @@ void polar(
 	bool onx = (y == 0.0);
 	bool ony = (x == 0.0);
 	bool onz = (onx && ony);
-	double rho;
-	if (!(onz || onx || ony)) {
-		rho = sqrt(x * x + y * y);
-		cph = x / rho;
-		sph = y / rho;
-	} else {
-		if (onz) {
-			cph = 1.0;
-			sph = 0.0;
-		} else if (onx) {
-			rho = x;
-			cph = 1.0;
-			if (x < 0.0) {
-				rho *= -1.0;
-				cph *= -1.0;
+	double rho = 0.0;
+	if (!onz) {
+		if (!onx) {
+			if (!ony) {
+				rho = sqrt(x * x + y * y);
+				cph = x / rho;
+				sph = y / rho;
+				// goes to 25
+			} else { // label 20
+				rho = (y > 0.0) ? y : -y;
+				cph = 0.0;
+				sph = (y > 0.0) ? 1.0 : -1.0;
+				// goes to 25
 			}
+		} else { // label 15
+			rho = (x > 0.0) ? x : -x;
+			cph = (x > 0.0) ? 1.0 : -1.0;
 			sph = 0.0;
-		} else if (ony) {
-			rho = y;
-			sph = 1.0;
-			if (y < 0.0) {
-				rho *= -1.0;
-				sph *= -1.0;
-			}
-			cph = 0.0;
+			// goes to 25
 		}
+	} else { // label 10
+		cph = 1.0;
+		sph = 0.0;
+		// goes to 25
 	}
+	// label 25
 	if (z == 0.0) {
 		if (!onz) {
 			r = rho;
 			cth = 0.0;
 			sth = 1.0;
-			return;
-		} else {
+			// returns
+		} else { // label 30
 			r = 0.0;
 			cth = 1.0;
 			sth = 0.0;
-			return;
+			// returns
 		}
-	} else {
+	} else { // label 35
 		if (!onz) {
-			r = sqrt(rho + z * z);
+			r = sqrt(rho * rho + z * z);
 			cth = z / r;
 			sth = rho / r;
-		} else {
-			r = z;
-			cth = 1.0;
+			// returns
+		} else { // label 40
+			r = (z > 0.0) ? z : -z;
+			cth = (z > 0.0) ? 1.0 : -1.0;
 			sth = 0.0;
-			if (z < 0.0) {
-				r *= -1.0;
-				cth *= -1.0;
-			}
+			// returns
 		}
 	}
 }
@@ -689,7 +864,6 @@ void r3j000(int j2, int j3, C6 *c6) {
 	int jmx = j3 + j2;
 	if (jmx <= 0) {
 		c6->rac3j[0] = 1.0;
-		//printf("DEBUG: RAC3J(1) = %lE\n", c6->rac3j[0]);
 		return;
 	}
 	int jmn = j3 - j2;
@@ -702,14 +876,13 @@ void r3j000(int j2, int j3, C6 *c6) {
 		double sj = 1.0 * jf;
 		double cnr = (1 / sqrt(sj)) * isn;
 		c6->rac3j[0] = cnr;
-		//printf("DEBUG: RAC3J(1) = %lE\n", c6->rac3j[0]);
 		return;
 	}
-	double sjr = jf;
+	double sjr = 1.0 * jf;
 	int jmxpos = (jmx + 1) * (jmx + 1);
 	int jmns = jmn * jmn;
 	int j1mo = jmx - 1;
-	int j1s = jmx * jmx; // a slight optimization was applied here
+	int j1s = (j1mo + 1) * (j1mo + 1);
 	double cj = sqrt(1.0 * (jmxpos - j1s) * (j1s - jmns));
 	int j1mos = j1mo * j1mo;
 	double cjmo = sqrt(1.0 * (jmxpos - j1mos) * (j1mos - jmns));
@@ -719,8 +892,6 @@ void r3j000(int j2, int j3, C6 *c6) {
 		double cnr = (1.0 / sqrt(sj)) * isn;
 		c6->rac3j[1] = cnr;
 		c6->rac3j[0] *= cnr;
-		//printf("DEBUG: RAC3J(1) = %lE\n", c6->rac3j[0]);
-		//printf("DEBUG: RAC3J(2) = %lE\n", c6->rac3j[1]);
 		return;
 	}
 	int nj = njmo + 1;
@@ -731,16 +902,17 @@ void r3j000(int j2, int j3, C6 *c6) {
 		int nbr = njmo - nmat;
 		for (int ibr45 = 1; ibr45 <= nbr; ibr45++) {
 			int irr = nj - ibr45;
-			jf = jf -4;
-			j1mo = j1mo - 2;
+			jf -= 4;
+			j1mo -= 2;
 			j1s = (j1mo + 1) * (j1mo + 1);
 			cj = sqrt(1.0 * (jmxpos - j1s) * (j1s - jmns));
 			j1mos = j1mo * j1mo;
-			cjmo = sqrt(1.0 * (jmxpos - j1mos) * (j1s - jmns));
+			cjmo = sqrt(1.0 * (jmxpos - j1mos) * (j1mos - jmns));
 			c6->rac3j[irr - 2] = c6->rac3j[irr - 1] * (-cj / cjmo);
 			sjr = sjr + (c6->rac3j[irr - 1] * c6->rac3j[irr - 1]) * jf;
 		}
 	}
+	// label 50
 	double racmat = c6->rac3j[nmat - 1];
 	sjr = sjr + (racmat * racmat) * (jf - 4);
 	c6->rac3j[0] = 1.0;
@@ -755,8 +927,8 @@ void r3j000(int j2, int j3, C6 *c6) {
 	int nmatmo = nmat - 1;
 	if (nmatmo >= 2) {
 		for (int irl70 = 2; irl70 <= nmatmo; irl70++) {
-			jf = jf + 4;
-			j1pt = j1pt + 2;
+			jf += 4;
+			j1pt += 2;
 			j1pos = (j1pt - 1) * (j1pt - 1);
 			cjpo = sqrt(1.0 * (jmxpos - j1pos) * (j1pos - jmns));
 			j1pts = j1pt * j1pt;
@@ -765,6 +937,7 @@ void r3j000(int j2, int j3, C6 *c6) {
 			sjl = sjl + (c6->rac3j[irl70 - 1] * c6->rac3j[irl70 - 1]) * jf;
 		}
 	}
+	// label 75
 	double ratrac = racmat / c6->rac3j[nmat - 1];
 	double rats = ratrac * ratrac;
 	double sj = sjr + sjl * rats;
@@ -781,76 +954,121 @@ void r3j000(int j2, int j3, C6 *c6) {
 
 /*! \brief C++ porting of STR
  *
+ * \param rcf: `double **` Matrix of double.
  * \param c1: `C1 *` Pointer to a C1 instance.
- * \param c1ao: `C1_AddOns *` Pointer to a C1_AddOns instance.
+ * \param c1ao: `C1_AddOns *` Pointer to C1_AddOns instance.
  * \param c3: `C3 *` Pointer to a C3 instance.
  * \param c4: `C4 *` Pointer to a C4 structure.
  * \param c6: `C6 *` Pointer to a C6 instance.
  */
-void str(C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 *c4, C6 *c6) {
+void str(double **rcf, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 *c4, C6 *c6) {
 	std::complex<double> *ylm;
-	c3 = new C3(); // this makes up for GCS = 0.0D0
+	const double pi = acos(-1.0);
+	c3->gcs = 0.0;
+	for (int i18 = 1; i18 <= c4->nsph; i18++) {
+		int iogi = c1->iog[i18 - 1];
+		if (iogi >= i18) {
+			double gcss = pi * c1->ros[i18 - 1] * c1->ros[i18 - 1];
+			c1->gcsv[i18 - 1] = gcss;
+			int nsh = c1->nshl[i18 - 1];
+			for (int j16 = 1; j16 <= nsh; j16++) {
+				c1->rc[i18 - 1][j16 - 1] = rcf[i18 - 1][j16 - 1] * c1->ros[i18 - 1];
+			} // j16 loop
+			c3->gcs += gcss;
+		}
+	} // i18 loop
+	int ylm_size = (c4->litpos > c4->lmtpos) ? c4->litpos : c4->lmtpos;
+	ylm = new std::complex<double>[ylm_size]();
 	int i = 0;
-	for (int l1po = 1; l1po <= c4->lmpo; l1po++) {
-		int l1 = l1po - 1;
+	for (int l1po28 = 1; l1po28 <= c4->lmpo; l1po28++) {
+		int l1 = l1po28 - 1;
 		for (int l2 = 1; l2 <= c4->lm; l2++) {
 			r3j000(l1, l2, c6);
-			c1ao->ind3j[l1po - 1][l2 - 1] = i;
-			int iabs = l2 - l1;
-			if (iabs < 0) iabs *= -1;
-			int lmnpo = iabs + 1;
+			c1ao->ind3j[l1po28 - 1][l2 - 1] = i;
+			int lmnpo = (l2 > l1) ? l2 - l1 + 1 : l1 - l2 + 1;
 			int lmxpo = l2 + l1 + 1;
+			int lpo28 = lmnpo;
 			int il = 0;
-			int lpo = lmnpo;
-			while (lpo <= lmxpo) {
+			while (lpo28 <= lmxpo) {
 				i++;
 				il++;
 				c1ao->v3j0[i - 1] = c6->rac3j[il - 1];
-				lpo += 2;
+				lpo28 += 2;
 			}
-		}
-	} // l1po loop
+		} // l2 loop
+	} // l1po28 loop
 	int nsphmo = c4->nsph - 1;
-	int lit = c4->li + c4-> li;
-	int lmt = c4->li + c4->le;
-	int size40 = nsphmo * c4->nsph * c4->litpos;
-	int size50 = c4->nsph * c4->lmtpos;
-	int ylm_size = (size40 > size50) ? size40 : size50;
-	ylm = new std::complex<double>[ylm_size];
+	int lit = c4->li + c4->li;
 	int ivy = 0;
-	for (int nf40 = 1; nf40 <= nsphmo; nf40++) {
+	for (int nf40 = 1; nf40 <= nsphmo; nf40++) { // GPU portable?
 		int nfpo = nf40 + 1;
 		for (int ns40 = nfpo; ns40 <= c4->nsph; ns40++) {
 			double rx = c1->rxx[nf40 - 1] - c1->rxx[ns40 - 1];
 			double ry = c1->ryy[nf40 - 1] - c1->ryy[ns40 - 1];
 			double rz = c1->rzz[nf40 - 1] - c1->rzz[ns40 - 1];
-			double rr, crth, srth, crph, srph;
+			double rr = 0.0;
+			double crth = 0.0, srth = 0.0, crph = 0.0, srph = 0.0;
 			polar(rx, ry, rz, rr, crth, srth, crph, srph);
-			//printf("DEBUG: crth = %lE\n", crth);
-			//printf("DEBUG: srth = %lE\n", srth);
-			//printf("DEBUG: crph = %lE\n", crph);
-			//printf("DEBUG: srph = %lE\n", srph);
 			sphar(crth, srth, crph, srph, lit, ylm);
 			for (int iv38 = 1; iv38 <= c4->litpos; iv38++) {
 				c1ao->vyhj[iv38 + ivy - 1] = dconjg(ylm[iv38 - 1]);
 			} // iv38 loop
+			//std::complex<double> svyhj(0.0, 0.0);
+			//int dsize = (c4->nsph * c4->nsph - 1) * c4->litpos;
+			//for (int di = 0; di < dsize; di++) svyhj += c1ao->vyhj[di];
+			//printf("DEBUG: in STR svyhj = (%lE,%lE)\n", svyhj.real(), svyhj.imag());
 			ivy += c4->litpos;
 		} // ns40 loop
 	} // nf40 loop
+	int lmt = c4->li + c4->le;
 	ivy = 0;
 	for (int nf50 = 1; nf50 <= c4->nsph; nf50++) {
 		double rx = c1->rxx[nf50 - 1];
 		double ry = c1->ryy[nf50 - 1];
 		double rz = c1->rzz[nf50 - 1];
-		double rr, crth, srth, crph, srph;
-		if (rx == 0.0 && ry == 0.0 && rz == 0.0) continue;
-		polar(rx, ry, rz, rr, crth, srth, crph, srph);
-		sphar(crth, srth, crph, srph, lmt, ylm);
-		for (int iv48 = 1; iv48 <= c4->lmtpos; iv48++) {
-			c1ao->vyj0[iv48 + ivy - 1] = dconjg(ylm[iv48 - 1]);
-		} // iv48 loop
+		if (rx != 0.0 || ry != 0.0 || rz != 0.0) {
+			double rr = 0.0;
+			double crth = 0.0, srth = 0.0, crph = 0.0, srph = 0.0;
+			polar(rx, ry, rz, rr, crth, srth, crph, srph);
+			sphar(crth, srth, crph, srph, lmt, ylm);
+			for (int iv48 = 1; iv48 <= c4->lmtpos; iv48++) {
+				c1ao->vyj0[iv48 + ivy - 1] = dconjg(ylm[iv48 - 1]);
+			} // iv48 loop
+		}
 		ivy += c4->lmtpos;
 	} // nf50 loop
+	delete[] ylm;
+}
+
+/*! \brief Write a matrix to a log file (debug function).
+ *
+ *  \param mat: Matrix of complex.
+ *  \param rows: `int`
+ *  \param cols: `int`
+ */
+void logmat(std::complex<double> **mat, int rows, int cols) {
+	FILE* logfile = fopen("c_matrix.log", "w");
+	for (int i = 0; i < rows; i++) {
+		for (int j = 0; j < cols; j++) {
+			fprintf(logfile, "R:%3d,%3d,%15.7lE\n", i + 1, j + 1, mat[i][j].real());
+			fprintf(logfile, "I:%3d,%3d,%15.7lE\n", i + 1, j + 1, mat[i][j].imag());
+		}
+	}
+	fclose(logfile);
+}
+
+/*! \brief Sum all the elements of a matrix (debug function).
+ *
+ *  \param mat: Matrix of complex.
+ *  \param rows: `int`
+ *  \param cols: `int`
+ */
+std::complex<double> summat(std::complex<double> **mat, int rows, int cols) {
+	std::complex<double> result(0.0, 0.0);
+	for (int i = 0; i < rows; i++) {
+		for (int j = 0; j < cols; j++) result += mat[i][j];
+	}
+	return result;
 }
 
 #endif
diff --git a/src/include/sph_subs.h b/src/include/sph_subs.h
index 484472df369d3419d1873b9867c5975095fd8527..2f1e1ebbf7c2695ceeec7d9c2d4e560b09c29e8a 100644
--- a/src/include/sph_subs.h
+++ b/src/include/sph_subs.h
@@ -1046,14 +1046,13 @@ label47:
  * \param zpv: `double ****`
  */
 void thdps(int lm, double ****zpv) {
-	// WARNING: unclear nested loop in THDPS
-	// The optimized interpretation was adopted here.
-	for (int l10 = 1; l10 <= lm; l10++) {
-		for (int ilmp = 1; ilmp <= 3; ilmp++) {
-			zpv[l10 - 1][ilmp - 1][0][0] = 0.0;
-			zpv[l10 - 1][ilmp - 1][0][1] = 0.0;
-			zpv[l10 - 1][ilmp - 1][1][0] = 0.0;
-			zpv[l10 - 1][ilmp - 1][1][1] = 0.0;
+	//double szpv = 0.0;
+	for (int l10 = 0; l10 < lm; l10++) {
+		for (int ilmp = 0; ilmp < 3; ilmp++) {
+			zpv[l10][ilmp][0][0] = 0.0;
+			zpv[l10][ilmp][0][1] = 0.0;
+			zpv[l10][ilmp][1][0] = 0.0;
+			zpv[l10][ilmp][1][1] = 0.0;
 		}
 	}
 	for (int l15 = 1; l15 <= lm; l15++) {
@@ -1061,6 +1060,7 @@ void thdps(int lm, double ****zpv) {
 		double zp = -1.0 / sqrt(xd);
 		zpv[l15 - 1][1][0][1] = zp;
 		zpv[l15 - 1][1][1][0] = zp;
+		//szpv += 2.0 * zp;
 	}
 	if (lm != 1) {
 		for (int l20 = 2; l20 <= lm; l20++) {
@@ -1069,6 +1069,7 @@ void thdps(int lm, double ****zpv) {
 			double zp = sqrt(xn / xd);
 			zpv[l20 - 1][0][0][0] = zp;
 			zpv[l20 - 1][0][1][1] = zp;
+			//szpv += 2.0 * zp;
 		}
 		int lmmo = lm - 1;
 		for (int l25 = 1; l25 <= lmmo; l25++) {
@@ -1077,8 +1078,10 @@ void thdps(int lm, double ****zpv) {
 			double zp = -1.0 * sqrt(xn / xd);
 			zpv[l25 - 1][2][0][0] = zp;
 			zpv[l25 - 1][2][1][1] = zp;
+			//szpv += 2.0 * zp;
 		}
 	}
+	//printf("DEBUG: szpv = %lE\n", szpv);
 }
 
 /*! \brief C++ porting of UPVMP
@@ -1412,9 +1415,7 @@ void dme(
 	}
 	for (int j43 = 1; j43 <= lipt; j43++) {
 		fb[j43 - 1] = rfj[j43 - 1];
-		//printf("DEBUG: fb[%d] = (%lE,%lE)\n", j43, fb[j43 - 1].real(), fb[j43 - 1].imag());
 		fn[j43 - 1] = rfn[j43 - 1];
-		//printf("DEBUG: fn[%d] = (%lE,%lE)\n", j43, fn[j43 - 1].real(), fn[j43 - 1].imag());
 	}
 	if (nsh <= 1) {
 		cri = c2->dc0[0] / exdc;
@@ -1429,7 +1430,6 @@ void dme(
 			ccnb = fn[lpo - 1] * dfbi;
 			ccnc = fbi[lpo - 1] * dfb;
 			ccnd = fb[lpo - 1] * dfbi;
-			// CLUSTER terminated in this loop with li = 8.
 			c1->rmi[l60 - 1][i - 1] = 1.0 + uim * (ccna - ccnb) / (ccnc - ccnd);
 			c1->rei[l60 - 1][i - 1] = 1.0 + uim * (cri * ccna - ccnb) / (cri * ccnc - ccnd);
 		}
diff --git a/src/libnptm/Commons.cpp b/src/libnptm/Commons.cpp
index 342381f8eec4ac6a21944bc3e0fb5b6104f935c4..1d67281b19812dd194fd00dd01e844888d443ab4 100644
--- a/src/libnptm/Commons.cpp
+++ b/src/libnptm/Commons.cpp
@@ -25,39 +25,39 @@ C1::C1(int ns, int l_max, int *_nshl, int *_iog) {
 	rmi = new complex<double>*[lm];
 	rei = new complex<double>*[lm];
 	for (int ri = 0; ri < lm; ri++) {
-		rmi[ri] = new complex<double>[nsph];
-		rei[ri] = new complex<double>[nsph];
+		rmi[ri] = new complex<double>[nsph]();
+		rei[ri] = new complex<double>[nsph]();
 	}
 	w = new complex<double>*[nlmmt];
-	for (int wi = 0; wi < nlmmt; wi++) w[wi] = new complex<double>[4];
+	for (int wi = 0; wi < nlmmt; wi++) w[wi] = new complex<double>[4]();
 	vints = new complex<double>*[nsph];
 	rc = new double*[nsph];
-	nshl = new int[nsph];
-	iog = new int[nsph];
+	nshl = new int[nsph]();
+	iog = new int[nsph]();
 	for (int vi = 0; vi < nsph; vi++) {
-		rc[vi] = new double[_nshl[vi]];
-		vints[vi] = new complex<double>[16];
+		rc[vi] = new double[_nshl[vi]]();
+		vints[vi] = new complex<double>[16]();
 		nshl[vi] = _nshl[vi];
 		iog[vi] = _iog[vi];
 	}
-	fsas = new complex<double>[nsph];
-	sscs = new double[nsph];
-	sexs = new double[nsph];
-	sabs = new double[nsph];
-	sqscs = new double[nsph];
-	sqexs = new double[nsph];
-	sqabs = new double[nsph];
-	gcsv = new double[nsph];;
-	rxx = new double[nsph];
-	ryy = new double[nsph];
-	rzz = new double[nsph];
-	ros = new double[nsph];
+	fsas = new complex<double>[nsph]();
+	sscs = new double[nsph]();
+	sexs = new double[nsph]();
+	sabs = new double[nsph]();
+	sqscs = new double[nsph]();
+	sqexs = new double[nsph]();
+	sqabs = new double[nsph]();
+	gcsv = new double[nsph]();
+	rxx = new double[nsph]();
+	ryy = new double[nsph]();
+	rzz = new double[nsph]();
+	ros = new double[nsph]();
 
 	sas = new complex<double>**[nsph];
 	for (int si = 0; si < nsph; si++) {
 		sas[si] = new complex<double>*[2];
-		sas[si][0] = new complex<double>[2];
-		sas[si][1] = new complex<double>[2];
+		sas[si][0] = new complex<double>[2]();
+		sas[si][1] = new complex<double>[2]();
 	}
 }
 
@@ -97,11 +97,11 @@ C1_AddOns::C1_AddOns(C4 *c4) {
 	nsph = c4->nsph;
 	lmpo = c4->lmpo;
 	nlemt = 2 * c4->nlem;
-	vh = new complex<double>[(nsph * nsph - 1) * c4->litpo];
+	vh = new complex<double>[(nsph * nsph - 1) * c4->litpo]();
 	vj0 = new complex<double>[nsph * c4->lmtpo];
 	vj = new complex<double>[1]; // QUESTION: is 1 really enough for a general case?
-	vyhj = new complex<double>[nsph * (nsph - 1) * c4->litpo];
-	vyj0 = new complex<double>[nsph * c4->lmtpo];
+	vyhj = new complex<double>[(nsph * nsph - 1) * c4->litpos];
+	vyj0 = new complex<double>[nsph * c4->lmtpos];
 	am0m = new complex<double>*[nlemt];
 	for (int ai = 0; ai < nlemt; ai++) {
 		am0m[ai] = new complex<double>[nlemt];
@@ -156,50 +156,29 @@ C1_AddOns::~C1_AddOns() {
 }
 
 void C1_AddOns::allocate_vectors(C4 *c4) {
-	int i = 0;
-	int l1po_max = 0, l2_max = 0;
-	int i_max = 0;
-	// This loop calculates NV3J, the required size of v3j0
-	for (int l1po = 1; l1po <= c4->lmpo; l1po++) {
-		int l1 = l1po - 1;
-		for (int l2 = 1; l2 <= c4->lm; l2++) {
-			if (l1po > l1po_max) l1po_max = l1po;
-			if (l2 > l2_max) l2_max = l2;
-			int iabs = l2 - l1;
-			if (iabs < 0) iabs *= -1;
-			int lmnpo = iabs + 1;
-			int lmxpo = l2 + l1 + 1;
-			int il = 0;
-			int lpo = lmnpo;
-			while (lpo <= lmxpo) {
-				i++;
-				il++;
-				if (i > i_max) i_max = i;
-				lpo += 2;
-			}
-		}
-	} // l1po loop
-	v3j0 = new double[i_max];
+	// This calculates the size of v3j0
+	int lm = (c4->li > c4->le) ? c4->li : c4->le;
+	const int nv3j = c4->nv3j;
+	v3j0 = new double[nv3j]();
 	ind3j = new int*[lmpo];
-	for (int ii = 0; ii < lmpo; ii++) ind3j[ii] = new int[c4->lm];
+	for (int ii = 0; ii < lmpo; ii++) ind3j[ii] = new int[c4->lm]();
 	// This calculates the size of vyhj
-	int nsphmo = c4->nsph - 1;
-	int ivy = nsphmo * nsph * c4->litpos;
-	vyhj = new complex<double>[ivy];
+	int ivy = (nsph * nsph - 1) * c4->litpos;
+	vyhj = new complex<double>[ivy]();
 	// This calculates the size of vyj0
 	ivy = c4->lmtpos * c4->nsph;
-	vyj0 = new complex<double>[ivy];
+	vyj0 = new complex<double>[ivy]();
 }
 
 C2::C2(int ns, int nl, int npnt, int npntts) {
 	nsph = ns;
 	int max_n = (npnt > npntts) ? npnt : npntts;
 	nhspo = 2 * max_n - 1;
-	ris = new complex<double>[nhspo];
-	dlri = new complex<double>[nhspo];
-	vkt = new complex<double>[nsph];
-	dc0 = new complex<double>[nl];
-	vsz = new double[nsph];
+	ris = new complex<double>[nhspo]();
+	dlri = new complex<double>[nhspo]();
+	vkt = new complex<double>[nsph]();
+	dc0 = new complex<double>[nl]();
+	vsz = new double[nsph]();
 }
 
 C2::~C2() {
@@ -228,7 +207,7 @@ C3::~C3() {
 }
 
 C6::C6(int lmtpo) {
-	rac3j = new double[lmtpo];
+	rac3j = new double[lmtpo]();
 }
 
 C6::~C6() {
@@ -239,11 +218,11 @@ C9::C9(int ndi, int nlem, int ndit, int nlemt) {
 	gis = new complex<double>*[ndi];
 	gls = new complex<double>*[ndi];
 	for (int gi = 0; gi < ndi; gi++) {
-		gis[gi] = new complex<double>[nlem];
-		gls[gi] = new complex<double>[nlem];
+		gis[gi] = new complex<double>[nlem]();
+		gls[gi] = new complex<double>[nlem]();
 	}
 	sam = new complex<double>*[ndit];
-	for (int si = 0; si < ndit; si++) sam[si] = new complex<double>[nlemt];
+	for (int si = 0; si < ndit; si++) sam[si] = new complex<double>[nlemt]();
 	gis_size_0 = ndi;
 	sam_size_0 = ndit;
 }
diff --git a/src/np_cluster.cpp b/src/np_cluster.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..16abf18d024e5c208e7f251cf1cc183e2548545b
--- /dev/null
+++ b/src/np_cluster.cpp
@@ -0,0 +1,28 @@
+/*! \file np_sphere.cpp
+ */
+
+#include <cstdio>
+#include <string>
+#ifndef INCLUDE_CONFIGURATION_H_
+#include "include/Configuration.h"
+#endif
+
+using namespace std;
+
+extern void cluster();
+extern void sphere();
+
+/*! \brief Main program entry point.
+ *
+ * This is the starting point of the execution flow. Here we may choose
+ * how to configure the code, e.g. by loading a legacy configuration file
+ * or some otherwise formatted configuration data set. The code can be
+ * linked to a luncher script or to a GUI oriented application that performs
+ * the configuration and runs the main program.
+ */
+int main(int argc, char **argv) {
+	bool is_sphere = false;
+	if (is_sphere) sphere();
+	else cluster();
+	return 0;
+}