diff --git a/src/libnptm/clu_subs.cpp b/src/libnptm/clu_subs.cpp
index 9a09a5960e38096423c95e1f9f32d028db692c97..f10d3f788ac77b67adfc917f11d15895ace741a8 100644
--- a/src/libnptm/clu_subs.cpp
+++ b/src/libnptm/clu_subs.cpp
@@ -47,7 +47,9 @@
 #include <omp.h>
 #endif
 
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp requires unified_shared_memory
+#endif
 
 using namespace std;
 
@@ -405,7 +407,9 @@ dcomplex cdtp(dcomplex z, dcomplex **am, int i, int jf, int k, int nj) {
   return result;
 }
 
-# pragma omp begin declare target device_type(any)
+#ifdef USE_TARGET_OFFLOAD
+#pragma omp begin declare target device_type(any)
+#endif
 double cgev(int ipamo, int mu, int l, int m) {
   double result = 0.0;
   double xd = 0.0, xn = 0.0;
@@ -439,7 +443,9 @@ double cgev(int ipamo, int mu, int l, int m) {
   }
   return result;
 }
-# pragma omp end declare target
+#ifdef USE_TARGET_OFFLOAD
+#pragma omp end declare target
+#endif
 
 void cms(dcomplex **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6) {
   dcomplex dm, de, cgh, cgk;
@@ -643,7 +649,9 @@ void crsm1(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6) {
   delete[] svs;
 }
 
-# pragma omp begin declare target device_type(any)
+#ifdef USE_TARGET_OFFLOAD
+#pragma omp begin declare target device_type(any)
+#endif
 dcomplex ghit_d(
 	      int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2, C1 *c1,
 	      C1_AddOns *c1ao, C4 *c4, double *rac3j
@@ -854,9 +862,13 @@ dcomplex ghit_d(
   }
   return result;
 }
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp end declare target
+#endif
 
-# pragma omp begin declare target device_type(any)
+#ifdef USE_TARGET_OFFLOAD
+#pragma omp begin declare target device_type(any)
+#endif
 dcomplex ghit(
 	      int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2, C1 *c1,
 	      C1_AddOns *c1ao, C4 *c4, C6 *c6
@@ -1067,7 +1079,9 @@ dcomplex ghit(
   }
   return result;
 }
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp end declare target
+#endif
 
 void hjv(
 	 double exri, double vk, int &jer, int &lcalc, dcomplex &arg,
@@ -1325,12 +1339,13 @@ void pcros(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C4 *c4) {
 #ifdef USE_NVTX
   nvtxRangePush("pcros intermediate loop 1");
 #endif
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp target teams distribute parallel for simd reduction(+:sum, sump, sum1, sum2, sum3, sum4)
-    for (int i12 = 0; i12 < nlemt; i12++) {
+#endif
+  for (int i12 = 0; i12 < nlemt; i12++) {
       // int i = i12 - 1;
       dcomplex am = cc0;
       dcomplex amp = cc0;
-      //#pragma omp target teams distribute parallel for simd reduction(+:am,amp)
       for (int j10 = 0; j10 < nlemt; j10++) {
 	// int j = j10 - 1;
 	am += (vec_am0m[nlemt*i12+j10] * vec_w[4*j10+ipo18]);
@@ -1391,7 +1406,9 @@ void pcrsm0(double vk, double exri, int inpol, C1 *c1, C1_AddOns *c1ao, C4 *c4)
   csam = -(ccs / (exri * vk)) * 0.5 * I;
   sum2 = cc0;
   sum3 = cc0;
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp target teams distribute parallel for simd reduction(+:sum2,sum3)
+#endif
   for (int i14 = 0; i14 < c4->nlem; i14++) { 
     int ie = i14 + c4->nlem;
     sum2 += (vec_am0m[nlemt*i14 + i14] + vec_am0m[nlemt*ie + ie]);
@@ -1399,7 +1416,9 @@ void pcrsm0(double vk, double exri, int inpol, C1 *c1, C1_AddOns *c1ao, C4 *c4)
   } // i14 loop
   double sumpi = 0.0;
   dcomplex sumpd = cc0;
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp target teams distribute parallel for simd collapse(2) reduction(+:sumpi,sumpd)
+#endif
   for (int i16 = 0; i16 < nlemt; i16++) {
     for (int j16 = 0; j16 < c4->nlem; j16++) {
       int je = j16 + c4->nlem;
@@ -1599,7 +1618,9 @@ void r3j000(int j2, int j3, C6 *c6) {
   }
 }
 
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp begin declare target device_type(any)
+#endif
 void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) {
   int jmx = j3 + j2;
   int jdf = j3 - j2;
@@ -1717,9 +1738,13 @@ void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) {
     }
   }
 }
-# pragma omp end declare target
+#ifdef USE_TARGET_OFFLOAD
+#pragma omp end declare target
+#endif
 
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp begin declare target device_type(any)
+#endif
 void r3jjr_d(int j2, int j3, int m2, int m3, double *rac3j) {
   int jmx = j3 + j2;
   int jdf = j3 - j2;
@@ -1837,7 +1862,9 @@ void r3jjr_d(int j2, int j3, int m2, int m3, double *rac3j) {
     }
   }
 }
-# pragma omp end declare target
+#ifdef USE_TARGET_OFFLOAD
+#pragma omp end declare target
+#endif
 
 void r3jmr(int j1, int j2, int j3, int m1, C6 *c6) {
   int mmx = (j2 < j3 - m1) ? j2 : j3 - m1;
@@ -1968,8 +1995,10 @@ void raba(
 #ifdef USE_NVTX
   nvtxRangePush("raba inner loop 1");
 #endif
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp target teams distribute parallel for simd reduction(+:c1, c2)
-    for (int j10 = 1; j10 <= nlemt; j10++) {
+#endif
+  for (int j10 = 1; j10 <= nlemt; j10++) {
       int j = j10 - 1;
       c1 += (vec_am0m[i*nlemt+j] * vec_w[4*j]);
       c2 += (vec_am0m[i*nlemt+j] * vec_w[4*j+1]);
@@ -2018,8 +2047,10 @@ void raba(
 #ifdef USE_NVTX
   nvtxRangePush("raba inner loop 2");
 #endif
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp target teams distribute parallel for simd reduction(+:ctqce0, ctqce1, ctqce2, ctqcs0, ctqcs1, ctqcs2, tqcpe0, tqcpe1, tqcpe2, tqcps0, tqcps1, tqcps2)
-    for (int k = 1; k<=kmax; k++) {
+#endif
+  for (int k = 1; k<=kmax; k++) {
       int l60 = (int) sqrt(k+1);
       int im60 = k - (l60*l60) + 1;
       if (im60 == 0) {
@@ -2091,7 +2122,9 @@ void raba(
 #ifdef USE_NVTX
   nvtxRangePush("raba loop 3");
 #endif
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp teams distribute parallel for simd
+#endif
   for (int ipo78 = 1; ipo78 <= 2; ipo78++) {
     int ipo = ipo78 - 1;
     tqce[ipo][0] = real(ctqce[ipo][0] - ctqce[ipo][2]) * sq2i;
@@ -2161,7 +2194,9 @@ void scr0(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 * c4) {
 #ifdef USE_NVTX
       nvtxRangePush("scr0 inner loop 1");
 #endif
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp target teams distribute parallel for simd reduction(+:sums, sum21)
+#endif
       for (int l10 = 1; l10 <= c4->li; l10++) {
 	double fl = 1.0 * (l10 + l10 + 1);
 	// dcomplex rm = 1.0 / c1->rmi[l10 - 1][i14 - 1];
@@ -2203,9 +2238,11 @@ void scr0(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 * c4) {
   double acs = 0.0;
   dcomplex tfsas = cc0;
 #ifdef USE_NVTX
-      nvtxRangePush("scr0 loop 2");
+  nvtxRangePush("scr0 loop 2");
 #endif
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp target teams distribute parallel for simd reduction(+:scs, ecs, acs, tfsas)
+#endif
   for (int i14 = 1; i14 <= c4->nsph; i14++) {
     int iogi = c1->iog[i14 - 1];
     scs += c1->sscs[iogi - 1];
@@ -2265,9 +2302,11 @@ void scr2(
       // furthermore if it results im10 > 2*l10+1, then we set
       // im10 = im10 -(2*l10+1) and l10 = l10+1 (there was a rounding error in a nearly exact root)
 #ifdef USE_NVTX
-  nvtxRangePush("scr2 inner loop 1");
+      nvtxRangePush("scr2 inner loop 1");
 #endif
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp target teams distribute parallel for simd reduction(-:s11, s21, s12, s22)
+#endif
       for (int k = 1; k<=kmax; k++) {
 	int l10 = (int) sqrt(k+1);
 	int im10 = k - (l10*l10) + 1;
@@ -2319,7 +2358,9 @@ void scr2(
 #ifdef USE_NVTX
   nvtxRangePush("scr2 loop 2");
 #endif
-  #pragma omp target teams distribute parallel for simd reduction(+:tsas00, tsas10, tsas01, tsas11)
+#ifdef USE_TARGET_OFFLOAD
+#pragma omp target teams distribute parallel for simd reduction(+:tsas00, tsas10, tsas01, tsas11)
+#endif
   for (int i14 = 1; i14 <= c4->nsph; i14++) {
     int i = i14 - 1;
     int iogi = c1->iog[i14 - 1];
@@ -2349,8 +2390,10 @@ void scr2(
 #ifdef USE_NVTX
   nvtxRangePush("scr2 inner loop 3");
 #endif
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp target teams distribute parallel for simd collapse(4)
-      for (int ipo1 = 1; ipo1 <=2; ipo1++) {
+#endif
+  for (int ipo1 = 1; ipo1 <=2; ipo1++) {
 	for (int jpo1 = 1; jpo1 <= 2; jpo1++) {
 	  for (int ipo2 = 1; ipo2 <= 2; ipo2++) {
 	    for (int jpo2 = 1; jpo2 <= 2; jpo2++) {
@@ -2372,7 +2415,9 @@ void scr2(
 #ifdef USE_NVTX
   nvtxRangePush("scr2 loop 4");
 #endif
+#ifdef USE_TARGET_OFFLOAD
 #pragma omp target parallel for collapse(4)
+#endif
   for (int ipo1 = 1; ipo1 <=2; ipo1++) {
     for (int jpo1 = 1; jpo1 <= 2; jpo1++) {
       for (int ipo2 = 1; ipo2 <= 2; ipo2++) {
@@ -2505,7 +2550,9 @@ void ztm(dcomplex **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6, C9 * c9) {
   // but if it results im = 0, then we set l = l-1 and im = 2*l+1
   // furthermore if it results im > 2*l+1, then we set
   // im = im -(2*l+1) and l = l+1 (there was a rounding error in a nearly exact root)
-# pragma omp target teams distribute parallel for simd collapse(3)
+#ifdef USE_TARGET_OFFLOAD
+#pragma omp target teams distribute parallel for simd collapse(3)
+#endif
   for (int n2 = 1; n2 <= c4->nsph; n2++) { // GPU portable?
     for (int k2 = 1; k2<=k2max; k2++) {
       for (int k3 = 1; k3<=k3max; k3++) {
@@ -2550,7 +2597,9 @@ void ztm(dcomplex **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6, C9 * c9) {
 #endif
   dcomplex *am_v = am[0];
   dcomplex *sam_v = c9->sam[0];
-# pragma omp target teams distribute parallel for simd collapse(2)
+#ifdef USE_TARGET_OFFLOAD
+#pragma omp target teams distribute parallel for simd collapse(2)
+#endif
   for (int i1 = 1; i1 <= ndi; i1++) { // GPU portable?
     for (int i3 = 1; i3 <= c4->nlem; i3++) {
       dcomplex sum1 = cc0;
@@ -2584,7 +2633,7 @@ void ztm(dcomplex **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6, C9 * c9) {
       sam_v[vecind1e + i3e - 1] = sum4;
     } // i3 loop
   } // i1 loop
-# pragma omp parallel for collapse(2)
+#pragma omp parallel for collapse(2)
   for (int i1 = 1; i1 <= ndi; i1++) {
     for (int i0 = 1; i0 <= c4->nlem; i0++) {
       int vecindex = (i1 - 1)*c9->nlem + i0 - 1;
@@ -2596,7 +2645,9 @@ void ztm(dcomplex **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6, C9 * c9) {
   } // i1 loop
   int nlemt = c4->nlem + c4->nlem;
   dcomplex *am0m_v = c1ao->am0m[0];
-# pragma omp target parallel for collapse(2)
+#ifdef USE_TARGET_OFFLOAD
+#pragma omp target parallel for collapse(2)
+#endif
   for (int i0 = 1; i0 <= c4->nlem; i0++) {
     for (int i3 = 1; i3 <= nlemt; i3++) {
       int i0e = i0 + c4->nlem;
diff --git a/src/libnptm/sph_subs.cpp b/src/libnptm/sph_subs.cpp
index 6e1cde3681e548e01b395ecf2e8430599d3cdeaa..63685f97af4e18be3d10718b6495293a7509ea6d 100644
--- a/src/libnptm/sph_subs.cpp
+++ b/src/libnptm/sph_subs.cpp
@@ -197,13 +197,17 @@ double cg1(int lmpml, int mu, int l, int m) {
   return result;
 }
 
-# pragma omp begin declare target device_type(any)
+#ifdef USE_TARGET_OFFLOAD
+#pragma omp begin declare target device_type(any)
+#endif
 dcomplex dconjg(dcomplex z) {
   double zreal = real(z);
   double zimag = imag(z);
   return (zreal - zimag * I);
 }
-# pragma omp end declare target
+#ifdef USE_TARGET_OFFLOAD
+#pragma omp end declare target
+#endif
 
 void diel(int npntmo, int ns, int i, int ic, double vk, C1 *c1, C2 *c2) {
   const double dif = c1->rc[i - 1][ns] - c1->rc[i - 1][ns - 1];
diff --git a/src/libnptm/types.cpp b/src/libnptm/types.cpp
index fcd571d05a07dbf017a741b0363b0eccd02a1dbb..4b2bd23372e899f382d25fed87bee2132b409f61 100644
--- a/src/libnptm/types.cpp
+++ b/src/libnptm/types.cpp
@@ -22,10 +22,18 @@
 #include "../include/types.h"
 #endif
 
+#ifdef USE_TARGET_OFFLOAD
 # pragma omp begin declare target device_type(any)
+#endif
 double real(dcomplex z) { return __real__ z; }
+#ifdef USE_TARGET_OFFLOAD
 # pragma omp end declare target
+#endif
 
+#ifdef USE_TARGET_OFFLOAD
 # pragma omp begin declare target device_type(any)
+#endif
 double imag(dcomplex z) { return __imag__ z; }
+#ifdef USE_TARGET_OFFLOAD
 # pragma omp end declare target
+#endif