diff --git a/src/vect.1b.c b/src/vect.1b.c
index 757d5ce04f5cd2ae3023470fcf87d2a63af6b7ef..3d2af740217fd997b9f183c829c6d60ba279a584 100644
--- a/src/vect.1b.c
+++ b/src/vect.1b.c
@@ -321,7 +321,7 @@ void process_with_vectors_intrinsics( const double * restrict V,
       //
       // get [ dx^2+dy^2+dz^2, .., .., .. ]
       vdist3 = _mm256_hadd_pd        ( vdist3, vdist3      );
-      vdist3 = _mm256_mul_pd         ( vdist3, _mm256_sqrt_pd(vdist3) );
+      vdist3 = _mm256_mul_pd         ( vdist3, VSQRT(vdist3) );
       
       __m256d mprod;
       double  m1m2;
@@ -620,7 +620,7 @@ void process_with_arrays_intrinsics( const double * restrict pos_x,
       __m256d  mmass = mtarget * ngbm;
       
       __m256d dist3 = deltax*deltax + deltay*deltay + deltaz*deltaz;
-      dist3 = 1.0 / (dist3 * _mm256_sqrt_pd(dist3));
+      dist3 = 1.0 / (dist3 * VSQRT(dist3));
 
       vforcex += deltax * mmass * dist3;
       vforcey += deltay * mmass * dist3;
@@ -1080,7 +1080,7 @@ int main( int argc, char **argv )
 	      this_timing = chrono; }
 
 	  if ( shot == 1 )
-	    PAPI_STOP_CNTR;	    
+	    PAPI_STOP_CNTR;
 	  	  
 	}
 
@@ -1152,8 +1152,8 @@ int main( int argc, char **argv )
  #if defined(USE_PAPI)
   if ( case_to_run > 0 )
     for ( int i = 0; i < PAPI_EVENTS_NUM; i++ )
-      printf("PAPI event %d: %llu\n",
-	     i, (unsigned long long)papi_values[i]);
+      printf("[ PAPI event %d ] >   %15s : %llu\n",
+	     i, papi_eventnames[i], (unsigned long long)papi_values[i]);
  #endif
 
   
diff --git a/src/vect.2b.c b/src/vect.2b.c
index 181fe49d29c7e300d7723455f69db0127b00f038..63f3354c94c5c2d099b8e53595bc737708bcc25e 100644
--- a/src/vect.2b.c
+++ b/src/vect.2b.c
@@ -337,7 +337,7 @@ void process_with_vectors_intrinsics( const double * restrict V,
       //
       // get [ dx^2+dy^2+dz^2, .., .., .. ]
       vdist3 = _mm256_hadd_pd        ( vdist3, vdist3    );
-      vdist3 = _mm256_mul_pd         ( vdist3, _mm256_sqrt_pd(vdist3) );
+      vdist3 = _mm256_mul_pd         ( vdist3, VSQRT(vdist3) );
       
       __m256d mprod;
       double m1m2;
@@ -518,6 +518,9 @@ void process_with_arrays_vectors( const double * restrict pos_x,
   dvector_t register vtargetz ATTRIBUTE_ALIGNED(VALIGN) = (dvector_t)(pos_z[target]);
   dvector_t register vtargetm ATTRIBUTE_ALIGNED(VALIGN) = (dvector_t)(mass[target]);
   dvector_t          one      ATTRIBUTE_ALIGNED(VALIGN) = (dvector_t)(1.0);
+  dvector_t          vforcex  ATTRIBUTE_ALIGNED(VALIGN) = (dvector_t)(0.0);
+  dvector_t          vforcey  ATTRIBUTE_ALIGNED(VALIGN) = (dvector_t)(0.0);
+  dvector_t          vforcez  ATTRIBUTE_ALIGNED(VALIGN) = (dvector_t)(0.0);
  #else
  #define X(v) (pos_x[target])
   dvector_t vtargetx ATTRIBUTE_ALIGNED(VALIGN) = INIT_VECTOR(DVSIZE);
@@ -561,7 +564,7 @@ void process_with_arrays_vectors( const double * restrict pos_x,
       dvector_t dist2z = distz*distz;
 
       dist2x = (dist2x + dist2y) + dist2z;
-      dvector_t inv_dist3 = one / (dist2x * _mm256_sqrt_pd(dist2x));
+      dvector_t inv_dist3 = one / (dist2x * VSQRT(dist2x));
       
       vforcex += distx * (vtargetm * _vmemm[i]) * inv_dist3;
       vforcey += disty * (vtargetm * _vmemm[i]) * inv_dist3;
@@ -999,8 +1002,8 @@ int main( int argc, char **argv )
  #if defined(USE_PAPI)
   if ( case_to_run > 0 )
     for ( int i = 0; i < PAPI_EVENTS_NUM; i++ )
-      printf("PAPI event %d: %llu\n",
-	     i, (unsigned long long)papi_values[i]);
+      printf("[ PAPI event %d ] >   %15s : %llu\n",
+	     i, papi_eventnames[i], (unsigned long long)papi_values[i]);
  #endif