diff --git a/src/vect.1b.c b/src/vect.1b.c index 757d5ce04f5cd2ae3023470fcf87d2a63af6b7ef..3d2af740217fd997b9f183c829c6d60ba279a584 100644 --- a/src/vect.1b.c +++ b/src/vect.1b.c @@ -321,7 +321,7 @@ void process_with_vectors_intrinsics( const double * restrict V, // // get [ dx^2+dy^2+dz^2, .., .., .. ] vdist3 = _mm256_hadd_pd ( vdist3, vdist3 ); - vdist3 = _mm256_mul_pd ( vdist3, _mm256_sqrt_pd(vdist3) ); + vdist3 = _mm256_mul_pd ( vdist3, VSQRT(vdist3) ); __m256d mprod; double m1m2; @@ -620,7 +620,7 @@ void process_with_arrays_intrinsics( const double * restrict pos_x, __m256d mmass = mtarget * ngbm; __m256d dist3 = deltax*deltax + deltay*deltay + deltaz*deltaz; - dist3 = 1.0 / (dist3 * _mm256_sqrt_pd(dist3)); + dist3 = 1.0 / (dist3 * VSQRT(dist3)); vforcex += deltax * mmass * dist3; vforcey += deltay * mmass * dist3; @@ -1080,7 +1080,7 @@ int main( int argc, char **argv ) this_timing = chrono; } if ( shot == 1 ) - PAPI_STOP_CNTR; + PAPI_STOP_CNTR; } @@ -1152,8 +1152,8 @@ int main( int argc, char **argv ) #if defined(USE_PAPI) if ( case_to_run > 0 ) for ( int i = 0; i < PAPI_EVENTS_NUM; i++ ) - printf("PAPI event %d: %llu\n", - i, (unsigned long long)papi_values[i]); + printf("[ PAPI event %d ] > %15s : %llu\n", + i, papi_eventnames[i], (unsigned long long)papi_values[i]); #endif diff --git a/src/vect.2b.c b/src/vect.2b.c index 181fe49d29c7e300d7723455f69db0127b00f038..63f3354c94c5c2d099b8e53595bc737708bcc25e 100644 --- a/src/vect.2b.c +++ b/src/vect.2b.c @@ -337,7 +337,7 @@ void process_with_vectors_intrinsics( const double * restrict V, // // get [ dx^2+dy^2+dz^2, .., .., .. ] vdist3 = _mm256_hadd_pd ( vdist3, vdist3 ); - vdist3 = _mm256_mul_pd ( vdist3, _mm256_sqrt_pd(vdist3) ); + vdist3 = _mm256_mul_pd ( vdist3, VSQRT(vdist3) ); __m256d mprod; double m1m2; @@ -518,6 +518,9 @@ void process_with_arrays_vectors( const double * restrict pos_x, dvector_t register vtargetz ATTRIBUTE_ALIGNED(VALIGN) = (dvector_t)(pos_z[target]); dvector_t register vtargetm ATTRIBUTE_ALIGNED(VALIGN) = (dvector_t)(mass[target]); dvector_t one ATTRIBUTE_ALIGNED(VALIGN) = (dvector_t)(1.0); + dvector_t vforcex ATTRIBUTE_ALIGNED(VALIGN) = (dvector_t)(0.0); + dvector_t vforcey ATTRIBUTE_ALIGNED(VALIGN) = (dvector_t)(0.0); + dvector_t vforcez ATTRIBUTE_ALIGNED(VALIGN) = (dvector_t)(0.0); #else #define X(v) (pos_x[target]) dvector_t vtargetx ATTRIBUTE_ALIGNED(VALIGN) = INIT_VECTOR(DVSIZE); @@ -561,7 +564,7 @@ void process_with_arrays_vectors( const double * restrict pos_x, dvector_t dist2z = distz*distz; dist2x = (dist2x + dist2y) + dist2z; - dvector_t inv_dist3 = one / (dist2x * _mm256_sqrt_pd(dist2x)); + dvector_t inv_dist3 = one / (dist2x * VSQRT(dist2x)); vforcex += distx * (vtargetm * _vmemm[i]) * inv_dist3; vforcey += disty * (vtargetm * _vmemm[i]) * inv_dist3; @@ -999,8 +1002,8 @@ int main( int argc, char **argv ) #if defined(USE_PAPI) if ( case_to_run > 0 ) for ( int i = 0; i < PAPI_EVENTS_NUM; i++ ) - printf("PAPI event %d: %llu\n", - i, (unsigned long long)papi_values[i]); + printf("[ PAPI event %d ] > %15s : %llu\n", + i, papi_eventnames[i], (unsigned long long)papi_values[i]); #endif