diff --git a/headers/mypapi.h b/headers/mypapi.h
index 97b3fbac8390e8c2465338d70cd0691d80c749bf..ae8cf46ad6813b54952ee64b0c8c5b4bb6f33a0b 100644
--- a/headers/mypapi.h
+++ b/headers/mypapi.h
@@ -27,7 +27,7 @@ const unsigned int native_dp_scalar = (((0xc7 & 0xffffff)<<8) | (0xfc & 0xff));
 
 
 #if defined(PAPI_MYLAPTOP)
-
+#warning "using LOCAL events set"
 #define PAPI_EVENTS_NUM 8
 char     *papi_eventnames[PAPI_EVENTS_NUM] = {"Instructions", "cycles",
 					      "L2D accesses", "L2D misses",
@@ -44,7 +44,7 @@ char     *papi_namedevents[PAPI_EVENTS_NUM] = {"PAPI_TOT_INS","PAPI_TOT_CYC",
 					       "FP_ARITH:256B_PACKED_DOUBLE" };
 
 #elif defined(PAPI_LEONARDO_DCGP)
-
+#warning "using LEONARDO_DCGP events set"
 #define PAPI_EVENTS_NUM 9
 char     *papi_eventnames[PAPI_EVENTS_NUM] = {"Instructions", "cycles",
 					      "L2D accesses", "L2D misses",
@@ -62,6 +62,17 @@ char     *papi_namedevents[PAPI_EVENTS_NUM] = {"PAPI_TOT_INS","PAPI_TOT_CYC",
 					       "FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE",
 					       "FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE" };
 
+#elif defined(PAPI_LUMI_C)
+#warning "using LUMI_C events set"
+#define PAPI_EVENTS_NUM 7
+char     *papi_eventnames[PAPI_EVENTS_NUM] = {"Instructions", "cycles",
+					      "L2D accesses", "L2D misses",
+					      "FP INS", "VFP INS", "FP OPS" };
+
+char     *papi_namedevents[PAPI_EVENTS_NUM] = {"PAPI_TOT_INS","PAPI_TOT_CYC",
+					       "PAPI_L2_DCA", "PAPI_L2_LDM",
+					       "PAPI_FP_INS", "PAPI_VEC_INS", "PAPI_FP_OPS" };
+
 #else
 
 #error "please define the system at command line -DPAPI_MY_LAPTOP or -DPAPI_LEONARDO_DCGP"
diff --git a/src/compile b/src/compile
index f524b82d9864eac408ac63e2f0cda5987765a775..e33555334b3ae1895d6cb373221210b721a27848 100755
--- a/src/compile
+++ b/src/compile
@@ -40,14 +40,27 @@ PAPI_LIB=" -lpapi "
 declare -A OPTIMIZATIONS
 OPTIMIZATIONS[gcc]=" -O3 -march=native -mtune=native -ftree-vectorize -funroll-loops "
 OPTIMIZATIONS[icx]=" -O3 -xHost -vec -axCORE-AVX2,CORE-AVX512 "
-OPTIMIZATIONS[cc]=" -O3 "
+OPTIMIZATIONS[clang]=" -O3 -march=native -mtune=native -ftree-vectorize -funroll-loops "
+OPTIMIZATIONS[aocc]=" -Ofast -m64 -freciprocal-math -flto -fstruct-layout=9 -flto -fremap-arrays -mllvm -enable-X86-prefetching -fnt-store -fnt-store=aggressive -mllvm -merge-constant -mrecip=all -mllvm -optimize-strided-mem-cost -mllvm -enable-strided-vectorization -mllvm -global-vectorize-slp=true -mllvm -enable-loop-vectorization-with-conditions -mllvm -aggressive-loop-unswitch -mllvm -enable-loop-fusion -mllvm -enable-loopinterchange -mllvm -fuse-tile-inner-loop  "
+OPTIMIZATIONS[aocc_all]=" -Ofast -m64 -freciprocal-math -flto -fstruct-layout=9 -flto -fremap-arrays -mllvm -enable-X86-prefetching -fnt-store -fnt-store=aggressive -mllvm -merge-constant -mrecip=all -fscalar-transform -fvector-transform -floop-transform -faggressive-loop-transform -mllvm -optimize-strided-mem-cost -mllvm -enable-strided-vectorization -mllvm -global-vectorize-slp=true -mllvm -enable-loop-vectorization-with-conditions -mllvm -aggressive-loop-unswitch -mllvm -enable-loop-fusion -mllvm -enable-loopinterchange -mllvm -fuse-tile-inner-loop  "
+OPTIMIZATIONS[cc]=" -O3 -march=native -mtune=native -ftree-vectorize -funroll-loops "
 
+declare -A COMPILER_EXEC
+COMPILER_EXEC[gcc]="gcc"
+COMPILER_EXEC[icx]="icx"
+COMPILER_EXEC[clang]="clang"
+COMPILER_EXEC[aocc]="cc"
+COMPILER_EXEC[cc]="cc"
+
+
+#"-mllvm -enable-strided-vectorization -mllvm -global-vectorize-slp=true -mllvm -enable-loop-vectorization-with-conditions -mllvm -fvector-transform -mllvm -aggressive-loop-unswitch -mllvm -enable-loop-fusion -mllvm -enable-loopinterchange -mllvm -fuse-tile-inner-loop -floop-transform -faggressive-loop-transform "
 
 # variables that control the behaviour
 #
 compiler=cc
 assert_on=0
 support_papi=0
+generate_asm=0
 optimization_given="none"
 options_given="none"
 debug_option=
@@ -58,18 +71,14 @@ debug_option=
 #  parse the arguments
 #
 
-CLOPTIONS="Cc:O:o:gaph"
+CLOPTIONS="Cc:O:o:gaphs"
 
 if [[ $# -lt 1 ]]; then print_help; fi
 
 while getopts ${CLOPTIONS} opt; do
       
     case ${opt} in
-
-	C) compiler=cc
-	   echo "use generic compiler cc:"
-	   cc --version
-	   ;;
+	
 	c) lcarg=$( echo ${OPTARG} | tr "[:upper:]" "[:lower]" )
 	
 	   if [[ ${lcarg} == "gcc" ]]; then
@@ -78,18 +87,30 @@ while getopts ${CLOPTIONS} opt; do
 	   elif [[ "${lcarg}" == "icx" ]]; then
 	       echo "compile with icx"
 	       compiler="icx"
+	   elif [[ -n "${lcarg}" && -n "${COMPILER_EXEC[${lcarg}]+set}" ]]; then  # any compiler registered in COMPILER_EXEC (aocc, clang, ...)
+	       echo "compile with ${lcarg}"
+	       compiler="${lcarg}"
 	   else
 	       echo "unknown compiler, use generic compiler cc:"
 	       cc --version
 	   fi
 	   ;;
 
+	C) compiler=cc
+	   echo "use generic compiler cc:"
+	   cc --version
+	   ;;
+	
 	O) optimization_given=${OPTARG}
 	   ;;
 	
 	o) options_given=${OPTARG}
 	   ;;
 	
+	s) generate_asm=1
+	   echo "generate assembler"
+	   ;;
+
 	g) debug_option=" -g3 "
 	   ;;
 	
@@ -155,6 +176,12 @@ else
     ADD_PAPI_LIB=""
 fi
 
+if [[ ${generate_asm} -eq 1 ]];
+then  # -s given: stop after compilation and emit annotated Intel-syntax assembly (outputs get a .s suffix)
+    suffix=${suffix}.s
+    OPTIONS="${OPTIONS} -g -S -fverbose-asm -masm=intel"  # OPTIONS is the variable the compile commands actually expand
+fi
+
 echo "compiler's command line is: "
 echo -I${INC_DIR} ${OPTIONS} ${OPTIMIZATION} ${ADD_PAPI_OPT} ${ADD_PAPI_LIB}
 echo "source folder is " ${SRC_DIR}
@@ -167,8 +194,8 @@ echo "source folder is " ${SRC_DIR}
 #
 
 
-${compiler} ${OPTIONS} ${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.1${suffix} ${SRC_DIR}/vect.1.c ${ADD_PAPI_LIB} 2> compile.log.out
-${compiler} ${OPTIONS} ${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.1b${suffix} ${SRC_DIR}/vect.1b.c -lm ${ADD_PAPI_LIB} 2>> compile.log.out
+${COMPILER_EXEC[$compiler]} ${OPTIONS} ${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.1${suffix} ${SRC_DIR}/vect.1.c ${ADD_PAPI_LIB} 2> compile.log.out
+${COMPILER_EXEC[$compiler]} ${OPTIONS} ${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.1b${suffix} ${SRC_DIR}/vect.1b.c -lm ${ADD_PAPI_LIB} 2>> compile.log.out
 
-${compiler} ${OPTIONS} ${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.2${suffix} ${SRC_DIR}/vect.2.c ${ADD_PAPI_LIB} 2>> compile.log.out
-${compiler} ${OPTIONS} ${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.2b${suffix} ${SRC_DIR}/vect.2b.c -lm ${ADD_PAPI_LIB} 2>> compile.log.out
+${COMPILER_EXEC[$compiler]} ${OPTIONS} ${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.2${suffix} ${SRC_DIR}/vect.2.c ${ADD_PAPI_LIB} 2>> compile.log.out
+${COMPILER_EXEC[$compiler]} ${OPTIONS} ${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.2b${suffix} ${SRC_DIR}/vect.2b.c -lm ${ADD_PAPI_LIB} 2>> compile.log.out
diff --git a/src/vect.1b.c b/src/vect.1b.c
index a6800ea3419e4f0d3bdb8ade527190b032933a15..31e0891f7a4fd4bda45da93a533418c88a79b4a8 100644
--- a/src/vect.1b.c
+++ b/src/vect.1b.c
@@ -29,8 +29,6 @@
  * ────────────────────────────────────────────────────────────────────────── */
 
 
-
-
 #if defined(__STDC__)
 #  if (__STDC_VERSION__ >= 199901L)
 #     define _XOPEN_SOURCE 700
@@ -218,7 +216,7 @@ void process_with_vectors( const v4df   * restrict V,
       v4df_u register delta2;
       delta2.P = delta * delta;
       double scalar_dist = delta2.p[0]+delta2.p[1]+delta2.p[2]+delta2.p[3];
-      scalar_dist = pow(scalar_dist, 1.5);  // r^3
+      scalar_dist = scalar_dist*sqrt(scalar_dist);  // r^3
       
       // the mass product with the i-th neighbour
       //
@@ -796,8 +794,8 @@ int main( int argc, char **argv )
   unsigned int N           = (argc > 2 ? atoi(*(argv+2)) : 1000000 );
   long     int seed        = (argc > 3 ? atoi(*(argv+3)) : -1 );     // set the seed for repeatible runs
            int Nrepetitions= (argc > 4 ? atoi(*(argv+4)) : NREPETITIONS );
-           int dry_run     = (argc > 5 ? atoi(*(argv+5)) : 0 );      // 1 to estimate floats for initialization
-           int from_file   = ( case_to_run > 0 );	   
+	   int output_force= (argc > 5 ? atoi(*(argv+5)) : 0 );
+           int from_file   = ( case_to_run > 0 );
 	   
 	   case_to_run = (case_to_run < 0 ? (-case_to_run) : case_to_run);  // make it positive
 	   
@@ -851,8 +849,6 @@ int main( int argc, char **argv )
 
   printf("> Running case %d -> \"%s\" with %u points and %d repetitions\n",
 	 case_to_run, implementation_labels[case_to_run], N, Nrepetitions);
-  if ( dry_run )
-    printf(" >>> DRY RUN :: use to estimate ops outside the actual calculations\n" );
 
   
   if ( case_to_run > 0 )
@@ -933,9 +929,6 @@ int main( int argc, char **argv )
 	}
     }
 
-  if ( dry_run ) {
-    printf("dry run: going to clean up\n");
-    goto clean; }
   
   /* ------------------------------------------------------
    *
@@ -1124,23 +1117,24 @@ int main( int argc, char **argv )
       // pointless calculations
       //
 
-      char  filename[100];
-      sprintf( filename, "force1b.%d.out", case_to_run );
-      FILE *output = fopen( filename, "w" );
-      if( output != NULL ) {
-	fwrite( &N, sizeof(int), 1, output);
-	fwrite( force, sizeof(double), N*3, output );
-	fclose(output); }
-      else
-	printf(">>> wow, I was unable to create a stupid file\n");
-
+      if ( output_force )
+	{
+	  char  filename[100];
+	  sprintf( filename, "force1b.%d.out", case_to_run );
+	  FILE *output = fopen( filename, "w" );
+	  if( output != NULL ) {
+	    fwrite( &N, sizeof(int), 1, output);
+	    fwrite( force, sizeof(double), N*3, output );
+	    fclose(output); }
+	  else
+	    printf(">>> wow, I was unable to create a stupid file\n");
+	}
+      
       free ( force    );
-      if ( !dry_run)
-	free ( ngb_list );
+      free ( ngb_list );
   
     }
 
- clean:
   switch ( case_to_run )
     {
     case 1: {