diff --git a/headers/mypapi.h b/headers/mypapi.h index 97b3fbac8390e8c2465338d70cd0691d80c749bf..ae8cf46ad6813b54952ee64b0c8c5b4bb6f33a0b 100644 --- a/headers/mypapi.h +++ b/headers/mypapi.h @@ -27,7 +27,7 @@ const unsigned int native_dp_scalar = (((0xc7 & 0xffffff)<<8) | (0xfc & 0xff)); #if defined(PAPI_MYLAPTOP) - +#warning "using LOCAL events set" #define PAPI_EVENTS_NUM 8 char *papi_eventnames[PAPI_EVENTS_NUM] = {"Instructions", "cycles", "L2D accesses", "L2D misses", @@ -44,7 +44,7 @@ char *papi_namedevents[PAPI_EVENTS_NUM] = {"PAPI_TOT_INS","PAPI_TOT_CYC", "FP_ARITH:256B_PACKED_DOUBLE" }; #elif defined(PAPI_LEONARDO_DCGP) - +#warning "using LEONARDO_DCGP events set" #define PAPI_EVENTS_NUM 9 char *papi_eventnames[PAPI_EVENTS_NUM] = {"Instructions", "cycles", "L2D accesses", "L2D misses", @@ -62,6 +62,17 @@ char *papi_namedevents[PAPI_EVENTS_NUM] = {"PAPI_TOT_INS","PAPI_TOT_CYC", "FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE", "FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE" }; +#elif defined(PAPI_LUMI_C) +#warning "using LUMI_C events set" +#define PAPI_EVENTS_NUM 7 +char *papi_eventnames[PAPI_EVENTS_NUM] = {"Instructions", "cycles", + "L2D accesses", "L2D misses", + "FP INS", "VFP INS", "FP OPS" }; + +char *papi_namedevents[PAPI_EVENTS_NUM] = {"PAPI_TOT_INS","PAPI_TOT_CYC", + "PAPI_L2_DCA", "PAPI_L2_LDM", + "PAPI_FP_INS", "PAPI_VEC_INS", "PAPI_FP_OPS" }; + #else -#error "please define the system at command line -DPAPI_MY_LAPTOP or -DPAPI_LEONARDO_DCGP" +#error "please define the system at command line: -DPAPI_MYLAPTOP, -DPAPI_LEONARDO_DCGP or -DPAPI_LUMI_C" diff --git a/src/compile b/src/compile index f524b82d9864eac408ac63e2f0cda5987765a775..e33555334b3ae1895d6cb373221210b721a27848 100755 --- a/src/compile +++ b/src/compile @@ -40,14 +40,27 @@ PAPI_LIB=" -lpapi " declare -A OPTIMIZATIONS OPTIMIZATIONS[gcc]=" -O3 -march=native -mtune=native -ftree-vectorize -funroll-loops " OPTIMIZATIONS[icx]=" -O3 -xHost -vec -axCORE-AVX2,CORE-AVX512 " -OPTIMIZATIONS[cc]=" -O3 " +OPTIMIZATIONS[clang]=" -O3 -march=native -mtune=native -ftree-vectorize -funroll-loops " +OPTIMIZATIONS[aocc]=" 
-Ofast -m64 -freciprocal-math -flto -fstruct-layout=9 -flto -fremap-arrays -mllvm -enable-X86-prefetching -fnt-store -fnt-store=aggressive -mllvm -merge-constant -mrecip=all -mllvm -optimize-strided-mem-cost -mllvm -enable-strided-vectorization -mllvm -global-vectorize-slp=true -mllvm -enable-loop-vectorization-with-conditions -mllvm -aggressive-loop-unswitch -mllvm -enable-loop-fusion -mllvm -enable-loopinterchange -mllvm -fuse-tile-inner-loop " +OPTIMIZATIONS[aocc_all]=" -Ofast -m64 -freciprocal-math -flto -fstruct-layout=9 -flto -fremap-arrays -mllvm -enable-X86-prefetching -fnt-store -fnt-store=aggressive -mllvm -merge-constant -mrecip=all -fscalar-transform -fvector-transform -floop-transform -faggressive-loop-transform -mllvm -optimize-strided-mem-cost -mllvm -enable-strided-vectorization -mllvm -global-vectorize-slp=true -mllvm -enable-loop-vectorization-with-conditions -mllvm -aggressive-loop-unswitch -mllvm -enable-loop-fusion -mllvm -enable-loopinterchange -mllvm -fuse-tile-inner-loop " +OPTIMIZATIONS[cc]=" -O3 -march=native -mtune=native -ftree-vectorize -funroll-loops " +declare -A COMPILER_EXEC +COMPILER_EXEC[gcc]="gcc" +COMPILER_EXEC[icx]="icx" +COMPILER_EXEC[clang]="clang" +COMPILER_EXEC[aocc]="cc" +COMPILER_EXEC[cc]="cc" + + +# commented-out flag string, not assigned to any OPTIMIZATIONS entry: "-mllvm -enable-strided-vectorization -mllvm -global-vectorize-slp=true -mllvm -enable-loop-vectorization-with-conditions -mllvm -fvector-transform -mllvm -aggressive-loop-unswitch -mllvm -enable-loop-fusion -mllvm -enable-loopinterchange -mllvm -fuse-tile-inner-loop -floop-transform -faggressive-loop-transform " # variables that control the behaviour # compiler=cc assert_on=0 support_papi=0 +generate_asm=0 optimization_given="none" options_given="none" debug_option= @@ -58,18 +71,14 @@ debug_option= # parse the arguments # -CLOPTIONS="Cc:O:o:gaph" +CLOPTIONS="Cc:O:o:gaphs" if [[ $# -lt 1 ]]; then print_help; fi while getopts ${CLOPTIONS} opt; do case ${opt} in - - C) compiler=cc - echo "use generic compiler cc:" - cc 
--version - ;; + c) -lcarg=$( echo ${OPTARG} | tr "[:upper:]" "[:lower]" ) +lcarg=$( echo ${OPTARG} | tr "[:upper:]" "[:lower:]" ) if [[ ${lcarg} == "gcc" ]]; then @@ -78,18 +87,30 @@ while getopts ${CLOPTIONS} opt; do elif [[ "${lcarg}" == "icx" ]]; then echo "compile with icx" compiler="icx" + elif [[ "${lcarg}" == "aocc" ]]; then + echo "compile with aocc" + compiler="aocc" else echo "unknown compiler, use generic compiler cc:" cc --version fi ;; + C) compiler=cc + echo "use generic compiler cc:" + cc --version + ;; + O) optimization_given=${OPTARG} ;; o) options_given=${OPTARG} ;; + s) generate_asm=1 + echo "generate assembler" + ;; + g) debug_option=" -g3 " ;; @@ -155,6 +176,12 @@ else ADD_PAPI_LIB="" fi +if [[ ${generate_asm} -eq 1 ]]; +then + suffix=${suffix}.s + OPTIONS="${OPTIONS} -g -S -fverbose-asm -masm=intel" +fi + echo "compiler's command line is: " echo -I${INC_DIR} ${OPTIONS} ${OPTIMIZATION} ${ADD_PAPI_OPT} ${ADD_PAPI_LIB} echo "source folder is " ${SRC_DIR} @@ -167,8 +194,8 @@ echo "source folder is " ${SRC_DIR} # -${compiler} ${OPTIONS} ${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.1${suffix} ${SRC_DIR}/vect.1.c ${ADD_PAPI_LIB} 2> compile.log.out -${compiler} ${OPTIONS} ${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.1b${suffix} ${SRC_DIR}/vect.1b.c -lm ${ADD_PAPI_LIB} 2>> compile.log.out +${COMPILER_EXEC[$compiler]} ${OPTIONS} ${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.1${suffix} ${SRC_DIR}/vect.1.c ${ADD_PAPI_LIB} 2> compile.log.out +${COMPILER_EXEC[$compiler]} ${OPTIONS} ${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.1b${suffix} ${SRC_DIR}/vect.1b.c -lm ${ADD_PAPI_LIB} 2>> compile.log.out -${compiler} ${OPTIONS} ${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.2${suffix} ${SRC_DIR}/vect.2.c ${ADD_PAPI_LIB} 2>> compile.log.out -${compiler} ${OPTIONS} ${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.2b${suffix} ${SRC_DIR}/vect.2b.c -lm ${ADD_PAPI_LIB} 2>> compile.log.out +${COMPILER_EXEC[$compiler]} ${OPTIONS} 
${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.2${suffix} ${SRC_DIR}/vect.2.c ${ADD_PAPI_LIB} 2>> compile.log.out +${COMPILER_EXEC[$compiler]} ${OPTIONS} ${ADD_PAPI_OPT} -I${INC_DIR} ${OPTIMIZATION} -o ${X_DIR}/vect.2b${suffix} ${SRC_DIR}/vect.2b.c -lm ${ADD_PAPI_LIB} 2>> compile.log.out diff --git a/src/vect.1b.c b/src/vect.1b.c index a6800ea3419e4f0d3bdb8ade527190b032933a15..31e0891f7a4fd4bda45da93a533418c88a79b4a8 100644 --- a/src/vect.1b.c +++ b/src/vect.1b.c @@ -29,8 +29,6 @@ * ────────────────────────────────────────────────────────────────────────── */ - - #if defined(__STDC__) # if (__STDC_VERSION__ >= 199901L) # define _XOPEN_SOURCE 700 @@ -218,7 +216,7 @@ void process_with_vectors( const v4df * restrict V, v4df_u register delta2; delta2.P = delta * delta; double scalar_dist = delta2.p[0]+delta2.p[1]+delta2.p[2]+delta2.p[3]; - scalar_dist = pow(scalar_dist, 1.5); // r^3 + scalar_dist = scalar_dist*sqrt(scalar_dist); // r^3 = r^2 * sqrt(r^2) // the mass product with the i-th neighbour // @@ -796,8 +794,8 @@ int main( int argc, char **argv ) unsigned int N = (argc > 2 ? atoi(*(argv+2)) : 1000000 ); long int seed = (argc > 3 ? atoi(*(argv+3)) : -1 ); // set the seed for repeatible runs int Nrepetitions= (argc > 4 ? atoi(*(argv+4)) : NREPETITIONS ); - int dry_run = (argc > 5 ? atoi(*(argv+5)) : 0 ); // 1 to estimate floats for initialization - int from_file = ( case_to_run > 0 ); + int output_force= (argc > 5 ? atoi(*(argv+5)) : 0 ); /* argv[5]; non-zero enables the force1b.<case>.out dump of the force array */ + int from_file = ( case_to_run > 0 ); case_to_run = (case_to_run < 0 ? 
(-case_to_run) : case_to_run); // make it positive @@ -851,8 +849,6 @@ int main( int argc, char **argv ) printf("> Running case %d -> \"%s\" with %u points and %d repetitions\n", case_to_run, implementation_labels[case_to_run], N, Nrepetitions); - if ( dry_run ) - printf(" >>> DRY RUN :: use to estimate ops outside the actual calculations\n" ); if ( case_to_run > 0 ) @@ -933,9 +929,6 @@ int main( int argc, char **argv ) } } - if ( dry_run ) { - printf("dry run: going to clean up\n"); - goto clean; } /* ------------------------------------------------------ * @@ -1124,23 +1117,24 @@ int main( int argc, char **argv ) // pointless calculations // - char filename[100]; - sprintf( filename, "force1b.%d.out", case_to_run ); - FILE *output = fopen( filename, "w" ); - if( output != NULL ) { - fwrite( &N, sizeof(int), 1, output); - fwrite( force, sizeof(double), N*3, output ); - fclose(output); } - else - printf(">>> wow, I was unable to create a stupid file\n"); - + if ( output_force ) /* only on request: write N, then N*3 doubles of force[] */ + { + char filename[100]; + sprintf( filename, "force1b.%d.out", case_to_run ); + FILE *output = fopen( filename, "w" ); + if( output != NULL ) { + fwrite( &N, sizeof(int), 1, output); + fwrite( force, sizeof(double), N*3, output ); + fclose(output); } + else + printf(">>> wow, I was unable to create a stupid file\n"); + } + free ( force ); - if ( !dry_run) - free ( ngb_list ); + free ( ngb_list ); } - clean: switch ( case_to_run ) { case 1: {