From 5b8e8cec4f8fc11976bbe5a19c956fc72ef793ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Riette?= <sebastien.riette@meteo.fr> Date: Mon, 18 Dec 2023 14:50:24 +0100 Subject: [PATCH] S. Riette 18 dec 2023: add performance plots --- .../arch/arch-ECMWF_NEC440MPI225DP.AU.x.env | 4 + tools/check_commit_ial.sh | 11 ++- tools/check_commit_lmdz.sh | 18 +++- tools/check_commit_mesonh.sh | 10 +- tools/check_commit_testprogs.sh | 65 ++++++++++++- tools/plot_perf.py | 96 +++++++++++++++++++ tools/testing.sh | 24 +++-- 7 files changed, 211 insertions(+), 17 deletions(-) create mode 100755 tools/plot_perf.py diff --git a/build/with_fcm/arch/arch-ECMWF_NEC440MPI225DP.AU.x.env b/build/with_fcm/arch/arch-ECMWF_NEC440MPI225DP.AU.x.env index c1473b389..2487bc849 100644 --- a/build/with_fcm/arch/arch-ECMWF_NEC440MPI225DP.AU.x.env +++ b/build/with_fcm/arch/arch-ECMWF_NEC440MPI225DP.AU.x.env @@ -30,3 +30,7 @@ export VE_PROGINF=DETAIL ######################################################## #We must not use -ldl during link edition LIBS='rt' + +######################################################## +#NPROMA for performance check +NPROMA_perf=2500 diff --git a/tools/check_commit_ial.sh b/tools/check_commit_ial.sh index e14040108..fbbe8418a 100755 --- a/tools/check_commit_ial.sh +++ b/tools/check_commit_ial.sh @@ -115,7 +115,7 @@ mainPackVersion=${mainPackVersion:-${defaultMainPackVersion}} ################################ function usage { - echo "Usage: $0 [-h] [-p] [-c] [-r] [-C] [-s] [-f] [--noexpand] [-t TEST] [--cycle CYCLE] [--scripttag TAG] [--repo-user USER] [--repo-protocol PROTOCOL] [--remove] [--onlyIfNeeded] [--computeRefIfNeeded] [--prep_code-opts 'OPTS'] commit [reference]" + echo "Usage: $0 [-h] [-p] [-c] [-r] [-C] [-s] [-f] [--noexpand] [-t TEST] [--cycle CYCLE] [--scripttag TAG] [--repo-user USER] [--repo-protocol PROTOCOL] [--remove] [--onlyIfNeeded] [--computeRefIfNeeded] [--prep_code-opts 'OPTS'] [--perf FILE] commit [reference]" echo "commit commit 
hash (or a directory, or among $specialPack) to test" echo "reference commit hash (or a directory, or among $specialPack) REF to use as a reference" echo "-s suppress compilation pack" @@ -144,6 +144,7 @@ function usage { echo " OPTS is added to the call to prep_code (e.g. --prep_code_opts '--lowerCase'" echo " to transfor all source codes in lower case). Help on prep_code.sh options" echo " can be found with 'prep_code.sh -h'. Note: don't forget to enclose OPTS in ' or \"" + echo "--perf FILE add performance statistics in file FILE" echo "" echo "If nothing is asked (pack creation, compilation, running, check, removing) everything" echo "except the removing is done" @@ -178,6 +179,7 @@ remove=0 onlyIfNeeded=0 computeRefIfNeeded=0 prepCodeOpts="" +perffile="" while [ -n "$1" ]; do case "$1" in @@ -198,6 +200,7 @@ while [ -n "$1" ]; do '--onlyIfNeeded') onlyIfNeeded=1;; '--computeRefIfNeeded') computeRefIfNeeded=1;; '--prep_code-opts') prepCodeOpts=$2; shift;; + '--perf') perffile="$(realpath $2)"; shift;; #--) shift; break ;; *) if [ -z "${commit-}" ]; then commit=$1 @@ -666,7 +669,13 @@ if [ $run -ge 1 ]; then mkdir -p conf_tests/$t cd conf_tests/$t + t1=$(($(date +%s%N)/1000000)) #current time in milliseconds (date +%s%N is in nanoseconds) MYLIB=$name TESTDIR=$dirconf/$t exescript Output_run $dirconf/$t/aro${cycle}${scripttag}.sh + t2=$(($(date +%s%N)/1000000)) #same unit as t1 (milliseconds) + if [ "$perffile" != "" ]; then + #The elapsed time is not relevant when the model runs with a queuing system (HPC) + echo "$commit ial $t $(($t2-$t1))" >> "$perffile" + fi fi else echo "The test $t is not allowed" diff --git a/tools/check_commit_lmdz.sh b/tools/check_commit_lmdz.sh index 97cd92a77..c1fbae738 100755 --- a/tools/check_commit_lmdz.sh +++ b/tools/check_commit_lmdz.sh @@ -17,7 +17,7 @@ separator='_' #- be carrefull, gmkpack (at least on belenos) has multiple allerg #- seprator must be in sync with prep_code.sh separator function usage { - echo "Usage: $0 [-h] [-p] [-c] [-C] [-r] [-s] [--expand] [-t test] [--version VERSION] 
[--repo-user] [--repo-protocol] [--remove] commit [reference]" + echo "Usage: $0 [-h] [-p] [-c] [-C] [-r] [-s] [--expand] [-t test] [--version VERSION] [--repo-user] [--repo-protocol] [--remove] [--perf FILE] commit [reference]" echo "commit commit hash (or a directory, or among $specialPack) to test" echo "reference commit hash (or a directory, or among $specialPack) REF to use as a reference" echo "-s suppress compilation pack" @@ -35,6 +35,7 @@ function usage { echo "--repo-protocol protocol (https or ssh) to reach the PHYEX repository on github," echo " defaults to the env variable PHYEXREOprotocol (=$PHYEXREOprotocol)" echo "--remove removes the pack" + echo "--perf FILE add performance statistics in file FILE" echo "" echo "If nothing is asked (pack creation, compilation, running, check, removing) everything" echo "except the removing is done" @@ -62,6 +63,7 @@ useexpand=0 version="" link=0 #Not yet put in command line argument becaus this option has not been tested here remove=0 +perffile="" while [ -n "$1" ]; do case "$1" in @@ -78,6 +80,7 @@ while [ -n "$1" ]; do '--repo-user') export PHYEXREPOuser=$2; shift;; '--repo-protocol') export PHYEXREPOprotocol=$2; shift;; '--remove') remove=1;; + '--perf') perffile="$(realpath $2)"; shift;; #--) shift; break ;; *) if [ -z "${commit-}" ]; then commit=$1 @@ -280,12 +283,12 @@ if [ $run -eq 1 ]; then cd $lmdzdir/1D/INPUT/PHYS sed '1 i\iflag_physiq=1\n' physiq.def_6A > physiq.def_PHYLMD sed '1 i\iflag_physiq=2\n' physiq.def_6A > physiq.def_PHYEX - for cas in $tests; do + for t in $tests; do for DEF in PHYEX PHYLMD; do - d=${lmdzdir}/1D/EXEC/${DEF}L$L/$cas + d=${lmdzdir}/1D/EXEC/${DEF}L$L/$t [ ! -d $d ] && mkdir -p $d cd $d - ln -sf ${lmdzdir}/1D/OLDCASES/$cas/* . + ln -sf ${lmdzdir}/1D/OLDCASES/$t/* . cp -f ${lmdzdir}/1D/INPUT/DEF/*.def . cp -f ${lmdzdir}/1D/INPUT/PHYS/physiq.def_$DEF physiq.def if [ $rad = oldrad ] ; then @@ -300,7 +303,7 @@ if [ $run -eq 1 ]; then cp -f ${lmdzdir}/1D/INPUT/VERT/L$L/* . 
ln -sf L$L.def vert.def set +e - cp -f $lmdzdir/1D/OLDCASES/$cas/*.d[ae]* . + cp -f $lmdzdir/1D/OLDCASES/$t/*.d[ae]* . set -e cat <<......eod>| compile.sh cd $lmdzdir/1D/bin @@ -317,7 +320,12 @@ if [ $run -eq 1 ]; then if [ $DEF == PHYEX ]; then sed -i -e 's/day_step=144$/day_step=1440/' gcm1d.def fi + t1=$(($(date +%s%N)/1000000)) #current time in milliseconds (date +%s%N is in nanoseconds) ./lmdz1d.e 2>&1 | tee execution.log + t2=$(($(date +%s%N)/1000000)) #same unit as t1 (milliseconds) + if [ "$perffile" != "" ]; then + echo "$commit lmdz $t $(($t2-$t1))" >> "$perffile" + fi done done fi diff --git a/tools/check_commit_mesonh.sh b/tools/check_commit_mesonh.sh index 4155612d5..ed91a95fe 100755 --- a/tools/check_commit_mesonh.sh +++ b/tools/check_commit_mesonh.sh @@ -42,7 +42,7 @@ TARGZDIR=${TARGZDIR:=$PHYEXTOOLSDIR/pack/} ################################ function usage { - echo "Usage: $0 [-h] [-p] [-c] [-r] [-C] [-s] [--expand] [-t TEST] [--repo-user USER] [--repo-protocol PROTOCOL] [--remove] [--onlyIfNeeded] [--computeRefIfNeeded] [--prep_code-opts 'OPTS'] commit [reference]" + echo "Usage: $0 [-h] [-p] [-c] [-r] [-C] [-s] [--expand] [-t TEST] [--repo-user USER] [--repo-protocol PROTOCOL] [--remove] [--onlyIfNeeded] [--computeRefIfNeeded] [--prep_code-opts 'OPTS'] [--perf FILE] commit [reference]" echo "commit commit hash (or a directory) to test" echo "reference commit hash or a directory or nothing for ref" echo "-s suppress compilation pack" @@ -68,6 +68,7 @@ function usage { echo " OPTS is added to the call to prep_code (e.g. --prep_code_opts '--lowerCase'" echo " to transfor all source codes in lower case). Help on prep_code.sh options" echo " can be found with 'prep_code.sh -h'. 
Note: don't forget to enclose OPTS in ' or \"" + echo "--perf FILE add performance statistics in file FILE" echo "" echo "If nothing is asked (pack creation, compilation, running, check, removing) everything" echo "except the removing is done" @@ -95,6 +96,7 @@ remove=0 onlyIfNeeded=0 computeRefIfNeeded=0 prepCodeOpts="" +perffile="" while [ -n "$1" ]; do case "$1" in @@ -112,6 +114,7 @@ while [ -n "$1" ]; do '--onlyIfNeeded') onlyIfNeeded=1;; '--computeRefIfNeeded') computeRefIfNeeded=1;; '--prep_code-opts') prepCodeOpts=$2; shift;; + '--perf') perffile="$(realpath $2)"; shift;; #--) shift; break ;; *) if [ -z "${commit-}" ]; then commit=$1 @@ -436,8 +439,13 @@ if [ $run -ge 1 ]; then fi ./clean_mesonh_xyz set +o pipefail #We want to go through all tests + t1=$(($(date +%s%N)/1000000)) #current time in milliseconds (date +%s%N is in nanoseconds) ./run_mesonh_xyz | tee Output_run + t2=$(($(date +%s%N)/1000000)) #same unit as t1 (milliseconds) set -o pipefail + if [ "$perffile" != "" ]; then + echo "$commit mesonh $t $(($t2-$t1))" >> "$perffile" + fi fi fi done diff --git a/tools/check_commit_testprogs.sh b/tools/check_commit_testprogs.sh index d4aaa7e21..236158dfe 100755 --- a/tools/check_commit_testprogs.sh +++ b/tools/check_commit_testprogs.sh @@ -76,6 +76,7 @@ i=$((i+1)); conf_extra_tag[$i]="_Z120_NPRO32_BLK256_TIMES4" conf_extra_opts[$i]="--nflevg 120 --nproma 32 --blocks 256 --times 4" i=$((i+1)); conf_extra_tag[$i]="_Z120_NPRO32_BLK64_TIMES16" conf_extra_opts[$i]="--nflevg 120 --nproma 32 --blocks 64 --times 16" +#The following case is the one used for performance evaluation, it must remain the 4th one i=$((i+1)); conf_extra_tag[$i]='_Z120_NPRO${NPROMA}_BLK${NBLOCKS}' conf_extra_opts[$i]='--nflevg 120 --nproma ${NPROMA} --blocks ${NBLOCKS}' @@ -85,7 +86,7 @@ i=$((i+1)); conf_extra_tag[$i]='_Z120_NPRO${NPROMA}_BLK${NBLOCKS}' ################################ function usage { - echo "Usage: $0 [-h] [-p] [-c] [-r] [-C] [-s] [--noexpand] [-t TEST] [--repo-user USER] [--repo-protocol PROTOCOL] [-a ARCH] [-A ARCH] [--remove] 
[--onlyIfNeeded] [--computeRefIfNeeded] [--no-perf] [--no-check] [-e EXTRAPOLATION] commit [reference]" + echo "Usage: $0 [-h] [-p] [-c] [-r] [-C] [-s] [--noexpand] [-t TEST] [--repo-user USER] [--repo-protocol PROTOCOL] [-a ARCH] [-A ARCH] [--remove] [--onlyIfNeeded] [--computeRefIfNeeded] [--no-perf] [--no-check] [-e EXTRAPOLATION] [--perf FILE] commit [reference]" echo "commit commit hash (or a directory, or among $specialName) to test" echo "reference commit hash (or a directory, or among $specialName) REF to use as a reference" echo "-s suppress compilation directory" @@ -112,6 +113,7 @@ function usage { echo " to access performance statistics." echo "-a arch ARCH architecture name to use to build and run the commit (=$defaultarchfile)" echo "-A arch ARCH architecture name to use for the reference simulation (=$defaultarchfile)" + echo "--perf FILE add performance statistics in file FILE" echo "-e EXTRAPOLATION" echo " extrapolate data. EXTRAPOLATION corresponds to a configuration:" for i in $(seq 1 $((${#conf_extra_tag[@]}-1))); do @@ -148,6 +150,7 @@ computeRefIfNeeded=0 perf=1 extrapolation=0 checkOpt="--check" +perffile="" while [ -n "$1" ]; do case "$1" in @@ -168,6 +171,7 @@ while [ -n "$1" ]; do '--computeRefIfNeeded') computeRefIfNeeded=1;; '--no-perf') perf=0;; '--no-check') checkOpt="";; + '--perf') perffile="$(realpath $2)"; shift;; '-e') extrapolation=$2; shift;; #--) shift; break ;; @@ -383,6 +387,7 @@ if [ $run -ge 1 ]; then if [ ! -f $TESTDIR/$name/build/with_fcm/arch_${archfile}/build/bin/main_${t}.exe ]; then echo "Directory does not exist ($TESTDIR/$name) or compilation has failed, please check" + echo "Run '$0 -p -c $commit' to compile." 
exit 6 fi @@ -427,6 +432,64 @@ with open('drhook.prof.agg', 'w') as f: f.write(df.to_string()) done fi +##################### +#### PERFORMANCE #### +##################### + +if [ $run -ge 1 -a "$perffile" != "" ]; then + echo "### Evaluate performance for commit $commit" + + ZTD_sum=0 + firstrun=1 + for t in $(echo $tests | sed 's/,/ /g'); do + if echo $allowedTests | grep -w $t > /dev/null; then + if [ ! -f $TESTDIR/$name/build/with_fcm/arch_${archfile}/build/bin/main_${t}.exe ]; then + echo "Directory does not exist ($TESTDIR/$name) or compilation has failed, please check" + echo "Run '$0 -p -c $commit' to compile." + exit 7 + fi + + if [ $firstrun -eq 1 ]; then + firstrun=0 + #Read preferred NPROMA for performance evaluation + . $TESTDIR/$name/build/with_fcm/arch_${archfile}/arch.env + + #Experiment size + NPOINTS=100000 + NPROMA=${NPROMA_perf-32} + NBLOCKS=$(($NPOINTS/$NPROMA/8*8)) #must be divisible by 8 + perf_extrapolation_tag=$(NPROMA=$NPROMA; NBLOCKS=$NBLOCKS; eval echo ${conf_extra_tag[4]}) + + #Cleaning to suppress old results that may be confusing in case of a crash during the run + if [ $onlyIfNeeded -eq 0 ]; then + for tclean in $(echo $tests | sed 's/,/ /g'); do #dedicated name: the outer loop variable t must stay untouched + if [ -d tests/with_fcm/arch_${archfile}/${tclean}${perf_extrapolation_tag} ]; then + rm -rf tests/with_fcm/arch_${archfile}/${tclean}${perf_extrapolation_tag} + fi + done + fi + fi + + NPROMA=$NPROMA NBLOCKS=$NBLOCKS OMP_NUM_THREADS=8 $0 -r -t $t -a ${archfile} -e 4 ${commit} + file=$TESTDIR/$name/tests/with_fcm/arch_${archfile}/${t}${perf_extrapolation_tag}/Output_run + if [ -f $file ]; then + ZTD=$(grep -m 1 "ZTD =" $file | awk '{print $4}') + if [ "$ZTD" != "" ]; then + ZTD_sum=$(python3 -c "print(${ZTD_sum} if ${ZTD_sum} < 0. 
else (${ZTD_sum} + ${ZTD}))") + else + ZTD=-999 + ZTD_sum=-999 + fi + else + ZTD=-999 + ZTD_sum=-999 + fi + echo "$commit testprogs $t $ZTD" >> "$perffile" + fi + done + echo "$commit testprogs ALL $ZTD_sum" >> "$perffile" +fi + #################### #### COMPARISON #### #################### diff --git a/tools/plot_perf.py b/tools/plot_perf.py new file mode 100755 index 000000000..5fafa07a7 --- /dev/null +++ b/tools/plot_perf.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +""" +This script plots the data contained in the performance files obtained with the --perf option +""" + +import matplotlib.pyplot as plt +import numpy +import pandas + +class Perf(): + def __init__(self, perffile): + """ + :param perffile: text file with each line having the form "commit model case time" + """ + self._df = df = pandas.read_csv(perffile, sep=' ', + names=['commit', 'model', 'case', 'time']) + def plotPerf(self, outfile, model=None, title=None, num=None): + """ + :param outfile: output file + :param model: None to plot each model on a subplot or the model to plot + :param title: custom title to use (%M will be replaced by the model name) + :param num: plot only last num values (None to plot all values) + """ + models = [model] if model is not None else sorted(set(self._df['model'])) + fig, ax = plt.subplots(nrows=len(models), sharex=True, sharey=True, figsize=(8, 8 * len(models))) + if len(models) == 1: + ax = [ax] + + #Ordered commit list common to all models + commits = [] + for commit in self._df['commit']: + if commit not in commits: + commits.append(commit) + if num is not None: + commits = commits[-num:] + + df = self._df.groupby('model') + for igrpM, grpM in enumerate(models): + if title is None: + if len(models) == 1: + ax[igrpM].set_title('Mean elapsed computational time') + else: + ax[igrpM].set_title('Mean elapsed computational time for ' + grpM) + else: + ax[igrpM].set_title(title.replace('%M', grpM)) + ax[igrpM].set_ylabel('time (ms/gp)') + ax[igrpM].set_yscale('log') 
+ + dfp = df.get_group(grpM).groupby('case') + for grp in dfp.groups: + #Build time serie with possible missing value and mean aggregation if needed + time = [] + for commit in commits: + f = dfp.get_group(grp)['commit'] == commit + #discard negative values + l = [numpy.nan if t < 0. else t for t in dfp.get_group(grp)[f]['time']] + time.append(numpy.nan if numpy.all(numpy.isnan(l)) else numpy.nanmean(l)) #mean over valid values only, nan if none + ax[igrpM].plot(range(len(commits)), numpy.ma.array(time), 'o-', label=grp) + if igrpM == len(models) - 1: #last subplot actually drawn (not the number of models in the file) + ax[igrpM].set_xlabel('PHYEX version') + ax[igrpM].set_xticks(range(len(commits))) + ax[igrpM].set_xticklabels(commits, rotation=45, ha='right') + ax[igrpM].legend() + fig.tight_layout() + fig.savefig(outfile) + + def listModels(self): + """ + :result: list of models present in the file + """ + return sorted(set(self._df['model'])) + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser(description='Plot performance file') + parser.add_argument('PERF_FILE', type=str, + help='file containing the performance statistics') + parser.add_argument('--plot', type=str, default=None, + help='output plot file') + parser.add_argument('--model', type=str, default=None, + help='plot only model MODEL') + parser.add_argument('--title', type=str, default=None, + help="Plot title, %%M will be replaced by model name") + parser.add_argument('--num', metavar='N', type=int, default=None, + help="Plot only last N values") + parser.add_argument('--listModels', default=False, action='store_true', + help="returns the list of models present in the performance file") + args = parser.parse_args() + perf = Perf(args.PERF_FILE) + if args.plot is not None: + perf.plotPerf(args.plot, args.model, args.title, args.num) + if args.listModels: + print(' '.join(perf.listModels())) + + diff --git a/tools/testing.sh b/tools/testing.sh index c49e1f5c3..3c351a898 100755 --- a/tools/testing.sh +++ b/tools/testing.sh @@ -5,14 +5,17 @@ set -e set -o pipefail #abort if left 
command on a pipe fails function usage { - echo "Usage: $0 [-h] [--repo-user] [--repo-protocol] [--repo-repo] [--no-update] [--no-compil]" - echo " [--no-exec] [--no-comp] [--no-remove] [--commit SHA] [--ref REF] [--force] [MAIL]" - echo "--repo-user user hosting the PHYEX repository on github," + echo "Usage: $0 [-h] [--repo-user USER] [--repo-protocol PROTOCOL] [--repo-repo REPO] [--no-update] [--no-compil]" + echo " [--no-exec] [--no-comp] [--no-remove] [--force] [--commit SHA] [--ref REF]" + echo " [--only-model MODEL] [--no-enable-gh-pages] [--perf FILE] [MAIL]" + echo "--repo-user USER" + echo " user hosting the PHYEX repository on github," echo " defaults to the env variable PHYEXREPOuser (=$PHYEXREPOuser)" - echo "--repo-protocol protocol (https or ssh) to reach the PHYEX repository on github," + echo "--repo-protocol PROTOCOL" + echo " protocol (https or ssh) to reach the PHYEX repository on github," echo " defaults to the env variable PHYEXREPOprotocol (=$PHYEXREPOprotocol)" - echo "--repo-repo repository name" - echo " defaults to the env variable PHYEXREPOrepo (=$PHYEXREPOrepo)" + echo "--repo-repo REPO" + echo " repository name, defaults to the env variable PHYEXREPOrepo (=$PHYEXREPOrepo)" echo "--no-update do not update the tools" echo "--no-compil do not compil (only usefull after a first execution with --no-update)" echo "--no-exec do not execute (only usefull after a first execution with --no-update)" @@ -25,6 +28,7 @@ function usage { echo " performs the test only using model MODEL (option can be provided several times)" echo "--no-enable-gh-pages" echo " dont't try to enable the project pages on github" + echo "--perf FILE add performance statistics in file FILE" echo "MAIL comma-separated list of e-mail addresses (no spaces); if not provided, mail is not sent" echo "" echo "This script provides functionality for automated tests." 
@@ -58,6 +62,7 @@ SHA=0 force=0 models="" enableghpages=1 +perfopt="" while [ -n "$1" ]; do case "$1" in @@ -75,6 +80,7 @@ while [ -n "$1" ]; do '--ref') REF=$2; shift;; '--only-model') models="${models} $2"; shift;; '--no-enable-gh-pages') enableghpages=0;; + '--perf') perfopt="--perf $2"; shift;; #--) shift; break ;; *) if [ -z "${MAIL-}" ]; then MAIL="$1" @@ -288,19 +294,19 @@ if [ ${force} -eq 1 -o $(get_statuses "${SHA}" | grep "${context}" | wc -l) -eq #Model specific configuration if [ "${model}" == 'ial' ]; then compilation='-p -c' - execution='-r' + execution="-r $perfopt" comparison='-C --computeRefIfNeeded' jsonfile="src/arome/ial_version.json" docmp=1 elif [ "${model}" == 'lmdz' ]; then compilation='-p -c --nofcm' - execution='-r --nofcm' + execution="-r --nofcm $perfopt" comparison='-C' jsonfile="src/${model}/${model}_version.json" docmp=0 else compilation='-p -c' - execution='-r' + execution="-r $perfopt" comparison='-C --computeRefIfNeeded' jsonfile="src/${model}/${model}_version.json" docmp=1 -- GitLab