Commit ed7ef722 authored by Mikaël Salson

Merge branch 'feature-a/4275-reproducible-benchmark' into 'dev'

More reproducible benchmark

Closes #4275 and #4282

See merge request !687
parents 0ed1e517 189fc914
Pipeline #146588 failed with stages
in 1 minute and 9 seconds
......@@ -377,7 +377,10 @@ benchmark_algo:
- apt-get install -y time valgrind python3 wget tar
stage: benchmark
script:
- cd algo/tests ; python3 benchmark-releases.py -bic
- cd algo/tests ; python3 benchmark-releases.py -r 3 -bIc
artifacts:
paths:
- algo/tests/benchmark.log
when: manual
only:
- /^feature-.*a.*\/.*$/
......
......@@ -5,10 +5,14 @@ SRC = DEST + 'src/'
# Directory globbed by installed() to list releases; run_all() runs BIN/<release>
BIN = DEST + 'bin/'
# Directory receiving the '<tag>-<release>.log' files written by run_all()
RUN = DEST + 'run/'
# File receiving the uncolored benchmark summary at the end of a run
OUT = 'benchmark.log'
# Release name passed to install() by install_current()
CURRENT = 'HEAD'
#####
# Relative change between two consecutive releases (here 10 %) from which
# bench_line() highlights a value
WARN_RATIO = 0.10
# NOTE(review): '-x N' presumably caps the number of processed reads --
# confirm against the vidjil-algo command-line help
LIMIT1e5 = '-x 100000 '
LIMIT1e4 = '-x 10000 '
LIMIT1e3 = '-x 1000 '
......@@ -23,23 +27,56 @@ CONSENSUS_NO = '-y 0 -z 0 '
CONSENSUS_ALL = '-y all -z 0 '
DESIGNATIONS = '-c designations '

from collections import OrderedDict

# Benchmarks, in the order they are run and reported: tag -> command-line
# options given to each release. An OrderedDict keeps that order stable
# whatever the Python version, so that summaries stay comparable.
BENCHS = OrderedDict([
    ('init', '-x 1 ' + MULTI + L4 + CONSENSUS_NO),
    ('germ', LIMIT1e5 + MULTI + L4 + '-c germlines '),

    ('multi-0', LIMIT1e5 + MULTI + L4 + CONSENSUS_NO),
    ('multi-1', LIMIT1e5 + MULTI + L4 + CONSENSUS_ALL),
    ('multi-a', LIMIT1e3 + MULTI + L4 + DESIGNATIONS + '-z 1000'),

    ('igh-0', LIMIT1e5 + IGH + S22 + CONSENSUS_NO),
    ('igh-1', LIMIT1e5 + IGH + S22 + CONSENSUS_ALL),
    ('igh-a', LIMIT1e3 + IGH + S22 + DESIGNATIONS),
])
# Option rewrites, one (release, option, replacement) triple per entry.
# NOTE(review): presumably consumed by convert() to adapt a command line to
# older releases -- confirm the exact matching rule there.
COMPATIBILITY = [
    ("2019.03", "-c designations", "-c segment"),
]

# Notable changes that may affect speed/memory, shown next to the release
# in the benchmark summary
INFOS = {
    "2019.03": "Aho by default",
    "2018.07": "--analysis-filter (always 3)",
    "2018.10": "--analysis-filter 1",
    "2020.04": "#4287",
}
# Simple colored output

# Escape-sequence template: '%s' receives one of the numeric ANSI codes below
CSIm = '\033[%sm'


class ANSI:
    """Numeric codes substituted into CSIm to style terminal output."""
    RESET = 0
    BRIGHT = 1
    BLACK = 30
    RED = 31
    GREEN = 32
    YELLOW = 33
    BLUE = 34
    MAGENTA = 35
    CYAN = 36
    WHITE = 37


def color(col, text, colorize=True):
    """
    Return `text` wrapped between the escape sequence for `col` and a reset.

    col      -- one of the ANSI.* codes
    text     -- string to decorate
    colorize -- when false, `text` is returned unchanged (useful for files)
    """
    if not colorize:
        return text
    return CSIm % col + text + CSIm % ANSI.RESET
#
def convert(cmd, release):
'''
Convert a command line to be used by old vidjil-algo releases
......@@ -68,39 +105,50 @@ import time
import sys
import argparse
import resource
import datetime
from tempfile import NamedTemporaryFile
# Benchmark results filled by run_all(): (tag, release) -> (time, memory),
# or None when the run failed
stats = {}

# Command-line interface. '-i/--install' is declared only once: registering
# the same option strings twice makes argparse raise a conflict error.
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--current', action='store_true', help='install current HEAD')
parser.add_argument('-i', '--install', dest='release', default=[], action='append',
                    help='install selected releases from %s, such as in "-i 2018.02 -i 2020.05"' % ARCHIVE)
parser.add_argument('-I', '--install-all', action='store_true',
                    help='install all releases from %s' % ARCHIVE)
parser.add_argument('-b', '--benchmark', action='store_true', help='benchmark installed releases')
parser.add_argument('-s', '--select', dest='benchs', default=[], action='append',
                    help='Specify the benchmarks to select (among {}, default is all)'.format(', '.join(BENCHS.keys())))
parser.add_argument('-r', '--retries', type=int, default=1, help='Number of times each benchmark is launched')
def go(cmd, log=None, time=False):
    '''
    Run a shell command, optionally measuring it with /usr/bin/time.

    cmd  -- command line, run through the shell
    log  -- if given, path of a file to which the command line and the
            output (stdout and stderr) of the command are appended;
            otherwise the output goes to sys.stdout
    time -- if true, wrap the command in /usr/bin/time and report its
            user time, system time and maximum resident set size
            (this parameter hides the `time` module inside the function)

    Returns None when `time` is false, otherwise (stime + utime, mem) where
    mem is the '%M' value reported by /usr/bin/time divided by 1000.
    Raises subprocess.CalledProcessError when the command exits with a
    non-zero status.
    '''
    print(cmd, end=' ')

    run_cmd = cmd
    time_path = None
    if time:
        # Only the name is needed: /usr/bin/time writes the measures itself
        with NamedTemporaryFile(mode='w+', delete=False) as time_file:
            time_path = time_file.name
        run_cmd = "/usr/bin/time -o {} -f '%U\t%S\t%M' {}".format(time_path, cmd)

    try:
        if log:
            with open(log, 'a') as flog:
                flog.write('\n\n%s\n' % cmd)
                # The child writes straight to the descriptor: flush first so
                # that the command line precedes its output in the log
                flog.flush()
                returncode = subprocess.call(run_cmd, shell=True,
                                             stderr=subprocess.STDOUT, stdout=flog)
        else:
            sys.stdout.flush()
            returncode = subprocess.call(run_cmd, shell=True,
                                         stderr=subprocess.STDOUT, stdout=sys.stdout)

        if returncode:
            print('FAILED', end=' ')
            raise subprocess.CalledProcessError(returncode, run_cmd)

        if not time:
            return None

        with open(time_path) as measures:
            (utime, stime, mem) = [float(i) for i in measures.read().split()]
    finally:
        # Never leave the temporary file behind, even when the command failed
        if time_path is not None:
            os.unlink(time_path)

    mem = mem // 1000
    print(color(ANSI.YELLOW, '%5.2fu %5.2fs %6.1fM' % (utime, stime, mem)))

    return (stime + utime, mem)
def code(tgz):
'''
......@@ -147,10 +195,11 @@ def install(release, tgz):
def install_current():
    '''Install the CURRENT release, with no archive.'''
    install(release=CURRENT, tgz=None)
def install_from_archive(install_versions=None):
    '''
    Install the releases listed by get_releases().

    install_versions -- collection of release names to install;
                        when empty or None, every release is installed

    A release whose installation fails is reported with "FAILED" and
    does not prevent the installation of the following ones.
    '''
    for release, tgz in get_releases():
        if install_versions and release not in install_versions:
            continue
        try:
            install(release, tgz)
        except subprocess.CalledProcessError:
            print("FAILED")
......@@ -158,50 +207,92 @@ def installed():
return sorted([f.replace(BIN, '') for f in glob.glob('%s/*' % BIN)])
def run_all(tag, args, retries=1):
    '''
    Run one benchmark on every installed release and store the results.

    tag     -- name of the benchmark, used in the log names and as key in `stats`
    args    -- options of the benchmark, adapted to each release by convert()
    retries -- number of times the command is launched for each release
               (at least one run is always done)

    For each release, stats[tag, release] receives (time, mem), each being
    the minimum over the runs, or None as soon as one run fails.
    '''
    print(color(ANSI.CYAN, '==== %s ==== %s' % (tag, args)))
    os.makedirs(RUN, exist_ok=True)

    # min() needs at least one measure
    attempts = max(1, retries)

    for release in installed():
        print(color(ANSI.MAGENTA, '%9s' % release), end=' ')
        log = RUN + '/%s-%s.log' % (tag, release)
        cmd = '%s/%s ' % (BIN, release) + convert(args, release)
        try:
            runs = [go(cmd, log, True) for _ in range(attempts)]
        except subprocess.CalledProcessError:
            stats[tag, release] = None
        else:
            best_time = min(run[0] for run in runs)
            best_mem = min(run[1] for run in runs)
            stats[tag, release] = (best_time, best_mem)
    print()
def bench_line(f, release, stats, index, format='%8.2f', previous_release=None, colorize=True):
    '''
    Write the summary line of one release: one cell per benchmark of BENCHS.

    f                -- writable text stream
    release          -- release to display
    stats            -- (tag, release) -> tuple of measures, or None on failure
    index            -- which measure of the tuple is displayed
    format           -- %-format of a measure
    previous_release -- release against which the values are compared, if any
    colorize         -- highlight with colors (red: increase, green: decrease)
                        rather than with a leading '!'

    A cell shows '-' when the benchmark was not run and 'x' when it failed.
    Returns True when at least one value differs from the one of
    `previous_release` by WARN_RATIO or more.
    '''
    f.write('%-9s' % release)
    warned = False

    for tag in BENCHS:
        if (tag, release) not in stats:
            cell = '%8s' % '-'
        elif stats[tag, release] is None:
            cell = '%8s' % 'x'
        else:
            val = stats[tag, release][index]
            cell = format % val

            # The previous release may have no measure for this benchmark
            # (not run, or failed): then there is nothing to compare to
            previous = stats.get((tag, previous_release)) if previous_release else None
            previous_val = previous[index] if previous is not None else None

            if previous_val:
                ratio = val / previous_val
                if ratio >= 1 + WARN_RATIO:
                    highlight = ANSI.RED
                elif ratio <= 1 - WARN_RATIO:
                    highlight = ANSI.GREEN
                else:
                    highlight = None

                if highlight is not None:
                    cell = color(highlight, cell) if colorize else '!' + cell[1:]
                    warned = True

        f.write(cell)

    if release in INFOS:
        f.write('  ' + INFOS[release])

    f.write('\n')
    return warned


def show_benchs(f, watched_release=None, colorize=True):
    '''
    Write the benchmark summary: the benchmarks and their options, then,
    for time and for memory, one line per installed release.

    f               -- writable text stream
    watched_release -- release whose highlighted changes make the function
                       return True
    colorize        -- use ANSI colors in the output

    Returns True when a line of `watched_release` has a highlighted value.
    '''
    f.write('\n')
    f.write(color(ANSI.YELLOW, '\nBenchmark summary, %s\n' % datetime.datetime.now().isoformat(), colorize))

    for tag, bench in BENCHS.items():
        f.write('%8s: %s\n' % (tag, bench))

    f.write('\n')
    f.write('%9s ' % '')
    for tag in BENCHS:
        f.write('%8s' % tag)
    f.write('\n\n')

    warned = False

    for (key, index, fmt) in [
        ('Time (s)', 0, '%8.2f'),
        ('Memory (MB)', 1, '%8d'),
    ]:
        f.write(color(ANSI.YELLOW, '\n%s\n' % key, colorize))
        # Each release is compared to the one displayed just before it
        previous_release = None
        for release in installed():
            w = bench_line(f, release, stats, index, fmt, previous_release, colorize)
            previous_release = release
            if w and release == watched_release:
                warned = True

    return warned
def bench_all(retries=1, selected_benchs=None):
    '''
    Build the data needed by the benchmarks, then run them on every
    installed release.

    retries         -- number of times each benchmark is launched
    selected_benchs -- tags of the benchmarks to run;
                       when empty or None, all of BENCHS are run

    A keyboard interrupt stops the benchmarks silently, so that the
    results gathered so far can still be used by the caller.
    '''
    try:
        go("make -C ../.. germline")
        go("make -C ../.. data")
        go("make -C ../.. demo")
        print()
        print()
        for tag, bench in BENCHS.items():
            if not selected_benchs or tag in selected_benchs:
                run_all(tag, bench, retries)
    except KeyboardInterrupt:
        pass
......@@ -210,15 +301,20 @@ def bench_all():
if __name__ == '__main__':
    args = parser.parse_args(sys.argv[1:])

    # Nothing to install from the archive and nothing to benchmark: show the usage
    if not args.release and not args.install_all and not args.benchmark:
        parser.print_help()

    if args.current:
        install_current()

    if args.release or args.install_all:
        # An empty selection (only -I) installs every release
        install_from_archive(args.release)

    if args.benchmark:
        bench_all(args.retries, args.benchs)
        show_benchs(sys.stdout, colorize=True)
        print('\n==>', OUT)

        # The last release of the sorted list is the one whose changes
        # decide the exit status
        releases = installed()
        watched_release = releases[-1] if releases else None
        with open(OUT, 'w') as summary:
            warned = show_benchs(summary, watched_release=watched_release, colorize=False)

        # 42 signals a highlighted change on the watched release
        sys.exit(42 if warned else 0)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment