# benchmark-releases.py

# URL listing the release archives (read by get_releases(), fetched by install())
ARCHIVE = 'http://www.vidjil.org/releases/'

# Local working tree: one entry per release in each sub-directory
DEST = 'bench/'
SRC = DEST + 'src/'    # unpacked sources and install logs
BIN = DEST + 'bin/'    # one binary per installed release
RUN = DEST + 'run/'    # one log per (benchmark, release) run

# Plain-text summary written after a benchmark run
OUT = 'benchmark.log'

# Pseudo-release name used for the binary built from the working copy
CURRENT = 'HEAD'

#####

# Relative change against the previous release from which a value is highlighted
WARN_RATIO = 0.10

# Command-line fragments combined into BENCHS.
# Most end with a space so that they can be concatenated directly.

# '-x N' options (presumably a limit on the number of processed reads -- TODO confirm)
LIMIT1e5 = '-x 100000 '
LIMIT1e4 = '-x 10000 '
LIMIT1e3 = '-x 1000 '
LIMIT1e2 = '-x 100 '

# Germline selection
MULTI = '-g ../../germline/homo-sapiens.g '
IGH = '-g ../../germline/homo-sapiens.g:IGH '

# Input datasets
L4 = '../../demo/LIL-L4.fastq.gz '
S22 = '../../demo/Stanford_S22.fasta '

# '-y'/'-z'/'-c' options, passed as-is to vidjil-algo
CONSENSUS_NO = '-y 0 -z 0 '
CONSENSUS_ALL = '-y all -z 0 '
DESIGNATIONS = '-c designations '

30 31 32 33 34 35 36 37 38 39 40 41 42 43
from collections import OrderedDict

# Benchmarks, in the order they are run and reported: tag -> command-line arguments
BENCHS = OrderedDict()

BENCHS['init'] = '-x 1 ' + MULTI + L4 + CONSENSUS_NO
BENCHS['germ'] = LIMIT1e5 + MULTI + L4 + '-c germlines '

# Multi-germline runs on the L4 dataset
BENCHS['multi-0'] = LIMIT1e5 + MULTI + L4 + CONSENSUS_NO
BENCHS['multi-1'] = LIMIT1e5 + MULTI + L4 + CONSENSUS_ALL
BENCHS['multi-a'] = LIMIT1e3 + MULTI + L4 + DESIGNATIONS + '-z 1000'

# IGH-only runs on the S22 dataset
BENCHS['igh-0'] = LIMIT1e5 + IGH + S22 + CONSENSUS_NO
BENCHS['igh-1'] = LIMIT1e5 + IGH + S22 + CONSENSUS_ALL
BENCHS['igh-a'] = LIMIT1e3 + IGH + S22 + DESIGNATIONS
44

45 46 47 48
# Option renames, as (release, new, old) triples:
# for releases before 'release', 'new' is replaced by 'old' in the command line
COMPATIBILITY = [('2019.03', '-c designations', '-c segment')]

49 50 51 52 53 54 55 56
# Release -> note on a change that may affect speed/memory,
# appended to the line of that release in the summary
INFOS = dict([
  ('2019.03', 'Aho by default'),
  ('2018.07', '--analysis-filter (always 3)'),
  ('2018.10', '--analysis-filter 1'),
  ('2020.04', '#4287'),
])

57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
# Simple colored output

# Template of an ANSI escape sequence: ESC [ <code> m
CSIm = '\033[%sm'

class ANSI:
    '''Numeric codes to be substituted into CSIm'''

    # Attributes
    RESET, BRIGHT = 0, 1

    # Foreground colors, 30 to 37
    BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(30, 38)

def color(col, text, colorize = True):
    '''
    Wrap 'text' between the escape sequence for 'col' and the reset sequence

    When 'colorize' is false, 'text' is returned unchanged.
    '''
    if colorize:
        start = CSIm % col
        stop = CSIm % ANSI.RESET
        return start + text + stop
    return text

#

80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
def convert(cmd, release):
    '''
    Rewrite a command line so that it is understood by an older vidjil-algo release

    Every COMPATIBILITY entry whose release is (as a string) greater than
    'release' has its new spelling replaced by the old one.

    >>> convert('-x 10 -c designations', '2019.05')
    '-x 10 -c designations'

    >>> convert('-x 10 -c designations', '2018.02')
    '-x 10 -c segment'
    '''

    renames = [(new, old) for rel, new, old in COMPATIBILITY if release < rel]
    converted = cmd
    for new, old in renames:
        converted = converted.replace(new, old)
    return converted

96 97 98 99 100 101 102 103 104 105 106 107

#####

import re
import urllib.request
import os
import subprocess
import glob
import time
import sys
import argparse
import resource
108
import datetime
109
from tempfile import NamedTemporaryFile
110 111 112 113

# Benchmark results, filled by run_all():
# (tag, release) -> (time, memory), or None when the run failed
stats = {}

# Command-line interface
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--current', action='store_true', help='install current HEAD')
# The example in the help uses '-i': '-s' is the benchmark selection option below
parser.add_argument('-i', '--install', dest='release', default=[], action='append',
                    help='install selected releases from %s, such as in "-i 2018.02 -i 2020.05"' % ARCHIVE)
parser.add_argument('-I', '--install-all', action='store_true',
                    help='install all releases from %s' % ARCHIVE)
parser.add_argument('-b', '--benchmark', action='store_true', help='benchmark installed releases')
parser.add_argument('-s', '--select', dest='benchs', default=[], action='append',
                    help = 'Specify the benchmarks to select (among {}, default is all)'.format(', '.join(BENCHS.keys())))
parser.add_argument('-r', '--retries', type=int, default=1, help='Number of times each benchmark is launched')
123 124


125
def go(cmd, log=None, time=False):
    '''
    Run a shell command, optionally logging its output and measuring its resources

    cmd:  command line, run through the shell
    log:  when given, path of a file to which the command line and then the
          output of the command (stdout and stderr) are appended;
          otherwise the output goes to the standard output
    time: when true, the command is wrapped by /usr/bin/time
          (this parameter hides the 'time' module inside this function)

    Returns None when 'time' is false. Otherwise returns a pair
    (user + system time, memory), where memory is the '%M' value
    reported by /usr/bin/time, integer-divided by 1000.

    Raises subprocess.CalledProcessError when the command exits with a non-zero status.
    '''
    flog = open(log, 'a') if log else sys.stdout
    time_path = None

    try:
        if log:
            flog.write('\n\n%s\n' % cmd)
            # The child writes directly to the file descriptor:
            # flush now so that the command line is logged before its output
            flog.flush()

        print(cmd, end=' ')
        sys.stdout.flush()

        if time:
            # Only the name is needed, /usr/bin/time opens the file by itself
            with NamedTemporaryFile(mode='w+', delete=False) as time_file:
                time_path = time_file.name
            cmd = "/usr/bin/time -o {} -f '%U\t%S\t%M' {}".format(time_path, cmd)

        returncode = subprocess.call(cmd, shell=True, stderr=subprocess.STDOUT, stdout=flog)

        if returncode:
            print('FAILED', end=' ')
            raise subprocess.CalledProcessError(returncode, cmd)

        if not time:
            return None

        with open(time_path) as measures:
            (utime, stime, mem) = [ float(i) for i in measures.read().split() ]

    finally:
        # Release the log and the temporary file on every path, failures included
        if log:
            flog.close()
        if time_path is not None and os.path.exists(time_path):
            os.unlink(time_path)

    mem = mem // 1000
    print(color(ANSI.YELLOW, '%5.2fu %5.2fs %6.1fM' % (utime, stime, mem)))

    return (stime + utime, mem)
152

153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
def code(tgz):
    '''
    Derive the release tag from the name of an archive

    The archive extensions and the 'vidjil-algo-' / 'vidjil-' prefixes are removed.

    >>> code('vidjil-algo-2001.01.tar.gz')
    '2001.01'
    '''
    tag = tgz
    # Same removal order as always: 'vidjil-algo-' has to go before 'vidjil-'
    for affix in ('.tgz', '.tar.gz', 'vidjil-algo-', 'vidjil-'):
        tag = tag.replace(affix, '')
    return tag

def get_releases():
    '''
    Yield a (release tag, archive name) pair for each archive listed at ARCHIVE

    The page is split on double quotes. Elements containing '<', '>',
    'x86' or 'latest' are skipped, and so are those without 'vidjil-'.
    '''
    ignored = ('<', '>', 'x86', 'latest')

    with urllib.request.urlopen(ARCHIVE) as response:
        page = str(response.read())
        for elt in page.split('"'):
            if any(marker in elt for marker in ignored):
                continue
            if 'vidjil-' in elt:
                yield code(elt), elt

def install(release, tgz):
    '''
    Build a release and copy its binary into BIN, under the name of the release

    release: release tag, or CURRENT to build the working copy with 'make -C ../../algo'
    tgz:     name of the archive to download from ARCHIVE (not used for CURRENT)

    The output of the commands goes to 'install.log' in SRC/<release>.
    Raises subprocess.CalledProcessError (from go()) when a command fails.
    '''
    os.makedirs(BIN, exist_ok=True)
    print('== %s' % release)

    src_dir = SRC + release
    go('mkdir -p %s' % src_dir)

    log = src_dir + '/' + 'install.log'

    if release == CURRENT:
        go('make -C ../../algo', log)
        go('cp ../../vidjil-algo %s/%s ' % (BIN, release), log)
    else:
        go('wget %s/%s -O %s/src.tgz' % (ARCHIVE, tgz, src_dir), log)
        go('cd %s ; tar xfz src.tgz' % src_dir, log)
        # Fall back to g++-6 when the default compiler fails
        go('cd %s/*%s* ; make vidjil-algo || make CXX=g++-6' % (src_dir, release), log)
        go('cp %s/*%s*/vidjil* %s/%s ' % (src_dir, release, BIN, release), log)

    # go() leaves the cursor after the last command: end the line in both cases
    print()

195 196 197
def install_current():
    '''Build and install the working copy, under the CURRENT release name'''
    install(release=CURRENT, tgz=None)

198
def install_from_archive(install_versions):
    '''
    Install releases listed by get_releases()

    install_versions: release tags to install; when empty, every release is installed

    A release whose installation fails is reported and the remaining
    releases are still processed. Requested tags that are not found
    in the archive are reported at the end.
    '''
    found = set()

    for release, tgz in get_releases():
        if install_versions and release not in install_versions:
            continue
        found.add(release)

        # Only the installation itself is expected to fail
        try:
            install(release, tgz)
        except subprocess.CalledProcessError:
            print("FAILED")

    for release in install_versions:
        if release not in found:
            print('!! %s: not found in %s' % (release, ARCHIVE))

def installed():
    '''Return the sorted list of the entries found in BIN, with BIN removed from their paths'''
    names = []
    for path in glob.glob('%s/*' % BIN):
        names.append(path.replace(BIN, ''))
    names.sort()
    return names


210
def run_all(tag, args, retries):
    '''
    Run one benchmark on every installed release and record the results in 'stats'

    tag:     name of the benchmark, used in the log names and as a key of 'stats'
    args:    arguments of the benchmark, adapted to each release by convert()
    retries: number of times the command is launched on each release

    For each release, stats[tag, release] is set to the pair
    (smallest time, smallest memory) over the launches, or to None
    when a launch failed or when nothing was launched.
    '''
    print(color(ANSI.CYAN, '==== %s ==== %s' % (tag, args)))
    os.makedirs(RUN, exist_ok=True)

    for release in installed():
        print(color(ANSI.MAGENTA, '%9s' % release), end=' ')
        log = RUN + '/%s-%s.log' % (tag, release)
        cmd = '%s/%s ' % (BIN, release) + convert(args, release)

        try:
            runs = [go(cmd, log, True) for _ in range(retries)]
        except subprocess.CalledProcessError:
            stats[tag, release] = None
            # go() printed 'FAILED' without ending the line
            print()
            continue

        if not runs:
            # retries < 1: nothing was measured
            stats[tag, release] = None
            print()
            continue

        best_time = min(elapsed for elapsed, _ in runs)
        best_mem = min(mem for _, mem in runs)
        stats[tag, release] = (best_time, best_mem)
    print()

229
def bench_line(f, release, stats, index, format='%8.2f', previous_release=None, colorize=True):
    '''
    Write the line of the summary for one release, with one column per benchmark of BENCHS

    f:                file-like object to write to
    release:          release shown on this line
    stats:            mapping (tag, release) -> tuple of measures, or None for a failed run
    index:            position of the measure to show in each tuple
    format:           %-format of a value
    previous_release: release to compare to, if any
    colorize:         highlight with colors when true, with a leading '!' otherwise

    A benchmark without result is shown as '-', a failed one as 'x'.
    A value is highlighted when it differs from the one of the previous
    release by at least WARN_RATIO, in either direction.

    Returns True when at least one value was highlighted.
    '''
    def flag(text):
        # Put the mark in the padding, or before the text when there is no padding
        return '!' + (text[1:] if text[:1] == ' ' else text)

    f.write('%-9s' % release)
    warned = False

    for tag in BENCHS:
        if (tag, release) not in stats:
            f.write('%8s' % '-')
            continue

        result = stats[tag, release]
        if result is None:
            f.write('%8s' % 'x')
            continue

        val = result[index]
        b = format % val

        # Value of the previous release, when it exists and did not fail
        previous_val = None
        if previous_release and (tag, previous_release) in stats:
            previous = stats[tag, previous_release]
            if previous is not None:
                previous_val = previous[index]

        # Highlight value (a null previous value can not be compared to)
        if previous_val:
            ratio = val / previous_val
            if ratio >= 1 + WARN_RATIO:
                b = color(ANSI.RED, b) if colorize else flag(b)
                warned = True
            elif ratio <= 1 - WARN_RATIO:
                b = color(ANSI.GREEN, b) if colorize else flag(b)
                warned = True

        f.write(b)

    if release in INFOS:
        f.write('     ' + INFOS[release])
    f.write('\n')
    return warned
258
    
259
def show_benchs(f, watched_release=None, colorize=True):
    '''
    Write the benchmark summary: the benchmarks, then one table per measure

    f:               file-like object to write to
    watched_release: release whose highlighted values make the function return True
    colorize:        passed to color() and bench_line()

    Each table has one line per installed release, compared to the release
    of the previous line.

    Returns True when bench_line() highlighted a value of 'watched_release'.
    '''
    f.write('\n')
    f.write(color(ANSI.YELLOW, '\nBenchmark summary, %s\n' % datetime.datetime.now().isoformat(), colorize))
    for tag, bench in BENCHS.items():
        f.write('%8s: %s\n' % (tag, bench))

    f.write('\n')
    # Same width as the release column written by bench_line()
    f.write('%9s' % '')
    for tag in BENCHS:
        f.write('%8s' % tag)

    warned = False
    releases = installed()

    for (key, index, fmt) in [
      ('Time (s)', 0, '%8.2f'),
      ('Memory (MB)', 1, '%8d'),
     ]:
        f.write(color(ANSI.YELLOW, '\n%s\n' % key, colorize))
        previous_release = None
        for release in releases:
            w = bench_line(f, release, stats, index, fmt, previous_release, colorize)
            previous_release = release
            if w and release == watched_release:
                warned = True

    return warned
285

286
def bench_all(retries, selected_benchs):
    '''
    Prepare the data, then run the selected benchmarks on all installed releases

    retries:         number of launches of each benchmark on each release
    selected_benchs: tags of the benchmarks to run; when empty, all BENCHS are run

    Selected tags that are not in BENCHS are reported and ignored.
    A keyboard interrupt stops the benchmarks without raising,
    leaving the results gathered so far in 'stats'.
    '''
    for tag in selected_benchs:
        if tag not in BENCHS:
            print('!! %s: unknown benchmark' % tag)

    try:
        go("make -C ../.. germline")
        go("make -C ../.. data")
        go("make -C ../.. demo")
        print()
        print()
        for tag, bench in BENCHS.items():
            if not selected_benchs or tag in selected_benchs:
                run_all(tag, bench, retries)
    except KeyboardInterrupt:
        # Deliberate: the caller still displays the partial results
        pass


300 301 302 303

if __name__ == '__main__':
    args = parser.parse_args(sys.argv[1:])

    # Show the help only when no action at all was requested
    if not (args.current or args.release or args.install_all or args.benchmark):
        parser.print_help()

    if args.current:
        install_current()

    if args.release or args.install_all:
        # An empty selection installs every release: '-I' wins over '-i'
        install_from_archive([] if args.install_all else args.release)

    if args.benchmark:
        bench_all(args.retries, args.benchs)
        show_benchs(sys.stdout, colorize=True)
        print('\n==>', OUT)

        # The last installed release, in sorted order, is the one being watched
        releases = installed()
        watched_release = releases[-1] if releases else None
        with open(OUT, 'w') as out:
            warned = show_benchs(out, watched_release=watched_release, colorize=False)

        # Exit status 42 signals a highlighted value on the watched release
        sys.exit(42 if warned else 0)