# benchmark-releases.py

from collections import OrderedDict

# Base URL from which release archives are listed and downloaded
ARCHIVE = 'http://www.vidjil.org/releases/'

# Working tree of the benchmark (relative paths): unpacked sources,
# installed binaries, and per-run logs
DEST = 'bench/'
SRC = DEST + 'src/'
BIN = DEST + 'bin/'
RUN = DEST + 'run/'

# File receiving the non-colorized benchmark summary
OUT = 'benchmark.log'

# Pseudo-release tag used for the binary built from the local working copy
CURRENT = 'HEAD'

#####

# Relative change between two consecutive releases from which a value is highlighted
WARN_RATIO = 0.10

# Command-line fragments; each one ends with a space so that they can be concatenated.
# NOTE(review): presumably '-x N' limits the number of processed reads -- confirm against vidjil-algo help
LIMIT1e5 = '-x 100000 '
LIMIT1e4 = '-x 10000 '
LIMIT1e3 = '-x 1000 '
LIMIT1e2 = '-x 100 '

# Germline presets: the whole preset file, or its IGH entry only
MULTI = '-g ../../germline/homo-sapiens.g '
IGH = '-g ../../germline/homo-sapiens.g:IGH '

# Input datasets
L4 = '../../demo/LIL-L4.fastq.gz '
S22 = '../../demo/Stanford_S22.fasta '

# NOTE(review): the meaning of these '-y'/'-z'/'-c' options is defined by vidjil-algo, not by this script
CONSENSUS_NO = '-y 0 -z 0 '
CONSENSUS_ALL = '-y all -z 0 '
DESIGNATIONS = '-c designations '

# Benchmarks, in display order: tag -> arguments given to each installed release
BENCHS = OrderedDict([
  ('init', '-x 1 ' + MULTI + L4 + CONSENSUS_NO),
  ('germ', LIMIT1e5 + MULTI + L4 + '-c germlines '),

  ('multi-0', LIMIT1e5 + MULTI + L4 + CONSENSUS_NO),
  ('multi-1', LIMIT1e5 + MULTI + L4 + CONSENSUS_ALL),
  ('multi-a', LIMIT1e3 + MULTI + L4 + DESIGNATIONS + '-z 1000'),

  ('igh-0', LIMIT1e5 + IGH + S22 + CONSENSUS_NO),
  ('igh-1', LIMIT1e5 + IGH + S22 + CONSENSUS_ALL),
  ('igh-a', LIMIT1e3 + IGH + S22 + DESIGNATIONS),
])

# (release, new, old): for releases older than `release`, the `new` fragment
# of a command line is replaced by `old` (see convert())
COMPATIBILITY = [
  ('2019.03', '-c designations', '-c segment'),
]

49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
# Simple colored output

# Escape sequence template: ESC [ <code> m, where %s receives one of the ANSI codes below
CSIm = '\033[%sm'

class ANSI:
    '''Numeric codes inserted into the CSIm escape sequence template.'''

    # Attributes
    RESET, BRIGHT = 0, 1

    # Foreground colors, numbered consecutively from 30 to 37
    BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(30, 38)

def color(col, text, colorize = True):
    '''
    Wrap `text` between the escape sequence selecting `col` and the one
    resetting the attributes. With `colorize` false, `text` is returned as is.
    '''
    if colorize:
        opening = CSIm % col
        closing = CSIm % ANSI.RESET
        return opening + text + closing
    return text

#

72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
def convert(cmd, release):
    '''
    Rewrite a command line so that it is understood by an old vidjil-algo release.

    Each COMPATIBILITY entry whose release is strictly greater than `release`
    (string comparison) has its new fragment replaced by the old one.

    >>> convert('-x 10 -c designations', '2019.05')
    '-x 10 -c designations'

    >>> convert('-x 10 -c designations', '2018.02')
    '-x 10 -c segment'
    '''

    converted = cmd
    for first_release, current_opt, legacy_opt in COMPATIBILITY:
        if not release < first_release:
            # This release already understands the current option
            continue
        converted = converted.replace(current_opt, legacy_opt)
    return converted

88 89 90 91 92 93 94 95 96 97 98 99

#####

import re
import urllib.request
import os
import subprocess
import glob
import time
import sys
import argparse
import resource
100
import datetime
101
from tempfile import NamedTemporaryFile
102 103 104 105

# Benchmark results filled by run_all(): maps (tag, release) to a (time, mem) tuple,
# or to None when the run failed
stats = {}

# Command-line interface: installation options (-c, -i, -I) and benchmark options (-b, -s, -r)
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--current', action='store_true', help='install current HEAD')
parser.add_argument('-i', '--install', dest='release', default=[], action='append',
                    help='install selected releases from %s, such as in "-i 2018.02 -i 2020.05"' % ARCHIVE)
parser.add_argument('-I', '--install-all', action='store_true',
                    help='install all releases from %s' % ARCHIVE)
parser.add_argument('-b', '--benchmark', action='store_true', help='benchmark installed releases')
parser.add_argument('-s', '--select', dest='benchs', default=[], action='append',
                    help = 'Specify the benchmarks to select (among {}, default is all)'.format(', '.join(BENCHS.keys())))
parser.add_argument('-r', '--retries', type=int, default=1, help='Number of times each benchmark is launched')
115 116


117
def go(cmd, log=None, time=False):
    '''
    Run `cmd` through the shell and echo it on the standard output.

    cmd:  shell command line
    log:  when given, path of a file to which the command line and the
          command output (stdout and stderr) are appended; otherwise the
          command output goes to the standard output
    time: when true, the command is wrapped into /usr/bin/time and its
          resources are reported

    Returns None when `time` is false, and otherwise a tuple
    (user + system time, memory) where memory is the value reported
    by /usr/bin/time floor-divided by 1000.

    Raises subprocess.CalledProcessError when the command exits with a
    non-zero status.
    '''
    run_cmd = cmd
    time_path = None
    if time:
        # Only the name is needed: /usr/bin/time writes the file itself (-o),
        # so our own handle is closed right away
        with NamedTemporaryFile(mode='w+', delete=False) as time_file:
            time_path = time_file.name
        run_cmd = "/usr/bin/time -o {} -f '%U\t%S\t%M' {}".format(time_path, cmd)

    try:
        flog = open(log, 'a') if log else sys.stdout
        try:
            if log:
                flog.write('\n\n%s\n' % cmd)
            # The child process writes straight to the file descriptor: flush
            # what we buffered so that the header stays before its output
            flog.flush()
            print(cmd, end=' ', flush=True)
            returncode = subprocess.call(run_cmd, shell=True, stderr=subprocess.STDOUT, stdout=flog)
        finally:
            if log:
                flog.close()

        if returncode:
            print('FAILED', end=' ')
            raise subprocess.CalledProcessError(returncode, run_cmd)
        if not time:
            return None

        with open(time_path) as measures:
            (utime, stime, mem) = [float(i) for i in measures.read().split()]
    finally:
        # Remove the measurement file on every path, including failures
        if time_path is not None:
            os.unlink(time_path)

    mem = mem // 1000
    print(color(ANSI.YELLOW, '%5.2fu %5.2fs %6.1fM' % (utime, stime, mem)))

    return (stime + utime, mem)
144

145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
def code(tgz):
    '''
    Return the release tag of an archive filename, that is the filename
    without its archive extension and without its 'vidjil-' prefix

    >>> code('vidjil-algo-2001.01.tar.gz')
    '2001.01'
    '''
    tag = tgz
    # Order matters: 'vidjil-algo-' has to be removed before 'vidjil-'
    for fragment in ('.tgz', '.tar.gz', 'vidjil-algo-', 'vidjil-'):
        tag = tag.replace(fragment, '')
    return tag

def get_releases():
    '''
    Yield a (release tag, filename) pair for each release archive listed
    on the ARCHIVE page.

    The page is split on double quotes. An element is kept when it contains
    'vidjil-' and none of the ignored fragments: markup characters, or
    names containing 'x86' or 'latest'.
    '''
    ignored = ('<', '>', 'x86', 'latest')

    # Fetch the whole page first, so that the connection is not kept open
    # while the caller consumes the generator
    with urllib.request.urlopen(ARCHIVE) as response:
        # Decode the payload: str() on bytes would give their repr ("b'...'")
        page = response.read().decode('utf-8', errors='replace')

    for elt in page.split('"'):
        if 'vidjil-' in elt and not any(fragment in elt for fragment in ignored):
            yield code(elt), elt

def install(release, tgz):
    '''
    Build a release and copy its binary to BIN/<release>.

    release: release tag; CURRENT selects the local working copy, built
             with 'make -C ../../algo' instead of being downloaded
    tgz:     archive filename, relative to ARCHIVE (unused for CURRENT)

    Build output is appended to SRC/<release>/install.log.
    Raises subprocess.CalledProcessError (through go()) when a step fails.

    NOTE(review): `release` and `tgz` are interpolated unquoted into shell
    command lines -- they have to come from a trusted source.
    '''
    os.makedirs(BIN, exist_ok=True)
    print('== %s' % release)

    src_dir = SRC + release
    go('mkdir -p %s' % src_dir)

    log = src_dir + '/' + 'install.log'

    if release == CURRENT:
        go('make -C ../../algo', log)
        go('cp ../../vidjil-algo %s/%s ' % (BIN, release), log)
        return

    go('wget %s/%s -O %s/src.tgz' % (ARCHIVE, tgz, src_dir), log)
    go('cd %s ; tar xfz src.tgz' % src_dir, log)
    # Fall back on g++-6 when the default compiler fails on this release
    go('cd %s/*%s* ; make vidjil-algo || make CXX=g++-6' % (src_dir, release), log)
    go('cp %s/*%s*/vidjil* %s/%s ' % (src_dir, release, BIN, release), log)

    print()

187 188 189
def install_current():
    '''Install the CURRENT pseudo-release; no archive file is involved.'''
    install(release=CURRENT, tgz=None)

190
def install_from_archive(install_versions):
    '''
    Install releases listed by get_releases().

    install_versions: release tags to install; an empty list selects
                      every listed release

    A release whose installation fails is reported with "FAILED" and the
    remaining releases are still processed.
    '''
    for release, tgz in get_releases():
        if install_versions and release not in install_versions:
            continue
        try:
            install(release, tgz)
        except subprocess.CalledProcessError:
            print("FAILED")

def installed():
    '''Return the sorted names of the entries found in BIN, with the BIN prefix removed.'''
    names = []
    for path in glob.glob('%s/*' % BIN):
        names.append(path.replace(BIN, ''))
    names.sort()
    return names


202
def run_all(tag, args, retries):
    '''
    Run one benchmark on every installed release and record the results.

    tag:     benchmark name, used in log filenames and as a key of `stats`
    args:    arguments of the benchmark, adapted to each release by convert()
    retries: number of runs per release; at least one run is always done

    For each release, stats[tag, release] receives the minimal time and the
    minimal memory over the runs, or None when a run failed.
    '''
    print(color(ANSI.CYAN, '==== %s ==== %s' % (tag, args)))
    os.makedirs(RUN, exist_ok=True)

    for release in installed():
        print(color(ANSI.MAGENTA, '%9s' % release), end=' ')
        log = RUN + '/%s-%s.log' % (tag, release)
        cmd = '%s/%s ' % (BIN, release) + convert(args, release)

        try:
            # max(): with no run at all there would be nothing to take the minimum of
            runs = [go(cmd, log, True) for _ in range(max(1, retries))]
        except subprocess.CalledProcessError:
            stats[tag, release] = None
        else:
            best_time = min(run[0] for run in runs)
            best_mem = min(run[1] for run in runs)
            stats[tag, release] = (best_time, best_mem)
    print()

221
def bench_line(f, release, stats, index, format='%8.2f', previous_release=None, colorize=True):
    '''
    Write the summary line of one release: its name, then one cell per benchmark.

    f:                writable text stream
    release:          release displayed on this line
    stats:            results, as (tag, release) -> tuple of values, or None for a failed run
    index:            position of the displayed value in the result tuples
    format:           %-format of a cell
    previous_release: when given, release against which values are compared
    colorize:         highlight with colors rather than with a leading '!'

    A cell shows '-' when there is no result for the benchmark and 'x' when
    the run failed. A value is highlighted when it differs from the one of
    `previous_release` by at least WARN_RATIO: red or green when colorized.
    NOTE(review): without colors, both directions get the same '!' marker --
    confirm that this is intended.
    '''
    f.write('%-9s' % release)
    for tag in BENCHS:
        if (tag, release) not in stats:
            b = '%8s' % '-'
        elif stats[tag, release] is None:
            b = '%8s' % 'x'
        else:
            val = stats[tag, release][index]
            b = format % val

            # Highlight value. There is nothing to compare with when the previous
            # release has no result, and no ratio when its value is zero
            previous = stats.get((tag, previous_release)) if previous_release else None
            if previous is not None and previous[index]:
                ratio = val / previous[index]
                if ratio >= 1 + WARN_RATIO:
                    b = color(ANSI.RED, b) if colorize else '!' + b[1:]
                elif ratio <= 1 - WARN_RATIO:
                    b = color(ANSI.GREEN, b) if colorize else '!' + b[1:]
        f.write(b)
    f.write('\n')
    
244
def show_benchs(f, colorize):
    '''
    Write the benchmark summary to the text stream `f`.

    The summary has a timestamped title, the arguments of each benchmark,
    a header with the benchmark tags, then one table per metric (time, memory)
    with one line per installed release. Each release is compared to the one
    on the previous line. `colorize` enables the colored output.
    '''
    f.write('\n')
    f.write(color(ANSI.YELLOW, '\nBenchmark summary, %s\n' % datetime.datetime.now().isoformat(), colorize))
    for tag, bench in BENCHS.items():
        f.write('%8s: %s\n' % (tag, bench))

    f.write('\n')
    f.write('%9s ' % '')
    for tag in BENCHS:
        f.write('%8s' % tag)

    # List the installed releases once, so that both tables show the same lines
    releases = installed()

    for (key, index, fmt) in [
      ('Time (s)', 0, '%8.2f'),
      ('Memory (MB)', 1, '%8d'),
     ]:
        f.write(color(ANSI.YELLOW, '\n%s\n' % key, colorize))
        previous_release = None
        for release in releases:
            bench_line(f, release, stats, index, fmt, previous_release, colorize)
            previous_release = release
264

265

266
def bench_all(retries, selected_benchs):
    '''
    Prepare the data with make, then run the benchmarks on the installed releases.

    retries:         number of runs of each benchmark on each release
    selected_benchs: tags of the benchmarks to run; an empty list selects all of BENCHS

    A keyboard interrupt stops the benchmarks without raising, so that the
    caller can still go on with the results gathered so far.
    '''
    try:
        for target in ('germline', 'data', 'demo'):
            go("make -C ../.. %s" % target)
        print()
        print()
        for tag, bench in BENCHS.items():
            if not selected_benchs or tag in selected_benchs:
                run_all(tag, bench, retries)
    except KeyboardInterrupt:
        # Deliberate: stop here and keep what was measured
        pass


280 281 282 283

if __name__ == '__main__':
    args = parser.parse_args(sys.argv[1:])

    # Show the usage only when no action at all was requested
    if not (args.current or args.release or args.install_all or args.benchmark):
        parser.print_help()

    if args.current:
        install_current()

    if args.release or args.install_all:
        # An empty selection installs every release: --install-all takes
        # precedence over the releases selected with --install
        install_from_archive([] if args.install_all else args.release)

    if args.benchmark:
        bench_all(args.retries, args.benchs)
        show_benchs(sys.stdout, colorize=True)
        print('\n==>', OUT)
        # Plain-text copy of the summary; the file is closed once written
        with open(OUT, 'w') as summary:
            show_benchs(summary, colorize=False)