From 8a42fa9387796e18dcdfabd5c9fd96c57ed0f1d9 Mon Sep 17 00:00:00 2001 From: Mathieu Giraud Date: Thu, 10 Nov 2016 09:50:37 +0100 Subject: [PATCH] should-vdj-to-tap.py: move Vidjil specific code to repseq_vdj.py --- algo/tests/repseq_vdj.py | 55 +++++++++++++++++++++++++++++- algo/tests/should-vdj-to-tap.py | 60 ++++----------------------------- 2 files changed, 61 insertions(+), 54 deletions(-) diff --git a/algo/tests/repseq_vdj.py b/algo/tests/repseq_vdj.py index 47bbbe37b..490c26602 100644 --- a/algo/tests/repseq_vdj.py +++ b/algo/tests/repseq_vdj.py @@ -10,8 +10,9 @@ python repsep_vdj.py data-curated/curated_TR.fa data-curated/curated_tr_Summary. python repseq_vdj.py data-curated/mixcr.results > data-curated/mixcr.vdj ''' -import sys +import sys +import os V = 'V' D = 'D' @@ -185,6 +186,58 @@ def header_vquest_results(ff_fasta, ff_vquest): r = IMGT_VQUEST_Result(vquest) yield (fasta.replace('>', ''), r.to_vdj()) + + +### Vidjil + +VIDJIL_FINE = '{directory}/vidjil -X 100 -# "#" -c segment -i -3 -d -g {directory}/germline %s >> %s' +VIDJIL_KMER = '{directory}/vidjil -w 20 -# "#" -b out -c windows -uuuU -2 -i -g {directory}/germline %s > /dev/null ; cat out/out.segmented.vdj.fa out/out.unsegmented.vdj.fa >> %s' + +def should_results_from_vidjil_output(f_log): + ''' + Yield (should, result) couples from a Vidjil output including lines in the form of: + >TRDD2*01_1/AGG/1_TRDD3*01__TRD+ + VJ 0 84 88 187 TRDD2*01 1/AGG/1 TRDD3*01 TRD+ + or + >TRDV3*01_0//0_TRDJ4*01 ! + VJ 0 49 50 97 TRD UNSEG noisy + ''' + + for l in open(f_log): + if not l: + continue + if l[0] == '>': + l = l.strip() + pos = l.find(' + ') if ' + ' in l else l.find(' - ') + should = l[1:pos].replace('_', ' ') + + pos = l.find('\t') + result = l[pos+1:] + ' ' + + yield (should, result) + +def should_results_from_vidjil(program, f_should, f_log): + ''' + Launch the program on f_should + Yields (#, >) couples of V(D)J designations, such as in: + #TRDD2*01 1/AGG/1 TRDD3*01 TRD+ + >TRDD2*01 TRDD3*01 + ''' + + cmd = program % (f_should, f_log) + + with open(f_log, 'w') as f: + f.write('# %s\n\n' % cmd) + + exit_code = os.system(cmd) + if exit_code: + print "Error. The program halted with exit code %s." % exit_code + sys.exit(3) + + return should_results_from_vidjil_output(f_log) + + + +### Main + if __name__ == '__main__': if 'mixcr' in sys.argv[1]: diff --git a/algo/tests/should-vdj-to-tap.py b/algo/tests/should-vdj-to-tap.py index 89bf13bee..b4fb2b4a4 100644 --- a/algo/tests/should-vdj-to-tap.py +++ b/algo/tests/should-vdj-to-tap.py @@ -28,13 +28,11 @@ from collections import defaultdict import os import argparse - -VIDJIL_FINE = '{directory}/vidjil -X 100 -# "#" -c segment -i -3 -d -g {directory}/germline %s >> %s' -VIDJIL_KMER = '{directory}/vidjil -w 20 -# "#" -b out -c windows -uuuU -2 -i -g {directory}/germline %s > /dev/null ; cat out/out.segmented.vdj.fa out/out.unsegmented.vdj.fa >> %s' +import repseq_vdj parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) -parser.add_argument('--program', '-p', default=VIDJIL_FINE, help='program to launch on each file (%(default)s)') -parser.add_argument('-q', dest='program', action='store_const', const=VIDJIL_KMER, help='shortcut for -p (VIDJIL_KMER), to be used with -2') +parser.add_argument('--program', '-p', default=repseq_vdj.VIDJIL_FINE, help='program to launch on each file (%(default)s)') +parser.add_argument('-q', dest='program', action='store_const', const=repseq_vdj.VIDJIL_KMER, help='shortcut for -p (VIDJIL_KMER), to be used with -2') parser.add_argument('--ignore_N', '-N', action='store_true', help='ignore N patterns, checking only gene and allele names') parser.add_argument('--ignore_allele', '-A', action='store_true', help='ignore allele, checking only gene names') parser.add_argument('--ignore_D', '-D', action='store_true', help='ignore D gene names and alleles') @@ -67,53 +65,6 @@ global_stats_failed = defaultdict(int) global_stats_todo = defaultdict(int) -def should_result_from_vidjil(l): - ''' - Return a (should, result) couple from a Vidjil output line in the form of: - >TRDD2*01_1/AGG/1_TRDD3*01__TRD+ + VJ 0 84 88 187 TRDD2*01 1/AGG/1 TRDD3*01 TRD+ - or - >TRDV3*01_0//0_TRDJ4*01 ! + VJ 0 49 50 97 TRD UNSEG noisy - ''' - - l = l.strip() - pos = l.find(' + ') if ' + ' in l else l.find(' - ') - should = l[1:pos].replace('_', ' ') - - pos = l.find('\t') - result = l[pos+1:] + ' ' - - return (should, result) - - -def should_results_from_program(f_should): - ''' - Launch the program on f_should - Yields (#, >) couples of V(D)J designations, such as in: - #TRDD2*01 1/AGG/1 TRDD3*01 TRD+ - >TRDD2*01 TRDD3*01 - ''' - - f_log = f_should + PROG_TAG + LOG_SUFFIX - f_log = f_log.replace(SHOULD_SUFFIX, '') - - program = args.program.replace('{directory}',args.directory) - cmd = program % (f_should, f_log) - - with open(f_log, 'w') as f: - f.write('# %s\n\n' % cmd) - - exit_code = os.system(cmd) - if exit_code: - print "Error. The program halted with exit code %s." % exit_code - sys.exit(3) - - for l in open(f_log): - if not l: - continue - if l[0] == '>': - yield should_result_from_vidjil(l) - - def should_results_from_vdj(f_vdj): ''' Parses a .vdj file @@ -321,8 +272,11 @@ def should_to_tap_one_file(f_should): f_tap = f_should + PROG_TAG + TAP_SUFFIX f_tap = f_tap.replace(SHOULD_SUFFIX, '') + f_log = f_should + PROG_TAG + LOG_SUFFIX + f_log = f_log.replace(SHOULD_SUFFIX, '') + print "<== %s" % f_should - should_results = list(should_results_from_program(f_should)) + should_results = list(repseq_vdj.should_results_from_vidjil(args.program.replace('{directory}',args.directory), f_should, f_log)) write_should_results_to_tap(should_results, f_tap) -- GitLab