Commit b48e4e36 authored by Mikaël Salson
Browse files

should-vdj-to-tap.py: Use grep -E (POSIX) instead of re

grep -E is much faster than Python's re module, whose backtracking matcher can take exponential time on some patterns.
parent 98e07251
...@@ -114,8 +114,8 @@ def should_pattern_to_regex(p): ...@@ -114,8 +114,8 @@ def should_pattern_to_regex(p):
pass pass
if args.ignore_N or args.ignore_del: if args.ignore_N or args.ignore_del:
trim_left = '\d*' trim_left = '[[:digit:]]*'
trim_right = '\d*' trim_right = '[[:digit:]]*'
if args.ignore_N: if args.ignore_N:
n_region = '[ACGT]*' n_region = '[ACGT]*'
...@@ -127,7 +127,7 @@ def should_pattern_to_regex(p): ...@@ -127,7 +127,7 @@ def should_pattern_to_regex(p):
# Gene name, possibly without allele information # Gene name, possibly without allele information
if not '*' in term: if not '*' in term:
# Some 'genes', such as KDE, do not have allele information # Some 'genes', such as KDE, do not have allele information
term += '([*]\d*)?' term += '([*][[:digit:]]*)?'
else: else:
gene, allele = term.split('*') gene, allele = term.split('*')
...@@ -136,10 +136,10 @@ def should_pattern_to_regex(p): ...@@ -136,10 +136,10 @@ def should_pattern_to_regex(p):
if args.ignore_D and ('IGHD' in gene or 'TRBD' in gene or 'TRDD' in gene): if args.ignore_D and ('IGHD' in gene or 'TRBD' in gene or 'TRDD' in gene):
gene = '\S*' gene = '\S*'
allele = '\d*' allele = '[[:digit:]]*'
if args.ignore_allele: if args.ignore_allele:
allele = '\d*' allele = '[[:digit:]]*'
allele_separator = '[*]' allele_separator = '[*]'
if args.ignore_D or args.ignore_allele: if args.ignore_D or args.ignore_allele:
...@@ -215,8 +215,11 @@ def should_result_to_tap(should_pattern, result, tap_id): ...@@ -215,8 +215,11 @@ def should_result_to_tap(should_pattern, result, tap_id):
else: else:
# Testing the should pattern # Testing the should pattern
should_regex = should_pattern_to_regex(should_pattern) should_regex = should_pattern_to_regex(should_pattern)
match = should_regex.search(result) # match = should_regex.search(result)
found = (match is not None) match = os.system("echo '%s' | grep -E '%s' > /dev/null 2>&1" \
% (result.replace("'", "\\'"),
should_regex.pattern.replace("'", "\\'")))
found = (match == 0)
globals()['global_stats'][locus] += 1 globals()['global_stats'][locus] += 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment