Commit 6b5e41c9 authored by Mathieu Giraud

should-vdj-to-tap.py: more flexible N-region check

parent 81af8eff
......@@ -61,6 +61,13 @@ if args.after_two:
SPECIAL_KEYWORDS = ['TODO']

# Regex building blocks. Raw strings are used so that '\d' reaches the regex
# engine as written instead of being parsed as an (invalid) string escape.
RE_NUMBER = r'\d*'
RE_SEQ = r'[ACGT]*'
RE_NUMBER_OR_SEQ = '(%s|%s)' % (RE_NUMBER, RE_SEQ)

# Matches a whole term of the form 'number/number-or-sequence' or
# 'number-or-sequence/number'.
# This regex should not clash with very particular gene names such as "IGKV1/OR-3*01" or "TRAV29/DV5*01"
R_FIRST_LAST_N = re.compile('^(%s/%s|%s/%s)$' % (RE_NUMBER, RE_NUMBER_OR_SEQ, RE_NUMBER_OR_SEQ, RE_NUMBER))
def special_keywords(after_two):
    """Return the special keywords to recognise.

    The result is a new list made of SPECIAL_KEYWORDS followed by one extra
    keyword: 'BUG-LOCUS' when `after_two` is truthy, 'BUG' otherwise.
    """
    bug_keyword = 'BUG-LOCUS' if after_two else 'BUG'
    return SPECIAL_KEYWORDS + [bug_keyword]
......@@ -113,7 +120,8 @@ def should_pattern_to_regex(p):
n_insert = int(n_region)
n_region = '[ACGT]{%d}' % n_insert
except ValueError: # already /ACGTG/
pass
n_insert = len(n_region)
n_region = '(%d|%s)' % (n_insert, n_region)
if args.ignore_N or args.ignore_del:
trim_left = '[[:digit:]]*'
......@@ -126,6 +134,10 @@ def should_pattern_to_regex(p):
separator='/?'
return [separator.join((trim_left, n_region, trim_right))]
# insertion/deletion (first term, pre-V) or deletion/insertion (last term, post-J)
if R_FIRST_LAST_N.search(term):
return [term]
# Gene name, possibly without allele information
if not '*' in term:
# Some 'genes', such as KDE, do not have allele information
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment