Commit 48393217 authored by Mathieu Giraud

split-from-imgt.py: do not include IGHD*0? constant region in IGHD_upstream

parent b896cbaf
......@@ -129,6 +129,7 @@ SPLIT_SEQUENCES = {'/DV': ['TRAV', 'TRDV']}
DOWNSTREAM_REGIONS=['[A-Z]{3}J', 'TRDD3']
UPSTREAM_REGIONS=['IGHD', 'TRDD', 'TRBD', 'TRDD2']
# Be careful, 'IGHD' regex for UPSTREAM_REGIONS also matches IGHD*0? constant regions.
SPECIES = {
"Homo sapiens": './',
......@@ -167,8 +168,9 @@ for l in sys.stdin:
if system.startswith('IG') or system.startswith('TR'):
store_data_if_updownstream(l, path, downstream_data, DOWNSTREAM_REGIONS)
store_data_if_updownstream(l, path, upstream_data, UPSTREAM_REGIONS)
if feature in FEATURES_VDJ:
store_data_if_updownstream(l, path, downstream_data, DOWNSTREAM_REGIONS)
store_data_if_updownstream(l, path, upstream_data, UPSTREAM_REGIONS)
systems = get_split_files(seq, SPLIT_SEQUENCES)
if systems:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment