Commit 08182493 authored by Mathieu Giraud

split-from-imgt.py: creates .fa files with some special sequences

Dd2 and Dd3 may play interesting roles.
parent 3ac7ab5e
......@@ -8,12 +8,22 @@ import sys
open_files = {}
current_file = None
# Sequences that, in addition to being written to the file of their system,
# are also written to an isolated .fa file of their own.
# Entries are compared against the full second '|'-separated field of the
# header line; the output file is named after the entry with '*' replaced
# by '-' (e.g. 'TRDD2*01' -> 'TRDD2-01.fa').
SPECIAL_SEQUENCES = [
'TRDD2*01',
'TRDD3*01',
]
for l in sys.stdin:
if ">" in l:
current_file = None
current_special = None
if "Homo sapiens" in l and ("V-REGION" in l or "D-REGION" in l or "J-REGION" in l):
system = l.split('|')[1][:4]
seq = l.split('|')[1]
system = seq[:4]
if system.startswith('IG') or system.startswith('TR'):
if system in open_files:
......@@ -24,9 +34,15 @@ for l in sys.stdin:
current_file = open(name, 'w')
open_files[system] = current_file
if seq in SPECIAL_SEQUENCES:
name = '%s.fa' % seq.replace('*', '-')
print " ==>", name
current_special = open(name, 'w')
if current_file:
current_file.write(l)
if current_special:
current_special.write(l)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment