Commit eca46b8c authored by Mathieu Giraud

split-from-imgt.py: reindent

parent 326e3378
...@@ -246,78 +246,78 @@ class IMGTGENEDBGeneList(): ...@@ -246,78 +246,78 @@ class IMGTGENEDBGeneList():
# NOTE(review): this listing is a side-by-side rendering of a "reindent"
# commit: every line shows the old and the new text next to each other, and
# leading whitespace is not preserved, so block nesting cannot be read from
# this view. The notes below only describe what the visible statements do.
#
# Read the reference sequences line by line, route each sequence to the
# output files selected from its header line, then dump the collected
# upstream/downstream data through retrieve_genes().
#
# Parameters:
#   f          -- not read in the visible lines before being rebound in the
#                 final dump loops; the input is opened from the name
#                 `ReferenceSequences` instead (NOTE(review): confirm whether
#                 `f` was meant to be the input).
#   gene_list  -- passed through unchanged to retrieve_genes().
#
# The `print` statement below is Python 2 syntax.
def split_IMGTGENEDBReferenceSequences(f, gene_list): def split_IMGTGENEDBReferenceSequences(f, gene_list):
# Two accumulators, filled by store_data_if_updownstream() and dumped at the end.
downstream_data = defaultdict(lambda: OrderedDefaultListDict()) downstream_data = defaultdict(lambda: OrderedDefaultListDict())
upstream_data = defaultdict(lambda: OrderedDefaultListDict()) upstream_data = defaultdict(lambda: OrderedDefaultListDict())
# NOTE(review): the handle returned by open() is never closed explicitly in the visible lines.
for l in open(ReferenceSequences): for l in open(ReferenceSequences):
# New sequence: compute 'current_files' and stores up/downstream_data[] # New sequence: compute 'current_files' and stores up/downstream_data[]
# A line containing ">" is treated as a header line; its '|'-separated
# fields 1, 2 and 4 are stored as `seq`, `species` and `feature`.
if ">" in l: if ">" in l:
current_files = [] current_files = []
current_special = None current_special = None
species = l.split('|')[2].strip() species = l.split('|')[2].strip()
feature = l.split('|')[4].strip() feature = l.split('|')[4].strip()
if species in SPECIES and feature in FEATURES: if species in SPECIES and feature in FEATURES:
seq = l.split('|')[1] seq = l.split('|')[1]
path = SPECIES[species] path = SPECIES[species]
if feature in FEATURES_VDJ: if feature in FEATURES_VDJ:
# `system` is the first four characters of the sequence name.
system = seq[:4] system = seq[:4]
else: else:
# `system` is everything before the first "*"; str.find() returns -1 when
# "*" is absent, in which case this slice drops the last character instead.
system = seq[:seq.find("*")] system = seq[:seq.find("*")]
if not system in CLASSES: if not system in CLASSES:
print "! Unknown class: ", system print "! Unknown class: ", system
# Every "IGH" substring of `system` becomes "IGHC=".
system = system.replace("IGH", "IGHC=") system = system.replace("IGH", "IGHC=")
# Default target: a single key built from the species path and the system.
keys = [path + system] keys = [path + system]
check_directory_exists(path) check_directory_exists(path)
if system.startswith('IG') or system.startswith('TR'): if system.startswith('IG') or system.startswith('TR'):
if feature in FEATURES_VDJ: if feature in FEATURES_VDJ:
store_data_if_updownstream(l, path, downstream_data, DOWNSTREAM_REGIONS) store_data_if_updownstream(l, path, downstream_data, DOWNSTREAM_REGIONS)
store_data_if_updownstream(l, path, upstream_data, UPSTREAM_REGIONS) store_data_if_updownstream(l, path, upstream_data, UPSTREAM_REGIONS)
# A truthy result from get_split_files() replaces the default key list.
systems = get_split_files(seq, SPLIT_SEQUENCES) systems = get_split_files(seq, SPLIT_SEQUENCES)
if systems: if systems:
keys = [path + s for s in systems] keys = [path + s for s in systems]
# `open_files` is not defined in the visible lines; it is indexed by key here.
for key in keys: for key in keys:
current_files.append(open_files[key]) current_files.append(open_files[key])
# Sequences listed in SPECIAL_SEQUENCES also go to their own file, named
# after the sequence with every "*" replaced by "-".
if seq in SPECIAL_SEQUENCES: if seq in SPECIAL_SEQUENCES:
name = '%s.fa' % seq.replace('*', '-') name = '%s.fa' % seq.replace('*', '-')
current_special = verbose_open_w(name) current_special = verbose_open_w(name)
# Possibly gap J_REGION # Possibly gap J_REGION
# `feature` and `current_files` here are the values set by the most recent
# header line. NOTE(review): they look unbound if the input's first line has
# no ">" -- TODO confirm the input always starts with a header.
if '>' not in l and current_files and feature == FEATURE_J_REGION: if '>' not in l and current_files and feature == FEATURE_J_REGION:
l = gap_j(l) l = gap_j(l)
# Dump 'l' to the concerned files # Dump 'l' to the concerned files
for current_file in current_files: for current_file in current_files:
current_file.write(l) current_file.write(l)
if current_special: if current_special:
current_special.write(l) current_special.write(l)
# End, loop to next 'l' # End, loop to next 'l'
# Dump up/downstream data # Dump up/downstream data
# One output file per key of each accumulator; `f` is rebound to that file.
# The upstream length is passed negated, the downstream length as is.
# NOTE(review): the files from verbose_open_w() are not closed in the visible lines.
for system in upstream_data: for system in upstream_data:
f = verbose_open_w(system + TAG_UPSTREAM + '.fa') f = verbose_open_w(system + TAG_UPSTREAM + '.fa')
retrieve_genes(f, upstream_data[system], TAG_UPSTREAM, -LENGTH_UPSTREAM, gene_list) retrieve_genes(f, upstream_data[system], TAG_UPSTREAM, -LENGTH_UPSTREAM, gene_list)
for system in downstream_data: for system in downstream_data:
f = verbose_open_w(system + TAG_DOWNSTREAM + '.fa') f = verbose_open_w(system + TAG_DOWNSTREAM + '.fa')
retrieve_genes(f, downstream_data[system], TAG_DOWNSTREAM, LENGTH_DOWNSTREAM, gene_list) retrieve_genes(f, downstream_data[system], TAG_DOWNSTREAM, LENGTH_DOWNSTREAM, gene_list)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment