Commit eca46b8c authored by Mathieu Giraud

split-from-imgt.py: reindent

parent 326e3378
......@@ -246,78 +246,78 @@ class IMGTGENEDBGeneList():
def split_IMGTGENEDBReferenceSequences(f, gene_list):
    """Dispatch each record of the reference FASTA to its output file(s).

    The input is read from the module-level ``ReferenceSequences`` path.
    Every header line (a line containing ``>``) is split on ``|``: field 1
    is the sequence name, field 2 the species and field 4 the feature.
    Records whose species is in ``SPECIES`` and whose feature is in
    ``FEATURES`` are written to the handles found in ``open_files``; names
    listed in ``SPECIAL_SEQUENCES`` are additionally written to a file of
    their own.  Once the whole input has been read, the collected
    up/downstream data are handed to ``retrieve_genes``.

    Args:
        f: Not read by this function (kept for interface compatibility).
        gene_list: Passed through unchanged to ``retrieve_genes``.
    """
    # NOTE(review): the block nesting below was reconstructed from the data
    # flow of the statements -- confirm against the canonical file.
    downstream_data = defaultdict(lambda: OrderedDefaultListDict())
    upstream_data = defaultdict(lambda: OrderedDefaultListDict())

    # Defined before the loop so that lines preceding the first header are
    # skipped instead of raising NameError.
    current_files = []
    current_special = None
    feature = None

    # 'with' guarantees the input handle is closed, even on error.
    with open(ReferenceSequences) as reference:
        for line in reference:
            is_header = ">" in line

            # New sequence: compute 'current_files' and store
            # up/downstream_data[]
            if is_header:
                current_files = []
                current_special = None

                fields = line.split('|')
                species = fields[2].strip()
                feature = fields[4].strip()

                if species in SPECIES and feature in FEATURES:
                    seq = fields[1]
                    path = SPECIES[species]

                    if feature in FEATURES_VDJ:
                        system = seq[:4]
                    else:
                        system = seq[:seq.find("*")]
                        if system not in CLASSES:
                            # Two spaces on purpose: same output as the
                            # former 'print "! Unknown class: ", system'.
                            print("! Unknown class:  %s" % system)
                        system = system.replace("IGH", "IGHC=")

                    keys = [path + system]

                    check_directory_exists(path)

                    if system.startswith(('IG', 'TR')):
                        if feature in FEATURES_VDJ:
                            store_data_if_updownstream(
                                line, path, downstream_data, DOWNSTREAM_REGIONS)
                            store_data_if_updownstream(
                                line, path, upstream_data, UPSTREAM_REGIONS)

                        # A sequence may be redirected to several split files
                        systems = get_split_files(seq, SPLIT_SEQUENCES)
                        if systems:
                            keys = [path + s for s in systems]
                        for key in keys:
                            current_files.append(open_files[key])

                    if seq in SPECIAL_SEQUENCES:
                        name = '%s.fa' % seq.replace('*', '-')
                        current_special = verbose_open_w(name)

            # Possibly gap J_REGION (sequence lines only, never headers)
            if not is_header and current_files and feature == FEATURE_J_REGION:
                line = gap_j(line)

            # Dump the line to the concerned files
            for current_file in current_files:
                current_file.write(line)

            if current_special:
                current_special.write(line)

    # Dump up/downstream data
    for system in upstream_data:
        out = verbose_open_w(system + TAG_UPSTREAM + '.fa')
        retrieve_genes(out, upstream_data[system], TAG_UPSTREAM,
                       -LENGTH_UPSTREAM, gene_list)

    for system in downstream_data:
        out = verbose_open_w(system + TAG_DOWNSTREAM + '.fa')
        retrieve_genes(out, downstream_data[system], TAG_DOWNSTREAM,
                       LENGTH_DOWNSTREAM, gene_list)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment