Commit ecbdbd2e authored by Mathieu Giraud

germline/split-from-imgt.py: get sequences from several files

See #3515.
parent 4bd2e72c
Pipeline #53595 passed with stages
in 60 minutes and 33 seconds
...@@ -309,12 +309,18 @@ class IMGTGENEDBGeneList(): ...@@ -309,12 +309,18 @@ class IMGTGENEDBGeneList():
def split_IMGTGENEDBReferenceSequences(f, gene_list): def split_IMGTGENEDBReferenceSequences(sources, gene_list):
downstream_data = OrderedDefaultListDict() downstream_data = OrderedDefaultListDict()
upstream_data = OrderedDefaultListDict() upstream_data = OrderedDefaultListDict()
for l in open(ReferenceSequences): processed_keys = []
for source in sources:
print()
print()
print('<== %s' % source)
for l in open(source):
# New sequence: compute 'current_files' and stores up/downstream_data[] # New sequence: compute 'current_files' and stores up/downstream_data[]
...@@ -328,6 +334,13 @@ def split_IMGTGENEDBReferenceSequences(f, gene_list): ...@@ -328,6 +334,13 @@ def split_IMGTGENEDBReferenceSequences(f, gene_list):
if species in SPECIES and feature in FEATURES: if species in SPECIES and feature in FEATURES:
seq = l.split('|')[1] seq = l.split('|')[1]
# Check whether this sequence was already retrieved from a previous source
key = '%s %s %s' % (species, seq, feature)
if key in processed_keys:
continue
processed_keys.append(key)
path = SPECIES[species] path = SPECIES[species]
if feature in FEATURES_VDJ: if feature in FEATURES_VDJ:
...@@ -403,9 +416,10 @@ if __name__ == '__main__': ...@@ -403,9 +416,10 @@ if __name__ == '__main__':
else: else:
print (IMGT_LICENSE) print (IMGT_LICENSE)
ReferenceSequences = sys.argv[1] ReferenceSequencesInframe = sys.argv[1]
GeneList = sys.argv[2] ReferenceSequencesAll = sys.argv[2]
GeneList = sys.argv[3]
gl = IMGTGENEDBGeneList(GeneList) gl = IMGTGENEDBGeneList(GeneList)
split_IMGTGENEDBReferenceSequences(ReferenceSequences, gl) split_IMGTGENEDBReferenceSequences([ReferenceSequencesInframe, ReferenceSequencesAll], gl)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment