Commit 75a201e3 authored by Mathieu Giraud

split-from-imgt.py: reorder, always retrieve gene_data before genome_data

See #3009.
parent ccd58bb6
......@@ -103,10 +103,14 @@ def store_data_if_updownstream(fasta_header, path, data, genes):
def retrieve_genes(f, genes, tag, additional_length, gene_list):
for gene in genes:
for coord in genes[gene]:
# extract from gene
gene_data = ncbi.get_gene_sequence(gene, coord['imgt_data'] + tag, coord['from'], coord['to'], additional_length)
# try to extract from genome
gene_id = gene_list.get_gene_id_from_imgt_name(coord['species'], coord['imgt_name'])
if gene_id:
# gene_id is found, extract from chromosome
try:
(target, start, end) = ncbi.get_gene_positions(gene_id)
print(coord, gene_id, target, start, end)
......@@ -114,13 +118,13 @@ def retrieve_genes(f, genes, tag, additional_length, gene_list):
print('! No positions for %s (%s)' % (gene_id, gene))
gene_id = None
if not gene_id:
# extract from gene
target = gene
start = coord['from']
end = coord['to']
if gene_id:
genome_data = ncbi.get_gene_sequence(target, coord['imgt_data'] + tag, start, end, additional_length)
# TODO: Check that gene_data was in genome_data
gene_data = genome_data
gene_data = ncbi.get_gene_sequence(target, coord['imgt_data'] + tag, start, end, additional_length)
# post-process gene_data
if coord['imgt_data'].split('|')[-1] == FEATURE_J_REGION:
gene_lines = gene_data.split('\n')
gene_lines[1] = gap_j(gene_lines[1].lower())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment