Commit dad1bca1 authored by Mathieu Giraud

ncbi.py: get_gene_position(), from eutils API

parent ab308abe
......@@ -9,6 +9,10 @@ API_EUTILS = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'
# URL templates built on the efetch endpoint; each '%s' is a placeholder
# meant to be filled with the % operator.

# nuccore record as FASTA text; placeholder: id
API_NUCCORE_ID = API_EUTILS + 'db=nuccore&rettype=fasta&retmode=text' + '&id=%s'
# nuccore record as FASTA text, restricted by 'from'/'to'; placeholders: id, from, to
API_NUCCORE_ID_FROM_TO = API_EUTILS + 'db=nuccore&rettype=fasta&retmode=text' + '&id=%s' + '&from=%s&to=%s'
# gene record as an XML document summary (docsum); placeholder: id
API_GENE_ID_XML = API_EUTILS + 'db=gene&retmode=xml&rettype=docsum' + '&id=%s'
from xml.dom import minidom, Node
......@@ -31,3 +35,34 @@ def ncbi_and_write(ncbi, additional_header, outs):
for out in outs:
out.write(fasta_with_id)
# Parse output from API_GENE_ID_XML to get genomic positions of a gene
def xml_bang_one(parent):
    '''
    Flatten the direct element children of `parent` into a dict.

    Each child element's tag name is mapped to the nodeValue of its first
    child node (the text, for a simple `<Tag>text</Tag>` element).
    Non-element children, such as whitespace text nodes, are skipped.
    An empty element (`<Tag/>`) has no first child and is mapped to None
    rather than raising AttributeError.
    When several children share a tag name, the last one wins.

    >>> doc = minidom.parseString('<r><a>1</a><b/></r>')
    >>> xml_bang_one(doc.documentElement) == {'a': '1', 'b': None}
    True
    '''
    fields = {}
    for node in parent.childNodes:
        if node.nodeType != Node.ELEMENT_NODE:
            continue
        first = node.firstChild
        # firstChild is None for an element without any content
        fields[node.nodeName] = first.nodeValue if first is not None else None
    return fields
def get_last_LocationHistType(gene):
    '''
    Query eutils for `gene` and return one LocationHistType entry as a dict.

    The docsum XML is fetched from API_GENE_ID_XML, and the first
    'LocationHistType' element of the document is flattened with
    xml_bang_one() (tag name -> text value).
    NOTE(review): the name says "last" while the code takes the first
    element; presumably eutils lists the most recent location first --
    confirm against the docsum format.

    A progress line is written to stderr. The example below needs network
    access.

    Raises IndexError when the response has no LocationHistType element.

    >>> get_last_LocationHistType(6969)
    {u'AssemblyAccVer': u'GCF_000001405.38', u'ChrAccVer': u'NC_000007.14', u'AnnotationRelease': u'109', u'ChrStop': u'38253379', u'ChrStart': u'38253428'}
    '''
    from contextlib import closing

    sys.stderr.write('%% eutils -> gene %s' % gene + '\n')

    # Make sure the HTTP response is closed, even when reading or parsing fails
    with closing(urllib.urlopen(API_GENE_ID_XML % gene)) as response:
        xml = minidom.parseString(response.read())

    locations = xml.getElementsByTagName('LocationHistType')
    if not locations:
        # Same exception type as the former locations[0], with an explicit message
        raise IndexError('eutils: no LocationHistType found for gene %s' % gene)

    return xml_bang_one(locations[0])
def get_gene_positions(gene):
    '''
    Return a (chromosome accession, start, stop) tuple for `gene`.

    The values come from the dict returned by get_last_LocationHistType():
    'ChrAccVer' is returned as is, 'ChrStart' and 'ChrStop' are converted
    to int. As in the example below, start may be greater than stop.

    >>> get_gene_positions(6969)
    (u'NC_000007.14', 38253428, 38253379)
    '''
    location = get_last_LocationHistType(gene)
    accession = location['ChrAccVer']
    bounds = [int(location[key]) for key in ('ChrStart', 'ChrStop')]
    return accession, bounds[0], bounds[1]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment