Commit c205996c authored by AITE Meziane

fix pre_pantograph

parent eed99228
......@@ -692,7 +692,7 @@ def genes_parser(filePath, padmet):
for current_id, dict_values in dict_data.iteritems():
count += 1
gene_id = dict_values["ACCESSION-1"][0]
gene_id = dict_values.get("ACCESSION-1",[current_id])[0]
enzyme_id = dict_values["PRODUCT"][0]
dict_protein_gene_id[enzyme_id] = gene_id
gene_node = Node("gene", gene_id)
......@@ -45,8 +45,9 @@ option:
import re
import itertools
from Bio import SeqIO
from libsbml import *
import libsbml
import docopt
from padmet.utils import sbmlPlugin as sp
def main():
args = docopt.docopt(__doc__)
......@@ -86,14 +87,14 @@ def check_ids(model_metabolic, model_faa, cutoff, verbose=False):
@return: True if same ids, if verbose, print % of genes under cutoff
@type: bool
reader = SBMLReader()
reader = libsbml.SBMLReader()
document = reader.readSBML(model_metabolic)
model = document.getModel()
listOfReactions = model.getListOfReactions()
#convert to set
model_metabolic_ids = set(itertools.chain.from_iterable([parseGeneAssoc(geneAssoc)
for geneAssoc in (parseNotes(r).get("GENE_ASSOCIATION",[None])[0] for r in listOfReactions)
model_metabolic_ids = set(itertools.chain.from_iterable([sp.parseGeneAssoc(geneAssoc)
for geneAssoc in (sp.parseNotes(r).get("GENE_ASSOCIATION",[None])[0] for r in listOfReactions)
if geneAssoc is not None]))
with open(model_faa, "rU") as f:
......@@ -137,63 +138,6 @@ def get_valid_faa(model_faa, dict_ids_file, output):
line = line.replace(origin_id, new_gene_id)
def parseNotes(element):
    """
    From an SBML element (ex: species or reaction), return the content of its
    notes section as a dictionary.

    ex:
    <html:p>BIOCYC: |Alkylphosphonates|</html:p>
    <html:p>CHEBI: 60983</html:p>
    output: {'BIOCYC': ['Alkylphosphonates'],'CHEBI':['60983']}

    Each value is a list obtained by splitting the text after the first ':'
    on ','. Spaces in the key are replaced by '_'; whitespace and '|' are
    removed from the value. If several lines share the same key, the last
    one wins. Lines without a '>...<' tagged content, without ':' or with an
    empty value are ignored.

    @param element: an element from libsbml (must provide getNotesString())
    @type element: libsbml.element
    @return: the dictionary of notes
    @rtype: dict
    """
    notesDict = {}
    for line in element.getNotesString().splitlines():
        # line = <html:p>BIOCYC: |Alkylphosphonates|</html:p>
        start = line.find(">") + 1
        if start == 0:
            # no closing '>' of an opening tag on this line
            continue
        end = line.find("<", start)
        if end == -1:
            # wrapper line such as <notes> or <body ...>: nothing to extract
            continue
        # content = BIOCYC: |Alkylphosphonates|
        # Split on the first ':' only, so values that themselves contain ':'
        # (ex: CHEBI:60983) are kept whole.
        key, separator, value = line[start:end].partition(":")
        if not separator:
            # tagged text without 'KEY: value' structure
            continue
        key = key.replace(" ", "_")
        value = re.sub(r"\s|\|", "", value)
        if value:
            notesDict[key] = value.split(",")
    return notesDict
def parseGeneAssoc(GeneAssocStr):
    """
    Given a grammar of 'and', 'or' and '(' ')'. Extracts genes ids to a list.
    ex:
    '(geneX and geneY) or geneW' => [geneX,geneY,geneW] (order not guaranteed)

    Parentheses and whitespace are removed first, then the string is cut on
    every occurrence of 'and' / 'or'. The operators are matched as plain
    substrings so that an association whose whitespace was already stripped
    (ex: '(geneXandgeneY)orgeneW') is still handled.
    NOTE: as a consequence, a gene id that itself contains 'and' or 'or'
    is split into pieces.

    @param GeneAssocStr: the string containing genes ids, or None
    @type GeneAssocStr: str
    @return: the list of unique, non-empty ids (empty list for None or '')
    @rtype: list
    """
    if GeneAssocStr is None:
        return []
    # sub '(', ')', whitespace by '' and "and" by "or": one separator remains
    compact = re.sub(r"\(|\)|\s", "", GeneAssocStr).replace("and", "or")
    # set for unique genes; empty tokens (empty input, dangling operator)
    # are not gene ids and are dropped
    return list({gene for gene in compact.split("or") if gene})
if __name__ == "__main__":
# Netscape HTTP Cookie File
# This file was generated by libcurl! Edit at your own risk. FALSE / FALSE 0 gem_a1562_ulvagem__session rgi39f0tbu4hgr02vvtujsbek5
......@@ -22,3 +22,7 @@ FALSE / FALSE 1542644157 gem_a1562_ccrgem_UserID 1 FALSE / FALSE 1542644157 gem_a1562_ccrgem_UserName Dyliss FALSE / FALSE 1542644157 gem_a1562_ccrgem_Token 8afc6728636c171388119420d24f5b74 FALSE / FALSE 0 gem_a1562_test__session fp0rpt1no8t5j6biumrvqs7bu3 FALSE / FALSE 1546677268 gem_a1562_test_UserID 1 FALSE / FALSE 1546677268 gem_a1562_test_UserName Test FALSE / FALSE 1546677268 gem_a1562_test_Token f24113a6fbfc631cfd77c492fe42f9a7
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment