Commit 5ca3192e authored by BELCOUR Arnaud's avatar BELCOUR Arnaud

Make script compatible with Python 3.

parent 2b725133
......@@ -77,7 +77,7 @@ def padmet_to_asp(padmet_file, output, verbose = False):
with open(output, 'w') as f:
#recover all reactions's data
reactions = [node for node in padmet.dicOfNode.itervalues()
reactions = [node for node in padmet.dicOfNode.values()
if node.type == "reaction"]
nb_reactions = len(reactions)
......@@ -214,7 +214,7 @@ def padmet_to_asp(padmet_file, output, verbose = False):
except KeyError:
pass
#recover all pathway's data
pathways = [node for node in padmet.dicOfNode.itervalues()
pathways = [node for node in padmet.dicOfNode.values()
if node.type == "pathway"]
nb_pathways = len(pathways)
count = 0
......
......@@ -56,7 +56,7 @@ def raw_data():
url_bigg = 'http://bigg.ucsd.edu/api/v2/'
raw_data = requests.get(url_bigg + "universal/reactions").json()['results']
all_reactions_ids = [rxn_dict['bigg_id'] for rxn_dict in raw_data if not rxn_dict['bigg_id'].startswith("BIOMASS")]
print("%s reactions to extract" %(len(all_reactions_ids)))
print(("%s reactions to extract" %(len(all_reactions_ids))))
with open("/home/maite/Documents/data/bigg/bigg_raw.txt", 'w') as f:
count = 0
......@@ -129,7 +129,7 @@ def raw_to_padmet():
count += 1
print("%s/%s" %(count, len(raw_data)))
rxn_id = rxn_dict["bigg_id"]
if rxn_id not in padmet.dicOfNode.keys():
if rxn_id not in list(padmet.dicOfNode.keys()):
rxn_metabolites = rxn_dict["metabolites"]
rxn_name = rxn_dict["name"]
rxn_direction = rxn_dict["direction"]
......@@ -141,9 +141,9 @@ def raw_to_padmet():
has_xref_rlt = Relation(rxn_id, "has_xref", xref_id)
list_of_relation.append(has_xref_rlt)
for db, k in rxn_xrefs.items():
for db, k in list(rxn_xrefs.items()):
_id = k[0]["id"]
if db in xref_node.misc.keys() and _id not in xref_node.misc[db]:
if db in list(xref_node.misc.keys()) and _id not in xref_node.misc[db]:
xref_node.misc[db].append(_id)
else:
xref_node.misc[db] = [_id]
......@@ -199,7 +199,7 @@ def add_kegg_pwy(pwy_file, padmetRef, verbose = False):
except KeyError:
pwy_node.misc["COMMON_NAME"] = [name]
if rxn_id:
if rxn_id in padmetRef.dicOfNode.keys():
if rxn_id in list(padmetRef.dicOfNode.keys()):
pwy_rlt = Relation(rxn_id,"is_in_pathway",pwy_id)
padmetRef._addRelation(pwy_rlt)
else:
......
......@@ -115,7 +115,7 @@ def main():
for rxn_id in [i for i in all_reactions_ids if not i.startswith("BIOMASS")]:
count += 1
if verbose: print("reaction: %s, %s/%s" %(rxn_id, count, len(all_reactions_ids)))
if rxn_id not in padmetRef.dicOfNode.keys():
if rxn_id not in list(padmetRef.dicOfNode.keys()):
rxn_response = requests.get(url_bigg + "universal/reactions/" +rxn_id)
rxn_dict = rxn_response.json()
......@@ -149,9 +149,9 @@ def main():
has_xref_rlt = Relation(rxn_id, "has_xref", xref_id)
list_of_relation.append(has_xref_rlt)
for db, k in rxn_xrefs.items():
for db, k in list(rxn_xrefs.items()):
_id = k[0]["id"]
if db in xref_node.misc.keys() and _id not in xref_node.misc[db]:
if db in list(xref_node.misc.keys()) and _id not in xref_node.misc[db]:
xref_node.misc[db].append(_id)
else:
xref_node.misc[db] = [_id]
......@@ -195,7 +195,7 @@ def main():
chrono = (time() - chronoDepart)
partie_entiere, partie_decimale = str(chrono).split('.')
chrono = ".".join([partie_entiere, partie_decimale[:3]])
if verbose: print "done in: ", chrono, "s !"
if verbose: print("done in: ", chrono, "s !")
def add_kegg_pwy(pwy_file, padmetRef, verbose = False):
global list_of_relation
......@@ -212,7 +212,7 @@ def add_kegg_pwy(pwy_file, padmetRef, verbose = False):
except KeyError:
pwy_node.misc["COMMON_NAME"] = [name]
if rxn_id:
if rxn_id in padmetRef.dicOfNode.keys():
if rxn_id in list(padmetRef.dicOfNode.keys()):
pwy_rlt = Relation(rxn_id,"is_in_pathway",pwy_id)
padmetRef._addRelation(pwy_rlt)
else:
......
......@@ -65,7 +65,7 @@ def main():
for line in file_in_array[start_index:]]
reactions_ids = [convert_from_coded_id(r)[0] for r in encoded_reactions]
nb_reactions = str(len(reactions_ids))
if verbose: print(nb_reactions+" reactions to check")
if verbose: print(nb_reactions+" reactions to check")
with open(output,'w') as f:
header = ["idRef","Common name","EC-number","Formula (with id)","Formula (with cname)","Action","Comment", "Genes"]
header = "\t".join(header)+"\n"
......
......@@ -74,7 +74,7 @@ def main():
#2: extract all pathways from database Ref
if verbose: print("extract all pathways from database Ref")
all_pathways_id = (node.id for node in padmetRef.dicOfNode.values() if node.type == "pathway")
all_pathways_id = (node.id for node in list(padmetRef.dicOfNode.values()) if node.type == "pathway")
for pwy_id in all_pathways_id:
rxns_assoc = [rlt.id_in for rlt in padmetRef.dicOfRelationOut[pwy_id]
if rlt.type == "is_in_pathway" and padmetRef.dicOfNode[rlt.id_in].type == "reaction"]
......@@ -95,15 +95,15 @@ def main():
dict_mnxref[mnx][db].add(_id)
except KeyError:
dict_mnxref[mnx].update({db: set([_id])})
for k,v in dict_mnxref.items():
if len(v.keys()) < 2:
for k,v in list(dict_mnxref.items()):
if len(list(v.keys())) < 2:
dict_mnxref.pop(k)
#map reactions id
assoc_sgs_id_reactions_mapped = {}
for sgs_id, set_rxn in assoc_sgs_id_reactions.iteritems():
for assoc_dict in dict_mnxref.values():
for sgs_id, set_rxn in assoc_sgs_id_reactions.items():
for assoc_dict in list(dict_mnxref.values()):
for _id in assoc_dict[db_origin]:
if _id in set_rxn:
try:
......@@ -116,8 +116,8 @@ def main():
if an intersection length is > 0 ==> the sgs covere a part of this pwy.
Extract which reactions and the ratio on all the reactions in the pathways.
"""
for sgs_id, rxns_in_sgs in assoc_sgs_id_reactions_mapped.iteritems():
for pwy_id, rxns_in_pwy in assoc_pathways_reactions.iteritems():
for sgs_id, rxns_in_sgs in assoc_sgs_id_reactions_mapped.items():
for pwy_id, rxns_in_pwy in assoc_pathways_reactions.items():
rxns_inter = rxns_in_sgs.intersection(rxns_in_pwy)
len_rxns_inter = len(rxns_inter)
if len_rxns_inter > 0 and float(len_rxns_inter)/float(len(rxns_in_pwy)) > float(1)/float(3):
......@@ -153,13 +153,13 @@ def main():
header = ["PATHWAYS ID\SGS ID"]+all_sgs
f.write("\t".join(header)+"\n")
for pwy_id,sgs_dict in assoc_pathways_sgs.iteritems():
for pwy_id,sgs_dict in assoc_pathways_sgs.items():
line = [""]*len(header)
try:
line[0] = pwy_id+"/"+padmetRef.dicOfNode[pwy_id].misc["COMMON_NAME"][0]
except KeyError:
line[0] = pwy_id
for sgs_id, data in sgs_dict.iteritems():
for sgs_id, data in sgs_dict.items():
sgs_index = header.index(sgs_id)
line[sgs_index] = data
line = "\t".join(line)+"\n"
......
......@@ -44,7 +44,7 @@ def main():
if not args["--rxn_id"]:
try:
rxn = (rxn for rxn in model.reactions if rxn.objective_coefficient == 1.0).next()
rxn = next(rxn for rxn in model.reactions if rxn.objective_coefficient == 1.0)
except StopIteration:
print("No reaction id given and no reaction with obj coefficient to 1.0, enable to get the reaction")
exit()
......@@ -80,7 +80,7 @@ def main():
f.write(line)
#check if have gene assoc
try:
k = [i for i in rxn.notes.keys() if i.lower().startswith("gene")][0]
k = [i for i in list(rxn.notes.keys()) if i.lower().startswith("gene")][0]
gene_assoc = rxn.notes[k][0]
line = ["linked_gene", gene_assoc]
except IndexError:
......
......@@ -45,7 +45,7 @@ def main():
reactions_to_remove = []
for reaction in listOfReactions:
if "GENE_ASSOCIATION" not in parseNotes(reaction).keys():
if "GENE_ASSOCIATION" not in list(parseNotes(reaction).keys()):
reactions_to_remove.append(reaction.getId())
for rId in reactions_to_remove:
listOfReactions.remove(rId)
......
# -*- coding: utf-8 -*-
"""
This file is part of padmet-utils.
padmet-utils is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
padmet-utils is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with padmet-utils. If not, see <http://www.gnu.org/licenses/>.
@author: Meziane AITE, meziane.aite@inria.fr
Description:
convert GBK to FAA with Bio package
usage:
gbk_to_faa.py --gbk=FILE --output=FILE [--qualifier=STR] [-v]
option:
-h --help Show help.
--gbk=FILE pathname to the gbk file.
--output=FILE pathename to the output, a FAA file.
--qualifier=STR the qualifier of the gene id [default: locus_tag].
-v print info
"""
from Bio import SeqIO
import docopt
def main():
args = docopt.docopt(__doc__)
gbk_file = args["--gbk"]
faa_file = args["--output"]
qualifier = args["--qualifier"]
verbose = args["-v"]
dict_faa = {}
with open(gbk_file, "rU") as gbk:
for seq_record in SeqIO.parse(gbk, "genbank"):
seq_feature_cds = (seq_feature for seq_feature in seq_record.features if seq_feature.type == "CDS")
for seq_feature in seq_feature_cds:
try:
dict_faa[seq_feature.qualifiers[qualifier][0]] = seq_feature.qualifiers['translation'][0]
except KeyError:
if verbose:
try:
print ("locus without Translation: "+seq_feature.qualifiers['locus_tag'][0])
except KeyError:
print ("locus without Translation: "+seq_feature.qualifiers.get('gene',["Unknown"])[0])
with open(faa_file,'w') as f:
for locus_tag, seq in dict_faa.iteritems():
f.write(">%s\n%s\n" % (locus_tag,seq))
if __name__ == "__main__":
# -*- coding: utf-8 -*-
"""
This file is part of padmet-utils.
padmet-utils is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
padmet-utils is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with padmet-utils. If not, see <http://www.gnu.org/licenses/>.
@author: Meziane AITE, meziane.aite@inria.fr
Description:
convert GBK to FAA with Bio package
usage:
gbk_to_faa.py --gbk=FILE --output=FILE [--qualifier=STR] [-v]
option:
-h --help Show help.
--gbk=FILE pathname to the gbk file.
--output=FILE pathename to the output, a FAA file.
--qualifier=STR the qualifier of the gene id [default: locus_tag].
-v print info
"""
from Bio import SeqIO
import docopt
def main():
args = docopt.docopt(__doc__)
gbk_file = args["--gbk"]
faa_file = args["--output"]
qualifier = args["--qualifier"]
verbose = args["-v"]
dict_faa = {}
with open(gbk_file, "rU") as gbk:
for seq_record in SeqIO.parse(gbk, "genbank"):
seq_feature_cds = (seq_feature for seq_feature in seq_record.features if seq_feature.type == "CDS")
for seq_feature in seq_feature_cds:
try:
dict_faa[seq_feature.qualifiers[qualifier][0]] = seq_feature.qualifiers['translation'][0]
except KeyError:
if verbose:
try:
print("locus without Translation: "+seq_feature.qualifiers['locus_tag'][0])
except KeyError:
print("locus without Translation: "+seq_feature.qualifiers.get('gene',["Unknown"])[0])
with open(faa_file,'w') as f:
for locus_tag, seq in dict_faa.items():
f.write(">%s\n%s\n" % (locus_tag,seq))
if __name__ == "__main__":
main()
\ No newline at end of file
......@@ -73,7 +73,7 @@ def main():
rxn_data.pop("rxn12985")
if verbose: print("updating padmet")
count = 0
for rxn_id, rxn_dict in rxn_data.items():
for rxn_id, rxn_dict in list(rxn_data.items()):
count += 1
if verbose: print("reaction: %s, %s/%s" %(rxn_id, count, len(rxn_data)))
try:
......@@ -82,7 +82,7 @@ def main():
except KeyError:
print(rxn_id)
continue
if rxn_id not in padmetRef.dicOfNode.keys():
if rxn_id not in list(padmetRef.dicOfNode.keys()):
if rxn_dict["reversibility"] == ">":
rxn_direction = "LEFT-TO-RIGHT"
else:
......@@ -160,7 +160,7 @@ def main():
chrono = (time() - chronoDepart)
partie_entiere, partie_decimale = str(chrono).split('.')
chrono = ".".join([partie_entiere, partie_decimale[:3]])
if verbose: print "done in: ", chrono, "s !"
if verbose: print("done in: ", chrono, "s !")
def add_kegg_pwy(pwy_file, padmetRef, verbose = False):
global list_of_relation
......@@ -177,7 +177,7 @@ def add_kegg_pwy(pwy_file, padmetRef, verbose = False):
except KeyError:
pwy_node.misc["COMMON_NAME"] = [name]
if rxn_id:
if rxn_id in padmetRef.dicOfNode.keys():
if rxn_id in list(padmetRef.dicOfNode.keys()):
pwy_rlt = Relation(rxn_id,"is_in_pathway",pwy_id)
padmetRef._addRelation(pwy_rlt)
else:
......
......@@ -40,7 +40,7 @@ def main():
padmet = PadmetSpec(padmet_file)
all_metabolites = sorted(set([rlt.id_out for rlt in padmet.getAllRelation() if rlt.type in ["consumes","produces"]]))
all_reactions = sorted([node.id for node in padmet.dicOfNode.values() if node.type == "reaction"])
all_reactions = sorted([node.id for node in list(padmet.dicOfNode.values()) if node.type == "reaction"])
#col = reactions, row = metabolites
matrix_dict = {}
for met in all_metabolites:
......
......@@ -86,7 +86,7 @@ def main():
path = args["--to_add"]
if not path.endswith("/"):
path += "/"
all_files = [i for i in os.walk(path).next()[2] if not i.startswith(".~lock")]
all_files = [i for i in next(os.walk(path))[2] if not i.startswith(".~lock")]
padmetFiles = [path+i for i in all_files if i.endswith(".padmet")]
else:
padmetFiles = [args["--to_add"]]
......@@ -110,7 +110,7 @@ def main():
chrono = (time() - chronoDepart)
partie_entiere, partie_decimale = str(chrono).split('.')
chrono = ".".join([partie_entiere, partie_decimale[:3]])
if verbose: print "done in: ", chrono, "s !"
if verbose: print("done in: ", chrono, "s !")
if __name__ == "__main__":
......
......@@ -54,7 +54,7 @@ options:
-v
"""
from padmet.classes import PadmetSpec, PadmetRef
from itertools import izip_longest
from itertools import zip_longest
import os
import csv
import docopt
......@@ -80,7 +80,7 @@ def main():
padmetSpec_name = os.path.splitext(os.path.basename(padmetSpec_file))[0]
padmetSpec_folder = output_dir+padmetSpec_name+"/"
if not os.path.isdir(padmetSpec_folder):
if verbose: print("Creating folder %s" %padmetSpec_folder)
if verbose: print(("Creating folder %s" %padmetSpec_folder))
os.makedirs(padmetSpec_folder)
#if padmetRef given, create folder for padmetRef
......@@ -105,7 +105,7 @@ def main():
writer.writerow([padmetRef_name, padmetRef_name])
if verbose: print("\tExtracting reactions")
all_rxn_nodes = [node for node in padmetRef.dicOfNode.values() if node.type == "reaction"]
all_rxn_nodes = [node for node in list(padmetRef.dicOfNode.values()) if node.type == "reaction"]
if all_rxn_nodes: extract_nodes(padmetRef, all_rxn_nodes, "reaction", padmetRef_folder+"rxn.tsv", {"in@metabolic_network":[padmetRef_name]})
if verbose: print("\t%s reactions" %len(all_rxn_nodes))
......@@ -115,12 +115,12 @@ def main():
if verbose: print("\t%s compounds" %len(all_cpd_nodes))
if verbose: print("\tExtracting pathways")
all_pwy_nodes = [node for node in padmetRef.dicOfNode.values() if node.type == "pathway"]
all_pwy_nodes = [node for node in list(padmetRef.dicOfNode.values()) if node.type == "pathway"]
if all_pwy_nodes: extract_nodes(padmetRef, all_pwy_nodes, "pathway", padmetRef_folder+"pwy.tsv")
if verbose: print("\t%s pathways" %len(all_pwy_nodes))
if verbose: print("\tExtracting xrefs")
all_xrefs_nodes = [node for node in padmetRef.dicOfNode.values() if node.type == "xref"]
all_xrefs_nodes = [node for node in list(padmetRef.dicOfNode.values()) if node.type == "xref"]
if all_xrefs_nodes: extract_nodes(padmetRef, all_xrefs_nodes, "xref", padmetRef_folder+"xref.tsv")
if verbose: print("\t%s xrefs" %len(all_xrefs_nodes))
......@@ -189,7 +189,7 @@ def main():
writer.writerow([padmetSpec_name,padmetSpec_name])
if verbose: print("\tExtracting reactions")
spec_rxn_nodes = [node for node in padmetSpec.dicOfNode.values() if node.type == "reaction"]
spec_rxn_nodes = [node for node in list(padmetSpec.dicOfNode.values()) if node.type == "reaction"]
if all_rxn_nodes: extract_nodes(padmetSpec, spec_rxn_nodes, "reaction", padmetSpec_folder+"rxn.tsv", {"in@metabolic_network":[padmetSpec_name]})
if verbose: print("\t%s reactions" %len(all_rxn_nodes))
......@@ -199,22 +199,22 @@ def main():
if verbose: print("\t%s compounds" %len(all_cpd_nodes))
if verbose: print("\tExtracting pathways")
spec_pwy_nodes = [node for node in padmetSpec.dicOfNode.values() if node.type == "pathway"]
spec_pwy_nodes = [node for node in list(padmetSpec.dicOfNode.values()) if node.type == "pathway"]
if all_pwy_nodes: extract_nodes(padmetSpec, spec_pwy_nodes, "pathway", padmetSpec_folder+"pwy.tsv")
if verbose: print("\t%s pathways" %len(all_pwy_nodes))
if verbose: print("\tExtracting xrefs")
spec_xrefs_nodes = [node for node in padmetSpec.dicOfNode.values() if node.type == "xref"]
spec_xrefs_nodes = [node for node in list(padmetSpec.dicOfNode.values()) if node.type == "xref"]
if spec_xrefs_nodes: extract_nodes(padmetSpec, spec_xrefs_nodes, "xref", padmetSpec_folder+"xref.tsv")
if verbose: print("\t%s xrefs" %len(spec_xrefs_nodes))
if verbose: print("\tExtracting all genes")
spec_genes_nodes = [node for node in padmetSpec.dicOfNode.values() if node.type == "gene"]
spec_genes_nodes = [node for node in list(padmetSpec.dicOfNode.values()) if node.type == "gene"]
if spec_genes_nodes: extract_nodes(padmetSpec, spec_genes_nodes, "gene", padmetSpec_folder+"gene.tsv", opt_col = {"in@metabolic_network":[padmetSpec_name]})
if verbose: print("\t%s genes" %len(spec_genes_nodes))
if verbose: print("\tExtracting all reconstructionData")
spec_recData_nodes = [node for node in padmetSpec.dicOfNode.values() if node.type == "reconstructionData"]
spec_recData_nodes = [node for node in list(padmetSpec.dicOfNode.values()) if node.type == "reconstructionData"]
if spec_genes_nodes: extract_nodes(padmetSpec, spec_recData_nodes, "reconstructionData", padmetSpec_folder+"reconstructionData.tsv")
if verbose: print("\t%s reconstructionData" %len(spec_recData_nodes))
......@@ -305,9 +305,9 @@ def extract_nodes(padmet, nodes, entity_id, output, opt_col = {}):
#set of keys from each misc dict
all_keys = set()
#opt_col: if wanted to add extra data
[all_keys.update(node.misc.keys()) for node in nodes]
[all_keys.update(list(node.misc.keys())) for node in nodes]
if opt_col:
fieldnames = [entity_id] + opt_col.keys() + list(all_keys)
fieldnames = [entity_id] + list(opt_col.keys()) + list(all_keys)
else:
fieldnames = [entity_id] + list(all_keys)
......@@ -318,7 +318,7 @@ def extract_nodes(padmet, nodes, entity_id, output, opt_col = {}):
try:
names = [padmet.dicOfNode[rlt.id_out].misc["LABEL"] for rlt in padmet.dicOfRelationIn[node.id] if rlt.type == "has_name"]
names = names[0]
if "COMMON-NAME" in node.misc.keys():
if "COMMON-NAME" in list(node.misc.keys()):
[node.misc["COMMON-NAME"].append(name) for name in names if name not in node.misc["COMMON-NAME"]]
else:
node.misc["COMMON-NAME"] = names
......@@ -327,7 +327,7 @@ def extract_nodes(padmet, nodes, entity_id, output, opt_col = {}):
if opt_col:
node.misc.update(opt_col)
data = [node.misc.get(col,[""]) for col in fieldnames]
data = izip_longest(*data,fillvalue="")
data = zip_longest(*data,fillvalue="")
for d in data:
d = list(d)
d[0] = node.id
......@@ -385,9 +385,9 @@ def extract_pwy(padmet):
#get all pathways in ref in dict: k=pwy_id, v = set(rxn_id in pwy)
#get all pathways in in spec dict: k=pwy_id, v = set(rxn_id in pwy)
pathways_rxn_dict = dict([(node.id, set([rlt.id_in for rlt in padmet.dicOfRelationOut.get(node.id,[])
if rlt.type == "is_in_pathway" and padmet.dicOfNode[rlt.id_in].type == "reaction"])) for node in padmet.dicOfNode.values() if node.type == "pathway"])
if rlt.type == "is_in_pathway" and padmet.dicOfNode[rlt.id_in].type == "reaction"])) for node in list(padmet.dicOfNode.values()) if node.type == "pathway"])
#pop k,v if len(v) == 0 (if not v)
[pathways_rxn_dict.pop(k) for k,v in pathways_rxn_dict.items() if not v]
[pathways_rxn_dict.pop(k) for k,v in list(pathways_rxn_dict.items()) if not v]
return pathways_rxn_dict
def pwy_rate(padmetRef, padmetSpec, metabolic_network, output):
......@@ -401,7 +401,7 @@ def pwy_rate(padmetRef, padmetSpec, metabolic_network, output):
with open(output, 'w') as f:
writer = csv.writer(f, delimiter="\t")
writer.writerow(fieldnames)
for pwy_id, rxns_set in network_pathways_dict.items():
for pwy_id, rxns_set in list(network_pathways_dict.items()):
#pwy_id = "PWY-5497"
#rxns_set = network_pathways_dict["PWY-5497"]
all_rxns_set = all_pathways_dict[pwy_id]
......
# -*- coding: utf-8 -*-
"""
This file is part of padmet-utils.
padmet-utils is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
padmet-utils is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with padmet-utils. If not, see <http://www.gnu.org/licenses/>.
@author: Meziane AITE, meziane.aite@inria.fr
Description:
usage:
post_pantograph.py --ptg_run=DIR --output=FILE [-v]
post_pantograph.py --model_metabolic=FILE --study_metabolic=FILE --inp=FILE --omcl=FILE --output=FILE [-v]
option:
-h --help Show help.
--model_metabolic=FILE pathname to the metabolic network of the model (sbml).
--model_faa=FILE pathname to the proteom of the model (faa)
--cutoff=FLOAT cutoff [0:1] for comparing model_metabolic and model_faa. [default: 0.70].
--dict_ids_file=FILE pathname to the dict associating genes ids from the model_metabolic to the model_faa. line =
--output=FILE output of get_valid_faa (a faa) or get_dict_ids (a dictionnary of gene ids in tsv)
-v print info
"""
import re
from padmet.utils.sbmlPlugin import parseNotes, parseGeneAssoc
import libsbml
import docopt
import os
import subprocess
import csv
def main():
args = docopt.docopt(__doc__)
fold = args["--ptg_run"]
output = args["--output"]
verbose = args["-v"]
if fold:
if fold.endswith("/"):
dir_name = os.path.split(fold[:-1])[1]
else:
dir_name = os.path.split(fold)[1]
fold += "/"
model_metabolic = fold+"metabolic_model.sbml"
study_metabolic = fold+"original_output_pantograph_"+dir_name+".sbml"
omcl_rez = fold+"all_orthomcl.out"
inp_rez = fold+"table.FAA_model.faa-FAA_study.faa"
else:
model_metabolic = args["--model_metabolic"]
study_metabolic = args["--study_metabolic"]
omcl_rez = args["--omcl"]
inp_rez = args["--inp"]
dir_path_gbr = os.path.dirname(os.path.realpath(__file__))+"/grammar-boolean-rapsody.py"
#create dict, k = OrtoA gene_id, v = set of OrtoB orthologus
inp_dict = {}
with open(inp_rez, 'r') as f:
reader = csv.DictReader(f, delimiter = "\t")
#line[2] or [3] contains genes id with score, delete score and blank
for line in reader:
OrtoA = set([i for i in line["OrtoA"].split(" ") if i != "1.000" and len(i) != 0])
OrtoB = set([i for i in line["OrtoB"].split(" ") if i != "1.000" and len(i) != 0])
for gene in OrtoA:
if gene in inp_dict.keys(): print("%s multiple /!\\" %gene)
inp_dict[gene] = OrtoB
#create a dict K = FAA_model gene id, V = set of FAA_study gene ortho
omcl_dict = {}
with open(omcl_rez, 'r') as f: