...
 
Commits (11)
......@@ -38,7 +38,7 @@ option:
--model_metabolic=FILE pathname to the metabolic network of the model (sbml).
--model_faa=FILE pathname to the proteome of the model (faa)
--cutoff=FLOAT cutoff [0:1] for comparing model_metabolic and model_faa. [default: 0.70].
--dict_ids_file=FILE pathname to the dict associating genes ids from the model_metabolic to the model_faa. line =
--dict_ids_file=FILE pathname to the dict associating genes ids from the model_metabolic to the model_faa. line = gene_id_in_metabolic_network\tgene_id_in_faa
--output=FILE output of get_valid_faa (a faa) or get_dict_ids (a dictionary of gene ids in tsv)
-v print info
"""
......
......@@ -88,7 +88,7 @@ def main():
create_biological_page("Metabolite", cpd_node, wiki_folder+"metabolites/")
create_navigation_page(wiki_folder+"/navigation/")
create_venn()
#create_venn()
create_main(model_id, model_name)
if log_file:
create_log_page(log_file, wiki_folder+"/navigation/")
......@@ -119,12 +119,18 @@ def create_venn():
if verbose: print("Venn Diagramm")
#'ARGSUCCINSYN-RXN'
categories_dict ={}
all_categories = ["orthology","annotation","gap-filling","manual"]
"""
all_categories = ["orthology","annotation","gap-filling","manual","microbiont"]
for category in all_categories:
categories_dict[category] = set()
"""
for rxn_id, rxn_src_dict in list(full_sources_dict.items()):
for category in list(rxn_src_dict.keys()):
categories_dict[category].add(rxn_id)
try:
categories_dict[category].add(rxn_id)
except KeyError:
categories_dict[category] = set(rxn_id)
labels = get_labels(list(categories_dict.values()))
fig, ax = venn4(labels, names=list(categories_dict.keys()))
......@@ -142,7 +148,7 @@ def create_main(model_id, model_name):
main_template[main_template.index(line)] = line.replace("MODEL_ID",model_id).replace("MODEL_NAME",model_name)
final_network_index = main_template.index([line for line in main_template if line.startswith("The automatic")][0])
main_template[final_network_index] = main_template[final_network_index].replace("NB_RXN", str(len(all_rxns))).replace("NB_CPD", str(len(all_cpds))).replace("NB_PWY", str(len(all_pwys))).replace("NB_GENE", str(len(all_genes)))
reconstruct_summary = {"ANNOTATION":0,"ORTHOLOGY":{},"MANUAL":0,"GAP-FILLING":0}
reconstruct_summary = {"ANNOTATION":0,"ORTHOLOGY":{},"MANUAL":0,"GAP-FILLING":0,"MICROBIONT":0,"HOST":0}
for rec_node in [node for node in list(padmetSpec.dicOfNode.values()) if node.type == "reconstructionData"]:
cat = rec_node.misc["CATEGORY"][0]
if cat == "ORTHOLOGY":
......@@ -599,7 +605,7 @@ def create_biological_page(category, page_node, output_folder):
pass
dataInArray.extend(properties)
with open(fileName,'w') as f:
with open(fileName,'w', encoding="utf8") as f:
for line in dataInArray:
f.write(line+"\n")
"""
......
......@@ -328,22 +328,14 @@ def intern_mapping(id_to_map, db_out, _type):
all_rxn_id = []
[all_rxn_id.extend(i) for i in list(mapp_dict.values())]
if id_to_map in all_rxn_id:
return mapp_dict[db_out][0]
return mapp_dict.get(db_out, [None])[0]
elif _type == "compound":
for mapp_dict in list(intern_cpd_dict.values()):
all_cpd_id = []
[all_cpd_id.extend(i) for i in list(mapp_dict.values())]
if id_to_map in all_cpd_id:
return mapp_dict[db_out][0]
if db_out == "METACYC":
for mapp_dict in list(intern_cpd_dict.values()):
all_cpd_id = []
[all_cpd_id.extend(i) for i in list(mapp_dict.values())]
if id_to_map.upper() in all_cpd_id:
return mapp_dict[db_out][0]
return mapp_dict.get(db_out, [None])[0]
return None
......
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Description:
Use reactions.csv file from compare_padmet.py to create a dendrogram using a Jaccard distance.
From the matrix absence/presence of reactions in different species computes a Jaccard distance between these species.
Then create a dendrogram using these distances.
usage:
dendrogram_reactions_distance.py --reactions=FILE --output=FILE [-v]
option:
-h --help Show help.
-r --reactions=FILE pathname of the file containing reactions in each species of the comparison.
-o --output=FOLDER path to the output folder.
-v verbose mode.
"""
import docopt
import itertools
import pandas as pa
import matplotlib.pyplot as plt
import numpy as np
import os
import seaborn as sns
import subprocess
sns.set_style("white")
sns.set('poster', rc={'figure.figsize':(100,80)}, font_scale=4)
from collections import defaultdict
from scipy.cluster.hierarchy import dendrogram, fcluster
from scipy.spatial import distance
from sklearn.metrics.pairwise import pairwise_distances
from fastcluster import linkage
from scipy.spatial.distance import squareform, pdist
def main():
    """Parse command-line options and launch the dendrogram/figure creation."""
    global verbose
    cli_args = docopt.docopt(__doc__)
    # docopt stores the -v flag as a boolean; keep verbose as None when absent.
    verbose = cli_args["-v"] if cli_args["-v"] else None
    reaction_figure_creation(cli_args["--reactions"], cli_args["--output"])
def reaction_figure_creation(reaction_file, output_folder):
    """Cluster species by shared reactions and write dendrogram + reaction files.

    Reads the absence/presence reaction matrix produced by compare_padmet.py,
    computes pairwise Jaccard distances between species, clusters them with
    average-linkage hierarchical clustering, then writes:
      - <output_folder>/reaction_dendrogram.png
      - data/all_species.tsv        (reactions shared by every cluster)
      - data/intersect/*.tsv        (reactions shared by subsets of clusters)
      - data/unique/*.tsv           (reactions found in only one cluster)
      - upset_graph/                (upset figure rendered by the external
                                     'intervene' tool, assumed on PATH)

    Args:
        reaction_file (str): path to the tab-separated reactions file: one
            'reaction' column plus one presence column per species, with
            optional '*_genes_assoc (sep=;)' and '*_formula' columns.
        output_folder (str): path to the output folder (created if missing).
    """
    # Check if output_folder exists, if not create it (including sub-folders).
    output_folder_data = output_folder + '/data'
    output_folder_data_intersect = output_folder + '/data/intersect'
    output_folder_data_unique = output_folder + '/data/unique'
    output_folder_upset = output_folder + '/upset_graph'
    temp_data_folder = output_folder + '/upset_graph/temp_data/'
    folders = [output_folder, output_folder_data, output_folder_data_intersect,
                output_folder_data_unique, output_folder_upset, temp_data_folder]
    for folder in folders:
        if not os.path.isdir("{0}".format(folder)):
            os.mkdir("{0}".format(folder))
    # External command used to draw the upset graph — TODO confirm it is installed.
    path_to_intervene = 'intervene'
    # Read the reactions file with pandas.
    all_reactions_dataframe = pa.read_csv(reaction_file, sep='\t')
    # Keep only the columns containing absence-presence of reactions.
    # (columns with (sep=;) are columns with gene names linked to reactions)
    # (columns with _formula contain the reaction formula)
    columns = [column for column in all_reactions_dataframe.columns if '(sep=;)' not in column]
    columns = [column for column in columns if '_formula' not in column]
    reactions_dataframe = all_reactions_dataframe[columns].copy()
    reactions_dataframe.set_index('reaction', inplace=True)
    # Translate 'present'/(nan) data into a True/False absence-presence matrix.
    for column in reactions_dataframe.columns.tolist():
        reactions_dataframe[column] = [True if data == "present" else False for data in reactions_dataframe[column]]
    # Transpose the matrix to have species as index and reactions as columns.
    absence_presence_matrix = reactions_dataframe.transpose()
    # Compute a distance matrix using the Jaccard distance between species. Then condense it.
    distance_matrix_jaccard = distance.squareform(pairwise_distances(absence_presence_matrix, metric="jaccard"))
    # Hierarchical clustering (average linkage) on the condensed distance matrix.
    linkage_matrix = linkage(distance_matrix_jaccard, method="average")
    # Draw a dendrogram of the clustering.
    reaction_dendrogram = dendrogram(linkage_matrix, labels=absence_presence_matrix.index, leaf_font_size=40)
    plt.savefig(output_folder+'/reaction_dendrogram.png')
    # Extract species in each cluster: one flat cluster per dendrogram colour.
    k = len(set(reaction_dendrogram['color_list']))
    results = fcluster(linkage_matrix, k, criterion='maxclust')
    species = absence_presence_matrix.index.tolist()
    cluster_species = dict(zip(species, results))
    cluster_classes = defaultdict(list)
    for key, value in cluster_species.items():
        cluster_classes[value].append(key)
    # Extract reactions in each cluster.
    cluster_reactions = {}
    for cluster in cluster_classes:
        reactions_temp = []
        # NOTE(review): the loop variable below shadows the 'species' list
        # defined above; harmless here because that list is not reused.
        for species in cluster_classes[cluster]:
            species_reactions_dataframe = reactions_dataframe[reactions_dataframe[species] == True]
            reactions_temp.extend(species_reactions_dataframe.index.tolist())
        cluster_reactions[cluster] = set(reactions_temp)
    all_reactions = [reactions for reactions in cluster_reactions.values()]
    cluster_intersections = set.intersection(*all_reactions)
    # Create file containing the intersection of the reactions for all clusters.
    df = pa.DataFrame({'all_species': list(cluster_intersections)})
    df.set_index('all_species', inplace=True)
    all_reactions_dataframe.set_index('reaction', inplace=True)
    gene_assoc_columns = [column for column in all_reactions_dataframe.columns if '(sep=;)' in column]
    gene_assoc_reactions = all_reactions_dataframe[gene_assoc_columns]
    df = df.join(gene_assoc_reactions)
    df.to_csv(output_folder_data+'/'+'all_species.tsv', sep='\t', index=True)
    cluster_subintersection = {}
    cluster_subintersection_name = {}
    # Extract intersections between clusters, from largest to smallest subset
    # (subsets of size 0 and 1 are skipped; the full intersection is handled above).
    for cluster_number in reversed(range(len(cluster_reactions))):
        if cluster_number != 0 and cluster_number != 1:
            for set_list in itertools.combinations(cluster_reactions, cluster_number):
                tmp_reactions = [cluster_reactions[cluster] for cluster in set_list]
                cltemp = set.intersection(*tmp_reactions)
                # Remove reactions already shared by ALL clusters.
                intersection_temp = cltemp - cluster_intersections
                cluster_subintersection[set_list] = intersection_temp
                cluster_species_name = '&'.join(['_'.join(cluster_classes[cluster]) for cluster in set_list])
                cluster_subintersection_name[cluster_species_name] = list(intersection_temp)
                # Create a file containing the intersection for this cluster subset.
                df = pa.DataFrame({'reaction': list(intersection_temp)})
                df.set_index('reaction', inplace=True)
                gene_assoc_columns = [column for column in all_reactions_dataframe.columns if '(sep=;)' in column]
                column_species = [ species for cluster in set_list for species in cluster_classes[cluster]]
                temp_gene_assoc_columns = [gene_assoc for gene_assoc in gene_assoc_columns if gene_assoc.split('_genes_assoc')[0] in column_species]
                gene_assoc_reactions = all_reactions_dataframe[temp_gene_assoc_columns]
                df = df.join(gene_assoc_reactions)
                df.to_csv(output_folder_data_intersect+'/'+cluster_species_name+'_intersect.tsv', sep='\t', index=True)
    # For each cluster, collect all reactions it shares with at least one other cluster.
    cluster_subsubintersection = {}
    for cluster in cluster_classes:
        species_intersections = []
        for set_list in cluster_subintersection:
            if cluster in set_list:
                species_intersections.append(cluster_subintersection[set_list])
        cluster_subsubintersection[cluster] = set([j for i in species_intersections for j in i])
    # Extract reactions unique to each cluster (neither global nor pairwise-shared).
    cluster_unique = {}
    for cluster in cluster_classes:
        cluster_unique[cluster] = cluster_reactions[cluster]-cluster_intersections-cluster_subsubintersection[cluster]
        # Create a file containing reactions unique to this cluster.
        df = pa.DataFrame({'reaction': list(cluster_unique[cluster])})
        df.set_index('reaction', inplace=True)
        gene_assoc_columns = [column for column in all_reactions_dataframe.columns if '(sep=;)' in column]
        column_species = [ species for species in cluster_classes[cluster]]
        temp_gene_assoc_columns = [gene_assoc for gene_assoc in gene_assoc_columns if gene_assoc.split('_genes_assoc')[0] in column_species]
        gene_assoc_reactions = all_reactions_dataframe[temp_gene_assoc_columns]
        df = df.join(gene_assoc_reactions)
        df.to_csv(output_folder_data_unique+'/'+'_'.join(cluster_classes[cluster])+'_unique.tsv', sep='\t', index=True)
    # Create data files for the upset graph drawn by intervene.
    for cluster in cluster_classes:
        df = pa.DataFrame({'_'.join(cluster_classes[cluster]): list(cluster_reactions[cluster])})
        df.to_csv(temp_data_folder+'/'+'_'.join(cluster_classes[cluster])+'.tsv', sep='\t', index=None, header=None)
    cmd = '{0} upset -i {1}/*.tsv --type list -o {2} --figtype svg'.format(path_to_intervene, temp_data_folder, output_folder_upset)
    if verbose:
        subprocess.call(cmd, shell=True)
    else:
        # Silence intervene's output when not in verbose mode.
        FNULL = open(os.devnull, 'w')
        subprocess.call(cmd, shell=True, stdout=FNULL, stderr=subprocess.STDOUT)
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
......@@ -135,6 +135,10 @@ def fba_on_targets(allspecies, model):
for species in allspecies:
#lets create a copy of the initial model
model2 = model.copy()
#remove all obj coef
for rxn in model2.reactions:
if rxn.objective_coefficient == 1.0:
rxn.objective_coefficient = 0.0
#Create a new reaction that consume the given species
FBA_rxn = Reaction("FBA_TEST")
FBA_rxn.lower_bound = 0
......
......@@ -35,6 +35,7 @@ options:
"""
from padmet.classes import PadmetRef, PadmetSpec
import networkx as nx
from networkx.drawing.nx_agraph import graphviz_layout
import matplotlib.pylab as plt
import docopt
......@@ -92,7 +93,7 @@ def main():
# pip install pygraphviz
nx.draw_networkx(DG,
pos=nx.graphviz_layout(DG, prog='neato'), # Layout from graphviz
pos=graphviz_layout(DG, prog='neato'), # Layout from graphviz
node_size=1600,
arrows=True,
font_size=11, # font-size for labels
......
......@@ -145,19 +145,19 @@ def rxn_creator(data_file, padmetSpec, padmetRef = None, output = None, verbose
else:
print("Please choose a value in ['true','false'] for the reversibility of the reaction: %s" %reaction_id)
continue
comment = reaction_data["comment"][0]
comment = reaction_data["comment"]
node_misc = {"DIRECTION":[reaction_rev]}
padmetSpec.createNode("reaction", reaction_id, node_misc)
#reconstructionData:
if tool:
reconstructionData_id = reaction_id+"_reconstructionData_"+tool
reconstructionData = {"SOURCE": [source], "CATEGORY":[category], "TOOL":[tool], "COMMENT":[comment]}
reconstructionData = {"SOURCE": [source], "CATEGORY":[category], "TOOL":[tool], "COMMENT":comment}
if reconstructionData_id in list(padmetSpec.dicOfNode.keys()) and verbose:
print("Warning: The reaction %s seems to be already added from the same source %s" %(reaction_id,tool))
else:
reconstructionData_id = reaction_id+"_reconstructionData_MANUAL"
reconstructionData = {"SOURCE": [source], "CATEGORY":["MANUAL"], "COMMENT":[comment]}
reconstructionData = {"SOURCE": [source], "CATEGORY":["MANUAL"], "COMMENT":comment}
if reconstructionData_id in list(padmetSpec.dicOfNode.keys()) and verbose:
print("Warning: The reaction %s seems to be already added from the same source 'MANUAL'" %reaction_id)
......@@ -257,7 +257,22 @@ def rxn_creator(data_file, padmetSpec, padmetRef = None, output = None, verbose
padmetSpec._addRelation(rlt)
except KeyError:
if verbose: print("No products defined")
if "pathway" in reaction_data.keys():
pathways = reaction_data["pathway"][0].split(";")
for pwy_id in pathways:
try:
padmetSpec.dicOfNode[pwy_id]
except KeyError:
if verbose: print("%s not in the network" %pwy_id)
if padmetRef is not None:
if verbose: print("Check if new pathway %s is in dbref" %pwy_id)
if pwy_id in padmetRef.dicOfNode.keys():
print("Warning the new pathway %s exist in the dbref, risk of overwritting data, change pwy id" %pwy_id)
continue
padmetSpec.createNode("pathway", pwy_id)
if verbose: print(("new pathway created: id = %s" %pwy_id))
rlt = Relation(reaction_id,"is_in_pathway",pwy_id)
padmetSpec._addRelation(rlt)
if verbose: print("Creating output: %s" % output)
padmetSpec.generateFile(output)
......
......@@ -95,10 +95,10 @@ END_OF_EXPECT\
# Informs Docker that the container listens on the specified network ports at runtime.
# EXPOSE does not make the ports of the container accessible to the host
EXPOSE 80 3306
RUN find /var/lib/mysql -type f -exec touch {} \; && service mysql start
# Command for an 'executing container'
# Think to Python supervisord because the following line is disgusting
#CMD service nginx start & service mysql start & service php5-fpm start && tail -F /var/log/nginx/error.log
COPY supervisord.conf /etc/supervisor/conf.d/
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"]
RUN find /var/lib/mysql -type f -exec touch {} \; && service mysql start
......@@ -71,7 +71,7 @@ def main():
if wiki_id in os.walk(wiki_folders).next()[1]:
raise ValueError("A wiki with the id %s already exist, remove it or change the new wiki id" %wiki_id)
print("Checking wiki id %s: OK" %wiki_id)
print("Checking the if the prefix %s_ is already used in the database..." %wiki_id)
print("Checking if the prefix %s_ is already used in the database..." %wiki_id)
try:
out = subprocess.check_output(["/bin/bash", "-i", "-c", 'DB -D '+db_name+' -e "show tables" -s | egrep "^'+wiki_id+'"'])
raise ValueError("%s tables found with prefix %s_, a wiki is already using this prefix." %(out.count("\n"), wiki_id))
......@@ -140,7 +140,6 @@ def main():
all_tables = all_tables[:-1]
print("%s tables to drop" %len(all_tables))
cmd = 'DB -D '+db_name+' -e "DROP TABLE '+",".join(all_tables)+'"'
print cmd
subprocess.call(["/bin/bash", "-i", "-c",cmd])
except subprocess.CalledProcessError:
print("No tables with prefix %s_ found" %wiki_id)
......
......@@ -4,7 +4,7 @@ include 'globals.php';
$api_url = $argv[3];
$username = $argv[4];
$password = $argv[5];
$page_name = $argv[1].replace("__47__","/")
$page_name = str_replace("__47__","/",$argv[1]);
echo $argv;
return false;
......