Commit 3a4d1894 authored by BELCOUR Arnaud

Add reactions intersection file creation.

Fix issue with temp file creation for upset.
parent f296ee83
......@@ -38,17 +38,26 @@ from scipy.spatial.distance import squareform, pdist
def reaction_figure_creation(reaction_file, output_folder):
# Check if output_folder exists, if not create it.
if not os.path.isdir("{0}".format(output_folder)):
output_folder_data = output_folder + '/data'
output_folder_upset = output_folder + '/upset_graph'
temp_data_folder = output_folder + '/upset_graph/temp_data/'
folders = [output_folder, output_folder_data, output_folder_upset, temp_data_folder]
for folder in folders:
if not os.path.isdir("{0}".format(folder)):
path_to_intervene = 'intervene'
# Read the reactions file with pandas.
reactions_dataframe = pa.read_csv(reaction_file, sep='\t')
all_reactions_dataframe = pa.read_csv(reaction_file, sep='\t')
# Keep column containing absence-presence of reactions.
# (columns with (seep;) are column with gene name linked to reactions)
# (columns with (sep=;) are column with gene name linked to reactions)
# (columns with _formula contain the reaction formula)
columns = [column for column in reactions_dataframe.columns if '(sep=;)' not in column]
columns = [column for column in all_reactions_dataframe.columns if '(sep=;)' not in column]
columns = [column for column in columns if '_formula' not in column]
reactions_dataframe = reactions_dataframe[columns]
reactions_dataframe = all_reactions_dataframe[columns]
reactions_dataframe.set_index('reaction', inplace=True)
......@@ -93,9 +102,17 @@ def reaction_figure_creation(reaction_file, output_folder):
all_reactions = [reactions for reactions in cluster_reactions.values()]
cluster_intersections = set.intersection(*all_reactions)
# Create file containing the intersection of the reactions for all cluster.
df = pa.DataFrame({'all_species': list(cluster_intersections)})
df.set_index('all_species', inplace=True)
all_reactions_dataframe.set_index('reaction', inplace=True)
gene_assoc_columns = [column for column in all_reactions_dataframe.columns if '(sep=;)' in column]
gene_assoc_reactions = all_reactions_dataframe[gene_assoc_columns]
df = df.join(gene_assoc_reactions)
df.to_csv(output_folder_data+'/'+'all_species.tsv', sep='\t', index=True)
cluster_subintersection = {}
cluster_subintersection_name = {}
# Extract intersection between clusters.
for cluster_number in reversed(range(len(cluster_reactions))):
if cluster_number != 0 and cluster_number != 1:
......@@ -104,6 +121,11 @@ def reaction_figure_creation(reaction_file, output_folder):
cltemp = set.intersection(*tmp_reactions)
intersection_temp = cltemp - cluster_intersections
cluster_subintersection[set_list] = intersection_temp
cluster_species_name = '&'.join(['_'.join(cluster_classes[cluster]) for cluster in set_list])
cluster_subintersection_name[cluster_species_name] = list(intersection_temp)
# Create a file containing intersection between each cluster.
df = pa.DataFrame(dict([ (k,pa.Series(v)) for k,v in cluster_subintersection_name.items() ]))
df.to_csv(output_folder_data+'/intersect.tsv', sep='\t', index=None)
# Create reactions which intersect for each cluster.
cluster_subsubintersection = {}
......@@ -118,19 +140,16 @@ def reaction_figure_creation(reaction_file, output_folder):
cluster_unique = {}
for cluster in cluster_classes:
cluster_unique[cluster] = cluster_reactions[cluster]-cluster_intersections-cluster_subsubintersection[cluster]
input_folder = output_folder + '/temp_data/'
path_to_intervene = 'intervene'
if not os.path.isdir("{0}".format(input_folder)):
# Create a file containing reactions unique for each cluster.
df = pa.DataFrame(dict([ ('&'.join(cluster_classes[k]),pa.Series(list(v))) for k,v in cluster_unique.items() ]))
df.to_csv(output_folder_data+'/unique.tsv', sep='\t', index=None)
# Create data for creating upset graph using intervene.
for cluster in cluster_classes:
df = pa.DataFrame({'_'.join(cluster_classes[cluster]): list(cluster_reactions[cluster])})
df.to_csv(input_folder+'/'+'_'.join(cluster_classes[cluster])+'.tsv', sep='\t', index=None)
df.to_csv(temp_data_folder+'/'+'_'.join(cluster_classes[cluster])+'.tsv', sep='\t', index=None, header=None)
cmd = '{0} upset -i {1}/*.tsv --type list -o {2} --figtype svg'.format(path_to_intervene, input_folder, output_folder)
cmd = '{0} upset -i {1}/*.tsv --type list -o {2} --figtype svg'.format(path_to_intervene, temp_data_folder, output_folder_upset), shell=True)
def main():
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment