Commit 6e1df6e3 authored by VIGNET Pierre's avatar VIGNET Pierre
Browse files

Add occurrences matrix

parent a8e57625
......@@ -132,6 +132,21 @@ def solutions_2_common_graph(args):
)
def solutions_2_occcurrences_matrix(args):
    """Create a matrix of occurrences counting entities in the solutions found in
    *cam.txt files in the given path.
    """
    # Deferred import: keeps CLI start-up cheap when this command is unused
    import solution_sort

    # Translate the argparse namespace into a plain dict of settings
    settings = args_to_param(args)
    # Delegate the actual work to the library entry point of the same name
    solution_sort.solutions_2_occcurrences_matrix(
        settings['output'], settings['chart_file'], settings['path'],
        transposed=settings['transpose_csv'])
def model_comparison(args):
"""Isomorphism test.
......@@ -385,6 +400,26 @@ def main():
parser_trajectories.set_defaults(func=solutions_2_common_graph)
# subparser: Create a matrix of occurrences counting entities in the solutions.
# Model file (xml : cadbiom language)
# Solution file (cam.txt)
parser_occurrences_matrix = subparsers.add_parser(
'solutions_2_occcurrences_matrix',
help=solutions_2_occcurrences_matrix.__doc__,
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser_occurrences_matrix.add_argument('chart_file',
help="bcx model file.")
parser_occurrences_matrix.add_argument('path',
help="Directory with MAC solutions files "
"(*cam.txt files) generated with the 'compute_macs' command.")
parser_occurrences_matrix.add_argument('--output', action=ReadableDir,
nargs='?', default='./',
help="Output directory for CSV files.")
parser_occurrences_matrix.add_argument('--transpose_csv', action='store_true',
help="Transpose the final matrix (switch columns and rows).")
parser_occurrences_matrix.set_defaults(func=solutions_2_occcurrences_matrix)
# subparser: Merge solutions to a csv file
# Solution file (cam)
# Output (csv)
......
......@@ -23,7 +23,7 @@
# 35042 RENNES Cedex, FRANCE
"""Handle generated files
This module provides some functions to do some analyzes on the output
This module provides some functions to do some analyses on the output
files of Cadbiom.
Entry points:
......@@ -68,9 +68,12 @@ from __future__ import unicode_literals
from __future__ import print_function
# Standard imports
from collections import defaultdict, Counter
import itertools as it
import json
import os
import glob
import csv
import networkx as nx
# Library imports
......@@ -78,7 +81,7 @@ import networkx as nx
from tools.solutions import get_solutions
from tools.models import get_transitions_from_model_file
from tools.solutions import load_solutions, convert_solutions_to_json, \
get_query_from_filename
get_query_from_filename, get_cam_lines
from tools.graphs import export_graph, build_graph
import cadbiom.commons as cm
......@@ -314,7 +317,6 @@ def convert_solution_file_to_graphs(output_dir, sol_steps, transitions):
export_graph(output_dir, sol, sol_index,
build_graph(sol, steps, transitions)[0])
##
def solutions_2_common_graph(output_dir, model_file, path):
"""Entry point for solutions_2_common_graph
......@@ -426,3 +428,139 @@ def merge_graphs(graphs):
G.add_edge(ori, ext, attr_dict=data, weight=1)
return G
## Matrices of occurrences #####################################################
def solutions_2_occcurrences_matrix(output_dir, model_file, path,
                                    transposed=False, normalized=False):
    """Entry point for solutions_2_occcurrences_matrix

    Build the occurrence matrix for the ``*cam.txt`` files in the given
    directory, then optionally write a transposed copy of the CSV file.

    :param output_dir: Output path.
    :param model_file: Filepath of the model.
    :param path: Directory of many complete solutions files.
    :param transposed: (Optional) Transpose the final matrix (switch columns
        and rows).
    :type output_dir: <str>
    :type model_file: <str>
    :type path: <str>
    :type transposed: <boolean>
    """
    # The directory holding the solutions must exist
    assert os.path.isdir(path)

    # Normalize both directories so that they end with a '/'
    if not path.endswith('/'):
        path += '/'
    if not output_dir.endswith('/'):
        output_dir += '/'

    # Build and save the matrix
    occurence_matrix(output_dir, model_file, path)

    # Optionally write a transposed version next to the original CSV
    if transposed:
        transpose_csv(input_file=output_dir + 'occurence_matrix.csv',
                      output_file=output_dir + 'occurence_matrix_t.csv')
def occurence_matrix(output_dir, model_file, path,
                     matrix_filename='occurence_matrix.csv'):
    """Make a matrix of occurrences for the solutions in the given path.

    - Compute occurrences of each place in all `cam.txt` files.
    - Save the matrix in csv format with the following columns:
        Fieldnames: "patterns (number)/places (number);cam_number;frontier places"

    Each request (pattern) is accompanied by the number of solutions found.

    .. todo:: Split the creation and writing of the matrix in 2 functions.

    :param output_dir: Output path.
    :param model_file: Filepath of the model.
    :param path: Directory of many complete solutions files.
    :param matrix_filename: (Optional) Filename of the matrix file.
    :type output_dir: <str>
    :type model_file: <str>
    :type path: <str>
    :type matrix_filename: <str>
    :return: A dictionary with the matrix object.
        keys: queries, values: occurrences of frontier places
    :rtype: <dict>
    """
    # Key: Logical formula as input of Cadbiom
    # Value: Number of each place in all solutions of the current file
    matrix = defaultdict(Counter)
    # All frontier places in all cam files
    all_frontier_places = set()

    # Compute occurrences of each place in all cam files
    file_number = 0
    for file_number, filepath in enumerate(glob.glob(path + '*cam.txt'), 1):

        # Get query string from the name of the solution file
        # From: 'MODEL_NAME_PLACE1 and not PLACE2 and not PLACE3_cam.txt'
        # Get: 'PLACE1 and not PLACE2 and not PLACE3'
        query = get_query_from_filename(model_file, filepath)

        cam_number = 0
        for cam_number, cam_line in enumerate(get_cam_lines(filepath), 1):
            frontier_places = set(cam_line.split(' '))
            # Update set of all frontier places
            all_frontier_places.update(frontier_places)
            # Update counter of places => compute frequencies
            matrix[query] += Counter(frontier_places)

        # Set the cam_number for future standardization
        matrix[query]["cam_number"] = cam_number

    LOGGER.info("Files processed: %s", file_number)

    # Save the matrix
    # columns: "patterns (number)/places (number);cam_number;frontier places"
    with open(output_dir + matrix_filename, 'w') as f_d:
        # Forge header
        header = "patterns ({})/places ({})".format(
            len(matrix),
            len(all_frontier_places),
        )
        writer = csv.DictWriter(
            f_d,
            delimiter=str(';'),
            restval=0,  # default value for frequency
            fieldnames=[header, "cam_number"] + list(all_frontier_places))
        writer.writeheader()

        # Add a last line in the csv: total of occurrences for each place
        global_frontier_counter = Counter()

        # The first column is composed of the query + the number of solutions
        # for it.
        # PY2/PY3 compat: items() replaces the Python 2-only iteritems()
        for query, row in matrix.items():
            global_frontier_counter += row
            # PS: THIS modifies the matrix by adding a new key ('header')
            row[header] = "{} ({})".format(query, row["cam_number"])
            writer.writerow(row)

        # Total of occurences at the end of the file
        global_frontier_counter[header] = "Total of occurences"
        writer.writerow(global_frontier_counter)

    return matrix
def transpose_csv(input_file='occurence_matrix.csv',
                  output_file='occurence_matrix_t.csv'):
    """Useful function to transpose a csv file x,y => y,x

    .. note:: The csv file must be semicolon ';' separated.

    :param input_file: Input file.
    :param output_file: Output file transposed.
    :type input_file: <str>
    :type output_file: <str>
    """
    # Transpose file
    # PY2/PY3 compat: zip() replaces the Python 2-only itertools.izip()
    # (identical truncating behavior: zip('ABCD', 'xy') --> Ax By).
    # Context managers guarantee both file handles are closed, even on error
    # (the previous version left them open until garbage collection).
    with open(input_file, "r") as f_in, open(output_file, "w") as f_out:
        transposed_rows = zip(*csv.reader(f_in, delimiter=str(';')))
        csv.writer(f_out, delimiter=str(';')).writerows(transposed_rows)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment