solution_sort.py 24.2 KB
Newer Older
1
# -*- coding: utf-8 -*-
2
# Copyright (C) 2017-2020  IRISA
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# The original code contained here was initially developed by:
#
#     Pierre Vignet.
#     IRISA
#     Dyliss team
#     IRISA Campus de Beaulieu
#     35042 RENNES Cedex, FRANCE
24 25
"""Handle generated files

VIGNET Pierre's avatar
VIGNET Pierre committed
26
This module provides functions to analyze the output
27
files of Cadbiom.
28

VIGNET Pierre's avatar
VIGNET Pierre committed
29 30
Entry points:

31
    * :meth:`~cadbiom_cmd.solution_sort.queries_2_json`
32
    * :meth:`~cadbiom_cmd.solution_sort.solutions_2_graphs`
VIGNET Pierre's avatar
VIGNET Pierre committed
33
    * :meth:`~cadbiom_cmd.solution_sort.queries_2_common_graph`
VIGNET Pierre's avatar
VIGNET Pierre committed
34

35 36
:Example of the content of a complete solution file:

VIGNET Pierre's avatar
VIGNET Pierre committed
37
    .. code-block:: text
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65

        Bx  Ax
        % h2 h00
        % h3
        % h0 h1
        % hlast
        Bx  Ax
        % h2
        % h3 h00
        % h0 h1
        %
        % hlast
        Bx  Ax
        % h2
        % h3 h00
        % h0 h1
        % hlast
        %
        %
        Bx  Ax
        % h2 h00
        % h3
        % h0 h1
        % hlast
        %
        %
        %

66
"""
67 68 69
from __future__ import unicode_literals
from __future__ import print_function

70
# Standard imports
VIGNET Pierre's avatar
VIGNET Pierre committed
71 72
from collections import defaultdict, Counter
import itertools as it
73
import json
74 75
import os
import glob
VIGNET Pierre's avatar
VIGNET Pierre committed
76
import csv
77

78 79
# Library imports

80
from tools.solutions import get_solutions
81
from tools.models import get_transitions_from_model_file
82
from tools.solutions import load_solutions, convert_solutions_to_json, \
83
    get_query_from_filename, get_mac_lines
84
from tools.graphs import export_graph, build_graph, merge_graphs, get_solutions_graph_data
85 86 87 88

import cadbiom.commons as cm

LOGGER = cm.logger()
89

90
## Sort functions ##############################################################
91

92 93
def sort_solutions_in_file(filepath):
    """Sort all solutions in the given file in alphabetical order.

    Each solution line yielded by ``get_solutions`` is replaced, in the text
    of the file, by the same places sorted case-insensitively and separated
    by single spaces.

    .. warning:: The file is modified in place.

    :param filepath: Filepath to be opened and in which solutions will be sorted.
    :type filepath: <str>
    """
    # Original line as key, sorted line as value
    solutions = dict()

    with open(filepath, "r+") as f_d:

        for line, stripped_line in get_solutions(f_d):
            # split() without argument discards the empty elements produced
            # by leading/trailing/repeated whitespace
            # (split(" ") would keep them as '' items).
            solutions[line] = " ".join(
                sorted(stripped_line.split(), key=lambda s: s.lower())
            )

        # Rewind the file and load all its content
        f_d.seek(0)
        file_text = f_d.read()

        # Replace old solutions with the sorted ones
        for original_sol, sorted_sol in solutions.items():
            file_text = file_text.replace(original_sol, sorted_sol)

        # Rewind again and overwrite the content
        f_d.seek(0)
        f_d.write(file_text)
        # The new text can be shorter than the old one (removed whitespace):
        # drop what remains of the previous content after the write position.
        f_d.truncate()
130 131


132
def solutions_sort(path):
    """Entry point for sorting solutions.

    Read a solution(s) file(s) (``*mac*`` files) and sort all
    frontier places/boundaries in alphabetical order.

    This function tests if the given path is a directory or a file.
    For a directory, only the files directly inside it whose name matches
    ``*mac*`` are processed (mac.txt, mac_complete.txt, mac_step.txt);
    sub-directories are not explored.

    .. warning:: The files will be modified in place.

    :param path: Filepath or directory path containing Cadbiom solutions.
    :type path: <str>
    """
    # Check valid input file/directory
    assert os.path.isfile(path) or os.path.isdir(path)

    if not os.path.isdir(path):
        sort_solutions_in_file(path)
        return

    # os.path.join() handles the presence/absence of a trailing separator
    for solution_file in glob.glob(os.path.join(path, "*mac*")):
        sort_solutions_in_file(solution_file)
156 157 158

## Conversion functions ########################################################

159 160 161
def write_json(output_dir, file_path, file_suffix, data):
    """Write decompiled solutions to a JSON formated file

    Called by :meth:`queries_2_json` and :meth:`queries_2_common_graph`.

    The name of the written file is:
    basename of ``file_path`` without extension + ``file_suffix`` + ".json".

    :param output_dir: Output directory. If it is an existing directory, the
        filename is joined to it with the proper separator (a trailing "/"
        is not required). Any other value is used as a raw prefix of the
        filename, as it was historically.
    :param file_path: Filepath of the original solution file.
        We extract the basename in order to name the JSON file.
    :param file_suffix: String added to the solution filename.
        Ex: filename + file_suffix + ".json"
    :param data: Data to be serialized in JSON
    :type output_dir: <str>
    :type file_path: <str>
    :type file_suffix: <str>
    :type data: <list> or <dict> or <whatever>
    """
    # Add file_suffix to the solution filename
    filename = os.path.basename(os.path.splitext(file_path)[0])
    json_name = filename + file_suffix + ".json"

    if os.path.isdir(output_dir):
        # Avoid writing "dirfile.json" beside the directory when output_dir
        # has no trailing separator
        json_path = os.path.join(output_dir, json_name)
    else:
        json_path = output_dir + json_name

    with open(json_path, "w") as f_d:
        json.dump(data, f_d, sort_keys=True, indent=2)

VIGNET Pierre's avatar
VIGNET Pierre committed
181

182
## query - json
183 184
def queries_2_json(output_dir, model_file, path, conditions=True):
    """Entry point for queries_2_json

    Create a JSON formated file containing all data from complete MAC files
    (``*mac_complete`` files). The file will contain frontier places/boundaries
    and decompiled steps with their respective events for each solution.

    This is a function to quickly search all transition attributes involved
    in a solution.

    This function tests if the given path is a directory or a file.
    For a directory, every ``*mac_complete.txt`` file directly inside it is
    converted; one JSON file (suffix ``_decomp``) is written per solution file
    in ``output_dir``.

    :param output_dir: Output path.
    :param model_file: Filepath of the model.
    :param path: Filepath/directory of a complete solution file.
    :param conditions: (Optional) If False, conditions of transitions will not
        be present in the JSON file. This allows to have only places/entities
        used inside trajectories; thus, inhibitors are avoided.
    :type output_dir: <str>
    :type model_file: <str>
    :type path: <str>
    :type conditions: <boolean>
    :raises AssertionError: If path is neither a file nor a directory, or if
        the directory does not contain any ``*mac_complete.txt`` file.
    """
    # Check valid input file/directory
    assert os.path.isfile(path) or os.path.isdir(path)

    # Get transitions from the model
    model_transitions, _ = get_transitions_from_model_file(model_file)

    if os.path.isfile(path):
        # The given path is a solution file
        decomp_solutions = convert_solutions_to_json(
            load_solutions(path), model_transitions, conditions=conditions
        )
        write_json(output_dir, path, "_decomp", decomp_solutions)

    elif os.path.isdir(path):
        # The given path is a directory:
        # decompilation of all complete solution files in it
        file_number = 0
        for file_number, solution_file in enumerate(
            glob.glob(os.path.join(path, "*mac_complete.txt")), 1
        ):
            decomp_solutions = convert_solutions_to_json(
                load_solutions(solution_file), model_transitions, conditions=conditions
            )
            # The JSON file goes to output_dir and is named after the
            # current solution file (as in the single file mode).
            write_json(output_dir, solution_file, "_decomp", decomp_solutions)

        LOGGER.info("Files processed: %s", file_number)
        assert file_number != 0, "No *mac_complete.txt files found!"
236

237
################################################################################
238

239 240
def solutions_2_graphs(output_dir, model_file, path):
    """Entry point for solutions_2_graphs

    Export, as GraphML formated files, the trajectories of every solution
    found in complete MAC files (``*mac_complete`` files).

    The graphs show the paths taken by the solver from the boundaries
    to the entities of interest.

    The given path can be a single solution file or a directory; in the
    latter case all the ``*mac_complete.txt`` files it contains are processed.

    :param output_dir: Output path.
    :param model_file: Filepath of the model.
    :param path: Filepath/directory of a/many complete solutions files.
    :type output_dir: <str>
    :type model_file: <str>
    :type path: <str>
    """
    path_is_file = os.path.isfile(path)
    path_is_dir = os.path.isdir(path)

    # Only existing files or directories are accepted
    assert path_is_file or path_is_dir

    # Transitions of the model, indexed by event
    model_transitions, _ = get_transitions_from_model_file(model_file)

    if path_is_file:
        # Single solution file
        save_solutions_to_graphs(output_dir, load_solutions(path), model_transitions)
        return

    if not path_is_dir:
        return

    # Directory: handle every complete solution file found in it
    directory = path if path.endswith("/") else path + "/"

    processed = 0
    for solution_file in glob.glob(directory + "*mac_complete.txt"):
        processed += 1
        save_solutions_to_graphs(
            output_dir, load_solutions(solution_file), model_transitions
        )

    LOGGER.info("Files processed: %s", processed)
    assert processed != 0, "No *mac_complete.txt files found!"
284 285


VIGNET Pierre's avatar
VIGNET Pierre committed
286 287
def save_solutions_to_graphs(output_dir, sol_steps, transitions):
    """Build and export one graph per given solution

    A solution is a set of frontier places associated with steps, each step
    being a list of events. The graph of a solution is built from the
    transitions involved in the events of this solution.

    :param output_dir: Output path.
    :param sol_steps: An iterable of tuples of "frontier places" and a list of
        events in each step.

        :Example:

            .. code-block:: python

                ("Bx Ax", [['h2', 'h00'], ['h3'], ['h0', 'h1'], ['hlast']])

    :param transitions: A dictionary of events as keys, and transitions as values.
        Since many transitions can define an event, values are lists.
        Each transition is a tuple with: origin node, final node, attributes
        like label and condition.

        :Example:

            .. code-block:: python

                {'h00': [('Ax', 'n1', {'label': 'h00[]'}),]}

    :type output_dir: <str>
    :type sol_steps: <tuple <str>, <list>>
    :type transitions: <dict <list <tuple <str>, <str>, <dict <str>: <str>>>>
    """
    # Position of the solution in the iterable; given to export_graph() so
    # that exported files follow the order of appearance of the solutions
    sol_index = 0
    for sol, steps in sol_steps:
        # Only the first item returned by build_graph() (the graph) is used
        graph = build_graph(sol, steps, transitions)[0]
        export_graph(output_dir, sol, sol_index, graph)
        sol_index += 1
VIGNET Pierre's avatar
VIGNET Pierre committed
327

328

329
################################################################################
330

331 332 333 334 335 336 337 338 339 340 341 342 343
def get_solution_graphs(sol_steps, transitions):
    """Lazily build the graph of each given solution.

    .. note:: Parameters are those of the similar function
        :meth:`~cadbiom_cmd.solution_sort.save_solutions_to_graphs`.

    :return: Generator of graphs, one per solution, in the order of
        ``sol_steps``.
    """
    for frontier_places, steps in sol_steps:
        # The graph is the first element of the result of build_graph()
        built = build_graph(frontier_places, steps, transitions)
        yield built[0]


VIGNET Pierre's avatar
VIGNET Pierre committed
344
def queries_2_common_graph(output_dir, model_file, path,
                           make_graphs=True, make_csv=False, make_json=False,
                           *args, **kwargs):
    """Entry point for queries_2_common_graph

    Create a GraphML formated file containing a unique representation of **all**
    trajectories corresponding to all solutions in each complete MAC files
    (``*mac_complete`` files).

    This is a function to visualize paths taken by the solver from the boundaries
    to the entities of interest.

    CSV fields::

        - query: Query giving the solutions
        - solutions: nb trajectories/solutions
        - boundaries: Number of boundary places
        - events: Number of events in all solutions
        - genes: Number of genes involved in solutions
        - Protein: Number of boundaries with the type Protein
            (genes are not counted)
        - Complex: Number of boundaries with the type Complex
            (genes are not counted)
        - input_boundaries: Boundaries found only as input places
        - guard_boundaries: Boundaries found only in guards
        - mixed_boundaries: Boundaries found in guards AND in inputs of reactions
        - graph_nodes: Total number of nodes in the graph
        - graph_nodes_places: Nodes that are biomolecules (do not count reaction nodes)
        - graph_edges: Number of edges
        - strongly_connected: Is the graph strongly connected ?
        - max_degree
        - min_degree
        - average_degree

    This function tests if the given path is a directory or a file.

    .. note:: The keyword arguments ``graphs``, ``csv`` and ``json``, when
        given, take precedence over ``make_graphs``, ``make_csv`` and
        ``make_json`` respectively. Extra positional arguments are ignored.

    :param output_dir: Output path.
    :param model_file: Filepath of the model.
    :param path: Filepath/directory of a/many complete solutions files.
    :key make_graphs: (optional) Make a GraphML for each query results in path.
        default: True
    :key make_csv: (optional) Make a **global** CSV for all query results in path.
        default: False
    :key make_json: (optional) Make a JSON dump of each query results in path.
        default: False
    :type output_dir: <str>
    :type model_file: <str>
    :type path: <str>
    :type make_graphs: <boolean>
    :type make_csv: <boolean>
    :type make_json: <boolean>
    :raises AssertionError: If path is neither a file nor a directory, or if
        the directory does not contain any ``*mac_complete.txt`` file.
    """

    def write_csv(data):
        """Write given data in CSV file; then flush the file descriptor

        .. note:: data is modified in place.

        :param data: Data to be written as a CSV row.
        """
        # Handle nested dictionaries by flattening them
        data.update({
            sub_key: sub_val
            for val in list(data.values()) if isinstance(val, dict)
            for sub_key, sub_val in val.items()
        })

        writer.writerow(data)
        f_d.flush()

    def process_solution_file(solution_file):
        """Export the common graph and/or its summary for one solution file"""
        # Get query string from the name of the solution file
        query = get_query_from_filename(model_file, solution_file)

        LOGGER.info("Processing %s query...", query)

        # Tuple of ("Ax Bx", [['h2', 'h00'], ['h3'], ['h0', 'h1'], ['hlast']])
        solutions = tuple(load_solutions(solution_file))
        graphs = get_solution_graphs(solutions, model_transitions)
        # Get common graph
        graph = merge_graphs(graphs)

        if make_graphs:
            # Write graph
            export_graph(output_dir, [query], "", graph)

        if not (make_json or make_csv):
            return

        # Summary of the solutions (similar to model_graph_info() ...)

        # Get 2 sequences: 1: frontiers; 2: steps of events
        # zip(*()) yields nothing: handle files without any solution
        if solutions:
            frontiers, steps_per_solution = zip(*solutions)
        else:
            frontiers, steps_per_solution = (), ()

        # Frontiers
        boundaries = set(it.chain.from_iterable(sol.split() for sol in frontiers))
        # Events
        events = set(
            it.chain.from_iterable(it.chain.from_iterable(steps_per_solution))
        )
        # Genes
        # brutal but effective as long as the nodes of a model do not have
        # the hard coded information in one of their attributes
        genes = {name for name in boundaries if "_gene" in name}

        # Influencing frontier places
        # Pb: this retrieves frontiers in guards, but they can be also inputs
        # In practice this is not happening in solutions (probably because
        # frontiers are disabled unlike perm nodes once they are used 1 time).
        # But it could/should happen if many solutions are considered because
        # frontiers may play different roles...
        all_inputs = boundaries & incoming_biomolecules
        all_guards = boundaries & influencing_places

        only_guards = all_guards - all_inputs
        only_inputs = all_inputs - all_guards
        mixed = all_inputs & all_guards

        LOGGER.debug("Guard frontiers roles: %s", len(only_guards))
        LOGGER.debug("Input frontiers roles: %s", len(only_inputs))
        LOGGER.debug("Mixed frontiers roles found: %s", len(mixed))

        # Get frontier types
        # Do not count genes entities that are "virtual" types
        frontier_nodes = (parser.model.node_dict[name] for name in boundaries - genes)
        # Nodes with empty notes are skipped
        frontier_types = Counter(
            json.loads(node.note).get("entityType", "UKN")
            for node in frontier_nodes
            if node.note
        )

        solutions_info = {
            "modelFile": model_file,
            "query": query,
            "solutions": len(solutions),  # nb trajectories/solutions
            "boundaries": len(boundaries),
            "events": len(events),
            "genes": len(genes),
            "input_boundaries": len(only_inputs),
            "guard_boundaries": len(only_guards),
            "mixed_boundaries": len(mixed),
            "entityTypes": frontier_types,
        }
        # Advanced metrics
        get_solutions_graph_data(graph, solutions_info, True)
        LOGGER.debug(solutions_info)

        if make_json:
            # Save to <solution_filename>_graph_summary.json
            write_json(output_dir, solution_file, "_graph_summary", solutions_info)

        if make_csv:
            # Export to csv graphs_summary.csv
            # Must stay after the JSON export: write_csv() flattens the data
            write_csv(solutions_info)

    # Check valid input file/directory
    assert os.path.isfile(path) or os.path.isdir(path)

    # Callers may use the keys "graphs", "json", "csv" (names that can not be
    # used as parameters without hiding the json and csv modules);
    # fall back to the explicit parameters when these keys are absent.
    make_graphs = kwargs.get("graphs", make_graphs)
    make_json = kwargs.get("json", make_json)
    make_csv = kwargs.get("csv", make_csv)

    # Get transitions from the model
    model_transitions, parser = get_transitions_from_model_file(model_file)

    # Origins and influencing places of all the transitions of the model.
    # They depend only on the model: computed once for all solution files.
    incoming_biomolecules = set()
    influencing_places = set()
    if make_json or make_csv:
        for trans in parser.model.transition_list:
            incoming_biomolecules.add(trans.ori.name)
            influencing_places.update(trans.get_influencing_places())

    f_d = None
    writer = None
    try:
        if make_csv:
            # Init the CSV file now
            f_d = open(output_dir + "graphs_summary.csv", "w")
            # Get a subset of solutions_info keys as fieldnames
            writer = csv.DictWriter(
                f_d,
                [
                    "query", "solutions", "boundaries", "events", "genes",
                    "Protein", "Complex",
                    "input_boundaries", "guard_boundaries", "mixed_boundaries",
                    "graph_nodes", "graph_nodes_places", "graph_edges",
                    "strongly_connected",
                    "max_degree", "min_degree", "average_degree",
                ],
                extrasaction="ignore",  # Ignore unknown fieldnames
                restval=0,  # Value for missing fieldnames
                delimiter=str(";"),
            )
            writer.writeheader()

        # File management...
        if os.path.isfile(path):
            # The given path is a solution file
            process_solution_file(path)

        elif os.path.isdir(path):
            # The given path is a directory
            path = path if path[-1] == "/" else path + "/"

            # Process all complete solution files in the directory
            file_number = 0
            for file_number, solution_file in enumerate(
                glob.glob(path + "*mac_complete.txt"), 1
            ):
                process_solution_file(solution_file)

            LOGGER.info("Files processed: %s", file_number)
            assert file_number != 0, "No *mac_complete.txt files found!"
    finally:
        # Close the CSV file descriptor, even if a file can not be processed
        if f_d is not None:
            f_d.close()

VIGNET Pierre's avatar
VIGNET Pierre committed
567

VIGNET Pierre's avatar
VIGNET Pierre committed
568 569
## Matrices of occurrences #####################################################

VIGNET Pierre's avatar
VIGNET Pierre committed
570 571 572 573

def queries_2_occcurrence_matrix(
    output_dir, model_file, path, transposed=False, normalized=False
):
    """Entry point for queries_2_occcurrence_matrix

    See :meth:`~cadbiom_cmd.solution_sort.occurrence_matrix`.

    The matrix is written in ``occurrence_matrix.csv`` in the output
    directory; the transposed version (if any) in ``occurrence_matrix_t.csv``.

    :param output_dir: Output path.
    :param model_file: Filepath of the model.
    :param path: Directory of many complete solutions files.
    :param transposed: (Optional) Transpose the final matrix (switch columns and rows).
    :param normalized: (Optional) Currently not used by this function.
    :type output_dir: <str>
    :type model_file: <str>
    :type path: <str>
    :type transposed: <boolean>
    :type normalized: <boolean>
    """

    # Check valid input directory
    assert os.path.isdir(path)

    path = path if path[-1] == "/" else path + "/"
    # Same treatment for the output directory: add the trailing separator
    # if it is missing (an empty output_dir is left as is), because
    # filenames are appended to it below and in occurrence_matrix().
    output_dir = os.path.join(output_dir, "")

    # Make matrix
    occurrence_matrix(output_dir, model_file, path)

    if transposed:
        transpose_csv(
            input_file=output_dir + "occurrence_matrix.csv",
            output_file=output_dir + "occurrence_matrix_t.csv",
        )
VIGNET Pierre's avatar
VIGNET Pierre committed
601 602


VIGNET Pierre's avatar
VIGNET Pierre committed
603 604 605
def occurrence_matrix(
    output_dir, model_file, path, matrix_filename="occurrence_matrix.csv"
):
    """Make a matrix of occurrences for the solutions in the given path.

    - Compute occurrences of each place in all `mac.txt` files.
    - Save the matrix in csv format with the following columns:
        Fieldnames: "patterns (number)/places (number);mac_number;frontier places"
        Each request (pattern) is accompanied by the number of solutions found.
        Frontier places columns are sorted in alphabetical order.
        The last row is the total of occurrences of each place.

    .. todo:: Split the creation and writing of the matrix in 2 functions.

    .. note:: The returned counters also contain the key ``mac_number``
        (number of solutions of the query) and the key of the first column
        of the CSV file (label of the row).

    :param output_dir: Output path (used as a prefix of ``matrix_filename``).
    :param model_file: Filepath of the model.
    :param path: Directory of many complete solutions files.
    :param matrix_filename: (Optional) Filename of the matrix file.
    :type output_dir: <str>
    :type model_file: <str>
    :type path: <str>
    :type matrix_filename: <str>
    :return: A dictionary with the matrix object.
        keys: queries, values: occurrences of frontier places
    :rtype: <dict>
    :raises AssertionError: If no ``*mac.txt`` file is found in path.
    """

    # Key: Logical formula as input of Cadbiom
    # Value: Number of each place in all solutions of the current file
    matrix = defaultdict(Counter)
    # All frontier places in all mac files
    all_frontier_places = set()

    # Compute occurrences of each place in all mac files
    file_number = 0
    for file_number, filepath in enumerate(
        glob.glob(os.path.join(path, "*mac.txt")), 1
    ):

        # Get query string from the name of the solution file
        # From: 'MODEL_NAME_PLACE1 and not PLACE2 and not PLACE3_mac.txt'
        # Get: 'PLACE1 and not PLACE2 and not PLACE3'
        query = get_query_from_filename(model_file, filepath)

        mac_number = 0
        for mac_number, mac_line in enumerate(get_mac_lines(filepath), 1):

            # split() without argument: no empty "place" if the line
            # contains repeated or trailing whitespace
            frontier_places = set(mac_line.split())
            # Update set of all frontier places
            all_frontier_places.update(frontier_places)
            # Update counter of places => compute frequencies
            matrix[query].update(frontier_places)

        # Set the mac_number for future standardization
        matrix[query]["mac_number"] = mac_number

    LOGGER.info("Files processed: %s", file_number)
    assert file_number != 0, "No *mac.txt files found!"

    # Save the matrix
    # columns: "patterns (number)/places (number);mac_number;frontier places"
    with open(output_dir + matrix_filename, "w") as f_d:

        # Forge header
        header = "patterns ({})/places ({})".format(
            len(matrix), len(all_frontier_places)
        )
        writer = csv.DictWriter(
            f_d,
            delimiter=str(";"),
            restval=0,  # default value for frequency
            # Sorted places: the order of the columns does not depend on
            # the iteration order of the set
            fieldnames=[header, "mac_number"] + sorted(all_frontier_places),
        )
        writer.writeheader()

        # Add a last line in the csv: total of occurrences for each place
        global_frontier_counter = Counter()
        # The first column is composed of the query + the number of solutions for it
        # items() is available on Python 2 and 3 (iteritems() is not)
        for query, row in matrix.items():
            global_frontier_counter += row
            # PS: THIS modifies the matrix by adding a new key ('header')
            row[header] = "{} ({})".format(query, row["mac_number"])
            writer.writerow(row)

        # Total of occurrences at the end of the file
        global_frontier_counter[header] = "Total of occurrences"
        writer.writerow(global_frontier_counter)

    return matrix


VIGNET Pierre's avatar
VIGNET Pierre committed
693 694 695
def transpose_csv(
    input_file="occurrence_matrix.csv", output_file="occurrence_matrix_t.csv"
):
    """Useful function to transpose a csv file x,y => y,x

    .. note:: The csv file must be semicolon ';' separated.

    .. note:: If the rows do not have the same length, the result is
        truncated to the length of the shortest row.

    The input file is entirely read and closed before the output file is
    opened; thus the same path can be given for both.

    :param input_file: Input file.
    :param output_file: Output file transposed.
    :type input_file: <str>
    :type output_file: <str>
    """
    # Transpose file
    # PS: zip('ABCD', 'xy') --> Ax By
    # zip() exists on Python 2 and 3 (itertools.izip() only on Python 2)
    with open(input_file, "r") as in_fd:
        transposed_rows = list(zip(*csv.reader(in_fd, delimiter=str(";"))))

    with open(output_file, "w") as out_fd:
        csv.writer(out_fd, delimiter=str(";")).writerows(transposed_rows)