solution_sort.py 18.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# -*- coding: utf-8 -*-
# Copyright (C) 2017  IRISA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# The original code contained here was initially developed by:
#
#     Pierre Vignet.
#     IRISA
#     Dyliss team
#     IRISA Campus de Beaulieu
#     35042 RENNES Cedex, FRANCE
24
25
"""Handle generated files

VIGNET Pierre's avatar
VIGNET Pierre committed
26
This module provides some functions to do some analysis on the output
files of Cadbiom.
28

VIGNET Pierre's avatar
VIGNET Pierre committed
29
30
31
Entry points:

    * :meth:`~cadbiom_cmd.solution_sort.solutions_2_json`
32
    * :meth:`~cadbiom_cmd.solution_sort.solutions_2_graphs`
VIGNET Pierre's avatar
VIGNET Pierre committed
33
34
    * :meth:`~cadbiom_cmd.solution_sort.solutions_2_common_graph`

35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
:Example of the content of a complete solution file:

    .. code::

        Bx  Ax
        % h2 h00
        % h3
        % h0 h1
        % hlast
        Bx  Ax
        % h2
        % h3 h00
        % h0 h1
        %
        % hlast
        Bx  Ax
        % h2
        % h3 h00
        % h0 h1
        % hlast
        %
        %
        Bx  Ax
        % h2 h00
        % h3
        % h0 h1
        % hlast
        %
        %
        %

66
"""
67
68
69
from __future__ import unicode_literals
from __future__ import print_function

70
# Standard imports
VIGNET Pierre's avatar
VIGNET Pierre committed
71
72
from collections import defaultdict, Counter
import itertools as it
73
import json
74
75
import os
import glob
VIGNET Pierre's avatar
VIGNET Pierre committed
76
import csv
77
import networkx as nx
78

79
80
# Library imports

81
from tools.solutions import get_solutions
82
from tools.models import get_transitions_from_model_file
83
from tools.solutions import load_solutions, convert_solutions_to_json, \
84
    get_query_from_filename, get_mac_lines
85
from tools.graphs import export_graph, build_graph
86
87
88
89

import cadbiom.commons as cm

# Module-level logger shared by all the functions below;
# it is obtained from the logger() helper of cadbiom.commons.
LOGGER = cm.logger()
90

91
## Sort functions ##############################################################
92

93
94
def sort_solutions_in_file(filepath):
    """Sort the places of all solutions in the given file in alphabetical order.

    The sort is case-insensitive; empty elements produced by consecutive
    spaces are removed from the sorted solutions.

    .. warning:: The file is modified in place.

    .. note:: Each original solution is replaced by its sorted version with
        a plain text substitution applied on the whole content of the file.

    :param filepath: Filepath to be opened and in which solutions will be
        sorted.
    :type filepath: <str>
    """

    # Key: solution as returned by get_solutions(); value: sorted solution
    solutions = dict()

    with open(filepath, 'r+') as f_d:

        # NOTE(review): get_solutions() is expected to yield pairs of
        # (original line, stripped line) - confirm against tools.solutions
        for line, stripped_line in get_solutions(f_d):
            # split(' ') yields '' (never ' ') between consecutive spaces:
            # empty elements have to be filtered on their truthiness
            places = [place for place in stripped_line.split(' ') if place]
            # Sort in lower case
            solutions[line] = " ".join(sorted(places, key=lambda s: s.lower()))

        # Rewind the whole file and load all the content
        f_d.seek(0)
        file_text = f_d.read()

        # Replace old sols with the new ones
        for original_sol, sorted_sol in solutions.items():
            file_text = file_text.replace(original_sol, sorted_sol)

        # Rewind the whole file and write all text in the current opened file
        f_d.seek(0)
        f_d.write(file_text)
        # The new content can be shorter than the previous one (empty elements
        # are removed): drop the stale bytes remaining after the new content
        f_d.truncate()
128
129


130
def solutions_sort(path):
    r"""Entry point for sorting solutions.

    Read a solution(s) file(s) (\*mac\* files) and sort all
    frontier places/boundaries in alphabetical order.

    This functions tests if the given path is a directory or a file.
    For a directory, only the \*mac\* files directly inside it are processed
    (the search is not recursive).

    .. warning:: The files will be modified in place.

    :param path: Filepath or directory path containing Cadbiom solutions.
    :type path: <str>
    :raises AssertionError: If the given path is neither a file
        nor a directory.
    """

    # Check valid input file/directory
    assert os.path.isfile(path) or os.path.isdir(path)

    if os.path.isdir(path):
        # Search of *mac* files in the directory
        # (mac.txt, mac_complete.txt, mac_step.txt)
        path = path if path[-1] == '/' else path + '/'
        for solution_file in glob.glob(path + '*mac*'):
            sort_solutions_in_file(solution_file)
    else:
        sort_solutions_in_file(path)
154
155
156

## Conversion functions ########################################################

157
def solutions_2_json(output_dir, model_file, path, conditions=True):
    r"""Entry point for solutions_2_json

    Create a JSON formated file containing all data from complete MAC files
    (\*mac_complete files). The file will contain frontier places/boundaries
    and decompiled steps with their respective events for each solution.

    This is a function to quickly search all transition attributes involved
    in a solution.

    This functions tests if the given path is a directory or a file.
    For a directory, all the `*mac_complete.txt` files inside it are processed.

    .. note:: A file named `<solution filename>_decomp.json` is written for
        each solution file. `output_dir` is concatenated as it is with
        this filename.

    :param output_dir: Output path.
    :param model_file: Filepath of the model.
    :param path: Filepath/directory of a complete solution file.
    :param conditions: (Optional) If False, conditions of transitions will not
        be present in the JSON file. This allows to have only places/entities
        used inside trajectories; thus, inhibitors are avoided.
    :type output_dir: <str>
    :type model_file: <str>
    :type path: <str>
    :type conditions: <boolean>
    :raises AssertionError: If the given path is neither a file nor
        a directory, or if the directory doesn't contain any
        `*mac_complete.txt` file.
    """

    def write_json_file(file_path, decomp_solutions):
        """Write decompiled solutions to a JSON formated file"""
        # Add _decomp to the solution filename
        filename = os.path.basename(os.path.splitext(file_path)[0])

        with open(output_dir + filename + '_decomp.json', 'w') as f_d:
            json.dump(decomp_solutions, f_d, sort_keys=True, indent=2)

    def decompile_file(solution_file):
        """Convert the solutions of the given file and write the JSON file"""
        decomp_solutions = convert_solutions_to_json(
            load_solutions(solution_file),
            model_transitions,
            conditions=conditions,
        )
        write_json_file(solution_file, decomp_solutions)

    # Check valid input file/directory
    assert os.path.isfile(path) or os.path.isdir(path)

    # Get transitions from the model
    model_transitions = get_transitions_from_model_file(model_file)

    if os.path.isfile(path):
        # The given path is a solution file
        decompile_file(path)

    elif os.path.isdir(path):
        # The given path is a directory
        path = path if path[-1] == '/' else path + '/'

        # Decompilation of all files in the directory
        # file_number stays at 0 if no file is found
        file_number = 0
        for file_number, solution_file in \
                enumerate(glob.glob(path + '*mac_complete.txt'), 1):
            decompile_file(solution_file)

        LOGGER.info("Files processed: %s", file_number)
        assert file_number != 0, "No *mac_complete.txt files found!"
223
224


225
226
def solutions_2_graphs(output_dir, model_file, path):
    r"""Entry point for solutions_2_graphs

    Create GraphML formated files containing a representation of the
    trajectories for each solution in complete MAC files (\*mac_complete files).

    This is a function to visualize paths taken by the solver from the boundaries
    to the entities of interest.

    This functions tests if the given path is a directory or a file.
    For a directory, all the `*mac_complete.txt` files inside it are processed.

    :param output_dir: Output path.
    :param model_file: Filepath of the model.
    :param path: Filepath/directory of a/many complete solutions files.
    :type output_dir: <str>
    :type model_file: <str>
    :type path: <str>
    :raises AssertionError: If the given path is neither a file nor
        a directory, or if the directory doesn't contain any
        `*mac_complete.txt` file.
    """

    # Check valid input file/directory
    assert os.path.isfile(path) or os.path.isdir(path)

    # Get transitions from the model
    model_transitions = get_transitions_from_model_file(model_file)

    if os.path.isfile(path):
        # The given path is a solution file
        convert_solution_file_to_graphs(
            output_dir,
            load_solutions(path),
            model_transitions
        )

    elif os.path.isdir(path):
        # The given path is a directory
        path = path if path[-1] == '/' else path + '/'

        # Decompilation of all files in the directory
        # file_number stays at 0 if no file is found
        file_number = 0
        for file_number, solution_file in \
                enumerate(glob.glob(path + '*mac_complete.txt'), 1):

            convert_solution_file_to_graphs(
                output_dir,
                load_solutions(solution_file),
                model_transitions
            )

        LOGGER.info("Files processed: %s", file_number)
        assert file_number != 0, "No *mac_complete.txt files found!"
275
276
277


def convert_solution_file_to_graphs(output_dir, sol_steps, transitions):
    """Build and write graphs based on the given solutions

    Each solution is composed of a set of frontier places and steps,
    themselves composed of events.
    We construct a graph based on the transitions that occur in the composition
    of the events of the given solution.

    One graph is exported per solution; nothing is done if `sol_steps`
    is empty.

    :param output_dir: Output path.
    :param sol_steps: A generator of tuples of "frontier places" and a list of
        events in each step.

        :Example:

            .. code-block:: python

                ("Bx Ax", [['h2', 'h00'], ['h3'], ['h0', 'h1'], ['hlast']])

    :param transitions: A dictionary of events as keys, and transitions as values.
        Since many transitions can define an event, values are lists.
        Each transition is a tuple with: origin node, final node, attributes
        like label and condition.

        :Example:

            .. code-block:: python

                {'h00': [('Ax', 'n1', {'label': 'h00[]'}),]}

    :type output_dir: <str>
    :type sol_steps: <tuple <str>, <list>>
    :type transitions: <dict <list <tuple <str>, <str>, <dict <str>: <str>>>>
    """

    for sol_index, (sol, steps) in enumerate(sol_steps):

        # build_graph() returns :
        # G, transition_nodes, all_nodes, edges_in_cond, edges
        # Only the graph itself is exported
        graph = build_graph(sol, steps, transitions)[0]

        # sol_index is used to order files according to the order of appearance
        # in the file
        export_graph(output_dir, sol, sol_index, graph)


321
def solutions_2_common_graph(output_dir, model_file, path):
    r"""Entry point for solutions_2_common_graph

    Create a GraphML formated file containing a unique representation of **all**
    trajectories corresponding to all solutions in each complete MAC files
    (\*mac_complete files).

    This is a function to visualize paths taken by the solver from the boundaries
    to the entities of interest.

    This functions tests if the given path is a directory or a file.
    For a directory, all the `*mac_complete.txt` files inside it are processed
    and one merged graph is exported per file.

    :param output_dir: Output path.
    :param model_file: Filepath of the model.
    :param path: Filepath/directory of a/many complete solutions files.
    :type output_dir: <str>
    :type model_file: <str>
    :type path: <str>
    :raises AssertionError: If the given path is neither a file nor
        a directory, or if the directory doesn't contain any
        `*mac_complete.txt` file.
    """

    def get_solution_graph(sol_steps, transitions):
        """Generator that yields the graph of the given solutions.

        .. note:: See the doc of a similar function
            :meth:`~cadbiom_cmd.solution_sort.convert_solution_file_to_graphs`.
        """

        for sol, steps in sol_steps:

            # build_graph() returns :
            # G, transition_nodes, all_nodes, edges_in_cond, edges
            # Python 3: partial unpacking: G, *_
            yield build_graph(sol, steps, transitions)[0]

    def export_common_graph(solution_file):
        """Merge the graphs of all solutions of the given file and export it"""
        # Get query string from the name of the solution file
        query = get_query_from_filename(model_file, solution_file)

        LOGGER.info("Processing %s query...", query)

        graphs = get_solution_graph(
            load_solutions(solution_file),
            model_transitions
        )

        # Write graph
        export_graph(output_dir, query, '', merge_graphs(graphs))

    # Check valid input file/directory
    assert os.path.isfile(path) or os.path.isdir(path)

    # Get transitions from the model
    model_transitions = get_transitions_from_model_file(model_file)

    if os.path.isfile(path):
        # The given path is a solution file
        export_common_graph(path)

    elif os.path.isdir(path):
        # The given path is a directory
        path = path if path[-1] == '/' else path + '/'

        # Decompilation of all files in the directory
        # file_number stays at 0 if no file is found
        file_number = 0
        for file_number, solution_file in \
                enumerate(glob.glob(path + '*mac_complete.txt'), 1):
            export_common_graph(solution_file)

        LOGGER.info("Files processed: %s", file_number)
        assert file_number != 0, "No *mac_complete.txt files found!"
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428


def merge_graphs(graphs):
    """Build a unique graph from all the graphs of the given iterable.

    Nodes are copied with their attributes the first time they are met.
    Each edge of the final graph carries a 'weight' attribute: it is set to 1
    when the edge is added, and incremented each time the same edge is found
    again in one of the following graphs.

    :param graphs: Networkx graph objects.
    :type graphs: <generator <networkx.classes.digraph.DiGraph>>
    :return: Networkx graph object.
    :rtype: <networkx.classes.digraph.DiGraph>
    """

    merged = nx.DiGraph()

    for current in graphs:

        # Nodes of the current graph that are not yet in the merged graph
        unknown_nodes = set(current.nodes_iter()) - set(merged.nodes_iter())
        if unknown_nodes:
            # Tuples (node_name, attrs) keep the attributes of the nodes
            merged.add_nodes_from(
                (name, current.node[name]) for name in unknown_nodes
            )

        for source, target, attrs in current.edges_iter(data=True):
            if not merged.has_edge(source, target):
                # First occurrence of this edge
                merged.add_edge(source, target, attr_dict=attrs, weight=1)
                continue
            # Edge already known: one more occurrence
            merged[source][target]['weight'] += 1

    return merged
VIGNET Pierre's avatar
VIGNET Pierre committed
429
430
431
432
433
434
435

## Matrices of occurrences #####################################################

def solutions_2_occcurrences_matrix(output_dir, model_file, path,
                                    transposed=False, normalized=False):
    """Entry point for solutions_2_occcurrences_matrix

    See :meth:`~cadbiom_cmd.solution_sort.occurrence_matrix`.

    The matrix is written in the file `occurrence_matrix.csv`; if `transposed`
    is True, a transposed copy is also written in `occurrence_matrix_t.csv`.
    `output_dir` is concatenated as it is with these filenames.

    :param output_dir: Output path.
    :param model_file: Filepath of the model.
    :param path: Directory of many complete solutions files.
    :param transposed: (Optional) Transpose the final matrix (switch columns and rows).
    :param normalized: (Optional) Not used by this function for now.
    :type output_dir: <str>
    :type model_file: <str>
    :type path: <str>
    :type transposed: <boolean>
    :type normalized: <boolean>
    :raises AssertionError: If the given path is not a directory.
    """

    # Check valid input directory
    assert os.path.isdir(path)

    path = path if path[-1] == '/' else path + '/'

    # Make matrix
    occurrence_matrix(output_dir, model_file, path)

    if transposed:
        transpose_csv(input_file=output_dir + 'occurrence_matrix.csv',
                      output_file=output_dir + 'occurrence_matrix_t.csv')
VIGNET Pierre's avatar
VIGNET Pierre committed
459
460


VIGNET Pierre's avatar
VIGNET Pierre committed
461
462
def occurrence_matrix(output_dir, model_file, path,
                      matrix_filename='occurrence_matrix.csv'):
    """Make a matrix of occurrences for the solutions in the given path.

    - Compute occurrences of each place in all `mac.txt` files.
    - Save the matrix in csv format with the following columns:
        Fieldnames: "patterns (number)/places (number);mac_number;frontier places"
        Each request (pattern) is accompanied by the number of solutions found.
        Columns of frontier places are sorted in alphabetical order.
    - A last line with the total of occurrences of each place is added.

    .. note:: `path` and `output_dir` are concatenated as they are with the
        searched pattern and the filename of the matrix.

    .. todo:: Split the creation and writing of the matrix in 2 functions.

    :param output_dir: Output path.
    :param model_file: Filepath of the model.
    :param path: Directory of many complete solutions files.
    :param matrix_filename: (Optional) Filename of the matrix file.
    :type output_dir: <str>
    :type model_file: <str>
    :type path: <str>
    :type matrix_filename: <str>
    :return: A dictionary with the matrix object.
        keys: queries, values: occurrences of frontier places.
        Each value also contains the key "mac_number" (number of solutions)
        and the key of the first column of the csv file (query + number of
        solutions).
    :rtype: <dict>
    :raises AssertionError: If no `*mac.txt` file is found in the given path.
    """

    # Key: Logical formula as input of Cadbiom
    # Value: Number of each place in all solutions of the current file
    matrix = defaultdict(Counter)
    # All frontier places in all mac files
    all_frontier_places = set()

    # Compute occurrences of each place in all mac files
    # file_number stays at 0 if no file is found
    file_number = 0
    for file_number, filepath in enumerate(glob.glob(path + '*mac.txt'), 1):

        # Get query string from the name of the solution file
        # From: 'MODEL_NAME_PLACE1 and not PLACE2 and not PLACE3_mac.txt'
        # Get: 'PLACE1 and not PLACE2 and not PLACE3'
        query = get_query_from_filename(model_file, filepath)

        mac_number = 0
        for mac_number, mac_line in enumerate(get_mac_lines(filepath), 1):

            # split(' ') yields '' between consecutive spaces: such empty
            # elements are not places and must not become a column
            frontier_places = {
                place for place in mac_line.split(' ') if place
            }
            # Update set of all frontier places
            all_frontier_places.update(frontier_places)
            # Update counter of places => compute frequencies
            matrix[query] += Counter(frontier_places)

        # Set the mac_number for future standardization
        matrix[query]["mac_number"] = mac_number

    LOGGER.info("Files processed: %s", file_number)
    assert file_number != 0, "No *mac.txt files found!"

    # Save the matrix
    # columns: "patterns (number)/places (number);mac_number;frontier places"
    with open(output_dir + matrix_filename, 'w') as f_d:

        # Forge header
        header = "patterns ({})/places ({})".format(
            len(matrix),
            len(all_frontier_places),
        )
        writer = csv.DictWriter(
            f_d,
            # str() is required with unicode_literals on Python 2
            delimiter=str(';'),
            restval=0,  # default value for frequency
            # Sorted places: the order of columns is the same at each run
            fieldnames=[header, "mac_number"] + sorted(all_frontier_places))
        writer.writeheader()

        # Add a last line in the csv: total of occurrences for each place
        global_frontier_counter = Counter()
        # The first column is composed of the query + the number of solutions for it
        for query, row in matrix.items():
            # Must be done before the insertion of the text of the first
            # column in the row: only numbers can be added
            global_frontier_counter += row
            # PS: THIS modifies the matrix by adding a new key ('header')
            row[header] = "{} ({})".format(query, row["mac_number"])
            writer.writerow(row)

        # Total of occurrences at the end of the file
        global_frontier_counter[header] = "Total of occurrences"
        writer.writerow(global_frontier_counter)

    return matrix


VIGNET Pierre's avatar
VIGNET Pierre committed
550
551
def transpose_csv(input_file='occurrence_matrix.csv',
                  output_file='occurrence_matrix_t.csv'):
    """Useful function to transpose a csv file x,y => y,x

    The input file is entirely loaded and closed before the output file
    is opened.

    .. note:: The csv file must be semicolon ';' separated.

    .. note:: If the rows don't have the same number of columns, the result
        is truncated to the length of the shortest row.

    :param input_file: Input file.
    :param output_file: Output file transposed.
    :type input_file: <str>
    :type output_file: <str>
    """

    # Load all the rows; the file is closed at the end of the block
    # str() is required with unicode_literals on Python 2
    with open(input_file, "r") as f_in:
        rows = list(csv.reader(f_in, delimiter=str(';')))

    # Transpose file
    # PS: zip('ABCD', 'xy') --> Ax By
    # zip() is available on Python 2 and 3 (unlike itertools.izip)
    with open(output_file, "w") as f_out:
        csv.writer(f_out, delimiter=str(';')).writerows(zip(*rows))