models.py 21 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# -*- coding: utf-8 -*-
# Copyright (C) 2017  IRISA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# The original code contained here was initially developed by:
#
#     Pierre Vignet.
#     IRISA
#     Dyliss team
#     IRISA Campus de Beaulieu
#     35042 RENNES Cedex, FRANCE
24
25
26
27
28
29
30
31
"""
This module groups functions directly related to the management and the
extraction of data of a Cadbiom model.

Here we find high-level functions to manage the logical formulas of the events
and conditions defining the transitions; as well as useful functions to manage
the entities, like to obtain their metadata or the frontier places of the model.
"""
32
33
34
35
36
37
38
from __future__ import unicode_literals
from __future__ import print_function

# Standard imports
from collections import defaultdict
import re
import json
39
40
import itertools as it
from logging import DEBUG
41
42

# Library imports
VIGNET Pierre's avatar
VIGNET Pierre committed
43
from cadbiom.models.guard_transitions.translators.chart_xml import MakeModelFromXmlFile
44
from cadbiom.models.biosignal.translators.gt_visitors import compile_event, compile_cond
45
46
47
48
49
50
51
52
53
54
55
from cadbiom.models.biosignal.sig_expr import *
from cadbiom.models.guard_transitions.analyser.ana_visitors import TableVisitor

import cadbiom.commons as cm

LOGGER = cm.logger()


class Reporter(object):
    """Error reporter given to the Cadbiom event/condition compilers.

    .. note:: Linking the lexer to the model would avoid some errors in the
        Reporter. Instead, we mask errors like:
        "-> dec -> Undeclared event or state".
        This error is generated for every place in a condition/event and should
        only mean that an item has never been met.
        In practice, linking the model is time consuming and useless for what
        we want to do.
        See parse_condition()
    """

    def __init__(self):
        # True as soon as display() has been called at least once
        self.error = False
        # Kept for interface compatibility; never filled by this class
        self.mess = ""

    def display(self, error_msg):
        """Flag that an error was reported and log it, unless it is masked.

        :param error_msg: Message emitted by the parser.
        :type error_msg: <str>
        """
        # The flag is raised for every reported message, masked or not
        self.error = True

        # Masked error: see the note in the docstring of the class
        if "Undeclared event or state" in error_msg:
            return

        LOGGER.error("Event or condition parser:: %s", error_msg)
76
77


78
def get_transitions_from_model_file(model_file):
    """Get all transitions and parser from a model file (bcx format).

    :param model_file: bcx file, given as is to MakeModelFromXmlFile.
    :type model_file: <str>
    :return: Transitions (see get_transitions()) and the Parser for the model.
    :rtype: <dict>, <MakeModelFromXmlFile>
    """
    parser = MakeModelFromXmlFile(model_file)
    return get_transitions(parser), parser
89
90
91
92
93


def get_transitions(parser):
    """Get all transitions in the given parser.

    There are two methods to access the transitions of a model.

    :Example:

        .. code-block:: python

            >>> print(dir(parser))
            ['handler', 'model', 'parser']
            >>> # Direct access
            >>> events = list()
            >>> for transition in parser.model.transition_list:
            ...     events.append(transition.event)
            >>>
            >>> # Indirect access via a handler
            >>> events = list()
            >>> for transitions in parser.handler.top_pile.transitions:
            ...     # transitions is a list of CTransition objects
            ...     for transition in transitions:
            ...         events.append(transition.event)

    .. note:: Transitions with an empty event are renamed in place:
        their ``event`` attribute is set to "SCC-" + the name of their origin
        node.

    :param parser: Parser opened on a bcx file.
    :type parser: <MakeModelFromXmlFile>
    :return: A dictionary of events as keys, and transitions as values.
        Since many transitions can define an event, values are lists.
        Each transition is a tuple with: origin node, final node, attributes
        like label and condition.
        ``{'h00': [('Ax', 'n1', {'label': 'h00', 'condition': ''}),]}``
    :rtype: <dict <list <tuple <str>, <str>, <dict <str>: <str>>>>
    :raises AssertionError: If no transition is found in the model.
    """
    # NOTE: this function should talk about events instead of transitions...
    # TODO: see if the parser could be returned to run the static analysis,
    # or write a separate function that deals with the model itself rather
    # than with the graph (which is what get_statistics does, by the way).
    transitions = defaultdict(list)

    for trans in parser.model.transition_list:

        # Get the names of clocks
        # Some event have many clocks (like _h_2755) for the same
        # ori/ext entities, so we have to extract them and their respective
        # conditions
        if trans.event == "":
            # null event without clock => StartNodes
            # These nodes are used to resolve the problem of
            # Strongly Connected Components (inactivated cycles in the graph)
            # Avoids having SigConstExpr as event type in parse_event()
            # A transition (SCC-__start__?) and a node (__start__?) are
            # created for this case.
            trans.event = "SCC-" + trans.ori.name
            events = {trans.event: trans.condition}
        elif re.match(r"_h[\w]+", trans.event):
            # 1 event (with 1 clock)
            events = {trans.event: trans.condition}
        else:
            # Many events (with many clocks with condition(s))
            events = parse_event(trans.event)

        # items() works on both Python 2 and Python 3 dictionaries
        for event, condition in events.items():
            # Handle multiple transitions for 1 event
            transitions[event].append(
                (
                    trans.ori.name,
                    trans.ext.name,
                    {
                        "label": event,
                        "condition": condition,
                    },
                )
            )

    LOGGER.info("%s transitions loaded", len(transitions))

    # Explicit raise: unlike an assert statement, it is not stripped by -O
    if not transitions:
        raise AssertionError(
            "No transitions found in the model ! "
            "Please check the names of events (_h_xxx)"
        )

    # Return a dict instead of defaultdict to avoid later confusions
    # (masked errors) by searching a transition that was not in the model...
    return dict(transitions)


180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
def get_frontier_places(transitions, all_places):
    """Compute the frontier places of a model.

    A frontier place is a place of the model that is never the output
    (second item) of any transition.

    .. note:: all_places is used instead of (input_places - output_places)
        because some nodes appear only in conditions and not in transitions.
        Without them, these nodes would be missing when valid paths are
        computed from conditions.

    :param transitions: Model's transitions.
        {u'h00': [('Ax', 'n1', {u'label': u'h00[]'}),]}
        keys: names of events
        values: list of transitions as tuples (with in/output, and label).
    :param all_places: Iterable of the names of all places of the model.
    :type transitions: <dict>
    :return: Set of frontier places.
    :rtype: <set>
    """
    # Collect the output node of every transition of every event
    reached_places = set()
    for event_transitions in transitions.values():
        for transition in event_transitions:
            reached_places.add(transition[1])

    # Places that are never reached by a transition are the frontiers
    return set(all_places).difference(reached_places)


################################################################################

def get_places_from_condition(condition):
    """Parse condition string and return all places, regardless of operators.

    .. note:: This function is only used to get all nodes in a condition when
        we know they are all inhibitors nodes.

    .. note:: Compiling the condition with compile_cond() and walking the
        tree with rec() is a valid alternative, but it is very time consuming;
        this is why a plain string tokenization is used here.

    :param condition: Condition string.
        Must be exempt of unauthorized chars.
    :type condition: <str>
    :return: Set of places.
    :rtype: <set>
    """
    operators = {"and", "or", "not"}

    # Parentheses become spaces so that every operator and every place is an
    # isolated token: an operator name inside an entity name is never matched.
    tokens = condition.replace("(", " ").replace(")", " ").split(" ")

    # Filtering tokens (instead of replacing " not ", " and "...) also removes
    # operators located at the very beginning or end of the string.
    return {token for token in tokens if token and token not in operators}
246
247


248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
def parse_event(event):
    """Decompile logical formula in event's name.

    :param event: Event string.
    :type event: <str>
    :return: A dict of events and their conditions.
        keys: event's names; values: logical formula attached (condition)
    :rtype: <dict>
    :raises AssertionError: If a sub-expression of the event is neither a
        SigWhenExpr nor a SigIdentExpr.
    """

    def treeToExprDefaultsList(tree):
        """Flatten nested SigDefaultExpr into the list of their operands."""
        if isinstance(tree, SigDefaultExpr):
            return treeToExprDefaultsList(tree.left_h) + treeToExprDefaultsList(
                tree.right_h
            )

        # Here, some tree are from classes SigConstExpr or SigIdentExpr
        # Ex: for the clock "_h_5231":
        #    ... default (_h_5231)" => No condition for this event
        # Other examples:
        # _h_2018 _h_820 _h_4939 _h_5231 _h_3301 _h_4967 _h_2303 _h_3301
        return [tree]

    def filterSigExpression(expr):
        """Return the (event name, condition string) couple of an expression.

        .. note:: No SigConstExpr here => filtered in get_transitions()
            by checking null events (event="") in the model.
        """
        if isinstance(expr, SigWhenExpr):
            # right : SigSyncBinExpr (logical formula), BUT
            # sometimes SigConstExpr (just a True boolean) when clock is empty
            # Ex: "when ()"
            # So, we replace this boolean with an empty condition
            right = "" if isinstance(expr.right_h, SigConstExpr) else str(expr.right_h)
            return expr.left_h.name, right

        if isinstance(expr, SigIdentExpr):
            # Clock alone: no condition for this event
            return expr.name, ""

        raise AssertionError(
            "You should never have been there ! "
            "Your expression type is not yet supported..."
        )

    # Error Reporter
    err = Reporter()
    tvi = TableVisitor(err)
    symb_tab = tvi.tab_symb

    # Get tree object from event string
    event_sexpr = compile_event(event, symb_tab, True, err)[0]

    # One (name, condition) couple per clock found in the event
    eventToCondStr = dict(
        filterSigExpression(expr) for expr in treeToExprDefaultsList(event_sexpr)
    )

    LOGGER.debug("Clocks from event parsing: %s", eventToCondStr)

    return eventToCondStr


313
def parse_condition(condition, all_nodes, inhibitors_nodes):
    """Return valid paths according the given logical formula and nodes;
    and set inhibitors_nodes

    .. note:: inhibitors_nodes is modified(set) by this function.

    :param condition: Condition string of a transition.
    :param all_nodes: Nodes involved in transitions + frontier places.
    :param inhibitors_nodes: Inactivated nodes in paths of conditions.
    :type condition: <str>
    :type inhibitors_nodes: <set>
    :type all_nodes: <set>
    :return: Set of paths. Each path is a tuple of nodes.
    :rtype: <set>
    :raises AssertionError: If no path of the condition is valid.
    """
    LOGGER.debug("CONDITION: %s", condition)

    # Error Reporter
    err = Reporter()
    tvi = TableVisitor(err)
    # Linking the lexer to the model (loading the model with
    # MakeModelFromXmlFile and calling parser.model.accept(tvi)) would avoid
    # errors in Reporter like: "-> dec -> Undeclared event or state"
    # In practice this is time consuming and useless for what we want to do;
    # these errors are masked by the Reporter instead.
    symb_tab = tvi.tab_symb
    # Get tree object from condition string
    cond_sexpr = compile_cond(condition, symb_tab, err)
    # Get all possible paths from the condition
    possible_paths = rec(cond_sexpr, inhibitors_nodes)

    # Prune possible paths according to:
    # - Inhibitor nodes that must be removed because they will never
    # be in the graph.
    # - All nodes in transitions (ori -> ext) because we know all transitions
    # in the graph, so we know which entities can be choosen to validate a path.
    # - All frontier places, that are known entities that can be in conditions
    # (not only in ori/ext) of transitions.
    # So: authorized nodes = frontier_places + transition_nodes - inhibitor nodes
    valid_paths = {
        tuple(path)
        for path in possible_paths
        if (set(path) - inhibitors_nodes).issubset(all_nodes)
    }

    # Debugging only: skip the extra computations when they are not logged
    if LOGGER.isEnabledFor(DEBUG):
        LOGGER.debug("INHIBIT NODES: %s", inhibitors_nodes)
        LOGGER.debug("ALL NODES: %s", all_nodes)
        LOGGER.debug("POSSIBLE PATHS: %s", possible_paths)
        LOGGER.debug("VALID PATHS: %s", valid_paths)

        if len(valid_paths) > 1:
            LOGGER.debug(
                "Multiple valid paths in the model for: %s:\n%s", condition, valid_paths
            )

        for path in possible_paths:
            pruned_places = set(path) - inhibitors_nodes
            isinsubset = pruned_places.issubset(all_nodes)
            LOGGER.debug("PRUNED PATH: %s, VALID: %s", pruned_places, isinsubset)

    # Explicit raise: unlike an assert statement, it is not stripped by -O
    if not valid_paths:
        raise AssertionError("No valid path in the model for: " + str(condition))

    return valid_paths


def rec(tree, inhibitors_nodes):
    """Recursive function to decompile conditions

    Every returned path is a list of place names that appear together in one
    alternative of the formula:

    - 'or' concatenates the lists of paths of its two operands;
    - 'and' merges every path of the left operand with every path of the
      right operand;
    - 'not' registers all the places of its operand in inhibitors_nodes.

    :Example: for the formula ``(A or B) and C`` the result is
        ``[['A', 'C'], ['B', 'C']]``.

    .. note:: inhibitors_nodes is modified by this function.

    :param tree: Expression to decompile: a name (str), a SigIdentExpr,
        a SigNotExpr, or a binary expression with the attributes
        left_h, operator ('and'/'or') and right_h.
    :param inhibitors_nodes: Set updated with the places found under a 'not'.
    :type inhibitors_nodes: <set>
    :return: List of paths. Each path is a list of names of places.
    :rtype: <list <list <str>>>
    :raises AssertionError: If the operator of a binary expression is neither
        'or' nor 'and'.
    """
    if isinstance(tree, str):  # terminal node
        return [[tree]]

    if isinstance(tree, SigNotExpr):
        # tree.operand: the entity, type: SigIdentExpr (most of the time)
        operand = tree.operand
        LOGGER.debug("NOT OPERAND: %s, %s", operand, type(operand))

        # Every place under the negation is an inhibitor
        current_inhibitors = get_places_from_condition(str(operand))
        inhibitors_nodes.update(current_inhibitors)
        LOGGER.debug("INHIBITORS found: %s", current_inhibitors)

        try:
            return [[operand.name]]
        except AttributeError:
            # The operand is not a simple identifier: decompile it like any
            # other expression (binary expression, nested 'not', ...)
            return rec(operand, inhibitors_nodes)

    if isinstance(tree, SigIdentExpr):
        return [[tree.name]]

    # Binary expression
    op = tree.operator
    lpaths = rec(tree.left_h, inhibitors_nodes)
    rpaths = rec(tree.right_h, inhibitors_nodes)

    if op == 'or':
        # Alternatives: paths of both sides are kept separately
        return list(it.chain(lpaths, rpaths))

    # Explicit raise: unlike an assert statement, it is not stripped by -O
    if op != 'and':
        raise AssertionError("Unsupported operator in condition: " + str(op))

    # Conjunction: each left path is combined with each right path
    return [left + right for left, right in it.product(lpaths, rpaths)]


################################################################################

478
479
480
481
482
def get_places_data(places, model):
    """Get a list of JSON data parsed from each given places in the model.

    .. note:: This function is used by low_model_infos().

    .. note:: v1 models return a dict with only 1 key: 'cadbiomName'

    .. note:: Start nodes (with a name like __start__x) are handled even
        with no JSON data.
        They are counted in the other_types and other_locations fields.

    :Example of JSON data that can be found in the model:

        .. code-block:: python

            {
                "uri": entity.uri,
                "entityType": entity.entityType,
                "names": list(entity.synonyms | set([entity.name])),
                "entityRef": entity.entityRef,
                "location": entity.location.name if entity.location else None,
                "modificationFeatures": dict(entity.modificationFeatures),
                "members": list(entity.members),
                "reactions": [reaction.uri for reaction in entity.reactions],
                "xrefs": entity.xrefs,
            }

    :param places: Iterable of name of places.
    :param model: Model from handler.
    :type places: <set>
    :type model: <MakeModelFromXmlFile>
    :return: List of data parsed from each give places.

        .. note:: Here is the list of field retrieved for v2 models:

            - cadbiomName
            - uri
            - entityType
            - entityRef
            - location
            - names
            - xrefs
    :rtype: <list <dict>>
    """
    if model.xml_namespace != "http://cadbiom.genouest.org/v2/":
        # v1 model: return only the name of the place
        return [{"cadbiomName": place_name} for place_name in places]

    # Fields whose default value is an empty string.
    # 'names' and 'xrefs' default to a new list/dict built for each place, so
    # that a default value is never shared between two places.
    text_fieldnames = ("uri", "entityType", "entityRef", "location")

    data = []
    for place_name in places:
        try:
            # Model type 2 => We use JSON data in each nodes
            json_data = json.loads(model.node_dict[place_name].note)
        except ValueError:
            # Note that can't be decoded, like the empty note of start nodes
            # (name: __start__x) => only default values for this place.
            # The decoding error of Python 3 is also a ValueError.
            json_data = {}

        # Get JSON data ('' if the field is not present)
        temp = {
            fieldname: json_data.get(fieldname, "") for fieldname in text_fieldnames
        }
        # Patch: Handle null values that should be avoided in
        # cadbiom_writer.build_json_data()
        temp["names"] = [name for name in json_data.get("names", []) if name]
        temp["xrefs"] = json_data.get("xrefs", {})
        # Add the cadbiom name (name attribute of xml element)
        temp["cadbiomName"] = place_name
        data.append(temp)

    return data
561
562


563
def get_model_identifier_mapping(model_file, external_identifiers):
    """Get Cadbiom names corresponding to the given external identifiers (xrefs)

    .. note:: This function works only on v2 formated models with JSON additional data

    :param model_file: Model file.
    :param external_identifiers: Iterable of external identifiers to be mapped.
    :type model_file: <str>
    :type external_identifiers: <set>
    :return: Mapping with external identifiers as keys and sets of cadbiom
        names as values. Only the identifiers found in the model are keys.
    :rtype: <defaultdict <str>:<set>>
    :raises AssertionError: If the model is not a v2 model.
    """
    # Get the model
    parser = MakeModelFromXmlFile(model_file)
    model = parser.handler.model

    # Explicit raise: unlike an assert statement, it is not stripped by -O
    if model.xml_namespace != 'http://cadbiom.genouest.org/v2/':
        raise AssertionError("Operation not supported: Only v2 models are supported.")

    # Set operations are used below: accept any iterable of identifiers
    external_identifiers = set(external_identifiers)

    # Get data of all nodes
    # {'xrefs': {'bdd': [values],}, 'cadbiomName': '',}
    places_data = get_places_data(list(parser.handler.node_dict), model)

    # Mapping: external_identifiers as keys and Cadbiom names as values
    mapping = defaultdict(set)
    for place in places_data:
        # All identifiers of the place, regardless of their database
        identifiers = frozenset(it.chain.from_iterable(place["xrefs"].values()))

        for common_id in identifiers & external_identifiers:
            mapping[common_id].add(place["cadbiomName"])

    not_found_identifiers = external_identifiers - set(mapping)
    if not_found_identifiers:
        LOGGER.info(
            "Some identifiers were not found (%s/%s): %s",
            len(not_found_identifiers),
            len(external_identifiers),
            not_found_identifiers,
        )

    return mapping