grew_ast.ml 9.34 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

bguillaum's avatar
bguillaum committed
11 12
open Printf
open Log
bguillaum's avatar
bguillaum committed
13
open Grew_base
bguillaum's avatar
bguillaum committed
14
open Grew_types
bguillaum's avatar
bguillaum committed
15

16
(* ================================================================================ *)
bguillaum's avatar
bguillaum committed
17
module Ast = struct
18

19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
  (* general function for checking that an identifier is of the right kind *)
  let check_special name allowed s =
    let sp = Str.full_split (Str.regexp "#\\|\\.\\|:\\|\\*") s in
    try
      match List.find
      (function
        | Str.Delim d when not (List.mem d allowed) -> true
        | _ -> false
      ) sp
      with
      | Str.Delim wrong_char ->
       Error.build "The identifier '%s' is not a valid %s, the character '%s' is illegal" s name wrong_char
      | Str.Text _ -> Error.bug "[Grew_ast.check_special]"
    with
    | Not_found -> ()
34

35 36 37 38 39 40
  (* ---------------------------------------------------------------------- *)
  (* simple_ident: cat *)
  type simple_ident = Id.name
  let parse_simple_ident s = check_special "simple ident" [] s; s
  let is_simple_ident s = try ignore (parse_simple_ident s); true with _ -> false
  let dump_simple_ident name = name
41

bguillaum's avatar
bguillaum committed
42
  (* ---------------------------------------------------------------------- *)
43 44 45 46
  (* label_ident: D:mod.dis *)
  type label_ident = string
  let parse_label_ident s = check_special "label ident" [":"; "."] s; s
  let dump_label_ident name = name
47

48 49 50 51 52
  (* ---------------------------------------------------------------------- *)
  (* pattern_label_ident: D:mod.* *)
  type pattern_label_ident = string
  let parse_pattern_label_ident s = check_special "label ident" [":"; "."; "*"] s; s
  let dump_pattern_label_ident name = name
53

bguillaum's avatar
bguillaum committed
54
  (* ---------------------------------------------------------------------- *)
55 56 57 58 59 60 61 62
  (* feature_ident: V.cat *)
  type feature_ident = Id.name * feature_name
  let parse_feature_ident s =
    check_special "feature ident" ["."] s;
    match Str.full_split (Str.regexp "\\.") s with
    | [Str.Text base; Str.Delim "."; Str.Text fn] -> (base, fn)
    | _ -> Error.build "The identifier '%s' must be a feature identifier (with exactly one '.' symbol, like \"V.cat\" for instance)" s
  let dump_feature_ident (name, feat_name) = sprintf "%s.%s" name feat_name
63 64


bguillaum's avatar
bguillaum committed
65
  (* ---------------------------------------------------------------------- *)
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
  (* command_node_id: V, V#alpha *)
  type command_node_ident =
    | No_sharp of Id.name
    | Sharp of Id.name * string

  let parse_command_node_ident s =
    check_special "feature ident" ["#"] s;
    match Str.full_split (Str.regexp "#") s with
    | [Str.Text base; Str.Delim "#"; Str.Text ext] -> Sharp (base, ext)
    | [Str.Text base] -> No_sharp base
    | _ -> Error.build "The identifier '%s' must be a command node identifier (with at most one '#' symbol)" s

  let dump_command_node_ident = function
    | No_sharp x -> x
    | Sharp (x,y) -> x ^ "#" ^ y
81 82


83 84 85
  let base_command_node_ident = function
    | No_sharp x -> x
    | Sharp (x,y) -> x
86

bguillaum's avatar
bguillaum committed
87
  (* ---------------------------------------------------------------------- *)
88 89
  (* command_feature_ident: V.cat, V#alpha.cat *)
  type command_feature_ident = command_node_ident * feature_name
90

91 92 93 94 95 96
  let parse_command_feature_ident s =
    check_special "feature ident" ["."; "#"] s;
    match Str.full_split (Str.regexp "#\\|\\.") s with
    | [Str.Text base; Str.Delim "#"; Str.Text ext; Str.Delim "."; Str.Text feature_name] -> (Sharp (base, ext), feature_name)
    | [Str.Text base; Str.Delim "."; Str.Text feature_name] -> (No_sharp base, feature_name)
    | _ -> Error.build "The identifier '%s' must be a command feature identifier (with exactly one '.' symbol and at most one '#' symbol in the left part)" s
97

98 99 100
  let dump_command_feature_ident = function
    | (No_sharp base, feature_name) -> sprintf "%s.%s" base feature_name
    | (Sharp (base,ext), feature_name) -> sprintf "%s#%s.%s" base ext feature_name
101

102
  (* ---------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
103
  type feature_kind =
104 105
    | Equality of feature_value list
    | Disequality of feature_value list
bguillaum's avatar
bguillaum committed
106
    | Equal_param of string (* $ident *)
bguillaum's avatar
bguillaum committed
107
    | Absent
bguillaum's avatar
bguillaum committed
108

bguillaum's avatar
bguillaum committed
109
  type u_feature = {
110 111 112
    name: feature_name;
    kind: feature_kind;
  }
bguillaum's avatar
bguillaum committed
113
  type feature = u_feature * Loc.t
114

bguillaum's avatar
bguillaum committed
115
  type u_node = {
bguillaum's avatar
bguillaum committed
116 117 118 119
    node_id: Id.name;
    position: float option;
    fs: feature list;
  }
bguillaum's avatar
bguillaum committed
120
  type node = u_node * Loc.t
121 122 123

  type edge_label = string

bguillaum's avatar
bguillaum committed
124 125 126
  (* (list of edge_label separated by '|', bool true iff it is a negative constraint) *)
  type edge_label_cst = edge_label list * bool

bguillaum's avatar
bguillaum committed
127
  type u_edge = {
bguillaum's avatar
bguillaum committed
128 129
    edge_id: Id.name option;
    src: Id.name;
bguillaum's avatar
bguillaum committed
130
    edge_label_cst: edge_label_cst;
bguillaum's avatar
bguillaum committed
131 132
    tar: Id.name;
  }
bguillaum's avatar
bguillaum committed
133
  type edge = u_edge * Loc.t
bguillaum's avatar
bguillaum committed
134 135 136 137 138 139 140 141 142

  type ineq = Lt | Gt | Le | Ge

  let string_of_ineq = function
    | Lt -> "<"
    | Gt -> ">"
    | Le -> "≤"
    | Ge -> "≥"

143
  type u_const =
144 145
    | Cst_out of Id.name * edge_label_cst
    | Cst_in of Id.name * edge_label_cst
146 147 148
    | Feature_eq of feature_ident * feature_ident
    | Feature_diseq of feature_ident * feature_ident
    | Feature_ineq of ineq * feature_ident * feature_ident
bguillaum's avatar
bguillaum committed
149
  type const = u_const * Loc.t
150

bguillaum's avatar
bguillaum committed
151
  type basic = {
bguillaum's avatar
bguillaum committed
152 153 154 155
    pat_nodes: node list;
    pat_edges: edge list;
    pat_const: const list;
  }
bguillaum's avatar
bguillaum committed
156

157 158 159 160 161
  type pattern = {
    pat_pos: basic;
    pat_negs: basic list;
  }

162
  type graph = {
bguillaum's avatar
bguillaum committed
163 164 165
    nodes: (Id.name * node) list;
    edge: edge list;
  }
166

bguillaum's avatar
bguillaum committed
167
  type concat_item =
168
    | Qfn_item of feature_ident
bguillaum's avatar
bguillaum committed
169
    | String_item of string
170
    | Param_item of string
bguillaum's avatar
bguillaum committed
171

bguillaum's avatar
bguillaum committed
172
  type u_command =
173
    | Del_edge_expl of (command_node_ident * command_node_ident * edge_label)
bguillaum's avatar
bguillaum committed
174
    | Del_edge_name of string
175
    | Add_edge of (command_node_ident * command_node_ident * edge_label)
176 177

    (* 4 args: source, target, labels, flag true iff negative cst *)
bguillaum's avatar
bguillaum committed
178 179 180
    | Shift_in of (command_node_ident * command_node_ident * edge_label_cst)
    | Shift_out of (command_node_ident * command_node_ident * edge_label_cst)
    | Shift_edge of (command_node_ident * command_node_ident * edge_label_cst)
181

182 183 184 185 186 187 188
    | Merge_node of (command_node_ident * command_node_ident)
    | New_neighbour of (Id.name * command_node_ident * edge_label)
    | Del_node of command_node_ident
    | Activate of command_node_ident

    | Del_feat of command_feature_ident
    | Update_feat of command_feature_ident * concat_item list
bguillaum's avatar
bguillaum committed
189 190
  type command = u_command * Loc.t

191
  (* the [rule] type is used for 3 kinds of module items:
192 193 194
     - rule     { param=None; ... }
     - lex_rule
     - filter   { param=None; commands=[]; ... }
bguillaum's avatar
bguillaum committed
195
  *)
bguillaum's avatar
bguillaum committed
196
  type rule = {
bguillaum's avatar
bguillaum committed
197
    rule_id:Id.name;
bguillaum's avatar
bguillaum committed
198 199
    pos_basic: basic;
    neg_basics: basic list;
bguillaum's avatar
bguillaum committed
200 201
    commands: command list;
    param: (string list * string list) option;
bguillaum's avatar
bguillaum committed
202
    lex_par: string list option;
bguillaum's avatar
bguillaum committed
203 204 205
    rule_doc:string list;
    rule_loc: Loc.t;
  }
206

bguillaum's avatar
bguillaum committed
207
  type modul = {
bguillaum's avatar
bguillaum committed
208 209
    module_id:Id.name;
    local_labels: (string * string list) list;
210
    suffixes: string list;
bguillaum's avatar
bguillaum committed
211 212 213 214 215 216 217
    rules: rule list;
    confluent: bool;
    module_doc:string list;
    mod_loc:Loc.t;
    mod_dir: string; (* the directory where the module is defined (for lp file localisation) *)
  }

218
  type old_sequence = {
bguillaum's avatar
bguillaum committed
219 220 221 222 223 224
    seq_name:string;
    seq_mod:string list;
    seq_doc:string list;
    seq_loc:Loc.t;
  }

225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261
  type new_sequence =
    | Ref of string
    | List of new_sequence list
    | Plus of new_sequence list
    | Star of new_sequence
    | Diamond of new_sequence

  let rec new_sequence_to_string = function
  | Ref m -> m
  | List l -> "[" ^ (String.concat "; " (List.map new_sequence_to_string l)) ^ "]"
  | Plus l -> "[" ^ (String.concat "+" (List.map new_sequence_to_string l)) ^ "]"
  | Star s -> "[" ^ (new_sequence_to_string s) ^"]"  ^ "*"
  | Diamond s -> "◇" ^ "[" ^(new_sequence_to_string s)^"]"

  let rec flatten = function
  | Ref m -> Ref m
  | Star s -> Star (flatten s)
  | Diamond s -> Diamond (flatten s)
  | List l ->
    let fl = List.map flatten l in
    let rec loop = function
    | [] -> []
    | (List l) :: tail -> l @ (loop tail)
    | x :: tail -> x :: (loop tail)
    in List (loop fl)
  | Plus l ->
    let fl = List.map flatten l in
    let rec loop = function
    | [] -> []
    | (Plus l) :: tail -> l @ (loop tail)
    | x :: tail -> x :: (loop tail)
    in Plus (loop fl)

  type sequence =
  | Old of old_sequence
  | New of ((string * Loc.t) * new_sequence)

bguillaum's avatar
bguillaum committed
262 263
  (** a GRS: graph rewriting system *)
  type module_or_include =
bguillaum's avatar
bguillaum committed
264
    | Modul of modul
265
    | Includ of (string * Loc.t)
bguillaum's avatar
bguillaum committed
266 267

  type grs_with_include = {
bguillaum's avatar
bguillaum committed
268
    domain_wi: Domain.t;
bguillaum's avatar
bguillaum committed
269 270 271 272
    labels_wi: (string * string list) list;    (* the list of global edge labels *)
    modules_wi: module_or_include list;
    sequences_wi: sequence list;
  }
bguillaum's avatar
bguillaum committed
273 274

  type grs = {
bguillaum's avatar
bguillaum committed
275
    domain: Domain.t;
bguillaum's avatar
bguillaum committed
276 277 278 279
    labels: (string * string list) list;
    modules: modul list;
    sequences: sequence list;
  }
bguillaum's avatar
bguillaum committed
280 281

  type gr = {
282 283 284 285
    meta: (string * string) list;
    nodes: node list;
    edges: edge list;
  }
bguillaum's avatar
bguillaum committed
286 287

  let empty_grs = { domain = []; labels = []; modules = []; sequences= [] }
bguillaum's avatar
bguillaum committed
288

289
end (* module Ast *)