grew_ast.ml 12.9 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

bguillaum's avatar
bguillaum committed
11 12
open Printf
open Log
bguillaum's avatar
bguillaum committed
13
open Grew_base
bguillaum's avatar
bguillaum committed
14
open Grew_types
bguillaum's avatar
bguillaum committed
15

16
(* ================================================================================ *)
bguillaum's avatar
bguillaum committed
17
module Ast = struct
18

19
  (* general function for checking that an identifier is of the right kind *)
20
  (* allowed is a char list which is a sub set of ['#'; '.'; ':'; '*'] *)
21 22 23 24 25 26 27 28 29 30 31 32 33 34
  let check_special name allowed s =
    let sp = Str.full_split (Str.regexp "#\\|\\.\\|:\\|\\*") s in
    try
      match List.find
      (function
        | Str.Delim d when not (List.mem d allowed) -> true
        | _ -> false
      ) sp
      with
      | Str.Delim wrong_char ->
       Error.build "The identifier '%s' is not a valid %s, the character '%s' is illegal" s name wrong_char
      | Str.Text _ -> Error.bug "[Grew_ast.check_special]"
    with
    | Not_found -> ()
35

36 37 38 39 40 41
  (* ---------------------------------------------------------------------- *)
  (* simple_ident: cat *)
  type simple_ident = Id.name
  let parse_simple_ident s = check_special "simple ident" [] s; s
  let is_simple_ident s = try ignore (parse_simple_ident s); true with _ -> false
  let dump_simple_ident name = name
42

bguillaum's avatar
bguillaum committed
43
  (* ---------------------------------------------------------------------- *)
44 45 46 47
  (* label_ident: D:mod.dis *)
  type label_ident = string
  let parse_label_ident s = check_special "label ident" [":"; "."] s; s
  let dump_label_ident name = name
48

49 50 51 52 53
  (* ---------------------------------------------------------------------- *)
  (* pattern_label_ident: D:mod.* *)
  type pattern_label_ident = string
  let parse_pattern_label_ident s = check_special "label ident" [":"; "."; "*"] s; s
  let dump_pattern_label_ident name = name
54

55 56 57 58 59 60
  (* ---------------------------------------------------------------------- *)
  (* node_ident: W0.5 *)
  type node_ident = string
  let parse_node_ident s = check_special "node ident" ["."] s; s
  let dump_node_ident name = name

bguillaum's avatar
bguillaum committed
61
  (* ---------------------------------------------------------------------- *)
62 63
  (* feature_ident: V.cat *)
  type feature_ident = Id.name * feature_name
64 65
  let dump_feature_ident (name, feat_name) = sprintf "%s.%s" name feat_name

66 67 68 69 70
  let parse_feature_ident s =
    check_special "feature ident" ["."] s;
    match Str.full_split (Str.regexp "\\.") s with
    | [Str.Text base; Str.Delim "."; Str.Text fn] -> (base, fn)
    | _ -> Error.build "The identifier '%s' must be a feature identifier (with exactly one '.' symbol, like \"V.cat\" for instance)" s
71

bguillaum's avatar
bguillaum committed
72 73 74 75 76 77
  (* ---------------------------------------------------------------------- *)
  (* simple_or_feature_ident: union of simple_ident and feature_ident *)
  (* Note: used for parsing of "X < Y" and "X.feat < Y.feat" without conflicts *)
  type simple_or_feature_ident = Id.name * feature_name option

  let parse_simple_or_feature_ident s =
78 79
    check_special "feature ident" ["."] s;
    match Str.full_split (Str.regexp "\\.") s with
bguillaum's avatar
bguillaum committed
80 81 82
    | [Str.Text base; ] -> (base, None)
    | [Str.Text base; Str.Delim "."; Str.Text fn] -> (base, Some fn)
    | _ -> Error.build "The identifier '%s' must be a feature identifier (with at most one '.' symbol, like \"V\" or \"V.cat\" for instance)" s
83 84


85
  (* ---------------------------------------------------------------------- *)
bguillaum's avatar
bguillaum committed
86
  type feature_kind =
87 88
    | Equality of feature_value list
    | Disequality of feature_value list
bguillaum's avatar
bguillaum committed
89
    | Equal_param of string (* $ident *)
bguillaum's avatar
bguillaum committed
90
    | Absent
bguillaum's avatar
bguillaum committed
91

bguillaum's avatar
bguillaum committed
92
  type u_feature = {
93 94 95
    name: feature_name;
    kind: feature_kind;
  }
bguillaum's avatar
bguillaum committed
96
  type feature = u_feature * Loc.t
97

bguillaum's avatar
bguillaum committed
98
  type u_node = {
bguillaum's avatar
bguillaum committed
99 100 101 102
    node_id: Id.name;
    position: float option;
    fs: feature list;
  }
bguillaum's avatar
bguillaum committed
103
  type node = u_node * Loc.t
104

105 106
  let grewpy_compare (n1,_) (n2,_) = Id.grewpy_compare n1.node_id n2.node_id

107 108
  type edge_label = string

109 110 111 112
  type edge_label_cst =
    | Pos_list of edge_label list (*  X|Y|Z    *)
    | Neg_list of edge_label list (*  ^X|Y|Z   *)
    | Regexp of string            (*  re"a.*"  *)
bguillaum's avatar
bguillaum committed
113

bguillaum's avatar
bguillaum committed
114
  type u_edge = {
bguillaum's avatar
bguillaum committed
115 116
    edge_id: Id.name option;
    src: Id.name;
bguillaum's avatar
bguillaum committed
117
    edge_label_cst: edge_label_cst;
bguillaum's avatar
bguillaum committed
118 119
    tar: Id.name;
  }
bguillaum's avatar
bguillaum committed
120
  type edge = u_edge * Loc.t
bguillaum's avatar
bguillaum committed
121 122 123 124 125 126 127 128 129

  type ineq = Lt | Gt | Le | Ge

  let string_of_ineq = function
    | Lt -> "<"
    | Gt -> ">"
    | Le -> "≤"
    | Ge -> "≥"

130
  type u_const =
131 132
    | Cst_out of Id.name * edge_label_cst
    | Cst_in of Id.name * edge_label_cst
133 134 135
    | Feature_eq of feature_ident * feature_ident
    | Feature_diseq of feature_ident * feature_ident
    | Feature_ineq of ineq * feature_ident * feature_ident
136
    | Feature_ineq_cst of ineq * feature_ident * float
bguillaum's avatar
bguillaum committed
137
    | Feature_re of feature_ident * string
bguillaum's avatar
bguillaum committed
138 139
    | Prec of Id.name * Id.name
    | Lprec of Id.name * Id.name
bguillaum's avatar
bguillaum committed
140
  type const = u_const * Loc.t
141

bguillaum's avatar
bguillaum committed
142
  type basic = {
bguillaum's avatar
bguillaum committed
143 144 145 146
    pat_nodes: node list;
    pat_edges: edge list;
    pat_const: const list;
  }
bguillaum's avatar
bguillaum committed
147

148 149 150 151 152
  type pattern = {
    pat_pos: basic;
    pat_negs: basic list;
  }

153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
  let add_implicit_node loc aux name pat_nodes =
    if (List.exists (fun ({node_id},_) -> node_id=name) pat_nodes)
    || (List.exists (fun ({node_id},_) -> node_id=name) aux)
    then pat_nodes
    else ({node_id=name; position=None; fs=[]}, loc) :: pat_nodes

  let complete_basic aux {pat_nodes; pat_edges; pat_const} =
    let pat_nodes_2 = List.fold_left
    (fun acc ({src; tar}, loc) ->
      acc
      |> (add_implicit_node loc aux src)
      |> (add_implicit_node loc aux tar)
    ) pat_nodes pat_edges in

    let pat_nodes_3 = List.fold_left
    (fun acc (u_const, loc) -> match u_const with
      | Feature_eq ((name1,_), (name2,_))
      | Feature_diseq ((name1,_), (name2,_))
bguillaum's avatar
bguillaum committed
171
      | Feature_ineq (_, (name1,_), (name2,_))
172 173
      | Prec (name1, name2)
      | Lprec (name1, name2) ->
174 175 176
        acc
        |> (add_implicit_node loc aux name1)
        |> (add_implicit_node loc aux name2)
177 178 179 180
      | Feature_ineq_cst (_, (name,_), _)
      | Feature_re ((name,_), _) ->
        acc
        |> (add_implicit_node loc aux name)
181 182 183 184 185 186 187 188 189 190 191
      | _ -> acc
    ) pat_nodes_2 pat_const in

    {pat_nodes=pat_nodes_3; pat_edges; pat_const}

  let complete_pattern pattern =
    let new_pat_pos = complete_basic [] pattern.pat_pos in
    let aux = new_pat_pos.pat_nodes in
    let new_pat_negs = List.map (complete_basic aux) pattern.pat_negs in
    { pat_pos = new_pat_pos; pat_negs = new_pat_negs;}

192
  type graph = {
bguillaum's avatar
bguillaum committed
193 194 195
    nodes: (Id.name * node) list;
    edge: edge list;
  }
196

bguillaum's avatar
bguillaum committed
197
  type concat_item =
198
    | Qfn_item of feature_ident
bguillaum's avatar
bguillaum committed
199
    | String_item of string
200
    | Param_item of string
bguillaum's avatar
bguillaum committed
201

bguillaum's avatar
bguillaum committed
202 203 204 205 206
  let string_of_concat_item = function
    | Qfn_item id -> sprintf "%s" (dump_feature_ident id)
    | String_item s -> sprintf "\"%s\"" s
    | Param_item var -> sprintf "%s" var

bguillaum's avatar
bguillaum committed
207
  type u_command =
bguillaum's avatar
bguillaum committed
208
    | Del_edge_expl of (Id.name * Id.name * edge_label)
bguillaum's avatar
bguillaum committed
209
    | Del_edge_name of string
bguillaum's avatar
bguillaum committed
210
    | Add_edge of (Id.name * Id.name * edge_label)
211 212

    (* 4 args: source, target, labels, flag true iff negative cst *)
bguillaum's avatar
bguillaum committed
213 214 215
    | Shift_in of (Id.name * Id.name * edge_label_cst)
    | Shift_out of (Id.name * Id.name * edge_label_cst)
    | Shift_edge of (Id.name * Id.name * edge_label_cst)
216

bguillaum's avatar
bguillaum committed
217 218
    | Merge_node of (Id.name * Id.name)
    | New_neighbour of (Id.name * Id.name * edge_label)
bguillaum's avatar
bguillaum committed
219 220 221 222
    | New_node of Id.name
    | New_before of (Id.name * Id.name)
    | New_after of (Id.name * Id.name)

bguillaum's avatar
bguillaum committed
223
    | Del_node of Id.name
224

bguillaum's avatar
bguillaum committed
225 226
    | Del_feat of feature_ident
    | Update_feat of feature_ident * concat_item list
bguillaum's avatar
bguillaum committed
227 228
  type command = u_command * Loc.t

bguillaum's avatar
bguillaum committed
229 230 231 232 233 234 235
  let string_of_u_command u_command = match u_command with
    | Del_edge_expl (n1,n2,label) ->
      sprintf "del_edge %s -[%s]-> %s" n1 label n2
    | Del_edge_name name -> sprintf "del_edge %s" name
    | Add_edge (n1,n2,label) ->
      sprintf "add_edge %s -[%s]-> %s" n1 label n2

236
    | Shift_in (n1,n2,Neg_list []) ->
bguillaum's avatar
bguillaum committed
237
      sprintf "shift_in %s ==> %s" n1 n2
238
    | Shift_in (n1,n2,Pos_list labels) ->
bguillaum's avatar
bguillaum committed
239
      sprintf "shift_in %s =[%s]=> %s" n1 n2 (List_.to_string (fun x->x) "|" labels)
240
    | Shift_in (n1,n2,Neg_list labels) ->
bguillaum's avatar
bguillaum committed
241
      sprintf "shift_in %s =[^%s]=> %s" n1 n2 (List_.to_string (fun x->x) "|" labels)
242 243
    | Shift_in (n1,n2,Regexp re) ->
      sprintf "shift_in %s =[re\"%s\"]=> %s" n1 re n2
bguillaum's avatar
bguillaum committed
244

245
    | Shift_out (n1,n2,Neg_list []) ->
bguillaum's avatar
bguillaum committed
246
      sprintf "shift_out %s ==> %s" n1 n2
247
    | Shift_out (n1,n2,Pos_list labels) ->
bguillaum's avatar
bguillaum committed
248
      sprintf "shift_out %s =[%s]=> %s" n1 n2 (List_.to_string (fun x->x) "|" labels)
249
    | Shift_out (n1,n2,Neg_list labels) ->
bguillaum's avatar
bguillaum committed
250
      sprintf "shift_out %s =[^%s]=> %s" n1 n2 (List_.to_string (fun x->x) "|" labels)
251 252
    | Shift_out (n1,n2,Regexp re) ->
      sprintf "shift_out %s =[re\"%s\"]=> %s" n1 re n2
bguillaum's avatar
bguillaum committed
253

254 255

    | Shift_edge (n1,n2,Neg_list []) ->
bguillaum's avatar
bguillaum committed
256
      sprintf "shift %s ==> %s" n1 n2
257
    | Shift_edge (n1,n2,Pos_list labels) ->
bguillaum's avatar
bguillaum committed
258
      sprintf "shift %s =[%s]=> %s" n1 n2 (List_.to_string (fun x->x) "|" labels)
259
    | Shift_edge (n1,n2,Neg_list labels) ->
bguillaum's avatar
bguillaum committed
260
      sprintf "shift %s =[^%s]=> %s" n1 n2 (List_.to_string (fun x->x) "|" labels)
261 262
    | Shift_edge (n1,n2,Regexp re) ->
      sprintf "shift %s =[re\"%s\"]=> %s" n1 re n2
bguillaum's avatar
bguillaum committed
263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280

    | Merge_node (n1,n2) -> sprintf "merge %s ==> %s" n1 n2
    | New_neighbour (n1,n2,label) -> sprintf "add_node %s: <-[%s]- %s" n1 label n2
    | New_node (n) -> sprintf "add_node %s" n
    | New_before (n1,n2) -> sprintf "add_node %s :< %s" n1 n2
    | New_after (n1,n2) -> sprintf "add_node %s :> %s" n1 n2
    | Del_node act_id -> sprintf "del_node %s" act_id
    | Update_feat ((act_id, feat_name),item_list) ->
      sprintf "%s.%s = %s" act_id feat_name (List_.to_string string_of_concat_item " + " item_list)
    | Del_feat (act_id, feat_name) ->
      sprintf "del_feat %s.%s" act_id feat_name

  let rec replace_new_neighbour = function
  | [] -> []
  | (New_neighbour (new_name, old_name, edge),loc) :: tail ->
    (New_after (new_name, old_name),loc) :: (Add_edge (old_name, new_name, edge),loc) :: (replace_new_neighbour tail)
  | head :: tail -> head :: (replace_new_neighbour tail)

281
  (* the [rule] type is used for 3 kinds of module items:
282 283 284
     - rule     { param=None; ... }
     - lex_rule
     - filter   { param=None; commands=[]; ... }
bguillaum's avatar
bguillaum committed
285
  *)
bguillaum's avatar
bguillaum committed
286
  type rule = {
bguillaum's avatar
bguillaum committed
287
    rule_id:Id.name;
288
    pattern: pattern;
bguillaum's avatar
bguillaum committed
289
    commands: command list;
290 291
    param: (string list * string list) option; (* (files, vars) *)
    lex_par: string list option; (* lexical parameters in the file *)
bguillaum's avatar
bguillaum committed
292 293 294
    rule_doc:string list;
    rule_loc: Loc.t;
  }
295

bguillaum's avatar
bguillaum committed
296
  type modul = {
bguillaum's avatar
bguillaum committed
297 298 299 300 301 302 303 304 305
    module_id:Id.name;
    local_labels: (string * string list) list;
    rules: rule list;
    confluent: bool;
    module_doc:string list;
    mod_loc:Loc.t;
    mod_dir: string; (* the directory where the module is defined (for lp file localisation) *)
  }

306
  type old_sequence = {
bguillaum's avatar
bguillaum committed
307 308 309 310 311 312
    seq_name:string;
    seq_mod:string list;
    seq_doc:string list;
    seq_loc:Loc.t;
  }

313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349
  type new_sequence =
    | Ref of string
    | List of new_sequence list
    | Plus of new_sequence list
    | Star of new_sequence
    | Diamond of new_sequence

  let rec new_sequence_to_string = function
  | Ref m -> m
  | List l -> "[" ^ (String.concat "; " (List.map new_sequence_to_string l)) ^ "]"
  | Plus l -> "[" ^ (String.concat "+" (List.map new_sequence_to_string l)) ^ "]"
  | Star s -> "[" ^ (new_sequence_to_string s) ^"]"  ^ "*"
  | Diamond s -> "◇" ^ "[" ^(new_sequence_to_string s)^"]"

  let rec flatten = function
  | Ref m -> Ref m
  | Star s -> Star (flatten s)
  | Diamond s -> Diamond (flatten s)
  | List l ->
    let fl = List.map flatten l in
    let rec loop = function
    | [] -> []
    | (List l) :: tail -> l @ (loop tail)
    | x :: tail -> x :: (loop tail)
    in List (loop fl)
  | Plus l ->
    let fl = List.map flatten l in
    let rec loop = function
    | [] -> []
    | (Plus l) :: tail -> l @ (loop tail)
    | x :: tail -> x :: (loop tail)
    in Plus (loop fl)

  type sequence =
  | Old of old_sequence
  | New of ((string * Loc.t) * new_sequence)

bguillaum's avatar
bguillaum committed
350 351
  (** a GRS: graph rewriting system *)
  type module_or_include =
bguillaum's avatar
bguillaum committed
352
    | Modul of modul
353
    | Includ of (string * Loc.t)
bguillaum's avatar
bguillaum committed
354

355 356 357 358 359 360 361
  type domain = {
      feature_domain: Feature_domain.feature_spec list;
      label_domain: (string * string list) list;
    }

  let empty_domain = { feature_domain=[]; label_domain=[] }

362 363 364
  type domain_wi = Dom of domain | Dom_file of string

  type grs_wi = {
bguillaum's avatar
bguillaum committed
365
    domain_wi: domain_wi option;
bguillaum's avatar
bguillaum committed
366 367 368
    modules_wi: module_or_include list;
    sequences_wi: sequence list;
  }
bguillaum's avatar
bguillaum committed
369 370

  type grs = {
bguillaum's avatar
bguillaum committed
371
    domain: domain option;
bguillaum's avatar
bguillaum committed
372 373 374
    modules: modul list;
    sequences: sequence list;
  }
bguillaum's avatar
bguillaum committed
375 376

  type gr = {
377
    meta: string list;
378 379 380
    nodes: node list;
    edges: edge list;
  }
bguillaum's avatar
bguillaum committed
381

bguillaum's avatar
bguillaum committed
382
  let empty_grs = { domain = None; modules = []; sequences= [] }
bguillaum's avatar
bguillaum committed
383

384
end (* module Ast *)