grew_grs.ml 11.8 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

pj2m's avatar
pj2m committed
11 12 13
open Printf
open Log

14
open Grew_fs
15
open Libgrew_utils
bguillaum's avatar
bguillaum committed
16
open Grew_ast
17
open Grew_edge
bguillaum's avatar
bguillaum committed
18 19 20
open Grew_command
open Grew_graph
open Grew_rule
bguillaum's avatar
bguillaum committed
21
open Grew_parser
pj2m's avatar
pj2m committed
22

23
(* ==================================================================================================== *)
pj2m's avatar
pj2m committed
24 25
module Rewrite_history = struct
  type t = {
26 27 28 29 30
    instance: Instance.t;
    module_name: string;
    good_nf: t list;
    bad_nf: Instance.t list;
  }
pj2m's avatar
pj2m committed
31

32
  let rec is_empty t =
bguillaum's avatar
bguillaum committed
33
    (t.instance.Instance.rules = []) && List.for_all is_empty t.good_nf
bguillaum's avatar
bguillaum committed
34 35

  let rec num_sol = function
36 37
    | { good_nf = []; bad_nf = [] } -> 1
    | { good_nf = [] } -> 0 (* dead branch *)
bguillaum's avatar
bguillaum committed
38
    | { good_nf = l} -> List.fold_left (fun acc t -> acc + (num_sol t)) 0 l
39

40
  let save_nfs ?filter ?main_feat ~dot base_name t =
41
    let rec loop file_name rules t =
42
      match (t.good_nf, t.bad_nf) with
43
        | [],[] when dot -> Instance.save_dot_png ?filter ?main_feat file_name t.instance; [rules, file_name]
bguillaum's avatar
bguillaum committed
44
        | [],[] -> ignore (Instance.save_dep_png ?filter ?main_feat file_name t.instance); [rules, file_name]
45 46
        | [],_ -> []
        | l, _ ->
47
          List_.foldi_left
48
            (fun i acc son ->
49
              (* Instance.save_dep_png ?main_feat (sprintf "%s_%d" file_name i) son.instance; *)
50 51 52 53
              let nfs = loop
                (sprintf "%s_%d" file_name i)
                (rules @ [t.module_name, son.instance.Instance.rules])
                son in
54 55 56 57 58
              nfs @ acc
            )
            [] l
    in loop base_name [] t

59 60 61 62 63 64 65
  let save_gr base t =
    let rec loop file_name t =
      match (t.good_nf, t.bad_nf) with
        | [],[] -> File.write (Instance.to_gr t.instance) (file_name^".gr")
        | l, _ -> List_.iteri (fun i son -> loop (sprintf "%s_%d" file_name i) son) l
    in loop base t

bguillaum's avatar
bguillaum committed
66 67 68 69 70 71 72
  let save_conll base t =
    let rec loop file_name t =
      match (t.good_nf, t.bad_nf) with
        | [],[] -> File.write (Instance.to_conll t.instance) (file_name^".conll")
        | l, _ -> List_.iteri (fun i son -> loop (sprintf "%s_%d" file_name i) son) l
    in loop base t

73 74 75 76 77 78 79 80
  (* suppose that all modules are confluent and produced exacly one normal form *)
  let save_det_gr base t =
    let rec loop t =
      match (t.good_nf, t.bad_nf) with
        | [],[] -> File.write (Instance.to_gr t.instance) (base^".gr")
        | [one], [] -> loop one
        | _ -> Error.run "Not a single rewriting"
    in loop t
bguillaum's avatar
bguillaum committed
81

bguillaum's avatar
bguillaum committed
82 83 84 85 86 87 88 89 90
  let save_annot out_dir base_name t =
    List_.mapi
      (fun i alts ->
        match alts.good_nf with
      | [alt_1; alt_2] ->
        let a = sprintf "%s_%d_A" base_name i in
        let b = sprintf "%s_%d_B" base_name i in
        let hpa = Instance.save_dep_svg (Filename.concat out_dir a) alt_1.instance in
        let hpb = Instance.save_dep_svg (Filename.concat out_dir b) alt_2.instance in
bguillaum's avatar
bguillaum committed
91 92 93
        let (afn,apos) = G_graph.get_annot_info alt_1.instance.Instance.graph
        and (bfn,bpos) = G_graph.get_annot_info alt_2.instance.Instance.graph in
        (base_name,i,(afn,apos),(bfn,bpos),(hpa,hpb))
bguillaum's avatar
bguillaum committed
94 95 96
      | _ -> Error.run "Not two alternatives in an annotation rewriting in %s" base_name
      ) t.good_nf

97
  let save_det_conll ?header base t =
bguillaum's avatar
bguillaum committed
98 99
    let rec loop t =
      match (t.good_nf, t.bad_nf) with
100 101 102 103 104 105 106
        | ([],[]) ->
          let output =
            match header with
              | Some h -> sprintf "%% %s\n%s" h (Instance.to_conll t.instance)
              | None -> Instance.to_conll t.instance in
          File.write output (base^".conll")
        | ([one], []) -> loop one
bguillaum's avatar
bguillaum committed
107 108 109
        | _ -> Error.run "Not a single rewriting"
    in loop t

bguillaum's avatar
bguillaum committed
110 111 112 113 114
  let det_dep_string t =
    let rec loop t =
      match (t.good_nf, t.bad_nf) with
        | [],[] ->
          let graph = t.instance.Instance.graph in
bguillaum's avatar
bguillaum committed
115
          Some (G_graph.to_dep graph)
bguillaum's avatar
bguillaum committed
116
        | [one], [] -> loop one
bguillaum's avatar
bguillaum committed
117
        | _ -> None
bguillaum's avatar
bguillaum committed
118
    in loop t
bguillaum's avatar
bguillaum committed
119 120 121 122 123 124 125 126 127 128 129 130 131

  let conll_dep_string ?(keep_empty_rh=false) t =
    if (not keep_empty_rh) && is_empty t
    then None
    else
      let rec loop t =
        match (t.good_nf, t.bad_nf) with
          | [],[] ->
            let graph = t.instance.Instance.graph in
            Some (G_graph.to_conll graph)
          | [one], [] -> loop one
          | _ -> None
      in loop t
132
end (* module Rewrite_history *)
pj2m's avatar
pj2m committed
133

134
(* ==================================================================================================== *)
pj2m's avatar
pj2m committed
135 136
module Modul = struct
  type t = {
137
    name: string;
138
    local_labels: (string * string list) array;
139
    suffixes: string list;
140 141 142 143 144
    rules: Rule.t list;
    filters: Rule.t list;
    confluent: bool;
    loc: Loc.t;
  }
pj2m's avatar
pj2m committed
145

146 147 148 149
  let check t =
    (* check for duplicate rules *)
    let rec loop already_defined = function
      | [] -> ()
150 151
      | r::_ when List.mem (Rule.get_name r) already_defined ->
        Error.build ~loc:(Rule.get_loc r) "Rule '%s' is defined twice in the same module" (Rule.get_name r)
152 153
      | r::tail -> loop ((Rule.get_name r) :: already_defined) tail in
    loop [] t.rules
pj2m's avatar
pj2m committed
154

155
  let build ast_module =
156 157
    let locals = Array.of_list ast_module.Ast.local_labels in
    Array.sort compare locals;
158 159
    let suffixes = ast_module.Ast.suffixes in
    let rules_or_filters = List.map (Rule.build ~locals suffixes ast_module.Ast.mod_dir) ast_module.Ast.rules in
160 161
    let (filters, rules) = List.partition Rule.is_filter rules_or_filters in
    let modul =
162
      {
163 164
        name = ast_module.Ast.module_id;
        local_labels = locals;
165 166 167
        suffixes;
        rules;
        filters;
168 169 170
        confluent = ast_module.Ast.confluent;
        loc = ast_module.Ast.mod_loc;
      } in
171
    check modul; modul
172
end (* module Modul *)
173

174
(* ==================================================================================================== *)
175 176
module Sequence = struct
  type t = {
177 178 179 180
    name: string;
    def: string list;
    loc: Loc.t;
  }
181 182 183

  let check module_list t =
    List.iter
184
      (fun module_name ->
185
        if not (List.exists (fun modul -> modul.Modul.name = module_name) module_list)
bguillaum's avatar
Typos  
bguillaum committed
186
        then Error.build ~loc:t.loc "sequence \"%s\" refers to the unknown module \"%s\"."
187
          t.name module_name
188 189
      ) t.def

190 191
  let build module_list ast_sequence =
    let sequence =
192
      {
193 194 195 196
        name = ast_sequence.Ast.seq_name;
        def = ast_sequence.Ast.seq_mod;
        loc = ast_sequence.Ast.seq_loc;
      } in
197
    check module_list sequence; sequence
198
end (* module Sequence *)
pj2m's avatar
pj2m committed
199

200
(* ==================================================================================================== *)
pj2m's avatar
pj2m committed
201
module Grs = struct
202

pj2m's avatar
pj2m committed
203
  type t = {
204 205
    labels: Label.t list;        (* the list of global edge labels *)
    modules: Modul.t list;       (* the ordered list of modules used from rewriting *)
206
    sequences: Sequence.t list;
bguillaum's avatar
bguillaum committed
207 208
    filename: string;
    ast: Ast.grs;
209 210
  }

bguillaum's avatar
bguillaum committed
211
  let get_modules t = t.modules
bguillaum's avatar
bguillaum committed
212 213
  let get_ast t = t.ast
  let get_filename t = t.filename
bguillaum's avatar
bguillaum committed
214

215 216
  let sequence_names t = List.map (fun s -> s.Sequence.name) t.sequences

bguillaum's avatar
bguillaum committed
217
  let empty = {labels=[]; modules=[]; sequences=[]; ast=Ast.empty_grs; filename=""; }
pj2m's avatar
pj2m committed
218

219 220 221 222
  let check t =
    (* check for duplicate modules *)
    let rec loop already_defined = function
      | [] -> ()
223 224
      | m::_ when List.mem m.Modul.name already_defined ->
        Error.build ~loc:m.Modul.loc "Module '%s' is defined twice" m.Modul.name
225 226 227 228 229 230
      | m::tail -> loop (m.Modul.name :: already_defined) tail in
    loop [] t.modules;

    (* check for duplicate sequences *)
    let rec loop already_defined = function
      | [] -> ()
231 232
      | s::_ when List.mem s.Sequence.name already_defined ->
        Error.build ~loc:s.Sequence.loc "Sequence '%s' is defined twice" s.Sequence.name
233 234 235
      | s::tail -> loop (s.Sequence.name :: already_defined) tail in
    loop [] t.sequences

bguillaum's avatar
bguillaum committed
236 237 238 239 240
  let build filename =
    let ast = Grew_parser.grs_of_file filename in
    Label.init ast.Ast.labels;
    Domain.init ast.Ast.domain;
    let modules = List.map Modul.build ast.Ast.modules in
241
    let grs = {
bguillaum's avatar
bguillaum committed
242 243 244
      labels = List.map (fun (l,_) -> Label.from_string l) ast.Ast.labels;
      sequences = List.map (Sequence.build modules) ast.Ast.sequences;
      modules; ast; filename;
245 246
    } in
    check grs; grs
pj2m's avatar
pj2m committed
247

bguillaum's avatar
bguillaum committed
248
  let modules_of_sequence grs sequence =
249
    let module_names =
250
      try
251 252
        let seq = List.find (fun s -> s.Sequence.name = sequence) grs.sequences in
        seq.Sequence.def
bguillaum's avatar
bguillaum committed
253 254
      with Not_found -> [sequence] in (* a module name can be used as a singleton sequence *)

255 256 257
    List.map
      (fun name ->
        try List.find (fun m -> m.Modul.name=name) grs.modules
bguillaum's avatar
bguillaum committed
258 259 260 261
        with Not_found -> Log.fcritical "No sequence or module named '%s'" name
      )
      module_names

262
  let rewrite grs sequence instance =
bguillaum's avatar
bguillaum committed
263
    Timeout.start ();
bguillaum's avatar
bguillaum committed
264
    let modules_to_apply = modules_of_sequence grs sequence in
265

266 267 268
    let rec loop instance module_list =
      let instance = {instance with Instance.graph = G_graph.normalize instance.Instance.graph} in
      match module_list with
269 270 271 272 273 274 275 276 277
      | [] -> (* no more modules to apply *)
        {Rewrite_history.instance = instance; module_name = ""; good_nf = []; bad_nf = []; }
      | next::tail ->
        let (good_set, bad_set) =
          Rule.normalize
            next.Modul.name
            ~confluent: next.Modul.confluent
            next.Modul.rules
            next.Modul.filters
278
            (Instance.flatten instance) in
279 280 281 282 283 284 285 286
        let good_list = Instance_set.elements good_set
        and bad_list = Instance_set.elements bad_set in
        {
          Rewrite_history.instance = instance;
          module_name = next.Modul.name;
          good_nf = List.map (fun i -> loop i tail) good_list;
          bad_nf = bad_list;
        } in
bguillaum's avatar
bguillaum committed
287
    loop instance modules_to_apply
288

bguillaum's avatar
bguillaum committed
289 290
  let build_rew_display grs sequence instance =
    let modules_to_apply = modules_of_sequence grs sequence in
pj2m's avatar
pj2m committed
291

292 293 294
    let rec loop instance module_list =
      let instance = {instance with Instance.graph = G_graph.normalize instance.Instance.graph} in
      match module_list with
pj2m's avatar
pj2m committed
295
      | [] -> Grew_types.Leaf instance.Instance.graph
296
      | next :: tail ->
297 298 299 300 301 302
        let (good_set, bad_set) =
          Rule.normalize
            next.Modul.name
            ~confluent: next.Modul.confluent
            next.Modul.rules
            next.Modul.filters
303
            (Instance.flatten instance) in
304
        let inst_list = Instance_set.elements good_set
305
              (* and bad_list = Instance_set.elements bad_set *) in
pj2m's avatar
pj2m committed
306

307
        match inst_list with
308
          | [{Instance.big_step = None}] ->
309
            Grew_types.Local_normal_form (instance.Instance.graph, next.Modul.name, loop instance tail)
310
          | _ -> Grew_types.Node
311 312 313 314 315 316 317 318 319 320
            (
              instance.Instance.graph,
              next.Modul.name,
              List.map
                (fun inst ->
                  match inst.Instance.big_step with
                    | None -> Error.bug "Cannot have no big_steps and more than one reducts at the same time"
                    | Some bs -> (bs, loop inst tail)
                ) inst_list
            )
pj2m's avatar
pj2m committed
321
    in loop instance modules_to_apply
bguillaum's avatar
bguillaum committed
322

bguillaum's avatar
bguillaum committed
323 324 325
  let rule_iter fct grs =
    List.iter
      (fun modul ->
bguillaum's avatar
bguillaum committed
326 327 328 329 330 331 332
        List.iter (fun rule -> fct modul.Modul.name rule) modul.Modul.rules
      ) grs.modules

  let filter_iter fct grs =
    List.iter
      (fun modul ->
        List.iter (fun filter -> fct modul.Modul.name filter) modul.Modul.filters
bguillaum's avatar
bguillaum committed
333
      ) grs.modules
334
end (* module Grs *)