grew_grs.ml 13.1 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

pj2m's avatar
pj2m committed
11 12 13
open Printf
open Log

14
open Grew_fs
bguillaum's avatar
bguillaum committed
15
open Grew_base
16
open Grew_types
bguillaum's avatar
bguillaum committed
17
open Grew_ast
18
open Grew_edge
bguillaum's avatar
bguillaum committed
19 20 21
open Grew_command
open Grew_graph
open Grew_rule
bguillaum's avatar
bguillaum committed
22
open Grew_loader
pj2m's avatar
pj2m committed
23

bguillaum's avatar
bguillaum committed
24
(* ================================================================================ *)
pj2m's avatar
pj2m committed
25 26
module Rewrite_history = struct
  type t = {
27 28 29 30 31
    instance: Instance.t;
    module_name: string;
    good_nf: t list;
    bad_nf: Instance.t list;
  }
pj2m's avatar
pj2m committed
32

33 34 35 36 37
  let rec get_graphs = function
    | { good_nf = []; bad_nf = []; instance } -> [instance.Instance.graph]
    | { good_nf = [] } -> []
    | { good_nf = l} -> List_.flat_map get_graphs l

38
  let rec is_empty t =
bguillaum's avatar
bguillaum committed
39
    (t.instance.Instance.rules = []) && List.for_all is_empty t.good_nf
bguillaum's avatar
bguillaum committed
40 41

  let rec num_sol = function
42 43
    | { good_nf = []; bad_nf = [] } -> 1
    | { good_nf = [] } -> 0 (* dead branch *)
bguillaum's avatar
bguillaum committed
44
    | { good_nf = l} -> List.fold_left (fun acc t -> acc + (num_sol t)) 0 l
45

bguillaum's avatar
bguillaum committed
46
  let save_nfs ?domain ?filter ?main_feat ~dot base_name t =
47
    let rec loop file_name rules t =
48
      match (t.good_nf, t.bad_nf) with
bguillaum's avatar
bguillaum committed
49 50
        | [],[] when dot -> Instance.save_dot_png ?domain ?filter ?main_feat file_name t.instance; [rules, file_name]
        | [],[] -> ignore (Instance.save_dep_png ?domain ?filter ?main_feat file_name t.instance); [rules, file_name]
51 52
        | [],_ -> []
        | l, _ ->
53
          List_.foldi_left
54
            (fun i acc son ->
55
              (* Instance.save_dep_png ?main_feat (sprintf "%s_%d" file_name i) son.instance; *)
56 57 58 59
              let nfs = loop
                (sprintf "%s_%d" file_name i)
                (rules @ [t.module_name, son.instance.Instance.rules])
                son in
60 61 62 63 64
              nfs @ acc
            )
            [] l
    in loop base_name [] t

bguillaum's avatar
bguillaum committed
65
  let save_gr ?domain base t =
66 67
    let rec loop file_name t =
      match (t.good_nf, t.bad_nf) with
bguillaum's avatar
bguillaum committed
68
        | [],[] -> File.write (Instance.to_gr ?domain t.instance) (file_name^".gr")
69
        | l, _ -> List.iteri (fun i son -> loop (sprintf "%s_%d" file_name i) son) l
70 71
    in loop base t

bguillaum's avatar
bguillaum committed
72
  let save_conll ?domain base t =
bguillaum's avatar
bguillaum committed
73 74
    let rec loop file_name t =
      match (t.good_nf, t.bad_nf) with
bguillaum's avatar
bguillaum committed
75
        | [],[] -> File.write (Instance.to_conll_string ?domain t.instance) (file_name^".conll")
76
        | l, _ -> List.iteri (fun i son -> loop (sprintf "%s_%d" file_name i) son) l
bguillaum's avatar
bguillaum committed
77 78
    in loop base t

bguillaum's avatar
bguillaum committed
79
  let save_full_conll ?domain base t =
80 81 82 83
    let cpt = ref 0 in
    let rec loop t =
      match (t.good_nf, t.bad_nf) with
        | [],[] -> 
bguillaum's avatar
bguillaum committed
84
          File.write (Instance.to_conll_string ?domain t.instance) (sprintf "%s__%d.conll" base !cpt);
85 86
          incr cpt
        | l, _ -> List.iter loop l
87
    in loop t; !cpt
88

89
  (* suppose that all modules are confluent and produced exacly one normal form *)
bguillaum's avatar
bguillaum committed
90
  let save_det_gr ?domain base t =
91 92
    let rec loop t =
      match (t.good_nf, t.bad_nf) with
bguillaum's avatar
bguillaum committed
93
        | [],[] -> File.write (Instance.to_gr ?domain t.instance) (base^".gr")
94
        | [one], [] -> loop one
95
        | _ -> Error.run "[save_det_gr] Not a single rewriting"
96
    in loop t
bguillaum's avatar
bguillaum committed
97

bguillaum's avatar
bguillaum committed
98
  let save_annot ?domain out_dir base_name t =
99
    List.mapi
bguillaum's avatar
bguillaum committed
100 101 102 103 104
      (fun i alts ->
        match alts.good_nf with
      | [alt_1; alt_2] ->
        let a = sprintf "%s_%d_A" base_name i in
        let b = sprintf "%s_%d_B" base_name i in
bguillaum's avatar
bguillaum committed
105 106
        let hpa = Instance.save_dep_svg ?domain (Filename.concat out_dir a) alt_1.instance in
        let hpb = Instance.save_dep_svg ?domain (Filename.concat out_dir b) alt_2.instance in
bguillaum's avatar
bguillaum committed
107 108 109
        let (afn,apos) = G_graph.get_annot_info alt_1.instance.Instance.graph
        and (bfn,bpos) = G_graph.get_annot_info alt_2.instance.Instance.graph in
        (base_name,i,(afn,apos),(bfn,bpos),(hpa,hpb))
bguillaum's avatar
bguillaum committed
110 111 112
      | _ -> Error.run "Not two alternatives in an annotation rewriting in %s" base_name
      ) t.good_nf

bguillaum's avatar
bguillaum committed
113
  let save_det_conll ?domain ?header base t =
bguillaum's avatar
bguillaum committed
114 115
    let rec loop t =
      match (t.good_nf, t.bad_nf) with
116 117 118
        | ([],[]) ->
          let output =
            match header with
bguillaum's avatar
bguillaum committed
119 120
              | Some h -> sprintf "%% %s\n%s" h (Instance.to_conll_string ?domain t.instance)
              | None -> Instance.to_conll_string ?domain t.instance in
121 122
          File.write output (base^".conll")
        | ([one], []) -> loop one
123
        | _ -> Error.run "[save_det_conll] Not a single rewriting"
bguillaum's avatar
bguillaum committed
124 125
    in loop t

bguillaum's avatar
bguillaum committed
126
  let det_dep_string ?domain t =
bguillaum's avatar
bguillaum committed
127 128 129 130
    let rec loop t =
      match (t.good_nf, t.bad_nf) with
        | [],[] ->
          let graph = t.instance.Instance.graph in
bguillaum's avatar
bguillaum committed
131
          Some (G_graph.to_dep ?domain graph)
bguillaum's avatar
bguillaum committed
132
        | [one], [] -> loop one
bguillaum's avatar
bguillaum committed
133
        | _ -> None
bguillaum's avatar
bguillaum committed
134
    in loop t
bguillaum's avatar
bguillaum committed
135

bguillaum's avatar
bguillaum committed
136
  let conll_dep_string ?domain ?(keep_empty_rh=false) t =
bguillaum's avatar
bguillaum committed
137 138 139 140 141 142 143
    if (not keep_empty_rh) && is_empty t
    then None
    else
      let rec loop t =
        match (t.good_nf, t.bad_nf) with
          | [],[] ->
            let graph = t.instance.Instance.graph in
bguillaum's avatar
bguillaum committed
144
            Some (G_graph.to_conll_string ?domain graph)
bguillaum's avatar
bguillaum committed
145 146 147
          | [one], [] -> loop one
          | _ -> None
      in loop t
148
end (* module Rewrite_history *)
pj2m's avatar
pj2m committed
149

bguillaum's avatar
bguillaum committed
150
(* ================================================================================ *)
pj2m's avatar
pj2m committed
151 152
module Modul = struct
  type t = {
153
    name: string;
154
    local_labels: (string * string list) array;
155 156 157 158 159
    rules: Rule.t list;
    filters: Rule.t list;
    confluent: bool;
    loc: Loc.t;
  }
pj2m's avatar
pj2m committed
160

161 162 163 164
  let check t =
    (* check for duplicate rules *)
    let rec loop already_defined = function
      | [] -> ()
165 166
      | r::_ when List.mem (Rule.get_name r) already_defined ->
        Error.build ~loc:(Rule.get_loc r) "Rule '%s' is defined twice in the same module" (Rule.get_name r)
167 168
      | r::tail -> loop ((Rule.get_name r) :: already_defined) tail in
    loop [] t.rules
pj2m's avatar
pj2m committed
169

bguillaum's avatar
bguillaum committed
170
  let build ?domain ast_module =
171 172
    let locals = Array.of_list ast_module.Ast.local_labels in
    Array.sort compare locals;
bguillaum's avatar
bguillaum committed
173
    let rules_or_filters = List.map (Rule.build ?domain ~locals ast_module.Ast.mod_dir) ast_module.Ast.rules in
174 175
    let (filters, rules) = List.partition Rule.is_filter rules_or_filters in
    let modul =
176
      {
177 178
        name = ast_module.Ast.module_id;
        local_labels = locals;
179 180
        rules;
        filters;
181 182 183
        confluent = ast_module.Ast.confluent;
        loc = ast_module.Ast.mod_loc;
      } in
184
    check modul; modul
185
end (* module Modul *)
186

bguillaum's avatar
bguillaum committed
187
(* ================================================================================ *)
188 189
module Sequence = struct
  type t = {
190 191 192 193
    name: string;
    def: string list;
    loc: Loc.t;
  }
194 195 196

  let check module_list t =
    List.iter
197
      (fun module_name ->
198
        if not (List.exists (fun modul -> modul.Modul.name = module_name) module_list)
bguillaum's avatar
Typos  
bguillaum committed
199
        then Error.build ~loc:t.loc "sequence \"%s\" refers to the unknown module \"%s\"."
200
          t.name module_name
201 202
      ) t.def

203
  let build module_list ast_sequence =
204 205 206 207 208 209
    match ast_sequence with
    | Ast.New ((n,_),s) ->
    printf "----%s----> %s\n%!" n (Ast.new_sequence_to_string s);
    printf "====%s====> %s\n%!" n (Ast.new_sequence_to_string (Ast.flatten s));
    {name=n; def=[]; loc=Loc.file "No_file_given"; }
    | Ast.Old old_seq ->
210
    let sequence =
211
      {
212 213 214
        name = old_seq.Ast.seq_name;
        def = old_seq.Ast.seq_mod;
        loc = old_seq.Ast.seq_loc;
215
      } in
216
    check module_list sequence; sequence
217
end (* module Sequence *)
pj2m's avatar
pj2m committed
218

bguillaum's avatar
bguillaum committed
219
(* ================================================================================ *)
pj2m's avatar
pj2m committed
220
module Grs = struct
221

pj2m's avatar
pj2m committed
222
  type t = {
bguillaum's avatar
bguillaum committed
223
    domain: Domain.t option;
224
    modules: Modul.t list;       (* the ordered list of modules used from rewriting *)
225
    sequences: Sequence.t list;
bguillaum's avatar
bguillaum committed
226 227
    filename: string;
    ast: Ast.grs;
228 229
  }

bguillaum's avatar
bguillaum committed
230
  let get_modules t = t.modules
bguillaum's avatar
bguillaum committed
231 232
  let get_ast t = t.ast
  let get_filename t = t.filename
233
  let get_domain t = t.domain
234 235
  let sequence_names t = List.map (fun s -> s.Sequence.name) t.sequences

bguillaum's avatar
bguillaum committed
236
  let empty = {domain=None; modules=[]; sequences=[]; ast=Ast.empty_grs; filename=""; }
pj2m's avatar
pj2m committed
237

238 239 240 241
  let check t =
    (* check for duplicate modules *)
    let rec loop already_defined = function
      | [] -> ()
242 243
      | m::_ when List.mem m.Modul.name already_defined ->
        Error.build ~loc:m.Modul.loc "Module '%s' is defined twice" m.Modul.name
244 245 246 247 248 249
      | m::tail -> loop (m.Modul.name :: already_defined) tail in
    loop [] t.modules;

    (* check for duplicate sequences *)
    let rec loop already_defined = function
      | [] -> ()
250 251
      | s::_ when List.mem s.Sequence.name already_defined ->
        Error.build ~loc:s.Sequence.loc "Sequence '%s' is defined twice" s.Sequence.name
252 253 254
      | s::tail -> loop (s.Sequence.name :: already_defined) tail in
    loop [] t.sequences

bguillaum's avatar
bguillaum committed
255 256 257 258
  let domain_build= function
  | {Ast.label_domain=[]; feature_domain=[]} -> None
  | ast_domain -> Some (
      Domain.build
259 260
      (Label_domain.build ast_domain.Ast.label_domain)
      (Feature_domain.build ast_domain.Ast.feature_domain)
bguillaum's avatar
bguillaum committed
261
    )
262

bguillaum's avatar
bguillaum committed
263
  let build filename =
bguillaum's avatar
bguillaum committed
264
    let ast = Loader.grs filename in
265
    let domain = domain_build ast.Ast.domain in
bguillaum's avatar
bguillaum committed
266
    let modules = List.map (Modul.build ?domain) ast.Ast.modules in
267
    let grs = {domain; sequences = List.map (Sequence.build modules) ast.Ast.sequences; modules; ast; filename} in
268 269
    check grs;
    grs
pj2m's avatar
pj2m committed
270

bguillaum's avatar
bguillaum committed
271
  (* compute the list of modules to apply for a requested sentence *)
bguillaum's avatar
bguillaum committed
272
  let modules_of_sequence grs sequence =
bguillaum's avatar
bguillaum committed
273 274 275 276
    try
      let seq = List.find (fun s -> s.Sequence.name = sequence) grs.sequences in
      List.map (fun name -> List.find (fun m -> m.Modul.name=name) grs.modules) seq.Sequence.def
    with Not_found ->
277
      try
bguillaum's avatar
bguillaum committed
278 279 280 281 282 283 284 285
        let modul = List.find (fun m -> m.Modul.name=sequence) grs.modules in
        Log.fwarning "\"%s\" is a module but not a senquence, only this module is used" sequence; [modul]
      with Not_found ->
        match grs.sequences with
        | head::_ ->
          Log.fwarning "No sequence and no module named \"%s\", the first sequence \"%s\" is used" sequence head.Sequence.name;
          List.map (fun name -> List.find (fun m -> m.Modul.name=name) grs.modules) head.Sequence.def
        | _ -> Error.run "No sequence defined and no module named \"%s\", cannot go on" sequence
bguillaum's avatar
bguillaum committed
286

287 288
  let rewrite grs sequence graph =
    let instance = Instance.from_graph graph in
bguillaum's avatar
bguillaum committed
289
    Timeout.start ();
bguillaum's avatar
bguillaum committed
290
    let modules_to_apply = modules_of_sequence grs sequence in
291

292 293
    let rec loop instance module_list =
      match module_list with
294 295 296 297 298
      | [] -> (* no more modules to apply *)
        {Rewrite_history.instance = instance; module_name = ""; good_nf = []; bad_nf = []; }
      | next::tail ->
        let (good_set, bad_set) =
          Rule.normalize
bguillaum's avatar
bguillaum committed
299
            ?domain: grs.domain
300 301 302 303
            next.Modul.name
            ~confluent: next.Modul.confluent
            next.Modul.rules
            next.Modul.filters
304
            (Instance.refresh instance) in
305 306 307 308 309 310 311 312
        let good_list = Instance_set.elements good_set
        and bad_list = Instance_set.elements bad_set in
        {
          Rewrite_history.instance = instance;
          module_name = next.Modul.name;
          good_nf = List.map (fun i -> loop i tail) good_list;
          bad_nf = bad_list;
        } in
bguillaum's avatar
bguillaum committed
313
    loop instance modules_to_apply
314

315 316
  let build_rew_display grs sequence graph =
    let instance = Instance.from_graph graph in
bguillaum's avatar
bguillaum committed
317
    let modules_to_apply = modules_of_sequence grs sequence in
pj2m's avatar
pj2m committed
318

319 320
    let rec loop instance module_list =
      match module_list with
bguillaum's avatar
bguillaum committed
321
      | [] -> Libgrew_types.Leaf instance.Instance.graph
322
      | next :: tail ->
323 324
        let (good_set, bad_set) =
          Rule.normalize
bguillaum's avatar
bguillaum committed
325
            ?domain: grs.domain
326 327 328 329
            next.Modul.name
            ~confluent: next.Modul.confluent
            next.Modul.rules
            next.Modul.filters
330
            (Instance.refresh instance) in
331
        let inst_list = Instance_set.elements good_set
332
              (* and bad_list = Instance_set.elements bad_set *) in
pj2m's avatar
pj2m committed
333

334
        match inst_list with
335
          | [{Instance.big_step = None}] ->
bguillaum's avatar
bguillaum committed
336 337
            Libgrew_types.Local_normal_form (instance.Instance.graph, next.Modul.name, loop instance tail)
          | _ -> Libgrew_types.Node
338 339 340 341 342 343 344 345 346 347
            (
              instance.Instance.graph,
              next.Modul.name,
              List.map
                (fun inst ->
                  match inst.Instance.big_step with
                    | None -> Error.bug "Cannot have no big_steps and more than one reducts at the same time"
                    | Some bs -> (bs, loop inst tail)
                ) inst_list
            )
pj2m's avatar
pj2m committed
348
    in loop instance modules_to_apply
bguillaum's avatar
bguillaum committed
349

bguillaum's avatar
bguillaum committed
350 351 352
  let rule_iter fct grs =
    List.iter
      (fun modul ->
bguillaum's avatar
bguillaum committed
353 354 355 356 357 358 359
        List.iter (fun rule -> fct modul.Modul.name rule) modul.Modul.rules
      ) grs.modules

  let filter_iter fct grs =
    List.iter
      (fun modul ->
        List.iter (fun filter -> fct modul.Modul.name filter) modul.Modul.filters
bguillaum's avatar
bguillaum committed
360
      ) grs.modules
361
end (* module Grs *)