grew_grs.ml 11.4 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

pj2m's avatar
pj2m committed
11 12 13
open Printf
open Log

14
open Grew_fs
15
open Libgrew_utils
bguillaum's avatar
bguillaum committed
16
open Grew_ast
17
open Grew_edge
bguillaum's avatar
bguillaum committed
18 19 20
open Grew_command
open Grew_graph
open Grew_rule
bguillaum's avatar
bguillaum committed
21
open Grew_parser
pj2m's avatar
pj2m committed
22

23
(* ==================================================================================================== *)
pj2m's avatar
pj2m committed
24 25
module Rewrite_history = struct
  type t = {
26 27 28 29 30
    instance: Instance.t;
    module_name: string;
    good_nf: t list;
    bad_nf: Instance.t list;
  }
pj2m's avatar
pj2m committed
31

32
  let rec is_empty t =
bguillaum's avatar
bguillaum committed
33
    (t.instance.Instance.rules = []) && List.for_all is_empty t.good_nf
bguillaum's avatar
bguillaum committed
34 35

  let rec num_sol = function
36 37
    | { good_nf = []; bad_nf = [] } -> 1
    | { good_nf = [] } -> 0 (* dead branch *)
bguillaum's avatar
bguillaum committed
38
    | { good_nf = l} -> List.fold_left (fun acc t -> acc + (num_sol t)) 0 l
39

40
  let save_nfs ?filter ?main_feat ~dot base_name t =
41
    let rec loop file_name rules t =
42
      match (t.good_nf, t.bad_nf) with
43
        | [],[] when dot -> Instance.save_dot_png ?filter ?main_feat file_name t.instance; [rules, file_name]
bguillaum's avatar
bguillaum committed
44
        | [],[] -> ignore (Instance.save_dep_png ?filter ?main_feat file_name t.instance); [rules, file_name]
45 46
        | [],_ -> []
        | l, _ ->
47
          List_.foldi_left
48
            (fun i acc son ->
49
              (* Instance.save_dep_png ?main_feat (sprintf "%s_%d" file_name i) son.instance; *)
50 51 52 53
              let nfs = loop
                (sprintf "%s_%d" file_name i)
                (rules @ [t.module_name, son.instance.Instance.rules])
                son in
54 55 56 57 58
              nfs @ acc
            )
            [] l
    in loop base_name [] t

59 60 61 62 63 64 65
  let save_gr base t =
    let rec loop file_name t =
      match (t.good_nf, t.bad_nf) with
        | [],[] -> File.write (Instance.to_gr t.instance) (file_name^".gr")
        | l, _ -> List_.iteri (fun i son -> loop (sprintf "%s_%d" file_name i) son) l
    in loop base t

bguillaum's avatar
bguillaum committed
66 67 68 69 70 71 72
  let save_conll base t =
    let rec loop file_name t =
      match (t.good_nf, t.bad_nf) with
        | [],[] -> File.write (Instance.to_conll t.instance) (file_name^".conll")
        | l, _ -> List_.iteri (fun i son -> loop (sprintf "%s_%d" file_name i) son) l
    in loop base t

73 74 75 76 77 78 79 80
  (* suppose that all modules are confluent and produced exacly one normal form *)
  let save_det_gr base t =
    let rec loop t =
      match (t.good_nf, t.bad_nf) with
        | [],[] -> File.write (Instance.to_gr t.instance) (base^".gr")
        | [one], [] -> loop one
        | _ -> Error.run "Not a single rewriting"
    in loop t
bguillaum's avatar
bguillaum committed
81

bguillaum's avatar
bguillaum committed
82 83 84 85 86 87 88 89 90
  let save_annot out_dir base_name t =
    List_.mapi
      (fun i alts ->
        match alts.good_nf with
      | [alt_1; alt_2] ->
        let a = sprintf "%s_%d_A" base_name i in
        let b = sprintf "%s_%d_B" base_name i in
        let hpa = Instance.save_dep_svg (Filename.concat out_dir a) alt_1.instance in
        let hpb = Instance.save_dep_svg (Filename.concat out_dir b) alt_2.instance in
bguillaum's avatar
bguillaum committed
91 92 93
        let (afn,apos) = G_graph.get_annot_info alt_1.instance.Instance.graph
        and (bfn,bpos) = G_graph.get_annot_info alt_2.instance.Instance.graph in
        (base_name,i,(afn,apos),(bfn,bpos),(hpa,hpb))
bguillaum's avatar
bguillaum committed
94 95 96
      | _ -> Error.run "Not two alternatives in an annotation rewriting in %s" base_name
      ) t.good_nf

97
  let save_det_conll ?header base t =
bguillaum's avatar
bguillaum committed
98 99
    let rec loop t =
      match (t.good_nf, t.bad_nf) with
100 101 102 103 104 105 106
        | ([],[]) ->
          let output =
            match header with
              | Some h -> sprintf "%% %s\n%s" h (Instance.to_conll t.instance)
              | None -> Instance.to_conll t.instance in
          File.write output (base^".conll")
        | ([one], []) -> loop one
bguillaum's avatar
bguillaum committed
107 108 109
        | _ -> Error.run "Not a single rewriting"
    in loop t

bguillaum's avatar
bguillaum committed
110 111 112 113 114
  let det_dep_string t =
    let rec loop t =
      match (t.good_nf, t.bad_nf) with
        | [],[] ->
          let graph = t.instance.Instance.graph in
bguillaum's avatar
bguillaum committed
115
          Some (G_graph.to_dep graph)
bguillaum's avatar
bguillaum committed
116
        | [one], [] -> loop one
bguillaum's avatar
bguillaum committed
117
        | _ -> None
bguillaum's avatar
bguillaum committed
118
    in loop t
bguillaum's avatar
bguillaum committed
119 120 121 122 123 124 125 126 127 128 129 130 131

  let conll_dep_string ?(keep_empty_rh=false) t =
    if (not keep_empty_rh) && is_empty t
    then None
    else
      let rec loop t =
        match (t.good_nf, t.bad_nf) with
          | [],[] ->
            let graph = t.instance.Instance.graph in
            Some (G_graph.to_conll graph)
          | [one], [] -> loop one
          | _ -> None
      in loop t
132
end (* module Rewrite_history *)
pj2m's avatar
pj2m committed
133

134
(* ==================================================================================================== *)
pj2m's avatar
pj2m committed
135 136
module Modul = struct
  type t = {
137
    name: string;
138
    local_labels: (string * string list) array;
139 140 141 142 143
    rules: Rule.t list;
    filters: Rule.t list;
    confluent: bool;
    loc: Loc.t;
  }
pj2m's avatar
pj2m committed
144

145 146 147 148
  let check t =
    (* check for duplicate rules *)
    let rec loop already_defined = function
      | [] -> ()
149 150
      | r::_ when List.mem (Rule.get_name r) already_defined ->
        Error.build ~loc:(Rule.get_loc r) "Rule '%s' is defined twice in the same module" (Rule.get_name r)
151 152
      | r::tail -> loop ((Rule.get_name r) :: already_defined) tail in
    loop [] t.rules
pj2m's avatar
pj2m committed
153

154
  let build ast_module =
155 156
    let locals = Array.of_list ast_module.Ast.local_labels in
    Array.sort compare locals;
157
    let rules_or_filters = List.map (Rule.build ~locals ast_module.Ast.mod_dir) ast_module.Ast.rules in
158 159
    let (filters, rules) = List.partition Rule.is_filter rules_or_filters in
    let modul =
160
      {
161 162 163 164 165 166 167
        name = ast_module.Ast.module_id;
        local_labels = locals;
        rules = rules;
        filters = filters;
        confluent = ast_module.Ast.confluent;
        loc = ast_module.Ast.mod_loc;
      } in
168
    check modul; modul
169
end (* module Modul *)
170

171
(* ==================================================================================================== *)
172 173
module Sequence = struct
  type t = {
174 175 176 177
    name: string;
    def: string list;
    loc: Loc.t;
  }
178 179 180

  let check module_list t =
    List.iter
181
      (fun module_name ->
182
        if not (List.exists (fun modul -> modul.Modul.name = module_name) module_list)
bguillaum's avatar
Typos  
bguillaum committed
183
        then Error.build ~loc:t.loc "sequence \"%s\" refers to the unknown module \"%s\"."
184
          t.name module_name
185 186
      ) t.def

187 188
  let build module_list ast_sequence =
    let sequence =
189
      {
190 191 192 193
        name = ast_sequence.Ast.seq_name;
        def = ast_sequence.Ast.seq_mod;
        loc = ast_sequence.Ast.seq_loc;
      } in
194
    check module_list sequence; sequence
195
end (* module Sequence *)
pj2m's avatar
pj2m committed
196

197
(* ==================================================================================================== *)
pj2m's avatar
pj2m committed
198
module Grs = struct
199

pj2m's avatar
pj2m committed
200
  type t = {
201 202
    labels: Label.t list;        (* the list of global edge labels *)
    modules: Modul.t list;       (* the ordered list of modules used from rewriting *)
203
    sequences: Sequence.t list;
bguillaum's avatar
bguillaum committed
204 205
    filename: string;
    ast: Ast.grs;
206 207
  }

bguillaum's avatar
bguillaum committed
208
  let get_modules t = t.modules
bguillaum's avatar
bguillaum committed
209 210
  let get_ast t = t.ast
  let get_filename t = t.filename
bguillaum's avatar
bguillaum committed
211

212 213
  let sequence_names t = List.map (fun s -> s.Sequence.name) t.sequences

bguillaum's avatar
bguillaum committed
214
  let empty = {labels=[]; modules=[]; sequences=[]; ast=Ast.empty_grs; filename=""; }
pj2m's avatar
pj2m committed
215

216 217 218 219
  let check t =
    (* check for duplicate modules *)
    let rec loop already_defined = function
      | [] -> ()
220 221
      | m::_ when List.mem m.Modul.name already_defined ->
        Error.build ~loc:m.Modul.loc "Module '%s' is defined twice" m.Modul.name
222 223 224 225 226 227
      | m::tail -> loop (m.Modul.name :: already_defined) tail in
    loop [] t.modules;

    (* check for duplicate sequences *)
    let rec loop already_defined = function
      | [] -> ()
228 229
      | s::_ when List.mem s.Sequence.name already_defined ->
        Error.build ~loc:s.Sequence.loc "Sequence '%s' is defined twice" s.Sequence.name
230 231 232
      | s::tail -> loop (s.Sequence.name :: already_defined) tail in
    loop [] t.sequences

bguillaum's avatar
bguillaum committed
233 234 235 236 237
  let build filename =
    let ast = Grew_parser.grs_of_file filename in
    Label.init ast.Ast.labels;
    Domain.init ast.Ast.domain;
    let modules = List.map Modul.build ast.Ast.modules in
238
    let grs = {
bguillaum's avatar
bguillaum committed
239 240 241
      labels = List.map (fun (l,_) -> Label.from_string l) ast.Ast.labels;
      sequences = List.map (Sequence.build modules) ast.Ast.sequences;
      modules; ast; filename;
242 243
    } in
    check grs; grs
pj2m's avatar
pj2m committed
244

bguillaum's avatar
bguillaum committed
245
  let modules_of_sequence grs sequence =
246
    let module_names =
247
      try
248 249
        let seq = List.find (fun s -> s.Sequence.name = sequence) grs.sequences in
        seq.Sequence.def
bguillaum's avatar
bguillaum committed
250 251
      with Not_found -> [sequence] in (* a module name can be used as a singleton sequence *)

252 253 254
    List.map
      (fun name ->
        try List.find (fun m -> m.Modul.name=name) grs.modules
bguillaum's avatar
bguillaum committed
255 256 257 258
        with Not_found -> Log.fcritical "No sequence or module named '%s'" name
      )
      module_names

259
  let rewrite grs sequence instance =
bguillaum's avatar
bguillaum committed
260
    Timeout.start ();
bguillaum's avatar
bguillaum committed
261
    let modules_to_apply = modules_of_sequence grs sequence in
262

pj2m's avatar
pj2m committed
263
    let rec loop instance = function
264 265 266 267 268 269 270 271 272
      | [] -> (* no more modules to apply *)
        {Rewrite_history.instance = instance; module_name = ""; good_nf = []; bad_nf = []; }
      | next::tail ->
        let (good_set, bad_set) =
          Rule.normalize
            next.Modul.name
            ~confluent: next.Modul.confluent
            next.Modul.rules
            next.Modul.filters
273
            (Instance.flatten instance) in
274 275 276 277 278 279 280 281
        let good_list = Instance_set.elements good_set
        and bad_list = Instance_set.elements bad_set in
        {
          Rewrite_history.instance = instance;
          module_name = next.Modul.name;
          good_nf = List.map (fun i -> loop i tail) good_list;
          bad_nf = bad_list;
        } in
bguillaum's avatar
bguillaum committed
282
    loop instance modules_to_apply
283

bguillaum's avatar
bguillaum committed
284 285
  let build_rew_display grs sequence instance =
    let modules_to_apply = modules_of_sequence grs sequence in
pj2m's avatar
pj2m committed
286 287 288

    let rec loop instance = function
      | [] -> Grew_types.Leaf instance.Instance.graph
289
      | next :: tail ->
290 291 292 293 294 295
        let (good_set, bad_set) =
          Rule.normalize
            next.Modul.name
            ~confluent: next.Modul.confluent
            next.Modul.rules
            next.Modul.filters
296
            (Instance.flatten instance) in
297
        let inst_list = Instance_set.elements good_set
298
              (* and bad_list = Instance_set.elements bad_set *) in
pj2m's avatar
pj2m committed
299

300
        match inst_list with
301
          | [{Instance.big_step = None}] ->
302
            Grew_types.Local_normal_form (instance.Instance.graph, next.Modul.name, loop instance tail)
303
          | _ -> Grew_types.Node
304 305 306 307 308 309 310 311 312 313
            (
              instance.Instance.graph,
              next.Modul.name,
              List.map
                (fun inst ->
                  match inst.Instance.big_step with
                    | None -> Error.bug "Cannot have no big_steps and more than one reducts at the same time"
                    | Some bs -> (bs, loop inst tail)
                ) inst_list
            )
pj2m's avatar
pj2m committed
314
    in loop instance modules_to_apply
bguillaum's avatar
bguillaum committed
315

bguillaum's avatar
bguillaum committed
316 317 318
  let rule_iter fct grs =
    List.iter
      (fun modul ->
bguillaum's avatar
bguillaum committed
319 320 321 322 323 324 325
        List.iter (fun rule -> fct modul.Modul.name rule) modul.Modul.rules
      ) grs.modules

  let filter_iter fct grs =
    List.iter
      (fun modul ->
        List.iter (fun filter -> fct modul.Modul.name filter) modul.Modul.filters
bguillaum's avatar
bguillaum committed
326
      ) grs.modules
327
end (* module Grs *)