libgrew.ml 13.1 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

bguillaum's avatar
bguillaum committed
11
open Printf
pj2m's avatar
pj2m committed
12
open Log
13
open Conll
pj2m's avatar
pj2m committed
14

bguillaum's avatar
bguillaum committed
15 16
let libgrew_debug_mode () = Grew_base.Global.debug := true

bguillaum's avatar
bguillaum committed
17
(* ==================================================================================================== *)
bguillaum's avatar
bguillaum committed
18
(** {2 Location} *)
bguillaum's avatar
bguillaum committed
19 20 21 22 23 24 25 26
(* ==================================================================================================== *)
module Loc = struct
  type t = Grew_base.Loc.t
  let to_string = Grew_base.Loc.to_string
  let to_line = Grew_base.Loc.to_line
end

(* ==================================================================================================== *)
bguillaum's avatar
bguillaum committed
27
(** {2 Exceptions} *)
bguillaum's avatar
bguillaum committed
28
(* ==================================================================================================== *)
29
exception File_not_found of string
bguillaum's avatar
bguillaum committed
30 31 32 33
exception Parsing_err of string * Loc.t option
exception Build of string * Loc.t option
exception Run of string * Loc.t option
exception Bug of string * Loc.t option
pj2m's avatar
pj2m committed
34

35 36
let handle ?(name="") ?(file="No file defined") fct () =
  try fct () with
37
    (* Raise again already catched exceptions *)
bguillaum's avatar
bguillaum committed
38 39 40 41
    | Parsing_err (msg,loc_opt) -> raise (Parsing_err (msg,loc_opt))
    | Build (msg,loc_opt) -> raise (Build (msg,loc_opt))
    | Bug (msg, loc_opt) -> raise (Bug (msg,loc_opt))
    | Run (msg, loc_opt) -> raise (Run (msg,loc_opt))
42
    | File_not_found file -> raise (File_not_found file)
43 44

    (* Catch new exceptions *)
45
    | Grew_base.Error.Parse (msg, loc_opt) -> raise (Parsing_err (msg, loc_opt))
bguillaum's avatar
bguillaum committed
46 47 48
    | Grew_base.Error.Build (msg, loc_opt) -> raise (Build (msg, loc_opt))
    | Grew_base.Error.Bug (msg, loc_opt) -> raise (Bug (msg,loc_opt))
    | Grew_base.Error.Run (msg, loc_opt) -> raise (Run (msg,loc_opt))
49

50
    | exc -> raise (Bug (sprintf "[Libgrew.%s] UNCATCHED EXCEPTION: %s" name (Printexc.to_string exc), None))
pj2m's avatar
pj2m committed
51

bguillaum's avatar
bguillaum committed
52

bguillaum's avatar
bguillaum committed
53
(* ==================================================================================================== *)
54
(** {2 Domain} *)
bguillaum's avatar
bguillaum committed
55 56 57
(* ==================================================================================================== *)
module Domain = struct
  type t = Grew_types.Domain.t
58

bguillaum's avatar
bguillaum committed
59 60
  let empty = Grew_types.Domain.empty

bguillaum's avatar
bguillaum committed
61 62 63
  let load filename =
    let ast = Grew_loader.Loader.domain filename in
    Grew_grs.Grs.domain_build ast
64

bguillaum's avatar
bguillaum committed
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
  let feature_names domain =  handle ~name:"feature_names" (fun () ->  Grew_types.Domain.feature_names domain) ()
end

(* ==================================================================================================== *)
(** {2 Patterns} *)
(* ==================================================================================================== *)
module Pattern = struct
  type t = Grew_rule.Rule.pattern

  let load domain file =
  handle ~name:"Pattern.load" (fun () -> Grew_rule.Rule.build_pattern domain (Grew_loader.Loader.pattern file)) ()
end

(* ==================================================================================================== *)
(** {2 Matching} *)
(* ==================================================================================================== *)
module Matching = struct
  type t = Grew_rule.Rule.matching
end

(* ==================================================================================================== *)
(** {2 Deco} *)
(* ==================================================================================================== *)
module Deco = struct
  type t = Grew_graph.G_deco.t
  let build pattern matching = Grew_rule.Rule.match_deco pattern matching
end

(* ==================================================================================================== *)
bguillaum's avatar
bguillaum committed
94
(** {2 Graph} *)
bguillaum's avatar
bguillaum committed
95 96 97 98 99 100 101 102
(* ==================================================================================================== *)
module Graph = struct


type t = Grew_graph.G_graph.t

  let load_gr domain file =
    if not (Sys.file_exists file)
103
    then raise (File_not_found file)
bguillaum's avatar
bguillaum committed
104 105 106 107 108 109
    else
      handle ~name:"Graph.load_gr" ~file
        (fun () ->
          let gr_ast = Grew_loader.Loader.gr file in
          Grew_graph.G_graph.build domain gr_ast
        ) ()
bguillaum's avatar
bguillaum committed
110

bguillaum's avatar
bguillaum committed
111 112
  let load_conll domain file =
    handle ~name:"Graph.load_conll" ~file
113
      (fun () ->
114
        Grew_graph.G_graph.of_conll domain (Conll.load file)
115
      ) ()
pj2m's avatar
pj2m committed
116

bguillaum's avatar
bguillaum committed
117 118 119 120 121 122
  let load_brown domain file =
    handle ~name:"Graph.load_brown"
      (fun () ->
        let brown = Grew_base.File.load file in
        Grew_graph.G_graph.of_brown domain brown
      ) ()
123

bguillaum's avatar
bguillaum committed
124 125 126 127 128 129 130 131 132 133 134 135 136 137
  let load domain file =
    handle ~name:"Graph.load_graph" ~file
      (fun () ->
        match Grew_base.File.get_suffix file with
        | Some ".gr" -> load_gr domain file
        | Some ".conll" -> load_conll domain file
        | Some ".br" | Some ".melt" -> load_brown domain file
        | _ ->
            Log.fwarning "Unknown file format for input graph '%s', try to guess..." file;
            let rec loop = function
            | [] -> Log.fcritical "[Libgrew.load_graph] Cannot guess input file format of file '%s'. Use .gr or .conll file extension" file
            | load_fct :: tail -> try load_fct domain file with _ -> loop tail in
            loop [load_gr; load_conll; load_brown]
      ) ()
bguillaum's avatar
bguillaum committed
138

139 140
  let of_gr domain ?(grewpy=false) gr_string =
    handle ~name:"Graph.of_gr" (fun () -> Grew_graph.G_graph.build domain ~grewpy (Grew_loader.Parser.gr gr_string)) ()
141

142
  let of_conll domain conll =
143
    handle ~name:"Graph.of_conll" (fun () -> Grew_graph.G_graph.of_conll domain conll) ()
144

bguillaum's avatar
bguillaum committed
145 146
  let of_brown domain ?sentid brown =
    handle ~name:"Graph.of_brown" (fun () -> Grew_graph.G_graph.of_brown domain ?sentid brown) ()
bguillaum's avatar
bguillaum committed
147

bguillaum's avatar
bguillaum committed
148 149
  let to_dot domain ?main_feat ?(deco=Grew_graph.G_deco.empty) graph =
    handle ~name:"Graph.to_dot" (fun () -> Grew_graph.G_graph.to_dot domain ?main_feat graph ~deco) ()
bguillaum's avatar
bguillaum committed
150

bguillaum's avatar
bguillaum committed
151 152
  let to_dep domain ?filter ?main_feat ?(deco=Grew_graph.G_deco.empty) graph =
    handle ~name:"Graph.to_dep" (fun () -> Grew_graph.G_graph.to_dep domain ?filter ?main_feat ~deco graph) ()
bguillaum's avatar
bguillaum committed
153

bguillaum's avatar
bguillaum committed
154 155
  let to_gr domain graph =
    handle ~name:"Graph.to_gr" (fun () -> Grew_graph.G_graph.to_gr domain graph) ()
bguillaum's avatar
bguillaum committed
156

157 158
  let to_conll_string domain graph =
    handle ~name:"Graph.to_conll_string" (fun () -> Grew_graph.G_graph.to_conll_string domain graph) ()
bguillaum's avatar
bguillaum committed
159

bguillaum's avatar
bguillaum committed
160 161 162 163 164
  let to_sentence ?main_feat gr =
    handle ~name:"Graph.to_sentence"
      (fun () ->
        Grew_graph.G_graph.to_sentence ?main_feat gr
      ) ()
bguillaum's avatar
bguillaum committed
165

bguillaum's avatar
bguillaum committed
166 167 168
  let save_conll domain filename graph =
    handle ~name:"Graph.save_conll" (fun () ->
      let out_ch = open_out filename in
169
      fprintf out_ch "%s" (Grew_graph.G_graph.to_conll_string domain graph);
bguillaum's avatar
bguillaum committed
170
      close_out out_ch
bguillaum's avatar
bguillaum committed
171 172
    ) ()

bguillaum's avatar
bguillaum committed
173 174 175 176
  let raw domain gr =
    handle ~name:"Graph.raw" (fun () -> Grew_graph.G_graph.to_raw domain gr) ()

  let search_pattern domain pattern graph = Grew_rule.Rule.match_in_graph domain pattern graph
bguillaum's avatar
bguillaum committed
177

178 179
  let node_matching pattern graph matching  = Grew_rule.Rule.node_matching pattern graph matching

bguillaum's avatar
bguillaum committed
180
end
181

bguillaum's avatar
bguillaum committed
182
(* ==================================================================================================== *)
183
(** {2 Graph Rewriting System} *)
bguillaum's avatar
bguillaum committed
184 185 186
(* ==================================================================================================== *)
module Grs = struct
  type t = Grew_grs.Grs.t
187

bguillaum's avatar
bguillaum committed
188
  let empty = Grew_grs.Grs.empty
189

bguillaum's avatar
bguillaum committed
190 191 192 193
  let load file =
    handle ~name:"Grs.load" ~file
      (fun () ->
        if not (Sys.file_exists file)
194
        then raise (File_not_found file)
bguillaum's avatar
bguillaum committed
195 196
        else Grew_grs.Grs.build file
      ) ()
197

bguillaum's avatar
bguillaum committed
198 199 200 201 202
  let get_sequence_names grs =
    handle ~name:"Grs.get_sequence_names"
      (fun () ->
        Grew_grs.Grs.sequence_names grs
      ) ()
203

bguillaum's avatar
bguillaum committed
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
  IFDEF DEP2PICT THEN
  let build_html_doc ?(corpus=false) dir grs =
    handle ~name:"Grs.build_doc [with Dep2pict]"
      (fun () ->
        Grew_html.Html_doc.build ~corpus ~dep:true dir grs;

        (* draw pattern graphs for all rules and all filters *)
        let fct module_ rule_ =
          let dep_code = Grew_rule.Rule.to_dep (Grew_grs.Grs.get_domain grs) rule_ in
          let dep_png_file = sprintf "%s/%s_%s-patt.png" dir module_ (Grew_rule.Rule.get_name rule_) in
          let d2p = Dep2pict.Dep2pict.from_dep ~dep:dep_code in
          Dep2pict.Dep2pict.save_png ~filename:dep_png_file d2p in
        Grew_grs.Grs.rule_iter fct grs;
        Grew_grs.Grs.filter_iter fct grs
      ) ()
  ELSE
  let build_html_doc ?(corpus=false) dir grs =
    handle ~name:"Grs.build_doc [without Dep2pict]" (fun () -> Grew_html.Html_doc.build ~corpus ~dep:false dir grs) ()
  END

  let get_domain grs = Grew_grs.Grs.get_domain grs
end

(* ==================================================================================================== *)
(** {2 Rewrite} *)
(* ==================================================================================================== *)
module Rewrite = struct
  type display = Libgrew_types.rew_display
  type history = Grew_grs.Rewrite_history.t

234 235 236
  let set_max_depth_det value = Grew_rule.Rule.set_max_depth_det value
  let set_max_depth_non_det value = Grew_rule.Rule.set_max_depth_non_det value

bguillaum's avatar
bguillaum committed
237 238
  let set_debug_loop () = Grew_rule.Rule.set_debug_loop ()

bguillaum's avatar
bguillaum committed
239 240 241 242 243 244 245 246
  let display ~gr ~grs ~seq =
    handle ~name:"Rewrite.display" (fun () -> Grew_grs.Grs.build_rew_display grs seq gr) ()

  let set_timeout t = Grew_base.Timeout.timeout := t

  let rewrite ~gr ~grs ~seq =
    handle ~name:"Rewrite.rewrite" (fun () -> Grew_grs.Grs.rewrite grs seq gr) ()

247 248 249
  let get_graphs rh =
    handle ~name:"Rewrite.get_graphs" (fun () -> Grew_grs.Rewrite_history.get_graphs rh) ()

bguillaum's avatar
bguillaum committed
250 251 252 253 254 255 256 257 258 259 260 261 262 263 264
  let is_empty rh =
    handle ~name:"Rewrite.is_empty" (fun () -> Grew_grs.Rewrite_history.is_empty rh) ()

  let num_sol rh =
    handle ~name:"Rewrite.num_sol" (fun () -> Grew_grs.Rewrite_history.num_sol rh) ()

  let write_stat filename rew_hist =
    handle ~name:"Rewrite.write_stat" (fun () -> Grew_html.Gr_stat.save filename (Grew_html.Gr_stat.from_rew_history rew_hist)) ()

  let write_annot domain ~title static_dir annot_dir base_name_rew_hist_list =
    handle ~name:"Rewrite.write_annot" (fun () -> Grew_html.Html_annot.build domain ~title static_dir annot_dir base_name_rew_hist_list) ()

  let save_index ~dirname ~base_names =
    handle ~name:"Rewrite.save_index" (fun () ->
      let out_ch = open_out (Filename.concat dirname "index") in
265
      Array.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
bguillaum's avatar
bguillaum committed
266
      close_out out_ch
267 268
    ) ()

bguillaum's avatar
bguillaum committed
269 270
  let save_gr domain base rew_hist =
    handle ~name:"Rewrite.save_gr" (fun () -> Grew_grs.Rewrite_history.save_gr domain base rew_hist) ()
bguillaum's avatar
bguillaum committed
271

bguillaum's avatar
bguillaum committed
272 273
  let save_conll domain base rew_hist =
    handle ~name:"Rewrite.save_conll" (fun () -> Grew_grs.Rewrite_history.save_conll domain base rew_hist) ()
bguillaum's avatar
bguillaum committed
274

bguillaum's avatar
bguillaum committed
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307
  let save_full_conll domain base rew_hist =
    handle ~name:"Rewrite.save_full_conll" (fun () -> Grew_grs.Rewrite_history.save_full_conll domain base rew_hist) ()

  let save_det_gr domain base rew_hist =
    handle ~name:"Rewrite.save_det_gr" (fun () -> Grew_grs.Rewrite_history.save_det_gr domain base rew_hist) ()

  let save_det_conll domain ?header base rew_hist =
    handle ~name:"Rewrite.save_det_conll" (fun () -> Grew_grs.Rewrite_history.save_det_conll domain ?header base rew_hist) ()

  let det_dep_string domain rew_hist =
    handle ~name:"Rewrite.det_dep_string" (fun () -> Grew_grs.Rewrite_history.det_dep_string domain rew_hist) ()

  let conll_dep_string domain ?keep_empty_rh rew_hist =
    handle ~name:"Rewrite.conll_dep_string" (fun () -> Grew_grs.Rewrite_history.conll_dep_string domain ?keep_empty_rh rew_hist) ()

  let write_html domain ?(no_init=false) ?(out_gr=false) ?filter ?main_feat ?dot ~header ?graph_file rew_hist output_base =
    handle ~name:"Rewrite.write_html" (fun () ->
      ignore (
        Grew_html.Html_rh.build domain ?filter ?main_feat ?dot ~out_gr ~init_graph: (not no_init) ~header ?graph_file output_base rew_hist
      )
    ) ()

  let error_html domain ?(no_init=false) ?main_feat ?dot ~header msg ?init output_base =
    handle ~name:"Rewrite.error_html" (fun () ->
      ignore (
        Grew_html.Html_rh.error domain ?main_feat ?dot ~init_graph: (not no_init) ~header output_base msg init
      )
    ) ()

  let make_index ~title ~grs_file ~html ~grs ~seq ~input_dir ~output_dir ~base_names  =
    handle ~name:"Rewrite.make_index" (fun () ->
      let init = Grew_html.Corpus_stat.empty grs seq in
      let corpus_stat =
308
        Array.fold_left
bguillaum's avatar
bguillaum committed
309 310 311 312 313
          (fun acc base_name ->
            Grew_html.Corpus_stat.add_gr_stat base_name (Grew_html.Gr_stat.load (Filename.concat output_dir (base_name^".stat"))) acc
          ) init base_names in
      Grew_html.Corpus_stat.save_html title grs_file input_dir output_dir corpus_stat
    ) ()
bguillaum's avatar
bguillaum committed
314

bguillaum's avatar
bguillaum committed
315 316
  let html_sentences ~title = handle ~name:"Rewrite.html_sentences" (fun () -> Grew_html.Html_sentences.build ~title) ()
end
317 318 319