libgrew.ml 16.9 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

bguillaum's avatar
bguillaum committed
11
open Printf
pj2m's avatar
pj2m committed
12
open Log
13
open Conll
pj2m's avatar
pj2m committed
14

bguillaum's avatar
bguillaum committed
15 16
(** Switch the library into debug mode by setting the [Grew_base.Global.debug] reference to [true]. There is no counterpart in this file to switch it back off. *)
let libgrew_debug_mode () = Grew_base.Global.debug := true

bguillaum's avatar
bguillaum committed
17
(* ==================================================================================================== *)
bguillaum's avatar
bguillaum committed
18
(** {2 Location} *)
bguillaum's avatar
bguillaum committed
19 20 21 22 23 24 25 26
(* ==================================================================================================== *)
(** Public re-export of the location type of [Grew_base.Loc] and of two of its printers. *)
module Loc = struct
  (** Same type as [Grew_base.Loc.t]. *)
  type t = Grew_base.Loc.t
  (* Plain alias of [Grew_base.Loc.to_string]. *)
  let to_string = Grew_base.Loc.to_string
  (* Plain alias of [Grew_base.Loc.to_line]. *)
  let to_line = Grew_base.Loc.to_line
end

(* ==================================================================================================== *)
bguillaum's avatar
bguillaum committed
27
(** {2 Exceptions} *)
bguillaum's avatar
bguillaum committed
28
(* ==================================================================================================== *)
29
(** Raised by [Graph.load_gr] and [Grs.load] when [Sys.file_exists] is false for the requested path; the payload is that path. *)
exception File_not_found of string
bguillaum's avatar
bguillaum committed
30 31 32 33
(** Public counterpart of [Grew_base.Error.Parse]: a message and an optional location. *)
exception Parsing_err of string * Loc.t option
(** Public counterpart of [Grew_base.Error.Build]: a message and an optional location. *)
exception Build of string * Loc.t option
(** Public counterpart of [Grew_base.Error.Run]: a message and an optional location. *)
exception Run of string * Loc.t option
(** Public counterpart of [Grew_base.Error.Bug]. Also used by [handle] to report any exception it does not know, in which case the location is [None]. *)
exception Bug of string * Loc.t option
pj2m's avatar
pj2m committed
34

35 36
(** [handle ~name fct ()] evaluates [fct ()] and makes sure that only the
    exceptions declared in this file can escape.

    - [Parsing_err], [Build], [Bug], [Run] and [File_not_found] are re-raised
      as they are.
    - [Grew_base.Error.Parse], [Grew_base.Error.Build], [Grew_base.Error.Bug]
      and [Grew_base.Error.Run] are translated into [Parsing_err], [Build],
      [Bug] and [Run] respectively, keeping message and location.
    - Any other exception is turned into [Bug] with no location; its message
      embeds [name] and the [Printexc.to_string] rendering of the exception.

    @param name label of the calling entry point, only used in the message of
           the catch-all [Bug] (defaults to the empty string)
    @param file accepted so that callers can pass the file they work on; the
           body does not read it
    @param fct the thunk to run under protection
    @return whatever [fct ()] returns *)
let handle ?(name="") ?(file="No file defined") fct () =
  try fct () with
    (* Exceptions that are already in their public form: re-raise the caught
       value itself instead of building a copy, so that the raise point
       recorded for the original exception is not replaced by this handler. *)
    | (Parsing_err _ | Build _ | Bug _ | Run _ | File_not_found _) as exc -> raise exc

    (* Internal exceptions: translate them to their public counterpart. *)
    | Grew_base.Error.Parse (msg, loc_opt) -> raise (Parsing_err (msg, loc_opt))
    | Grew_base.Error.Build (msg, loc_opt) -> raise (Build (msg, loc_opt))
    | Grew_base.Error.Bug (msg, loc_opt) -> raise (Bug (msg, loc_opt))
    | Grew_base.Error.Run (msg, loc_opt) -> raise (Run (msg, loc_opt))

    (* Anything else is reported as an internal bug. *)
    | exc -> raise (Bug (sprintf "[Libgrew.%s] UNCATCHED EXCEPTION: %s" name (Printexc.to_string exc), None))
pj2m's avatar
pj2m committed
51

bguillaum's avatar
bguillaum committed
52

bguillaum's avatar
bguillaum committed
53
(* ==================================================================================================== *)
54
(** {2 Domain} *)
bguillaum's avatar
bguillaum committed
55 56 57
(* ==================================================================================================== *)
(** Domains, as defined by [Grew_types.Domain]. *)
module Domain = struct
  (** Same type as [Grew_types.Domain.t]. *)
  type t = Grew_types.Domain.t

  (** [load filename] parses [filename] with [Grew_loader.Loader.domain] and
      builds the domain with [Grew_grs.Grs.domain_build].
      The work is run under [handle], like the other loaders of this file, so
      that internal [Grew_base.Error] exceptions are translated into the
      exceptions declared in this file instead of leaking to the caller. *)
  let load filename =
    handle ~name:"Domain.load" ~file:filename
      (fun () ->
        let ast = Grew_loader.Loader.domain filename in
        Grew_grs.Grs.domain_build ast
      ) ()

  (** [feature_names domain] returns the result of
      [Grew_types.Domain.feature_names domain], evaluated under [handle]. *)
  let feature_names domain = handle ~name:"feature_names" (fun () -> Grew_types.Domain.feature_names domain) ()
end

(* ==================================================================================================== *)
(** {2 Patterns} *)
(* ==================================================================================================== *)
(** Patterns, as defined by [Grew_rule.Rule]. *)
module Pattern = struct
  (** Same type as [Grew_rule.Rule.pattern]. *)
  type t = Grew_rule.Rule.pattern

  (** [load ?domain file] parses [file] with [Grew_loader.Loader.pattern] and
      turns the result into a pattern with [Grew_rule.Rule.build_pattern].
      Both steps run under [handle]. *)
  let load ?domain file =
    let parse_and_build () =
      let pattern_ast = Grew_loader.Loader.pattern file in
      Grew_rule.Rule.build_pattern ?domain pattern_ast in
    handle ~name:"Pattern.load" parse_and_build ()
end

(* ==================================================================================================== *)
(** {2 Matching} *)
(* ==================================================================================================== *)
(** Matchings; the module only re-exports the type. *)
module Matching = struct
  (** Same type as [Grew_rule.Rule.matching]. *)
  type t = Grew_rule.Rule.matching
end

(* ==================================================================================================== *)
(** {2 Deco} *)
(* ==================================================================================================== *)
(** Graph decorations, as defined by [Grew_graph.G_deco]. *)
module Deco = struct
  (** Same type as [Grew_graph.G_deco.t]. *)
  type t = Grew_graph.G_deco.t
  (* [build pattern matching] delegates to [Grew_rule.Rule.match_deco]; the call is not run under [handle]. *)
  let build pattern matching = Grew_rule.Rule.match_deco pattern matching
end

(* ==================================================================================================== *)
bguillaum's avatar
bguillaum committed
92
(** {2 Graph} *)
bguillaum's avatar
bguillaum committed
93 94 95 96 97 98
(* ==================================================================================================== *)
(** Graphs: loading, conversion, export and pattern search.
    Unless stated otherwise, every function runs its work under [handle]. *)
module Graph = struct
  (** Same type as [Grew_graph.G_graph.t]. *)
  type t = Grew_graph.G_graph.t

  (** [load_gr ?domain file] parses [file] with [Grew_loader.Loader.gr] and
      builds the graph with [Grew_graph.G_graph.build].
      @raise File_not_found if [Sys.file_exists file] is false *)
  let load_gr ?domain file =
    if not (Sys.file_exists file)
    then raise (File_not_found file)
    else
      handle ~name:"Graph.load_gr" ~file
        (fun () ->
          let gr_ast = Grew_loader.Loader.gr file in
          Grew_graph.G_graph.build ?domain gr_ast
        ) ()

  (** [load_conll ?domain file] reads [file] with [Conll.load] and converts
      the result with [Grew_graph.G_graph.of_conll]. *)
  let load_conll ?domain file =
    handle ~name:"Graph.load_conll" ~file
      (fun () ->
        Grew_graph.G_graph.of_conll ?domain (Conll.load file)
      ) ()

  (** [load_brown ?domain file] reads [file] with [Grew_base.File.load] and
      converts the result with [Grew_graph.G_graph.of_brown]. *)
  let load_brown ?domain file =
    handle ~name:"Graph.load_brown"
      (fun () ->
        let brown = Grew_base.File.load file in
        Grew_graph.G_graph.of_brown ?domain brown
      ) ()

  (** [load ?domain file] chooses a loader from the suffix returned by
      [Grew_base.File.get_suffix file]:
      [.gr] uses [load_gr], [.conll] uses [load_conll], [.br] and [.melt] use
      [load_brown].
      For any other suffix a warning is logged and the three loaders are tried
      in the order [load_gr], [load_conll], [load_brown]; the first one that
      does not raise gives the result. When all of them fail,
      [Log.fcritical] is called.
      @raise File_not_found as soon as one of the loaders reports it: a
             missing file cannot be fixed by trying another format, so the
             guessing loop lets this exception through instead of ending on
             an unrelated message about the file format *)
  let load ?domain file =
    handle ~name:"Graph.load_graph" ~file
      (fun () ->
        match Grew_base.File.get_suffix file with
        | Some ".gr" -> load_gr ?domain file
        | Some ".conll" -> load_conll ?domain file
        | Some ".br" | Some ".melt" -> load_brown ?domain file
        | _ ->
            Log.fwarning "Unknown file format for input graph '%s', try to guess..." file;
            let rec loop = function
            | [] -> Log.fcritical "[Libgrew.load_graph] Cannot guess input file format of file '%s'. Use .gr or .conll file extension" file
            | load_fct :: tail ->
              begin
                try load_fct ?domain file with
                | File_not_found _ as exc -> raise exc
                (* Deliberate best effort: any other failure only means that
                   this format was the wrong guess. *)
                | _ -> loop tail
              end in
            loop [load_gr; load_conll; load_brown]
      ) ()

  (** [of_gr ?domain ?grewpy gr_string] parses [gr_string] with
      [Grew_loader.Parser.gr] and builds the graph with
      [Grew_graph.G_graph.build]. [grewpy] defaults to [false] and is
      forwarded to the builder. *)
  let of_gr ?domain ?(grewpy=false) gr_string =
    handle ~name:"Graph.of_gr" (fun () -> Grew_graph.G_graph.build ?domain ~grewpy (Grew_loader.Parser.gr gr_string)) ()

  (** [of_conll ?domain conll] delegates to [Grew_graph.G_graph.of_conll]. *)
  let of_conll ?domain conll =
    handle ~name:"Graph.of_conll" (fun () -> Grew_graph.G_graph.of_conll ?domain conll) ()

  (** [of_brown ?domain ?sentid brown] delegates to
      [Grew_graph.G_graph.of_brown]. *)
  let of_brown ?domain ?sentid brown =
    handle ~name:"Graph.of_brown" (fun () -> Grew_graph.G_graph.of_brown ?domain ?sentid brown) ()

  (** [to_dot ?domain ?main_feat ?deco graph] delegates to
      [Grew_graph.G_graph.to_dot]. [deco] defaults to
      [Grew_graph.G_deco.empty]. *)
  let to_dot ?domain ?main_feat ?(deco=Grew_graph.G_deco.empty) graph =
    handle ~name:"Graph.to_dot" (fun () -> Grew_graph.G_graph.to_dot ?domain ?main_feat graph ~deco) ()

  (** [to_dep ?domain ?filter ?main_feat ?deco graph] delegates to
      [Grew_graph.G_graph.to_dep]. [deco] defaults to
      [Grew_graph.G_deco.empty]. *)
  let to_dep ?domain ?filter ?main_feat ?(deco=Grew_graph.G_deco.empty) graph =
    handle ~name:"Graph.to_dep" (fun () -> Grew_graph.G_graph.to_dep ?domain ?filter ?main_feat ~deco graph) ()

  (** [to_gr ?domain graph] delegates to [Grew_graph.G_graph.to_gr]. *)
  let to_gr ?domain graph =
    handle ~name:"Graph.to_gr" (fun () -> Grew_graph.G_graph.to_gr ?domain graph) ()

  (** [to_conll_string ?domain graph] delegates to
      [Grew_graph.G_graph.to_conll_string]. *)
  let to_conll_string ?domain graph =
    handle ~name:"Graph.to_conll_string" (fun () -> Grew_graph.G_graph.to_conll_string ?domain graph) ()

  (** [to_sentence ?main_feat gr] delegates to
      [Grew_graph.G_graph.to_sentence]. *)
  let to_sentence ?main_feat gr =
    handle ~name:"Graph.to_sentence"
      (fun () ->
        Grew_graph.G_graph.to_sentence ?main_feat gr
      ) ()

  (** [save_conll ?domain filename graph] writes the output of
      [Grew_graph.G_graph.to_conll_string] for [graph] into [filename].
      The conversion is done before the file is opened, so a failing
      conversion neither creates nor truncates [filename]. The channel is
      closed on every path: if writing or closing fails, it is released with
      [close_out_noerr] and the failure is re-raised (and then reported
      through [handle]). *)
  let save_conll ?domain filename graph =
    handle ~name:"Graph.save_conll" (fun () ->
      let conll_string = Grew_graph.G_graph.to_conll_string ?domain graph in
      let out_ch = open_out filename in
      try
        output_string out_ch conll_string;
        close_out out_ch
      with exc -> close_out_noerr out_ch; raise exc
    ) ()

  (** [search_pattern ?domain pattern graph] delegates to
      [Grew_rule.Rule.match_in_graph]. The call is not run under [handle]. *)
  let search_pattern ?domain pattern graph = Grew_rule.Rule.match_in_graph ?domain pattern graph

  (** [node_matching pattern graph matching] delegates to
      [Grew_rule.Rule.node_matching]. The call is not run under [handle]. *)
  let node_matching pattern graph matching = Grew_rule.Rule.node_matching pattern graph matching

end
176

bguillaum's avatar
bguillaum committed
177
(* ==================================================================================================== *)
178
(** {2 Graph Rewriting System} *)
bguillaum's avatar
bguillaum committed
179 180 181
(* ==================================================================================================== *)
(** Graph rewriting systems, as defined by [Grew_grs.Grs]. *)
module Grs = struct
  (** Same type as [Grew_grs.Grs.t]. *)
  type t = Grew_grs.Grs.t

  (** Alias of [Grew_grs.Grs.empty]. *)
  let empty = Grew_grs.Grs.empty

  (** [load file] builds a rewriting system from [file] with
      [Grew_grs.Grs.build], under [handle].
      @raise File_not_found if [Sys.file_exists file] is false *)
  let load file =
    let build_from_file () =
      if Sys.file_exists file
      then Grew_grs.Grs.build file
      else raise (File_not_found file) in
    handle ~name:"Grs.load" ~file build_from_file ()

  (** [get_sequence_names grs] returns the result of
      [Grew_grs.Grs.sequence_names grs], evaluated under [handle]. *)
  let get_sequence_names grs =
    handle ~name:"Grs.get_sequence_names" (fun () -> Grew_grs.Grs.sequence_names grs) ()

  (** [build_html_doc ?corpus dir grs] first calls [Grew_html.Html_doc.build]
      on [dir] and [grs], then saves one PNG picture per rule and per filter
      of [grs]. Each picture is produced by Dep2pict from the output of
      [Grew_rule.Rule.to_dep] and stored in [dir] under a name built from the
      module name and the rule name, ending in [-patt.png].
      [corpus] defaults to [false]. *)
  let build_html_doc ?(corpus=false) dir grs =
    let build_doc_and_pictures () =
      Grew_html.Html_doc.build ~corpus ~dep:true dir grs;

      (* Shared by the iteration over rules and the iteration over filters. *)
      let draw_pattern module_name rule =
        let dep_code = Grew_rule.Rule.to_dep ?domain:(Grew_grs.Grs.get_domain grs) rule in
        let png_path = sprintf "%s/%s_%s-patt.png" dir module_name (Grew_rule.Rule.get_name rule) in
        let picture = Dep2pict.Dep2pict.from_dep ~dep:dep_code in
        Dep2pict.Dep2pict.save_png ~filename:png_path picture in

      Grew_grs.Grs.rule_iter draw_pattern grs;
      Grew_grs.Grs.filter_iter draw_pattern grs in
    handle ~name:"Grs.build_doc [with Dep2pict]" build_doc_and_pictures ()

  (** [get_domain grs] delegates to [Grew_grs.Grs.get_domain]. The call is not
      run under [handle]. *)
  let get_domain grs = Grew_grs.Grs.get_domain grs
end

(* ==================================================================================================== *)
(** {2 Rewrite} *)
(* ==================================================================================================== *)
(** Rewriting: settings, execution, inspection of rewriting histories and
    export of the results.
    Unless stated otherwise, every function runs its work under [handle]. *)
module Rewrite = struct
  (** Same type as [Libgrew_types.rew_display]. *)
  type display = Libgrew_types.rew_display

  (** Same type as [Grew_grs.Rewrite_history.t]. *)
  type history = Grew_grs.Rewrite_history.t

  (** Forwards [value] to [Grew_rule.Rule.set_max_depth_det]. Not run under
      [handle]. *)
  let set_max_depth_det value = Grew_rule.Rule.set_max_depth_det value

  (** Forwards [value] to [Grew_rule.Rule.set_max_depth_non_det]. Not run
      under [handle]. *)
  let set_max_depth_non_det value = Grew_rule.Rule.set_max_depth_non_det value

  (** Calls [Grew_rule.Rule.set_debug_loop ()]. Not run under [handle]. *)
  let set_debug_loop () = Grew_rule.Rule.set_debug_loop ()

  (** [display ~gr ~grs ~seq] delegates to [Grew_grs.Grs.build_rew_display],
      passing the arguments in the order [grs], [seq], [gr]. *)
  let display ~gr ~grs ~seq =
    handle ~name:"Rewrite.display" (fun () -> Grew_grs.Grs.build_rew_display grs seq gr) ()

  (** [set_timeout t] stores [t] in the [Grew_base.Timeout.timeout]
      reference. Not run under [handle]. *)
  let set_timeout t = Grew_base.Timeout.timeout := t

  (** [rewrite ~gr ~grs ~seq] delegates to [Grew_grs.Grs.rewrite], passing the
      arguments in the order [grs], [seq], [gr]. *)
  let rewrite ~gr ~grs ~seq =
    handle ~name:"Rewrite.rewrite" (fun () -> Grew_grs.Grs.rewrite grs seq gr) ()

  (** [get_graphs rh] delegates to [Grew_grs.Rewrite_history.get_graphs]. *)
  let get_graphs rh =
    handle ~name:"Rewrite.get_graphs" (fun () -> Grew_grs.Rewrite_history.get_graphs rh) ()

  (** [is_empty rh] delegates to [Grew_grs.Rewrite_history.is_empty]. *)
  let is_empty rh =
    handle ~name:"Rewrite.is_empty" (fun () -> Grew_grs.Rewrite_history.is_empty rh) ()

  (** [num_sol rh] delegates to [Grew_grs.Rewrite_history.num_sol]. *)
  let num_sol rh =
    handle ~name:"Rewrite.num_sol" (fun () -> Grew_grs.Rewrite_history.num_sol rh) ()

  (** [write_stat filename rew_hist] computes statistics with
      [Grew_html.Gr_stat.from_rew_history] and stores them in [filename] with
      [Grew_html.Gr_stat.save]. *)
  let write_stat filename rew_hist =
    handle ~name:"Rewrite.write_stat" (fun () -> Grew_html.Gr_stat.save filename (Grew_html.Gr_stat.from_rew_history rew_hist)) ()

  (** [write_annot ?domain ~title static_dir annot_dir base_name_rew_hist_list]
      delegates to [Grew_html.Html_annot.build]. *)
  let write_annot ?domain ~title static_dir annot_dir base_name_rew_hist_list =
    handle ~name:"Rewrite.write_annot" (fun () -> Grew_html.Html_annot.build ?domain ~title static_dir annot_dir base_name_rew_hist_list) ()

  (** [save_index ~dirname ~base_names] writes every element of the array
      [base_names], one per line, into the file named [index] inside
      [dirname].
      The channel is closed on every path: if writing or closing fails, it is
      released with [close_out_noerr] and the failure is re-raised (and then
      reported through [handle]). *)
  let save_index ~dirname ~base_names =
    handle ~name:"Rewrite.save_index" (fun () ->
      let out_ch = open_out (Filename.concat dirname "index") in
      try
        Array.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
        close_out out_ch
      with exc -> close_out_noerr out_ch; raise exc
    ) ()

  (** [save_gr ?domain base rew_hist] delegates to
      [Grew_grs.Rewrite_history.save_gr]. *)
  let save_gr ?domain base rew_hist =
    handle ~name:"Rewrite.save_gr" (fun () -> Grew_grs.Rewrite_history.save_gr ?domain base rew_hist) ()

  (** [save_conll ?domain base rew_hist] delegates to
      [Grew_grs.Rewrite_history.save_conll]. *)
  let save_conll ?domain base rew_hist =
    handle ~name:"Rewrite.save_conll" (fun () -> Grew_grs.Rewrite_history.save_conll ?domain base rew_hist) ()

  (** [save_full_conll ?domain base rew_hist] delegates to
      [Grew_grs.Rewrite_history.save_full_conll]. *)
  let save_full_conll ?domain base rew_hist =
    handle ~name:"Rewrite.save_full_conll" (fun () -> Grew_grs.Rewrite_history.save_full_conll ?domain base rew_hist) ()

  (** [save_det_gr ?domain base rew_hist] delegates to
      [Grew_grs.Rewrite_history.save_det_gr]. *)
  let save_det_gr ?domain base rew_hist =
    handle ~name:"Rewrite.save_det_gr" (fun () -> Grew_grs.Rewrite_history.save_det_gr ?domain base rew_hist) ()

  (** [save_det_conll ?domain ?header base rew_hist] delegates to
      [Grew_grs.Rewrite_history.save_det_conll]. *)
  let save_det_conll ?domain ?header base rew_hist =
    handle ~name:"Rewrite.save_det_conll" (fun () -> Grew_grs.Rewrite_history.save_det_conll ?domain ?header base rew_hist) ()

  (** [det_dep_string ?domain rew_hist] delegates to
      [Grew_grs.Rewrite_history.det_dep_string]. *)
  let det_dep_string ?domain rew_hist =
    handle ~name:"Rewrite.det_dep_string" (fun () -> Grew_grs.Rewrite_history.det_dep_string ?domain rew_hist) ()

  (** [conll_dep_string ?domain ?keep_empty_rh rew_hist] delegates to
      [Grew_grs.Rewrite_history.conll_dep_string]. *)
  let conll_dep_string ?domain ?keep_empty_rh rew_hist =
    handle ~name:"Rewrite.conll_dep_string" (fun () -> Grew_grs.Rewrite_history.conll_dep_string ?domain ?keep_empty_rh rew_hist) ()

  (** [write_html ... rew_hist output_base] calls [Grew_html.Html_rh.build]
      and discards its result. [no_init] and [out_gr] default to [false];
      the builder receives [~init_graph] as the negation of [no_init].
      Note that the builder takes [output_base] before [rew_hist], the reverse
      of the order used here. *)
  let write_html ?domain ?(no_init=false) ?(out_gr=false) ?filter ?main_feat ?dot ~header ?graph_file rew_hist output_base =
    handle ~name:"Rewrite.write_html" (fun () ->
      ignore (
        Grew_html.Html_rh.build ?domain ?filter ?main_feat ?dot ~out_gr ~init_graph: (not no_init) ~header ?graph_file output_base rew_hist
      )
    ) ()

  (** [error_html ... msg ?init output_base] calls [Grew_html.Html_rh.error]
      and discards its result. [no_init] defaults to [false]; the callee
      receives [~init_graph] as the negation of [no_init], and [init] is
      passed to it as a plain option value in last position. *)
  let error_html ?domain ?(no_init=false) ?main_feat ?dot ~header msg ?init output_base =
    handle ~name:"Rewrite.error_html" (fun () ->
      ignore (
        Grew_html.Html_rh.error ?domain ?main_feat ?dot ~init_graph: (not no_init) ~header output_base msg init
      )
    ) ()

  (** [make_index ~title ~grs_file ~html ~grs ~seq ~input_dir ~output_dir
      ~base_names] accumulates corpus statistics and saves them as HTML.
      Starting from [Grew_html.Corpus_stat.empty grs seq], it loads, for each
      element [b] of the array [base_names], the file [b.stat] found in
      [output_dir] and adds it with [Grew_html.Corpus_stat.add_gr_stat]; the
      total is written with [Grew_html.Corpus_stat.save_html].
      The [html] argument is accepted but not read by the body. *)
  let make_index ~title ~grs_file ~html ~grs ~seq ~input_dir ~output_dir ~base_names =
    handle ~name:"Rewrite.make_index" (fun () ->
      let init = Grew_html.Corpus_stat.empty grs seq in
      let corpus_stat =
        Array.fold_left
          (fun acc base_name ->
            Grew_html.Corpus_stat.add_gr_stat base_name (Grew_html.Gr_stat.load (Filename.concat output_dir (base_name^".stat"))) acc
          ) init base_names in
      Grew_html.Corpus_stat.save_html title grs_file input_dir output_dir corpus_stat
    ) ()

  (** [html_sentences ~title] returns the result of
      [Grew_html.Html_sentences.build ~title], evaluated under [handle]. *)
  (* NOTE(review): only [~title] is applied inside the thunk. If
     [Grew_html.Html_sentences.build] expects further arguments, the value
     returned here is a function and the work done when it is finally applied
     runs outside of [handle] - to be confirmed against the signature of
     [build]. *)
  let html_sentences ~title = handle ~name:"Rewrite.html_sentences" (fun () -> Grew_html.Html_sentences.build ~title) ()
end
307 308 309



bguillaum's avatar
bguillaum committed

(* -------------- remove... after merge ------------
let xml_graph xml =
  handle ~name:"xml_graph" (fun () -> G_graph.of_xml xml) ()

let rewrite ~gr ~grs ~seq =
  handle ~name:"rewrite" (fun () -> Grs.rewrite grs seq gr) ()

let display ~gr ~grs ~seq =
  handle ~name:"display" (fun () -> Grs.build_rew_display grs seq gr) ()

let write_stat filename rew_hist =
  handle ~name:"write_stat" (fun () -> Gr_stat.save filename (Gr_stat.from_rew_history rew_hist)) ()

let write_annot ~title static_dir annot_dir base_name_rew_hist_list =
  handle ~name:"write_annot" (fun () -> Html_annot.build ~title static_dir annot_dir base_name_rew_hist_list) ()

let save_index ~dirname ~base_names =
  handle ~name:"save_index" (fun () ->
    let out_ch = open_out (Filename.concat dirname "index") in
    List.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
    close_out out_ch
  ) ()

let save_graph_conll filename graph =
  handle ~name:"save_graph_conll" (fun () ->
    let out_ch = open_out filename in
    fprintf out_ch "%s" (G_graph.to_conll graph);
    close_out out_ch
  ) ()

let save_gr base rew_hist =
  handle ~name:"save_gr" (fun () -> Rewrite_history.save_gr base rew_hist) ()

let save_conll base rew_hist =
  handle ~name:"save_conll" (fun () -> Rewrite_history.save_conll base rew_hist) ()

let save_full_conll base rew_hist =
  handle ~name:"save_full_conll" (fun () -> Rewrite_history.save_full_conll base rew_hist) ()

let save_det_gr base rew_hist =
  handle ~name:"save_det_gr" (fun () -> Rewrite_history.save_det_gr base rew_hist) ()

let save_det_conll ?header base rew_hist =
  handle ~name:"save_deeeet_conll" (fun () -> Rewrite_history.save_det_conll ?header base rew_hist) ()

let det_dep_string rew_hist =
  handle ~name:"det_dep_string" (fun () -> Rewrite_history.det_dep_string rew_hist) ()

let conll_dep_string ?keep_empty_rh rew_hist =
  handle ~name:"conll_dep_string" (fun () -> Rewrite_history.conll_dep_string ?keep_empty_rh rew_hist) ()

let write_html
    ?(no_init=false)
    ?(out_gr=false)
    ?filter
    ?main_feat
    ?dot
    ~header
    ?graph_file
    rew_hist
    output_base =
  handle ~name:"write_html" (fun () ->
    ignore (
      Html_rh.build
        ?filter
        ?main_feat
        ?dot
        ~out_gr
        ~init_graph: (not no_init)
        ~header
        ?graph_file
        output_base rew_hist
    )
  ) ()

let error_html
    ?(no_init=false)
    ?main_feat
    ?dot
    ~header
    msg
    ?init
    output_base =
  handle ~name:"error_html" (fun () ->
    ignore (
      Html_rh.error
        ?main_feat
        ?dot
        ~init_graph: (not no_init)
        ~header
        output_base msg init
    )
  ) ()

let make_index ~title ~grs_file ~html ~grs ~seq ~input_dir ~output_dir ~base_names  =
  handle ~name:"make_index" (fun () ->
    let init = Corpus_stat.empty grs seq in
    let corpus_stat =
      List.fold_left
        (fun acc base_name ->
          Corpus_stat.add_gr_stat base_name (Gr_stat.load (Filename.concat output_dir (base_name^".stat"))) acc
        ) init base_names in
    Corpus_stat.save_html title grs_file input_dir output_dir corpus_stat
  ) ()

let html_sentences ~title = handle ~name:"html_sentences" (fun () -> Html_sentences.build ~title) ()

let feature_names () =  handle ~name:"feature_names" (fun () -> Domain.feature_names ()) ()

let to_dot_graph ?main_feat ?(deco=G_deco.empty) graph =
  handle ~name:"to_dot_graph" (fun () -> G_graph.to_dot ?main_feat graph ~deco) ()

let to_dep_graph ?filter ?main_feat ?(deco=G_deco.empty) graph =
  handle ~name:"to_dep_graph" (fun () -> G_graph.to_dep ?filter ?main_feat ~deco graph) ()

let to_gr_graph graph =
  handle ~name:"to_gr_graph" (fun () -> G_graph.to_gr graph) ()

let to_conll_graph graph =
  handle ~name:"to_conll_graph" (fun () -> G_graph.to_conll graph) ()

type pattern = Rule.pattern
type matching = Rule.matching

let load_pattern file =
  handle ~name:"load_pattern" (fun () -> Rule.build_pattern (Loader.pattern file)) ()

let match_in_graph pattern graph = Rule.match_in_graph pattern graph

let match_deco pattern matching = Rule.match_deco pattern matching
-------------- remove... after merge ------------ *)