libgrew.ml 10.2 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

11
open Libgrew_types
pj2m's avatar
pj2m committed
12

bguillaum's avatar
bguillaum committed
13
open Printf
pj2m's avatar
pj2m committed
14 15
open Log

16 17 18 19
IFDEF DEP2PICT THEN
open Dep2pict
ENDIF

20
open Grew_fs
21
open Grew_base
22
open Grew_types
23
open Grew_ast
bguillaum's avatar
bguillaum committed
24 25 26
open Grew_graph
open Grew_rule
open Grew_grs
bguillaum's avatar
bguillaum committed
27
open Grew_loader
bguillaum's avatar
bguillaum committed
28
open Grew_html
pj2m's avatar
pj2m committed
29

bguillaum's avatar
bguillaum committed
30 31
(* -------------------------------------------------------------------------------- *)
(** {2 Location} *)
bguillaum's avatar
bguillaum committed
32 33
type loc = Loc.t
let string_of_loc = Loc.to_string
34
let line_of_loc = Loc.to_line
bguillaum's avatar
bguillaum committed
35

bguillaum's avatar
bguillaum committed
36 37
(* -------------------------------------------------------------------------------- *)
(** {2 Exceptions} *)
pj2m's avatar
pj2m committed
38
exception File_dont_exists of string
bguillaum's avatar
bguillaum committed
39 40 41 42
exception Parsing_err of string * loc option
exception Build of string * loc option
exception Run of string * loc option
exception Bug of string * loc option
pj2m's avatar
pj2m committed
43

44 45
let handle ?(name="") ?(file="No file defined") fct () =
  try fct () with
46
    (* Raise again already catched exceptions *)
bguillaum's avatar
bguillaum committed
47 48 49 50
    | Parsing_err (msg,loc_opt) -> raise (Parsing_err (msg,loc_opt))
    | Build (msg,loc_opt) -> raise (Build (msg,loc_opt))
    | Bug (msg, loc_opt) -> raise (Bug (msg,loc_opt))
    | Run (msg, loc_opt) -> raise (Run (msg,loc_opt))
51 52

    (* Catch new exceptions *)
bguillaum's avatar
bguillaum committed
53
    | Loader.Error (msg, loc_opt) -> raise (Parsing_err (msg, loc_opt))
bguillaum's avatar
bguillaum committed
54 55 56
    | Error.Build (msg, loc_opt) -> raise (Build (msg, loc_opt))
    | Error.Bug (msg, loc_opt) -> raise (Bug (msg,loc_opt))
    | Error.Run (msg, loc_opt) -> raise (Run (msg,loc_opt))
57

58
    | exc -> raise (Bug (sprintf "[Libgrew.%s] UNCATCHED EXCEPTION: %s" name (Printexc.to_string exc), None))
pj2m's avatar
pj2m committed
59

bguillaum's avatar
bguillaum committed
60

bguillaum's avatar
bguillaum committed
61
(* -------------------------------------------------------------------------------- *)
62 63
(** {2 Domain} *)
type domain = Domain.t
64

65 66 67
let load_domain filename =
  let ast = Loader.domain filename in
  Grs.domain_build ast
68

bguillaum's avatar
bguillaum committed
69 70 71
(* -------------------------------------------------------------------------------- *)
(** {2 Graph} *)
type graph = G_graph.t
bguillaum's avatar
bguillaum committed
72

73
let load_gr domain file =
74 75 76 77 78
  if not (Sys.file_exists file)
  then raise (File_dont_exists file)
  else
    handle ~name:"load_gr" ~file
      (fun () ->
bguillaum's avatar
bguillaum committed
79
        let gr_ast = Loader.gr file in
80
        G_graph.build domain gr_ast
81
      ) ()
pj2m's avatar
pj2m committed
82

83
let load_conll domain file =
84 85
  handle ~name:"load_conll" ~file
    (fun () ->
86
      G_graph.of_conll ~loc:(Loc.file file) domain (Conll.load file)
87 88
    ) ()

89
let load_brown domain file =
bguillaum's avatar
bguillaum committed
90 91 92
  handle ~name:"load_brown"
    (fun () ->
      let brown = File.load file in
93
      G_graph.of_brown domain brown
bguillaum's avatar
bguillaum committed
94 95
    ) ()

96
let load_graph domain file =
97 98
  handle ~name:"load_graph" ~file
    (fun () ->
bguillaum's avatar
bguillaum committed
99
      match File.get_suffix file with
100 101 102
      | Some ".gr" -> load_gr domain file
      | Some ".conll" -> load_conll domain file
      | Some ".br" | Some ".melt" -> load_brown domain file
bguillaum's avatar
bguillaum committed
103
      | _ ->
104
          Log.fwarning "Unknown file format for input graph '%s', try to guess..." file;
bguillaum's avatar
bguillaum committed
105 106
          let rec loop = function
          | [] -> Log.fcritical "[Libgrew.load_graph] Cannot guess input file format of file '%s'. Use .gr or .conll file extension" file
107
          | load_fct :: tail -> try load_fct domain file with _ -> loop tail in
bguillaum's avatar
bguillaum committed
108
          loop [load_gr; load_conll; load_brown]
109
    ) ()
110

111
let of_conll domain file_name line_list =
bguillaum's avatar
bguillaum committed
112 113
  handle ~name:"of_conll"
    (fun () ->
114
      G_graph.of_conll ~loc:(Loc.file file_name) domain (Conll.parse file_name line_list)
bguillaum's avatar
bguillaum committed
115 116
    ) ()

117 118
let of_brown domain ?sentid brown =
  handle ~name:"of_brown" (fun () -> G_graph.of_brown domain ?sentid brown) ()
bguillaum's avatar
bguillaum committed
119

120 121
let to_dot_graph domain ?main_feat ?(deco=G_deco.empty) graph =
  handle ~name:"to_dot_graph" (fun () -> G_graph.to_dot domain ?main_feat graph ~deco) ()
bguillaum's avatar
bguillaum committed
122

123 124
let to_dep_graph domain ?filter ?main_feat ?(deco=G_deco.empty) graph =
  handle ~name:"to_dep_graph" (fun () -> G_graph.to_dep domain ?filter ?main_feat ~deco graph) ()
bguillaum's avatar
bguillaum committed
125

126 127
let to_gr_graph domain graph =
  handle ~name:"to_gr_graph" (fun () -> G_graph.to_gr domain graph) ()
bguillaum's avatar
bguillaum committed
128

129 130
let to_conll_graph domain graph =
  handle ~name:"to_conll_graph" (fun () -> G_graph.to_conll domain graph) ()
bguillaum's avatar
bguillaum committed
131 132 133 134 135 136 137

let to_sentence ?main_feat gr =
  handle ~name:"to_sentence"
    (fun () ->
      G_graph.to_sentence ?main_feat gr
    ) ()

138
let save_graph_conll domain filename graph =
bguillaum's avatar
bguillaum committed
139 140
  handle ~name:"save_graph_conll" (fun () ->
    let out_ch = open_out filename in
141
    fprintf out_ch "%s" (G_graph.to_conll domain graph);
bguillaum's avatar
bguillaum committed
142 143 144
    close_out out_ch
  ) ()

145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
let raw_graph domain gr =
  handle ~name:"raw_graph" (fun () -> G_graph.to_raw domain gr) ()

(* -------------------------------------------------------------------------------- *)
(** {2 Graph Rewriting System} *)
type grs = Grs.t

let empty_grs = Grs.empty

let load_grs file =
  handle ~name:"load_grs" ~file
    (fun () ->
      if not (Sys.file_exists file)
      then raise (File_dont_exists file)
      else Grs.build file
    ) ()

let get_sequence_names grs =
  handle ~name:"get_sequence_names"
    (fun () ->
      Grs.sequence_names grs
    ) ()

IFDEF DEP2PICT THEN
let build_html_doc ?(corpus=false) dir grs =
  handle ~name:"build_doc [with Dep2pict]"
    (fun () ->
      Html_doc.build ~corpus ~dep:true dir grs;

      (* draw pattern graphs for all rules and all filters *)
      let fct module_ rule_ =
        let dep_code = Rule.to_dep (Grs.get_domain grs) rule_ in
        let dep_png_file = sprintf "%s/%s_%s-patt.png" dir module_ (Rule.get_name rule_) in
        let d2p = Dep2pict.from_dep ~dep:dep_code in
        Dep2pict.save_png ~filename:dep_png_file d2p in
      Grs.rule_iter fct grs;
      Grs.filter_iter fct grs
    ) ()
ELSE
let build_html_doc ?(corpus=false) dir grs =
  handle ~name:"build_doc [without Dep2pict]" (fun () -> Html_doc.build ~corpus ~dep:false dir grs) ()
END

let feature_names domain =  handle ~name:"feature_names" (fun () ->  Domain.feature_names domain) ()
bguillaum's avatar
bguillaum committed
189

bguillaum's avatar
bguillaum committed
190 191 192 193 194 195 196 197 198 199 200
(* -------------------------------------------------------------------------------- *)
(** {2 rew_display: data for the GUI } *)
let display ~gr ~grs ~seq =
  handle ~name:"display" (fun () -> Grs.build_rew_display grs seq gr) ()

(* -------------------------------------------------------------------------------- *)
(** {2 Rewrite} *)
type rewrite_history = Rewrite_history.t

let set_timeout t = Timeout.timeout := t

201 202
let rewrite ~gr ~grs ~seq =
  handle ~name:"rewrite" (fun () -> Grs.rewrite grs seq gr) ()
203

bguillaum's avatar
bguillaum committed
204 205 206 207 208 209 210 211
let is_empty rh =
  handle ~name:"is_empty" (fun () -> Rewrite_history.is_empty rh) ()

let num_sol rh =
  handle ~name:"num_sol" (fun () -> Rewrite_history.num_sol rh) ()



212 213
let write_stat filename rew_hist =
  handle ~name:"write_stat" (fun () -> Gr_stat.save filename (Gr_stat.from_rew_history rew_hist)) ()
bguillaum's avatar
bguillaum committed
214

215 216
let write_annot domain ~title static_dir annot_dir base_name_rew_hist_list =
  handle ~name:"write_annot" (fun () -> Html_annot.build domain ~title static_dir annot_dir base_name_rew_hist_list) ()
217

bguillaum's avatar
bguillaum committed
218
let save_index ~dirname ~base_names =
219 220 221 222 223
  handle ~name:"save_index" (fun () ->
    let out_ch = open_out (Filename.concat dirname "index") in
    List.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
    close_out out_ch
  ) ()
224

bguillaum's avatar
bguillaum committed
225

226 227
let save_gr domain base rew_hist =
  handle ~name:"save_gr" (fun () -> Rewrite_history.save_gr domain base rew_hist) ()
228

229 230
let save_conll domain base rew_hist =
  handle ~name:"save_conll" (fun () -> Rewrite_history.save_conll domain base rew_hist) ()
bguillaum's avatar
bguillaum committed
231

232 233
let save_full_conll domain base rew_hist =
  handle ~name:"save_full_conll" (fun () -> Rewrite_history.save_full_conll domain base rew_hist) ()
234

235 236
let save_det_gr domain base rew_hist =
  handle ~name:"save_det_gr" (fun () -> Rewrite_history.save_det_gr domain base rew_hist) ()
bguillaum's avatar
bguillaum committed
237

238 239
let save_det_conll domain ?header base rew_hist =
  handle ~name:"save_deeeet_conll" (fun () -> Rewrite_history.save_det_conll domain ?header base rew_hist) ()
240

241 242
let det_dep_string domain rew_hist =
  handle ~name:"det_dep_string" (fun () -> Rewrite_history.det_dep_string domain rew_hist) ()
bguillaum's avatar
bguillaum committed
243

244 245
let conll_dep_string domain ?keep_empty_rh rew_hist =
  handle ~name:"conll_dep_string" (fun () -> Rewrite_history.conll_dep_string domain ?keep_empty_rh rew_hist) ()
bguillaum's avatar
bguillaum committed
246

247
let write_html domain
248 249
    ?(no_init=false)
    ?(out_gr=false)
250 251
    ?filter
    ?main_feat
252
    ?dot
bguillaum's avatar
bguillaum committed
253
    ~header
bguillaum's avatar
bguillaum committed
254
    ?graph_file
bguillaum's avatar
bguillaum committed
255 256
    rew_hist
    output_base =
257 258 259
  handle ~name:"write_html" (fun () ->
    ignore (
      Html_rh.build
260
        domain
261 262 263 264 265 266
        ?filter
        ?main_feat
        ?dot
        ~out_gr
        ~init_graph: (not no_init)
        ~header
bguillaum's avatar
bguillaum committed
267
        ?graph_file
268
        output_base rew_hist
bguillaum's avatar
bguillaum committed
269
    )
270
  ) ()
271

272
let error_html domain
bguillaum's avatar
bguillaum committed
273 274
    ?(no_init=false)
    ?main_feat
275
    ?dot
276
    ~header
bguillaum's avatar
bguillaum committed
277
    msg
278
    ?init
279
    output_base =
280 281 282
  handle ~name:"error_html" (fun () ->
    ignore (
      Html_rh.error
283
        domain
284 285 286 287 288
        ?main_feat
        ?dot
        ~init_graph: (not no_init)
        ~header
        output_base msg init
289
    )
290
  ) ()
291

292
let make_index ~title ~grs_file ~html ~grs ~seq ~input_dir ~output_dir ~base_names  =
293 294 295 296 297 298 299 300 301 302 303 304
  handle ~name:"make_index" (fun () ->
    let init = Corpus_stat.empty grs seq in
    let corpus_stat =
      List.fold_left
        (fun acc base_name ->
          Corpus_stat.add_gr_stat base_name (Gr_stat.load (Filename.concat output_dir (base_name^".stat"))) acc
        ) init base_names in
    Corpus_stat.save_html title grs_file input_dir output_dir corpus_stat
  ) ()

let html_sentences ~title = handle ~name:"html_sentences" (fun () -> Html_sentences.build ~title) ()

bguillaum's avatar
bguillaum committed
305

bguillaum's avatar
bguillaum committed
306

bguillaum's avatar
bguillaum committed
307 308
(* -------------------------------------------------------------------------------- *)
(** {2 Patterns} *)
bguillaum's avatar
bguillaum committed
309

310 311 312
type pattern = Rule.pattern
type matching = Rule.matching

313 314
let load_pattern domain file =
  handle ~name:"load_pattern" (fun () -> Rule.build_pattern domain (Loader.pattern file)) ()
315

316
let match_in_graph domain pattern graph = Rule.match_in_graph domain pattern graph
317

bguillaum's avatar
bguillaum committed
318
let match_deco pattern matching = Rule.match_deco pattern matching