libgrew.ml 12.5 KB
Newer Older
bguillaum's avatar
bguillaum committed
1
2
3
4
5
6
7
8
9
10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

bguillaum's avatar
bguillaum committed
11
open Printf
pj2m's avatar
pj2m committed
12
13
open Log

bguillaum's avatar
bguillaum committed
14
(* ==================================================================================================== *)
bguillaum's avatar
bguillaum committed
15
(** {2 Location} *)
bguillaum's avatar
bguillaum committed
16
17
18
19
20
21
22
23
(* ==================================================================================================== *)
module Loc = struct
  type t = Grew_base.Loc.t
  let to_string = Grew_base.Loc.to_string
  let to_line = Grew_base.Loc.to_line
end

(* ==================================================================================================== *)
bguillaum's avatar
bguillaum committed
24
(** {2 Exceptions} *)
bguillaum's avatar
bguillaum committed
25
(* ==================================================================================================== *)
pj2m's avatar
pj2m committed
26
exception File_dont_exists of string
bguillaum's avatar
bguillaum committed
27
28
29
30
exception Parsing_err of string * Loc.t option
exception Build of string * Loc.t option
exception Run of string * Loc.t option
exception Bug of string * Loc.t option
pj2m's avatar
pj2m committed
31

bguillaum's avatar
bguillaum committed
32
33
let handle ?(name="") ?(file="No file defined") fct () =
  try fct () with
34
    (* Raise again already catched exceptions *)
bguillaum's avatar
bguillaum committed
35
36
37
38
    | Parsing_err (msg,loc_opt) -> raise (Parsing_err (msg,loc_opt))
    | Build (msg,loc_opt) -> raise (Build (msg,loc_opt))
    | Bug (msg, loc_opt) -> raise (Bug (msg,loc_opt))
    | Run (msg, loc_opt) -> raise (Run (msg,loc_opt))
39
40

    (* Catch new exceptions *)
bguillaum's avatar
bguillaum committed
41
42
43
44
    | Grew_loader.Loader.Error (msg, loc_opt) -> raise (Parsing_err (msg, loc_opt))
    | Grew_base.Error.Build (msg, loc_opt) -> raise (Build (msg, loc_opt))
    | Grew_base.Error.Bug (msg, loc_opt) -> raise (Bug (msg,loc_opt))
    | Grew_base.Error.Run (msg, loc_opt) -> raise (Run (msg,loc_opt))
45

bguillaum's avatar
bguillaum committed
46
    | exc -> raise (Bug (sprintf "[Libgrew.%s] UNCATCHED EXCEPTION: %s" name (Printexc.to_string exc), None))
pj2m's avatar
pj2m committed
47

bguillaum's avatar
bguillaum committed
48

bguillaum's avatar
bguillaum committed
49
(* ==================================================================================================== *)
50
(** {2 Domain} *)
bguillaum's avatar
bguillaum committed
51
52
53
(* ==================================================================================================== *)
module Domain = struct
  type t = Grew_types.Domain.t
54

bguillaum's avatar
bguillaum committed
55
56
  let empty = Grew_types.Domain.empty

bguillaum's avatar
bguillaum committed
57
58
59
  let load filename =
    let ast = Grew_loader.Loader.domain filename in
    Grew_grs.Grs.domain_build ast
bguillaum's avatar
bguillaum committed
60

bguillaum's avatar
bguillaum committed
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
  let feature_names domain =  handle ~name:"feature_names" (fun () ->  Grew_types.Domain.feature_names domain) ()
end

(* ==================================================================================================== *)
(** {2 Patterns} *)
(* ==================================================================================================== *)
module Pattern = struct
  type t = Grew_rule.Rule.pattern

  let load domain file =
  handle ~name:"Pattern.load" (fun () -> Grew_rule.Rule.build_pattern domain (Grew_loader.Loader.pattern file)) ()
end

(* ==================================================================================================== *)
(** {2 Matching} *)
(* ==================================================================================================== *)
module Matching = struct
  type t = Grew_rule.Rule.matching
end

(* ==================================================================================================== *)
(** {2 Deco} *)
(* ==================================================================================================== *)
module Deco = struct
  type t = Grew_graph.G_deco.t
  let build pattern matching = Grew_rule.Rule.match_deco pattern matching
end

(* ==================================================================================================== *)
bguillaum's avatar
bguillaum committed
90
(** {2 Graph} *)
bguillaum's avatar
bguillaum committed
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
(* ==================================================================================================== *)
module Graph = struct


type t = Grew_graph.G_graph.t

  let load_gr domain file =
    if not (Sys.file_exists file)
    then raise (File_dont_exists file)
    else
      handle ~name:"Graph.load_gr" ~file
        (fun () ->
          let gr_ast = Grew_loader.Loader.gr file in
          Grew_graph.G_graph.build domain gr_ast
        ) ()
bguillaum's avatar
bguillaum committed
106

bguillaum's avatar
bguillaum committed
107
108
  let load_conll domain file =
    handle ~name:"Graph.load_conll" ~file
bguillaum's avatar
bguillaum committed
109
      (fun () ->
bguillaum's avatar
bguillaum committed
110
        Grew_graph.G_graph.of_conll ~loc:(Grew_base.Loc.file file) domain (Grew_types.Conll.load file)
bguillaum's avatar
bguillaum committed
111
      ) ()
pj2m's avatar
pj2m committed
112

bguillaum's avatar
bguillaum committed
113
114
115
116
117
118
  let load_brown domain file =
    handle ~name:"Graph.load_brown"
      (fun () ->
        let brown = Grew_base.File.load file in
        Grew_graph.G_graph.of_brown domain brown
      ) ()
bguillaum's avatar
bguillaum committed
119

bguillaum's avatar
bguillaum committed
120
121
122
123
124
125
126
127
128
129
130
131
132
133
  let load domain file =
    handle ~name:"Graph.load_graph" ~file
      (fun () ->
        match Grew_base.File.get_suffix file with
        | Some ".gr" -> load_gr domain file
        | Some ".conll" -> load_conll domain file
        | Some ".br" | Some ".melt" -> load_brown domain file
        | _ ->
            Log.fwarning "Unknown file format for input graph '%s', try to guess..." file;
            let rec loop = function
            | [] -> Log.fcritical "[Libgrew.load_graph] Cannot guess input file format of file '%s'. Use .gr or .conll file extension" file
            | load_fct :: tail -> try load_fct domain file with _ -> loop tail in
            loop [load_gr; load_conll; load_brown]
      ) ()
bguillaum's avatar
bguillaum committed
134

bguillaum's avatar
bguillaum committed
135
136
137
138
139
  let of_conll domain file_name line_list =
    handle ~name:"Graph.of_conll"
      (fun () ->
        Grew_graph.G_graph.of_conll ~loc:(Grew_base.Loc.file file_name) domain (Grew_types.Conll.parse file_name line_list)
      ) ()
140

bguillaum's avatar
bguillaum committed
141
142
  let of_brown domain ?sentid brown =
    handle ~name:"Graph.of_brown" (fun () -> Grew_graph.G_graph.of_brown domain ?sentid brown) ()
bguillaum's avatar
bguillaum committed
143

bguillaum's avatar
bguillaum committed
144
145
  let to_dot domain ?main_feat ?(deco=Grew_graph.G_deco.empty) graph =
    handle ~name:"Graph.to_dot" (fun () -> Grew_graph.G_graph.to_dot domain ?main_feat graph ~deco) ()
bguillaum's avatar
bguillaum committed
146

bguillaum's avatar
bguillaum committed
147
148
  let to_dep domain ?filter ?main_feat ?(deco=Grew_graph.G_deco.empty) graph =
    handle ~name:"Graph.to_dep" (fun () -> Grew_graph.G_graph.to_dep domain ?filter ?main_feat ~deco graph) ()
bguillaum's avatar
bguillaum committed
149

bguillaum's avatar
bguillaum committed
150
151
  let to_gr domain graph =
    handle ~name:"Graph.to_gr" (fun () -> Grew_graph.G_graph.to_gr domain graph) ()
bguillaum's avatar
bguillaum committed
152

bguillaum's avatar
bguillaum committed
153
154
  let to_conll domain graph =
    handle ~name:"Graph.to_conll" (fun () -> Grew_graph.G_graph.to_conll domain graph) ()
bguillaum's avatar
bguillaum committed
155

bguillaum's avatar
bguillaum committed
156
157
158
159
160
  let to_sentence ?main_feat gr =
    handle ~name:"Graph.to_sentence"
      (fun () ->
        Grew_graph.G_graph.to_sentence ?main_feat gr
      ) ()
bguillaum's avatar
bguillaum committed
161

bguillaum's avatar
bguillaum committed
162
163
164
165
166
  let save_conll domain filename graph =
    handle ~name:"Graph.save_conll" (fun () ->
      let out_ch = open_out filename in
      fprintf out_ch "%s" (Grew_graph.G_graph.to_conll domain graph);
      close_out out_ch
bguillaum's avatar
bguillaum committed
167
168
    ) ()

bguillaum's avatar
bguillaum committed
169
170
171
172
  let raw domain gr =
    handle ~name:"Graph.raw" (fun () -> Grew_graph.G_graph.to_raw domain gr) ()

  let search_pattern domain pattern graph = Grew_rule.Rule.match_in_graph domain pattern graph
bguillaum's avatar
bguillaum committed
173

bguillaum's avatar
bguillaum committed
174
end
175

bguillaum's avatar
bguillaum committed
176
(* ==================================================================================================== *)
177
(** {2 Graph Rewriting System} *)
bguillaum's avatar
bguillaum committed
178
179
180
(* ==================================================================================================== *)
module Grs = struct
  type t = Grew_grs.Grs.t
181

bguillaum's avatar
bguillaum committed
182
  let empty = Grew_grs.Grs.empty
183

bguillaum's avatar
bguillaum committed
184
185
186
187
188
189
190
  let load file =
    handle ~name:"Grs.load" ~file
      (fun () ->
        if not (Sys.file_exists file)
        then raise (File_dont_exists file)
        else Grew_grs.Grs.build file
      ) ()
191

bguillaum's avatar
bguillaum committed
192
193
194
195
196
  let get_sequence_names grs =
    handle ~name:"Grs.get_sequence_names"
      (fun () ->
        Grew_grs.Grs.sequence_names grs
      ) ()
197

bguillaum's avatar
bguillaum committed
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
  IFDEF DEP2PICT THEN
  let build_html_doc ?(corpus=false) dir grs =
    handle ~name:"Grs.build_doc [with Dep2pict]"
      (fun () ->
        Grew_html.Html_doc.build ~corpus ~dep:true dir grs;

        (* draw pattern graphs for all rules and all filters *)
        let fct module_ rule_ =
          let dep_code = Grew_rule.Rule.to_dep (Grew_grs.Grs.get_domain grs) rule_ in
          let dep_png_file = sprintf "%s/%s_%s-patt.png" dir module_ (Grew_rule.Rule.get_name rule_) in
          let d2p = Dep2pict.Dep2pict.from_dep ~dep:dep_code in
          Dep2pict.Dep2pict.save_png ~filename:dep_png_file d2p in
        Grew_grs.Grs.rule_iter fct grs;
        Grew_grs.Grs.filter_iter fct grs
      ) ()
  ELSE
  let build_html_doc ?(corpus=false) dir grs =
    handle ~name:"Grs.build_doc [without Dep2pict]" (fun () -> Grew_html.Html_doc.build ~corpus ~dep:false dir grs) ()
  END

  let get_domain grs = Grew_grs.Grs.get_domain grs
end

(* ==================================================================================================== *)
(** {2 Rewrite} *)
(* ==================================================================================================== *)
module Rewrite = struct
  type display = Libgrew_types.rew_display
  type history = Grew_grs.Rewrite_history.t

  let display ~gr ~grs ~seq =
    handle ~name:"Rewrite.display" (fun () -> Grew_grs.Grs.build_rew_display grs seq gr) ()

  let set_timeout t = Grew_base.Timeout.timeout := t

  let rewrite ~gr ~grs ~seq =
    handle ~name:"Rewrite.rewrite" (fun () -> Grew_grs.Grs.rewrite grs seq gr) ()

  let is_empty rh =
    handle ~name:"Rewrite.is_empty" (fun () -> Grew_grs.Rewrite_history.is_empty rh) ()

  let num_sol rh =
    handle ~name:"Rewrite.num_sol" (fun () -> Grew_grs.Rewrite_history.num_sol rh) ()

  let write_stat filename rew_hist =
    handle ~name:"Rewrite.write_stat" (fun () -> Grew_html.Gr_stat.save filename (Grew_html.Gr_stat.from_rew_history rew_hist)) ()

  let write_annot domain ~title static_dir annot_dir base_name_rew_hist_list =
    handle ~name:"Rewrite.write_annot" (fun () -> Grew_html.Html_annot.build domain ~title static_dir annot_dir base_name_rew_hist_list) ()

  let save_index ~dirname ~base_names =
    handle ~name:"Rewrite.save_index" (fun () ->
      let out_ch = open_out (Filename.concat dirname "index") in
      List.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
      close_out out_ch
253
254
    ) ()

bguillaum's avatar
bguillaum committed
255
256
  let save_gr domain base rew_hist =
    handle ~name:"Rewrite.save_gr" (fun () -> Grew_grs.Rewrite_history.save_gr domain base rew_hist) ()
bguillaum's avatar
bguillaum committed
257

bguillaum's avatar
bguillaum committed
258
259
  let save_conll domain base rew_hist =
    handle ~name:"Rewrite.save_conll" (fun () -> Grew_grs.Rewrite_history.save_conll domain base rew_hist) ()
bguillaum's avatar
bguillaum committed
260

bguillaum's avatar
bguillaum committed
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
  let save_full_conll domain base rew_hist =
    handle ~name:"Rewrite.save_full_conll" (fun () -> Grew_grs.Rewrite_history.save_full_conll domain base rew_hist) ()

  let save_det_gr domain base rew_hist =
    handle ~name:"Rewrite.save_det_gr" (fun () -> Grew_grs.Rewrite_history.save_det_gr domain base rew_hist) ()

  let save_det_conll domain ?header base rew_hist =
    handle ~name:"Rewrite.save_det_conll" (fun () -> Grew_grs.Rewrite_history.save_det_conll domain ?header base rew_hist) ()

  let det_dep_string domain rew_hist =
    handle ~name:"Rewrite.det_dep_string" (fun () -> Grew_grs.Rewrite_history.det_dep_string domain rew_hist) ()

  let conll_dep_string domain ?keep_empty_rh rew_hist =
    handle ~name:"Rewrite.conll_dep_string" (fun () -> Grew_grs.Rewrite_history.conll_dep_string domain ?keep_empty_rh rew_hist) ()

  let write_html domain ?(no_init=false) ?(out_gr=false) ?filter ?main_feat ?dot ~header ?graph_file rew_hist output_base =
    handle ~name:"Rewrite.write_html" (fun () ->
      ignore (
        Grew_html.Html_rh.build domain ?filter ?main_feat ?dot ~out_gr ~init_graph: (not no_init) ~header ?graph_file output_base rew_hist
      )
    ) ()

  let error_html domain ?(no_init=false) ?main_feat ?dot ~header msg ?init output_base =
    handle ~name:"Rewrite.error_html" (fun () ->
      ignore (
        Grew_html.Html_rh.error domain ?main_feat ?dot ~init_graph: (not no_init) ~header output_base msg init
      )
    ) ()

  let make_index ~title ~grs_file ~html ~grs ~seq ~input_dir ~output_dir ~base_names  =
    handle ~name:"Rewrite.make_index" (fun () ->
      let init = Grew_html.Corpus_stat.empty grs seq in
      let corpus_stat =
        List.fold_left
          (fun acc base_name ->
            Grew_html.Corpus_stat.add_gr_stat base_name (Grew_html.Gr_stat.load (Filename.concat output_dir (base_name^".stat"))) acc
          ) init base_names in
      Grew_html.Corpus_stat.save_html title grs_file input_dir output_dir corpus_stat
    ) ()
bguillaum's avatar
bguillaum committed
300

bguillaum's avatar
bguillaum committed
301
302
  let html_sentences ~title = handle ~name:"Rewrite.html_sentences" (fun () -> Grew_html.Html_sentences.build ~title) ()
end
303
304
305