Attention une mise à jour du service Gitlab va être effectuée le mardi 18 janvier (et non lundi 17 comme annoncé précédemment) entre 18h00 et 18h30. Cette mise à jour va générer une interruption du service dont nous ne maîtrisons pas complètement la durée mais qui ne devrait pas excéder quelques minutes.

libgrew.ml 15.3 KB
Newer Older
bguillaum's avatar
bguillaum committed
1
2
3
4
5
6
7
8
9
10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

bguillaum's avatar
bguillaum committed
11
open Printf
pj2m's avatar
pj2m committed
12
open Log
13
open Conll
pj2m's avatar
pj2m committed
14

bguillaum's avatar
bguillaum committed
15
let libgrew_debug_mode () = Grew_base.Global.debug := true
bguillaum's avatar
bguillaum committed
16
let get_version () = VERSION
bguillaum's avatar
bguillaum committed
17

bguillaum's avatar
bguillaum committed
18
(* ==================================================================================================== *)
bguillaum's avatar
bguillaum committed
19
(** {2 Location} *)
bguillaum's avatar
bguillaum committed
20
21
22
23
24
25
26
(* ==================================================================================================== *)
module Loc = struct
  type t = Grew_base.Loc.t
  let to_string = Grew_base.Loc.to_string
end

(* ==================================================================================================== *)
bguillaum's avatar
bguillaum committed
27
(** {2 Exceptions} *)
bguillaum's avatar
bguillaum committed
28
(* ==================================================================================================== *)
29
30
exception Error of string
exception Bug of string
pj2m's avatar
pj2m committed
31

bguillaum's avatar
bguillaum committed
32
33
let handle ?(name="") ?(file="No file defined") fct () =
  try fct () with
bguillaum's avatar
typo    
bguillaum committed
34
    (* Raise again already caught exceptions *)
35
36
    | Error msg -> raise (Error msg)
    | Bug msg -> raise (Bug msg)
37
38

    (* Catch new exceptions *)
Bruno Guillaume's avatar
Bruno Guillaume committed
39
40
41
42
43
44
    | Grew_base.Error.Parse (msg, Some loc) -> raise (Error (sprintf "%s %s" (Grew_base.Loc.to_string loc) msg))
    | Grew_base.Error.Parse (msg, None) -> raise (Error (sprintf "%s" msg))
    | Grew_base.Error.Build (msg, Some loc) -> raise (Error (sprintf "%s %s" (Grew_base.Loc.to_string loc) msg))
    | Grew_base.Error.Build (msg, None) -> raise (Error (sprintf "%s" msg))
    | Grew_base.Error.Run (msg, Some loc) -> raise (Error (sprintf "%s %s" (Grew_base.Loc.to_string loc) msg))
    | Grew_base.Error.Run (msg, None) -> raise (Error (sprintf "%s" msg))
45
    | Conll_types.Error msg -> raise (Error (sprintf "Conll error: %s" msg))
46

Bruno Guillaume's avatar
Bruno Guillaume committed
47
    | Grew_base.Error.Bug (msg, Some loc) -> raise (Bug (sprintf "%s %s" (Grew_base.Loc.to_string loc) msg))
48
49
    | Grew_base.Error.Bug (msg, None) -> raise (Bug (sprintf "%s" msg))
    | exc -> raise (Bug (sprintf "[Libgrew.%s] UNCAUGHT EXCEPTION: %s" name (Printexc.to_string exc)))
pj2m's avatar
pj2m committed
50

bguillaum's avatar
bguillaum committed
51

bguillaum's avatar
bguillaum committed
52
(* ==================================================================================================== *)
53
(** {2 Domain} *)
bguillaum's avatar
bguillaum committed
54
55
(* ==================================================================================================== *)
module Domain = struct
56
  type t = Grew_domain.Domain.t
57

bguillaum's avatar
bguillaum committed
58
59
  let load filename =
    let ast = Grew_loader.Loader.domain filename in
bguillaum's avatar
bguillaum committed
60
    Grew_grs.Grs.domain_build ast
bguillaum's avatar
bguillaum committed
61

62
  let load filename =
63
    handle ~name:"Domain.load"
64
65
66
67
68
69
      (fun () ->
      let ast = Grew_loader.Loader.domain filename in
      Grew_grs.Grs.domain_build ast
      ) ()

  let feature_names domain =
70
    handle ~name:"Domain.feature_names"
71
72
      (fun () -> Grew_domain.Domain.feature_names domain)
      ()
73
74
75
76
77
78

  let dump domain =
    handle ~name:"Domain.dump"
      (fun () -> Grew_domain.Domain.dump domain)
      ()

bguillaum's avatar
bguillaum committed
79
80
81
82
83
84
85
86
end

(* ==================================================================================================== *)
(** {2 Patterns} *)
(* ==================================================================================================== *)
module Pattern = struct
  type t = Grew_rule.Rule.pattern

bguillaum's avatar
bguillaum committed
87
88
  let load ?domain file =
  handle ~name:"Pattern.load" (fun () -> Grew_rule.Rule.build_pattern ?domain (Grew_loader.Loader.pattern file)) ()
89
90
91

  let parse ?domain desc =
  handle ~name:"Pattern.load" (fun () -> Grew_rule.Rule.build_pattern ?domain (Grew_loader.Parser.pattern desc)) ()
92

93
  let pid_name_list pattern =
94
  handle ~name:"Pattern.pid_lits"
95
    (fun () -> List.map (fun x -> x) (Grew_rule.Rule.pid_name_list pattern)
96
    ) ()
bguillaum's avatar
bguillaum committed
97
98
99
100
101
102
103
end

(* ==================================================================================================== *)
(** {2 Matching} *)
(* ==================================================================================================== *)
module Matching = struct
  type t = Grew_rule.Rule.matching
104
105

  let to_python pattern graph t = Grew_rule.Rule.to_python pattern graph t
bguillaum's avatar
bguillaum committed
106
107
108
109
110
111
112
113
114
115
116
end

(* ==================================================================================================== *)
(** {2 Deco} *)
(* ==================================================================================================== *)
module Deco = struct
  type t = Grew_graph.G_deco.t
  let build pattern matching = Grew_rule.Rule.match_deco pattern matching
end

(* ==================================================================================================== *)
bguillaum's avatar
bguillaum committed
117
(** {2 Graph} *)
bguillaum's avatar
bguillaum committed
118
119
120
121
122
123
(* ==================================================================================================== *)
module Graph = struct


type t = Grew_graph.G_graph.t

bguillaum's avatar
bguillaum committed
124
  let load_gr ?domain file =
bguillaum's avatar
bguillaum committed
125
    if not (Sys.file_exists file)
126
    then raise (Error ("File_not_found: " ^ file))
bguillaum's avatar
bguillaum committed
127
128
129
130
    else
      handle ~name:"Graph.load_gr" ~file
        (fun () ->
          let gr_ast = Grew_loader.Loader.gr file in
bguillaum's avatar
bguillaum committed
131
          Grew_graph.G_graph.build ?domain gr_ast
bguillaum's avatar
bguillaum committed
132
        ) ()
bguillaum's avatar
bguillaum committed
133

bguillaum's avatar
bguillaum committed
134
  let load_conll ?domain file =
bguillaum's avatar
bguillaum committed
135
    handle ~name:"Graph.load_conll" ~file
bguillaum's avatar
bguillaum committed
136
      (fun () ->
bguillaum's avatar
bguillaum committed
137
        Grew_graph.G_graph.of_conll ?domain (Conll.load file)
bguillaum's avatar
bguillaum committed
138
      ) ()
pj2m's avatar
pj2m committed
139

bguillaum's avatar
bguillaum committed
140
  let load_brown ?domain file =
bguillaum's avatar
bguillaum committed
141
142
143
    handle ~name:"Graph.load_brown"
      (fun () ->
        let brown = Grew_base.File.load file in
bguillaum's avatar
bguillaum committed
144
        Grew_graph.G_graph.of_brown ?domain brown
bguillaum's avatar
bguillaum committed
145
      ) ()
bguillaum's avatar
bguillaum committed
146

147
  let load_pst ?domain file =
148
    if not (Sys.file_exists file)
149
    then raise (Error ("File_not_found: " ^ file))
150
    else
151
      handle ~name:"load_pst" ~file
152
        (fun () ->
153
154
          let const_ast = Grew_loader.Loader.phrase_structure_tree file in
          Grew_graph.G_graph.of_pst ?domain const_ast
155
156
        ) ()

bguillaum's avatar
bguillaum committed
157
  let load ?domain file =
bguillaum's avatar
bguillaum committed
158
159
160
    handle ~name:"Graph.load_graph" ~file
      (fun () ->
        match Grew_base.File.get_suffix file with
bguillaum's avatar
bguillaum committed
161
162
163
        | Some ".gr" -> load_gr ?domain file
        | Some ".conll" -> load_conll ?domain file
        | Some ".br" | Some ".melt" -> load_brown ?domain file
164
        | Some ".cst" -> load_pst ?domain file
bguillaum's avatar
bguillaum committed
165
166
167
168
        | _ ->
            Log.fwarning "Unknown file format for input graph '%s', try to guess..." file;
            let rec loop = function
            | [] -> Log.fcritical "[Libgrew.load_graph] Cannot guess input file format of file '%s'. Use .gr or .conll file extension" file
bguillaum's avatar
bguillaum committed
169
            | load_fct :: tail -> try load_fct ?domain file with _ -> loop tail in
170
            loop [load_gr; load_conll; load_brown; load_pst]
bguillaum's avatar
bguillaum committed
171
      ) ()
bguillaum's avatar
bguillaum committed
172

bguillaum's avatar
bguillaum committed
173
174
  let of_gr ?domain ?(grewpy=false) gr_string =
    handle ~name:"Graph.of_gr" (fun () -> Grew_graph.G_graph.build ?domain ~grewpy (Grew_loader.Parser.gr gr_string)) ()
175

bguillaum's avatar
bguillaum committed
176
177
  let of_conll ?domain conll =
    handle ~name:"Graph.of_conll" (fun () -> Grew_graph.G_graph.of_conll ?domain conll) ()
178

179
180
  let of_pst ?domain pst_string =
    handle ~name:"of_pst"
181
      (fun () ->
182
183
184
185
186
187
188
189
190
191
        let pst_ast = Grew_loader.Parser.phrase_structure_tree pst_string in
        (Grew_graph.G_graph.of_pst ?domain pst_ast)
      ) ()

  let sentence_of_pst ?domain pst_string =
    handle ~name:"of_pst"
      (fun () ->
        let pst_ast = Grew_loader.Parser.phrase_structure_tree pst_string in
        let word_list = Grew_ast.Ast.word_list pst_ast in
        Sentence.fr_clean_spaces (String.concat " " word_list)
192
193
      ) ()

bguillaum's avatar
bguillaum committed
194
195
  let of_brown ?domain ?sentid brown =
    handle ~name:"Graph.of_brown" (fun () -> Grew_graph.G_graph.of_brown ?domain ?sentid brown) ()
bguillaum's avatar
bguillaum committed
196

bguillaum's avatar
bguillaum committed
197
198
  let to_dot ?domain ?main_feat ?(deco=Grew_graph.G_deco.empty) graph =
    handle ~name:"Graph.to_dot" (fun () -> Grew_graph.G_graph.to_dot ?domain ?main_feat graph ~deco) ()
bguillaum's avatar
bguillaum committed
199

bguillaum's avatar
bguillaum committed
200
201
  let to_dep ?domain ?filter ?main_feat ?(deco=Grew_graph.G_deco.empty) graph =
    handle ~name:"Graph.to_dep" (fun () -> Grew_graph.G_graph.to_dep ?domain ?filter ?main_feat ~deco graph) ()
bguillaum's avatar
bguillaum committed
202

bguillaum's avatar
bguillaum committed
203
204
  let to_gr ?domain graph =
    handle ~name:"Graph.to_gr" (fun () -> Grew_graph.G_graph.to_gr ?domain graph) ()
bguillaum's avatar
bguillaum committed
205

bguillaum's avatar
bguillaum committed
206
207
  let to_conll_string ?domain graph =
    handle ~name:"Graph.to_conll_string" (fun () -> Grew_graph.G_graph.to_conll_string ?domain graph) ()
bguillaum's avatar
bguillaum committed
208

bguillaum's avatar
bguillaum committed
209
210
211
212
213
  let to_sentence ?main_feat gr =
    handle ~name:"Graph.to_sentence"
      (fun () ->
        Grew_graph.G_graph.to_sentence ?main_feat gr
      ) ()
bguillaum's avatar
bguillaum committed
214

bguillaum's avatar
bguillaum committed
215
  let save_conll ?domain filename graph =
bguillaum's avatar
bguillaum committed
216
217
    handle ~name:"Graph.save_conll" (fun () ->
      let out_ch = open_out filename in
bguillaum's avatar
bguillaum committed
218
      fprintf out_ch "%s" (Grew_graph.G_graph.to_conll_string ?domain graph);
bguillaum's avatar
bguillaum committed
219
      close_out out_ch
bguillaum's avatar
bguillaum committed
220
221
    ) ()

222
223
224
225
  let search_pattern ?domain pattern graph =
    handle ~name:"Graph.search_pattern" (fun () ->
      Grew_rule.Rule.match_in_graph ?domain pattern graph
    ) ()
226

227
228
229
230
  let node_matching pattern graph matching =
    handle ~name:"Graph.node_matching" (fun () ->
      Grew_rule.Rule.node_matching pattern graph matching
    ) ()
bguillaum's avatar
bguillaum committed
231
end
232

bguillaum's avatar
bguillaum committed
233
(* ==================================================================================================== *)
234
(** {2 Graph Rewriting System} *)
bguillaum's avatar
bguillaum committed
235
236
237
(* ==================================================================================================== *)
module Grs = struct
  type t = Grew_grs.Grs.t
238

bguillaum's avatar
bguillaum committed
239
  let empty = Grew_grs.Grs.empty
240

bguillaum's avatar
bguillaum committed
241
242
243
244
  let load file =
    handle ~name:"Grs.load" ~file
      (fun () ->
        if not (Sys.file_exists file)
245
        then raise (Error ("File_not_found: " ^ file))
bguillaum's avatar
bguillaum committed
246
247
        else Grew_grs.Grs.build file
      ) ()
248

bguillaum's avatar
bguillaum committed
249
250
251
252
253
  let get_sequence_names grs =
    handle ~name:"Grs.get_sequence_names"
      (fun () ->
        Grew_grs.Grs.sequence_names grs
      ) ()
254

bguillaum's avatar
bguillaum committed
255
256
257
258
259
  let build_html_doc ?(corpus=false) dir grs =
    handle ~name:"Grs.build_doc [with Dep2pict]"
      (fun () ->
        Grew_html.Html_doc.build ~corpus ~dep:true dir grs;

Bruno Guillaume's avatar
Bruno Guillaume committed
260
        (* draw pattern graphs for all rules *)
bguillaum's avatar
bguillaum committed
261
        let fct module_ rule_ =
bguillaum's avatar
bguillaum committed
262
          let dep_code = Grew_rule.Rule.to_dep ?domain:(Grew_grs.Grs.get_domain grs) rule_ in
bguillaum's avatar
bguillaum committed
263
264
265
          let dep_png_file = sprintf "%s/%s_%s-patt.png" dir module_ (Grew_rule.Rule.get_name rule_) in
          let d2p = Dep2pict.Dep2pict.from_dep ~dep:dep_code in
          Dep2pict.Dep2pict.save_png ~filename:dep_png_file d2p in
Bruno Guillaume's avatar
Bruno Guillaume committed
266
        Grew_grs.Grs.rule_iter fct grs
bguillaum's avatar
bguillaum committed
267
268
269
      ) ()

  let get_domain grs = Grew_grs.Grs.get_domain grs
Bruno Guillaume's avatar
Bruno Guillaume committed
270
271
272
273

  let to_json t =
    let json = Grew_grs.Grs.to_json t in
    Yojson.Basic.pretty_to_string json
bguillaum's avatar
bguillaum committed
274
275
end

276
277
278
279
280
281
282
283
284
285
286
(* ==================================================================================================== *)
(** {2 New Graph Rewriting System} *)
(* ==================================================================================================== *)
module New_grs = struct
  type t = Grew_grs.New_grs.t

  let load file =
    handle ~name:"New_grs.load" ~file
      (fun () ->
        Grew_grs.New_grs.load file
      ) ()
287
288
289
290
291
292
293
294
295
296
297
298

  let dump grs =
    handle ~name:"New_grs.dump"
      (fun () ->
        Grew_grs.New_grs.dump grs
      ) ()

  let domain grs =
    handle ~name:"New_grs.domain"
      (fun () ->
        Grew_grs.New_grs.domain grs
      ) ()
299
300
end

bguillaum's avatar
bguillaum committed
301
302
303
304
305
306
307
(* ==================================================================================================== *)
(** {2 Rewrite} *)
(* ==================================================================================================== *)
module Rewrite = struct
  type display = Libgrew_types.rew_display
  type history = Grew_grs.Rewrite_history.t

308
309
310
  let set_max_depth_det value = Grew_rule.Rule.set_max_depth_det value
  let set_max_depth_non_det value = Grew_rule.Rule.set_max_depth_non_det value

bguillaum's avatar
bguillaum committed
311
312
  let set_debug_loop () = Grew_rule.Rule.set_debug_loop ()

bguillaum's avatar
bguillaum committed
313
314
315
316
317
318
319
320
  let display ~gr ~grs ~seq =
    handle ~name:"Rewrite.display" (fun () -> Grew_grs.Grs.build_rew_display grs seq gr) ()

  let set_timeout t = Grew_base.Timeout.timeout := t

  let rewrite ~gr ~grs ~seq =
    handle ~name:"Rewrite.rewrite" (fun () -> Grew_grs.Grs.rewrite grs seq gr) ()

321
322
323
  let simple_rewrite ~gr ~grs ~strat =
    handle ~name:"Rewrite.simple_rewrite" (fun () -> Grew_grs.Grs.simple_rewrite grs strat gr) ()

bguillaum's avatar
bguillaum committed
324
325
326
327
328
329
330
331
332
  let is_empty rh =
    handle ~name:"Rewrite.is_empty" (fun () -> Grew_grs.Rewrite_history.is_empty rh) ()

  let num_sol rh =
    handle ~name:"Rewrite.num_sol" (fun () -> Grew_grs.Rewrite_history.num_sol rh) ()

  let write_stat filename rew_hist =
    handle ~name:"Rewrite.write_stat" (fun () -> Grew_html.Gr_stat.save filename (Grew_html.Gr_stat.from_rew_history rew_hist)) ()

bguillaum's avatar
bguillaum committed
333
334
  let write_annot ?domain ~title static_dir annot_dir base_name_rew_hist_list =
    handle ~name:"Rewrite.write_annot" (fun () -> Grew_html.Html_annot.build ?domain ~title static_dir annot_dir base_name_rew_hist_list) ()
bguillaum's avatar
bguillaum committed
335
336
337
338

  let save_index ~dirname ~base_names =
    handle ~name:"Rewrite.save_index" (fun () ->
      let out_ch = open_out (Filename.concat dirname "index") in
339
      Array.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
bguillaum's avatar
bguillaum committed
340
      close_out out_ch
341
342
    ) ()

bguillaum's avatar
bguillaum committed
343
344
  let save_gr ?domain base rew_hist =
    handle ~name:"Rewrite.save_gr" (fun () -> Grew_grs.Rewrite_history.save_gr ?domain base rew_hist) ()
bguillaum's avatar
bguillaum committed
345

bguillaum's avatar
bguillaum committed
346
347
  let save_conll ?domain base rew_hist =
    handle ~name:"Rewrite.save_conll" (fun () -> Grew_grs.Rewrite_history.save_conll ?domain base rew_hist) ()
bguillaum's avatar
bguillaum committed
348

bguillaum's avatar
bguillaum committed
349
350
  let save_full_conll ?domain base rew_hist =
    handle ~name:"Rewrite.save_full_conll" (fun () -> Grew_grs.Rewrite_history.save_full_conll ?domain base rew_hist) ()
bguillaum's avatar
bguillaum committed
351

bguillaum's avatar
bguillaum committed
352
353
  let save_det_gr ?domain base rew_hist =
    handle ~name:"Rewrite.save_det_gr" (fun () -> Grew_grs.Rewrite_history.save_det_gr ?domain base rew_hist) ()
bguillaum's avatar
bguillaum committed
354

bguillaum's avatar
bguillaum committed
355
356
  let save_det_conll ?domain ?header base rew_hist =
    handle ~name:"Rewrite.save_det_conll" (fun () -> Grew_grs.Rewrite_history.save_det_conll ?domain ?header base rew_hist) ()
bguillaum's avatar
bguillaum committed
357

bguillaum's avatar
bguillaum committed
358
359
  let det_dep_string ?domain rew_hist =
    handle ~name:"Rewrite.det_dep_string" (fun () -> Grew_grs.Rewrite_history.det_dep_string ?domain rew_hist) ()
bguillaum's avatar
bguillaum committed
360

bguillaum's avatar
bguillaum committed
361
362
  let conll_dep_string ?domain ?keep_empty_rh rew_hist =
    handle ~name:"Rewrite.conll_dep_string" (fun () -> Grew_grs.Rewrite_history.conll_dep_string ?domain ?keep_empty_rh rew_hist) ()
bguillaum's avatar
bguillaum committed
363

bguillaum's avatar
bguillaum committed
364
  let write_html ?domain ?(no_init=false) ?(out_gr=false) ?filter ?main_feat ?dot ~header ?graph_file rew_hist output_base =
bguillaum's avatar
bguillaum committed
365
366
    handle ~name:"Rewrite.write_html" (fun () ->
      ignore (
bguillaum's avatar
bguillaum committed
367
        Grew_html.Html_rh.build ?domain ?filter ?main_feat ?dot ~out_gr ~init_graph: (not no_init) ~header ?graph_file output_base rew_hist
bguillaum's avatar
bguillaum committed
368
369
370
      )
    ) ()

bguillaum's avatar
bguillaum committed
371
  let error_html ?domain ?(no_init=false) ?main_feat ?dot ~header msg ?init output_base =
bguillaum's avatar
bguillaum committed
372
373
    handle ~name:"Rewrite.error_html" (fun () ->
      ignore (
bguillaum's avatar
bguillaum committed
374
        Grew_html.Html_rh.error ?domain ?main_feat ?dot ~init_graph: (not no_init) ~header output_base msg init
bguillaum's avatar
bguillaum committed
375
376
377
378
379
380
381
      )
    ) ()

  let make_index ~title ~grs_file ~html ~grs ~seq ~input_dir ~output_dir ~base_names  =
    handle ~name:"Rewrite.make_index" (fun () ->
      let init = Grew_html.Corpus_stat.empty grs seq in
      let corpus_stat =
382
        Array.fold_left
bguillaum's avatar
bguillaum committed
383
384
385
386
387
          (fun acc base_name ->
            Grew_html.Corpus_stat.add_gr_stat base_name (Grew_html.Gr_stat.load (Filename.concat output_dir (base_name^".stat"))) acc
          ) init base_names in
      Grew_html.Corpus_stat.save_html title grs_file input_dir output_dir corpus_stat
    ) ()
bguillaum's avatar
bguillaum committed
388

bguillaum's avatar
bguillaum committed
389
390
  let html_sentences ~title = handle ~name:"Rewrite.html_sentences" (fun () -> Grew_html.Html_sentences.build ~title) ()
end