libgrew.ml 9.22 KB
Newer Older
bguillaum's avatar
bguillaum committed
1
2
3
4
5
6
7
8
9
10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

11
open Libgrew_types
pj2m's avatar
pj2m committed
12

bguillaum's avatar
bguillaum committed
13
open Printf
pj2m's avatar
pj2m committed
14
15
open Log

16
open Grew_fs
bguillaum's avatar
bguillaum committed
17
open Grew_base
18
19
open Grew_types

bguillaum's avatar
bguillaum committed
20
21
22
23
open Grew_graph
open Grew_rule
open Grew_grs

pj2m's avatar
pj2m committed
24
open Grew_parser
bguillaum's avatar
bguillaum committed
25
open Grew_html
pj2m's avatar
pj2m committed
26
27


bguillaum's avatar
bguillaum committed
28
29
30
31
32
33
let css_file = Filename.concat DATA_DIR "style.css"

let empty_grs = Grs.empty

let set_timeout t = Timeout.timeout := t

bguillaum's avatar
bguillaum committed
34
35
type loc = Loc.t
let string_of_loc = Loc.to_string
36
let line_of_loc = Loc.to_line
bguillaum's avatar
bguillaum committed
37

pj2m's avatar
pj2m committed
38
39
exception File_dont_exists of string

bguillaum's avatar
bguillaum committed
40
41
42
43
exception Parsing_err of string * loc option
exception Build of string * loc option
exception Run of string * loc option
exception Bug of string * loc option
pj2m's avatar
pj2m committed
44

bguillaum's avatar
bguillaum committed
45
46
let handle ?(name="") ?(file="No file defined") fct () =
  try fct () with
47
    (* Raise again already catched exceptions *)
bguillaum's avatar
bguillaum committed
48
49
50
51
    | Parsing_err (msg,loc_opt) -> raise (Parsing_err (msg,loc_opt))
    | Build (msg,loc_opt) -> raise (Build (msg,loc_opt))
    | Bug (msg, loc_opt) -> raise (Bug (msg,loc_opt))
    | Run (msg, loc_opt) -> raise (Run (msg,loc_opt))
52
53

    (* Catch new exceptions *)
bguillaum's avatar
bguillaum committed
54
55
56
57
    | Grew_parser.Parse_error (msg, loc_opt) -> raise (Parsing_err (msg, loc_opt))
    | Error.Build (msg, loc_opt) -> raise (Build (msg, loc_opt))
    | Error.Bug (msg, loc_opt) -> raise (Bug (msg,loc_opt))
    | Error.Run (msg, loc_opt) -> raise (Run (msg,loc_opt))
58

bguillaum's avatar
bguillaum committed
59
    | exc -> raise (Bug (sprintf "[Libgrew.%s] UNCATCHED EXCEPTION: %s" name (Printexc.to_string exc), None))
pj2m's avatar
pj2m committed
60

bguillaum's avatar
bguillaum committed
61
let is_empty rh =
bguillaum's avatar
bguillaum committed
62
  handle ~name:"is_empty" (fun () -> Rewrite_history.is_empty rh) ()
bguillaum's avatar
bguillaum committed
63

bguillaum's avatar
bguillaum committed
64
65
let num_sol rh =
  handle ~name:"num_sol" (fun () -> Rewrite_history.num_sol rh) ()
pj2m's avatar
pj2m committed
66

bguillaum's avatar
bguillaum committed
67

68
IFDEF DEP2PICT THEN
69
let build_html_doc ?(corpus=false) dir grs =
bguillaum's avatar
bguillaum committed
70
  handle ~name:"build_doc [with Dep2pict]"
bguillaum's avatar
bguillaum committed
71
    (fun () ->
72
      Html_doc.build ~corpus ~dep:true dir grs;
bguillaum's avatar
bguillaum committed
73
74
75
76

      (* draw pattern graphs for all rules and all filters *)
      let fct module_ rule_ =
        let dep_code = Rule.to_dep rule_ in
bguillaum's avatar
bguillaum committed
77
78
        let dep_png_file = sprintf "%s/%s_%s-patt.png" dir module_ (Rule.get_name rule_) in
        ignore (Dep2pict.Dep2pict.fromDepStringToPng dep_code dep_png_file) in
bguillaum's avatar
bguillaum committed
79
80
81
      Grs.rule_iter fct grs;
      Grs.filter_iter fct grs
    ) ()
82
ELSE
83
let build_html_doc ?(corpus=false) dir grs =
84
  handle ~name:"build_doc [without Dep2pict]" (fun () -> Html_doc.build ~corpus ~dep:false dir grs) ()
85
86
END

bguillaum's avatar
bguillaum committed
87
let load_grs file =
bguillaum's avatar
bguillaum committed
88
89
90
91
  handle ~name:"load_grs" ~file
    (fun () ->
      if not (Sys.file_exists file)
      then raise (File_dont_exists file)
bguillaum's avatar
bguillaum committed
92
      else Grs.build file
bguillaum's avatar
bguillaum committed
93
    ) ()
bguillaum's avatar
bguillaum committed
94

bguillaum's avatar
bguillaum committed
95
let to_sentence ?main_feat gr =
bguillaum's avatar
bguillaum committed
96
97
98
99
100
101
102
103
104
105
106
  handle ~name:"to_sentence"
    (fun () ->
      let graph = gr.Instance.graph in
      G_graph.to_sentence ?main_feat graph
    ) ()

let get_sequence_names grs =
  handle ~name:"get_sequence_names"
    (fun () ->
      Grs.sequence_names grs
    ) ()
bguillaum's avatar
bguillaum committed
107
108

let load_gr file =
bguillaum's avatar
bguillaum committed
109
110
111
112
113
  if not (Sys.file_exists file)
  then raise (File_dont_exists file)
  else
    handle ~name:"load_gr" ~file
      (fun () ->
bguillaum's avatar
bguillaum committed
114
115
        let gr_ast = Grew_parser.gr_of_file file in
        Instance.from_graph (G_graph.build gr_ast)
bguillaum's avatar
bguillaum committed
116
      ) ()
pj2m's avatar
pj2m committed
117

bguillaum's avatar
bguillaum committed
118
let load_conll file =
bguillaum's avatar
bguillaum committed
119
120
  handle ~name:"load_conll" ~file
    (fun () ->
bguillaum's avatar
bguillaum committed
121
      let graph = G_graph.of_conll ~loc:(Loc.file file) (Conll.load file) in
bguillaum's avatar
bguillaum committed
122
123
124
      Instance.from_graph graph
    ) ()

bguillaum's avatar
bguillaum committed
125
126
127
let of_conll file_name line_list =
  handle ~name:"of_conll"
    (fun () ->
128
      let graph = G_graph.of_conll ~loc:(Loc.file file_name) (Conll.parse file_name line_list) in
bguillaum's avatar
bguillaum committed
129
130
131
      Instance.from_graph graph
    ) ()

132
let of_brown ?sentid brown =
133
134
  handle ~name:"of_brown"
    (fun () ->
135
      let graph = G_graph.of_brown ?sentid brown in
136
137
138
      Instance.from_graph graph
    ) ()

bguillaum's avatar
bguillaum committed
139
140
141
142
143
144
145
146
let load_brown file =
  handle ~name:"load_brown"
    (fun () ->
      let brown = File.load file in
      let graph = G_graph.of_brown brown in
      Instance.from_graph graph
    ) ()

bguillaum's avatar
bguillaum committed
147
148
149
let load_graph file =
  handle ~name:"load_graph" ~file
    (fun () ->
bguillaum's avatar
bguillaum committed
150
151
152
153
154
      match File.get_suffix file with
      | Some ".gr" -> load_gr file
      | Some ".conll" -> load_conll file
      | Some ".br" | Some ".melt" -> load_brown file 
      | _ ->
bguillaum's avatar
bguillaum committed
155
          Log.fwarning "Unknown file format for input graph '%s', try to guess..." file;
bguillaum's avatar
bguillaum committed
156
157
158
159
          let rec loop = function
          | [] -> Log.fcritical "[Libgrew.load_graph] Cannot guess input file format of file '%s'. Use .gr or .conll file extension" file
          | load_fct :: tail -> try load_fct file with _ -> loop tail in
          loop [load_gr; load_conll; load_brown]
bguillaum's avatar
bguillaum committed
160
    ) ()
161

bguillaum's avatar
bguillaum committed
162
let xml_graph xml =
bguillaum's avatar
bguillaum committed
163
  handle ~name:"xml_graph" (fun () -> Instance.from_graph (G_graph.of_xml xml)) ()
bguillaum's avatar
bguillaum committed
164

bguillaum's avatar
bguillaum committed
165
let raw_graph instance =
bguillaum's avatar
bguillaum committed
166
  handle ~name:"raw_graph" (fun () -> G_graph.to_raw instance.Instance.graph) ()
bguillaum's avatar
bguillaum committed
167

bguillaum's avatar
bguillaum committed
168
169
let rewrite ~gr ~grs ~seq =
  handle ~name:"rewrite" (fun () -> Grs.rewrite grs seq gr) ()
170

bguillaum's avatar
bguillaum committed
171
let display ~gr ~grs ~seq =
bguillaum's avatar
bguillaum committed
172
  handle ~name:"display" (fun () -> Grs.build_rew_display grs seq gr) ()
bguillaum's avatar
bguillaum committed
173

bguillaum's avatar
bguillaum committed
174
175
let write_stat filename rew_hist =
  handle ~name:"write_stat" (fun () -> Gr_stat.save filename (Gr_stat.from_rew_history rew_hist)) ()
bguillaum's avatar
bguillaum committed
176

bguillaum's avatar
bguillaum committed
177
178
let write_annot ~title static_dir annot_dir base_name_rew_hist_list =
  handle ~name:"write_annot" (fun () -> Html_annot.build ~title static_dir annot_dir base_name_rew_hist_list) ()
bguillaum's avatar
bguillaum committed
179

bguillaum's avatar
bguillaum committed
180
let save_index ~dirname ~base_names =
bguillaum's avatar
bguillaum committed
181
182
183
184
185
  handle ~name:"save_index" (fun () ->
    let out_ch = open_out (Filename.concat dirname "index") in
    List.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
    close_out out_ch
  ) ()
bguillaum's avatar
bguillaum committed
186

bguillaum's avatar
bguillaum committed
187
let save_graph_conll filename graph =
bguillaum's avatar
bguillaum committed
188
189
190
191
192
  handle ~name:"save_graph_conll" (fun () ->
    let out_ch = open_out filename in
    fprintf out_ch "%s" (Instance.to_conll graph);
    close_out out_ch
  ) ()
bguillaum's avatar
bguillaum committed
193

bguillaum's avatar
bguillaum committed
194
195
let save_gr base rew_hist =
  handle ~name:"save_gr" (fun () -> Rewrite_history.save_gr base rew_hist) ()
196

bguillaum's avatar
bguillaum committed
197
198
let save_conll base rew_hist =
  handle ~name:"save_conll" (fun () -> Rewrite_history.save_conll base rew_hist) ()
bguillaum's avatar
bguillaum committed
199

200
201
202
let save_full_conll base rew_hist =
  handle ~name:"save_full_conll" (fun () -> Rewrite_history.save_full_conll base rew_hist) ()

bguillaum's avatar
bguillaum committed
203
204
let save_det_gr base rew_hist =
  handle ~name:"save_det_gr" (fun () -> Rewrite_history.save_det_gr base rew_hist) ()
bguillaum's avatar
bguillaum committed
205

bguillaum's avatar
bguillaum committed
206
let save_det_conll ?header base rew_hist =
207
  handle ~name:"save_deeeet_conll" (fun () -> Rewrite_history.save_det_conll ?header base rew_hist) ()
208

bguillaum's avatar
bguillaum committed
209
210
let det_dep_string rew_hist =
  handle ~name:"det_dep_string" (fun () -> Rewrite_history.det_dep_string rew_hist) ()
bguillaum's avatar
bguillaum committed
211

bguillaum's avatar
bguillaum committed
212
213
214
let conll_dep_string ?keep_empty_rh rew_hist =
  handle ~name:"conll_dep_string" (fun () -> Rewrite_history.conll_dep_string ?keep_empty_rh rew_hist) ()

bguillaum's avatar
bguillaum committed
215
let write_html
216
217
    ?(no_init=false)
    ?(out_gr=false)
218
219
    ?filter
    ?main_feat
220
    ?dot
bguillaum's avatar
bguillaum committed
221
    ~header
bguillaum's avatar
bguillaum committed
222
    ?graph_file
bguillaum's avatar
bguillaum committed
223
224
    rew_hist
    output_base =
bguillaum's avatar
bguillaum committed
225
226
227
228
229
230
231
232
233
  handle ~name:"write_html" (fun () ->
    ignore (
      Html_rh.build
        ?filter
        ?main_feat
        ?dot
        ~out_gr
        ~init_graph: (not no_init)
        ~header
bguillaum's avatar
bguillaum committed
234
        ?graph_file
bguillaum's avatar
bguillaum committed
235
        output_base rew_hist
bguillaum's avatar
bguillaum committed
236
    )
bguillaum's avatar
bguillaum committed
237
  ) ()
238

bguillaum's avatar
bguillaum committed
239
240
241
let error_html
    ?(no_init=false)
    ?main_feat
242
    ?dot
243
    ~header
bguillaum's avatar
bguillaum committed
244
    msg
245
    ?init
246
    output_base =
bguillaum's avatar
bguillaum committed
247
248
249
250
251
252
253
254
  handle ~name:"error_html" (fun () ->
    ignore (
      Html_rh.error
        ?main_feat
        ?dot
        ~init_graph: (not no_init)
        ~header
        output_base msg init
255
    )
bguillaum's avatar
bguillaum committed
256
  ) ()
257

258
let make_index ~title ~grs_file ~html ~grs ~seq ~input_dir ~output_dir ~base_names  =
bguillaum's avatar
bguillaum committed
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
  handle ~name:"make_index" (fun () ->
    let init = Corpus_stat.empty grs seq in
    let corpus_stat =
      List.fold_left
        (fun acc base_name ->
          Corpus_stat.add_gr_stat base_name (Gr_stat.load (Filename.concat output_dir (base_name^".stat"))) acc
        ) init base_names in
    Corpus_stat.save_html title grs_file input_dir output_dir corpus_stat
  ) ()

let html_sentences ~title = handle ~name:"html_sentences" (fun () -> Html_sentences.build ~title) ()

let graph_of_instance instance = handle ~name:"graph_of_instance" (fun () -> instance.Instance.graph) ()

let feature_names () =  handle ~name:"feature_names" (fun () -> Domain.feature_names ()) ()
bguillaum's avatar
bguillaum committed
274

bguillaum's avatar
bguillaum committed
275
276
let to_dot_graph ?main_feat ?(deco=G_deco.empty) graph =
  handle ~name:"to_dot_graph" (fun () -> G_graph.to_dot ?main_feat graph ~deco) ()
bguillaum's avatar
bguillaum committed
277

bguillaum's avatar
bguillaum committed
278
279
let to_dep_graph ?filter ?main_feat ?(deco=G_deco.empty) graph =
  handle ~name:"to_dep_graph" (fun () -> G_graph.to_dep ?filter ?main_feat ~deco graph) ()
pj2m's avatar
pj2m committed
280

bguillaum's avatar
bguillaum committed
281
282
let to_gr_graph graph =
  handle ~name:"to_gr_graph" (fun () -> G_graph.to_gr graph) ()
283

bguillaum's avatar
bguillaum committed
284
285
let to_conll_graph graph =
  handle ~name:"to_conll_graph" (fun () -> G_graph.to_conll graph) ()
bguillaum's avatar
bguillaum committed
286

287
288
289
type pattern = Rule.pattern
type matching = Rule.matching

bguillaum's avatar
bguillaum committed
290
let load_pattern file =
291
292
293
294
  handle ~name:"load_pattern" (fun () -> Rule.build_pattern (Grew_parser.load_pattern file)) ()

let match_in_graph pattern graph = Rule.match_in_graph pattern graph

bguillaum's avatar
bguillaum committed
295
let match_deco pattern matching = Rule.match_deco pattern matching