libgrew.ml 9.7 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

11
open Libgrew_types
pj2m's avatar
pj2m committed
12

bguillaum's avatar
bguillaum committed
13
open Printf
pj2m's avatar
pj2m committed
14 15
open Log

16 17 18 19
IFDEF DEP2PICT THEN
open Dep2pict
ENDIF

20
open Grew_fs
21
open Grew_base
22 23
open Grew_types

bguillaum's avatar
bguillaum committed
24 25 26 27
open Grew_graph
open Grew_rule
open Grew_grs

bguillaum's avatar
bguillaum committed
28
open Grew_loader
bguillaum's avatar
bguillaum committed
29
open Grew_html
pj2m's avatar
pj2m committed
30

31 32 33 34 35 36
let css_file = Filename.concat DATA_DIR "style.css"

let empty_grs = Grs.empty

let set_timeout t = Timeout.timeout := t

bguillaum's avatar
bguillaum committed
37 38
type loc = Loc.t
let string_of_loc = Loc.to_string
39
let line_of_loc = Loc.to_line
bguillaum's avatar
bguillaum committed
40

41 42
type graph = G_graph.t

pj2m's avatar
pj2m committed
43 44
exception File_dont_exists of string

bguillaum's avatar
bguillaum committed
45 46 47 48
exception Parsing_err of string * loc option
exception Build of string * loc option
exception Run of string * loc option
exception Bug of string * loc option
pj2m's avatar
pj2m committed
49

50 51
let handle ?(name="") ?(file="No file defined") fct () =
  try fct () with
52
    (* Raise again already catched exceptions *)
bguillaum's avatar
bguillaum committed
53 54 55 56
    | Parsing_err (msg,loc_opt) -> raise (Parsing_err (msg,loc_opt))
    | Build (msg,loc_opt) -> raise (Build (msg,loc_opt))
    | Bug (msg, loc_opt) -> raise (Bug (msg,loc_opt))
    | Run (msg, loc_opt) -> raise (Run (msg,loc_opt))
57 58

    (* Catch new exceptions *)
bguillaum's avatar
bguillaum committed
59
    | Loader.Error (msg, loc_opt) -> raise (Parsing_err (msg, loc_opt))
bguillaum's avatar
bguillaum committed
60 61 62
    | Error.Build (msg, loc_opt) -> raise (Build (msg, loc_opt))
    | Error.Bug (msg, loc_opt) -> raise (Bug (msg,loc_opt))
    | Error.Run (msg, loc_opt) -> raise (Run (msg,loc_opt))
63

64
    | exc -> raise (Bug (sprintf "[Libgrew.%s] UNCATCHED EXCEPTION: %s" name (Printexc.to_string exc), None))
pj2m's avatar
pj2m committed
65

66
let is_empty rh =
bguillaum's avatar
bguillaum committed
67
  handle ~name:"is_empty" (fun () -> Rewrite_history.is_empty rh) ()
bguillaum's avatar
bguillaum committed
68

69 70
let num_sol rh =
  handle ~name:"num_sol" (fun () -> Rewrite_history.num_sol rh) ()
pj2m's avatar
pj2m committed
71

bguillaum's avatar
bguillaum committed
72

73
IFDEF DEP2PICT THEN
74
let build_html_doc ?(corpus=false) dir grs =
75
  handle ~name:"build_doc [with Dep2pict]"
76
    (fun () ->
77
      Html_doc.build ~corpus ~dep:true dir grs;
78 79 80

      (* draw pattern graphs for all rules and all filters *)
      let fct module_ rule_ =
81
        let dep_code = Rule.to_dep (Grs.get_label_domain grs) rule_ in
82
        let dep_png_file = sprintf "%s/%s_%s-patt.png" dir module_ (Rule.get_name rule_) in
83 84
        let d2p = Dep2pict.from_dep ~dep:dep_code in
        Dep2pict.save_png ~filename:dep_png_file d2p in
85 86 87
      Grs.rule_iter fct grs;
      Grs.filter_iter fct grs
    ) ()
88
ELSE
89
let build_html_doc ?(corpus=false) dir grs =
90
  handle ~name:"build_doc [without Dep2pict]" (fun () -> Html_doc.build ~corpus ~dep:false dir grs) ()
91 92
END

93
let load_grs file =
94 95 96 97
  handle ~name:"load_grs" ~file
    (fun () ->
      if not (Sys.file_exists file)
      then raise (File_dont_exists file)
98
      else Grs.build file
99
    ) ()
bguillaum's avatar
bguillaum committed
100

bguillaum's avatar
bguillaum committed
101
let to_sentence ?main_feat gr =
102 103
  handle ~name:"to_sentence"
    (fun () ->
104
      G_graph.to_sentence ?main_feat gr
105 106 107 108 109 110 111
    ) ()

let get_sequence_names grs =
  handle ~name:"get_sequence_names"
    (fun () ->
      Grs.sequence_names grs
    ) ()
bguillaum's avatar
bguillaum committed
112

113
let load_gr grs file =
114 115 116 117 118
  if not (Sys.file_exists file)
  then raise (File_dont_exists file)
  else
    handle ~name:"load_gr" ~file
      (fun () ->
bguillaum's avatar
bguillaum committed
119
        let gr_ast = Loader.gr file in
120
        G_graph.build (Grs.get_domain grs) (Grs.get_label_domain grs) gr_ast
121
      ) ()
pj2m's avatar
pj2m committed
122

123
let load_conll grs file =
124 125
  handle ~name:"load_conll" ~file
    (fun () ->
126
      G_graph.of_conll ~loc:(Loc.file file) (Grs.get_domain grs) (Grs.get_label_domain grs) (Conll.load file)
127 128
    ) ()

129
let of_conll grs file_name line_list =
bguillaum's avatar
bguillaum committed
130 131
  handle ~name:"of_conll"
    (fun () ->
132
      G_graph.of_conll ~loc:(Loc.file file_name) (Grs.get_domain grs) (Grs.get_label_domain grs) (Conll.parse file_name line_list)
bguillaum's avatar
bguillaum committed
133 134
    ) ()

135
let of_brown grs ?sentid brown =
136 137
  handle ~name:"of_brown"
    (fun () ->
138
      G_graph.of_brown (Grs.get_domain grs) (Grs.get_label_domain grs) ?sentid brown
139 140
    ) ()

141
let load_brown grs file =
bguillaum's avatar
bguillaum committed
142 143 144
  handle ~name:"load_brown"
    (fun () ->
      let brown = File.load file in
145
      G_graph.of_brown (Grs.get_domain grs) (Grs.get_label_domain grs) brown
bguillaum's avatar
bguillaum committed
146 147
    ) ()

148
let load_graph grs file =
149 150
  handle ~name:"load_graph" ~file
    (fun () ->
bguillaum's avatar
bguillaum committed
151
      match File.get_suffix file with
152 153 154
      | Some ".gr" -> load_gr grs file
      | Some ".conll" -> load_conll grs file
      | Some ".br" | Some ".melt" -> load_brown grs file 
bguillaum's avatar
bguillaum committed
155
      | _ ->
156
          Log.fwarning "Unknown file format for input graph '%s', try to guess..." file;
bguillaum's avatar
bguillaum committed
157 158
          let rec loop = function
          | [] -> Log.fcritical "[Libgrew.load_graph] Cannot guess input file format of file '%s'. Use .gr or .conll file extension" file
159
          | load_fct :: tail -> try load_fct grs file with _ -> loop tail in
bguillaum's avatar
bguillaum committed
160
          loop [load_gr; load_conll; load_brown]
161
    ) ()
162

163 164
let raw_graph grs gr =
  handle ~name:"raw_graph" (fun () -> G_graph.to_raw (Grs.get_label_domain grs) gr) ()
bguillaum's avatar
bguillaum committed
165

166 167
let rewrite ~gr ~grs ~seq =
  handle ~name:"rewrite" (fun () -> Grs.rewrite grs seq gr) ()
168

bguillaum's avatar
bguillaum committed
169
let display ~gr ~grs ~seq =
170
  handle ~name:"display" (fun () -> Grs.build_rew_display grs seq gr) ()
bguillaum's avatar
bguillaum committed
171

172 173
let write_stat filename rew_hist =
  handle ~name:"write_stat" (fun () -> Gr_stat.save filename (Gr_stat.from_rew_history rew_hist)) ()
bguillaum's avatar
bguillaum committed
174

175 176
let write_annot grs ~title static_dir annot_dir base_name_rew_hist_list =
  handle ~name:"write_annot" (fun () -> Html_annot.build (Grs.get_label_domain grs) ~title static_dir annot_dir base_name_rew_hist_list) ()
177

bguillaum's avatar
bguillaum committed
178
let save_index ~dirname ~base_names =
179 180 181 182 183
  handle ~name:"save_index" (fun () ->
    let out_ch = open_out (Filename.concat dirname "index") in
    List.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
    close_out out_ch
  ) ()
184

185
let save_graph_conll grs filename graph =
186 187
  handle ~name:"save_graph_conll" (fun () ->
    let out_ch = open_out filename in
188
    fprintf out_ch "%s" (G_graph.to_conll (Grs.get_label_domain grs) graph);
189 190
    close_out out_ch
  ) ()
bguillaum's avatar
bguillaum committed
191

192 193
let save_gr grs base rew_hist =
  handle ~name:"save_gr" (fun () -> Rewrite_history.save_gr (Grs.get_label_domain grs) base rew_hist) ()
194

195 196
let save_conll grs base rew_hist =
  handle ~name:"save_conll" (fun () -> Rewrite_history.save_conll (Grs.get_label_domain grs) base rew_hist) ()
bguillaum's avatar
bguillaum committed
197

198 199
let save_full_conll grs base rew_hist =
  handle ~name:"save_full_conll" (fun () -> Rewrite_history.save_full_conll (Grs.get_label_domain grs) base rew_hist) ()
200

201 202
let save_det_gr grs base rew_hist =
  handle ~name:"save_det_gr" (fun () -> Rewrite_history.save_det_gr (Grs.get_label_domain grs) base rew_hist) ()
bguillaum's avatar
bguillaum committed
203

204 205
let save_det_conll grs ?header base rew_hist =
  handle ~name:"save_deeeet_conll" (fun () -> Rewrite_history.save_det_conll (Grs.get_label_domain grs) ?header base rew_hist) ()
206

207 208
let det_dep_string grs rew_hist =
  handle ~name:"det_dep_string" (fun () -> Rewrite_history.det_dep_string (Grs.get_label_domain grs) rew_hist) ()
bguillaum's avatar
bguillaum committed
209

210 211
let conll_dep_string grs ?keep_empty_rh rew_hist =
  handle ~name:"conll_dep_string" (fun () -> Rewrite_history.conll_dep_string (Grs.get_label_domain grs) ?keep_empty_rh rew_hist) ()
bguillaum's avatar
bguillaum committed
212

213
let write_html grs
214 215
    ?(no_init=false)
    ?(out_gr=false)
216 217
    ?filter
    ?main_feat
218
    ?dot
bguillaum's avatar
bguillaum committed
219
    ~header
bguillaum's avatar
bguillaum committed
220
    ?graph_file
bguillaum's avatar
bguillaum committed
221 222
    rew_hist
    output_base =
223 224 225
  handle ~name:"write_html" (fun () ->
    ignore (
      Html_rh.build
226
        (Grs.get_label_domain grs)
227 228 229 230 231 232
        ?filter
        ?main_feat
        ?dot
        ~out_gr
        ~init_graph: (not no_init)
        ~header
bguillaum's avatar
bguillaum committed
233
        ?graph_file
234
        output_base rew_hist
bguillaum's avatar
bguillaum committed
235
    )
236
  ) ()
237

238
let error_html grs
bguillaum's avatar
bguillaum committed
239 240
    ?(no_init=false)
    ?main_feat
241
    ?dot
242
    ~header
bguillaum's avatar
bguillaum committed
243
    msg
244
    ?init
245
    output_base =
246 247 248
  handle ~name:"error_html" (fun () ->
    ignore (
      Html_rh.error
249
        (Grs.get_label_domain grs)
250 251 252 253 254
        ?main_feat
        ?dot
        ~init_graph: (not no_init)
        ~header
        output_base msg init
255
    )
256
  ) ()
257

258
let make_index ~title ~grs_file ~html ~grs ~seq ~input_dir ~output_dir ~base_names  =
259 260 261 262 263 264 265 266 267 268 269 270
  handle ~name:"make_index" (fun () ->
    let init = Corpus_stat.empty grs seq in
    let corpus_stat =
      List.fold_left
        (fun acc base_name ->
          Corpus_stat.add_gr_stat base_name (Gr_stat.load (Filename.concat output_dir (base_name^".stat"))) acc
        ) init base_names in
    Corpus_stat.save_html title grs_file input_dir output_dir corpus_stat
  ) ()

let html_sentences ~title = handle ~name:"html_sentences" (fun () -> Html_sentences.build ~title) ()

271
let feature_names grs =  handle ~name:"feature_names" (fun () -> Domain.feature_names (Grs.get_domain grs)) ()
bguillaum's avatar
bguillaum committed
272

273 274
let to_dot_graph grs ?main_feat ?(deco=G_deco.empty) graph =
  handle ~name:"to_dot_graph" (fun () -> G_graph.to_dot (Grs.get_label_domain grs) ?main_feat graph ~deco) ()
bguillaum's avatar
bguillaum committed
275

276 277
let to_dep_graph grs ?filter ?main_feat ?(deco=G_deco.empty) graph =
  handle ~name:"to_dep_graph" (fun () -> G_graph.to_dep (Grs.get_label_domain grs) ?filter ?main_feat ~deco graph) ()
pj2m's avatar
pj2m committed
278

279 280
let to_gr_graph grs graph =
  handle ~name:"to_gr_graph" (fun () -> G_graph.to_gr (Grs.get_label_domain grs) graph) ()
281

282 283
let to_conll_graph grs graph =
  handle ~name:"to_conll_graph" (fun () -> G_graph.to_conll (Grs.get_label_domain grs) graph) ()
bguillaum's avatar
bguillaum committed
284

285 286 287
type pattern = Rule.pattern
type matching = Rule.matching

288
let load_pattern grs file =
289
  handle ~name:"load_pattern" (fun () -> Rule.build_pattern (Grs.get_domain grs) (Grs.get_label_domain grs) (Loader.pattern file)) ()
290

291
let match_in_graph grs pattern graph = Rule.match_in_graph (Grs.get_label_domain grs) pattern graph
292

bguillaum's avatar
bguillaum committed
293
let match_deco pattern matching = Rule.match_deco pattern matching