libgrew.ml 9.7 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

11
open Libgrew_types
pj2m's avatar
pj2m committed
12

bguillaum's avatar
bguillaum committed
13
open Printf
pj2m's avatar
pj2m committed
14 15
open Log

16 17 18 19
IFDEF DEP2PICT THEN
open Dep2pict
ENDIF

20
open Grew_fs
bguillaum's avatar
bguillaum committed
21
open Grew_base
22 23
open Grew_types

bguillaum's avatar
bguillaum committed
24 25 26 27
open Grew_graph
open Grew_rule
open Grew_grs

bguillaum's avatar
bguillaum committed
28
open Grew_loader
bguillaum's avatar
bguillaum committed
29
open Grew_html
pj2m's avatar
pj2m committed
30

bguillaum's avatar
bguillaum committed
31 32 33 34 35 36
let css_file = Filename.concat DATA_DIR "style.css"

let empty_grs = Grs.empty

let set_timeout t = Timeout.timeout := t

bguillaum's avatar
bguillaum committed
37 38
type loc = Loc.t
let string_of_loc = Loc.to_string
39
let line_of_loc = Loc.to_line
bguillaum's avatar
bguillaum committed
40

41 42
type graph = G_graph.t

pj2m's avatar
pj2m committed
43 44
exception File_dont_exists of string

bguillaum's avatar
bguillaum committed
45 46 47 48
exception Parsing_err of string * loc option
exception Build of string * loc option
exception Run of string * loc option
exception Bug of string * loc option
pj2m's avatar
pj2m committed
49

bguillaum's avatar
bguillaum committed
50 51
let handle ?(name="") ?(file="No file defined") fct () =
  try fct () with
52
    (* Raise again already catched exceptions *)
bguillaum's avatar
bguillaum committed
53 54 55 56
    | Parsing_err (msg,loc_opt) -> raise (Parsing_err (msg,loc_opt))
    | Build (msg,loc_opt) -> raise (Build (msg,loc_opt))
    | Bug (msg, loc_opt) -> raise (Bug (msg,loc_opt))
    | Run (msg, loc_opt) -> raise (Run (msg,loc_opt))
57 58

    (* Catch new exceptions *)
bguillaum's avatar
bguillaum committed
59
    | Loader.Error (msg, loc_opt) -> raise (Parsing_err (msg, loc_opt))
bguillaum's avatar
bguillaum committed
60 61 62
    | Error.Build (msg, loc_opt) -> raise (Build (msg, loc_opt))
    | Error.Bug (msg, loc_opt) -> raise (Bug (msg,loc_opt))
    | Error.Run (msg, loc_opt) -> raise (Run (msg,loc_opt))
63

bguillaum's avatar
bguillaum committed
64
    | exc -> raise (Bug (sprintf "[Libgrew.%s] UNCATCHED EXCEPTION: %s" name (Printexc.to_string exc), None))
pj2m's avatar
pj2m committed
65

bguillaum's avatar
bguillaum committed
66
let is_empty rh =
bguillaum's avatar
bguillaum committed
67
  handle ~name:"is_empty" (fun () -> Rewrite_history.is_empty rh) ()
bguillaum's avatar
bguillaum committed
68

bguillaum's avatar
bguillaum committed
69 70
let num_sol rh =
  handle ~name:"num_sol" (fun () -> Rewrite_history.num_sol rh) ()
pj2m's avatar
pj2m committed
71

bguillaum's avatar
bguillaum committed
72

73
IFDEF DEP2PICT THEN
74
let build_html_doc ?(corpus=false) dir grs =
bguillaum's avatar
bguillaum committed
75
  handle ~name:"build_doc [with Dep2pict]"
bguillaum's avatar
bguillaum committed
76
    (fun () ->
77
      Html_doc.build ~corpus ~dep:true dir grs;
bguillaum's avatar
bguillaum committed
78 79 80

      (* draw pattern graphs for all rules and all filters *)
      let fct module_ rule_ =
81
        let dep_code = Rule.to_dep (Grs.get_label_domain grs) rule_ in
bguillaum's avatar
bguillaum committed
82
        let dep_png_file = sprintf "%s/%s_%s-patt.png" dir module_ (Rule.get_name rule_) in
83 84
        let d2p = Dep2pict.from_dep ~dep:dep_code in
        Dep2pict.save_png ~filename:dep_png_file d2p in
bguillaum's avatar
bguillaum committed
85 86 87
      Grs.rule_iter fct grs;
      Grs.filter_iter fct grs
    ) ()
88
ELSE
89
let build_html_doc ?(corpus=false) dir grs =
90
  handle ~name:"build_doc [without Dep2pict]" (fun () -> Html_doc.build ~corpus ~dep:false dir grs) ()
91 92
END

bguillaum's avatar
bguillaum committed
93
let load_grs file =
bguillaum's avatar
bguillaum committed
94 95 96 97
  handle ~name:"load_grs" ~file
    (fun () ->
      if not (Sys.file_exists file)
      then raise (File_dont_exists file)
bguillaum's avatar
bguillaum committed
98
      else Grs.build file
bguillaum's avatar
bguillaum committed
99
    ) ()
bguillaum's avatar
bguillaum committed
100

bguillaum's avatar
bguillaum committed
101
let to_sentence ?main_feat gr =
bguillaum's avatar
bguillaum committed
102 103
  handle ~name:"to_sentence"
    (fun () ->
104
      G_graph.to_sentence ?main_feat gr
bguillaum's avatar
bguillaum committed
105 106 107 108 109 110 111
    ) ()

let get_sequence_names grs =
  handle ~name:"get_sequence_names"
    (fun () ->
      Grs.sequence_names grs
    ) ()
bguillaum's avatar
bguillaum committed
112

113
let load_gr grs file =
bguillaum's avatar
bguillaum committed
114 115 116 117 118
  if not (Sys.file_exists file)
  then raise (File_dont_exists file)
  else
    handle ~name:"load_gr" ~file
      (fun () ->
bguillaum's avatar
bguillaum committed
119
        let gr_ast = Loader.gr file in
120
        G_graph.build (Grs.get_domain grs) (Grs.get_label_domain grs) gr_ast
bguillaum's avatar
bguillaum committed
121
      ) ()
pj2m's avatar
pj2m committed
122

123
let load_conll grs file =
bguillaum's avatar
bguillaum committed
124 125
  handle ~name:"load_conll" ~file
    (fun () ->
126
      G_graph.of_conll ~loc:(Loc.file file) (Grs.get_domain grs) (Grs.get_label_domain grs) (Conll.load file)
bguillaum's avatar
bguillaum committed
127 128
    ) ()

129
let of_conll grs file_name line_list =
bguillaum's avatar
bguillaum committed
130 131
  handle ~name:"of_conll"
    (fun () ->
132
      G_graph.of_conll ~loc:(Loc.file file_name) (Grs.get_domain grs) (Grs.get_label_domain grs) (Conll.parse file_name line_list)
bguillaum's avatar
bguillaum committed
133 134
    ) ()

135
let of_brown grs ?sentid brown =
136 137
  handle ~name:"of_brown"
    (fun () ->
138
      G_graph.of_brown (Grs.get_domain grs) (Grs.get_label_domain grs) ?sentid brown
139 140
    ) ()

141
let load_brown grs file =
bguillaum's avatar
bguillaum committed
142 143 144
  handle ~name:"load_brown"
    (fun () ->
      let brown = File.load file in
145
      G_graph.of_brown (Grs.get_domain grs) (Grs.get_label_domain grs) brown
bguillaum's avatar
bguillaum committed
146 147
    ) ()

148
let load_graph grs file =
bguillaum's avatar
bguillaum committed
149 150
  handle ~name:"load_graph" ~file
    (fun () ->
bguillaum's avatar
bguillaum committed
151
      match File.get_suffix file with
152 153 154
      | Some ".gr" -> load_gr grs file
      | Some ".conll" -> load_conll grs file
      | Some ".br" | Some ".melt" -> load_brown grs file 
bguillaum's avatar
bguillaum committed
155
      | _ ->
bguillaum's avatar
bguillaum committed
156
          Log.fwarning "Unknown file format for input graph '%s', try to guess..." file;
bguillaum's avatar
bguillaum committed
157 158
          let rec loop = function
          | [] -> Log.fcritical "[Libgrew.load_graph] Cannot guess input file format of file '%s'. Use .gr or .conll file extension" file
159
          | load_fct :: tail -> try load_fct grs file with _ -> loop tail in
bguillaum's avatar
bguillaum committed
160
          loop [load_gr; load_conll; load_brown]
bguillaum's avatar
bguillaum committed
161
    ) ()
162

163 164
let raw_graph grs gr =
  handle ~name:"raw_graph" (fun () -> G_graph.to_raw (Grs.get_label_domain grs) gr) ()
bguillaum's avatar
bguillaum committed
165

bguillaum's avatar
bguillaum committed
166 167
let rewrite ~gr ~grs ~seq =
  handle ~name:"rewrite" (fun () -> Grs.rewrite grs seq gr) ()
168

bguillaum's avatar
bguillaum committed
169
let display ~gr ~grs ~seq =
bguillaum's avatar
bguillaum committed
170
  handle ~name:"display" (fun () -> Grs.build_rew_display grs seq gr) ()
bguillaum's avatar
bguillaum committed
171

bguillaum's avatar
bguillaum committed
172 173
let write_stat filename rew_hist =
  handle ~name:"write_stat" (fun () -> Gr_stat.save filename (Gr_stat.from_rew_history rew_hist)) ()
bguillaum's avatar
bguillaum committed
174

175 176
let write_annot grs ~title static_dir annot_dir base_name_rew_hist_list =
  handle ~name:"write_annot" (fun () -> Html_annot.build (Grs.get_label_domain grs) ~title static_dir annot_dir base_name_rew_hist_list) ()
bguillaum's avatar
bguillaum committed
177

bguillaum's avatar
bguillaum committed
178
let save_index ~dirname ~base_names =
bguillaum's avatar
bguillaum committed
179 180 181 182 183
  handle ~name:"save_index" (fun () ->
    let out_ch = open_out (Filename.concat dirname "index") in
    List.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
    close_out out_ch
  ) ()
bguillaum's avatar
bguillaum committed
184

185
let save_graph_conll grs filename graph =
bguillaum's avatar
bguillaum committed
186 187
  handle ~name:"save_graph_conll" (fun () ->
    let out_ch = open_out filename in
188
    fprintf out_ch "%s" (G_graph.to_conll (Grs.get_label_domain grs) graph);
bguillaum's avatar
bguillaum committed
189 190
    close_out out_ch
  ) ()
bguillaum's avatar
bguillaum committed
191

192 193
let save_gr grs base rew_hist =
  handle ~name:"save_gr" (fun () -> Rewrite_history.save_gr (Grs.get_label_domain grs) base rew_hist) ()
194

195 196
let save_conll grs base rew_hist =
  handle ~name:"save_conll" (fun () -> Rewrite_history.save_conll (Grs.get_label_domain grs) base rew_hist) ()
bguillaum's avatar
bguillaum committed
197

198 199
let save_full_conll grs base rew_hist =
  handle ~name:"save_full_conll" (fun () -> Rewrite_history.save_full_conll (Grs.get_label_domain grs) base rew_hist) ()
200

201 202
let save_det_gr grs base rew_hist =
  handle ~name:"save_det_gr" (fun () -> Rewrite_history.save_det_gr (Grs.get_label_domain grs) base rew_hist) ()
bguillaum's avatar
bguillaum committed
203

204 205
let save_det_conll grs ?header base rew_hist =
  handle ~name:"save_deeeet_conll" (fun () -> Rewrite_history.save_det_conll (Grs.get_label_domain grs) ?header base rew_hist) ()
206

207 208
let det_dep_string grs rew_hist =
  handle ~name:"det_dep_string" (fun () -> Rewrite_history.det_dep_string (Grs.get_label_domain grs) rew_hist) ()
bguillaum's avatar
bguillaum committed
209

210 211
let conll_dep_string grs ?keep_empty_rh rew_hist =
  handle ~name:"conll_dep_string" (fun () -> Rewrite_history.conll_dep_string (Grs.get_label_domain grs) ?keep_empty_rh rew_hist) ()
bguillaum's avatar
bguillaum committed
212

213
let write_html grs
214 215
    ?(no_init=false)
    ?(out_gr=false)
216 217
    ?filter
    ?main_feat
218
    ?dot
bguillaum's avatar
bguillaum committed
219
    ~header
bguillaum's avatar
bguillaum committed
220
    ?graph_file
bguillaum's avatar
bguillaum committed
221 222
    rew_hist
    output_base =
bguillaum's avatar
bguillaum committed
223 224 225
  handle ~name:"write_html" (fun () ->
    ignore (
      Html_rh.build
226
        (Grs.get_label_domain grs)
bguillaum's avatar
bguillaum committed
227 228 229 230 231 232
        ?filter
        ?main_feat
        ?dot
        ~out_gr
        ~init_graph: (not no_init)
        ~header
bguillaum's avatar
bguillaum committed
233
        ?graph_file
bguillaum's avatar
bguillaum committed
234
        output_base rew_hist
bguillaum's avatar
bguillaum committed
235
    )
bguillaum's avatar
bguillaum committed
236
  ) ()
237

238
let error_html grs
bguillaum's avatar
bguillaum committed
239 240
    ?(no_init=false)
    ?main_feat
241
    ?dot
242
    ~header
bguillaum's avatar
bguillaum committed
243
    msg
244
    ?init
245
    output_base =
bguillaum's avatar
bguillaum committed
246 247 248
  handle ~name:"error_html" (fun () ->
    ignore (
      Html_rh.error
249
        (Grs.get_label_domain grs)
bguillaum's avatar
bguillaum committed
250 251 252 253 254
        ?main_feat
        ?dot
        ~init_graph: (not no_init)
        ~header
        output_base msg init
255
    )
bguillaum's avatar
bguillaum committed
256
  ) ()
257

258
let make_index ~title ~grs_file ~html ~grs ~seq ~input_dir ~output_dir ~base_names  =
bguillaum's avatar
bguillaum committed
259 260 261 262 263 264 265 266 267 268 269 270
  handle ~name:"make_index" (fun () ->
    let init = Corpus_stat.empty grs seq in
    let corpus_stat =
      List.fold_left
        (fun acc base_name ->
          Corpus_stat.add_gr_stat base_name (Gr_stat.load (Filename.concat output_dir (base_name^".stat"))) acc
        ) init base_names in
    Corpus_stat.save_html title grs_file input_dir output_dir corpus_stat
  ) ()

let html_sentences ~title = handle ~name:"html_sentences" (fun () -> Html_sentences.build ~title) ()

271
let feature_names grs =  handle ~name:"feature_names" (fun () -> Domain.feature_names (Grs.get_domain grs)) ()
bguillaum's avatar
bguillaum committed
272

273 274
let to_dot_graph grs ?main_feat ?(deco=G_deco.empty) graph =
  handle ~name:"to_dot_graph" (fun () -> G_graph.to_dot (Grs.get_label_domain grs) ?main_feat graph ~deco) ()
bguillaum's avatar
bguillaum committed
275

276 277
let to_dep_graph grs ?filter ?main_feat ?(deco=G_deco.empty) graph =
  handle ~name:"to_dep_graph" (fun () -> G_graph.to_dep (Grs.get_label_domain grs) ?filter ?main_feat ~deco graph) ()
pj2m's avatar
pj2m committed
278

279 280
let to_gr_graph grs graph =
  handle ~name:"to_gr_graph" (fun () -> G_graph.to_gr (Grs.get_label_domain grs) graph) ()
281

282 283
let to_conll_graph grs graph =
  handle ~name:"to_conll_graph" (fun () -> G_graph.to_conll (Grs.get_label_domain grs) graph) ()
bguillaum's avatar
bguillaum committed
284

285 286 287
type pattern = Rule.pattern
type matching = Rule.matching

288
let load_pattern grs file =
289
  handle ~name:"load_pattern" (fun () -> Rule.build_pattern (Grs.get_domain grs) (Grs.get_label_domain grs) (Loader.pattern file)) ()
290

291
let match_in_graph grs pattern graph = Rule.match_in_graph (Grs.get_label_domain grs) pattern graph
292

bguillaum's avatar
bguillaum committed
293
let match_deco pattern matching = Rule.match_deco pattern matching