libgrew.ml 10.5 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

11
open Libgrew_types
pj2m's avatar
pj2m committed
12

bguillaum's avatar
bguillaum committed
13
open Printf
pj2m's avatar
pj2m committed
14 15
open Log

16 17 18 19
IFDEF DEP2PICT THEN
open Dep2pict
ENDIF

20
open Grew_fs
21
open Grew_base
22
open Grew_types
bguillaum's avatar
bguillaum committed
23 24 25
open Grew_graph
open Grew_rule
open Grew_grs
bguillaum's avatar
bguillaum committed
26
open Grew_loader
bguillaum's avatar
bguillaum committed
27
open Grew_html
pj2m's avatar
pj2m committed
28

bguillaum's avatar
bguillaum committed
29 30
(* -------------------------------------------------------------------------------- *)
(** {2 Location} *)
bguillaum's avatar
bguillaum committed
31 32
type loc = Loc.t
let string_of_loc = Loc.to_string
33
let line_of_loc = Loc.to_line
bguillaum's avatar
bguillaum committed
34

bguillaum's avatar
bguillaum committed
35 36
(* -------------------------------------------------------------------------------- *)
(** {2 Exceptions} *)
pj2m's avatar
pj2m committed
37
exception File_dont_exists of string
bguillaum's avatar
bguillaum committed
38 39 40 41
exception Parsing_err of string * loc option
exception Build of string * loc option
exception Run of string * loc option
exception Bug of string * loc option
pj2m's avatar
pj2m committed
42

43 44
let handle ?(name="") ?(file="No file defined") fct () =
  try fct () with
45
    (* Raise again already catched exceptions *)
bguillaum's avatar
bguillaum committed
46 47 48 49
    | Parsing_err (msg,loc_opt) -> raise (Parsing_err (msg,loc_opt))
    | Build (msg,loc_opt) -> raise (Build (msg,loc_opt))
    | Bug (msg, loc_opt) -> raise (Bug (msg,loc_opt))
    | Run (msg, loc_opt) -> raise (Run (msg,loc_opt))
50 51

    (* Catch new exceptions *)
bguillaum's avatar
bguillaum committed
52
    | Loader.Error (msg, loc_opt) -> raise (Parsing_err (msg, loc_opt))
bguillaum's avatar
bguillaum committed
53 54 55
    | Error.Build (msg, loc_opt) -> raise (Build (msg, loc_opt))
    | Error.Bug (msg, loc_opt) -> raise (Bug (msg,loc_opt))
    | Error.Run (msg, loc_opt) -> raise (Run (msg,loc_opt))
56

57
    | exc -> raise (Bug (sprintf "[Libgrew.%s] UNCATCHED EXCEPTION: %s" name (Printexc.to_string exc), None))
pj2m's avatar
pj2m committed
58

bguillaum's avatar
bguillaum committed
59

pj2m's avatar
pj2m committed
60

bguillaum's avatar
bguillaum committed
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
(* -------------------------------------------------------------------------------- *)
(** {2 Graph Rewriting System} *)
type grs = Grs.t

let empty_grs = Grs.empty

let load_grs file =
  handle ~name:"load_grs" ~file
    (fun () ->
      if not (Sys.file_exists file)
      then raise (File_dont_exists file)
      else Grs.build file
    ) ()

let get_sequence_names grs =
  handle ~name:"get_sequence_names"
    (fun () ->
      Grs.sequence_names grs
    ) ()
bguillaum's avatar
bguillaum committed
80

81
IFDEF DEP2PICT THEN
82
let build_html_doc ?(corpus=false) dir grs =
83
  handle ~name:"build_doc [with Dep2pict]"
84
    (fun () ->
85
      Html_doc.build ~corpus ~dep:true dir grs;
86 87 88

      (* draw pattern graphs for all rules and all filters *)
      let fct module_ rule_ =
89
        let dep_code = Rule.to_dep (Grs.get_label_domain grs) rule_ in
90
        let dep_png_file = sprintf "%s/%s_%s-patt.png" dir module_ (Rule.get_name rule_) in
91 92
        let d2p = Dep2pict.from_dep ~dep:dep_code in
        Dep2pict.save_png ~filename:dep_png_file d2p in
93 94 95
      Grs.rule_iter fct grs;
      Grs.filter_iter fct grs
    ) ()
96
ELSE
97
let build_html_doc ?(corpus=false) dir grs =
98
  handle ~name:"build_doc [without Dep2pict]" (fun () -> Html_doc.build ~corpus ~dep:false dir grs) ()
99 100
END

bguillaum's avatar
bguillaum committed
101
let feature_names grs =  handle ~name:"feature_names" (fun () ->  Feature_domain.feature_names (Grs.get_domain grs)) ()
102

bguillaum's avatar
bguillaum committed
103 104 105
(* -------------------------------------------------------------------------------- *)
(** {2 Graph} *)
type graph = G_graph.t
bguillaum's avatar
bguillaum committed
106

107
let load_gr grs file =
108 109 110 111 112
  if not (Sys.file_exists file)
  then raise (File_dont_exists file)
  else
    handle ~name:"load_gr" ~file
      (fun () ->
bguillaum's avatar
bguillaum committed
113
        let gr_ast = Loader.gr file in
114
        G_graph.build (Grs.get_domain grs) (Grs.get_label_domain grs) gr_ast
115
      ) ()
pj2m's avatar
pj2m committed
116

117
let load_conll grs file =
118 119
  handle ~name:"load_conll" ~file
    (fun () ->
120
      G_graph.of_conll ~loc:(Loc.file file) (Grs.get_domain grs) (Grs.get_label_domain grs) (Conll.load file)
121 122
    ) ()

123
let load_brown grs file =
bguillaum's avatar
bguillaum committed
124 125 126
  handle ~name:"load_brown"
    (fun () ->
      let brown = File.load file in
127
      G_graph.of_brown (Grs.get_domain grs) (Grs.get_label_domain grs) brown
bguillaum's avatar
bguillaum committed
128 129
    ) ()

130
let load_graph grs file =
131 132
  handle ~name:"load_graph" ~file
    (fun () ->
bguillaum's avatar
bguillaum committed
133
      match File.get_suffix file with
134 135 136
      | Some ".gr" -> load_gr grs file
      | Some ".conll" -> load_conll grs file
      | Some ".br" | Some ".melt" -> load_brown grs file 
bguillaum's avatar
bguillaum committed
137
      | _ ->
138
          Log.fwarning "Unknown file format for input graph '%s', try to guess..." file;
bguillaum's avatar
bguillaum committed
139 140
          let rec loop = function
          | [] -> Log.fcritical "[Libgrew.load_graph] Cannot guess input file format of file '%s'. Use .gr or .conll file extension" file
141
          | load_fct :: tail -> try load_fct grs file with _ -> loop tail in
bguillaum's avatar
bguillaum committed
142
          loop [load_gr; load_conll; load_brown]
143
    ) ()
144

bguillaum's avatar
bguillaum committed
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
let of_conll grs file_name line_list =
  handle ~name:"of_conll"
    (fun () ->
      G_graph.of_conll ~loc:(Loc.file file_name) (Grs.get_domain grs) (Grs.get_label_domain grs) (Conll.parse file_name line_list)
    ) ()

let of_brown grs ?sentid brown =
  handle ~name:"of_brown"
    (fun () ->
      G_graph.of_brown (Grs.get_domain grs) (Grs.get_label_domain grs) ?sentid brown
    ) ()

let to_dot_graph grs ?main_feat ?(deco=G_deco.empty) graph =
  handle ~name:"to_dot_graph" (fun () -> G_graph.to_dot (Grs.get_label_domain grs) ?main_feat graph ~deco) ()

let to_dep_graph grs ?filter ?main_feat ?(deco=G_deco.empty) graph =
  handle ~name:"to_dep_graph" (fun () -> G_graph.to_dep (Grs.get_label_domain grs) ?filter ?main_feat ~deco graph) ()

let to_gr_graph grs graph =
  handle ~name:"to_gr_graph" (fun () -> G_graph.to_gr (Grs.get_label_domain grs) graph) ()

let to_conll_graph grs graph =
  handle ~name:"to_conll_graph" (fun () -> G_graph.to_conll (Grs.get_label_domain grs) graph) ()

let to_sentence ?main_feat gr =
  handle ~name:"to_sentence"
    (fun () ->
      G_graph.to_sentence ?main_feat gr
    ) ()

let save_graph_conll grs filename graph =
  handle ~name:"save_graph_conll" (fun () ->
    let out_ch = open_out filename in
    fprintf out_ch "%s" (G_graph.to_conll (Grs.get_label_domain grs) graph);
    close_out out_ch
  ) ()

182 183
let raw_graph grs gr =
  handle ~name:"raw_graph" (fun () -> G_graph.to_raw (Grs.get_label_domain grs) gr) ()
bguillaum's avatar
bguillaum committed
184

bguillaum's avatar
bguillaum committed
185 186 187 188 189 190 191 192 193 194 195
(* -------------------------------------------------------------------------------- *)
(** {2 rew_display: data for the GUI } *)
let display ~gr ~grs ~seq =
  handle ~name:"display" (fun () -> Grs.build_rew_display grs seq gr) ()

(* -------------------------------------------------------------------------------- *)
(** {2 Rewrite} *)
type rewrite_history = Rewrite_history.t

let set_timeout t = Timeout.timeout := t

196 197
let rewrite ~gr ~grs ~seq =
  handle ~name:"rewrite" (fun () -> Grs.rewrite grs seq gr) ()
198

bguillaum's avatar
bguillaum committed
199 200 201 202 203 204 205 206 207 208 209 210
let is_empty rh =
  handle ~name:"is_empty" (fun () -> Rewrite_history.is_empty rh) ()

let num_sol rh =
  handle ~name:"num_sol" (fun () -> Rewrite_history.num_sol rh) ()







bguillaum's avatar
bguillaum committed
211

212 213
let write_stat filename rew_hist =
  handle ~name:"write_stat" (fun () -> Gr_stat.save filename (Gr_stat.from_rew_history rew_hist)) ()
bguillaum's avatar
bguillaum committed
214

215 216
let write_annot grs ~title static_dir annot_dir base_name_rew_hist_list =
  handle ~name:"write_annot" (fun () -> Html_annot.build (Grs.get_label_domain grs) ~title static_dir annot_dir base_name_rew_hist_list) ()
217

bguillaum's avatar
bguillaum committed
218
let save_index ~dirname ~base_names =
219 220 221 222 223
  handle ~name:"save_index" (fun () ->
    let out_ch = open_out (Filename.concat dirname "index") in
    List.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
    close_out out_ch
  ) ()
224

bguillaum's avatar
bguillaum committed
225

226 227
let save_gr grs base rew_hist =
  handle ~name:"save_gr" (fun () -> Rewrite_history.save_gr (Grs.get_label_domain grs) base rew_hist) ()
228

229 230
let save_conll grs base rew_hist =
  handle ~name:"save_conll" (fun () -> Rewrite_history.save_conll (Grs.get_label_domain grs) base rew_hist) ()
bguillaum's avatar
bguillaum committed
231

232 233
let save_full_conll grs base rew_hist =
  handle ~name:"save_full_conll" (fun () -> Rewrite_history.save_full_conll (Grs.get_label_domain grs) base rew_hist) ()
234

235 236
let save_det_gr grs base rew_hist =
  handle ~name:"save_det_gr" (fun () -> Rewrite_history.save_det_gr (Grs.get_label_domain grs) base rew_hist) ()
bguillaum's avatar
bguillaum committed
237

238 239
let save_det_conll grs ?header base rew_hist =
  handle ~name:"save_deeeet_conll" (fun () -> Rewrite_history.save_det_conll (Grs.get_label_domain grs) ?header base rew_hist) ()
240

241 242
let det_dep_string grs rew_hist =
  handle ~name:"det_dep_string" (fun () -> Rewrite_history.det_dep_string (Grs.get_label_domain grs) rew_hist) ()
bguillaum's avatar
bguillaum committed
243

244 245
let conll_dep_string grs ?keep_empty_rh rew_hist =
  handle ~name:"conll_dep_string" (fun () -> Rewrite_history.conll_dep_string (Grs.get_label_domain grs) ?keep_empty_rh rew_hist) ()
bguillaum's avatar
bguillaum committed
246

247
let write_html grs
248 249
    ?(no_init=false)
    ?(out_gr=false)
250 251
    ?filter
    ?main_feat
252
    ?dot
bguillaum's avatar
bguillaum committed
253
    ~header
bguillaum's avatar
bguillaum committed
254
    ?graph_file
bguillaum's avatar
bguillaum committed
255 256
    rew_hist
    output_base =
257 258 259
  handle ~name:"write_html" (fun () ->
    ignore (
      Html_rh.build
260
        (Grs.get_label_domain grs)
261 262 263 264 265 266
        ?filter
        ?main_feat
        ?dot
        ~out_gr
        ~init_graph: (not no_init)
        ~header
bguillaum's avatar
bguillaum committed
267
        ?graph_file
268
        output_base rew_hist
bguillaum's avatar
bguillaum committed
269
    )
270
  ) ()
271

272
let error_html grs
bguillaum's avatar
bguillaum committed
273 274
    ?(no_init=false)
    ?main_feat
275
    ?dot
276
    ~header
bguillaum's avatar
bguillaum committed
277
    msg
278
    ?init
279
    output_base =
280 281 282
  handle ~name:"error_html" (fun () ->
    ignore (
      Html_rh.error
283
        (Grs.get_label_domain grs)
284 285 286 287 288
        ?main_feat
        ?dot
        ~init_graph: (not no_init)
        ~header
        output_base msg init
289
    )
290
  ) ()
291

292
let make_index ~title ~grs_file ~html ~grs ~seq ~input_dir ~output_dir ~base_names  =
293 294 295 296 297 298 299 300 301 302 303 304
  handle ~name:"make_index" (fun () ->
    let init = Corpus_stat.empty grs seq in
    let corpus_stat =
      List.fold_left
        (fun acc base_name ->
          Corpus_stat.add_gr_stat base_name (Gr_stat.load (Filename.concat output_dir (base_name^".stat"))) acc
        ) init base_names in
    Corpus_stat.save_html title grs_file input_dir output_dir corpus_stat
  ) ()

let html_sentences ~title = handle ~name:"html_sentences" (fun () -> Html_sentences.build ~title) ()

bguillaum's avatar
bguillaum committed
305

bguillaum's avatar
bguillaum committed
306

bguillaum's avatar
bguillaum committed
307 308
(* -------------------------------------------------------------------------------- *)
(** {2 Patterns} *)
bguillaum's avatar
bguillaum committed
309

310 311 312
type pattern = Rule.pattern
type matching = Rule.matching

313
let load_pattern grs file =
314
  handle ~name:"load_pattern" (fun () -> Rule.build_pattern (Grs.get_domain grs) (Grs.get_label_domain grs) (Loader.pattern file)) ()
315

316
let match_in_graph grs pattern graph = Rule.match_in_graph (Grs.get_label_domain grs) pattern graph
317

bguillaum's avatar
bguillaum committed
318
let match_deco pattern matching = Rule.match_deco pattern matching