libgrew.ml 6.69 KB
Newer Older
pj2m's avatar
pj2m committed
1
2
include Grew_types

bguillaum's avatar
bguillaum committed
3
open Printf
pj2m's avatar
pj2m committed
4
5
open Log

6
open Grew_fs
bguillaum's avatar
bguillaum committed
7
8
9
10
11
open Grew_utils
open Grew_graph
open Grew_rule
open Grew_grs

pj2m's avatar
pj2m committed
12
open Grew_parser
bguillaum's avatar
bguillaum committed
13
open Grew_html
pj2m's avatar
pj2m committed
14
15


bguillaum's avatar
bguillaum committed
16

pj2m's avatar
pj2m committed
17
18
exception File_dont_exists of string

bguillaum's avatar
bguillaum committed
19
exception Parsing_err of string
pj2m's avatar
pj2m committed
20
21
exception Build of string * (string * int) option
exception Run of string * (string * int) option
bguillaum's avatar
bguillaum committed
22
exception Bug of string * (string * int) option
pj2m's avatar
pj2m committed
23
24
25

type grs = Grs.t
type gr = Instance.t
bguillaum's avatar
bguillaum committed
26
type rew_history = Rewrite_history.t
pj2m's avatar
pj2m committed
27

bguillaum's avatar
bguillaum committed
28
29
let is_empty = Rewrite_history.is_empty

bguillaum's avatar
bguillaum committed
30
31
let num_sol = Rewrite_history.num_sol

pj2m's avatar
pj2m committed
32
33
let empty_grs = Grs.empty

bguillaum's avatar
bguillaum committed
34
35
let set_timeout t = Timeout.timeout := t

36
37
IFDEF DEP2PICT THEN
let build_doc file dir grs_ast grs =
bguillaum's avatar
bguillaum committed
38
  Html_doc.build ~dep:true file dir grs_ast;
39
40
41
42
43
44
45
46
47
48
  
  (* draw pattern graphs for all rules and all filters *)
  let fct module_ rule_ = 
    let dep_code = Rule.to_dep rule_ in
    let dep_svg_file = sprintf "%s/%s_%s-patt.png" dir module_ (Rule.get_name rule_) in
    ignore (Dep2pict.Dep2pict.fromDepStringToPng dep_code dep_svg_file) in
  Grs.rule_iter fct grs;
  Grs.filter_iter fct grs
ELSE
let build_doc file dir grs_ast grs =
bguillaum's avatar
bguillaum committed
49
  Html_doc.build ~dep:false file dir grs_ast
50
51
END

bguillaum's avatar
bguillaum committed
52
53
54
55
let load_grs ?doc_output_dir file =
  if not (Sys.file_exists file)
  then raise (File_dont_exists file)
  else
56
    try
bguillaum's avatar
bguillaum committed
57
      let grs_ast = Grew_parser.grs_of_file file in
bguillaum's avatar
bguillaum committed
58
      let grs = Grs.build grs_ast in
bguillaum's avatar
bguillaum committed
59
      (match doc_output_dir with
bguillaum's avatar
bguillaum committed
60
        | None -> ()
61
        | Some dir -> build_doc file dir grs_ast grs);
bguillaum's avatar
bguillaum committed
62
      grs
bguillaum's avatar
bguillaum committed
63
    with
bguillaum's avatar
bguillaum committed
64
65
    | Grew_parser.Parse_error (msg,Some (sub_file,l)) -> 
        raise (Parsing_err (sprintf "[file:%s, line:%d] %s" sub_file l msg))
bguillaum's avatar
bguillaum committed
66
67
    | Grew_parser.Parse_error (msg,None) -> 
        raise (Parsing_err (sprintf "[file:%s] %s" file msg))
bguillaum's avatar
bguillaum committed
68
69
    | Error.Build (msg,loc) -> raise (Build (msg,loc))
    | Error.Bug (msg, loc) -> raise (Bug (msg,loc))
bguillaum's avatar
bguillaum committed
70
    | exc -> raise (Bug (sprintf "[Libgrew.load_grs] UNCATCHED EXCEPTION: %s" (Printexc.to_string exc), None))
bguillaum's avatar
bguillaum committed
71

bguillaum's avatar
bguillaum committed
72
73
74
let to_sentence ?main_feat gr =
  let graph = gr.Instance.graph in
  G_graph.to_sentence ?main_feat graph
bguillaum's avatar
bguillaum committed
75

76
let get_sequence_names grs = Grs.sequence_names grs
bguillaum's avatar
bguillaum committed
77
78

let load_gr file =
bguillaum's avatar
bguillaum committed
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
  if Sys.file_exists file
  then
    begin
      try
        let gr_ast = Grew_parser.gr_of_file file in
        Instance.from_graph (G_graph.build gr_ast)
      with
        | Grew_parser.Parse_error (msg,Some (sub_file,l)) ->
          raise (Parsing_err (sprintf "[file:%s, line:%d] %s" sub_file l msg))
        | Grew_parser.Parse_error (msg,None) ->
          raise (Parsing_err (sprintf "[file:%s] %s" file msg))
        | Error.Build (msg,loc) -> raise (Build (msg,loc))
        | Error.Bug (msg, loc) -> raise (Bug (msg,loc))
        | exc -> raise (Bug (sprintf "[Libgrew.load_gr] UNCATCHED EXCEPTION: %s" (Printexc.to_string exc), None))
    end
  else raise (File_dont_exists file)
pj2m's avatar
pj2m committed
95

bguillaum's avatar
bguillaum committed
96
let load_conll file =
97
  try
98
99
    let graph = G_graph.of_conll ~loc:(file,-1) (Conll.load file) in
    Instance.from_graph graph
100
  with
bguillaum's avatar
bguillaum committed
101
102
    | Grew_parser.Parse_error (msg,Some (sub_file,l)) -> 
        raise (Parsing_err (sprintf "[file:%s, line:%d] %s" sub_file l msg))
bguillaum's avatar
bguillaum committed
103
104
    | Grew_parser.Parse_error (msg,None) -> 
        raise (Parsing_err (sprintf "[file:%s] %s" file msg))
105
106
  | Error.Build (msg,loc) -> raise (Build (msg,loc))
  | Error.Bug (msg, loc) -> raise (Bug (msg,loc))
bguillaum's avatar
bguillaum committed
107
  | exc -> raise (Bug (sprintf "[Libgrew.load_conll] UNCATCHED EXCEPTION: %s" (Printexc.to_string exc), None))
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123

let load_graph file = 
  if Filename.check_suffix file ".gr" 
  then load_gr file
  else if Filename.check_suffix file ".conll"
  then load_conll file
  else
    begin
      Log.fwarning "Unknown file format for input graph '%s', try to guess..." file;
      try load_gr file with
        Parsing_err _ -> 
          try load_conll file with
            Parsing_err _ ->
              Log.fcritical "[Libgrew.load_graph] Cannot guess input file format of file '%s'. Use .gr or .conll file extension" file
    end

bguillaum's avatar
bguillaum committed
124
125
126
127
128
129
let xml_graph xml =
  try Instance.from_graph (G_graph.of_xml xml) with
    | Error.Build (msg,loc) -> raise (Build (msg,loc))
    | Error.Bug (msg, loc) -> raise (Bug (msg,loc))
    | exc -> raise (Bug (sprintf "[Libgrew.load_conll] UNCATCHED EXCEPTION: %s" (Printexc.to_string exc), None))

bguillaum's avatar
bguillaum committed
130
131
132
let raw_graph instance =
  G_graph.to_raw instance.Instance.graph

133
let rewrite ~gr ~grs ~seq = 
134
  try Grs.rewrite grs seq gr
135
  with
bguillaum's avatar
bguillaum committed
136
137
  | Error.Run (msg,loc) -> raise (Run (msg,loc))
  | Error.Bug (msg, loc) -> raise (Bug (msg,loc))
bguillaum's avatar
bguillaum committed
138
  | exc -> raise (Bug (sprintf "[Libgrew.rewrite] UNCATCHED EXCEPTION: %s" (Printexc.to_string exc), None))
139

bguillaum's avatar
bguillaum committed
140
let display ~gr ~grs ~seq =
141
  try Grs.build_rew_display grs seq gr
pj2m's avatar
pj2m committed
142
  with
bguillaum's avatar
bguillaum committed
143
144
  | Error.Run (msg,loc) -> raise (Run (msg,loc))
  | Error.Bug (msg, loc) -> raise (Bug (msg,loc))
145
  | Error.Build (msg, loc) -> raise (Build (msg,loc))
bguillaum's avatar
bguillaum committed
146
  | exc -> raise (Bug (sprintf "[Libgrew.display] UNCATCHED EXCEPTION: %s" (Printexc.to_string exc), None))
bguillaum's avatar
bguillaum committed
147
148
149

let write_stat filename rew_hist = Gr_stat.save filename (Gr_stat.from_rew_history rew_hist) 

bguillaum's avatar
bguillaum committed
150
151
152
153
154
let save_index ~dirname ~base_names =
  let out_ch = open_out (Filename.concat dirname "index") in
  List.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
  close_out out_ch

bguillaum's avatar
bguillaum committed
155
156
157
158
159
let save_graph_conll filename graph =
  let out_ch = open_out filename in
  fprintf out_ch "%s" (Instance.to_conll graph);
  close_out out_ch

bguillaum's avatar
bguillaum committed
160
let save_gr base rew_hist = Rewrite_history.save_gr base rew_hist
161

bguillaum's avatar
bguillaum committed
162
163
164
165
let save_conll base rew_hist = Rewrite_history.save_conll base rew_hist

let save_det_gr base rew_hist = Rewrite_history.save_det_gr base rew_hist

166
let save_det_conll ?header base rew_hist = Rewrite_history.save_det_conll ?header base rew_hist
167

bguillaum's avatar
bguillaum committed
168
169
let det_dep_string rew_hist = Rewrite_history.det_dep_string rew_hist

bguillaum's avatar
bguillaum committed
170
let write_html 
171
172
    ?(no_init=false)
    ?(out_gr=false)
173
174
    ?filter
    ?main_feat
175
    ?dot
bguillaum's avatar
bguillaum committed
176
    ~header
177
    ~graph_file
bguillaum's avatar
bguillaum committed
178
179
    rew_hist
    output_base =
180
  ignore (
bguillaum's avatar
bguillaum committed
181
  Html_rh.build
182
    ?filter
183
    ?main_feat
184
    ?dot
185
    ~out_gr
bguillaum's avatar
bguillaum committed
186
187
    ~init_graph: (not no_init)
    ~header
188
    ~graph_file
bguillaum's avatar
bguillaum committed
189
190
    output_base rew_hist
    )
191
192

let error_html 
193
194
195
    ?(no_init=false) 
    ?main_feat 
    ?dot
196
    ~header
197
198
    msg 
    ?init
199
200
    output_base =
  ignore (
bguillaum's avatar
bguillaum committed
201
  Html_rh.error
202
203
    ?main_feat
    ?dot
204
205
206
207
208
    ~init_graph: (not no_init)
    ~header
    output_base msg init
    )

209
let make_index ~title ~grs_file ~html ~grs ~seq ~input_dir ~output_dir ~base_names  =
bguillaum's avatar
bguillaum committed
210
  let init = Corpus_stat.empty grs seq in
bguillaum's avatar
bguillaum committed
211
212
213
214
215
  let corpus_stat =
    List.fold_left
      (fun acc base_name -> 
        Corpus_stat.add_gr_stat base_name (Gr_stat.load (Filename.concat output_dir (base_name^".stat"))) acc
      ) init base_names in
216
  Corpus_stat.save_html title grs_file input_dir output_dir corpus_stat
bguillaum's avatar
bguillaum committed
217

bguillaum's avatar
bguillaum committed
218
let html_sentences ~title = Html_sentences.build ~title
bguillaum's avatar
bguillaum committed
219

pj2m's avatar
pj2m committed
220
221
let get_css_file = Filename.concat DATA_DIR "style.css"

bguillaum's avatar
bguillaum committed
222
let graph_of_instance instance = instance.Instance.graph
223
224

let feature_names () = Domain.feature_names ()