libgrew.ml 14.3 KB
Newer Older
pj2m's avatar
pj2m committed
1
2
include Grew_types

bguillaum's avatar
bguillaum committed
3
open Printf
pj2m's avatar
pj2m committed
4
5
open Log

6
open Utils
pj2m's avatar
pj2m committed
7
8
9
10
11
12
13
14
open Grew_parser
open Checker
open Grs
open Graph
open Rule
open HTMLer


bguillaum's avatar
bguillaum committed
15

pj2m's avatar
pj2m committed
16
17
18
19
20
exception Parsing_err of string
exception File_dont_exists of string

exception Build of string * (string * int) option
exception Run of string * (string * int) option
bguillaum's avatar
bguillaum committed
21
exception Bug of string * (string * int) option
pj2m's avatar
pj2m committed
22
23
24

type grs = Grs.t
type gr = Instance.t
bguillaum's avatar
bguillaum committed
25
type rew_history = Rewrite_history.t
pj2m's avatar
pj2m committed
26
27
28

let empty_grs = Grs.empty

bguillaum's avatar
bguillaum committed
29
30
31
32
let load_grs ?doc_output_dir file =
  if not (Sys.file_exists file)
  then raise (File_dont_exists file)
  else
33
34
35
    try
      let ast = Grew_parser.parse_file_to_grs file in
      (* Checker.check_grs ast; *)
bguillaum's avatar
bguillaum committed
36
37
38
39
40
      (match doc_output_dir with
      | None -> ()
      | Some dir -> HTMLer.proceed dir ast);
      Grs.build ast
    with
41
42
    | Grew_parser.Parse_error msg -> raise (Parsing_err msg)
    | Utils.Build (msg,loc) -> raise (Build (msg,loc))
43
    | Utils.Bug (msg, loc) -> raise (Bug (msg,loc))
bguillaum's avatar
bguillaum committed
44
    | exc -> raise (Bug (sprintf "UNCATCHED EXCEPTION: %s" (Printexc.to_string exc), None))
bguillaum's avatar
bguillaum committed
45
46


pj2m's avatar
pj2m committed
47
let get_available_seq grs = Grs.sequences grs
bguillaum's avatar
bguillaum committed
48

pj2m's avatar
pj2m committed
49
50
let empty_gr = Instance.empty

bguillaum's avatar
bguillaum committed
51
let load_gr file =
52
53
54
  if (Sys.file_exists file) then (
    try
      let ast = Grew_parser.parse_file_to_gr file in
bguillaum's avatar
bguillaum committed
55
      (* Checker.check_gr ast;*)
56
57
58
59
60
      Instance.build ast
    with
    | Grew_parser.Parse_error msg -> raise (Parsing_err msg)
    | Utils.Build (msg,loc) -> raise (Build (msg,loc))
    | Utils.Bug (msg, loc) -> raise (Bug (msg,loc))
bguillaum's avatar
bguillaum committed
61
    | exc -> raise (Bug (sprintf "UNCATCHED EXCEPTION: %s" (Printexc.to_string exc), None))
62
63
64
65

   ) else (
    raise (File_dont_exists file)
   )
pj2m's avatar
pj2m committed
66

67
68
69
70
71
72
73
74
let rewrite ~gr ~grs ~seq = 
  try
    Grs.rewrite grs seq gr
  with
  | Utils.Run (msg,loc) -> raise (Run (msg,loc))
  | Utils.Bug (msg, loc) -> raise (Bug (msg,loc))
  | exc -> raise (Bug (sprintf "UNCATCHED EXCEPTION: %s" (Printexc.to_string exc), None))

bguillaum's avatar
bguillaum committed
75
76

let display ~gr ~grs ~seq =
pj2m's avatar
pj2m committed
77
78
79
80
  try
    Grs.build_rew_display grs seq gr
  with
  | Utils.Run (msg,loc) -> raise (Run (msg,loc))
bguillaum's avatar
bguillaum committed
81
  | Utils.Bug (msg, loc) -> raise (Bug (msg,loc))
bguillaum's avatar
bguillaum committed
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
  | exc -> raise (Bug (sprintf "UNCATCHED EXCEPTION: %s" (Printexc.to_string exc), None))


let write_stat filename rew_hist = Gr_stat.save filename (Gr_stat.from_rew_history rew_hist) 


(* let rules_stat grs seq gr_file = *)
(*   try *)
(*     let gr = Instance.build (Grew_parser.parse_file_to_gr gr_file) in *)
(*     let rew_hist = Grs.rewrite grs seq gr in *)
(*     StringMap.fold *)
(*       (fun key value acc -> *)
(*         (key,value)::acc *)
(*       ) *)
(*       (Gr_stat.from_rew_history rew_hist)  *)
(*       [] *)
(*   with *)
(*   | Utils.Run (msg, Some (loc_file,loc_line)) -> *)
(*       Log.fmessage "[file: %s, line: %d] Utils.run: %s\n" loc_file loc_line msg; [] *)
(*   | exc -> *)
(*       Log.fmessage "Unexpected exception: %s\n" (Printexc.to_string exc); [] *)


let write_html 
    ?(no_init=false) ?main_feat 
    ~header
    rew_hist
    output_base =
110
  ignore (
bguillaum's avatar
bguillaum committed
111
112
113
114
115
116
  Rewrite_history.save_html 
    ?main_feat 
    ~init_graph: (not no_init)
    ~header
    output_base rew_hist
    )
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131

let error_html 
    ?(no_init=false) ?main_feat 
    ~header
    msg ?init
    output_base =
  ignore (
  Rewrite_history.error_html 
    ?main_feat 
    ~init_graph: (not no_init)
    ~header
    output_base msg init
    )


bguillaum's avatar
bguillaum committed
132
        IFDEF DEP2PICT THEN
bguillaum's avatar
bguillaum committed
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
let dummy = ()


(* let rewrite_to_html_intern  *)
(*     ?(no_init=false)  *)
(*     ?main_feat  *)
(*     grs_file  *)
(*     grs seq  *)
(*     gr_file *)
(*     output_base *)
(*     nb_sentence  *)
(*     previous  *)
(*     next = *)

(*   let header = "" in *)
  
(*   let _ =  *)
(*     Sys.command (sprintf "cp %s %s"  *)
(*                    gr_file  *)
(*                    (Filename.concat (Filename.dirname output_base) (Filename.basename gr_file)) *)
(*                 ) in *)
  
(*   try *)
(*     let init = Instance.build (Grew_parser.parse_file_to_gr gr_file) in *)
(*     try *)
(*       let rew_hist = Grs.rewrite grs seq init in *)
      
(*       ignore ( (\* FIXME: ingore inutile *\) *)
(*       Rewrite_history.save_html  *)
(*         ?main_feat  *)
(*         ~init_graph: (not no_init) *)
(*         ~header *)
(*         output_base rew_hist *)
(*      ) *)
        
(*     with *)
(*     | Utils.Run (msg, Some (loc_file,loc_line)) -> *)
(*         let html_ch = open_out (sprintf "%s.html" output_base) in *)
(*         Html.enter html_ch ~header output_base; *)
(*         fprintf html_ch "<h6>Initial graph</h6>\n"; *)
(*         Instance.save_dep_png ?main_feat output_base init; *)
(*         fprintf html_ch "<div width=100%% style=\"overflow-x:auto\"><IMG SRC=\"%s.png\"></div>\n" (Filename.basename output_base); *)
(*         fprintf html_ch "<h2>ERROR during rewriting:</h2>\n"; *)
(*         fprintf html_ch "<p>Message: %s</p>\n" msg; *)
(*         fprintf html_ch "<p>File: %s</p>\n" loc_file; *)
(*         fprintf html_ch "<p>Line: %d</p>\n" loc_line; *)
(*         Html.leave html_ch; *)
(*         close_out html_ch with *)
(*   | exc -> *)
(*       let html_ch = open_out (sprintf "%s.html" output_base) in *)
(*       Html.enter html_ch ~header output_base; *)
(*       fprintf html_ch "<h1>UNEXPECTED EXCEPTION: %s</h1>" (Printexc.to_string exc); *)
(*       Html.leave html_ch; *)
(*       close_out html_ch *)
        
pj2m's avatar
pj2m committed
188
189


bguillaum's avatar
bguillaum committed
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
(* let rewrite_to_html ?main_feat input_dir output_dir no_init grs_file grs seq title = *)
(*   try *)
(*     (\* get ALL gr files *\) *)
(*     let all_files = Array.to_list (Sys.readdir input_dir) in *)
(*     let gr_files = List.sort (fun a b -> compare a b)  *)
(*         (List.filter (fun file -> Filename.check_suffix file ".gr") all_files) in *)
(*     let nb_files = List.length gr_files in *)
(*     let ratio nb = (float nb) /. (float nb_files) *. 100. in *)

(*     (\* create html files *\) *)
(*     ignore (Sys.command (sprintf "cp %s %s" grs_file  *)
(*                            (Filename.concat output_dir (Filename.basename grs_file)))); *)

(*     let sentence_counter = ref 1 in *)


(*     List.iter *)
(*       (fun input -> *)
(*         Log.fmessage "Computing %s" input; *)
(*         let rules = rewrite_to_html_intern *)
(*             ~no_init *)
(*             grs_file *)
(*             grs *)
(*             seq *)
(*             (Filename.concat input_dir input) *)
(*             (Filename.concat output_dir (Filename.chop_extension input)) *)
(*             ?main_feat *)
(*             !sentence_counter *)
(*             (if !sentence_counter > 1 then (Filename.chop_extension (List.nth gr_files (!sentence_counter-2))) else "") *)
(*             (if !sentence_counter < nb_files then (Filename.chop_extension (List.nth gr_files (!sentence_counter)))  else "") *)
(*         in *)
(*         incr sentence_counter; *)
(*         (\* match rules with *\) *)
(*         (\* | Some module_list -> *\) *)
(*         (\*     List.iter *\) *)
(*         (\*       (fun (module_name, rule_list) -> *\) *)
(*         (\*         List.iter *\) *)
(*         (\*           (fun rule -> *\) *)
(*         (\*             stats := Corpus_stat.add module_name rule input 1 !stats *\) *)
(*         (\*           ) rule_list *\) *)
(*         (\*         (\\* let old_rule_list = *\\) *\) *)
(*         (\*         (\\*   try ref (StringMap.find module_name !stats) *\\) *\) *)
(*         (\*         (\\*   with Not_found -> ref StringMap.empty in *\\) *\) *)
(*         (\*         (\\* List.iter *\\) *\) *)
(*         (\*         (\\*   (fun rule -> *\\) *\) *)
(*         (\*         (\\*     let old = try StringMap.find rule !old_rule_list with Not_found -> [] in *\\) *\) *)
(*         (\*         (\\*     old_rule_list := StringMap.add rule (input::old) !old_rule_list *\\) *\) *)
(*         (\*         (\\*   ) rule_list; *\\) *\) *)
(*         (\*         (\\* stats := StringMap.add module_name !old_rule_list !stats *\\) *\) *)
(*         (\*       ) module_list *\) *)
(*         (\* | None -> errors := input :: !errors *\) *)
(*       ) gr_files; *)



(*     (\* let out_ch = open_out (Filename.concat output_dir "index.html") in *\) *)

(*     (\* let css = "<link rel=\"stylesheet\" href=\"style.css\" type=\"text/css\">" in *\) *)

(*     (\* ignore(Sys.command("cp "^(Filename.concat DATA_DIR "style.css")^" "^(Filename.concat output_dir "style.css"))); *\) *)

(*     (\* fprintf out_ch "<head>\n%s\n<title>%s</title>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /></head>\n" css title; *\) *)
(*     (\* fprintf out_ch "<h1>%s</h1>\n" title; *\) *)
(*     (\* fprintf out_ch "<b>Grs file</b>:%s\n<br/>\n" (Filename.basename grs_file); *\) *)
(*     (\* fprintf out_ch "<b>%d Sentences</b><br/>\n<br/>\n" nb_files; *\) *)
(*     (\* fprintf out_ch "<center><table cellpadding=10 cellspacing=0 width=90%%>\n"; *\) *)
(*     (\* StringMap.iter *\) *)
(*     (\*   (fun modul rules -> *\) *)
(*     (\*     fprintf out_ch "<tr><td colspan=5><h6>Module %s</h6></td>\n" modul; *\) *)
(*     (\*     fprintf out_ch "<tr><th class=\"first\" width=10>Rule</th><th width=10>#occ</th><th width=10>#files</th><th width=10>Ratio</th><th width=10>Files</th></tr>\n"; *\) *)
(*     (\*     StringMap.iter *\) *)
(*     (\*       (fun rule (occ_num, file_set) -> *\) *)
(*     (\*         let file_list = StringSet.elements file_set in *\) *)

(*     (\*         let tmp = ref "" in *\) *)
(*     (\*         let counter = ref 0 in *\) *)
(*     (\*         let rec compute list = match list with *\) *)
(*     (\*         | [] -> () *\) *)
(*     (\*         | h::[] -> *\) *)
(*     (\*             if (!counter = 10) then ( *\) *)
(*     (\*               tmp := sprintf "%s<div id=\"%s_%s\" style=\"display:none;\">\n" !tmp modul rule *\) *)
(*     (\*              ); *\) *)
(*     (\*             incr counter; *\) *)
(*     (\*             tmp := sprintf "%s<a href=\"%s\">%s</a>" !tmp ((Filename.chop_extension h)^".html") (Filename.chop_extension h) *\) *)
(*     (\*         | h::t -> *\) *)
(*     (\*             if (not (List.mem h t)) then ( (\\*avoid doublons*\\) *\) *)
(*     (\*               if (!counter = 10) then ( *\) *)
(*     (\*                 tmp := sprintf "%s<div id=\"%s_%s\" style=\"display:none;\">\n" !tmp modul rule *\) *)
(*     (\*                ); *\) *)
(*     (\*               incr counter; *\) *)
(*     (\*               tmp := sprintf "%s<a href=\"%s\">%s</a><br/>" !tmp ((Filename.chop_extension h)^".html") (Filename.chop_extension h) *\) *)
(*     (\*              ); *\) *)
(*     (\*             compute t *\) *)
(*     (\*         in compute (List.rev file_list); *\) *)

(*     (\*         let file_num = List.length file_list in *\) *)

(*     (\*         fprintf out_ch "<tr>\n"; *\) *)
(*     (\*         fprintf out_ch "<td class=\"first_stats\" width=10 valign=top>%s</td>\n" rule; *\) *)
(*     (\*         fprintf out_ch "<td class=\"stats\" width=10 valign=top>%d</td>\n" occ_num; *\) *)
(*     (\*         fprintf out_ch "<td class=\"stats\" width=10 valign=top>%d</td>\n" file_num; *\) *)
(*     (\*         fprintf out_ch "<td class=\"stats\" width=10 valign=top>%.2f%%</td>\n" (ratio file_num); *\) *)
            
(*     (\*         fprintf out_ch "<td class=\"stats\">%s" !tmp; *\) *)
(*     (\*         if (!counter > 10) *\) *)
(*     (\*         then ( *\) *)
(*     (\*           fprintf out_ch "</div><a style=\"cursor:pointer;\" onClick=\"if (document.getElementById('%s_%s').style.display == 'none') { %s } else { %s }\"><b><p id=\"p_%s_%s\">+ Show more +</p></b></a>\n" *\) *)
(*     (\*             modul rule *\) *)
(*     (\*             (sprintf "document.getElementById('%s_%s').style.display = 'block'; document.getElementById('p_%s_%s').innerHTML = '- Show less -';" modul rule modul rule) *\) *)
(*     (\*             (sprintf "document.getElementById('%s_%s').style.display = 'none';; document.getElementById('p_%s_%s').innerHTML = '+ Show more +';" modul rule modul rule) *\) *)
(*     (\*             modul rule; *\) *)
(*     (\*          ); *\) *)
(*     (\*         fprintf out_ch "</td></tr>\n"; *\) *)
(*     (\*       ) rules; *\) *)
(*     (\*   ) !stats; *\) *)

(*     (\* (\\* add a subtalbe for sentence that produces an error *\\) *\) *)
(*     (\* let nb_errors = List.length !errors in *\) *)
(*     (\* fprintf out_ch "<tr><td colspan=5><h6>ERRORS</h6></td>\n"; *\) *)
(*     (\* fprintf out_ch "<tr><th class=\"first\" width=10>Rule</th><th colspan=2 width=20>#files</th><th width=10>Ratio</th><th>Files</th></tr>\n"; *\) *)

(*     (\* fprintf out_ch "<tr>\n"; *\) *)
(*     (\* fprintf out_ch "<td class=\"first_stats\">Errors</td>\n"; *\) *)
(*     (\* fprintf out_ch "<td class=\"stats\" colspan=2>%d</td>\n" nb_errors; *\) *)
(*     (\* fprintf out_ch "<td class=\"stats\">%.2f%%</td>\n" (ratio nb_errors); *\) *)
(*     (\* fprintf out_ch "<td class=\"stats\">"; *\) *)
(*     (\* List.iter *\) *)
(*     (\*   (fun err -> *\) *)
(*     (\*     fprintf out_ch "<a href=\"%s.html\">%s</a><br/>" (Filename.chop_extension err) (Filename.chop_extension err) *\) *)
(*     (\*   ) (List.rev !errors); *\) *)
(*     (\* fprintf out_ch "</td>\n"; *\) *)
(*     (\* fprintf out_ch "</tr>"; *\) *)

(*     (\* fprintf out_ch "</table></center>\n"; *\) *)

(*     (\* close_out out_ch; *\) *)
(*     (\* () *\) *)

(*   with *)
(*   | Utils.Run (msg,loc) -> raise (Run (msg,loc)) *)
(*   | Utils.Bug (msg, loc) -> raise (Bug (msg,loc)) *)
(*   | exc -> raise (Bug (sprintf "UNCATCHED EXCEPTION: %s" (Printexc.to_string exc), None)) *)
        ENDIF
bguillaum's avatar
bguillaum committed
333

bguillaum's avatar
bguillaum committed
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
(* (\* read all stats in [dir] and produce an html file with full stats *\) *)
(* let collect_stats init dir = *)
(*   let all_files = Array.to_list (Sys.readdir dir) in *)
(*   let stat_files = List.filter (fun f -> Filename.check_suffix f ".stat") all_files in *)
(*   List.fold_left *)
(*     (fun acc stat_file ->  *)
(*       List.fold_left  *)
(*         (fun acc2 line -> *)
(*           match Str.split (Str.regexp "\\.\\|:") line with *)
(*           | [modul; rule; num] -> (\* FIXME *\) acc2  *)
(*           | _ -> Log.fcritical "invalid stat line: %s" line *)
(*         ) acc (File.read stat_file) *)
(*     ) init stat_files *)


let make_index ~title ~grs_file ~html ~grs ~output_dir ~base_names  =
  let init = Corpus_stat.empty grs in
  let corpus_stat =
    List.fold_left
      (fun acc base_name -> 
        Corpus_stat.add_gr_stat base_name (Gr_stat.load (Filename.concat output_dir (base_name^".stat"))) acc
      ) init base_names in
  Corpus_stat.save_html title grs_file html output_dir corpus_stat

  
359

pj2m's avatar
pj2m committed
360
361
let get_css_file = Filename.concat DATA_DIR "style.css"