Commit a02dddf8 authored by Bruno Guillaume

Usage of stdin/stdout for data handling

Empty GRS by default
parent 51669b07
......@@ -17,8 +17,7 @@ module Grew_args = struct
type mode = Undefined | Gui of string | Transform | Grep | Test
let mode = ref Undefined
let grs = ref None
let gui_doc = ref false
let grs = ref Grs.empty
let dep_dir = ref None
let cupt = ref false
......@@ -98,7 +97,7 @@ module Grew_args = struct
let rec loop = function
| [] -> ()
| "-grs" :: file :: args -> grs := Some file; loop args
| "-grs" :: file :: args -> grs := Grs.load file; loop args
| "-i" :: files :: args ->
input_data := (Str.split (Str.regexp " ") files) @ !input_data; loop args
| "-o" :: file :: args -> output_file := Some file; loop args
......
......@@ -36,50 +36,46 @@ let handle fct () =
(* -------------------------------------------------------------------------------- *)
(** [transform ()] rewrites every input graph with the GRS stored in [Grew_args.grs]
    (strategy [!Grew_args.strat]) and prints the result in CoNLL format.

    Input graphs come from [Corpus.input]; output goes to the file given with [-o]
    when there is one, and to [stdout] otherwise.  The whole job runs inside [handle]. *)
let transform () =
  handle (fun () ->
    let grs = !Grew_args.grs in
    let domain = Grs.domain grs in
    let graph_array = Corpus.input ?domain () in
    let len = Array.length graph_array in
    let out_ch = match !Grew_args.output_file with
      | Some output_file -> open_out output_file
      | None -> stdout in
    Array.iteri
      (fun index (id, gr) ->
        Counter.print index len id;
        match Rewrite.simple_rewrite ~gr ~grs ~strat:!Grew_args.strat with
        | [one] -> fprintf out_ch "%s\n" (Graph.to_conll_string ~cupt:!Grew_args.cupt one)
        | l ->
          (* zero or several results: each one gets a distinct sentid "<id>_<rank>" *)
          (* NOTE(review): unlike the single-result branch, this one does not use the
             [cupt] flag -- confirm whether that is intended *)
          List.iteri (fun i gr ->
            let conll = Graph.to_conll gr in
            let conll_new_id = Conll.set_sentid (sprintf "%s_%d" id i) conll in
            fprintf out_ch "%s\n" (Conll.to_string conll_new_id)
          ) l
      ) graph_array;
    (* The parentheses are required: without them the sequence [; Counter.finish ()]
       is parsed as part of the last match arm and is skipped for the other arm. *)
    (match !Grew_args.output_file with
     | Some _ -> close_out out_ch   (* a file we opened ourselves *)
     | None -> flush out_ch);       (* stdout: keep it open, just flush *)
    Counter.finish ()
  ) ()
(* -------------------------------------------------------------------------------- *)
let grep () = handle
(fun () ->
match (!Grew_args.input_data, !Grew_args.pattern) with
| ([],_) -> Log.message "No input data specified: use -i option"; exit 1
| (_,None) -> Log.message "No pattern file specified: use -pattern option"; exit 1;
| (file_list, Some pattern_file) ->
match !Grew_args.pattern with
| None -> Log.message "No pattern file specified: use -pattern option"; exit 1;
| Some pattern_file ->
let domain = match !Grew_args.grs with
| None -> None
| Some file -> Grs.domain (Grs.load file) in
let domain = Grs.domain !Grew_args.grs in
let pattern = Pattern.load ?domain pattern_file in
(* get the array of graphs to explore *)
let graph_array = Corpus.get_graphs ?domain file_list in
let graph_array = Corpus.input ?domain () in
(match !Grew_args.dep_dir with
| None -> ()
......
......@@ -44,24 +44,29 @@ module Counter = struct
(* [print value total text] emits a progress indicator made of the [back] prefix, the
   ratio value/total as a percentage with two decimals, and [text] in parentheses.
   Nothing is emitted when [!Grew_args.quiet] is set; [%!] flushes after each update. *)
let print value total text =
if not !Grew_args.quiet
(* NOTE(review): two alternative [then] lines follow, differing only in [printf]
   (stdout) versus [eprintf] (stderr).  They look like the before/after sides of a
   diff; only one of them can be kept -- TODO confirm which one. *)
then printf "%s%.2f%% (%s)%!" back (((float value) /. (float total))*. 100. ) text
then eprintf "%s%.2f%% (%s)%!" back (((float value) /. (float total))*. 100. ) text
(* [finish ()] emits the final "100.00%" progress line (prefixed by [back], followed by a
   newline and a flush) unless [!Grew_args.quiet] is set. *)
(* NOTE(review): [finish] is defined twice in a row; the second definition (stderr, via
   [eprintf]) shadows the first one (stdout, via [printf]).  Presumably these are the
   before/after sides of a diff -- TODO confirm and drop the unused one. *)
let finish () = if not !Grew_args.quiet then printf "%s100.00%%\n%!" back
let finish () = if not !Grew_args.quiet then eprintf "%s100.00%%\n%!" back
end (* module Counter *)
(* ================================================================================ *)
module File = struct
(** [read_rev file] reads [file] line by line and returns the list of
    [(line_number, line)] pairs in reverse order (last line first); line numbers start at 1.
    A leading UTF-8 byte order mark (EF BB BF) is skipped.
    The channel is closed before returning, including when reading fails. *)
let read_rev file =
  let in_ch = open_in file in
  let line_num = ref 0 in
  let res = ref [] in
  try
    (* Skip the first 3 bytes only when they are exactly the UTF-8 BOM: testing the first
       byte alone would also drop any leading character encoded as EF xx xx.
       A file shorter than 3 bytes cannot start with a BOM. *)
    let has_bom =
      try
        let b0 = input_byte in_ch in
        let b1 = input_byte in_ch in
        let b2 = input_byte in_ch in
        b0 = 0xEF && b1 = 0xBB && b2 = 0xBF
      with End_of_file -> false in
    seek_in in_ch (if has_bom then 3 else 0);
    while true do
      incr line_num;
      res := (!line_num, input_line in_ch) :: !res
    done; assert false
  with
  | End_of_file -> close_in in_ch; !res
  | e -> close_in_noerr in_ch; raise e (* do not leak the channel on other errors *)

(** [read file] is like [read_rev file] but returns the lines in file order. *)
let read file = List.rev (read_rev file)
exception Found of int
let get_suffix file_name =
......@@ -75,6 +80,17 @@ module File = struct
with
| Found i -> Some (String.sub file_name i (len-i))
(** [read_stdin ()] consumes standard input until end of file and returns its lines in
    reading order, each one paired with its line number (starting at 1). *)
let read_stdin () =
  let rec collect line_no acc =
    match input_line stdin with
    | line -> collect (line_no + 1) ((line_no, line) :: acc)
    | exception End_of_file -> List.rev acc
  in
  collect 1 []
end (* module File *)
(* ================================================================================ *)
......@@ -128,18 +144,22 @@ module Corpus = struct
let conll_corpus = Conll_corpus.load file in
Array.map (fun (sentid, conll) -> (sentid, Graph.of_conll ?domain conll)) conll_corpus
(* NOTE(review): this span interleaves two revisions of the Brown loader: a [load_brown]
   that reads [file] itself and iterates with [List_.opt_mapi] over plain lines, and a
   [brown_form_lines] that receives [(line_number, line)] pairs and iterates with
   [List_.opt_map].  Only one side of each duplicated line can be kept -- TODO confirm. *)
(* Each line is split on "#" and turned into a [(sentid, graph)] pair built with
   [Graph.of_brown]; the pairs are returned as an array. *)
let load_brown ?domain file =
let lines = File.read file in
let brown_form_lines ?domain lines =
let brown_list =
List_.opt_mapi
(fun i line -> match Str.split (Str.regexp "#") line with
List_.opt_map
(fun (i,line) -> match Str.split (Str.regexp "#") line with
(* the split yields no field: the line produces no graph *)
| [] -> None
(* a single field: the sentid is the index [i] zero-padded to 5 digits *)
| [line] -> let sentid = sprintf "%05d" i in Some (sentid, Graph.of_brown ?domain ~sentid line)
(* two fields: the first one is the sentid, the second one the Brown text *)
| [sentid; line] -> Some (sentid, Graph.of_brown ?domain ~sentid line)
(* more than two fields: rejected with [Fail]; the two variants differ only in whether
   the file name is part of the message *)
| _ -> raise (Fail (sprintf "[file %s, line %d] Illegal Brown line >>>%s<<<<\n%!" file i line))
| _ -> raise (Fail (sprintf "[line %d] Illegal Brown line >>>%s<<<<\n%!" i line))
) lines in
Array.of_list brown_list
(** [load_brown ?domain file] reads [file] with [File.read] and converts its lines with
    [brown_form_lines].
    @raise Fail with the file name prepended to the message when the conversion raises [Fail]. *)
let load_brown ?domain file =
  let numbered_lines = File.read file in
  match brown_form_lines ?domain numbered_lines with
  | graphs -> graphs
  | exception Fail reason -> raise (Fail (sprintf "[file %s] %s" file reason))
(** [load source] loads a corpus; [source] can be:
- a folder, the corpus is the set of graphs (files matching *.gr or *.conll) in the folder
- a conll file *)
......@@ -189,6 +209,19 @@ module Corpus = struct
| _ ->
let conll_corpus = Conll_corpus.load_list source_list in
Array.map (fun (sentid, conll) -> (sentid, Graph.of_conll ?domain conll)) conll_corpus
(** [from_stdin ?domain ()] builds a corpus from the lines read on standard input.
    The lines are first interpreted as CoNLL data (with "stdin" as basename); if that
    attempt raises, the same lines are interpreted as Brown lines instead.
    [domain], when given, is passed on to the graph builders in both cases. *)
let from_stdin ?domain () =
  let lines = File.read_stdin () in
  try
    let conll_corpus = Conll_corpus.from_lines ~basename: "stdin" lines in
    Array.map (fun (sentid, conll) -> (sentid, Graph.of_conll ?domain conll)) conll_corpus
  with
  (* runtime failures and user interrupts are not a hint that the data is in Brown format *)
  | (Out_of_memory | Stack_overflow | Sys.Break) as fatal -> raise fatal
  (* NOTE(review): every other failure of the CoNLL attempt triggers the Brown fallback,
     so the error finally reported is the Brown one -- narrow this once the exceptions
     raised by [Conll_corpus.from_lines] are known *)
  | _ -> brown_form_lines ?domain lines

(** [input ?domain ()] returns the graphs to process: those designated by
    [!Grew_args.input_data] when that list is not empty, those read on standard input otherwise. *)
let input ?domain () =
  match !Grew_args.input_data with
  | [] -> from_stdin ?domain ()
  | input_list -> get_graphs ?domain input_list
end (* module Corpus *)
(* ==================================================================================================== *)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment