Commit 5f3e6c7b authored by Bruno Guillaume

accept a list of input files

parent 4289282b
......@@ -22,7 +22,7 @@ module Grew_args = struct
let old_grs = ref false
let dep_dir = ref None
let (input_data : string option ref) = ref None
let (input_data : string list ref) = ref []
let (output_file : string option ref) = ref None
let strat = ref "main"
let quiet = ref false
......@@ -97,7 +97,8 @@ module Grew_args = struct
let rec loop = function
| [] -> ()
| "-grs" :: file :: args -> grs := Some file; loop args
| "-i" :: file :: args -> input_data := Some file; loop args
| "-i" :: files :: args ->
input_data := (Str.split (Str.regexp " ") files) @ !input_data; loop args
| "-o" :: file :: args -> output_file := Some file; loop args
| "-strat" :: s :: args -> strat := s; loop args
| "-pattern" :: file :: args -> pattern := Some file; loop args
......
......@@ -40,16 +40,16 @@ let transform () =
handle (fun () ->
match (!Grew_args.grs, !Grew_args.input_data, !Grew_args.output_file) with
| (None,_,_) -> Log.message "No grs filespecified: use -grs option"; exit 1
| (_,None,_) -> Log.message "No input data specified: use -i option"; exit 1
| (_,[],_) -> Log.message "No input data specified: use -i option"; exit 1
| (_,_,None) -> Log.message "No output specified: use -o option"; exit 1
| (Some grs_file, Some input, Some output_file) ->
| (Some grs_file, input_list, Some output_file) ->
let out_ch = open_out output_file in
let grs = (if !Grew_args.old_grs then Grs.load_old grs_file else Grs.load grs_file) in
let domain = Grs.domain grs in
(* get the list of files to rewrite *)
let graph_array = Corpus.get_graphs ?domain input in
let graph_array = Corpus.get_graphs ?domain input_list in
let len = Array.length graph_array in
Array.iteri
......@@ -72,9 +72,9 @@ let transform () =
let grep () = handle
(fun () ->
match (!Grew_args.input_data, !Grew_args.pattern) with
| (None,_) -> Log.message "No input data specified: use -i option"; exit 1
| ([],_) -> Log.message "No input data specified: use -i option"; exit 1
| (_,None) -> Log.message "No pattern file specified: use -pattern option"; exit 1;
| (Some data_file, Some pattern_file) ->
| (file_list, Some pattern_file) ->
let domain = match !Grew_args.grs with
| None -> None
......@@ -83,7 +83,7 @@ let transform () =
let pattern = Pattern.load ?domain pattern_file in
(* get the array of graphs to explore *)
let graph_array = Corpus.get_graphs ?domain data_file in
let graph_array = Corpus.get_graphs ?domain file_list in
(match !Grew_args.dep_dir with
| None -> ()
......
......@@ -152,7 +152,10 @@ module Corpus = struct
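(** [get_graphs ?domain source_list] loads a corpus; when [source_list] holds a single [source], it can be:
- a folder, the corpus is the set of graphs (files matching *.gr or *.conll) in the folder
- a conll file
When [source_list] holds several sources, they are loaded together with [Conll_corpus.load_list];
an empty [source_list] raises [Fail]. *)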
let get_graphs ?domain source =
let get_graphs ?domain source_list =
match source_list with
| [source] ->
begin
if not (Sys.file_exists source)
then raise (File_not_found source);
if Sys.is_directory source
......@@ -176,6 +179,7 @@ module Corpus = struct
Array.of_list graph_list
end
else (* if [source] is a file *)
match File.get_suffix source with
| Some s when String_.contains "conll" s -> load_conll ?domain source
| Some s when String_.contains "melt" s -> load_brown ?domain source
......@@ -188,6 +192,12 @@ module Corpus = struct
with _ ->
try load_brown ?domain source
with _ -> raise (Fail (sprintf "Cannot load file \"%s\", unknown format" source))
end
| [] -> raise (Fail ( "Empty input list\n%!"))
| _ ->
let conll_corpus = Conll_corpus.load_list source_list in
Array.map (fun (sentid, conll) -> (sentid, Graph.of_conll ?domain conll)) conll_corpus
end (* module Corpus *)
(* ==================================================================================================== *)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment