Commit d6a73a53 authored by Bruno Guillaume

Fix compilation problem

parent 56e4160b
......@@ -14,7 +14,6 @@ open Libgrew
open Grew_utils
let fos str =
try float_of_string str
with Failure _ ->
......
......@@ -141,6 +141,68 @@ module Svg = struct
temp_file_name
end (* module Svg *)
(* ================================================================================ *)
module Corpus = struct
  (** Raised when a corpus file cannot be interpreted: an illegal Brown line,
      or a file that none of the loaders accepts. *)
  exception Fail of string

  (** Raised by [get_graphs] when the given path does not exist. *)
  exception File_not_found of string

  (** [load_conll ?domain file] loads [file] with [Conll_corpus.load] and turns
      every [(sentid, conll)] entry into [(sentid, graph)] using
      [Graph.of_conll]. *)
  let load_conll ?domain file =
    let conll_corpus = Conll_corpus.load file in
    Array.map (fun (sentid, conll) -> (sentid, Graph.of_conll ?domain conll)) conll_corpus

  (** [load_brown ?domain file] builds one graph per line of [file] with
      [Graph.of_brown]. Each line is split on the hash character:
      - one field: the field is the sentence and the sentence identifier is the
        line index printed on five digits;
      - two fields: sentence identifier, then sentence;
      - no field (empty line, or only separators): the callback returns [None]
        (presumably the line is then skipped by [List_.opt_mapi] -- TODO confirm).
      @raise Fail if a line has more than two fields. *)
  let load_brown ?domain file =
    (* The separator is loop-invariant: compile it once, not once per line. *)
    let separator = Str.regexp "#" in
    let graph_of_line i line =
      match Str.split separator line with
      | [] -> None
      | [text] ->
        let sentid = sprintf "%05d" i in
        Some (sentid, Graph.of_brown ?domain ~sentid text)
      | [sentid; text] -> Some (sentid, Graph.of_brown ?domain ~sentid text)
      | _ -> raise (Fail (sprintf "[file %s, line %d] Illegal Brown line >>>%s<<<<\n%!" file i line))
    in
    Array.of_list (List_.opt_mapi graph_of_line (File.read file))

  (** [get_graphs ?domain source] loads a corpus as an array of
      [(identifier, graph)] pairs; [source] can be:
      - a folder: every entry with suffix .gr, .conll or .conllu is loaded,
        in alphabetical order of file names; a .gr graph is identified by its
        file name without extension, a conll graph by its sentid when
        [Conll.get_sentid] gives one and by its file name otherwise;
      - a file: the loader is chosen from the result of [File.get_suffix]
        (conll, melt, brown, gr; the first test that succeeds wins); a gr file
        gives a one-element corpus identified by [source] itself. With any
        other suffix a warning is logged and the conll loader, then the Brown
        loader, are tried in turn.
      @raise File_not_found if [source] does not exist.
      @raise Fail if the format of a file has to be guessed and no loader
      accepts it. *)
  let get_graphs ?domain source =
    if not (Sys.file_exists source)
    then raise (File_not_found source);
    if Sys.is_directory source
    then (* if [source] is a folder *)
      begin
        let files_array = Sys.readdir source in
        (* [Sys.readdir] gives no guarantee on the order of the entries:
           sort them so that the corpus order does not depend on the filesystem. *)
        Array.sort String.compare files_array;
        let graph_list =
          Array.fold_right
            (fun file acc ->
               if Filename.check_suffix file ".gr"
               then (Filename.chop_extension file, Graph.load ?domain (Filename.concat source file)) :: acc
               else if (Filename.check_suffix file ".conll" || Filename.check_suffix file ".conllu")
               then
                 begin
                   let conll = Conll.load (Filename.concat source file) in
                   let graph = Graph.of_conll ?domain conll in
                   match Conll.get_sentid conll with
                   | Some sentid -> (sentid, graph) :: acc
                   | None -> (file, graph) :: acc
                 end
               else acc
            ) files_array [] in
        Array.of_list graph_list
      end
    else (* if [source] is a file *)
      match File.get_suffix source with
      | Some s when String_.contains "conll" s -> load_conll ?domain source
      | Some s when String_.contains "melt" s -> load_brown ?domain source
      | Some s when String_.contains "brown" s -> load_brown ?domain source
      | Some s when String_.contains "gr" s -> [| (source, Graph.load ?domain source) |]
      | _ ->
        Log.fwarning "Unknown suffix for file \"%s\", trying to guess format..." source;
        (* Guessing is best-effort: any loader failure means "not this format".
           An interrupt or memory exhaustion says nothing about the format,
           so those two are propagated instead of being swallowed. *)
        let attempt loader =
          try Some (loader source)
          with
          | (Sys.Break | Out_of_memory) as fatal -> raise fatal
          | _ -> None
        in
        begin
          match attempt (load_conll ?domain) with
          | Some graphs -> graphs
          | None ->
            begin
              match attempt (load_brown ?domain) with
              | Some graphs -> graphs
              | None -> raise (Fail (sprintf "Cannot load file \"%s\", unknown format" source))
            end
        end
end (* module Corpus *)
(* ==================================================================================================== *)
module Int =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment