Commit f39ae3c4 authored by bguillaum
Browse files

Improve Brown loading (more robust, and takes the sentence identifier into account)

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@8755 7838e531-6607-4d57-9587-6c381814729c
parent 00ab496e
...@@ -373,11 +373,15 @@ module G_graph = struct ...@@ -373,11 +373,15 @@ module G_graph = struct
(* -------------------------------------------------------------------------------- *) (* -------------------------------------------------------------------------------- *)
(** input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/." *) (** input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/." *)
let of_brown brown = let of_brown ?sentid brown =
let units = Str.split (Str.regexp " ") brown in let units = Str.split (Str.regexp " ") brown in
let conll_lines = List_.mapi let conll_lines = List_.mapi
(fun i item -> match Str.split (Str.regexp "/") item with (fun i item -> match Str.full_split (Str.regexp "/[A-Z'+'']+/") item with
| [phon;pos;lemma] -> | [Str.Text phon; Str.Delim pos; Str.Text lemma] ->
let pos = String.sub pos 1 ((String.length pos)-2) in
let morph = match (i,sentid) with
| (0,Some id) -> [("sentid", id)]
| _ -> [] in
{ {
Conll.line_num=0; Conll.line_num=0;
num = sprintf "%d" (i+1); num = sprintf "%d" (i+1);
...@@ -385,10 +389,10 @@ module G_graph = struct ...@@ -385,10 +389,10 @@ module G_graph = struct
lemma; lemma;
pos1 = "_"; pos1 = "_";
pos2 = pos; pos2 = pos;
morph = []; morph;
deps = [(sprintf "%d" i, "SUC")] deps = [(sprintf "%d" i, "SUC")]
} }
| _ -> failwith "Unexpected MElt output" | _ -> Error.build "[Graph.of_brown] Cannot parse Brown item >>>%s<<< (expected \"phon/POS/lemma\")" item
) units in ) units in
of_conll conll_lines of_conll conll_lines
......
...@@ -107,7 +107,7 @@ module G_graph: sig ...@@ -107,7 +107,7 @@ module G_graph: sig
(** input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/." (** input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/."
It supposes that "SUC" is defined in current relations *) It supposes that "SUC" is defined in current relations *)
(* The optional [?sentid] argument exists only in the post-commit (right-hand)
   signature. In the implementation shown in this diff, when [sentid] is given
   it is recorded as a ("sentid", id) entry in the [morph] field of the first
   item's CoNLL line; the other items get an empty [morph]. *)
val of_brown: string -> t val of_brown: ?sentid: string -> string -> t
val of_xml: Xml.xml -> t val of_xml: Xml.xml -> t
(* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ *) (* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ *)
......
...@@ -131,10 +131,10 @@ let of_conll file_name line_list = ...@@ -131,10 +131,10 @@ let of_conll file_name line_list =
Instance.from_graph graph Instance.from_graph graph
) () ) ()
(** [of_brown ?sentid brown] builds a graph from the Brown-format string
    [brown] with [G_graph.of_brown], forwarding the optional [sentid], and
    converts that graph with [Instance.from_graph].
    The computation is passed as a thunk to [handle ~name:"of_brown"];
    presumably [handle] is the library's error wrapper -- TODO confirm, its
    definition is not visible here.
    NOTE(review): each line below shows the pre-commit text followed by the
    post-commit text; this description applies to the post-commit version. *)
let of_brown brown = let of_brown ?sentid brown =
handle ~name:"of_brown" handle ~name:"of_brown"
(fun () -> (fun () ->
let graph = G_graph.of_brown brown in let graph = G_graph.of_brown ?sentid brown in
Instance.from_graph graph Instance.from_graph graph
) () ) ()
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment