Commit 8a2080cd authored by bguillaum's avatar bguillaum

version 0.27: modification for online parsing

 * add support for brown output (like melt output)
 * WARNING: specification change: function [save_full_conll] now returns an int (the number of files written) instead of unit

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@8688 7838e531-6607-4d57-9587-6c381814729c
parent ea5c7c82
VERSION = 0.26
VERSION = 0.27
INSTALL_DIR_LIB = @OCAMLLIB@
INSTALL_DIR = @prefix@/bin/
......
......@@ -371,6 +371,27 @@ module G_graph = struct
) map_without_edges lines in
{meta=[]; map=map_with_edges}
(* -------------------------------------------------------------------------------- *)
(** [of_brown brown] builds a graph from a sentence given in brown format.
    input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/."
    Items are separated by blanks and each item must have the shape
    [phon/pos/lemma]. Every item becomes one CoNLL line (numbered from 1,
    with [pos] stored in [pos2]) carrying a single "SUC" dependency, and the
    resulting lines are handed to [of_conll].
    It supposes that "SUC" is defined in current relations.
    @raise Failure if an item does not split into exactly three
    slash-separated fields. *)
let of_brown brown =
  (* Split on runs of blanks rather than on a single space: with a one-space
     delimiter, two consecutive spaces yield an empty item (rejected below)
     and a trailing newline ends up inside the last lemma. *)
  let units = Str.split (Str.regexp "[ \t\r\n]+") brown in
  (* Compile the field separator once instead of once per item. *)
  let slash = Str.regexp "/" in
  let conll_lines = List_.mapi
    (fun i item -> match Str.split slash item with
      | [phon;pos;lemma] ->
        {
          Conll.line_num=0;
          num = sprintf "%d" (i+1);
          phon;
          lemma;
          pos1 = "_";
          pos2 = pos;
          morph = [];
          (* The governor is the previous item (index i, since items are
             numbered from 1); the first item therefore points to "0".
             NOTE(review): presumably "0" is accepted as a root position by
             [of_conll] -- confirm. *)
          deps = [(sprintf "%d" i, "SUC")]
        }
      | _ -> failwith "Unexpected MElt output"
    ) units in
  of_conll conll_lines
(* -------------------------------------------------------------------------------- *)
let opt_att atts name =
try Some (List.assoc name atts)
......
......@@ -105,6 +105,10 @@ module G_graph: sig
val of_conll: ?loc:Loc.t -> Conll.line list -> t
(** [of_brown brown] builds a graph from a sentence in brown format, i.e. a
    sequence of space-separated [phon/pos/lemma] items.
    input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/."
    Each item is linked to the previous one with a "SUC" relation.
    It supposes that "SUC" is defined in current relations *)
val of_brown: string -> t
val of_xml: Xml.xml -> t
(* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ *)
(* Update functions *)
......
......@@ -79,7 +79,7 @@ module Rewrite_history = struct
File.write (Instance.to_conll t.instance) (sprintf "%s__%d.conll" base !cpt);
incr cpt
| l, _ -> List.iter loop l
in loop t
in loop t; !cpt
(* supposes that all modules are confluent and produce exactly one normal form *)
let save_det_gr base t =
......
......@@ -47,8 +47,9 @@ module Rewrite_history: sig
val save_conll: string -> t -> unit
(** [save_full_conll base_name t] saves one conll_file for each normal form defined in [t].
Output files are named according to [base_name] and a secondary index after "__". *)
val save_full_conll: string -> t -> unit
Output files are named according to [base_name] and a secondary index after "__".
The number of conll files produced is returned. *)
val save_full_conll: string -> t -> int
(** [save_det_gr base_name t] supposes that the current GRS is deterministic.
It writes exactly one output file named [base_name].gr with the unique normal form. *)
......
......@@ -131,6 +131,13 @@ let of_conll file_name line_list =
Instance.from_graph graph
) ()
(* Build an instance from a brown-format sentence: the string is converted
   to a graph by [G_graph.of_brown], then wrapped by [Instance.from_graph].
   The whole computation is run through [handle] under the name "of_brown". *)
let of_brown brown =
  handle ~name:"of_brown"
    (fun () -> Instance.from_graph (G_graph.of_brown brown))
    ()
let load_graph file =
handle ~name:"load_graph" ~file
(fun () ->
......
......@@ -79,7 +79,10 @@ val save_gr: string -> Rewrite_history.t -> unit
val save_conll: string -> Rewrite_history.t -> unit
val save_full_conll: string -> Rewrite_history.t -> unit
(** [save_full_conll base_name rh] saves one conll_file for each normal form defined in [rh].
Output files are named according to [base_name] and a secondary index after "__".
The number of conll files produced is returned. *)
val save_full_conll: string -> Rewrite_history.t -> int
val save_det_gr: string -> Rewrite_history.t -> unit
......@@ -99,6 +102,8 @@ val load_graph: string -> Instance.t
(** [of_conll filename line_list] *)
val of_conll: string -> (int * string) list -> Instance.t
(** [of_brown brown] builds an instance from a sentence in brown format,
    i.e. space-separated [phon/pos/lemma] items such as
    "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/." *)
val of_brown: string -> Instance.t
val xml_graph: Xml.xml -> Instance.t
(** [raw_graph instance] returns all graph information with a triple of elementary caml types:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment