Commit 87ad29e1 authored by bguillaum's avatar bguillaum

add xml input handling

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@7798 7838e531-6607-4d57-9587-6c381814729c
parent 8c8e3816
......@@ -25,6 +25,10 @@ LOG_DIR = @LOG_INCLUDE@
LOG_BYTE = $(LOG_DIR) log.cma
LOG_OPT = $(LOG_DIR) log.cmxa
XML_LIGHT_DIR = @XMLLIGHT_INCLUDE@
XML_LIGHT_BYTE = $(XML_LIGHT_DIR) xml-light.cma
XML_LIGHT_OPT = $(XML_LIGHT_DIR) xml-light.cmxa
DEP2PICT=@DEP2PICT@
DEP2PICT_DIR = @DEP2PICT_INCLUDE@
DEP2PICT_BYTE = $(DEP2PICT_DIR) dep2pict.cma
......
......@@ -63,7 +63,9 @@ if test "$config" = yes ; then
AC_LIB_CHECKING(Log,log,log,log,Log,"",true)
AC_SUBST(LOG_INCLUDE,$LIB_INCLUDE)
AC_LIB_CHECKING(Xml-light,xml-light,xml-light,xml-light,XmlParser,"",true,true)
AC_SUBST(XMLLIGHT_INCLUDE,$LIB_INCLUDE)
# Dep2pict is optional
AC_LIB_CHECKING(Dep2pict,dep2pict,dep2pict,dep2pict,Dep2pict,"",false,true)
......
......@@ -595,6 +595,7 @@ WARNING
DEBUG
DEP2PICT
DEP2PICT_INCLUDE
XMLLIGHT_INCLUDE
LOG_INCLUDE
ANSITERMINAL_INCLUDE
MENHIR
......@@ -2249,6 +2250,138 @@ $as_echo "The $NAME library is missing." >&6; }
LOG_INCLUDE=$LIB_INCLUDE
# Library check for Xml-light (the macro name AC_LIB_CHECKING is rebuilt in $MACRO below).
# It tries, in order: the OCaml standard library directory, a dedicated
# sub-directory (-I +xml-light), then ocamlfind. On success LIB_INCLUDE holds the
# -I option to use (possibly empty); it is finally copied into XMLLIGHT_INCLUDE.
# All output is discarded with the portable "> /dev/null 2>&1" form: the csh-style
# ">& /dev/null" is a bashism that POSIX shells such as dash reject.
MACRO="A"
MACRO=$MACRO"C_LIB_CHECKING"
NAME=Xml-light
STD_FOLDER=xml-light
OFIND_NAME=xml-light
LIB_FILE=xml-light
MOD=XmlParser
OPTIONS=""
NO_OPTIONAL=true
FULL_PATH=true
OCAMLLIB_DIR=`ocamlc -where`
# abort if a mandatory argument is missing
if test "$NAME" = "" || test "$STD_FOLDER" = "" || test "$OFIND_NAME" = "" || test "$LIB_FILE" = "" || test "$MOD" = "" ; then
  echo ""
  echo "Can't check a library"
  echo "Missing argument in $MACRO($NAME,$STD_FOLDER,$OFIND_NAME,$LIB_FILE,$MOD,$OPTIONS,$NO_OPTIONAL)"
  echo ""
  exit 1
fi
# print the name of the library being checked (and whether it is required or optional)
if test "$NO_OPTIONAL" = true; then
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for requested library : $NAME" >&5
$as_echo_n "checking for requested library : $NAME... " >&6; }
else
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for optional library : $NAME" >&5
$as_echo_n "checking for optional library : $NAME... " >&6; }
fi
# if the ml test file already exists
if test -f c_check_$STD_FOLDER.ml ; then
  # if it can be removed
  if test -w c_check_$STD_FOLDER.ml ; then
    # remove it
    rm c_check_$STD_FOLDER.ml > /dev/null 2>&1
  # otherwise stop with an error message (i.e. we have no write permission on the file)
  else
    { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
as_fn_error $? "Cannot remove c_check_xml-light.ml. Please change its right with chmod 666 c_check_xml-light.ml
See \`config.log' for more details" "$LINENO" 5; }
  fi
fi
# write the ml test file
echo "open $MOD;;" > c_check_$STD_FOLDER.ml
# if it compiles with the library found in the ocaml lib directory
if (ocamlc -c $OPTIONS c_check_$STD_FOLDER.ml > /dev/null 2>&1) ; then
  # no include needed
  LIB_INCLUDE=""
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: Found in ocaml lib directory" >&5
$as_echo "Found in ocaml lib directory" >&6; }
# otherwise try a directory dedicated to the library (-I +xxx yyy.cma)
elif (ocamlc -c $OPTIONS -I +$STD_FOLDER $LIB_FILE.cma c_check_$STD_FOLDER.ml > /dev/null 2>&1) ; then
  # it works: stop here and record the include option
  if test "$FULL_PATH" = "true" ; then
    LIB_INCLUDE="-I $OCAMLLIB_DIR/$STD_FOLDER"
  else
    LIB_INCLUDE="-I +$STD_FOLDER"
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: Found in $STD_FOLDER directory => $LIB_INCLUDE" >&5
$as_echo "Found in $STD_FOLDER directory => $LIB_INCLUDE" >&6; }
# otherwise, try with ocamlfind
else
  # without ocamlfind: stop (required library) or warn (optional library)
  # NOTE(review): this test compares the two literals "ocamlfind" and "no", so this
  # branch is never taken; a missing ocamlfind is caught by the failing
  # `ocamlfind query` below, which leads to the same error/warning.
  if test "ocamlfind" = no ; then
    if test "$NO_OPTIONAL" = "true"; then
      as_fn_error $? "The $NAME library is missing." "$LINENO" 5
      LIB_INCLUDE="no"
    else
      { $as_echo "$as_me:${as_lineno-$LINENO}: result: The $NAME library is missing." >&5
$as_echo "The $NAME library is missing." >&6; }
      LIB_INCLUDE="no"
    fi
  # otherwise, ocamlfind is available
  else
    # first check whether the library is installed with ocamlfind
    if ocamlfind query $OFIND_NAME > /dev/null 2>&1 ; then
      # if so, get the installation directory and the matching include option
      LIB_INCLUDE=`ocamlfind query $OFIND_NAME`
      LIB_INCLUDE="-I $LIB_INCLUDE"
      # now check that the ml test file compiles
      if (ocamlc -c $OPTIONS $LIB_INCLUDE $LIB_FILE.cma c_check_$STD_FOLDER.ml > /dev/null 2>&1) ; then
        # success: report that the library was found with ocamlfind
        { $as_echo "$as_me:${as_lineno-$LINENO}: result: Found with ocamlfind => $LIB_INCLUDE" >&5
$as_echo "Found with ocamlfind => $LIB_INCLUDE" >&6; }
      # failure
      else
        # depending on whether the library is optional: a warning or an error
        if test "$NO_OPTIONAL" = "true"; then
          as_fn_error $? "The $NAME library is missing." "$LINENO" 5
          LIB_INCLUDE="no"
        else
          { $as_echo "$as_me:${as_lineno-$LINENO}: result: The $NAME library is missing." >&5
$as_echo "The $NAME library is missing." >&6; }
          LIB_INCLUDE="no"
        fi
      fi
    # the library is not installed with ocamlfind
    else
      # depending on whether the library is optional: a warning or an error
      if test "$NO_OPTIONAL" = "true"; then
        as_fn_error $? "The $NAME library is missing." "$LINENO" 5
        LIB_INCLUDE="no"
      else
        { $as_echo "$as_me:${as_lineno-$LINENO}: result: The $NAME library is missing." >&5
$as_echo "The $NAME library is missing." >&6; }
        LIB_INCLUDE="no"
      fi
    fi
  fi
fi
# remove the ml test file and its compilation products
rm c_check_$STD_FOLDER.ml > /dev/null 2>&1
rm c_check_$STD_FOLDER.cmo > /dev/null 2>&1
rm c_check_$STD_FOLDER.cmi > /dev/null 2>&1
XMLLIGHT_INCLUDE=$LIB_INCLUDE
# Dep2pict is optional
MACRO="A"
......
......@@ -26,22 +26,22 @@ include ../config/Makefile
#executables
grew.byte: $(FILES_CMO) parser/parser_global.cmo parser_byte parser/grew_parser.cmo grew.ml
$(OCAMLC) $(BYTE_FLAGS) -o grew.byte $(ANSITERMINAL_BYTE) $(LOG_BYTE) \
$(OCAMLC) $(BYTE_FLAGS) -o grew.byte $(XML_LIGHT_BYTE) $(ANSITERMINAL_BYTE) $(LOG_BYTE) \
$(FILES_CMO) -I parser $(PARSER_CMO) \
grew.ml
grew: $(FILES_CMX) parser/parser_global.cmx parser_opt parser/grew_parser.cmx grew.ml
$(OCAMLOPT) $(OPT_FLAGS) -o grew str.cmxa unix.cmxa $(ANSITERMINAL_OPT) $(LOG_OPT) \
$(OCAMLOPT) $(OPT_FLAGS) -o grew str.cmxa unix.cmxa $(XML_LIGHT_OPT) $(ANSITERMINAL_OPT) $(LOG_OPT) \
$(FILES_CMX) -I parser $(PARSER_CMX) \
grew.ml
test.byte: parser/parser_global.cmo parser_byte parser/grew_parser.cmo $(FILES_CMO) $(FILES_CMI) test.ml
$(OCAMLC) $(BYTE_FLAGS) -o test.byte $(ANSITERMINAL_BYTE) $(LOG_BYTE) \
$(OCAMLC) $(BYTE_FLAGS) -o test.byte $(XML_LIGHT_BYTE) $(ANSITERMINAL_BYTE) $(LOG_BYTE) \
$(FILES_CMO) -I parser $(PARSER_CMO) \
test.ml
test: parser/parser_global.cmx parser_opt parser/grew_parser.cmx $(FILES_CMX) $(FILES_CMI) test.ml
$(OCAMLOPT) $(OPT_FLAGS) -o test str.cmxa unix.cmxa $(ANSITERMINAL_OPT) $(LOG_OPT) \
$(OCAMLOPT) $(OPT_FLAGS) -o test str.cmxa unix.cmxa $(XML_LIGHT_OPT) $(ANSITERMINAL_OPT) $(LOG_OPT) \
$(FILES_CMX) -I parser $(PARSER_CMX) \
test.ml
......@@ -70,19 +70,19 @@ libgrew.mli : grew_types.mli libgrew_.mli
libgrew.cma : $(FILES_CMO) parser_byte libgrew.mli libgrew.ml
ifeq (@DEP2PICT@,no)
$(OCAMLC) -c -pp 'camlp4o pa_macro.cmo' $(BYPE_FLAGS) $(FILES_CMO) str.cma -I parser $(PARSER_CMO) libgrew.mli
$(OCAMLC) -a -o libgrew.cma $(BYPE_FLAGS) -pp 'camlp4o pa_macro.cmo -DDATA_DIR=\"$(DATA_DIR)\"' -linkall $(FILES_CMO) -I parser $(PARSER_CMO) libgrew.ml
$(OCAMLC) -c -pp 'camlp4o pa_macro.cmo' $(BYTE_FLAGS) $(FILES_CMO) $(XML_LIGHT_BYTE) str.cma -I parser $(PARSER_CMO) libgrew.mli
$(OCAMLC) -a -o libgrew.cma $(BYTE_FLAGS) -pp 'camlp4o pa_macro.cmo -DDATA_DIR=\"$(DATA_DIR)\"' -linkall $(FILES_CMO) -I parser $(PARSER_CMO) libgrew.ml
else
$(OCAMLC) -c -pp 'camlp4o pa_macro.cmo -DDEP2PICT' $(BYPE_FLAGS) $(FILES_CMO) str.cma -I parser $(PARSER_CMO) libgrew.mli
$(OCAMLC) -a -o libgrew.cma $(BYPE_FLAGS) -pp 'camlp4o pa_macro.cmo -DDATA_DIR=\"$(DATA_DIR)\" -DDEP2PICT' -linkall $(FILES_CMO) -I parser $(PARSER_CMO) libgrew.ml
$(OCAMLC) -c -pp 'camlp4o pa_macro.cmo -DDEP2PICT' $(BYTE_FLAGS) $(FILES_CMO) $(XML_LIGHT_BYTE) str.cma -I parser $(PARSER_CMO) libgrew.mli
$(OCAMLC) -a -o libgrew.cma $(BYTE_FLAGS) -pp 'camlp4o pa_macro.cmo -DDATA_DIR=\"$(DATA_DIR)\" -DDEP2PICT' -linkall $(FILES_CMO) -I parser $(PARSER_CMO) libgrew.ml
endif
libgrew.cmxa : $(FILES_CMX) parser_opt libgrew.mli libgrew.ml
ifeq (@DEP2PICT@,no)
$(OCAMLOPT) -c -pp 'camlp4o pa_macro.cmo' $(OPT_FLAGS) $(FILES_CMX) str.cmxa -I parser $(PARSER_CMX) libgrew.mli
$(OCAMLOPT) -c -pp 'camlp4o pa_macro.cmo' $(OPT_FLAGS) $(FILES_CMX) $(XML_LIGHT_OPT) str.cmxa -I parser $(PARSER_CMX) libgrew.mli
$(OCAMLOPT) -a -o libgrew.cmxa $(OPT_FLAGS) -pp 'camlp4o pa_macro.cmo -DDATA_DIR=\"$(DATA_DIR)\"' -linkall $(FILES_CMX) -I parser $(PARSER_CMX) libgrew.ml
else
$(OCAMLOPT) -c -pp 'camlp4o pa_macro.cmo -DDEP2PICT' $(FILES_CMX) str.cmxa -I parser $(PARSER_CMX) libgrew.mli
$(OCAMLOPT) -c -pp 'camlp4o pa_macro.cmo -DDEP2PICT' $(FILES_CMX) $(XML_LIGHT_OPT) str.cmxa -I parser $(PARSER_CMX) libgrew.mli
$(OCAMLOPT) -a -o libgrew.cmxa -pp 'camlp4o pa_macro.cmo -DDATA_DIR=\"$(DATA_DIR)\" -DDEP2PICT' -linkall $(FILES_CMX) -I parser $(PARSER_CMX) libgrew.ml
endif
......@@ -212,13 +212,13 @@ GREW_GRAPH_CMO = $(GREW_GRAPH_DEP:%=%.cmo)
GREW_GRAPH_CMX = $(GREW_GRAPH_DEP:%=%.cmx)
grew_graph.cmi: $(GREW_GRAPH_CMI) grew_graph.mli
$(OCAMLC) -c grew_graph.mli
$(OCAMLC) -c $(XML_LIGHT_BYTE) grew_graph.mli
grew_graph.cmo: $(GREW_GRAPH_CMO) grew_graph.cmi grew_graph.ml
$(OCAMLC) $(BYTE_FLAGS) -c $(DEPENDS_DIR) grew_graph.ml
$(OCAMLC) $(BYTE_FLAGS) -c $(DEPENDS_DIR) $(XML_LIGHT_BYTE) grew_graph.ml
grew_graph.cmx: $(GREW_GRAPH_CMX) grew_graph.cmi grew_graph.ml
$(OCAMLOPT) $(OPT_FLAGS) -c $(DEPENDS_DIR) grew_graph.ml
$(OCAMLOPT) $(OPT_FLAGS) -c $(DEPENDS_DIR) $(XML_LIGHT_OPT) grew_graph.ml
################################################################################
......
......@@ -191,7 +191,7 @@ module G_fs = struct
let get_main ?main_feat t =
let main_list = match main_feat with
| None -> []
| None -> ["phon"]
| Some string -> Str.split (Str.regexp "\\( *; *\\)\\|#") string in
let rec loop = function
......
......@@ -8,6 +8,7 @@ open Grew_fs
open Grew_node
open Grew_command
module Str_map = Map.Make (String)
(* ==================================================================================================== *)
module P_deco = struct
......@@ -324,6 +325,53 @@ module G_graph = struct
{meta=[]; map=nodes_with_edges}
(* -------------------------------------------------------------------------------- *)
(** [opt_att atts name] returns [Some v] where [v] is the value of the first pair of
    the association list [atts] whose key is [name], or [None] if there is no such pair. *)
let opt_att atts name =
  let rec search = function
    | [] -> None
    | (key, value) :: rest ->
      (* same key equality as [List.assoc] *)
      if compare key name = 0 then Some value else search rest in
  search atts
(** [of_xml d_xml] loads a graph in the xml format: [d_xml] must be a <D> xml element.

    - Each <T> child (which must contain exactly one text item) gives one node: its
      position is the index supplied by [List_.foldi_left], its features are "phon"
      (the text content), "cat" (the "label" attribute) and every other attribute
      except "id", "start" and "end".
    - Every other child must be an <R> element; it gives one edge labelled by its
      "label" attribute, from the node whose id is "from" to the node whose id is "to".

    Any malformed input (wrong tag, missing mandatory attribute, reference to an
    unknown node id, edge that cannot be added) is reported through [Log.critical] or
    [Log.fcritical]. *)
let of_xml d_xml =
  (* Mandatory attribute lookup: a missing attribute is reported with the tag and
     attribute names instead of escaping as a bare [Not_found]. *)
  let get_att tag atts name =
    try List.assoc name atts
    with Not_found ->
      Log.fcritical "[G_graph.of_xml] Missing attribute '%s' in <%s> tag" name tag in
  match d_xml with
  | Xml.Element ("D", _, t_or_r_list) ->
    (* <T> children describe nodes; every other child is expected to be an <R> edge *)
    let (t_list, r_list) =
      List.partition (function Xml.Element ("T",_,_) -> true | _ -> false) t_or_r_list in
    (* first pass: build the nodes and remember which gid each xml id maps to *)
    let (nodes_without_edges, mapping) =
      List_.foldi_left
        (fun i (acc, acc_map) t_xml ->
          match t_xml with
          | Xml.Element ("T", t_atts, [Xml.PCData phon]) ->
            let id = get_att "T" t_atts "id" in
            let cat = get_att "T" t_atts "label" in
            let other_feats =
              List.filter (fun (n,_) -> not (List.mem n ["id"; "start"; "end"; "label"])) t_atts in
            let new_fs =
              List.fold_left
                (fun acc2 (fn,fv) -> G_fs.set_feat fn fv acc2)
                G_fs.empty
                (("phon", phon) :: ("cat", cat) :: other_feats) in
            let new_node = G_node.set_fs (G_node.set_pos G_node.empty i) new_fs in
            (Gid_map.add (Gid.Old i) new_node acc, Str_map.add id (Gid.Old i) acc_map)
          | _ -> Log.critical "[G_graph.of_xml] Not a wellformed <T> tag"
        ) (Gid_map.empty, Str_map.empty) t_list in
    (* resolve the xml node id found in attribute [att] of an <R> tag to its gid *)
    let gid_of_id att id =
      try Str_map.find id mapping
      with Not_found ->
        Log.fcritical "[G_graph.of_xml] Unknown node id '%s' in attribute '%s' of <R> tag" id att in
    (* second pass: add each edge to its source node *)
    let final_map =
      List.fold_left
        (fun acc r_xml ->
          match r_xml with
          | Xml.Element ("R", r_atts, _) ->
            let src = get_att "R" r_atts "from" in
            let tar = get_att "R" r_atts "to" in
            let label = get_att "R" r_atts "label" in
            let gid_tar = gid_of_id "to" tar in
            let gid_src = gid_of_id "from" src in
            let old_node = Gid_map.find gid_src acc in
            begin
              match G_node.add_edge (G_edge.make label) gid_tar old_node with
              | Some new_node -> Gid_map.add gid_src new_node acc
              | None -> Log.critical "[G_graph.of_xml] Fail to add edge"
            end
          | _ -> Log.critical "[G_graph.of_xml] Not a wellformed <R> tag"
        ) nodes_without_edges r_list in
    {meta=[]; map=final_map}
  | _ -> Log.critical "[G_graph.of_xml] Not a <D> tag"
(* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ *)
(* Update functions *)
(* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ *)
......@@ -652,7 +700,7 @@ module G_graph = struct
let nodes = Gid_map.fold (fun gid node acc -> (gid,node)::acc) graph.map [] in
let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.pos_comp n1 n2) nodes in
let get_num gid = list_num (fun (x,_) -> x=gid) snodes in
let get_num gid = (list_num (fun (x,_) -> x=gid) snodes) + 1 in
(* Warning: [govs_labs] maps [gid]s to [num]s *)
let govs_labs =
......
......@@ -96,6 +96,7 @@ module G_graph: sig
val of_conll: ?loc:Loc.t -> Conll.line list -> t
(** [of_xml d_xml] loads a graph in the xml format: [d_xml] must be a <D> xml element. *)
val of_xml: Xml.xml -> t
(* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ *)
(* Update functions *)
(* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ *)
......
......@@ -79,7 +79,15 @@ module Rewrite_history = struct
| _ -> Error.run "Not a single rewriting"
in loop t
(** [det_dep_string t] walks down the rewriting history [t] as long as each step has
    exactly one good normal form and no bad one, and returns [G_graph.to_dep] applied
    to the graph of the final step (the one with no normal form at all).
    Any other shape is reported with [Error.run]. *)
let rec det_dep_string t =
  match (t.good_nf, t.bad_nf) with
  | ([], []) -> G_graph.to_dep t.instance.Instance.graph
  | ([only_child], []) -> det_dep_string only_child
  | _ -> Error.run "Not a single rewriting"
end
......
......@@ -35,6 +35,8 @@ module Rewrite_history: sig
It writes exactly one output file named [base_name].gr with the unique normal form. *)
val save_det_gr: string -> t -> unit
val save_det_conll: string -> t -> unit
(** [det_dep_string t] returns the [G_graph.to_dep] output for the graph reached at the
    end of the rewriting history [t]. The history must contain a single chain of
    rewriting (one good normal form and no bad one at each step); otherwise the
    error "Not a single rewriting" is reported. *)
val det_dep_string: t -> string
end
module Modul: sig
......
......@@ -17,6 +17,7 @@ module G_node = struct
let get_next t = t.next
let set_fs t fs = {t with fs = fs}
(** [set_pos t position] returns a copy of the node [t] whose position is [Some position]. *)
let set_pos t position = { t with pos = Some position }
let empty = { fs = G_fs.empty; pos = None; next = Massoc_gid.empty }
......
......@@ -16,6 +16,7 @@ module G_node: sig
val get_next: t -> G_edge.t Massoc_gid.t
val set_fs: t -> G_fs.t -> t
(** [set_pos t pos] returns the node [t] with its position set to [pos]. *)
val set_pos: t -> int -> t
val remove: Gid.t -> G_edge.t -> t -> t
......
......@@ -121,6 +121,12 @@ let load_graph file =
Log.fcritical "[Libgrew.load_graph] Cannot guess input file format of file '%s'. Use .gr or .conll file extension" file
end
(** [xml_graph xml] builds an instance from the graph described by the xml element
    [xml] (see [G_graph.of_xml]).
    [Error.Build] and [Error.Bug] are re-raised as [Build] and [Bug]; any other
    exception is wrapped into [Bug] together with its printed form. *)
let xml_graph xml =
  try Instance.from_graph (G_graph.of_xml xml) with
  | Error.Build (msg, loc) -> raise (Build (msg, loc))
  | Error.Bug (msg, loc) -> raise (Bug (msg, loc))
  (* the message names this function (it used to say "load_conll", a copy-paste slip) *)
  | exc -> raise (Bug (sprintf "[Libgrew.xml_graph] UNCATCHED EXCEPTION: %s" (Printexc.to_string exc), None))
let raw_graph instance =
G_graph.to_raw instance.Instance.graph
......@@ -146,6 +152,11 @@ let save_index ~dirname ~base_names =
List.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
close_out out_ch
(** [save_graph_conll filename graph] writes the text returned by
    [Instance.to_conll graph] into the file [filename] (created or truncated). *)
let save_graph_conll filename graph =
  (* Build the text before opening the file: if the conversion fails, no channel is
     leaked and an existing file is not truncated. *)
  let conll = Instance.to_conll graph in
  let out_ch = open_out filename in
  (* make sure the channel is released even when the write itself fails *)
  (try output_string out_ch conll
   with exc -> close_out_noerr out_ch; raise exc);
  close_out out_ch
let save_gr base rew_hist = Rewrite_history.save_gr base rew_hist
let save_conll base rew_hist = Rewrite_history.save_conll base rew_hist
......@@ -154,6 +165,8 @@ let save_det_gr base rew_hist = Rewrite_history.save_det_gr base rew_hist
let save_det_conll base rew_hist = Rewrite_history.save_det_conll base rew_hist
(** Same function as [Rewrite_history.det_dep_string], re-exported by this module. *)
let det_dep_string = Rewrite_history.det_dep_string
let write_html
?(no_init=false)
?(out_gr=false)
......
......@@ -50,6 +50,8 @@ val get_sequence_names: Grs.t -> string list
val to_sentence: ?main_feat:string -> Instance.t -> string
(** [save_graph_conll filename instance] writes into the file [filename] the text
    produced by [Instance.to_conll] for [instance]. *)
val save_graph_conll: string -> Instance.t -> unit
val save_gr: string -> Rewrite_history.t -> unit
val save_conll: string -> Rewrite_history.t -> unit
......@@ -58,6 +60,8 @@ val save_det_gr: string -> Rewrite_history.t -> unit
val save_det_conll: string -> Rewrite_history.t -> unit
(** [det_dep_string rew_hist] returns the result of [Rewrite_history.det_dep_string]
    on [rew_hist]: the dep output of the graph at the end of a single chain of rewriting. *)
val det_dep_string: Rewrite_history.t -> string
(** get a graph from a file either in 'gr' or 'conll' format.
File extension should be '.gr' or '.conll'.
@raise Parsing_err if libgrew can't parse the file
......@@ -65,6 +69,8 @@ File extension should be '.gr' or '.conll'.
*)
val load_graph: string -> Instance.t
(** [xml_graph xml] returns the instance built from the graph described by the xml
    element [xml]; the element is read by [G_graph.of_xml], which expects a <D> element.
    @raise Build when the underlying code raises [Error.Build]
    @raise Bug when the underlying code raises [Error.Bug] or any other exception *)
val xml_graph: Xml.xml -> Instance.t
(** [raw_graph instance] returns all graph information with a triple of basic caml types:
- the meta data
- the list of node (node is a list of feature (feature is string * string))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment