Commit 548db436 authored by bguillaum

add conll output

add full support of ":" in labels

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@7795 7838e531-6607-4d57-9587-6c381814729c
parent d444c7f7
......@@ -216,9 +216,15 @@ module G_fs = struct
let reduced_sub = match filter with
| None -> sub
| Some l -> List.filter (fun (fn,_) -> List.mem fn l) sub in
sprintf " word=\"%s\"; subword=\"%s\"; "
sprintf " word=\"%s\"; subword=\"%s\""
(match main_opt with Some atom -> string_of_value atom | None -> "")
(List_.to_string G_feature.to_string "#" reduced_sub)
(** [to_conll ?exclude t] renders the feature structure [t] as the content of a
    CoNLL FEATS column: the features are joined with "|"; a feature whose value
    is [String "true"] is written as its bare name, any other one as "name=value".
    When [exclude] is given, the features listed in it and the features whose
    name starts with '_' are dropped.
    The CoNLL placeholder "_" is returned when no feature remains, so that the
    column is never left empty. *)
let to_conll ?exclude t =
  let reduced_t = match exclude with
    | None -> t
    | Some list ->
      (* the [fn <> ""] guard avoids [Invalid_argument] on an empty feature name *)
      List.filter (fun (fn,_) -> not (List.mem fn list || (fn <> "" && fn.[0]='_'))) t in
  match reduced_t with
  | [] -> "_"
  | _ ->
    String.concat "|"
      (List.map
         (function
           | (fn, String "true") -> fn
           | (fn, fv) -> fn^"="^(string_of_value fv))
         reduced_t)
end (* module G_fs *)
(* ==================================================================================================== *)
......
......@@ -33,6 +33,7 @@ module G_fs: sig
val to_word: ?main_feat: string -> t -> string
val to_dep: ?main_feat: string -> ?filter: string list -> t -> string
val to_raw: t -> (string * string) list
(** [to_conll ?exclude t] returns the features of [t] joined with "|".
    When [exclude] is given, the features listed in it and the features whose
    name starts with '_' are left out. *)
val to_conll: ?exclude: string list -> t -> string
val to_string: t -> string
......
......@@ -223,7 +223,7 @@ module G_graph = struct
| (Gid.Old i,_) -> i
| _ -> Error.bug "[G_graph.max_binding]"
let list_search test =
let list_num test =
let rec loop n = function
| [] -> raise Not_found
| x::_ when test x -> n
......@@ -563,22 +563,23 @@ module G_graph = struct
(* -------------------------------------------------------------------------------- *)
let to_dep ?filter ?main_feat ?(deco=G_deco.empty) graph =
let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.pos_comp n1 n2) nodes in
let buff = Buffer.create 32 in
bprintf buff "[GRAPH] { opacity=0; scale = 200; fontname=\"Arial\"; }\n";
bprintf buff "[WORDS] { \n";
let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.pos_comp n1 n2) nodes in
(* nodes *)
List.iter
(fun (id, node) ->
if List.mem id deco.G_deco.nodes
then bprintf buff
"N_%s { %sforecolor=red; subcolor=red; }\n" (Gid.to_string id) (G_fs.to_dep ?filter ?main_feat (G_node.get_fs node))
else bprintf buff
"N_%s { %s }\n" (Gid.to_string id) (G_fs.to_dep ?filter ?main_feat (G_node.get_fs node))
let fs = G_node.get_fs node in
let dep_fs = G_fs.to_dep ?filter ?main_feat fs in
let style = match (List.mem id deco.G_deco.nodes, G_fs.get_string_atom "sem" fs) with
| (true, _) -> "; forecolor=red; subcolor=red; "
| (false, Some "void") -> "; forecolor=\"#AAAAAA\"; subcolor=\"#AAAAAA\"; "
| _ -> "" in
bprintf buff "N_%s { %s%s }\n" (Gid.to_string id) dep_fs style
) snodes;
bprintf buff "} \n";
......@@ -631,20 +632,57 @@ module G_graph = struct
let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.pos_comp n1 n2) nodes in
let raw_nodes = List.map (fun (pid,node) -> (pid, G_fs.to_raw (G_node.get_fs node))) snodes in
let raw_nodes = List.map (fun (gid,node) -> (gid, G_fs.to_raw (G_node.get_fs node))) snodes in
let search pid = list_search (fun (x,_) -> x=pid) raw_nodes in
let get_num gid = list_num (fun (x,_) -> x=gid) raw_nodes in
let edge_list = ref [] in
Gid_map.iter
(fun src_pid node ->
(fun src_gid node ->
Massoc_gid.iter
(fun tar_pid edge ->
edge_list := (search src_pid, G_edge.to_string edge, search tar_pid) :: !edge_list
(fun tar_gid edge ->
edge_list := (get_num src_gid, G_edge.to_string edge, get_num tar_gid) :: !edge_list
)
(G_node.get_next node)
)
graph.map;
(graph.meta, List.map snd raw_nodes, !edge_list)
(* -------------------------------------------------------------------------------- *)
(** [to_conll graph] returns the CoNLL code of [graph]: one line of ten
    tab-separated columns per node. The columns are:
    - the number given by [list_num] to the node in the node list sorted with
      [G_node.pos_comp];
    - the "phon", "lemma", "cat" and "pos" features, with "NO_PHON", "NO_LEMMA",
      "NO_CAT" and "_" as respective fallbacks;
    - the other features, as printed by [G_fs.to_conll];
    - the "|"-separated numbers of the governors of the node;
    - the "|"-separated labels of the corresponding incoming edges;
    - two "_" placeholders.
    Lines are written in the order of the sorted node list, so that they follow
    the numbering of the first column. *)
let to_conll graph =
  let nodes = Gid_map.fold (fun gid node acc -> (gid,node)::acc) graph.map [] in
  let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.pos_comp n1 n2) nodes in
  let get_num gid = list_num (fun (x,_) -> x=gid) snodes in

  (* Warning: [govs_labs] maps [gid]s to [num]s *)
  let govs_labs =
    Gid_map.fold
      (fun src_gid node acc ->
        let src_num = get_num src_gid in
        Massoc_gid.fold
          (fun acc2 tar_gid edge ->
            let old = try Gid_map.find tar_gid acc2 with Not_found -> [] in
            Gid_map.add tar_gid ((string_of_int src_num, G_edge.to_string edge)::old) acc2
          ) acc (G_node.get_next node)
      ) graph.map Gid_map.empty in

  let buff = Buffer.create 32 in
  (* Walk [snodes] rather than [graph.map]: the map is traversed in gid order,
     which is not guaranteed to be the order used for the numbering above. *)
  List.iter
    (fun (gid, node) ->
      (* NOTE(review): a node without incoming edge gets empty governor and label
         columns; confirm that the consumers of this output accept it. *)
      let (govs,labs) = List.split (try Gid_map.find gid govs_labs with Not_found -> []) in
      let fs = G_node.get_fs node in
      bprintf buff "%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t_\t_\n"
        (get_num gid)
        (match G_fs.get_string_atom "phon" fs with Some p -> p | None -> "NO_PHON")
        (match G_fs.get_string_atom "lemma" fs with Some p -> p | None -> "NO_LEMMA")
        (match G_fs.get_string_atom "cat" fs with Some p -> p | None -> "NO_CAT")
        (match G_fs.get_string_atom "pos" fs with Some p -> p | None -> "_")
        (G_fs.to_conll ~exclude: ["phon"; "lemma"; "cat"; "pos"; "position"] fs)
        (String.concat "|" govs)
        (String.concat "|" labs)
    )
    snodes;
  Buffer.contents buff
end (* module G_graph *)
(* ================================================================================ *)
......@@ -148,6 +148,7 @@ module G_graph: sig
val to_dot: ?main_feat:string -> ?deco:G_deco.t -> t -> string
val to_sentence: ?main_feat:string -> t -> string
val to_dep: ?filter : string list -> ?main_feat:string -> ?deco:G_deco.t -> t -> string
(** [to_conll t] returns the CoNLL code of the graph [t]: one tab-separated line per node. *)
val to_conll: t -> string
val to_raw: t ->
(string * string) list *
......
......@@ -55,6 +55,13 @@ module Rewrite_history = struct
| l, _ -> List_.iteri (fun i son -> loop (sprintf "%s_%d" file_name i) son) l
in loop base t
(** [save_conll base t] walks the rewriting tree [t] through its [good_nf] sons.
    Each node whose [good_nf] and [bad_nf] are both empty has the CoNLL code of
    its instance written in a ".conll" file. The file name starts with [base] and
    gets one "_i" suffix per step down the tree, [i] being the index given by
    [List_.iteri] to the son that is followed. *)
let save_conll base t =
  let rec walk prefix tree =
    match (tree.good_nf, tree.bad_nf) with
    | ([], []) ->
      (* no son at all: this node holds a normal form *)
      let conll = Instance.to_conll tree.instance in
      File.write conll (prefix^".conll")
    | (sons, _) ->
      let visit rank son = walk (sprintf "%s_%d" prefix rank) son in
      List_.iteri visit sons
  in
  walk base t
(* supposes that all modules are confluent and produce exactly one normal form *)
let save_det_gr base t =
let rec loop t =
......@@ -63,6 +70,16 @@ module Rewrite_history = struct
| [one], [] -> loop one
| _ -> Error.run "Not a single rewriting"
in loop t
(** [save_det_conll base t] follows the chain of single [good_nf] sons of [t]
    (with an empty [bad_nf]) down to the node having neither [good_nf] nor
    [bad_nf] sons, and writes the CoNLL code of its instance in [base].conll.
    [Error.run] is called with "Not a single rewriting" when a node of any other
    shape is met. *)
let save_det_conll base t =
  let target = base^".conll" in
  let rec descend tree =
    match (tree.good_nf, tree.bad_nf) with
    | ([], []) -> File.write (Instance.to_conll tree.instance) target
    | ([only_son], []) -> descend only_son
    | _ -> Error.run "Not a single rewriting"
  in
  descend t
end
......
......@@ -29,10 +29,12 @@ module Rewrite_history: sig
(** [save_gr base_name t] saves one gr_file for each normal form defined in [t].
Output files are named according to [base_name] and the Gorn address in the rewriting tree. *)
val save_gr: string -> t -> unit
(** [save_conll base_name t] is the CoNLL counterpart of [save_gr]:
    the output files get the ".conll" extension. *)
val save_conll: string -> t -> unit
(** [save_det_gr base_name t] supposes that the current GRS is deterministic.
It writes exactly one output file named [base_name].gr with the unique normal form. *)
val save_det_gr: string -> t -> unit
(** [save_det_conll base_name t] is the CoNLL counterpart of [save_det_gr]:
    the output file is named [base_name].conll. *)
val save_det_conll: string -> t -> unit
end
module Modul: sig
......
......@@ -51,6 +51,8 @@ module Instance = struct
let to_gr t = G_graph.to_gr t.graph
(* CoNLL code of the graph carried by the instance [t] *)
let to_conll t =
  let graph = t.graph in
  G_graph.to_conll graph
let save_dot_png ?main_feat base t =
ignore (Grew_utils.png_file_from_dot (G_graph.to_dot ?main_feat t.graph) (base^".png"))
......
......@@ -29,6 +29,9 @@ module Instance : sig
(** [to_gr t] returns a string which contains the "gr" code of the current graph *)
val to_gr: t -> string
(** [to_conll t] returns a string which contains the "conll" code of the current graph *)
val to_conll: t -> string
(** [save_dep_png base t] writes a file "base.png" with the dep representation of [t].
NB: if the Dep2pict is not available, nothing is done *)
val save_dep_png: ?main_feat: string -> string -> t -> unit
......
......@@ -27,3 +27,4 @@ and big_step = {
let to_dot_graph ?main_feat ?(deco=G_deco.empty) graph = G_graph.to_dot ?main_feat graph ~deco
let to_dep_graph ?filter ?main_feat ?(deco=G_deco.empty) graph = G_graph.to_dep ?filter ?main_feat ~deco graph
let to_gr_graph graph = G_graph.to_gr graph
(* CoNLL code of a graph: plain alias of [G_graph.to_conll] *)
let to_conll_graph = G_graph.to_conll
......@@ -34,3 +34,4 @@ and big_step = {
val to_dot_graph : ?main_feat:string -> ?deco:deco -> graph -> string
val to_dep_graph : ?filter: string list -> ?main_feat:string -> ?deco:deco -> graph -> string
val to_gr_graph: graph -> string
(** [to_conll_graph graph] returns the CoNLL code of [graph]. *)
val to_conll_graph: graph -> string
......@@ -146,11 +146,13 @@ let save_index ~dirname ~base_names =
List.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
close_out out_ch
let save_gr base rew_hist =
Rewrite_history.save_gr base rew_hist
let save_gr base rew_hist = Rewrite_history.save_gr base rew_hist
let save_det_gr base rew_hist =
Rewrite_history.save_det_gr base rew_hist
(* CoNLL saving of a rewriting history: plain alias of [Rewrite_history.save_conll] *)
let save_conll = Rewrite_history.save_conll
let save_det_gr base rew_hist = Rewrite_history.save_det_gr base rew_hist
(* CoNLL saving of a deterministic rewriting history: plain alias of
   [Rewrite_history.save_det_conll] *)
let save_det_conll = Rewrite_history.save_det_conll
let write_html
?(no_init=false)
......
......@@ -52,8 +52,12 @@ val to_sentence: ?main_feat:string -> Instance.t -> string
val save_gr: string -> Rewrite_history.t -> unit
(** [save_conll base rew_hist] saves the rewriting history [rew_hist] in CoNLL
    files; see [Rewrite_history.save_conll]. *)
val save_conll: string -> Rewrite_history.t -> unit
val save_det_gr: string -> Rewrite_history.t -> unit
(** [save_det_conll base rew_hist] saves the rewriting history [rew_hist] in a
    CoNLL file; see [Rewrite_history.save_det_conll]. *)
val save_det_conll: string -> Rewrite_history.t -> unit
(** get a graph from a file either in 'gr' or 'conll' format.
File extension should be '.gr' or '.conll'.
@raise Parsing_err if libgrew can't parse the file
......
......@@ -134,7 +134,7 @@ gr_item:
{ Graph_node (localize {Ast.node_id = id; position=position; fs=feats}) }
(* A -[x|y|z]-> B*)
| n1 = IDENT labels = delimited(LTR_EDGE_LEFT,separated_nonempty_list(PIPE,IDENT),LTR_EDGE_RIGHT) n2 = IDENT
| n1 = IDENT labels = delimited(LTR_EDGE_LEFT,separated_nonempty_list(PIPE,label_ident),LTR_EDGE_RIGHT) n2 = IDENT
{ Graph_edge (localize {Ast.edge_id = None; src=n1; edge_labels=labels; tar=n2; negative=false; }) }
num:
......@@ -413,7 +413,7 @@ node_features:
pat_edge:
(* "e: A -> B" OR "e: A -[*]-> B" *)
| id = edge_id n1 = IDENT GOTO_NODE n2 = IDENT
| id = edge_id n1 = IDENT LTR_EDGE_LEFT STAR LTR_EDGE_RIGHT n2 = IDENT
(* [n2] is the name of the target node: it is a plain IDENT, as in every other
   production of this rule; only edge labels are parsed with [label_ident] *)
| id = edge_id n1 = IDENT LTR_EDGE_LEFT STAR LTR_EDGE_RIGHT n2 = IDENT
{ localize ({Ast.edge_id = Some id; src=n1; edge_labels=[]; tar=n2; negative=true}) }
(* "A -> B" *)
......@@ -421,19 +421,19 @@ pat_edge:
{ localize ({Ast.edge_id = None; src=n1; edge_labels=[]; tar=n2; negative=true}) }
(* "e: A -[^X|Y]-> B" *)
| id = edge_id n1 = IDENT labels = delimited(LTR_EDGE_LEFT_NEG,separated_nonempty_list(PIPE,IDENT),LTR_EDGE_RIGHT) n2 = IDENT
| id = edge_id n1 = IDENT labels = delimited(LTR_EDGE_LEFT_NEG,separated_nonempty_list(PIPE,label_ident),LTR_EDGE_RIGHT) n2 = IDENT
{ localize ({Ast.edge_id = Some id; src=n1; edge_labels=labels; tar=n2; negative=true}) }
(* "A -[^X|Y]-> B"*)
| n1 = IDENT labels = delimited(LTR_EDGE_LEFT_NEG,separated_nonempty_list(PIPE,IDENT),LTR_EDGE_RIGHT) n2 = IDENT
| n1 = IDENT labels = delimited(LTR_EDGE_LEFT_NEG,separated_nonempty_list(PIPE,label_ident),LTR_EDGE_RIGHT) n2 = IDENT
{ localize ({Ast.edge_id = None; src=n1; edge_labels=labels; tar=n2; negative=true}) }
(* "e: A -[X|Y]-> B" *)
| id = edge_id n1 = IDENT labels = delimited(LTR_EDGE_LEFT,separated_nonempty_list(PIPE,IDENT),LTR_EDGE_RIGHT) n2 = IDENT
| id = edge_id n1 = IDENT labels = delimited(LTR_EDGE_LEFT,separated_nonempty_list(PIPE,label_ident),LTR_EDGE_RIGHT) n2 = IDENT
{ localize ({Ast.edge_id = Some id; src=n1; edge_labels=labels; tar=n2; negative=false}) }
(* "A -[X|Y]-> B" *)
| n1 = IDENT labels = delimited(LTR_EDGE_LEFT,separated_nonempty_list(PIPE,IDENT),LTR_EDGE_RIGHT) n2 = IDENT
| n1 = IDENT labels = delimited(LTR_EDGE_LEFT,separated_nonempty_list(PIPE,label_ident),LTR_EDGE_RIGHT) n2 = IDENT
{ localize ({Ast.edge_id = None; src=n1; edge_labels=labels; tar=n2; negative=false}) }
......@@ -442,7 +442,7 @@ edge_id:
pat_const:
(* "A -[X|Y]-> *" *)
| n1 = IDENT labels = delimited(LTR_EDGE_LEFT,separated_nonempty_list(PIPE,IDENT),LTR_EDGE_RIGHT) STAR
| n1 = IDENT labels = delimited(LTR_EDGE_LEFT,separated_nonempty_list(PIPE,label_ident),LTR_EDGE_RIGHT) STAR
{ localize (Ast.Start (n1,labels)) }
(* "A -> *" *)
......@@ -450,7 +450,7 @@ pat_const:
{ localize (Ast.Cst_out n1) }
(* "* -[X|Y]-> A" *)
| STAR labels = delimited(LTR_EDGE_LEFT,separated_nonempty_list(PIPE,IDENT),LTR_EDGE_RIGHT) n2 = IDENT
| STAR labels = delimited(LTR_EDGE_LEFT,separated_nonempty_list(PIPE,label_ident),LTR_EDGE_RIGHT) n2 = IDENT
{ localize (Ast.End (n2,labels)) }
(* "* -> A" *)
......@@ -509,9 +509,9 @@ commands:
command:
| DEL_EDGE n = IDENT
{ localize (Ast.Del_edge_name n) }
| DEL_EDGE n1 = IDENT label = delimited(LTR_EDGE_LEFT,IDENT,LTR_EDGE_RIGHT) n2 = IDENT
| DEL_EDGE n1 = IDENT label = delimited(LTR_EDGE_LEFT,label_ident,LTR_EDGE_RIGHT) n2 = IDENT
{ localize (Ast.Del_edge_expl (n1,n2,label)) }
| ADD_EDGE n1 = IDENT label = delimited(LTR_EDGE_LEFT,IDENT,LTR_EDGE_RIGHT) n2 = IDENT
| ADD_EDGE n1 = IDENT label = delimited(LTR_EDGE_LEFT,label_ident,LTR_EDGE_RIGHT) n2 = IDENT
{ localize (Ast.Add_edge (n1,n2,label)) }
| SHIFT_IN n1 = IDENT LONGARROW n2 = IDENT
{ localize (Ast.Shift_in (n1,n2)) }
......@@ -523,7 +523,7 @@ command:
{ localize (Ast.Merge_node (n1,n2)) }
| DEL_NODE n = IDENT
{ localize (Ast.Del_node n) }
| ADD_NODE n1 = IDENT DDOT label = delimited(RTL_EDGE_LEFT,IDENT,RTL_EDGE_RIGHT) n2 = IDENT
| ADD_NODE n1 = IDENT DDOT label = delimited(RTL_EDGE_LEFT,label_ident,RTL_EDGE_RIGHT) n2 = IDENT
{ localize (Ast.New_neighbour (n1,n2,label)) }
| DEL_FEAT qfn = QFN
{ localize (Ast.Del_feat qfn) }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment