Commit bc1dd696 authored by bguillaum

add a root node at pos 0 in CONLL

fix CONLL output (empty fields, quote)
sem=void ==> void=y

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@7936 7838e531-6607-4d57-9587-6c381814729c
parent a51d1f0f
......@@ -28,7 +28,7 @@ module Label = struct
let to_string ?(locals=[||]) t =
match (!full, t) with
| (None, No_domain s) -> s
| (_, No_domain s) -> s
| (Some table, Global i) -> table.(i)
| (Some _, Local i) -> fst locals.(i)
| _ -> Error.bug "[Label.to_string] inconsistent data"
......@@ -58,6 +58,8 @@ module G_edge = struct
let to_string ?(locals=[||]) t = Label.to_string ~locals t
let root = Label.No_domain "root"
let make ?loc ?(locals=[||]) string = Label.from_string ?loc ~locals string
let build ?locals (ast_edge, loc) =
......
......@@ -28,6 +28,8 @@ module G_edge: sig
val to_string: ?locals:Label.decl array -> t -> string
val root: t
val make: ?loc:Loc.t -> ?locals:Label.decl array -> string -> t
val build: ?locals:Label.decl array -> Ast.edge -> t
......
......@@ -8,6 +8,10 @@ open Grew_ast
type value = String of string | Float of float
(* Convert a feature value to a string.
   For a String, every double quote is replaced through Str.global_replace,
   the replacement template being a backslash followed by a double quote.
   For a Float, the text is produced by String_.of_float. *)
let string_of_value = function
| String s -> Str.global_replace (Str.regexp "\"") "\\\"" s
| Float i -> String_.of_float i
(* Convert a feature value to a string without touching quotes:
   a String is returned unchanged and a Float goes through String_.of_float.
   Differs from string_of_value only in the String case. *)
let conll_string_of_value = function
| String s -> s
| Float i -> String_.of_float i
......@@ -35,6 +39,7 @@ module Domain = struct
| ((Ast.Closed (n,vs))::_) when n = name ->
(match List_.sort_diff values vs with
| [] -> List.map (fun s -> String s) values
| l when List.for_all (fun x -> x.[0] = '_') l -> List.map (fun s -> String s) values
| l -> Error.build ?loc "Unknown feature values '%s' for feature name '%s'"
(List_.to_string (fun x->x) ", " l)
name
......@@ -150,7 +155,7 @@ module G_fs = struct
let get_string_atom feat_name t =
match List_.sort_assoc feat_name t with
| None -> None
| Some v -> Some (string_of_value v)
| Some v -> Some (conll_string_of_value v)
let get_float_feat feat_name t =
match List_.sort_assoc feat_name t with
......@@ -219,14 +224,20 @@ module G_fs = struct
| None -> sub
| Some l -> List.filter (fun (fn,_) -> List.mem fn l) sub in
sprintf " word=\"%s\"; subword=\"%s\""
(match main_opt with Some atom -> string_of_value atom | None -> "")
(match main_opt with Some atom -> string_of_value atom | None -> "_")
(List_.to_string G_feature.to_string "#" reduced_sub)
let to_conll ?exclude t =
let reduced_t = match exclude with
| None -> t
| Some list -> List.filter (fun (fn,_) -> not (List.mem fn list || fn.[0]='_')) t in
String.concat "|" (List.map (function (fn, String "true") -> fn | (fn, fv) -> fn^"="^(string_of_value fv)) reduced_t)
match reduced_t with
| [] -> "_"
| _ -> String.concat "|"
(List.map
(function (fn, String "true") -> fn | (fn, fv) -> fn^"="^(string_of_value fv))
reduced_t
)
end (* module G_fs *)
(* ==================================================================================================== *)
......
......@@ -295,8 +295,9 @@ module G_graph = struct
(* -------------------------------------------------------------------------------- *)
let of_conll ?loc lines =
let sorted_lines = List.sort (fun line1 line2 -> Pervasives.compare line1.Conll.num line2.Conll.num) lines in
let sorted_lines =
Conll.root ::
(List.sort (fun line1 line2 -> Pervasives.compare line1.Conll.num line2.Conll.num) lines) in
let table = Array.of_list (List.map (fun line -> line.Conll.num) sorted_lines) in
......@@ -308,20 +309,17 @@ module G_graph = struct
(fun acc line ->
(* add line number information in loc *)
let loc = Loc.opt_set_line line.Conll.line_num loc in
let dep_id = Id.build ?loc line.Conll.num table in
List.fold_left
(fun acc2 (gov, dep_lab) ->
if gov = "0"
then acc2
else
let gov_id = Id.build ?loc gov table in
let dep_id = Id.build ?loc line.Conll.num table in
let edge = G_edge.make ?loc dep_lab in
(match map_add_edge acc2 (Gid.Old gov_id) edge (Gid.Old dep_id) with
| Some g -> g
| None -> Error.build "[GRS] [Graph.of_conll] try to build a graph with twice the same edge %s %s"
(G_edge.to_string edge)
(match loc with Some l -> Loc.to_string l | None -> "")
)
let gov_id = Id.build ?loc gov table in
let edge = if gov = "0" then G_edge.root else G_edge.make ?loc dep_lab in
(match map_add_edge acc2 (Gid.Old gov_id) edge (Gid.Old dep_id) with
| Some g -> g
| None -> Error.build "[GRS] [Graph.of_conll] try to build a graph with twice the same edge %s %s"
(G_edge.to_string edge)
(match loc with Some l -> Loc.to_string l | None -> "")
)
) acc line.Conll.deps
) map_without_edges lines in
{meta=[]; map=map_with_edges}
......@@ -624,9 +622,9 @@ module G_graph = struct
(fun (id, node) ->
let fs = G_node.get_fs node in
let dep_fs = G_fs.to_dep ?filter ?main_feat fs in
let style = match (List.mem id deco.G_deco.nodes, G_fs.get_string_atom "sem" fs) with
let style = match (List.mem id deco.G_deco.nodes, G_fs.get_string_atom "void" fs) with
| (true, _) -> "; forecolor=red; subcolor=red; "
| (false, Some "void") -> "; forecolor=red; subcolor=red; "
| (false, Some "y") -> "; forecolor=red; subcolor=red; "
| _ -> "" in
bprintf buff "N_%s { %s%s }\n" (Gid.to_string id) dep_fs style
) snodes;
......@@ -698,10 +696,9 @@ module G_graph = struct
(* -------------------------------------------------------------------------------- *)
let to_conll graph =
let nodes = Gid_map.fold (fun gid node acc -> (gid,node)::acc) graph.map [] in
let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.pos_comp n1 n2) nodes in
let get_num gid = (list_num (fun (x,_) -> x=gid) snodes) + 1 in
let get_num gid = (list_num (fun (x,_) -> x=gid) snodes) in
(* Warning: [govs_labs] maps [gid]s to [num]s *)
let govs_labs =
......@@ -718,19 +715,31 @@ module G_graph = struct
let buff = Buffer.create 32 in
List.iter
(fun (gid, node) ->
let (govs,labs) = List.split (try Gid_map.find gid govs_labs with Not_found -> ["0","root"]) in
let gov_labs = try Gid_map.find gid govs_labs with Not_found -> [] in
(* Order the (governor, label) pairs of the current node before they are
   split into the govs and labs lists.
   A label is called plain when it is non-empty and does not start with
   'I' or 'D'. Pairs with a plain label come first; inside each group the
   pairs are ordered by governor number (read with String_.to_float).
   The former comparator answered -1 as soon as the left label was plain,
   even when the right one was plain too, so it was not a consistent
   ordering and List.sort could return such pairs in any order. *)
let sorted_gov_labs =
  let is_plain lab = lab <> "" && lab.[0] <> 'I' && lab.[0] <> 'D' in
  List.sort
    (fun (g1,l1) (g2,l2) ->
      match (is_plain l1, is_plain l2) with
      | (true, false) -> -1
      | (false, true) -> 1
      | (true, true) | (false, false) ->
        (* same group: fall back on the governor position *)
        compare (String_.to_float g1) (String_.to_float g2)
    ) gov_labs in
let (govs,labs) = List.split sorted_gov_labs in
let fs = G_node.get_fs node in
bprintf buff "%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t_\t_\n"
(get_num gid)
(match G_fs.get_string_atom "phon" fs with Some p -> p | None -> "NO_PHON")
(match G_fs.get_string_atom "lemma" fs with Some p -> p | None -> "NO_LEMMA")
(match G_fs.get_string_atom "cat" fs with Some p -> p | None -> "NO_CAT")
(match G_fs.get_string_atom "pos" fs with Some p -> p | None -> "_")
(G_fs.to_conll ~exclude: ["phon"; "lemma"; "cat"; "pos"; "position"] fs)
(String.concat "|" govs)
(String.concat "|" labs)
bprintf buff "%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t_\t_\n"
(get_num gid)
(match G_fs.get_string_atom "phon" fs with Some p -> p | None -> "NO_PHON")
(match G_fs.get_string_atom "lemma" fs with Some p -> p | None -> "NO_LEMMA")
(match G_fs.get_string_atom "cat" fs with Some p -> p | None -> "NO_CAT")
(match G_fs.get_string_atom "pos" fs with Some p -> p | None -> "_")
(G_fs.to_conll ~exclude: ["phon"; "lemma"; "cat"; "pos"; "position"] fs)
(String.concat "|" govs)
(String.concat "|" labs)
)
snodes;
(List.tl snodes) (* do not consider the root node in CONLL output *);
Buffer.contents buff
end (* module G_graph *)
......
......@@ -48,11 +48,14 @@ module G_node = struct
} )
let of_conll line =
{
fs = G_fs.of_conll line;
pos = Some (String_.to_float line.Conll.num);
next = Massoc_gid.empty;
}
if line = Conll.root
then { fs = G_fs.empty; pos = Some 0.; next = Massoc_gid.empty }
else
{
fs = G_fs.of_conll line;
pos = Some (String_.to_float line.Conll.num);
next = Massoc_gid.empty;
}
let remove (id_tar : Gid.t) label t = {t with next = Massoc_gid.remove id_tar label t.next}
......
......@@ -23,8 +23,3 @@ and big_step = {
first: rule_app;
small_step: (G_graph.t * rule_app) list;
}
let to_dot_graph ?main_feat ?(deco=G_deco.empty) graph = G_graph.to_dot ?main_feat graph ~deco
let to_dep_graph ?filter ?main_feat ?(deco=G_deco.empty) graph = G_graph.to_dep ?filter ?main_feat ~deco graph
let to_gr_graph graph = G_graph.to_gr graph
let to_conll_graph graph = G_graph.to_conll graph
......@@ -28,10 +28,3 @@ and big_step = {
first: rule_app;
small_step: (graph * rule_app) list;
}
(** {2 Types displaying} *)
val to_dot_graph : ?main_feat:string -> ?deco:deco -> graph -> string
val to_dep_graph : ?filter: string list -> ?main_feat:string -> ?deco:deco -> graph -> string
val to_gr_graph: graph -> string
val to_conll_graph: graph -> string
......@@ -608,8 +608,9 @@ module Conll = struct
deps: (string * string ) list;
}
let load file =
(* Predefined CONLL line named root: num is 0, phon is ROOT, line_num is -1,
   and both morph and deps are empty. *)
let root = { line_num = -1; num="0"; phon="ROOT"; lemma="__"; pos1="_X"; pos2=""; morph=[]; deps=[] }
let load file =
let parse_morph line_num = function
| "_" -> []
| morph ->
......@@ -631,8 +632,8 @@ module Conll = struct
let deps = List.combine gov_list lab_list in
{line_num = line_num;
num = num;
phon = escape_quote phon;
lemma = escape_quote lemma;
phon = phon;
lemma = lemma;
pos1 = pos1;
pos2 = pos2;
morph = parse_morph line_num morph;
......
......@@ -266,6 +266,8 @@ module Conll: sig
deps: (string * string ) list;
}
val root:line
val load: string -> line list
end
......
This diff is collapsed.
......@@ -6,6 +6,8 @@ open Grew_graph
open Grew_rule
open Grew_grs
val css_file: string
exception Parsing_err of string
exception File_dont_exists of string
......@@ -116,8 +118,12 @@ val make_index:
val html_sentences: title:string -> string -> (bool * string * int * string) list -> unit
val get_css_file: string
val graph_of_instance: Instance.t -> G_graph.t
val feature_names: unit -> string list option
val to_dot_graph : ?main_feat:string -> ?deco:deco -> graph -> string
val to_dep_graph : ?filter: string list -> ?main_feat:string -> ?deco:deco -> graph -> string
val to_gr_graph: graph -> string
val to_conll_graph: graph -> string
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment