Commit 93627df8 authored by bguillaum's avatar bguillaum

float position in Conll input

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@7904 7838e531-6607-4d57-9587-6c381814729c
parent aabc6aec
......@@ -5,11 +5,11 @@ open Grew_utils
open Grew_ast
type value = String of string | Int of int
type value = String of string | Float of float
let string_of_value = function
| String s -> s
| Int i -> string_of_int i
| Float i -> String_.of_float i
(* ==================================================================================================== *)
module Domain = struct
......@@ -30,7 +30,7 @@ module Domain = struct
| ((Ast.Open n)::_) when n = name ->
List.map (fun s -> String s) values
| ((Ast.Int n)::_) when n = name ->
(try List.map (fun s -> Int (int_of_string s)) values
(try List.map (fun s -> Float (String_.to_float s)) values
with Failure _ -> Error.build ?loc "[GRS] The feature '%s' is of type int" name)
| ((Ast.Closed (n,vs))::_) when n = name ->
(match List_.sort_diff values vs with
......@@ -152,11 +152,11 @@ module G_fs = struct
| None -> None
| Some v -> Some (string_of_value v)
let get_int_feat feat_name t =
let get_float_feat feat_name t =
match List_.sort_assoc feat_name t with
| None -> None
| Some (Int i) -> Some i
| Some _ -> Error.build "[Fs.get_int_feat]"
| Some (Float i) -> Some i
| Some _ -> Error.build "[Fs.get_float_feat]"
let to_string t = List_.to_string G_feature.to_string "," t
let to_gr t = List_.to_string G_feature.to_gr ", " t
......@@ -170,7 +170,7 @@ module G_fs = struct
("phon", Domain.build_one "phon" line.Conll.phon)
:: ("lemma", Domain.build_one "lemma" line.Conll.lemma)
:: ("cat", Domain.build_one "cat" line.Conll.pos1)
:: ("position", Domain.build_one "position" (string_of_int line.Conll.num))
:: ("position", Domain.build_one "position" line.Conll.num)
:: (List.map (fun (f,v) -> (f, Domain.build_one f v)) line.Conll.morph) in
let unsorted = match line.Conll.pos2 with
| "_" -> unsorted_without_pos
......
......@@ -27,7 +27,7 @@ module G_fs: sig
It returns [None] if there is no feature named [f] in [t] *)
val get_string_atom: string -> t -> string option
val get_int_feat: string -> t -> int option
val get_float_feat: string -> t -> float option
val to_gr: t -> string
val to_dot: ?main_feat: string -> t -> string
val to_word: ?main_feat: string -> t -> string
......
......@@ -296,34 +296,35 @@ module G_graph = struct
(* -------------------------------------------------------------------------------- *)
let of_conll ?loc lines =
let nodes =
List.fold_left
(fun acc line ->
Gid_map.add (Gid.Old line.Conll.num) (G_node.of_conll line) acc
) Gid_map.empty lines in
let sorted_lines = List.sort (fun line1 line2 -> Pervasives.compare line1.Conll.num line2.Conll.num) lines in
let table = Array.of_list (List.map (fun line -> line.Conll.num) sorted_lines) in
let nodes_with_edges =
let map_without_edges =
List_.foldi_left (fun i acc line -> Gid_map.add (Gid.Old i) (G_node.of_conll line) acc) Gid_map.empty sorted_lines in
let map_with_edges =
List.fold_left
(fun acc line ->
(* add line number information in loc *)
let loc = Loc.opt_set_line line.Conll.line_num loc in
List.fold_left
(fun acc2 (gov, dep_lab) ->
if gov=0
then acc
if gov = "0"
then acc2
else
let gov_node =
try Gid_map.find (Gid.Old gov) acc
with Not_found ->
Error.build ?loc "[G_graph.of_conll] the line refers to unknown gov %d" gov in
match G_node.add_edge (G_edge.make ?loc dep_lab) (Gid.Old line.Conll.num) gov_node with
| None -> acc
| Some new_node -> Gid_map.add (Gid.Old gov) new_node acc2
let gov_id = Id.build ?loc gov table in
let dep_id = Id.build ?loc line.Conll.num table in
let edge = G_edge.make ?loc dep_lab in
(match map_add_edge acc2 (Gid.Old gov_id) edge (Gid.Old dep_id) with
| Some g -> g
| None -> Error.build "[GRS] [Graph.of_conll] try to build a graph with twice the same edge %s %s"
(G_edge.to_string edge)
(match loc with Some l -> Loc.to_string l | None -> "")
)
) acc line.Conll.deps
) nodes lines in
{meta=[]; map=nodes_with_edges}
) map_without_edges lines in
{meta=[]; map=map_with_edges}
(* -------------------------------------------------------------------------------- *)
let opt_att atts name =
......@@ -347,7 +348,7 @@ module G_graph = struct
(fun acc2 (fn,fv) -> G_fs.set_feat fn fv acc2)
G_fs.empty
(("phon", phon) :: ("cat", (List.assoc "label" t_atts)) :: other_feats) in
let new_node = G_node.set_fs (G_node.set_pos G_node.empty i) new_fs in
let new_node = G_node.set_fs (G_node.set_pos G_node.empty (float i)) new_fs in
(Gid_map.add (Gid.Old i) new_node acc, Str_map.add id (Gid.Old i) acc_map)
| _ -> Log.critical "[G_graph.of_xml] Not a wellformed <T> tag"
) (Gid_map.empty, Str_map.empty) t_list in
......
......@@ -9,7 +9,7 @@ open Grew_fs
module G_node = struct
type t = {
fs: G_fs.t;
pos: int option;
pos: float option;
next: G_edge.t Massoc_gid.t;
}
......@@ -28,7 +28,7 @@ module G_node = struct
let to_gr t =
sprintf "%s [%s] "
(match t.pos with Some i -> sprintf "(%d)" i | None -> "")
(match t.pos with Some i -> sprintf "(%g)" i | None -> "")
(G_fs.to_gr t.fs)
let add_edge g_edge gid_tar t =
......@@ -43,13 +43,14 @@ module G_node = struct
| Some num -> G_fs.set_feat "position" (string_of_int num) fs in
(ast_node.Ast.node_id,
{ fs = fs_with_num;
pos = ast_node.Ast.position;
pos = (match ast_node.Ast.position with Some n -> Some (float n) | None -> None);
next = Massoc_gid.empty;
} )
let of_conll line = {
let of_conll line =
{
fs = G_fs.of_conll line;
pos = Some line.Conll.num;
pos = Some (String_.to_float line.Conll.num);
next = Massoc_gid.empty;
}
......@@ -68,7 +69,7 @@ module G_node = struct
let rm_out_edges t = {t with next = Massoc_gid.empty}
let build_neighbour t = {empty with pos = match t.pos with Some x -> Some (x+1) | None -> None}
let build_neighbour t = {empty with pos = match t.pos with Some x -> Some (x +. 0.01) | None -> None}
let pos_comp n1 n2 = Pervasives.compare n1.pos n2.pos
......
......@@ -16,7 +16,7 @@ module G_node: sig
val get_next: t -> G_edge.t Massoc_gid.t
val set_fs: t -> G_fs.t -> t
val set_pos: t -> int -> t
val set_pos: t -> float -> t
val remove: Gid.t -> G_edge.t -> t -> t
......
......@@ -54,7 +54,7 @@ module Instance = struct
let to_conll t = G_graph.to_conll t.graph
let save_dot_png ?filter ?main_feat base t =
ignore (Grew_utils.png_file_from_dot (G_graph.to_dot ?main_feat t.graph) (base^".png"))
ignore (Dot.to_png_file (G_graph.to_dot ?main_feat t.graph) (base^".png"))
IFDEF DEP2PICT THEN
let save_dep_png ?filter ?main_feat base t =
......@@ -386,7 +386,7 @@ module Rule = struct
let fullfill graph matching cst =
let get_node pid = G_graph.find (Pid_map.find pid matching.n_match) graph in
let get_string_feat pid feat_name = G_fs.get_string_atom feat_name (G_node.get_fs (get_node pid)) in
let get_int_feat pid feat_name = G_fs.get_int_feat feat_name (G_node.get_fs (get_node pid)) in
let get_float_feat pid feat_name = G_fs.get_float_feat feat_name (G_node.get_fs (get_node pid)) in
match cst with
| Cst_out (pid,edge) ->
......@@ -415,7 +415,7 @@ module Rule = struct
| _ -> false
end
| Feature_ineq (ineq, pid1, feat_name1, pid2, feat_name2) ->
match (ineq, get_int_feat pid1 feat_name1, get_int_feat pid2 feat_name2) with
match (ineq, get_float_feat pid1 feat_name1, get_float_feat pid2 feat_name2) with
| (Ast.Lt, Some fv1, Some fv2) when fv1 < fv2 -> true
| (Ast.Gt, Some fv1, Some fv2) when fv1 > fv2 -> true
| (Ast.Le, Some fv1, Some fv2) when fv1 <= fv2 -> true
......
......@@ -8,14 +8,25 @@ module IntSet = Set.Make (struct type t = int let compare = Pervasives.compare e
module IntMap = Map.Make (struct type t = int let compare = Pervasives.compare end)
(* ================================================================================ *)
module String_ = struct
let to_float string =
try float_of_string string
with _ -> float_of_string (Str.global_replace (Str.regexp "\\.") "," string)
let png_file_from_dot dot output_file =
let temp_file_name,out_ch = Filename.open_temp_file ~mode:[Open_rdonly;Open_wronly;Open_text] "grewui_" ".dot" in
fprintf out_ch "%s" dot;
close_out out_ch;
ignore(Sys.command(sprintf "dot -Tpng -o %s %s " output_file temp_file_name))
let of_float float = Str.global_replace (Str.regexp ",") "." (sprintf "%g" float)
end (* module String_ *)
(* ================================================================================ *)
module Dot = struct
let to_png_file dot output_file =
let temp_file_name,out_ch = Filename.open_temp_file ~mode:[Open_rdonly;Open_wronly;Open_text] "grewui_" ".dot" in
fprintf out_ch "%s" dot;
close_out out_ch;
ignore(Sys.command(sprintf "dot -Tpng -o %s %s " output_file temp_file_name))
end (* module Dot *)
(* ================================================================================ *)
module Loc = struct
......@@ -124,7 +135,7 @@ module Gid = struct
| Old of int
| New of (int * int) (* identifier for "created nodes" *)
(* a compore function which ensures that new nodes are a the "end" of the graph *)
(* a compare function which ensures that new nodes are at the "end" of the graph *)
let compare t1 t2 = match (t1,t2) with
| Old _ , New _ -> -1
| New _, Old _ -> 1
......@@ -588,13 +599,13 @@ end (* module Html *)
module Conll = struct
type line = {
line_num: int;
num: int;
num: string;
phon: string;
lemma: string;
pos1: string;
pos2: string;
morph: (string * string) list;
deps: ( int * string ) list;
deps: (string * string ) list;
}
let load file =
......@@ -615,11 +626,11 @@ module Conll = struct
let parse (line_num, line) =
match Str.split (Str.regexp "\t") line with
| [ num; phon; lemma; pos1; pos2; morph; govs; dep_labs; _; _ ] ->
let gov_list = List.map int_of_string (Str.split (Str.regexp "|") govs)
let gov_list = Str.split (Str.regexp "|") govs
and lab_list = Str.split (Str.regexp "|") dep_labs in
let deps = List.combine gov_list lab_list in
{line_num = line_num;
num = int_of_string num;
num = num;
phon = escape_quote phon;
lemma = escape_quote lemma;
pos1 = pos1;
......
......@@ -5,8 +5,23 @@ module StringSet : Set.S with type elt = string
module IntSet : Set.S with type elt = int
module IntMap : Map.S with type key = int
(* ================================================================================ *)
(* [Pid_set] *)
module String_: sig
(* [to_float]: robust conversion of string to float whatever is the locale *)
val to_float: string -> float
(* [to_float]: robust conversion of float to string whatever is the locale *)
val of_float: float -> string
end
(* ================================================================================ *)
(* [Dot] function to manipulate the dot format *)
module Dot: sig
val to_png_file: string -> string -> unit
end
val png_file_from_dot: string -> string -> unit
(* ================================================================================ *)
(* [Loc] general module to describe errors location: (file name, line number in file) *)
......@@ -242,13 +257,13 @@ end
module Conll: sig
type line = {
line_num: int;
num: int;
num: string;
phon: string;
lemma: string;
pos1: string;
pos2: string;
morph: (string * string) list;
deps: ( int * string ) list;
deps: (string * string ) list;
}
val load: string -> line list
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment