Commit f3631754 authored by bguillaum's avatar bguillaum

better integration of float features to handle position of "new nodes"

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@8123 7838e531-6607-4d57-9587-6c381814729c
parent b30267ea
......@@ -122,7 +122,7 @@ module Ast = struct
type u_node = {
node_id: Id.name;
position: int option;
position: float option;
fs: feature list;
}
type node = u_node * Loc.t
......
......@@ -58,7 +58,7 @@ module Ast : sig
type u_node = {
node_id: Id.name;
position: int option;
position: float option;
fs: feature list;
}
type node = u_node * Loc.t
......
......@@ -132,6 +132,8 @@ module Command = struct
((DEL_NODE (pid_of_act_id loc act_n), loc), (List_.rm act_n kai, kei))
| (Ast.Del_feat (act_id, feat_name), loc) ->
if feat_name = "position"
then Error.build ~loc "Illegal del_feat command: the 'position' feature cannot be deleted";
check_act_id loc act_id kai;
((DEL_FEAT (pid_of_act_id loc act_id, feat_name), loc), (kai, kei))
......
......@@ -179,7 +179,6 @@ module G_fs = struct
("phon", Domain.build_one ?loc "phon" line.Conll.phon)
:: ("lemma", Domain.build_one ?loc "lemma" line.Conll.lemma)
:: ("cat", Domain.build_one ?loc "cat" line.Conll.pos1)
:: ("position", Domain.build_one ?loc "position" line.Conll.num)
:: (List.map (fun (f,v) -> (f, Domain.build_one ?loc f v)) line.Conll.morph) in
let unsorted = match line.Conll.pos2 with
| "" | "_" -> unsorted_without_pos
......@@ -222,11 +221,12 @@ module G_fs = struct
| (None, _) -> "#"
| (Some atom, _) -> string_of_value atom
let to_dep ?main_feat ?filter t =
let to_dep ?position ?main_feat ?filter t =
let (main_opt, sub) = get_main ?main_feat t in
let last = match position with Some f when f > 0. -> [("position", Float f)] | _ -> [] in
let reduced_sub = match filter with
| None -> sub
| Some l -> List.filter (fun (fn,_) -> List.mem fn l) sub in
| None -> sub @ last
| Some l -> (List.filter (fun (fn,_) -> List.mem fn l) sub) @ last in
sprintf " word=\"%s\"; subword=\"%s\""
(match main_opt with Some atom -> string_of_value atom | None -> "_")
(List_.to_string G_feature.to_string "#" reduced_sub)
......@@ -251,6 +251,18 @@ module P_fs = struct
let empty = []
let check_position ?param position t =
try
match List.assoc "position" t with
| P_feature.Equal pos_list -> List.mem (Float position) pos_list
| P_feature.Different pos_list -> not (List.mem (Float position) pos_list)
| P_feature.Absent -> false
| P_feature.Param index ->
match param with
| Some p -> float_of_string (Lex_par.get_param_value index p) = position
| None -> Log.bug "[P_fs.check_position] Illegal parametrized pattern feature"; exit 2
with Not_found -> true
let build ?pat_vars ast_fs =
let unsorted = List.map (P_feature.build ?pat_vars) ast_fs in
List.sort P_feature.compare unsorted
......@@ -268,6 +280,9 @@ module P_fs = struct
exception Fail
let match_ ?param pattern fs =
let pattern_wo_pos =
try List.remove_assoc "position" pattern
with Not_found -> pattern in
let rec loop acc = function
| [], _ -> acc
......@@ -299,7 +314,7 @@ module P_fs = struct
(* remaining cases: Equal and not list_mem | Diff and not list_mem -> fail*)
| _ -> raise Fail
in loop param (pattern,fs)
in loop param (pattern_wo_pos,fs)
let filter fs_p fs_g =
let rec loop = function
......
......@@ -31,7 +31,7 @@ module G_fs: sig
val to_gr: t -> string
val to_dot: ?main_feat: string -> t -> string
val to_word: ?main_feat: string -> t -> string
val to_dep: ?main_feat: string -> ?filter: string list -> t -> string
val to_dep: ?position:float -> ?main_feat: string -> ?filter: string list -> t -> string
val to_raw: t -> (string * string) list
val to_conll: ?exclude: string list -> t -> string
......@@ -68,6 +68,10 @@ module P_fs: sig
*)
val match_: ?param:Lex_par.t -> t -> G_fs.t -> Lex_par.t option
(** [check_position ?parma position pfs] checks wheter [pfs] is compatible with a node at [position].
It returns [true] iff [pfs] has no requirement about position ok if the requirement is satisfied. *)
val check_position: ?param:Lex_par.t -> float -> t -> bool
val filter: t -> G_fs.t -> bool
val unif: t -> t -> t
......
......@@ -6,7 +6,6 @@ open Grew_ast
open Grew_edge
open Grew_fs
open Grew_node
open Grew_command
module Str_map = Map.Make (String)
......@@ -259,15 +258,20 @@ module G_graph = struct
let full_node_list = gr_ast.Ast.nodes
and full_edge_list = gr_ast.Ast.edges in
let next_free_position = ref 1. in
let named_nodes =
let rec loop already_bound = function
| [] -> []
| (ast_node, loc) :: tail ->
let node_id = ast_node.Ast.node_id in
let tail = loop (node_id :: already_bound) tail in
if List.mem node_id already_bound
then Error.build "[GRS] [Graph.build] try to build a graph with twice the same node id '%s'" node_id
else G_node.build (ast_node, loc) :: tail in
if List.mem node_id already_bound
then Error.build "[GRS] [Graph.build] try to build a graph with twice the same node id '%s'" node_id
else
let (new_id,new_node) = G_node.build ~def_position:!next_free_position (ast_node, loc) in
next_free_position := 1. +. (max !next_free_position (G_node.get_position new_node));
let new_tail = loop (node_id :: already_bound) tail in
(new_id,new_node) :: new_tail in
loop [] full_node_list in
let sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) named_nodes in
......@@ -297,9 +301,7 @@ module G_graph = struct
(* -------------------------------------------------------------------------------- *)
let of_conll ?loc lines =
let sorted_lines =
Conll.root ::
(List.sort (fun line1 line2 -> Pervasives.compare line1.Conll.num line2.Conll.num) lines) in
let sorted_lines = Conll.root :: (List.sort Conll.compare lines) in
let table = Array.of_list (List.map (fun line -> line.Conll.num) sorted_lines) in
......@@ -309,7 +311,6 @@ module G_graph = struct
let loc = Loc.opt_set_line i loc in
Gid_map.add (Gid.Old i) (G_node.of_conll ?loc line) acc)
Gid_map.empty sorted_lines in
let map_with_edges =
List.fold_left
(fun acc line ->
......@@ -352,7 +353,7 @@ module G_graph = struct
(fun acc2 (fn,fv) -> G_fs.set_feat fn fv acc2)
G_fs.empty
(("phon", phon) :: ("cat", (List.assoc "label" t_atts)) :: other_feats) in
let new_node = G_node.set_fs (G_node.set_pos G_node.empty (float i)) new_fs in
let new_node = G_node.set_fs (G_node.set_position (float i) G_node.empty) new_fs in
(Gid_map.add (Gid.Old i) new_node acc, Str_map.add id (Gid.Old i) acc_map)
| _ -> Log.critical "[G_graph.of_xml] Not a wellformed <T> tag"
) (Gid_map.empty, Str_map.empty) t_list in
......@@ -527,8 +528,13 @@ module G_graph = struct
(* -------------------------------------------------------------------------------- *)
let set_feat ?loc graph node_id feat_name new_value =
let node = Gid_map.find node_id graph.map in
let new_fs = G_fs.set_feat ?loc feat_name new_value (G_node.get_fs node) in
{ graph with map = Gid_map.add node_id (G_node.set_fs node new_fs) graph.map }
let new_node =
match feat_name with
| "position" -> G_node.set_position (float_of_string new_value) node
| _ ->
let new_fs = G_fs.set_feat ?loc feat_name new_value (G_node.get_fs node) in
(G_node.set_fs node new_fs) in
{ graph with map = Gid_map.add node_id new_node graph.map }
(* -------------------------------------------------------------------------------- *)
let update_feat ?loc graph tar_id tar_feat_name item_list =
......@@ -546,7 +552,6 @@ module G_graph = struct
let new_feature_value = List_.to_string (fun s->s) "" strings_to_concat in
(set_feat ?loc graph tar_id tar_feat_name new_feature_value, new_feature_value)
(* -------------------------------------------------------------------------------- *)
let del_feat graph node_id feat_name =
let node = Gid_map.find node_id graph.map in
......@@ -570,21 +575,27 @@ module G_graph = struct
) graph.meta;
(* nodes *)
let nodes = Gid_map.fold (fun id node acc -> (id,node)::acc) graph.map [] in
let sorted_nodes = List.sort (fun (_,n1) (_,n2) -> G_node.pos_comp n1 n2) nodes in
let nodes = Gid_map.fold
(fun id node acc ->
if G_node.is_conll_root node
then acc
else (id,node)::acc
) graph.map [] in
let sorted_nodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
List.iter
(fun (id,node) ->
bprintf buff " N_%s %s;\n" (Gid.to_string id) (G_node.to_gr node)
) sorted_nodes;
(* edges *)
Gid_map.iter
(fun id node ->
List.iter
(fun (id,node) ->
Massoc_gid.iter
(fun tar edge ->
bprintf buff " N_%s -[%s]-> N_%s;\n" (Gid.to_string id) (G_edge.to_string edge) (Gid.to_string tar)
) (G_node.get_next node)
) graph.map;
) sorted_nodes;
bprintf buff "}\n";
Buffer.contents buff
......@@ -592,7 +603,7 @@ module G_graph = struct
(* -------------------------------------------------------------------------------- *)
let to_sentence ?main_feat graph =
let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.pos_comp n1 n2) nodes in
let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
let words = List.map
(fun (id, node) -> G_fs.to_word ?main_feat (G_node.get_fs node)
......@@ -617,7 +628,7 @@ module G_graph = struct
(* -------------------------------------------------------------------------------- *)
let to_dep ?filter ?main_feat ?(deco=G_deco.empty) graph =
let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.pos_comp n1 n2) nodes in
let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
let buff = Buffer.create 32 in
bprintf buff "[GRAPH] { opacity=0; scale = 200; fontname=\"Arial\"; }\n";
......@@ -627,7 +638,7 @@ module G_graph = struct
List.iter
(fun (id, node) ->
let fs = G_node.get_fs node in
let dep_fs = G_fs.to_dep ?filter ?main_feat fs in
let dep_fs = G_fs.to_dep ~position:(G_node.get_position node) ?filter ?main_feat fs in
let style = match (List.mem id deco.G_deco.nodes, G_fs.get_string_atom "void" fs) with
| (true, _) -> "; forecolor=red; subcolor=red; "
| (false, Some "y") -> "; forecolor=red; subcolor=red; "
......@@ -684,7 +695,7 @@ module G_graph = struct
let to_raw graph =
let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.pos_comp n1 n2) nodes in
let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
let raw_nodes = List.map (fun (gid,node) -> (gid, G_fs.to_raw (G_node.get_fs node))) snodes in
let get_num gid = list_num (fun (x,_) -> x=gid) raw_nodes in
......@@ -705,8 +716,13 @@ module G_graph = struct
let nodes = Gid_map.fold
(fun gid node acc -> (gid,node)::acc)
graph.map [] in
let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.pos_comp n1 n2) nodes in
let get_num gid = (list_num (fun (x,_) -> x=gid) snodes) in
let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
let get_num gid =
let gnode = List.assoc gid snodes in
if G_node.is_conll_root gnode
then 0.
else G_node.get_position (List.assoc gid snodes) in
(* Warning: [govs_labs] maps [gid]s to [num]s *)
let govs_labs =
......@@ -716,7 +732,7 @@ module G_graph = struct
Massoc_gid.fold
(fun acc2 tar_gid edge ->
let old = try Gid_map.find tar_gid acc2 with Not_found -> [] in
Gid_map.add tar_gid ((string_of_int src_num, G_edge.to_string edge)::old) acc2
Gid_map.add tar_gid ((sprintf "%g" src_num, G_edge.to_string edge)::old) acc2
) acc (G_node.get_next node)
) graph.map Gid_map.empty in
......@@ -741,12 +757,12 @@ module G_graph = struct
) gov_labs in
let (govs,labs) = List.split sorted_gov_labs in
let fs = G_node.get_fs node in
bprintf buff "%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t_\t_\n"
bprintf buff "%g\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t_\t_\n"
(get_num gid)
(match G_fs.get_string_atom "phon" fs with Some p -> p | None -> "NO_PHON")
(match G_fs.get_string_atom "lemma" fs with Some p -> p | None -> "NO_LEMMA")
(match G_fs.get_string_atom "cat" fs with Some p -> p | None -> "NO_CAT")
(match G_fs.get_string_atom "pos" fs with Some p -> p | None -> "_")
(match G_fs.get_string_atom "phon" fs with Some p -> p | None -> "_e_")
(match G_fs.get_string_atom "lemma" fs with Some p -> p | None -> "_e_")
(match G_fs.get_string_atom "cat" fs with Some p -> p | None -> "X")
(match G_fs.get_string_atom "pos" fs with Some p -> p | None -> "X")
(G_fs.to_conll ~exclude: ["phon"; "lemma"; "cat"; "pos"; "position"] fs)
(String.concat "|" govs)
(String.concat "|" labs)
......
......@@ -9,18 +9,20 @@ open Grew_fs
module G_node = struct
type t = {
fs: G_fs.t;
pos: float option;
next: G_edge.t Massoc_gid.t;
position: float;
conll_root: bool;
}
let get_fs t = t.fs
let set_fs t fs = {t with fs = fs}
let get_next t = t.next
let set_fs t fs = {t with fs = fs}
let set_pos t pos = {t with pos = Some pos}
let get_position t = t.position
let set_position position t = { t with position }
let empty = { fs = G_fs.empty; pos = None; next = Massoc_gid.empty; conll_root=false }
let empty = { fs = G_fs.empty; next = Massoc_gid.empty; position = -1.; conll_root=false }
let is_conll_root t = t.conll_root
......@@ -29,38 +31,27 @@ module G_node = struct
(G_fs.to_string t.fs)
(Massoc_gid.to_string G_edge.to_string t.next)
let to_gr t =
sprintf "%s [%s] "
(match t.pos with Some i -> sprintf "(%g)" i | None -> "")
(G_fs.to_gr t.fs)
let to_gr t = if t.position < 0.
then sprintf "[%s] " (G_fs.to_gr t.fs)
else sprintf "(%g) [%s] " t.position (G_fs.to_gr t.fs)
let add_edge g_edge gid_tar t =
match Massoc_gid.add gid_tar g_edge t.next with
| Some l -> Some {t with next = l}
| None -> None
let build (ast_node, loc) =
let build ?def_position (ast_node, loc) =
let fs = G_fs.build ast_node.Ast.fs in
let fs_with_num = match ast_node.Ast.position with
| None -> fs
| Some num -> G_fs.set_feat "position" (string_of_int num) fs in
(ast_node.Ast.node_id,
{ fs = fs_with_num;
pos = (match ast_node.Ast.position with Some n -> Some (float n) | None -> None);
next = Massoc_gid.empty;
conll_root=false;
} )
let position = match (ast_node.Ast.position, def_position) with
| (Some position, _) -> position
| (None, Some position) -> position
| (None, None) -> Error.bug "Cannot build a node without position" in
(ast_node.Ast.node_id, { empty with fs; position })
let of_conll ?loc line =
if line = Conll.root
then { fs = G_fs.empty; pos = Some 0.; next = Massoc_gid.empty; conll_root=true; }
else
{
fs = G_fs.of_conll ?loc line;
pos = Some (String_.to_float line.Conll.num);
next = Massoc_gid.empty;
conll_root=false;
}
then { empty with conll_root=true }
else { empty with fs = G_fs.of_conll ?loc line; position = float_of_string line.Conll.num }
let remove (id_tar : Gid.t) label t = {t with next = Massoc_gid.remove id_tar label t.next}
......@@ -77,9 +68,9 @@ module G_node = struct
let rm_out_edges t = {t with next = Massoc_gid.empty}
let build_neighbour t = {empty with pos = match t.pos with Some x -> Some (x +. 0.01) | None -> None}
let build_neighbour t = { empty with position = (get_position t) +. 0.01 }
let pos_comp n1 n2 = Pervasives.compare n1.pos n2.pos
let position_comp n1 n2 = Pervasives.compare n1.position n2.position
let rename mapping n = {n with next = Massoc_gid.rename mapping n.next}
end
......@@ -116,11 +107,12 @@ module P_node = struct
| Some l -> Some {t with next = l}
| None -> None
let match_ ?param p_node g_node = P_fs.match_ ?param p_node.fs (G_node.get_fs g_node)
let match_ ?param p_node g_node =
(* (match param with None -> printf "<None>" | Some p -> printf "<Some>"; Lex_par.dump p); *)
if P_fs.check_position ?param (G_node.get_position g_node) p_node.fs
then P_fs.match_ ?param p_node.fs (G_node.get_fs g_node)
else raise P_fs.Fail
let compare_pos t1 t2 = Pervasives.compare t1.loc t2.loc
end
(* ================================================================================ *)
......@@ -16,7 +16,7 @@ module G_node: sig
val get_next: t -> G_edge.t Massoc_gid.t
val set_fs: t -> G_fs.t -> t
val set_pos: t -> float -> t
val set_position: float -> t -> t
val is_conll_root: t -> bool
......@@ -30,10 +30,12 @@ module G_node: sig
val rm_out_edges: t -> t
val add_edge: G_edge.t -> Gid.t -> t -> t option
val build: Ast.node -> (Id.name * t)
val build: ?def_position: float -> Ast.node -> (Id.name * t)
val of_conll: ?loc:Loc.t -> Conll.line -> t
val pos_comp: t -> t -> int
val get_position: t -> float
val position_comp: t -> t -> int
val build_neighbour: t -> t
......
......@@ -401,8 +401,12 @@ module Rule = struct
let fullfill graph matching cst =
let get_node pid = G_graph.find (Pid_map.find pid matching.n_match) graph in
let get_string_feat pid feat_name = G_fs.get_string_atom feat_name (G_node.get_fs (get_node pid)) in
let get_float_feat pid feat_name = G_fs.get_float_feat feat_name (G_node.get_fs (get_node pid)) in
let get_string_feat pid = function
| "position" -> Some (sprintf "%g" (G_node.get_position (get_node pid)))
| feat_name -> G_fs.get_string_atom feat_name (G_node.get_fs (get_node pid)) in
let get_float_feat pid = function
| "position" -> Some (G_node.get_position (get_node pid))
| feat_name -> G_fs.get_float_feat feat_name (G_node.get_fs (get_node pid)) in
match cst with
| Cst_out (pid,edge) ->
......
......@@ -611,6 +611,14 @@ module Conll = struct
let root = { line_num = -1; num="0"; phon="ROOT"; lemma="__"; pos1="_X"; pos2=""; morph=[]; deps=[] }
let line_to_string l =
let (gov_list, lab_list) = List.split l.deps in
sprintf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"
l.num l.phon l.lemma l.pos1 l.pos2
(match l.morph with [] -> "_" | list -> String.concat "|" (List.map (fun (f,v) -> sprintf "%s=%s" f v) list))
(String.concat "|" (gov_list))
(String.concat "|" (lab_list))
let parse_morph file_name line_num = function
| "_" -> []
| morph ->
......@@ -648,6 +656,9 @@ module Conll = struct
List.map (parse_line file_name) lines
let parse file_name lines = List.map (parse_line file_name) lines
(* We would prefer to compare the float equivalent of l1.num l2.num but this would break the dicho_find function *)
let compare l1 l2 = Pervasives.compare ((* float_of_string *) l1.num) ((* float_of_string *) l2.num)
end (* module Conll *)
(* ================================================================================ *)
......@@ -661,6 +672,14 @@ module Lex_par = struct
let empty=[]
let append = List.append
let dump t =
printf "[Lex_par.dump] --> size = %d\n" (List.length t);
List.iter (fun (pp,cp) ->
printf "%s##%s\n"
(String.concat "#" pp)
(String.concat "#" cp)
) t
let rm_peripheral_white s =
Str.global_replace (Str.regexp "\\( \\|\t\\)*$") ""
(Str.global_replace (Str.regexp "^\\( \\|\t\\)*") "" s)
......
......@@ -266,11 +266,15 @@ module Conll: sig
deps: (string * string ) list;
}
val line_to_string: line -> string
val root:line
val load: string -> line list
val parse: string -> (int * string) list -> line list
val compare: line -> line -> int
end
(** module for rule that are lexically parametrized *)
......@@ -280,6 +284,8 @@ module Lex_par: sig
val empty:t
val append: t -> t -> t
val dump: t -> unit
(** [from_lines filename nb_pattern_var nb_command_var strings] *)
val from_lines: ?loc: Loc.t -> int -> int -> string list -> t
......
......@@ -84,7 +84,7 @@ let localize t = (t,get_loc ())
%token <Grew_ast.Ast.complex_id> COMPLEX_ID
%token <string> STRING
%token <int> INT
%token <float> FLOAT
%token <string list> COMMENT
%token <string list> LP
......@@ -113,7 +113,7 @@ let localize t = (t,get_loc ())
string_or_int:
| v=COMPLEX_ID { Ast.simple_id_of_ci v }
| v=STRING { v }
| v=INT { string_of_int v }
| v=FLOAT { Printf.sprintf "%g" v }
label_ident:
| x=separated_nonempty_list(DDOT,COMPLEX_ID) { String.concat ":" (List.map Ast.label_id_of_ci x) }
......@@ -122,7 +122,7 @@ simple_id_with_loc:
| id=COMPLEX_ID { (Ast.simple_id_of_ci id,!Parser_global.current_line+1) }
num:
| INT { $1 }
| FLOAT { $1 }
/*=============================================================================================*/
/* GREW GRAPH */
......
......@@ -106,7 +106,8 @@ and global = parse
| "graph" { GRAPH }
| digit+ as number { INT (int_of_string number) }
| digit+ ('.' digit*)? as number { FLOAT (float_of_string number) }
| '$' ident as pat_var { DOLLAR_ID pat_var}
| '@' ident as cmd_var { AROBAS_ID cmd_var }
| "@#" color as col { COLOR col }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment