(**********************************************************************************)
(* Libcaml-grew - a Graph Rewriting library dedicated to NLP applications *)
(* *)
(* Copyright 2011-2013 Inria, Université de Lorraine *)
(* *)
(* Webpage: http://grew.loria.fr *)
(* License: CeCILL (see LICENSE folder or "http://www.cecill.info") *)
(* Authors: see AUTHORS file *)
(**********************************************************************************)
open Printf
open Log
open Conll
open Grew_base
open Grew_types
open Grew_ast
open Grew_domain
let decode_feat_name s = Str.global_replace (Str.regexp "__\\([0-9a-z]+\\)$") "[\\1]" s
(* ================================================================================ *)
module Feature_value = struct
let build_disj ?loc ?domain name unsorted_values =
Domain.build_disj ?loc ?domain name unsorted_values
let build_value ?loc ?domain name value =
match build_disj ?loc ?domain name [value] with
| [x] -> x
| _ -> Error.bug ?loc "[Feature_value.build_value]"
end (* module Feature_value *)
(* ================================================================================ *)
module G_feature = struct
type t = string * value
let get_name = fst
let compare feat1 feat2 = Pervasives.compare (get_name feat1) (get_name feat2)
(* another order used for printing purpose only *)
let print_order = ["phon"; "form"; "cat"; "upos"; "lemma"; "pos"; "xpos"]
let print_cmp (name1,_) (name2,_) =
match (List_.index name1 print_order, List_.index name2 print_order) with
| (Some i, Some j) -> Pervasives.compare i j
| (Some i, None) -> -1
| (None, Some j) -> 1
| (None, None) -> Pervasives.compare name1 name2
let build ?domain = function
| ({Ast.kind=Ast.Equality [atom]; name=name},loc) ->
(name, Feature_value.build_value ~loc ?domain name atom)
| (uf,loc) -> Error.build ~loc "in graph nodes, features must follow the shape \"name = value\" (error on feature: \"%s\")" (Ast.u_feature_to_string uf)
let to_string (feat_name, feat_val) = sprintf "%s=%s" feat_name (string_of_value feat_val)
let to_gr (feat_name, feat_val) = sprintf "%s=\"%s\"" feat_name (string_of_value feat_val)
let to_dot (feat_name, feat_val) =
let string_val = string_of_value feat_val in
match Str.split (Str.regexp ":C:") string_val with
| [] -> Error.bug "[G_feature.to_dot] feature value '%s'" string_val
| fv::_ -> sprintf "%s=%s" feat_name fv
let buff_dot buff (feat_name, feat_val) =
let string_val = string_of_value feat_val in
match Str.split (Str.regexp ":C:") string_val with
| [] -> Error.bug "[G_feature.to_dot] feature value '%s'" string_val
| fv::_ -> bprintf buff "
%s | = | %s |
\n" (decode_feat_name feat_name) fv
end (* module G_feature *)
(* ================================================================================ *)
module P_feature = struct
(* feature= (feature_name, disjunction of atomic values) *)
type cst =
| Absent
| Equal of value list (* with Equal constr, the list MUST never be empty *)
| Different of value list
(* NB: in the current version, |in_param| ≤ 1 *)
type v = {
cst: cst;
in_param: int list; (* the list of parameters to which the value must belong *)
}
type t = string * v
let dump (feature_name, {cst; in_param}) =
printf "[P_feature.dump]\n";
printf "%s%s\n"
feature_name
(match cst with
| Different [] -> "=*"
| Different l -> "≠" ^ (String.concat "|" (List.map string_of_value l))
| Equal l -> "=" ^ (String.concat "|" (List.map string_of_value l))
| Absent -> " must be Absent!");
printf "in_param=[%s]\n" (String.concat "," (List.map string_of_int in_param));
printf "%!"
let to_json ?domain (feature_name, {cst}) =
`Assoc [
("feature_name", `String feature_name);
( match cst with
| Absent -> ("absent", `Null)
| Equal val_list -> ("equal", `List (List.map (fun x -> `String (string_of_value x)) val_list))
| Different val_list -> ("different", `List (List.map (fun x -> `String (string_of_value x)) val_list))
)
]
let get_name = fst
let compare feat1 feat2 = Pervasives.compare (get_name feat1) (get_name feat2)
exception Fail_unif
(** raise [P_feature.Fail_unif] *)
let unif_value v1 v2 = match (v1, v2) with
| ({cst=Absent;in_param=[]},{cst=Absent;in_param=[]}) -> v1
| ({cst=Absent;in_param=[]},_)
| (_,{cst=Absent;in_param=[]}) -> raise Fail_unif
| ({cst=cst1; in_param=in1}, {cst=cst2; in_param=in2}) ->
let cst = match (cst1, cst2) with
| (Equal l1, Equal l2) ->
(match List_.sort_inter l1 l2 with
| [] -> raise Fail_unif
| l -> Equal l)
| (Equal l1, Different l2)
| (Different l2, Equal l1) ->
(match List_.sort_diff l1 l2 with
| [] -> raise Fail_unif
| l -> Equal l)
| (Different l1, Different l2) -> Different (List_.sort_union l1 l2)
| _ -> Error.bug "[P_feature.unif_value] inconsistent match case" in
let (in_) = match (in1,in2) with
| (_,[]) -> (in1)
| ([],_) -> (in2)
| _ -> Error.build "more than one parameter constraint for the same feature in not yet implemented" in
{cst; in_param=in_}
let to_string ?param_names t =
let param_string index = match param_names with
| None -> sprintf "$%d" index
| Some (l,_) -> sprintf "%s" (List.nth l index) in
match t with
| (feat_name, {cst=Absent ;in_param=[]}) -> sprintf "!%s" feat_name
| (feat_name, {cst=Equal atoms;in_param=[]}) -> sprintf "%s=%s" feat_name (List_.to_string string_of_value "|" atoms)
| (feat_name, {cst=Different [];in_param=[]}) -> sprintf "%s=*" feat_name
| (feat_name, {cst=Different atoms;in_param=[]}) -> sprintf "%s≠%s" feat_name (List_.to_string string_of_value "|" atoms)
| (feat_name, {cst=Equal atoms;in_param=[one_in]}) -> sprintf "%s=%s=$%s" feat_name (List_.to_string string_of_value "|" atoms) (param_string one_in)
| (feat_name, {cst=Different [];in_param=[one_in]}) -> sprintf "%s=$%s" feat_name (param_string one_in)
| (feat_name, {cst=Different atoms;in_param=[one_in]}) -> sprintf "%s≠%s^%s=%s" feat_name (List_.to_string string_of_value "|" atoms) feat_name (param_string one_in)
| _ -> Error.bug "[P_feature.to_string] multiple parameters are not handled"
let build ?domain ?pat_vars = function
| ({Ast.kind=Ast.Absent; name=name}, loc) ->
Domain.check_feature_name ~loc ?domain name;
(name, {cst=Absent;in_param=[];})
| ({Ast.kind=Ast.Equality unsorted_values; name=name}, loc) ->
let values = Feature_value.build_disj ~loc ?domain name unsorted_values in (name, {cst=Equal values;in_param=[];})
| ({Ast.kind=Ast.Disequality unsorted_values; name=name}, loc) ->
let values = Feature_value.build_disj ~loc ?domain name unsorted_values in (name, {cst=Different values;in_param=[];})
| ({Ast.kind=Ast.Equal_param var; name=name}, loc) ->
begin
match pat_vars with
| None -> Error.bug ~loc "[P_feature.build] param '%s' in an unparametrized rule" var
| Some l ->
match List_.index var l with
| Some index -> (name, {cst=Different []; in_param = [index]})
| None -> Error.build ~loc "[P_feature.build] Unknown pattern variable '%s'" var
end
end (* module P_feature *)
(* ================================================================================ *)
module G_fs = struct
(* list are supposed to be strictly ordered wrt compare *)
type t = G_feature.t list
(* ---------------------------------------------------------------------- *)
let empty = []
(* ---------------------------------------------------------------------- *)
let set_feat ?loc ?domain feature_name atom t =
let new_value = Feature_value.build_value ?loc ?domain feature_name atom in
let rec loop = function
| [] -> [(feature_name, new_value)]
| ((fn,_)::_) as t when feature_name < fn -> (feature_name, new_value)::t
| (fn,_)::t when feature_name = fn -> (feature_name, new_value)::t
| (fn,a)::t -> (fn,a) :: (loop t)
in loop t
(* ---------------------------------------------------------------------- *)
let del_feat = List_.sort_remove_assoc_opt
(* ---------------------------------------------------------------------- *)
let get_atom = List_.sort_assoc
(* ---------------------------------------------------------------------- *)
let get_string_atom feat_name t =
match List_.sort_assoc feat_name t with
| None -> None
| Some v -> Some (conll_string_of_value v)
(* ---------------------------------------------------------------------- *)
let get_float_feat feat_name t =
match List_.sort_assoc feat_name t with
| None -> None
| Some (Float i) -> Some i
| Some (String s) -> Error.build "[Fs.get_float_feat] feat_name=%s, value=%s" feat_name s
(* ---------------------------------------------------------------------- *)
let to_string t = List_.to_string G_feature.to_string "," t
(* ---------------------------------------------------------------------- *)
let to_gr t = List_.to_string G_feature.to_gr ", " t
(* ---------------------------------------------------------------------- *)
let build ?domain ast_fs =
let unsorted = List.map (fun feat -> G_feature.build ?domain feat) ast_fs in
List.sort G_feature.compare unsorted
(* ---------------------------------------------------------------------- *)
let of_conll ?loc ?domain line =
let (c2, c3, c4, c5) = Domain.conll_fields domain in
let raw_list0 =
(c2, Feature_value.build_value ?loc ?domain c2 line.Conll.form)
:: (c4, Feature_value.build_value ?loc ?domain c4 line.Conll.upos)
:: (List.map (fun (f,v) -> (f, Feature_value.build_value ?loc ?domain f v)) line.Conll.feats) in
let raw_list1 = match line.Conll.xpos with
| "" | "_" -> raw_list0
| s -> (c5, Feature_value.build_value ?loc ?domain c5 s) :: raw_list0 in
let raw_list2 = match line.Conll.lemma with
| "" | "_" -> raw_list1
| s -> (c3, Feature_value.build_value ?loc ?domain c3 s) :: raw_list1 in
List.sort G_feature.compare raw_list2
(* ---------------------------------------------------------------------- *)
let pst_leaf ?loc ?domain phon = [("phon", Feature_value.build_value ?loc ?domain "phon" phon)]
let pst_node ?loc ?domain cat = [("cat", Feature_value.build_value ?loc ?domain "cat" cat)]
(* ---------------------------------------------------------------------- *)
exception Fail_unif
let unif fs1 fs2 =
let rec loop = function
| [], fs | fs, [] -> fs
| (f1::t1, f2::t2) when G_feature.compare f1 f2 < 0 -> f1 :: loop (t1, f2::t2)
| (f1::t1, f2::t2) when G_feature.compare f1 f2 > 0 -> f2 :: loop (f1::t1, t2)
(* all remaining case are fn1 = fn2 *)
| ((fn, a1)::t1, (_, a2)::t2) when a1=a2 -> (fn,a1) :: (loop (t1, t2))
| _ -> raise Fail_unif
in try Some (loop (fs1, fs2)) with Fail_unif -> None
(* ---------------------------------------------------------------------- *)
let get_main ?main_feat t =
let default_list = ["phon"; "form"; "label"; "cat"; "upos"] in
let main_list = match main_feat with
| None -> default_list
| Some string -> (Str.split (Str.regexp "\\( *; *\\)\\|#") string) @ default_list in
let rec loop = function
| [] -> (None, t)
| feat_name :: tail ->
match List_.sort_assoc feat_name t with
| Some atom -> (Some (feat_name, atom), List_.sort_remove_assoc feat_name t)
| None -> loop tail in
loop main_list
(* ---------------------------------------------------------------------- *)
let to_dot ?(decorated_feat=("",[])) ?main_feat t =
let buff = Buffer.create 32 in
let () = match (fst decorated_feat) with
| "" -> ()
| pid -> bprintf buff "[%s] |
\n" pid in
let next =
match get_main ?main_feat t with
| (None, sub) -> sub
| (Some (feat_name,atom), sub) ->
if List.mem feat_name (snd decorated_feat)
then bprintf buff "%s |
\n" (string_of_value atom)
else bprintf buff "%s |
\n" (string_of_value atom);
sub in
let next = List.sort G_feature.print_cmp next in
List.iter
(fun g_feat ->
G_feature.buff_dot buff g_feat
) next;
match Buffer.contents buff with
| "" -> ""
| s -> sprintf "\n" s
(* ---------------------------------------------------------------------- *)
let to_word ?main_feat t =
match get_main ?main_feat t with
| (None, _) -> "#"
| (Some (_,atom), _) -> string_of_value atom
(* ---------------------------------------------------------------------- *)
let escape_sharp s =
Str.global_replace (Str.regexp "#") "__SHARP__" s
(* ---------------------------------------------------------------------- *)
let to_dep ?(decorated_feat=("",[])) ?position ?main_feat ?filter t =
let (pid_name, feat_list) = decorated_feat in
let (main_opt, sub) = get_main ?main_feat t in
let sub = List.sort G_feature.print_cmp sub in
let main = match main_opt with
| None -> []
| Some (feat_name, atom) ->
let esc_atom = escape_sharp (string_of_value atom) in
[ if List.mem feat_name (snd decorated_feat)
then sprintf "%s:B:yellow" esc_atom
else esc_atom] in
let word_list = match pid_name with
| "" -> main
| _ -> (sprintf "[%s]:B:yellow" pid_name)::main in
let word = match word_list with
| [] -> "_"
| l -> String.concat "#" l in
let last = match (!Global.debug, position) with
| (true, Some f) -> [(G_feature.to_string ("position", Float f))^":B:lightblue"]
| _ -> [] in
let lines = List.fold_left
(fun acc (feat_name, atom) ->
let esc_atom = escape_sharp (G_feature.to_string (decode_feat_name feat_name, atom)) in
if List.mem feat_name (snd decorated_feat)
then (sprintf "%s:B:yellow" esc_atom) :: acc
else
match filter with
| Some test when not (test feat_name) -> acc
| _ -> esc_atom :: acc
) last sub in
let subword = String.concat "#" (List.rev lines) in
sprintf " word=\"%s\"; subword=\"%s\"" word subword
(* ---------------------------------------------------------------------- *)
let to_conll_string ?exclude t =
let reduced_t = match exclude with
| None -> t
| Some list -> List.filter (fun (fn,_) -> not (List.mem fn list)) t in
let ud_ordering = (* In UD CoNLL-U format, features are sorted wrt lowercase form *)
List.sort
(fun feat1 feat2 -> Pervasives.compare (String.lowercase_ascii (G_feature.get_name feat1)) (String.lowercase_ascii (G_feature.get_name feat2)))
reduced_t in
match reduced_t with
| [] -> "_"
| _ -> String.concat "|"
(List.map
(function
| (fn, String "true") -> fn
| (fn, fv) -> (decode_feat_name fn)^"="^(string_of_value fv))
ud_ordering
)
(* ---------------------------------------------------------------------- *)
let to_conll ?exclude t =
let reduced_t = match exclude with
| None -> t
| Some list -> List.filter (fun (fn,_) -> not (List.mem fn list)) t in
let ud_ordering = (* In UD CoNLL-U format, features are sorted wrt lowercase form *)
List.sort
(fun feat1 feat2 -> Pervasives.compare (String.lowercase_ascii (G_feature.get_name feat1)) (String.lowercase_ascii (G_feature.get_name feat2)))
reduced_t in
List.map (fun (fn, fv) -> (fn, string_of_value fv)) ud_ordering
end (* module G_fs *)
(* ================================================================================ *)
module P_fs = struct
(* list are supposed to be striclty ordered wrt compare *)
type t = P_feature.t list
let empty = []
let to_json ?domain t = `List (List.map (P_feature.to_json ?domain) t)
let check_position ?param position t =
try
match (List.assoc "position" t, position) with
| ({P_feature.cst=P_feature.Equal pos_list; in_param=[]}, Some p) -> List.mem (Float p) pos_list
| ({P_feature.cst=P_feature.Equal pos_list; in_param=[]}, None) -> false
| ({P_feature.cst=P_feature.Different pos_list; in_param=[]}, Some p) -> not (List.mem (Float p) pos_list)
| ({P_feature.cst=P_feature.Different pos_list; in_param=[]}, None) -> false
| ({P_feature.cst=P_feature.Absent}, Some _) -> false
| ({P_feature.cst=P_feature.Absent}, None) -> true
| _ -> Error.bug "Position can't be parametrized"
with Not_found -> true
let build ?domain ?pat_vars ast_fs =
let unsorted = List.map (P_feature.build ?domain ?pat_vars) ast_fs in
List.sort P_feature.compare unsorted
let feat_list t = List.map P_feature.get_name t
let to_string t = List_.to_string P_feature.to_string "\\n" t
let to_dep ?filter param_names t =
let reduced = match filter with
| None -> t
| Some test -> List.filter (fun (fn,_) -> test fn) t in
List_.to_string (P_feature.to_string ~param_names) "#" reduced
let to_dot t = List_.to_string P_feature.to_string "\\n" t
exception Fail
let match_ ?param p_fs g_fs =
let p_fs_wo_pos =
try List.remove_assoc "position" p_fs
with Not_found -> p_fs in
let rec loop acc = function
| [], _ -> acc
(* a feature_name present only in instance -> Skip it *)
| ((fn_pat, fv_pat)::t_pat, (fn, _)::t) when fn_pat > fn -> loop acc ((fn_pat, fv_pat)::t_pat, t)
(* Two next cases: p_fs requires for the absence of a feature -> OK *)
| ((fn_pat, {P_feature.cst=P_feature.Absent})::t_pat, []) -> loop acc (t_pat, [])
| ((fn_pat, {P_feature.cst=P_feature.Absent})::t_pat, (fn, fa)::t) when fn_pat < fn -> loop acc (t_pat, (fn, fa)::t)
(* Two next cases: each feature_name present in p_fs must be in instance: [] means unif failure *)
| _, [] -> raise Fail
| ((fn_pat, _)::_, (fn, _)::_) when fn_pat < fn -> raise Fail
(* Next cases: fn_pat = fn *)
| ((_, {P_feature.cst=cst; P_feature.in_param=in_param})::t_pat, (_, atom)::t) ->
(* check for the constraint part and fail if needed *)
let () = match cst with
| P_feature.Absent -> raise Fail
| P_feature.Equal fv when not (List_.sort_mem atom fv) -> raise Fail
| P_feature.Different fv when List_.sort_mem atom fv -> raise Fail
| _ -> () in
(* if constraint part don't fail, look for lexical parameters *)
match (acc, in_param) with
| (_,[]) -> loop acc (t_pat,t)
| (None,_) -> Log.bug "[P_fs.match_] Parametrized constraint in a non-parametrized rule"; exit 2
| (Some param, [index]) ->
(match Lex_par.select index (string_of_value atom) param with
| None -> raise Fail
| Some new_param -> loop (Some new_param) (t_pat,t)
)
| _ -> Error.bug "[P_fs.match_] several different parameters contraints for the same feature is not implemented" in
loop param (p_fs_wo_pos,g_fs)
exception Fail_unif
let unif fs1 fs2 =
let rec loop = function
| [], fs -> fs
| fs, [] -> fs
| ((fn1,v1)::t1, (fn2,v2)::t2) when fn1 < fn2 -> (fn1,v1) :: (loop (t1,(fn2,v2)::t2))
| ((fn1,v1)::t1, (fn2,v2)::t2) when fn1 > fn2 -> (fn2,v2) :: (loop ((fn1,v1)::t1,t2))
(* all remaining case are fn1 = fn2 *)
| ((fn1,v1)::t1, (fn2,v2)::t2) (* when fn1 = fn2 *) ->
try (fn1,P_feature.unif_value v1 v2) :: (loop (t1,t2))
with
| P_feature.Fail_unif -> raise Fail_unif
| Error.Build (msg,_) -> Error.build "Feature '%s', %s" fn1 msg
in loop (fs1, fs2)
end (* module P_fs *)