Commit e766a53a authored by Bruno Guillaume's avatar Bruno Guillaume

add conll_fields in feature_domain

parent 4e050ac5
......@@ -111,7 +111,12 @@ end
(* ================================================================================ *)
module Feature_domain = struct
type t = Ast.feature_spec list
type t = {
decls: Ast.feature_spec list;
conll_fields: (string * string * string * string);
}
let default_conll_fields = ("phon", "lemma", "cat", "pos")
let dump t =
Printf.printf "========= Feature domain =========\n";
......@@ -119,7 +124,7 @@ module Feature_domain = struct
| Ast.Closed (fn, values) -> Printf.printf " %s : %s\n" fn (String.concat ", " values)
| Ast.Open fn -> Printf.printf " %s is OPEN\n" fn
| Ast.Num fn -> Printf.printf " %s id NUMERICAL\n" fn
) t;
) t.decls;
Printf.printf "==================================\n%!"
let to_json t =
......@@ -128,7 +133,7 @@ module Feature_domain = struct
| Ast.Closed (fn, values) -> (fn, `List (List.map (fun x -> `String x) values))
| Ast.Open fn -> (fn, `String "Open")
| Ast.Num fn -> (fn, `String "Num")
) t
) t.decls
)
let get_name = function
......@@ -136,21 +141,27 @@ module Feature_domain = struct
| Ast.Open fn -> fn
| Ast.Num fn -> fn
let is_defined feature_name feature_domain =
List.exists (fun item -> get_name item = feature_name) feature_domain
let is_defined feature_name decls =
List.exists (fun item -> get_name item = feature_name) decls
let rec build = function
let rec build_decls = function
| [] -> [Ast.Num "position"]
| (Ast.Num "position") :: tail -> Log.warning "[Feature_domain] declaration of the feature name \"position\" in useless"; build tail
| (Ast.Num "position") :: tail -> Log.warning "[Feature_domain] declaration of the feature name \"position\" in useless"; build_decls tail
| (Ast.Open "position") :: _
| (Ast.Closed ("position",_)) :: _ ->
Error.build "[Feature_domain] The feature named \"position\" is reserved and must be types 'integer', you cannot not redefine it"
| (Ast.Num fn) :: tail | (Ast.Open fn) :: tail | Ast.Closed (fn,_) :: tail when is_defined fn tail ->
Error.build "[Feature_domain] The feature named \"%s\" is defined several times" fn
| x :: tail -> x :: (build tail)
| x :: tail -> x :: (build_decls tail)
let build ?conll_fields feature_spec_list =
let decls = build_decls feature_spec_list in
match conll_fields with
| Some cf -> { decls; conll_fields=cf }
| None -> { decls; conll_fields = default_conll_fields }
let feature_names feature_domain =
List.map (function Ast.Closed (fn, _) | Ast.Open fn | Ast.Num fn -> fn) feature_domain
List.map (function Ast.Closed (fn, _) | Ast.Open fn | Ast.Num fn -> fn) feature_domain.decls
let merge list1 list2 =
List.fold_left
......@@ -175,13 +186,13 @@ module Feature_domain = struct
| Ast.Open fn when fn = feature_name -> true
| Ast.Num fn when fn = feature_name -> true
| _ -> false
) feature_domain
) feature_domain.decls
let is_num feature_domain feature_name =
List.exists (function
| Ast.Num fn when fn = feature_name -> true
| _ -> false
) feature_domain
) feature_domain.decls
let sub feature_domain name1 name2 =
match (get name1 feature_domain, get name2 feature_domain) with
......@@ -191,7 +202,7 @@ module Feature_domain = struct
| _ -> false
let is_open feature_domain name =
List.exists (function Ast.Open n when n=name -> true | _ -> false) feature_domain
List.exists (function Ast.Open n when n=name -> true | _ -> false) feature_domain.decls
(* This function is defined here because it is used by check_feature *)
let build_disj ?loc ?feature_domain name unsorted_values =
......@@ -199,7 +210,7 @@ module Feature_domain = struct
match (feature_domain, name.[0]) with
| (None, _)
| (Some _, '_') -> List.map (fun s -> String s) values (* no check on feat_name starting with '_' *)
| (Some dom, _) ->
| (Some {decls=dom}, _) ->
let rec loop = function
| [] -> Error.build ?loc "[GRS] Unknown feature name '%s'" name
| ((Ast.Open n)::_) when n = name ->
......@@ -286,7 +297,12 @@ module Domain = struct
let check_feature_name ?loc ?domain name = match domain with
| Some (Feature feature_domain) | Some (Both (_, feature_domain)) ->
if not (Feature_domain.is_defined name feature_domain)
if not (Feature_domain.is_defined name feature_domain.decls)
then Error.build ?loc "The feature name \"%s\" in not defined in the domain" name
| _ -> ()
let conll_fields = function
| Some (Feature feature_domain) | Some (Both (_, feature_domain)) ->
feature_domain.Feature_domain.conll_fields
| _ -> Feature_domain.default_conll_fields
end
......@@ -36,8 +36,9 @@ end
module Feature_domain: sig
type t
val build: Ast.feature_spec list -> t
val build:
?conll_fields: (string * string * string * string) ->
Ast.feature_spec list -> t
(** [sub domain fn1 fn2] returns [true] iff the domain of [fn1] is a subset if the domain of [fn2]. *)
val sub: t -> feature_name -> feature_name -> bool
......@@ -80,4 +81,6 @@ module Domain : sig
(** [check_feature_name ~loc domain feature_name] fails iff a domain is set and [feature_name] is not defined in the current domain. *)
val check_feature_name: ?loc:Loc.t -> ?domain:t -> feature_name -> unit
val conll_fields: t option -> (string * string * string * string)
end
......@@ -40,7 +40,7 @@ module G_feature = struct
let compare feat1 feat2 = Pervasives.compare (get_name feat1) (get_name feat2)
(* another order used for printing purpose only *)
let print_order = ["phon"; "cat"; "lemma"; "pos"]
let print_order = ["phon"; "form"; "cat"; "upos"; "lemma"; "pos"; "xpos"]
let print_cmp (name1,_) (name2,_) =
match (List_.index name1 print_order, List_.index name2 print_order) with
| (Some i, Some j) -> Pervasives.compare i j
......@@ -225,16 +225,17 @@ module G_fs = struct
(* ---------------------------------------------------------------------- *)
let of_conll ?loc ?domain line =
let (c2, c3, c4, c5) = Domain.conll_fields domain in
let raw_list0 =
("phon", Feature_value.build_value ?loc ?domain "phon" line.Conll.form)
:: ("cat", Feature_value.build_value ?loc ?domain "cat" line.Conll.upos)
(c2, Feature_value.build_value ?loc ?domain c2 line.Conll.form)
:: (c4, Feature_value.build_value ?loc ?domain c4 line.Conll.upos)
:: (List.map (fun (f,v) -> (f, Feature_value.build_value ?loc ?domain f v)) line.Conll.feats) in
let raw_list1 = match line.Conll.xpos with
| "" | "_" -> raw_list0
| s -> ("pos", Feature_value.build_value ?loc ?domain "pos" s) :: raw_list0 in
| s -> (c5, Feature_value.build_value ?loc ?domain c5 s) :: raw_list0 in
let raw_list2 = match line.Conll.lemma with
| "" | "_" -> raw_list1
| s -> ("lemma", Feature_value.build_value ?loc ?domain "lemma" s) :: raw_list1 in
| s -> (c3, Feature_value.build_value ?loc ?domain c3 s) :: raw_list1 in
List.sort G_feature.compare raw_list2
......@@ -257,9 +258,10 @@ module G_fs = struct
(* ---------------------------------------------------------------------- *)
let get_main ?main_feat t =
let default_list = ["phon"; "form"; "label"; "cat"; "upos"] in
let main_list = match main_feat with
| None -> ["phon";"label"; "cat"]
| Some string -> (Str.split (Str.regexp "\\( *; *\\)\\|#") string) @ ["phon";"label"; "cat"] in
| None -> default_list
| Some string -> (Str.split (Str.regexp "\\( *; *\\)\\|#") string) @ default_list in
let rec loop = function
| [] -> (None, t)
| feat_name :: tail ->
......
......@@ -17,6 +17,7 @@ open Grew_ast
open Grew_types
open Grew_edge
open Grew_domain
open Grew_fs
open Grew_node
......@@ -903,15 +904,16 @@ module G_graph = struct
) gov_labs in
let id_of_gid gid = Conll.Id.of_string (string_of_float (get_num gid)) in
let (c2, c3, c4, c5) = Domain.conll_fields domain in
let fs = G_node.get_fs node in
Some {
Conll.line_num = 0;
id = id_of_gid gid;
form = (match G_fs.get_string_atom "phon" fs with Some p -> p | None -> "_");
lemma = (match G_fs.get_string_atom "lemma" fs with Some p -> p | None -> "_");
upos = (match G_fs.get_string_atom "cat" fs with Some p -> p | None -> "_");
xpos = (match G_fs.get_string_atom "pos" fs with Some p -> p | None -> "_");
feats = (G_fs.to_conll ~exclude: ["phon"; "lemma"; "cat"; "pos"; "position"] fs);
form = (match G_fs.get_string_atom c2 fs with Some p -> p | None -> "_");
lemma = (match G_fs.get_string_atom c3 fs with Some p -> p | None -> "_");
upos = (match G_fs.get_string_atom c4 fs with Some p -> p | None -> "_");
xpos = (match G_fs.get_string_atom c5 fs with Some p -> p | None -> "_");
feats = (G_fs.to_conll ~exclude: [c2; c3; c4; c5; "position"] fs);
deps = List.map (fun (gov,lab) -> ( Conll.Id.of_string gov, lab)) sorted_gov_labs;
efs = G_node.get_efs node;
} ) snodes in
......
......@@ -515,14 +515,25 @@ module Grs = struct
let from_ast filename ast =
let conll_fields = match List_.opt_map
(fun x -> match x with
| New_ast.Conll_fields desc -> Some desc
| _ -> None
) ast with
| [] -> None
| [[c2;c3;c4;c5]] -> Some (c2,c3,c4,c5)
| [_] -> Error.build "conll_fields declaration does not contains exactly 4 values"
| _ :: _ :: _ -> Error.build "Several conll_fields declaration" in
let feature_domains = List_.opt_map
(fun x -> match x with
| New_ast.Features desc -> Some desc
| _ -> None
) ast in
let feature_domain = match feature_domains with
| [] -> None
| h::t -> Some (Feature_domain.build (List.fold_left Feature_domain.merge h t)) in
| h::t -> Some (Feature_domain.build ?conll_fields (List.fold_left Feature_domain.merge h t)) in
let label_domains = List_.opt_map
(fun x -> match x with
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment