Commit 1ceae0b2 authored by Bruno Guillaume's avatar Bruno Guillaume

remove `conll_fields` mechanism

parent de399a2a
......@@ -16,6 +16,12 @@ open Grew_types
(* ================================================================================ *)
module Ast = struct
let to_uname = function
| "cat" -> "upos"
| "pos" -> "xpos"
| "phon" -> "form"
| x -> x
(* general function for checking that an identifier is of the right kind *)
(* allowed is a char list which is a sub set of ['#'; '.'; ':'; '*'] *)
let check_special name allowed s =
......@@ -66,7 +72,7 @@ module Ast = struct
let parse_feature_ident s =
check_special "feature ident" ["."] s;
match Str.full_split (Str.regexp "\\.") s with
| [Str.Text base; Str.Delim "."; Str.Text fn] -> (base, fn)
| [Str.Text base; Str.Delim "."; Str.Text fn] -> (base, to_uname fn)
| _ -> Error.build "The identifier '%s' must be a feature identifier (with exactly one '.' symbol, like \"V.cat\" for instance)" s
(* ---------------------------------------------------------------------- *)
......@@ -78,7 +84,7 @@ module Ast = struct
check_special "feature ident" ["."] s;
match Str.full_split (Str.regexp "\\.") s with
| [Str.Text base; ] -> (base, None)
| [Str.Text base; Str.Delim "."; Str.Text fn] -> (base, Some fn)
| [Str.Text base; Str.Delim "."; Str.Text fn] -> (base, Some (to_uname fn))
| _ -> Error.build "The identifier '%s' must be a feature identifier (with at most one '.' symbol, like \"V\" or \"V.cat\" for instance)" s
......@@ -381,7 +387,6 @@ module Ast = struct
Closed (feature_name, without_duplicate)
type domain = {
conll_fields: string list option;
feature_domain: feature_spec list;
label_domain: (string * string list) list;
}
......
......@@ -13,6 +13,8 @@ open Grew_types
module Ast : sig
val to_uname: feature_name -> feature_name
(* ---------------------------------------------------------------------- *)
(* simple_ident: cat or V *)
type simple_ident = Id.name
......@@ -188,7 +190,6 @@ module Ast : sig
val build_closed: feature_name -> feature_atom list -> feature_spec
type domain = {
conll_fields: string list option;
feature_domain: feature_spec list;
label_domain: (string * string list) list;
}
......
......@@ -113,10 +113,8 @@ end
module Feature_domain = struct
type t = {
decls: Ast.feature_spec list;
conll_fields: (string * string * string * string);
}
let default_conll_fields = ("form", "lemma", "upos", "xpos")
let dump t =
Printf.printf "========= Feature domain =========\n";
......@@ -151,14 +149,16 @@ module Feature_domain = struct
| (Ast.Closed ("position",_)) :: _ ->
Error.build "[Feature_domain] The feature named \"position\" is reserved and must be types 'integer', you cannot not redefine it"
| (Ast.Num fn) :: tail | (Ast.Open fn) :: tail | Ast.Closed (fn,_) :: tail when is_defined fn tail ->
Error.build "[Feature_domain] The feature named \"%s\" is defined several times" fn
begin
if fn = "form" || fn = "upos" || fn = "xpos"
then Error.build "[Feature_domain] The feature named \"%s\" is defined several times (see http://grew.fr/features/note-about-backward-compatibility for details)" fn
else Error.build "[Feature_domain] The feature named \"%s\" is defined several times" fn
end
| x :: tail -> x :: (build_decls tail)
let build ?conll_fields feature_spec_list =
let decls = build_decls feature_spec_list in
match conll_fields with
| Some cf -> { decls; conll_fields=cf }
| None -> { decls; conll_fields = default_conll_fields }
let build feature_spec_list =
{decls = build_decls feature_spec_list}
let feature_names feature_domain =
List.map (function Ast.Closed (fn, _) | Ast.Open fn | Ast.Num fn -> fn) feature_domain.decls
......@@ -206,11 +206,6 @@ module Feature_domain = struct
(* This function is defined here because it is used by check_feature *)
let build_disj ?loc ?feature_domain name unsorted_values =
let intern_name = match name with
| "cat" -> "upos"
| "pos" -> "xpos"
| "phon" -> "form"
| x -> x in
let values = List.sort Pervasives.compare unsorted_values in
match (feature_domain, name.[0]) with
| (None, _)
......@@ -229,7 +224,7 @@ module Feature_domain = struct
| l when List.for_all (fun x -> x.[0] = '_') l -> List.map (fun s -> String s) values
| l -> Error.build ?loc "Unknown feature values '%s' for feature name '%s'"
(List_.to_string (fun x->x) ", " l)
intern_name
name
)
| _::t -> loop t in
loop dom
......@@ -305,9 +300,4 @@ module Domain = struct
if not (Feature_domain.is_defined name feature_domain.decls)
then Error.build ?loc "The feature name \"%s\" in not defined in the domain" name
| _ -> ()
let conll_fields = function
| Some (Feature feature_domain) | Some (Both (_, feature_domain)) ->
feature_domain.Feature_domain.conll_fields
| _ -> Feature_domain.default_conll_fields
end
......@@ -36,14 +36,13 @@ end
module Feature_domain: sig
type t
val build:
?conll_fields: (string * string * string * string) ->
Ast.feature_spec list -> t
val build: Ast.feature_spec list -> t
(** [sub domain fn1 fn2] returns [true] iff the domain of [fn1] is a subset if the domain of [fn2]. *)
val sub: t -> feature_name -> feature_name -> bool
val merge: Ast.feature_spec list -> Ast.feature_spec list -> Ast.feature_spec list
end (* module Feature_domain *)
(* ================================================================================ *)
......@@ -81,6 +80,4 @@ module Domain : sig
(** [check_feature_name ~loc domain feature_name] fails iff a domain is set and [feature_name] is not defined in the current domain. *)
val check_feature_name: ?loc:Loc.t -> ?domain:t -> feature_name -> unit
val conll_fields: t option -> (string * string * string * string)
end
......@@ -225,17 +225,16 @@ module G_fs = struct
(* ---------------------------------------------------------------------- *)
let of_conll ?loc ?domain line =
let (c2, c3, c4, c5) = Domain.conll_fields domain in
let raw_list0 =
(c2, Feature_value.build_value ?loc ?domain c2 line.Conll.form)
:: (c4, Feature_value.build_value ?loc ?domain c4 line.Conll.upos)
("form", Feature_value.build_value ?loc ?domain "form" line.Conll.form)
:: ("upos", Feature_value.build_value ?loc ?domain "upos" line.Conll.upos)
:: (List.map (fun (f,v) -> (f, Feature_value.build_value ?loc ?domain f v)) line.Conll.feats) in
let raw_list1 = match line.Conll.xpos with
| "" | "_" -> raw_list0
| s -> (c5, Feature_value.build_value ?loc ?domain c5 s) :: raw_list0 in
| s -> ("xpos", Feature_value.build_value ?loc ?domain "xpos" s) :: raw_list0 in
let raw_list2 = match line.Conll.lemma with
| "" | "_" -> raw_list1
| s -> (c3, Feature_value.build_value ?loc ?domain c3 s) :: raw_list1 in
| s -> ("lemma", Feature_value.build_value ?loc ?domain "lemma" s) :: raw_list1 in
List.sort G_feature.compare raw_list2
......
......@@ -959,16 +959,15 @@ module G_graph = struct
) gov_labs in
let id_of_gid gid = Conll.Id.of_string (string_of_float (get_num gid)) in
let (c2, c3, c4, c5) = Domain.conll_fields domain in
let fs = G_node.get_fs node in
Some {
Conll.line_num = 0;
id = id_of_gid gid;
form = (match G_fs.get_string_atom c2 fs with Some p -> p | None -> "_");
lemma = (match G_fs.get_string_atom c3 fs with Some p -> p | None -> "_");
upos = (match G_fs.get_string_atom c4 fs with Some p -> p | None -> "_");
xpos = (match G_fs.get_string_atom c5 fs with Some p -> p | None -> "_");
feats = (G_fs.to_conll ~exclude: [c2; c3; c4; c5; "position"] fs);
form = (match G_fs.get_string_atom "form" fs with Some p -> p | None -> "_");
lemma = (match G_fs.get_string_atom "lemma" fs with Some p -> p | None -> "_");
upos = (match G_fs.get_string_atom "upos" fs with Some p -> p | None -> "_");
xpos = (match G_fs.get_string_atom "xpos" fs with Some p -> p | None -> "_");
feats = (G_fs.to_conll ~exclude: ["form"; "lemma"; "upos"; "xpos"; "position"] fs);
deps = List.map (fun (gov,lab) -> ( Conll.Id.of_string gov, lab)) sorted_gov_labs;
efs = G_node.get_efs node;
} ) snodes in
......
......@@ -209,15 +209,9 @@ module Old_grs = struct
) t.strategies
let domain_build ast_domain =
let conll_fields = match ast_domain.Ast.conll_fields with
| Some [c2;c3;c4;c5] -> Some (c2,c3,c4,c5)
| Some _ -> Error.build "conll_fields declaration does not contains exactly 4 values"
| _ -> None in
Domain.build
(Label_domain.build ast_domain.Ast.label_domain)
(Feature_domain.build ?conll_fields ast_domain.Ast.feature_domain)
(Feature_domain.build ast_domain.Ast.feature_domain)
let build filename =
let ast = Loader.grs filename in
......@@ -520,16 +514,6 @@ module Grs = struct
let domain t = t.domain
let from_ast filename ast =
let conll_fields = match List_.opt_map
(fun x -> match x with
| New_ast.Conll_fields desc -> Some desc
| _ -> None
) ast with
| [] -> None
| [[c2;c3;c4;c5]] -> Some (c2,c3,c4,c5)
| [_] -> Error.build "conll_fields declaration does not contains exactly 4 values"
| _ :: _ :: _ -> Error.build "Several conll_fields declaration" in
let feature_domains = List_.opt_map
(fun x -> match x with
| New_ast.Features desc -> Some desc
......@@ -538,7 +522,7 @@ module Grs = struct
let feature_domain = match feature_domains with
| [] -> None
| h::t -> Some (Feature_domain.build ?conll_fields (List.fold_left Feature_domain.merge h t)) in
| h::t -> Some (Feature_domain.build (List.fold_left Feature_domain.merge h t)) in
let label_domains = List_.opt_map
(fun x -> match x with
......
......@@ -154,7 +154,7 @@ and standard target = parse
| "import" { IMPORT }
| "domain" { DOMAIN }
| "features" { FEATURES }
| "conll_fields" { CONLL_FIELDS }
| "conll_fields" { Log.fwarning "\"conll_fields\" is deprecated, ignored"; DUMMY }
| "feature" { FEATURE }
| "file" { FILE }
| "labels" { Global.label_flag := true; LABELS }
......
......@@ -32,6 +32,8 @@ let get_loc () = Global.get_loc ()
let localize t = (t,get_loc ())
%}
%token DUMMY
%token LACC /* { */
%token RACC /* } */
%token LBRACKET /* [ */
......@@ -73,7 +75,6 @@ let localize t = (t,get_loc ())
%token INCL /* include */
%token IMPORT /* import */
%token FEATURES /* features */
%token CONLL_FIELDS /* conll_fields */
%token FEATURE /* feature */
%token FILE /* file */
%token LABELS /* labels */
......@@ -221,20 +222,13 @@ gr_item:
| n1_loc=node_id_with_loc label=delimited(LTR_EDGE_LEFT,label_ident,LTR_EDGE_RIGHT) n2=node_id
{ Graph_edge ({Ast.edge_id = None; src=fst n1_loc; edge_label_cst=Ast.Pos_list [label]; tar=n2}, snd n1_loc) }
/*=============================================================================================*/
/* CONLL FIELD DEFINITION */
/*=============================================================================================*/
conll_fields:
| CONLL_FIELDS LACC x=separated_nonempty_list_final_opt(SEMIC,simple_id) RACC { x }
/*=============================================================================================*/
/* DOMAIN DEFINITION */
/*=============================================================================================*/
domain:
| c=option(conll_fields) f=features_group g=labels EOF
| c=option(DUMMY) f=features_group g=labels EOF
{
{ Ast.feature_domain = f;
conll_fields = c;
label_domain = g;
}
}
......@@ -292,7 +286,7 @@ feature:
{ Ast.Open feature_name }
feature_name:
| ci=ID { ci }
| ci=ID { Ast.to_uname ci }
features_values:
| SHARP { ["#"] }
......@@ -555,29 +549,30 @@ node_features:
/* cat = n|v|adj */
| name_loc=simple_id_with_loc EQUAL values=separated_nonempty_list(PIPE,feature_value)
{ let (name,loc) = name_loc in
let uname = Ast.to_uname name in
match values with
| ["*"] -> ({Ast.kind = Ast.Disequality []; name},loc)
| _ -> ({Ast.kind = Ast.Equality values; name }, loc) }
| ["*"] -> ({Ast.kind = Ast.Disequality []; name=uname},loc)
| _ -> ({Ast.kind = Ast.Equality values; name=uname }, loc) }
/* cat = * */
| name_loc=simple_id_with_loc EQUAL STAR
{ let (name,loc) = name_loc in ({Ast.kind = Ast.Disequality []; name},loc) }
{ let (name,loc) = name_loc in ({Ast.kind = Ast.Disequality []; name=Ast.to_uname name},loc) }
/* cat */
| name_loc=simple_id_with_loc
{ let (name,loc) = name_loc in ({Ast.kind = Ast.Disequality []; name},loc) }
{ let (name,loc) = name_loc in ({Ast.kind = Ast.Disequality []; name=Ast.to_uname name},loc) }
/* cat<>n|v|adj */
| name_loc=simple_id_with_loc DISEQUAL values=separated_nonempty_list(PIPE,feature_value)
{ let (name,loc) = name_loc in ( {Ast.kind = Ast.Disequality values; name}, loc) }
{ let (name,loc) = name_loc in ( {Ast.kind = Ast.Disequality values; name=Ast.to_uname name}, loc) }
/* lemma=$lem */
| name_loc=simple_id_with_loc EQUAL p=DOLLAR_ID
{ let (name,loc) = name_loc in ( {Ast.kind = Ast.Equal_param p; name }, loc) }
{ let (name,loc) = name_loc in ( {Ast.kind = Ast.Equal_param p; name=Ast.to_uname name }, loc) }
/* !lemma */
| BANG name_loc=simple_id_with_loc
{ let (name,loc) = name_loc in ({Ast.kind = Ast.Absent; name}, loc) }
{ let (name,loc) = name_loc in ({Ast.kind = Ast.Absent; name=Ast.to_uname name}, loc) }
/*=============================================================================================*/
/* COMMANDS DEFINITION */
......@@ -734,7 +729,6 @@ new_grs:
| decls = list(decl) EOF { decls }
decl:
| c=conll_fields { New_ast.Conll_fields c }
| f=features_group { New_ast.Features f }
| l=labels { New_ast.Labels l }
| r=rule { New_ast.Rule r }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment