Commit 0c10f65f authored by Bruno Guillaume's avatar Bruno Guillaume
Browse files

add Python style ranges

parent 5741e6ef
......@@ -19,6 +19,11 @@ type feature_value =
| String of string
| Float of float
let get_range_feature_value range = function
| String s -> String (String_.get_range range s)
| Float f when range = (None, None) -> Float f
| Float f -> Error.run "Cannot extract substring from a numeric feature \"%g\"" f
let string_of_value = function
| String s -> Str.global_replace (Str.regexp "\"") "\\\""
(Str.global_replace (Str.regexp "\\\\") "\\\\\\\\" s)
......@@ -300,12 +305,12 @@ module Ast = struct
{ pattern with pat_pos = new_pat_pos; pat_negs = new_pat_negs;}
type concat_item =
| Qfn_or_lex_item of pointed
| String_item of string
| Qfn_or_lex_item of (pointed * Range.t)
| String_item of (string * Range.t)
let string_of_concat_item = function
| Qfn_or_lex_item pointed -> sprintf "%s.%s" (fst pointed) (snd pointed)
| String_item s -> sprintf "\"%s\"" s
| Qfn_or_lex_item (pointed,range) -> sprintf "%s.%s" (fst pointed) (snd pointed)
| String_item (s,range) -> sprintf "\"%s\"" s
type side = Prepend | Append
......
......@@ -17,6 +17,8 @@ type feature_value =
| String of string
| Float of float
val get_range_feature_value: Range.t -> feature_value -> feature_value
val string_of_value : feature_value -> string
val conll_string_of_value : feature_value -> string
......@@ -168,8 +170,8 @@ module Ast : sig
val complete_and_check_pattern : pattern -> pattern
type concat_item =
| Qfn_or_lex_item of (string * string)
| String_item of string
| Qfn_or_lex_item of (pointed * Range.t)
| String_item of (string * Range.t)
type side = Prepend | Append
......
......@@ -72,6 +72,19 @@ module Error = struct
end (* module Error *)
(* ================================================================================ *)
module Range = struct
type t = (int option * int option)
let to_string = function
| (None, None) -> ""
| (Some x, None) -> sprintf "[%d:]" x
| (None, Some y) -> sprintf "[:%d]" y
| (Some x, Some y) -> sprintf "[%d:%d]" x y
let to_json r = `String (to_string r)
end
(* ================================================================================ *)
module String_ = struct
let rm_first_char = function "" -> "" | s -> String.sub s 1 ((String.length s) - 1)
......@@ -85,6 +98,31 @@ module String_ = struct
| h :: tail -> (loop tail) ^ sep ^ h in
loop l
(** Pyhton like substring extraction
[get_range (init_opt, final_opt) s] return the python output of s[init_opt:final_opt]
NB: indexes correspond to UTF-8 chars. ex: [get_range (None, Some (-1)) "été"] ==> "ét"
*)
let get_range (iopt,fopt) s =
match (iopt, fopt) with
| (None, None) -> s
| _ ->
match CCUtf8_string.of_string s with
| None -> Error.run "[String_.get_range] '%s' is not a valid UTF-8 string encoding" s
| Some utf8_s ->
let char_list = CCUtf8_string.to_list utf8_s in
let len = CCUtf8_string.n_chars utf8_s in
let init = match iopt with
| None -> 0
| Some i when i < 0 -> max (len + i) 0
| Some i -> min i len in
let final = match fopt with
| None -> len
| Some i when i < 0 -> max (len + i) 0
| Some i -> min i len in
if final < init
then ""
else char_list |> CCList.drop init |> CCList.take (final - init) |> CCUtf8_string.of_list |> CCUtf8_string.to_string
end (* module String_ *)
(* ================================================================================ *)
......@@ -634,9 +672,9 @@ module Timeout = struct
| Some delay ->
if Unix.gettimeofday () -. !counter > delay
then raise (Stop delay)
let get_duration () = !duration
end (* module Timeout *)
(* ================================================================================ *)
......
......@@ -52,6 +52,14 @@ module Error: sig
val warning: ?loc: Loc.t -> ('a, unit, string, unit) format4 -> 'a
end
(* ================================================================================ *)
module Range: sig
type t = (int option * int option)
val to_string: t -> string
val to_json: t -> Yojson.Basic.t
end
(* ================================================================================ *)
module String_: sig
(* [re_match regexp s] returns true iff the full string [s] matches with [regexp] *)
......@@ -62,6 +70,12 @@ module String_: sig
val rm_first_char: string -> string
val rev_concat: string -> string list -> string
(** Pyhton like substring extraction
[get_range (init_opt, final_opt) s] return the python output of s[init_opt:final_opt]
NB: indexes correspond to UTF-8 chars. ex: [get_range (None, Some (-1)) "été"] ==> "ét"
*)
val get_range: Range.t -> string -> string
end (* module String_ *)
(* ================================================================================ *)
......
......@@ -32,18 +32,26 @@ module Command = struct
| Node_feat of (command_node * string)
| Edge_feat of (string * string)
| String_item of string
| Lexical_field of (string * string)
let item_to_json = function
| Node_feat (cn, feature_name) -> `Assoc [("copy_feat",
`Assoc [
("node",command_node_to_json cn);
("feature_name", `String feature_name);
]
)]
| Edge_feat (edge_id, feat_name) -> `Assoc [("edge_id", `String edge_id); ("feat_name", `String feat_name)]
| Lexical_field of Ast.pointed
let json_of_item = function
| Node_feat (cn, feature_name) ->
`Assoc
[("copy_feat",
`Assoc [
("node",command_node_to_json cn);
("feature_name", `String feature_name);
]
)]
| Edge_feat (edge_id, feat_name) ->
`Assoc [("edge_id", `String edge_id); ("feat_name", `String feat_name)]
| String_item s -> `Assoc [("string", `String s)]
| Lexical_field (lex,field) -> `Assoc [("lexical_filed", `String (lex ^ "." ^ field))]
| Lexical_field (lex,field) -> `Assoc [("lexical_field", `String (lex ^ "." ^ field))]
type ranged_item = item * Range.t
let json_of_ranged_item (item, range) =
`Assoc [("item", json_of_item item); ("range", Range.to_json range)]
(* the command in pattern *)
type p =
......@@ -55,8 +63,8 @@ module Command = struct
| ADD_EDGE_ITEMS of (command_node * command_node * (string * string) list)
| DEL_FEAT of (command_node * string)
| DEL_EDGE_FEAT of (string * string) (* (edge identifier, feature_name) *)
| UPDATE_FEAT of (command_node * string * item list)
| UPDATE_EDGE_FEAT of (string * string * item list) (* edge identifier, feat_name, new_value *)
| UPDATE_FEAT of (command_node * string * ranged_item list)
| UPDATE_EDGE_FEAT of (string * string * ranged_item list) (* edge identifier, feat_name, new_value *)
(* *)
| NEW_NODE of string
| NEW_BEFORE of (string * command_node)
......@@ -123,7 +131,7 @@ module Command = struct
`Assoc [
("node",command_node_to_json cn);
("feature_name", `String feature_name);
("items", `List (List.map item_to_json items));
("items", `List (List.map json_of_ranged_item items));
]
)]
......@@ -172,7 +180,7 @@ module Command = struct
`Assoc [
("edge_id", `String edge_id);
("feat_name", `String feat_name);
("items", `List (List.map item_to_json items));
("items", `List (List.map json_of_ranged_item items));
]
)]
| DEL_EDGE_FEAT (edge_id, feat_name) ->
......@@ -190,7 +198,7 @@ module Command = struct
("regexp", `String regexp);
("separator", `String separator)
]
)]
)]
| UNORDER cn -> `Assoc [("unorder", command_node_to_json cn)]
| INSERT_BEFORE (cn1,cn2) -> `Assoc [("insert_before", `Assoc [("inserted", command_node_to_json cn1); ("site", command_node_to_json cn2)])]
| INSERT_AFTER (cn1,cn2) -> `Assoc [("insert_after", `Assoc [("inserted", command_node_to_json cn1); ("site", command_node_to_json cn2)])]
......@@ -309,22 +317,22 @@ module Command = struct
| (Ast.Update_feat ((node_or_edge_id, feat_name), ast_items), loc) ->
let of_ast_item = function
| Ast.Qfn_or_lex_item (id_or_lex,feature_name_or_lex_field) ->
| Ast.Qfn_or_lex_item ((id_or_lex,feature_name_or_lex_field), range) ->
if List.mem_assoc id_or_lex lexicons
then
begin
Lexicons.check ~loc id_or_lex feature_name_or_lex_field lexicons;
Lexical_field (id_or_lex, feature_name_or_lex_field)
(Lexical_field (id_or_lex, feature_name_or_lex_field), range)
end
else if List.mem id_or_lex kni
then
begin
Node_feat (cn_of_node_id id_or_lex, feature_name_or_lex_field)
(Node_feat (cn_of_node_id id_or_lex, feature_name_or_lex_field), range)
end
else if List.mem id_or_lex kei
then Edge_feat (id_or_lex, feature_name_or_lex_field)
then (Edge_feat (id_or_lex, feature_name_or_lex_field), range)
else Error.build ~loc "Unknown identifier \"%s\"" id_or_lex
| Ast.String_item s -> String_item s in
| Ast.String_item (s, range) -> (String_item s, range) in
begin
match (List.mem node_or_edge_id kni, List.mem node_or_edge_id kei) with
......
......@@ -26,6 +26,8 @@ module Command : sig
| String_item of string
| Lexical_field of (string * string)
type ranged_item = item * Range.t
type p =
| DEL_NODE of command_node
| DEL_EDGE_EXPL of (command_node * command_node *G_edge.t)
......@@ -35,8 +37,8 @@ module Command : sig
| ADD_EDGE_ITEMS of (command_node * command_node * (string * string) list)
| DEL_FEAT of (command_node * string)
| DEL_EDGE_FEAT of (string * string) (* (edge identifier, feature_name) *)
| UPDATE_FEAT of (command_node * string * item list)
| UPDATE_EDGE_FEAT of (string * string * item list) (* edge identifier, feat_name, new_value *)
| UPDATE_FEAT of (command_node * string * ranged_item list)
| UPDATE_EDGE_FEAT of (string * string * ranged_item list) (* edge identifier, feat_name, new_value *)
(* *)
| NEW_NODE of string
| NEW_BEFORE of (string * command_node)
......
......@@ -215,7 +215,8 @@ and standard target = parse
| "graph" { GRAPH }
| digit+ ('.' digit*)? as number { FLOAT (float_of_string number) }
| "-"? digit+ ('.' digit*) as number { FLOAT (float_of_string number) }
| "-"? digit+ as number { INT (int_of_string number) }
| '$' general_ident { raise (Error "Syntax of lexicon has changed! Please read grew.fr/lexicons_change for updating instructions") }
......
......@@ -118,6 +118,7 @@ let localize t = (t,get_loc ())
%token <string> STRING
%token <string> REGEXP
%token <float> FLOAT
%token <int> INT
%token <string list> COMMENT
%token <string * (int *string) list> LEX_PAR
......@@ -150,6 +151,9 @@ let localize t = (t,get_loc ())
/*=============================================================================================*/
/* BASIC DEFINITIONS */
/*=============================================================================================*/
number:
| f=FLOAT { f }
| i=INT { float_of_int i }
label_ident:
| x=ID { Ast.parse_label_ident x }
......@@ -163,7 +167,7 @@ simple_id_with_loc:
simple_id_or_float:
| id=ID { Ast.parse_simple_ident id }
| v=FLOAT { Printf.sprintf "%g" v }
| v=number { Printf.sprintf "%g" v }
node_id:
| id=ID { Ast.parse_node_ident id }
......@@ -180,7 +184,7 @@ feature_ident_with_loc :
feature_value:
| v=ID { Ast.parse_simple_ident v }
| v=STRING { v }
| v=FLOAT { Printf.sprintf "%g" v }
| v=number { Printf.sprintf "%g" v }
simple_or_pointed :
| id=ID { Ast.parse_simple_or_pointed id }
......@@ -191,15 +195,15 @@ simple_or_pointed_with_loc :
pattern_feature_value:
| v=ID { Ast.parse_simple_or_pointed v }
| v=STRING { Ast.Simple v }
| v=FLOAT { Ast.Simple (Printf.sprintf "%g" v) }
| v=number { Ast.Simple (Printf.sprintf "%g" v) }
ineq_value:
| v=ID { Ineq_sofi (Ast.parse_simple_or_pointed v) }
| v=FLOAT { Ineq_float v }
| v=number{ Ineq_float v }
ineq_value_with_loc:
| v=ID { localize (Ineq_sofi (Ast.parse_simple_or_pointed v)) }
| v=FLOAT { localize (Ineq_float v) }
| v=number{ localize (Ineq_float v) }
/*=============================================================================================*/
/* GREW GRAPH */
......@@ -221,7 +225,7 @@ gr_item:
{ Graph_meta (id, value) }
/* B [phon="pense", lemma="penser", cat=v, mood=ind ] */
| id_loc=node_id_with_loc position=option(delimited(LPAREN, FLOAT ,RPAREN)) feats=delimited(LBRACKET,separated_list_final_opt(COMMA,node_features),RBRACKET)
| id_loc=node_id_with_loc position=option(delimited(LPAREN, number,RPAREN)) feats=delimited(LBRACKET,separated_list_final_opt(COMMA,node_features),RBRACKET)
{ let (id,loc) = id_loc in
Graph_node ({Ast.node_id = id; fs=feats}, loc) }
/* A */
......@@ -307,7 +311,7 @@ basic:
edge_item:
| id=ID { Ast.parse_node_ident id }
| v=FLOAT { Printf.sprintf "%g" v }
| v=number { Printf.sprintf "%g" v }
label_atom:
| name=simple_id_or_float EQUAL l=separated_nonempty_list(PIPE,edge_item) { Ast.Atom_eq (name, l)}
......@@ -542,12 +546,12 @@ pat_item:
/* X.feat >= 12.34 */
| feat_id1_loc=feature_ident_with_loc GE num=FLOAT
| num=FLOAT LE feat_id1_loc=feature_ident_with_loc
| num=number LE feat_id1_loc=feature_ident_with_loc
{ let (feat_id1,loc)=feat_id1_loc in Pat_const (Ast.Feature_ineq_cst (Ast.Ge, feat_id1, num), loc) }
/* X.feat <= 12.34 */
| feat_id1_loc=feature_ident_with_loc LE num=FLOAT
| num=FLOAT GE feat_id1_loc=feature_ident_with_loc
| num=number GE feat_id1_loc=feature_ident_with_loc
{ let (feat_id1,loc)=feat_id1_loc in Pat_const (Ast.Feature_ineq_cst (Ast.Le, feat_id1, num), loc) }
/* A << B */
......@@ -828,14 +832,24 @@ concat_item:
| gi=ID
{
match Ast.parse_simple_or_pointed gi with
| Ast.Simple value -> Ast.String_item value
| Ast.Pointed (s1, s2) -> Ast.Qfn_or_lex_item (s1, s2)
| Ast.Simple value -> Ast.String_item (value, (None,None))
| Ast.Pointed (s1, s2) -> Ast.Qfn_or_lex_item ((s1, s2),(None,None))
}
| s=STRING { Ast.String_item s }
| f=FLOAT { Ast.String_item (Printf.sprintf "%g" f) }
| gi=ID r=range /* Python style substring */
{
match Ast.parse_simple_or_pointed gi with
| Ast.Simple value -> Ast.String_item (value, r)
| Ast.Pointed (s1, s2) -> Ast.Qfn_or_lex_item ((s1, s2), r)
}
| s=STRING { Ast.String_item (s, (None,None)) }
| f=number { Ast.String_item (Printf.sprintf "%g" f, (None,None)) }
range:
| LBRACKET x=INT DDOT y=INT RBRACKET { (Some x, Some y) }
| LBRACKET DDOT y=INT RBRACKET { (None, Some y) }
| LBRACKET x=INT DDOT RBRACKET { (Some x, None) }
| LBRACKET DDOT RBRACKET { (None, None) }
/*=============================================================================================*/
/* ISOLATED PATTERN (grep mode) */
/*=============================================================================================*/
......
......@@ -1024,17 +1024,17 @@ module Rule = struct
let node_find cnode = onf_find ~loc cnode (matching, state.created_nodes) in
let feature_value_of_item feat_name = function
| Command.String_item s -> typed_vos feat_name s
| Command.Node_feat (cnode, feat_name) ->
| (Command.String_item s, range) -> get_range_feature_value range (typed_vos feat_name s)
| (Command.Node_feat (cnode, feat_name), range) ->
let gid = node_find cnode in
let node = G_graph.find gid state.graph in
let fs = G_node.get_fs node in
begin
match G_fs.get_value_opt feat_name fs with
| None -> Error.run ~loc "Node feature named `%s` is undefined" feat_name
| Some v -> v
| Some v -> get_range_feature_value range v
end
| Command.Edge_feat (edge_id, feat_name) ->
| (Command.Edge_feat (edge_id, feat_name), range) ->
begin
match String_map.find_opt edge_id state.e_mapping with
| None -> Error.bug "Cannot find edge_id %s" edge_id
......@@ -1043,22 +1043,22 @@ module Rule = struct
then
begin
match G_edge.to_string_opt ~config edge with
| Some s -> String s
| Some s -> get_range_feature_value range (String s)
| None -> Error.run "Cannot use not regular edge label as a concat item"
end
else
match G_edge.get_sub_opt feat_name edge with
| None -> Error.run ~loc "[onf_apply_command] Edge feature named %s is undefined" feat_name
| Some fv -> fv
| Some fv -> get_range_feature_value range fv
end
| Command.Lexical_field (lex_id, field) ->
| (Command.Lexical_field (lex_id, field), range) ->
begin
match List.assoc_opt lex_id matching.l_param with
| None -> Error.run ~loc "Undefined lexicon %s" lex_id
| Some lexicon ->
match Lexicon.get_opt field lexicon with
| None -> Error.bug "Inconsistent lexicon lex_id=%s field=%s" lex_id field
| Some value -> typed_vos feat_name value
| Some value -> get_range_feature_value range (typed_vos feat_name value)
end in
match command with
......@@ -1168,7 +1168,7 @@ module Rule = struct
| Some (src_gid,old_edge,tar_gid) ->
let new_edge =
match (feat_name, item_list) with
| ("label", [Command.Edge_feat (src_edge_id, "label")]) -> (* special case of label copy "e.label = f.label" *)
| ("label", [Command.Edge_feat (src_edge_id, "label"), (None,None)]) -> (* special case of label copy "e.label = f.label" *)
begin
match String_map.find_opt src_edge_id state.e_mapping with
| None -> Error.run ~loc "UPDATE_EDGE_FEAT (RHS) The edge identifier '%s' is undefined" src_edge_id
......@@ -1365,17 +1365,17 @@ module Rule = struct
let node_find cnode = find ~loc cnode gwh matching in
let feature_value_list_of_item feat_name = function
| Command.String_item s -> [typed_vos feat_name s]
| Command.Node_feat (cnode, feat_name) ->
| (Command.String_item s, range) -> [get_range_feature_value range (typed_vos feat_name s)]
| (Command.Node_feat (cnode, feat_name), range) ->
let gid = node_find cnode in
let node = G_graph.find gid gwh.graph in
let fs = G_node.get_fs node in
begin
match G_fs.get_value_opt feat_name fs with
| None -> Error.run ~loc "Node feature named `%s` is undefined" feat_name
| Some v -> [v]
| Some v -> [get_range_feature_value range v]
end
| Command.Edge_feat (edge_id, feat_name) ->
| (Command.Edge_feat (edge_id, feat_name), range) ->
begin
let (_,edge,_) =
match String_map.find_opt edge_id gwh.e_mapping with
......@@ -1386,13 +1386,13 @@ module Rule = struct
| None -> Error.run ~loc "The edge identifier '%s' is undefined" edge_id in
match G_edge.get_sub_opt feat_name edge with
| None -> Error.run ~loc "[gwh_apply_command] Edge feature named %s is undefined" feat_name
| Some fv -> [fv]
| Some fv -> [get_range_feature_value range fv]
end
| Command.Lexical_field (lex_id, field) ->
| (Command.Lexical_field (lex_id, field), range) ->
begin
match List.assoc_opt lex_id matching.l_param with
| None -> Error.run ~loc "Undefined lexicon %s" lex_id
| Some lexicon -> List.map (fun x -> typed_vos feat_name x) (Lexicon.read_all field lexicon)
| Some lexicon -> List.map (fun x -> get_range_feature_value range (typed_vos feat_name x)) (Lexicon.read_all field lexicon)
end in
match command with
......@@ -1560,7 +1560,8 @@ module Rule = struct
let new_edges =
match (feat_name, item_list) with
(* special behavior for "f.label = e.label" *)
| ("label", [Command.Edge_feat (src_edge_id, "label")]) ->
| ("label", [Command.Edge_feat (src_edge_id, "label"), (None,None)]) -> (* special case of label copy "e.label = f.label" *)
let src_edge =
match String_map.find_opt src_edge_id gwh.e_mapping with
| Some (_,e,_) -> e
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment