Commit 720df465 authored by Bruno Guillaume

error tracking

parent 1ebb9ec1
......@@ -320,7 +320,7 @@ module Ast = struct
type lexicon =
| File of string
| Final of string list
| Final of (int * string) list
type lexicon_info = (string * lexicon) list
......
......@@ -171,7 +171,7 @@ module Ast : sig
type lexicon =
| File of string
| Final of string list
| Final of (int * string) list
type lexicon_info = (string * lexicon) list
......
......@@ -26,6 +26,7 @@ module Loc = struct
(* [file_line f l] builds a location from a known file name and line number. *)
let file_line f l = (Some f, Some l)
(* [file_opt_line fo l] builds a location with an optional file and a known line. *)
let file_opt_line fo l = (fo, Some l)
(* [file_opt_line_opt fo lo] builds a location where both parts may be unknown. *)
let file_opt_line_opt fo lo = (fo, lo)
(* [set_line l loc] replaces the line component of [loc], keeping its file part. *)
let set_line l (x,_) = (x, Some l)
let to_string = function
| (Some file, Some line) -> sprintf "[file: %s, line: %d]" (Filename.basename file) line
......@@ -609,6 +610,8 @@ module Global = struct
(* [get_loc ()] returns the current parsing location. *)
let get_loc () = !current_loc
(* [loc_string ()] renders the current location with [Loc.to_string]. *)
let loc_string () = Loc.to_string !current_loc
(* [get_line ()] returns the line component (an [int option]) of the current location. *)
let get_line () = snd (get_loc ())
(* [new_file filename] resets the tracked location to line 1 of [filename]
   and clears the label-parsing flag before lexing a new file. *)
let new_file filename =
  current_loc := (Some filename, Some 1);
  label_flag := false
......
......@@ -54,6 +54,8 @@ module Loc: sig
val file_opt_line_opt: string option -> int option -> t
val file: string -> t
val set_line: int -> t -> t
val to_string: t -> string
end
......@@ -289,6 +291,7 @@ module Global: sig
val new_line: unit -> unit
val get_loc: unit -> Loc.t
val get_line: unit -> int option
val loc_string: unit -> string
val label_flag: bool ref
......
......@@ -19,6 +19,8 @@
let escaped = ref false
let lexicon_lines = ref []
(* [split_comment com] splits a comment body on newlines and drops the
   lines that contain only spaces/tabs (or nothing at all). *)
let split_comment com =
  let is_blank line = Str.string_match (Str.regexp "[ \t]*$") line 0 in
  com
  |> Str.split (Str.regexp "\n")
  |> List.filter (fun line -> not (is_blank line))
......@@ -98,12 +100,19 @@ and string_lex re target = parse
(* a dedicated lexer for lexical parameter: read everything until "#END" *)
and lp_lex name target = parse
| '\n' { Global.new_line (); Lexing.new_line lexbuf; bprintf buff "\n"; lp_lex name target lexbuf }
| '\n' { (match Global.get_line () with
| None -> raise (Error "no loc in lexer")
| Some l -> lexicon_lines := (l, Buffer.contents buff) :: !lexicon_lines
);
Global.new_line ();
Lexing.new_line lexbuf;
Buffer.clear buff;
lp_lex name target lexbuf
}
| _ as c { bprintf buff "%c" c; lp_lex name target lexbuf }
| "#END" [' ' '\t']* '\n' { Global.new_line ();
let s = Buffer.contents buff in
let lines= Str.split (Str.regexp "\n") s in
LEX_PAR ( name, lines)
let lines= List.rev !lexicon_lines in
LEX_PAR (name, lines)
}
(* The lexer must be different when label_ident are parsed. The [global] lexer calls either
......@@ -151,7 +160,11 @@ and standard target = parse
| '%' { comment global lexbuf }
| "#BEGIN" [' ' '\t']* (label_ident as li) [' ' '\t']* '\n'
{ Global.new_line (); Buffer.clear buff; lp_lex li global lexbuf}
{ Global.new_line ();
Buffer.clear buff;
lexicon_lines := [];
lp_lex li global lexbuf
}
| '\n' { Global.new_line (); Lexing.new_line lexbuf; global lexbuf}
......@@ -160,8 +173,7 @@ and standard target = parse
| "domain" { DOMAIN }
| "features" { FEATURES }
| "conll_fields" { Log.fwarning "\"conll_fields\" is deprecated, ignored"; DUMMY }
| "feature" { FEATURE }
| "file" { FILE }
| "from" { FROM }
| "labels" { Global.label_flag := true; LABELS }
| "match" { Log.fwarning "%s \"match\" is deprecated, please use \"pattern\" instead" (Global.loc_string ()); PATTERN }
......@@ -202,7 +214,7 @@ and standard target = parse
| digit+ ('.' digit*)? as number { FLOAT (float_of_string number) }
| '$' general_ident as pat_var { DOLLAR_ID pat_var}
| '$' general_ident { raise (Error "Syntax of lexicon has changed! Please read grew.fr/lexicons for undating instructions") }
| '@' general_ident as cmd_var { AROBAS_ID cmd_var }
| "@#" color as col { COLOR col }
......
......@@ -75,8 +75,7 @@ let localize t = (t,get_loc ())
%token INCL /* include */
%token IMPORT /* import */
%token FEATURES /* features */
%token FEATURE /* feature */
%token FILE /* file */
%token FROM /* from */
%token LABELS /* labels */
%token PATTERN /* pattern */
%token WITHOUT /* without */
......@@ -107,7 +106,6 @@ let localize t = (t,get_loc ())
%token EMPTY /* Empty */
%token TRY /* Try */
%token <string> DOLLAR_ID /* $id */
%token <string> AROBAS_ID /* @id */
%token <string> COLOR /* @#89abCD */
......@@ -119,7 +117,7 @@ let localize t = (t,get_loc ())
%token <string> REGEXP
%token <float> FLOAT
%token <string list> COMMENT
%token <string * string list> LEX_PAR
%token <string * (int *string) list> LEX_PAR
%token EOF /* end of file */
......@@ -337,45 +335,31 @@ rules:
| r = list(rule) { r }
rule:
| doc=option(COMMENT) RULE id_loc=simple_id_with_loc LACC p=pos_item n=list(neg_item) cmds=commands RACC lex_par=list(lex_par)
| doc=option(COMMENT) RULE id_loc=simple_id_with_loc file_lexicons = option(external_lexicons) LACC p=pos_item n=list(neg_item) cmds=commands RACC final_lexicons=list(final_lexicon)
{
let lexicons = match file_lexicons with
| Some l -> l @ final_lexicons
| None -> final_lexicons in
{ Ast.rule_id = fst id_loc;
pattern = Ast.complete_pattern { Ast.pat_pos = p; Ast.pat_negs = n };
commands = cmds;
param = None;
lex_par = None;
lexicon_info = lex_par;
rule_doc = begin match doc with Some d -> d | None -> [] end;
rule_loc = snd id_loc;
rule_dir = None;
}
}
| doc=option(COMMENT) RULE id_loc=simple_id_with_loc param=param LACC p=pos_item n=list(neg_item) cmds=commands RACC lex_par=option(lex_par)
{
{ Ast.rule_id = fst id_loc;
pattern = Ast.complete_pattern { Ast.pat_pos = p; Ast.pat_negs = n };
commands = cmds;
param = Some param;
lex_par = None;
lexicon_info = []; (* TODOLEX *)
lexicon_info = lexicons;
rule_doc = begin match doc with Some d -> d | None -> [] end;
rule_loc = snd id_loc;
rule_dir = None;
}
}
lex_par:
| lex_par = LEX_PAR { (fst lex_par, Ast.Final (snd lex_par)) }
external_lexicons:
| LPAREN external_lexicons= separated_nonempty_list_final_opt(COMA, external_lexicon) RPAREN { external_lexicons }
param:
| LPAREN FEATURE vars=separated_nonempty_list(COMA,var) RPAREN { ([],vars) }
| LPAREN FEATURE vars=separated_nonempty_list(COMA,var) SEMIC files=separated_list(COMA,file) RPAREN { (files,vars) }
external_lexicon:
| lex_name=simple_id FROM file=STRING { (lex_name, Ast.File file)}
file:
| FILE f=STRING { f }
var:
| i=DOLLAR_ID { i }
| i=AROBAS_ID { i }
final_lexicon:
| final_lexicon = LEX_PAR { (fst final_lexicon, Ast.Final (snd final_lexicon)) }
pos_item:
| PATTERN i=pn_item { i }
......@@ -466,7 +450,6 @@ pat_item:
| Ast.Simple value ->
Pat_const (Ast.Feature_eq_cst (feat_id1, value), loc)
| Ast.Pointed (s1, s2) ->
Printf.printf "###%s--%s###\n%!" s1 s2;
Pat_const (Ast.Feature_eq_lex_or_fs (feat_id1, (s1,s2)), loc)
}
......@@ -551,8 +534,6 @@ pat_item:
(* TODO : axe lex_field *)
(* __ERRORS__ *)
| (Ineq_float _, Ineq_float _) -> Error.build "the '>' symbol can be used with 2 constants"
| (Ineq_float _, Ineq_float _) -> Error.build "the '>' symbol can be used with 2 constants"
| _ -> Error.build "the '>' symbol can be used with 2 nodes or with 2 features but not in a mix inequality"
}
......@@ -591,13 +572,10 @@ node_features:
let uname = Ast.to_uname name in
match values with
| [Ast.Simple "*"] ->
Printf.printf "###node_features[1.1] --> %s\n%!" name;
({Ast.kind = Ast.Disequality []; name=uname},loc)
| [Ast.Pointed (lex,fn)] ->
Printf.printf "###node_features[1.2] --> %s\n%!" name;
({Ast.kind = Ast.Equal_lex (lex,fn); name=uname }, loc)
| l ->
Printf.printf "###node_features[1.3] --> %s\n%!" name;
let value_list = List.map (function
| Ast.Simple x -> x
| Ast.Pointed (lex,fn) -> Error.build "Lexical reference '%s.%s' cannot be used in a disjunction" lex fn
......@@ -607,13 +585,11 @@ node_features:
/* cat = * */
| name_loc=simple_id_with_loc EQUAL STAR
{ let (name,loc) = name_loc in
Printf.printf "###node_features[2] --> %s\n%!" name;
({Ast.kind = Ast.Disequality []; name=Ast.to_uname name},loc) }
/* cat */
| name_loc=simple_id_with_loc
{ let (name,loc) = name_loc in
Printf.printf "###node_features[3] --> %s\n%!" name;
({Ast.kind = Ast.Disequality []; name=Ast.to_uname name},loc) }
/* cat<>n|v|adj */
......@@ -623,27 +599,18 @@ node_features:
let uname = Ast.to_uname name in
match values with
| [Ast.Pointed (lex,fn)] ->
Printf.printf "###node_features[4.2] --> %s\n%!" name;
({Ast.kind = Ast.Disequal_lex (lex,fn); name=uname }, loc)
| l ->
Printf.printf "###node_features[4.3] --> %s\n%!" name;
let value_list = List.map (function
| Ast.Simple x -> x
| Ast.Pointed (lex,fn) -> Error.build "Lexical reference '%s.%s' cannot be used in a disjunction" lex fn
) l in ({Ast.kind = Ast.Disequality value_list; name=uname }, loc)
}
/* lemma=$lem */
| name_loc=simple_id_with_loc EQUAL p=DOLLAR_ID
{ let (name,loc) = name_loc in
Printf.printf "###node_features[5] --> %s\n%!" name;
( {Ast.kind = Ast.Equal_param p; name=Ast.to_uname name }, loc) }
/* !lemma */
| BANG name_loc=simple_id_with_loc
{ let (name,loc) = name_loc in
Printf.printf "###node_features[6] --> %s\n%!" name;
({Ast.kind = Ast.Absent; name=Ast.to_uname name}, loc) }
{ let (name,loc) = name_loc in ({Ast.kind = Ast.Absent; name=Ast.to_uname name}, loc) }
/*=============================================================================================*/
/* COMMANDS DEFINITION */
......@@ -749,8 +716,6 @@ concat_item:
}
| s=STRING { Ast.String_item s }
| f=FLOAT { Ast.String_item (Printf.sprintf "%g" f) }
| p=AROBAS_ID { Ast.Param_item p }
| p=DOLLAR_ID { Ast.Param_item p }
/*=============================================================================================*/
......
......@@ -510,9 +510,9 @@ module Rule = struct
command :: (loop (new_kni,new_kei) tail) in
loop (known_node_ids, known_edge_ids) ast_commands
let build_lex = function
let build_lex loc = function
| Ast.File filename -> Lexicon.load filename
| Ast.Final line_list -> Lexicon.build (List.map (fun s -> Str.split (Str.regexp "\t") s) line_list)
| Ast.Final (line_list) -> Lexicon.build loc line_list
(* ====================================================================== *)
......@@ -525,9 +525,9 @@ module Rule = struct
let lexicons = List.fold_left (fun acc (name,lex) ->
try
let prev = List.assoc name acc in
(name, (Lexicon.union prev (build_lex lex))) :: (List.remove_assoc name acc)
(name, (Lexicon.union prev (build_lex rule_ast.Ast.rule_loc lex))) :: (List.remove_assoc name acc)
with
Not_found -> (name, build_lex lex) :: acc
Not_found -> (name, build_lex rule_ast.Ast.rule_loc lex) :: acc
) [] rule_ast.Ast.lexicon_info in
let lexicon_names = List.map fst lexicons in
......@@ -1505,11 +1505,10 @@ module Rule = struct
| Command.Lexical_field (lex_name, field) ->
(try
let lexicon = List.assoc lex_name matching.l_param in
let v = Lexicon.read field lexicon in
let v = Lexicon.get field lexicon in
Concat_item.String v
with
| Not_found -> Error.run ~loc "UPDATE_FEAT: the lexicon '%s' does not exist" lex_name
| Lexicon.Not_functional_lexicon -> Error.run ~loc "UPDATE_FEAT: the lexicon is not functional" lex_name
)
| Command.Param index ->
(match matching.m_param with
......@@ -1752,7 +1751,16 @@ module Rule = struct
(function
| Command.Feat (cnode, feat_name) -> Concat_item.Feat (node_find cnode, feat_name)
| Command.String s -> Concat_item.String s
| Command.Lexical_field _ -> failwith "TODOLEX3"
| Command.Lexical_field (lex_name, field) ->
(try
let lexicon = List.assoc lex_name matching.l_param in
let v = Lexicon.read field lexicon in
Concat_item.String v
with
| Not_found -> Error.run ~loc "UPDATE_FEAT: the lexicon '%s' does not exist" lex_name
| Lexicon.Not_functional_lexicon -> Error.run ~loc "UPDATE_FEAT: the lexicon is not functional" lex_name
)
| Command.Param index ->
(match matching.m_param with
| None -> Error.bug "Cannot apply a UPDATE_FEAT command without parameter"
......
......@@ -170,18 +170,44 @@ module Lexicon = struct
| [] :: xss -> transpose xss
| (x::xs) :: xss -> (x :: List.map List.hd xss) :: transpose (xs :: List.map List.tl xss)
let build items =
if items = [] then Error.bug "[Lexicon.build] a lexicon must not be empty";
let tr = transpose items in
let sorted_tr = List.sort (fun l1 l2 -> Pervasives.compare (List.hd l1) (List.hd l2)) tr in
match transpose sorted_tr with
| [] -> Error.bug "[Lexicon.build] inconsistent data"
| header :: lines_list -> { header; lines = List.fold_right Line_set.add lines_list Line_set.empty }
(* Raised by [strict_compare] when its two arguments are equal;
   carries the duplicated value. *)
exception Equal of string

(* [strict_compare x y] behaves like [Pervasives.compare] except that
   equality is not tolerated: it raises [Equal x] instead of returning 0.
   Used to detect duplicate lexicon field names while sorting. *)
let strict_compare x y =
  let c = Pervasives.compare x y in
  if c = 0 then raise (Equal x) else c
(* [build loc items] builds a lexicon from numbered raw lines [(linenum, line)].
   The first kept line is the tab-separated header; every following line must
   have the same number of tab-separated fields.
   Reports (localized with [loc]) build errors for: empty lexicons, lines with
   the wrong number of fields, and duplicate field names. *)
let build loc items =
  (* drop blank lines and '%'-comment lines before any interpretation *)
  let real_items = List.filter (fun (_,x) -> x <> "" && x.[0] <> '%') items in
  match real_items with
  | [] | [_] -> Error.build ~loc "[Lexicon.build] a lexicon must not be empty"
  | (linenum_h, h)::t ->
    let fields = Str.split (Str.regexp "\t") h in
    let l = List.length fields in
    let rec loop = function
      | [] -> []
      | (linenum, line)::tail ->
        let items = Str.split (Str.regexp "\t") line in
        if List.length items <> l then
          begin
            let loc = Loc.set_line linenum loc in
            Error.build ~loc "[Lexicon.build] line with %d items (%d expected!!)" (List.length items) l
          end;
        (* BUG FIX: keep the parsed data line, not the header fields *)
        items :: (loop tail) in
    (* BUG FIX: the header must be the first row of the matrix, so that after
       sorting the columns (by field name) and transposing back, the first
       line of the result is the sorted header destructured below. *)
    let items_list = fields :: (loop t) in
    let tr = transpose items_list in
    try
      (* [strict_compare] raises [Equal] on two identical field names *)
      let sorted_tr = List.sort (fun l1 l2 -> strict_compare (List.hd l1) (List.hd l2)) tr in
      match transpose sorted_tr with
      | [] -> Error.bug ~loc "[Lexicon.build] inconsistent data"
      | header :: lines_list -> { header; lines = List.fold_right Line_set.add lines_list Line_set.empty }
    with Equal v ->
      let loc = Loc.set_line linenum_h loc in
      Error.build ~loc "[Lexicon.build] the field name \"%s\" is used twice" v
let load file =
let lines = File.read file in
let items = List.map (fun line -> Str.split (Str.regexp "\t") line) lines in
build items
let lines = File.read_ln file in
let loc = Loc.file file in
build loc lines
let reduce sub_list lexicon =
let sorted_sub_list = List.sort Pervasives.compare sub_list in
......@@ -208,9 +234,11 @@ module Lexicon = struct
let new_set = Line_set.filter (fun line -> List.nth line index = value) lex.lines in
if Line_set.is_empty new_set
then None
else ( Printf.printf "###>>> Lexicon select %d --> %d\n%!" (Line_set.cardinal lex.lines) (Line_set.cardinal new_set);
Some { lex with lines = new_set }
)
else
let _ =
printf "###>>> %d --> %d \n%!" (Line_set.cardinal lex.lines) (Line_set.cardinal new_set) in
Some { lex with lines = new_set }
let unselect head value lex =
match List_.index head lex.header with
......@@ -224,8 +252,7 @@ module Lexicon = struct
let projection head lex =
match List_.index head lex.header with
| None -> Error.build "[Lexicon.projection] cannot find %s in lexicon" head
| Some index ->
Line_set.fold (fun line acc -> String_set.add (List.nth line index) acc) lex.lines String_set.empty
| Some index -> Line_set.fold (fun line acc -> String_set.add (List.nth line index) acc) lex.lines String_set.empty
exception Not_functional_lexicon
let read head lex =
......@@ -234,6 +261,8 @@ module Lexicon = struct
| [one] -> one
| _ -> raise Not_functional_lexicon
let get head lex = String_set.choose (projection head lex)
let read_multi head lex =
match String_set.elements (projection head lex) with
| [] -> Error.bug "[Lexicon.read] a lexicon must not be empty"
......@@ -244,6 +273,7 @@ end (* module Lexicon *)
(* Named collection of lexicons attached to a rule. *)
module Lexicons = struct
  (* association list mapping a lexicon name to its lexicon *)
  type t = (string * Lexicon.t) list
end
(* ================================================================================ *)
module Concat_item = struct
type t =
......
......@@ -108,11 +108,11 @@ end (* module Lex_par *)
module Lexicon : sig
type t
(** [build items] build a lexicon from a list.
(** [build loc items] build a lexicon from a list.
The first list is interpreted as the column headers.
All other lines are lexicon items.
It is supposed that all sublist have the same length *)
val build: string list list -> t
val build: Loc.t -> (int * string) list -> t
(** [load file] build a lexicon from a file.
The file should contain same data than the ones in the build function
......@@ -138,6 +138,9 @@ module Lexicon : sig
* raise [Not_functional_lexicon] if several values are defined *)
val read: string -> t -> string
(** [get head lexicon] returns one value of the [head] field (an unspecified one if several exist) *)
val get: string -> t -> string
(** [read_multi head lexicon] returns "v_1/…/v_k" where v_i are the values of the [head] column *)
val read_multi: string -> t -> string
end (* module Lexicon *)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment