Commit 2fcfd051 authored by bguillaum's avatar bguillaum

better error handling

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@6917 7838e531-6607-4d57-9587-6c381814729c
parent c4685859
......@@ -39,7 +39,7 @@ module G_edge = struct
let to_string = Label.to_string
let make ?(locals=[||]) string = Label.from_string ~locals string
(* [make ?loc ?locals label_text] builds an edge label from its textual form
   by delegating to [Label.from_string]. [locals] defaults to the empty array
   and [loc], when given, is forwarded unchanged. *)
let make ?loc ?(locals=[||]) label_text =
  Label.from_string ?loc ~locals label_text
let build ?locals (ast_edge, loc) =
match ast_edge.Ast.negative, ast_edge.Ast.edge_labels with
......
......@@ -27,7 +27,7 @@ module G_edge: sig
val to_string:t -> string
val make: ?locals:Label.decl array -> string -> t
(** [make ?loc ?locals s] builds a value of type [t] from the string [s].
    The implementation delegates to [Label.from_string], forwarding [loc]
    unchanged and using the empty array when [locals] is omitted.
    NOTE(review): [loc] is presumably used to locate errors raised while
    decoding [s]; confirm in [Label]. *)
val make: ?loc:Loc.t -> ?locals:Label.decl array -> string -> t
val build: ?locals:Label.decl array -> Ast.edge -> t
......
......@@ -230,23 +230,28 @@ module G_graph = struct
{map=map;lub=Array.length table}
let of_conll lines =
let of_conll ?loc lines =
let nodes =
List.fold_left
(fun acc line -> Gid_map.add line.Conll.num (G_node.of_conll line) acc)
(fun acc line ->
Gid_map.add line.Conll.num (G_node.of_conll line) acc)
Gid_map.empty lines in
let nodes_with_edges =
List.fold_left
(fun acc line ->
(* add line number information in loc *)
let loc = Loc.opt_set_line line.Conll.line_num loc in
if line.Conll.gov=0
then acc
else
let gov_node =
try Gid_map.find line.Conll.gov acc
with Not_found -> Log.fcritical "Ill-formed CONLL file: line number %d refers to the on existing gov %d" line.Conll.num line.Conll.gov in
match G_node.add_edge (G_edge.make line.Conll.dep_lab) line.Conll.num gov_node with
with Not_found ->
Error.build ?loc "[G_graph.of_conll] the line refers to unknown gov %d" line.Conll.gov in
match G_node.add_edge (G_edge.make ?loc line.Conll.dep_lab) line.Conll.num gov_node with
| None -> acc
| Some new_node -> Gid_map.add line.Conll.gov new_node acc
) nodes lines in
......
......@@ -61,7 +61,7 @@ module G_graph: sig
Ast.edge list ->
t
val of_conll: Conll.line list -> t
(** [of_conll ?loc lines] builds a graph from already parsed CONLL lines.
    Each line yields a node keyed by its [num] field; a line whose [gov] is
    not 0 additionally yields an edge labelled [dep_lab] attached to the node
    of its governor. A [gov] that matches no node is reported through
    [Error.build], using [loc] with its line number replaced by the [line_num]
    of the offending line.
    NOTE(review): only the beginning of the implementation was available when
    this comment was written; confirm the remaining steps against the code. *)
val of_conll: ?loc:Loc.t -> Conll.line list -> t
val to_gr: t -> string
val to_dot: ?main_feat:string -> ?deco:Deco.t -> t -> string
......
......@@ -31,8 +31,10 @@ module Instance = struct
let graph = G_graph.build gr_ast.Ast.nodes gr_ast.Ast.edges in
{ empty with graph = graph }
let of_conll lines =
{ empty with graph = G_graph.of_conll lines }
(* [of_conll ?loc lines] wraps the graph built by [G_graph.of_conll] from
   [lines] into an otherwise empty instance. [loc] is forwarded unchanged so
   that errors found while building the graph can be located.
   The debugging trace that used to be printed on stdout at every call has
   been removed: a library function must not write to the caller's stdout. *)
let of_conll ?loc lines =
  { empty with graph = G_graph.of_conll ?loc lines }
let rev_steps t =
{ t with big_step = match t.big_step with
......
......@@ -17,7 +17,7 @@ module Instance : sig
val build: Ast.gr -> t
val of_conll: Conll.line list -> t
(** [of_conll ?loc lines] returns the empty instance whose graph is replaced
    by [G_graph.of_conll ?loc lines]; [loc] is forwarded unchanged. *)
val of_conll: ?loc:Loc.t -> Conll.line list -> t
(* [rev_steps] reverses the small-step list: during rewriting, the last applied rule is at the head of the list, so the list must be reversed for display *)
val rev_steps: t -> t
......
......@@ -14,6 +14,10 @@ module Loc = struct
(* Render a location as "(file: <basename>, line: <n>)"; only the base name
   of the file path is shown. *)
let to_string (path, line_number) =
  let short_name = Filename.basename path in
  Printf.sprintf "(file: %s, line: %d)" short_name line_number
(* [opt_set_line line loc_opt] replaces the line number stored in an optional
   location by [line], keeping the file name; [None] stays [None]. *)
let opt_set_line line loc_opt =
  match loc_opt with
  | Some (file, _) -> Some (file, line)
  | None -> None
(* Render an optional location: the empty string when there is none,
   otherwise whatever [to_string] produces for it. *)
let opt_to_string loc_opt =
  match loc_opt with
  | Some loc -> to_string loc
  | None -> ""
......@@ -40,6 +44,22 @@ module File = struct
with End_of_file ->
close_in in_ch;
List.rev !rev_lines
(* [read_ln file] reads [file] line by line and returns the kept lines in file
   order, each paired with its 1-based line number in the file.
   Lines made only of spaces and tabs (empty lines included) and lines whose
   first character is '%' are dropped, but they still count in the numbering.
   Raises [Sys_error] if the file cannot be opened or read. The channel is
   closed on every exit path, including when reading fails. *)
let read_ln file =
  (* Compiled once rather than once per input line. *)
  let blank = Str.regexp "^[ \t]*$" in
  (* The blank test runs first, so the first character is never read on an
     empty line. *)
  let skipped line = Str.string_match blank line 0 || line.[0] = '%' in
  let in_ch = open_in file in
  (* Tail-recursive: [num] is the number of the last line read so far. *)
  let rec collect num acc =
    match (try Some (input_line in_ch) with End_of_file -> None) with
    | None -> List.rev acc
    | Some line ->
      let num = num + 1 in
      collect num (if skipped line then acc else (num, line) :: acc)
  in
  let lines =
    try collect 0 []
    with exn ->
      (* Do not leak the descriptor when reading fails half-way. *)
      close_in_noerr in_ch;
      raise exn in
  close_in in_ch;
  lines
end (* module File *)
......@@ -478,6 +498,7 @@ end
module Conll = struct
type line = {
line_num: int;
num: int;
phon: string;
lemma: string;
......@@ -487,33 +508,40 @@ module Conll = struct
gov: int;
dep_lab: string;
}
let load file =
let parse_morph = function
| "_" -> []
| morph ->
List.map
(fun feat ->
match Str.split (Str.regexp "=") feat with
| [feat_name] -> (feat_name, "true")
| [feat_name; feat_value] -> (feat_name, feat_value)
| _ -> Log.fcritical "Cannot not parse CONLL feat '%s' (too many '=')" morph
) (Str.split (Str.regexp "|") morph)
let escape_quote s = Str.global_replace (Str.regexp "\"") "\\\"" s
let parse line =
match Str.split (Str.regexp "\t") line with
| [ num; phon; lemma; pos1; pos2; morph; gov; dep_lab; _; _ ] ->
{num = int_of_string num;
phon = escape_quote phon;
lemma = escape_quote lemma;
pos1 = pos1;
pos2 = pos2;
morph = parse_morph morph;
gov = int_of_string gov;
dep_lab = dep_lab;
}
| _ -> Log.fcritical "Cannot not parse CONLL line '%s'" line
let parse_morph line_num = function
| "_" -> []
| morph ->
List.map
(fun feat ->
match Str.split (Str.regexp "=") feat with
| [feat_name] -> (feat_name, "true")
| [feat_name; feat_value] -> (feat_name, feat_value)
| _ -> Error.build ~loc:(file,line_num) "[Conll.load] illegal morphology \n>>>>>%s<<<<<<" morph
) (Str.split (Str.regexp "|") morph) in
let escape_quote s = Str.global_replace (Str.regexp "\"") "\\\"" s in
let parse (line_num, line) =
match Str.split (Str.regexp "\t") line with
| [ num; phon; lemma; pos1; pos2; morph; gov; dep_lab; _; _ ] ->
{line_num = line_num;
num = int_of_string num;
phon = escape_quote phon;
lemma = escape_quote lemma;
pos1 = pos1;
pos2 = pos2;
morph = parse_morph line_num morph;
gov = int_of_string gov;
dep_lab = dep_lab;
}
| _ ->
Error.build ~loc:(file,line_num) "[Conll.load] illegal line \n>>>>>%s<<<<<<" line in
let lines = File.read_ln file in
List.map parse lines
end
(* This module defines a type for lexical parameters (i.e. one line in a lexical file) *)
......
......@@ -11,6 +11,8 @@ module IntMap : Map.S with type key = int
(** Source locations used to report errors. *)
module Loc: sig
(** A location: a file name paired with a line number. *)
type t = string * int
(** [opt_set_line line loc] returns [loc] with its line number replaced by
    [line] and its file name kept; [None] is returned unchanged. *)
val opt_set_line: int -> t option -> t option
(** [to_string loc] renders [loc] as ["(file: <basename>, line: <n>)"], where
    [<basename>] is the base name of the file path. *)
val to_string: t -> string
end
......@@ -180,17 +182,18 @@ end
module Conll: sig
type line = {
num: int;
phon: string;
lemma: string;
pos1: string;
pos2: string;
morph: (string * string) list;
gov: int;
dep_lab: string;
}
line_num: int;
num: int;
phon: string;
lemma: string;
pos1: string;
pos2: string;
morph: (string * string) list;
gov: int;
dep_lab: string;
}
val parse: string -> line
(** [load file] reads [file] with [File.read_ln] and parses every line it
    returns into a [line] record whose [line_num] field is the number of that
    line in [file]. A line that does not split into 10 tab-separated fields,
    or whose morphology field is ill-formed, is reported through [Error.build]
    with the location [(file, line_num)]. The [num] and [gov] fields are
    decoded with [int_of_string], which fails on non-numeric text. *)
val load: string -> line list
end
(** module for rules that are lexically parametrized *)
......
......@@ -74,8 +74,9 @@ let load_gr file =
let load_conll file =
try
let lines = File.read file in
Instance.of_conll (List.map Conll.parse lines)
(* let lines = File.read file in *)
(* Instance.of_conll (List.map Conll.parse lines) *)
Instance.of_conll ~loc:(file,-1) (Conll.load file)
with
| Grew_parser.Parse_error (msg,Some (sub_file,l)) ->
raise (Parsing_err (sprintf "[file:%s, line:%d] %s" sub_file l msg))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment