Commit 5a1bc904 authored by bguillaum

VERSION 0.9.6 better conll support & bugfix

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@6724 7838e531-6607-4d57-9587-6c381814729c
parent 29fd6d3f
......@@ -28,7 +28,7 @@ INFO = @INFO@
OCAMLFIND_DIR=`ocamlfind printconf destdir`
VERSION = 0.9.5
VERSION = 0.9.6
cleanup:
rm -rf *.cmo *.cmx *.cmi *.annot *.o *.*~
......
......@@ -52,13 +52,16 @@ module Feature_structure = struct
List.sort Feature.compare unsorted
let of_conll line =
let morph_fs =
List.map (fun (feat_name, feat_value) -> Feature.Equal (feat_name, [feat_value])) line.Conll.morph in
Feature.Equal ("phon", [line.Conll.phon]) ::
Feature.Equal ("lemma", [line.Conll.lemma]) ::
Feature.Equal ("cat", [line.Conll.pos2]) ::
morph_fs
let unsorted =
Feature.Equal ("phon", [line.Conll.phon]) ::
Feature.Equal ("lemma", [line.Conll.lemma]) ::
Feature.Equal ("cat", [line.Conll.pos2]) ::
morph_fs in
List.sort Feature.compare unsorted
let empty = []
let rec get name = function
......@@ -168,8 +171,8 @@ module Feature_structure = struct
loop ((Feature.Different (fn_pat, fv_pat))::t_pat, t)
| ((Feature.Equal (fn_pat, fv_pat))::t_pat, (Feature.Equal (fn, fv))::t)
(* when fn_pat = fn *) ->
(match fv_pat, fv with
(* when fn_pat = fn *) ->
(match fv_pat, fv with
| [],_ | _, [] -> loop (t_pat,t)
| l_pat,l -> not (List_.sort_disjoint l_pat l) && loop (t_pat,t)
)
......@@ -181,8 +184,8 @@ module Feature_structure = struct
| l_pat,l -> (List_.sort_disjoint l_pat l) && loop (t_pat,t)
)
| _ -> Log.bug "[Feature_structure.set_feat]: Disequality not allowed in graph features"; exit 2
in loop (pattern,fs)
exception Fail_unif
exception Bug_unif of string
......
......@@ -451,18 +451,19 @@ module Conll = struct
List.map
(fun feat ->
match Str.split (Str.regexp "=") feat with
| [feat_name; feat_value] -> (feat_value, feat_value)
| [feat_name; feat_value] -> (feat_name, feat_value)
| [feat_name] -> (feat_name, "true")
| _ -> Log.fcritical "Cannot not parse CONLL feat '%s' (too many '=')" morph
) (Str.split (Str.regexp "|") morph)
(* Escape the double-quote characters of [s]: every occurrence of ["] is
   rewritten as a backslash followed by ["]. All other characters are
   left untouched. *)
let escape_quote s =
  let double_quote = Str.regexp "\"" in
  let escaped_quote = "\\\"" in
  Str.global_replace double_quote escaped_quote s
let parse line =
match Str.split (Str.regexp "\t") line with
| [ num; phon; lemma; pos1; pos2; morph; gov; dep_lab; _; _ ] ->
{num = int_of_string num;
phon = phon;
lemma = lemma;
phon = escape_quote phon;
lemma = escape_quote lemma;
pos1 = pos1;
pos2 = pos2;
morph = parse_morph morph;
......
......@@ -65,8 +65,30 @@ let load_gr file =
)
let load_conll file =
let lines = File.read file in
Instance.of_conll (List.map Conll.parse lines)
try
let lines = File.read file in
Instance.of_conll (List.map Conll.parse lines)
with
| Grew_parser.Parse_error msg -> raise (Parsing_err msg)
| Error.Build (msg,loc) -> raise (Build (msg,loc))
| Error.Bug (msg, loc) -> raise (Bug (msg,loc))
| exc -> raise (Bug (sprintf "UNCATCHED EXCEPTION: %s" (Printexc.to_string exc), None))
(* Load a graph from [file], choosing the loader from the file extension:
   ".gr" files go through [load_gr], ".conll" files through [load_conll].
   For any other extension a warning is logged and the loaders are tried in
   that order: [load_conll] is attempted only when [load_gr] raises
   [Parsing_err], and a critical error is logged when [load_conll] raises
   [Parsing_err] as well. Any other exception raised by a loader is not
   caught here. *)
let load_graph file =
  let has_suffix = Filename.check_suffix file in
  if has_suffix ".gr" then load_gr file
  else if has_suffix ".conll" then load_conll file
  else (
    Log.fwarning "Unknown file format for input graph '%s', try to guess..." file;
    try load_gr file
    with Parsing_err _ ->
      (* Not parseable as 'gr': fall back to the CoNLL loader. *)
      (try load_conll file
       with Parsing_err _ ->
         Log.fcritical "[Libgrew.load_graph] Cannot guess input file format of file '%s'. Use .gr or .conll file extension" file)
  )
let rewrite ~gr ~grs ~seq =
try Grs.rewrite grs seq gr
......
......@@ -48,13 +48,12 @@ val get_sequence_names: Grs.t -> string list
val empty_gr : Instance.t
(** get a graph from a file
(** get a graph from a file either in 'gr' or 'conll' format.
File extension should be '.gr' or '.conll'.
@raise Parsing_err if libgrew can't parse the file
@raise File_dont_exists if the file doesn't exist
*)
val load_gr: string -> Instance.t
val load_conll: string -> Instance.t
val load_graph: string -> Instance.t
val save_index: dirname:string -> base_names: string list -> unit
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment