Commit a9d8ab79 authored by bguillaum

Support for CONLL+ format (several governors for the same lexical unit)

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@7626 7838e531-6607-4d57-9587-6c381814729c
parent 418371c5
VERSION = 0.16.1
VERSION = 0.17.0
INSTALL_DIR_LIB = @OCAMLLIB@
INSTALL_DIR = @prefix@/bin/
......
......@@ -307,16 +307,19 @@ module G_graph = struct
(* add line number information in loc *)
let loc = Loc.opt_set_line line.Conll.line_num loc in
if line.Conll.gov=0
then acc
else
let gov_node =
try Gid_map.find (Gid.Old line.Conll.gov) acc
with Not_found ->
Error.build ?loc "[G_graph.of_conll] the line refers to unknown gov %d" line.Conll.gov in
match G_node.add_edge (G_edge.make ?loc line.Conll.dep_lab) (Gid.Old line.Conll.num) gov_node with
| None -> acc
| Some new_node -> Gid_map.add (Gid.Old line.Conll.gov) new_node acc
(* Add one edge per (governor, label) pair of the current line: with CONLL+ a
   lexical unit may have several governors.  [acc2] is the node map threaded
   through this inner fold; every lookup and every fall-through result must use
   it rather than the outer [acc], otherwise the edges already added for
   earlier pairs of the same line would be silently dropped. *)
List.fold_left
  (fun acc2 (gov, dep_lab) ->
    if gov=0
    then acc2 (* no edge is added for a pair whose governor is 0 *)
    else
      let gov_node =
        (* look the governor up in [acc2] so that edges added by previous
           pairs on the same governor are kept *)
        try Gid_map.find (Gid.Old gov) acc2
        with Not_found ->
          Error.build ?loc "[G_graph.of_conll] the line refers to unknown gov %d" gov in
      match G_node.add_edge (G_edge.make ?loc dep_lab) (Gid.Old line.Conll.num) gov_node with
      | None -> acc2 (* [add_edge] made no new node: keep the map unchanged *)
      | Some new_node -> Gid_map.add (Gid.Old gov) new_node acc2
  ) acc line.Conll.deps
) nodes lines in
{meta=[]; map=nodes_with_edges}
......
......@@ -594,8 +594,7 @@ module Conll = struct
pos1: string;
pos2: string;
morph: (string * string) list;
gov: int;
dep_lab: string;
deps: ( int * string ) list;
}
let load file =
......@@ -615,7 +614,10 @@ module Conll = struct
let parse (line_num, line) =
match Str.split (Str.regexp "\t") line with
| [ num; phon; lemma; pos1; pos2; morph; gov; dep_lab; _; _ ] ->
| [ num; phon; lemma; pos1; pos2; morph; govs; dep_labs; _; _ ] ->
let gov_list = List.map int_of_string (Str.split (Str.regexp "|") govs)
and lab_list = Str.split (Str.regexp "|") dep_labs in
let deps = List.combine gov_list lab_list in
{line_num = line_num;
num = int_of_string num;
phon = escape_quote phon;
......@@ -623,8 +625,7 @@ module Conll = struct
pos1 = pos1;
pos2 = pos2;
morph = parse_morph line_num morph;
gov = int_of_string gov;
dep_lab = dep_lab;
deps = deps;
}
| l ->
Error.build ~loc:(file,line_num) "[Conll.load] illegal line, %d fields (10 are expected)\n>>>>>%s<<<<<<" (List.length l) line in
......
......@@ -248,8 +248,7 @@ module Conll: sig
pos1: string;
pos2: string;
morph: (string * string) list;
gov: int;
dep_lab: string;
deps: ( int * string ) list;
}
val load: string -> line list
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment