Commit 29fd6d3f authored by bguillaum's avatar bguillaum

- add conll format support

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@6723 7838e531-6607-4d57-9587-6c381814729c
parent d14601e3
......@@ -46,11 +46,19 @@ end
module Feature_structure = struct
(* list are supposed to be striclty ordered wrt compare*)
type t = Feature.t list
let build ?domain ast_fs =
let unsorted = List.map (Feature.build ?domain) ast_fs in
List.sort Feature.compare unsorted
let of_conll line =
let morph_fs =
List.map (fun (feat_name, feat_value) -> Feature.Equal (feat_name, [feat_value])) line.Conll.morph in
Feature.Equal ("phon", [line.Conll.phon]) ::
Feature.Equal ("lemma", [line.Conll.lemma]) ::
Feature.Equal ("cat", [line.Conll.pos2]) ::
morph_fs
let empty = []
let rec get name = function
......
......@@ -10,6 +10,8 @@ module Feature_structure: sig
val build: ?domain:Ast.domain -> Ast.feature list -> t
val of_conll: Conll.line -> t
val get: string -> t -> string list option
val get_atom: string -> t -> string option
......
......@@ -195,7 +195,6 @@ module G_graph = struct
| Feat of (Gid.t * string)
| String of string
let map_add_edge map id_src label id_tar =
let node_src =
(* Not found can be raised when adding an edge from pos to neg *)
......@@ -204,21 +203,18 @@ module G_graph = struct
| None -> None
| Some new_node -> Some (Gid_map.add id_src new_node map)
let build ?domain ?(locals=[||]) full_node_list full_edge_list =
let (named_nodes, constraints) =
let named_nodes =
let rec loop already_bound = function
| [] -> ([],[])
| [] -> []
| (ast_node, loc) :: tail ->
let (tail_nodes, tail_const) = loop (ast_node.Ast.node_id :: already_bound) tail in
let tail = loop (ast_node.Ast.node_id :: already_bound) tail in
if List.mem ast_node.Ast.node_id already_bound
then (tail_nodes, (ast_node, loc)::tail_const)
else (G_node.build ?domain (ast_node, loc) :: tail_nodes, tail_const) in
then Log.fcritical "[GRS] [Graph.build] try to build a graph with twice the same node id '%s'" ast_node.Ast.node_id
else G_node.build ?domain (ast_node, loc) :: tail in
loop [] full_node_list in
(* let named_nodes = List.map (Node.build ?domain) full_node_list in *)
let sorted_nodes = List.sort (fun (id1,_) (id2,_) -> Pervasives.compare id1 id2) named_nodes in
let (sorted_ids, node_list) = List.split sorted_nodes in
......@@ -245,6 +241,27 @@ module G_graph = struct
{map=map;lub=Array.length table}
let of_conll lines =
let nodes =
List.fold_left
(fun acc line -> Gid_map.add line.Conll.num (G_node.of_conll line) acc)
Gid_map.empty lines in
let nodes_with_edges =
List.fold_left
(fun acc line ->
if line.Conll.gov=0
then acc
else
let gov_node = Gid_map.find line.Conll.gov acc in
match G_node.add_edge (G_edge.make line.Conll.dep_lab) line.Conll.num gov_node with
| None -> acc
| Some new_node -> Gid_map.add line.Conll.gov new_node acc
) nodes lines in
{map = nodes_with_edges; lub=1+(Gid_map.fold (fun _ _ acc -> acc+1) nodes_with_edges 0)}
(* ---------------------------------------------------- *)
(* Update functions *)
(* ---------------------------------------------------- *)
......
......@@ -65,6 +65,9 @@ module G_graph: sig
Ast.node list ->
Ast.edge list ->
t
val of_conll: Conll.line list -> t
val to_gr: t -> string
val to_dot: ?main_feat:string -> ?deco:Deco.t -> t -> string
val to_dep: ?main_feat:string -> ?deco:Deco.t -> t -> string
......
......@@ -42,6 +42,13 @@ module G_node = struct
next = Massoc.empty;
} )
let of_conll line = {
fs = Feature_structure.of_conll line;
pos = Some line.Conll.num;
next = Massoc.empty;
}
let remove id_tar label t = {t with next = Massoc.remove id_tar label t.next}
let remove_key node_id t =
......
......@@ -29,6 +29,8 @@ module G_node: sig
val add_edge: G_edge.t -> int -> t -> t option
val build: ?domain:Ast.domain -> Ast.node -> (Id.name * t)
val of_conll: Conll.line -> t
val pos_comp: t -> t -> int
val build_neighbour: t -> t
......
......@@ -31,6 +31,9 @@ module Instance = struct
let graph = G_graph.build gr_ast.Ast.nodes gr_ast.Ast.edges in
{ empty with graph = graph }
let of_conll lines =
{ empty with graph = G_graph.of_conll lines }
let rev_steps t =
{ t with big_step = match t.big_step with
| None -> None
......
......@@ -17,6 +17,8 @@ module Instance : sig
val build: Ast.gr -> t
val of_conll: Conll.line list -> t
(* rev_steps reverse the small step list: during rewriting, the last rule is in the head of the list and the reverse is needed for display *)
val rev_steps: t -> t
......
......@@ -435,3 +435,39 @@ module Html = struct
fprintf out_ch "</html>\n";
end
module Conll = struct
type line = {
num: int;
phon: string;
lemma: string;
pos1: string;
pos2: string;
morph: (string * string) list;
gov: int;
dep_lab: string;
}
let parse_morph morph =
List.map
(fun feat ->
match Str.split (Str.regexp "=") feat with
| [feat_name; feat_value] -> (feat_value, feat_value)
| [feat_name] -> (feat_name, "true")
| _ -> Log.fcritical "Cannot not parse CONLL feat '%s' (too many '=')" morph
) (Str.split (Str.regexp "|") morph)
let parse line =
match Str.split (Str.regexp "\t") line with
| [ num; phon; lemma; pos1; pos2; morph; gov; dep_lab; _; _ ] ->
{num = int_of_string num;
phon = phon;
lemma = lemma;
pos1 = pos1;
pos2 = pos2;
morph = parse_morph morph;
gov = int_of_string gov;
dep_lab = dep_lab;
}
| _ -> Log.fcritical "Cannot not parse CONLL line '%s'" line
end
......@@ -148,3 +148,18 @@ module Html: sig
val enter: out_channel -> ?title: string -> ?header: string -> string -> unit
val leave: out_channel -> unit
end
module Conll: sig
type line = {
num: int;
phon: string;
lemma: string;
pos1: string;
pos2: string;
morph: (string * string) list;
gov: int;
dep_lab: string;
}
val parse: string -> line
end
......@@ -64,6 +64,10 @@ let load_gr file =
raise (File_dont_exists file)
)
let load_conll file =
let lines = File.read file in
Instance.of_conll (List.map Conll.parse lines)
let rewrite ~gr ~grs ~seq =
try Grs.rewrite grs seq gr
with
......
......@@ -54,6 +54,8 @@ val empty_gr : Instance.t
*)
val load_gr: string -> Instance.t
val load_conll: string -> Instance.t
val save_index: dirname:string -> base_names: string list -> unit
val write_html:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment