Commit df8198d9 authored by bguillaum's avatar bguillaum

version 0.33: switch to libcaml-conll

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@8908 7838e531-6607-4d57-9587-6c381814729c
parent 8db2bcd6
true: package(xml-light dep2pict camomile, cairo2, log)
true: package(xml-light conll dep2pict camomile, cairo2, log)
......@@ -330,7 +330,7 @@ module Ast = struct
}
type gr = {
meta: (string * string) list;
meta: string list;
nodes: node list;
edges: edge list;
}
......
......@@ -204,7 +204,7 @@ module Ast : sig
}
type gr = {
meta: (string * string) list;
meta: string list;
nodes: node list;
edges: edge list;
}
......
......@@ -10,6 +10,7 @@
open Printf
open Log
open Conll
open Grew_base
open Grew_types
......@@ -210,10 +211,10 @@ module G_fs = struct
(* ---------------------------------------------------------------------- *)
let of_conll ?loc domain line =
let raw_list0 =
("phon", Feature_value.build_value ?loc domain "phon" line.Conll.phon)
:: ("cat", Feature_value.build_value ?loc domain "cat" line.Conll.pos1)
:: (List.map (fun (f,v) -> (f, Feature_value.build_value ?loc domain f v)) line.Conll.morph) in
let raw_list1 = match line.Conll.pos2 with
("phon", Feature_value.build_value ?loc domain "phon" line.Conll.form)
:: ("cat", Feature_value.build_value ?loc domain "cat" line.Conll.upos)
:: (List.map (fun (f,v) -> (f, Feature_value.build_value ?loc domain f v)) line.Conll.feats) in
let raw_list1 = match line.Conll.xpos with
| "" | "_" -> raw_list0
| s -> ("pos", Feature_value.build_value ?loc domain "pos" s) :: raw_list0 in
let raw_list2 = match line.Conll.lemma with
......
......@@ -8,6 +8,8 @@
(* Authors: see AUTHORS file *)
(**********************************************************************************)
open Conll
open Grew_base
open Grew_types
open Grew_ast
......
......@@ -10,6 +10,7 @@
open Printf
open Log
open Conll
open Grew_base
open Grew_ast
......@@ -208,7 +209,7 @@ end (* module G_deco *)
(* ================================================================================ *)
module G_graph = struct
type t = {
meta: (string * string) list; (* meta-informations *)
meta: string list; (* meta-informations *)
map: G_node.t Gid_map.t; (* node description *)
fusion: (Gid.t * (Gid.t * string)) list; (* the list of fusion word considered in UD conll *)
}
......@@ -343,10 +344,10 @@ module G_graph = struct
(* -------------------------------------------------------------------------------- *)
let of_conll ?loc domain (meta, lines, range_lines) =
let sorted_lines = Conll.root :: (List.sort Conll.compare lines) in
let of_conll ?loc domain conll =
let sorted_lines = Conll.root :: (List.sort Conll.compare conll.Conll.lines) in
let gtable = (Array.of_list (List.map (fun line -> line.Conll.num) sorted_lines), string_of_int) in
let gtable = (Array.of_list (List.map (fun line -> line.Conll.id) sorted_lines), string_of_int) in
let map_without_edges =
List_.foldi_left
......@@ -359,7 +360,7 @@ module G_graph = struct
(fun acc line ->
(* add line number information in loc *)
let loc = Loc.opt_set_line line.Conll.line_num loc in
let dep_id = Id.gbuild ?loc line.Conll.num gtable in
let dep_id = Id.gbuild ?loc line.Conll.id gtable in
List.fold_left
(fun acc2 (gov, dep_lab) ->
let gov_id = Id.gbuild ?loc gov gtable in
......@@ -371,7 +372,7 @@ module G_graph = struct
(match loc with Some l -> Loc.to_string l | None -> "")
)
) acc line.Conll.deps
) map_without_edges lines in
) map_without_edges conll.Conll.lines in
let fusion =
List.map
......@@ -380,9 +381,9 @@ module G_graph = struct
(Gid.Old (Id.gbuild ?loc last gtable),
fusion)
)
) range_lines in
) conll.Conll.multiwords in
{meta; map=map_with_edges; fusion}
{meta = conll.Conll.meta; map=map_with_edges; fusion}
(* -------------------------------------------------------------------------------- *)
(** input : "Le/DET/le petit/ADJ/petit chat/NC/chat dort/V/dormir ./PONCT/." *)
......@@ -390,24 +391,24 @@ module G_graph = struct
let units = Str.split (Str.regexp " ") brown in
let conll_lines = List.mapi
(fun i item -> match Str.full_split (Str.regexp "/[A-Z'+'']+/") item with
| [Str.Text phon; Str.Delim pos; Str.Text lemma] ->
| [Str.Text form; Str.Delim pos; Str.Text lemma] ->
let pos = String.sub pos 1 ((String.length pos)-2) in
let morph = match (i,sentid) with
let feats = match (i,sentid) with
| (0,Some id) -> [("sentid", id)]
| _ -> [] in
{
Conll.line_num=0;
num = i+1;
phon;
id = i+1;
form;
lemma;
pos1 = "_";
pos2 = pos;
morph;
upos = "_";
xpos = pos;
feats;
deps = [(i, "SUC")]
}
| _ -> Error.build "[Graph.of_brown] Cannot parse Brown item >>>%s<<< (expected \"phon/POS/lemma\")" item
) units in
of_conll domain ([], conll_lines, [])
of_conll domain { Conll.meta=[]; lines=conll_lines; multiwords=[] }
(* -------------------------------------------------------------------------------- *)
let opt_att atts name =
......@@ -654,8 +655,8 @@ module G_graph = struct
(* meta data *)
List.iter
(fun (name, value) ->
bprintf buff " %s = \"%s\";\n" name value
(fun (s) ->
bprintf buff " %s;\n" s
) graph.meta;
(* nodes *)
......@@ -800,7 +801,7 @@ module G_graph = struct
(graph.meta, List.map snd raw_nodes, !edge_list)
(* -------------------------------------------------------------------------------- *)
let to_conll domain graph =
let to_conll_string domain graph =
let nodes = Gid_map.fold
(fun gid node acc -> (gid,node)::acc)
graph.map [] in
......@@ -832,7 +833,7 @@ module G_graph = struct
) graph.map Gid_map.empty in
let buff = Buffer.create 32 in
List.iter (fun (k,v) -> bprintf buff "# %s: %s\n" k v) graph.meta;
List.iter (fun v -> bprintf buff "# %s\n" v) graph.meta;
List.iter
(fun (gid, node) ->
begin
......
......@@ -8,6 +8,8 @@
(* Authors: see AUTHORS file *)
(**********************************************************************************)
open Conll
open Grew_base
open Grew_types
......@@ -165,10 +167,10 @@ module G_graph: sig
val to_dot: Domain.t -> ?main_feat:string -> ?deco:G_deco.t -> t -> string
val to_sentence: ?main_feat:string -> t -> string
val to_dep: Domain.t -> ?filter : string list -> ?main_feat:string -> ?deco:G_deco.t -> t -> string
val to_conll: Domain.t -> t -> string
val to_conll_string: Domain.t -> t -> string
val to_raw: Domain.t -> t ->
(string * string) list *
string list *
(string * string) list list *
(int * string * int) list
end (* module G_graph *)
\ No newline at end of file
......@@ -67,7 +67,7 @@ module Rewrite_history = struct
let save_conll label_domain base t =
let rec loop file_name t =
match (t.good_nf, t.bad_nf) with
| [],[] -> File.write (Instance.to_conll label_domain t.instance) (file_name^".conll")
| [],[] -> File.write (Instance.to_conll_string label_domain t.instance) (file_name^".conll")
| l, _ -> List.iteri (fun i son -> loop (sprintf "%s_%d" file_name i) son) l
in loop base t
......@@ -76,7 +76,7 @@ module Rewrite_history = struct
let rec loop t =
match (t.good_nf, t.bad_nf) with
| [],[] ->
File.write (Instance.to_conll label_domain t.instance) (sprintf "%s__%d.conll" base !cpt);
File.write (Instance.to_conll_string label_domain t.instance) (sprintf "%s__%d.conll" base !cpt);
incr cpt
| l, _ -> List.iter loop l
in loop t; !cpt
......@@ -111,8 +111,8 @@ module Rewrite_history = struct
| ([],[]) ->
let output =
match header with
| Some h -> sprintf "%% %s\n%s" h (Instance.to_conll label_domain t.instance)
| None -> Instance.to_conll label_domain t.instance in
| Some h -> sprintf "%% %s\n%s" h (Instance.to_conll_string label_domain t.instance)
| None -> Instance.to_conll_string label_domain t.instance in
File.write output (base^".conll")
| ([one], []) -> loop one
| _ -> Error.run "[save_det_conll] Not a single rewriting"
......@@ -136,7 +136,7 @@ module Rewrite_history = struct
match (t.good_nf, t.bad_nf) with
| [],[] ->
let graph = t.instance.Instance.graph in
Some (G_graph.to_conll label_domain graph)
Some (G_graph.to_conll_string label_domain graph)
| [one], [] -> loop one
| _ -> None
in loop t
......
......@@ -9,6 +9,7 @@
(**********************************************************************************)
open Printf
open Conll
open Grew_base
open Grew_types
......@@ -66,7 +67,7 @@ module G_node = struct
let of_conll ?loc domain line =
if line = Conll.root
then { empty with conll_root=true }
else { empty with fs = G_fs.of_conll ?loc domain line; position = float line.Conll.num }
else { empty with fs = G_fs.of_conll ?loc domain line; position = float line.Conll.id }
let remove (id_tar : Gid.t) label t = {t with next = Massoc_gid.remove id_tar label t.next}
......
......@@ -8,6 +8,8 @@
(* Authors: see AUTHORS file *)
(**********************************************************************************)
open Conll
open Grew_base
open Grew_types
open Grew_fs
......
......@@ -20,7 +20,7 @@ type pat_item =
| Pat_const of Ast.const
type graph_item =
| Graph_meta of (string * string)
| Graph_meta of string
| Graph_node of Ast.node
| Graph_edge of Ast.edge
......@@ -188,7 +188,7 @@ gr:
gr_item:
(* sentence = "Jean dort." *)
| id=simple_id EQUAL value=feature_value
{ Graph_meta (id, value) }
{ Graph_meta (id ^ " = " ^ value) }
(* B (1) [phon="pense", lemma="penser", cat=v, mood=ind ] *)
(* B [phon="pense", lemma="penser", cat=v, mood=ind ] *)
......
......@@ -67,7 +67,7 @@ module Instance = struct
let to_gr domain t = G_graph.to_gr domain t.graph
let to_conll domain t = G_graph.to_conll domain t.graph
let to_conll_string domain t = G_graph.to_conll_string domain t.graph
let save_dot_png domain ?filter ?main_feat base t =
ignore (Dot.to_png_file (G_graph.to_dot domain ?main_feat t.graph) (base^".png"))
......
......@@ -42,8 +42,8 @@ module Instance : sig
(** [to_gr t] returns a string which contains the "gr" code of the current graph *)
val to_gr: Domain.t -> t -> string
(** [to_conll t] returns a string which contains the "conll" code of the current graph *)
val to_conll: Domain.t -> t -> string
(** [to_conll_string t] returns a string which contains the "conll" code of the current graph *)
val to_conll_string: Domain.t -> t -> string
(** [save_dep_png base t] writes a file "base.png" with the dep representation of [t].
NB: if the Dep2pict is not available, nothing is done *)
......
......@@ -380,97 +380,6 @@ module Label_cst = struct
| (edge_labels, false) -> Pos (List.sort compare (List.map (Label.from_string ?loc domain ?locals) edge_labels))
end (* module Label_cst *)
(* ================================================================================ *)
module Conll = struct
type line = {
line_num: int;
num: int;
phon: string;
lemma: string;
pos1: string;
pos2: string;
morph: (string * string) list;
deps: (int * string ) list;
}
let root = { line_num = -1; num=0; phon="ROOT"; lemma="__"; pos1="_X"; pos2=""; morph=[]; deps=[] }
let line_to_string l =
let (gov_list, lab_list) = List.split l.deps in
sprintf "%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s"
l.num l.phon l.lemma l.pos1 l.pos2
(match l.morph with [] -> "_" | list -> String.concat "|" (List.map (fun (f,v) -> sprintf "%s=%s" f v) list))
(List_.to_string string_of_int "|" gov_list)
(String.concat "|" lab_list)
type range_line = {
first: int;
last: int;
fusion: string;
}
let range_line_to_string l =
sprintf "%d-%d\t%s\t_\t_\t_\t_\t_\t_\t_\t_" l.first l.last l.fusion
type t = (string * string) list * line list * range_line list
let parse_morph file_name line_num = function
| "_" -> []
| morph ->
List.map
(fun feat ->
match Str.split (Str.regexp "=") feat with
| [feat_name] -> (feat_name, "true")
| [feat_name; feat_value] -> (feat_name, feat_value)
| _ -> Error.build ~loc:(Loc.file_line file_name line_num) "[Conll.load] illegal morphology \n>>>>>%s<<<<<<" morph
) (Str.split (Str.regexp "|") morph)
let underscore s = if s = "" then "_" else s
let parse file_name lines =
List.fold_right
(fun (line_num, line) (acc_meta, acc_basic, acc_fusion) ->
if line.[0] = '#'
then
begin
match Str.bounded_split (Str.regexp "\\(# *\\)\\|\\( *: *\\)") line 2 with
| [tag; value] -> ((tag, value) :: acc_meta, acc_basic, acc_fusion)
| _ -> (acc_meta, acc_basic, acc_fusion)
end
else
match Str.split (Str.regexp "\t") line with
| [ f1; phon; lemma; pos1; pos2; morph; govs; dep_labs; _; _ ] ->
begin
try
match Str.split (Str.regexp "-") f1 with
| [f;l] -> (acc_meta, acc_basic, {first=int_of_string f; last=int_of_string l; fusion=phon}:: acc_fusion)
| [num] ->
let gov_list = if govs = "_" then [] else List.map int_of_string (Str.split (Str.regexp "|") govs)
and lab_list = if dep_labs = "_" then [] else Str.split (Str.regexp "|") dep_labs in
let deps = List.combine gov_list lab_list in
(acc_meta, {
line_num = line_num;
num = int_of_string num;
phon = underscore phon;
lemma = underscore lemma;
pos1 = underscore pos1;
pos2 = underscore pos2;
morph = parse_morph file_name line_num morph;
deps = deps;
}::acc_basic, acc_fusion)
| _ -> Error.build ~loc:(Loc.file_line file_name line_num) "[Conll.load] illegal field one >>>>>%s<<<<<<" f1
with exc -> Error.build ~loc:(Loc.file_line file_name line_num) "[Conll.load] illegal line, exc=%s\n>>>>>%s<<<<<<" (Printexc.to_string exc) line
end
| l -> Error.build ~loc:(Loc.file_line file_name line_num) "[Conll.load] illegal line, %d fields (10 are expected)\n>>>>>%s<<<<<<" (List.length l) line
) lines ([], [], [])
let load file_name =
let lines = File.read_ln file_name in
parse file_name lines
let compare l1 l2 = Pervasives.compare l1.num l2.num
end (* module Conll *)
(* ================================================================================ *)
(* This module defines a type for lexical parameter (i.e. one line in a lexical file) *)
module Lex_par = struct
......
......@@ -159,39 +159,6 @@ module Feature_value: sig
val build_value: ?loc:Loc.t -> Domain.t -> feature_name -> feature_atom -> value
end (* module Feature_domain *)
(* ================================================================================ *)
module Conll: sig
type line = {
line_num: int;
num: int;
phon: string;
lemma: string;
pos1: string;
pos2: string;
morph: (string * string) list;
deps: (int * string ) list;
}
val root: line
val line_to_string: line -> string
type range_line = {
first: int;
last: int;
fusion: string;
}
val range_line_to_string: range_line -> string
type t = (string * string) list * line list * range_line list
val load: string -> t
val parse: string -> (int * string) list -> t
val compare: line -> line -> int
end (* module Conll *)
(* ================================================================================ *)
(** module for rules that are lexically parametrized *)
......
......@@ -10,6 +10,7 @@
open Printf
open Log
open Conll
(* ==================================================================================================== *)
(** {2 Location} *)
......@@ -107,7 +108,7 @@ type t = Grew_graph.G_graph.t
let load_conll domain file =
handle ~name:"Graph.load_conll" ~file
(fun () ->
Grew_graph.G_graph.of_conll ~loc:(Grew_base.Loc.file file) domain (Grew_types.Conll.load file)
Grew_graph.G_graph.of_conll ~loc:(Grew_base.Loc.file file) domain (Conll.load file)
) ()
let load_brown domain file =
......@@ -132,11 +133,8 @@ type t = Grew_graph.G_graph.t
loop [load_gr; load_conll; load_brown]
) ()
let of_conll domain file_name line_list =
handle ~name:"Graph.of_conll"
(fun () ->
Grew_graph.G_graph.of_conll ~loc:(Grew_base.Loc.file file_name) domain (Grew_types.Conll.parse file_name line_list)
) ()
let of_conll domain conll =
handle ~name:"Graph.xxx_of_conll" (fun () -> Grew_graph.G_graph.of_conll domain conll) ()
let of_brown domain ?sentid brown =
handle ~name:"Graph.of_brown" (fun () -> Grew_graph.G_graph.of_brown domain ?sentid brown) ()
......@@ -150,8 +148,8 @@ type t = Grew_graph.G_graph.t
let to_gr domain graph =
handle ~name:"Graph.to_gr" (fun () -> Grew_graph.G_graph.to_gr domain graph) ()
let to_conll domain graph =
handle ~name:"Graph.to_conll" (fun () -> Grew_graph.G_graph.to_conll domain graph) ()
let to_conll_string domain graph =
handle ~name:"Graph.to_conll_string" (fun () -> Grew_graph.G_graph.to_conll_string domain graph) ()
let to_sentence ?main_feat gr =
handle ~name:"Graph.to_sentence"
......@@ -162,7 +160,7 @@ type t = Grew_graph.G_graph.t
let save_conll domain filename graph =
handle ~name:"Graph.save_conll" (fun () ->
let out_ch = open_out filename in
fprintf out_ch "%s" (Grew_graph.G_graph.to_conll domain graph);
fprintf out_ch "%s" (Grew_graph.G_graph.to_conll_string domain graph);
close_out out_ch
) ()
......@@ -250,7 +248,7 @@ module Rewrite = struct
let save_index ~dirname ~base_names =
handle ~name:"Rewrite.save_index" (fun () ->
let out_ch = open_out (Filename.concat dirname "index") in
List.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
Array.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
close_out out_ch
) ()
......@@ -293,7 +291,7 @@ module Rewrite = struct
handle ~name:"Rewrite.make_index" (fun () ->
let init = Grew_html.Corpus_stat.empty grs seq in
let corpus_stat =
List.fold_left
Array.fold_left
(fun acc base_name ->
Grew_html.Corpus_stat.add_gr_stat base_name (Grew_html.Gr_stat.load (Filename.concat output_dir (base_name^".stat"))) acc
) init base_names in
......
......@@ -8,6 +8,8 @@
(* Authors: see AUTHORS file *)
(**********************************************************************************)
open Conll
(* ==================================================================================================== *)
(** {2 Location} *)
(* ==================================================================================================== *)
......@@ -83,8 +85,7 @@ module Graph : sig
@raise File_dont_exists if the file doesn't exists. *)
val load: Domain.t -> string -> t
(** [of_conll filename line_list] *)
val of_conll: Domain.t -> string -> (int * string) list -> t
val of_conll: Domain.t -> Conll.t -> t
val of_brown: Domain.t -> ?sentid:string -> string -> t
......@@ -96,7 +97,7 @@ module Graph : sig
- the list of edge (src, label, tar) where src and tar refers to the position in the node list
*)
val raw: Domain.t -> t ->
(string * string) list *
string list *
(string * string) list list *
(int * string * int) list
......@@ -106,7 +107,7 @@ module Graph : sig
val to_gr: Domain.t -> t -> string
val to_conll: Domain.t -> t -> string
val to_conll_string: Domain.t -> t -> string
(** [search_pattern pattern graph] returns the list of the possible matching of [pattern] in [graph] *)
val search_pattern: Domain.t -> Pattern.t -> t -> Matching.t list
......@@ -179,7 +180,7 @@ module Rewrite: sig
val conll_dep_string: Domain.t -> ?keep_empty_rh:bool -> history -> string option
val save_index: dirname:string -> base_names: string list -> unit
val save_index: dirname:string -> base_names: string array -> unit
val write_annot: Domain.t -> title:string -> string -> string -> (string * history) list -> unit
......@@ -187,7 +188,7 @@ module Rewrite: sig
val error_html: Domain.t -> ?no_init:bool -> ?main_feat:string -> ?dot: bool -> header: string -> string -> ?init:Graph.t -> string -> unit
val make_index: title: string -> grs_file: string -> html: bool -> grs: Grs.t -> seq: string -> input_dir: string -> output_dir: string -> base_names: string list -> unit
val make_index: title: string -> grs_file: string -> html: bool -> grs: Grs.t -> seq: string -> input_dir: string -> output_dir: string -> base_names: string array -> unit
val html_sentences: title:string -> string -> (bool * string * int * string) list -> unit
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment