Commit d5e9950f authored by bguillaum's avatar bguillaum

output in one Conll file

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@7937 7838e531-6607-4d57-9587-6c381814729c
parent bc1dd696
......@@ -178,7 +178,7 @@ module G_fs = struct
:: ("position", Domain.build_one "position" line.Conll.num)
:: (List.map (fun (f,v) -> (f, Domain.build_one f v)) line.Conll.morph) in
let unsorted = match line.Conll.pos2 with
| "_" -> unsorted_without_pos
| "" | "_" -> unsorted_without_pos
| s -> ("pos", Domain.build_one "pos" s) :: unsorted_without_pos in
List.sort G_feature.compare unsorted
......
......@@ -302,7 +302,9 @@ module G_graph = struct
let table = Array.of_list (List.map (fun line -> line.Conll.num) sorted_lines) in
let map_without_edges =
List_.foldi_left (fun i acc line -> Gid_map.add (Gid.Old i) (G_node.of_conll line) acc) Gid_map.empty sorted_lines in
List_.foldi_left
(fun i acc line -> Gid_map.add (Gid.Old i) (G_node.of_conll line) acc)
Gid_map.empty sorted_lines in
let map_with_edges =
List.fold_left
......
......@@ -89,10 +89,23 @@ module Rewrite_history = struct
match (t.good_nf, t.bad_nf) with
| [],[] ->
let graph = t.instance.Instance.graph in
G_graph.to_dep graph
Some (G_graph.to_dep graph)
| [one], [] -> loop one
| _ -> Error.run "Not a single rewriting"
| _ -> None
in loop t
let conll_dep_string ?(keep_empty_rh=false) t =
if (not keep_empty_rh) && is_empty t
then None
else
let rec loop t =
match (t.good_nf, t.bad_nf) with
| [],[] ->
let graph = t.instance.Instance.graph in
Some (G_graph.to_conll graph)
| [one], [] -> loop one
| _ -> None
in loop t
end (* Rewrite_history *)
(* ==================================================================================================== *)
......
......@@ -37,7 +37,9 @@ module Rewrite_history: sig
val save_det_gr: string -> t -> unit
val save_det_conll: ?header:string -> string -> t -> unit
val det_dep_string: t -> string
val det_dep_string: t -> string option
val conll_dep_string: ?keep_empty_rh:bool -> t -> string option
end
module Modul: sig
......
......@@ -463,7 +463,7 @@ module Html_doc = struct
end
module Html_rh = struct
let build ?filter ?main_feat ?(dot=false) ?(init_graph=true) ?(out_gr=false) ?header ~graph_file prefix t =
let build ?filter ?main_feat ?(dot=false) ?(init_graph=true) ?(out_gr=false) ?header ?graph_file prefix t =
(* remove files from previous runs *)
let _ = Unix.system (sprintf "rm -f %s*.html" prefix) in
......@@ -495,8 +495,13 @@ module Html_rh = struct
| None -> ()
end;
wnl "<b>Input file</b>: <a href=\"%s\">%s</a><br/>"
graph_file (Filename.basename graph_file);
begin
match graph_file with
| Some gf ->
wnl "<b>Input file</b>: <a href=\"%s\">%s</a><br/>"
gf (Filename.basename gf)
| None -> ()
end;
wnl "<b>Input sentence</b>: <font color=\"green\"><i>%s</i></font></p><br/>"
(G_graph.to_sentence ?main_feat t.Rewrite_history.instance.Instance.graph);
......
......@@ -21,7 +21,7 @@ module Html_rh: sig
?init_graph:bool ->
?out_gr:bool ->
?header:string ->
graph_file:string ->
?graph_file:string ->
string ->
Rewrite_history.t ->
unit
......
......@@ -610,40 +610,43 @@ module Conll = struct
let root = { line_num = -1; num="0"; phon="ROOT"; lemma="__"; pos1="_X"; pos2=""; morph=[]; deps=[] }
let load file =
let parse_morph line_num = function
| "_" -> []
| morph ->
List.map
(fun feat ->
match Str.split (Str.regexp "=") feat with
| [feat_name] -> (feat_name, "true")
| [feat_name; feat_value] -> (feat_name, feat_value)
| _ -> Error.build ~loc:(file,line_num) "[Conll.load] illegal morphology \n>>>>>%s<<<<<<" morph
) (Str.split (Str.regexp "|") morph) in
let escape_quote s = Str.global_replace (Str.regexp "\"") "\\\"" s in
let parse (line_num, line) =
let parse_morph file_name line_num = function
| "_" -> []
| morph ->
List.map
(fun feat ->
match Str.split (Str.regexp "=") feat with
| [feat_name] -> (feat_name, "true")
| [feat_name; feat_value] -> (feat_name, feat_value)
| _ -> Error.build ~loc:(file_name,line_num) "[Conll.load] illegal morphology \n>>>>>%s<<<<<<" morph
) (Str.split (Str.regexp "|") morph)
let underscore s = if s = "" then "_" else s
let parse_line file_name (line_num, line) =
try
match Str.split (Str.regexp "\t") line with
| [ num; phon; lemma; pos1; pos2; morph; govs; dep_labs; _; _ ] ->
let gov_list = Str.split (Str.regexp "|") govs
and lab_list = Str.split (Str.regexp "|") dep_labs in
let deps = List.combine gov_list lab_list in
| [ num; phon; lemma; pos1; pos2; morph; govs; dep_labs; _; _ ] ->
let gov_list = if govs = "_" then [] else Str.split (Str.regexp "|") govs
and lab_list = if dep_labs = "_" then [] else Str.split (Str.regexp "|") dep_labs in
let deps = List.combine gov_list lab_list in
{line_num = line_num;
num = num;
phon = phon;
lemma = lemma;
pos1 = pos1;
pos2 = pos2;
morph = parse_morph line_num morph;
phon = underscore phon;
lemma = underscore lemma;
pos1 = underscore pos1;
pos2 = underscore pos2;
morph = parse_morph file_name line_num morph;
deps = deps;
}
| l ->
Error.build ~loc:(file,line_num) "[Conll.load] illegal line, %d fields (10 are expected)\n>>>>>%s<<<<<<" (List.length l) line in
| l ->
Error.build ~loc:(file_name,line_num) "[Conll.load] illegal line, %d fields (10 are expected)\n>>>>>%s<<<<<<" (List.length l) line
with exc -> Error.build ~loc:(file_name,line_num) "[Conll.load] illegal line, exc=%s\n>>>>>%s<<<<<<" (Printexc.to_string exc) line
let load file_name =
let lines = File.read_ln file_name in
List.map (parse_line file_name) lines
let lines = File.read_ln file in
List.map parse lines
let parse file_name lines = List.map (parse_line file_name) lines
end (* module Conll *)
(* ================================================================================ *)
......
......@@ -269,6 +269,8 @@ module Conll: sig
val root:line
val load: string -> line list
val parse: string -> (int * string) list -> line list
end
(** module for rule that are lexically parametrized *)
......
......@@ -39,9 +39,8 @@ let handle ?(name="") ?(file="No file defined") fct () =
| Error.Run (msg, loc) -> raise (Run (msg,loc))
| exc -> raise (Bug (sprintf "[Libgrew.%s] UNCATCHED EXCEPTION: %s" name (Printexc.to_string exc), None))
let is_empty rh =
handle ~name:"num_sol" (fun () -> Rewrite_history.is_empty rh) ()
handle ~name:"is_empty" (fun () -> Rewrite_history.is_empty rh) ()
let num_sol rh =
handle ~name:"num_sol" (fun () -> Rewrite_history.num_sol rh) ()
......@@ -110,6 +109,13 @@ let load_conll file =
Instance.from_graph graph
) ()
let of_conll file_name line_list =
handle ~name:"of_conll"
(fun () ->
let graph = G_graph.of_conll (Conll.parse file_name line_list) in
Instance.from_graph graph
) ()
let load_graph file =
handle ~name:"load_graph" ~file
(fun () ->
......@@ -171,6 +177,9 @@ let save_det_conll ?header base rew_hist =
let det_dep_string rew_hist =
handle ~name:"det_dep_string" (fun () -> Rewrite_history.det_dep_string rew_hist) ()
let conll_dep_string ?keep_empty_rh rew_hist =
handle ~name:"conll_dep_string" (fun () -> Rewrite_history.conll_dep_string ?keep_empty_rh rew_hist) ()
let write_html
?(no_init=false)
?(out_gr=false)
......@@ -178,7 +187,7 @@ let write_html
?main_feat
?dot
~header
~graph_file
?graph_file
rew_hist
output_base =
handle ~name:"write_html" (fun () ->
......@@ -190,7 +199,7 @@ let write_html
~out_gr
~init_graph: (not no_init)
~header
~graph_file
?graph_file
output_base rew_hist
)
) ()
......
......@@ -62,7 +62,9 @@ val save_det_gr: string -> Rewrite_history.t -> unit
val save_det_conll: ?header:string -> string -> Rewrite_history.t -> unit
val det_dep_string: Rewrite_history.t -> string
val det_dep_string: Rewrite_history.t -> string option
val conll_dep_string: ?keep_empty_rh:bool -> Rewrite_history.t -> string option
(** get a graph from a file either in 'gr' or 'conll' format.
File extension should be '.gr' or '.conll'.
......@@ -71,6 +73,8 @@ File extension should be '.gr' or '.conll'.
*)
val load_graph: string -> Instance.t
val of_conll: string -> (int * string) list -> Instance.t
val xml_graph: Xml.xml -> Instance.t
(** [raw_graph instance] returns all graph information with a triple of basic caml types:
......@@ -92,7 +96,7 @@ val write_html:
?main_feat: string ->
?dot: bool ->
header: string ->
graph_file: string ->
?graph_file: string ->
Rewrite_history.t -> string -> unit
val error_html:
......
......@@ -366,7 +366,7 @@ neg_item:
| WITHOUT i = pn_item { i }
pn_item:
| l = delimited(LACC,separated_nonempty_list_final_opt(SEMIC,pat_item),RACC)
| l = delimited(LACC,separated_list_final_opt(SEMIC,pat_item),RACC)
{
{
Ast.pat_nodes = List_.opt_map (function Pat_node n -> Some n | _ -> None) l;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment