Commit d5e9950f authored by bguillaum's avatar bguillaum

output in one Conll file

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@7937 7838e531-6607-4d57-9587-6c381814729c
parent bc1dd696
...@@ -178,7 +178,7 @@ module G_fs = struct ...@@ -178,7 +178,7 @@ module G_fs = struct
:: ("position", Domain.build_one "position" line.Conll.num) :: ("position", Domain.build_one "position" line.Conll.num)
:: (List.map (fun (f,v) -> (f, Domain.build_one f v)) line.Conll.morph) in :: (List.map (fun (f,v) -> (f, Domain.build_one f v)) line.Conll.morph) in
let unsorted = match line.Conll.pos2 with let unsorted = match line.Conll.pos2 with
| "_" -> unsorted_without_pos | "" | "_" -> unsorted_without_pos
| s -> ("pos", Domain.build_one "pos" s) :: unsorted_without_pos in | s -> ("pos", Domain.build_one "pos" s) :: unsorted_without_pos in
List.sort G_feature.compare unsorted List.sort G_feature.compare unsorted
......
...@@ -302,7 +302,9 @@ module G_graph = struct ...@@ -302,7 +302,9 @@ module G_graph = struct
let table = Array.of_list (List.map (fun line -> line.Conll.num) sorted_lines) in let table = Array.of_list (List.map (fun line -> line.Conll.num) sorted_lines) in
let map_without_edges = let map_without_edges =
List_.foldi_left (fun i acc line -> Gid_map.add (Gid.Old i) (G_node.of_conll line) acc) Gid_map.empty sorted_lines in List_.foldi_left
(fun i acc line -> Gid_map.add (Gid.Old i) (G_node.of_conll line) acc)
Gid_map.empty sorted_lines in
let map_with_edges = let map_with_edges =
List.fold_left List.fold_left
......
...@@ -89,10 +89,23 @@ module Rewrite_history = struct ...@@ -89,10 +89,23 @@ module Rewrite_history = struct
match (t.good_nf, t.bad_nf) with match (t.good_nf, t.bad_nf) with
| [],[] -> | [],[] ->
let graph = t.instance.Instance.graph in let graph = t.instance.Instance.graph in
G_graph.to_dep graph Some (G_graph.to_dep graph)
| [one], [] -> loop one | [one], [] -> loop one
| _ -> Error.run "Not a single rewriting" | _ -> None
in loop t in loop t
let conll_dep_string ?(keep_empty_rh=false) t =
if (not keep_empty_rh) && is_empty t
then None
else
let rec loop t =
match (t.good_nf, t.bad_nf) with
| [],[] ->
let graph = t.instance.Instance.graph in
Some (G_graph.to_conll graph)
| [one], [] -> loop one
| _ -> None
in loop t
end (* Rewrite_history *) end (* Rewrite_history *)
(* ==================================================================================================== *) (* ==================================================================================================== *)
......
...@@ -37,7 +37,9 @@ module Rewrite_history: sig ...@@ -37,7 +37,9 @@ module Rewrite_history: sig
val save_det_gr: string -> t -> unit val save_det_gr: string -> t -> unit
val save_det_conll: ?header:string -> string -> t -> unit val save_det_conll: ?header:string -> string -> t -> unit
val det_dep_string: t -> string val det_dep_string: t -> string option
val conll_dep_string: ?keep_empty_rh:bool -> t -> string option
end end
module Modul: sig module Modul: sig
......
...@@ -463,7 +463,7 @@ module Html_doc = struct ...@@ -463,7 +463,7 @@ module Html_doc = struct
end end
module Html_rh = struct module Html_rh = struct
let build ?filter ?main_feat ?(dot=false) ?(init_graph=true) ?(out_gr=false) ?header ~graph_file prefix t = let build ?filter ?main_feat ?(dot=false) ?(init_graph=true) ?(out_gr=false) ?header ?graph_file prefix t =
(* remove files from previous runs *) (* remove files from previous runs *)
let _ = Unix.system (sprintf "rm -f %s*.html" prefix) in let _ = Unix.system (sprintf "rm -f %s*.html" prefix) in
...@@ -495,8 +495,13 @@ module Html_rh = struct ...@@ -495,8 +495,13 @@ module Html_rh = struct
| None -> () | None -> ()
end; end;
wnl "<b>Input file</b>: <a href=\"%s\">%s</a><br/>" begin
graph_file (Filename.basename graph_file); match graph_file with
| Some gf ->
wnl "<b>Input file</b>: <a href=\"%s\">%s</a><br/>"
gf (Filename.basename gf)
| None -> ()
end;
wnl "<b>Input sentence</b>: <font color=\"green\"><i>%s</i></font></p><br/>" wnl "<b>Input sentence</b>: <font color=\"green\"><i>%s</i></font></p><br/>"
(G_graph.to_sentence ?main_feat t.Rewrite_history.instance.Instance.graph); (G_graph.to_sentence ?main_feat t.Rewrite_history.instance.Instance.graph);
......
...@@ -21,7 +21,7 @@ module Html_rh: sig ...@@ -21,7 +21,7 @@ module Html_rh: sig
?init_graph:bool -> ?init_graph:bool ->
?out_gr:bool -> ?out_gr:bool ->
?header:string -> ?header:string ->
graph_file:string -> ?graph_file:string ->
string -> string ->
Rewrite_history.t -> Rewrite_history.t ->
unit unit
......
...@@ -610,40 +610,43 @@ module Conll = struct ...@@ -610,40 +610,43 @@ module Conll = struct
let root = { line_num = -1; num="0"; phon="ROOT"; lemma="__"; pos1="_X"; pos2=""; morph=[]; deps=[] } let root = { line_num = -1; num="0"; phon="ROOT"; lemma="__"; pos1="_X"; pos2=""; morph=[]; deps=[] }
let load file = let parse_morph file_name line_num = function
let parse_morph line_num = function | "_" -> []
| "_" -> [] | morph ->
| morph -> List.map
List.map (fun feat ->
(fun feat -> match Str.split (Str.regexp "=") feat with
match Str.split (Str.regexp "=") feat with | [feat_name] -> (feat_name, "true")
| [feat_name] -> (feat_name, "true") | [feat_name; feat_value] -> (feat_name, feat_value)
| [feat_name; feat_value] -> (feat_name, feat_value) | _ -> Error.build ~loc:(file_name,line_num) "[Conll.load] illegal morphology \n>>>>>%s<<<<<<" morph
| _ -> Error.build ~loc:(file,line_num) "[Conll.load] illegal morphology \n>>>>>%s<<<<<<" morph ) (Str.split (Str.regexp "|") morph)
) (Str.split (Str.regexp "|") morph) in
let underscore s = if s = "" then "_" else s
let escape_quote s = Str.global_replace (Str.regexp "\"") "\\\"" s in let parse_line file_name (line_num, line) =
try
let parse (line_num, line) =
match Str.split (Str.regexp "\t") line with match Str.split (Str.regexp "\t") line with
| [ num; phon; lemma; pos1; pos2; morph; govs; dep_labs; _; _ ] -> | [ num; phon; lemma; pos1; pos2; morph; govs; dep_labs; _; _ ] ->
let gov_list = Str.split (Str.regexp "|") govs let gov_list = if govs = "_" then [] else Str.split (Str.regexp "|") govs
and lab_list = Str.split (Str.regexp "|") dep_labs in and lab_list = if dep_labs = "_" then [] else Str.split (Str.regexp "|") dep_labs in
let deps = List.combine gov_list lab_list in let deps = List.combine gov_list lab_list in
{line_num = line_num; {line_num = line_num;
num = num; num = num;
phon = phon; phon = underscore phon;
lemma = lemma; lemma = underscore lemma;
pos1 = pos1; pos1 = underscore pos1;
pos2 = pos2; pos2 = underscore pos2;
morph = parse_morph line_num morph; morph = parse_morph file_name line_num morph;
deps = deps; deps = deps;
} }
| l -> | l ->
Error.build ~loc:(file,line_num) "[Conll.load] illegal line, %d fields (10 are expected)\n>>>>>%s<<<<<<" (List.length l) line in Error.build ~loc:(file_name,line_num) "[Conll.load] illegal line, %d fields (10 are expected)\n>>>>>%s<<<<<<" (List.length l) line
with exc -> Error.build ~loc:(file_name,line_num) "[Conll.load] illegal line, exc=%s\n>>>>>%s<<<<<<" (Printexc.to_string exc) line
let load file_name =
let lines = File.read_ln file_name in
List.map (parse_line file_name) lines
let lines = File.read_ln file in let parse file_name lines = List.map (parse_line file_name) lines
List.map parse lines
end (* module Conll *) end (* module Conll *)
(* ================================================================================ *) (* ================================================================================ *)
......
...@@ -269,6 +269,8 @@ module Conll: sig ...@@ -269,6 +269,8 @@ module Conll: sig
val root:line val root:line
val load: string -> line list val load: string -> line list
val parse: string -> (int * string) list -> line list
end end
(** module for rule that are lexically parametrized *) (** module for rule that are lexically parametrized *)
......
...@@ -39,9 +39,8 @@ let handle ?(name="") ?(file="No file defined") fct () = ...@@ -39,9 +39,8 @@ let handle ?(name="") ?(file="No file defined") fct () =
| Error.Run (msg, loc) -> raise (Run (msg,loc)) | Error.Run (msg, loc) -> raise (Run (msg,loc))
| exc -> raise (Bug (sprintf "[Libgrew.%s] UNCATCHED EXCEPTION: %s" name (Printexc.to_string exc), None)) | exc -> raise (Bug (sprintf "[Libgrew.%s] UNCATCHED EXCEPTION: %s" name (Printexc.to_string exc), None))
let is_empty rh = let is_empty rh =
handle ~name:"num_sol" (fun () -> Rewrite_history.is_empty rh) () handle ~name:"is_empty" (fun () -> Rewrite_history.is_empty rh) ()
let num_sol rh = let num_sol rh =
handle ~name:"num_sol" (fun () -> Rewrite_history.num_sol rh) () handle ~name:"num_sol" (fun () -> Rewrite_history.num_sol rh) ()
...@@ -110,6 +109,13 @@ let load_conll file = ...@@ -110,6 +109,13 @@ let load_conll file =
Instance.from_graph graph Instance.from_graph graph
) () ) ()
let of_conll file_name line_list =
handle ~name:"of_conll"
(fun () ->
let graph = G_graph.of_conll (Conll.parse file_name line_list) in
Instance.from_graph graph
) ()
let load_graph file = let load_graph file =
handle ~name:"load_graph" ~file handle ~name:"load_graph" ~file
(fun () -> (fun () ->
...@@ -171,6 +177,9 @@ let save_det_conll ?header base rew_hist = ...@@ -171,6 +177,9 @@ let save_det_conll ?header base rew_hist =
let det_dep_string rew_hist = let det_dep_string rew_hist =
handle ~name:"det_dep_string" (fun () -> Rewrite_history.det_dep_string rew_hist) () handle ~name:"det_dep_string" (fun () -> Rewrite_history.det_dep_string rew_hist) ()
let conll_dep_string ?keep_empty_rh rew_hist =
handle ~name:"conll_dep_string" (fun () -> Rewrite_history.conll_dep_string ?keep_empty_rh rew_hist) ()
let write_html let write_html
?(no_init=false) ?(no_init=false)
?(out_gr=false) ?(out_gr=false)
...@@ -178,7 +187,7 @@ let write_html ...@@ -178,7 +187,7 @@ let write_html
?main_feat ?main_feat
?dot ?dot
~header ~header
~graph_file ?graph_file
rew_hist rew_hist
output_base = output_base =
handle ~name:"write_html" (fun () -> handle ~name:"write_html" (fun () ->
...@@ -190,7 +199,7 @@ let write_html ...@@ -190,7 +199,7 @@ let write_html
~out_gr ~out_gr
~init_graph: (not no_init) ~init_graph: (not no_init)
~header ~header
~graph_file ?graph_file
output_base rew_hist output_base rew_hist
) )
) () ) ()
......
...@@ -62,7 +62,9 @@ val save_det_gr: string -> Rewrite_history.t -> unit ...@@ -62,7 +62,9 @@ val save_det_gr: string -> Rewrite_history.t -> unit
val save_det_conll: ?header:string -> string -> Rewrite_history.t -> unit val save_det_conll: ?header:string -> string -> Rewrite_history.t -> unit
val det_dep_string: Rewrite_history.t -> string val det_dep_string: Rewrite_history.t -> string option
val conll_dep_string: ?keep_empty_rh:bool -> Rewrite_history.t -> string option
(** get a graph from a file either in 'gr' or 'conll' format. (** get a graph from a file either in 'gr' or 'conll' format.
File extension should be '.gr' or '.conll'. File extension should be '.gr' or '.conll'.
...@@ -71,6 +73,8 @@ File extension should be '.gr' or '.conll'. ...@@ -71,6 +73,8 @@ File extension should be '.gr' or '.conll'.
*) *)
val load_graph: string -> Instance.t val load_graph: string -> Instance.t
val of_conll: string -> (int * string) list -> Instance.t
val xml_graph: Xml.xml -> Instance.t val xml_graph: Xml.xml -> Instance.t
(** [raw_graph instance] returns all graph information with a triple of basic caml types: (** [raw_graph instance] returns all graph information with a triple of basic caml types:
...@@ -92,7 +96,7 @@ val write_html: ...@@ -92,7 +96,7 @@ val write_html:
?main_feat: string -> ?main_feat: string ->
?dot: bool -> ?dot: bool ->
header: string -> header: string ->
graph_file: string -> ?graph_file: string ->
Rewrite_history.t -> string -> unit Rewrite_history.t -> string -> unit
val error_html: val error_html:
......
...@@ -366,7 +366,7 @@ neg_item: ...@@ -366,7 +366,7 @@ neg_item:
| WITHOUT i = pn_item { i } | WITHOUT i = pn_item { i }
pn_item: pn_item:
| l = delimited(LACC,separated_nonempty_list_final_opt(SEMIC,pat_item),RACC) | l = delimited(LACC,separated_list_final_opt(SEMIC,pat_item),RACC)
{ {
{ {
Ast.pat_nodes = List_.opt_map (function Pat_node n -> Some n | _ -> None) l; Ast.pat_nodes = List_.opt_map (function Pat_node n -> Some n | _ -> None) l;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment