Commit d92aaf2c authored by bguillaum's avatar bguillaum

ongoing strategies

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/semagramme/libcaml-grew/trunk@9088 7838e531-6607-4d57-9587-6c381814729c
parent fc569a06
......@@ -313,49 +313,7 @@ module Ast = struct
mod_dir: string; (* the directory where the module is defined (for lp file localisation) *)
}
type old_sequence = {
seq_name:string;
seq_mod:string list;
seq_doc:string list;
seq_loc:Loc.t;
}
type new_sequence =
| Ref of string
| List of new_sequence list
| Plus of new_sequence list
| Star of new_sequence
| Diamond of new_sequence
let rec new_sequence_to_string = function
| Ref m -> m
| List l -> "[" ^ (String.concat "; " (List.map new_sequence_to_string l)) ^ "]"
| Plus l -> "[" ^ (String.concat "+" (List.map new_sequence_to_string l)) ^ "]"
| Star s -> "[" ^ (new_sequence_to_string s) ^"]" ^ "*"
| Diamond s -> "◇" ^ "[" ^(new_sequence_to_string s)^"]"
let rec flatten = function
| Ref m -> Ref m
| Star s -> Star (flatten s)
| Diamond s -> Diamond (flatten s)
| List l ->
let fl = List.map flatten l in
let rec loop = function
| [] -> []
| (List l) :: tail -> l @ (loop tail)
| x :: tail -> x :: (loop tail)
in List (loop fl)
| Plus l ->
let fl = List.map flatten l in
let rec loop = function
| [] -> []
| (Plus l) :: tail -> l @ (loop tail)
| x :: tail -> x :: (loop tail)
in Plus (loop fl)
type sequence =
| Old of old_sequence
| New of ((string * Loc.t) * new_sequence)
(** a GRS: graph rewriting system *)
type module_or_include =
......@@ -367,20 +325,18 @@ module Ast = struct
label_domain: (string * string list) list;
}
let empty_domain = { feature_domain=[]; label_domain=[] }
type domain_wi = Dom of domain | Dom_file of string
type grs_wi = {
domain_wi: domain_wi option;
modules_wi: module_or_include list;
sequences_wi: sequence list;
strategies_wi: Strategy.t list;
}
type grs = {
domain: domain option;
modules: modul list;
sequences: sequence list;
strategies: Strategy.t list;
}
type gr = {
......@@ -389,6 +345,6 @@ module Ast = struct
edges: edge list;
}
let empty_grs = { domain = None; modules = []; sequences= [] }
let empty_grs = { domain = None; modules = []; strategies= [] }
end (* module Ast *)
......@@ -174,26 +174,10 @@ module Ast : sig
mod_dir: string; (* the directory where the module is defined (for lp file localisation) *)
}
type old_sequence = {
seq_name:string;
seq_mod:string list;
seq_doc:string list;
seq_loc:Loc.t;
}
type new_sequence =
| Ref of string
| List of new_sequence list
| Plus of new_sequence list
| Star of new_sequence
| Diamond of new_sequence
val new_sequence_to_string : new_sequence -> string
val flatten : new_sequence -> new_sequence
type sequence =
| Old of old_sequence
| New of ((string * Loc.t) * new_sequence)
type module_or_include =
| Modul of modul
......@@ -209,14 +193,14 @@ module Ast : sig
type grs_wi = {
domain_wi: domain_wi option;
modules_wi: module_or_include list;
sequences_wi: sequence list;
strategies_wi: Strategy.t list;
}
(* a GRS: graph rewriting system *)
type grs = {
domain: domain option;
modules: modul list;
sequences: sequence list;
strategies: Strategy.t list;
}
type gr = {
......
......@@ -158,9 +158,6 @@ module G_fs = struct
(* list are supposed to be strictly ordered wrt compare *)
type t = G_feature.t list
(* ---------------------------------------------------------------------- *)
let to_raw t = List.map (fun (name, value) -> (name, string_of_value value)) t
(* ---------------------------------------------------------------------- *)
let empty = []
......
......@@ -38,7 +38,6 @@ module G_fs: sig
val to_dot: ?decorated_feat:(string * string list) -> ?main_feat: string -> t -> string
val to_word: ?main_feat: string -> t -> string
val to_dep: ?decorated_feat:(string * string list) -> ?position:float -> ?main_feat: string -> ?filter: string list -> t -> string
val to_raw: t -> (string * string) list
val to_conll: ?exclude: string list -> t -> string
(** [get_annot_info fs] searches for a feature with name starting with "__".
......
......@@ -785,24 +785,6 @@ module G_graph = struct
| _::t -> loop (n+1) t
in loop 0
let to_raw ?domain graph =
let nodes = Gid_map.fold (fun id elt acc -> (id,elt)::acc) graph.map [] in
let snodes = List.sort (fun (_,n1) (_,n2) -> G_node.position_comp n1 n2) nodes in
let raw_nodes = List.map (fun (gid,node) -> (gid, G_fs.to_raw (G_node.get_fs node))) snodes in
let get_num gid = list_num (fun (x,_) -> x=gid) raw_nodes in
let edge_list = ref [] in
Gid_map.iter
(fun src_gid node ->
Massoc_gid.iter
(fun tar_gid edge ->
edge_list := (get_num src_gid, G_edge.to_string ?domain edge, get_num tar_gid) :: !edge_list
)
(G_node.get_next node)
)
graph.map;
(graph.meta, List.map snd raw_nodes, !edge_list)
(* -------------------------------------------------------------------------------- *)
let to_conll_string ?domain graph =
let nodes = Gid_map.fold
......
......@@ -168,9 +168,4 @@ module G_graph: sig
val to_sentence: ?main_feat:string -> t -> string
val to_dep: ?domain:Domain.t -> ?filter : string list -> ?main_feat:string -> ?deco:G_deco.t -> t -> string
val to_conll_string: ?domain:Domain.t -> t -> string
val to_raw: ?domain:Domain.t -> t ->
string list *
(string * string) list list *
(int * string * int) list
end (* module G_graph *)
\ No newline at end of file
This diff is collapsed.
......@@ -106,5 +106,5 @@ module Grs: sig
val rule_iter: (string -> Rule.t -> unit) -> t -> unit
val filter_iter: (string -> Rule.t -> unit) -> t -> unit
val modules_of_sequence: t -> string -> Modul.t list
(* val modules_of_sequence: t -> string -> Modul.t list*)
end (* module Grs *)
......@@ -323,17 +323,14 @@ module Html_doc = struct
wnl " <div class=\"navbar\">&nbsp;<a href=\"index.html\">Up</a></div>";
wnl " <center><h1>List of sequences</h1></center>";
List.iter
(function
| Ast.New _ -> failwith "Wait..."
| Ast.Old seq ->
wnl "<h6>%s</h6>" seq.Ast.seq_name;
List.iter (fun l -> wnl "<p>%s</p>" (doc_to_html l)) seq.Ast.seq_doc;
(fun ast_seq ->
wnl "<h6>%s</h6>" ast_seq.Strategy.name;
List.iter (fun l -> wnl "<p>%s</p>" (doc_to_html l)) ast_seq.Strategy.doc;
wnl "<div class=\"code\">";
wnl "%s" (String.concat " ⇨ " (List.map (fun x -> sprintf "<a href=\"%s.html\">%s</a>" x x) seq.Ast.seq_mod));
wnl "%s" (Strategy.to_string ast_seq.Strategy.def);
wnl "</div>";
) ast.Ast.sequences;
) ast.Ast.strategies;
wnl " </body>";
wnl "</html>";
Buffer.contents buff
......@@ -824,7 +821,7 @@ module Corpus_stat = struct
let empty ~grs ~seq =
(* let modules = try List.assoc seq grs.Grs.sequences with Not_found -> [seq] in *)
let modules = Grs.modules_of_sequence grs seq in
let modules = [] (* Grs.modules_of_sequence grs seq *) in
let map = List.fold_left
(fun acc modul ->
if List.exists (fun m -> modul.Modul.name = m.Modul.name) modules
......
......@@ -86,7 +86,7 @@ module Loader = struct
{
Ast.domain = domain;
Ast.modules = flatten_modules main_file grs_wi.Ast.modules_wi;
Ast.sequences = grs_wi.Ast.sequences_wi;
Ast.strategies = grs_wi.Ast.strategies_wi;
}
(* ------------------------------------------------------------------------------------------*)
......
......@@ -224,14 +224,14 @@ grs_wi:
{
{ Ast.domain_wi=(match d with Some dom -> Some (Ast.Dom dom) | None -> None);
modules_wi=m;
sequences_wi=match s with Some seq -> seq | None -> [];
strategies_wi=match s with Some seq -> seq | None -> [];
}
}
| DOMAIN file=STRING m=module_or_include_list s=option(sequences) EOF
{
{ Ast.domain_wi= Some (Ast.Dom_file file);
modules_wi=m;
sequences_wi=match s with Some seq -> seq | None -> [];
strategies_wi=match s with Some seq -> seq | None -> [];
}
}
......@@ -460,7 +460,6 @@ pat_edge_or_const:
| STAR labels=delimited(LTR_EDGE_LEFT,separated_nonempty_list(PIPE,pattern_label_ident),LTR_EDGE_RIGHT) n2_loc=simple_id_with_loc
{ let (n2,loc) = n2_loc in Pat_const (Ast.Cst_in (n2,Ast.Pos_list labels), loc) }
(* "A -[^X|Y]-> B" *)
| n1_loc=simple_id_with_loc labels=delimited(LTR_EDGE_LEFT_NEG,separated_nonempty_list(PIPE,pattern_label_ident),LTR_EDGE_RIGHT) n2=simple_id
{ let (n1,loc) = n1_loc in Pat_edge ({Ast.edge_id = None; src=n1; edge_label_cst=Ast.Neg_list labels; tar=n2}, loc) }
......@@ -693,22 +692,31 @@ sequence:
seq_mod = mod_names;*/
| doc = option(COMMENT) id_loc=simple_id_with_loc mod_names=delimited(LACC,separated_list_final_opt(SEMIC,simple_id),RACC)
{ let (name,loc) = id_loc in
{
Strategy.name;
def = Strategy.Seq (List.map (fun m -> Strategy.Ref m) mod_names);
doc = begin match doc with Some d -> d | None -> [] end;
loc;
}
}
| doc = option(COMMENT) id_loc=simple_id_with_loc EQUAL def=op_seq
{ let (name,loc) = id_loc in
{
Ast.Old { Ast.seq_name = fst id_loc;
seq_mod = mod_names ;
seq_doc = begin match doc with Some d -> d | None -> [] end;
seq_loc = snd id_loc;
Strategy.name;
def;
doc = begin match doc with Some d -> d | None -> [] end;
loc;
}
}
| doc = option(COMMENT) id_loc=simple_id_with_loc EQUAL s=op_seq { Ast.New (id_loc, s) }
op_seq:
| m=simple_id { Ast.Ref m }
| m=simple_id { Strategy.Ref m }
| LPAREN s=op_seq RPAREN { s }
| s=op_seq STAR { Ast.Star (s) }
| s1=op_seq PLUS s2=op_seq { Ast.Plus [s1; s2] }
| s1=op_seq SEMIC s2=op_seq { Ast.List [s1; s2] }
| DISEQUAL s=op_seq { Ast.Diamond s }
| s=op_seq STAR { Strategy.Star (s) }
| s1=op_seq PLUS s2=op_seq { Strategy.Plus [s1; s2] }
| s1=op_seq SEMIC s2=op_seq { Strategy.Seq [s1; s2] }
| DISEQUAL s=op_seq { Strategy.Diamond s }
/*=============================================================================================*/
......
......@@ -1104,6 +1104,7 @@ module Rule = struct
)
to_do_set (Instance_set.empty,nf_set) in
loop new_to_do_set new_nf_set in
let nfs = loop (Instance_set.singleton instance) Instance_set.empty in
let reduced_nfs = filter_equal_nfs nfs in
......
......@@ -96,6 +96,8 @@ module Rule : sig
Instance.t ->
Instance_set.t * Instance_set.t
val one_step: ?domain: Domain.t -> string -> Instance.t -> t list -> Instance.t list
(** the type matching encodes the graph morphism from a pattern to a graph *)
(* NB: it was made public for the grep mode *)
type matching
......
......@@ -443,3 +443,46 @@ module Concat_item = struct
| String of string
end (* module Concat_item *)
(* ================================================================================ *)
module Strategy = struct
type def =
| Ref of string
| Seq of def list (* /!\ The list must not be empty *)
| Plus of def list (* /!\ The list must not be empty *)
| Star of def
| Diamond of def
type t = {
name:string;
def:def;
doc:string list;
loc:Loc.t;
}
let rec to_string = function
| Ref m -> m
| Seq l -> "[" ^ (String.concat "; " (List.map to_string l)) ^ "]"
| Plus l -> "[" ^ (String.concat "+" (List.map to_string l)) ^ "]"
| Star s -> "[" ^ (to_string s) ^"]" ^ "*"
| Diamond s -> "◇" ^ "[" ^(to_string s)^"]"
(* invariant: Seq list and Plus list are not empty in the input and so not empty in the output *)
let rec flatten = function
| Ref m -> Ref m
| Star s -> Star (flatten s)
| Diamond s -> Diamond (flatten s)
| Seq l ->
let fl = List.map flatten l in
let rec loop = function
| [] -> []
| (Seq l) :: tail -> l @ (loop tail)
| x :: tail -> x :: (loop tail)
in Seq (loop fl)
| Plus l ->
let fl = List.map flatten l in
let rec loop = function
| [] -> []
| (Plus l) :: tail -> l @ (loop tail)
| x :: tail -> x :: (loop tail)
in Plus (loop fl)
end (* module Strategy *)
......@@ -177,5 +177,31 @@ module Concat_item : sig
| String of string
end (* module Concat_item *)
(* ================================================================================ *)
module Strategy : sig
(* recursive definition of strategies *)
type def =
| Ref of string (* reference to a module name or to another strategy *)
| Seq of def list (* a sequence of strategies to apply one after the other *)
| Plus of def list (* a set of strategies to apply in parallel *)
| Star of def (* a strategy to apply iteratively *)
| Diamond of def (* pick one normal form a the given strategy *)
(* /!\ The list must not be empty in the Seq or Plus constructor *)
(* string dump of a strat *)
val to_string : def -> string
(* build an equivalent strategies where embedded Seq in Seq (resp Plus in Plus) are flattened *)
val flatten : def -> def
(* a strategy is given by its descrition in the grs file and the 4 fields: *)
type t = {
name:string; (* a unique name of the stratgy *)
def:def; (* the definition itself *)
doc:string list; (* lines of docs (if any in the GRS file) *)
loc:Loc.t; (* the location of the [name] of the strategy *)
}
end (* module Strategy *)
......@@ -168,9 +168,6 @@ type t = Grew_graph.G_graph.t
close_out out_ch
) ()
let raw ?domain gr =
handle ~name:"Graph.raw" (fun () -> Grew_graph.G_graph.to_raw ?domain gr) ()
let search_pattern ?domain pattern graph = Grew_rule.Rule.match_in_graph ?domain pattern graph
let node_matching pattern graph matching = Grew_rule.Rule.node_matching pattern graph matching
......@@ -310,3 +307,134 @@ end
(* -------------- remove... after merge ------------
let xml_graph xml =
handle ~name:"xml_graph" (fun () -> G_graph.of_xml xml) ()
let rewrite ~gr ~grs ~seq =
handle ~name:"rewrite" (fun () -> Grs.rewrite grs seq gr) ()
let display ~gr ~grs ~seq =
handle ~name:"display" (fun () -> Grs.build_rew_display grs seq gr) ()
let write_stat filename rew_hist =
handle ~name:"write_stat" (fun () -> Gr_stat.save filename (Gr_stat.from_rew_history rew_hist)) ()
let write_annot ~title static_dir annot_dir base_name_rew_hist_list =
handle ~name:"write_annot" (fun () -> Html_annot.build ~title static_dir annot_dir base_name_rew_hist_list) ()
let save_index ~dirname ~base_names =
handle ~name:"save_index" (fun () ->
let out_ch = open_out (Filename.concat dirname "index") in
List.iter (fun f -> fprintf out_ch "%s\n" f) base_names;
close_out out_ch
) ()
let save_graph_conll filename graph =
handle ~name:"save_graph_conll" (fun () ->
let out_ch = open_out filename in
fprintf out_ch "%s" (G_graph.to_conll graph);
close_out out_ch
) ()
let save_gr base rew_hist =
handle ~name:"save_gr" (fun () -> Rewrite_history.save_gr base rew_hist) ()
let save_conll base rew_hist =
handle ~name:"save_conll" (fun () -> Rewrite_history.save_conll base rew_hist) ()
let save_full_conll base rew_hist =
handle ~name:"save_full_conll" (fun () -> Rewrite_history.save_full_conll base rew_hist) ()
let save_det_gr base rew_hist =
handle ~name:"save_det_gr" (fun () -> Rewrite_history.save_det_gr base rew_hist) ()
let save_det_conll ?header base rew_hist =
handle ~name:"save_deeeet_conll" (fun () -> Rewrite_history.save_det_conll ?header base rew_hist) ()
let det_dep_string rew_hist =
handle ~name:"det_dep_string" (fun () -> Rewrite_history.det_dep_string rew_hist) ()
let conll_dep_string ?keep_empty_rh rew_hist =
handle ~name:"conll_dep_string" (fun () -> Rewrite_history.conll_dep_string ?keep_empty_rh rew_hist) ()
let write_html
?(no_init=false)
?(out_gr=false)
?filter
?main_feat
?dot
~header
?graph_file
rew_hist
output_base =
handle ~name:"write_html" (fun () ->
ignore (
Html_rh.build
?filter
?main_feat
?dot
~out_gr
~init_graph: (not no_init)
~header
?graph_file
output_base rew_hist
)
) ()
let error_html
?(no_init=false)
?main_feat
?dot
~header
msg
?init
output_base =
handle ~name:"error_html" (fun () ->
ignore (
Html_rh.error
?main_feat
?dot
~init_graph: (not no_init)
~header
output_base msg init
)
) ()
let make_index ~title ~grs_file ~html ~grs ~seq ~input_dir ~output_dir ~base_names =
handle ~name:"make_index" (fun () ->
let init = Corpus_stat.empty grs seq in
let corpus_stat =
List.fold_left
(fun acc base_name ->
Corpus_stat.add_gr_stat base_name (Gr_stat.load (Filename.concat output_dir (base_name^".stat"))) acc
) init base_names in
Corpus_stat.save_html title grs_file input_dir output_dir corpus_stat
) ()
let html_sentences ~title = handle ~name:"html_sentences" (fun () -> Html_sentences.build ~title) ()
let feature_names () = handle ~name:"feature_names" (fun () -> Domain.feature_names ()) ()
let to_dot_graph ?main_feat ?(deco=G_deco.empty) graph =
handle ~name:"to_dot_graph" (fun () -> G_graph.to_dot ?main_feat graph ~deco) ()
let to_dep_graph ?filter ?main_feat ?(deco=G_deco.empty) graph =
handle ~name:"to_dep_graph" (fun () -> G_graph.to_dep ?filter ?main_feat ~deco graph) ()
let to_gr_graph graph =
handle ~name:"to_gr_graph" (fun () -> G_graph.to_gr graph) ()
let to_conll_graph graph =
handle ~name:"to_conll_graph" (fun () -> G_graph.to_conll graph) ()
type pattern = Rule.pattern
type matching = Rule.matching
let load_pattern file =
handle ~name:"load_pattern" (fun () -> Rule.build_pattern (Loader.pattern file)) ()
let match_in_graph pattern graph = Rule.match_in_graph pattern graph
let match_deco pattern matching = Rule.match_deco pattern matching
-------------- remove... after merge ------------ *)
......@@ -94,16 +94,6 @@ module Graph : sig
val to_sentence: ?main_feat:string -> t -> string
(** [raw_graph instance] returns all graph information with a triple of elementary caml types:
- the meta data
- the list of node (node is a list of feature (feature is string * string))
- the list of edge (src, label, tar) where src and tar refers to the position in the node list
*)
val raw: ?domain:Domain.t -> t ->
string list *
(string * string) list list *
(int * string * int) list
val to_dot : ?domain:Domain.t -> ?main_feat:string -> ?deco:Deco.t -> t -> string
val to_dep : ?domain:Domain.t -> ?filter: string list -> ?main_feat:string -> ?deco:Deco.t -> t -> string
......@@ -183,6 +173,8 @@ module Rewrite: sig
val save_det_gr: ?domain:Domain.t -> string -> history -> unit
val save_index: dirname:string -> base_names: string list -> unit
val save_det_conll: ?domain:Domain.t -> ?header:string -> string -> history -> unit
val det_dep_string: ?domain:Domain.t -> history -> string option
......@@ -202,4 +194,13 @@ module Rewrite: sig
val html_sentences: title:string -> string -> (bool * string * int * string) list -> unit
end
(* (* type and function added for grew-web *)
type pattern
type matching
(** [load_pattern file] returns the pattern described in the given [file] *)
val load_pattern: string -> pattern
(** [match_in_graph pattern graph] returns the list of the possible matching of [pattern] in [graph] *)
val match_in_graph: pattern -> Graph.t -> matching list
*)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment