Commit 56a95e78 authored by Bruno Guillaume's avatar Bruno Guillaume
Browse files

Add Projection module for AG GetLexicon

parent 8ecded92
......@@ -13,6 +13,7 @@ open Printf
module String_set = Set.Make (String)
module String_map = Map.Make (String)
module String_opt_map = Map.Make (struct type t = string option let compare = compare end)
module Int_set = Set.Make (struct type t = int let compare = Stdlib.compare end)
module Int_map = Map.Make (struct type t = int let compare = Stdlib.compare end)
......
......@@ -10,6 +10,7 @@
module String_set : Set.S with type elt = string
module String_map : Map.S with type key = string
module String_opt_map : Map.S with type key = string option
module Int_set : Set.S with type elt = int
module Int_map : Map.S with type key = int
......
......@@ -277,6 +277,9 @@ module G_graph = struct
let fold_gid fct t init =
Gid_map.fold (fun gid _ acc -> fct gid acc) t.map init
let fold_node fct t init =
Gid_map.fold (fun _ node acc -> fct node acc) t.map init
let track_rules (rule_name,_) t =
if !Global.track_rules
then
......@@ -476,13 +479,13 @@ module G_graph = struct
let open Yojson.Basic.Util in
let meta =
try json |> member "meta" |> to_assoc
|> CCList.filter_map (fun (k,v) ->
match v with
| `String s -> Some (k,s)
| `Int i -> Some (k, string_of_int i)
| `Float f -> Some (k, string_of_float f)
| _ -> None
)
|> CCList.filter_map (fun (k,v) ->
match v with
| `String s -> Some (k,s)
| `Int i -> Some (k, string_of_int i)
| `Float f -> Some (k, string_of_float f)
| _ -> None
)
with Type_error _ -> [] in
(* for error reporting *)
......@@ -1267,7 +1270,7 @@ module G_graph = struct
try
Gid_map.iter
(fun gid node ->
if G_fs.get_value_opt "form" (G_node.get_fs node) = Some (String "__0__")
if G_node.is_conll_zero node
then raise (Find gid)
) graph.map; graph
with Find gid_root -> match del_node_opt gid_root graph with
......@@ -1475,6 +1478,14 @@ module G_graph = struct
acc
) t.map String_set.empty
let insert_proj keys t proj =
fold_node
(fun node acc ->
if G_node.is_conll_zero node
then acc
else G_node.insert_proj keys node acc
) t proj
let is_projective t =
let (arc_positions, pos_to_gid_map) =
Gid_map.fold (fun src_gid src_node (acc, acc_map) ->
......
......@@ -232,6 +232,9 @@ module G_graph: sig
val to_json: t -> Yojson.Basic.t
val insert_proj: string list -> t -> Projection.t -> Projection.t
val is_projective: t -> bool
type dfs_output = {
......
......@@ -9,6 +9,7 @@
(**********************************************************************************)
open Printf
open CCOpt.Infix
open Grew_base
open Grew_types
......@@ -52,6 +53,8 @@ module G_node = struct
let set_position p t = { t with position = Some p }
let unset_position t = { t with position = None }
let is_conll_zero t = G_fs.get_value_opt "form" t.fs = Some (String "__0__")
let is_eud_empty t = match G_fs.get_value_opt "_UD_empty" t.fs with
| Some (String "Yes") -> true
| _ -> false
......@@ -134,6 +137,10 @@ module G_node = struct
| (Some name, None) -> { t with name }
| _ -> Error.run "[G_node.unshift] Inconsistent data"
let insert_proj keys t proj =
let fs = get_fs t in
let values = List.map (fun k -> string_of_value <$> (G_fs.get_value_opt k fs)) keys in
Projection.insert values proj
end (* module G_node *)
(* ================================================================================ *)
......
......@@ -42,6 +42,7 @@ module G_node: sig
val set_position: int -> t -> t
val unset_position: t -> t
val is_conll_zero: t -> bool
val is_eud_empty: t -> bool
val dump: config:Conllx_config.t -> t -> string
......@@ -68,6 +69,8 @@ module G_node: sig
val shift: string -> int -> t -> t
val unshift: string -> t -> t
val insert_proj: (string list) -> t -> Projection.t -> Projection.t
end (* module G_node *)
(* ================================================================================ *)
......
......@@ -79,8 +79,6 @@ module Massoc_pid = Massoc_make (Pid)
(* ================================================================================ *)
module Massoc_string = Massoc_make (String)
(* ================================================================================ *)
module Lexicon = struct
module Line_set = Set.Make (struct type t=string list let compare = Stdlib.compare end)
......@@ -183,7 +181,6 @@ module Lexicon = struct
then load ?loc (Filename.concat dir filename)
else load ?loc filename
| Ast.Final (line_list) -> of_item_list ?loc line_list
end (* module Lexicon *)
(* ================================================================================ *)
......@@ -198,3 +195,62 @@ module Lexicons = struct
Error.build ?loc "Undefined field name \"%s\" in lexicon %s" field_name lexicon_name
| _ -> ()
end (* module Lexicons *)
(* ================================================================================ *)
module Projection = struct
type t =
| Leaf of int
| Node of t String_opt_map.t
let empty = Node String_opt_map.empty
(* nbre of occurrences *)
let rec cardinal = function
| Leaf n -> n
| Node map -> String_opt_map.fold (fun _ t acc -> acc + (cardinal t)) map 0
(* nbre of element *)
let rec size = function
| Leaf _ -> 1
| Node map -> String_opt_map.fold (fun _ t acc -> acc + (size t)) map 0
let rec insert data t =
match (data, t) with
| ([], Leaf i) -> Leaf (i+1)
| (value::tail, Node map) ->
let sub =
match (tail, String_opt_map.find_opt value map) with
| (_,Some t') -> t'
| ([],None) -> Leaf 0
| (_,None) -> Node String_opt_map.empty in
Node (String_opt_map.add value (insert tail sub) map)
| _ -> Error.bug "[Projection.insert] inconsitent data"
let rec prune_unambiguous depth t =
match (depth, t) with
| (1, Node map) ->
Node (
String_opt_map.fold
(fun k v acc ->
if size v > 1
then String_opt_map.add k v acc
else acc
) map String_opt_map.empty
)
| (n, Node map) -> Node (String_opt_map.map (fun v -> prune_unambiguous (depth - 1) v) map)
| _ -> Error.bug "[Projection.prune_unambiguous] no enough depth in the projection"
let to_json keys t =
let rec loop acc keys partial t =
match (keys, t) with
| ([], Leaf n) -> (`Assoc (("freq", `Int n) :: partial)) :: acc
| (key :: tail, Node map) ->
String_opt_map.fold
(fun value sub_t acc2 ->
let new_partial = (key, match value with Some s -> `String s | _ -> `Null) :: partial in
loop acc2 tail new_partial sub_t
) map acc
| _ -> Error.bug "[Projection.insert] inconsitent data" in
`List (loop [] keys [] t)
end
\ No newline at end of file
......@@ -95,3 +95,18 @@ module Lexicons : sig
val check: ?loc:Loc.t -> string -> string -> t -> unit
end (* module Lexicons *)
(* ================================================================================ *)
module Projection : sig
type t
val empty: t
val cardinal: t -> int
val insert: string option list -> t -> t
val prune_unambiguous: int -> t -> t
val to_json: string list -> t -> Yojson.Basic.t
end (* module Projection *)
......@@ -25,6 +25,7 @@ module Loc = struct
let to_string = Grew_base.Loc.to_string
end
(* ==================================================================================================== *)
(** {2 Exceptions} *)
(* ==================================================================================================== *)
......@@ -63,6 +64,19 @@ module Libgrew = struct
let set_track_impact flag = Grew_base.Global.track_impact:= flag
end
(* ==================================================================================================== *)
(** {2 Projection} *)
(* ==================================================================================================== *)
module Projection = struct
type t = Grew_types.Projection.t
let empty = Grew_types.Projection.empty
let prune_unambiguous = Grew_types.Projection.prune_unambiguous
let to_json = Grew_types.Projection.to_json
end
(* ==================================================================================================== *)
(** {2 Patterns} *)
(* ==================================================================================================== *)
......@@ -136,6 +150,8 @@ module Graph = struct
let set_meta key value t = Grew_graph.G_graph.set_meta key value t
let insert_proj keys t proj = Grew_graph.G_graph.insert_proj keys t proj
let load_gr ~config file =
if not (Sys.file_exists file)
then raise (Libgrew.Error ("File_not_found: " ^ file))
......@@ -265,6 +281,9 @@ module Graph = struct
let trace_depth t =
Grew_graph.G_graph.trace_depth t
let insert_proj keys t proj =
Grew_graph.G_graph.insert_proj keys t proj
end
(* ==================================================================================================== *)
......
......@@ -27,6 +27,22 @@ module Libgrew : sig
exception Bug of string
end
(* ==================================================================================================== *)
(** {2 Projection} *)
(* ==================================================================================================== *)
module Projection : sig
type t
val empty: t
val prune_unambiguous: int -> t -> t
val to_json: string list -> t -> Yojson.Basic.t
end
(* ==================================================================================================== *)
(** {2 Patterns} *)
(* ==================================================================================================== *)
......@@ -131,6 +147,8 @@ module Graph : sig
val set_meta: string -> string -> t -> t
val insert_proj: string list -> t -> Projection.t -> Projection.t
val get_feature_values: string -> t -> String_set.t
val get_relations: config:Conllx_config.t -> t -> String_set.t
val get_features: t -> String_set.t
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment