grew_fs.ml 14.4 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

bguillaum's avatar
bguillaum committed
11
open Printf
pj2m's avatar
pj2m committed
12
open Log
bguillaum's avatar
bguillaum committed
13

14
open Libgrew_utils
bguillaum's avatar
bguillaum committed
15
open Grew_ast
pj2m's avatar
pj2m committed
16

bguillaum's avatar
bguillaum committed
17

bguillaum's avatar
bguillaum committed
18
type value = String of string | Float of float
bguillaum's avatar
bguillaum committed
19 20

let string_of_value = function
bguillaum's avatar
bguillaum committed
21 22 23 24
  | String s -> Str.global_replace (Str.regexp "\"") "\\\"" s
  | Float i -> String_.of_float i

let conll_string_of_value = function
bguillaum's avatar
bguillaum committed
25
  | String s -> s
bguillaum's avatar
bguillaum committed
26
  | Float i -> String_.of_float i
bguillaum's avatar
bguillaum committed
27

28 29 30
(* ==================================================================================================== *)
module Domain = struct
  let current = ref None
pj2m's avatar
pj2m committed
31

32
  let reset () = current := None
pj2m's avatar
pj2m committed
33

bguillaum's avatar
bguillaum committed
34 35
  let init ast_domain =
    current := Some (Ast.normalize_domain ast_domain)
36

37 38
  let build ?loc name unsorted_values =
    let values = List.sort Pervasives.compare unsorted_values in
bguillaum's avatar
bguillaum committed
39
    match (name.[0], !current) with
bguillaum's avatar
bguillaum committed
40 41 42
      | ('_', _) (* no check on feat_name starting with '_' *)
      | (_, None) -> List.map (fun s -> String s) values (* no domain defined *)
      | (_, Some dom) ->
bguillaum's avatar
bguillaum committed
43 44 45 46 47
        let rec loop = function
          | [] -> Error.build ?loc "[GRS] Unknown feature name '%s'" name
          | ((Ast.Open n)::_) when n = name ->
            List.map (fun s -> String s) values
          | ((Ast.Int n)::_) when n = name ->
bguillaum's avatar
bguillaum committed
48
            (try List.map (fun s -> Float (String_.to_float s)) values
bguillaum's avatar
bguillaum committed
49 50 51 52
            with Failure _ -> Error.build ?loc "[GRS] The feature '%s' is of type int" name)
          | ((Ast.Closed (n,vs))::_) when n = name ->
            (match List_.sort_diff values vs with
              | [] -> List.map (fun s -> String s) values
bguillaum's avatar
bguillaum committed
53
              | l when List.for_all (fun x -> x.[0] = '_') l -> List.map (fun s -> String s) values
bguillaum's avatar
bguillaum committed
54 55 56 57 58
              | l -> Error.build ?loc "Unknown feature values '%s' for feature name '%s'"
	        (List_.to_string (fun x->x) ", " l)
	        name
            )
          | _::t -> loop t in
bguillaum's avatar
bguillaum committed
59
        loop dom
bguillaum's avatar
bguillaum committed
60 61 62 63 64

  let build_one ?loc name value =
    match build ?loc name [value] with
      | [x] -> x
      | _ -> Error.bug ?loc "[Domain.build_one]"
65 66 67 68 69

  let feature_names () =
    match !current with
      | None -> None
      | Some dom -> Some (List.map (function Ast.Closed (fn, _) | Ast.Open fn | Ast.Int fn -> fn) dom)
70 71 72 73
end

(* ==================================================================================================== *)
module G_feature = struct
bguillaum's avatar
bguillaum committed
74 75

  type t = string * value
76 77

  let get_name = fst
78

pj2m's avatar
pj2m committed
79 80
  let compare feat1 feat2 = Pervasives.compare (get_name feat1) (get_name feat2)

bguillaum's avatar
bguillaum committed
81
  let build (x : Ast.feature) = match x with
82
    | ({Ast.kind=Ast.Equality [atom]; name=name},loc) ->
bguillaum's avatar
bguillaum committed
83
      (name, Domain.build_one ~loc name atom)
84 85
    | _ -> Error.build "Illegal feature declaration in Graph (must be '=' and atomic)"

bguillaum's avatar
bguillaum committed
86
  let to_string (feat_name, feat_val) = sprintf "%s=%s" feat_name (string_of_value feat_val)
87

bguillaum's avatar
bguillaum committed
88
  let to_gr (feat_name, feat_val) = sprintf "%s=\"%s\"" feat_name (string_of_value feat_val)
bguillaum's avatar
bguillaum committed
89 90
      
  let to_dot (feat_name, feat_val) =
bguillaum's avatar
bguillaum committed
91 92 93 94
    let string_val = string_of_value feat_val in
    match Str.split (Str.regexp ":C:") string_val with
      | [] -> Error.bug "[G_feature.to_dot] feature value '%s'" string_val
      | fv::_ -> sprintf "%s=%s" feat_name fv
95 96 97 98 99 100 101
end

(* ==================================================================================================== *)
module P_feature = struct
  (* feature= (feature_name, disjunction of atomic values) *) 

  type v = 
bguillaum's avatar
bguillaum committed
102 103
    | Equal of value list  (* with Equal constr, the list MUST never be empty *)
    | Different of value list
bguillaum's avatar
bguillaum committed
104 105
    | Param of int
    | Absent
106 107 108 109 110 111 112

  type t = string * v

  let get_name = fst

  let compare feat1 feat2 = Pervasives.compare (get_name feat1) (get_name feat2)

bguillaum's avatar
bguillaum committed
113 114 115 116 117 118 119 120 121
  let unif_value v1 v2 = 
    match (v1, v2) with
    | (Equal l1, Equal l2) -> 
        (match List_.sort_inter l1 l2 with
        | [] -> Error.build "Unification failure" 
        | l -> Equal l)
    | (Different l1, Different l2) -> Different (List_.sort_union l1 l2)
    | _ -> Error.build "cannot unify heterogeneous pattern features"
        
bguillaum's avatar
bguillaum committed
122
  let to_string ?param_names = function
bguillaum's avatar
bguillaum committed
123
    | (feat_name, Equal atoms) -> sprintf "%s=%s" feat_name (List_.to_string string_of_value "|" atoms)
124
    | (feat_name, Different []) -> sprintf "%s=*" feat_name
bguillaum's avatar
bguillaum committed
125
    | (feat_name, Different atoms) -> sprintf "%s<>%s" feat_name (List_.to_string string_of_value "|" atoms)
bguillaum's avatar
bguillaum committed
126
    | (feat_name, Absent) -> sprintf "!%s" feat_name
bguillaum's avatar
bguillaum committed
127 128 129 130
    | (feat_name, Param index) -> 
      match param_names with
        | None -> sprintf "%s=$%d" feat_name index 
        | Some (l,_) -> sprintf "%s=%s" feat_name (List.nth l index)
131 132 133

  let build ?pat_vars = function
    | ({Ast.kind=Ast.Equality unsorted_values; name=name}, loc) ->
bguillaum's avatar
bguillaum committed
134
      let values = Domain.build ~loc name unsorted_values in (name, Equal values)
135
    | ({Ast.kind=Ast.Disequality unsorted_values; name=name}, loc) ->
bguillaum's avatar
bguillaum committed
136
      let values = Domain.build ~loc name unsorted_values in (name, Different values)
bguillaum's avatar
bguillaum committed
137
    | ({Ast.kind=Ast.Absent; name=name}, loc) -> (name, Absent)
138
    | ({Ast.kind=Ast.Param var; name=name}, loc) ->
bguillaum's avatar
bguillaum committed
139
        match pat_vars with
140
        | None -> Error.bug ~loc "[P_feature.build] param '%s' in an unparametrized rule" var
bguillaum's avatar
bguillaum committed
141 142
        | Some l -> 
            match List_.pos var l with
143 144
            | Some index -> (name, Param index)
            | None -> Error.build ~loc "[P_feature.build] Unknown pattern variable '%s'" var
pj2m's avatar
pj2m committed
145 146
end

147 148
(* ==================================================================================================== *)
module G_fs = struct
pj2m's avatar
pj2m committed
149
  (* list are supposed to be striclty ordered wrt compare*)
150
  type t = G_feature.t list
bguillaum's avatar
bguillaum committed
151

bguillaum's avatar
bguillaum committed
152
  let to_raw t = List.map (fun (name, value) -> (name, string_of_value value)) t
bguillaum's avatar
bguillaum committed
153

154
  let empty = []
bguillaum's avatar
bguillaum committed
155

156
  let set_feat ?loc feature_name atom t =
bguillaum's avatar
bguillaum committed
157
    let new_value = Domain.build_one ?loc feature_name atom in
158
    let rec loop = function
bguillaum's avatar
bguillaum committed
159 160 161
    | [] -> [(feature_name, new_value)]
    | ((fn,_)::_) as t when feature_name < fn -> (feature_name, new_value)::t
    | (fn,_)::t when feature_name = fn -> (feature_name, new_value)::t
162 163
    | (fn,a)::t -> (fn,a) :: (loop t) 
    in loop t
164

165
  let del_feat = List_.sort_remove_assoc
166

167 168
  let get_atom = List_.sort_assoc

bguillaum's avatar
bguillaum committed
169 170 171 172 173 174
  let get_annot_info fs =
    match List.filter (fun (fn,value) -> String.length fn > 1 && String.sub fn 0 2 = "__") fs with
      | [] -> None
      | [(fn,value)] -> Some (fn,conll_string_of_value value)
      | _ -> Error.build "[Fs.get_annot_info] More than one annot feature in the same feature structure"

bguillaum's avatar
bguillaum committed
175 176 177
  let get_string_atom feat_name t = 
    match List_.sort_assoc feat_name t with
      | None -> None
bguillaum's avatar
bguillaum committed
178
      | Some v -> Some (conll_string_of_value v)
bguillaum's avatar
bguillaum committed
179

bguillaum's avatar
bguillaum committed
180
  let get_float_feat feat_name t =
bguillaum's avatar
bguillaum committed
181 182
    match List_.sort_assoc feat_name t with
      | None -> None
bguillaum's avatar
bguillaum committed
183 184
      | Some (Float i) -> Some i
      | Some _ -> Error.build "[Fs.get_float_feat]"
bguillaum's avatar
bguillaum committed
185

bguillaum's avatar
bguillaum committed
186
  let to_string t = List_.to_string G_feature.to_string "," t
187
  let to_gr t = List_.to_string G_feature.to_gr ", " t
pj2m's avatar
pj2m committed
188

189 190 191 192
  let build ast_fs =
    let unsorted = List.map (fun feat -> G_feature.build feat) ast_fs in
    List.sort G_feature.compare unsorted

193
  let of_conll ?loc line =
194
    let unsorted_without_pos =
195 196 197 198
      ("phon", Domain.build_one ?loc "phon" line.Conll.phon)
      :: ("lemma", Domain.build_one ?loc "lemma" line.Conll.lemma)
      :: ("cat", Domain.build_one ?loc "cat" line.Conll.pos1)
      :: (List.map (fun (f,v) -> (f, Domain.build_one ?loc f v)) line.Conll.morph) in
199
    let unsorted = match line.Conll.pos2 with
bguillaum's avatar
bguillaum committed
200
      | "" | "_" -> unsorted_without_pos
201
      | s -> ("pos", Domain.build_one "pos" s) :: unsorted_without_pos in
202 203 204 205 206 207 208 209 210 211 212 213 214
    List.sort G_feature.compare unsorted

  exception Fail_unif 
  let unif fs1 fs2 = 
    let rec loop = function
      | [], fs | fs, [] -> fs
      | (f1::t1, f2::t2) when G_feature.compare f1 f2 < 0 -> f1 :: loop (t1, f2::t2)
      | (f1::t1, f2::t2) when G_feature.compare f1 f2 > 0 -> f2 :: loop (f1::t1, t2)
                                                                    
      (* all remaining case are fn1 = fn2 *)
      | ((fn, a1)::t1, (_, a2)::t2) when a1=a2 -> (fn,a1) :: (loop (t1, t2))
      | _ -> raise Fail_unif
    in try Some (loop (fs1, fs2)) with Fail_unif -> None
215 216 217

  let get_main ?main_feat t =
    let main_list = match main_feat with
bguillaum's avatar
bguillaum committed
218
    | None -> ["phon"]
bguillaum's avatar
bguillaum committed
219
    | Some string -> Str.split (Str.regexp "\\( *; *\\)\\|#") string in
220

221 222 223
    let rec loop = function
      | [] -> (None, t)
      | feat_name :: tail ->
224 225 226
          match List_.sort_assoc feat_name t with
          | Some atom -> (Some atom, List_.sort_remove_assoc feat_name t)
          | None -> loop tail in
227
    loop main_list
bguillaum's avatar
bguillaum committed
228

229 230
  let to_dot ?main_feat t =
    match get_main ?main_feat t with
bguillaum's avatar
bguillaum committed
231
    | (None, _) -> List_.to_string G_feature.to_dot "\\n" t
bguillaum's avatar
bguillaum committed
232 233
    | (Some atom, sub) ->
      sprintf "{%s|%s}" (string_of_value atom) (List_.to_string G_feature.to_dot "\\n" sub)
234
          
235 236 237
  let to_word ?main_feat t =
    match get_main ?main_feat t with
      | (None, _) -> "#"
bguillaum's avatar
bguillaum committed
238
      | (Some atom, _) -> string_of_value atom
239
        
240
  let to_dep ?position ?main_feat ?filter t =
241
    let (main_opt, sub) = get_main ?main_feat t in
242
    let last = match position with Some f when f > 0. -> [("position", Float f)] | _ -> [] in
243
    let reduced_sub = match filter with
244 245
      | None -> sub @ last
      | Some l -> (List.filter (fun (fn,_) -> List.mem fn l) sub) @ last in
bguillaum's avatar
bguillaum committed
246
    sprintf " word=\"%s\"; subword=\"%s\""
bguillaum's avatar
bguillaum committed
247
      (match main_opt with Some atom -> string_of_value atom | None -> "_")
248
      (List_.to_string G_feature.to_string "#" reduced_sub)
bguillaum's avatar
bguillaum committed
249 250 251 252 253

  let to_conll ?exclude t =
    let reduced_t = match exclude with
      | None -> t
      | Some list -> List.filter (fun (fn,_) -> not (List.mem fn list || fn.[0]='_')) t in
bguillaum's avatar
bguillaum committed
254 255 256 257 258 259 260
    match reduced_t with
      | [] -> "_"
      | _ -> String.concat "|"
        (List.map
           (function (fn, String "true") -> fn | (fn, fv) -> fn^"="^(string_of_value fv))
           reduced_t
        )
bguillaum's avatar
bguillaum committed
261
end (* module G_fs *)
262 263 264 265 266
 
(* ==================================================================================================== *)
module P_fs = struct
  (* list are supposed to be striclty ordered wrt compare*)
  type t = P_feature.t list
pj2m's avatar
pj2m committed
267

268
  let empty = []
pj2m's avatar
pj2m committed
269

270 271 272 273 274 275 276 277 278 279 280 281
  let check_position ?param position t =
    try
      match List.assoc "position" t with
        | P_feature.Equal pos_list -> List.mem (Float position) pos_list
        | P_feature.Different pos_list -> not (List.mem (Float position) pos_list)
        | P_feature.Absent -> false
        | P_feature.Param index ->
          match param with
            | Some p -> float_of_string (Lex_par.get_param_value index p) = position
            | None -> Log.bug "[P_fs.check_position] Illegal parametrized pattern feature"; exit 2
    with Not_found -> true

282 283 284
  let build ?pat_vars ast_fs =
    let unsorted = List.map (P_feature.build ?pat_vars) ast_fs in
    List.sort P_feature.compare unsorted 
pj2m's avatar
pj2m committed
285

286
  let to_string t = List_.to_string P_feature.to_string "\\n" t
pj2m's avatar
pj2m committed
287

288 289 290 291 292
  let to_dep ?filter param_names t =
    let reduced = match filter with
      | None -> t
      | Some l -> List.filter (fun (fn,_) -> List.mem fn l) t in
    List_.to_string (P_feature.to_string ~param_names) "#" reduced
bguillaum's avatar
bguillaum committed
293

294
  let to_dot t = List_.to_string P_feature.to_string "\\n" t
pj2m's avatar
pj2m committed
295

296
  exception Fail
pj2m's avatar
pj2m committed
297

298
  let match_ ?param pattern fs =
299 300 301
    let pattern_wo_pos =
      try List.remove_assoc "position" pattern
      with Not_found -> pattern in
302 303
    let rec loop acc = function
      | [], _ -> acc
pj2m's avatar
pj2m committed
304

bguillaum's avatar
bguillaum committed
305 306 307 308 309 310 311 312
      (* a feature_name present only in instance -> Skip it *)
      | ((fn_pat, fv_pat)::t_pat, (fn, _)::t) when fn_pat > fn -> loop acc ((fn_pat, fv_pat)::t_pat, t)

      (* Three next cases: pattern requires for the absence of a feature. case 1&2: OK, go on, 3: fail *)
      | ((fn_pat, P_feature.Absent)::t_pat, []) -> loop acc (t_pat, [])
      | ((fn_pat, P_feature.Absent)::t_pat, (fn, fa)::t) when fn_pat < fn -> loop acc (t_pat, (fn, fa)::t)
      | ((fn_pat, P_feature.Absent)::t_pat, (fn, fa)::t) when fn_pat = fn -> raise Fail

313 314 315
      (* Two next cases: each feature_name present in pattern must be in instance: [] means unif failure *)
      | _, [] -> raise Fail
      | ((fn_pat, _)::_, (fn, _)::_) when fn_pat < fn -> raise Fail
pj2m's avatar
pj2m committed
316

317 318 319
      (* Next cases: fn_pat = fn *)
      | ((_, (P_feature.Equal fv))::t_pat, (_, fa)::t) when List_.sort_mem fa fv -> loop acc (t_pat,t)
      | ((_, (P_feature.Different fv))::t_pat, (_, fa)::t) when not (List_.sort_mem fa fv) -> loop acc (t_pat,t)
pj2m's avatar
pj2m committed
320

321 322 323 324
      | ((_, (P_feature.Param index))::t_pat, (_, atom)::t) ->
          (match acc with
          | None -> Log.bug "[P_fs.compatible] Illegal parametrized pattern feature"; exit 2
          | Some param ->
bguillaum's avatar
bguillaum committed
325
              (match Lex_par.filter index (string_of_value atom) param with
326
              | None -> raise Fail
327 328 329
              | Some new_param -> loop (Some new_param) (t_pat,t)
              )
          )
pj2m's avatar
pj2m committed
330

331 332
      (* remaining cases: Equal and not list_mem  |  Diff and not list_mem -> fail*)  
      | _ -> raise Fail
333
    in loop param (pattern_wo_pos,fs)
pj2m's avatar
pj2m committed
334 335 336 337

  let filter fs_p fs_g = 
    let rec loop = function
      | [], fs -> true
bguillaum's avatar
bguillaum committed
338 339 340 341 342 343 344

      | ((fn1,_)::_ as f1, (fn2,_)::t2) when fn1 > fn2 -> loop (f1, t2)

      | ((fn1,P_feature.Absent)::t1, []) -> loop (t1,[])
      | ((fn1,P_feature.Absent)::t1, ((fn2,_)::_ as f2)) when fn1 < fn2 -> loop (t1,f2)
      | ((fn1,P_feature.Absent)::t1, (fn2,_)::_) when fn1 = fn2 -> false

pj2m's avatar
pj2m committed
345 346
      | fs, [] -> false

347
      | ((fn1,_)::_, (fn2,_)::_) when fn1 < fn2 -> false
pj2m's avatar
pj2m committed
348 349

      (* all remaining case are fn1 = fn2 *)
350 351 352
      | ((_, (P_feature.Equal fv))::t1, (_, atom)::t2) when List_.sort_mem atom fv -> loop (t1, t2)
      | ((_, (P_feature.Different fv))::t1, (_, atom)::t2) when not (List_.sort_mem atom fv) -> loop (t1, t2)
      | _ -> false
pj2m's avatar
pj2m committed
353

354
    in loop (fs_p, fs_g)
bguillaum's avatar
bguillaum committed
355 356 357 358 359 360 361 362 363 364 365 366

  let unif fs1 fs2 = 
    let rec loop = function
      | [], fs -> fs
      | fs, [] -> fs

      | ((fn1,v1)::t1, (fn2,v2)::t2) when fn1 < fn2 -> (fn1,v1) :: (loop (t1,(fn2,v2)::t2))
      | ((fn1,v1)::t1, (fn2,v2)::t2) when fn1 > fn2 -> (fn2,v2) :: (loop ((fn1,v1)::t1,t2))

      (* all remaining case are fn1 = fn2 *)
      | ((fn1,v1)::t1, (fn2,v2)::t2) (* when fn1 = fn2 *) -> (fn1,P_feature.unif_value v1 v2) :: (loop (t1,t2))
    in loop (fs1, fs2)      
pj2m's avatar
pj2m committed
367
end