grew_fs.ml 11.2 KB
Newer Older
bguillaum's avatar
bguillaum committed
1
open Printf
pj2m's avatar
pj2m committed
2
open Log
bguillaum's avatar
bguillaum committed
3 4 5

open Grew_utils
open Grew_ast
pj2m's avatar
pj2m committed
6

bguillaum's avatar
bguillaum committed
7 8 9 10 11 12 13

type value = String of string | Int of int

let string_of_value = function
  | String s -> s
  | Int i -> string_of_int i

14 15 16
(* ==================================================================================================== *)
module Domain = struct
  let current = ref None
pj2m's avatar
pj2m committed
17

18
  let reset () = current := None
pj2m's avatar
pj2m committed
19

20 21
  let init ast_domain = current := Some ast_domain

22 23
  let build ?loc name unsorted_values =
    let values = List.sort Pervasives.compare unsorted_values in
bguillaum's avatar
bguillaum committed
24
    match (name.[0], !current) with
bguillaum's avatar
bguillaum committed
25 26 27
      | ('_', _) (* no check on feat_name starting with '_' *)
      | (_, None) -> List.map (fun s -> String s) values (* no domain defined *)
      | (_, Some dom) ->
bguillaum's avatar
bguillaum committed
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
        let rec loop = function
          | [] -> Error.build ?loc "[GRS] Unknown feature name '%s'" name
          | ((Ast.Open n)::_) when n = name ->
            List.map (fun s -> String s) values
          | ((Ast.Int n)::_) when n = name ->
            (try List.map (fun s -> Int (int_of_string s)) values
            with Failure _ -> Error.build ?loc "[GRS] The feature '%s' is of type int" name)
          | ((Ast.Closed (n,vs))::_) when n = name ->
            (match List_.sort_diff values vs with
              | [] -> List.map (fun s -> String s) values
              | l -> Error.build ?loc "Unknown feature values '%s' for feature name '%s'"
	        (List_.to_string (fun x->x) ", " l)
	        name
            )
          | _::t -> loop t in
bguillaum's avatar
bguillaum committed
43
        loop dom
bguillaum's avatar
bguillaum committed
44 45 46 47 48

  let build_one ?loc name value =
    match build ?loc name [value] with
      | [x] -> x
      | _ -> Error.bug ?loc "[Domain.build_one]"
49 50 51 52 53

  let feature_names () =
    match !current with
      | None -> None
      | Some dom -> Some (List.map (function Ast.Closed (fn, _) | Ast.Open fn | Ast.Int fn -> fn) dom)
54 55 56 57
end

(* ==================================================================================================== *)
module G_feature = struct
bguillaum's avatar
bguillaum committed
58 59

  type t = string * value
60 61

  let get_name = fst
62

pj2m's avatar
pj2m committed
63 64
  let compare feat1 feat2 = Pervasives.compare (get_name feat1) (get_name feat2)

bguillaum's avatar
bguillaum committed
65
  let build (x : Ast.feature) = match x with
66
    | ({Ast.kind=Ast.Equality [atom]; name=name},loc) ->
bguillaum's avatar
bguillaum committed
67
      (name, Domain.build_one ~loc name atom)
68 69
    | _ -> Error.build "Illegal feature declaration in Graph (must be '=' and atomic)"

bguillaum's avatar
bguillaum committed
70
  let to_string (feat_name, feat_val) = sprintf "%s=%s" feat_name (string_of_value feat_val)
71

bguillaum's avatar
bguillaum committed
72
  let to_gr (feat_name, feat_val) = sprintf "%s=\"%s\"" feat_name (string_of_value feat_val)
bguillaum's avatar
bguillaum committed
73 74
      
  let to_dot (feat_name, feat_val) =
bguillaum's avatar
bguillaum committed
75 76 77 78
    let string_val = string_of_value feat_val in
    match Str.split (Str.regexp ":C:") string_val with
      | [] -> Error.bug "[G_feature.to_dot] feature value '%s'" string_val
      | fv::_ -> sprintf "%s=%s" feat_name fv
79 80 81 82 83 84 85
end

(* ==================================================================================================== *)
module P_feature = struct
  (* feature= (feature_name, disjunction of atomic values) *) 

  type v = 
bguillaum's avatar
bguillaum committed
86 87
    | Equal of value list  (* with Equal constr, the list MUST never be empty *)
    | Different of value list
88 89 90 91 92 93 94 95
    | Param of int 

  type t = string * v

  let get_name = fst

  let compare feat1 feat2 = Pervasives.compare (get_name feat1) (get_name feat2)

bguillaum's avatar
bguillaum committed
96 97 98 99 100 101 102 103 104
  let unif_value v1 v2 = 
    match (v1, v2) with
    | (Equal l1, Equal l2) -> 
        (match List_.sort_inter l1 l2 with
        | [] -> Error.build "Unification failure" 
        | l -> Equal l)
    | (Different l1, Different l2) -> Different (List_.sort_union l1 l2)
    | _ -> Error.build "cannot unify heterogeneous pattern features"
        
bguillaum's avatar
bguillaum committed
105
  let to_string ?param_names = function
bguillaum's avatar
bguillaum committed
106
    | (feat_name, Equal atoms) -> sprintf "%s=%s" feat_name (List_.to_string string_of_value "|" atoms)
107
    | (feat_name, Different []) -> sprintf "%s=*" feat_name
bguillaum's avatar
bguillaum committed
108
    | (feat_name, Different atoms) -> sprintf "%s<>%s" feat_name (List_.to_string string_of_value "|" atoms)
bguillaum's avatar
bguillaum committed
109 110 111 112
    | (feat_name, Param index) -> 
      match param_names with
        | None -> sprintf "%s=$%d" feat_name index 
        | Some (l,_) -> sprintf "%s=%s" feat_name (List.nth l index)
113 114 115

  let build ?pat_vars = function
    | ({Ast.kind=Ast.Equality unsorted_values; name=name}, loc) ->
bguillaum's avatar
bguillaum committed
116
      let values = Domain.build ~loc name unsorted_values in (name, Equal values)
117
    | ({Ast.kind=Ast.Disequality unsorted_values; name=name}, loc) ->
bguillaum's avatar
bguillaum committed
118
      let values = Domain.build ~loc name unsorted_values in (name, Different values)
119
    | ({Ast.kind=Ast.Param var; name=name}, loc) ->
bguillaum's avatar
bguillaum committed
120
        match pat_vars with
121
        | None -> Error.bug ~loc "[P_feature.build] param '%s' in an unparametrized rule" var
bguillaum's avatar
bguillaum committed
122 123
        | Some l -> 
            match List_.pos var l with
124 125
            | Some index -> (name, Param index)
            | None -> Error.build ~loc "[P_feature.build] Unknown pattern variable '%s'" var
pj2m's avatar
pj2m committed
126 127
end

128 129
(* ==================================================================================================== *)
module G_fs = struct
pj2m's avatar
pj2m committed
130
  (* list are supposed to be striclty ordered wrt compare*)
131
  type t = G_feature.t list
bguillaum's avatar
bguillaum committed
132

bguillaum's avatar
bguillaum committed
133
  let to_raw t = List.map (fun (name, value) -> (name, string_of_value value)) t
bguillaum's avatar
bguillaum committed
134

135
  let empty = []
bguillaum's avatar
bguillaum committed
136

137
  let set_feat ?loc feature_name atom t =
bguillaum's avatar
bguillaum committed
138
    let new_value = Domain.build_one ?loc feature_name atom in
139
    let rec loop = function
bguillaum's avatar
bguillaum committed
140 141 142
    | [] -> [(feature_name, new_value)]
    | ((fn,_)::_) as t when feature_name < fn -> (feature_name, new_value)::t
    | (fn,_)::t when feature_name = fn -> (feature_name, new_value)::t
143 144
    | (fn,a)::t -> (fn,a) :: (loop t) 
    in loop t
145

146
  let del_feat = List_.sort_remove_assoc
147

148 149
  let get_atom = List_.sort_assoc

bguillaum's avatar
bguillaum committed
150 151 152 153 154
  let get_string_atom feat_name t = 
    match List_.sort_assoc feat_name t with
      | None -> None
      | Some v -> Some (string_of_value v)

bguillaum's avatar
bguillaum committed
155 156 157 158 159 160
  let get_int_feat feat_name t =
    match List_.sort_assoc feat_name t with
      | None -> None
      | Some (Int i) -> Some i
      | Some _ -> Error.build "[Fs.get_int_feat]"

bguillaum's avatar
bguillaum committed
161
  let to_string t = List_.to_string G_feature.to_string "," t
162
  let to_gr t = List_.to_string G_feature.to_gr ", " t
pj2m's avatar
pj2m committed
163

164 165 166 167 168
  let build ast_fs =
    let unsorted = List.map (fun feat -> G_feature.build feat) ast_fs in
    List.sort G_feature.compare unsorted

  let of_conll line =
bguillaum's avatar
bguillaum committed
169
    let unsorted =
bguillaum's avatar
bguillaum committed
170 171 172 173 174 175
      ("phon", Domain.build_one "phon" line.Conll.phon)
      :: ("lemma", Domain.build_one "lemma" line.Conll.lemma)
      :: ("cat", Domain.build_one "cat" line.Conll.pos1)
      :: ("pos", Domain.build_one "pos" line.Conll.pos2)
      :: ("position", Domain.build_one "position" (string_of_int line.Conll.num))
      :: (List.map (fun (f,v) -> (f, Domain.build_one f v)) line.Conll.morph) in
176 177 178 179 180 181 182 183 184 185 186 187 188
    List.sort G_feature.compare unsorted

  exception Fail_unif 
  let unif fs1 fs2 = 
    let rec loop = function
      | [], fs | fs, [] -> fs
      | (f1::t1, f2::t2) when G_feature.compare f1 f2 < 0 -> f1 :: loop (t1, f2::t2)
      | (f1::t1, f2::t2) when G_feature.compare f1 f2 > 0 -> f2 :: loop (f1::t1, t2)
                                                                    
      (* all remaining case are fn1 = fn2 *)
      | ((fn, a1)::t1, (_, a2)::t2) when a1=a2 -> (fn,a1) :: (loop (t1, t2))
      | _ -> raise Fail_unif
    in try Some (loop (fs1, fs2)) with Fail_unif -> None
189 190 191 192

  let get_main ?main_feat t =
    let main_list = match main_feat with
    | None -> []
bguillaum's avatar
bguillaum committed
193
    | Some string -> Str.split (Str.regexp "\\( *; *\\)\\|#") string in
194

195 196 197
    let rec loop = function
      | [] -> (None, t)
      | feat_name :: tail ->
198 199 200
          match List_.sort_assoc feat_name t with
          | Some atom -> (Some atom, List_.sort_remove_assoc feat_name t)
          | None -> loop tail in
201
    loop main_list
bguillaum's avatar
bguillaum committed
202

203 204
  let to_dot ?main_feat t =
    match get_main ?main_feat t with
bguillaum's avatar
bguillaum committed
205
    | (None, _) -> List_.to_string G_feature.to_dot "\\n" t
bguillaum's avatar
bguillaum committed
206 207
    | (Some atom, sub) ->
      sprintf "{%s|%s}" (string_of_value atom) (List_.to_string G_feature.to_dot "\\n" sub)
208
          
209 210 211
  let to_word ?main_feat t =
    match get_main ?main_feat t with
      | (None, _) -> "#"
bguillaum's avatar
bguillaum committed
212
      | (Some atom, _) -> string_of_value atom
213
        
214
  let to_dep ?main_feat ?filter t =
215
    let (main_opt, sub) = get_main ?main_feat t in
216 217 218
    let reduced_sub = match filter with
      | None -> sub
      | Some l -> List.filter (fun (fn,_) -> List.mem fn l) sub in
219
    sprintf " word=\"%s\"; subword=\"%s\"; " 
bguillaum's avatar
bguillaum committed
220
      (match main_opt with Some atom -> string_of_value atom | None -> "")
221
      (List_.to_string G_feature.to_string "#" reduced_sub)
bguillaum's avatar
bguillaum committed
222
end (* module G_fs *)
223 224 225 226 227
 
(* ==================================================================================================== *)
module P_fs = struct
  (* list are supposed to be striclty ordered wrt compare*)
  type t = P_feature.t list
pj2m's avatar
pj2m committed
228

229
  let empty = []
pj2m's avatar
pj2m committed
230

231 232 233
  let build ?pat_vars ast_fs =
    let unsorted = List.map (P_feature.build ?pat_vars) ast_fs in
    List.sort P_feature.compare unsorted 
pj2m's avatar
pj2m committed
234

235
  let to_string t = List_.to_string P_feature.to_string "\\n" t
pj2m's avatar
pj2m committed
236

237 238 239 240 241
  let to_dep ?filter param_names t =
    let reduced = match filter with
      | None -> t
      | Some l -> List.filter (fun (fn,_) -> List.mem fn l) t in
    List_.to_string (P_feature.to_string ~param_names) "#" reduced
bguillaum's avatar
bguillaum committed
242

243
  let to_dot t = List_.to_string P_feature.to_string "\\n" t
pj2m's avatar
pj2m committed
244

245
  exception Fail
pj2m's avatar
pj2m committed
246

247 248 249
  let match_ ?param pattern fs =
    let rec loop acc = function
      | [], _ -> acc
pj2m's avatar
pj2m committed
250

251 252 253
      (* Two next cases: each feature_name present in pattern must be in instance: [] means unif failure *)
      | _, [] -> raise Fail
      | ((fn_pat, _)::_, (fn, _)::_) when fn_pat < fn -> raise Fail
pj2m's avatar
pj2m committed
254

255 256
      (* a feature_name present only in instance -> Skip it *)
      | ((fn_pat, fv_pat)::t_pat, (fn, _)::t) when fn_pat > fn -> loop acc ((fn_pat, fv_pat)::t_pat, t)
pj2m's avatar
pj2m committed
257

258 259 260
      (* Next cases: fn_pat = fn *)
      | ((_, (P_feature.Equal fv))::t_pat, (_, fa)::t) when List_.sort_mem fa fv -> loop acc (t_pat,t)
      | ((_, (P_feature.Different fv))::t_pat, (_, fa)::t) when not (List_.sort_mem fa fv) -> loop acc (t_pat,t)
pj2m's avatar
pj2m committed
261

262 263 264 265
      | ((_, (P_feature.Param index))::t_pat, (_, atom)::t) ->
          (match acc with
          | None -> Log.bug "[P_fs.compatible] Illegal parametrized pattern feature"; exit 2
          | Some param ->
bguillaum's avatar
bguillaum committed
266
              (match Lex_par.filter index (string_of_value atom) param with
267
              | None -> raise Fail
268 269 270
              | Some new_param -> loop (Some new_param) (t_pat,t)
              )
          )
pj2m's avatar
pj2m committed
271

272 273 274
      (* remaining cases: Equal and not list_mem  |  Diff and not list_mem -> fail*)  
      | _ -> raise Fail
    in loop param (pattern,fs)
pj2m's avatar
pj2m committed
275 276 277 278 279 280

  let filter fs_p fs_g = 
    let rec loop = function
      | [], fs -> true
      | fs, [] -> false

281 282
      | ((fn1,_)::_, (fn2,_)::_) when fn1 < fn2 -> false
      | ((fn1,_)::_ as f1, (fn2,_)::t2) when fn1 > fn2 -> loop (f1, t2)
pj2m's avatar
pj2m committed
283 284

      (* all remaining case are fn1 = fn2 *)
285 286 287
      | ((_, (P_feature.Equal fv))::t1, (_, atom)::t2) when List_.sort_mem atom fv -> loop (t1, t2)
      | ((_, (P_feature.Different fv))::t1, (_, atom)::t2) when not (List_.sort_mem atom fv) -> loop (t1, t2)
      | _ -> false
pj2m's avatar
pj2m committed
288

289
    in loop (fs_p, fs_g)
bguillaum's avatar
bguillaum committed
290 291 292 293 294 295 296 297 298 299 300 301

  let unif fs1 fs2 = 
    let rec loop = function
      | [], fs -> fs
      | fs, [] -> fs

      | ((fn1,v1)::t1, (fn2,v2)::t2) when fn1 < fn2 -> (fn1,v1) :: (loop (t1,(fn2,v2)::t2))
      | ((fn1,v1)::t1, (fn2,v2)::t2) when fn1 > fn2 -> (fn2,v2) :: (loop ((fn1,v1)::t1,t2))

      (* all remaining case are fn1 = fn2 *)
      | ((fn1,v1)::t1, (fn2,v2)::t2) (* when fn1 = fn2 *) -> (fn1,P_feature.unif_value v1 v2) :: (loop (t1,t2))
    in loop (fs1, fs2)      
pj2m's avatar
pj2m committed
302
end