grew_fs.ml 10.7 KB
Newer Older
bguillaum's avatar
bguillaum committed
1
open Printf
pj2m's avatar
pj2m committed
2
open Log
bguillaum's avatar
bguillaum committed
3 4 5

open Grew_utils
open Grew_ast
pj2m's avatar
pj2m committed
6

bguillaum's avatar
bguillaum committed
7 8 9 10 11 12 13

type value = String of string | Int of int

let string_of_value = function
  | String s -> s
  | Int i -> string_of_int i

14 15 16
(* ==================================================================================================== *)
module Domain = struct
  let current = ref None
pj2m's avatar
pj2m committed
17

18
  let reset () = current := None
pj2m's avatar
pj2m committed
19

20 21
  let init ast_domain = current := Some ast_domain

22 23
  let build ?loc name unsorted_values =
    let values = List.sort Pervasives.compare unsorted_values in
bguillaum's avatar
bguillaum committed
24
    match (name.[0], !current) with
bguillaum's avatar
bguillaum committed
25 26 27
      | ('_', _) (* no check on feat_name starting with '_' *)
      | (_, None) -> List.map (fun s -> String s) values (* no domain defined *)
      | (_, Some dom) ->
bguillaum's avatar
bguillaum committed
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
        let rec loop = function
          | [] -> Error.build ?loc "[GRS] Unknown feature name '%s'" name
          | ((Ast.Open n)::_) when n = name ->
            List.map (fun s -> String s) values
          | ((Ast.Int n)::_) when n = name ->
            (try List.map (fun s -> Int (int_of_string s)) values
            with Failure _ -> Error.build ?loc "[GRS] The feature '%s' is of type int" name)
          | ((Ast.Closed (n,vs))::_) when n = name ->
            (match List_.sort_diff values vs with
              | [] -> List.map (fun s -> String s) values
              | l -> Error.build ?loc "Unknown feature values '%s' for feature name '%s'"
	        (List_.to_string (fun x->x) ", " l)
	        name
            )
          | _::t -> loop t in
bguillaum's avatar
bguillaum committed
43
        loop dom
bguillaum's avatar
bguillaum committed
44 45 46 47 48

  let build_one ?loc name value =
    match build ?loc name [value] with
      | [x] -> x
      | _ -> Error.bug ?loc "[Domain.build_one]"
49 50 51 52
end

(* ==================================================================================================== *)
module G_feature = struct
bguillaum's avatar
bguillaum committed
53 54

  type t = string * value
55 56

  let get_name = fst
57

pj2m's avatar
pj2m committed
58 59
  let compare feat1 feat2 = Pervasives.compare (get_name feat1) (get_name feat2)

bguillaum's avatar
bguillaum committed
60
  let build (x : Ast.feature) = match x with
61
    | ({Ast.kind=Ast.Equality [atom]; name=name},loc) ->
bguillaum's avatar
bguillaum committed
62
      (name, Domain.build_one ~loc name atom)
63 64
    | _ -> Error.build "Illegal feature declaration in Graph (must be '=' and atomic)"

bguillaum's avatar
bguillaum committed
65
  let to_string (feat_name, feat_val) = sprintf "%s=%s" feat_name (string_of_value feat_val)
66

bguillaum's avatar
bguillaum committed
67
  let to_gr (feat_name, feat_val) = sprintf "%s=%s" feat_name (string_of_value feat_val)
bguillaum's avatar
bguillaum committed
68 69
      
  let to_dot (feat_name, feat_val) =
bguillaum's avatar
bguillaum committed
70 71 72 73
    let string_val = string_of_value feat_val in
    match Str.split (Str.regexp ":C:") string_val with
      | [] -> Error.bug "[G_feature.to_dot] feature value '%s'" string_val
      | fv::_ -> sprintf "%s=%s" feat_name fv
74 75 76 77 78 79 80
end

(* ==================================================================================================== *)
module P_feature = struct
  (* feature= (feature_name, disjunction of atomic values) *) 

  type v = 
bguillaum's avatar
bguillaum committed
81 82
    | Equal of value list  (* with Equal constr, the list MUST never be empty *)
    | Different of value list
83 84 85 86 87 88 89 90
    | Param of int 

  type t = string * v

  let get_name = fst

  let compare feat1 feat2 = Pervasives.compare (get_name feat1) (get_name feat2)

bguillaum's avatar
bguillaum committed
91 92 93 94 95 96 97 98 99
  let unif_value v1 v2 = 
    match (v1, v2) with
    | (Equal l1, Equal l2) -> 
        (match List_.sort_inter l1 l2 with
        | [] -> Error.build "Unification failure" 
        | l -> Equal l)
    | (Different l1, Different l2) -> Different (List_.sort_union l1 l2)
    | _ -> Error.build "cannot unify heterogeneous pattern features"
        
bguillaum's avatar
bguillaum committed
100
  let to_string ?param_names = function
bguillaum's avatar
bguillaum committed
101
    | (feat_name, Equal atoms) -> sprintf "%s=%s" feat_name (List_.to_string string_of_value "|" atoms)
102
    | (feat_name, Different []) -> sprintf "%s=*" feat_name
bguillaum's avatar
bguillaum committed
103
    | (feat_name, Different atoms) -> sprintf "%s<>%s" feat_name (List_.to_string string_of_value "|" atoms)
bguillaum's avatar
bguillaum committed
104 105 106 107
    | (feat_name, Param index) -> 
      match param_names with
        | None -> sprintf "%s=$%d" feat_name index 
        | Some (l,_) -> sprintf "%s=%s" feat_name (List.nth l index)
108 109 110

  let build ?pat_vars = function
    | ({Ast.kind=Ast.Equality unsorted_values; name=name}, loc) ->
bguillaum's avatar
bguillaum committed
111
      let values = Domain.build ~loc name unsorted_values in (name, Equal values)
112
    | ({Ast.kind=Ast.Disequality unsorted_values; name=name}, loc) ->
bguillaum's avatar
bguillaum committed
113
      let values = Domain.build ~loc name unsorted_values in (name, Different values)
114
    | ({Ast.kind=Ast.Param var; name=name}, loc) ->
bguillaum's avatar
bguillaum committed
115
        match pat_vars with
116
        | None -> Error.bug ~loc "[P_feature.build] param '%s' in an unparametrized rule" var
bguillaum's avatar
bguillaum committed
117 118
        | Some l -> 
            match List_.pos var l with
119 120
            | Some index -> (name, Param index)
            | None -> Error.build ~loc "[P_feature.build] Unknown pattern variable '%s'" var
pj2m's avatar
pj2m committed
121 122
end

123 124
(* ==================================================================================================== *)
module G_fs = struct
pj2m's avatar
pj2m committed
125
  (* list are supposed to be striclty ordered wrt compare*)
126
  type t = G_feature.t list
bguillaum's avatar
bguillaum committed
127

bguillaum's avatar
bguillaum committed
128
  let to_raw t = List.map (fun (name, value) -> (name, string_of_value value)) t
bguillaum's avatar
bguillaum committed
129

130
  let empty = []
bguillaum's avatar
bguillaum committed
131

132
  let set_feat ?loc feature_name atom t =
bguillaum's avatar
bguillaum committed
133
    let new_value = Domain.build_one ?loc feature_name atom in
134
    let rec loop = function
bguillaum's avatar
bguillaum committed
135 136 137
    | [] -> [(feature_name, new_value)]
    | ((fn,_)::_) as t when feature_name < fn -> (feature_name, new_value)::t
    | (fn,_)::t when feature_name = fn -> (feature_name, new_value)::t
138 139
    | (fn,a)::t -> (fn,a) :: (loop t) 
    in loop t
140

141
  let del_feat = List_.sort_remove_assoc
142

143 144
  let get_atom = List_.sort_assoc

bguillaum's avatar
bguillaum committed
145 146 147 148 149
  let get_string_atom feat_name t = 
    match List_.sort_assoc feat_name t with
      | None -> None
      | Some v -> Some (string_of_value v)

bguillaum's avatar
bguillaum committed
150 151 152 153 154 155
  let get_int_feat feat_name t =
    match List_.sort_assoc feat_name t with
      | None -> None
      | Some (Int i) -> Some i
      | Some _ -> Error.build "[Fs.get_int_feat]"

bguillaum's avatar
bguillaum committed
156
  let to_string t = List_.to_string G_feature.to_string "," t
157
  let to_gr t = List_.to_string G_feature.to_gr ", " t
pj2m's avatar
pj2m committed
158

159 160 161 162 163
  let build ast_fs =
    let unsorted = List.map (fun feat -> G_feature.build feat) ast_fs in
    List.sort G_feature.compare unsorted

  let of_conll line =
bguillaum's avatar
bguillaum committed
164
    let unsorted =
bguillaum's avatar
bguillaum committed
165 166 167 168 169 170
      ("phon", Domain.build_one "phon" line.Conll.phon)
      :: ("lemma", Domain.build_one "lemma" line.Conll.lemma)
      :: ("cat", Domain.build_one "cat" line.Conll.pos1)
      :: ("pos", Domain.build_one "pos" line.Conll.pos2)
      :: ("position", Domain.build_one "position" (string_of_int line.Conll.num))
      :: (List.map (fun (f,v) -> (f, Domain.build_one f v)) line.Conll.morph) in
171 172 173 174 175 176 177 178 179 180 181 182 183
    List.sort G_feature.compare unsorted

  exception Fail_unif 
  let unif fs1 fs2 = 
    let rec loop = function
      | [], fs | fs, [] -> fs
      | (f1::t1, f2::t2) when G_feature.compare f1 f2 < 0 -> f1 :: loop (t1, f2::t2)
      | (f1::t1, f2::t2) when G_feature.compare f1 f2 > 0 -> f2 :: loop (f1::t1, t2)
                                                                    
      (* all remaining case are fn1 = fn2 *)
      | ((fn, a1)::t1, (_, a2)::t2) when a1=a2 -> (fn,a1) :: (loop (t1, t2))
      | _ -> raise Fail_unif
    in try Some (loop (fs1, fs2)) with Fail_unif -> None
184 185 186 187

  let get_main ?main_feat t =
    let main_list = match main_feat with
    | None -> []
bguillaum's avatar
bguillaum committed
188
    | Some string -> Str.split (Str.regexp "\\( *; *\\)\\|#") string in
189

190 191 192
    let rec loop = function
      | [] -> (None, t)
      | feat_name :: tail ->
193 194 195
          match List_.sort_assoc feat_name t with
          | Some atom -> (Some atom, List_.sort_remove_assoc feat_name t)
          | None -> loop tail in
196
    loop main_list
bguillaum's avatar
bguillaum committed
197

198 199
  let to_dot ?main_feat t =
    match get_main ?main_feat t with
bguillaum's avatar
bguillaum committed
200
    | (None, _) -> List_.to_string G_feature.to_dot "\\n" t
bguillaum's avatar
bguillaum committed
201 202
    | (Some atom, sub) ->
      sprintf "{%s|%s}" (string_of_value atom) (List_.to_string G_feature.to_dot "\\n" sub)
203
          
204 205 206
  let to_word ?main_feat t =
    match get_main ?main_feat t with
      | (None, _) -> "#"
bguillaum's avatar
bguillaum committed
207
      | (Some atom, _) -> string_of_value atom
208
        
209 210 211
  let to_dep ?main_feat t =
    let (main_opt, sub) = get_main ?main_feat t in
    sprintf " word=\"%s\"; subword=\"%s\"; " 
bguillaum's avatar
bguillaum committed
212
      (match main_opt with Some atom -> string_of_value atom | None -> "")
bguillaum's avatar
bguillaum committed
213
      (List_.to_string G_feature.to_string "#" sub)
bguillaum's avatar
bguillaum committed
214
end (* module G_fs *)
215 216 217 218 219
 
(* ==================================================================================================== *)
module P_fs = struct
  (* list are supposed to be striclty ordered wrt compare*)
  type t = P_feature.t list
pj2m's avatar
pj2m committed
220

221
  let empty = []
pj2m's avatar
pj2m committed
222

223 224 225
  let build ?pat_vars ast_fs =
    let unsorted = List.map (P_feature.build ?pat_vars) ast_fs in
    List.sort P_feature.compare unsorted 
pj2m's avatar
pj2m committed
226

227
  let to_string t = List_.to_string P_feature.to_string "\\n" t
pj2m's avatar
pj2m committed
228

bguillaum's avatar
bguillaum committed
229
  let to_dep param_names t = List_.to_string (P_feature.to_string ~param_names) "#" t
bguillaum's avatar
bguillaum committed
230

231
  let to_dot t = List_.to_string P_feature.to_string "\\n" t
pj2m's avatar
pj2m committed
232

233
  exception Fail
pj2m's avatar
pj2m committed
234

235 236 237
  let match_ ?param pattern fs =
    let rec loop acc = function
      | [], _ -> acc
pj2m's avatar
pj2m committed
238

239 240 241
      (* Two next cases: each feature_name present in pattern must be in instance: [] means unif failure *)
      | _, [] -> raise Fail
      | ((fn_pat, _)::_, (fn, _)::_) when fn_pat < fn -> raise Fail
pj2m's avatar
pj2m committed
242

243 244
      (* a feature_name present only in instance -> Skip it *)
      | ((fn_pat, fv_pat)::t_pat, (fn, _)::t) when fn_pat > fn -> loop acc ((fn_pat, fv_pat)::t_pat, t)
pj2m's avatar
pj2m committed
245

246 247 248
      (* Next cases: fn_pat = fn *)
      | ((_, (P_feature.Equal fv))::t_pat, (_, fa)::t) when List_.sort_mem fa fv -> loop acc (t_pat,t)
      | ((_, (P_feature.Different fv))::t_pat, (_, fa)::t) when not (List_.sort_mem fa fv) -> loop acc (t_pat,t)
pj2m's avatar
pj2m committed
249

250 251 252 253
      | ((_, (P_feature.Param index))::t_pat, (_, atom)::t) ->
          (match acc with
          | None -> Log.bug "[P_fs.compatible] Illegal parametrized pattern feature"; exit 2
          | Some param ->
bguillaum's avatar
bguillaum committed
254
              (match Lex_par.filter index (string_of_value atom) param with
255
              | None -> raise Fail
256 257 258
              | Some new_param -> loop (Some new_param) (t_pat,t)
              )
          )
pj2m's avatar
pj2m committed
259

260 261 262
      (* remaining cases: Equal and not list_mem  |  Diff and not list_mem -> fail*)  
      | _ -> raise Fail
    in loop param (pattern,fs)
pj2m's avatar
pj2m committed
263 264 265 266 267 268

  let filter fs_p fs_g = 
    let rec loop = function
      | [], fs -> true
      | fs, [] -> false

269 270
      | ((fn1,_)::_, (fn2,_)::_) when fn1 < fn2 -> false
      | ((fn1,_)::_ as f1, (fn2,_)::t2) when fn1 > fn2 -> loop (f1, t2)
pj2m's avatar
pj2m committed
271 272

      (* all remaining case are fn1 = fn2 *)
273 274 275
      | ((_, (P_feature.Equal fv))::t1, (_, atom)::t2) when List_.sort_mem atom fv -> loop (t1, t2)
      | ((_, (P_feature.Different fv))::t1, (_, atom)::t2) when not (List_.sort_mem atom fv) -> loop (t1, t2)
      | _ -> false
pj2m's avatar
pj2m committed
276

277
    in loop (fs_p, fs_g)
bguillaum's avatar
bguillaum committed
278 279 280 281 282 283 284 285 286 287 288 289

  let unif fs1 fs2 = 
    let rec loop = function
      | [], fs -> fs
      | fs, [] -> fs

      | ((fn1,v1)::t1, (fn2,v2)::t2) when fn1 < fn2 -> (fn1,v1) :: (loop (t1,(fn2,v2)::t2))
      | ((fn1,v1)::t1, (fn2,v2)::t2) when fn1 > fn2 -> (fn2,v2) :: (loop ((fn1,v1)::t1,t2))

      (* all remaining case are fn1 = fn2 *)
      | ((fn1,v1)::t1, (fn2,v2)::t2) (* when fn1 = fn2 *) -> (fn1,P_feature.unif_value v1 v2) :: (loop (t1,t2))
    in loop (fs1, fs2)      
pj2m's avatar
pj2m committed
290
end