grew_fs.ml 11.7 KB
Newer Older
bguillaum's avatar
bguillaum committed
1
open Printf
pj2m's avatar
pj2m committed
2
open Log
bguillaum's avatar
bguillaum committed
3
4
5

open Grew_utils
open Grew_ast
pj2m's avatar
pj2m committed
6

bguillaum's avatar
bguillaum committed
7
8
9
10
11
12
13

type value = String of string | Int of int

let string_of_value = function
  | String s -> s
  | Int i -> string_of_int i

14
15
16
(* ==================================================================================================== *)
module Domain = struct
  let current = ref None
pj2m's avatar
pj2m committed
17

18
  let reset () = current := None
pj2m's avatar
pj2m committed
19

20
21
  let init ast_domain = current := Some ast_domain

22
23
24
25
26
27
28
29
30
31
32
  let check ?loc name values =
    if name.[0] <> '_'
    then
    match (name.[0], !current) with
      | ('_', _)
      | (_,None) -> ()
      | (_, Some d) ->
        let rec loop = function
          | [] -> Error.build ?loc "[GRS] Unknown feature name '%s'" name
          | ((Ast.Open n)::_) when n = name -> ()
          | ((Ast.Closed (n,vs))::_) when n = name ->
33
            (match List_.sort_diff values vs with 
34
35
36
37
              | [] -> ()
              | l -> Error.build ?loc "Unknown feature values '%s' for feature name '%s'"
	        (List_.to_string (fun x->x) ", " l)
	        name
38
            )
39
40
          | _::t -> loop t in
        loop d
bguillaum's avatar
bguillaum committed
41

42
43
  let build ?loc name unsorted_values =
    let values = List.sort Pervasives.compare unsorted_values in
bguillaum's avatar
bguillaum committed
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
    match (name.[0], !current) with
      | ('_', _)
      | (_, None) -> List.map (fun s -> String s) values (* no check on feat_name starting with '_' *)
      | (_, Some d) ->
        let rec loop = function
          | [] -> Error.build ?loc "[GRS] Unknown feature name '%s'" name
          | ((Ast.Open n)::_) when n = name ->
            List.map (fun s -> String s) values
          | ((Ast.Int n)::_) when n = name ->
            (try List.map (fun s -> Int (int_of_string s)) values
            with Failure _ -> Error.build ?loc "[GRS] The feature '%s' is of type int" name)
          | ((Ast.Closed (n,vs))::_) when n = name ->
            (match List_.sort_diff values vs with
              | [] -> List.map (fun s -> String s) values
              | l -> Error.build ?loc "Unknown feature values '%s' for feature name '%s'"
	        (List_.to_string (fun x->x) ", " l)
	        name
            )
          | _::t -> loop t in
        loop d

  let build_one ?loc name value =
    match build ?loc name [value] with
      | [x] -> x
      | _ -> Error.bug ?loc "[Domain.build_one]"
69
70
71
72
end

(* ==================================================================================================== *)
module G_feature = struct
bguillaum's avatar
bguillaum committed
73
74

  type t = string * value
75
76

  let get_name = fst
77

pj2m's avatar
pj2m committed
78
79
  let compare feat1 feat2 = Pervasives.compare (get_name feat1) (get_name feat2)

bguillaum's avatar
bguillaum committed
80
  let build (x : Ast.feature) = match x with
81
    | ({Ast.kind=Ast.Equality [atom]; name=name},loc) ->
bguillaum's avatar
bguillaum committed
82
83
84
	(* Domain.check ~loc name [atom]; *)
	(* (name, atom) *)
      (name, Domain.build_one ~loc name atom)
85
86
    | _ -> Error.build "Illegal feature declaration in Graph (must be '=' and atomic)"

bguillaum's avatar
bguillaum committed
87
  let to_string (feat_name, feat_val) = sprintf "%s=%s" feat_name (string_of_value feat_val)
88

bguillaum's avatar
bguillaum committed
89
90
91
92
  let to_gr (feat_name, feat_val) =
    match feat_val with
      | String s -> sprintf "%s=\"%s\"" feat_name s
      | Int i -> sprintf "%s=\"%d\"" feat_name i
bguillaum's avatar
bguillaum committed
93
94
      
  let to_dot (feat_name, feat_val) =
bguillaum's avatar
bguillaum committed
95
96
97
98
99
100
    match feat_val with
      | Int i -> sprintf "%s=%d" feat_name i
      | String s ->
        match Str.split (Str.regexp ":C:") s with
          | [] -> Error.bug "[G_feature.to_dot] feature value '%s'" s
          | fv::_ -> sprintf "%s=%s" feat_name fv
101
102
103
104
105
106
107
end

(* ==================================================================================================== *)
module P_feature = struct
  (* feature= (feature_name, disjunction of atomic values) *) 

  type v = 
bguillaum's avatar
bguillaum committed
108
109
    | Equal of value list  (* with Equal constr, the list MUST never be empty *)
    | Different of value list
110
111
112
113
114
115
116
117
    | Param of int 

  type t = string * v

  let get_name = fst

  let compare feat1 feat2 = Pervasives.compare (get_name feat1) (get_name feat2)

bguillaum's avatar
bguillaum committed
118
119
120
121
122
123
124
125
126
  let unif_value v1 v2 = 
    match (v1, v2) with
    | (Equal l1, Equal l2) -> 
        (match List_.sort_inter l1 l2 with
        | [] -> Error.build "Unification failure" 
        | l -> Equal l)
    | (Different l1, Different l2) -> Different (List_.sort_union l1 l2)
    | _ -> Error.build "cannot unify heterogeneous pattern features"
        
bguillaum's avatar
bguillaum committed
127
  let to_string ?param_names = function
bguillaum's avatar
bguillaum committed
128
    | (feat_name, Equal atoms) -> sprintf "%s=%s" feat_name (List_.to_string string_of_value "|" atoms)
129
    | (feat_name, Different []) -> sprintf "%s=*" feat_name
bguillaum's avatar
bguillaum committed
130
    | (feat_name, Different atoms) -> sprintf "%s<>%s" feat_name (List_.to_string string_of_value "|" atoms)
bguillaum's avatar
bguillaum committed
131
132
133
134
    | (feat_name, Param index) -> 
      match param_names with
        | None -> sprintf "%s=$%d" feat_name index 
        | Some (l,_) -> sprintf "%s=%s" feat_name (List.nth l index)
135
136
137

  let build ?pat_vars = function
    | ({Ast.kind=Ast.Equality unsorted_values; name=name}, loc) ->
bguillaum's avatar
bguillaum committed
138
139
140
141
      let values = Domain.build ~loc name unsorted_values in (name, Equal values)
	(* let values = List.sort Pervasives.compare unsorted_values in *)
	(* Domain.check ~loc name values; *)
	(* (name, Equal values) *)
142
    | ({Ast.kind=Ast.Disequality unsorted_values; name=name}, loc) ->
bguillaum's avatar
bguillaum committed
143
144
145
146
      let values = Domain.build ~loc name unsorted_values in (name, Different values)
	(* let values = List.sort Pervasives.compare unsorted_values in *)
	(* Domain.check ~loc name values; *)
	(* (name, Different values) *)
147
    | ({Ast.kind=Ast.Param var; name=name}, loc) ->
bguillaum's avatar
bguillaum committed
148
        match pat_vars with
149
        | None -> Error.bug ~loc "[P_feature.build] param '%s' in an unparametrized rule" var
bguillaum's avatar
bguillaum committed
150
151
        | Some l -> 
            match List_.pos var l with
152
153
            | Some index -> (name, Param index)
            | None -> Error.build ~loc "[P_feature.build] Unknown pattern variable '%s'" var
pj2m's avatar
pj2m committed
154
155
end

156
157
(* ==================================================================================================== *)
module G_fs = struct
pj2m's avatar
pj2m committed
158
  (* list are supposed to be striclty ordered wrt compare*)
159
  type t = G_feature.t list
bguillaum's avatar
bguillaum committed
160

bguillaum's avatar
bguillaum committed
161
  let to_raw t = List.map (fun (name, value) -> (name, string_of_value value)) t
bguillaum's avatar
bguillaum committed
162

163
  let empty = []
bguillaum's avatar
bguillaum committed
164

165
  let set_feat ?loc feature_name atom t =
bguillaum's avatar
bguillaum committed
166
167
    let new_value = Domain.build_one ?loc feature_name atom in
    (* Domain.check ?loc feature_name [atom]; *)
168
    let rec loop = function
bguillaum's avatar
bguillaum committed
169
170
171
    | [] -> [(feature_name, new_value)]
    | ((fn,_)::_) as t when feature_name < fn -> (feature_name, new_value)::t
    | (fn,_)::t when feature_name = fn -> (feature_name, new_value)::t
172
173
    | (fn,a)::t -> (fn,a) :: (loop t) 
    in loop t
174

175
  let del_feat = List_.sort_remove_assoc
176

177
178
  let get_atom = List_.sort_assoc

bguillaum's avatar
bguillaum committed
179
180
181
182
183
  let get_string_atom feat_name t = 
    match List_.sort_assoc feat_name t with
      | None -> None
      | Some v -> Some (string_of_value v)

bguillaum's avatar
bguillaum committed
184
185
186
187
188
189
  let get_int_feat feat_name t =
    match List_.sort_assoc feat_name t with
      | None -> None
      | Some (Int i) -> Some i
      | Some _ -> Error.build "[Fs.get_int_feat]"

bguillaum's avatar
bguillaum committed
190
  let to_string t = List_.to_string G_feature.to_string "," t
191
  let to_gr t = List_.to_string G_feature.to_gr ", " t
pj2m's avatar
pj2m committed
192

193
194
195
196
197
  let build ast_fs =
    let unsorted = List.map (fun feat -> G_feature.build feat) ast_fs in
    List.sort G_feature.compare unsorted

  let of_conll line =
bguillaum's avatar
bguillaum committed
198
199
200
    let unsorted =
      ("phon", String line.Conll.phon)
      :: ("lemma", String line.Conll.lemma)
201
202
      :: ("cat", String line.Conll.pos1)
      :: ("pos", String line.Conll.pos2)
bguillaum's avatar
bguillaum committed
203
      :: ("position", Int line.Conll.num)
bguillaum's avatar
bguillaum committed
204
      :: (List.map (fun (f,v) -> (f, String v)) line.Conll.morph) in
205
206
207
208
209
210
211
212
213
214
215
216
217
    List.sort G_feature.compare unsorted

  exception Fail_unif 
  let unif fs1 fs2 = 
    let rec loop = function
      | [], fs | fs, [] -> fs
      | (f1::t1, f2::t2) when G_feature.compare f1 f2 < 0 -> f1 :: loop (t1, f2::t2)
      | (f1::t1, f2::t2) when G_feature.compare f1 f2 > 0 -> f2 :: loop (f1::t1, t2)
                                                                    
      (* all remaining case are fn1 = fn2 *)
      | ((fn, a1)::t1, (_, a2)::t2) when a1=a2 -> (fn,a1) :: (loop (t1, t2))
      | _ -> raise Fail_unif
    in try Some (loop (fs1, fs2)) with Fail_unif -> None
218
219
220
221

  let get_main ?main_feat t =
    let main_list = match main_feat with
    | None -> []
bguillaum's avatar
bguillaum committed
222
    | Some string -> Str.split (Str.regexp "\\( *; *\\)\\|#") string in
223

224
225
226
    let rec loop = function
      | [] -> (None, t)
      | feat_name :: tail ->
227
228
229
          match List_.sort_assoc feat_name t with
          | Some atom -> (Some atom, List_.sort_remove_assoc feat_name t)
          | None -> loop tail in
230
    loop main_list
bguillaum's avatar
bguillaum committed
231

232
233
  let to_dot ?main_feat t =
    match get_main ?main_feat t with
bguillaum's avatar
bguillaum committed
234
    | (None, _) -> List_.to_string G_feature.to_dot "\\n" t
bguillaum's avatar
bguillaum committed
235
236
    | (Some atom, sub) ->
      sprintf "{%s|%s}" (string_of_value atom) (List_.to_string G_feature.to_dot "\\n" sub)
237
          
238
239
240
  let to_word ?main_feat t =
    match get_main ?main_feat t with
      | (None, _) -> "#"
bguillaum's avatar
bguillaum committed
241
      | (Some atom, _) -> string_of_value atom
242
        
243
244
245
  let to_dep ?main_feat t =
    let (main_opt, sub) = get_main ?main_feat t in
    sprintf " word=\"%s\"; subword=\"%s\"; " 
bguillaum's avatar
bguillaum committed
246
      (match main_opt with Some atom -> string_of_value atom | None -> "")
bguillaum's avatar
bguillaum committed
247
      (List_.to_string G_feature.to_string "#" sub)
bguillaum's avatar
bguillaum committed
248
end (* module G_fs *)
249
250
251
252
253
 
(* ==================================================================================================== *)
module P_fs = struct
  (* list are supposed to be striclty ordered wrt compare*)
  type t = P_feature.t list
pj2m's avatar
pj2m committed
254

255
  let empty = []
pj2m's avatar
pj2m committed
256

257
258
259
  let build ?pat_vars ast_fs =
    let unsorted = List.map (P_feature.build ?pat_vars) ast_fs in
    List.sort P_feature.compare unsorted 
pj2m's avatar
pj2m committed
260

261
  let to_string t = List_.to_string P_feature.to_string "\\n" t
pj2m's avatar
pj2m committed
262

bguillaum's avatar
bguillaum committed
263
  let to_dep param_names t = List_.to_string (P_feature.to_string ~param_names) "#" t
bguillaum's avatar
bguillaum committed
264

265
  let to_dot t = List_.to_string P_feature.to_string "\\n" t
pj2m's avatar
pj2m committed
266

267
  exception Fail
pj2m's avatar
pj2m committed
268

269
270
271
  let match_ ?param pattern fs =
    let rec loop acc = function
      | [], _ -> acc
pj2m's avatar
pj2m committed
272

273
274
275
      (* Two next cases: each feature_name present in pattern must be in instance: [] means unif failure *)
      | _, [] -> raise Fail
      | ((fn_pat, _)::_, (fn, _)::_) when fn_pat < fn -> raise Fail
pj2m's avatar
pj2m committed
276

277
278
      (* a feature_name present only in instance -> Skip it *)
      | ((fn_pat, fv_pat)::t_pat, (fn, _)::t) when fn_pat > fn -> loop acc ((fn_pat, fv_pat)::t_pat, t)
pj2m's avatar
pj2m committed
279

280
281
282
      (* Next cases: fn_pat = fn *)
      | ((_, (P_feature.Equal fv))::t_pat, (_, fa)::t) when List_.sort_mem fa fv -> loop acc (t_pat,t)
      | ((_, (P_feature.Different fv))::t_pat, (_, fa)::t) when not (List_.sort_mem fa fv) -> loop acc (t_pat,t)
pj2m's avatar
pj2m committed
283

284
285
286
287
      | ((_, (P_feature.Param index))::t_pat, (_, atom)::t) ->
          (match acc with
          | None -> Log.bug "[P_fs.compatible] Illegal parametrized pattern feature"; exit 2
          | Some param ->
bguillaum's avatar
bguillaum committed
288
              (match Lex_par.filter index (string_of_value atom) param with
289
              | None -> raise Fail
290
291
292
              | Some new_param -> loop (Some new_param) (t_pat,t)
              )
          )
pj2m's avatar
pj2m committed
293

294
295
296
      (* remaining cases: Equal and not list_mem  |  Diff and not list_mem -> fail*)  
      | _ -> raise Fail
    in loop param (pattern,fs)
pj2m's avatar
pj2m committed
297
298
299
300
301
302

  let filter fs_p fs_g = 
    let rec loop = function
      | [], fs -> true
      | fs, [] -> false

303
304
      | ((fn1,_)::_, (fn2,_)::_) when fn1 < fn2 -> false
      | ((fn1,_)::_ as f1, (fn2,_)::t2) when fn1 > fn2 -> loop (f1, t2)
pj2m's avatar
pj2m committed
305
306

      (* all remaining case are fn1 = fn2 *)
307
308
309
      | ((_, (P_feature.Equal fv))::t1, (_, atom)::t2) when List_.sort_mem atom fv -> loop (t1, t2)
      | ((_, (P_feature.Different fv))::t1, (_, atom)::t2) when not (List_.sort_mem atom fv) -> loop (t1, t2)
      | _ -> false
pj2m's avatar
pj2m committed
310

311
    in loop (fs_p, fs_g)
bguillaum's avatar
bguillaum committed
312
313
314
315
316
317
318
319
320
321
322
323

  let unif fs1 fs2 = 
    let rec loop = function
      | [], fs -> fs
      | fs, [] -> fs

      | ((fn1,v1)::t1, (fn2,v2)::t2) when fn1 < fn2 -> (fn1,v1) :: (loop (t1,(fn2,v2)::t2))
      | ((fn1,v1)::t1, (fn2,v2)::t2) when fn1 > fn2 -> (fn2,v2) :: (loop ((fn1,v1)::t1,t2))

      (* all remaining case are fn1 = fn2 *)
      | ((fn1,v1)::t1, (fn2,v2)::t2) (* when fn1 = fn2 *) -> (fn1,P_feature.unif_value v1 v2) :: (loop (t1,t2))
    in loop (fs1, fs2)      
pj2m's avatar
pj2m committed
324
end