libgrew_utils.ml 24.4 KB
Newer Older
bguillaum's avatar
bguillaum committed
1
2
3
4
5
6
7
8
9
10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

pj2m's avatar
pj2m committed
11
open Log
bguillaum's avatar
bguillaum committed
12
open Printf
pj2m's avatar
pj2m committed
13

bguillaum's avatar
bguillaum committed
14
module StringSet = Set.Make (String)
pj2m's avatar
pj2m committed
15
16
17
module StringMap = Map.Make (String)

module IntSet = Set.Make (struct type t = int let compare = Pervasives.compare end)
18
19
module IntMap = Map.Make (struct type t = int let compare = Pervasives.compare end)

bguillaum's avatar
bguillaum committed
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
(* ================================================================================ *)
module Loc = struct
  type t = string * int

  let to_string (file,line) = sprintf "(file: %s, line: %d)" (Filename.basename file) line

  let opt_set_line line = function
    | None -> None
    | Some (file,_) -> Some (file, line)

  let opt_to_string = function
    | None -> ""
    | Some x -> to_string x
end (* module Loc *)

(* ================================================================================ *)
module Error = struct

  exception Build of (string * Loc.t option)
  exception Run of (string * Loc.t option)
  exception Bug of (string * Loc.t option)

bguillaum's avatar
Typos    
bguillaum committed
42
  let build_ ?loc message = raise (Build (message, loc))
bguillaum's avatar
bguillaum committed
43
44
45
46
47
48
49
50
  let build ?loc = Printf.ksprintf (build_ ?loc)

  let run_ ?loc message = raise (Run (message, loc))
  let run ?loc = Printf.ksprintf (run_ ?loc)

  let bug_ ?loc message = raise (Bug (message, loc))
  let bug ?loc = Printf.ksprintf (bug_ ?loc)
end (* module Error *)
51

bguillaum's avatar
bguillaum committed
52
53
54
55
56
(* ================================================================================ *)
module String_ = struct

  let to_float string =
    try float_of_string string
bguillaum's avatar
bguillaum committed
57
58
59
    with _ ->
      try float_of_string (Str.global_replace (Str.regexp "\\.") "," string)
      with _ -> Error.build "[String_.to_float] cannot convert '%s'" string
60

bguillaum's avatar
bguillaum committed
61
  let of_float float = Str.global_replace (Str.regexp ",") "." (sprintf "%g" float)
62

bguillaum's avatar
bguillaum committed
63
end (* module String_ *)
64

bguillaum's avatar
bguillaum committed
65
66
67
68
69
70
71
72
(* ================================================================================ *)
module Dot = struct
  let to_png_file dot output_file =
    let temp_file_name,out_ch = Filename.open_temp_file ~mode:[Open_rdonly;Open_wronly;Open_text] "grewui_" ".dot" in
    fprintf out_ch "%s" dot;
    close_out out_ch;
    ignore(Sys.command(sprintf "dot -Tpng -o %s %s " output_file temp_file_name))
end (* module Dot *)
73

bguillaum's avatar
bguillaum committed
74
75
76
77
78
79
80
81

(* ================================================================================ *)
module File = struct
  let write data name =
    let out_ch = open_out name in
    fprintf out_ch "%s\n" data;
    close_out out_ch

bguillaum's avatar
bguillaum committed
82
  let read file =
bguillaum's avatar
bguillaum committed
83
    let in_ch = open_in file in
bguillaum's avatar
bguillaum committed
84
85
86
    (* if the input file contains an UTF-8 byte order mark (EF BB BF), skip 3 bytes, else get back to 0 *)
    (match input_byte in_ch with 0xEF -> seek_in in_ch 3 | _ -> seek_in in_ch 0);

bguillaum's avatar
bguillaum committed
87
88
89
90
91
92
93
94
    let rev_lines = ref [] in
    try
      while true do
        let line = input_line in_ch in
        if (Str.string_match (Str.regexp "^[ \t]*$") line 0) || (line.[0] = '%')
        then ()
        else rev_lines := line :: !rev_lines
      done; assert false
bguillaum's avatar
bguillaum committed
95
    with End_of_file ->
bguillaum's avatar
bguillaum committed
96
97
      close_in in_ch;
      List.rev !rev_lines
bguillaum's avatar
bguillaum committed
98

bguillaum's avatar
bguillaum committed
99
100
  (* [read_ln file] returns a list of couples (line_num, line). Blank lines and lines starting with '%' are ignored. *)
  let read_ln file =
bguillaum's avatar
bguillaum committed
101
    let in_ch = open_in file in
bguillaum's avatar
bguillaum committed
102
103
104
    (* if the input file contains an UTF-8 byte order mark (EF BB BF), skip 3 bytes, else get back to 0 *)
    (match input_byte in_ch with 0xEF -> seek_in in_ch 3 | _ -> seek_in in_ch 0);

bguillaum's avatar
bguillaum committed
105
106
107
108
109
110
111
112
113
114
    let cpt = ref 0 in
    let rev_lines = ref [] in
    try
      while true do
        let line = input_line in_ch in
        incr cpt;
        if (Str.string_match (Str.regexp "^[ \t]*$") line 0) || (line.[0] = '%')
        then ()
        else rev_lines := (!cpt, line) :: !rev_lines
      done; assert false
bguillaum's avatar
bguillaum committed
115
    with End_of_file ->
bguillaum's avatar
bguillaum committed
116
117
      close_in in_ch;
      List.rev !rev_lines
bguillaum's avatar
bguillaum committed
118
119
120
 end (* module File *)

(* ================================================================================ *)
121
module Pid = struct
bguillaum's avatar
bguillaum committed
122
123
124
  (* type t = int *)
  type t = Pos of int | Neg of int

125
  let compare = Pervasives.compare
bguillaum's avatar
bguillaum committed
126
127
128
129
130
131
132
133

  let to_id = function
    | Pos i -> sprintf "p_%d" i
    | Neg i -> sprintf "n_%d" i

  let to_string = function
    | Pos i -> sprintf "Pos %d" i
    | Neg i -> sprintf "Neg %d" i
bguillaum's avatar
bguillaum committed
134
135
136
end (* module Pid *)

(* ================================================================================ *)
137
module Pid_map =
bguillaum's avatar
bguillaum committed
138
  struct
139
    include Map.Make (Pid)
140

pj2m's avatar
pj2m committed
141
    exception True
bguillaum's avatar
bguillaum committed
142

pj2m's avatar
pj2m committed
143
144
    let exists fct map =
      try
bguillaum's avatar
bguillaum committed
145
146
147
        iter
          (fun key value ->
            if fct key value
148
149
150
            then raise True
          ) map;
        false
pj2m's avatar
pj2m committed
151
152
      with True -> true

bguillaum's avatar
bguillaum committed
153
154
    (* let range key_set m =  *)
    (*   IntSet.fold (fun k s -> (IntSet.add (find k m) s)) key_set IntSet.empty *)
155

bguillaum's avatar
bguillaum committed
156
157
    (* let keys m =  *)
    (*   fold (fun k v s -> (IntSet.add k s)) m IntSet.empty *)
158

bguillaum's avatar
bguillaum committed
159
    (* union of two maps*)
pj2m's avatar
pj2m committed
160
    let union_map m m' = fold (fun k v m'' -> (add k v m'')) m m'
161

bguillaum's avatar
bguillaum committed
162
  end (* module Pid_map *)
pj2m's avatar
pj2m committed
163

bguillaum's avatar
bguillaum committed
164
165
166
(* ================================================================================ *)
module Pid_set = Set.Make (Pid)

bguillaum's avatar
bguillaum committed
167
168
(* ================================================================================ *)
module Gid = struct
169
170
  type t =
    | Old of int
171
    | New of (int * int) (* identifier for "created nodes" *)
172

bguillaum's avatar
bguillaum committed
173
  (* a compare function which ensures that new nodes are at the "end" of the graph *)
174
175
176
177
178
  let compare t1 t2 = match (t1,t2) with
    | Old _ , New _ -> -1
    | New _, Old _ -> 1
    | Old o1, Old o2 -> Pervasives.compare o1 o2
    | New n1, New n2 -> Pervasives.compare n1 n2
179
180
181
182

  let to_string = function
    | Old i -> sprintf "%d" i
    | New (i,j) -> sprintf"%d__%d" i j
bguillaum's avatar
bguillaum committed
183
end (* module Gid *)
pj2m's avatar
pj2m committed
184

bguillaum's avatar
bguillaum committed
185
(* ================================================================================ *)
bguillaum's avatar
bguillaum committed
186
module Gid_map = Map.Make (Gid)
pj2m's avatar
pj2m committed
187

bguillaum's avatar
bguillaum committed
188
(* ================================================================================ *)
pj2m's avatar
pj2m committed
189
190
191
module Array_ = struct
  let dicho_mem elt array =
    let rec loop low high =
bguillaum's avatar
bguillaum committed
192
      (if low > high
pj2m's avatar
pj2m committed
193
194
      then false
      else
195
196
197
198
        match (low+high)/2 with
        | middle when array.(middle) = elt -> true
        | middle when array.(middle) < elt -> loop (middle+1) high
        | middle -> loop low (middle - 1)
bguillaum's avatar
bguillaum committed
199
      ) in
pj2m's avatar
pj2m committed
200
201
202
203
204
205
206
207
208
    loop 0 ((Array.length array) - 1)

  (* dichotomic search in a sorted array *)
  let dicho_find elt array =
    let rec loop low high =
      (if low > high then raise Not_found);
      match (low+high)/2 with
      | middle when array.(middle) = elt -> middle
      | middle when array.(middle) < elt -> loop (middle+1) high
bguillaum's avatar
bguillaum committed
209
      | middle -> loop low (middle - 1) in
pj2m's avatar
pj2m committed
210
    loop 0 ((Array.length array) - 1)
bguillaum's avatar
bguillaum committed
211

pj2m's avatar
pj2m committed
212
213
214
215
216
217
  let dicho_find_assoc elt array =
    let rec loop low high =
      (if low > high then raise Not_found);
      match (low+high)/2 with
      | middle when fst array.(middle) = elt -> middle
      | middle when fst array.(middle) < elt -> loop (middle+1) high
bguillaum's avatar
bguillaum committed
218
      | middle -> loop low (middle - 1) in
pj2m's avatar
pj2m committed
219
    loop 0 ((Array.length array) - 1)
bguillaum's avatar
bguillaum committed
220
end (* module Array_ *)
pj2m's avatar
pj2m committed
221

bguillaum's avatar
bguillaum committed
222
(* ================================================================================ *)
pj2m's avatar
pj2m committed
223
module List_ = struct
bguillaum's avatar
bguillaum committed
224
225
226
227
228
  let rec set position elt = function
    | [] -> failwith "List_.set"
    | _::t when position = 0 -> elt::t
    | x::t -> x:: (set (position-1) elt t)

pj2m's avatar
pj2m committed
229
230
231
232
233
  let rec rm elt = function
    | [] -> raise Not_found
    | x::t when x=elt -> t
    | x::t -> x::(rm elt t)

bguillaum's avatar
bguillaum committed
234
  let pos x l =
bguillaum's avatar
bguillaum committed
235
236
237
238
239
240
    let rec loop i = function
    | [] -> None
    | h::t when h=x -> Some i
    | _::t -> loop (i+1) t in
    loop 0 l

pj2m's avatar
pj2m committed
241
242
243
244
245
246
247
  let rec opt = function
    | [] -> []
    | None :: t -> opt t
    | Some x :: t -> x :: (opt t)

  let rec opt_map f = function
    | [] -> []
bguillaum's avatar
bguillaum committed
248
    | x::t ->
249
250
251
        match f x with
        | None -> opt_map f t
        | Some r -> r :: (opt_map f t)
pj2m's avatar
pj2m committed
252
253
254
255
256

  let rec flat_map f = function
    | [] -> []
    | x::t -> (f x)@(flat_map f t)

bguillaum's avatar
bguillaum committed
257
258
  let iteri fct =
    let rec loop i = function
pj2m's avatar
pj2m committed
259
260
261
      | [] -> ()
      | h::t -> (fct i h); (loop (i+1) t) in
    loop 0
bguillaum's avatar
bguillaum committed
262
263
264

  let mapi fct =
    let rec loop i = function
pj2m's avatar
pj2m committed
265
266
267
268
      | [] -> []
      | h::t -> let head = fct i h in head :: (loop (i+1) t)
    in loop 0

bguillaum's avatar
bguillaum committed
269
270
  let opt_mapi fct =
    let rec loop i = function
271
272
273
274
275
276
277
      | [] -> []
      | h::t ->
        match fct i h with
          | None -> loop (i+1) t
          | Some res -> res :: (loop (i+1) t)
    in loop 0

pj2m's avatar
pj2m committed
278
  let foldi_left f init l =
bguillaum's avatar
bguillaum committed
279
280
    fst
      (List.fold_left
281
282
         (fun (acc,i) elt -> (f i acc elt, i+1))
         (init,0) l
pj2m's avatar
pj2m committed
283
284
285
286
287
288
      )

  let rec remove elt = function
    | [] -> raise Not_found
    | a::tail when a = elt -> tail
    | a::tail -> a::(remove elt tail)
289

pj2m's avatar
pj2m committed
290
291
292
  let to_string string_of_item sep = function
    | [] -> ""
    | h::t -> List.fold_left (fun acc elt -> acc ^ sep ^ (string_of_item elt)) (string_of_item h) t
293

pj2m's avatar
pj2m committed
294
295
  let rec sort_insert elt = function
    | [] -> [elt]
bguillaum's avatar
bguillaum committed
296
    | h::t when elt<h -> elt::h::t
pj2m's avatar
pj2m committed
297
298
299
300
301
302
    | h::t -> h::(sort_insert elt t)

  let rec sort_mem elt = function
    | [] -> false
    | h::_ when elt<h -> false
    | h::_ when elt=h -> true
303
    | h::t (* when elt>h *) -> sort_mem elt t
304
305
306
307
308

  let rec sort_assoc key = function
    | [] -> None
    | (k,_)::_ when key<k -> None
    | (k,_)::t when key>k -> sort_assoc key t
bguillaum's avatar
bguillaum committed
309
    | (_,v)::_ -> Some v
310
311
312
313
314
315
316

  let rec sort_remove_assoc key = function
    | [] -> []
    | (k,_)::_ as t when key<k -> t
    | (k,v)::t when key>k -> (k,v) :: (sort_remove_assoc key t)
    | (_,v)::t -> t

pj2m's avatar
pj2m committed
317
318
  exception Usort

bguillaum's avatar
bguillaum committed
319
  let rec usort_remove key = function
pj2m's avatar
pj2m committed
320
321
322
323
324
325
326
327
328
329
330
331
    | [] -> raise Not_found
    | x::t when key < x -> raise Not_found
    | x::t when key = x -> t
    | x::t -> x::(usort_remove key t)

  let usort_insert ?(compare=Pervasives.compare) elt l =
    let rec loop = function
      | [] -> [elt]
      | x::t when compare elt x < 0 -> elt :: x :: t
      | x::t when compare elt x > 0 -> x :: (loop t)
    | _ -> raise Usort in
    try Some (loop l) with Usort -> None
332

bguillaum's avatar
bguillaum committed
333
  let rec sort_disjoint l1 l2 =
pj2m's avatar
pj2m committed
334
335
336
337
    match (l1,l2) with
    | [], _ | _, [] -> true
    | h1::t1 , h2::t2 when h1<h2 -> sort_disjoint t1 l2
    | h1::t1 , h2::t2 when h1>h2 -> sort_disjoint l1 t2
bguillaum's avatar
bguillaum committed
338
    | _ -> false
339

bguillaum's avatar
bguillaum committed
340
  let sort_is_empty_inter l1 l2 =
pj2m's avatar
pj2m committed
341
342
343
344
345
    let rec loop = function
      | [], _ | _, [] -> true
      | x1::t1, x2::t2 when x1 < x2 -> loop (t1, x2::t2)
      | x1::t1, x2::t2 when x1 > x2 -> loop (x1::t1, t2)
      | x1::t1, x2::t2 -> false in
bguillaum's avatar
bguillaum committed
346
    loop (l1,l2)
pj2m's avatar
pj2m committed
347

bguillaum's avatar
bguillaum committed
348
  let sort_inter l1 l2 =
pj2m's avatar
pj2m committed
349
350
351
352
353
354
    let rec loop = function
      | [], _ | _, [] -> []
      | x1::t1, x2::t2 when x1 < x2 -> loop (t1, x2::t2)
      | x1::t1, x2::t2 when x1 > x2 -> loop (x1::t1, t2)
      | x1::t1, x2::t2 -> x1 :: loop (t1, t2) in
    loop (l1,l2)
bguillaum's avatar
bguillaum committed
355
356

  let sort_union l1 l2 =
bguillaum's avatar
bguillaum committed
357
358
359
360
361
362
363
364
    let rec loop = function
      | [], l | l, [] -> l
      | x1::t1, x2::t2 when x1 < x2 -> x1 :: loop (t1, x2::t2)
      | x1::t1, x2::t2 when x1 > x2 -> x2 :: loop (x1::t1, t2)
      | x1::t1, x2::t2 -> x1 :: loop (t1, t2) in
    loop (l1,l2)


pj2m's avatar
pj2m committed
365
  exception Not_disjoint
bguillaum's avatar
bguillaum committed
366
  let sort_disjoint_union ?(compare=Pervasives.compare) l1 l2 =
pj2m's avatar
pj2m committed
367
368
369
370
371
372
    let rec loop = function
      | [], l | l, [] -> l
      | x1::t1, x2::t2 when (compare x1 x2) < 0 -> x1 :: loop (t1, x2::t2)
      | x1::t1, x2::t2 when (compare x1  x2) > 0 -> x2 :: loop (x1::t1, t2)
      | _ -> raise Not_disjoint in
    loop (l1,l2)
bguillaum's avatar
bguillaum committed
373
374

  let sort_include l1 l2 =
pj2m's avatar
pj2m committed
375
376
377
378
379
380
381
    let rec loop = function
      | [], l -> true
      | l, [] -> false
      | x1::t1, x2::t2 when x1 < x2 -> false
      | x1::t1, x2::t2 when x1 > x2 -> loop (x1::t1, t2)
      | x1::t1, x2::t2 -> loop (t1, t2) in
    loop (l1,l2)
bguillaum's avatar
bguillaum committed
382
383

  let sort_included_diff l1 l2 =
pj2m's avatar
pj2m committed
384
385
386
387
388
389
390
391
    let rec loop = function
      | [], l -> failwith "[sort_included_diff] not included"
      | l, [] -> l
      | x1::t1, x2::t2 when x1 < x2 -> x1 :: loop (t1, x2::t2)
      | x1::t1, x2::t2 when x1 > x2 -> failwith "[sort_included_diff] not included"
      | x1::t1, x2::t2 -> loop (t1, t2) in
    loop (l1,l2)

bguillaum's avatar
bguillaum committed
392
  let sort_diff l1 l2 =
pj2m's avatar
pj2m committed
393
394
395
396
397
398
399
400
    let rec loop = function
      | [], l -> []
      | l, [] -> l
      | x1::t1, x2::t2 when x1 < x2 -> x1 :: loop (t1, x2::t2)
      | x1::t1, x2::t2 when x1 > x2 -> loop (x1::t1, t2)
      | x1::t1, x2::t2 -> loop (t1, t2) in
    loop (l1,l2)

401
  let foldi_left f init l =
bguillaum's avatar
bguillaum committed
402
403
    fst
      (List.fold_left
404
405
         (fun (acc,i) elt -> (f i acc elt, i+1))
         (init,0) l
406
      )
bguillaum's avatar
bguillaum committed
407
408
409
410
411
412
413
414

  let prev_next_iter fct list =
    let int_fct prev next elt = fct ?prev ?next elt in
    let rec loop prev = function
      | [] -> ()
      | [last] -> int_fct prev None last
      | head::snd::tail -> int_fct prev (Some snd) head; loop (Some head) (snd::tail)
    in loop None list
bguillaum's avatar
bguillaum committed
415
end (* module List_ *)
pj2m's avatar
pj2m committed
416

bguillaum's avatar
bguillaum committed
417
(* ================================================================================ *)
418
419
420
421
module type OrderedType =
  sig
    type t
    val compare: t -> t -> int
bguillaum's avatar
bguillaum committed
422
  end (* module type OrderedType *)
423

bguillaum's avatar
bguillaum committed
424
(* ================================================================================ *)
425
426
427
428
429
430
431
432
module type S =
  sig
    type key

    type +'a t

    val empty: 'a t

bguillaum's avatar
bguillaum committed
433
    (* an empty list returned if the key is undefined *)
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
    val assoc: key -> 'a t -> 'a list

    val is_empty: 'a t -> bool

    val to_string: ('a -> string) -> 'a t -> string

    val iter: (key -> 'a -> unit) -> 'a t -> unit

    val add: key -> 'a -> 'a t -> 'a t option

    val fold: ('b -> key -> 'a -> 'b) -> 'b -> 'a t -> 'b

    (* raise Not_found if no (key,elt) *)
    val remove: key -> 'a -> 'a t -> 'a t

    (* raise Not_found if no (key,elt) *)
    val remove_key: key -> 'a t -> 'a t

    (* [mem key value t ] test if the couple (key, value) is in the massoc [t]. *)
    val mem: key -> 'a -> 'a t -> bool

    (* mem_key key t] tests is [key] is associated to at least one value in [t]. *)
    val mem_key: key -> 'a t -> bool

    exception Not_disjoint
bguillaum's avatar
bguillaum committed
459
    val disjoint_union: 'a t -> 'a t -> 'a t
460
461
462
463
464

    exception Duplicate
    val merge_key: key -> key -> 'a t -> 'a t

    val exists: (key -> 'a -> bool) -> 'a t -> bool
465
466

    val rename: (key * key) list -> 'a t -> 'a t
bguillaum's avatar
bguillaum committed
467
  end (* module type S *)
468

bguillaum's avatar
bguillaum committed
469
(* ================================================================================ *)
470
471
472
473
474
475
476
477
478
479
480
module Massoc_make (Ord: OrderedType) = struct
  module M = Map.Make (Ord)

  type key = Ord.t

  type 'a t = ('a list) M.t

  let empty = M.empty

  let is_empty t = (t=empty)

bguillaum's avatar
bguillaum committed
481
482
  let assoc key t =
    try M.find key t
483
484
    with Not_found -> []

bguillaum's avatar
bguillaum committed
485
  let to_string _ _ = failwith "Not implemted"
486
487

  let iter fct t =
bguillaum's avatar
bguillaum committed
488
    M.iter
489
490
491
      (fun key list -> List.iter (fun elt -> fct key elt) list
      ) t

bguillaum's avatar
bguillaum committed
492
  let add key elt t =
493
494
    try
      let list = M.find key t in
bguillaum's avatar
bguillaum committed
495
      match List_.usort_insert elt list with
496
497
498
499
500
501
502
        | Some l -> Some (M.add key l t)
        | None -> None
    with Not_found -> Some (M.add key [elt] t)

  let fold fct init t =
    M.fold
      (fun key list acc ->
bguillaum's avatar
bguillaum committed
503
        List.fold_left
504
505
506
507
          (fun acc2 elt ->
            fct acc2 key elt)
          acc list)
      t init
bguillaum's avatar
bguillaum committed
508

509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
  (* Not found raised in the value is not defined *)
  let remove key value t =
    match M.find key t with
      | [one] when one=value -> M.remove key t
      | old -> M.add key (List_.usort_remove value old) t

  let rec remove_key key t = M.remove key t

  let rec mem key value t =
    try List_.sort_mem value (M.find key t)
    with Not_found -> false

  let rec mem_key key t = M.mem key t

  exception Not_disjoint

  let disjoint_union t1 t2 =
bguillaum's avatar
bguillaum committed
526
    M.fold
527
      (fun key list acc ->
bguillaum's avatar
bguillaum committed
528
        try
529
530
531
532
533
534
535
536
          let old = M.find key acc in
          M.add key (List_.sort_disjoint_union list old) acc
        with
          | Not_found -> M.add key list acc
          | List_.Not_disjoint -> raise Not_disjoint
      ) t1 t2

  exception Duplicate
bguillaum's avatar
bguillaum committed
537

538
539
540
541
542
  let merge_key i j t =
    try
      let old_i = M.find i t in
      let old_j = try M.find j t with Not_found -> [] in
      M.add j (List_.sort_disjoint_union old_i old_j) (M.remove i t)
bguillaum's avatar
bguillaum committed
543
    with
544
545
546
      | Not_found -> (* no key i *) t
      | List_.Not_disjoint -> raise Duplicate

bguillaum's avatar
bguillaum committed
547
548
(* New implementation of exists but exists fct not implemented in ocaml < 3.12 *)
(*
549
550
551
552
553
  let exists fct t =
    M.exists
      (fun key list ->
        List.exists (fun elt -> fct key elt) list
      ) t
bguillaum's avatar
bguillaum committed
554
555
556
557
558
559
560
561
562
563
564
565
566
*)

  exception True
  let exists fct t =
    try
      M.iter
        (fun key list ->
          if List.exists (fun elt -> fct key elt) list
          then raise True
        ) t;
      false
    with True -> true

567
568
569
570
571
572
573
  let rename mapping t =
    M.fold
      (fun key value acc ->
        let new_key = try List.assoc key mapping with Not_found -> key in
        M.add new_key value acc
      ) t M.empty

574
575
end (* module Massoc_make *)

bguillaum's avatar
bguillaum committed
576
(* ================================================================================ *)
577
module Massoc_gid = Massoc_make (Gid)
pj2m's avatar
pj2m committed
578

bguillaum's avatar
bguillaum committed
579
580
581
(* ================================================================================ *)
module Massoc_pid = Massoc_make (Pid)

pj2m's avatar
pj2m committed
582

bguillaum's avatar
bguillaum committed
583
(* ================================================================================ *)
pj2m's avatar
pj2m committed
584
585
586
587
588
589
590
591
592
593
594
module Id = struct
  type name = string

  type t = int

  type table = name array

  let build ?(loc:Loc.t option) string table =
    try Array_.dicho_find string table
    with Not_found -> Error.build ?loc "Identifier '%s' not found" string

bguillaum's avatar
bguillaum committed
595
  let build_opt string table =
pj2m's avatar
pj2m committed
596
597
    try Some (Array_.dicho_find string table)
    with Not_found -> None
bguillaum's avatar
bguillaum committed
598
end (* module Id *)
599

bguillaum's avatar
bguillaum committed
600
(* ================================================================================ *)
601
module Html = struct
bguillaum's avatar
bguillaum committed
602
603
  let css = "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<link rel=\"stylesheet\" href=\"style.css\" type=\"text/css\">"

604
605
606
607
608
609
610
611
612
613
614
  let enter out_ch ?title ?header base_name =
    fprintf out_ch "<html>\n";
    (match title with
    | Some t -> fprintf out_ch "<head>\n%s\n<title>%s</title>\n</head>\n" css t
    | None -> fprintf out_ch "<head>\n%s\n</head>\n" css
    );
    fprintf out_ch "<body>\n";

    (match header with None -> () | Some s -> fprintf out_ch "%s\n" s);

    (match title with
bguillaum's avatar
bguillaum committed
615
    | Some t -> fprintf out_ch "<h1>%s</h1>\n" t
616
617
    | None -> ()
    )
bguillaum's avatar
bguillaum committed
618
  let leave out_ch =
619
620
    fprintf out_ch "</body>\n";
    fprintf out_ch "</html>\n";
bguillaum's avatar
bguillaum committed
621
end  (* module Html *)
622

bguillaum's avatar
bguillaum committed
623
(* ================================================================================ *)
bguillaum's avatar
bguillaum committed
624
625
module Conll = struct
  type line = {
bguillaum's avatar
bguillaum committed
626
      line_num: int;
bguillaum's avatar
bguillaum committed
627
      num: string;
bguillaum's avatar
bguillaum committed
628
629
630
631
632
      phon: string;
      lemma: string;
      pos1: string;
      pos2: string;
      morph: (string * string) list;
bguillaum's avatar
bguillaum committed
633
      deps: (string * string ) list;
bguillaum's avatar
bguillaum committed
634
    }
bguillaum's avatar
bguillaum committed
635

bguillaum's avatar
bguillaum committed
636
  let root = { line_num = -1; num="0"; phon="ROOT"; lemma="__"; pos1="_X"; pos2=""; morph=[]; deps=[] }
bguillaum's avatar
bguillaum committed
637

638
639
640
641
642
643
644
645
  let line_to_string l =
    let (gov_list, lab_list) = List.split l.deps in
    sprintf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"
      l.num l.phon l.lemma l.pos1 l.pos2
      (match l.morph with [] -> "_" | list -> String.concat "|" (List.map (fun (f,v) -> sprintf "%s=%s" f v) list))
      (String.concat "|" (gov_list))
      (String.concat "|" (lab_list))

bguillaum's avatar
bguillaum committed
646
647
648
649
650
651
652
653
654
655
656
657
658
  let parse_morph file_name line_num = function
    | "_" -> []
    | morph ->
      List.map
        (fun feat ->
          match Str.split (Str.regexp "=") feat with
            | [feat_name] -> (feat_name, "true")
            | [feat_name; feat_value] -> (feat_name, feat_value)
            | _ -> Error.build ~loc:(file_name,line_num) "[Conll.load] illegal morphology \n>>>>>%s<<<<<<" morph
        ) (Str.split (Str.regexp "|") morph)

  let underscore s = if s = "" then "_" else s
  let parse_line file_name (line_num, line) =
bguillaum's avatar
bguillaum committed
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
    match Str.split (Str.regexp "\t") line with
      | [ num; phon; lemma; pos1; pos2; morph; govs; dep_labs; _; _ ] ->
        begin
          try
            let gov_list = if govs = "_" then [] else Str.split (Str.regexp "|") govs
            and lab_list = if dep_labs = "_" then [] else Str.split (Str.regexp "|") dep_labs in
            let deps = List.combine gov_list lab_list in
            {line_num = line_num;
             num = num;
             phon = underscore phon;
             lemma = underscore lemma;
             pos1 = underscore pos1;
             pos2 = underscore pos2;
             morph = parse_morph file_name line_num morph;
             deps = deps;
            }
          with exc -> Error.build ~loc:(file_name,line_num) "[Conll.load] illegal line, exc=%s\n>>>>>%s<<<<<<" (Printexc.to_string exc) line
        end
      | l -> Error.build ~loc:(file_name,line_num) "[Conll.load] illegal line, %d fields (10 are expected)\n>>>>>%s<<<<<<" (List.length l) line
bguillaum's avatar
bguillaum committed
678
679
680
681

  let load file_name =
    let lines = File.read_ln file_name in
    List.map (parse_line file_name) lines
bguillaum's avatar
bguillaum committed
682

bguillaum's avatar
bguillaum committed
683
  let parse file_name lines = List.map (parse_line file_name) lines
684
685
686

    (* We would prefer to compare the float equivalent of l1.num l2.num but this would break the dicho_find function *)
  let compare l1 l2 = Pervasives.compare ((* float_of_string *) l1.num) ((* float_of_string *) l2.num)
bguillaum's avatar
bguillaum committed
687
end (* module Conll *)
688

bguillaum's avatar
bguillaum committed
689
(* ================================================================================ *)
bguillaum's avatar
bguillaum committed
690
(* This module defines a type for lexical parameter (i.e. one line in a lexical file) *)
691
692
693
694
695
696
module Lex_par = struct

  type item = string list * string list (* first list: pattern parameters $id , second list command parameters @id *)

  type t = item list

697
698
699
  let empty=[]
  let append = List.append

700
701
702
703
704
705
706
707
  let dump t =
    printf "[Lex_par.dump] --> size = %d\n" (List.length t);
    List.iter (fun (pp,cp) ->
      printf "%s##%s\n"
        (String.concat "#" pp)
        (String.concat "#" cp)
    ) t

bguillaum's avatar
bguillaum committed
708
  let rm_peripheral_white s =
709
710
711
    Str.global_replace (Str.regexp "\\( \\|\t\\)*$") ""
    (Str.global_replace (Str.regexp "^\\( \\|\t\\)*") "" s)

bguillaum's avatar
bguillaum committed
712
  let parse_line ?loc nb_p nb_c line =
713
    let line = rm_peripheral_white line in
714
715
716
717
718
719
720
721
722
723
724
725
726
    if line = "" || line.[0] = '%'
    then None
    else
      match Str.split (Str.regexp "##") line with
        | [args] when nb_c = 0 ->
          (match Str.split (Str.regexp "#") args with
            | l when List.length l = nb_p -> Some (l,[])
            | _ -> Error.bug ?loc
              "Illegal lexical parameter line: \"%s\" doesn't contain %d args"
              line nb_p)
        | [args; values] ->
          (match (Str.split (Str.regexp "#") args, Str.split (Str.regexp "#") values) with
            | (lp,lc) when List.length lp = nb_p && List.length lc = nb_c -> Some (lp,lc)
bguillaum's avatar
bguillaum committed
727
            | _ -> Error.bug ?loc
728
729
730
              "Illegal lexical parameter line: \"%s\" doesn't contain %d args and %d values"
              line nb_p nb_c)
        | _ -> Error.bug ?loc "Illegal param line: '%s'" line
bguillaum's avatar
bguillaum committed
731

732
  let from_lines ?loc nb_p nb_c lines = List_.opt_map (parse_line ?loc nb_p nb_c) lines
733

734
  let load ?loc dir nb_p nb_c file =
735
    try
bguillaum's avatar
bguillaum committed
736
      let full_file =
737
738
739
740
        if Filename.is_relative file
        then Filename.concat dir file
        else file in
      let lines = File.read full_file in
741
      List_.opt_mapi (fun i line -> parse_line ~loc:(full_file,i) nb_p nb_c line) lines
742
743
    with Sys_error _ -> Error.build ?loc "External lexical file '%s' not found" file

bguillaum's avatar
bguillaum committed
744
745
746
  let sub x y = List.mem x (Str.split (Str.regexp "|") y)

  let filter index atom t =
bguillaum's avatar
bguillaum committed
747
    match
bguillaum's avatar
bguillaum committed
748
749
750
      List_.opt_map
        (fun (p_par, c_par) ->
          let par = List.nth p_par index in
bguillaum's avatar
bguillaum committed
751
          if atom=par
bguillaum's avatar
bguillaum committed
752
753
754
755
756
757
          then Some (p_par, c_par)
          else
            if sub atom par (* atom is one of the values of the disjunction par *)
            then Some (List_.set index atom p_par, c_par)
            else None
        ) t
bguillaum's avatar
bguillaum committed
758
    with
759
    | [] -> None
bguillaum's avatar
bguillaum committed
760
    | t -> Some t
bguillaum's avatar
bguillaum committed
761
762
763
764
765

  let get_param_value index = function
    | [] -> Error.bug "[Lex_par.get_command_value] empty parameter"
    | (params,_)::_ -> List.nth params index

766
767
768
  let get_command_value index = function
    | [(_,one)] -> List.nth one index
    | [] -> Error.bug "[Lex_par.get_command_value] empty parameter"
bguillaum's avatar
bguillaum committed
769
770
771
772
773
774
    | (_,[sing])::tail when index=0 ->
        Printf.sprintf "%s/%s"
          sing
          (List_.to_string
             (function
               | (_,[s]) -> s
bguillaum's avatar
bguillaum committed
775
776
777
               | _ -> Error.bug "[Lex_par.get_command_value] inconsistent param"
             ) "/" tail
          )
778
    | l -> Error.run "Lexical parameter are not functionnal"
779

bguillaum's avatar
bguillaum committed
780
end (* module Lex_par *)
781

bguillaum's avatar
bguillaum committed
782
(* ================================================================================ *)
bguillaum's avatar
bguillaum committed
783
784
785
(* copy from leopar *)
module Timeout = struct
  exception Stop
786

bguillaum's avatar
bguillaum committed
787
788
  let counter = ref 0.
  let timeout = ref None
bguillaum's avatar
bguillaum committed
789

bguillaum's avatar
bguillaum committed
790
791
792
  let start () = counter := Unix.time ()

  let check () =
bguillaum's avatar
bguillaum committed
793
    match !timeout with
bguillaum's avatar
bguillaum committed
794
795
796
797
    | None -> ()
    | Some delay ->
        if Unix.time () -. !counter > delay
        then raise Stop
bguillaum's avatar
bguillaum committed
798
end (* module Timeout *)