libgrew_utils.ml 24.3 KB
Newer Older
bguillaum's avatar
bguillaum committed
1 2 3 4 5 6 7 8 9 10
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2013 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: http://grew.loria.fr                                               *)
(*    License: CeCILL (see LICENSE folder or "http://www.cecill.info")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

pj2m's avatar
pj2m committed
11
open Log
bguillaum's avatar
bguillaum committed
12
open Printf
pj2m's avatar
pj2m committed
13

bguillaum's avatar
bguillaum committed
14
module StringSet = Set.Make (String)
pj2m's avatar
pj2m committed
15 16 17
module StringMap = Map.Make (String)

module IntSet = Set.Make (struct type t = int let compare = Pervasives.compare end)
18 19
module IntMap = Map.Make (struct type t = int let compare = Pervasives.compare end)

bguillaum's avatar
bguillaum committed
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
(* ================================================================================ *)
module Loc = struct
  type t = string * int

  let to_string (file,line) = sprintf "(file: %s, line: %d)" (Filename.basename file) line

  let opt_set_line line = function
    | None -> None
    | Some (file,_) -> Some (file, line)

  let opt_to_string = function
    | None -> ""
    | Some x -> to_string x
end (* module Loc *)

(* ================================================================================ *)
module Error = struct

  exception Build of (string * Loc.t option)
  exception Run of (string * Loc.t option)
  exception Bug of (string * Loc.t option)

bguillaum's avatar
Typos  
bguillaum committed
42
  let build_ ?loc message = raise (Build (message, loc))
bguillaum's avatar
bguillaum committed
43 44 45 46 47 48 49 50
  let build ?loc = Printf.ksprintf (build_ ?loc)

  let run_ ?loc message = raise (Run (message, loc))
  let run ?loc = Printf.ksprintf (run_ ?loc)

  let bug_ ?loc message = raise (Bug (message, loc))
  let bug ?loc = Printf.ksprintf (bug_ ?loc)
end (* module Error *)
51

bguillaum's avatar
bguillaum committed
52 53 54 55 56
(* ================================================================================ *)
module String_ = struct

  let to_float string =
    try float_of_string string
bguillaum's avatar
bguillaum committed
57 58 59
    with _ ->
      try float_of_string (Str.global_replace (Str.regexp "\\.") "," string)
      with _ -> Error.build "[String_.to_float] cannot convert '%s'" string
60

bguillaum's avatar
bguillaum committed
61
  let of_float float = Str.global_replace (Str.regexp ",") "." (sprintf "%g" float)
62

bguillaum's avatar
bguillaum committed
63
end (* module String_ *)
64

bguillaum's avatar
bguillaum committed
65 66 67 68 69 70 71 72
(* ================================================================================ *)
module Dot = struct
  let to_png_file dot output_file =
    let temp_file_name,out_ch = Filename.open_temp_file ~mode:[Open_rdonly;Open_wronly;Open_text] "grewui_" ".dot" in
    fprintf out_ch "%s" dot;
    close_out out_ch;
    ignore(Sys.command(sprintf "dot -Tpng -o %s %s " output_file temp_file_name))
end (* module Dot *)
73

bguillaum's avatar
bguillaum committed
74 75 76 77 78 79 80 81

(* ================================================================================ *)
module File = struct
  let write data name =
    let out_ch = open_out name in
    fprintf out_ch "%s\n" data;
    close_out out_ch

bguillaum's avatar
bguillaum committed
82
  let read file =
bguillaum's avatar
bguillaum committed
83
    let in_ch = open_in file in
bguillaum's avatar
bguillaum committed
84 85 86
    (* if the input file contains an UTF-8 byte order mark (EF BB BF), skip 3 bytes, else get back to 0 *)
    (match input_byte in_ch with 0xEF -> seek_in in_ch 3 | _ -> seek_in in_ch 0);

bguillaum's avatar
bguillaum committed
87 88 89 90 91 92 93 94
    let rev_lines = ref [] in
    try
      while true do
        let line = input_line in_ch in
        if (Str.string_match (Str.regexp "^[ \t]*$") line 0) || (line.[0] = '%')
        then ()
        else rev_lines := line :: !rev_lines
      done; assert false
bguillaum's avatar
bguillaum committed
95
    with End_of_file ->
bguillaum's avatar
bguillaum committed
96 97
      close_in in_ch;
      List.rev !rev_lines
bguillaum's avatar
bguillaum committed
98

bguillaum's avatar
bguillaum committed
99 100
  (* [read_ln file] returns a list of couples (line_num, line). Blank lines and lines starting with '%' are ignored. *)
  let read_ln file =
bguillaum's avatar
bguillaum committed
101
    let in_ch = open_in file in
bguillaum's avatar
bguillaum committed
102 103 104
    (* if the input file contains an UTF-8 byte order mark (EF BB BF), skip 3 bytes, else get back to 0 *)
    (match input_byte in_ch with 0xEF -> seek_in in_ch 3 | _ -> seek_in in_ch 0);

bguillaum's avatar
bguillaum committed
105 106 107 108 109 110 111 112 113 114
    let cpt = ref 0 in
    let rev_lines = ref [] in
    try
      while true do
        let line = input_line in_ch in
        incr cpt;
        if (Str.string_match (Str.regexp "^[ \t]*$") line 0) || (line.[0] = '%')
        then ()
        else rev_lines := (!cpt, line) :: !rev_lines
      done; assert false
bguillaum's avatar
bguillaum committed
115
    with End_of_file ->
bguillaum's avatar
bguillaum committed
116 117
      close_in in_ch;
      List.rev !rev_lines
bguillaum's avatar
bguillaum committed
118 119 120
 end (* module File *)

(* ================================================================================ *)
121
module Pid = struct
bguillaum's avatar
bguillaum committed
122 123 124
  (* type t = int *)
  type t = Pos of int | Neg of int

125
  let compare = Pervasives.compare
bguillaum's avatar
bguillaum committed
126 127 128 129 130 131 132 133

  let to_id = function
    | Pos i -> sprintf "p_%d" i
    | Neg i -> sprintf "n_%d" i

  let to_string = function
    | Pos i -> sprintf "Pos %d" i
    | Neg i -> sprintf "Neg %d" i
bguillaum's avatar
bguillaum committed
134 135 136
end (* module Pid *)

(* ================================================================================ *)
137
module Pid_map =
bguillaum's avatar
bguillaum committed
138
  struct
139
    include Map.Make (Pid)
140

pj2m's avatar
pj2m committed
141
    exception True
bguillaum's avatar
bguillaum committed
142

pj2m's avatar
pj2m committed
143 144
    let exists fct map =
      try
bguillaum's avatar
bguillaum committed
145 146 147
        iter
          (fun key value ->
            if fct key value
148 149 150
            then raise True
          ) map;
        false
pj2m's avatar
pj2m committed
151 152
      with True -> true

bguillaum's avatar
bguillaum committed
153 154
    (* let range key_set m =  *)
    (*   IntSet.fold (fun k s -> (IntSet.add (find k m) s)) key_set IntSet.empty *)
155

bguillaum's avatar
bguillaum committed
156 157
    (* let keys m =  *)
    (*   fold (fun k v s -> (IntSet.add k s)) m IntSet.empty *)
158

bguillaum's avatar
bguillaum committed
159
    (* union of two maps*)
pj2m's avatar
pj2m committed
160
    let union_map m m' = fold (fun k v m'' -> (add k v m'')) m m'
161

bguillaum's avatar
bguillaum committed
162
  end (* module Pid_map *)
pj2m's avatar
pj2m committed
163

bguillaum's avatar
bguillaum committed
164 165 166
(* ================================================================================ *)
module Pid_set = Set.Make (Pid)

bguillaum's avatar
bguillaum committed
167 168
(* ================================================================================ *)
module Gid = struct
169 170
  type t =
    | Old of int
171
    | New of (int * int) (* identifier for "created nodes" *)
172

bguillaum's avatar
bguillaum committed
173
  (* a compare function which ensures that new nodes are at the "end" of the graph *)
174 175 176 177 178
  let compare t1 t2 = match (t1,t2) with
    | Old _ , New _ -> -1
    | New _, Old _ -> 1
    | Old o1, Old o2 -> Pervasives.compare o1 o2
    | New n1, New n2 -> Pervasives.compare n1 n2
179 180 181 182

  let to_string = function
    | Old i -> sprintf "%d" i
    | New (i,j) -> sprintf"%d__%d" i j
bguillaum's avatar
bguillaum committed
183
end (* module Gid *)
pj2m's avatar
pj2m committed
184

bguillaum's avatar
bguillaum committed
185
(* ================================================================================ *)
bguillaum's avatar
bguillaum committed
186
module Gid_map = Map.Make (Gid)
pj2m's avatar
pj2m committed
187

bguillaum's avatar
bguillaum committed
188
(* ================================================================================ *)
pj2m's avatar
pj2m committed
189 190 191
module Array_ = struct
  let dicho_mem elt array =
    let rec loop low high =
bguillaum's avatar
bguillaum committed
192
      (if low > high
pj2m's avatar
pj2m committed
193 194
      then false
      else
195 196 197 198
        match (low+high)/2 with
        | middle when array.(middle) = elt -> true
        | middle when array.(middle) < elt -> loop (middle+1) high
        | middle -> loop low (middle - 1)
bguillaum's avatar
bguillaum committed
199
      ) in
pj2m's avatar
pj2m committed
200 201 202 203 204 205 206 207 208
    loop 0 ((Array.length array) - 1)

  (* dichotomic search in a sorted array *)
  let dicho_find elt array =
    let rec loop low high =
      (if low > high then raise Not_found);
      match (low+high)/2 with
      | middle when array.(middle) = elt -> middle
      | middle when array.(middle) < elt -> loop (middle+1) high
bguillaum's avatar
bguillaum committed
209
      | middle -> loop low (middle - 1) in
pj2m's avatar
pj2m committed
210
    loop 0 ((Array.length array) - 1)
bguillaum's avatar
bguillaum committed
211

pj2m's avatar
pj2m committed
212 213 214 215 216 217
  let dicho_find_assoc elt array =
    let rec loop low high =
      (if low > high then raise Not_found);
      match (low+high)/2 with
      | middle when fst array.(middle) = elt -> middle
      | middle when fst array.(middle) < elt -> loop (middle+1) high
bguillaum's avatar
bguillaum committed
218
      | middle -> loop low (middle - 1) in
pj2m's avatar
pj2m committed
219
    loop 0 ((Array.length array) - 1)
bguillaum's avatar
bguillaum committed
220
end (* module Array_ *)
pj2m's avatar
pj2m committed
221

bguillaum's avatar
bguillaum committed
222
(* ================================================================================ *)
pj2m's avatar
pj2m committed
223
module List_ = struct
bguillaum's avatar
bguillaum committed
224 225 226 227 228
  let rec set position elt = function
    | [] -> failwith "List_.set"
    | _::t when position = 0 -> elt::t
    | x::t -> x:: (set (position-1) elt t)

pj2m's avatar
pj2m committed
229 230 231 232 233
  let rec rm elt = function
    | [] -> raise Not_found
    | x::t when x=elt -> t
    | x::t -> x::(rm elt t)

bguillaum's avatar
bguillaum committed
234
  let pos x l =
bguillaum's avatar
bguillaum committed
235 236 237 238 239 240
    let rec loop i = function
    | [] -> None
    | h::t when h=x -> Some i
    | _::t -> loop (i+1) t in
    loop 0 l

pj2m's avatar
pj2m committed
241 242 243 244 245 246 247
  let rec opt = function
    | [] -> []
    | None :: t -> opt t
    | Some x :: t -> x :: (opt t)

  let rec opt_map f = function
    | [] -> []
bguillaum's avatar
bguillaum committed
248
    | x::t ->
249 250 251
        match f x with
        | None -> opt_map f t
        | Some r -> r :: (opt_map f t)
pj2m's avatar
pj2m committed
252 253 254 255 256

  let rec flat_map f = function
    | [] -> []
    | x::t -> (f x)@(flat_map f t)

bguillaum's avatar
bguillaum committed
257 258
  let iteri fct =
    let rec loop i = function
pj2m's avatar
pj2m committed
259 260 261
      | [] -> ()
      | h::t -> (fct i h); (loop (i+1) t) in
    loop 0
bguillaum's avatar
bguillaum committed
262 263 264

  let mapi fct =
    let rec loop i = function
pj2m's avatar
pj2m committed
265 266 267 268
      | [] -> []
      | h::t -> let head = fct i h in head :: (loop (i+1) t)
    in loop 0

bguillaum's avatar
bguillaum committed
269 270
  let opt_mapi fct =
    let rec loop i = function
271 272 273 274 275 276 277
      | [] -> []
      | h::t ->
        match fct i h with
          | None -> loop (i+1) t
          | Some res -> res :: (loop (i+1) t)
    in loop 0

pj2m's avatar
pj2m committed
278
  let foldi_left f init l =
bguillaum's avatar
bguillaum committed
279 280
    fst
      (List.fold_left
281 282
         (fun (acc,i) elt -> (f i acc elt, i+1))
         (init,0) l
pj2m's avatar
pj2m committed
283 284 285 286 287 288
      )

  let rec remove elt = function
    | [] -> raise Not_found
    | a::tail when a = elt -> tail
    | a::tail -> a::(remove elt tail)
289

pj2m's avatar
pj2m committed
290 291 292
  let to_string string_of_item sep = function
    | [] -> ""
    | h::t -> List.fold_left (fun acc elt -> acc ^ sep ^ (string_of_item elt)) (string_of_item h) t
293

pj2m's avatar
pj2m committed
294 295
  let rec sort_insert elt = function
    | [] -> [elt]
bguillaum's avatar
bguillaum committed
296
    | h::t when elt<h -> elt::h::t
pj2m's avatar
pj2m committed
297 298 299 300 301 302
    | h::t -> h::(sort_insert elt t)

  let rec sort_mem elt = function
    | [] -> false
    | h::_ when elt<h -> false
    | h::_ when elt=h -> true
303
    | h::t (* when elt>h *) -> sort_mem elt t
304 305 306 307 308

  let rec sort_assoc key = function
    | [] -> None
    | (k,_)::_ when key<k -> None
    | (k,_)::t when key>k -> sort_assoc key t
bguillaum's avatar
bguillaum committed
309
    | (_,v)::_ -> Some v
310 311 312 313 314 315 316

  let rec sort_remove_assoc key = function
    | [] -> []
    | (k,_)::_ as t when key<k -> t
    | (k,v)::t when key>k -> (k,v) :: (sort_remove_assoc key t)
    | (_,v)::t -> t

pj2m's avatar
pj2m committed
317 318
  exception Usort

bguillaum's avatar
bguillaum committed
319
  let rec usort_remove key = function
pj2m's avatar
pj2m committed
320 321 322 323 324 325 326 327 328 329 330 331
    | [] -> raise Not_found
    | x::t when key < x -> raise Not_found
    | x::t when key = x -> t
    | x::t -> x::(usort_remove key t)

  let usort_insert ?(compare=Pervasives.compare) elt l =
    let rec loop = function
      | [] -> [elt]
      | x::t when compare elt x < 0 -> elt :: x :: t
      | x::t when compare elt x > 0 -> x :: (loop t)
    | _ -> raise Usort in
    try Some (loop l) with Usort -> None
332

bguillaum's avatar
bguillaum committed
333
  let rec sort_disjoint l1 l2 =
pj2m's avatar
pj2m committed
334 335 336 337
    match (l1,l2) with
    | [], _ | _, [] -> true
    | h1::t1 , h2::t2 when h1<h2 -> sort_disjoint t1 l2
    | h1::t1 , h2::t2 when h1>h2 -> sort_disjoint l1 t2
bguillaum's avatar
bguillaum committed
338
    | _ -> false
339

bguillaum's avatar
bguillaum committed
340
  let sort_is_empty_inter l1 l2 =
pj2m's avatar
pj2m committed
341 342 343 344 345
    let rec loop = function
      | [], _ | _, [] -> true
      | x1::t1, x2::t2 when x1 < x2 -> loop (t1, x2::t2)
      | x1::t1, x2::t2 when x1 > x2 -> loop (x1::t1, t2)
      | x1::t1, x2::t2 -> false in
bguillaum's avatar
bguillaum committed
346
    loop (l1,l2)
pj2m's avatar
pj2m committed
347

bguillaum's avatar
bguillaum committed
348
  let sort_inter l1 l2 =
pj2m's avatar
pj2m committed
349 350 351 352 353 354
    let rec loop = function
      | [], _ | _, [] -> []
      | x1::t1, x2::t2 when x1 < x2 -> loop (t1, x2::t2)
      | x1::t1, x2::t2 when x1 > x2 -> loop (x1::t1, t2)
      | x1::t1, x2::t2 -> x1 :: loop (t1, t2) in
    loop (l1,l2)
bguillaum's avatar
bguillaum committed
355 356

  let sort_union l1 l2 =
bguillaum's avatar
bguillaum committed
357 358 359 360 361 362 363 364
    let rec loop = function
      | [], l | l, [] -> l
      | x1::t1, x2::t2 when x1 < x2 -> x1 :: loop (t1, x2::t2)
      | x1::t1, x2::t2 when x1 > x2 -> x2 :: loop (x1::t1, t2)
      | x1::t1, x2::t2 -> x1 :: loop (t1, t2) in
    loop (l1,l2)


pj2m's avatar
pj2m committed
365
  exception Not_disjoint
bguillaum's avatar
bguillaum committed
366
  let sort_disjoint_union ?(compare=Pervasives.compare) l1 l2 =
pj2m's avatar
pj2m committed
367 368 369 370 371 372
    let rec loop = function
      | [], l | l, [] -> l
      | x1::t1, x2::t2 when (compare x1 x2) < 0 -> x1 :: loop (t1, x2::t2)
      | x1::t1, x2::t2 when (compare x1  x2) > 0 -> x2 :: loop (x1::t1, t2)
      | _ -> raise Not_disjoint in
    loop (l1,l2)
bguillaum's avatar
bguillaum committed
373 374

  let sort_include l1 l2 =
pj2m's avatar
pj2m committed
375 376 377 378 379 380 381
    let rec loop = function
      | [], l -> true
      | l, [] -> false
      | x1::t1, x2::t2 when x1 < x2 -> false
      | x1::t1, x2::t2 when x1 > x2 -> loop (x1::t1, t2)
      | x1::t1, x2::t2 -> loop (t1, t2) in
    loop (l1,l2)
bguillaum's avatar
bguillaum committed
382 383

  let sort_included_diff l1 l2 =
pj2m's avatar
pj2m committed
384 385 386 387 388 389 390 391
    let rec loop = function
      | [], l -> failwith "[sort_included_diff] not included"
      | l, [] -> l
      | x1::t1, x2::t2 when x1 < x2 -> x1 :: loop (t1, x2::t2)
      | x1::t1, x2::t2 when x1 > x2 -> failwith "[sort_included_diff] not included"
      | x1::t1, x2::t2 -> loop (t1, t2) in
    loop (l1,l2)

bguillaum's avatar
bguillaum committed
392
  let sort_diff l1 l2 =
pj2m's avatar
pj2m committed
393 394 395 396 397 398 399 400
    let rec loop = function
      | [], l -> []
      | l, [] -> l
      | x1::t1, x2::t2 when x1 < x2 -> x1 :: loop (t1, x2::t2)
      | x1::t1, x2::t2 when x1 > x2 -> loop (x1::t1, t2)
      | x1::t1, x2::t2 -> loop (t1, t2) in
    loop (l1,l2)

401
  let foldi_left f init l =
bguillaum's avatar
bguillaum committed
402 403
    fst
      (List.fold_left
404 405
         (fun (acc,i) elt -> (f i acc elt, i+1))
         (init,0) l
406
      )
bguillaum's avatar
bguillaum committed
407 408 409 410 411 412 413 414

  let prev_next_iter fct list =
    let int_fct prev next elt = fct ?prev ?next elt in
    let rec loop prev = function
      | [] -> ()
      | [last] -> int_fct prev None last
      | head::snd::tail -> int_fct prev (Some snd) head; loop (Some head) (snd::tail)
    in loop None list
bguillaum's avatar
bguillaum committed
415
end (* module List_ *)
pj2m's avatar
pj2m committed
416

bguillaum's avatar
bguillaum committed
417
(* ================================================================================ *)
418 419 420 421
module type OrderedType =
  sig
    type t
    val compare: t -> t -> int
bguillaum's avatar
bguillaum committed
422
  end (* module type OrderedType *)
423

bguillaum's avatar
bguillaum committed
424
(* ================================================================================ *)
425 426 427 428 429 430 431 432
module type S =
  sig
    type key

    type +'a t

    val empty: 'a t

bguillaum's avatar
bguillaum committed
433
    (* an empty list returned if the key is undefined *)
434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458
    val assoc: key -> 'a t -> 'a list

    val is_empty: 'a t -> bool

    val to_string: ('a -> string) -> 'a t -> string

    val iter: (key -> 'a -> unit) -> 'a t -> unit

    val add: key -> 'a -> 'a t -> 'a t option

    val fold: ('b -> key -> 'a -> 'b) -> 'b -> 'a t -> 'b

    (* raise Not_found if no (key,elt) *)
    val remove: key -> 'a -> 'a t -> 'a t

    (* raise Not_found if no (key,elt) *)
    val remove_key: key -> 'a t -> 'a t

    (* [mem key value t ] test if the couple (key, value) is in the massoc [t]. *)
    val mem: key -> 'a -> 'a t -> bool

    (* mem_key key t] tests is [key] is associated to at least one value in [t]. *)
    val mem_key: key -> 'a t -> bool

    exception Not_disjoint
bguillaum's avatar
bguillaum committed
459
    val disjoint_union: 'a t -> 'a t -> 'a t
460 461 462 463 464

    exception Duplicate
    val merge_key: key -> key -> 'a t -> 'a t

    val exists: (key -> 'a -> bool) -> 'a t -> bool
465 466

    val rename: (key * key) list -> 'a t -> 'a t
bguillaum's avatar
bguillaum committed
467
  end (* module type S *)
468

bguillaum's avatar
bguillaum committed
469
(* ================================================================================ *)
470 471 472 473 474 475 476 477 478 479 480
module Massoc_make (Ord: OrderedType) = struct
  module M = Map.Make (Ord)

  type key = Ord.t

  type 'a t = ('a list) M.t

  let empty = M.empty

  let is_empty t = (t=empty)

bguillaum's avatar
bguillaum committed
481 482
  let assoc key t =
    try M.find key t
483 484
    with Not_found -> []

bguillaum's avatar
bguillaum committed
485
  let to_string _ _ = failwith "Not implemted"
486 487

  let iter fct t =
bguillaum's avatar
bguillaum committed
488
    M.iter
489 490 491
      (fun key list -> List.iter (fun elt -> fct key elt) list
      ) t

bguillaum's avatar
bguillaum committed
492
  let add key elt t =
493 494
    try
      let list = M.find key t in
bguillaum's avatar
bguillaum committed
495
      match List_.usort_insert elt list with
496 497 498 499 500 501 502
        | Some l -> Some (M.add key l t)
        | None -> None
    with Not_found -> Some (M.add key [elt] t)

  let fold fct init t =
    M.fold
      (fun key list acc ->
bguillaum's avatar
bguillaum committed
503
        List.fold_left
504 505 506 507
          (fun acc2 elt ->
            fct acc2 key elt)
          acc list)
      t init
bguillaum's avatar
bguillaum committed
508

509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525
  (* Not found raised in the value is not defined *)
  let remove key value t =
    match M.find key t with
      | [one] when one=value -> M.remove key t
      | old -> M.add key (List_.usort_remove value old) t

  let rec remove_key key t = M.remove key t

  let rec mem key value t =
    try List_.sort_mem value (M.find key t)
    with Not_found -> false

  let rec mem_key key t = M.mem key t

  exception Not_disjoint

  let disjoint_union t1 t2 =
bguillaum's avatar
bguillaum committed
526
    M.fold
527
      (fun key list acc ->
bguillaum's avatar
bguillaum committed
528
        try
529 530 531 532 533 534 535 536
          let old = M.find key acc in
          M.add key (List_.sort_disjoint_union list old) acc
        with
          | Not_found -> M.add key list acc
          | List_.Not_disjoint -> raise Not_disjoint
      ) t1 t2

  exception Duplicate
bguillaum's avatar
bguillaum committed
537

538 539 540 541 542
  let merge_key i j t =
    try
      let old_i = M.find i t in
      let old_j = try M.find j t with Not_found -> [] in
      M.add j (List_.sort_disjoint_union old_i old_j) (M.remove i t)
bguillaum's avatar
bguillaum committed
543
    with
544 545 546
      | Not_found -> (* no key i *) t
      | List_.Not_disjoint -> raise Duplicate

bguillaum's avatar
bguillaum committed
547 548
(* New implementation of exists but exists fct not implemented in ocaml < 3.12 *)
(*
549 550 551 552 553
  let exists fct t =
    M.exists
      (fun key list ->
        List.exists (fun elt -> fct key elt) list
      ) t
bguillaum's avatar
bguillaum committed
554 555 556 557 558 559 560 561 562 563 564 565 566
*)

  exception True
  let exists fct t =
    try
      M.iter
        (fun key list ->
          if List.exists (fun elt -> fct key elt) list
          then raise True
        ) t;
      false
    with True -> true

567 568 569 570 571 572 573
  let rename mapping t =
    M.fold
      (fun key value acc ->
        let new_key = try List.assoc key mapping with Not_found -> key in
        M.add new_key value acc
      ) t M.empty

574 575
end (* module Massoc_make *)

bguillaum's avatar
bguillaum committed
576
(* ================================================================================ *)
577
module Massoc_gid = Massoc_make (Gid)
pj2m's avatar
pj2m committed
578

bguillaum's avatar
bguillaum committed
579 580 581
(* ================================================================================ *)
module Massoc_pid = Massoc_make (Pid)

pj2m's avatar
pj2m committed
582

bguillaum's avatar
bguillaum committed
583
(* ================================================================================ *)
pj2m's avatar
pj2m committed
584 585 586 587 588 589 590 591 592 593 594
module Id = struct
  type name = string

  type t = int

  type table = name array

  let build ?(loc:Loc.t option) string table =
    try Array_.dicho_find string table
    with Not_found -> Error.build ?loc "Identifier '%s' not found" string

bguillaum's avatar
bguillaum committed
595
  let build_opt string table =
pj2m's avatar
pj2m committed
596 597
    try Some (Array_.dicho_find string table)
    with Not_found -> None
bguillaum's avatar
bguillaum committed
598
end (* module Id *)
599

bguillaum's avatar
bguillaum committed
600
(* ================================================================================ *)
601
module Html = struct
bguillaum's avatar
bguillaum committed
602 603
  let css = "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<link rel=\"stylesheet\" href=\"style.css\" type=\"text/css\">"

604 605 606 607 608 609 610 611 612 613 614
  let enter out_ch ?title ?header base_name =
    fprintf out_ch "<html>\n";
    (match title with
    | Some t -> fprintf out_ch "<head>\n%s\n<title>%s</title>\n</head>\n" css t
    | None -> fprintf out_ch "<head>\n%s\n</head>\n" css
    );
    fprintf out_ch "<body>\n";

    (match header with None -> () | Some s -> fprintf out_ch "%s\n" s);

    (match title with
bguillaum's avatar
bguillaum committed
615
    | Some t -> fprintf out_ch "<h1>%s</h1>\n" t
616 617
    | None -> ()
    )
bguillaum's avatar
bguillaum committed
618
  let leave out_ch =
619 620
    fprintf out_ch "</body>\n";
    fprintf out_ch "</html>\n";
bguillaum's avatar
bguillaum committed
621
end  (* module Html *)
622

bguillaum's avatar
bguillaum committed
623
(* ================================================================================ *)
bguillaum's avatar
bguillaum committed
624 625
module Conll = struct
  type line = {
bguillaum's avatar
bguillaum committed
626
      line_num: int;
bguillaum's avatar
bguillaum committed
627
      num: string;
bguillaum's avatar
bguillaum committed
628 629 630 631 632
      phon: string;
      lemma: string;
      pos1: string;
      pos2: string;
      morph: (string * string) list;
bguillaum's avatar
bguillaum committed
633
      deps: (string * string ) list;
bguillaum's avatar
bguillaum committed
634
    }
bguillaum's avatar
bguillaum committed
635

bguillaum's avatar
bguillaum committed
636
  let root = { line_num = -1; num="0"; phon="ROOT"; lemma="__"; pos1="_X"; pos2=""; morph=[]; deps=[] }
bguillaum's avatar
bguillaum committed
637

638 639 640 641 642 643 644 645
  let line_to_string l =
    let (gov_list, lab_list) = List.split l.deps in
    sprintf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"
      l.num l.phon l.lemma l.pos1 l.pos2
      (match l.morph with [] -> "_" | list -> String.concat "|" (List.map (fun (f,v) -> sprintf "%s=%s" f v) list))
      (String.concat "|" (gov_list))
      (String.concat "|" (lab_list))

bguillaum's avatar
bguillaum committed
646 647 648 649 650 651 652 653 654 655 656 657 658 659
  let parse_morph file_name line_num = function
    | "_" -> []
    | morph ->
      List.map
        (fun feat ->
          match Str.split (Str.regexp "=") feat with
            | [feat_name] -> (feat_name, "true")
            | [feat_name; feat_value] -> (feat_name, feat_value)
            | _ -> Error.build ~loc:(file_name,line_num) "[Conll.load] illegal morphology \n>>>>>%s<<<<<<" morph
        ) (Str.split (Str.regexp "|") morph)

  let underscore s = if s = "" then "_" else s
  let parse_line file_name (line_num, line) =
    try
bguillaum's avatar
bguillaum committed
660
      match Str.split (Str.regexp "\t") line with
bguillaum's avatar
bguillaum committed
661 662 663 664
        | [ num; phon; lemma; pos1; pos2; morph; govs; dep_labs; _; _ ] ->
          let gov_list = if govs = "_" then [] else Str.split (Str.regexp "|") govs
          and lab_list = if dep_labs = "_" then [] else Str.split (Str.regexp "|") dep_labs in
          let deps = List.combine gov_list lab_list in
bguillaum's avatar
bguillaum committed
665
          {line_num = line_num;
bguillaum's avatar
bguillaum committed
666
           num = num;
bguillaum's avatar
bguillaum committed
667 668 669 670 671
           phon = underscore phon;
           lemma = underscore lemma;
           pos1 = underscore pos1;
           pos2 = underscore pos2;
           morph = parse_morph file_name line_num morph;
672
           deps = deps;
bguillaum's avatar
bguillaum committed
673
         }
bguillaum's avatar
bguillaum committed
674 675 676 677 678 679 680
        | l ->
          Error.build ~loc:(file_name,line_num) "[Conll.load] illegal line, %d fields (10 are expected)\n>>>>>%s<<<<<<" (List.length l) line
    with exc -> Error.build ~loc:(file_name,line_num) "[Conll.load] illegal line, exc=%s\n>>>>>%s<<<<<<" (Printexc.to_string exc) line

  let load file_name =
    let lines = File.read_ln file_name in
    List.map (parse_line file_name) lines
bguillaum's avatar
bguillaum committed
681

bguillaum's avatar
bguillaum committed
682
  let parse file_name lines = List.map (parse_line file_name) lines
683 684 685

    (* We would prefer to compare the float equivalent of l1.num l2.num but this would break the dicho_find function *)
  let compare l1 l2 = Pervasives.compare ((* float_of_string *) l1.num) ((* float_of_string *) l2.num)
bguillaum's avatar
bguillaum committed
686
end (* module Conll *)
687

bguillaum's avatar
bguillaum committed
688
(* ================================================================================ *)
bguillaum's avatar
bguillaum committed
689
(* This module defines a type for lexical parameter (i.e. one line in a lexical file) *)
690 691 692 693 694 695
module Lex_par = struct

  type item = string list * string list (* first list: pattern parameters $id , second list command parameters @id *)

  type t = item list

696 697 698
  let empty=[]
  let append = List.append

699 700 701 702 703 704 705 706
  let dump t =
    printf "[Lex_par.dump] --> size = %d\n" (List.length t);
    List.iter (fun (pp,cp) ->
      printf "%s##%s\n"
        (String.concat "#" pp)
        (String.concat "#" cp)
    ) t

bguillaum's avatar
bguillaum committed
707
  let rm_peripheral_white s =
708 709 710
    Str.global_replace (Str.regexp "\\( \\|\t\\)*$") ""
    (Str.global_replace (Str.regexp "^\\( \\|\t\\)*") "" s)

bguillaum's avatar
bguillaum committed
711
  let parse_line ?loc nb_p nb_c line =
712
    let line = rm_peripheral_white line in
713 714 715 716 717 718 719 720 721 722 723 724 725
    if line = "" || line.[0] = '%'
    then None
    else
      match Str.split (Str.regexp "##") line with
        | [args] when nb_c = 0 ->
          (match Str.split (Str.regexp "#") args with
            | l when List.length l = nb_p -> Some (l,[])
            | _ -> Error.bug ?loc
              "Illegal lexical parameter line: \"%s\" doesn't contain %d args"
              line nb_p)
        | [args; values] ->
          (match (Str.split (Str.regexp "#") args, Str.split (Str.regexp "#") values) with
            | (lp,lc) when List.length lp = nb_p && List.length lc = nb_c -> Some (lp,lc)
bguillaum's avatar
bguillaum committed
726
            | _ -> Error.bug ?loc
727 728 729
              "Illegal lexical parameter line: \"%s\" doesn't contain %d args and %d values"
              line nb_p nb_c)
        | _ -> Error.bug ?loc "Illegal param line: '%s'" line
bguillaum's avatar
bguillaum committed
730

731
  let from_lines ?loc nb_p nb_c lines = List_.opt_map (parse_line ?loc nb_p nb_c) lines
732

733
  let load ?loc dir nb_p nb_c file =
734
    try
bguillaum's avatar
bguillaum committed
735
      let full_file =
736 737 738 739
        if Filename.is_relative file
        then Filename.concat dir file
        else file in
      let lines = File.read full_file in
740
      List_.opt_mapi (fun i line -> parse_line ~loc:(full_file,i) nb_p nb_c line) lines
741 742
    with Sys_error _ -> Error.build ?loc "External lexical file '%s' not found" file

bguillaum's avatar
bguillaum committed
743 744 745
  let sub x y = List.mem x (Str.split (Str.regexp "|") y)

  let filter index atom t =
bguillaum's avatar
bguillaum committed
746
    match
bguillaum's avatar
bguillaum committed
747 748 749
      List_.opt_map
        (fun (p_par, c_par) ->
          let par = List.nth p_par index in
bguillaum's avatar
bguillaum committed
750
          if atom=par
bguillaum's avatar
bguillaum committed
751 752 753 754 755 756
          then Some (p_par, c_par)
          else
            if sub atom par (* atom is one of the values of the disjunction par *)
            then Some (List_.set index atom p_par, c_par)
            else None
        ) t
bguillaum's avatar
bguillaum committed
757
    with
758
    | [] -> None
bguillaum's avatar
bguillaum committed
759
    | t -> Some t
bguillaum's avatar
bguillaum committed
760 761 762 763 764

  let get_param_value index = function
    | [] -> Error.bug "[Lex_par.get_command_value] empty parameter"
    | (params,_)::_ -> List.nth params index

765 766 767
  let get_command_value index = function
    | [(_,one)] -> List.nth one index
    | [] -> Error.bug "[Lex_par.get_command_value] empty parameter"
bguillaum's avatar
bguillaum committed
768 769 770 771 772 773
    | (_,[sing])::tail when index=0 ->
        Printf.sprintf "%s/%s"
          sing
          (List_.to_string
             (function
               | (_,[s]) -> s
bguillaum's avatar
bguillaum committed
774 775 776
               | _ -> Error.bug "[Lex_par.get_command_value] inconsistent param"
             ) "/" tail
          )
777
    | l -> Error.run "Lexical parameter are not functionnal"
778

bguillaum's avatar
bguillaum committed
779
end (* module Lex_par *)
780

bguillaum's avatar
bguillaum committed
781
(* ================================================================================ *)
bguillaum's avatar
bguillaum committed
782 783 784
(* copy from leopar *)
module Timeout = struct
  exception Stop
785

bguillaum's avatar
bguillaum committed
786 787
  let counter = ref 0.
  let timeout = ref None
bguillaum's avatar
bguillaum committed
788

bguillaum's avatar
bguillaum committed
789 790 791
  let start () = counter := Unix.time ()

  let check () =
bguillaum's avatar
bguillaum committed
792
    match !timeout with
bguillaum's avatar
bguillaum committed
793 794 795 796
    | None -> ()
    | Some delay ->
        if Unix.time () -. !counter > delay
        then raise Stop
bguillaum's avatar
bguillaum committed
797
end (* module Timeout *)