grew_utils.ml 16.8 KB
Newer Older
pj2m's avatar
pj2m committed
1 2 3
open Log
open Printf 

bguillaum's avatar
bguillaum committed
4
module StringSet = Set.Make (String)
pj2m's avatar
pj2m committed
5 6 7 8
module StringMap = Map.Make (String)

module IntSet = Set.Make (struct type t = int let compare = Pervasives.compare end)

9 10 11 12 13 14 15 16
module IntMap = Map.Make (struct type t = int let compare = Pervasives.compare end)

module Pid = struct
  type t = int
  let compare = Pervasives.compare
end

module Pid_map =
pj2m's avatar
pj2m committed
17
  struct 
18
    include Map.Make (Pid)
pj2m's avatar
pj2m committed
19
(** returns the image of a map [m]*)
20

pj2m's avatar
pj2m committed
21 22 23
    exception True
    let exists fct map =
      try
24 25 26 27 28 29
        iter 
          (fun key value -> 
            if fct key value 
            then raise True
          ) map;
        false
pj2m's avatar
pj2m committed
30 31 32 33
      with True -> true

    let range key_set m = 
      IntSet.fold (fun k s -> (IntSet.add (find k m) s)) key_set IntSet.empty
34

pj2m's avatar
pj2m committed
35 36
    let keys m = 
      fold (fun k v s -> (IntSet.add k s)) m IntSet.empty
37

pj2m's avatar
pj2m committed
38 39
(* union of two maps*)
    let union_map m m' = fold (fun k v m'' -> (add k v m'')) m m'
40

pj2m's avatar
pj2m committed
41
    exception MatchNotInjective
42

pj2m's avatar
pj2m committed
43 44 45
(*
 * union of two injective maps having different ranges :
 * \forall x \neq y \in m: m(x) \neq m(y)
46
 * \forall x' \neq y' \in m': m'(x) \neq m'(y)W
pj2m's avatar
pj2m committed
47 48 49 50 51 52 53 54 55
 * \forall x \in m /\ m': m(x) = m'(x)
 * \forall x \in m : x \not\in\m' => \forall y \in m' m(x) \neq m'(y)
 *)
    let union_if m m' = 
      let keys_m = keys m in
      let keys_m' = keys m' in
      let inter_keys = IntSet.inter keys_m keys_m' in
      if IntSet.for_all (fun elt -> (find elt m) = (find elt m')) inter_keys
      then 
56 57 58 59 60 61 62
        let keys_s_m' = IntSet.diff keys_m' inter_keys in
        let range_m = range keys_m m in 
        let range_m' = range keys_s_m' m' in
        if (IntSet.inter range_m range_m') = IntSet.empty
        then union_map m m'
        else raise MatchNotInjective
      else raise MatchNotInjective          
pj2m's avatar
pj2m committed
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
  end
    
module Loc = struct
  type t = string * int 

  let to_string (file,line) = Printf.sprintf "(file: %s, line: %d)" (Filename.basename file) line

  let opt_to_string = function
    | None -> ""
    | Some x -> to_string x
end

 



module File = struct
  let write data name =
    let out_ch = open_out name in
    fprintf out_ch "%s\n" data;
    close_out out_ch
bguillaum's avatar
bguillaum committed
84 85 86 87 88 89

  let read file = 
    let in_ch = open_in file in
    let rev_lines = ref [] in
    try
      while true do
bguillaum's avatar
bguillaum committed
90
        let line = input_line in_ch in
91
        if (Str.string_match (Str.regexp "^[ \t]*$") line 0) || (line.[0] = '%')
bguillaum's avatar
bguillaum committed
92 93
        then ()
        else rev_lines := line :: !rev_lines
bguillaum's avatar
bguillaum committed
94 95 96 97
      done; assert false
    with End_of_file -> 
      close_in in_ch;
      List.rev !rev_lines
pj2m's avatar
pj2m committed
98 99 100 101 102 103 104 105
 end

module Array_ = struct
  let dicho_mem elt array =
    let rec loop low high =
      (if low > high 
      then false
      else
106 107 108 109
        match (low+high)/2 with
        | middle when array.(middle) = elt -> true
        | middle when array.(middle) < elt -> loop (middle+1) high
        | middle -> loop low (middle - 1)
pj2m's avatar
pj2m committed
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
      ) in 
    loop 0 ((Array.length array) - 1)

  (* dichotomic search in a sorted array *)
  let dicho_find elt array =
    let rec loop low high =
      (if low > high then raise Not_found);
      match (low+high)/2 with
      | middle when array.(middle) = elt -> middle
      | middle when array.(middle) < elt -> loop (middle+1) high
      | middle -> loop low (middle - 1) in 
    loop 0 ((Array.length array) - 1)
      
  let dicho_find_assoc elt array =
    let rec loop low high =
      (if low > high then raise Not_found);
      match (low+high)/2 with
      | middle when fst array.(middle) = elt -> middle
      | middle when fst array.(middle) < elt -> loop (middle+1) high
      | middle -> loop low (middle - 1) in 
    loop 0 ((Array.length array) - 1)
end


module List_ = struct
bguillaum's avatar
bguillaum committed
135 136 137 138 139
  let rec set position elt = function
    | [] -> failwith "List_.set"
    | _::t when position = 0 -> elt::t
    | x::t -> x:: (set (position-1) elt t)

pj2m's avatar
pj2m committed
140 141 142 143 144
  let rec rm elt = function
    | [] -> raise Not_found
    | x::t when x=elt -> t
    | x::t -> x::(rm elt t)

bguillaum's avatar
bguillaum committed
145 146 147 148 149 150 151
  let pos x l = 
    let rec loop i = function
    | [] -> None
    | h::t when h=x -> Some i
    | _::t -> loop (i+1) t in
    loop 0 l

pj2m's avatar
pj2m committed
152 153 154 155 156 157 158 159
  let rec opt = function
    | [] -> []
    | None :: t -> opt t
    | Some x :: t -> x :: (opt t)

  let rec opt_map f = function
    | [] -> []
    | x::t -> 
160 161 162
        match f x with
        | None -> opt_map f t
        | Some r -> r :: (opt_map f t)
pj2m's avatar
pj2m committed
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182

  let rec flat_map f = function
    | [] -> []
    | x::t -> (f x)@(flat_map f t)

  let iteri fct = 
    let rec loop i = function 
      | [] -> ()
      | h::t -> (fct i h); (loop (i+1) t) in
    loop 0
      
  let mapi fct = 
    let rec loop i = function 
      | [] -> []
      | h::t -> let head = fct i h in head :: (loop (i+1) t)
    in loop 0

  let foldi_left f init l =
    fst 
      (List.fold_left 
183 184
         (fun (acc,i) elt -> (f i acc elt, i+1))
         (init,0) l
pj2m's avatar
pj2m committed
185 186 187 188 189 190
      )

  let rec remove elt = function
    | [] -> raise Not_found
    | a::tail when a = elt -> tail
    | a::tail -> a::(remove elt tail)
191

pj2m's avatar
pj2m committed
192 193 194
  let to_string string_of_item sep = function
    | [] -> ""
    | h::t -> List.fold_left (fun acc elt -> acc ^ sep ^ (string_of_item elt)) (string_of_item h) t
195

pj2m's avatar
pj2m committed
196 197 198 199 200 201 202 203 204
  let rec sort_insert elt = function
    | [] -> [elt]
    | h::t when elt<h -> elt::h::t 
    | h::t -> h::(sort_insert elt t)

  let rec sort_mem elt = function
    | [] -> false
    | h::_ when elt<h -> false
    | h::_ when elt=h -> true
205
    | h::t (* when elt>h *) -> sort_mem elt t
206 207 208 209 210 211 212 213 214 215 216 217 218 219

  let rec sort_assoc key = function
    | [] -> None
    | (k,_)::_ when key<k -> None
    | (k,_)::t when key>k -> sort_assoc key t
    | (_,v)::_ -> Some v 

  let rec sort_remove_assoc key = function
    | [] -> []
    | (k,_)::_ as t when key<k -> t
    | (k,v)::t when key>k -> (k,v) :: (sort_remove_assoc key t)
    | (_,v)::t -> t


pj2m's avatar
pj2m committed
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
  exception Usort

  let rec usort_remove key = function 
    | [] -> raise Not_found
    | x::t when key < x -> raise Not_found
    | x::t when key = x -> t
    | x::t -> x::(usort_remove key t)

  let usort_insert ?(compare=Pervasives.compare) elt l =
    let rec loop = function
      | [] -> [elt]
      | x::t when compare elt x < 0 -> elt :: x :: t
      | x::t when compare elt x > 0 -> x :: (loop t)
    | _ -> raise Usort in
    try Some (loop l) with Usort -> None
235

pj2m's avatar
pj2m committed
236 237 238 239 240 241
  let rec sort_disjoint l1 l2 = 
    match (l1,l2) with
    | [], _ | _, [] -> true
    | h1::t1 , h2::t2 when h1<h2 -> sort_disjoint t1 l2
    | h1::t1 , h2::t2 when h1>h2 -> sort_disjoint l1 t2
    | _ -> false 
242

pj2m's avatar
pj2m committed
243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294
  let sort_is_empty_inter l1 l2 = 
    let rec loop = function
      | [], _ | _, [] -> true
      | x1::t1, x2::t2 when x1 < x2 -> loop (t1, x2::t2)
      | x1::t1, x2::t2 when x1 > x2 -> loop (x1::t1, t2)
      | x1::t1, x2::t2 -> false in
    loop (l1,l2) 

  let sort_inter l1 l2 = 
    let rec loop = function
      | [], _ | _, [] -> []
      | x1::t1, x2::t2 when x1 < x2 -> loop (t1, x2::t2)
      | x1::t1, x2::t2 when x1 > x2 -> loop (x1::t1, t2)
      | x1::t1, x2::t2 -> x1 :: loop (t1, t2) in
    loop (l1,l2)
      
  exception Not_disjoint
  let sort_disjoint_union ?(compare=Pervasives.compare) l1 l2 = 
    let rec loop = function
      | [], l | l, [] -> l
      | x1::t1, x2::t2 when (compare x1 x2) < 0 -> x1 :: loop (t1, x2::t2)
      | x1::t1, x2::t2 when (compare x1  x2) > 0 -> x2 :: loop (x1::t1, t2)
      | _ -> raise Not_disjoint in
    loop (l1,l2)
      
  let sort_include l1 l2 = 
    let rec loop = function
      | [], l -> true
      | l, [] -> false
      | x1::t1, x2::t2 when x1 < x2 -> false
      | x1::t1, x2::t2 when x1 > x2 -> loop (x1::t1, t2)
      | x1::t1, x2::t2 -> loop (t1, t2) in
    loop (l1,l2)
      
  let sort_included_diff l1 l2 = 
    let rec loop = function
      | [], l -> failwith "[sort_included_diff] not included"
      | l, [] -> l
      | x1::t1, x2::t2 when x1 < x2 -> x1 :: loop (t1, x2::t2)
      | x1::t1, x2::t2 when x1 > x2 -> failwith "[sort_included_diff] not included"
      | x1::t1, x2::t2 -> loop (t1, t2) in
    loop (l1,l2)

  let sort_diff l1 l2 = 
    let rec loop = function
      | [], l -> []
      | l, [] -> l
      | x1::t1, x2::t2 when x1 < x2 -> x1 :: loop (t1, x2::t2)
      | x1::t1, x2::t2 when x1 > x2 -> loop (x1::t1, t2)
      | x1::t1, x2::t2 -> loop (t1, t2) in
    loop (l1,l2)

295 296 297 298 299 300
  let foldi_left f init l =
    fst 
      (List.fold_left 
	 (fun (acc,i) elt -> (f i acc elt, i+1))
	 (init,0) l
      )
pj2m's avatar
pj2m committed
301 302 303 304 305 306 307
end


module Massoc = struct
  (* Massoc is implemented with caml lists *)
  (* invariant: we suppose that all 'a list in the structure are not empty! *) 
  type 'a t = (int * 'a list) list
308

pj2m's avatar
pj2m committed
309 310 311 312 313 314 315 316 317 318 319 320 321
  let empty = []

  let is_empty t = (t=[])

  let rec assoc key = function
    | [] -> []
    | (h,_)::_ when key<h -> []
    | (h,v)::t when key=h -> v
    | (h,_)::t (* when key>h *) -> assoc key t

  let to_string elt_to_string t = 
    List_.to_string 
      (fun (i,elt_list) -> 
322
        sprintf "%d -> [%s]" i (List_.to_string elt_to_string "," elt_list)
pj2m's avatar
pj2m committed
323 324 325 326 327
      ) "; " t
    
  let iter fct t =
    List.iter 
      (fun (key,list) ->
328 329 330
        List.iter 
          (fun elt -> fct key elt)
          list
pj2m's avatar
pj2m committed
331
      ) t
332

pj2m's avatar
pj2m committed
333 334 335
  let rec add key elt = function
    | [] -> Some [(key, [elt])]
    | (h,list)::t when h=key -> 
336 337 338 339
        (match List_.usort_insert elt list with 
        | Some new_list -> Some ((h, new_list)::t)
        | None -> None
        )
pj2m's avatar
pj2m committed
340 341
    | ((h,_)::_) as t when key<h -> Some ((key,[elt])::t)
    | (h,l)::t (* when key>h *) -> 
342 343
        match (add key elt t) with Some t' -> Some ((h,l)::t') | None -> None

pj2m's avatar
pj2m committed
344 345 346
  let fold_left fct init t =
    List.fold_left 
      (fun acc (key,list) ->
347 348 349 350
        List.fold_left 
          (fun acc2 elt ->
            fct acc2 key elt)
          acc list)
pj2m's avatar
pj2m committed
351
      init t
352

pj2m's avatar
pj2m committed
353 354 355 356 357 358 359 360 361 362 363 364
  let rec remove key value = function
    | [] -> raise Not_found 
    | (h,_)::_ when key<h -> raise Not_found 
    | (h,[v])::t when key=h && value=v -> t 
    | (h,list)::t when key=h -> (h,List_.usort_remove value list)::t
    | (h,list)::t (* when key>h *) -> (h,list) :: (remove key value t)

  let rec remove_key key = function
    | [] -> raise Not_found
    | (h,_)::_ when key<h -> raise Not_found 
    | (h,list)::t when key=h -> t
    | (h,list)::t (* when key>h *) -> (h,list) :: (remove_key key t)
365

pj2m's avatar
pj2m committed
366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384
  let rec mem key value = function
    | [] -> false
    | (h,_)::_ when key<h -> false
    | (h,list)::t when key=h -> List_.sort_mem value list
    | (h,list)::t (* when key>h *) -> mem key value t

  let rec mem_key key = function
    | [] -> false
    | (h,_)::_ when key<h -> false
    | (h,_)::t when key=h -> true
    | (h,_)::t (* when key>h *) -> mem_key key t

  exception Not_disjoint
  let disjoint_union t1 t2 = 
    let rec loop = function
      | [], t | t, [] -> t
      | ((h1,l1)::t1, (h2,l2)::t2) when h1 < h2 -> (h1,l1)::(loop (t1,((h2,l2)::t2)))
      | ((h1,l1)::t1, (h2,l2)::t2) when h1 > h2 -> (h2,l2)::(loop (((h1,l1)::t1),t2))
      | ((h1,l1)::t1, (h2,l2)::t2) (* when h1=h2*) ->
385 386
          try (h1,List_.sort_disjoint_union l1 l2)::(loop (t1, t2))
          with List_.Not_disjoint -> raise Not_disjoint
pj2m's avatar
pj2m committed
387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402
    in loop (t1, t2)

  exception Duplicate
  let merge_key i j t =
    try
      let i_list = List.assoc i t in
      disjoint_union (remove_key i t) [j,i_list]
    with 
    | Not_found -> (* no key i *) t
    | Not_disjoint -> raise Duplicate


  let exists fct t = List.exists (fun (key,list) -> List.exists (fun value -> fct key value) list) t
end

module Error = struct
bguillaum's avatar
bguillaum committed
403 404 405 406 407

  exception Build of (string * Loc.t option)
  exception Run of (string * Loc.t option)
  exception Bug of (string * Loc.t option)

pj2m's avatar
pj2m committed
408 409 410 411 412 413 414 415
  let build_ ?loc message = 
    Log.fmessage "[%s] %s" (match loc with None -> "?" | Some x -> Loc.to_string x) message;
    raise (Build (message, loc))
  let build ?loc = Printf.ksprintf (build_ ?loc)

  let run_ ?loc message = raise (Run (message, loc))
  let run ?loc = Printf.ksprintf (run_ ?loc)

bguillaum's avatar
bguillaum committed
416 417
  let bug_ ?loc message = raise (Bug (message, loc))
  let bug ?loc = Printf.ksprintf (bug_ ?loc)
pj2m's avatar
pj2m committed
418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436
end



module Id = struct
  type name = string

  type t = int

  type table = name array

  let build ?(loc:Loc.t option) string table =
    try Array_.dicho_find string table
    with Not_found -> Error.build ?loc "Identifier '%s' not found" string

  let build_opt string table = 
    try Some (Array_.dicho_find string table)
    with Not_found -> None
end
437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459

module Html = struct
  let css = "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<link rel=\"stylesheet\" href=\"style.css\" type=\"text/css\">" 
      
  let enter out_ch ?title ?header base_name =
    fprintf out_ch "<html>\n";
    (match title with
    | Some t -> fprintf out_ch "<head>\n%s\n<title>%s</title>\n</head>\n" css t
    | None -> fprintf out_ch "<head>\n%s\n</head>\n" css
    );
    fprintf out_ch "<body>\n";

    (match header with None -> () | Some s -> fprintf out_ch "%s\n" s);

    (match title with
    | Some t -> fprintf out_ch "<h1>%s</h1>\n" t 
    | None -> ()
    )
  let leave out_ch = 
    fprintf out_ch "</body>\n";
    fprintf out_ch "</html>\n";
end      

bguillaum's avatar
bguillaum committed
460 461 462 463 464 465 466 467 468 469 470 471
module Conll = struct
  type line = {
      num: int;
      phon: string;
      lemma: string;
      pos1: string;
      pos2: string;
      morph: (string * string) list;
      gov: int;
      dep_lab: string;
    }
        
bguillaum's avatar
bguillaum committed
472 473 474 475 476 477 478 479 480 481 482
  let parse_morph = function
    | "_" -> []
    | morph ->  
        List.map 
          (fun feat -> 
            match Str.split (Str.regexp "=") feat with
            | [feat_name] -> (feat_name, "true")
            | [feat_name; feat_value] -> (feat_name, feat_value)
            | _ -> Log.fcritical "Cannot not parse CONLL feat '%s' (too many '=')" morph
          ) (Str.split (Str.regexp "|") morph)
          
483
  let escape_quote s = Str.global_replace (Str.regexp "\"") "\\\"" s
bguillaum's avatar
bguillaum committed
484 485 486 487 488

  let parse line = 
    match Str.split (Str.regexp "\t") line with
    | [ num; phon; lemma; pos1; pos2; morph; gov; dep_lab; _; _ ] ->      
        {num = int_of_string num;
489 490
         phon = escape_quote phon;
         lemma = escape_quote lemma;
bguillaum's avatar
bguillaum committed
491 492 493 494 495 496 497 498
         pos1 = pos1;
         pos2 = pos2;
         morph = parse_morph morph;
         gov = int_of_string gov;
         dep_lab = dep_lab;
       }
    | _ -> Log.fcritical "Cannot not parse CONLL line '%s'" line
end
499 500 501 502 503 504 505 506

(* This module defiens a type for lexical parameter (i.e. one line in a lexical file) *)
module Lex_par = struct

  type item = string list * string list (* first list: pattern parameters $id , second list command parameters @id *)

  type t = item list

507 508 509 510
  let rm_peripheral_white s = 
    Str.global_replace (Str.regexp "\\( \\|\t\\)*$") ""
    (Str.global_replace (Str.regexp "^\\( \\|\t\\)*") "" s)

511
  let load ?loc dir nb_p nb_c file =
512
    try
513 514 515 516 517 518
      let full_file = 
        if Filename.is_relative file
        then Filename.concat dir file
        else file in
      
      let lines = File.read full_file in
519 520 521
      let param =
          (List.map
             (fun line ->
522
               let line = rm_peripheral_white line in
523 524 525 526
               match Str.split (Str.regexp "##") line with
               | [args] when nb_c = 0 ->
                   (match Str.split (Str.regexp "#") args with
                   | l when List.length l = nb_p -> (l,[])
527 528 529
                   | _ -> Error.bug 
                         "Illegal param line in file \"%s\", the line \"%s\" doesn't contain %d args"
                         full_file line nb_p)
530 531 532
               | [args; values] ->
                   (match (Str.split (Str.regexp "#") args, Str.split (Str.regexp "#") values) with
                   | (lp,lc) when List.length lp = nb_p && List.length lc = nb_c -> (lp,lc)
533 534 535 536
                   | _ -> Error.bug 
                         "Illegal param line in file \"%s\", the line \"%s\" doesn't contain %d args and %d values"
                         full_file line nb_p nb_c)
               | _ -> Error.bug "Illegal param line in file '%s' line '%s'" full_file line
537 538 539 540 541
             ) lines
          ) in
      param
    with Sys_error _ -> Error.build ?loc "External lexical file '%s' not found" file

bguillaum's avatar
bguillaum committed
542 543 544 545 546 547 548 549 550 551 552 553 554 555 556
  let sub x y = List.mem x (Str.split (Str.regexp "|") y)

  let filter index atom t =
    match 
      List_.opt_map
        (fun (p_par, c_par) ->
          let par = List.nth p_par index in
          if atom=par 
          then Some (p_par, c_par)
          else
            if sub atom par (* atom is one of the values of the disjunction par *)
            then Some (List_.set index atom p_par, c_par)
            else None
        ) t
    with 
557
    | [] -> None
bguillaum's avatar
bguillaum committed
558 559
    | t -> Some t
    
560 561 562
  let get_command_value index = function
    | [(_,one)] -> List.nth one index
    | [] -> Error.bug "[Lex_par.get_command_value] empty parameter"
563 564 565 566 567
    | (_,[sing])::tail when index=0 -> 
        Printf.sprintf "%s/%s" 
          sing 
          (List_.to_string (function (_,[s]) -> s | _ -> Error.bug "[Lex_par.get_command_value] inconsistent param") "/" tail)
    | l -> Error.run "Lexical parameter are not functionnal"
568 569 570

end

bguillaum's avatar
bguillaum committed
571 572 573
(* copy from leopar *)
module Timeout = struct
  exception Stop
574

bguillaum's avatar
bguillaum committed
575 576 577 578 579 580 581 582 583 584 585 586
  let counter = ref 0.
  let timeout = ref None
  
  let start () = counter := Unix.time ()

  let check () =
    match !timeout with 
    | None -> ()
    | Some delay ->
        if Unix.time () -. !counter > delay
        then raise Stop
end