global.ml 5.65 KB
Newer Older
bguillaum's avatar
New gui  
bguillaum committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
open Printf
open Log

(* File formats known to this module; [get_format] maps a file name to one of
   these constructors and [string_of_format] maps them back to a suffix. *)
type format =  Dep | Conll | Xml | Png | Svg | Pdf
(* Exception carrying a [format] value; it can serve as an early exit from an
   iteration once a format has been identified. *)
exception Found of format
(* [get_format file] returns the format designated by the extension of [file].

   The extension must be a real dotted suffix (".dep", ".conll", ".xml",
   ".png", ".svg" or ".pdf"): a name that merely ends with the same letters,
   such as "mydep", is no longer accepted as a match.

   When no known extension matches, the failure is reported through
   [Log.fcritical]. *)
let get_format file =
  let known_formats =
    [ ("dep", Dep); ("conll", Conll); ("xml", Xml);
      ("png", Png); ("svg", Svg); ("pdf", Pdf) ] in
  (* [Filename.check_suffix] is a plain string-suffix test, so the dot has to
     be part of the suffix that is checked. *)
  let has_extension (ext, _) = Filename.check_suffix file ("." ^ ext) in
  try snd (List.find has_extension known_formats)
  with Not_found ->
    Log.fcritical "Unknown file extension for file \"%s\"" file
(* [string_of_format fmt] is the lowercase name of [fmt], without a leading
   dot. *)
let string_of_format fmt =
  match fmt with
  | Pdf -> "pdf"
  | Svg -> "svg"
  | Png -> "png"
  | Xml -> "xml"
  | Conll -> "conll"
  | Dep -> "dep"



(* Name of the loaded input file, if any (shown by [file_label]). *)
let input_file : string option ref = ref None
(* NOTE(review): presumably the name of the file to write results to; it is
   not used in this part of the file, confirm against callers. *)
let output_file : string option ref = ref None

(* NOTE(review): meaning not visible here; the initial value is ["pos"]. *)
let current_infos = ref ["pos"]

(* Loaded entries, as (identifier, source text) pairs. *)
let current_array : (string * string) array ref = ref [||]
(* Position of the current focus in [current_array]; [None] when nothing is
   focused. *)
let current_position : int option ref = ref None

(* Source text of the focused entry, refreshed by [update_source]. *)
let current_source = ref ""
(* NOTE(review): presumably set when the current source has unsaved edits;
   confirm against callers. *)
let modified = ref false

(* NOTE(review): presumably a debug-mode switch; confirm against callers. *)
let debug = ref false

(* [get_pos ()] is the focused index in [current_array], or 0 when no
   position is set. *)
let get_pos () =
  match !current_position with
  | Some index -> index
  | None -> 0

(* -------------------------------------------------------------------------------- *)
(* [array_assoc key array] scans an array of (key, value) pairs from the left
   and returns [(index, value)] for the first pair whose key equals [key].
   Raises [Not_found] when no pair matches. *)
let array_assoc key array =
  let rec find_from idx =
    if idx >= Array.length array then raise Not_found
    else
      let (k, v) = array.(idx) in
      if k = key then (idx, v) else find_from (idx + 1)
  in
  find_from 0


(* -------------------------------------------------------------------------------- *)
(* [search_sentid sentid] moves the focus to the first entry of
   [current_array] whose identifier is [sentid]. Only [current_position] is
   updated; [current_source] is left as it was.
   Raises [Not_found] (from [array_assoc]) when no entry has that identifier. *)
let search_sentid sentid =
  let hit = array_assoc sentid !current_array in
  current_position := Some (fst hit)

(* -------------------------------------------------------------------------------- *)
(* Copy the source text of the focused entry into [current_source]; does
   nothing when no position is set. *)
let update_source () =
  match !current_position with
  | Some p ->
    let (_, src) = (!current_array).(p) in
    current_source := src
  | None -> ()

(* -------------------------------------------------------------------------------- *)
(* Move the focus to the first entry of [current_array] and refresh
   [current_source].

   When the array is empty there is nothing to focus on: the position is
   reset to [None] instead of pointing at the non-existent index 0, which
   made [update_source] raise [Invalid_argument]. *)
let first () =
  if Array.length !current_array = 0
  then current_position := None
  else begin
    current_position := Some 0;
    update_source ()
  end

(* -------------------------------------------------------------------------------- *)
(* Move the focus to the last entry of [current_array] and refresh
   [current_source].

   When the array is empty there is nothing to focus on: the position is
   reset to [None] instead of becoming [Some (-1)], which made
   [update_source] raise [Invalid_argument]. *)
let last () =
  let len = Array.length !current_array in
  if len = 0
  then current_position := None
  else begin
    current_position := Some (len - 1);
    update_source ()
  end
(* [has_more_than_one ()] is true when [current_array] holds at least two
   entries. *)
let has_more_than_one () =
  let count = Array.length !current_array in
  count > 1
(* -------------------------------------------------------------------------------- *)
(* Advance the focus by one entry and refresh [current_source]; does nothing
   when no position is set or when the focus is not before the last entry. *)
let next () =
  let last_index = Array.length !current_array - 1 in
  match !current_position with
  | Some p when p < last_index ->
    current_position := Some (p + 1);
    update_source ()
  | Some _ | None -> ()

(* -------------------------------------------------------------------------------- *)
(* [has_next ()] is true when a position is set and it lies strictly before
   the last index of [current_array]. *)
let has_next () =
  match !current_position with
  | None -> false
  | Some p -> p < Array.length !current_array - 1

(* -------------------------------------------------------------------------------- *)
(* Move the focus back by one entry and refresh [current_source]; does
   nothing when no position is set or when the position is not above 0. *)
let prev () =
  match !current_position with
  | None -> ()
  | Some p ->
    if p > 0 then begin
      current_position := Some (p - 1);
      update_source ()
    end

(* -------------------------------------------------------------------------------- *)
(* [has_prev ()] is true when a position is set and it is strictly greater
   than 0. *)
let has_prev () =
  match !current_position with
  | None -> false
  | Some p -> p > 0

(* -------------------------------------------------------------------------------- *)
(* [view_label ()] builds the label describing the focused entry:
   - "No dep" when the array is empty and no position is set;
   - for a single entry focused at position 0: "no_id" when its identifier is
     "00001", otherwise the identifier between square brackets;
   - otherwise "(k/n) [id]" where k is the 1-based position and n the number
     of entries, with "no_id" in place of the identifier when the identifier
     equals the position formatted with "%05d".
   A non-empty array with no position is reported through [Log.critical].

   NOTE(review): the single-entry case treats "00001" as the placeholder
   identifier at position 0, whereas the general case compares against the
   0-based position (that is "00000" at position 0); confirm which numbering
   is intended. *)
let view_label () =
  let entries = !current_array in
  let total = Array.length entries in
  match !current_position with
  | None when total = 0 -> "No dep"
  | None -> Log.critical "Inconsistent state"
  | Some 0 when total = 1 ->
    let (id, _) = entries.(0) in
    if id = "00001" then "no_id" else sprintf "[%s]" id
  | Some p ->
    let (id, _) = entries.(p) in
    let shown = if id = sprintf "%05d" p then "no_id" else id in
    sprintf "(%d/%d) [%s]" (p+1) total shown

(* -------------------------------------------------------------------------------- *)
(* [file_label ()] is the name stored in [input_file], or "No file loaded"
   when none is set. *)
let file_label () =
  match !input_file with
  | Some in_file -> in_file
  | None -> "No file loaded"

(* -------------------------------------------------------------------------------- *)
(* [write file string] writes [string] followed by a newline to [file],
   creating the file or truncating it if it already exists.

   The channel is closed even when writing fails, so a failed write does not
   leak the file descriptor; the original exception is then re-raised.
   Raises [Sys_error] when the file cannot be opened or written. *)
let write file string =
  let out_ch = open_out file in
  (try
     output_string out_ch string;
     output_char out_ch '\n'
   with e -> close_out_noerr out_ch; raise e);
  close_out out_ch

(* -------------------------------------------------------------------------------- *)
(* [save file] writes the source text of every entry of [current_array] to
   [file], in array order, each followed by an empty line. Identifiers are
   not written.

   The channel is closed even when writing fails, so a failed write does not
   leak the file descriptor; the original exception is then re-raised.
   Raises [Sys_error] when the file cannot be opened or written. *)
let save file =
  let out_ch = open_out file in
  (try
     Array.iter
       (fun (_, src) -> Printf.fprintf out_ch "%s\n\n" src)
       !current_array
   with e -> close_out_noerr out_ch; raise e);
  close_out out_ch

(* -------------------------------------------------------------------------------- *)
(* [load file] reads [file] and replaces [current_array] with the entries it
   contains, as (identifier, source text) pairs in file order.

   Entries are runs of non-empty lines separated by empty lines. The
   identifier of an entry is taken from the first of its lines that yields
   one:
   - a line with exactly ten tab-separated fields whose sixth field is "_"
     yields nothing, and the next line is examined;
   - a line with exactly ten fields whose sixth field is a "|"-separated list
     of name=value features yields the value of the "sentid" feature, or a
     generated "%05d.conll" identifier when there is no such feature;
   - any other line yields a generated "%05d.conll" identifier.
   The number in a generated identifier is the count of lines examined so far.
   NOTE(review): the ten-field layout presumably is CoNLL; confirm.

   NOTE(review): when every line of an entry yields nothing, the entry is not
   flushed at the empty line and its text stays in the buffer, so it is
   merged into the following entry or dropped at end of file; confirm whether
   this is intended.

   The input channel is now closed on every path, including when a malformed
   feature makes the parsing fail.
   Raises [Sys_error] when the file cannot be opened and [Failure] when a
   feature is not of the form name=value. *)
let load file =
  let in_ch = open_in file in
  (* Compiled once instead of once per line. In Str syntax a bare "|" is a
     literal character, not an alternation. *)
  let tab_re = Str.regexp "\t" in
  let feat_sep_re = Str.regexp "|" in
  let eq_re = Str.regexp "=" in
  let buff = Buffer.create 32 in
  let cpt = ref 0 in
  let sentid = ref None in
  let current_list = ref [] in
  let generated_id () = Printf.sprintf "%05d.conll" !cpt in
  (* Identifier carried by [line], if any. *)
  let sentid_of_line line =
    match Str.split tab_re line with
    | [_;_;_;_;_;"_";_;_;_;_] -> None
    | [_;_;_;_;_;fs_string;_;_;_;_] ->
      let fs =
        List.map
          (fun feat_string ->
            match Str.split eq_re feat_string with
            | [name;value] -> (name,value)
            | _ -> failwith (Printf.sprintf "#1 >>%S<<\n%!" feat_string))
          (Str.split feat_sep_re fs_string) in
      (try Some (List.assoc "sentid" fs) with Not_found -> Some (generated_id ()))
    | _ -> Some (generated_id ()) in
  try
    while true do
      match (!sentid, input_line in_ch) with
      | None, "" -> ()
      | Some si, "" ->
        (* An empty line closes the entry being read. *)
        current_list := (si, Buffer.contents buff) :: !current_list;
        Buffer.clear buff;
        sentid := None
      | Some _, line -> Printf.bprintf buff "%s\n" line
      | None, line ->
        incr cpt;
        sentid := sentid_of_line line;
        Printf.bprintf buff "%s\n" line
    done
  with
  | End_of_file ->
    close_in in_ch;
    (* Flush a last entry that is not followed by an empty line. *)
    (match !sentid with
      | Some si -> current_list := (si, Buffer.contents buff) :: !current_list
      | None -> ());
    current_array := Array.of_list (List.rev !current_list)
  | e ->
    close_in_noerr in_ch;
    raise e