Commit 0d7e430b authored by Bruno Guillaume
Browse files

get rid of old implementation

parent 1e39f584
......@@ -2,5 +2,5 @@ name = "conll"
description = "Libcaml-conll"
version = "&VERSION&"
requires = ""
archive(byte) = "conll.cma conllx.cma"
archive(native) = "conll.cmxa conllx.cmxa"
archive(byte) = "conllx.cma"
archive(native) = "conllx.cmxa"
OCB_FLAGS = -use-ocamlfind -I src
OCB = ocamlbuild $(OCB_FLAGS)
LIB_FILES = conll.cma conll.cmxa conll.a conll.cmi conll.cmx conll.cmxs conll_types.cmi conll_types.cmx \
conllx.cma conllx.cmxa conllx.a conllx.cmi conllx.cmx conllx.cmxs
LIB_FILES = conllx.cma conllx.cmxa conllx.a conllx.cmi conllx.cmx conllx.cmxs
INSTALL_FILES = $(LIB_FILES:%=_build/src/%)
VERSION = `cat VERSION`
......
This diff is collapsed.
open Conll_types
(* Exception carrying a JSON payload (a [Yojson.Basic.t] value).
   NOTE(review): presumably raised when CoNLL data cannot be processed, with the
   payload describing the problem; the raising sites are not visible here — confirm. *)
exception Conll_error of Yojson.Basic.t
(* Helpers operating on raw sentence text. *)
module Sentence : sig
(* [fr_clean_spaces s] returns a string computed from [s].
   NOTE(review): judging by the name, this normalises spacing following French
   typographic conventions; the implementation is not visible here — confirm the rules. *)
val fr_clean_spaces: string -> string
end
(* An identifier paired with an optional integer. *)
module Id_with_proj: sig
(* The type reads as [Id.t * (int option)]: only the integer is optional.
   NOTE(review): the integer is presumably a "projection" index, as the module
   name suggests; its exact meaning is not shown here — confirm. *)
type t = Id.t * int option
end
(* Sets of [Id_with_proj.t] values, exposing the standard [Set.S] interface. *)
module Id_with_proj_set : Set.S with type elt = Id_with_proj.t
(* Description of an annotated unit made of several items.
   NOTE(review): "Mwe" presumably stands for multi-word expression and "Ne" for
   named entity; this signature does not say so explicitly — confirm. *)
module Mwe : sig
(* The two kinds of unit that can be described. *)
type kind = Ne | Mwe
type t = {
(* Optional part-of-speech string attached to the unit (assumed from the field name). *)
mwepos: string option;
(* Which of the two kinds this unit is. *)
kind: kind;
(* Optional label. *)
label: string option;
(* Optional criterion string; NOTE(review): semantics not visible here. *)
criterion: string option;
(* One distinguished item of the unit; presumably its first item — confirm. *)
first: Id_with_proj.t;
(* Set of items belonging to the unit; NOTE(review): whether [first] is also a
   member of [items] cannot be told from this signature. *)
items: Id_with_proj_set.t;
}
end
(* Interface for one CoNLL structure: its data types plus parsing, printing and
   metadata helpers. Only the signature is visible here, so the behavioural notes
   below are hedged where they go beyond what the types state. *)
module Conll : sig
(* Identifier module, exposed through the [Conll_types.Id_type] signature. *)
module Id: Conll_types.Id_type
(* One line of the structure.
   NOTE(review): the field names match the columns of the CoNLL-U format
   (FORM, LEMMA, UPOS, XPOS, FEATS, DEPS); confirm against the parser. *)
type line = {
(* Integer line number; presumably the position in the input file — confirm. *)
line_num: int;
id: Id.t;
form: string;
lemma: string;
upos: string;
xpos: string;
(* List of string pairs; presumably (feature name, value). *)
feats: (string * string) list;
(* List of (identifier, string) pairs; presumably (governor id, relation label). *)
deps: (Id.t * string ) list;
(* List of string pairs; NOTE(review): presumably "extra features" — confirm. *)
efs: (string * string) list;
}
(* [build_line ~id ~form ?lemma ?upos ?xpos ?feats ?deps ()] builds a [line].
   Only [id] and [form] are mandatory; the final [unit] argument lets callers
   omit the optional ones. The defaults used for omitted fields (and for
   [line_num] and [efs]) are not visible in this signature. *)
val build_line:
id:Id.t ->
form: string ->
?lemma: string ->
?upos: string ->
?xpos: string ->
?feats: (string * string) list ->
?deps: (Id.t * string ) list ->
unit ->
line
(* A predefined [line] value; presumably the artificial root token — confirm. *)
val root: line
(* Comparison function on lines, following the usual negative/zero/positive
   convention implied by the [int] result; the sort key is not visible here. *)
val compare: line -> line -> int
(* A multiword item covering a range of positions.
   NOTE(review): [first]/[last] are presumably the bounds of the covered token
   range and [fusion] the surface form of the fused token — confirm. *)
type multiword = {
mw_line_num: int option;
first: int;
last: int;
fusion: string;
(* Same shape as [line.efs]. *)
mw_efs: (string * string) list;
}
(* A whole structure: optional file name, metadata strings, lines, multiword
   items and a map from integers to [Mwe.t] values. *)
type t = {
file: string option;
meta: string list;
lines: line list;
multiwords: multiword list;
mwes: Mwe.t Int_map.t;
}
(* A distinguished value of type [t]; presumably the empty structure. *)
val void: t
(* Predicate on [t]; presumably true for [void] — confirm. *)
val is_void: t -> bool
(* Builds a [t] from a string; presumably parses CoNLL text — confirm. *)
val from_string: string -> t
(* Renders a [t] as a string. NOTE(review): the optional [cupt] flag presumably
   selects a CUPT-style output; its default is not visible here. *)
val to_string: ?cupt:bool -> t -> string
(* Renders a [t] as a string; presumably Graphviz dot source, given the name. *)
val to_dot: t -> string
(* [save_dot s t]; presumably writes the dot rendering of [t] to file [s] — confirm. *)
val save_dot: string -> t -> unit
(* [load ?tf_wf s] builds a [t]; presumably reads it from file [s].
   NOTE(review): the meaning and default of [tf_wf] are not visible here. *)
val load: ?tf_wf:bool -> string -> t
(* Accessors and updaters for the sentence identifier.
   NOTE(review): the difference between [get_sentid_meta] and [get_sentid]
   cannot be told from the types; check the implementation. *)
val get_sentid_meta: t -> string option
val get_sentid: t -> string option
val set_sentid: string -> t -> t
(* [set_label id s t] returns an updated [t]; which label is set on the line
   identified by [id] is not visible here. *)
val set_label: Id.t -> string -> t -> t
(* Returns a [t]; presumably one whose metadata contains a sentence identifier,
   using [default] when none can be found — confirm. *)
val ensure_sentid_in_meta: ?default:string -> t -> t
(* Returns a [t]; presumably with its multiword items put in a canonical form. *)
val normalize_multiwords: t -> t
(* [build_sentence] always returns a string whereas [get_sentence] may return
   [None]. NOTE(review): presumably the former rebuilds the text from the lines
   and the latter reads it from the metadata — confirm. *)
val build_sentence: t -> string
val get_sentence: t -> string option
(* Renders the sentence as a string; presumably HTML, with the positions listed
   in [highlight] emphasised — confirm. *)
val html_sentence: ?highlight: int list -> t -> string
(* [merge s t1 t2] combines two structures into one; the role of the string
   argument is not visible in this signature. *)
val merge: string -> t -> t -> t
end
(* A corpus: a collection of [Conll.t] structures, each paired with a string. *)
module Conll_corpus : sig
(* Array of (string, structure) pairs.
   NOTE(review): the string is presumably the sentence identifier — confirm. *)
type t = (string * Conll.t) array
(* In the next 3 functions, if a log_file is given, data are loaded in a robust mode:
no error is produced on Conll error but problems are reported in the given log_file.
If the log_file does not already exist, it is created; otherwise data are appended to the file.
*)
(* NOTE(review): the meaning and default of [tf_wf] are not visible in this signature. *)
val load: ?tf_wf:bool -> ?log_file:string -> string -> t
(* Same as [load] but takes a list of strings (presumably file names). *)
val load_list: ?tf_wf: bool -> ?log_file:string -> string list -> t
(* Builds a corpus from (integer, string) pairs; presumably (line number, line
   text) as produced by a line-by-line file reader — confirm. *)
val from_lines: ?tf_wf: bool -> ?log_file:string -> ?basename: string -> (int * string) list -> t
(* [save s corpus]; presumably writes the corpus to file [s] — confirm. *)
val save: string -> t -> unit
(* [save_sub s i j corpus]; presumably writes only a sub-range of the corpus
   delimited by [i] and [j]. NOTE(review): bounds semantics not visible here. *)
val save_sub: string -> int -> int -> t -> unit
(* Produces no value; presumably prints the corpus on standard output — confirm. *)
val dump: t -> unit
(* Returns an integer; presumably the total number of tokens in the corpus. *)
val token_size: t -> int
(* [web_anno corpus base_output size]
outputs a sequence of files of [size] sentences,
prepared for input in webanno *)
val web_anno: t -> string -> int -> unit
(* [get s corpus] returns an optional structure; presumably the one paired with
   the string [s], or [None] when there is none — confirm. *)
val get: string -> t -> Conll.t option
end
(* Statistics computed over a corpus. The representation is abstract. *)
module Stat : sig
type t
(* Selects which of two fields the statistics are keyed on.
   NOTE(review): presumably the [upos] or [xpos] field of [Conll.line] — confirm. *)
type key = Upos | Xpos
(* [build key corpus] computes the statistics of [corpus] for [key]. *)
val build: key -> Conll_corpus.t -> t
(* Produces no value; presumably prints the statistics on standard output — confirm. *)
val dump: t -> unit
(* build the table file. Args: corpus_id stat *)
val to_html: string -> t -> string
end
\ No newline at end of file
open Printf
(* ======================================================================================================================== *)
(* Signature of identifier modules: a main integer position plus an optional
   integer sub-position. *)
module type Id_type = sig
type t = int * int option (* 8.1 --> (8, Some 1) *)
(* Textual rendering of an identifier. *)
val to_string: t -> string
(* Alternative textual rendering; the [Id] implementation in this file uses '_'
   instead of '.' between the two components. *)
val to_dot: t -> string
(* Integer view of an identifier; [None] when there is no such view (the [Id]
   implementation in this file returns [None] whenever a sub-position is present). *)
val to_int: t -> int option
(* Raised by [of_string] on a string that does not denote an identifier; carries
   the offending string. *)
exception Wrong_id of string
(* Parses an identifier from its textual form ("8" or "8.1"). *)
val of_string: string -> t
(* [of_int i] is the identifier with main position [i] and no sub-position. *)
val of_int: int -> t
(* Total order on identifiers, with the usual negative/zero/positive result. *)
val compare: t -> t -> int
(* [min id1 id2] return the smallest id (according to compare) *)
val min: t -> t -> t
(* [max id1 id2] return the biggest id (according to compare) *)
val max: t -> t -> t
(* [min_max id1 id2] return (min,max) in a single call *)
val min_max: t -> t -> (t*t)
(* [shift delta id] increases the position by delta *)
val shift: int -> t -> t
end
(* ======================================================================================================================== *)
(* Identifiers made of a main integer position and an optional integer
   sub-position: "8" is [(8, None)] and "8.1" is [(8, Some 1)]. *)
module Id = struct
  type t = int * int option (* 8.1 --> (8, Some 1) *)

  (* Textual form: "8" or "8.1". *)
  let to_string = function
    | (i, None) -> sprintf "%d" i
    | (i, Some j) -> sprintf "%d.%d" i j

  (* Same as [to_string] but with '_' instead of '.' as separator. *)
  let to_dot = function
    | (i, None) -> sprintf "%d" i
    | (i, Some j) -> sprintf "%d_%d" i j

  (* [Some i] for an identifier without sub-position, [None] otherwise. *)
  let to_int = function
    | (i, None) -> Some i
    | (_, Some _) -> None

  (* Raised by [of_string]; carries the string that could not be parsed. *)
  exception Wrong_id of string

  (* [of_string s] parses "i" or "i.j".
     @raise Wrong_id if [s] does not have exactly one or two dot-separated
     components, or if a component is not accepted by [int_of_string].
     [String.split_on_char] keeps empty components, so malformed inputs such as
     "8." or ".8" are rejected instead of being silently read as 8. *)
  let of_string s =
    try
      match String.split_on_char '.' s with
      | [i] -> (int_of_string i, None)
      | [i; j] -> (int_of_string i, Some (int_of_string j))
      | _ -> raise (Wrong_id s)
    with Failure _ -> raise (Wrong_id s)

  (* Identifier with main position [i] and no sub-position. *)
  let of_int i = (i, None)

  (* Total order: by main position first; for equal main positions, an
     identifier without sub-position comes before any identifier with one, and
     two sub-positions are compared as integers. *)
  let compare id1 id2 =
    match (id1, id2) with
    | ((i1, _), (i2, _)) when i1 <> i2 -> Stdlib.compare i1 i2
    (* all other cases: i1=i2 *)
    | ((_, None), (_, Some _)) -> -1
    | ((_, Some _), (_, None)) -> 1
    | ((_, Some sub_i1), (_, Some sub_i2)) -> Stdlib.compare sub_i1 sub_i2
    | ((_, None), (_, None)) -> 0

  (* Smallest of the two identifiers; returns [id2] when they compare equal. *)
  let min id1 id2 = if compare id1 id2 < 0 then id1 else id2

  (* Biggest of the two identifiers; returns [id1] when they compare equal. *)
  let max id1 id2 = if compare id1 id2 < 0 then id2 else id1

  (* [(min, max)] in a single comparison; returns [(id2, id1)] when they
     compare equal. *)
  let min_max id1 id2 = if compare id1 id2 < 0 then (id1, id2) else (id2, id1)

  (* Adds [delta] to the main position; the sub-position is left unchanged. *)
  let shift delta (i, j) = (i + delta, j)
end
(* Sets and maps keyed by identifiers, ordered by [Id.compare]. *)
module Id_set = Set.Make (Id)
module Id_map = Map.Make (Id)
(* Maps keyed by plain integers, ordered by the standard integer ordering. *)
module Int_map = Map.Make (struct
  type t = int

  let compare (left : t) (right : t) = Stdlib.compare left right
end)
(* Signature of identifier modules: a main integer position plus an optional
   integer sub-position. *)
module type Id_type = sig
type t = int * int option (* 8.1 --> (8, Some 1) *)
(* Textual rendering of an identifier. *)
val to_string: t -> string
(* Alternative textual rendering.
   NOTE(review): presumably a form usable as a Graphviz dot node name — confirm. *)
val to_dot: t -> string
(* Integer view of an identifier, or [None] when there is none.
   NOTE(review): presumably [None] exactly when a sub-position is present — confirm. *)
val to_int: t -> int option
(* Raised by [of_string] on a string that does not denote an identifier; carries
   a string (presumably the offending input). *)
exception Wrong_id of string
(* Parses an identifier from its textual form ("8" or "8.1"). *)
val of_string: string -> t
(* Builds an identifier from an integer; presumably one without sub-position. *)
val of_int: int -> t
(* Total order on identifiers, with the usual negative/zero/positive result. *)
val compare: t -> t -> int
(* [min id1 id2] return the smallest id (according to compare) *)
val min: t -> t -> t
(* [max id1 id2] return the biggest id (according to compare) *)
val max: t -> t -> t
(* [min_max id1 id2] return (min,max) in a single call *)
val min_max: t -> t -> (t*t)
(* [shift delta id] increases the position by delta *)
val shift: int -> t -> t
end
(* Identifier module satisfying [Id_type]. *)
module Id : Id_type
(* Sets of identifiers. *)
module Id_set : Set.S with type elt = Id.t
(* Maps keyed by identifiers. *)
module Id_map : Map.S with type key = Id.t
(* Maps keyed by integers. *)
module Int_map : Map.S with type key = int
open Printf
(* ======================================================================================================================== *)
(* List helpers. *)
module List_ = struct
  (* [to_string string_of_item sep l] renders every element of [l] with
     [string_of_item] and joins the results with [sep]; the empty list gives "".
     Elements are rendered from first to last. [String.concat] builds the result
     in one pass (no quadratic chain of [^]), and [rev_map] followed by [rev]
     keeps the traversal tail-recursive. *)
  let to_string string_of_item sep l =
    l
    |> List.rev_map string_of_item
    |> List.rev
    |> String.concat sep
end
(* ======================================================================================================================== *)
(* String helpers dealing with prefixes. *)
module String_ = struct
  (* [check_prefix prefix s] is true when [s] starts with [prefix]
     (always true for the empty prefix). *)
  let check_prefix prefix s =
    let plen = String.length prefix in
    if plen > String.length s then false
    else String.sub s 0 plen = prefix

  (* [remove_prefix prefix s] is [Some rest] when [s] is [prefix ^ rest],
     and [None] when [s] does not start with [prefix]. *)
  let remove_prefix prefix s =
    match check_prefix prefix s with
    | false -> None
    | true ->
      let plen = String.length prefix in
      Some (String.sub s plen (String.length s - plen))
end
(* ======================================================================================================================== *)
(* Line-oriented file reading. *)
module File = struct
  (* [read_rev file] reads [file] line by line and returns the list of
     (line number, line content) pairs in reverse order (last line first).
     Line numbers start at 1. A leading UTF-8 byte order mark is skipped.
     The channel is closed on every exit path, including when reading fails.
     @raise Sys_error if the file cannot be opened or read. *)
  let read_rev file =
    let in_ch = open_in file in
    (* Skip the UTF-8 byte order mark only when all three bytes (EF BB BF) are
       present; otherwise get back to position 0. Testing the first byte alone
       would eat the first character of any text that merely starts with 0xEF. *)
    let skip_bom () =
      let has_bom =
        try
          let b0 = input_byte in_ch in
          let b1 = input_byte in_ch in
          let b2 = input_byte in_ch in
          b0 = 0xEF && b1 = 0xBB && b2 = 0xBF
        with End_of_file -> false (* fewer than 3 bytes: cannot be a BOM *)
      in
      if not has_bom then seek_in in_ch 0
    in
    (* Accumulate lines in reverse order until the end of the file. *)
    let rec collect line_num acc =
      match input_line in_ch with
      | line -> collect (line_num + 1) ((line_num, line) :: acc)
      | exception End_of_file -> acc
    in
    match (skip_bom (); collect 1 []) with
    | lines -> close_in in_ch; lines
    | exception exn -> close_in_noerr in_ch; raise exn

  (* [read file] is like [read_rev] but in file order (first line first). *)
  let read file = List.rev (read_rev file)
end
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment