(* grew_corpus.mli *)
(**********************************************************************************)
(*    Libcaml-grew - a Graph Rewriting library dedicated to NLP applications      *)
(*                                                                                *)
(*    Copyright 2011-2021 Inria, Université de Lorraine                           *)
(*                                                                                *)
(*    Webpage: https://grew.fr                                                     *)
(*    License: CeCILL (see LICENSE folder or "http://cecill.info/")            *)
(*    Authors: see AUTHORS file                                                   *)
(**********************************************************************************)

open Conllx

open Grew_base
open Grew_graph


(** Corpus handling: an abstract collection of graphs that, going by the
    signatures below, can be queried by integer position or by sentence
    identifier, traversed, and built from several kinds of input. *)
module Corpus : sig
  (** Abstract type of a corpus. *)
  type t

  (** [merge corpora] builds a single corpus from a list of corpora.
      NOTE(review): the resulting order and the handling of duplicated
      sentence identifiers are not visible from this interface. *)
  val merge: t list -> t

  (** [graph_of_sent_id sent_id corpus] returns, as an option, the graph
      associated with [sent_id].
      NOTE(review): presumably [None] when no graph carries this
      identifier; confirm against the implementation. *)
  val graph_of_sent_id: string -> t -> G_graph.t option

  (** [size corpus] returns an integer size.
      NOTE(review): presumably the number of graphs in [corpus]. *)
  val size: t -> int

  (** [get_graph position corpus] returns the graph found at [position].
      NOTE(review): the valid range of [position] and the behaviour outside
      of it (an exception is likely) are not visible here. *)
  val get_graph: int -> t -> G_graph.t

  (** [is_conll corpus] returns a boolean.
      NOTE(review): presumably [true] when the corpus was read from CoNLL
      data; confirm. *)
  val is_conll: t -> bool

  (** [get_sent_id position corpus] returns a string for the item found at
      [position], presumably its sentence identifier. *)
  val get_sent_id: int -> t -> string

  (** [get_text position corpus] returns a string for the item found at
      [position], presumably the text of the sentence. *)
  val get_text: int -> t -> string

  (** [fold_left f init corpus] folds [f] over [corpus], starting from
      [init]. [f] receives the accumulator, a string and a graph.
      NOTE(review): the string is presumably the sentence identifier. *)
  val fold_left: ('a -> string -> G_graph.t -> 'a) -> 'a -> t -> 'a

  (** [fold_right f corpus init] folds [f] over [corpus], starting from
      [init]. [f] receives a string, a graph and the accumulator.
      NOTE(review): the string is presumably the sentence identifier. *)
  val fold_right: (string -> G_graph.t -> 'a -> 'a) -> t -> 'a -> 'a

  (** [iteri f corpus] applies [f] to every item of [corpus] for its side
      effects. [f] receives an integer, a string and a graph.
      NOTE(review): presumably the position, the sentence identifier and
      the graph of the item. *)
  val iteri: (int -> string -> G_graph.t -> unit) -> t -> unit

  (** [permut_length corpus] returns an integer array.
      NOTE(review): the name suggests a permutation of positions ordered by
      some length measure of the graphs; the measure and the direction of
      the ordering must be checked in the implementation. *)
  val permut_length: t -> int array

  (** [from_stdin ?ext ?log_file ?config ()] builds a corpus.
      NOTE(review): presumably the data is read from standard input;
      [ext] looks like a format selector and [log_file] like a destination
      for messages, but neither meaning nor default is visible here. *)
  val from_stdin: ?ext:string -> ?log_file: string -> ?config:Conllx_config.t -> unit -> t

  (** [from_string ?ext ?log_file ?config data] builds a corpus from the
      string [data].
      NOTE(review): [data] is presumably the corpus content itself (not a
      path); optional arguments as for [from_stdin]. *)
  val from_string: ?ext:string -> ?log_file: string -> ?config:Conllx_config.t -> string -> t

  (** [from_file ?ext ?log_file ?config file] builds a corpus from [file].
      NOTE(review): [file] is presumably a path; optional arguments as for
      [from_stdin]. *)
  val from_file: ?ext:string -> ?log_file: string -> ?config:Conllx_config.t -> string -> t

  (** [from_dir ?log_file ?config dir] builds a corpus from [dir].
      NOTE(review): [dir] is presumably a directory path; which files of
      the directory are taken into account is not visible here. Unlike the
      other constructors, this one takes no [ext] argument. *)
  val from_dir: ?log_file: string -> ?config:Conllx_config.t -> string -> t
end

(** Corpus descriptions: abstract values from which a {!Corpus.t} and some
    metadata can be obtained. *)
module Corpus_desc : sig

  (** Abstract type of a corpus description. *)
  type t

  (** [build_corpus desc] returns the corpus for [desc].
      NOTE(review): whether this reads the original files or some compiled
      form is not visible from this interface. *)
  val build_corpus: t -> Corpus.t

  (** [load_corpus_opt desc] returns the corpus for [desc], as an option.
      NOTE(review): presumably [None] when no loadable form of the corpus
      is available (see [compile]); confirm. *)
  val load_corpus_opt: t -> Corpus.t option

  (** [get_config desc] returns the [Conllx_config.t] attached to [desc]. *)
  val get_config: t -> Conllx_config.t

  (** [is_rtl desc] returns a boolean.
      NOTE(review): presumably whether the corpus text is written
      right-to-left. *)
  val is_rtl: t -> bool

  (** [is_audio desc] returns a boolean.
      NOTE(review): presumably whether the corpus comes with audio data. *)
  val is_audio: t -> bool

  (** [get_id desc] returns the identifier of [desc] as a string. *)
  val get_id: t -> string

  (** [get_lang_opt desc] returns an optional string, presumably the
      language of the corpus when one is declared. *)
  val get_lang_opt: t -> string option

  (** [get_directory desc] returns a string, presumably the path of the
      directory holding the corpus files. *)
  val get_directory: t -> string

  (** [load_json json] returns a list of corpus descriptions.
      NOTE(review): [json] is presumably the path of a JSON file rather
      than JSON text; confirm against the implementation. *)
  val load_json: string -> t list

  (** [compile ?force ?grew_match ?tmp_gm2 desc] performs a side effect
      for [desc] and returns unit.
      NOTE(review): presumably produces a precompiled form of the corpus;
      the meaning and defaults of [force], [grew_match] and [tmp_gm2] are
      not visible from this interface. *)
  val compile: ?force:bool -> ?grew_match: string -> ?tmp_gm2: bool -> t -> unit

  (** [clean desc] performs a side effect for [desc] and returns unit.
      NOTE(review): presumably removes what [compile] produced. *)
  val clean: t -> unit
end