corpus.mli 3.67 KB
Newer Older
Idir Lankri's avatar
Idir Lankri committed
1
(**************************************************************************)
Idir Lankri's avatar
Idir Lankri committed
2
(*                                                                        *)
Idir Lankri's avatar
Idir Lankri committed
3 4 5 6 7 8 9
(*                     The Sanskrit Heritage Platform                     *)
(*                                                                        *)
(*                              Idir Lankri                               *)
(*                                                                        *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)

10 11
(* Operations on the corpus tree *)

12
(* A section of the corpus.  Lists of sections are what the [Sections]
   constructor of [S.contents] carries.  *)
module Section : sig
13 14 15 16 17 18
  (* Abstract type of sections.  *)
  type t
  ;
  (* String label of the given section.  NOTE(review): presumably the
     name of the corresponding corpus subdirectory; confirm against the
     implementation.  *)
  value label : t -> string
  ;
end
;
19 20 21 22 23
(* Analyzers that an [Analysis.t] can refer to.  *)
module Analyzer : sig
  (* The only analyzer declared in this interface is [Graph].  *)
  type t = [ Graph ]
  ;
  (* Path associated with the given analyzer, as a string.
     NOTE(review): presumably the path of the analyzer program; confirm
     against the implementation.  *)
  value path : t -> string
  ;
24 25
  (* Variant of [path] with the same type.  NOTE(review): presumably a
     path that does not depend on the installation location; confirm
     against the implementation.  *)
  value relocatable_path : t -> string
  ;
26 27 28 29 30
end
;
(* Analysis attached to a sentence (see [Sentence.analysis]).  *)
module Analysis : sig
  (* Abstract type of analyses.  *)
  type t
  ;
31
  (* Build an analysis.  The argument types match, in order, the result
     types of [analyzer], [lang], [checkpoints] and [nb_sols] below.
     NOTE(review): presumably the arguments are exactly those four
     components; confirm against the implementation.  *)
  value make : Analyzer.t -> Html.language -> string -> int (* Num.num *) -> t
32 33 34 35 36
  ;
  (* Analyzer of the given analysis.  *)
  value analyzer : t -> Analyzer.t
  ;
  (* Language of the given analysis, as an [Html.language].  *)
  value lang : t -> Html.language
  ;
37
  (* Checkpoints of the given analysis, as a string.  The string format
     is not specified in this interface.  *)
  value checkpoints : t -> string
38
  ;
39
  (* NOTE(review): presumably the number of solutions of the analysis,
     judging by the name; confirm against the implementation.  *)
  value nb_sols : t -> int (* Num.num *)
40 41 42 43 44 45 46 47
  ;
end
;
(* Text encodings used to convert between strings and [Word.word].  *)
module Encoding : sig
  (* The encodings declared in this interface.  *)
  type t = [ Velthuis | WX | KH | SLP1 | Devanagari | IAST ]
  ;
  (* String form of the given encoding.  *)
  value to_string : t -> string
  ;
48 49 50 51 52 53
  (* Encoding denoted by the given string.  NOTE(review): the behaviour
     on a string that denotes no encoding is not specified in this
     interface; check the implementation before relying on it.  *)
  value of_string : string -> t
  ;
  (* [encode enc s] converts the string [s] into a [Word.word] using the
     encoding [enc].  *)
  value encode : t -> string -> Word.word
  ;
  (* [decode enc w] converts the word [w] into a string using the
     encoding [enc].  *)
  value decode : t -> Word.word -> string
  ;
54 55
end
;
56 57 58
(* A sentence of the corpus.  Lists of sentences are what the
   [Sentences] constructor of [S.contents] carries.  *)
module Sentence : sig
  (* Abstract type of sentences.  *)
  type t
  ;
59
  (* Build a sentence from an integer, a list of words, a boolean and an
     analysis.  NOTE(review): presumably the integer is the identifier
     returned by [id] and the analysis is the one returned by
     [analysis]; the meaning of the boolean is not visible in this
     interface.  Confirm against the implementation.  *)
  value make : int -> list Word.word -> bool -> Analysis.t -> t
60
  ;
61 62
  (* Integer identifier of the given sentence.  *)
  value id : t -> int
  ;
63
  (* [text enc s] is the text of the sentence [s] as a string; the
     encoding [enc] is presumably the one used to render it (to be
     confirmed).  *)
  value text : Encoding.t -> t -> string
64
  ;
65
  (* Analysis attached to the given sentence.  *)
  value analysis : t -> Analysis.t
66 67 68 69
  ;
end
;
(* Signature of the argument expected by the functor [Make].  *)
module type Location = sig
70
  (* NOTE(review): presumably the path of the corpus root directory;
     confirm against the implementation of [Make].  *)
  value path : string
71 72 73 74
  ;
end
;
(* Signature of the corpus operations produced by the functor [Make].  *)
module type S = sig
75 76
  (* Contents of a corpus subdirectory: either it is empty (constructor
     [Empty]), or we are on leaves of the tree (constructor
     [Sentences]), or on branches (constructor [Sections]).  *)
78
  type contents =
79
    [ Empty
80
    | Sections of list Section.t
81 82 83 84
    | Sentences of list Sentence.t
    ]
  ;
  (* List the contents of the given corpus subdirectory.  Note that the
     returned elements are sorted according to [Section.compare] or
     [Sentence.compare] depending on the case.  Raise [Sys_error] when
     an operating system error occurs.
     NOTE(review): neither [Section.compare] nor [Sentence.compare] is
     exported by the [Section] and [Sentence] signatures of this
     interface; confirm the actual ordering against the
     implementation.  *)
88 89
  value contents : string -> contents
  ;
Idir Lankri's avatar
Idir Lankri committed
90 91
  (* Exception raised by [save_sentence] when the sentence to be saved
     already exists.  *)
92 93
  exception Sentence_already_exists
  ;
94
  (* Save a sentence in the corpus.  Raise [Sentence_already_exists] if
     the sentence to be saved already exists and [force] is [False], and
     [Sys_error] when an operating system error occurs.
     NOTE(review): presumably the first boolean is [force], the string
     is the corpus subdirectory, and the remaining arguments are those
     of [Sentence.make] (their types match); confirm against the
     implementation.  *)
  value save_sentence :
    bool -> string -> int -> list Word.word -> bool -> Analysis.t -> unit
99
  ;
100
  (* Exception raised by [mkdir].  NOTE(review): the string payload is
     presumably the name of the already existing section; confirm
     against the implementation.  *)
  exception Section_already_exists of string
101
  ;
102
  (* Create the given corpus directory.  Raise [Section_already_exists]
     if it already exists and [Unix.Unix_error] when an operating system
     error occurs.  *)
105 106
  value mkdir : string -> unit
  ;
107 108 109 110 111 112
  (* Exception raised by [sentence].  *)
  exception No_such_sentence
  ;
  (* Return the requested sentence.  Raise [No_such_sentence] if the
     requested sentence does not exist.
     NOTE(review): presumably the string is the corpus subdirectory and
     the integer the sentence number, as for [citation]; confirm against
     the implementation.  *)
  value sentence : string -> int -> Sentence.t
  ;
113
  (* Permissions taken by [url] and [relocatable_url].  *)
  type permission = [ Reader | Annotator | Manager ]
114
  ;
115
  (* Default permission; which constructor it is is not visible in this
     interface.  *)
  value default_permission : permission
116
  ;
117
  (* String form of the given permission.  *)
  value string_of_permission : permission -> string
118
  ;
119
  (* Permission denoted by the given string.  NOTE(review): the
     behaviour on a string that denotes no permission is not specified
     in this interface; check the implementation before relying on
     it.  *)
  value permission_of_string : string -> permission
120
  ;
121
  (* URL, as a string, built from a string, a permission and a sentence.
     NOTE(review): presumably the string is the corpus subdirectory
     containing the sentence; confirm against the implementation.  *)
  value url : string -> permission -> Sentence.t -> string
122
  ;
123
  (* Variant of [url] with the same type.  NOTE(review): presumably a
     URL that does not depend on the installation location; confirm
     against the implementation.  *)
  value relocatable_url : string -> permission -> Sentence.t -> string
124
  ;
125 126 127
  (* [citation subdir id] returns a URL to the analysis of the sentence
     whose number is [id] in the corpus subdirectory [subdir].  Raise
     [Failure "citation"] if an error occurs.  *)
129
  value citation : string -> int -> string 
130
  ;
131 132 133
end
;
(* Functor producing an implementation of [S] from a [Location].  *)
module Make (Loc : Location) : S
134
;