...
 
Commits (177)
This diff is collapsed.
......@@ -6,7 +6,7 @@
{\Large (Sanskrit Library V#VERSION; Zen toolkit V3.1)}\\[15pt]
{\large #DATE}\\[15pt]
{\large G\'erard Huet \& Pawan Goyal}\\[10pt]
{\large Copyright \copyright ~2000-2017 Inria}\\[20pt]
{\large Copyright \copyright ~2000-2018 Inria}\\[20pt]
\end{center}
% Macros
......
No preview for this file type
......@@ -5,13 +5,13 @@
# Gérard Huet #
# #
############################################################################
# Installation instructions Copyright Gérard Huet 2017 #
# Installation instructions Copyright Gérard Huet 2018 #
############################################################################
This is just a reminder for quick installation.
A more complete installation procedure is described in file SETUP/INSTALL.
0) Install OCaml (>= 4.02). See http://ocaml.org for installation
0) Install OCaml (>= 4.07). See http://ocaml.org for installation
instructions.
Then install the Camlp4 processor in the corresponding distribution.
......@@ -20,13 +20,21 @@ If OCaml has been installed through the OPAM package manager, then:
opam install camlp4
should do the job. Alternatively, see https://github.com/ocaml/camlp4
for download and installation instructions.
for download and installation instructions. You may also have to install
the auxiliary package ocamlbuild.
1) Install the Heritage_resources package:
Beware: camlp4 is not updated with every OCaml release, so you may have
to settle for a version of OCaml that is not the most recent one.
1) Install the Zen computational linguistics library:
git clone https://gitlab.inria.fr/huet/Zen.git
2) Install the Heritage_resources package:
git clone https://gitlab.inria.fr/huet/Heritage_Resources.git
2) Create your configuration file "config" in directory SETUP.
3) Create your configuration file "config" in directory SETUP.
Look at the SETUP/CONFIGS directory to find examples. All fields are
explained in SETUP/config_help.txt.
......
This diff is collapsed.
......@@ -22,8 +22,6 @@
../ZEN/minimap.cmi
../ZEN/minimap.cmx : ../ZEN/share.cmx ../ZEN/lexmap.cmx ../ZEN/deco.cmx \
../ZEN/minimap.cmi
../ZEN/zen_lexer.cmo :
../ZEN/zen_lexer.cmx :
control.cmo :
control.cmx :
version.cmo :
......@@ -32,21 +30,23 @@ date.cmo : version.cmo
date.cmx : version.cmx
canon.cmo : ../ZEN/word.cmo
canon.cmx : ../ZEN/word.cmx
transduction.cmo : ../ZEN/zen_lexer.cmo
transduction.cmx : ../ZEN/zen_lexer.cmx
min_lexer.cmo :
min_lexer.cmx :
transduction.cmo : min_lexer.cmo
transduction.cmx : min_lexer.cmx
encode.cmo : ../ZEN/word.cmo transduction.cmo phonetics.cmo canon.cmo
encode.cmx : ../ZEN/word.cmx transduction.cmx phonetics.cmx canon.cmx
order.cmo :
order.cmx :
padapatha.cmo : ../ZEN/word.cmo phonetics.cmo encode.cmo
padapatha.cmx : ../ZEN/word.cmx phonetics.cmx encode.cmx
skt_lexer.cmo : encode.cmo
skt_lexer.cmx : encode.cmx
sanskrit.cmi : ../ZEN/word.cmo
sanskrit.cmo : transduction.cmo skt_lexer.cmo padapatha.cmo encode.cmo \
canon.cmo sanskrit.cmi
sanskrit.cmx : transduction.cmx skt_lexer.cmx padapatha.cmx encode.cmx \
canon.cmx sanskrit.cmi
skt_lexer.cmo : encode.cmo
skt_lexer.cmx : encode.cmx
test_stamp.cmo : version.cmo paths.cmo ../ZEN/gen.cmo control.cmo
test_stamp.cmx : version.cmx paths.cmx ../ZEN/gen.cmx control.cmx
dir.cmi :
......@@ -152,12 +152,12 @@ dispatcher.cmi : ../ZEN/word.cmo phases.cmo morphology.cmi \
load_transducers.cmo html.cmo ../ZEN/deco.cmo auto.cmi
dispatcher.cmo : ../ZEN/word.cmo web.cmo skt_morph.cmi phonetics.cmo \
phases.cmo pada.cmo naming.cmo morphology.cmi load_transducers.cmo \
../ZEN/list2.cmo html.cmo ../ZEN/gen.cmo encode.cmo ../ZEN/deco.cmo \
control.cmo canon.cmo auto.cmi dispatcher.cmi
../ZEN/list2.cmo html.cmo encode.cmo ../ZEN/deco.cmo control.cmo \
canon.cmo auto.cmi dispatcher.cmi
dispatcher.cmx : ../ZEN/word.cmx web.cmx skt_morph.cmi phonetics.cmx \
phases.cmx pada.cmx naming.cmx morphology.cmi load_transducers.cmx \
../ZEN/list2.cmx html.cmx ../ZEN/gen.cmx encode.cmx ../ZEN/deco.cmx \
control.cmx canon.cmx auto.cmi dispatcher.cmi
../ZEN/list2.cmx html.cmx encode.cmx ../ZEN/deco.cmx control.cmx \
canon.cmx auto.cmi dispatcher.cmi
segmenter.cmo : ../ZEN/word.cmo web.cmo phonetics.cmo ../ZEN/list2.cmo \
../ZEN/gen.cmo encode.cmo ../ZEN/deco.cmo auto.cmi
segmenter.cmx : ../ZEN/word.cmx web.cmx phonetics.cmx ../ZEN/list2.cmx \
......@@ -167,7 +167,7 @@ load_morphs.cmo : ../ZEN/word.cmo web.cmo morphology.cmi html.cmo \
load_morphs.cmx : ../ZEN/word.cmx web.cmx morphology.cmi html.cmx \
../ZEN/gen.cmx ../ZEN/deco.cmx control.cmx
lexer.cmi : ../ZEN/word.cmo phases.cmo morphology.cmi load_transducers.cmo \
html.cmo dispatcher.cmi
load_morphs.cmo html.cmo dispatcher.cmi
lexer.cmo : ../ZEN/word.cmo web.cmo transduction.cmo skt_morph.cmi \
segmenter.cmo phases.cmo naming.cmo morphology.cmi morpho_string.cmo \
morpho_scl.cmo morpho_html.cmo morpho.cmo load_transducers.cmo \
......@@ -193,13 +193,15 @@ reader.cmx : web.cmx scl_parser.cmx sanskrit.cmx rank.cmx phases.cmx \
paths.cmx html.cmx encode.cmx control.cmx checkpoints.cmx cgi.cmx \
canon.cmx
parser.cmo : ../ZEN/word.cmo web.cmo skt_morph.cmi scl_parser.cmo \
sanskrit.cmi SCLpaths.cmo paths.cmo paraphrase.cmi ../ZEN/list2.cmo \
lexer.cmi inflected.cmi html.cmo ../ZEN/gen.cmo encode.cmo date.cmo \
control.cmo constraints.cmi checkpoints.cmo cgi.cmo canon.cmo
sanskrit.cmi SCLpaths.cmo phases.cmo paths.cmo paraphrase.cmi \
morpho_html.cmo ../ZEN/list2.cmo lexer.cmi inflected.cmi html.cmo \
../ZEN/gen.cmo encode.cmo date.cmo control.cmo constraints.cmi \
checkpoints.cmo cgi.cmo canon.cmo
parser.cmx : ../ZEN/word.cmx web.cmx skt_morph.cmi scl_parser.cmx \
sanskrit.cmx SCLpaths.cmx paths.cmx paraphrase.cmx ../ZEN/list2.cmx \
lexer.cmx inflected.cmx html.cmx ../ZEN/gen.cmx encode.cmx date.cmx \
control.cmx constraints.cmx checkpoints.cmx cgi.cmx canon.cmx
sanskrit.cmx SCLpaths.cmx phases.cmx paths.cmx paraphrase.cmx \
morpho_html.cmx ../ZEN/list2.cmx lexer.cmx inflected.cmx html.cmx \
../ZEN/gen.cmx encode.cmx date.cmx control.cmx constraints.cmx \
checkpoints.cmx cgi.cmx canon.cmx
constraints.cmi : skt_morph.cmi morphology.cmi
constraints.cmo : ../ZEN/word.cmo skt_morph.cmi morphology.cmi \
../ZEN/list2.cmo html.cmo canon.cmo constraints.cmi
......@@ -214,10 +216,6 @@ paraphrase.cmx : web.cmx skt_morph.cmi morphology.cmi html.cmx \
constraints.cmx paraphrase.cmi
bank_lexer.cmo :
bank_lexer.cmx :
regression.cmo : web.cmo version.cmo sanskrit.cmi rank.cmo encode.cmo \
date.cmo constraints.cmi bank_lexer.cmo
regression.cmx : web.cmx version.cmx sanskrit.cmx rank.cmx encode.cmx \
date.cmx constraints.cmx bank_lexer.cmx
checkpoints.cmo : phases.cmo encode.cmo control.cmo canon.cmo bank_lexer.cmo
checkpoints.cmx : phases.cmx encode.cmx control.cmx canon.cmx bank_lexer.cmx
graph_segmenter.cmo : ../ZEN/word.cmo web.cmo phonetics.cmo ../ZEN/list2.cmo \
......@@ -287,10 +285,10 @@ mk_sandhi_page.cmx : web.cmx html.cmx
mk_corpus_page.cmo : web_corpus.cmi web.cmo params.cmi html.cmo
mk_corpus_page.cmx : web_corpus.cmx web.cmx params.cmx html.cmx
corpus.cmi : ../ZEN/word.cmo html.cmo
corpus.cmo : ../ZEN/word.cmo sanskrit.cmi paths.cmo params.cmi html.cmo \
../ZEN/gen.cmo dir.cmi cgi.cmo canon.cmo corpus.cmi
corpus.cmx : ../ZEN/word.cmx sanskrit.cmx paths.cmx params.cmx html.cmx \
../ZEN/gen.cmx dir.cmx cgi.cmx canon.cmx corpus.cmi
corpus.cmo : ../ZEN/word.cmo paths.cmo params.cmi html.cmo ../ZEN/gen.cmo \
encode.cmo dir.cmi cgi.cmo canon.cmo corpus.cmi
corpus.cmx : ../ZEN/word.cmx paths.cmx params.cmx html.cmx ../ZEN/gen.cmx \
encode.cmx dir.cmx cgi.cmx canon.cmx corpus.cmi
web_corpus.cmi : corpus.cmi
web_corpus.cmo : paths.cmo corpus.cmi web_corpus.cmi
web_corpus.cmx : paths.cmx corpus.cmx web_corpus.cmi
......
#load "dynlink.cma";;
#load "camlp4r.cma";;
#load "camlp4/camlp4r.cma";;
......@@ -4,6 +4,6 @@
(* *)
(* Gérard Huet & Pawan Goyal *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
This diff is collapsed.
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet & Pawan Goyal *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* The auto structure *)
......
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet & Pawan Goyal *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(*i module Automaton = struct i*)
......
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* A simple lexer recognizing idents formed from ASCII letters and integers
......
......@@ -4,13 +4,14 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(*i module Canon = struct i*)
(* Inverse of [Transduction.code_raw] - word to VH transliteration *)
value canon = fun
(* Except that .ll has no canonical code *)
value canon = fun
[ 0 -> "-" (* notation for suffixes and segmentation hint in compounds *)
| 1 -> "a"
| 2 -> "aa"
......@@ -26,7 +27,7 @@ value canon = fun
| 12 -> "o"
| 13 -> "au"
| 14 -> ".m" (* anusvaara *)
| 15 -> "~~" (* anunaasika candrabindu *)
| 15 -> "~~" (* anun\=asika candrabindu *)
| 16 -> ".h"
| 17 -> "k"
| 18 -> "kh"
......@@ -60,7 +61,7 @@ value canon = fun
| 46 -> "z" (* used to be "\"s" -- fragile *)
| 47 -> ".s"
| 48 -> "s"
| 49 -> "h"
| 49 -> "h" (* h/.dh *)
| 50 -> "_" (* hiatus *)
| -1 -> "'" (* avagraha *)
| -2 -> "[-]" (* amuissement - lopa of a or aa in preceding preverb *)
......@@ -71,7 +72,8 @@ value canon = fun
| -7 -> "aa|I" (* sandhi of aa and ii *I *)
| -8 -> "aa|U" (* sandhi of aa and uu *U *)
| -9 -> "aa|A" (* sandhi of aa and aa *A *)
| -10 -> "+" (* notation for segmentation hint *)
| 123 -> "aa|C" (* sandhi of aa and ch *C for ch gemination in cch *)
| 100 -> "+" (* notation for segmentation hint *)
| 124 -> failwith "Canon: Unrestored special phoneme j'" (* j/z *)
| 149 -> failwith "Canon: Unrestored special phoneme h'" (* h/gh *)
| 249 -> failwith "Canon: Unrestored special phoneme h''" (* h/dh *)
......@@ -179,11 +181,15 @@ value canon_WX = fun
| 50 -> "_" (* hiatus *)
| -1 -> "Z" (* avagraha *)
| -2 -> "[-]" (* amuissement - lopa of current aa- or preceding a- or aa- *)
| -3 -> "A|a" (* sandhi of aa and (a,aa) *a *)
| -4 -> "A|i" (* sandhi of aa and (i,ii) *e *)
| -5 -> "A|u" (* sandhi of aa and (u,uu) *u *)
| -6 -> "A|r" (* sandhi of aa and .r *r *)
| -10 -> "+" (* explicit compound with no sandhi - experimental *)
| -3 -> "A|a" (* sandhi of aa and a *a *)
| -4 -> "A|i" (* sandhi of aa and i *e *)
| -5 -> "A|u" (* sandhi of aa and u *u *)
| -6 -> "A|r" (* sandhi of aa and .r *r *)
| -7 -> "aa|I" (* sandhi of aa and I *I *)
| -8 -> "aa|U" (* sandhi of aa and U *U *)
| -9 -> "aa|A" (* sandhi of aa and A *A *)
| 123 -> "aa|C" (* sandhi of aa and C *C for duplication *)
| 100 -> "+" (* explicit compound with no sandhi - experimental *)
| n -> if n<0 || n>59 then failwith mess
where mess = "Canon: Illegal char " ^ string_of_int n
else "#" ^ Char.escaped (Char.chr (n-2)) (* homo index 1 to 9 *)
......@@ -195,7 +201,7 @@ value decode_WX word =
(* Sanskrit Library SLP1 decoding *)
value canon_SL = fun
[ 0 -> "-"
| -10 -> "+"
| 100 -> "+"
| 1 -> "a"
| 2 -> "A"
| 3 -> "i"
......@@ -258,7 +264,7 @@ value decode_SL word =
(* Kyoto-Harvard decoding *)
value canon_KH = fun
[ 0 -> "-"
| -10 -> "+"
| 100 -> "+"
| 1 -> "a"
| 2 -> "A"
| 3 -> "i"
......@@ -285,11 +291,11 @@ value canon_KH = fun
| 24 -> "j"
| 25 -> "jh"
| 26 -> "J"
| 27 -> ".t"
| 28 -> ".th"
| 29 -> ".d"
| 30 -> ".dh"
| 31 -> ".n"
| 27 -> "T"
| 28 -> "Th"
| 29 -> "D"
| 30 -> "Dh"
| 31 -> "N"
| 32 -> "t"
| 33 -> "th"
| 34 -> "d"
......@@ -329,7 +335,7 @@ value switch_decode = fun (* normalizes anusvaara in its input *)
(* Decoding without double quotes *)
value canon2 = fun
[ 0 -> "-"
| -10 -> "+"
| 100 -> "+"
| 1 -> "a"
| 2 -> "A"
| 3 -> "i"
......@@ -381,11 +387,15 @@ value canon2 = fun
| 49 -> "h"
| 50 -> "_" (* hiatus *)
| -1 -> "'"
| -2 -> "[-]" (* Inconsistent with previous versions *)
| -3 -> "A|a" (* sandhi of A and (a,A) - phantom phoneme *)
| -4 -> "A|i" (* sandhi of A and (i,I) - phantom phoneme *)
| -5 -> "A|u" (* sandhi of A and (u,U) - phantom phoneme *)
| -6 -> "A|.r" (* sandhi of A and .r) - phantom phoneme *)
| -2 -> "[-]"
| -3 -> "aa|a" (* sandhi of A and a *a *)
| -4 -> "aa|i" (* sandhi of A and i *i *)
| -5 -> "aa|u" (* sandhi of A and u *u *)
| -6 -> "aa|r" (* sandhi of A and .r *r *)
| -7 -> "aa|I" (* sandhi of aa and I *I *)
| -8 -> "aa|U" (* sandhi of aa and U *U *)
| -9 -> "aa|A" (* sandhi of aa and A *A *)
| 123 -> "aa|C" (* sandhi of aa and C *C *)
| n -> if n<0 || n>59 then failwith ("canon2: " ^ string_of_int n)
else ("#" ^ Char.escaped (Char.chr (n-2)))
]
......@@ -396,9 +406,9 @@ value catenate2 c (s,b) =
let protected = if b && b' then "_" ^ s else s in
(canon2 c ^ protected , b')
;
(* [decode2 : word -> string] *)
(* decode2 : word -> string (debug for [Morpho_xml]) *)
value decode2 word =
try let (s,_) = List.fold_right catenate2 word ("",False) in s
try let (s,_) = List.fold_right catenate2 word ("",False) in s
with [ Failure _ -> failwith ("decode2: " ^ robust_decode (Word.mirror word)) ]
;
value canon_upper = fun
......@@ -457,7 +467,7 @@ value decode_ref word =
;
value canon_html = fun
[ 0 -> "-"
| -10 -> "+"
| 100 -> "+"
| 1 -> "a"
| 2 -> "aa"
| 3 -> "i"
......@@ -561,7 +571,7 @@ value canon_upper_html = fun
(* Roman with diacritics Unicode - latin extended *)
value canon_uniromcode = fun
[ 0 -> "-"
| -10 -> "+"
| 100 -> "+"
| 1 -> "a"
| 2 -> "&#257;"
| 3 -> "i"
......@@ -649,7 +659,7 @@ value stem_to_string html =
exception Hiatus
;
value indic_unicode_point = fun
[ 0 | -10 -> (* - *) "70"
[ 0 | 100 -> (* - *) "70"
| 1 -> (* a *) "05"
| 2 -> (* aa *) "06"
| 3 -> (* i *) "07"
......@@ -711,7 +721,7 @@ value indic_unicode_point = fun
else "" (* homo index dropped *)
]
and matra_indic_unicode_point = fun
[ -10 (* + *) (* necessary for word form ending in consonant *)
[ 100 (* + *) (* necessary for word form ending in consonant *)
| 0 -> (* - *) "70" (* id for iics *)
| 1 -> (* a *) "" (* default *)
| 2 -> (* aa *) "3E"
......@@ -731,7 +741,7 @@ and matra_indic_unicode_point = fun
]
;
(* om 50 udatta 51 anudatta 52 grave 53 acute 54 avagraha 3D .ll 61
danda 64 ddanda 65 0 66 1 67 2 68 3 69 4 6A 5 6B 6 6C 7 6D 8 6E 9 6F ° 70 *)
danda 64 ddanda 65 0 66 1 67 2 68 3 69 4 6A 5 6B 6 6C 7 6D 8 6E 9 6F deg 70 *)
(* [inject_point s] builds an HTML hexadecimal character reference by
   inserting [s] between the fixed prefix [&#x09] and a closing semicolon. *)
value inject_point s = String.concat "" [ "&#x09"; s; ";" ]
;
value deva_unicode c =
......
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* CGI utilities *)
......@@ -24,63 +24,63 @@ value hexa_val conf =
;
value decode_url s =
let rec need_decode i =
if i < Bytes.length s then
if i < String.length s then
match s.[i] with
[ '%' | '+' -> True
| _ -> need_decode (succ i)
]
else False in
let rec compute_len i i1 =
if i < Bytes.length s then
if i < String.length s then
let i =
match s.[i] with
[ '%' when i + 2 < Bytes.length s -> i + 3
[ '%' when i + 2 < String.length s -> i + 3
| _ -> succ i
]
in
compute_len i (succ i1)
else i1 in
let rec copy_decode_in s1 i i1 =
if i < Bytes.length s then
if i < String.length s then
let i =
match s.[i] with
[ '%' when i + 2 < Bytes.length s ->
let v = hexa_val s.[i + 1] * 16 + hexa_val s.[i + 2]
in do {Bytes.set s1 i1 (Char.chr v); i + 3}
[ '%' when i + 2 < String.length s ->
let v = hexa_val s.[i+1] * 16 + hexa_val s.[i+2] in
do {Bytes.set s1 i1 (Char.chr v); i + 3}
| '+' -> do {Bytes.set s1 i1 ' '; succ i}
| x -> do {Bytes.set s1 i1 x; succ i}
] in
copy_decode_in s1 i (succ i1)
else s1 in
let rec strip_heading_and_trailing_spaces s =
if Bytes.length s > 0 then
if String.length s > 0 then
if s.[0] == ' ' then
strip_heading_and_trailing_spaces (Bytes.sub s 1 (Bytes.length s - 1))
else if s.[Bytes.length s - 1] == ' ' then
strip_heading_and_trailing_spaces (Bytes.sub s 0 (Bytes.length s - 1))
strip_heading_and_trailing_spaces (String.sub s 1 (String.length s - 1))
else if s.[String.length s - 1] == ' ' then
strip_heading_and_trailing_spaces (String.sub s 0 (String.length s - 1))
else s
else s in
if need_decode 0 then
let len = compute_len 0 0 in
let s1 = Bytes.create len in
strip_heading_and_trailing_spaces (copy_decode_in s1 0 0)
else s;
(* ça convertit une chaine venant de l'URL en une a-list; la chaine est
une suite de paires clé=valeur séparées par des ; ou des \& *)
strip_heading_and_trailing_spaces (Bytes.to_string (copy_decode_in s1 0 0))
else s
;
(* converts a string coming from the URL into an a-list; the string is
a sequence of pairs key=value separated by ; or \& *)
value create_env s =
let rec get_assoc beg i =
if i == Bytes.length s then
if i == beg then [] else [Bytes.sub s beg (i - beg)]
if i == String.length s then
if i == beg then [] else [String.sub s beg (i - beg)]
else if s.[i] == ';' || s.[i] == '&' then
let next_i = succ i in
[Bytes.sub s beg (i - beg) :: get_assoc next_i next_i]
[String.sub s beg (i - beg) :: get_assoc next_i next_i]
else get_assoc beg (succ i) in
let rec separate i s =
if i = Bytes.length s then (s, "")
if i = String.length s then (s, "")
else if s.[i] == '=' then
(Bytes.sub s 0 i, Bytes.sub s (succ i) (Bytes.length s - succ i))
(String.sub s 0 i, String.sub s (succ i) (String.length s - succ i))
else separate (succ i) s in
List.map (separate 0) (get_assoc 0 0)
;
......@@ -105,18 +105,13 @@ value url_encode s =
(* Unreserved characters *)
[ 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '.' | '_' | '~' as c ->
String.make 1 c
(* Special case of the space character *)
| ' ' -> "+"
(* Reserved characters *)
| c -> "%" ^ hexa_str c
]
in
] in
let char_of_string s =
if String.length s = 1 then s.[0] else failwith "char_of_string"
in
if String.length s = 1 then s.[0] else failwith "char_of_string" in
let subst s = s |> Str.matched_string |> char_of_string |> url_encode in
let any_char = Str.regexp ".\\|\n" in
Str.global_substitute any_char subst s
......@@ -128,8 +123,7 @@ value url ?query ?fragment path =
let opt_part prefix = fun
[ None -> ""
| Some part -> prefix ^ part
]
in
] in
let query_part = opt_part "?" query in
let fragment_part = opt_part "#" fragment in
path ^ query_part ^ fragment_part
......
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* Checkpoints management *)
......@@ -16,11 +16,7 @@ value rec phase_encode = fun
"<{" ^ string_of_phase ph ^ "}{" ^
string_of_phase ph' ^ "}{" ^
Canon.decode prev ^ "}{" ^ Canon.decode form ^ "}>"
| Tad (ph,ph') form sfx ->
"(" ^ phase_encode ph ^ "{" ^
string_of_phase ph' ^ "}{" ^
Canon.decode form ^ "}{" ^ Canon.decode sfx ^ "})"
| phase -> "{" ^ string_of_phase phase ^ "}"
| phase -> "{" ^ string_of_phase phase ^ "}"
]
and bool_encode b = if b then "t" else "f"
;
......@@ -60,11 +56,7 @@ EXTEND Gram
; pre = TEXT; form = TEXT ; ">" ->
Comp (phase_of_string p, phase_of_string p')
(Encode.code_string pre) (Encode.code_string form)
| "("; p = phase; p' = TEXT (* Taddhita *)
; form = TEXT; sfx = TEXT; ")" ->
Tad (p, phase_of_string p')
(Encode.code_string form) (Encode.code_string sfx)
| p = TEXT -> phase_of_string p
| p = TEXT -> phase_of_string p
] ] ;
phase_rword:
[ [ s = phase; ","; o = TEXT -> (s, Encode.rev_code_string o) ] ] ;
......
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet & Pawan Goyal *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(*i module Conj_infos : sig i*)
......@@ -13,13 +13,14 @@
type vmorph =
[ Prim of int and bool and Word.word (* primary conjugation *)
(* gana pada form of present 3rd sg for checking *)
| Causa of Word.word (* causative 3rd sg conjugation *)
| Inten of Word.word (* intensive 3rd sg conjugation *)
| Desid of Word.word (* desiderative 3rd sg conjugation *)
(* gana pada form of present 3rd sg for checking *)
(* pada=True Parasmaipada pada=False AAtmanepada *)
| Causa of Word.word (* causative 3rd sg form *)
| Inten of Word.word (* intensive 3rd sg form *)
| Desid of Word.word (* desiderative 3rd sg form *)
]
;
type root_infos = (vmorph * bool) (* [True] means root admits preverb aa- *)
; (* NB should be (list vmorph * bool) for good factorisation *)
; (* NB could be (list vmorph * bool) for better factorisation *)
(*i end; i*)
......@@ -4,12 +4,13 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* CGI-bin conjugation for computing root conjugations. *)
(* This CGI is triggered by page [grammar_page] in [dico_dir]. *)
(* Reads its input in shell variable [QUERY_STRING] URI-encoded. *)
(* Reads its grammatical information from [public_roots_infos_file] *)
(* Prints an html document of root conjugations on [stdout]. *)
(*i Test: (csh): setenv QUERY_STRING "q=i;c=2"; ./conjugation i*)
(*i Web: http://skt_server_url/cgi-bin/sktconjug?q=i;c=2 i*)
......@@ -341,6 +342,7 @@ value display_inflected_u font inf absya per abstva = do
; display_ind (absolutive_caption True font) font abstva
; display_ind (absolutive_caption False font) font (List.map prefix_dash absya)
where prefix_dash (c,w) = (c,[ 0 :: w ])
(* NB will display twice absol in -am *)
; display_ind (peripft_caption font) font per
; pl center_end
}
......@@ -420,8 +422,8 @@ value look_up_and_display font gana entry =
[ [] -> acc
| [ x :: rest ] -> match x with
[ Parts.Pppa_ con stem _ when con=conj ->
let sm = Parts.fix stem "at"
and sf = Parts.fix stem "atii" in do
let sm = Parts.fix stem "vat"
and sf = Parts.fix stem "vatii" in do
{ display_part font entry Pppa sm sf
; p acc rest
}
......@@ -589,10 +591,9 @@ value look_up_and_display font gana entry =
; pl table_end (* Mauve *)
; pl center_end
; pl html_paragraph (* Now display indeclinable root forms if any *)
; let (inf,_,_,_) = Deco.fold sort_out_u init_u indecls.val
; let (inf,_,_,abstvaa) = Deco.fold sort_out_u init_u abstvaa.val
and (_,absya,_,_) = Deco.fold sort_out_u init_u absya.val
and (_,_,per,_) = Deco.fold sort_out_u init_u peri.val
and (_,_,_,abstvaa) = Deco.fold sort_out_u init_u abstvaa.val in
and (_,_,per,_) = Deco.fold sort_out_u init_u peri.val in
if absya=[] && per=[] && abstvaa=[] then () else do
(* Display indeclinable forms *)
{ pl center_begin
......@@ -638,12 +639,15 @@ value look_up_and_display font gana entry =
; let infos = (* should be a call to a service that gives one [entry_infos] *)
(Gen.gobble public_roots_infos_file : Deco.deco root_infos) in
let entry_infos = Deco.assoc (Encode.code_string entry) infos in
if gana = 0 then secondary_conjugs entry_infos
else print_conjug Primary Parts.participles.val
if gana = 0 then secondary_conjugs entry_infos (* legacy *)
else do { print_conjug Primary Parts.participles.val
; secondary_conjugs entry_infos (* new *)
}
}
;
(* [in_lexicon entry] encodes [entry] (a string in VH transliteration) with
   [Encode.code_string] and asks [Index.is_in_lexicon] about the result. *)
value in_lexicon entry =
  let coded = Encode.code_string entry in
  Index.is_in_lexicon coded
(* Problem: may give link to a non-root entry if called from Grammar service *)
(* [doubt s] prefixes [s] with a question mark. *)
and doubt s = String.concat "" [ "?"; s ]
;
(* Compute homonym index for a given present class. *)
......@@ -657,6 +661,7 @@ value resolve_homonym entry =
[ 1 -> match entry with
[ ".rc"
| "krudh"
| "kha~nj"
| "cit"
| "chad"
| "tyaj"
......@@ -754,17 +759,22 @@ value resolve_homonym entry =
| 4 -> match entry with
[ "k.sudh"
| "dam"
| "diiv"
| "d.rz"
| "druh"
| "dhii"
| "naz"
| "pad"
| "pu.s"
| "budh"
| "mad"
| "yudh"
| "zam"
| "saa"
| "sidh"
| "snih"
| "snuh" -> first entry
| "div" -> first "diiv" (* since MW spells div *)
| "as"
| "i.s"
| "tan"
......@@ -772,7 +782,6 @@ value resolve_homonym entry =
| "draa"
| "dhaa"
| "pat"
| "zam"
| "svid" -> second entry
| "vaa" -> third entry
| _ -> entry
......@@ -782,6 +791,7 @@ value resolve_homonym entry =
| "k.r"
| "dhuu"
| "v.r" -> first entry
| "p.r"
| "su"
| "hi" -> second entry
| _ -> entry
......
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* Syntactico/semantic analysis and penalty computations. *)
......@@ -111,6 +111,7 @@ type regime =
| Quotative (* aahur - it is said *)
(*| Bitransitive - use of transitive with 2 accusatives *)
(*| Regime of (list case * list case) - specific regime - unused so far *)
(* Actually a root should have a valency list like [ 0; 1; 2 ] for "bhaa.s" *)
]
;
......@@ -129,7 +130,7 @@ value root_regime = fun
| "t.r.s#1" | "trap" | "tras" | "tvar" | "tsar" | "dak.s" | "dal" | "das"
| "dah#1" | "dih" | "diik.s" | "diip" | "du.s" | "d.rh" | "dev#1" | "dyut#1"
| "draa#1" | "draa#2" | "dhaav#1" | "dhru" | "dhvan" | "dhv.r" | "na.t"
| "nand" | "nard" | "naz#1" | "nah" | "nii#1" | "n.rt" | "pat#1" | "pi#2"
| "nand" | "nard" | "naz#1" | "nah" | "nii#1" | "n.rt" | "pat#1" | "pii"
| "puuy" | "p.r#2" | "pyaa" | "prath" | "phal" | "ba.mh" | "bal" | "bha.n.d"
| "bhand" | "bha.s" | "bhaa#1" | "bhaas#1" | "bhii#1" | "bhuj#1" | "bhuu#1"
| "bhra.mz" | "bhram" | "bhraaj" | "ma.mh" | "majj" | "mad#1" | "mud#1" | "muh"
......@@ -148,6 +149,8 @@ value root_regime = fun
| "v.r.s" -> Factitive
| "ah" -> Quotative
| _ -> (* sakarmaka in all usages *) Transitive
(* But "bhaa.s" is Transitive, even though it may be used with 0 or 2 objects *)
(* Thus a penalty should not occur if it has no object or 2 objects *)
]
;
(* But valency may depend on gana for the present system *)
......
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet & Pawan Goyal *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* Module Control contains exceptions of global scope *)
......
......@@ -46,7 +46,7 @@ module Analysis : sig
type t
;
value make :
Analyzer.t -> Html.language -> string -> Num.num -> t
Analyzer.t -> Html.language -> string -> int (* Num.num *) -> t
;
value analyzer : t -> Analyzer.t
;
......@@ -54,14 +54,14 @@ module Analysis : sig
;
value checkpoints : t -> string
;
value nb_sols : t -> Num.num
value nb_sols : t -> int (* Num.num *)
;
end = struct
type t =
{ analyzer : Analyzer.t
; lang : Html.language
; checkpoints : string
; nb_sols : Num.num
; nb_sols : int (* Num.num *)
}
;
value make analyzer lang checkpoints nb_sols =
......@@ -82,6 +82,12 @@ module Encoding : sig
;
value to_string : t -> string
;
value of_string : string -> t
;
value encode : t -> string -> Word.word
;
value decode : t -> Word.word -> string
;
end = struct
type t = [ Velthuis | WX | KH | SLP1 | Devanagari | IAST ]
;
......@@ -94,6 +100,25 @@ end = struct
| IAST -> "roma"
]
;
(* [of_string s] maps a transliteration code to the corresponding encoding.
   Recognized codes are "VH", "WX", "KH", "SL", "deva" and "roma"; any other
   string silently falls back to [Velthuis].
   NOTE(review): presumably the inverse of [to_string] on the recognized
   codes - only part of [to_string] is visible here, to be confirmed.
   The function is not recursive, so the former [rec] flag was dropped. *)
value of_string = fun
  [ "VH" -> Velthuis
  | "WX" -> WX
  | "KH" -> KH
  | "SL" -> SLP1
  | "deva" -> Devanagari
  | "roma" -> IAST
  | _ -> Velthuis (* default for unknown codes *)
  ]
;
(* [encode encoding] is the string-to-word coding function obtained by
   passing the name of [encoding] (as given by [to_string]) to
   [Encode.switch_code]. *)
value encode encoding = Encode.switch_code (to_string encoding)
;
(* [decode encoding] is the word-to-string function for [encoding]:
   [Canon.unidevcode] for Devanagari, [Canon.uniromcode] for IAST, and for
   the four remaining schemes the decoder selected by [Canon.switch_decode]
   from the scheme name. *)
value decode encoding =
  match encoding with
  [ Devanagari -> Canon.unidevcode
  | IAST -> Canon.uniromcode
  | Velthuis | WX | KH | SLP1 -> Canon.switch_decode (to_string encoding)
  ]
;
end
;
(* What about metadata (date, author, history...) ? *)
......@@ -128,15 +153,7 @@ end = struct
value id s = s.id
;
value text encoding s =
let encode_word =
match encoding with
[ Encoding.Velthuis | Encoding.WX | Encoding.KH | Encoding.SLP1 ->
encoding |> Encoding.to_string |> Canon.switch_decode
| Encoding.Devanagari -> Canon.unidevcode
| Encoding.IAST -> Canon.uniromcode
]
in
s.text |> List.map encode_word |> String.concat " "
s.text |> List.map (Encoding.decode encoding) |> String.concat " "
;
value unsandhied s = s.unsandhied
;
......@@ -266,17 +283,25 @@ module Make (Loc : Location) : S = struct
| Manager -> "manager"
]
;
value permission_of_string = fun
[ "annotator" -> Annotator
| "manager" -> Manager
| _ -> Reader
(* [restrict_permission perm] limits [perm] according to [Html.target]:
   when the target is [Html.Server] the result is always [Reader];
   for every other target [perm] is returned unchanged. *)
value restrict_permission perm =
  match Html.target with
  [ Html.Simputer | Html.Computer | Html.Station -> perm
  | Html.Server -> Reader
  ]
;
(* [permission_of_string s] reads a permission name and then applies
   [restrict_permission] to it. "annotator" and "manager" are recognized;
   any other string is read as [Reader]. *)
value permission_of_string s =
  let to_perm = fun
    [ "annotator" -> Annotator
    | "manager" -> Manager
    | _ -> Reader
    ] in
  restrict_permission (to_perm s)
;
value url dir permission sentence =
let analysis = Sentence.analysis sentence in
let encoding = Encoding.of_string Paths.default_transliteration in
let env =
[ (Params.corpus_permission, string_of_permission permission)
; ("text", Sentence.text Encoding.Velthuis sentence)
; ("text", Sentence.text encoding sentence)
; ("cpts", Analysis.checkpoints analysis)
; (Params.corpus_dir, dir)
; (Params.sentence_no, sentence |> Sentence.id |> string_of_int)
......@@ -294,6 +319,7 @@ module Make (Loc : Location) : S = struct
let env =
[ (Params.corpus_permission, string_of_permission permission)
; ("text", Sentence.text Encoding.Velthuis sentence)
; ("t", Encoding.(to_string Velthuis))
; ("cpts", Analysis.checkpoints analysis)
; (Params.corpus_dir, dir)
; (Params.sentence_no, sentence |> Sentence.id |> string_of_int)
......
......@@ -28,7 +28,7 @@ end
module Analysis : sig
type t
;
value make : Analyzer.t -> Html.language -> string -> Num.num -> t
value make : Analyzer.t -> Html.language -> string -> int (* Num.num *) -> t
;
value analyzer : t -> Analyzer.t
;
......@@ -36,7 +36,7 @@ module Analysis : sig
;
value checkpoints : t -> string
;
value nb_sols : t -> Num.num
value nb_sols : t -> int (* Num.num *)
;
end
;
......@@ -45,6 +45,12 @@ module Encoding : sig
;
value to_string : t -> string
;
value of_string : string -> t
;
value encode : t -> string -> Word.word
;
value decode : t -> Word.word -> string
;
end
;
module Sentence : sig
......
......@@ -4,7 +4,7 @@
(* *)
(* Idir Lankri *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
open Html;
......@@ -18,9 +18,9 @@ open Web;
(* An integer range described by its two bounds [start] and [stop]. *)
type gap = { start : int; stop : int }
;
(* The following functions assume that the given list is sorted in
increasing order and represents a subset of positive integers. In
particular, the lowest bound of a gap is at least [1] and the
greatest at most [max_int]). We call "group" a list of consecutive
increasing order and represents a subset of positive integers.
In particular, the lowest bound of a gap is at least [1] and the
greatest at most [max_int]). We call "group" a list of consecutive
integers. *)
(* The widest gap: it starts at [1] and stops at [max_int]. *)
value max_gap = { stop = max_int; start = 1 }
......@@ -115,28 +115,23 @@ value sentence_links dir permission sentences =
match font with
[ Multilingual.Deva -> Corpus.Encoding.Devanagari
| Multilingual.Roma -> Corpus.Encoding.IAST
]
in
] in
let text = Corpus.Sentence.text encoding sentence in
let display =
match font with
[ Multilingual.Deva -> deva16_blue
| Multilingual.Roma -> span Trans16
]
in
] in
text
|> anchor_ref (sentence |> Web_corpus.url dir permission |> escape)
|> display
in
|> display in
List.map to_anchor_ref sentences
;
(* [section_selection dir sections] builds the selection widget for the
   sections of corpus directory [dir].  Each option pairs the path of a
   section under [dir] (as built by [Filename.concat]) with the bare
   section name; the resulting association list is handed to
   [option_select_label] under the [Params.corpus_dir] parameter name.
   NOTE(review): presumably the first component is the submitted value and
   the second the displayed label -- confirm against [option_select_label]. *)
value section_selection dir sections =
  (* Single traversal instead of [List.map] followed by [List.combine]. *)
  let options =
    List.map (fun section -> (Filename.concat dir section, section)) sections in
  option_select_label Params.corpus_dir options
;
value add_sentence_form dir permission gap =
......@@ -230,8 +225,6 @@ value body dir permission =
|> pl
; close_page_with_margin ()
}
| Web_corpus.Sentences sentences ->
let groups = group_sentences dir sentences in
do
......@@ -243,7 +236,6 @@ value body dir permission =
groups |> List.map (htmlify_group dir permission) |> List.iter pl
; close_page_with_margin ()
}
| Web_corpus.Sections sections ->
do
{ center_begin |> pl
......@@ -259,7 +251,7 @@ value body dir permission =
value mk_page dir permission =
let title_str =
"Sanskrit Corpus " ^
(permission |> Web_corpus.string_of_permission |> String.capitalize)
(permission |> Web_corpus.string_of_permission |> String.capitalize_ascii)
in
let clickable_title =
let query =
......
......@@ -12,7 +12,6 @@
(* Generate the page displaying a view of the given corpus subdirectory.
   The output channel is as always either [stdout] for CGI output or
   a static HTML file (according to the "magic switch"
   [Web.output_channel]).  NB: No error handling is done by this function;
   callers are expected to catch exceptions themselves.  *)
value mk_page : string -> Web_corpus.permission -> unit
;
......@@ -4,7 +4,7 @@
(* *)
(* Idir Lankri *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* CGI script [manager] for corpus management, i.e. for listing and
......@@ -12,16 +12,14 @@
(* Entry point of the corpus manager CGI.  Reads the corpus directory and
   the permission from the query string (both default to the empty string
   when absent), then generates the corresponding corpus page.
   Any failure is reported through [Web.abort] in the default language:
   [Sys_error] with its system message, anything else as a fatal anomaly. *)
value main =
  let env = Cgi.create_env (Cgi.query_string ()) in
  let corpdir = Cgi.decoded_get Params.corpus_dir "" env
  and corpperm = Cgi.decoded_get Params.corpus_permission "" env in
  let lang = Html.default_language in
  try
    (* The permission is decoded inside the [try] so that a failure of
       [Web_corpus.permission_of_string] also produces an abort page.
       NOTE(review): whether it can raise on an unknown string is not
       visible here -- if it cannot, this is equivalent to decoding it
       before the [try]. *)
    let permission = Web_corpus.permission_of_string corpperm in
    Corpus_manager.mk_page corpdir permission
  with
  [ Sys_error msg -> Web.abort lang Control.sys_err_mess msg
  | _ -> Web.abort lang Control.fatal_err_mess "Unexpected anomaly"
  ]
;
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* Date utilities *)
......
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet & Pawan Goyal *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(****************************************************************)
......@@ -28,7 +28,7 @@ open Gen;
#load "version.cmo";
open Version;
#load "date.cmo";
#load "html.cmo";
(* #load "html.cmo"; *)
(* #load "web.cmo";
open Web;
#load "cgi.cmo"; *)
......@@ -38,6 +38,7 @@ open Canon;
open Phonetics;
open List;
#load "min_lexer.cmo";
#load "transduction.cmo";
open Transduction;
#load "encode.cmo";
......
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* CGI-bin declension for computing declensions. *)
......@@ -63,26 +63,26 @@ value prlist_font font =
]
;
(* [display_title font] prints the page title [dtitle font] as the single
   header cell of a one-row table built with [centered Mauve], surrounded
   by paragraph marks. *)
value display_title font = do
  { html_paragraph |> pl
  ; table_begin (centered Mauve) |> pl
  ; tr_begin |> ps
  ; th_begin |> ps
  ; dtitle font |> ps
  ; th_end |> ps
  ; tr_end |> ps
  ; table_end |> pl (* Mauve *)
  ; html_paragraph |> pl
  }
(* [display_subtitle title] prints the string [title] with the same layout
   as [display_title], in a table built with [centered Deep_sky]. *)
and display_subtitle title = do
  { html_paragraph |> pl
  ; table_begin (centered Deep_sky) |> pl
  ; tr_begin |> ps
  ; th_begin |> ps
  ; title |> ps
  ; th_end |> ps
  ; tr_end |> ps
  ; table_end |> pl (* Centered *)
  ; html_paragraph |> pl
  }
;
value cases_of decls =
......@@ -100,32 +100,32 @@ value cases_of decls =
in List.fold_left reorg init decls (* (v,n,a,i,d,ab,g,l) *)
;
(* [print_ro1 caption s d p] prints one table row made of four header
   cells: [caption] followed by the strings [s], [d] and [p], consecutive
   cells being separated by [xml_next "th"].
   NOTE(review): [s], [d], [p] are presumably the singular, dual and plural
   column labels -- confirm at the call sites. *)
value print_ro1 caption s d p = do
  { tr_begin |> ps
  ; th_begin |> ps
  ; caption |> ps
  ; xml_next "th" |> ps
  ; s |> ps
  ; xml_next "th" |> ps
  ; d |> ps
  ; xml_next "th" |> ps
  ; p |> ps
  ; th_end |> ps
  (* The row end goes through [pl], as it did before the pipeline rewrite
     and as [print_row_font] does; the rewrite had switched it to [ps]. *)
  ; tr_end |> pl
  }
;
(* [print_row_font font case s d p] prints one table row for the case
   label [case]: a header cell holding [case], then the three form lists
   [s], [d] and [p], each rendered by [prlist_font font].  The row is opened
   with [tr_mouse_begin] using the [Light_blue] and [Pale_yellow] colors.
   NOTE(review): [s], [d], [p] are presumably the singular, dual and plural
   forms -- confirm at the call sites. *)
value print_row_font font case s d p =
  let prlist = prlist_font font in do
  { tr_mouse_begin (color Light_blue) (color Pale_yellow) |> ps
  ; th_begin |> ps
  ; case |> ps
  ; xml_next "th" |> ps
  ; s |> prlist
  ; xml_next "th" |> ps
  ; d |> prlist
  ; xml_next "th" |> ps
  ; p |> prlist
  ; th_end |> ps
  ; tr_end |> pl
  }
;
value display_gender font gender = fun
......@@ -165,25 +165,25 @@ value display_gender font gender = fun
(* [display_iic font forms] prints the list [forms] under an [h3] heading
   labelled [compound_name font]; each form is printed with [pr_i font].
   Nothing is printed when the list is empty. *)
value display_iic font = fun
  [ [] -> ()
  | l -> do
    { html_paragraph |> pl
    ; h3_begin C3 |> ps
    ; compound_name font |> ps
    ; " " |> ps
    ; List.iter (fun w -> pr_i font w) l
    ; h3_end |> ps
    }
  ]
;
(* [display_avy font forms] prints the list [forms] under an [h3] heading
   labelled [avyaya_name font].  Each form is prefixed with code [0]
   before being printed with [pr_f font].
   Nothing is printed when the list is empty. *)
value display_avy font = fun
  [ [] -> ()
  | l -> do
    { html_paragraph |> pl
    ; h3_begin C3 |> ps
    ; avyaya_name font |> ps
    ; " " |> ps
    ; let ifc_form w = [ 0 (* - *) :: w ] in
      let print_avy w = pr_f font (ifc_form w) in
      List.iter print_avy l
    ; h3_end |> ps
    }
  ]
;
......@@ -197,7 +197,7 @@ value sort_out accu form = fun
| Neu -> (mas,fem,[ t :: neu ],any,iic,avy)
| Deictic _ -> (mas,fem,neu,[ t :: any ],iic,avy)
]
| Bare_stem | Auxi_form -> (mas,fem,neu,any,[ f :: iic ],avy)
| Bare_stem | Gati -> (mas,fem,neu,any,[ f :: iic ],avy)
| Avyayaf_form -> (mas,fem,neu,any,iic,[ f :: avy ])
| Ind_form _ | Verb_form _ _ _ | Ind_verb _ | Abs_root _
| Avyayai_form | Unanalysed | PV _
......@@ -214,15 +214,15 @@ value display_inflected font (gen_deco,pn_deco,voca_deco,iic_deco,avy_deco) =
let (mas,fem,neu,any,_,_) = Deco.fold sort_out non_vocas voca_deco
and iic = List.map fst (Deco.contents iic_deco)
and avy = List.map fst (Deco.contents avy_deco) in do
{ pl center_begin
{ center_begin |> pl
; display_gender font Mas mas
; display_gender font Fem fem
; display_gender font Neu neu
; display_gender font (Deictic Numeral) any (* arbitrary *)
; display_iic font iic
; display_avy font avy
; pl center_end
; pl html_paragraph
; center_end |> pl
; html_paragraph |> pl
}
;
(* [entry:skt] [part:string] *)
......
This diff is collapsed.
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* Dispatcher: Sanskrit Engine in 55 phases automaton (plus 2 fake ones) *)
......@@ -53,7 +53,9 @@ value trim_tags :
;
value validate : output -> output (* consistency check and glueing *)
;
value color_of_phase : phase -> Html.color;
value terminal_sa : output -> bool
;
value color_of_phase : phase -> Html.color
;
end;
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(*i module Encode = struct i*)
......@@ -18,15 +18,20 @@ exception In_error of string (* Error in user or corpus input *)
value is_vowel c = vowel c || c>100