Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Gérard Huet
Heritage_Platform
Commits
a80f53ed
Commit
a80f53ed
authored
Jun 04, 2020
by
Gérard Huet
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Devanaagarii printing driven by config for Interface/Reader
parent
912b875a
Changes
22
Hide whitespace changes
Inline
Side-by-side
Showing
22 changed files
with
207 additions
and
159 deletions
+207
-159
ML/conjugation.ml
ML/conjugation.ml
+6
-5
ML/declension.ml
ML/declension.ml
+28
-24
ML/encode.ml
ML/encode.ml
+7
-7
ML/html.ml
ML/html.ml
+1
-1
ML/indexer.ml
ML/indexer.ml
+7
-5
ML/indexerd.ml
ML/indexerd.ml
+5
-3
ML/interface.ml
ML/interface.ml
+8
-7
ML/make_xml_data.ml
ML/make_xml_data.ml
+3
-3
ML/mk_reader_page.ml
ML/mk_reader_page.ml
+5
-5
ML/morpho.ml
ML/morpho.ml
+27
-29
ML/morpho_html.ml
ML/morpho_html.ml
+47
-30
ML/morphology.mli
ML/morphology.mli
+1
-1
ML/parser.ml
ML/parser.ml
+3
-1
ML/phonetics.ml
ML/phonetics.ml
+35
-21
ML/reader.ml
ML/reader.ml
+4
-2
ML/sanskrit.ml
ML/sanskrit.ml
+4
-4
ML/sanskrit.mli
ML/sanskrit.mli
+3
-3
ML/transduction.ml
ML/transduction.ml
+2
-2
ML/verbs.ml
ML/verbs.ml
+6
-1
ML/version.ml
ML/version.ml
+1
-1
ML/web.ml
ML/web.ml
+3
-3
SETUP/version.txt
SETUP/version.txt
+1
-1
No files found.
ML/conjugation.ml
View file @
a80f53ed
...
@@ -18,11 +18,11 @@
...
@@ -18,11 +18,11 @@
(*i executable module Conjugation = struct i*)
(*i executable module Conjugation = struct i*)
open
Skt_morph
;
open
Skt_morph
;
open
Morphology
;
(* inflected
[
Verb_form] etc. *)
open
Morphology
;
(*
[
inflected Verb_form] etc. *)
open
Conj_infos
;
(* [vmorph Causa Inten Desid root_infos] *)
open
Conj_infos
;
(* [vmorph Causa Inten Desid root_infos] *)
open
Inflected
;
(* roots.val indecls.val etc. *)
open
Inflected
;
(*
[
roots.val indecls.val
]
etc. *)
open
Html
;
open
Html
;
open
Web
;
(* ps pl font Deva Roma pr_font etc. *)
open
Web
;
(*
[
ps pl font Deva Roma pr_font
]
etc. *)
open
Cgi
;
open
Cgi
;
open
Multilingual
;
(* [gentense tense_name captions] *)
open
Multilingual
;
(* [gentense tense_name captions] *)
...
@@ -843,7 +843,8 @@ value conjs_engine () = do
...
@@ -843,7 +843,8 @@ value conjs_engine () = do
try
try
let
url_encoded_entry
=
List
.
assoc
"q"
env
let
url_encoded_entry
=
List
.
assoc
"q"
env
and
url_encoded_class
=
List
.
assoc
"c"
env
and
url_encoded_class
=
List
.
assoc
"c"
env
and
font
=
font_of_string
(
get
"font"
env
Paths
.
default_display_font
)
and
font
=
let
s
=
get
"font"
env
Paths
.
default_display_font
in
font_of_string
s
(* deva vs roma print *)
(* OBS and stamp = get "v" env "" *)
(* OBS and stamp = get "v" env "" *)
and
translit
=
get
"t"
env
"VH"
(* DICO created in VH trans *)
and
translit
=
get
"t"
env
"VH"
(* DICO created in VH trans *)
and
lex
=
get
"lex"
env
"SH"
(* default Heritage *)
in
and
lex
=
get
"lex"
env
"SH"
(* default Heritage *)
in
...
@@ -872,7 +873,7 @@ value conjs_engine () = do
...
@@ -872,7 +873,7 @@ value conjs_engine () = do
let
entry
=
resolve_homonym
entry_VH
gana
in
(* VH string with homo *)
let
entry
=
resolve_homonym
entry_VH
gana
in
(* VH string with homo *)
let
known
=
in_lexicon
entry
(* in lexicon? *)
let
known
=
in_lexicon
entry
(* in lexicon? *)
(* we should check it is indeed a root or denominative *)
in
do
(* we should check it is indeed a root or denominative *)
in
do
{
let
link
=
if
known
then
Morpho_html
.
skt_anchor
False
font
entry
{
let
link
=
if
known
then
Morpho_html
.
skt_anchor
False
entry
else
doubt
(
Morpho_html
.
skt_roma
entry
)
in
else
doubt
(
Morpho_html
.
skt_roma
entry
)
in
let
subtitle
=
hyperlink_title
font
link
in
let
subtitle
=
hyperlink_title
font
link
in
display_subtitle
(
h1_center
subtitle
)
display_subtitle
(
h1_center
subtitle
)
...
...
ML/declension.ml
View file @
a80f53ed
...
@@ -19,8 +19,8 @@
...
@@ -19,8 +19,8 @@
open
Skt_morph
;
open
Skt_morph
;
open
Morphology
;
(* [Noun_form] etc. *)
open
Morphology
;
(* [Noun_form] etc. *)
open
Html
;
(* [narrow_screen html_red] etc. *)
open
Html
;
(* [narrow_screen html_red] etc. *)
open
Web
;
(* ps pl font Deva Roma pr_font etc. *)
open
Web
;
(*
[
ps pl font Deva Roma pr_font
]
etc. *)
open
Cgi
;
(* [create_env] etc.
*)
open
Cgi
;
(* [create_env] etc. *)
open
Multilingual
;
(* [declension_title compound_name avyaya_name] *)
open
Multilingual
;
(* [declension_title compound_name avyaya_name] *)
value
dtitle
font
=
h1_title
(
declension_title
narrow_screen
font
)
value
dtitle
font
=
h1_title
(
declension_title
narrow_screen
font
)
...
@@ -46,7 +46,7 @@ value display_subtitle title = do
...
@@ -46,7 +46,7 @@ value display_subtitle title = do
;
title
|>
ps
;
title
|>
ps
;
th_end
|>
ps
;
th_end
|>
ps
;
tr_end
|>
ps
;
tr_end
|>
ps
;
table_end
|>
pl
(*
C
entered *)
;
table_end
|>
pl
(*
c
entered *)
;
html_paragraph
|>
pl
;
html_paragraph
|>
pl
}
}
;
;
...
@@ -132,19 +132,19 @@ value display_iic font = fun
...
@@ -132,19 +132,19 @@ value display_iic font = fun
|
l
->
do
|
l
->
do
{
html_paragraph
|>
pl
{
html_paragraph
|>
pl
;
h3_begin
C3
|>
ps
;
h3_begin
C3
|>
ps
;
compound_name
font
|>
ps
;
ps
"
"
;
compound_name
font
|>
ps
;
"
"
|>
ps
;
let
print_iic
w
=
pr_i
font
w
in
;
let
print_iic
w
=
pr_i
font
w
in
List
.
iter
print_iic
l
List
.
iter
print_iic
l
;
h3_end
|>
ps
;
h3_end
|>
ps
}
}
]
]
;
;
value
display_avy
font
=
fun
value
display_avy
a
font
=
fun
[
[]
->
()
[
[]
->
()
|
l
->
do
|
l
->
do
{
html_paragraph
|>
pl
{
html_paragraph
|>
pl
;
h3_begin
C3
|>
ps
;
h3_begin
C3
|>
ps
;
avyaya_name
font
|>
ps
;
ps
"
"
;
avyaya_name
font
|>
ps
;
"
"
|>
ps
;
let
ifc_form
w
=
[
0
]
(* - *)
@
w
in
;
let
ifc_form
w
=
[
0
]
(* - *)
@
w
in
let
print_iic
w
=
pr_font
font
(
ifc_form
w
)
in
let
print_iic
w
=
pr_font
font
(
ifc_form
w
)
in
List
.
iter
print_iic
l
List
.
iter
print_iic
l
...
@@ -154,16 +154,16 @@ value display_avy font = fun
...
@@ -154,16 +154,16 @@ value display_avy font = fun
;
;
value
sort_out
accu
form
=
fun
value
sort_out
accu
form
=
fun
[
[
(
_
,
morphs
)
]
->
List
.
fold_left
(
reorg
form
)
accu
morphs
[
[
(
_
,
morphs
)
]
->
List
.
fold_left
(
reorg
form
)
accu
morphs
where
reorg
f
(
mas
,
fem
,
neu
,
any
,
iic
,
avy
)
=
fun
where
reorg
f
(
mas
,
fem
,
neu
,
any
,
iic
,
avy
a
)
=
fun
[
Noun_form
g
n
c
->
let
t
=
(
n
,
c
,
f
)
in
[
Noun_form
g
n
c
->
let
t
=
(
n
,
c
,
f
)
in
match
g
with
match
g
with
[
Mas
->
([
t
::
mas
]
,
fem
,
neu
,
any
,
iic
,
avy
)
[
Mas
->
([
t
::
mas
]
,
fem
,
neu
,
any
,
iic
,
avy
a
)
|
Fem
->
(
mas
,
[
t
::
fem
]
,
neu
,
any
,
iic
,
avy
)
|
Fem
->
(
mas
,
[
t
::
fem
]
,
neu
,
any
,
iic
,
avy
a
)
|
Neu
->
(
mas
,
fem
,
[
t
::
neu
]
,
any
,
iic
,
avy
)
|
Neu
->
(
mas
,
fem
,
[
t
::
neu
]
,
any
,
iic
,
avy
a
)
|
Deictic
_
->
(
mas
,
fem
,
neu
,
[
t
::
any
]
,
iic
,
avy
)
|
Deictic
_
->
(
mas
,
fem
,
neu
,
[
t
::
any
]
,
iic
,
avy
a
)
]
]
|
Bare_stem
|
Gati
->
(
mas
,
fem
,
neu
,
any
,
[
f
::
iic
]
,
avy
)
|
Bare_stem
|
Gati
->
(
mas
,
fem
,
neu
,
any
,
[
f
::
iic
]
,
avy
a
)
|
Avyayaf_form
->
(
mas
,
fem
,
neu
,
any
,
iic
,
[
f
::
avy
])
|
Avyayaf_form
->
(
mas
,
fem
,
neu
,
any
,
iic
,
[
f
::
avy
a
])
|
Ind_form
_
|
Verb_form
_
_
_
|
Ind_verb
_
|
Abs_root
_
|
Ind_form
_
|
Verb_form
_
_
_
|
Ind_verb
_
|
Abs_root
_
|
Avyayai_form
|
Unanalysed
|
PV
_
|
Avyayai_form
|
Unanalysed
|
PV
_
|
Part_form
_
_
_
_
->
|
Part_form
_
_
_
_
->
...
@@ -173,19 +173,19 @@ value sort_out accu form = fun
...
@@ -173,19 +173,19 @@ value sort_out accu form = fun
]
]
and
init
=
([]
,
[]
,
[]
,
[]
,
[]
,
[]
)
and
init
=
([]
,
[]
,
[]
,
[]
,
[]
,
[]
)
;
;
value
display_inflected
font
(
gen_deco
,
pn_deco
,
voca_deco
,
iic_deco
,
avy_deco
)
=
value
display_inflected
font
(
gen_deco
,
pn_deco
,
voca_deco
,
iic_deco
,
avy
a
_deco
)
=
let
nouns
=
Deco
.
fold
sort_out
init
gen_deco
in
let
nouns
=
Deco
.
fold
sort_out
init
gen_deco
in
let
non_vocas
=
Deco
.
fold
sort_out
nouns
pn_deco
in
let
non_vocas
=
Deco
.
fold
sort_out
nouns
pn_deco
in
let
(
mas
,
fem
,
neu
,
any
,_,_
)
=
Deco
.
fold
sort_out
non_vocas
voca_deco
let
(
mas
,
fem
,
neu
,
any
,_,_
)
=
Deco
.
fold
sort_out
non_vocas
voca_deco
and
iic
=
List
.
map
fst
(
Deco
.
contents
iic_deco
)
and
iic
=
List
.
map
fst
(
Deco
.
contents
iic_deco
)
and
avy
=
List
.
map
fst
(
Deco
.
contents
avy_deco
)
in
do
and
avy
a
=
List
.
map
fst
(
Deco
.
contents
avy
a
_deco
)
in
do
{
center_begin
|>
pl
{
center_begin
|>
pl
;
display_gender
font
Mas
mas
;
display_gender
font
Mas
mas
;
display_gender
font
Fem
fem
;
display_gender
font
Fem
fem
;
display_gender
font
Neu
neu
;
display_gender
font
Neu
neu
;
display_gender
font
(
Deictic
Numeral
)
any
(* arbitrary *)
;
display_gender
font
(
Deictic
Numeral
)
any
(* arbitrary *)
;
display_iic
font
iic
;
display_iic
font
iic
;
display_avy
font
avy
;
display_avy
a
font
avy
a
;
center_end
|>
pl
;
center_end
|>
pl
;
html_paragraph
|>
pl
;
html_paragraph
|>
pl
}
}
...
@@ -241,7 +241,8 @@ value decls_engine () = do
...
@@ -241,7 +241,8 @@ value decls_engine () = do
and
url_encoded_participle
=
get
"p"
env
""
and
url_encoded_participle
=
get
"p"
env
""
and
url_encoded_source
=
get
"r"
env
""
and
url_encoded_source
=
get
"r"
env
""
(* optional root origin - used by participles in conjugation tables *)
(* optional root origin - used by participles in conjugation tables *)
and
font
=
font_of_string
(
get
"font"
env
Paths
.
default_display_font
)
and
font
=
let
s
=
get
"font"
env
Paths
.
default_display_font
in
font_of_string
s
(* deva vs roma print *)
and
translit
=
get
"t"
env
"VH"
(* DICO created in VH trans *)
and
translit
=
get
"t"
env
"VH"
(* DICO created in VH trans *)
and
lex
=
get
"lex"
env
"SH"
(* default Heritage *)
in
and
lex
=
get
"lex"
env
"SH"
(* default Heritage *)
in
let
entry_tr
=
decode_url
url_encoded_entry
(* : string in translit *)
let
entry_tr
=
decode_url
url_encoded_entry
(* : string in translit *)
...
@@ -257,14 +258,17 @@ value decls_engine () = do
...
@@ -257,14 +258,17 @@ value decls_engine () = do
(* will be avoided by unique name lookup *)
(* will be avoided by unique name lookup *)
let
entry
=
resolve_homonym
entry_VH
in
(* compute homonymy index *)
let
entry
=
resolve_homonym
entry_VH
in
(* compute homonymy index *)
let
link
=
let
link
=
if
in_lexicon
entry
then
Morpho_html
.
skt_anchor
False
font
entry
if
in_lexicon
entry
then
Morpho_html
.
skt_anchor
False
entry
(* We should check it is indeed a substantive entry
(* We should check it is indeed a substantive entry
and that Any is used for deictics/numbers (TODO) *)
and that Any is used for deictics/numbers (TODO) *)
else
let
root
=
if
source
=
""
then
"?"
(* unknown in lexicon *)
(* Also it should use unique naming for possible homo index *)
else
" from "
^
else
Morpho_html
.
skt_html_font
font
entry
|>
italics
in
if
in_lexicon
source
then
Morpho_html
.
skt_anchor
False
font
source
(* OBSOLETE indication of root for kridanta
else
doubt
(
Morpho_html
.
skt_roma
source
)
in
[let root = if source = "" then "?" (* unknown in lexicon *)
Morpho_html
.
skt_roma
entry
^
root
in
else " from " ^ (* should test font *) in
if in_lexicon source then Morpho_html.skt_anchor False font source
else doubt (Morpho_html.skt_roma source) in (* should test font *)
Morpho_html.skt_utf font entry ^ root in] *)
let
subtitle
=
hyperlink_title
font
link
in
do
let
subtitle
=
hyperlink_title
font
link
in
do
{
display_subtitle
(
h1_center
subtitle
)
{
display_subtitle
(
h1_center
subtitle
)
;
try
look_up
font
entry
(
Nouns
.
Gender
gender
)
part
;
try
look_up
font
entry
(
Nouns
.
Gender
gender
)
part
...
@@ -273,7 +277,7 @@ value decls_engine () = do
...
@@ -273,7 +277,7 @@ value decls_engine () = do
;
page_end
lang
True
;
page_end
lang
True
}
}
with
[
Stream
.
Error
_
->
with
[
Stream
.
Error
_
->
abort
lang
(
"Illegal "
^
translit
^
"
transliteration
"
)
entry_tr
]
abort
lang
(
"Illegal "
^
translit
^
"
input
"
)
entry_tr
]
}
}
;
;
value
safe_engine
()
=
value
safe_engine
()
=
...
...
ML/encode.ml
View file @
a80f53ed
...
@@ -4,7 +4,7 @@
...
@@ -4,7 +4,7 @@
(* *)
(* *)
(* Gérard Huet *)
(* Gérard Huet *)
(* *)
(* *)
(* ©20
19
Institut National de Recherche en Informatique et en Automatique *)
(* ©20
20
Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(**************************************************************************)
(*i module Encode = struct i*)
(*i module Encode = struct i*)
...
@@ -18,7 +18,7 @@ exception In_error of string (* Error in user or corpus input *)
...
@@ -18,7 +18,7 @@ exception In_error of string (* Error in user or corpus input *)
value
is_vowel
c
=
vowel
c
||
c
>
100
&&
c
<
114
(* accounts for upper case *)
value
is_vowel
c
=
vowel
c
||
c
>
100
&&
c
<
114
(* accounts for upper case *)
;
;
(* anusvara substituted by nasal or normalized to 14 when original *)
(* anusvara substituted by nasal or normalized to 14 when original *)
(* anunaasika
before
vowels treated as anusvaara *)
(* anunaasika
after
vowels treated as anusvaara *)
value
rec
normalize
=
normal_rec
False
value
rec
normalize
=
normal_rec
False
where
rec
normal_rec
after_vow
=
fun
where
rec
normal_rec
after_vow
=
fun
[
[]
->
[]
[
[]
->
[]
...
@@ -50,8 +50,8 @@ value code_string str = normalize (code_raw str) (* standard VH *)
...
@@ -50,8 +50,8 @@ value code_string str = normalize (code_raw str) (* standard VH *)
and
code_string_WX
str
=
normalize
(
code_raw_WX
str
)
and
code_string_WX
str
=
normalize
(
code_raw_WX
str
)
and
code_string_KH
str
=
normalize
(
code_raw_KH
str
)
and
code_string_KH
str
=
normalize
(
code_raw_KH
str
)
and
code_string_SL
str
=
normalize
(
code_raw_SL
str
)
and
code_string_SL
str
=
normalize
(
code_raw_SL
str
)
and
code_skt_ref
str
=
normalize
(
code_rawu
str
)
and
code_skt_ref
str
=
normalize
(
code_rawu
str
)
(* with upper letters *)
and
code_skt_ref_d
str
=
normalize
(
code_rawd
str
)
and
code_skt_ref_d
str
=
normalize
(
code_rawd
str
)
(* no diacritics *)
;
;
(* Switching code function according to transliteration convention *)
(* Switching code function according to transliteration convention *)
value
switch_code
=
fun
(* normalizes anusvaara in its input *)
value
switch_code
=
fun
(* normalizes anusvaara in its input *)
...
@@ -113,11 +113,11 @@ value code_strip_raw s = rev_strip (code_raw s)
...
@@ -113,11 +113,11 @@ value code_strip_raw s = rev_strip (code_raw s)
(* A cleaner solution would be to have type lexeme = (word * int)
(* A cleaner solution would be to have type lexeme = (word * int)
and "x#5" represented as (x,5) (0 if no homophone) *)
and "x#5" represented as (x,5) (0 if no homophone) *)
;
;
value
skt_to_deva
str
=
try
Canon
.
unidevcode
(
code_string
str
)
with
value
skt_to_deva
str
=
try
Canon
.
unidevcode
(
code_string
str
)
with
[
Failure
_
->
raise
(
In_error
str
)
]
[
Failure
_
->
raise
(
In_error
str
)
]
and
skt_raw_to_deva
str
=
try
Canon
.
unidevcode
(
code_raw
str
)
with
and
skt_raw_to_deva
str
=
try
Canon
.
unidevcode
(
code_raw
str
)
with
[
Failure
_
->
raise
(
In_error
str
)
]
[
Failure
_
->
raise
(
In_error
str
)
]
and
skt_
raw_
strip_to_deva
str
=
try
Canon
.
unidevcode
(
code_strip_raw
str
)
with
and
skt_strip_to_deva
str
=
try
Canon
.
unidevcode
(
code_strip_raw
str
)
with
[
Failure
_
->
raise
(
In_error
str
)
]
[
Failure
_
->
raise
(
In_error
str
)
]
;
;
(* Following not needed since [Transduction.skt_to_html] is more direct
(* Following not needed since [Transduction.skt_to_html] is more direct
...
...
ML/html.ml
View file @
a80f53ed
...
@@ -220,7 +220,7 @@ value rgb = fun (* a few selected HTML colors in rgb data *)
...
@@ -220,7 +220,7 @@ value rgb = fun (* a few selected HTML colors in rgb data *)
|
Blue
->
"#0000FF"
(* Canard = "#0000C0" ou "#0080FF" *)
|
Blue
->
"#0000FF"
(* Canard = "#0000C0" ou "#0080FF" *)
|
Green
->
"#008000"
(* Teal = "#008080" Olive = "#808000" *)
|
Green
->
"#008000"
(* Teal = "#008080" Olive = "#808000" *)
|
Aquamarine
->
"#6FFFC3"
(* actually Light Aquamarine *)
|
Aquamarine
->
"#6FFFC3"
(* actually Light Aquamarine *)
|
Lawngreen
->
"#7CFC00"
|
Lawngreen
->
"#66ff99"
(* was
"#7CFC00"
*)
|
Yellow
->
"#FFFF00"
|
Yellow
->
"#FFFF00"
|
Orange
->
"#FFA000"
|
Orange
->
"#FFA000"
|
Cyan
->
"#00FFFF"
(* Aqua = Cyan, Turquoise = "#40E0D0" *)
|
Cyan
->
"#00FFFF"
(* Aqua = Cyan, Turquoise = "#40E0D0" *)
...
...
ML/indexer.ml
View file @
a80f53ed
...
@@ -32,8 +32,8 @@ value answer_end () = do
...
@@ -32,8 +32,8 @@ value answer_end () = do
;
pl
html_paragraph
;
pl
html_paragraph
}
}
;
;
value
ok
(
mess
,
s
)
=
do
{
ps
mess
;
pl
(
Morpho_html
.
skt_anchor
_R
False
s
)
}
value
ok
(
mess
,
s
)
=
do
{
ps
mess
;
pl
(
Morpho_html
.
skt_anchor
False
s
)
}
and
ok2
(
mess
,
s1
,
s2
)
=
do
{
ps
mess
;
pl
(
Morpho_html
.
skt_anchor_R
2
s1
s2
)
}
and
ok2
(
mess
,
s1
,
s2
)
=
do
{
ps
mess
;
pl
(
Morpho_html
.
skt_anchor_R
s1
s2
)
}
(* ok2 prints the entry under the spelling given by the user, i.e. without
(* ok2 prints the entry under the spelling given by the user, i.e. without
normalisation, thus e.g. sandhi is not written sa.mdhi, and possibly
normalisation, thus e.g. sandhi is not written sa.mdhi, and possibly
suffixed by homonymy index 1, e.g. b.rh. *)
suffixed by homonymy index 1, e.g. b.rh. *)
...
@@ -59,7 +59,7 @@ and report_failure s = do
...
@@ -59,7 +59,7 @@ and report_failure s = do
{
ps
" not found in dictionary"
{
ps
" not found in dictionary"
;
pl
html_break
;
pl
html_break
;
ps
"Closest entry in lexical order: "
;
ps
"Closest entry in lexical order: "
;
ps
(
Morpho_html
.
skt_anchor
_R
False
s
)
;
ps
(
Morpho_html
.
skt_anchor
False
s
)
;
pl
html_break
;
pl
html_break
}
}
;
;
...
@@ -101,6 +101,8 @@ value print_word word (entry,lex,page) = match lex with
...
@@ -101,6 +101,8 @@ value print_word word (entry,lex,page) = match lex with
value
read_mw_index
()
=
value
read_mw_index
()
=
(
Gen
.
gobble
Data
.
public_mw_index_file
:
Deco
.
deco
(
string
*
string
*
string
))
(
Gen
.
gobble
Data
.
public_mw_index_file
:
Deco
.
deco
(
string
*
string
*
string
))
;
;
value
skt_red
s
=
html_red
(
Morpho_html
.
skt_roma
s
)
;
value
index_engine
()
=
do
value
index_engine
()
=
do
{
pl
http_header
{
pl
http_header
;
page_begin
heritage_dictionary_title
;
page_begin
heritage_dictionary_title
...
@@ -124,7 +126,7 @@ value index_engine () = do
...
@@ -124,7 +126,7 @@ value index_engine () = do
let
mw_index
=
read_mw_index
()
in
let
mw_index
=
read_mw_index
()
in
let
words
=
Deco
.
assoc
word
mw_index
in
let
words
=
Deco
.
assoc
word
mw_index
in
match
words
with
match
words
with
[
[]
->
do
{
ps
(
Morpho_html
.
skt_red
str_VH
)
[
[]
->
do
{
ps
(
skt_red
str_VH
)
;
ps
" not found in MW dictionary"
;
ps
" not found in MW dictionary"
;
pl
html_break
;
pl
html_break
}
}
...
@@ -143,7 +145,7 @@ value index_engine () = do
...
@@ -143,7 +145,7 @@ value index_engine () = do
(* even though str may exist as inflected form *)
(* even though str may exist as inflected form *)
with
(* Matching entry not found - we try declensions *)
with
(* Matching entry not found - we try declensions *)
[
Index
.
Last
last
->
do
[
Index
.
Last
last
->
do
{
ps
(
Morpho_html
.
skt_red
str_VH
)
{
ps
(
skt_red
str_VH
)
;
try_declensions
word
last
;
try_declensions
word
last
}
}
]
]
...
...
ML/indexerd.ml
View file @
a80f53ed
...
@@ -4,7 +4,7 @@
...
@@ -4,7 +4,7 @@
(* *)
(* *)
(* Gérard Huet *)
(* Gérard Huet *)
(* *)
(* *)
(* ©20
19
Institut National de Recherche en Informatique et en Automatique *)
(* ©20
20
Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(**************************************************************************)
(* CGI-bin indexerd for indexing in sanskrit dico without diacritics. *)
(* CGI-bin indexerd for indexing in sanskrit dico without diacritics. *)
...
@@ -47,13 +47,15 @@ value postlude () = do
...
@@ -47,13 +47,15 @@ value postlude () = do
;
page_end
Html
.
French
True
;
page_end
Html
.
French
True
}
}
;
;
value
print_word
c
=
pl
(
Morpho_html
.
skt_anchor
_R
False
(
Canon
.
decode_ref
c
))
value
print_word
c
=
pl
(
Morpho_html
.
skt_anchor
False
(
Canon
.
decode_ref
c
))
;
;
(* Each dummy is mapped to a list of words - all the words which
(* Each dummy is mapped to a list of words - all the words which
give back the dummy by normalisation such as removing diacritics *)
give back the dummy by normalisation such as removing diacritics *)
value
read_dummies
()
=
value
read_dummies
()
=
(
Gen
.
gobble
Data
.
public_dummies_file
:
Deco
.
deco
Word
.
word
)
(
Gen
.
gobble
Data
.
public_dummies_file
:
Deco
.
deco
Word
.
word
)
;
;
value
skt_red
s
=
html_red
(
Morpho_html
.
skt_roma
s
)
;
value
index_engine
()
=
value
index_engine
()
=
let
abor
=
abort
Html
.
French
(* may not preserve the current lang *)
in
let
abor
=
abort
Html
.
French
(* may not preserve the current lang *)
in
try
let
dummies_deco
=
read_dummies
()
in
do
try
let
dummies_deco
=
read_dummies
()
in
do
...
@@ -69,7 +71,7 @@ value index_engine () =
...
@@ -69,7 +71,7 @@ value index_engine () =
;
ps
(
div_begin
Latin12
)
;
ps
(
div_begin
Latin12
)
;
let
words
=
Deco
.
assoc
dummy
dummies_deco
in
;
let
words
=
Deco
.
assoc
dummy
dummies_deco
in
match
words
with
match
words
with
[
[]
->
do
{
ps
(
Morpho_html
.
skt_red
str
)
[
[]
->
do
{
ps
(
skt_red
str
)
;
ps
" not found in Heritage dictionary"
;
ps
" not found in Heritage dictionary"
;
ps
html_break
;
pl
html_break
;
ps
html_break
;
pl
html_break
}
}
...
...
ML/interface.ml
View file @
a80f53ed
...
@@ -518,9 +518,10 @@ value graph_engine () = do
...
@@ -518,9 +518,10 @@ value graph_engine () = do
and
us
=
get
"us"
env
"f"
(* sandhied text default *)
and
us
=
get
"us"
env
"f"
(* sandhied text default *)
and
translit
=
get
"t"
env
Paths
.
default_transliteration
(* translit input *)
and
translit
=
get
"t"
env
Paths
.
default_transliteration
(* translit input *)
and
lex
=
get
"lex"
env
Paths
.
default_lexicon
(* lexicon choice *)
and
lex
=
get
"lex"
env
Paths
.
default_lexicon
(* lexicon choice *)
and
font
=
get
"font"
env
Paths
.
default_display_font
(* deva vs roma print *)
and
font
=
get
"font"
env
Paths
.
default_display_font
in
let
ft
=
font_of_string
font
(* Deva vs Roma print *)
and
cache
=
get
"cache"
env
"f"
(* no cache default *)
in
and
cache
=
get
"cache"
env
"f"
(* no cache default *)
in
let
()
=
sanskrit_
display
.
val
:=
f
on
t
let
()
=
sanskrit_
font
.
val
:=
ft
and
()
=
cache_active
.
val
:=
cache
and
()
=
cache_active
.
val
:=
cache
and
abs
=
get
"abs"
env
"f"
(* default local paths *)
in
and
abs
=
get
"abs"
env
"f"
(* default local paths *)
in
let
lang
=
language_of
lex
(* language default *)
let
lang
=
language_of
lex
(* language default *)
...
@@ -537,13 +538,13 @@ value graph_engine () = do
...
@@ -537,13 +538,13 @@ value graph_engine () = do
and
link_num
=
get
"linkNumber"
env
"0"
(* is there a better default? *)
and
link_num
=
get
"linkNumber"
env
"0"
(* is there a better default? *)
and
sol_num
=
get
"allSol"
env
"0"
in
(* Needed for Validate mode *)
and
sol_num
=
get
"allSol"
env
"0"
in
(* Needed for Validate mode *)
let
url_enc_corpus_permission
=
(* Corpus mode *)
let
url_enc_corpus_permission
=
(* Corpus mode *)
get
Params
.
corpus_permission
env
"true"
in
get
Params
.
corpus_permission
env
"true"
in
let
corpus_permission
=
let
corpus_permission
=
url_enc_corpus_permission
url_enc_corpus_permission
|>
decode_url
|>
decode_url
|>
Web_corpus
.
permission_of_string
in
|>
Web_corpus
.
permission_of_string
in
let
corpus_dir
=
get
Params
.
corpus_dir
env
""
in
let
corpus_dir
=
get
Params
.
corpus_dir
env
""
let
sentence_no
=
get
Params
.
sentence_no
env
""
in
and
sentence_no
=
get
Params
.
sentence_no
env
""
in
let
text
=
arguments
translit
lex
font
cache
st
us
cp
url_encoded_input
let
text
=
arguments
translit
lex
font
cache
st
us
cp
url_encoded_input
url_encoded_topic
abs
sol_num
corpus
sent_id
link_num
url_encoded_topic
abs
sol_num
corpus
sent_id
link_num
url_enc_corpus_permission
corpus_dir
sentence_no
url_enc_corpus_permission
corpus_dir
sentence_no
...
@@ -568,7 +569,7 @@ value graph_engine () = do
...
@@ -568,7 +569,7 @@ value graph_engine () = do
let
revised
=
decode_url
(
get
"revised"
env
""
)
(* User-aid revision *)
let
revised
=
decode_url
(
get
"revised"
env
""
)
(* User-aid revision *)
and
rev_off
=
int_of_string
(
get
"rev_off"
env
"-1"
)
and
rev_off
=
int_of_string
(
get
"rev_off"
env
"-1"
)
and
rev_ind
=
int_of_string
(
get
"rev_ind"
env
"-1"
)
in
and
rev_ind
=
int_of_string
(
get
"rev_ind"
env
"-1"
)
in
try
do
try
do
{
match
(
revised
,
rev_off
,
rev_ind
)
with
{
match
(
revised
,
rev_off
,
rev_ind
)
with
[
(
""
,-
1
,-
1
)
->
(* Standard input processing *** Main call *** *)
[
(
""
,-
1
,-
1
)
->
(* Standard input processing *** Main call *** *)
check_sentence
translit
uns
text
checkpoints
input
sol_num
check_sentence
translit
uns
text
checkpoints
input
sol_num
...
@@ -624,7 +625,7 @@ value graph_engine () = do
...
@@ -624,7 +625,7 @@ value graph_engine () = do
else
()
else
()
;
close_page_with_margin
()
;
close_page_with_margin
()
;
page_end
lang
True
;
page_end
lang
True
}
}
with
with
[
Sys_error
s
->
abort
lang
Control
.
sys_err_mess
s
(* file pb *)
[
Sys_error
s
->
abort
lang
Control
.
sys_err_mess
s
(* file pb *)
|
Stream
.
Error
s
->
abort
lang
Control
.
stream_err_mess
s
(* file pb *)
|
Stream
.
Error
s
->
abort
lang
Control
.
stream_err_mess
s
(* file pb *)
...
...
ML/make_xml_data.ml
View file @
a80f53ed
...
@@ -327,7 +327,7 @@ value print_inverse_map_xml trans form (delta,morphs) =
...
@@ -327,7 +327,7 @@ value print_inverse_map_xml trans form (delta,morphs) =
}
}
;
;
(* Outputs an XML stream on stdout *)
(* Outputs an XML stream on stdout *)
value
print_header
trans
=
do
value
print_
xml_
header
trans
=
do
{
"<?xml version=
\"
1.0
\"
encoding=
\"
UTF-8
\"
?>"
|>
pl
{
"<?xml version=
\"
1.0
\"
encoding=
\"
UTF-8
\"
?>"
|>
pl
;
"<!DOCTYPE forms SYSTEM
\"
"
^
trans
^
"_morph.dtd
\"
>"
|>
pl
;
"<!DOCTYPE forms SYSTEM
\"
"
^
trans
^
"_morph.dtd
\"
>"
|>
pl
;
"<!-- Header"
|>
pl
;
"<!-- Header"
|>
pl
...
@@ -339,7 +339,7 @@ value print_header trans = do
...
@@ -339,7 +339,7 @@ value print_header trans = do
}
}
;
;
value
print_xml
trans
inflected_map
=
do
value
print_xml
trans
inflected_map
=
do
{
print_header
trans
{
print_
xml_
header
trans
;
"<forms>"
|>
pl
;
"<forms>"
|>
pl
;
Deco
.
iter
(
print_inverse_map_xml
trans
)
inflected_map
;
Deco
.
iter
(
print_inverse_map_xml
trans
)
inflected_map
;
"</forms>"
|>
pl
;
"</forms>"
|>
pl
...
@@ -352,7 +352,7 @@ value print_xml_word trans (w,_) = do
...
@@ -352,7 +352,7 @@ value print_xml_word trans (w,_) = do
}
}
;
;
value
print_xml_list
trans
banks
prevs
=
do
value
print_xml_list
trans
banks
prevs
=
do
{
print_header
trans
{
print_
xml_
header
trans
;
"<forms>"
|>
pl
;
"<forms>"
|>
pl
;
let
print_bank
inflected_map
=
;
let
print_bank
inflected_map
=
Deco
.
iter
(
print_inverse_map_xml
trans
)
inflected_map
in
Deco
.
iter
(
print_inverse_map_xml
trans
)
inflected_map
in
...
...
ML/mk_reader_page.ml
View file @
a80f53ed
...
@@ -62,7 +62,7 @@ value reader_page () = do
...
@@ -62,7 +62,7 @@ value reader_page () = do
and
url_encoded_mode
=
get
"mode"
env
"g"
and
url_encoded_mode
=
get
"mode"
env
"g"
and
url_encoded_topic
=
get
"topic"
env
""
and
url_encoded_topic
=
get
"topic"
env
""
and
st
=
get
"st"
env
"t"
(* default vaakya rather than isolated pada *)
and
st
=
get
"st"
env
"t"
(* default vaakya rather than isolated pada *)
(*
and cp = get "cp" env default_mode TODO: dead code *)
(*
[
and cp = get "cp" env default_mode TODO: dead code
]
*)
and
us
=
get
"us"
env
"f"
(* default input sandhied *)
and
us
=
get
"us"
env
"f"
(* default input sandhied *)
and
cache_active
=
get
"cache"
env
cache_active
.
val
and
cache_active
=
get
"cache"
env
cache_active
.
val
and
translit
=
get
"t"
env
Paths
.
default_transliteration
and
translit
=
get
"t"
env
Paths
.
default_transliteration
...
@@ -76,7 +76,7 @@ value reader_page () = do
...
@@ -76,7 +76,7 @@ value reader_page () = do
let
corpus_dir
=
Cgi
.
decoded_get
Params
.
corpus_dir
""
env
in
let
corpus_dir
=
Cgi
.
decoded_get
Params
.
corpus_dir
""
env
in
let
sentence_no
=
Cgi
.
decoded_get
Params
.
sentence_no
""
env
in
do
let
sentence_no
=
Cgi
.
decoded_get
Params
.
sentence_no
""
env
in
do
{
pl
(
body_begin
back_ground
)
{
body_begin
back_ground
|>
pl
;
print_title
(
Some
lang
)
reader_title
;
print_title
(
Some
lang
)
reader_title
;
h3_begin
C3
|>
pl
;
h3_begin
C3
|>
pl
;
if
Web_corpus
.(
permission_of_string
corpus_permission
=
Annotator
)
then
;
if
Web_corpus
.(
permission_of_string
corpus_permission
=
Annotator
)
then
...
@@ -98,12 +98,12 @@ value reader_page () = do
...
@@ -98,12 +98,12 @@ value reader_page () = do
[
(
" Unsandhied "
,
"t"
,
us
=
"t"
)
[
(
" Unsandhied "
,
"t"
,
us
=
"t"
)
;
(
" Sandhied "
,
"f"
,
us
=
"f"
)
;
(
" Sandhied "
,
"f"
,
us
=
"f"
)
]
|>
pl
]
|>
pl
(*
Mode
Simple deprecated
(*
option
Simple deprecated
TODO
; pl " Parser strength "
[
; pl " Parser strength "
; pl (option_select_default "cp"
; pl (option_select_default "cp"
[ (" Full ","t",cp="t")
[ (" Full ","t",cp="t")
; (" Simple ","f",cp="f")
; (" Simple ","f",cp="f")
]) *)
])
]
*)
(* Sanskrit printer deva/roma *)
(* Sanskrit printer deva/roma *)
;
" Sanskrit display font"
|>
pl
;
" Sanskrit display font"
|>
pl
;
sanskrit_font_switch_default
font
"font"
|>
ps
;
sanskrit_font_switch_default
font
"font"
|>
ps
...
...
ML/morpho.ml
View file @
a80f53ed
...
@@ -21,9 +21,9 @@ module Morpho_out (Chan: sig value chan: ref out_channel; end)
...
@@ -21,9 +21,9 @@ module Morpho_out (Chan: sig value chan: ref out_channel; end)
value
ps
s
=
output_string
Chan
.
chan
.
val
s
value
ps
s
=
output_string
Chan
.
chan
.
val
s
;
;