Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
Heritage_Platform
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Gérard Huet
Heritage_Platform
Commits
a80f53ed
Commit
a80f53ed
authored
Jun 04, 2020
by
Gérard Huet
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Devanaagarii printing driven by config for Interface/Reader
parent
912b875a
Changes
22
Hide whitespace changes
Inline
Side-by-side
Showing
22 changed files
with
207 additions
and
159 deletions
+207
-159
ML/conjugation.ml
ML/conjugation.ml
+6
-5
ML/declension.ml
ML/declension.ml
+28
-24
ML/encode.ml
ML/encode.ml
+7
-7
ML/html.ml
ML/html.ml
+1
-1
ML/indexer.ml
ML/indexer.ml
+7
-5
ML/indexerd.ml
ML/indexerd.ml
+5
-3
ML/interface.ml
ML/interface.ml
+8
-7
ML/make_xml_data.ml
ML/make_xml_data.ml
+3
-3
ML/mk_reader_page.ml
ML/mk_reader_page.ml
+5
-5
ML/morpho.ml
ML/morpho.ml
+27
-29
ML/morpho_html.ml
ML/morpho_html.ml
+47
-30
ML/morphology.mli
ML/morphology.mli
+1
-1
ML/parser.ml
ML/parser.ml
+3
-1
ML/phonetics.ml
ML/phonetics.ml
+35
-21
ML/reader.ml
ML/reader.ml
+4
-2
ML/sanskrit.ml
ML/sanskrit.ml
+4
-4
ML/sanskrit.mli
ML/sanskrit.mli
+3
-3
ML/transduction.ml
ML/transduction.ml
+2
-2
ML/verbs.ml
ML/verbs.ml
+6
-1
ML/version.ml
ML/version.ml
+1
-1
ML/web.ml
ML/web.ml
+3
-3
SETUP/version.txt
SETUP/version.txt
+1
-1
No files found.
ML/conjugation.ml
View file @
a80f53ed
...
...
@@ -18,11 +18,11 @@
(*i executable module Conjugation = struct i*)
open
Skt_morph
;
open
Morphology
;
(*
inflected [
Verb_form] etc. *)
open
Morphology
;
(*
[inflected
Verb_form] etc. *)
open
Conj_infos
;
(* [vmorph Causa Inten Desid root_infos] *)
open
Inflected
;
(*
roots.val indecls.val
etc. *)
open
Inflected
;
(*
[roots.val indecls.val]
etc. *)
open
Html
;
open
Web
;
(*
ps pl font Deva Roma pr_font
etc. *)
open
Web
;
(*
[ps pl font Deva Roma pr_font]
etc. *)
open
Cgi
;
open
Multilingual
;
(* [gentense tense_name captions] *)
...
...
@@ -843,7 +843,8 @@ value conjs_engine () = do
try
let
url_encoded_entry
=
List
.
assoc
"q"
env
and
url_encoded_class
=
List
.
assoc
"c"
env
and
font
=
font_of_string
(
get
"font"
env
Paths
.
default_display_font
)
and
font
=
let
s
=
get
"font"
env
Paths
.
default_display_font
in
font_of_string
s
(* deva vs roma print *)
(* OBS and stamp = get "v" env "" *)
and
translit
=
get
"t"
env
"VH"
(* DICO created in VH trans *)
and
lex
=
get
"lex"
env
"SH"
(* default Heritage *)
in
...
...
@@ -872,7 +873,7 @@ value conjs_engine () = do
let
entry
=
resolve_homonym
entry_VH
gana
in
(* VH string with homo *)
let
known
=
in_lexicon
entry
(* in lexicon? *)
(* we should check it is indeed a root or denominative *)
in
do
{
let
link
=
if
known
then
Morpho_html
.
skt_anchor
False
font
entry
{
let
link
=
if
known
then
Morpho_html
.
skt_anchor
False
entry
else
doubt
(
Morpho_html
.
skt_roma
entry
)
in
let
subtitle
=
hyperlink_title
font
link
in
display_subtitle
(
h1_center
subtitle
)
...
...
ML/declension.ml
View file @
a80f53ed
...
...
@@ -19,8 +19,8 @@
open
Skt_morph
;
open
Morphology
;
(* [Noun_form] etc. *)
open
Html
;
(* [narrow_screen html_red] etc. *)
open
Web
;
(* ps pl font Deva Roma pr_font
etc. *)
open
Cgi
;
(* [create_env] etc.
*)
open
Web
;
(* [ps pl font Deva Roma pr_font]
etc. *)
open
Cgi
;
(* [create_env] etc.
*)
open
Multilingual
;
(* [declension_title compound_name avyaya_name] *)
value
dtitle
font
=
h1_title
(
declension_title
narrow_screen
font
)
...
...
@@ -46,7 +46,7 @@ value display_subtitle title = do
;
title
|>
ps
;
th_end
|>
ps
;
tr_end
|>
ps
;
table_end
|>
pl
(*
C
entered *)
;
table_end
|>
pl
(*
c
entered *)
;
html_paragraph
|>
pl
}
;
...
...
@@ -132,19 +132,19 @@ value display_iic font = fun
|
l
->
do
{
html_paragraph
|>
pl
;
h3_begin
C3
|>
ps
;
compound_name
font
|>
ps
;
ps
" "
;
compound_name
font
|>
ps
;
" "
|>
ps
;
let
print_iic
w
=
pr_i
font
w
in
List
.
iter
print_iic
l
;
h3_end
|>
ps
}
]
;
value
display_avy
font
=
fun
value
display_avy
a
font
=
fun
[
[]
->
()
|
l
->
do
{
html_paragraph
|>
pl
;
h3_begin
C3
|>
ps
;
avyaya_name
font
|>
ps
;
ps
" "
;
avyaya_name
font
|>
ps
;
" "
|>
ps
;
let
ifc_form
w
=
[
0
]
(* - *)
@
w
in
let
print_iic
w
=
pr_font
font
(
ifc_form
w
)
in
List
.
iter
print_iic
l
...
...
@@ -154,16 +154,16 @@ value display_avy font = fun
;
value
sort_out
accu
form
=
fun
[
[
(
_
,
morphs
)
]
->
List
.
fold_left
(
reorg
form
)
accu
morphs
where
reorg
f
(
mas
,
fem
,
neu
,
any
,
iic
,
avy
)
=
fun
where
reorg
f
(
mas
,
fem
,
neu
,
any
,
iic
,
avy
a
)
=
fun
[
Noun_form
g
n
c
->
let
t
=
(
n
,
c
,
f
)
in
match
g
with
[
Mas
->
([
t
::
mas
]
,
fem
,
neu
,
any
,
iic
,
avy
)
|
Fem
->
(
mas
,
[
t
::
fem
]
,
neu
,
any
,
iic
,
avy
)
|
Neu
->
(
mas
,
fem
,
[
t
::
neu
]
,
any
,
iic
,
avy
)
|
Deictic
_
->
(
mas
,
fem
,
neu
,
[
t
::
any
]
,
iic
,
avy
)
[
Mas
->
([
t
::
mas
]
,
fem
,
neu
,
any
,
iic
,
avy
a
)
|
Fem
->
(
mas
,
[
t
::
fem
]
,
neu
,
any
,
iic
,
avy
a
)
|
Neu
->
(
mas
,
fem
,
[
t
::
neu
]
,
any
,
iic
,
avy
a
)
|
Deictic
_
->
(
mas
,
fem
,
neu
,
[
t
::
any
]
,
iic
,
avy
a
)
]
|
Bare_stem
|
Gati
->
(
mas
,
fem
,
neu
,
any
,
[
f
::
iic
]
,
avy
)
|
Avyayaf_form
->
(
mas
,
fem
,
neu
,
any
,
iic
,
[
f
::
avy
])
|
Bare_stem
|
Gati
->
(
mas
,
fem
,
neu
,
any
,
[
f
::
iic
]
,
avy
a
)
|
Avyayaf_form
->
(
mas
,
fem
,
neu
,
any
,
iic
,
[
f
::
avy
a
])
|
Ind_form
_
|
Verb_form
_
_
_
|
Ind_verb
_
|
Abs_root
_
|
Avyayai_form
|
Unanalysed
|
PV
_
|
Part_form
_
_
_
_
->
...
...
@@ -173,19 +173,19 @@ value sort_out accu form = fun
]
and
init
=
([]
,
[]
,
[]
,
[]
,
[]
,
[]
)
;
value
display_inflected
font
(
gen_deco
,
pn_deco
,
voca_deco
,
iic_deco
,
avy_deco
)
=
value
display_inflected
font
(
gen_deco
,
pn_deco
,
voca_deco
,
iic_deco
,
avy
a
_deco
)
=
let
nouns
=
Deco
.
fold
sort_out
init
gen_deco
in
let
non_vocas
=
Deco
.
fold
sort_out
nouns
pn_deco
in
let
(
mas
,
fem
,
neu
,
any
,_,_
)
=
Deco
.
fold
sort_out
non_vocas
voca_deco
and
iic
=
List
.
map
fst
(
Deco
.
contents
iic_deco
)
and
avy
=
List
.
map
fst
(
Deco
.
contents
avy
_deco
)
in
do
and
avy
a
=
List
.
map
fst
(
Deco
.
contents
avya
_deco
)
in
do
{
center_begin
|>
pl
;
display_gender
font
Mas
mas
;
display_gender
font
Fem
fem
;
display_gender
font
Neu
neu
;
display_gender
font
(
Deictic
Numeral
)
any
(* arbitrary *)
;
display_iic
font
iic
;
display_avy
font
avy
;
display_avy
a
font
avya
;
center_end
|>
pl
;
html_paragraph
|>
pl
}
...
...
@@ -241,7 +241,8 @@ value decls_engine () = do
and
url_encoded_participle
=
get
"p"
env
""
and
url_encoded_source
=
get
"r"
env
""
(* optional root origin - used by participles in conjugation tables *)
and
font
=
font_of_string
(
get
"font"
env
Paths
.
default_display_font
)
and
font
=
let
s
=
get
"font"
env
Paths
.
default_display_font
in
font_of_string
s
(* deva vs roma print *)
and
translit
=
get
"t"
env
"VH"
(* DICO created in VH trans *)
and
lex
=
get
"lex"
env
"SH"
(* default Heritage *)
in
let
entry_tr
=
decode_url
url_encoded_entry
(* : string in translit *)
...
...
@@ -257,14 +258,17 @@ value decls_engine () = do
(* will be avoided by unique name lookup *)
let
entry
=
resolve_homonym
entry_VH
in
(* compute homonymy index *)
let
link
=
if
in_lexicon
entry
then
Morpho_html
.
skt_anchor
False
font
entry
if
in_lexicon
entry
then
Morpho_html
.
skt_anchor
False
entry
(* We should check it is indeed a substantive entry
and that Any is used for deictics/numbers (TODO) *)
else
let
root
=
if
source
=
""
then
"?"
(* unknown in lexicon *)
else
" from "
^
if
in_lexicon
source
then
Morpho_html
.
skt_anchor
False
font
source
else
doubt
(
Morpho_html
.
skt_roma
source
)
in
Morpho_html
.
skt_roma
entry
^
root
in
(* Also it should use unique naming for possible homo index *)
else
Morpho_html
.
skt_html_font
font
entry
|>
italics
in
(* OBSOLETE indication of root for kridanta
[let root = if source = "" then "?" (* unknown in lexicon *)
else " from " ^ (* should test font *) in
if in_lexicon source then Morpho_html.skt_anchor False font source
else doubt (Morpho_html.skt_roma source) in (* should test font *)
Morpho_html.skt_utf font entry ^ root in] *)
let
subtitle
=
hyperlink_title
font
link
in
do
{
display_subtitle
(
h1_center
subtitle
)
;
try
look_up
font
entry
(
Nouns
.
Gender
gender
)
part
...
...
@@ -273,7 +277,7 @@ value decls_engine () = do
;
page_end
lang
True
}
with
[
Stream
.
Error
_
->
abort
lang
(
"Illegal "
^
translit
^
" transliteration
"
)
entry_tr
]
abort
lang
(
"Illegal "
^
translit
^
" input
"
)
entry_tr
]
}
;
value
safe_engine
()
=
...
...
ML/encode.ml
View file @
a80f53ed
...
...
@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©20
19
Institut National de Recherche en Informatique et en Automatique *)
(* ©20
20
Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(*i module Encode = struct i*)
...
...
@@ -18,7 +18,7 @@ exception In_error of string (* Error in user or corpus input *)
value
is_vowel
c
=
vowel
c
||
c
>
100
&&
c
<
114
(* accounts for upper case *)
;
(* anusvara substituted by nasal or normalized to 14 when original *)
(* anunaasika
before
vowels treated as anusvaara *)
(* anunaasika
after
vowels treated as anusvaara *)
value
rec
normalize
=
normal_rec
False
where
rec
normal_rec
after_vow
=
fun
[
[]
->
[]
...
...
@@ -50,8 +50,8 @@ value code_string str = normalize (code_raw str) (* standard VH *)
and
code_string_WX
str
=
normalize
(
code_raw_WX
str
)
and
code_string_KH
str
=
normalize
(
code_raw_KH
str
)
and
code_string_SL
str
=
normalize
(
code_raw_SL
str
)
and
code_skt_ref
str
=
normalize
(
code_rawu
str
)
and
code_skt_ref_d
str
=
normalize
(
code_rawd
str
)
and
code_skt_ref
str
=
normalize
(
code_rawu
str
)
(* with upper letters *)
and
code_skt_ref_d
str
=
normalize
(
code_rawd
str
)
(* no diacritics *)
;
(* Switching code function according to transliteration convention *)
value
switch_code
=
fun
(* normalizes anusvaara in its input *)
...
...
@@ -113,11 +113,11 @@ value code_strip_raw s = rev_strip (code_raw s)
(* A cleaner solution would be to have type lexeme = (word * int)
and "x#5" represented as (x,5) (0 if no homophone) *)
;
value
skt_to_deva
str
=
try
Canon
.
unidevcode
(
code_string
str
)
with
value
skt_to_deva
str
=
try
Canon
.
unidevcode
(
code_string
str
)
with
[
Failure
_
->
raise
(
In_error
str
)
]
and
skt_raw_to_deva
str
=
try
Canon
.
unidevcode
(
code_raw
str
)
with
and
skt_raw_to_deva
str
=
try
Canon
.
unidevcode
(
code_raw
str
)
with
[
Failure
_
->
raise
(
In_error
str
)
]
and
skt_
raw_
strip_to_deva
str
=
try
Canon
.
unidevcode
(
code_strip_raw
str
)
with
and
skt_strip_to_deva
str
=
try
Canon
.
unidevcode
(
code_strip_raw
str
)
with
[
Failure
_
->
raise
(
In_error
str
)
]
;
(* Following not needed since [Transduction.skt_to_html] is more direct
...
...
ML/html.ml
View file @
a80f53ed
...
...
@@ -220,7 +220,7 @@ value rgb = fun (* a few selected HTML colors in rgb data *)
|
Blue
->
"#0000FF"
(* Canard = "#0000C0" ou "#0080FF" *)
|
Green
->
"#008000"
(* Teal = "#008080" Olive = "#808000" *)
|
Aquamarine
->
"#6FFFC3"
(* actually Light Aquamarine *)
|
Lawngreen
->
"#
7CFC00"
|
Lawngreen
->
"#
66ff99"
(* was "#7CFC00" *)
|
Yellow
->
"#FFFF00"
|
Orange
->
"#FFA000"
|
Cyan
->
"#00FFFF"
(* Aqua = Cyan, Turquoise = "#40E0D0" *)
...
...
ML/indexer.ml
View file @
a80f53ed
...
...
@@ -32,8 +32,8 @@ value answer_end () = do
;
pl
html_paragraph
}
;
value
ok
(
mess
,
s
)
=
do
{
ps
mess
;
pl
(
Morpho_html
.
skt_anchor
_R
False
s
)
}
and
ok2
(
mess
,
s1
,
s2
)
=
do
{
ps
mess
;
pl
(
Morpho_html
.
skt_anchor_R
2
s1
s2
)
}
value
ok
(
mess
,
s
)
=
do
{
ps
mess
;
pl
(
Morpho_html
.
skt_anchor
False
s
)
}
and
ok2
(
mess
,
s1
,
s2
)
=
do
{
ps
mess
;
pl
(
Morpho_html
.
skt_anchor_R
s1
s2
)
}
(* ok2 prints the entry under the spelling given by the user, i.e. without
normalisation, thus e.g. sandhi is not written sa.mdhi, and possibly
suffixed by homonymy index 1, e.g. b.rh. *)
...
...
@@ -59,7 +59,7 @@ and report_failure s = do
{
ps
" not found in dictionary"
;
pl
html_break
;
ps
"Closest entry in lexical order: "
;
ps
(
Morpho_html
.
skt_anchor
_R
False
s
)
;
ps
(
Morpho_html
.
skt_anchor
False
s
)
;
pl
html_break
}
;
...
...
@@ -101,6 +101,8 @@ value print_word word (entry,lex,page) = match lex with
value
read_mw_index
()
=
(
Gen
.
gobble
Data
.
public_mw_index_file
:
Deco
.
deco
(
string
*
string
*
string
))
;
value
skt_red
s
=
html_red
(
Morpho_html
.
skt_roma
s
)
;
value
index_engine
()
=
do
{
pl
http_header
;
page_begin
heritage_dictionary_title
...
...
@@ -124,7 +126,7 @@ value index_engine () = do
let
mw_index
=
read_mw_index
()
in
let
words
=
Deco
.
assoc
word
mw_index
in
match
words
with
[
[]
->
do
{
ps
(
Morpho_html
.
skt_red
str_VH
)
[
[]
->
do
{
ps
(
skt_red
str_VH
)
;
ps
" not found in MW dictionary"
;
pl
html_break
}
...
...
@@ -143,7 +145,7 @@ value index_engine () = do
(* even though str may exist as inflected form *)
with
(* Matching entry not found - we try declensions *)
[
Index
.
Last
last
->
do
{
ps
(
Morpho_html
.
skt_red
str_VH
)
{
ps
(
skt_red
str_VH
)
;
try_declensions
word
last
}
]
...
...
ML/indexerd.ml
View file @
a80f53ed
...
...
@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©20
19
Institut National de Recherche en Informatique et en Automatique *)
(* ©20
20
Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* CGI-bin indexerd for indexing in sanskrit dico without diacritics. *)
...
...
@@ -47,13 +47,15 @@ value postlude () = do
;
page_end
Html
.
French
True
}
;
value
print_word
c
=
pl
(
Morpho_html
.
skt_anchor
_R
False
(
Canon
.
decode_ref
c
))
value
print_word
c
=
pl
(
Morpho_html
.
skt_anchor
False
(
Canon
.
decode_ref
c
))
;
(* Each dummy is mapped to a list of words - all the words which
give back the dummy by normalisation such as removing diacritics *)
value
read_dummies
()
=
(
Gen
.
gobble
Data
.
public_dummies_file
:
Deco
.
deco
Word
.
word
)
;
value
skt_red
s
=
html_red
(
Morpho_html
.
skt_roma
s
)
;
value
index_engine
()
=
let
abor
=
abort
Html
.
French
(* may not preserve the current lang *)
in
try
let
dummies_deco
=
read_dummies
()
in
do
...
...
@@ -69,7 +71,7 @@ value index_engine () =
;
ps
(
div_begin
Latin12
)
;
let
words
=
Deco
.
assoc
dummy
dummies_deco
in
match
words
with
[
[]
->
do
{
ps
(
Morpho_html
.
skt_red
str
)
[
[]
->
do
{
ps
(
skt_red
str
)
;
ps
" not found in Heritage dictionary"
;
ps
html_break
;
pl
html_break
}
...
...
ML/interface.ml
View file @
a80f53ed
...
...
@@ -518,9 +518,10 @@ value graph_engine () = do
and
us
=
get
"us"
env
"f"
(* sandhied text default *)
and
translit
=
get
"t"
env
Paths
.
default_transliteration
(* translit input *)
and
lex
=
get
"lex"
env
Paths
.
default_lexicon
(* lexicon choice *)
and
font
=
get
"font"
env
Paths
.
default_display_font
(* deva vs roma print *)
and
font
=
get
"font"
env
Paths
.
default_display_font
in
let
ft
=
font_of_string
font
(* Deva vs Roma print *)
and
cache
=
get
"cache"
env
"f"
(* no cache default *)
in
let
()
=
sanskrit_
display
.
val
:=
fon
t
let
()
=
sanskrit_
font
.
val
:=
f
t
and
()
=
cache_active
.
val
:=
cache
and
abs
=
get
"abs"
env
"f"
(* default local paths *)
in
let
lang
=
language_of
lex
(* language default *)
...
...
@@ -537,13 +538,13 @@ value graph_engine () = do
and
link_num
=
get
"linkNumber"
env
"0"
(* is there a better default? *)
and
sol_num
=
get
"allSol"
env
"0"
in
(* Needed for Validate mode *)
let
url_enc_corpus_permission
=
(* Corpus mode *)
get
Params
.
corpus_permission
env
"true"
in
get
Params
.
corpus_permission
env
"true"
in
let
corpus_permission
=
url_enc_corpus_permission
|>
decode_url
|>
Web_corpus
.
permission_of_string
in
let
corpus_dir
=
get
Params
.
corpus_dir
env
""
in
let
sentence_no
=
get
Params
.
sentence_no
env
""
in
let
corpus_dir
=
get
Params
.
corpus_dir
env
""
and
sentence_no
=
get
Params
.
sentence_no
env
""
in
let
text
=
arguments
translit
lex
font
cache
st
us
cp
url_encoded_input
url_encoded_topic
abs
sol_num
corpus
sent_id
link_num
url_enc_corpus_permission
corpus_dir
sentence_no
...
...
@@ -568,7 +569,7 @@ value graph_engine () = do
let
revised
=
decode_url
(
get
"revised"
env
""
)
(* User-aid revision *)
and
rev_off
=
int_of_string
(
get
"rev_off"
env
"-1"
)
and
rev_ind
=
int_of_string
(
get
"rev_ind"
env
"-1"
)
in
try
do
try
do
{
match
(
revised
,
rev_off
,
rev_ind
)
with
[
(
""
,-
1
,-
1
)
->
(* Standard input processing *** Main call *** *)
check_sentence
translit
uns
text
checkpoints
input
sol_num
...
...
@@ -624,7 +625,7 @@ value graph_engine () = do
else
()
;
close_page_with_margin
()
;
page_end
lang
True
}
}
with
[
Sys_error
s
->
abort
lang
Control
.
sys_err_mess
s
(* file pb *)
|
Stream
.
Error
s
->
abort
lang
Control
.
stream_err_mess
s
(* file pb *)
...
...
ML/make_xml_data.ml
View file @
a80f53ed
...
...
@@ -327,7 +327,7 @@ value print_inverse_map_xml trans form (delta,morphs) =
}
;
(* Outputs an XML stream on stdout *)
value
print_header
trans
=
do
value
print_
xml_
header
trans
=
do
{
"<?xml version=
\"
1.0
\"
encoding=
\"
UTF-8
\"
?>"
|>
pl
;
"<!DOCTYPE forms SYSTEM
\"
"
^
trans
^
"_morph.dtd
\"
>"
|>
pl
;
"<!-- Header"
|>
pl
...
...
@@ -339,7 +339,7 @@ value print_header trans = do
}
;
value
print_xml
trans
inflected_map
=
do
{
print_header
trans
{
print_
xml_
header
trans
;
"<forms>"
|>
pl
;
Deco
.
iter
(
print_inverse_map_xml
trans
)
inflected_map
;
"</forms>"
|>
pl
...
...
@@ -352,7 +352,7 @@ value print_xml_word trans (w,_) = do
}
;
value
print_xml_list
trans
banks
prevs
=
do
{
print_header
trans
{
print_
xml_
header
trans
;
"<forms>"
|>
pl
;
let
print_bank
inflected_map
=
Deco
.
iter
(
print_inverse_map_xml
trans
)
inflected_map
in
...
...
ML/mk_reader_page.ml
View file @
a80f53ed
...
...
@@ -62,7 +62,7 @@ value reader_page () = do
and
url_encoded_mode
=
get
"mode"
env
"g"
and
url_encoded_topic
=
get
"topic"
env
""
and
st
=
get
"st"
env
"t"
(* default vaakya rather than isolated pada *)
(*
and cp = get "cp" env default_mode TODO: dead code
*)
(*
[and cp = get "cp" env default_mode TODO: dead code ]
*)
and
us
=
get
"us"
env
"f"
(* default input sandhied *)
and
cache_active
=
get
"cache"
env
cache_active
.
val
and
translit
=
get
"t"
env
Paths
.
default_transliteration
...
...
@@ -76,7 +76,7 @@ value reader_page () = do
let
corpus_dir
=
Cgi
.
decoded_get
Params
.
corpus_dir
""
env
in
let
sentence_no
=
Cgi
.
decoded_get
Params
.
sentence_no
""
env
in
do
{
pl
(
body_begin
back_ground
)
{
body_begin
back_ground
|>
pl
;
print_title
(
Some
lang
)
reader_title
;
h3_begin
C3
|>
pl
;
if
Web_corpus
.(
permission_of_string
corpus_permission
=
Annotator
)
then
...
...
@@ -98,12 +98,12 @@ value reader_page () = do
[
(
" Unsandhied "
,
"t"
,
us
=
"t"
)
;
(
" Sandhied "
,
"f"
,
us
=
"f"
)
]
|>
pl
(*
Mode Simple deprecated
; pl " Parser strength "
(*
option Simple deprecated TODO
[
; pl " Parser strength "
; pl (option_select_default "cp"
[ (" Full ","t",cp="t")
; (" Simple ","f",cp="f")
]) *)
])
]
*)
(* Sanskrit printer deva/roma *)
;
" Sanskrit display font"
|>
pl
;
sanskrit_font_switch_default
font
"font"
|>
ps
...
...
ML/morpho.ml
View file @
a80f53ed
...
...
@@ -21,9 +21,9 @@ module Morpho_out (Chan: sig value chan: ref out_channel; end)
value
ps
s
=
output_string
Chan
.
chan
.
val
s
;
value
pl
s
=
ps
(
s
^
"
\n
"
)
value
pl
s
=
s
^
"
\n
"
|>
ps
;
value
pr_word
w
=
ps
(
Canon
.
decode
w
)
value
pr_word
w
=
Canon
.
decode
w
|>
ps
;
value
print_morph
m
=
string_morph
m
|>
ps
and
print_verbal
vb
=
string_verbal
vb
|>
ps
...
...
@@ -46,13 +46,13 @@ value rec select_morphs (seg_num,sub) seg_count = fun
|
[
last
::
[]
]
->
select_morph
(
seg_num
,
sub
,
seg_count
)
last
|
[
first
::
rest
]
->
do
{
select_morph
(
seg_num
,
sub
,
seg_count
)
first
;
ps
" | "
;
" | "
|>
ps
;
select_morphs
(
seg_num
,
sub
)
(
seg_count
+
1
)
rest
}
]
;
value
print_morphs
(
seg_num
,
sub
)
morphs
=
match
seg_num
with
[
0
->
let
bar
()
=
ps
" | "
in
[
0
->
let
bar
()
=
" | "
|>
ps
in
List2
.
process_list_sep
print_morph
bar
morphs
|
_
->
select_morphs
(
seg_num
,
sub
)
1
morphs
]
...
...
@@ -65,29 +65,29 @@ value print_morphs (seg_num,sub) morphs = match seg_num with
[pu : word -> unit] prints un-analysed chunks. *)
value
print_inv_morpho
pe
pne
pu
form
(
seg_num
,
sub
)
generative
(
delta
,
morphs
)
=
let
stem
=
Word
.
patch
delta
form
in
do
(* stem may have homo index *)
{
ps
"["
{
"["
|>
ps
;
if
generative
then
(* interpret stem as unique name *)
let
(
homo
,
bare_stem
)
=
homo_undo
stem
in
let
krit_infos
=
Deco
.
assoc
bare_stem
unique_kridantas
in
try
let
(
verbal
,
root
)
=
look_up_homo
homo
krit_infos
in
do
{
match
Deco
.
assoc
bare_stem
lexical_kridantas
with
[
[]
(* not in lexicon *)
->
if
stem
=
[
3
;
32
;
1
]
(* ita ifc *)
then
pe
stem
else
pne
bare_stem
if
stem
=
[
3
;
32
;
1
]
(* ita ifc *)
then
stem
|>
pe
else
bare_stem
|>
pne
|
entries
(* bare stem is lexicalized *)
->
if
List
.
exists
(
fun
(
_
,
h
)
->
h
=
homo
)
entries
then
pe
stem
(* stem with exact homo is lexical entry *)
else
pne
bare_stem
then
stem
|>
pe
(* stem with exact homo is lexical entry *)
else
bare_stem
|>
pne
]
;
ps
" { "
;
print_verbal
verbal
;
ps
" }["
;
pe
root
;
ps
"]"
}
with
[
_
->
pu
bare_stem
]
;
" { "
|>
ps
;
print_verbal
verbal
;
" }["
|>
ps
;
root
|>
pe
;
"]"
|>
ps
}
with
[
_
->
bare_stem
|>
pu
]
else
match
morphs
with
[
[
Unanalysed
]
->
pu
stem
|
_
->
pe
stem
[
[
Unanalysed
]
->
stem
|>
pu
|
_
->
stem
|>
pe
]
;
ps
"]{"
;
"]{"
|>
ps
;
print_morphs
(
seg_num
,
sub
)
morphs
;
ps
"}"
;
"}"
|>
ps
}
;
(* Decomposes a preverb sequence into the list of its components *)
...
...
@@ -99,12 +99,10 @@ value print_inv_morpho_link pvs pe pne pu form =
let
pv
=
if
Phonetics
.
phantomatic
form
then
[
2
]
(* aa- *)(*i OBSOLETE i*)
else
pvs
in
let
encaps
print
e
=
(* encapsulates prefixing with possible preverbs *)
if
pv
=
[]
then
print
e
else
let
pr_pv
pv
=
do
{
pe
pv
;
ps
"-"
}
in
do
{
List
.
iter
pr_pv
(
decomp_pvs
pvs
)
;
print
e
}
in
print_inv_morpho
(
encaps
pe
)
(
encaps
pne
)
pu
form
if
pv
=
[]
then
print
e
else
let
pr_pv
pv
=
do
{
pv
|>
pe
;
"-"
|>
ps
}
in
do
{
List
.
iter
pr_pv
(
decomp_pvs
pvs
);
print
e
}
in
print_inv_morpho
(
encaps
pe
)
(
encaps
pne
)
pu
form
(* Possible overgeneration when derivative of a root non attested with pv
since only existential test in [Dispatcher.validate_pv]. Thus
[anusandhiiyate] should show [dhaa#1], not [dhaa#2], [dhii#1] or [dhyaa] *)
...
...
@@ -113,21 +111,21 @@ value print_inv_morpho_link pvs pe pne pu form =
(* Used in [Lexer.record_tagging] for regression analysis *)
value
report_morph
gen
form
(
delta
,
morphs
)
=
let
stem
=
Word
.
patch
delta
form
in
do
(* stem may have homo index *)
{
ps
"{ "
{
"{ "
|>
ps
;
print_morphs
(
0
,
0
)
morphs
;
ps
" }["
;
" }["
|>
ps
;
if
gen
then
(* interpret stem as unique name *)
let
(
homo
,
bare
_stem
)
=
homo_undo
stem
in
let
krid_infos
=
Deco
.
assoc
bare
_stem
unique_kridantas
in
let
(
homo
,
bare
)
=
homo_undo
stem
in
let
krid_infos
=
Deco
.
assoc
bare
unique_kridantas
in
let
(
vb
,
root
)
=
look_up_homo
homo
krid_infos
in
do
{
match
Deco
.
assoc
stem
lexical_kridantas
with
[
[]
(* not in lexicon *)
->
do
{
ps
"G:"
;
pr_word
bare_stem
}
|
_
(* stem is lexical
entry *)
->
do
{
ps
"L:"
;
pr_word
stem
}
[
[]
(* not in lexicon *)
->
do
{
"G:"
|>
ps
;
pr_word
bare
}
|
_
(* stem is lexical
ized *)
->
do
{
"L:"
|>
ps
;
pr_word
stem
}
]
;
ps
" { "
;
print_verbal
vb
;
ps
" }["
;
pr_word
root
;
ps
"]"
;
" { "
|>
ps
;
print_verbal
vb
;
" }["
|>
ps
;
pr_word
root
;
"]"
|>
ps
}
else
pr_word
stem
;
ps
"]"
;
"]"
|>
ps
}
;
...
...
ML/morpho_html.ml
View file @
a80f53ed
...
...
@@ -37,24 +37,39 @@ value url_cache s =
mw_dico_url
^
mw_defining_page
s
^
"#"
^
Encode
.
anchor
s
;
(* Romanisation of Sanskrit *)
value
skt_roma
s
=
italics
(
Transduction
.
skt_to_html
s
)
value
skt_roma
s
=
Transduction
.
skt_to_html
s
(* Function [skt_roma] differs from [Encode.skt_to_roma]
because it does not go through encoding [s] as a word,
and the complications of dealing with possible hiatus. *)
;
value
skt_r
ed
s
=
html_red
(
skt_roma
s
)
value
skt_r
oma_it
s
=
skt_roma
s
|>
italics
;
value
skt_anchor
cached
font
form
=
(* for Declension Conjugation *)
(* ignores possible homo index *)
value
skt_deva
s
=
Encode
.
skt_strip_to_deva
s
;
value
skt_html_font
font
s
=
match
font
with
[
Roma
->
skt_roma
s
|
Deva
->
skt_deva
s
]
;
value
skt_html
s
=
(* ubiquitous for font *)
let
font
=
sanskrit_font
.
val
in
skt_html_font
font
s
;
value
skt_italics
form
=
skt_html
form
|>
italics
;
value
skt_anchor_font
font
is_cache
form
=
(* for Declension Conjugation *)
let
s
=
match
font
with
[
Deva
->
deva20_blue_center
(
Encode
.
skt_raw_strip_to_deva
form
)
|
Roma
->
skt_roma
form
(* no stripping in Roma *)
[
Deva
->
deva20_blue_center
(
Encode
.
skt_strip_to_deva
form
)
(* NB This removes the possible homo index *)
|
Roma
->
skt_roma_it
form
(* no stripping in Roma *)
]
and
url_function
=
if
cached
then
url_cache
else
url
in
and
url_function
=
if
is_cache
then
url_cache
else
url
in
anchor
Navy_
(
url_function
form
)
s
;
value
skt_anchor_R
cached
=
skt_anchor
cached
Roma
(* for Declension, Indexer *)
(*i [and skt_anchor_D = skt_anchor Deva] unused i*)
and
skt_anchor_R2
s
s'
=
anchor
Navy_
(
url
s
)
(
skt_roma
s'
)
(* for Indexer *)
value
skt_anchor
is_cache
=
let
font
=
sanskrit_font
.
val
in
skt_anchor_font
font
is_cache
(* for Declension, Indexer *)
and
skt_anchor_R
s
s'
=
anchor
Navy_
(
url
s
)
(
skt_roma_it
s'
)
(* for Indexer *)
;
value
no_hom
entry
=
(* low-level string hacking *)
match
(
String
.
sub
entry
((
String
.
length
entry
)
-
1
)
1
)
with
...
...
@@ -72,27 +87,25 @@ value skt_anchor_M word entry page cache =
let
vocable
=
if
no_hom
entry
then
word
else
let
pos
=
(
String
.
length
entry
)
-
1
in
word
^
"#"
^
(
String
.
sub
entry
pos
1
)
in
anchor_mw
(
skt_roma
vocable
)
;
value
skt_graph_anchor_R
cache
form
=
let
s
=
skt_roma
form
in
let
url_function
=
if
cache
then
url_cache
else
url
in
anchor_graph
Navy_
(
url_function
form
)
s
;
value
printer
w
=
(* do not eta reduce ! *)
match
sanskrit_display
.
val
with
[
"deva"
->
Canon
.
unidevcode
w
|
"roma"
->
Canon
.
uniromcode
w
|
_
->
failwith
"Unknown default display font"
anchor_mw
(
skt_roma_it
vocable
)
;
value
skt_graph_anchor
is_cache
form
=
let
url_function
=
if
is_cache
then
url_cache
else
url
in
anchor_graph
Navy_
(
url_function
form
)
(
skt_italics
form
)
;
(* This is an alternative to [skt_html] above - some cleaning-up is needed *)
value
skt_utf
w
=
(* do not eta reduce ! *)
match
sanskrit_font
.
val
with
[
Deva
->
Canon
.
unidevcode
(
Encode
.
strip
w
)
|
Roma
->
Canon
.
uniromcode
w
]
;
value
print_stem
w
=
printer
w
|>
ps
(* w in lexicon or not *)
and
print_chunk
w
=
printer
w
|>
ps
and
print_entry
w
=
skt_anchor_R
False
(
Canon
.
decode
w
)
|>
ps
(* w in lexicon *)