...
 
Commits (4)
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(*i module Canon = struct i*)
......@@ -732,7 +732,7 @@ and matra_indic_unicode_point = fun
]
;
(* om 50 udatta 51 anudatta 52 grave 53 acute 54 avagraha 3D .ll 61
danda 64 ddanda 65 0 66 1 67 2 68 3 69 4 6A 5 6B 6 6C 7 6D 8 6E 9 6F ° 70 *)
danda 64 ddanda 65 0 66 1 67 2 68 3 69 4 6A 5 6B 6 6C 7 6D 8 6E 9 6F deg 70 *)
value inject_point s = "&#x09" ^ s ^ ";"
;
value deva_unicode c =
......
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(*i module Encode = struct i*)
......@@ -21,12 +21,12 @@ value is_vowel c = vowel c || c>100 && c<114 (* accounts for upper case *)
value rec normalize = normal_rec False
where rec normal_rec after_vow = fun
[ [] -> []
| [ 14 (* .m *) :: [] ] -> [ 14 ] (* and NOT m *)
| [ 14 (* .m *) ] -> [ 14 ] (* and NOT m *)
| [ 14 (* .m *) :: [ c :: l ] ] ->
if after_vow then
let c' = homonasal c in [ c' :: [ c :: normal_rec (is_vowel c) l ] ]
else raise (In_error "Anusvaara should follow vowel")
| [ 16 (* .h *) :: [] ] ->
| [ 16 (* .h *) ] ->
if after_vow then [ 16 ]
else raise (In_error "Visarga should follow vowel")
(* No change to visarga since eg praata.hsvasu.h comes from praatar|svasu.h
......@@ -79,7 +79,7 @@ value strip w = match w with
;
value rstem w = strip (Word.mirror w)
;
value rev_strip w = Word.mirror (rstem w) (* ugly - temp *)
value rev_strip w = Word.mirror (rstem w) (* [compute_mw_links] *)
;
(* Builds revword normalised stem from entry string of root *)
(* Used by [Verbs.revstem], [Nouns.enter_iic], [Print_dict] *)
......
......@@ -10,7 +10,7 @@
(* A very simple lexer recognizing 1 character idents and integers
and skipping spaces and comments between [%] and eol;
used for various transduction tasks with Camlp4 Grammars.
It is a copy of ZEN/zen_lexer.ml in order to simplify dependencies. *)
It is a copy of [ZEN/zen_lexer.ml] in order to simplify dependencies. *)
(*i module Min_lexer = struct i*)
......
......@@ -1265,10 +1265,10 @@ value build_as gen stem entry =
; decline Nom (match gen with
[ Mas -> match entry with (* gram Muller p 72, Whitney §416 *)
[ "anehas" | "uzanas" | "da.mzas" (* Puruda.mzas *) -> "aa"
| _ -> "aas"
]
| _ -> "aas" (* Kane§108 candramas vedhas su/dur/unmanas *)
]
| Fem -> "aas"
| Neu -> "as"
| Neu -> "as" (* manas payas vyas? avas1 zreyas saras vacas *)
| _ -> raise (Control.Anomaly "Nouns")
])
; decline Acc (match gen with
......@@ -1312,7 +1312,7 @@ value build_as gen stem entry =
; decline Dat "obhyas"
; decline Abl "obhyas"
; decline Gen "asaam"
; decline Loc "a.hsu" (* decline Loc "assu" *)
; decline Loc "a.hsu" (* decline Loc "assu" *) (* Kane§108 opt "astu" *)
])
]
; Bare Noun (mirror rstem) (* as *)
......@@ -1381,6 +1381,51 @@ value build_nas entry =
]
]
;
value build_dos gen entry = (* Kale§108a *)
let decline case form = (case,code form) in
enter entry
[ Declined Noun gen
[ (Singular,
[ decline Voc "dos"
; decline Nom "dos"
; decline Acc "dos"
; decline Ins "do.saa"
; decline Dat "do.se"
; decline Abl "do.sas"
; decline Gen "do.sas"
; decline Loc "do.si"
])
; (Dual, let form = match gen with
[ Mas | Fem -> "do.sau"
| Neu -> "do.sii"
| _ -> raise (Control.Anomaly "Nouns")
] in
[ decline Voc form
; decline Nom form
; decline Acc form
; decline Ins "dorbhyaam"
; decline Dat "dorbhyaam"
; decline Abl "dorbhyaam"
; decline Gen "dor.sos"
; decline Loc "dor.sos"
])
; (Plural, let form = match gen with
[ Mas | Fem -> "do.sas"
| Neu -> "do.m.si"
| _ -> raise (Control.Anomaly "Nouns")
] in
[ decline Voc form
; decline Nom form
; decline Acc form
; decline Ins "dorbhis"
; decline Dat "dorbhyas"
; decline Abl "dorbhyas"
; decline Gen "do.saam"
; decline Loc "do.h.su"
])
]
]
;
value build_is gen stem entry =
let decline case suff = (case,fix stem suff)
and rstem = [ 48 :: [ 3 :: stem ] ] in
......@@ -1887,7 +1932,6 @@ value build_anadvah stem entry = (* ana.dvah *)
; decline Loc "utsu"
])
]
; Avyayaf (code "uham")
]
;
value build_neu_a stem entry =
......@@ -2401,7 +2445,7 @@ value build_neu_brahman entry =
]
;
value build_aksan stem entry =
(* stem = ak.san, asthan, dadhan, sakthan Whitney §431 *)
(* stem = ak.san, asthan, dadhan, sakthan Whitney §431 Kale§126 *)
let decline case suff = (case,fix stem suff) in
enter entry
[ Declined Noun Neu
......@@ -2416,19 +2460,21 @@ value build_aksan stem entry =
; decline Loc "ni"
; decline Loc "ani" (* \Pan{7,1,75} *)
])
; (Dual,
; (Dual, let l =
[ decline Voc "inii"
; decline Voc "ii"
; decline Nom "inii"
; decline Nom "ii" (* Sun and moon *)
; decline Acc "inii"
; decline Acc "ii"
; decline Ins "ibhyaam"
; decline Dat "ibhyaam"
; decline Abl "ibhyaam"
; decline Gen "nos"
; decline Loc "nos"
])
] in if entry="ak.san" then
[ decline Voc "ii"
; decline Nom "ii"
; decline Acc "ii"
] @ l (* Vedic: Sun and moon *)
else l)
; (Plural,
[ decline Voc "iini"
; decline Nom "iini"
......@@ -2967,8 +3013,8 @@ value build_fem_ii trunc entry =
; declines Gen "aas"
; declines Loc "aam"
])
; (Dual,
[ declines Voc "au"
; (Dual, if entry = "ubhayii" then [] else
[ declines Voc "au"
; declines Nom "au"
; declines Acc "au"
; declinel Ins "bhyaam"
......@@ -3052,7 +3098,7 @@ value poly_ii_decls decline =
; decline Gen "yas"
; decline Loc "yi"
])
; (Dual,
; (Dual,
[ decline Voc "yaa"
; decline Nom "yaa"
; decline Acc "yaa"
......@@ -4198,7 +4244,8 @@ value build_pron_a g stem entry = (* g=Mas ou g=Neu *)
] in if pseudo_nominal then
[ decline Abl "aat" :: [ decline Loc "e" ::
[ decline Voc "a" :: l ] ] ] else l)
; (Dual, let l =
; (Dual, if entry = "ubhaya" (* no dual *) then []
else let l =
[ decline Nom (if g=Mas then "au" else "e")
; decline Acc (if g=Mas then "au" else "e")
; decline Ins "aabhyaam"
......@@ -5072,11 +5119,11 @@ value compute_nouns_stem_form e stem d p =
| [ 45 :: r3 ] (* -vas *) ->
if p = "Ppfta" then build_mas_vas r3 e
else match r3 with
[ [ 1 :: [ 43 :: _ ] ] (* -ravas *)
| [ 5 :: [ 48 :: _ ] ] (* - suvas *) -> build_as Mas r2 e
[ [ 1 :: [ 43 :: _ ] ] (* -ravas *) -> build_as Mas r2 e
(* uccaisravas, puruuravas, ugrazravas, vizravas non ppf *)
| [ 3 :: r4 ] (* -ivas *) -> build_mas_ivas r4 e
| [ 35 :: _ ] (* -dhvas *) -> build_root Mas stem e
| [ 35 :: _ ] (* -dhvas *)
| [ 5 :: [ 48 :: _ ] ] (* -suvas *) -> build_root Mas stem e
| _ (* other ppf *) -> build_mas_vas r3 e
]
| [ 43 :: [ 48 :: _ ]] (* -sras *) -> build_root Mas stem e
......@@ -5088,10 +5135,10 @@ value compute_nouns_stem_form e stem d p =
| [ 2 :: _ ] (* -aas *) -> () (* avoids reporting bahu aas bhaas *)
| [ 3 :: r2 ] (* -is *) -> match r2 with
[ [ 46; 2 :: _ ] (* niraazis *) -> build_aazis Mas r2 e
| _ -> build_is Mas r2 e
| _ -> build_is Mas r2 e (* udarcis *)
]
| [ 5 :: r2 ] (* -us *) -> build_us Mas r2 e
| [ 12; 34 ] (* dos *) -> () (* avoids reporting bahu *)
| [ 5 :: r2 ] (* -us *) -> build_us Mas r2 e (* acak.sus *)
| [ 12; 34 ] (* dos *) -> build_dos Mas e
| [ 14; 5; 37 ] (* pu.ms *) -> build_pums [ 41; 5; 37 ] stem e
| [ 14; 5; 37; 1; 36 ] (* napu.ms *)
-> build_pums [ 41; 5; 37; 1; 36 ] stem e
......@@ -5276,7 +5323,7 @@ value compute_nouns_stem_form e stem d p =
else match r3 with
[ [ 1 ] (* avas1 - non ppf *)
| [ 1 :: [ 43 :: _ ] ] (* -ravas eg zravas, sravas - non ppf *)
| [ 5 :: [ 48 :: _ ] ] (* - suvas *)
| [ 5 :: [ 48 :: _ ] ] (* -suvas *)
| [ 3; 43; 1; 45 ] (* varivas *) -> build_as Neu r2 e
| [ 3 :: r4 ] (* ivas *) -> build_neu_ivas r4 e
| [ 35 :: _ ] (* -dhvas *) -> build_root Neu stem e
......@@ -5292,9 +5339,10 @@ value compute_nouns_stem_form e stem d p =
| [ 40 :: _ ] (* bhaas aabhaas *) -> () (* missing paradigm *)
| _ -> report stem Neu
]
| [ 3 :: r2 ] (* -is *) -> build_is Neu r2 e
| [ 5 :: r2 ] (* -us *) -> build_us Neu r2 e
| _ -> build_root Neu stem e (* dos *)
| [ 3 :: r2 ] (* -is *) -> build_is Neu r2 e (* jyotis havis *)
| [ 5 :: r2 ] (* -us *) -> build_us Neu r2 e (* cak.sus dhanus *)
| [ 12; 34 ] (* dos *) -> build_dos Neu e
| _ -> build_root Neu stem e
]
| [ 49 :: r1 ] (* -h *) -> match r1 with
[ [ 1; 34 ] (* dah2 *) (* duhify *)
......@@ -5490,7 +5538,7 @@ value compute_nouns_stem_form e stem d p =
| _ -> build_is Fem r2 e
]
| [ 5 :: r2 ] (* -us *) -> build_us Fem r2 e
| [ 12; 34 ] (* dos *)
| [ 12; 34 ] (* dos *) -> build_dos Fem e
| [ 14; 2; 41 ] (* maa.ms *) -> () (* avoids reporting bahu *)
| [ 14 :: [ 5 :: _ ] ] -> () (* -pu.ms *)
| _ -> report stem g
......
......@@ -43,8 +43,8 @@ value voices_of = fun
| "ghraa" | "cakaas" | "ca.t" | "cand" | "cam" | "car" | "cal" | "cit#1"
| "cumb" | "chur" | "ch.rd" | "jak.s" | "jap" | "jabh#2" | "jam" | "jalp"
| "jas" | "jaag.r" | "jinv" | "jiiv" | "jvar" | "jval" | "tak" | "tak.s"
| "ta~nc" | "tam" | "tarj" | "tup" | "tu.s" | "t.rp#1" | "t.r.s#1" | "t.rr"
| "tyaj#1" | "tras" | "tru.t" | "tvak.s" | "tsar" | "da.mz" | "dagh"
| "ta~nc" | "tam" | "tarj" | "tup" | "tu.s" | "t.rp#1" | "t.r.s#1" | "t.rh"
| "t.rr" | "tyaj#1" | "tras" | "tru.t" | "tvak.s" | "tsar" | "da.mz" | "dagh"
| "dabh" | "dam#1" | "dal" | "das" | "dah#1" | "daa#2" | "daa#3" | "diiv#1"
| "du" | "du.s" | "d.rp" | "d.rbh" | "d.rz#1" | "d.rh" | "d.rr" | "dhyaa"
| "draa#1" | "dru#1" | "druh#1" | "dham" | "dhaa#2" | "dhru" | "dhvan"
......
......@@ -4,9 +4,10 @@
(* *)
(* Gérard Huet & Pawan Goyal *)
(* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* Chunking mechanism for guessing partial padapatha form from list of chunks *)
(* Essential for maximum parallelism in segmentation *)
value sanskrit_chunk encode s =
match encode s with (* avagraha reverts to a *)
......
......@@ -128,21 +128,6 @@ value print_uni_kridanta pvs phase word multitags (n,m) =
; ps th_end
}
;
(* deprecated
value print_uni_taddhita pvs m phase stem sfx sfx_phase = fun
[ [ (delta,polytag) ] -> (* we assume n=1 taddhita form unambiguous *)
let unitag = [ project m polytag ]
and gen = generative phase
and cached = False in do
{ ps th_begin
; pl (Lex.table_morph_of sfx_phase) (* table begin *)
; let _ = Lex.print_morph_tad pvs cached 0 gen stem sfx 0 (delta,unitag) in ()
; ps table_end (* table end *)
; ps th_end
}
| _ -> failwith "Multiple sfx tag"
]
; *)
value print_projection phase rword ((_,m) as index) = do
{ ps tr_begin (* tr begins *)
; Morpho_html.print_signifiant_yellow rword
......@@ -151,14 +136,6 @@ value print_projection phase rword ((_,m) as index) = do
[ Atomic tags -> print_uni_kridanta [] phase word tags index
| Preverbed (_,phase) pvs form tags ->
print_uni_kridanta pvs phase form tags index
(* deprecated
| Taddhita (ph,form) sfx sfx_phase sfx_tags ->
match Lex.tags_of ph form with
[ Atomic _ -> print_uni_taddhita [] m phase form sfx sfx_phase sfx_tags
| Preverbed _ pvs _ _ ->
print_uni_taddhita pvs m phase form sfx sfx_phase sfx_tags
| _ -> failwith "taddhita recursion unavailable"
] *)
]
; ps tr_end (* tr ends *)
}
......
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(*i module Phonetics = struct i*)
......@@ -318,7 +318,7 @@ value phantom_elim = fun
]
;
(* For m.rj-like verbs (Whitney§219-a) Panini{8,2,36}
"bhraaj" "m.rj" "yaj1" "raaj1" "vraj" "s.rj1"
"bhraaj" "m.rj" "yaj1" "raaj1" "vraj" "s.rj1" "bh.rjj"
replace phoneme j=24 by j'=124 with sandhi j'+t = .s.t (j' is j going to z) *)
value mrijify stem = match stem with
[ [ 24 :: r ] -> [ 124 :: r ]
......@@ -394,8 +394,10 @@ value finalize rstem = match rstem with
| 24 (* j *) (* e.g. bhi{\d s}aj; bhuj; as{\d r}j -yuj *)
| 25 (* jh *) -> match rest with
[ [ 26 (* \~n *) :: ante ] -> [ 21 (* \.n *) :: ante ]
| [ 21 (* \.n *) :: _ ] -> rest
| _ -> [ 17 (* k *) :: rest ] (* but sometimes {\d t} - beware *)
| [ 24 (* j *) :: ante ] | [ 22 (* c *) :: ante ]
-> [ 27 (* {\d t} *) :: ante ] (* majj bh.rjj pracch *)
| [ 21 (* \.n *) :: _ ] -> rest
| _ -> [ 17 (* k *) :: rest ] (* but sometimes {\d t} - eg devej *)
]
| 20 (* gh *) -> [ 17 (* k *) :: asp rest ]
| 26 (* \~n *) -> [ 21 (* \.n *) :: rest ]
......@@ -415,7 +417,13 @@ value finalize rstem = match rstem with
| _ -> [ 27 (* {\d t} *) :: rest ] (* default *)
(* NB optionally nak Whitney§218a *)
]
| 47 (* {\d s} *) -> [ 27 (* {\d t} *) :: rest ] (* e.g. dvi{\d s} {\R} dvi{\d t} *)
| 47 (* {\d s} *) -> match rest with
[ [ 7 :: [ 35 :: _ ] ] (* -dh{\d r}{\d s} {\R} -dh{\d r}k *)
-> [ 17 (* k *) :: rest ] (* Kane §97 *)
| [ 17 :: ante ] (* -k{\d s} {\R} -k *)
-> [ 17 (* k *) :: ante ] (* vivik.s Kane §97 but MW: vivi.t *)
| _ -> [ 27 (* {\d t} *) :: rest ] (* e.g. dvi{\d s} {\R} dvi{\d t} *)
]
| 49 (* h *) -> [ 27 (* {\d t} *) :: asp rest ] (* e.g. lih {\R} li{\d t} *)
| 149 (* h' *) -> [ 17 (* k *) :: asp rest ] (* -duh {\R} -dhuk , impft doh adhok, etc. *)
| 249 (* h'' *) -> [ 32 (* t *) :: asp rest ]
......@@ -443,7 +451,11 @@ value finalize_r stem = match stem with
else stem
| [] -> failwith "Illegal arg r to finalize"
]
| 48 (* s *) -> [ 34 (* t *) :: rest ] (* for roots sras dhvas *)
| 48 (* s *) -> match rest with
[ [ 1 :: [ 45 :: [ 35 :: _ ] ] ] -> [ 34 (* t *) :: rest ] (* dhvas *)
| [ 1 :: [ 45 :: _ ] ] -> stem (* suvas *)
| _ -> [ 34 (* t *) :: rest ] (* sras *)
]
| _ -> finalize stem
]
]
......
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet & Pawan Goyal *)
(* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* CGI-bin sktreader alias Reader for segmentation, tagging and parsing.
......
This diff is collapsed.
......@@ -8,4 +8,4 @@
(**************************************************************************)
(* Generated by make version - see main Makefile *)
value version="3.12" and version_date="2019-01-22";
value version="3.13" and version_date="2019-02-03";
VERSION='3.12'
DATE='2019-01-22'
VERSION='3.13'
DATE='2019-02-03'