Commit 26fa3951 authored by Gérard Huet's avatar Gérard Huet

VERSION bumped to 3.13

parent a002873f
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(*i module Canon = struct i*)
......@@ -732,7 +732,7 @@ and matra_indic_unicode_point = fun
]
;
(* om 50 udatta 51 anudatta 52 grave 53 acute 54 avagraha 3D .ll 61
danda 64 ddanda 65 0 66 1 67 2 68 3 69 4 6A 5 6B 6 6C 7 6D 8 6E 9 6F ° 70 *)
danda 64 ddanda 65 0 66 1 67 2 68 3 69 4 6A 5 6B 6 6C 7 6D 8 6E 9 6F deg 70 *)
value inject_point s = "&#x09" ^ s ^ ";"
;
value deva_unicode c =
......
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(*i module Encode = struct i*)
......@@ -21,12 +21,12 @@ value is_vowel c = vowel c || c>100 && c<114 (* accounts for upper case *)
value rec normalize = normal_rec False
where rec normal_rec after_vow = fun
[ [] -> []
| [ 14 (* .m *) :: [] ] -> [ 14 ] (* and NOT m *)
| [ 14 (* .m *) ] -> [ 14 ] (* and NOT m *)
| [ 14 (* .m *) :: [ c :: l ] ] ->
if after_vow then
let c' = homonasal c in [ c' :: [ c :: normal_rec (is_vowel c) l ] ]
else raise (In_error "Anusvaara should follow vowel")
| [ 16 (* .h *) :: [] ] ->
| [ 16 (* .h *) ] ->
if after_vow then [ 16 ]
else raise (In_error "Visarga should follow vowel")
(* No change to visarga since eg praata.hsvasu.h comes from praatar|svasu.h
......@@ -79,7 +79,7 @@ value strip w = match w with
;
value rstem w = strip (Word.mirror w)
;
value rev_strip w = Word.mirror (rstem w) (* ugly - temp *)
value rev_strip w = Word.mirror (rstem w) (* [compute_mw_links] *)
;
(* Builds revword normalised stem from entry string of root *)
(* Used by [Verbs.revstem], [Nouns.enter_iic], [Print_dict] *)
......
......@@ -10,7 +10,7 @@
(* A very simple lexer recognizing 1 character idents and integers
and skipping spaces and comments between [%] and eol;
used for various transduction tasks with Camlp4 Grammars.
It is a copy of ZEN/zen_lexer.ml in order to simplify dependencies. *)
It is a copy of [ZEN/zen_lexer.ml] in order to simplify dependencies. *)
(*i module Min_lexer = struct i*)
......
......@@ -43,8 +43,8 @@ value voices_of = fun
| "ghraa" | "cakaas" | "ca.t" | "cand" | "cam" | "car" | "cal" | "cit#1"
| "cumb" | "chur" | "ch.rd" | "jak.s" | "jap" | "jabh#2" | "jam" | "jalp"
| "jas" | "jaag.r" | "jinv" | "jiiv" | "jvar" | "jval" | "tak" | "tak.s"
| "ta~nc" | "tam" | "tarj" | "tup" | "tu.s" | "t.rp#1" | "t.r.s#1" | "t.rr"
| "tyaj#1" | "tras" | "tru.t" | "tvak.s" | "tsar" | "da.mz" | "dagh"
| "ta~nc" | "tam" | "tarj" | "tup" | "tu.s" | "t.rp#1" | "t.r.s#1" | "t.rh"
| "t.rr" | "tyaj#1" | "tras" | "tru.t" | "tvak.s" | "tsar" | "da.mz" | "dagh"
| "dabh" | "dam#1" | "dal" | "das" | "dah#1" | "daa#2" | "daa#3" | "diiv#1"
| "du" | "du.s" | "d.rp" | "d.rbh" | "d.rz#1" | "d.rh" | "d.rr" | "dhyaa"
| "draa#1" | "dru#1" | "druh#1" | "dham" | "dhaa#2" | "dhru" | "dhvan"
......
......@@ -4,9 +4,10 @@
(* *)
(* Gérard Huet & Pawan Goyal *)
(* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* Chunking mechanism for guessing partial padapatha form from list of chunks *)
(* Essential for maximum parallelism in segmentation *)
value sanskrit_chunk encode s =
match encode s with (* avagraha reverts to a *)
......
......@@ -128,21 +128,6 @@ value print_uni_kridanta pvs phase word multitags (n,m) =
; ps th_end
}
;
(* deprecated
value print_uni_taddhita pvs m phase stem sfx sfx_phase = fun
[ [ (delta,polytag) ] -> (* we assume n=1 taddhita form unambiguous *)
let unitag = [ project m polytag ]
and gen = generative phase
and cached = False in do
{ ps th_begin
; pl (Lex.table_morph_of sfx_phase) (* table begin *)
; let _ = Lex.print_morph_tad pvs cached 0 gen stem sfx 0 (delta,unitag) in ()
; ps table_end (* table end *)
; ps th_end
}
| _ -> failwith "Multiple sfx tag"
]
; *)
value print_projection phase rword ((_,m) as index) = do
{ ps tr_begin (* tr begins *)
; Morpho_html.print_signifiant_yellow rword
......@@ -151,14 +136,6 @@ value print_projection phase rword ((_,m) as index) = do
[ Atomic tags -> print_uni_kridanta [] phase word tags index
| Preverbed (_,phase) pvs form tags ->
print_uni_kridanta pvs phase form tags index
(* deprecated
| Taddhita (ph,form) sfx sfx_phase sfx_tags ->
match Lex.tags_of ph form with
[ Atomic _ -> print_uni_taddhita [] m phase form sfx sfx_phase sfx_tags
| Preverbed _ pvs _ _ ->
print_uni_taddhita pvs m phase form sfx sfx_phase sfx_tags
| _ -> failwith "taddhita recursion unavailable"
] *)
]
; ps tr_end (* tr ends *)
}
......
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(*i module Phonetics = struct i*)
......@@ -318,7 +318,7 @@ value phantom_elim = fun
]
;
(* For m.rj-like verbs (Whitney§219-a) Panini{8,2,36}
"bhraaj" "m.rj" "yaj1" "raaj1" "vraj" "s.rj1"
"bhraaj" "m.rj" "yaj1" "raaj1" "vraj" "s.rj1" "bh.rjj"
replace phoneme j=24 by j'=124 with sandhi j'+t = .s.t (j' is j going to z) *)
value mrijify stem = match stem with
[ [ 24 :: r ] -> [ 124 :: r ]
......@@ -394,8 +394,10 @@ value finalize rstem = match rstem with
| 24 (* j *) (* e.g. bhi{\d s}aj; bhuj; as{\d r}j -yuj *)
| 25 (* jh *) -> match rest with
[ [ 26 (* \~n *) :: ante ] -> [ 21 (* \.n *) :: ante ]
| [ 21 (* \.n *) :: _ ] -> rest
| _ -> [ 17 (* k *) :: rest ] (* but sometimes {\d t} - beware *)
| [ 24 (* j *) :: ante ] | [ 22 (* c *) :: ante ]
-> [ 27 (* {\d t} *) :: ante ] (* majj bh.rjj pracch *)
| [ 21 (* \.n *) :: _ ] -> rest
| _ -> [ 17 (* k *) :: rest ] (* but sometimes {\d t} - eg devej *)
]
| 20 (* gh *) -> [ 17 (* k *) :: asp rest ]
| 26 (* \~n *) -> [ 21 (* \.n *) :: rest ]
......@@ -415,7 +417,13 @@ value finalize rstem = match rstem with
| _ -> [ 27 (* {\d t} *) :: rest ] (* default *)
(* NB optionally nak Whitney§218a *)
]
| 47 (* {\d s} *) -> [ 27 (* {\d t} *) :: rest ] (* e.g. dvi{\d s} {\R} dvi{\d t} *)
| 47 (* {\d s} *) -> match rest with
[ [ 7 :: [ 35 :: _ ] ] (* -dh{\d r}{\d s} {\R} -dh{\d r}k *)
-> [ 17 (* k *) :: rest ] (* Kane §97 *)
| [ 17 :: ante ] (* -k{\d s} {\R} -k *)
-> [ 17 (* k *) :: ante ] (* vivik.s Kane §97 but MW: vivi.t *)
| _ -> [ 27 (* {\d t} *) :: rest ] (* e.g. dvi{\d s} {\R} dvi{\d t} *)
]
| 49 (* h *) -> [ 27 (* {\d t} *) :: asp rest ] (* e.g. lih {\R} li{\d t} *)
| 149 (* h' *) -> [ 17 (* k *) :: asp rest ] (* -duh {\R} -dhuk , impft doh adhok, etc. *)
| 249 (* h'' *) -> [ 32 (* t *) :: asp rest ]
......@@ -443,7 +451,11 @@ value finalize_r stem = match stem with
else stem
| [] -> failwith "Illegal arg r to finalize"
]
| 48 (* s *) -> [ 34 (* t *) :: rest ] (* for roots sras dhvas *)
| 48 (* s *) -> match rest with
[ [ 1 :: [ 45 :: [ 35 :: _ ] ] ] -> [ 34 (* t *) :: rest ] (* dhvas *)
| [ 1 :: [ 45 :: _ ] ] -> stem (* suvas *)
| _ -> [ 34 (* t *) :: rest ] (* sras *)
]
| _ -> finalize stem
]
]
......
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet & Pawan Goyal *)
(* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* CGI-bin sktreader alias Reader for segmentation, tagging and parsing.
......
......@@ -8,4 +8,4 @@
(**************************************************************************)
(* Generated by make version - see main Makefile *)
value version="3.12" and version_date="2019-01-22";
value version="3.13" and version_date="2019-02-03";
VERSION='3.12'
DATE='2019-01-22'
VERSION='3.13'
DATE='2019-02-03'
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment