Commit 10a2fd42 authored by Gérard Huet's avatar Gérard Huet

Productive taddhitas suppressed - some navyanyaaya compounds will be now...

Productive taddhitas suppressed - some navyanyaaya compounds will be now unrecognizable without lexicon acquisition
parent bd50f79b
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
(* *) (* *)
(* Gérard Huet *) (* Gérard Huet *)
(* *) (* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *) (* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************) (**************************************************************************)
(* Checkpoints management *) (* Checkpoints management *)
...@@ -16,11 +16,7 @@ value rec phase_encode = fun ...@@ -16,11 +16,7 @@ value rec phase_encode = fun
"<{" ^ string_of_phase ph ^ "}{" ^ "<{" ^ string_of_phase ph ^ "}{" ^
string_of_phase ph' ^ "}{" ^ string_of_phase ph' ^ "}{" ^
Canon.decode prev ^ "}{" ^ Canon.decode form ^ "}>" Canon.decode prev ^ "}{" ^ Canon.decode form ^ "}>"
| Tad (ph,ph') form sfx -> | phase -> "{" ^ string_of_phase phase ^ "}"
"(" ^ phase_encode ph ^ "{" ^
string_of_phase ph' ^ "}{" ^
Canon.decode form ^ "}{" ^ Canon.decode sfx ^ "})"
| phase -> "{" ^ string_of_phase phase ^ "}"
] ]
and bool_encode b = if b then "t" else "f" and bool_encode b = if b then "t" else "f"
; ;
...@@ -60,11 +56,7 @@ EXTEND Gram ...@@ -60,11 +56,7 @@ EXTEND Gram
; pre = TEXT; form = TEXT ; ">" -> ; pre = TEXT; form = TEXT ; ">" ->
Comp (phase_of_string p, phase_of_string p') Comp (phase_of_string p, phase_of_string p')
(Encode.code_string pre) (Encode.code_string form) (Encode.code_string pre) (Encode.code_string form)
| "("; p = phase; p' = TEXT (* Taddhita *) | p = TEXT -> phase_of_string p
; form = TEXT; sfx = TEXT; ")" ->
Tad (p, phase_of_string p')
(Encode.code_string form) (Encode.code_string sfx)
| p = TEXT -> phase_of_string p
] ] ; ] ] ;
phase_rword: phase_rword:
[ [ s = phase; ","; o = TEXT -> (s, Encode.rev_code_string o) ] ] ; [ [ s = phase; ","; o = TEXT -> (s, Encode.rev_code_string o) ] ] ;
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
type vmorph = type vmorph =
[ Prim of int and bool and Word.word (* primary conjugation *) [ Prim of int and bool and Word.word (* primary conjugation *)
(* gana pada form of present 3rd sg for checking *) (* gana pada form of present 3rd sg for checking *)
(* pada=True Paradmaipada pada=False AAtmanepada *) (* pada=True Paradmaipada pada=False AAtmanepada *)
| Causa of Word.word (* causative 3rd sg form *) | Causa of Word.word (* causative 3rd sg form *)
| Inten of Word.word (* intensive 3rd sg form *) | Inten of Word.word (* intensive 3rd sg form *)
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
(* *) (* *)
(* Gérard Huet *) (* Gérard Huet *)
(* *) (* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *) (* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************) (**************************************************************************)
(* Dispatcher: Sanskrit Engine in 53 phases automaton (plus 2 fake ones) *) (* Dispatcher: Sanskrit Engine in 53 phases automaton (plus 2 fake ones) *)
...@@ -78,8 +78,6 @@ value transducer = fun ...@@ -78,8 +78,6 @@ value transducer = fun
| Avy -> transducers.avya (* ifc avyayiibhava *) | Avy -> transducers.avya (* ifc avyayiibhava *)
| Inftu -> transducers.inftu (* infinitives in -tu iic. Renou HLS 72 *) | Inftu -> transducers.inftu (* infinitives in -tu iic. Renou HLS 72 *)
| Kama -> transducers.kama (* ifcs of kaama/manas: tyaktukaama dra.s.tumanas *) | Kama -> transducers.kama (* ifcs of kaama/manas: tyaktukaama dra.s.tumanas *)
| Sfx -> transducers.sfx (* ifc taddhita suffixes *)
| Isfx -> transducers.isfx (* iifc taddhita suffixes *)
| Cache -> transducers.cache (* cached forms *) | Cache -> transducers.cache (* cached forms *)
| Noun | Iic | Iik | Voca | Krid | Pvk | Vok | Noun | Iic | Iik | Voca | Krid | Pvk | Vok
-> raise (Control.Anomaly "composite phase") -> raise (Control.Anomaly "composite phase")
...@@ -116,7 +114,7 @@ value cached = (* potentially cached lexicon acquisitions *) ...@@ -116,7 +114,7 @@ value cached = (* potentially cached lexicon acquisitions *)
; ;
(* initial1, initial2: phases *) (* initial1, initial2: phases *)
value initial1 = value initial1 =
(* All phases but Ifc, Abso, Auxi, Auxik, Auxiick, Lopa, Lopak, Sfx, Isfx. *) (* All phases but Ifc, Abso, Auxi, Auxik, Auxiick, Lopa, Lopak. *)
[ Inde; Iicv; Iicc; Nouv; Nouc; Pron; A; An; Root; Kriv; Kric; Iikv; Iikc [ Inde; Iicv; Iicc; Nouv; Nouc; Pron; A; An; Root; Kriv; Kric; Iikv; Iikc
; Peri; Pv; Pvkv; Pvkc; Iiv; Iivv; Iivc; Iiy; Inv; Ai; Ani; Absv; Absc; Inftu ; Peri; Pv; Pvkv; Pvkc; Iiv; Iivv; Iivc; Iiy; Inv; Ai; Ani; Absv; Absc; Inftu
; Vocv; Vocc; Vokv; Vokc ] @ cached ; Vocv; Vocc; Vokv; Vokc ] @ cached
...@@ -128,7 +126,7 @@ value initial full = if full then initial1 else initial2 ...@@ -128,7 +126,7 @@ value initial full = if full then initial1 else initial2
(* dispatch1: Word.word -> phase -> phases *) (* dispatch1: Word.word -> phase -> phases *)
value dispatch1 w = fun (* w is the current input word *) value dispatch1 w = fun (* w is the current input word *)
[ Nouv | Nouc | Pron | Inde | Abso | Auxi | Auxik | Kama | Ifc [ Nouv | Nouc | Pron | Inde | Abso | Auxi | Auxik | Kama | Ifc
| Kriv | Kric | Absv | Absc | Avy | Lopak | Sfx | Root | Lopa -> | Kriv | Kric | Absv | Absc | Avy | Lopak | Root | Lopa ->
if phantomatic w then [ Root; Kriv; Kric; Iikv; Iikc; Abso ] (* aa- pv *) if phantomatic w then [ Root; Kriv; Kric; Iikv; Iikc; Abso ] (* aa- pv *)
else initial1 else initial1
| A -> if phantomatic w then [] | A -> if phantomatic w then []
...@@ -142,19 +140,15 @@ value dispatch1 w = fun (* w is the current input word *) ...@@ -142,19 +140,15 @@ value dispatch1 w = fun (* w is the current input word *)
justified by \Pan{2,2,6} a-x only if x is a subanta. *) justified by \Pan{2,2,6} a-x only if x is a subanta. *)
| Iicv | Iicc | Iikv | Iikc | Iiif | Auxiick -> (* Compounding *) | Iicv | Iicc | Iikv | Iikc | Iiif | Auxiick -> (* Compounding *)
[ Iicv; Iicc; Nouv; Nouc; A; An; Ifc; Iikv; Iikc; Kriv; Kric [ Iicv; Iicc; Nouv; Nouc; A; An; Ifc; Iikv; Iikc; Kriv; Kric
; Pvkv; Pvkc; Iiif; Iivv; Iivc; Vocv; Vocc; Vokv; Vokc ] @ ; Pvkv; Pvkc; Iiif; Iivv; Iivc; Vocv; Vocc; Vokv; Vokc ] @ cached
[ Sfx; Isfx ] @ cached | Pv -> if phantomatic w then [] else
| Pv -> if phantomatic w then [] else
if amuitic w then [ Lopa ] else [ Root; Abso; Peri; Inftu ] if amuitic w then [ Lopa ] else [ Root; Abso; Peri; Inftu ]
| Pvkc | Pvkv -> if phantomatic w then [] else | Pvkc | Pvkv -> if phantomatic w then [] else
if amuitic w then [ Lopak ] else [ Iikv; Iikc; Kriv; Kric; Vokv; Vokc ] if amuitic w then [ Lopak ] else [ Iikv; Iikc; Kriv; Kric; Vokv; Vokc ]
| Iiv -> [ Auxi ] (* as bhuu and k.r finite forms *) | Iiv -> [ Auxi ] (* as bhuu and k.r finite forms *)
| Iivv | Iivc -> [ Auxik; Auxiick ] (* bhuu and k.r kridanta forms *) | Iivv | Iivc -> [ Auxik; Auxiick ] (* bhuu and k.r kridanta forms *)
| Iiy -> [ Avy ] | Iiy -> [ Avy ]
| Isfx -> (* Compounding with taddhita *) | Peri -> [ Auxi ] (* overgenerates, should be only perfect forms *)
[ Iicv; Iicc; Nouv; Nouc; A; An; Ifc; Iikv; Iikc; Kriv; Kric
; Pvkv; Pvkc; Iiif; Iivv; Iivc; Vocv; Vocc; Vokv; Vokc ] @ cached
| Peri -> [ Auxi ] (* overgenerates, should be only perfect forms *)
| Inftu -> [ Kama ] | Inftu -> [ Kama ]
| Vocc | Vocv | Vokv | Vokc | Cache -> [] | Vocc | Vocv | Vokv | Vokc | Cache -> []
(* only chunk-final vocatives so no Iic overlap *) (* only chunk-final vocatives so no Iic overlap *)
...@@ -194,8 +188,7 @@ value terminal = (* Accepting phases *) ...@@ -194,8 +188,7 @@ value terminal = (* Accepting phases *)
; Vocc; Vocv; Vokv; Vokc; Inv ; Vocc; Vocv; Vokv; Vokc; Inv
; Lopa; Lopak ; Lopa; Lopak
; Avy; Kama ; Avy; Kama
; Sfx ; Cache
; Cache
] ]
; ;
...@@ -410,9 +403,8 @@ value rec chop word = fun ...@@ -410,9 +403,8 @@ value rec chop word = fun
] ]
] ]
; ;
value sfx_phase = fun [ Sfx | Isfx -> True | _ -> False ] value iic_phase = fun
and iic_phase = fun [ Iicv | Iicc | Iikv | Iikc
[ Iicv | Iicc | Iikv | Iikc
| Comp (_,Iikv) _ _ | Comp (_,Iikc) _ _ -> True | Comp (_,Iikv) _ _ | Comp (_,Iikc) _ _ -> True
| _ -> False ] | _ -> False ]
; ;
...@@ -628,15 +620,7 @@ value validate out = match out with ...@@ -628,15 +620,7 @@ value validate out = match out with
if Phonetics.consonant_initial (Word.mirror form) then [] if Phonetics.consonant_initial (Word.mirror form) then []
else out else out
(*i TODO: similar test for dual forms i*) (*i TODO: similar test for dual forms i*)
(* Finally we glue taddita suffix "forms" to the previous (iic) segment *) | [ (phase,_,_) :: [ (pv,_,_) :: _ ] ] when preverb_phase pv ->
(* NB This cumulates with the preverb glueing but not with itself *)
| [ (sfxph,sfx,s) :: [ (ph,rstem,sv) :: r ] ] when sfx_phase sfxph
&& iic_phase ph ->
let sfx_form = Word.mirror sfx in
let stem = Word.mirror rstem in
let tad_form = Word.mirror (apply_sandhi rstem sfx_form sv) in
[ (Tad (ph,sfxph) stem sfx_form,tad_form,s) :: r ]
| [ (phase,_,_) :: [ (pv,_,_) :: _ ] ] when preverb_phase pv ->
let m = "validate: " ^ string_of_phase pv ^ " " ^ string_of_phase phase in let m = "validate: " ^ string_of_phase pv ^ " " ^ string_of_phase phase in
raise (Control.Anomaly m) (* all preverbs ought to have been processed *) raise (Control.Anomaly m) (* all preverbs ought to have been processed *)
(* [ | [ (pv,_,_) :: _ ] when preverb_phase pv -> out ] noop (* [ | [ (pv,_,_) :: _ ] when preverb_phase pv -> out ] noop
...@@ -676,10 +660,7 @@ value rec color_of_phase = fun ...@@ -676,10 +660,7 @@ value rec color_of_phase = fun
| Ifc | Ifc2 -> Cyan | Ifc | Ifc2 -> Cyan
| Unknown -> Grey | Unknown -> Grey
| Comp (_,ph) _ _ -> color_of_phase ph | Comp (_,ph) _ _ -> color_of_phase ph
| Tad (_,ph) _ _ -> if ph=Sfx then Deep_sky else Yellow | Pv | Pvk | Pvkc | Pvkv -> failwith "Illegal preverb segment"
| Pv | Pvk | Pvkc | Pvkv -> failwith "Illegal preverb segment"
| Sfx -> Deep_sky (* necessary for [Lexer.print_segment2] *)
| Isfx -> Yellow (* idem *)
(*i NB: unused background colors: Pink Green Aquamarine Chamois i*) (*i NB: unused background colors: Pink Green Aquamarine Chamois i*)
] ]
; ;
......
...@@ -77,7 +77,9 @@ value strip w = match w with ...@@ -77,7 +77,9 @@ value strip w = match w with
| [] -> failwith "Empty stem to strip" | [] -> failwith "Empty stem to strip"
] ]
; ;
value rev_strip w = Word.mirror (strip (Word.mirror w)) (* ugly - temp *) value rstem w = strip (Word.mirror w)
;
value rev_strip w = Word.mirror (rstem w) (* ugly - temp *)
; ;
(* Builds revword normalised stem from entry string of root *) (* Builds revword normalised stem from entry string of root *)
(* Used by [Verbs.revstem], [Nouns.enter_iic], [Print_dict] *) (* Used by [Verbs.revstem], [Nouns.enter_iic], [Print_dict] *)
......
...@@ -106,22 +106,12 @@ value print_tags_tad pvs seg_num phase stem sfx sfx_tags = ...@@ -106,22 +106,12 @@ value print_tags_tad pvs seg_num phase stem sfx sfx_tags =
let ptag = print_morph_tad pvs seg_num False (generative phase) stem sfx in let ptag = print_morph_tad pvs seg_num False (generative phase) stem sfx in
fold_vert ptag sfx_tags fold_vert ptag sfx_tags
; ;
(* This is called "printing morphology interface style". Taddhitaanta forms (* This is called "printing morphology interface style". *)
are printed as fake compounds of iic the stem and ifc the taddhita form. *)
value print_morpho phase word = value print_morpho phase word =
match tags_of phase word with match tags_of phase word with
[ Atomic tags -> print_tags [] 0 phase word tags [ Atomic tags -> print_tags [] 0 phase word tags
| Preverbed (_,phase) pvs form tags -> print_tags pvs 0 phase form tags | Preverbed (_,phase) pvs form tags -> print_tags pvs 0 phase form tags
| Taddhita (ph,form) sfx _ sfx_tags ->
match tags_of ph form with
[ Atomic _ -> (* stem, tagged as iic *)
print_tags_tad [] 0 ph form sfx sfx_tags
| Preverbed _ pvs _ _ -> (* stem, tagged as iic *)
print_tags_tad pvs 0 ph form sfx sfx_tags
| _ -> raise (Control.Anomaly "taddhita recursion")
]
] ]
(* PB: if form has homonymy, we get t1 t2 t for [t1 | t2].t - confusion *)
; ;
(* Parsing mandatory checkpoints *) (* Parsing mandatory checkpoints *)
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
(* *) (* *)
(* Gérard Huet *) (* Gérard Huet *)
(* *) (* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *) (* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************) (**************************************************************************)
(* Sanskrit Phrase Lexer in 40 phases version. *) (* Sanskrit Phrase Lexer in 40 phases version. *)
...@@ -75,20 +75,7 @@ value print_morph pvs cached seg_num gen form n tag = do ...@@ -75,20 +75,7 @@ value print_morph pvs cached seg_num gen form n tag = do
; n+1 ; n+1
} }
; ;
(* generalisation of [print_morph] to taddhitas *) value print_tags pvs seg_num phase form tags =
value print_morph_tad pvs cached seg_num gen stem sfx n tag = do
(* n is the index in the list of tags of an ambiguous form *)
{ tr_begin |> ps
; th_begin |> ps
; span_begin Latin12 |> ps
; Morpho_html.print_inflected_link_tad pvs cached stem sfx (seg_num,n) gen tag
; span_end |> ps
; th_end |> ps
; tr_end |> ps
; n+1
}
;
value print_tags pvs seg_num phase form tags =
let ptag = print_morph pvs (is_cache phase) seg_num (generative phase) form in let ptag = print_morph pvs (is_cache phase) seg_num (generative phase) form in
let _ = List.fold_left ptag 1 tags in () let _ = List.fold_left ptag 1 tags in ()
; ;
...@@ -100,9 +87,7 @@ value rec scl_phase = fun ...@@ -100,9 +87,7 @@ value rec scl_phase = fun
| Inde | Abso | Absv | Absc | Avy -> "inde" | Inde | Abso | Absv | Absc | Avy -> "inde"
| Iic | Iic2 | A | An | Iicv | Iicc | Iik | Iikv | Iikc | Iiif | Auxiick | Iic | Iic2 | A | An | Iicv | Iicc | Iik | Iikv | Iikc | Iiif | Auxiick
| Ai | Ani -> "iic" | Ai | Ani -> "iic"
| Sfx -> "suffix" | Iiv | Iivv | Iivc -> "iiv"
| Isfx -> "iicsuffix"
| Iiv | Iivv | Iivc -> "iiv"
| Iiy -> "iiy" | Iiy -> "iiy"
| Peri -> "peri" | Peri -> "peri"
| Inftu -> "inftu" | Inftu -> "inftu"
...@@ -112,13 +97,12 @@ value rec scl_phase = fun ...@@ -112,13 +97,12 @@ value rec scl_phase = fun
| Unknown -> "unknown" | Unknown -> "unknown"
| Cache -> "Cache" | Cache -> "Cache"
| Comp (_,ph) _ _ -> "preverbed " ^ scl_phase ph | Comp (_,ph) _ _ -> "preverbed " ^ scl_phase ph
| Tad (ph,_) _ _ -> "taddhita " ^ scl_phase ph
] ]
; ;
value print_scl_morph pvs gen form tag = do value print_scl_morph pvs gen form tag = do
{ ps (xml_begin "tag") { ps (xml_begin "tag")
; Morpho_scl.print_scl_inflected pvs form gen tag ; Morpho_scl.print_scl_inflected pvs form gen tag
; ps (xml_end "tag") ; ps (xml_end "tag")
} }
; ;
value print_scl_tags pvs phase form tags = value print_scl_tags pvs phase form tags =
...@@ -137,7 +121,6 @@ value extract_lemma phase word = ...@@ -137,7 +121,6 @@ value extract_lemma phase word =
| Preverbed (_,phase) pvs form tags -> (* tags to be trimmed to [ok_tags] *) | Preverbed (_,phase) pvs form tags -> (* tags to be trimmed to [ok_tags] *)
if pvs = [] then tags if pvs = [] then tags
else trim_tags (generative phase) form (Canon.decode pvs) tags else trim_tags (generative phase) form (Canon.decode pvs) tags
| Taddhita _ _ _ tags -> tags
] ]
; ;
(* Returns the offset correction (used by SL interface) *) (* Returns the offset correction (used by SL interface) *)
...@@ -171,19 +154,7 @@ value process_kridanta pvs seg_num phase form tags = do ...@@ -171,19 +154,7 @@ value process_kridanta pvs seg_num phase form tags = do
; th_end |> ps ; th_end |> ps
; (phase, form, ok_tags) ; (phase, form, ok_tags)
}} }}
; ;
value process_taddhita pvs seg_num phase stem sfx_phase sfx sfx_tags =
let gen = generative phase
and cached = False in
let ptag = print_morph_tad pvs cached seg_num gen stem sfx in do
{ th_begin |> ps
; table_morph_of sfx_phase |> pl (* table begin *)
; let _ = List.fold_left ptag 1 sfx_tags in ()
; table_end |> ps (* table end *)
; th_end |> ps
; (sfx_phase, sfx, sfx_tags)
}
;
(* Same structure as [Interface.print_morpho] *) (* Same structure as [Interface.print_morpho] *)
value print_morpho phase word = do value print_morpho phase word = do
{ table_morph_of phase |> pl (* table begin *) { table_morph_of phase |> pl (* table begin *)
...@@ -196,15 +167,7 @@ value print_morpho phase word = do ...@@ -196,15 +167,7 @@ value print_morpho phase word = do
process_kridanta [] 0 phase word tags process_kridanta [] 0 phase word tags
| Preverbed (_,phase) pvs form tags -> | Preverbed (_,phase) pvs form tags ->
process_kridanta pvs 0 phase form tags process_kridanta pvs 0 phase form tags
| Taddhita (ph,form) sfx sfx_phase sfx_tags -> ] in ()
match tags_of ph form with
[ Atomic _ -> (* stem, tagged as iic *)
process_taddhita [] 0 ph form sfx_phase sfx sfx_tags
| Preverbed _ pvs _ _ -> (* stem, tagged as iic *)
process_taddhita pvs 0 ph form sfx_phase sfx sfx_tags
| _ -> failwith "Anomaly: taddhita recursion"
]
] in ()
; span_end |> ps ; span_end |> ps
; th_end |> ps ; th_end |> ps
; tr_end |> ps ; tr_end |> ps
...@@ -212,7 +175,7 @@ value print_morpho phase word = do ...@@ -212,7 +175,7 @@ value print_morpho phase word = do
} }
; ;
(* Segment printing with phonetics without semantics for Reader *) (* Segment printing with phonetics without semantics for Reader *)
value print_segment offset (phase,rword,transition) = do value print_segment offset (phase,rword,transition) = do
{ "[ " |> ps { "[ " |> ps
; Morpho_html.print_signifiant_off rword offset ; Morpho_html.print_signifiant_off rword offset
; print_morpho phase (mirror rword) ; print_morpho phase (mirror rword)
...@@ -243,15 +206,7 @@ value print_scl_segment counter (phase,rword) = ...@@ -243,15 +206,7 @@ value print_scl_segment counter (phase,rword) =
if pvs = [] then tags if pvs = [] then tags
else trim_tags (generative phase) form (Canon.decode pvs) tags in else trim_tags (generative phase) form (Canon.decode pvs) tags in
print_scl_tags pvs phase form ok_tags print_scl_tags pvs phase form ok_tags
| Taddhita (_,form) sfx sfx_phase sfx_tags -> ]
let taddhitanta_phase = match sfx_phase with
[ Sfx -> Noun
| Isfx -> Iic
| _ -> failwith "Wrong taddhita structure"
]
and taddhitanta_stem = form @ sfx (* very experimental *) in
print_scl_tags [] taddhitanta_phase taddhitanta_stem sfx_tags
]
; "'>" |> ps (* closes <input *) ; "'>" |> ps (* closes <input *)
; Canon.unidevcode word |> ps ; Canon.unidevcode word |> ps
; td_end |> ps ; td_end |> ps
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
(* *) (* *)
(* Gérard Huet *) (* Gérard Huet *)
(* *) (* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *) (* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************) (**************************************************************************)
(* Sanskrit Phrase Lexer *) (* Sanskrit Phrase Lexer *)
...@@ -47,14 +47,9 @@ module Lexer : functor (* takes its prelude and iterator control as parameters * ...@@ -47,14 +47,9 @@ module Lexer : functor (* takes its prelude and iterator control as parameters *
(* Exported for Parser *) (* Exported for Parser *)
value process_kridanta: Word.word -> int -> Phases.phase -> Word.word -> value process_kridanta: Word.word -> int -> Phases.phase -> Word.word ->
Morphology.multitag -> (Phases.phase * Word.word * Morphology.multitag); Morphology.multitag -> (Phases.phase * Word.word * Morphology.multitag);
value process_taddhita: Word.word -> int -> Phases.phase -> Word.word -> value table_morph_of : Phases.phase -> string;
Phases.phase -> Word.word -> Morphology.multitag ->
(Phases.phase * Word.word * Morphology.multitag);
value table_morph_of : Phases.phase -> string;
value print_morph : Word.word -> bool -> int -> bool -> Word.word -> int -> value print_morph : Word.word -> bool -> int -> bool -> Word.word -> int ->
Morphology.unitag -> int; Morphology.unitag -> int;
value print_morph_tad : Word.word -> bool -> int -> bool -> Word.word ->
Word.word -> int -> Morphology.unitag -> int;
(* END Exported for Parser *) (* END Exported for Parser *)
value all_checks : ref (list Viccheda.check); value all_checks : ref (list Viccheda.check);
value un_analyzable : Word.word -> (list Disp.segment * Viccheda.resumption); value un_analyzable : Word.word -> (list Disp.segment * Viccheda.resumption);
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
(* *) (* *)
(* Gérard Huet *) (* Gérard Huet *)
(* *) (* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *) (* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************) (**************************************************************************)
(* [Load_morphs] *) (* [Load_morphs] *)
...@@ -39,11 +39,9 @@ module Morphs ...@@ -39,11 +39,9 @@ module Morphs
| Iik (* K.ridaantaas as left component - used to be called Piic *) | Iik (* K.ridaantaas as left component - used to be called Piic *)
| Iikv | Iikc | Kriv | Kric | Vocv | Vocc | Vokv | Vokc | Iikv | Iikc | Kriv | Kric | Vocv | Vocc | Vokv | Vokc
| Iiy | Avy | Inftu | Kama | Iiy | Avy | Inftu | Kama
| Sfx | Isfx | Cache (* Cached lexicon acquisitions *)
| Cache (* Cached lexicon acquisitions *) | Unknown (* Unrecognized chunk *)
| Unknown (* Unrecognized chunk *)
| Comp of (phase * phase) and (* pv *) Word.word and (* root form *) Word.word | Comp of (phase * phase) and (* pv *) Word.word and (* root form *) Word.word
| Tad of (phase * phase) and (* nominal *) Word.word and (* sfx *) Word.word
]; end) ]; end)
= struct = struct
...@@ -54,7 +52,6 @@ by Dispatcher. Preverbed segments may be finite verb forms or kridantas. *) ...@@ -54,7 +52,6 @@ by Dispatcher. Preverbed segments may be finite verb forms or kridantas. *)
type tag_sort = type tag_sort =
[ Atomic of lemmas [ Atomic of lemmas
| Preverbed of (phase * phase) and (* pv *) Word.word and Word.word and lemmas | Preverbed of (phase * phase) and (* pv *) Word.word and Word.word and lemmas
| Taddhita of (phase * Word.word) and (* sfx *) Word.word and phase and lemmas
] ]
; ;
(* Fake tags of nan prefixes *) (* Fake tags of nan prefixes *)
...@@ -115,8 +112,6 @@ value load_morphs () = ...@@ -115,8 +112,6 @@ value load_morphs () =
; ifcs2 = load_morpho Web.public_ifcs2_file ; ifcs2 = load_morpho Web.public_ifcs2_file
; inftu = load_morpho Web.public_inftu_file ; inftu = load_morpho Web.public_inftu_file
; kama = load_morpho Web.public_kama_file ; kama = load_morpho Web.public_kama_file
; sfxs = load_morpho Web.public_sfxs_file
; isfxs = load_morpho Web.public_isfxs_file
; caches = load_morpho_cache Web.public_cache_file ; caches = load_morpho_cache Web.public_cache_file
} }
; ;
...@@ -151,9 +146,7 @@ value morpho_tags = fun ...@@ -151,9 +146,7 @@ value morpho_tags = fun
| Ifc2 -> morpho.ifcs2 | Ifc2 -> morpho.ifcs2
| Inftu -> morpho.inftu | Inftu -> morpho.inftu
| Kama -> morpho.kama | Kama -> morpho.kama
| Sfx -> morpho.sfxs | Cache -> morpho.caches
| Isfx -> morpho.isfxs
| Cache -> morpho.caches
| _ -> raise (Control.Anomaly "morpho_tags") | _ -> raise (Control.Anomaly "morpho_tags")
] ]
; ;
...@@ -171,11 +164,7 @@ value tags_of phase word = ...@@ -171,11 +164,7 @@ value tags_of phase word =
Preverbed sort pv form tag Preverbed sort pv form tag
(* NB [Preverbed] comprises tin verbal forms of verbs with preverbs as well (* NB [Preverbed] comprises tin verbal forms of verbs with preverbs as well
as sup kridanta forms with preverbs. The preverbs are packed in pv. *) as sup kridanta forms with preverbs. The preverbs are packed in pv. *)
| Tad (ph,sfx_ph) form sfx -> (* tag inherited from fake suffix entry *) | _ -> Atomic (Deco.assoc word (morpho_tags phase))
let sfx_tag = Deco.assoc sfx (morpho_tags sfx_ph) in
(* [let stem_tag = Deco.assoc sfx (morpho_tags ph) in] - possible extension *)
Taddhita (ph,form) [ 0 :: sfx ] sfx_ph sfx_tag (* 0 = "-" *)
| _ -> Atomic (Deco.assoc word (morpho_tags phase))
(* NB Atomic comprises tin verbal forms of roots as well as sup atomic forms (* NB Atomic comprises tin verbal forms of roots as well as sup atomic forms
and all the pure stems collections Iic Iiv etc. *) and all the pure stems collections Iic Iiv etc. *)
] ]
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
(* *) (* *)
(* Gérard Huet *) (* Gérard Huet *)
(* *) (* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *) (* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************) (**************************************************************************)
(* [Load_transducers] *) (* [Load_transducers] *)
...@@ -63,8 +63,6 @@ type transducer_vect = ...@@ -63,8 +63,6 @@ type transducer_vect =
; iikc : auto (* consonant-initial iik *) ; iikc : auto (* consonant-initial iik *)
; kriv : auto (* vowel-initial krids *) ; kriv : auto (* vowel-initial krids *)
; kric : auto (* consonant-initial krids *) ; kric : auto (* consonant-initial krids *)
; sfx : auto (* taddhita suffixes *)
; isfx : auto (* taddhita suffixes for iic stems *)
; cache : auto (* user-defined supplement to noun *) ; cache : auto (* user-defined supplement to noun *)
} }
; ;
...@@ -113,8 +111,6 @@ value load_transducer cat = ...@@ -113,8 +111,6 @@ value load_transducer cat =
| "Voca" -> Web.public_transvoca_file | "Voca" -> Web.public_transvoca_file
| "Inv" -> Web.public_transinv_file | "Inv" -> Web.public_transinv_file
| "Prev" -> Web.public_transp_file | "Prev" -> Web.public_transp_file
| "Sfx" -> Web.public_transsfx_file
| "Isfx" -> Web.public_transisfx_file
| "Cache" -> Web.public_transca_file | "Cache" -> Web.public_transca_file
| _ -> failwith ("Unexpected category: " ^ cat) | _ -> failwith ("Unexpected category: " ^ cat)
] in ] in
...@@ -216,8 +212,6 @@ value transducers = ...@@ -216,8 +212,6 @@ value transducers =
; iikc = iikc ; iikc = iikc
; absv = absv ; absv = absv
; absc = absc ; absc = absc
; sfx = load_transducer "Sfx"
; isfx = load_transducer "Isfx"
; cache = load_transducer "Cache" ; cache = load_transducer "Cache"
} }
; ;
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
(* *) (* *)
(* Gérard Huet *) (* Gérard Huet *)
(* *) (* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *) (* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************) (**************************************************************************)
(* This module contains various service utilities for CGI programs *) (* This module contains various service utilities for CGI programs *)
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
(* *) (* *)
(* Gérard Huet *) (* Gérard Huet *)
(* *) (* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *) (* ©20189 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************) (**************************************************************************)
(* Morphology interface *) (* Morphology interface *)
...@@ -70,8 +70,6 @@ type morphology = ...@@ -70,8 +70,6 @@ type morphology =
; kama : inflected_map ; kama : inflected_map
; iiys : inflected_map ; iiys : inflected_map
; avys : inflected_map ; avys : inflected_map
; sfxs : inflected_map
; isfxs : inflected_map
; caches : inflected_map ; caches : inflected_map
} }
; ;
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
(* *) (* *)
(* Gérard Huet *) (* Gérard Huet *)
(* *) (* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *) (* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************) (**************************************************************************)
(*i module Nouns = struct i*) (*i module Nouns = struct i*)
...@@ -880,6 +880,7 @@ value build_an g stem entry = ...@@ -880,6 +880,7 @@ value build_an g stem entry =
] ]
; Bare Noun (wrap stem 1) ; Bare Noun (wrap stem 1)
; Avyayaf (fix stem "am") ; Avyayaf (fix stem "am")
; Indecl Tas (fix stem "atas")
] @ if g=Neu then [ Avyayaf (fix stem "a") ] else []) (* \Pan{5,4,109} *) ] @ if g=Neu then [ Avyayaf (fix stem "a") ] else []) (* \Pan{5,4,109} *)
; ;
value build_an_god stem entry = (* Whitney §426a *) value build_an_god stem entry = (* Whitney §426a *)
...@@ -2014,6 +2015,7 @@ value build_neu_u trunc entry = (* stems in -u and -uu *) ...@@ -2014,6 +2015,7 @@ value build_neu_u trunc entry = (* stems in -u and -uu *)
] ]
; Bare Noun (mirror stems) ; Bare Noun (mirror stems)
; Avyayaf (mirror stems) ; Avyayaf (mirror stems)
; Indecl Tas (fix stems "tas") (* eg vastutas *)
] ]
; ;
value build_neu_ri trunc entry = value build_neu_ri trunc entry =
...@@ -4229,7 +4231,9 @@ value build_pron_a g stem entry = (* g=Mas ou g=Neu *) ...@@ -4229,7 +4231,9 @@ value build_pron_a g stem entry = (* g=Mas ou g=Neu *)
else if g=Mas && stem = [ 42; 36; 1 ] (* anya *) else if g=Mas && stem = [ 42; 36; 1 ] (* anya *)
then [ Bare phase (code "anya") ] (* optional anya- *) then [ Bare phase (code "anya") ] (* optional anya- *)
else if pseudo_nominal && g=Mas then else if pseudo_nominal && g=Mas then
[ Avyayaf (fix stem "am"); Avyayaf (fix stem "aat") ] [ Avyayaf (fix stem "am"); Avyayaf (fix stem "aat")
; Indecl Tas (fix stem "atas")
]
else []) else [])
@ (if g=Mas then match entry with @ (if g=Mas then match entry with
[ "eka" -> [ Cvi (code "ekii") ] [ "eka" -> [ Cvi (code "ekii") ]
...@@ -5018,7 +5022,7 @@ value compute_nouns_stem_form e stem d p = ...@@ -5018,7 +5022,7 @@ value compute_nouns_stem_form e stem d p =
| _ -> build_van Mas r3 e | _ -> build_van Mas r3 e
] ]
| [ 49 :: r3 ] (* -han *) -> build_han r3 e | [ 49 :: r3 ] (* -han *) -> build_han r3 e
| _ -> build_an Mas r2 e | _ -> build_an Mas r2 e (* raajan *)
] ]
| [ 3 :: r2 ] (* -in *) -> match r2 with | [ 3 :: r2 ] (* -in *) -> match r2 with
[ [ 33 :: r3 ] (* -thin *)-> match r3 with