(**************************************************************************) (* *) (* The Sanskrit Heritage Platform *) (* *) (* Gérard Huet *) (* *) (* ©2017 Institut National de Recherche en Informatique et en Automatique *) (**************************************************************************) (*i module Inflected = struct i*) (* Morphology : computation of inflected forms in [inflected_map decls]. *) open Skt_morph; open Morphology; (* [inflected_map] *) open Word; (* Holds the state vector : [(nouns,roots,preverbs,segmenting_mode)] where: *) (* nouns is accumulator for the set of declined forms of substantives *) (* pronouns is accumulator for the set of declined forms of pronouns *) (* vocas is accumulator for the set of vocative forms of substantives *) (* roots is accumulator for the set of conjugated forms of roots *) (* preverbs is accumulator for the set of preverb sequences *) (* [segmenting_mode] tells whether phantom phonemes are generated or not. *) (* Admits aa- as a preverb -- global set in [Verbs.compute_conjugs_stems] *) value admits_aa = ref False and admits_lopa = ref False ; value morpho_gen = ref True (* morphology generation time *) (* Turn to [False] for cgi execution (fake conjugation and nophantoms) *) ; (* The [inflected_map] lexicons of inflected forms: nouns, iics, etc are computed by [Make_nouns] and are dumped as persistent global databases nouns.rem etc. They are also used on the fly locally by [Declension] and [Conjugation]. *) value lexicalized_kridantas = ref (Deco.empty : Naming.deco_krid) (* It will be set by [Make_roots.roots_to_conjugs] for the [unique_kridantas] computation. *) ; value access_lexical_krid stem = Deco.assoc stem lexicalized_kridantas.val ; (* We look up the lexicalized kridantas register to see if entry is a krid. *) (* This test should be done before, in [Print_dict] that has the info ? *) value is_kridanta entry = try let (hom,stem) = Encode.decompose_str entry in let krids = access_lexical_krid stem in let _ = List.find (fun (_,h) -> h=hom) krids in True with [ Not_found -> False ] ; value unique_kridantas = ref Deco.empty (* This structure holds the unique names to kridantas. It is initialized to the lexicalized one in [Make_roots.roots_to_conjugs], which completes it with the kridantas generated by Parts. At the end of morphological generation its final value is stored in persistent [Install.unique_kridantas_file], and transfered to [Install.public_unique_kridantas_file] read from module Naming. *) ; value access_krid stem = Deco.assoc stem unique_kridantas.val and register_krid stem vrp = (* used in [Parts.gen_stem] *) unique_kridantas.val := Deco.add1 unique_kridantas.val stem vrp ; (* Inflected forms of nouns pronouns numbers, *) (* also used separately for ifc only nouns *) value nouns = ref (Deco.empty : inflected_map) and pronouns = ref (Deco.empty : inflected_map) (* demonstrative + personal pn *) and vocas = ref (Deco.empty : inflected_map) ; (* Add morphological feature i to form w relative to entry e, with d = diff e *) value add_morph w d i = nouns.val := Lexmap.addl nouns.val w (d w,i) and add_morphpro w d i = (* pronouns not usable as ifc *) pronouns.val := Lexmap.addl pronouns.val w (d w,i) (* Add vocative feature i to form w relative to entry e, with d = diff e *) and add_voca w d i = vocas.val := Lexmap.addl vocas.val w (d w,i) ; (* auxiliary verbs used in the inchoative cvi construction *) value auxiliary = fun [ "bhuu#1" | "k.r#1" | "as#1" -> True | _ -> False ] ; (* iic forms *) value iics = ref (Deco.empty : inflected_map) ; value add_morphi w d i = iics.val := Lexmap.addl iics.val w (d w,i) ; (* avyaya iic forms *) value avyayais = ref (Deco.empty : inflected_map) ; (* avyaya ifc forms *) value avyayafs = ref (Deco.empty : inflected_map) ; value add_morphyai w d i = avyayais.val := Lexmap.addl avyayais.val w (d w,i) ; value add_morphyaf w d i = avyayafs.val := Lexmap.addl avyayafs.val w (d w,i) ; (* Used by [Nouns.fake_compute_decls] for declension of single entry *) value nominal_databases () = (nouns.val,pronouns.val,vocas.val,iics.val,avyayafs.val) and reset_nominal_databases () = do { nouns.val := Deco.empty ; pronouns.val := Deco.empty ; vocas.val := Deco.empty ; iics.val := Deco.empty } ; (* iiv forms *) value iivs = ref (Deco.empty : inflected_map) ; value add_morphvi w d i = iivs.val := Lexmap.addl iivs.val w (d w,i) ; (* finite forms of auxiliary roots k.r bhuu as *) value auxi = ref (Deco.empty : inflected_map) ; value add_morphauxi w d i = if Phonetics.phantomatic w then () else auxi.val := Lexmap.addl auxi.val w (d w,i) ; (* periphrastic perfect forms *) value peri = ref (Deco.empty : inflected_map) ; value add_morphperi w d i = peri.val := Lexmap.addl peri.val w (d w,i) ; (* indeclinable forms - adverbs, conjonctions, particles *) value indecls = ref (Deco.empty : inflected_map) ; value add_morphin w d i = indecls.val := Lexmap.addl indecls.val w (d w,i) ; (* invocations are registered in invs *) value invs = ref (Deco.empty : inflected_map) ; value add_invoc w d i = invs.val := Lexmap.addl invs.val w (d w,i) ; (* indeclinable verbal forms usable without preverbs: infinitives, abs-tvaa *) value abstvaa = ref (Deco.empty : inflected_map) ; value add_morphabstvaa w d i = abstvaa.val := Lexmap.addl abstvaa.val w (d w,i) ; (* indeclinable verbal forms usable with preverbs: infinitives, abs-ya *) value absya = ref (Deco.empty : inflected_map) ; value add_morphabsya w d i aapv = do { absya.val := Lexmap.addl absya.val w (d w,i) (* now we add fake absol forms with phantom phonemes *) ; if morpho_gen.val && aapv then match w with [ [ 1 :: r ] -> (* aa-a gives *a *) let fake = [ (* *a *) -3 :: r ] in absya.val := Lexmap.addl absya.val fake (d fake,i) | [ 2 :: r ] -> let fake = [ (* *A *) -9 :: r ] in absya.val := Lexmap.addl absya.val fake (d fake,i) | [ 3 :: r ] -> let fake = [ (* *i *) -4 :: r ] in absya.val := Lexmap.addl absya.val fake (d fake,i) | [ 4 :: r ] -> let fake = [ (* *I *) -7 :: r ] in absya.val := Lexmap.addl absya.val fake (d fake,i) | [ 5 :: r ] -> let fake = [ (* *u *) -5 :: r ] in absya.val := Lexmap.addl absya.val fake (d fake,i) | [ 6 :: r ] -> let fake = [ (* *U *) -8 :: r ] in absya.val := Lexmap.addl absya.val fake (d fake,i) | [ 7 :: r ] -> let fake = [ (* *r *) -6 :: r ] in absya.val := Lexmap.addl absya.val fake (d fake,i) | _ -> () ] else () } ; (* root finite conjugated forms *) value roots = ref (Deco.empty : inflected_map) ; value add_morphc w d i aapv = do { roots.val := Lexmap.addl roots.val w (d w,i) (* now we add fake conjugated forms with phantom phonemes *) ; if morpho_gen.val && aapv then do (* \Pan{6,1,95} *) { match w with [ [ 1 :: r ] -> (* aa-a gives *a *) let fake = [ (* *a *) -3 :: r ] in roots.val := Lexmap.addl roots.val fake (d fake,i) | [ 2 :: r ] -> let fake = [ (* *A *) -9 :: r ] in roots.val := Lexmap.addl roots.val fake (d fake,i) | [ 3 :: r ] -> let fake = [ (* *i *) -4 :: r ] in roots.val := Lexmap.addl roots.val fake (d fake,i) | [ 4 :: r ] -> let fake = [ (* *I *) -7 :: r ] in roots.val := Lexmap.addl roots.val fake (d fake,i) | [ 5 :: r ] -> let fake = [ (* *u *) -5 :: r ] in roots.val := Lexmap.addl roots.val fake (d fake,i) | [ 6 :: r ] -> let fake = [ (* *U *) -8 :: r ] in roots.val := Lexmap.addl roots.val fake (d fake,i) | [ 7 :: r ] -> let fake = [ (* *r *) -6 :: r ] in roots.val := Lexmap.addl roots.val fake (d fake,i) | _ -> () ] } else () } ; (* root finite forms starting with e or o *) value lopas = ref (Deco.empty : inflected_map) and lopaks = ref (Deco.empty : inflected_map) ; (* Concerns \Pan{6,1,94} a,\=a (preverb) | e (root) -> e; same for o. *) (* Ex: upelayati prelayati upo.sati pro.sati *) value add_morphlopa w d i = match w with [ [ 10 :: _ ] | [ 12 :: _ ] -> let amui = [ -2 :: w ] (* amuitic form *) in lopas.val := Lexmap.addl lopas.val amui (d amui,i) | _ -> () ] ; (* New style of forms generators - stem argument generated as pseudo-entry *) (* inflected forms of participles - and more generally kridantas *) value parts = ref (Deco.empty : inflected_map) ; value add_morphpa w stem i aapv = do { parts.val := Lexmap.addl parts.val w (diff w stem,i) (* now we add fake participial forms with phantom phonemes *) ; if morpho_gen.val && aapv then match w with [ [ 1 :: r ] -> (* aa-a gives *a *) let fake = [ (* *a *) -3 :: r ] in parts.val := Lexmap.addl parts.val fake (diff fake stem,i) | [ 2 :: r ] -> let fake = [ (* *A *) -9 :: r ] in parts.val := Lexmap.addl parts.val fake (diff fake stem,i) | [ 3 :: r ] -> let fake = [ (* *i *) -4 :: r ] in parts.val := Lexmap.addl parts.val fake (diff fake stem,i) | [ 4 :: r ] -> let fake = [ (* *I *) -7 :: r ] in parts.val := Lexmap.addl parts.val fake (diff fake stem,i) | [ 5 :: r ] -> let fake = [ (* *u *) -5 :: r ] in parts.val := Lexmap.addl parts.val fake (diff fake stem,i) | [ 6 :: r ] -> let fake = [ (* *U *) -8 :: r ] in parts.val := Lexmap.addl parts.val fake (diff fake stem,i) | [ 7 :: r ] -> (* aa-.r gives *r *) let fake = [ (* *r *) -6 :: r ] in parts.val := Lexmap.addl parts.val fake (diff fake stem,i) | _ -> () ] else () } and add_morphlopak w stem i aapv = match w with [ [ 10 :: _ ] | [ 12 :: _ ] -> let amui = [ -2 :: w ] (* amuitic form *) in lopaks.val := Lexmap.addl lopaks.val amui (diff amui stem,i) | _ -> () ] ; (* participial vocatives *) value partvocs = ref (Deco.empty : inflected_map) ; value add_morphpav w stem i aapv = do { partvocs.val := Lexmap.addl partvocs.val w (diff w stem,i) (* now we add fake participial forms with phantom phonemes *) ; if morpho_gen.val && aapv then match w with [ [ 1 :: r ] -> (* aa-a gives *a *) let fake = [ (* *a *) -3 :: r ] in partvocs.val := Lexmap.addl partvocs.val fake (diff fake stem,i) | [ 2 :: r ] -> let fake = [ (* *A *) -9 :: r ] in partvocs.val := Lexmap.addl partvocs.val fake (diff fake stem,i) | [ 3 :: r ] -> let fake = [ (* *i *) -4 :: r ] in partvocs.val := Lexmap.addl partvocs.val fake (diff fake stem,i) | [ 4 :: r ] -> let fake = [ (* *I *) -7 :: r ] in partvocs.val := Lexmap.addl partvocs.val fake (diff fake stem,i) | [ 5 :: r ] -> let fake = [ (* *u *) -5 :: r ] in partvocs.val := Lexmap.addl partvocs.val fake (diff fake stem,i) | [ 6 :: r ] -> let fake = [ (* *U *) -8 :: r ] in partvocs.val := Lexmap.addl partvocs.val fake (diff fake stem,i) | [ 7 :: r ] -> (* aa-.r gives *r *) let fake = [ (* *r *) -6 :: r ] in partvocs.val := Lexmap.addl partvocs.val fake (diff fake stem,i) | _ -> () ] else () } ; (* piic forms *) value piics = ref (Deco.empty : inflected_map) ; value add_morphpi w stem i aapv = do { piics.val := Lexmap.addl piics.val w (diff w stem,i) (* now we add fake participial iic forms with phantom phonemes *) ; if morpho_gen.val && aapv then match w with [ [ 1 :: r ] -> (* aa-a gives *a *) let fake = [ (* *a *) -3 :: r ] in piics.val := Lexmap.addl piics.val fake (diff fake stem,i) | [ 2 :: r ] -> let fake = [ (* *A *) -9 :: r ] in piics.val := Lexmap.addl piics.val fake (diff fake stem,i) | [ 3 :: r ] -> let fake = [ (* *i *) -4 :: r ] in piics.val := Lexmap.addl piics.val fake (diff fake stem,i) | [ 4 :: r ] -> let fake = [ (* *I *) -7 :: r ] in piics.val := Lexmap.addl piics.val fake (diff fake stem,i) | [ 5 :: r ] -> let fake = [ (* *u *) -5 :: r ] in piics.val := Lexmap.addl piics.val fake (diff fake stem,i) | [ 6 :: r ] -> let fake = [ (* *U *) -8 :: r ] in piics.val := Lexmap.addl piics.val fake (diff fake stem,i) | [ 7 :: r ] -> (* aa-.r gives *r *) let fake = [ (* *r *) -6 :: r ] in piics.val := Lexmap.addl piics.val fake (diff fake stem,i) | _ -> () ] else () } ; (* kridantas of auxiliary roots k.r bhuu for cvi -ii compounds *) value auxik = ref (Deco.empty : inflected_map) ; value add_morphauxik w stem i = if Phonetics.phantomatic w then () else auxik.val := Lexmap.addl auxik.val w (diff w stem,i) ; value auxiick = ref (Deco.empty : inflected_map) ; value add_morphauxiick w stem i = if Phonetics.phantomatic w then () else auxiick.val := Lexmap.addl auxiick.val w (diff w stem,i) ; (* Root infinitives in -tu with forms of kaama *) value inftu = ref (Deco.empty : inflected_map) and kama = ref (Deco.empty : inflected_map) ; value add_morphinftu w d i = (* similar to [add_morphin] *) if Phonetics.phantomatic w then () else inftu.val := Lexmap.addl inftu.val w (d w,i) and add_morphkama w d i = (* similar to [add_morph] *) kama.val := Lexmap.addl kama.val w (d w,i) ; (* Preverb sequences *) value preverbs = ref (Deco.empty : Deco.deco word) ; value add_morphp w i = preverbs.val := Deco.add preverbs.val w i ; (* Inflectional categories *) type nominal = [ Noun (* lexicalized stem - noun, adjective or number *) | Pron (* lexicalized stem - pronoun *) | Krid of verbal and string (* kridantas of roots *) ] ; type flexion = [ Declined of nominal and gender and list (number * list (case * word)) | Conju of finite and list (number * list (person * word)) | Indecl of ind_kind and word (* avyaya, particle, interjection, nota *) | Bare of nominal and word (* Iic *) | Avyayai of word (* Iic of avyayiibhaava cpd *) | Avyayaf of word (* Ifc of avyayiibhaava cpd *) | Cvi of word (* -cvi suffixed stem (iiv) for inchoative compound verbs *) | Preverb of word and list word | Invar of modal and word (* inf abs-ya perpft *) | Inftu of conjugation and Word.word (* infinitive in -tu *) | Absotvaa of conjugation and word (* abs-tvaa *) ] ; value is_taddhita = fun (* unused at present - see [Subst.taddhitas] *) [ "taa" | "tva" | "vat" | "mat" | "tas" | "kataa" | "katva" (* -ka-taa -ka-tva *) | "vattva" | "tvavat"-> True | _ -> False ] ; value sort_taddhita s = s (*i OBS: [if is_taddhita s then "-" ^ s else s] i*) ; (* enter1: string -> flexion -> unit *) value enter1 entry = let lexeme = sort_taddhita entry in let delta = Encode.diff_str lexeme (* partial application *) and aapv = admits_aa.val (* for phantom forms generation *) in fun [ Declined Noun g lg -> List.iter enterg lg (* nouns *) where enterg (n,ln) = List.iter entern ln where entern (c,w) = let f = Noun_form g n c in if c=Voc then if morpho_gen.val && is_kridanta entry then ((* f is in Kridv *)) else add_voca w delta f (* non-generative Voca *) else do { add_morph w delta f ; match entry with (* generative ifcs of infinitive bahus *) [ "kaama" (* volition : who wants to do *) | "manas" (* consideration : who thinks about doing *) (* | "zakya" (* consideration : who is able to do *) kridanta *) -> add_morphkama w delta f | _ -> () ] } | Declined Pron g lg -> List.iter enterg lg (* pronouns *) where enterg (n,ln) = List.iter entern ln where entern (c,w) = let f = Noun_form g n c in if c=Voc then add_voca w delta f else add_morphpro w delta f | Conju f lv -> List.iter enterv lv where enterv (n,ln) = List.iter entern ln where entern (p,w) = let v = Verb_form f n p in do { add_morphc w delta v aapv (* Now we take care of \Pan{6,1,94} when not blocked by \Pan{6,1,89} *) (* ex: prejate, + (Kazikaa) upelayati prelayati upo.sati pro.sati *) ; if morpho_gen.val then if entry = "i" || entry = "edh" then () (* \Pan{6,1,89} *) else add_morphlopa w delta v else () ; (* Now auxiliaries for verbal cvi compounds *) if auxiliary entry then add_morphauxi w delta v else () } | Indecl k w -> match k with [ Adv | Part | Conj | Default | Prep | Tas -> add_morphin w delta (Ind_form k) | Interj -> add_invoc w delta (Ind_form k) | Avya -> () (* since generative *) | Abs | Infl | Nota -> () (* no recording in morph tables *) (* Abs generated by absolutives of verbs, Infl by flexions of nouns, and our parser does not deal with the specific notations of Panini suutras *) ] | Bare Noun w | Bare Pron w -> add_morphi w delta Bare_stem | Avyayai w -> add_morphyai w delta Avyayai_form | Avyayaf w -> add_morphyaf w delta Avyayaf_form | Cvi w -> add_morphvi w delta Auxi_form | Invar m w -> let (_,vi) = m and f = Ind_verb m in match vi with [ Infi -> do (* 2 cases: with and without preverbs - saves one phase *) { add_morphabsya w delta f aapv ; add_morphin w delta f ; if auxiliary entry then add_morphauxi w delta f else () } (*i was [add_morphc w delta f] but prevents being in same chunk as finite verb. PB : infinitives with preverbs are in diff category (Absoya); duplication could be avoided by using an extra specific phase. Example of overgeneration: "paritraatum" with two "traatum". i*) | Absoya (* abso in -ya *) -> do { add_morphabsya w delta f aapv (* abs-ya: pv or cvii (gati) mandatory *) ; if auxiliary entry then add_morphauxi w delta f else () } | Perpft -> add_morphperi w delta f (* NB Allows perpft of verbs with preverbs but overgenerates since it allows perpft followed by a non perfect form of auxiliary *) ] | Inftu m w -> let f = Ind_verb (m,Infi) in add_morphinftu w delta f (* infinitive in -tu *) | Absotvaa c w -> let f = Abs_root c in add_morphabstvaa w delta f (* abs-tvaa: no preverb *) | Preverb w lw -> add_morphp w lw (* w is (normalised) sandhi of lw *) | _ -> failwith "Unexpected arg to enter" ] ; (* [enter_form]: word -> flexion -> unit *) (* 1st argument is a stem generated by derivational morphology, it may have a homo index computed by [Parts.gen_stem]. *) (* [enter_form] enters in the relevant data bank one of its inflected forms. *) (* Special treatment to have kridanta forms for auxiliaries, since their lexicalised action nouns are not recognized as generative, and thus must be skipped to avoid overgeneration. *) value enter_form stem = let aapv = admits_aa.val (* for phantom forms generation *) in fun [ Declined (Krid v root) g lg -> List.iter enterg lg where enterg (n,ln) = List.iter entern ln where entern (c,w) = let p = Part_form v g n c in (* We lose the root, and v is used only in Constraints. Both can be recovered from stem using [unique_kridantas] *) if c=Voc then add_morphpav w stem p aapv else do { match v with [ (_,Action_noun) -> add_morphauxik w stem p (* cvi patch *) | _ -> do { add_morphpa w stem p aapv ; if auxiliary root then add_morphauxik w stem p else () } ] ; if morpho_gen.val then if root = "i" || root = "edh" then () (* \Pan{6,1,89} *) else add_morphlopak w stem p aapv else () } | Bare (Krid (_,Action_noun) root) w -> add_morphauxiick w stem Bare_stem (* cvi *) | Bare (Krid _ root) w -> let f = Bare_stem in do (* losing verbal and root *) { add_morphpi w stem f aapv ; if auxiliary root then add_morphauxiick w stem f else () } | _ -> failwith "Unexpected arg to enter_form" ] ; value enter entry = List.iter (enter1 entry) and enter_forms w = List.iter (enter_form w) ; (*i end; i*)