morpho_scl.ml 6.34 KB
Newer Older
huet's avatar
huet committed
1 2 3 4 5 6 7 8 9
(**************************************************************************)
(*                                                                        *)
(*                     The Sanskrit Heritage Platform                     *)
(*                                                                        *)
(*                              Gérard Huet                               *)
(*                                                                        *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)

Gérard Huet's avatar
Gérard Huet committed
10
(*i module Morpho_scl = struct i*)
huet's avatar
huet committed
11 12 13 14 15 16 17 18 19

(* Prints lists of inflected forms in XML for use by external Web services. *)
(* Adapted from [Morpho_xml] *)
(* Uses WX for transliteration output. *)

open Skt_morph;
open Morphology; (* [inflected] and its constructors [Noun_form], ... *)
open Naming; (* [look_up_homo homo_undo unique_kridantas lexical_kridantas] *)

Gérard Huet's avatar
Gérard Huet committed
20
value ps = print_string
huet's avatar
huet committed
21
;
Gérard Huet's avatar
Gérard Huet committed
22 23 24
value pr_ext_gana k = ps (string_of_int k) 
;
value print_ext_number = fun 
huet's avatar
huet committed
25 26 27 28
  [ Singular -> ps "<sg/>" 
  | Dual     -> ps "<du/>"
  | Plural   -> ps "<pl/>"
  ]
Gérard Huet's avatar
Gérard Huet committed
29
and print_ext_gender = fun 
huet's avatar
huet committed
30 31 32 33 34
  [ Mas -> ps "<m/>"
  | Neu -> ps "<n/>"
  | Fem -> ps "<f/>" 
  | Deictic _ -> ps "<d/>" 
  ]
Gérard Huet's avatar
Gérard Huet committed
35
and print_ext_case = fun 
huet's avatar
huet committed
36 37 38 39 40 41 42 43 44
  [ Nom -> ps "<nom/>"
  | Acc -> ps "<acc/>"
  | Ins -> ps "<ins/>"
  | Dat -> ps "<dat/>"
  | Abl -> ps "<abl/>"
  | Gen -> ps "<gen/>"
  | Loc -> ps "<loc/>"
  | Voc -> ps "<voc/>" 
  ] 
Gérard Huet's avatar
Gérard Huet committed
45
and print_ext_person = fun 
huet's avatar
huet committed
46 47 48 49
  [ First  -> ps "<fst/>" 
  | Second -> ps "<snd/>" 
  | Third  -> ps "<thd/>" 
  ] 
Gérard Huet's avatar
Gérard Huet committed
50
and print_ext_voice = fun 
huet's avatar
huet committed
51 52 53 54
  [ Active  -> ps "<ac/>" 
  | Middle  -> ps "<md/>" 
  | Passive -> ps "<ps/>"
  ] 
Gérard Huet's avatar
Gérard Huet committed
55
and print_ext_pr_mode = fun
huet's avatar
huet committed
56 57 58 59 60
  [ Present    -> ps "<pr gana="
  | Imperative -> ps "<imp gana="
  | Optative   -> ps "<opt gana="
  | Imperfect  -> ps "<impft gana="
  ]
Gérard Huet's avatar
Gérard Huet committed
61
and print_ext_pr_mode_ps = fun
huet's avatar
huet committed
62 63 64 65 66
  [ Present    -> ps "<prps/>"
  | Imperative -> ps "<impps/>"
  | Optative   -> ps "<optps/>"
  | Imperfect  -> ps "<impftps/>"
  ]
Gérard Huet's avatar
Gérard Huet committed
67
and print_ext_tense = fun
huet's avatar
huet committed
68 69
  [ Future       -> ps "<fut/>"
  | Perfect      -> ps "<pft/>"
Gérard Huet's avatar
Gérard Huet committed
70 71
  | Aorist k     -> do { ps "<aor gana="; pr_ext_gana k; ps "/>" }
  | Injunctive k -> do { ps "<inj gana="; pr_ext_gana k; ps "/>" }
huet's avatar
huet committed
72 73 74 75
  | Conditional  -> ps "<cond/>"
  | Benedictive  -> ps "<ben/>"
  ]
;
Gérard Huet's avatar
Gérard Huet committed
76 77 78
value print_ext_paradigm = fun
  [ Conjug t v    -> do { print_ext_tense t; print_ext_voice v }
  | Presenta k pr -> do { print_ext_pr_mode pr; pr_ext_gana k; 
huet's avatar
huet committed
79
                          ps "/><ac/>" }
Gérard Huet's avatar
Gérard Huet committed
80
  | Presentm k pr -> do { print_ext_pr_mode pr; pr_ext_gana k; 
huet's avatar
huet committed
81
                          ps "/><md/>" }
Gérard Huet's avatar
Gérard Huet committed
82
  | Presentp pr   -> print_ext_pr_mode_ps pr
huet's avatar
huet committed
83 84
  | Perfut v      -> ps "<perfut/>" (* TODO: mark voice *)
  ]
Gérard Huet's avatar
Gérard Huet committed
85
and print_ext_conjugation = fun 
huet's avatar
huet committed
86 87 88 89 90
  [ Primary      -> ()
  | Causative    -> ps "<ca/>"
  | Intensive    -> ps "<int/>"
  | Desiderative -> ps "<des/>"
  ]
Gérard Huet's avatar
Gérard Huet committed
91
and print_ext_nominal = fun
huet's avatar
huet committed
92 93
  [ Ppp     -> ps "<pp/>"
  | Pppa    -> ps "<ppa/>"
Gérard Huet's avatar
Gérard Huet committed
94 95 96 97 98 99 100 101 102 103
  | Ppra k  -> do { ps "<ppr gana="; pr_ext_gana k; ps "/>";
                    print_ext_voice Active }
  | Pprm k  -> do { ps "<ppr gana="; pr_ext_gana k; ps "/>";
                    print_ext_voice Middle }
  | Pprp    -> do { ps "<ppr/>"; print_ext_voice Passive }
  | Ppfta   -> do { ps "<ppf/>"; print_ext_voice Active }
  | Ppftm   -> do { ps "<ppf/>"; print_ext_voice Middle }
  | Pfuta   -> do { ps "<pfu/>"; print_ext_voice Active }
  | Pfutm   -> do { ps "<pfu/>"; print_ext_voice Middle }
  | Pfutp k -> do { ps "<pfp/>"; pr_ext_gana k }
huet's avatar
huet committed
104 105
  | _       -> ps "<act/>" (* action verbal nouns *)
  ]
Gérard Huet's avatar
Gérard Huet committed
106
and print_ext_invar = fun 
huet's avatar
huet committed
107 108 109 110
  [ Infi   -> ps "<inf/>" 
  | Absoya -> ps "<abs/>"
  | Perpft -> ps "<perpft/>"
  ]
Gérard Huet's avatar
Gérard Huet committed
111
and print_ext_kind = fun
huet's avatar
huet committed
112 113 114 115 116 117 118 119
  [ Part -> ps "<part/>"
  | Prep -> ps "<prep/>"
  | Conj -> ps "<conj/>"
  | Abs  -> ps "<abs/>"
  | Adv  -> ps "<adv/>"
  | _    -> ps "<und/>"
  ]
;
Gérard Huet's avatar
Gérard Huet committed
120 121 122 123 124 125
value print_ext_finite (c,p) = 
  do { print_ext_conjugation c; print_ext_paradigm p }
and   print_ext_verbal (c,n) = 
  do { print_ext_conjugation c; print_ext_nominal n }
and   print_ext_modal (c,i)  = 
  do { print_ext_conjugation c; print_ext_invar i }
huet's avatar
huet committed
126
;
Gérard Huet's avatar
Gérard Huet committed
127
value print_ext_morph = fun
huet's avatar
huet committed
128 129
  [ Noun_form g n c 
  | Part_form _ g n c -> do
Gérard Huet's avatar
Gérard Huet committed
130 131 132
      { print_ext_case c
      ; print_ext_number n
      ; print_ext_gender g
huet's avatar
huet committed
133 134 135
      }
  | Bare_stem | Avyayai_form -> ps "<iic/>"
  | Verb_form f n p -> do
Gérard Huet's avatar
Gérard Huet committed
136 137 138
      { print_ext_finite f
      ; print_ext_number n
      ; print_ext_person p
huet's avatar
huet committed
139
      }
Gérard Huet's avatar
Gérard Huet committed
140
  | Ind_form k -> print_ext_kind k
huet's avatar
huet committed
141
  | Avyayaf_form -> ps "<avya/>"
Gérard Huet's avatar
Gérard Huet committed
142
  | Abs_root c -> do { print_ext_conjugation c; ps "<abs/>" }
huet's avatar
huet committed
143
  | Auxi_form -> ps "<iiv/>"
Gérard Huet's avatar
Gérard Huet committed
144
  | Ind_verb m -> print_ext_modal m
huet's avatar
huet committed
145 146 147 148
  | PV _ -> ps "<pv/>"
  | Unanalysed -> ps "<unknown/>" 
  ]
;
Gérard Huet's avatar
Gérard Huet committed
149
value print_ext_morphs = 
huet's avatar
huet committed
150
  let choice () = ps "</choice><choice>" in
Gérard Huet's avatar
Gérard Huet committed
151
  List2.process_list_sep print_ext_morph choice
huet's avatar
huet committed
152
;
Gérard Huet's avatar
Gérard Huet committed
153
value print_inv_morpho_ext pe pne form generative (delta,morphs) = 
huet's avatar
huet committed
154 155
  let stem = Word.patch delta form in do (* stem may have homo index *)
    { ps "<morpho_infl><choice>"
Gérard Huet's avatar
Gérard Huet committed
156
    ; print_ext_morphs morphs
huet's avatar
huet committed
157 158 159 160 161 162 163 164 165 166 167 168 169
    ; ps "</choice></morpho_infl>"
    ; ps "<morpho_gen>"
    ; if generative then (* interpret stem as unique name *)
        let (homo,bare_stem) = homo_undo stem in
        let krid_infos = Deco.assoc bare_stem unique_kridantas in 
        try let (verbal,root) = look_up_homo homo krid_infos in do
        { match Deco.assoc bare_stem lexical_kridantas with
          [ [] (* not in lexicon *) -> pne bare_stem
          | entries (* bare stem is lexicalized *) -> 
              if List.exists (fun (_,h) -> h=homo) entries
                 then pe stem (* stem with exact homo is lexical entry *)
              else pne bare_stem
          ]
Gérard Huet's avatar
Gérard Huet committed
170
        ; ps "<krid>"; print_ext_verbal verbal
huet's avatar
huet committed
171 172 173 174 175 176
        ; ps "</krid><root>"; pe root; ps "</root>"
        } with [ _ -> pne bare_stem ]
      else pe stem
    ; ps "</morpho_gen>"
    }
;
Gérard Huet's avatar
Gérard Huet committed
177
value print_inv_morpho_link_ext pvs pe pne form = 
huet's avatar
huet committed
178 179 180 181
  let pv = if Phonetics.phantomatic form then [ 2 ] (* aa- *) 
           else pvs in
  let encaps print e = if pv = [] then print e
  else do { ps (Canon.decode_WX pvs ^ "-"); print e } in
Gérard Huet's avatar
Gérard Huet committed
182 183
  print_inv_morpho_ext (encaps pe) (encaps pne) form 
and print_ext_entry w = (* ps offline in WX notation for UoH interface *)
huet's avatar
huet committed
184 185 186
  ps ("<entry wx=\"" ^ Canon.decode_WX w ^ "\"/>") 
;
(* Used in [Lexer.print_ext_morph] *)
Gérard Huet's avatar
Gérard Huet committed
187 188
value print_ext_inflected_link pvs = 
  print_inv_morpho_link_ext pvs print_ext_entry print_ext_entry
huet's avatar
huet committed
189 190 191
;

(*i end; i*)