Commit cbf345d1 authored by Gérard Huet

Simplified paradigm for denominative verbs

parent 0e93f760
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©201 Institut National de Recherche en Informatique et en Automatique *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(*i module Canon = struct i*)
......
......@@ -29,8 +29,6 @@ value sheets = (* cascading style sheets data *)
; ("span",B1); ("span",B2); ("span",B3); ("span",Header_deva); ("span",Math)
; ("span",Devac); ("span",Header_tran); ("span",Deva16); ("span",Deva16c)
; ("body",Mauve_back); ("body",Pink_back); ("body",Chamois_back)
(*;[("body",Pict_om); ("body",Pict_om2); ("body",Pict_om3); ("body",Pict_om4)
; ("body",Pict_gan); ("body",Pict_hare); ("body",Pict_geo)] deprecated *)
; ("table",Bandeau); ("table",Center_); ("table",Body); ("table",Pad60)
; ("table",Yellow_back); ("table",Yellow_cent); ("table",Deep_sky_cent)
; ("table",Salmon_back); ("table",Aquamarine_back)
......
......@@ -366,13 +366,6 @@ value styles = fun
| Green_back -> [ Bgcolor Green ]
| Light_blue_back -> [ Bgcolor Light_blue ]
| Lavender_back -> [ Bgcolor Lavender ]
(*[ | Pict_om -> [ Bgpict Om; No_margin ]
| Pict_om2 -> [ Bgpict Om2; No_margin ]
| Pict_om3 -> [ Bgpict Om3; No_margin ]
| Pict_om4 -> [ Bgpict Om4; No_margin ]
| Pict_gan -> [ Bgpict Gan; No_margin ]
| Pict_hare -> [ Bgpict Hare; No_margin ]
| Pict_geo -> [ Bgpict Geo; No_margin ] ]*)
| Blue_ -> [ trans_font; Color Blue ]
| Green_ -> [ trans_font; Color Green ]
| Navy_ -> [ trans_font; Color Navy ]
......@@ -450,13 +443,6 @@ value class_of = fun
| Lawngreen_back -> "lawngreen_back"
| Aquamarine_back -> "aquamarine_back"
| Grey_back -> "grey_back"
(*[ | Pict_om -> "pict_om"
| Pict_om2 -> "pict_om2"
| Pict_om3 -> "pict_om3"
| Pict_om4 -> "pict_om4"
| Pict_gan -> "pict_gan"
| Pict_hare -> "pict_hare"
| Pict_geo -> "pict_geo" ]*)
| Blue_ -> "blue"
| Green_ -> "green"
| Navy_ -> "navy"
......
......@@ -109,7 +109,6 @@ value index_engine () = do
let env = create_env query in
let translit = get "t" env Paths.default_transliteration
and lex = get "lex" env Paths.default_lexicon (* default by config *)
and font = get "font" env Paths.default_display_font
and url_encoded_entry = get "q" env "" in
let lang = language_of lex in do
{ print_title_solid Mauve (Some lang) (dico_title lang)
......
......@@ -31,10 +31,9 @@ module Lexer (* takes its prelude and control arguments as module parameters *)
value out_chan : ref out_channel; (* output channel *)
end) = struct
open Html;
open Html;
open Web; (* ps pl abort etc. *)
open Cgi;
open Phases; (* Phases *)
open Phases; (* phase *)
......@@ -106,7 +105,7 @@ value print_tags pvs seg_num phase form tags =
let ptag = print_morph pvs (is_cache phase) seg_num (generative phase) form in
let _ = List.fold_left ptag 1 tags in ()
;
value rec str_phase = fun
value rec scl_phase = fun
[ Pv | Pvk | Pvkc | Pvkv -> "pv"
| Noun | Noun2 | Nouc | Nouv | Krid | Kriv | Kric | Lopak | Pron | Auxik
-> "noun"
......@@ -125,8 +124,8 @@ value rec str_phase = fun
| Ifc | Ifc2 -> "ifc"
| Unknown -> "unknown"
| Cache -> "Cache"
| Comp (_,ph) _ _ -> "preverbed " ^ str_phase ph
| Tad (ph,_) _ _ -> "taddhita " ^ str_phase ph
| Comp (_,ph) _ _ -> "preverbed " ^ scl_phase ph
| Tad (ph,_) _ _ -> "taddhita " ^ scl_phase ph
]
;
value print_scl_morph pvs gen form tag = do
......@@ -137,7 +136,7 @@ value print_scl_morph pvs gen form tag = do
;
value print_scl_tags pvs phase form tags =
let table phase =
xml_begin_with_att "tags" [ ("phase",str_phase phase) ] in do
xml_begin_with_att "tags" [ ("phase",scl_phase phase) ] in do
{ ps (table phase)
; List.iter (print_scl_morph pvs (generative phase) form) tags
; ps (xml_end "tags")
......@@ -215,7 +214,7 @@ value print_morpho phase word = do
process_taddhita [] 0 ph form sfx_phase sfx sfx_tags
| Preverbed _ pvs _ _ -> (* stem, tagged as iic *)
process_taddhita pvs 0 ph form sfx_phase sfx sfx_tags
| _ -> failwith "taddhita recursion unavailable"
| _ -> failwith "Anomaly: taddhita recursion"
]
] in ()
; ps span_end
......@@ -228,7 +227,7 @@ value print_morpho phase word = do
value print_segment offset (phase,rword,transition) = do
{ ps "[ "
; Morpho_html.print_signifiant_off rword offset
; let word = mirror rword in print_morpho phase word
; print_morpho phase (mirror rword)
(* Now we print the sandhi transition *)
; ps "&lang;" (* < *)
; let correction = process_transition transition in do
......@@ -238,37 +237,98 @@ value print_segment offset (phase,rword,transition) = do
; offset+correction+length rword
}
}
;
(* TODO
type vakti =
[ Stem of pratipad (* forms nominal compounds *)
| Cvi of pratipad (* forms verbal compounds *)
| Avyayii of pratipad (* forms nominal invariable compounds *)
| Subanta of pratipad and inflexion_tag (* nominal padas *)
| Tinanta of kriya and inflexion_tag (* verbal padas *)
| Peri of kriya (* forms verbal compounds *)
| Absolutive of kriya (* verbal padas *)
| Infinitive of kriya (* verbal padas *)
| Indecli of inflexion_tag (* indeclinables *)
| Anartha (* unanalysed chunk *)
]
and pratipad =
[ Koza of word (* atomic nominal stems *)
| Nan (* privative prefix a- an- *)
| Kridanta of verbal and kriya
| Taddhitanta of pratipad and taddhita
]
and kriya = list preverb and word (* optional upasarga sequence and root *)
;
(* Improved version of [Load_morphs.tags_of] *)
value scl_tags phase word = match phase with
[ Pv | Pvk | Pvkc | Pvkv -> failwith "Preverb in scl_tags"
| A | Ai | An | Ani -> Stem Nan
| Unknown -> Anartha
| Iic | Iicv | Iicc | Iic2 | Iiif | Auxiick -> Stem (Koza word)
| Auxiick -> Stem (Kridanta (?,?)
| Iiv | Iivv | Iivc -> Cvi (Koza word)
| Peri -> Peripft (word)
| Iiy -> Avyayi (Koza word)
| Krid | Kriv | Kric | Lopak | Auxik -> Kridanta of verbal and kriya
| Comp ((_,ph) as sort) pv form ->
let tag = Deco.assoc form (morpho_tags ph) in
match ph with
[ Abso ->
| Peri ->
| Inftu ->
| Lopa ->
| Root -> Tinanta () tag
| ph when vkrid_phase ph -> Tinanta () tag
| ph when ikrid_phase ph -> Stem ()
| ph when krid_phase ph -> Subanta (Kridanta of verbal and kriya
]
Preverbed sort pv form tag
| Tad (ph,sfx_ph) form sfx ->
match sfx_ph with
[ Sfx -> Subanta (Taddhitanta form sfx) sfx_tag
where sfx_tag = Deco.assoc sfx (morpho_tags sfx_ph) in
| Isfx -> Stem (Taddhitanta form sfx)
| _ -> failwith "Wrong taddhita structure"
]
| Nouv | Nouc | Noun2 | Pron | Vocv | Vokc ->
Subanta (Stem ?) (Deco.assoc word (morpho_tags phase))
| Root | Lopa | Auxi -> Tinanta (Stem ?) (Deco.assoc word (morpho_tags phase))
| Absv | Absc | Abso -> Absolutive ?
| Inde | Inv -> Indecli (Deco.assoc word (morpho_tags phase))
]
;
value print_scl_tags _ = () (* whatever xml printing TODO *)
; *)
(* Similarly for [scl_plugin] mode (without offset and transitions) *)
(* Called from [Scl_parser.print_scl_output] *)
value print_scl_segment counter (phase,rword) =
let print_pada rword =
let word = Morpho_html.visargify rword in
let ic = string_of_int counter in
let word = Morpho_html.visargify rword in do
{ let solid = background (Disp.color_of_phase phase) in
pl (td_begin_class solid)
; let ic = string_of_int counter in
ps ("<input type=\"hidden\" name=\"field" ^ ic ^ "\" value='<form wx=\""
^ Canon.decode_WX word ^ "\"/>") in do
{ let solid = background (Disp.color_of_phase phase) in
pl (td_begin_class solid)
; print_pada rword
; let word = mirror rword in
match tags_of phase word with
[ Atomic tags ->
^ Canon.decode_WX word ^ "\"/>")
(* ; print_scl_tags (scl_tags phase (mirror rword)) TODO *)
(* DEPRECATED
; match tags_of phase (mirror rword) with
[ Atomic tags ->
print_scl_tags [] phase word tags
| Preverbed (_,phase) pvs form tags ->
| Preverbed (_,phase) pvs form tags ->
let ok_tags =
if pvs = [] then tags
else trim_tags (generative phase) form (Canon.decode pvs) tags in
print_scl_tags pvs phase form ok_tags
| Taddhita _ _ sfx_phase sfx_tags ->
| Taddhita _ _ sfx_phase sfx_tags ->
let taddhita_phase = match sfx_phase with
[ Sfx -> Noun
| Isfx -> Iic
| _ -> failwith "Wrong taddhita structure"
] in
print_scl_tags [] taddhita_phase word sfx_tags
]
; ps "'>" (* closes <input *)
; let word = Morpho_html.visargify rword in
ps (Canon.unidevcode word)
] *)
; ps "'>" (* closes <input *)
; ps (Canon.unidevcode word)
; ps td_end
; ps "\n"
; counter+1
......@@ -348,7 +408,7 @@ value print_uni_taddhita pvs m phase stem sfx sfx_phase = fun
; pl (table_morph_of sfx_phase) (* table begin *)
; let _ = print_morph_tad pvs cached 0 gen stem sfx 0 (delta,unitag) in ()
; ps table_end (* table end *)
; ps th_end
; ps th_end
}
| _ -> failwith "Multiple sfx tag"
]
......@@ -374,9 +434,9 @@ value print_projection phase rword ((_,m) as index) = do
(* Consumes one element of a list of projections: the head [n_m] is printed
   with [print_projection phase rword], and the tail [rest] is returned so the
   caller can go on with the remaining projections.
   Fails with the message Projection missing when the list is empty. *)
value print_proj phase rword = fun
[ [] -> failwith "Projection missing"
| [ n_m :: rest ] -> do
{ print_projection phase rword n_m
; rest (* returns the rest of projections stream *)
}
{ print_projection phase rword n_m
; rest (* returns the rest of projections stream *)
}
]
;
......
......@@ -18,7 +18,6 @@ open Web; (* ps pl abort etc. *)
open Cgi; (* [create_env get] *)
(* Background value: [background] applied to the colour constant [Chamois] *)
value back_ground = background Chamois
(*[ obs if Install.narrow_screen then background Chamois else Pict_hare ]*)
;
(* Mutable cell holding an optional value, initially [None]; what the mode
   selects is not visible in this excerpt - TODO confirm against its users *)
value out_mode = ref None
;
......@@ -86,7 +85,6 @@ value reader_page () = do
; pl (option_select_default "cp"
[ (" Full ","t",cp="t")
; (" Simple ","f",cp="f")
(* ; ("Experiment","e",cp="e") deprecated *)
])
; pl html_break
; ps (reader_input_area_default text)
......
......@@ -160,32 +160,27 @@ value print_inv_morpho_scl pe form generative (delta,morphs) =
let (homo,bare_stem) = homo_undo stem in
let krid_infos = Deco.assoc bare_stem unique_kridantas in
try let (verbal,root) = look_up_homo homo krid_infos in do
{ match Deco.assoc bare_stem lexical_kridantas with
[ [] (* not in lexicon *) -> pe bare_stem
| entries (* bare stem is lexicalized *) ->
if List.exists (fun (_,h) -> h=homo) entries
then pe stem (* stem with exact homo is lexical entry *)
else pe bare_stem
]
{ pe bare_stem
; ps "<krid>"; print_scl_verbal verbal
; ps "</krid><root>"; pe root; ps "</root>"
} with [ _ -> pe bare_stem ]
else pe stem
; ps "</morpho_gen>"
}
;
(* Emits the entry [w] through [ps] as an empty XML element [entry] whose
   attribute [wx] carries [Canon.decode_WX w], i.e. the WX notation expected
   by the UoH interface. *)
value print_scl_entry w =
  let wx = Canon.decode_WX w in
  ps ("<entry wx=\"" ^ wx ^ "\"/>")
;
(* Wrapper shadowing the earlier [print_inv_morpho_scl]: it builds an entry
   printer [encaps] that handles the preverb sequence and passes it, with
   [form], to that earlier definition.
   [pv] is [[ 2 ]] (aa-) when [Phonetics.phantomatic form] holds, and [pvs]
   otherwise. When [pv] is empty the entry is printed bare; otherwise
   [Canon.decode_WX pvs] followed by a hyphen is emitted before the entry.
   NOTE(review): emptiness is tested on [pv] but the prefix is decoded from
   [pvs], so a phantomatic form with empty [pvs] gets a prefix decoded from
   the empty list - confirm this is intended. *)
value print_inv_morpho_scl pvs pe form =
value print_inv_morpho_scl pvs form =
let pv = if Phonetics.phantomatic form then [ 2 ] (* aa- *)
else pvs in
let encaps print e = if pv = [] then pe e
else do { ps (Canon.decode_WX pvs ^ "-"); pe e } in
print_inv_morpho_scl (encaps pe) form
and print_scl_entry w = (* ps offline in WX notation for UoH interface *)
ps ("<entry wx=\"" ^ Canon.decode_WX w ^ "\"/>")
let encaps e = if pv = [] then print_scl_entry e
else do { ps (Canon.decode_WX pvs ^ "-"); print_scl_entry e } in
print_inv_morpho_scl encaps form
;
(* Used in [Lexer.print_scl_morph] *)
(* Thin entry point over [print_inv_morpho_scl], partially applied to the
   preverb list [pvs]; the remaining arguments are supplied by the caller. *)
value print_scl_inflected pvs =
print_inv_morpho_scl pvs print_scl_entry
print_inv_morpho_scl pvs
;
(*i end; i*)
......@@ -16,7 +16,7 @@ open Skt_morph;
(* module Morphology : sig *)
type inflexion_tag =
type inflexion_tag = (* vibhakti *)
[ Noun_form of gender and number and case (* declined nominal *)
| Part_form of verbal and gender and number and case (* declined participle *)
| Bare_stem (* iic forms *)
......
......@@ -2914,12 +2914,12 @@ value compute_passive_system conj root pastem = do
}
;
(* Passive conjugation from [stem]: [affix_y] adds the passive marker y, and
   the resulting stem is handed to [compute_passive_system] together with
   [conj] and [root], which are passed through unchanged. *)
value compute_passive conj root stem =
let pastem = affix_y stem (* "y" marks passive *) in
compute_passive_system conj root pastem
let ps_stem = affix_y stem (* "y" marks passive *) in
compute_passive_system conj root ps_stem
;
(* Passive of [root] in the [Primary] conjugation: the stem is obtained as
   [passive_stem root (revstem root)] and then processed by
   [compute_passive]. *)
value compute_passive_raw root =
let pstem = passive_stem root (revstem root) in
compute_passive Primary root pstem
let ps_stem = passive_stem root (revstem root) in
compute_passive Primary root ps_stem
;
value compute_passive_10 root ps_stem =
match root with
......@@ -4285,7 +4285,6 @@ value compute_peri_fut conj perstem entry =
])
])
;
value record_pfp_tavya conj perstem entry =
let pfp_stem = fix perstem "tavya" in
record_part (Pfutp_ conj (rev pfp_stem) entry) (* rev compat entry by Pfpart *)
......@@ -4615,13 +4614,11 @@ value record_ppp_abs_stems entry rstem ppstems =
] in
iter process_ppstem ppstems
;
(* Records, for the denominative [entry], the form in -ita through
   [record_part_ppp] and the absolutive in -itvaa through [record_abso_tvaa].
   Two variants appear below. One derives a local [ppstem] as
   [trunc (revstem entry)], uses it for -ita, and also records an absolutive
   in -ya with [record_abso_ya]. The other suffixes [ystem] directly for both
   -ita and -itvaa and records no absolutive in -ya. *)
(* Simple version for denominatives *)
value record_ppp_abs_den ystem entry =
let ppstem = trunc (revstem entry) in do
{ record_part_ppp (rfix ppstem "ita") entry
(* Simple version for denominatives - tentative *)
value record_ppp_abs_den ystem entry = do
{ record_part_ppp (rfix ystem "ita") entry
; record_abso_tvaa (fix ystem "itvaa") entry
; record_abso_ya (fix ppstem "ya") entry (* ? *)
(*i TODO pfp inf etc. i*)
(* no [record_abso_ya] *)
}
;
(* Absolutive in -am - Macdonell§166 Stenzler§288 \Pan{3,4,22} .namul *)
......@@ -5302,19 +5299,23 @@ value den_stem_m entry = (* in general intransitive or reflexive Whitney§1059c
| _ -> failwith ("Unknown denominative " ^ entry)
]
;
(* Forms of a denominative [entry] outside the present system. In order:
   - [build_perpft Primary ystem entry];
   - [compute_future] on [ystem] extended with the code of i.sy;
   - [perif Primary] on [ystem] extended with code 3 (-yi);
   - [compute_passive_11] and [record_pfp_10] on a shortened [stem].
   Two variants of the last step appear below. One shortens with
   [trunc stem] and skips the entries udan and asuuya. The other only handles
   stems of shape [1 :: rest], skips asuuya, and does nothing for any other
   stem. *)
value compute_denom stem ystem entry = do (* other than present system *)
value compute_denom stem ystem entry = do (* other than present system - rare *)
{ build_perpft Primary ystem entry
; let fsuf = revcode "i.sy" in (* rare - similar to [compute_future_10] *)
compute_future (fsuf @ ystem) entry
; let perstem = [ 3 :: ystem ] (* -yi *) in
perif Primary perstem entry
; let ps_stem = trunc stem (* experimental *) in match entry with
[ "udan" | "asuuya" -> () (* wrong udaya asya *)
| _ -> do
{ compute_passive_11 entry ps_stem
; record_pfp_10 entry ps_stem (* dubious - eg clash viirya *)
}
]
; match stem with
[ [ 1 :: rest ] ->
match entry with
[ "asuuya" -> () (* wrong asya *)
| _ -> do (* experimental - rare acc. to Whitney *)
{ compute_passive_11 entry rest
; record_pfp_10 entry rest
}
]
| _ -> () (* specially wrong for consonant stems *)
]
}
;
value compute_denominative_a entry third =
......@@ -5368,12 +5369,11 @@ value compute_denominative entry pada third =
value compute_conjugs_stems entry (vmorph,aa) = do
{ admits_aa.val := aa (* sets the flag for phantom forms for aa- preverb *)
; match vmorph with
[ Conj_infos.Prim gana pada third ->
[ Conj_infos.Prim 11 pada third ->
(* note: pada of denominative verbs is lexicalized *)
compute_denominative entry pada third
| Conj_infos.Prim gana pada third ->
(* gana is root class, pada is True for Para, False for Atma of third form *)
if gana=11 (* denominative verb, special treatment *)
then compute_denominative entry pada third
(* note: pada of denominative verbs is lexicalized *)
else (* root entry *)
(* Primary conjugation *)
let rstem = revstem entry in (* root stem reversed *)
try do
......@@ -5599,9 +5599,9 @@ value compute_auxi_kridantas () =
] in do (* A few auxiliary action nouns are generative for cvi compounds *)
{ let (rst,st) = stems "kara.na" in
build_part_a_n (Primary,Action_noun) rst st "k.r#1"
; let (rst,st) = stems "kaara" in
build_part_a_m (Primary,Action_noun) rst st "k.r#1"
; let (rst,st) = stems "bhaavana" in
; let (rst,st) = stems "kaara" in (* actually, should be [Agent_noun] *)
build_part_a_m (Primary,Action_noun) rst st "k.r#1" (* also fem in -ii? *)
; let (rst,st) = stems "bhaavana" in
build_part_a_m (Primary,Action_noun) rst st "bhuu#1"
; let (rst,st) = stems "bhaava" in
build_part_a_m (Primary,Action_noun) rst st "bhuu#1"
......
......@@ -8,4 +8,4 @@
(**************************************************************************)
(* Generated by make version - see main Makefile *)
value version="2.99" and version_date="2017-06-02";
value version="2.99" and version_date="2017-06-11";
......@@ -331,18 +331,18 @@ and user_aid_title = h1_title (if narrow_screen then "User Feedback"
;
(* Selects the dictionary title according to the language argument:
   [dico_title_fr] for [French], [dico_title_en] for [English]. *)
value dico_title = fun
[ French -> dico_title_fr
| English -> dico_title_en
| English -> dico_title_en
]
;
(* We set and reset [output_channel] to designate either a static html file
under creation or [stdout] to produce a cgi output dynamic page.
This is awful and should be fixed one day.
*)
value output_channel = ref stdout
value output_channel = ref stdout
;
(* Printing primitives, all writing to the channel currently stored in
   [output_channel]: [ps] writes a string, [pc] a character, and [pi] an
   integer rendered with [string_of_int]. The channel is read at each call,
   so reassigning [output_channel.val] redirects subsequent output. *)
value ps s = output_string output_channel.val s
and pc c = output_char output_channel.val c
and pi i = output_string output_channel.val (string_of_int i)
value ps s = output_string output_channel.val s
and pc c = output_char output_channel.val c
and pi i = output_string output_channel.val (string_of_int i)
;
value line () = pc '\n'
and sp () = ps " "
......
......@@ -34,7 +34,7 @@ GOLD=$(SKTRESOURCES)GOLD# Heritage in Goldendict format databases
XMLBANKS=$(SKTRESOURCES)XML# Heritage Sanskrit morphology in XML databases
# Virtual path for make
VPATH=ML:$(DATA)
VPATH=ML
# For change of make in all recursive Makefiles
MAKE=make
......@@ -310,10 +310,9 @@ $(DATA)/lopas.rem $(DATA)/pronouns.rem $(DATA)/transpn.rem $(DATA)/parts.rem \
$(DATA)/lopaks.rem $(DATA)/indecls.rem $(DATA)/abstvaa.rem $(DATA)/absya.rem \
$(DATA)/iics.rem $(DATA)/piics.rem $(DATA)/ifcs.rem $(DATA)/iivs.rem \
$(DATA)/iifcs.rem $(DATA)/auxi.rem $(DATA)/voca.rem $(DATA)/invs.rem \
$(DATA)/inftu.rem $(DATA)/kama.rem \
$(DATA)/nouns2.rem $(DATA)/iics2.rem $(DATA)/avyayais.rem $(DATA)/avyayafs.rem \
$(DATA)/ifcs2.rem $(DATA)/sandhis.rem $(DATA)/sandhis_pv.rem \
$(DATA)/sandhis_ph.rem $(DATA)/suffixes.rem $(DATA)/peris.rem \
$(DATA)/inftu.rem $(DATA)/kama.rem $(DATA)/nouns2.rem $(DATA)/iics2.rem \
$(DATA)/avyayais.rem $(DATA)/avyayafs.rem $(DATA)/ifcs2.rem $(DATA)/sandhis.rem \
$(DATA)/sandhis_pv.rem $(DATA)/sandhis_ph.rem $(DATA)/peris.rem \
$(DATA)/transperi.rem $(DATA)/transn.rem $(DATA)/transr.rem \
$(DATA)/transiiy.rem $(DATA)/transavy.rem $(DATA)/transp.rem \
$(DATA)/transpa.rem $(DATA)/transic.rem $(DATA)/sfxs.rem $(DATA)/isfxs.rem \
......@@ -380,19 +379,8 @@ new:
./configure
$(MAKE) all
# following obsolete, now ZEN is distributed as sub-directory, not tar file
ZEN: #SETUP/zen.tar
mkdir ZEN
cp -p SETUP/zen.tar ZEN
cd ZEN && tar xvf zen.tar
cd ZEN && $(MAKE) clean; rm zen.tar; $(MAKE) all
# Stand-alone distribution ought to include zen.tar and the Ocaml distribution
zen.tar:
echo "Download zen.tar from Zen site http://sanskrit.inria.fr/ZEN"
# After updating installation parameters above, the python script configure
# regenerates Makefile from SETUP/MMakefile and does "make configuration"
# regenerates Makefile from SETUP/MMakefile and executes "make configuration"
configuration:
(cat $(ML_COPYRIGHT) ; \
echo "(* Do not edit by hand - generated by configuration script - see main Makefile *)"; \
......@@ -487,7 +475,7 @@ abrevs:
# This releases on the server the new lexical data base and Web resources
# must be done after releaseMW for links adjustments
releasedata: $(DICO) $(DATA) abrevs
releasedata: abrevs
cp $(WEBSOURCES) $(SERVERPUBLICDIR)
cp $(SITEPAGES) $(SERVERPUBLICDIR) # site main pages made by configure
# cp $(GOLDENDICT) $(SERVERPUBLICDIR) # Goldendict version now separated
......@@ -518,6 +506,7 @@ releasebook:
cp $(BOOKPRINTS) $(SERVERPUBLICDIR)
releasexml: # Sanskrit morphology in xml copied from Resources to Server
# This should be deprecated soon, and XML banks released separately
mkdir -p $(SERVERPUBLICDIR)$(DATA)/XML
cp $(XML_MORPHOLOGY) $(SERVERPUBLICDIR)$(DATA)/XML
......
......@@ -5,10 +5,11 @@
PLATFORM='Computer'
# Default transliteration : 'VH', 'WX', 'KH' or 'SL'
# or possibly Unicode input under UTF-8, 'DN' for Devanagari and 'RN' for IAST romanization
# See meaning in http://sanskrit.inria.fr/faq.en.html#transliteration
TRANSLIT='VH'
# Default lexicon: SH for Sanskrit Heritage (French), MW for Monier-Williams
# Default lexicon: SH for Sanskrit Heritage (French), MW for Monier-Williams (English)
LEXICON='SH'
# Default display font : 'roma', 'deva'
......
VERSION='2.99'
DATE='2017-06-02'
DATE='2017-06-11'
......@@ -2428,6 +2428,23 @@ Gabe Hiemstra<br />
www.wisdomlib.org<br />
info@wisdomlib.org<br />
www.facebook.com/WisdomLibrary<br />
<hr />
From michaelnm.meyer@gmail.com<br />
À : GH<br />
Date: 7 Juin 2017<br />
À propos du Sanskrit Heritage Site<br />
Monsieur,<br />
Je me permets de vous contacter à propos du Sanskrit Heritage Site.<br />
Tout d'abord, je souhaitais vous remercier d'offrir l'accès aux outils que vous avez développés. Il m'ont été fort utiles lorsque je commençais à apprendre le sanskrit, il y a maintenant 5 ans. Je mesure l'audace de votre entreprise, le sanskrit se prêtant beaucoup moins facilement à un traitement automatique que le français ou l'anglais, par exemple.<br />
...<br />
<br />
Bien cordialement,<br />
Michaël Meyer<br />
<hr />
......
......@@ -15,7 +15,7 @@
<link rel="stylesheet" type="text/css" href="DICO/style.css" media="screen,tv"/>
</head>
<body class="pink_back"> <!-- Pale_rose -->
<body class="pink_back"> <!-- Pale_rose -->
<table class="body">
<table border="0pt" cellpadding="0" cellspacing="15pt" width="100%">
......@@ -23,11 +23,11 @@
<h1 class=b1>Sanskrit linguistic resources</h1>
<br>
<img src="IMAGES/Panini2.jpg" alt="Panini"/>
<br>
<img src="IMAGES/Panini2.jpg" alt="Panini"/>
<br>
<div class="latin12">
<div class="latin12">
<h2 class=b2>Sanskrit Morphology</h2>
......@@ -42,7 +42,7 @@ generation is available <a href="Heritage.pdf">here</a> as a PDF document.
These databanks are regularly updated. They are available for public download
as a public git archive in the Sanskrit Heritage development site:
"https://gitlab.inria.fr/huet/Heritage_Resources".
"https://gitlab.inria.fr/huet/Heritage_Resources".
<h3 class=b3>Databanks description</h3>
......@@ -51,24 +51,24 @@ defined in the
<a href="DICO/index.html">Sanskrit Heritage Dictionary</a>. These forms are
presented as lemmas linking each form to its stem entry by possible morpho-phonetic
operations. We limit ourselves to classical Sanskrit, and do not cover precative,
subjunctive, injunctive and conditional forms of the verbs.
subjunctive, injunctive and conditional forms of the verbs.
At present, we provide for two transliteration schemas, respectively
WX, used by the
<a href="http://sanskrit.uohyd.ernet.in/">Department of Sanskrit Studies at
University of Hyderabad</a>
and SLP1, used by the
and SLP1, used by the
<a href="http://sanskritlibrary.org/">Sanskrit Library</a>.
The respective data banks are listed in directories WX and SL.
The morphological lemmas are distributed in 6 files in
The morphological lemmas are distributed in 6 files in
XML format, conformant to a common DTD.
The nominal morphological declensions of nouns, adjectives and numbers,
are covered in "T_nouns.xml" (where T is respectively WX or SL).
Those of pronouns are covered in "T_pronouns.xml".
Those of pronouns are covered in "T_pronouns.xml".
The conjugated forms of roots in the present, imperfect, imperative, optative,
perfect, aorist
perfect, aorist
and future tenses, as well as passives of the present system,
for the primary conjugation and for some secondary conjugations
(causative, intensive, desiderative) are covered in "T_roots.xml".
......@@ -78,13 +78,13 @@ are listed in "T_adverbs.xml". In addition, "T_final.xml" gives additional
generative morphemes. The files are conformant to the DTD "T_morph.dtd".
<p>
Finally, the text file "X_preverbs.txt" lists common
preverb sequences, given with their sandhi analysis.
preverb sequences, given with their sandhi analysis.
<h3 class=b2>Intellectual Property</h3>
All these linguistic data banks are Copyrighted Gérard Huet 1994-2017.
They are derived from the Sanskrit Heritage Dictionary
version 2.99 dated 2017-06-02.
version 2.99 dated 2017-06-11.
<p>
Use of these linguistic resources is granted according to the
Lesser General Public Licence for Linguistic Resources.
......@@ -94,14 +94,14 @@ Thank you for referencing the origin of this data if you use it in your own work
<h2 class=b2>Methodology</h2>
We deal here with a mixture of derivational and inflexional morphology.
We deal here with a mixture of derivational and inflexional morphology.
For instance, from the roots we generate verbal and participial stems, and from
these stems we generate in turn inflected forms: conjugated forms from the
verbal stems, and declined forms from the participial stems. But at present
we do not generate mechanically primary nominal stems from roots,
nor secondary nominal stems from primary ones, because of overgeneration.
The nominal stems, as well as the undeclinable forms, are taken from the
lexicon, that lists also some frequent participles.
The nominal stems, as well as the undeclinable forms, are taken from the
lexicon, that lists also some frequent participles.
<p>
This organization entails a different role in our morphological data bases.
The <i>basic</i> morphological categories correspond to lexical phases,
......@@ -120,7 +120,7 @@ perfect forms of the auxiliary roots <i>as</i>, <i>bhū</i> and
<i>kṛ</i> which are duplicated in a specific auxiliary lexicon).
Here is a simplified diagram of the current state space of our lexer.
<div class="center">
<div class="center">
<img src="IMAGES/lexer17.jpg" alt="Lexer automaton">
</div>
......@@ -134,41 +134,41 @@ and the corresponding articles are also available freely on my
(papers [78], [87], [88], [94], [95], [105], [106] and [110]
are specially relevant).
This material will not be repeated here. Let us just explain a few difficulties
of the large-scale implementation of this Sanskrit analyser.
of the large-scale implementation of this Sanskrit analyser.
<p>
As usual in a non-deterministic search algorithm (here all the possible parsings
of a sentence as a sandhied stream of forms), we have two pitfalls, silence and noise.
Silence (lack of recall) means incompleteness. Some legal Sanskrit sentences
may fail to be recognized.
may fail to be recognized.
Typically, some root word may be missing from the base lexicon,
or some Vedic form may use some construction rare in the later language,
like precative or subjunctive.
Compounding gives rise to two complications, the raising of new cases by
<i>bahuvrīhi</i> compounding,
<i>bahuvrīhi</i> compounding,
and the formation of <i>avyayībhava</i> compounds. Some of these
constructions are treated incompletely.
<p>
The opposite of silence is noise (lack of precision), that is overgeneration.
We deal with overgeneration
<p>
The opposite of silence is noise (lack of precision), that is overgeneration.
We deal with overgeneration
in the syntactico-semantic layer of our tagger, which filters out combinations of
tags inconsistent with semantic role assignments.
We shall not discuss this technology
further in this note on morphology, and refer the interested reader to our
<a href="/DICO/reader.html"><strong>Sanskrit reader
<a href="DICO/reader.html"><strong>Sanskrit reader
demonstration page</strong></a> and its <a href="manual.html">
<strong>Reference manual</strong></a>.
<p>
We remark that the respective data bases can be interrogated online by our
<p>
We remark that the respective data bases can be interrogated online by our
<a href="http://sanskrit.inria.fr/DICO/index.html#stemmer"><strong>stemmer
interface</strong></a>. But note that verbal forms prefixed by preverbs
are analysed by the tagger as non-atomic words, and only root forms and
their secondary conjugations are recognized by the stemmer.
their secondary conjugations are recognized by the stemmer.
<h2 class=b2>Help</h2>
Questions concerning these resources should be addressed to
Questions concerning these resources should be addressed to
<a href="mailto:Gerard.Huet@inria.fr">Gérard Huet</a>.
All suggestions for improvements will be gratefully considered.
All suggestions for improvements will be gratefully considered.
</td></tr>
</table>
</div>
......@@ -182,11 +182,11 @@ All suggestions for improvements will be gratefully considered.
</td><td>
<table class="center">
<tr><td>
<a href="index.html"><strong>Top</strong></a> |
<a href="DICO/index.en.html"><strong>Index</strong></a> |
<a href="DICO/index.en.html#stemmer"><strong>Stemmer</strong></a> |
<a href="DICO/grammar.en.html"><strong>Grammar</strong></a> |
<a href="DICO/sandhi.en.html"><strong>Sandhi</strong></a> |
<a href="index.html"><strong>Top</strong></a> |
<a href="DICO/index.en.html"><strong>Index</strong></a> |
<a href="DICO/index.en.html#stemmer"><strong>Stemmer</strong></a> |
<a href="DICO/grammar.en.html"><strong>Grammar</strong></a> |
<a href="DICO/sandhi.en.html"><strong>Sandhi</strong></a> |
<a href="DICO/reader.en.html"><strong>Reader</strong></a> |
<a href="faq.en.html"><strong>Help</strong></a> |
<a href="portal.en.html"><strong>Portal</strong></a>
......@@ -197,6 +197,4 @@ All suggestions for improvements will be gratefully considered.
<img src="IMAGES/logo_inria.png" alt="Logo Inria" height="50"></a>
<br></td></tr></table></div>
</body>
</html>
</html>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment