Commit 309a4bd9 authored by Gérard Huet's avatar Gérard Huet

fix print preverbs in morpho; simplification entry page

parent de743929
......@@ -142,14 +142,6 @@ value print_scl_tags pvs phase form tags =
; ps (xml_end "tags")
}
;
(* Emits one "tags" XML element through [ps] (the [_tad] variant, presumably
   used for taddhita forms - confirm against callers):
   - the opening tag carries a "phase" attribute computed by [scl_phase ph];
   - every entry of [sfx_tags] is rendered by [print_scl_morph pvs False form];
   - the element is then closed with [xml_end "tags"]. *)
value print_scl_tags_tad pvs ph form sfx_tags =
let opening = xml_begin_with_att "tags" [ ("phase",scl_phase ph) ] in do
{ ps opening
; List.iter (print_scl_morph pvs False form) sfx_tags
; ps (xml_end "tags")
}
;
(* Used in Parser *)
value extract_lemma phase word =
......
......@@ -173,6 +173,7 @@ value tags_of phase word =
as sup kridanta forms with preverbs. The preverbs are packed in pv. *)
| Tad (ph,sfx_ph) form sfx -> (* tag inherited from fake suffix entry *)
let sfx_tag = Deco.assoc sfx (morpho_tags sfx_ph) in
(* let stem_tag = Deco.assoc sfx (morpho_tags ph) in - possible extension *)
Taddhita (ph,form) [ 0 :: sfx ] sfx_ph sfx_tag (* 0 = "-" *)
| _ -> Atomic (Deco.assoc word (morpho_tags phase))
(* NB Atomic comprises tin verbal forms of roots as well as sup atomic forms
......
......@@ -93,9 +93,8 @@ value print_inv_morpho_link pvs pe pne pu form =
let pv = if Phonetics.phantomatic form then [ 2 ] (* aa- *)
else pvs in
let encaps print e = (* encapsulates prefixing with possible preverbs *)
if pv = [] then print e
else do { ps (Canon.decode pvs ^ "-"); print e } in
print_inv_morpho (encaps pe) (encaps pne) pu form
if pv = [] then print e else do { pe pvs; ps "-"; print e } in
print_inv_morpho (encaps pe) (encaps pne) pu form
(* Possible overgeneration when derivative of a root non attested with pv
since only existential test in [Dispatcher.validate_pv]. Thus
[anusandhiiyate] should show [dhaa#1], not [dhaa#2], [dhii#1] or [dhyaa] *)
......
......@@ -8,4 +8,4 @@
(**************************************************************************)
(* Generated by make version - see main Makefile *)
value version="2.99" and version_date="2017-06-13";
value version="3.00" and version_date="2017-06-20";
VERSION='2.99'
DATE='2017-06-13'
VERSION='3.00'
DATE='2017-06-20'
......@@ -49,7 +49,7 @@ published recently as
<a href="http://jlm.ipipan.waw.pl/index.php/JLM/article/view/108/140">Design and
analysis of a lean interface for Sanskrit corpus annotation</a>.
<p>
Written on June 1st 2017, for Sanskrit Engine Version 2.99.
Written on June 20th 2017, for Sanskrit Engine Version 3.00.
<h2 class="b2" id="tour">First approach to using the Sanskrit Heritage engine</h2>
......@@ -213,7 +213,7 @@ declension of stem <i>blablabla</i> in the masculine you will get
nonsensical forms such as ablative <i>blablablāt</i>.
But at least you are warned by the
system, that indicates its doubt by labeling the declension table as
<i>blablabla?</i>. If you ask for its forms in the feminine,
<i>blablabla?</i> If you ask for its forms in the feminine,
you will get a Gender anomaly report.
<p>
......@@ -261,7 +261,7 @@ The Sanskrit Heritage Dictionary is the latest edition of a Sanskrit
to French Dictionary
"Dictionnaire Français de l'H&eacute;ritage Sanskrit" compiled by
G&eacute;rard Huet since 1994. This dictionary is freely available
as a 891 pages <a href="Heritage.pdf">book</a> under the pdf format,
as a 907-page <a href="Heritage.pdf">book</a> in PDF format,
easily readable with Acrobat Reader, a free Adobe product.
This dictionary is still under development, and is
automatically updated along with the site,
......@@ -804,12 +804,12 @@ transliteration. Actually this notation is mandatory in certain situations
नासतोविद्यतेभावोनाभावोविद्यतेसतः will only accommodate Śaṅkara's analysis
<i>na asataḥ vidyate bhāvaḥ na abhāvaḥ vidyate sataḥ</i>, whereas
Madhva's interpretation (with <i>abhāvaḥ</i>) has to be made explicit as
नासतोविद्यतेऽभावोनाभावोविद्यतेसतः.
नासतोविद्यतेऽभावोनाभावोविद्यतेसतः
<p>
Finally, the system does not currently support degemination of stems,
such as modern renditions of <i>tattva</i> as <i>tatva</i>
or <i>vaarttaa</i> as <i>vaartaa</i>; only a few common stems such as
<i>chatra</i>, <i>chaatra</i> and <i>patra</i> are allowed.
<i>chatra</i>, <i>chaatra</i> and <i>patra</i> are recognized.
<h2 class="b2" id="regression">Regression analysis</h2>
......
......@@ -224,11 +224,23 @@ as we shall see below.
A dictionary of inflected forms of Sanskrit words is provided
in XML form under various transliteration schemes.
Please visit the <a href="xml.html">Sanskrit linguistic resources page</a>.
This resource may now be downloaded as a git repository, using the command:<br/>
<span class="Green">
git clone https://gitlab.inria.fr/huet/Heritage_Resources.git
</span>
<a id="reader"></a>
<h2 class="b2"> Sanskrit Reader </h2>
<p>
The main tool provided by this site is a
<span class="Green">Sanskrit Reader</span> that allows machine-assisted
analysis of Sanskrit sentences, that is segmentation
(including sandhi viccheda), morphological tagging, and several parsers.
Please consult the <a href="manual.html">Reference manual</a> to learn how
to use these tools.
<!--
Try our interactive <a href="DICO/reader.html">Sanskrit Reader</a>.
It is able to segment simple sentences.
Try for instance to segment "tryambaka.myajaamahesugandhi.mpu.s.tivardhanam"
......@@ -239,7 +251,6 @@ You will see two segmentations, one with an identified compound form
Note that each segment is indicated with a lemma giving its stem
and the set of morphological parameters that may generate the segment form
from its stem. The stem is hyperlinked to the dictionary of choice.
<!-- "vana.mgatvaadhyaana.mkaroti" "maarjaarodugdha.mpibati", -->
</p><p>
Note also that segments are separated by phonological information
......@@ -285,8 +296,6 @@ In these diagrams, transparent nodes are non generative, and colored nodes
correspond to the lexical categories recognized by the lemmatizer. The
category Auxi is the subset of Verb consisting of conjugated forms of
roots "k.r", "as" and "bhuu" used as auxiliaries in periphrastic constructions.
<!-- The category Krid corresponds to root participles or primary derivatives
({\sl k{\d r}dantas}). -->
Pv denotes sequences of preverbs.
</p>
......@@ -408,14 +417,13 @@ the gap between Sanskrit computational linguistics tools
and management of Sanskrit digital libraries</a> was organized in December 2016
at Banaras Hindu University, at the occasion of the ICON 2016 conference.
<!--
The computational tools for Sanskrit developed at University of Hyderabad
are available here as a <a href= "~anusaaraka/">Mirror Site</a>. -->
are available here as a <a href= "~anusaaraka/">Mirror Site</a>.
<div class="center">
<img src="IMAGES/yinyang.gif" alt="Yinyang">
</div>
-->
<h2 class="b2"><img src="IMAGES/JoeCaml.png" alt="Cool Joe Caml">
The Zen Library</h2>
......@@ -425,7 +433,7 @@ on a comprehensive software platform.
The project is based on a structured lexicographic database, compiled from
the Sanskrit Heritage dictionary, and on
the Zen computational linguistics toolkit. This toolkit is a library
of programs implemented in Pidgin ML, functional core of the
of programs implemented in the
<a href="http://ocaml.org">Objective Caml</a>
programming language. The Zen library and its documentation are available
as free software under the Gnu Lesser General Public License (LGPL) from the
......@@ -441,10 +449,11 @@ The Sanskrit Portal</h2>
Please visit our <a href="portal.html">Sanskrit Portal</a>
to find links to other Sanskrit resources.
<p>
<!--
If you are reading this from a mirror site, don't forget to regularly update
this server with the development Git site
"https://gitlab.inria.fr/huet/Heritage_Platform".
"https://gitlab.inria.fr/huet/Heritage_Platform". -->
<!--
<h2 class="b2"><img src="IMAGES/om1.jpg" alt="Om">
Artwork credits</h2>
......@@ -459,7 +468,7 @@ Artwork credits</h2>
</span><br>
<span class="green">Shri Yantra design ©
<a href="IMAGES/Yantra.jpg">Gérard Huet</a> 1990.<br>
</span>
</span> -->
</td></tr>
</table> <!-- body -->
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment