Commit 363d14db authored by Gérard Huet's avatar Gérard Huet

Fix Parser with trimmed tags

parent a437bc81
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet & Pawan Goyal *)
(* *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(* ©2019 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* Sanskrit Reader Summarizing interface. Yields sktgraph.cgi *)
......@@ -98,14 +98,6 @@ value print_tags pvs seg_num phase form tags =
and ptag = print_morph pvs seg_num cached gen form in
fold_vert ptag ok_tags
;
(*i EXPERIMENTAL: taddhitaantas (ad-hoc) i*)
value print_morph_tad pvs seg_num cache gen stem sfx n tag =
Morpho_html.print_graph_link_tad pvs cache stem sfx (seg_num,n) gen tag
;
value print_tags_tad pvs seg_num phase stem sfx sfx_tags =
let ptag = print_morph_tad pvs seg_num False (generative phase) stem sfx in
fold_vert ptag sfx_tags
;
(* This is called "printing morphology interface style". *)
value print_morpho phase word =
match tags_of phase word with
......
......@@ -21,7 +21,7 @@ open Skt_morph;
open Morphology; (* [inflected inflected_map] *)
open Auto.Auto; (* auto State *)
open Segmenter; (* Segment *)
open Dispatcher; (* [generative Dispatch transition phase_of_sort trim_tags] *)
open Dispatcher; (* [Dispatch transition phase_of_sort trim_tags] *)
open Word; (* word length mirror patch *)
module Lexer (* takes its prelude and control arguments as module parameters *)
......@@ -35,7 +35,7 @@ open Html;
open Web; (* ps pl abort etc. *)
open Cgi;
open Phases; (* Phases *)
open Phases; (* phase *)
open Phases; (* phase generative *)
module Lemmas = Load_morphs.Morphs Prel Phases
;
......@@ -47,7 +47,7 @@ module Transducers = Trans Prel;
module Disp = Dispatch Transducers Lemmas;
open Disp (* [transducer initial accepting dispatch input color_of_phase
transition] *)
transition trim_tags] *)
;
module Viccheda = Segment Phases Disp Control
(* [init_segment continue set_offset] *)
......@@ -114,6 +114,7 @@ value print_scl_tags pvs phase form tags =
}
;
value tags_of = Lemmas.tags_of (* For export to Parser *)
and trim_tags = Disp.trim_tags
;
value extract_lemma phase word =
match tags_of phase word with
......@@ -137,11 +138,6 @@ value print_transition = fun
| Id -> ()
]
;
value print_sfx_tags sfx = fun
[ [ tag ] -> let _ = print_morph [] False 0 False sfx 1 tag in ()
| _ -> failwith "Multiple sfx tag"
]
;
value process_kridanta pvs seg_num phase form tags = do
{ th_begin |> ps
; table_morph_of phase |> pl (* table begin *)
......
......@@ -50,6 +50,8 @@ module Lexer : functor (* takes its prelude and iterator control as parameters *
value table_morph_of : Phases.phase -> string;
value print_morph : Word.word -> bool -> int -> bool -> Word.word -> int ->
Morphology.unitag -> int;
value trim_tags : bool ->
Word.word -> string -> Morphology.multitag -> Morphology.multitag;
(* END Exported for Parser *)
value all_checks : ref (list Viccheda.check);
value un_analyzable : Word.word -> (list Disp.segment * Viccheda.resumption);
......
......@@ -109,35 +109,6 @@ value print_inv_morpho_link pvs pe pne pu form =
since only existential test in [Dispatcher.validate_pv]. Thus
[anusandhiiyate] should show [dhaa#1], not [dhaa#2], [dhii#1] or [dhyaa] *)
;
value print_inv_morpho_tad pv pe pne pu stem sfx_form (seg_num,sub)
generative (delta,morphs) =
let sfx = Word.patch delta sfx_form in do
{ ps "{ "
; print_morphs (seg_num,sub) morphs (* taddhitaanta declension *)
; ps " }["
; if generative then (* interpret stem as unique name *)
let (homo,bare_stem) = homo_undo stem in
let krit_infos = Deco.assoc bare_stem unique_kridantas in
try let (verbal,root) = look_up_homo homo krit_infos in do
{ match Deco.assoc bare_stem lexical_kridantas with
[ [] (* not in lexicon *) -> pne bare_stem
| entries (* bare stem is lexicalized *) ->
if List.exists (fun (_,h) -> h=homo) entries
then pe stem (* stem with exact homo is lexical entry *)
else pne bare_stem
]
; ps " { "; print_verbal verbal; ps " }["; pe root; ps "]"
} with [ _ -> pu bare_stem ]
else pe stem
; pne sfx; ps "]"
}
;
(* variant with link for printing of taddhitaantas *)
value print_inv_morpho_link_tad pvs pe pne pu stem sfx_form =
let pv = if Phonetics.phantomatic stem then [ 2 ] (* aa- *)
else pvs in
print_inv_morpho_tad pv pe pne pu stem sfx_form
;
(* Used in [Lexer.record_tagging] for regression analysis *)
value report_morph gen form (delta,morphs) =
......
......@@ -102,21 +102,11 @@ value print_inflected_link pvs cached =
let print_fun = if cached then print_cache else print_entry in
Morpho.print_inv_morpho_link pvs print_fun print_stem print_chunk
;
(* Variant for compound tags used in [Lexer.print_morph_tad] *)
value print_inflected_link_tad pvs cached =
let print_fun = if cached then print_cache else print_entry in
Morpho.print_inv_morpho_link_tad pvs print_fun print_stem print_chunk
;
(* Used in [Interface] to print the lemmas *)
value print_graph_link pvs cached =
let print_fun = if cached then print_graph_cache else print_graph_entry in
Morpho.print_inv_morpho_link pvs print_fun print_stem print_chunk
;
(* Used in [Interface] to print the lemmas for taddhitaantas *)
value print_graph_link_tad pvs cached =
let print_fun = if cached then print_graph_cache else print_graph_entry in
Morpho.print_inv_morpho_link_tad pvs print_fun print_stem print_chunk
;
(* Final visarga form for display: final s and r are replaced by visarga.
There is some information loss here, since -ar and -a.h do not have the
same behaviour with external sandhi, eg punar-api, antar-a'nga, antar-gata,
......
......@@ -109,7 +109,7 @@ value print_segment_roles print_sems seg_num (phase,rword,_) =
Lex.process_kridanta [] seg_num phase word tags
| Preverbed (_,phase) pvs form tags ->
Lex.process_kridanta pvs seg_num phase form tags
] in do
] in do
{ print_labels decl_tags seg_num
; print_roles print_sems decl_phase decl_tags form
}
......@@ -120,24 +120,25 @@ value project n list = List.nth list (n-1) (* Ocaml's nth starts at 0 *)
value print_uni_kridanta pvs phase word multitags (n,m) =
let (delta,polytag) = project n multitags in
let unitag = [ project m polytag ] in do
{ ps th_begin
; pl (Lex.table_morph_of phase) (* table of color of phase begins *)
{ th_begin |> ps
; Lex.table_morph_of phase |> pl (* table of color of phase begins *)
; let _ = (* print unique tagging *)
Lex.print_morph pvs False 0 (generative phase) word 0 (delta,unitag) in ()
; ps table_end (* table of color of phase ends *)
; ps th_end
; table_end |> ps (* table of color of phase ends *)
; th_end |> ps
}
;
value print_projection phase rword ((_,m) as index) = do
{ ps tr_begin (* tr begins *)
value print_projection phase rword index = do
{ tr_begin |> ps (* tr begins *)
; Morpho_html.print_signifiant_yellow rword
; let word = Word.mirror rword in
match Lex.tags_of phase word with
match Lex.tags_of phase word with
[ Atomic tags -> print_uni_kridanta [] phase word tags index
| Preverbed (_,phase) pvs form tags ->
print_uni_kridanta pvs phase form tags index
let trim = Lex.trim_tags (generative phase) form (Canon.decode pvs) in
print_uni_kridanta pvs phase form (trim tags) index
]
; ps tr_end (* tr ends *)
; tr_end |> ps (* tr ends *)
}
;
value print_proj phase rword = fun
......@@ -208,7 +209,7 @@ value analyse query output =
[ [ (_,[ a :: _ ]) :: _ ] -> List.length a
| _ -> 0
] in
pl (xml_empty_with_att "input"
pl (xml_empty_with_att "input" (* Final call to Parser for display *)
[ ("type","submit"); ("value","Submit");
("onclick","unique('" ^ parser_cgi ^ "?" ^ query
^ ";p=','" ^ string_of_int (find_len top_groups) ^ "')" )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment