Commit 70c10e8e authored by Gérard Huet's avatar Gérard Huet

improvements of sa treatment; renamings

parent a02bdf46
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
# Gérard Huet & Pawan Goyal # # Gérard Huet & Pawan Goyal #
# # # #
############################################################################ ############################################################################
# Makefile of Sanskrit Heritage Software 08-01-2018 Copyright INRIA 2018 # # Makefile of Sanskrit Heritage Software 21-01-2018 Copyright INRIA 2018 #
############################################################################ ############################################################################
# Prerequisites: Ocaml and Camlp4 preprocessor # Prerequisites: Ocaml and Camlp4 preprocessor
......
...@@ -556,9 +556,9 @@ value look_up_and_display font gana entry = ...@@ -556,9 +556,9 @@ value look_up_and_display font gana entry =
and sort_out_u accu form = fun and sort_out_u accu form = fun
[ [ (_,morphs) ] -> List.fold_left (reorg form) accu morphs [ [ (_,morphs) ] -> List.fold_left (reorg form) accu morphs
where reorg f (inf,absya,per,abstva) = fun where reorg f (inf,absya,per,abstva) = fun
[ Ind_verb (c,Infi) when c=conj -> ([ (c,f) :: inf ],absya,per,abstva) [ Und_verb (c,Infi) when c=conj -> ([ (c,f) :: inf ],absya,per,abstva)
| Ind_verb (c,Absoya) when c=conj -> (inf,[ (c,f) :: absya ],per,abstva) | Und_verb (c,Absoya) when c=conj -> (inf,[ (c,f) :: absya ],per,abstva)
| Ind_verb (c,Perpft) when c=conj -> (inf,absya,[ (c,f) :: per ],abstva) | Und_verb (c,Perpft) when c=conj -> (inf,absya,[ (c,f) :: per ],abstva)
| Abs_root c when c=conj -> (inf,absya,per,[ (c,f) :: abstva ]) | Abs_root c when c=conj -> (inf,absya,per,[ (c,f) :: abstva ])
| _ -> (inf,absya,per,abstva) | _ -> (inf,absya,per,abstva)
] ]
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
(* *) (* *)
(* Gérard Huet *) (* Gérard Huet *)
(* *) (* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *) (* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************) (**************************************************************************)
(* Syntactico/semantic analysis and penalty computations. *) (* Syntactico/semantic analysis and penalty computations. *)
...@@ -259,17 +259,17 @@ value get_roles entry = fun ...@@ -259,17 +259,17 @@ value get_roles entry = fun
else Actor (gram_role n entry c) g n (* beware n duplication *) else Actor (gram_role n entry c) g n (* beware n duplication *)
| Verb_form f n p -> get_fin_roles entry f n p | Verb_form f n p -> get_fin_roles entry f n p
| Abs_root _ -> get_abs_roles entry | Abs_root _ -> get_abs_roles entry
| Ind_form Conj -> match entry with | Und_form Conj -> match entry with
[ "ca" -> Tool Coordination [ "ca" -> Tool Coordination
| _ -> Ignored (* TODO vaa etc *) | _ -> Ignored (* TODO vaa etc *)
] ]
| Ind_form Prep -> if entry = "saha" || entry = "vinaa" || entry = "satraa" | Und_form Prep -> if entry = "saha" || entry = "vinaa" || entry = "satraa"
then Tool Post_instrument then Tool Post_instrument
else Ignored else Ignored
| Ind_form Adv -> if entry = "saha" then Tool Not_Post_instrument | Und_form Adv -> if entry = "saha" then Tool Not_Post_instrument
else Ignored else Ignored
| Ind_form Abs -> get_abs_roles entry | Und_form Abs -> get_abs_roles entry
| Ind_form Part -> match entry with | Und_form Part -> match entry with
[ "maa#2" -> Tool Prohibition [ "maa#2" -> Tool Prohibition
| _ -> Ignored | _ -> Ignored
] ]
......
...@@ -199,7 +199,7 @@ value sort_out accu form = fun ...@@ -199,7 +199,7 @@ value sort_out accu form = fun
] ]
| Bare_stem | Auxi_form -> (mas,fem,neu,any,[ f :: iic ],avy) | Bare_stem | Auxi_form -> (mas,fem,neu,any,[ f :: iic ],avy)
| Avyayaf_form -> (mas,fem,neu,any,iic,[ f :: avy ]) | Avyayaf_form -> (mas,fem,neu,any,iic,[ f :: avy ])
| Ind_form _ | Verb_form _ _ _ | Ind_verb _ | Abs_root _ | Und_form _ | Verb_form _ _ _ | Und_verb _ | Abs_root _
| Avyayai_form | Unanalysed | PV _ | Avyayai_form | Unanalysed | PV _
| Part_form _ _ _ _ -> | Part_form _ _ _ _ ->
failwith "Unexpected form in declensions" failwith "Unexpected form in declensions"
......
...@@ -290,7 +290,7 @@ value extract_gana_pada = fun ...@@ -290,7 +290,7 @@ value extract_gana_pada = fun
| Conjug _ v | Perfut v -> (None,v) | Conjug _ v | Perfut v -> (None,v)
] in ] in
(conj,(o_gana,pada_of_voice voice)) (conj,(o_gana,pada_of_voice voice))
| Ind_verb _ _ -> raise Unvoiced (* could be refined *) | Und_verb _ _ -> raise Unvoiced (* could be refined *)
| _ -> failwith "Unexpected root form" | _ -> failwith "Unexpected root form"
] ]
and extract_gana_pada_k krit = and extract_gana_pada_k krit =
...@@ -597,7 +597,7 @@ value validate out = match out with ...@@ -597,7 +597,7 @@ value validate out = match out with
else [] else []
] ]
| [ (Abso,rev_abso_form,s) :: [ (Pv,prev,sv) :: r ] ] -> | [ (Abso,rev_abso_form,s) :: [ (Pv,prev,sv) :: r ] ] ->
(* Takes care of absolutives in -ya and of infinitives with preverbs *) (* Takes care of absolutives in -ya and infinitives with preverbs *)
let pv = Word.mirror prev in let pv = Word.mirror prev in
let pv_str = Canon.decode pv let pv_str = Canon.decode pv
and abso_form = Word.mirror rev_abso_form in and abso_form = Word.mirror rev_abso_form in
...@@ -614,10 +614,18 @@ value validate out = match out with ...@@ -614,10 +614,18 @@ value validate out = match out with
] ]
(* We now prevent overgeneration of forms "sa" and "e.sa" \Pan{6,1,132} *) (* We now prevent overgeneration of forms "sa" and "e.sa" \Pan{6,1,132} *)
(*i TODO: similar test for dual forms i*) (*i TODO: similar test for dual forms i*)
| [ (ph,form,_) :: [ (Pron,[ 1; 48 ],_) :: _ ] ] (* sa *) | [ (ph,form,_) :: [ (Pron,[ 1; 48 ],_) :: _ ] ] (* sa *) ->
if Phonetics.consonant_initial (Word.mirror form)
then out else []
| [ (ph,form,_) :: [ (Pron,[ 1; 47; 10 ],_) :: _ ] ] (* e.sa *) -> | [ (ph,form,_) :: [ (Pron,[ 1; 47; 10 ],_) :: _ ] ] (* e.sa *) ->
if Phonetics.consonant_initial (Word.mirror form) if Phonetics.consonant_initial (Word.mirror form)
then out else [] then out else []
| [ (ph,form,_) :: [ (Pron,[ 48; 1; 48 ],_) :: _ ] ] (* sas *) ->
if Phonetics.consonant_initial (Word.mirror form) then []
else out
| [ (ph,form,_) :: [ (Pron,[ 48; 1; 47; 10 ],_) :: _ ] ] (* e.sas *) ->
if Phonetics.consonant_initial (Word.mirror form) then []
else out
(* Alternative: put infinitives in Root rather than Indecl+Abso (* Alternative: put infinitives in Root rather than Indecl+Abso
[| [ (Absc,_,_) :: _ ] [| [ (Absc,_,_) :: _ ]
| [ (Absv,_,_) :: _ ] -> check root is autonomous | [ (Absv,_,_) :: _ ] -> check root is autonomous
...@@ -640,7 +648,19 @@ This pv is not terminal, and should be chopped off by the next item *) ...@@ -640,7 +648,19 @@ This pv is not terminal, and should be chopped off by the next item *)
| _ -> out (* default identity *) | _ -> out (* default identity *)
] ]
; ;
(* [terminal_sa output] tells whether the head segment of [output] is a
   [Pron] segment carrying one of the two words tagged sa / e.sa below.
   The transition component of that segment and the rest of the list are
   ignored; any other shape of [output] (including the empty list) gives
   [False].
   NOTE(review): the letter codes look like the reversed spelling of the
   forms, as elsewhere in this module where forms are mirrored before
   use - confirm against the phase/word encoding. *)
value terminal_sa output = match output with
  [ [ (Pron,word,_) :: _ ] -> match word with
      [ [ 1; 48 ]     (* sa *)
      | [ 1; 47; 10 ] (* e.sa *) -> True
      | _ -> False
      ]
  | _ -> False
  ]
;
(*i unused
value terminal_sas = fun
[ [ (Pron,[ 48; 1; 48 ],_) :: _ ] (* sas *)
| [ (Pron,[ 48; 1; 47; 10 ],_) :: _ ] (* e.sas *) -> True
| _ -> False
]
; i*)
open Html; open Html;
value rec color_of_phase = fun value rec color_of_phase = fun
[ Noun | Noun2 | Lopak | Nouc | Nouv | Kriv | Kric | Krid | Auxik | Kama [ Noun | Noun2 | Lopak | Nouc | Nouv | Kriv | Kric | Krid | Auxik | Kama
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
(* *) (* *)
(* Gérard Huet *) (* Gérard Huet *)
(* *) (* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *) (* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************) (**************************************************************************)
(* Dispatcher: Sanskrit Engine in 55 phases automaton (plus 2 fake ones) *) (* Dispatcher: Sanskrit Engine in 55 phases automaton (plus 2 fake ones) *)
...@@ -53,7 +53,10 @@ value trim_tags : ...@@ -53,7 +53,10 @@ value trim_tags :
; ;
value validate : output -> output (* consistency check and glueing *) value validate : output -> output (* consistency check and glueing *)
; ;
value color_of_phase : phase -> Html.color; value terminal_sa : output -> bool
;
value color_of_phase : phase -> Html.color
;
end; end;
...@@ -13,8 +13,11 @@ ...@@ -13,8 +13,11 @@
open List2; (* unstack ass subtract *) open List2; (* unstack ass subtract *)
open Auto.Auto; (* auto rule choices State *) open Auto.Auto; (* auto rule choices State *)
module Segment (* used by Interface : [Viccheda = Segment Phases Machine Segment_control]
(Phases: sig where Machine = Dispatch Transducers Lemmas
where Lemmas = Load_morphs.Morphs Prel Phases *)
module Segment
(Phases: sig
type phase type phase
and phases = list phase; and phases = list phase;
value unknown : phase; value unknown : phase;
...@@ -23,8 +26,8 @@ module Segment ...@@ -23,8 +26,8 @@ module Segment
value ii_phase : phase -> bool; value ii_phase : phase -> bool;
value un_lopa : phase -> phase; value un_lopa : phase -> phase;
end) end)
(Eilenberg: sig (Eilenberg: sig (* To be instanciated by Dispatcher *)
value transducer : Phases.phase -> auto; value transducer : Phases.phase -> auto;
value initial : bool -> Phases.phases; value initial : bool -> Phases.phases;
value dispatch : bool -> Word.word -> Phases.phase -> Phases.phases; value dispatch : bool -> Word.word -> Phases.phase -> Phases.phases;
value accepting : Phases.phase -> bool; value accepting : Phases.phase -> bool;
...@@ -36,6 +39,8 @@ module Segment ...@@ -36,6 +39,8 @@ module Segment
and segment = (Phases.phase * Word.word * transition) and segment = (Phases.phase * Word.word * transition)
and output = list segment; and output = list segment;
value validate : output -> output; (* consistency check / compress *) value validate : output -> output; (* consistency check / compress *)
value terminal_sa : output -> bool;
(* unused value terminal_sas : output -> bool; *)
end) end)
(Control: sig value star : ref bool; (* chunk= if star then word+ else word *) (Control: sig value star : ref bool; (* chunk= if star then word+ else word *)
value full : ref bool; (* all kridantas and nan cpds if full *) value full : ref bool; (* all kridantas and nan cpds if full *)
...@@ -130,12 +135,21 @@ value register index (phase,pada,sandhi) = ...@@ -130,12 +135,21 @@ value register index (phase,pada,sandhi) =
| [] -> update_graph [ (phase,[ pada_right ]) ] (* new bucket *) | [] -> update_graph [ (phase,[ pada_right ]) ] (* new bucket *)
] ]
; ;
type chunk_params = { offset : mutable int; segmentable : mutable bool }
(* To avoid heavy functional transmission of chunk global parameters,
we define a record of chunk parameters.
NB. offset and last are inherited attributes, segmentable is synthesized. *)
type chunk_params = { offset : mutable int
; segmentable : mutable bool
; last : mutable bool (* for sa elimination in last chunk *)
}
; ;
value cur_chunk = { offset = 0; segmentable = False } value cur_chunk = { offset = 0; segmentable = False; last = False }
; ;
value set_cur_offset n = cur_chunk.offset := n value set_cur_offset n = cur_chunk.offset := n
and set_segmentable b = cur_chunk.segmentable := b and set_segmentable b = cur_chunk.segmentable := b
and set_last b = cur_chunk.last := b
; ;
value set_offset (offset,checkpoints) = do value set_offset (offset,checkpoints) = do
{ set_cur_offset offset { set_cur_offset offset
...@@ -154,9 +168,9 @@ value reset_visual () = for i = 0 to max_seg_rows-1 do ...@@ -154,9 +168,9 @@ value reset_visual () = for i = 0 to max_seg_rows-1 do
(* The offset permits to align each segment with the input string *) (* The offset permits to align each segment with the input string *)
value offset = fun value offset = fun
[ Euphony (w,u,v) -> [ Euphony (w,u,v) ->
let off = if w=[] then 1 (* amui/lopa from Lopa/Lopak *) let off = if w=[] then 1 (* amui/lopa from Lopa/Lopak *)
else Word.length w in else Word.length w in
off - (Word.length u + Word.length v) off - (Word.length u + Word.length v)
| Id -> 0 | Id -> 0
] ]
; ;
...@@ -361,6 +375,12 @@ type backtrack = ...@@ -361,6 +375,12 @@ type backtrack =
] ]
and resumption = list backtrack (* coroutine resumptions *) and resumption = list backtrack (* coroutine resumptions *)
; ;
(* [check_sa contracted] is the extra acceptance test applied to a candidate
   solution of the current chunk: it answers [False] only when the current
   chunk is flagged as the last one ([cur_chunk.last]) and
   [terminal_sa contracted] holds, i.e. it forbids sa / e.sa in final
   position. In every other situation it answers [True].

   A symmetric restriction, accepting the sa.h / e.sa.h forms only in the
   last chunk (through the currently unused [terminal_sas]), was considered
   but is too strict in view of padapatha and un-sandhied input mode.
   We therefore accept a little overgeneration, e.g. with sa.h yogii. *)
value check_sa contracted =
  if cur_chunk.last
  then not (terminal_sa contracted) (* forbid sa last *)
  else True
;
(* Service routines of the segmenter *) (* Service routines of the segmenter *)
...@@ -400,29 +420,29 @@ value rec react phase input output back occ = fun ...@@ -400,29 +420,29 @@ value rec react phase input output back occ = fun
let deter cont = match input with let deter cont = match input with
[ [] -> continue cont [ [] -> continue cont
| [ letter :: rest ] -> match ass letter det with | [ letter :: rest ] -> match ass letter det with
[ Some state -> [ Some state -> react phase rest output cont [ letter :: occ ] state
react phase rest output cont [ letter :: occ ] state
| None -> continue cont | None -> continue cont
] ]
] in ] in
let cont = if choices=[] then back (* non deterministic continuation *) let cont = if choices=[] then back (* non deterministic continuation *)
else [ Choose phase input output occ choices :: back ] in else [ Choose phase input output occ choices :: back ] in
(* now we look for - or + segmentation pragma *) (* now we look for - or + segmentation hint *)
let (keep,cut,input') = match input with let (keep,cut,input') = match input with
[ [ 0 :: rest ] -> (* explicit "-" compound break hint *) [ [ 0 :: rest ] -> (* explicit "-" compound break hint *)
(ii_phase phase,True,rest) (ii_phase phase,True,rest)
| [ -10 :: rest ] -> (* mandatory segmentation indicated by "+" *) | [ -10 :: rest ] -> (* mandatory segmentation "+" *)
(True,True,rest) (True,True,rest)
| _ -> (True,False,input) (* no hint in input *) | _ -> (True,False,input) (* no hint in input *)
] in ] in
if accept && keep then if accept && keep then
let segment = (phase,occ,Id) in let segment = (phase,occ,Id) in
let out = accrue segment output in (*i unknown Id sandhi - TODO i*) let out = accrue segment output in (*i unknown Id sandhi - TODO i*)
match validate out with match validate out (* validate and compact partial output *) with
[ [] -> if cut then continue cont else deter cont [ [] -> if cut then continue cont else deter cont
| contracted -> match input' with | contracted -> match input' with
[ [] -> if accepting phase then (* solution found *) [ [] -> if accepting phase (* solution found *)
do { log_chunk contracted; continue cont } && check_sa contracted (* forbid sa last *)
then do { log_chunk contracted; continue cont }
else continue cont else continue cont
| [ first :: _ ] -> (* we first try the longest matching word *) | [ first :: _ ] -> (* we first try the longest matching word *)
let cont' = schedule phase input' contracted [] cont in let cont' = schedule phase input' contracted [] cont in
...@@ -447,7 +467,8 @@ and choose phase input output back occ = fun ...@@ -447,7 +467,8 @@ and choose phase input output back occ = fun
| contracted -> | contracted ->
if v=[] (* final sandhi *) then if v=[] (* final sandhi *) then
if rest=[] && accepting phase (* solution found *) if rest=[] && accepting phase (* solution found *)
then do { log_chunk contracted; continue cont } && check_sa contracted (* forbid sa last *)
then do { log_chunk contracted; continue cont }
else continue cont else continue cont
else continue (schedule phase rest contracted v cont) else continue (schedule phase rest contracted v cont)
] ]
...@@ -486,7 +507,7 @@ value segment chunk = do ...@@ -486,7 +507,7 @@ value segment chunk = do
; ;
(* Splitting checkpoints into current and future ones *) (* Splitting checkpoints into current and future ones *)
value split_check limit = split_rec [] value split_check limit = split_rec []
where rec split_rec acc checkpts = match checkpts with where rec split_rec acc checkpts = match checkpts with
[ [] -> (Word.mirror acc,[]) [ [] -> (Word.mirror acc,[])
| [ ((index,_,_) as check) :: rest ] -> | [ ((index,_,_) as check) :: rest ] ->
if index > limit then (Word.mirror acc,checkpts) if index > limit then (Word.mirror acc,checkpts)
...@@ -496,18 +517,18 @@ value split_check limit = split_rec [] ...@@ -496,18 +517,18 @@ value split_check limit = split_rec []
(* We do not need to [dove_tail] like in Rank, since chunks are independent. *) (* We do not need to [dove_tail] like in Rank, since chunks are independent. *)
(* Returns a pair (b,n) where b is True if all chunks are segmentable so far, (* Returns a pair (b,n) where b is True if all chunks are segmentable so far,
and n is the number of potential solutions *) and n is the number of potential solutions *)
value segment_all = List.fold_left segment_chunk (True,Num.Int 1) value segment_chunk (full,count) chunk last =
where segment_chunk (flag,count) chunk =
let extremity = cur_chunk.offset+Word.length chunk in let extremity = cur_chunk.offset+Word.length chunk in
let (local,future) = split_check extremity chkpts.all_checks in do let (local,future) = split_check extremity chkpts.all_checks in do
{ chkpts.segment_checks := local { chkpts.segment_checks := local
; set_last last
; let segmentable = segment chunk ; let segmentable = segment chunk
and local_count = get_counter () in do and local_count = get_counter () in do
{ set_segmentable False { set_segmentable False
; set_offset (succ extremity,future) ; set_offset (succ extremity,future)
; if segmentable then do ; if segmentable then do
{ reset_counter () { reset_counter ()
; (flag,Num.mult_num count (Num.Int local_count)) ; (full,Num.mult_num count (Num.Int local_count))
(* we have [local_count] segmentations of the local [chunk], and, (* we have [local_count] segmentations of the local [chunk], and,
chunks being independent, the total number of solutions multiply *) chunks being independent, the total number of solutions multiply *)
} }
...@@ -515,6 +536,14 @@ value segment_all = List.fold_left segment_chunk (True,Num.Int 1) ...@@ -515,6 +536,14 @@ value segment_all = List.fold_left segment_chunk (True,Num.Int 1)
} }
} }
; ;
(* [segment_iter chunks] runs [segment_chunk] over [chunks] from left to
   right, threading the accumulator that starts at [(True,Num.Int 1)].
   Every chunk is processed with its [last] argument set to [False], except
   the final one which gets [True]. An empty list of chunks returns the
   initial accumulator unchanged. *)
value segment_iter chunks =
  let rec iter acc remaining = match remaining with (* tail recursive *)
    [ [] -> acc
    | [ final ] -> segment_chunk acc final True (* last chunk *)
    | [ chunk :: others ] -> iter (segment_chunk acc chunk False) others
    ] in
  iter (True,Num.Int 1) chunks
;
end; (* Segment *) end; (* Segment *)
...@@ -126,11 +126,11 @@ value peri = ref (Deco.empty : inflected_map) ...@@ -126,11 +126,11 @@ value peri = ref (Deco.empty : inflected_map)
value add_morphperi w d i = value add_morphperi w d i =
peri.val := Lexmap.addl peri.val w (d w,i) peri.val := Lexmap.addl peri.val w (d w,i)
; ;
(* indeclinable forms - adverbs, conjonctions, particles *) (* undeclinable forms - adverbs, conjonctions, particles *)
value indecls = ref (Deco.empty : inflected_map) value undecls = ref (Deco.empty : inflected_map)
; ;
value add_morphin w d i = value add_morphund w d i =
indecls.val := Lexmap.addl indecls.val w (d w,i) undecls.val := Lexmap.addl undecls.val w (d w,i)
; ;
(* invocations are registered in invs *) (* invocations are registered in invs *)
value invs = ref (Deco.empty : inflected_map) value invs = ref (Deco.empty : inflected_map)
...@@ -349,7 +349,7 @@ value add_morphauxiick w stem i = ...@@ -349,7 +349,7 @@ value add_morphauxiick w stem i =
value inftu = ref (Deco.empty : inflected_map) value inftu = ref (Deco.empty : inflected_map)
and kama = ref (Deco.empty : inflected_map) and kama = ref (Deco.empty : inflected_map)
; ;
value add_morphinftu w d i = (* similar to [add_morphin] *) value add_morphinftu w d i = (* similar to [add_morphund] *)
if Phonetics.phantomatic w then () else if Phonetics.phantomatic w then () else
inftu.val := Lexmap.addl inftu.val w (d w,i) inftu.val := Lexmap.addl inftu.val w (d w,i)
and add_morphkama w d i = (* similar to [add_morph] *) and add_morphkama w d i = (* similar to [add_morph] *)
...@@ -372,7 +372,7 @@ type nominal = ...@@ -372,7 +372,7 @@ type nominal =
type flexion = type flexion =
[ Declined of nominal and gender and list (number * list (case * word)) [ Declined of nominal and gender and list (number * list (case * word))
| Conju of finite and list (number * list (person * word)) | Conju of finite and list (number * list (person * word))
| Indecl of ind_kind and word (* avyaya, particle, interjection, nota *) | Undecl of und_kind and word (* avyaya, particle, interjection, nota *)
| Bare of nominal and word (* Iic *) | Bare of nominal and word (* Iic *)
| Avyayai of word (* Iic of avyayiibhaava cpd *) | Avyayai of word (* Iic of avyayiibhaava cpd *)
| Avyayaf of word (* Ifc of avyayiibhaava cpd *) | Avyayaf of word (* Ifc of avyayiibhaava cpd *)
...@@ -432,10 +432,10 @@ value enter1 entry = ...@@ -432,10 +432,10 @@ value enter1 entry =
; (* Now auxiliaries for verbal cvi compounds *) ; (* Now auxiliaries for verbal cvi compounds *)
if auxiliary entry then add_morphauxi w delta v else () if auxiliary entry then add_morphauxi w delta v else ()
} }
| Indecl k w -> match k with | Undecl k w -> match k with
[ Adv | Part | Conj | Default | Prep | Tas -> [ Adv | Part | Conj | Default | Prep | Tas ->
add_morphin w delta (Ind_form k) add_morphund w delta (Und_form k)
| Interj -> add_invoc w delta (Ind_form k) | Interj -> add_invoc w delta (Und_form k)
| Avya -> () (* since generative *) | Avya -> () (* since generative *)
| Abs | Infl | Nota -> () (* no recording in morph tables *) | Abs | Infl | Nota -> () (* no recording in morph tables *)
(* Abs generated by absolutives of verbs, Infl by flexions of nouns, and (* Abs generated by absolutives of verbs, Infl by flexions of nouns, and
...@@ -447,7 +447,7 @@ value enter1 entry = ...@@ -447,7 +447,7 @@ value enter1 entry =
| Avyayaf w -> add_morphyaf w delta Avyayaf_form | Avyayaf w -> add_morphyaf w delta Avyayaf_form
| Cvi w -> add_morphvi w delta Auxi_form | Cvi w -> add_morphvi w delta Auxi_form
| Invar m w -> let (_,vi) = m | Invar m w -> let (_,vi) = m
and f = Ind_verb m in and f = Und_verb m in
match vi with match vi with
[ Infi -> do (* 2 cases: with and without preverbs - saves one phase *) [ Infi -> do (* 2 cases: with and without preverbs - saves one phase *)
{ add_morphabsya w delta f aapv { add_morphabsya w delta f aapv
...@@ -466,7 +466,7 @@ value enter1 entry = ...@@ -466,7 +466,7 @@ value enter1 entry =
(* NB Allows perpft of verbs with preverbs but overgenerates since (* NB Allows perpft of verbs with preverbs but overgenerates since
it allows perpft followed by a non perfect form of auxiliary *) it allows perpft followed by a non perfect form of auxiliary *)
] ]
| Inftu m w -> let f = Ind_verb (m,Infi) in | Inftu m w -> let f = Und_verb (m,Infi) in
add_morphinftu w delta f (* infinitive in -tu *) add_morphinftu w delta f (* infinitive in -tu *)
| Absotvaa c w -> let f = Abs_root c in | Absotvaa c w -> let f = Abs_root c in
add_morphabstvaa w delta f (* abs-tvaa: no preverb *) add_morphabstvaa w delta f (* abs-tvaa: no preverb *)
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
(* *) (* *)
(* Gérard Huet *) (* Gérard Huet *)
(* *) (* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *) (* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************) (**************************************************************************)
(*i module Inflected : sig i*) (*i module Inflected : sig i*)
...@@ -31,7 +31,7 @@ value peri : ref inflected_map; ...@@ -31,7 +31,7 @@ value peri : ref inflected_map;
value auxi : ref inflected_map; value auxi : ref inflected_map;
value auxik : ref inflected_map; value auxik : ref inflected_map;
value auxiick : ref inflected_map; value auxiick : ref inflected_map;
value indecls : ref inflected_map; value undecls : ref inflected_map;
value invs : ref inflected_map; value invs : ref inflected_map;
value absya : ref inflected_map; value absya : ref inflected_map;
value abstvaa : ref inflected_map; value abstvaa : ref inflected_map;
...@@ -57,7 +57,7 @@ type nominal = ...@@ -57,7 +57,7 @@ type nominal =
type flexion = type flexion =
[ Declined of nominal and gender and list (number * list (case * Word.word)) [ Declined of nominal and gender and list (number * list (case * Word.word))
| Conju of finite and list (number * list (person * Word.word)) | Conju of finite and list (number * list (person * Word.word))
| Indecl of ind_kind and Word.word | Undecl of und_kind and Word.word
| Bare of nominal and Word.word | Bare of nominal and Word.word
| Avyayai of Word.word (* Iic of avyayiibhaava cpd *) | Avyayai of Word.word (* Iic of avyayiibhaava cpd *)
| Avyayaf of Word.word (* Ifc of avyayiibhaava cpd *) | Avyayaf of Word.word (* Ifc of avyayiibhaava cpd *)
......
...@@ -53,7 +53,7 @@ module Transducers = Trans Prel ...@@ -53,7 +53,7 @@ module Transducers = Trans Prel
; ;
module Machine = Dispatch Transducers Lemmas module Machine = Dispatch Transducers Lemmas
; ;
open Machine (* [cache_phase] *) open Machine
; ;
(* At this point we have a Finite Eilenberg machine ready to instantiate *) (* At this point we have a Finite Eilenberg machine ready to instantiate *)
(* the Eilenberg component of the Segment module. *) (* the Eilenberg component of the Segment module. *)
...@@ -74,7 +74,7 @@ end (* [Segment_control] *) ...@@ -74,7 +74,7 @@ end (* [Segment_control] *)
; ;
module Viccheda = Segment Phases Machine Segment_control module Viccheda = Segment Phases Machine Segment_control
; ;
open Viccheda (* [segment_all visual_width] etc. *) open Viccheda (* [segment_iter visual_width] etc. *)
; ;
(* At this point we have the sandhi inverser segmenting engine *) (* At this point we have the sandhi inverser segmenting engine *)
...@@ -416,7 +416,7 @@ value check_sentence translit us text_orig checkpoints sentence ...@@ -416,7 +416,7 @@ value check_sentence translit us text_orig checkpoints sentence
let devainput = String.concat " " devachunks let devainput = String.concat " " devachunks
and cpts = sort_check checkpoints in and cpts = sort_check checkpoints in
let _ = chkpts.all_checks := cpts let _ = chkpts.all_checks := cpts
and (flag,count) = segment_all chunks in and (full,count) = segment_iter chunks in (* full iff all chunks segment *)
let text = match sol_num with let text = match sol_num with
[ "0" -> update_text_with_sol text_orig count [ "0" -> update_text_with_sol text_orig count