Commit 24d8a8a8 authored by Gérard Huet's avatar Gérard Huet

Fix is_conflicting for caakiirti.h/naabibhyat/caatathyam and anavatara.h

parent 26fa3951
...@@ -155,6 +155,7 @@ value call_parser text cpts = ...@@ -155,6 +155,7 @@ value call_parser text cpts =
";cpts=" ^ string_points cpts ^ ";n=1" in ";cpts=" ^ string_points cpts ^ ";n=1" in
anchor Green_ (invoke cgi) check_sign anchor Green_ (invoke cgi) check_sign
; ;
(* Legacy interface with Sanskrit Library [
value call_SL text cpts mode corpus solutions sent_id link_num = value call_SL text cpts mode corpus solutions sent_id link_num =
let cgi = tomcat ^ corpus ^ "/SaveTagging?slp1Sentence=" let cgi = tomcat ^ corpus ^ "/SaveTagging?slp1Sentence="
^ text ^ "&numSolutions=" ^ (string_of_int solutions) ^ text ^ "&numSolutions=" ^ (string_of_int solutions)
...@@ -164,6 +165,10 @@ value call_SL text cpts mode corpus solutions sent_id link_num = ...@@ -164,6 +165,10 @@ value call_SL text cpts mode corpus solutions sent_id link_num =
^ string_points cpts in ^ string_points cpts in
anchor Green_ (invoke cgi) check_sign anchor Green_ (invoke cgi) check_sign
; ;
value invoke_SL text cpts corpus_id count sent_id link_num =
ps (td_wrap (call_SL text cpts "t" corpus_id count sent_id link_num
^ "Sanskrit Library Interface"))
;] *)
value sort_check cpts = value sort_check cpts =
let compare_index (a,_,_) (b,_,_) = compare a b in let compare_index (a,_,_) (b,_,_) = compare a b in
List.sort compare_index cpts List.sort compare_index cpts
...@@ -206,50 +211,47 @@ value build_visual k segments = ...@@ -206,50 +211,47 @@ value build_visual k segments =
] ]
; ;
(* We check whether the current segment [(w,tr,phase,k)] is conflicting with (* We check whether the current segment [(w,tr,phase,k)] is conflicting with
others at previous offset [l]; if not it is mandatory and marked blue. *) others at previous offset [n]; if not it is mandatory and marked blue. *)
(* Returns True for blue mandatory segments, False for green/red optional ones *)
(* Warning: hairy code, do not change without understanding the theory. *) (* Warning: hairy code, do not change without understanding the theory. *)
value is_conflicting (w,tr,phase,k) = value is_conflicting ((w,tr,ph,k) as segment) =
let l_w = seg_length w in is_conflicting_rec 0 let l = seg_length w in is_conflicting_rec 0
where rec is_conflicting_rec l = match visual.(l) with where rec is_conflicting_rec n = (* n is position in input string *)
[ [] -> False match visual.(n) with
| segs -> does_conflict segs [ [] -> False (* will exit here when n is length of input *)
| segs -> does_conflict segs (* we search for conflicting segments *)
where rec does_conflict = fun where rec does_conflict = fun
[ [] -> is_conflicting_rec (l+1) [ [] -> is_conflicting_rec (n+1) (* go to next input position *)
| [ (w1,tr1,phase1,k1) :: rest ] -> | [ ((w',tr',ph',k') as segment') :: rest ] ->
if (w1,tr1,phase1,k1)=(w,tr,phase,k) if segment'=segment then (* skip itself *) does_conflict rest
then (* skip itself *) does_conflict rest else let l' = seg_length w' in
else let l_w1 = seg_length w1 in if (k'<=k && k'+l'-1>k) (* w inside w' *)
if (k1<=k && k1+l_w1-1>k) || (k'<=k && k'+l'-1>=k && l=1) (* w is a or aa *)
|| (k1<=k && k1+l_w1-1>=k && l_w=1)
(* This condition is necessary for the overlapping case *) (* This condition is necessary for the overlapping case *)
|| (k<=k1 && k+l_w-1>k1 && l_w1>1) then || (k<=k' && k+l-1>k') then
(* This condition refines [(k<=k1 && k+l_w-1>k1)] but is modified if k+l-1=k' then let r' = Word.mirror w' in match_tr tr
here to take care of cases such as elayati. We do not say that elayati (* This is to check for the overlapping case, occurs when [k=k', l=1].
(at k) conflicts with a segment aa (at the same offset). If it were
conflicting, there would have existed another segment, which would be
sufficient to prove the conflict. It also points to the fact that
conflicting is not a symmetric relation. We might have to include
a test as we did below *)
if k+l_w-1=k1 then match_tr tr
(* This is to check for the overlapping case, occurs when [k=k1, l_w=1].
We need to check the sandhi conditions to decide whether this is a case We need to check the sandhi conditions to decide whether this is a case
of overlap or conflict. *) of overlap or conflict. *)
where rec match_tr = fun where rec match_tr = fun
[ [] -> True [ [] -> True
| [ v :: rst ] -> match v with | [ v :: rst ] -> match v with
[ [] -> match_tr rst [ [] -> match_tr rst
| _ -> if Word.prefix v (Word.mirror w1) | _ -> if Word.prefix v r'
then does_conflict rest then does_conflict rest
else match_tr rst else match_tr rst
] ]
] ]
else if (k1<=k && k1+l_w1-1>=k && l_w=1) then match_tr1 tr1 else if (k'<=k && k'+l'-1>=k && l=1) then match_tr' tr'
(* For the case with [l_w=1], this is to check whether w is the only (* For the case with [l=1], this is to check whether w is the only
possible v for w1, then it is an overlap returning a blue sign. possible v for w', in which case it is an overlap returning a blue sign.
If w1 has any other possible v's, there is a conflict. *) If w' has any other possible v's, there is a conflict. *)
where rec match_tr1 = fun where rec match_tr' = fun
[ [] -> does_conflict rest
| [ v :: rst ] -> match v with
[ [] -> does_conflict rest [ [] -> does_conflict rest
| [ v :: rst ] -> Word.prefix v w || match_tr1 rst | _ -> if v = w then match_tr' rst else True
]
] ]
else True else True
else does_conflict rest else does_conflict rest
...@@ -378,10 +380,6 @@ value print_interf text cpts () = vgrec 0 ...@@ -378,10 +380,6 @@ value print_interf text cpts () = vgrec 0
value update_col_length chunk = value update_col_length chunk =
max_col.val := succ (max_col.val + Word.length chunk) max_col.val := succ (max_col.val + Word.length chunk)
; ;
value invoke_SL text cpts corpus_id count sent_id link_num =
ps (td_wrap (call_SL text cpts "t" corpus_id count sent_id link_num
^ "Sanskrit Library Interface"))
;
value update_text_with_sol text count = text ^ ";allSol=" ^ string_of_int count value update_text_with_sol text count = text ^ ";allSol=" ^ string_of_int count
; ;
value call_undo text cpts = value call_undo text cpts =
...@@ -639,7 +637,7 @@ value graph_engine () = do ...@@ -639,7 +637,7 @@ value graph_engine () = do
| Encode.In_error s -> abort lang "Wrong input " s | Encode.In_error s -> abort lang "Wrong input " s
| Exit (* Sanskrit *) -> abort lang "Wrong character in input" "" | Exit (* Sanskrit *) -> abort lang "Wrong character in input" ""
| Overflow -> abort lang "Maximum input size exceeded" "" | Overflow -> abort lang "Maximum input size exceeded" ""
| Invalid_argument s -> abort lang Control.fatal_err_mess s (* sub *) | Invalid_argument s -> abort lang Control.fatal_err_mess s (* sub array *)
| Failure s -> abort lang Control.fatal_err_mess s (* anomaly *) | Failure s -> abort lang Control.fatal_err_mess s (* anomaly *)
| End_of_file -> abort lang Control.fatal_err_mess "EOF" (* EOF *) | End_of_file -> abort lang Control.fatal_err_mess "EOF" (* EOF *)
| Not_found -> let s = "You must choose a parsing option" in | Not_found -> let s = "You must choose a parsing option" in
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment