Commit ff86ab64 authored by Gérard Huet

Notation ! for mandatory break, useful for vocatives

parent 6a68ee11
@@ -115,28 +115,23 @@ value sentence_links dir permission sentences =
     match font with
     [ Multilingual.Deva -> Corpus.Encoding.Devanagari
     | Multilingual.Roma -> Corpus.Encoding.IAST
-    ]
-  in
+    ] in
   let text = Corpus.Sentence.text encoding sentence in
   let display =
     match font with
     [ Multilingual.Deva -> deva16_blue
     | Multilingual.Roma -> span Trans16
-    ]
-  in
+    ] in
   text
   |> anchor_ref (sentence |> Web_corpus.url dir permission |> escape)
-  |> display
-  in
+  |> display in
   List.map to_anchor_ref sentences
 ;
 value section_selection dir sections =
   let options =
     let prefixes =
-      List.map (fun x -> Filename.concat dir x) sections
-    in
-    List.combine prefixes sections
-  in
+      List.map (fun x -> Filename.concat dir x) sections in
+    List.combine prefixes sections in
   option_select_label Params.corpus_dir options
 ;
 value add_sentence_form dir permission gap =
......
@@ -4,7 +4,7 @@
 (*                                                                        *)
 (*                              Idir Lankri                               *)
 (*                                                                        *)
-(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
+(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
 (**************************************************************************)
 (* CGI script [manager] for corpus management, i.e. for listing and
@@ -12,16 +12,14 @@
 value main =
   let env = Cgi.create_env (Cgi.query_string ()) in
-  let corpdir = Cgi.decoded_get Params.corpus_dir "" env in
-  let permission =
-    Web_corpus.permission_of_string (Cgi.decoded_get Params.corpus_permission "" env)
-  in
+  let corpdir = Cgi.decoded_get Params.corpus_dir "" env
+  and corpperm = Cgi.decoded_get Params.corpus_permission "" env in
+  let permission = Web_corpus.permission_of_string corpperm in
+  let lang = Html.default_language in
   try
     Corpus_manager.mk_page corpdir permission
   with
-  [ Sys_error msg -> Web.abort Html.default_language Control.sys_err_mess msg
-  | _ ->
-      Web.abort Html.default_language Control.fatal_err_mess
-        "Unexpected anomaly"
+  [ Sys_error msg -> Web.abort lang Control.sys_err_mess msg
+  | _ -> Web.abort lang Control.fatal_err_mess "Unexpected anomaly"
   ]
 ;
@@ -38,7 +38,7 @@ value xml_next op = xml_end op ^ xml_begin op
 value html_break = xml_empty "br"
 and html_paragraph = xml_begin "p" ^ xml_end "p"
 ;
-(* array operations *)
+(* Array operations *)
 value tr_begin = xml_begin "tr"
 and tr_end = xml_end "tr"
 and th_begin = xml_begin "th"
@@ -124,13 +124,11 @@ value int_input ?id ?val ?(step = 1) ?(min = min_int) ?(max = max_int) name =
     ; ("step", string_of_int step)
     ; ("min", string_of_int min)
     ; ("max", string_of_int max)
-    ]
-  in
+    ] in
   let opt_attrs =
     [ ("id", id)
     ; ("value", Gen.opt_app string_of_int val)
-    ]
-  in
+    ] in
   let attrs = add_opt_attrs opt_attrs attrs in
   xml_empty_with_att "input" attrs
 ;
@@ -172,24 +170,16 @@ value li ?id item =
 (* Ordered list *)
 value ol ?id ?li_id_prefix ?(start = 1) items =
   let ol = "ol" in
-  let items =
-    List.mapi (fun i item ->
-      let id =
-        let genid prefix = prefix ^ string_of_int (start + i) in
-        Gen.opt_app genid li_id_prefix
-      in
-      li ?id item
-    ) items
-  in
-  let list = String.concat "\n" items in
+  let process i item =
+    let id = let genid prefix = prefix ^ string_of_int (start + i) in
+             Gen.opt_app genid li_id_prefix in
+    li ?id item in
+  let lines = List.mapi process items in
+  let list = String.concat "\n" lines in
   let attrs =
-    add_opt_attrs [ ("id", id) ] [ ("start", string_of_int start) ]
-  in
-  xml_begin_with_att ol attrs ^ "\n" ^
-  list ^ "\n" ^
-  xml_end ol
+    add_opt_attrs [ ("id", id) ] [ ("start", string_of_int start) ] in
+  xml_begin_with_att ol attrs ^ "\n" ^ list ^ "\n" ^ xml_end ol
 ;
 value fieldn name content = [ ("name",name); ("content",content) ]
 and fieldp name content = [ ("property",name); ("content",content) ]
 ;
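For illustration only (the call and its output below are hypothetical, not part of the commit), assuming [xml_begin_with_att], [li] and [Gen.opt_app] render in the obvious way, the refactored [ol] would turn

    ol ~li_id_prefix:"item" ~start:2 [ "one"; "two" ]

into

    <ol start="2">
    <li id="item2">one</li>
    <li id="item3">two</li>
    </ol>

since [genid] appends the 1-origin position (start + i) to the given prefix.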
@@ -203,22 +193,6 @@ type color =
 | Gold | Magenta | Mauve | Pink | Salmon | Lime | Light_blue | Lavender
 | Lawngreen | Deep_pink | Pale_yellow | Pale_rose | Beige ]
 ;
-(* TO be relocated [
-type pict = string (* misc background pictures *)
-  [ Om | Om2 | Om3 | Om4 | Gan | Hare | Geo ]
-;
-(* Deprecated, for use as background pictures like in the ancient Web... *)
-(* Problematic, since pollutes with installation-dependent URLs *)
-value pict = fun
-  [ "Om" -> Install.om_jpg
-  | "Om2" -> Install.om2_jpg
-  | "Om3" -> Install.om3_jpg
-  | "Om4" -> Install.om4_jpg
-  | "Gan" -> Install.ganesh_gif
-  | "Geo" -> Install.geo_gif
-  | "Hare" -> Install.hare_jpg
-  ]
-;] *)
 type basic_style =
 [ Font_family of font_family
 | Font_style of font_style
@@ -357,7 +331,6 @@ type style_class =
 | Deep_sky_back | Carmin_back | Orange_back | Red_back | Mauve_back
 | Lavender_back | Lavender_cent | Green_back | Lawngreen_back | Magenta_back
 | Aquamarine_back | Hidden_
-(*[ | Pict_om | Pict_om2 | Pict_om3 | Pict_om4 | Pict_gan | Pict_hare | Pict_geo ]*)
 ]
 ;
 value background = fun
@@ -470,7 +443,7 @@ value styles = fun
 ]
 ;
 (* Compiles a class into its style for non-css compliant browsers *)
-(* Mostly used by Css to compile the css style sheet *)
+(* Nowadays mostly used by Css to compile the css style sheet *)
 value style cla = String.concat "; " (List.map style_sheet (styles cla))
 ;
 value class_of = fun
@@ -549,7 +522,7 @@ value class_of = fun
 (* This support was necessary for Simputer platform *)
 value elt_begin_attrs attrs elt cl =
   let style_attr = (* if Install.css then *) ("class",class_of cl)
                    (* else ("style",style cl) *) in
   xml_begin_with_att elt [ style_attr :: attrs ]
 ;
 value elt_begin = elt_begin_attrs []
@@ -564,7 +537,7 @@ and div_begin = elt_begin "div"
 and body_begin = elt_begin "body"
 and body_begin_style = elt_begin_attrs margins "body" (* Body margins are null *)
   where margins = [ ("style","margin-left: 0; margin-right: 0; margin-top: 0;") ]
-(* [table_begin_style] not compliant with HTML5 (dynamic style) *)
+(* Caution: [table_begin_style] is not compliant with HTML5 (dynamic style) *)
 and table_begin_style style attrs = elt_begin_attrs attrs "table" style
 and table_begin = elt_begin "table"
 and td_begin_class = elt_begin "td"
@@ -594,15 +567,13 @@ value span style text = span_begin style ^ text ^ span_end
 and span_skt style text = span_skt_begin style ^ text ^ span_end
 and div style text = div_begin style ^ text ^ div_end
 ;
-value center = div Center_
-and center_begin = div_begin Center_
+(* Centering old style - deprecated *)
+value center = div Center_
+and center_begin = div_begin Center_
 and center_end = div_end
 ;
-value center_image name caption =
-  center (xml_empty_with_att "img" [ ("src",name); ("alt",caption) ])
-;
-value html_red = span Red_
-and html_devared = span_skt Devared_
+value html_red = span Red_
+and html_devared = span_skt Devared_
 and html_magenta = span Magenta_
 and html_blue = span Blue_
 and html_green = span Green_
@@ -666,11 +637,10 @@ value author = fieldn "author" author_name
 and date_copyrighted = fieldp "dc:datecopyrighted" current_year
 and rights_holder = fieldp "dc:rightsholder" author_name
 and keywords = fieldn "keywords"
-  "dictionary,sanskrit,heritage,dictionnaire,sanscrit,india,inde,indology,linguistics,panini,digital humanities,cultural heritage,computational linguistics,hypertext lexicon"
+  "sanskrit,dictionary,heritage,dictionnaire,sanscrit,india,inde,indology,linguistics,panini,digital humanities,digital libraries,cultural heritage,computational linguistics,hypertext lexicon"
 ;
 value heritage_dictionary_title = title "Sanskrit Heritage Dictionary"
 ;
-(* was in Install *)
 (* Supported publishing media *)
 type medium = [ Html | Tex ]
 ;
@@ -680,7 +650,7 @@ type platform = [ Simputer | Computer | Station | Server ]
 (* Current target platform to customize - needs recompiling if changed *)
 value target = match Paths.platform with
   [ "Simputer" -> Simputer (* Historical - small screen *)
-  | "Smartphone" -> Simputer (* Smartphone version not implemented yet *)
+  | "Smartphone" | "Tablet" -> Simputer (* TODO *)
   | "Computer" -> Computer (* Standard client installation *)
   | "Station" -> Station (* Permits external Analysis mode *)
   | "Server" -> Server (* Http server for Internet web services *)
@@ -702,7 +672,7 @@ type language = [ French | English ]
 (* Two indexing lexicons are supported, French SH and English MW.*)
 value lexicon_of = fun
   [ French -> "SH" (* Sanskrit Heritage *)
   | English -> "MW" (* Monier-Williams *)
   ]
 and language_of = fun
   [ "SH" -> French
......
@@ -28,7 +28,7 @@ value admits_aa = ref False
 and admits_lopa = ref False
 ;
 value morpho_gen = ref True (* morphology generation time *)
-(* Turn to [False] for cgi execution (fake conjugation and nophantoms) *)
+(* Turn to [False] for cgi execution (fake conjugation and no phantoms) *)
 ;
 (* The [inflected_map] lexicons of inflected forms: nouns, iics, etc are computed
    by [Make_nouns] and are dumped as persistent global databases nouns.rem etc.
......
@@ -7,7 +7,7 @@
 (* ©2018 Institut National de Recherche en Informatique et en Automatique *)
 (**************************************************************************)
-(* Sanskrit Reader Summarizing interface. *)
+(* Sanskrit Reader Summarizing interface. Yields sktgraph.cgi *)
 (* We construct a CGI Interface displaying the segmentation graph in which the
    user may indicate segments as mandatory checkpoints. At any point he may
@@ -20,9 +20,9 @@ open Graph_segmenter; (* [Segment cur_chunk set_cur_offset graph visual] *)
 open Phases; (* [Phases] *)
 open Phases; (* [phase is_cache generative] *)
 open Dispatcher; (* [transducer_vect phase Dispatch transition trim_tags] *)
-open Html;
+open Html; (* html constructors *)
 open Web; (* [ps pl abort reader_cgi scl_toggle] etc. *)
-open Cgi;
+open Cgi; (* [url get decode_url] *)
 module Prel = struct (* Interface's lexer prelude *)
@@ -179,8 +179,8 @@ value sort_check cpts =
   List.sort compare_index cpts
 ;
 value seg_length = fun
-  [ [ -2 :: rest ] -> Word.length rest
+  [ [ -2 :: rest ] -> Word.length rest (* lopa does not count *)
   | w -> Word.length w
   ]
 ;
 value rec merge_rec lpw = fun
@@ -433,7 +433,7 @@ value check_sentence translit us text_orig checkpoints sentence
 ; td_wrap (call_undo text checkpoints ^ "Undo") |> ps
 ; let call_scl_parser n = (* invocation of scl parser *)
     if scl_toggle then
-       ps (td_wrap (call_reader text cpts "o" ^ "UoH Analysis Mode"))
+       td_wrap (call_reader text cpts "o" ^ "UoH Analysis Mode") |> ps
     else () (* [scl_parser] is not visible unless toggle is set *) in
   match count with
   [ Num.Int n -> if n > max_count then
@@ -498,7 +498,7 @@ value make_cache_transducer (cache : Morphology.inflected_map) =
   ; Gen.dump auto_cache public_transca_file (* for [Load_transducers] *)
   }
 ;
-(* We fill gendered entries incrementally in a [public_cache_txt_file] *)
+(* We fill gendered entries incrementally in [public_cache_txt_file] *)
 value append_cache entry gender =
   let cho = open_out_gen [ Open_wronly; Open_append; Open_text ] 0o777
                          public_cache_txt_file in do
@@ -523,7 +523,7 @@ value quit_button corpmode corpdir sentno =
     ])
   and permission = Web_corpus.string_of_permission corpmode in
   center_begin ^
-  cgi_begin (Cgi.url corpus_manager_cgi ~fragment:sentno) "" ^
+  cgi_begin (url corpus_manager_cgi ~fragment:sentno) "" ^
   hidden_input Params.corpus_dir corpdir ^
   hidden_input Params.corpus_permission permission ^
   submit_input submit_button_label ^
@@ -535,7 +535,13 @@ value graph_engine () = do
 { Prel.prelude ()
 ; let query = Sys.getenv "QUERY_STRING" in
   let env = create_env query in
-  let url_encoded_input = get "text" env ""
+  (* Multiple environment variables according to modes of use are:
+     text topic st cp us t lex cache abs cpts (standard mode)
+     allSol (deprecated Validate mode)
+     corpus sentenceNumber linkNumber (Corpus mode)
+     corpdir sentno corpmode (defined in Params)
+     guess gender revised rev_off rev_ind (User-aid) *)
+  let url_encoded_input = get "text" env ""
   and url_encoded_topic = get "topic" env "" (* topic carry-over *)
   and st = get "st" env "t" (* sentence parse default *)
   and cp = get "cp" env "t" (* complete mode default *)
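For orientation (the values below are hypothetical; only the parameter names and the defaults visible in this hunk come from the diff), a standard-mode request to the graph interface (sktgraph.cgi, per the header comment above) might carry a query string of the form

    text=<url-encoded sentence>&st=t&cp=t&cpts=<url-encoded checkpoints>

while Corpus mode would add corpus, sentenceNumber and linkNumber, and the User-aid revision path would add revised, rev_off and rev_ind.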
@@ -555,14 +561,14 @@
   and sent_id = get "sentenceNumber" env "0"
   and link_num = get "linkNumber" env "0" (* is there a better default? *)
   and sol_num = get "allSol" env "0" in (* Needed for Validate mode *)
-  let url_enc_corpus_permission =
-    Cgi.get Params.corpus_permission env "true" in
+  let url_enc_corpus_permission = (* Corpus mode *)
+    get Params.corpus_permission env "true" in
   let corpus_permission =
     url_enc_corpus_permission
-    |> Cgi.decode_url
+    |> decode_url
     |> Web_corpus.permission_of_string in
-  let corpus_dir = Cgi.get Params.corpus_dir env "" in
-  let sentence_no = Cgi.get Params.sentence_no env "" in
+  let corpus_dir = get Params.corpus_dir env "" in
+  let sentence_no = get Params.sentence_no env "" in
   let text = arguments translit lex cache st us cp url_encoded_input
              url_encoded_topic abs sol_num corpus sent_id link_num
              url_enc_corpus_permission corpus_dir sentence_no
@@ -570,9 +576,10 @@
   try let url_encoded_cpts = List.assoc "cpts" env in (* do not use get *)
       parse_cpts (decode_url url_encoded_cpts)
   with [ Not_found -> [] ]
-  and guess_morph = decode_url (get "guess" env "")
+  and guess_morph = decode_url (get "guess" env "") (* User-aid guessing *)
   and pseudo_gender = decode_url (get "gender" env "") in
   let _ = if String.length guess_morph > 0 && Paths.platform="Station" then
+    (* User-aid cache acquisition *)
     let (entry,gender) = match pseudo_gender with
       [ "" -> parse_guess guess_morph
       | g -> (guess_morph,g)
@@ -583,14 +590,15 @@
       make_cache_transducer cache
       }
     else () in
-  let revised = decode_url (get "revised" env "")
+  let revised = decode_url (get "revised" env "") (* User-aid revision *)
   and rev_off = int_of_string (get "rev_off" env "-1")
   and rev_ind = int_of_string (get "rev_ind" env "-1") in
   try do
   { match (revised,rev_off,rev_ind) with
-    [ ("",-1,-1) -> check_sentence translit uns text checkpoints
-                    input sol_num corpus sent_id link_num
-    | (new_word,word_off,chunk_ind) ->
+    [ ("",-1,-1) -> (* Standard input processing *** main call *** *)
+        check_sentence translit uns text checkpoints input sol_num
+                       corpus sent_id link_num
+    | (new_word,word_off,chunk_ind) (* User-aid revision *) ->
       let chunks = Sanskrit.read_sanskrit (Encode.switch_code translit) input in
       let rec decoded init ind = fun
         [ [] -> String.sub init 0 ((String.length init)-1)
@@ -606,8 +614,8 @@
         | [ a :: rest ] -> if cur_ind = chunk_ind then Word.length a
                            else find_word_len (cur_ind+1) rest
         ] in
-      let word_len = find_word_len 1 chunks in
-      let new_chunk_len = Word.length (Encode.switch_code translit revised) in
+      let word_len = find_word_len 1 chunks
+      and new_chunk_len = Word.length (Encode.switch_code translit revised) in
       let diff = new_chunk_len-word_len in
       let revised_check =
         let revise (k,sec,sel) = (if k<word_off then k else k+diff,sec,sel) in
@@ -635,8 +643,8 @@
   (* Quit button: continue reading (reader mode)
      or quit without saving (annotator mode) *)
   ; if sentence_no <> "" then
-       quit_button corpus_permission
-         (Cgi.decode_url corpus_dir) (Cgi.decode_url sentence_no) |> pl
+       quit_button corpus_permission
+         (decode_url corpus_dir) (decode_url sentence_no) |> pl
     else ()
   ; close_page_with_margin ()
   ; page_end lang True
......
@@ -4,10 +4,13 @@
 (*                                                                        *)
 (*                                Idir Lankri                             *)
 (*                                                                        *)
-(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
+(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
 (**************************************************************************)
+(* This is an unfinished attempt to fiter out citations from Heritage
+   and make a corpus document from it - unused at present *)
 value abort report_error status =
   do
   { report_error ()
   ; exit status
......
@@ -68,7 +68,7 @@ value voices_of = fun
 | "sad#1" | "sap#1" | "saa#1" | "sidh#1" | "sidh#2" | "siiv" | "sur" | "s.r"
 | "s.rj#1" | "s.rp" | "skand" | "skhal" | "stan" | "stubh" | "sthag" | "snaa"
 | "snih#1" | "snu" | "snuh#1" | "sp.r" | "sphal" | "sphu.t" | "sphur"
-| "sm.r" | "sru" | "svan" | "svap" | "svar#1" | "svar#2" | "ha.th" | "has"
+| "sm.r" | "sru" | "svan" | "svap" | "svar#1" | "svar#2" | "ha.th"
 | "haa#1" | "hi#2" | "hi.ms" | "h.r.s" | "hras" | "hrii#1" | "hval"
 | "maarg" (* root rather than nominal verb *)
 (*| "viz#1" Atma needed for eg nivizate \Pan{1,3,17} *)
@@ -80,6 +80,7 @@ value voices_of = fun
 (*| "mah" Atma needed for pft. maamahe *)
 (*| "cit#1" Atma needed for pft. cikite *)
 (*| "kaafk.s" | "han#1" occur also in Atma in BhG: kaafk.se hani.sye *)
+(*| "has" Atma needed for hasate *)
 (*| "a~nj" also Atma afkte | "naath" "praz" "sp.rz#1" idem *)
   -> Para (* active only *)
 | "az#1" | "aas#2" | "indh" | "iik.s" | "ii.d" | "iir" | "iiz#1" | "ii.s"
......
@@ -100,7 +100,7 @@ value adjust c w = match Word.mirror w with
   | _ -> if c=36 (* n *) || c=41 (* m *)
             then raise Glue (* since d|m->nn and n|m -> nm *)
                  (* Word.mirror [ 32 :: rest ] (* n -> t *) *)
-                 (* incomplétude: raajan naasiin vocatif raajan *)
+                 (* incompleteness: raajan naasiin vocatif raajan *)
             else w
   ]
 | 22 (* c *) -> if c=22 then Word.mirror [ 32 :: rest ] (* c -> t *)
@@ -154,6 +154,8 @@ value adjust c w = match Word.mirror w with
   ]
 ]
 ;
+(* Called from [Sanskrit.read_processed_skt_stream] for use in [read_sanskrit]
+   with argument [read_chunk=sanskrit_chunk encode] *)
 value padapatha read_chunk l = (* l is list of chunks separated by blanks *)
 (* returns padapatha as list of forms in terminal sandhi *)
   let rec pad_rec = fun (* returns (c,l) with c first char of first pada in l *)
......
@@ -65,6 +65,7 @@ EXTEND Gramskt
     [ [ s = skt; `EOI -> s ] ] ;
   pada: (* non-empty list of chunks separated by blanks *)
     [ [ el = LIST1 skt -> el ] ] ;
+(*i deprecated
   sloka_line:
     [ [ p = pada; "|"; "|" -> [ p ]
       | p = pada; "|"; sl = sloka_line -> [ p :: sl ]
@@ -73,7 +74,7 @@ EXTEND Gramskt
     [ [ p = pada; "|"; sl = sloka_line -> [ p :: sl ]
      | p = pada -> [ p ]
      | `EOI -> failwith "Empty sanskrit input"
-    ] ] ;
+    ] ] ; *)
   sanscrit:
     [ [ p = pada; "|"; "|" -> [ p ]
       | p = pada; "|"; sl = sanscrit -> [ p :: sl ]
@@ -162,21 +163,6 @@ value read_processed_skt_stream encode strm =
       where concat line lines = process line @ lines
   ]
 ;
-(* assumes Velthuis encoding *)
-value read_corpus unsandhied chi = (* only used by Tagger1 *)
-  let encode = Transduction.code_raw (* unnormalized input from stream *)
-  and channel = Stream.of_channel chi
-  and reader = if unsandhied then read_raw_skt_stream
-               else read_processed_skt_stream in
-  reader encode channel
-;
-value read_VH unsandhied str =
-  let encode = Encode.code_string (* normalized input from string *)
-  and channel = Stream.of_string str
-  and reader = if unsandhied then read_raw_skt_stream
-               else read_processed_skt_stream in
-  reader encode channel
-;
 (* Now general readers with encoding parameter of type [string -> word] *)
......
@@ -4,7 +4,7 @@
 (*                                                                        *)
 (*                               Gérard Huet                              *)
 (*                                                                        *)
-(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
+(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
 (**************************************************************************)
 (*i module Sanskrit : sig i*)
@@ -43,8 +43,6 @@ value normal_stem_skt : skt -> string;
 value code_skt_ref : skt -> Word.word;
 value code_skt_ref_d : skt -> Word.word;
 value decode_skt : Word.word -> skt;
-value read_corpus : bool -> in_channel -> list Word.word;
-value read_VH : bool -> string -> list Word.word;
 value read_sanskrit : (string -> Word.word) -> string -> list Word.word;
 value read_raw_sanskrit : (string -> Word.word) -> string -> list Word.word;
......
@@ -42,7 +42,7 @@ open Format;
 ;
 value rec filter is_kwd = parser
   [ [: `((KEYWORD s, loc) as p); strm :] ->
-      if is_kwd s then [: `p; filter is_kwd strm :]
+      if is_kwd s || s = "!" then [: `p; filter is_kwd strm :]
       else raise (Encode.In_error ("Undefined token : " ^ s))
   | [: `x; s :] -> [: `x; filter is_kwd s :]
   | [: :] -> [: :]
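The one-token change above is the hook for the commit's [!] notation: the keyword filter now lets a bare "!" through, so the input notation can mark a mandatory break, e.g. after a vocative. A minimal sketch of the same idea in plain OCaml, assuming a simplified token type (the real code above is a Camlp4 stream parser in revised syntax, so names and types here are illustrative only):

    type token = Keyword of string | Other of string

    (* Pass known keywords and the break marker "!" through; reject the rest. *)
    let rec filter is_kwd = function
      | [] -> []
      | Keyword s :: rest when is_kwd s || s = "!" ->
          Keyword s :: filter is_kwd rest
      | Keyword s :: _ -> failwith ("Undefined token : " ^ s)
      | tok :: rest -> tok :: filter is_kwd rest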
......
@@ -1200,7 +1200,7 @@ value compute_active_present2 sstem wstem set entry third = do
 ; match wstem with
   [ [ 2 :: _ ] -> (* Ppr of roots in -aa is complex and overgenerates *)
     match entry with
-    [ "maa#1" | "yaa#1" -> () (* no ppra *)
+    [ "bhaa#1" | "maa#1" | "yaa#1" -> () (* no known ppra *)
     | _ -> let m_pstem = wstem and f_pstem = rev (fix2w wstem "at" set) in
            record_part (Ppra_ 2 Primary m_pstem f_pstem entry)
     ]
......
@@ -8,4 +8,4 @@
 (**************************************************************************)
 (* Generated by make version - see main Makefile *)
-value version="3.05" and version_date="2018-03-01";
+value version="3.05" and