diff --git a/ML/conjugation.ml b/ML/conjugation.ml index 7927272ce14d00e46feabce9d436fce91fde30a0..e8aacd79a861655fd2b628e53b4870dd55410121 100644 --- a/ML/conjugation.ml +++ b/ML/conjugation.ml @@ -18,11 +18,11 @@ (*i executable module Conjugation = struct i*) open Skt_morph; -open Morphology; (* inflected [Verb_form] etc. *) +open Morphology; (* [inflected Verb_form] etc. *) open Conj_infos; (* [vmorph Causa Inten Desid root_infos] *) -open Inflected; (* roots.val indecls.val etc. *) +open Inflected; (* [roots.val indecls.val] etc. *) open Html; -open Web; (* ps pl font Deva Roma pr_font etc. *) +open Web; (* [ps pl font Deva Roma pr_font] etc. *) open Cgi; open Multilingual; (* [gentense tense_name captions] *) @@ -843,7 +843,8 @@ value conjs_engine () = do try let url_encoded_entry = List.assoc "q" env and url_encoded_class = List.assoc "c" env - and font = font_of_string (get "font" env Paths.default_display_font) + and font = let s = get "font" env Paths.default_display_font in + font_of_string s (* deva vs roma print *) (* OBS and stamp = get "v" env "" *) and translit = get "t" env "VH" (* DICO created in VH trans *) and lex = get "lex" env "SH" (* default Heritage *) in @@ -872,7 +873,7 @@ value conjs_engine () = do let entry = resolve_homonym entry_VH gana in (* VH string with homo *) let known = in_lexicon entry (* in lexicon? *) (* we should check it is indeed a root or denominative *) in do - { let link = if known then Morpho_html.skt_anchor False font entry + { let link = if known then Morpho_html.skt_anchor False entry else doubt (Morpho_html.skt_roma entry) in let subtitle = hyperlink_title font link in display_subtitle (h1_center subtitle) diff --git a/ML/declension.ml b/ML/declension.ml index 0f76b197ca547f2b1d0ce7ed18954ca9c71af1d9..43af5cbeb6a0767980afb6122901bfaf0c403861 100644 --- a/ML/declension.ml +++ b/ML/declension.ml @@ -19,8 +19,8 @@ open Skt_morph; open Morphology; (* [Noun_form] etc. *) open Html; (* [narrow_screen html_red] etc. *) -open Web; (* ps pl font Deva Roma pr_font etc. *) -open Cgi; (* [create_env] etc. *) +open Web; (* [ps pl font Deva Roma pr_font] etc. *) +open Cgi; (* [create_env] etc. *) open Multilingual; (* [declension_title compound_name avyaya_name] *) value dtitle font = h1_title (declension_title narrow_screen font) @@ -46,7 +46,7 @@ value display_subtitle title = do ; title |> ps ; th_end |> ps ; tr_end |> ps - ; table_end |> pl (* Centered *) + ; table_end |> pl (* centered *) ; html_paragraph |> pl } ; @@ -132,19 +132,19 @@ value display_iic font = fun | l -> do { html_paragraph |> pl ; h3_begin C3 |> ps - ; compound_name font |> ps; ps " " + ; compound_name font |> ps; " " |> ps ; let print_iic w = pr_i font w in List.iter print_iic l ; h3_end |> ps } ] ; -value display_avy font = fun +value display_avya font = fun [ [] -> () | l -> do { html_paragraph |> pl ; h3_begin C3 |> ps - ; avyaya_name font |> ps; ps " " + ; avyaya_name font |> ps; " " |> ps ; let ifc_form w = [ 0 ] (* - *) @ w in let print_iic w = pr_font font (ifc_form w) in List.iter print_iic l @@ -154,16 +154,16 @@ value display_avy font = fun ; value sort_out accu form = fun [ [ (_,morphs) ] -> List.fold_left (reorg form) accu morphs - where reorg f (mas,fem,neu,any,iic,avy) = fun + where reorg f (mas,fem,neu,any,iic,avya) = fun [ Noun_form g n c -> let t = (n,c,f) in match g with - [ Mas -> ([ t :: mas ],fem,neu,any,iic,avy) - | Fem -> (mas,[ t :: fem ],neu,any,iic,avy) - | Neu -> (mas,fem,[ t :: neu ],any,iic,avy) - | Deictic _ -> (mas,fem,neu,[ t :: any ],iic,avy) + [ Mas -> ([ t :: mas ],fem,neu,any,iic,avya) + | Fem -> (mas,[ t :: fem ],neu,any,iic,avya) + | Neu -> (mas,fem,[ t :: neu ],any,iic,avya) + | Deictic _ -> (mas,fem,neu,[ t :: any ],iic,avya) ] - | Bare_stem | Gati -> (mas,fem,neu,any,[ f :: iic ],avy) - | Avyayaf_form -> (mas,fem,neu,any,iic,[ f :: avy ]) + | Bare_stem | Gati -> (mas,fem,neu,any,[ f :: iic ],avya) + | Avyayaf_form -> (mas,fem,neu,any,iic,[ f :: avya ]) | Ind_form _ | Verb_form _ _ _ | Ind_verb _ | Abs_root _ | Avyayai_form | Unanalysed | PV _ | Part_form _ _ _ _ -> @@ -173,19 +173,19 @@ value sort_out accu form = fun ] and init = ([],[],[],[],[],[]) ; -value display_inflected font (gen_deco,pn_deco,voca_deco,iic_deco,avy_deco) = +value display_inflected font (gen_deco,pn_deco,voca_deco,iic_deco,avya_deco) = let nouns = Deco.fold sort_out init gen_deco in let non_vocas = Deco.fold sort_out nouns pn_deco in let (mas,fem,neu,any,_,_) = Deco.fold sort_out non_vocas voca_deco and iic = List.map fst (Deco.contents iic_deco) - and avy = List.map fst (Deco.contents avy_deco) in do + and avya = List.map fst (Deco.contents avya_deco) in do { center_begin |> pl ; display_gender font Mas mas ; display_gender font Fem fem ; display_gender font Neu neu ; display_gender font (Deictic Numeral) any (* arbitrary *) ; display_iic font iic - ; display_avy font avy + ; display_avya font avya ; center_end |> pl ; html_paragraph |> pl } @@ -241,7 +241,8 @@ value decls_engine () = do and url_encoded_participle = get "p" env "" and url_encoded_source = get "r" env "" (* optional root origin - used by participles in conjugation tables *) - and font = font_of_string (get "font" env Paths.default_display_font) + and font = let s = get "font" env Paths.default_display_font in + font_of_string s (* deva vs roma print *) and translit = get "t" env "VH" (* DICO created in VH trans *) and lex = get "lex" env "SH" (* default Heritage *) in let entry_tr = decode_url url_encoded_entry (* : string in translit *) @@ -257,14 +258,17 @@ value decls_engine () = do (* will be avoided by unique name lookup *) let entry = resolve_homonym entry_VH in (* compute homonymy index *) let link = - if in_lexicon entry then Morpho_html.skt_anchor False font entry + if in_lexicon entry then Morpho_html.skt_anchor False entry (* We should check it is indeed a substantive entry and that Any is used for deictics/numbers (TODO) *) - else let root = if source = "" then "?" (* unknown in lexicon *) - else " from " ^ - if in_lexicon source then Morpho_html.skt_anchor False font source - else doubt (Morpho_html.skt_roma source) in - Morpho_html.skt_roma entry ^ root in + (* Also it should use unique naming for possible homo index *) + else Morpho_html.skt_html_font font entry |> italics in +(* OBSOLETE indication of root for kridanta + [let root = if source = "" then "?" (* unknown in lexicon *) + else " from " ^ (* should test font *) in + if in_lexicon source then Morpho_html.skt_anchor False font source + else doubt (Morpho_html.skt_roma source) in (* should test font *) + Morpho_html.skt_utf font entry ^ root in] *) let subtitle = hyperlink_title font link in do { display_subtitle (h1_center subtitle) ; try look_up font entry (Nouns.Gender gender) part @@ -273,7 +277,7 @@ value decls_engine () = do ; page_end lang True } with [ Stream.Error _ -> - abort lang ("Illegal " ^ translit ^ " transliteration ") entry_tr ] + abort lang ("Illegal " ^ translit ^ " input ") entry_tr ] } ; value safe_engine () = diff --git a/ML/encode.ml b/ML/encode.ml index cea9b007809d7abd0f97d2b078c2b68234f8cc34..5e1f8b3157646bc0117192a9f75259aa9acd17e1 100644 --- a/ML/encode.ml +++ b/ML/encode.ml @@ -4,7 +4,7 @@ (* *) (* Gérard Huet *) (* *) -(* ©2019 Institut National de Recherche en Informatique et en Automatique *) +(* ©2020 Institut National de Recherche en Informatique et en Automatique *) (**************************************************************************) (*i module Encode = struct i*) @@ -18,7 +18,7 @@ exception In_error of string (* Error in user or corpus input *) value is_vowel c = vowel c || c>100 && c<114 (* accounts for upper case *) ; (* anusvara substituted by nasal or normalized to 14 when original *) -(* anunaasika before vowels treated as anusvaara *) +(* anunaasika after vowels treated as anusvaara *) value rec normalize = normal_rec False where rec normal_rec after_vow = fun [ [] -> [] @@ -50,8 +50,8 @@ value code_string str = normalize (code_raw str) (* standard VH *) and code_string_WX str = normalize (code_raw_WX str) and code_string_KH str = normalize (code_raw_KH str) and code_string_SL str = normalize (code_raw_SL str) -and code_skt_ref str = normalize (code_rawu str) -and code_skt_ref_d str = normalize (code_rawd str) +and code_skt_ref str = normalize (code_rawu str) (* with upper letters *) +and code_skt_ref_d str = normalize (code_rawd str) (* no diacritics *) ; (* Switching code function according to transliteration convention *) value switch_code = fun (* normalizes anusvaara in its input *) @@ -113,11 +113,11 @@ value code_strip_raw s = rev_strip (code_raw s) (* A cleaner solution would be to have type lexeme = (word * int) and "x#5" represented as (x,5) (0 if no homophone) *) ; -value skt_to_deva str = try Canon.unidevcode (code_string str) with +value skt_to_deva str = try Canon.unidevcode (code_string str) with [ Failure _ -> raise (In_error str) ] -and skt_raw_to_deva str = try Canon.unidevcode (code_raw str) with +and skt_raw_to_deva str = try Canon.unidevcode (code_raw str) with [ Failure _ -> raise (In_error str) ] -and skt_raw_strip_to_deva str = try Canon.unidevcode (code_strip_raw str) with +and skt_strip_to_deva str = try Canon.unidevcode (code_strip_raw str) with [ Failure _ -> raise (In_error str) ] ; (* Following not needed since [Transduction.skt_to_html] is more direct diff --git a/ML/html.ml b/ML/html.ml index 8dcc37c592a12e651ad1b45c7af07855b07b49da..01080f89a2cd16bc3ccb98f32ee9fa6d4cb2e0a5 100644 --- a/ML/html.ml +++ b/ML/html.ml @@ -220,7 +220,7 @@ value rgb = fun (* a few selected HTML colors in rgb data *) | Blue -> "#0000FF" (* Canard = "#0000C0" ou "#0080FF" *) | Green -> "#008000" (* Teal = "#008080" Olive = "#808000" *) | Aquamarine -> "#6FFFC3" (* actually Light Aquamarine *) - | Lawngreen -> "#7CFC00" + | Lawngreen -> "#66ff99" (* was "#7CFC00" *) | Yellow -> "#FFFF00" | Orange -> "#FFA000" | Cyan -> "#00FFFF" (* Aqua = Cyan, Turquoise = "#40E0D0" *) diff --git a/ML/indexer.ml b/ML/indexer.ml index a0c30aeb0b0c68fc5cc4e9762940322f4503283b..fbcf1adf89e499944b096efa13dd13830d00d38e 100644 --- a/ML/indexer.ml +++ b/ML/indexer.ml @@ -32,8 +32,8 @@ value answer_end () = do ; pl html_paragraph } ; -value ok (mess,s) = do { ps mess; pl (Morpho_html.skt_anchor_R False s) } - and ok2 (mess,s1,s2) = do { ps mess; pl (Morpho_html.skt_anchor_R2 s1 s2) } +value ok (mess,s) = do { ps mess; pl (Morpho_html.skt_anchor False s) } + and ok2 (mess,s1,s2) = do { ps mess; pl (Morpho_html.skt_anchor_R s1 s2) } (* ok2 prints the entry under the spelling given by the user, i.e. without normalisation, thus e.g. sandhi is not written sa.mdhi, and possibly suffixed by homonymy index 1, e.g. b.rh. *) @@ -59,7 +59,7 @@ and report_failure s = do { ps " not found in dictionary" ; pl html_break ; ps "Closest entry in lexical order: " - ; ps (Morpho_html.skt_anchor_R False s) + ; ps (Morpho_html.skt_anchor False s) ; pl html_break } ; @@ -101,6 +101,8 @@ value print_word word (entry,lex,page) = match lex with value read_mw_index () = (Gen.gobble Data.public_mw_index_file : Deco.deco (string * string * string)) ; +value skt_red s = html_red (Morpho_html.skt_roma s) +; value index_engine () = do { pl http_header ; page_begin heritage_dictionary_title @@ -124,7 +126,7 @@ value index_engine () = do let mw_index = read_mw_index () in let words = Deco.assoc word mw_index in match words with - [ [] -> do { ps (Morpho_html.skt_red str_VH) + [ [] -> do { ps (skt_red str_VH) ; ps " not found in MW dictionary" ; pl html_break } @@ -143,7 +145,7 @@ value index_engine () = do (* even though str may exist as inflected form *) with (* Matching entry not found - we try declensions *) [ Index.Last last -> do - { ps (Morpho_html.skt_red str_VH) + { ps (skt_red str_VH) ; try_declensions word last } ] diff --git a/ML/indexerd.ml b/ML/indexerd.ml index 0b1dc897696a605e8344e3bb5111729cdaa3f90b..cfa47f13a413f381c4998b7c7f5f07660e0f0624 100644 --- a/ML/indexerd.ml +++ b/ML/indexerd.ml @@ -4,7 +4,7 @@ (* *) (* Gérard Huet *) (* *) -(* ©2019 Institut National de Recherche en Informatique et en Automatique *) +(* ©2020 Institut National de Recherche en Informatique et en Automatique *) (**************************************************************************) (* CGI-bin indexerd for indexing in sanskrit dico without diacritics. *) @@ -47,13 +47,15 @@ value postlude () = do ; page_end Html.French True } ; -value print_word c = pl (Morpho_html.skt_anchor_R False (Canon.decode_ref c)) +value print_word c = pl (Morpho_html.skt_anchor False (Canon.decode_ref c)) ; (* Each dummy is mapped to a list of words - all the words which give back the dummy by normalisation such as removing diacritics *) value read_dummies () = (Gen.gobble Data.public_dummies_file : Deco.deco Word.word) ; +value skt_red s = html_red (Morpho_html.skt_roma s) +; value index_engine () = let abor = abort Html.French (* may not preserve the current lang *) in try let dummies_deco = read_dummies () in do @@ -69,7 +71,7 @@ value index_engine () = ; ps (div_begin Latin12) ; let words = Deco.assoc dummy dummies_deco in match words with - [ [] -> do { ps (Morpho_html.skt_red str) + [ [] -> do { ps (skt_red str) ; ps " not found in Heritage dictionary" ; ps html_break; pl html_break } diff --git a/ML/interface.ml b/ML/interface.ml index 57c7f467bfe386f9f6478d17a6180c40343f2bf8..9fc8821964489dc5013a3fb3aff98d73c427c297 100644 --- a/ML/interface.ml +++ b/ML/interface.ml @@ -518,9 +518,10 @@ value graph_engine () = do and us = get "us" env "f" (* sandhied text default *) and translit = get "t" env Paths.default_transliteration (* translit input *) and lex = get "lex" env Paths.default_lexicon (* lexicon choice *) - and font = get "font" env Paths.default_display_font (* deva vs roma print *) + and font = get "font" env Paths.default_display_font in + let ft = font_of_string font (* Deva vs Roma print *) and cache = get "cache" env "f" (* no cache default *) in - let () = sanskrit_display.val := font + let () = sanskrit_font.val := ft and () = cache_active.val := cache and abs = get "abs" env "f" (* default local paths *) in let lang = language_of lex (* language default *) @@ -537,13 +538,13 @@ value graph_engine () = do and link_num = get "linkNumber" env "0" (* is there a better default? *) and sol_num = get "allSol" env "0" in (* Needed for Validate mode *) let url_enc_corpus_permission = (* Corpus mode *) - get Params.corpus_permission env "true" in + get Params.corpus_permission env "true" in let corpus_permission = url_enc_corpus_permission |> decode_url |> Web_corpus.permission_of_string in - let corpus_dir = get Params.corpus_dir env "" in - let sentence_no = get Params.sentence_no env "" in + let corpus_dir = get Params.corpus_dir env "" + and sentence_no = get Params.sentence_no env "" in let text = arguments translit lex font cache st us cp url_encoded_input url_encoded_topic abs sol_num corpus sent_id link_num url_enc_corpus_permission corpus_dir sentence_no @@ -568,7 +569,7 @@ value graph_engine () = do let revised = decode_url (get "revised" env "") (* User-aid revision *) and rev_off = int_of_string (get "rev_off" env "-1") and rev_ind = int_of_string (get "rev_ind" env "-1") in - try do + try do { match (revised,rev_off,rev_ind) with [ ("",-1,-1) -> (* Standard input processing *** Main call *** *) check_sentence translit uns text checkpoints input sol_num @@ -624,7 +625,7 @@ value graph_engine () = do else () ; close_page_with_margin () ; page_end lang True - } + } with [ Sys_error s -> abort lang Control.sys_err_mess s (* file pb *) | Stream.Error s -> abort lang Control.stream_err_mess s (* file pb *) diff --git a/ML/make_xml_data.ml b/ML/make_xml_data.ml index f291f9684036e830ae838486f8d2f80e8bebc231..c9c6641fd3e2717c772a8f0a5fa29a85ae280980 100644 --- a/ML/make_xml_data.ml +++ b/ML/make_xml_data.ml @@ -327,7 +327,7 @@ value print_inverse_map_xml trans form (delta,morphs) = } ; (* Outputs an XML stream on stdout *) -value print_header trans = do +value print_xml_header trans = do { "" |> pl ; "" |> pl ; "