indexerd.ml 3.76 KB
Newer Older
huet's avatar
huet committed
1 2 3 4 5 6
(**************************************************************************)
(*                                                                        *)
(*                     The Sanskrit Heritage Platform                     *)
(*                                                                        *)
(*                              Gérard Huet                               *)
(*                                                                        *)
7
(* ©2020 Institut National de Recherche en Informatique et en Automatique *)
huet's avatar
huet committed
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
(**************************************************************************)

(* CGI-bin indexerd for indexing in sanskrit dico without diacritics.     *)
(* This CGI is triggered by page [index.html] in [dico_dir].              *)
(* Reads its input in shell variable [QUERY_STRING] URI-encoded.          *)
(*i Test in csh with: setenv QUERY_STRING "query=yoga"; indexerd         i*)
(*i Web invocation is http://skt_server_url/cgi-bin/sktsearch?query=str   i*)

(*i module Indexerd = struct i*)

open Html;
open Web; (* ps pl etc. *)
open Cgi;

value answer_begin () = do
  { pl (table_begin Yellow_cent)
  ; ps tr_begin
  ; ps th_begin
  }
;
value answer_end () = do
  { ps th_end
  ; ps tr_end
  ; pl table_end
  ; pl html_paragraph
  }
;
value back_ground = background Chamois
;
value prelude () = do
  { pl http_header
  ; page_begin heritage_dictionary_title
  ; pl (body_begin back_ground)
  ; pl html_paragraph
  ; print_title_solid Mauve (Some Html.French) dico_title_fr 
  }
; 
value postlude () = do
  { ()
  ; page_end Html.French True 
  }
;
50
value print_word c = pl (Morpho_html.skt_anchor False (Canon.decode_ref c))
huet's avatar
huet committed
51 52 53 54
;
(* Each dummy is mapped to a list of words - all the words which
   give back the dummy by normalisation such as removing diacritics *)
value read_dummies () =
55
  (Gen.gobble Data.public_dummies_file : Deco.deco Word.word)
huet's avatar
huet committed
56
;
57 58
value skt_red s = html_red (Morpho_html.skt_roma s)
;
huet's avatar
huet committed
59 60 61 62 63 64 65 66 67 68
value index_engine () = 
  let abor = abort Html.French (* may not preserve the current lang *) in
  try let dummies_deco = read_dummies () in do
     { prelude () 
     ; let query = Sys.getenv "QUERY_STRING" in
       let alist = create_env query in
       (* We do not assume transliteration, just ordinary roman letters *)
       (* TODO: adapt to MW search along Indexer *)
       let url_encoded_entry = List.assoc "q" alist in
       let str = decode_url url_encoded_entry in 
69
       try let dummy = Encode.code_skt_ref_d str (* normalization *) in do
70 71
           { answer_begin ()
           ; ps (div_begin Latin12)
72
           ; let words = Deco.assoc dummy dummies_deco in
huet's avatar
huet committed
73
             match words with
74
               [ [] -> do { ps (skt_red str)
huet's avatar
huet committed
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
                          ; ps " not found in Heritage dictionary"
                          ; ps html_break; pl html_break
                          }
               | _ -> List.iter print_word words
               ]  
           ; ps div_end (* Latin12 *)
           ; answer_end ()
           ; postlude ()
           }
       with [ Stream.Error _ -> abor "Illegal input " str ]
     }
 with 
  [ Sys_error s        -> abor Control.sys_err_mess s (* file pb *)
  | Stream.Error s     -> abor Control.stream_err_mess s (* file pb *)
  | Invalid_argument s -> abor Control.fatal_err_mess s (* sub *)
  | Failure s          -> abor Control.fatal_err_mess s (* anomaly *)
  | Control.Fatal s    -> abor Control.fatal_err_mess s (* anomaly *)
  | Not_found          -> abor Control.fatal_err_mess "assoc" (* assoc *)
  | End_of_file        -> abor Control.fatal_err_mess "EOF" (* EOF *)
  | Encode.In_error s  -> abor "Wrong_input " s
  | Exit               -> abor "Wrong character in input - " "use ASCII" (* Sanskrit *)
  | _                  -> abor Control.fatal_err_mess "Unexpected anomaly" (* ? *)
  ]
;
index_engine ()
;
(*i end; i*)