Commit 9bf22aed authored by Idir Lankri

Complete the reading part of the corpus manager

The corpus manager can be invoked from the navigation bar.
parent 31974f95
......@@ -231,14 +231,16 @@ automaton.cmx : web.cmx ../ZEN/trie.cmx ../ZEN/share.cmx phonetics.cmx \
interface.cmi :
interface.cmo : ../ZEN/word.cmo web.cmo sanskrit.cmi phases.cmo paths.cmo \
nouns.cmi morphology.cmi morpho_html.cmo ../ZEN/mini.cmo \
load_transducers.cmo load_morphs.cmo html.cmo graph_segmenter.cmo \
../ZEN/gen.cmo encode.cmo dispatcher.cmi ../ZEN/deco.cmo control.cmo \
checkpoints.cmo cgi.cmo canon.cmo automaton.cmo interface.cmi
load_transducers.cmo load_morphs.cmo interfaceParams.cmi html.cmo \
graph_segmenter.cmo ../ZEN/gen.cmo encode.cmo dispatcher.cmi \
../ZEN/deco.cmo control.cmo checkpoints.cmo cgi.cmo canon.cmo \
automaton.cmo interface.cmi
interface.cmx : ../ZEN/word.cmx web.cmx sanskrit.cmx phases.cmx paths.cmx \
nouns.cmx morphology.cmi morpho_html.cmx ../ZEN/mini.cmx \
load_transducers.cmx load_morphs.cmx html.cmx graph_segmenter.cmx \
../ZEN/gen.cmx encode.cmx dispatcher.cmx ../ZEN/deco.cmx control.cmx \
checkpoints.cmx cgi.cmx canon.cmx automaton.cmx interface.cmi
load_transducers.cmx load_morphs.cmx interfaceParams.cmx html.cmx \
graph_segmenter.cmx ../ZEN/gen.cmx encode.cmx dispatcher.cmx \
../ZEN/deco.cmx control.cmx checkpoints.cmx cgi.cmx canon.cmx \
automaton.cmx interface.cmi
user_aid.cmo : ../ZEN/word.cmo web.cmo sanskrit.cmi phases.cmo paths.cmo \
morpho_html.cmo html.cmo ../ZEN/gen.cmo encode.cmo ../ZEN/deco.cmo \
control.cmo checkpoints.cmo cgi.cmo canon.cmo
......@@ -249,6 +251,9 @@ reset_caches.cmo : web.cmo morphology.cmi ../ZEN/gen.cmo ../ZEN/deco.cmo \
auto.cmi
reset_caches.cmx : web.cmx morphology.cmi ../ZEN/gen.cmx ../ZEN/deco.cmx \
auto.cmi
interfaceParams.cmi :
interfaceParams.cmo : interfaceParams.cmi
interfaceParams.cmx : interfaceParams.cmi
html.cmo : paths.cmo
html.cmx : paths.cmx
web.cmo : SCLpaths.cmo paths.cmo html.cmo date.cmo
......@@ -273,11 +278,13 @@ mk_index_page.cmo : web.cmo paths.cmo html.cmo
mk_index_page.cmx : web.cmx paths.cmx html.cmx
mk_grammar_page.cmo : web.cmo paths.cmo html.cmo
mk_grammar_page.cmx : web.cmx paths.cmx html.cmx
mk_reader_page.cmo : web.cmo paths.cmo html.cmo control.cmo cgi.cmo
mk_reader_page.cmx : web.cmx paths.cmx html.cmx control.cmx cgi.cmx
mk_reader_page.cmo : web.cmo paths.cmo interfaceParams.cmi html.cmo \
control.cmo cgi.cmo
mk_reader_page.cmx : web.cmx paths.cmx interfaceParams.cmx html.cmx \
control.cmx cgi.cmx
mk_sandhi_page.cmo : web.cmo html.cmo
mk_sandhi_page.cmx : web.cmx html.cmx
manager.cmo : web.cmo html.cmo dir.cmi cgi.cmo
manager.cmx : web.cmx html.cmx dir.cmx cgi.cmx
manager.cmo : web.cmo interfaceParams.cmi html.cmo dir.cmi cgi.cmo
manager.cmx : web.cmx interfaceParams.cmx html.cmx dir.cmx cgi.cmx
add_corpus.cmo : web.cmo html.cmo
add_corpus.cmx : web.cmx html.cmx
......@@ -47,7 +47,7 @@ load_morphs.ml lexer.mli lexer.ml rank.ml scl_parser.ml \
reader.ml parser.ml constraints.mli constraints.ml multilingual.ml \
paraphrase.mli paraphrase.ml bank_lexer.ml regression.ml \
checkpoints.ml graph_segmenter.ml automaton.ml interface.mli interface.ml \
user_aid.ml reset_caches.ml
user_aid.ml reset_caches.ml interfaceParams.mli interfaceParams.ml
# WEB package - HTML, HTTP, CGI utilities for Web interface
WEB=html.ml web.ml css.ml cgi.ml morpho_html.ml chapters.ml morpho_scl.ml \
......@@ -197,7 +197,8 @@ inflected.cmx html.cmx SCLpaths.cmx web.cmx naming.cmx morpho_string.cmx morpho.
load_transducers.cmx pada.cmx phases.cmx dispatcher.cmx order.cmx \
chapters.cmx morpho_html.cmx bank_lexer.cmx cgi.cmx load_morphs.cmx \
int_sandhi.cmx nouns.cmx share.cmx minimap.cmx mini.cmx nums.cmxa \
graph_segmenter.cmx checkpoints.cmx automaton.cmx interface.cmx -o interface
graph_segmenter.cmx checkpoints.cmx automaton.cmx interfaceParams.cmx \
interface.cmx -o interface
user_aid: user_aid.cmx
$(LINK) list2.cmx gen.cmx paths.cmx version.cmx date.cmx \
......@@ -218,8 +219,9 @@ morpho_scl.cmx load_morphs.cmx lexer.cmx constraints.cmx checkpoints.cmx \
paraphrase.cmx scl_parser.cmx parser.cmx -o parser
manager: manager.cmx
$(LINK) paths.cmx version.cmx date.cmx html.cmx SCLpaths.cmx \
web.cmx cgi.cmx dir.cmx $< -o $@
$(LINK) str.cmxa paths.cmx version.cmx date.cmx html.cmx \
SCLpaths.cmx web.cmx cgi.cmx dir.cmx \
interfaceParams.cmx $< -o $@
add_corpus: add_corpus.cmx
$(LINK) paths.cmx version.cmx date.cmx html.cmx SCLpaths.cmx \
......@@ -238,8 +240,10 @@ mk_grammar_page: mk_grammar_page.cmx
SCLpaths.cmx web.cmx mk_grammar_page.cmx -o mk_grammar_page
mk_reader_page: mk_reader_page.cmx
$(LINK) gen.cmx paths.cmx version.cmx date.cmx control.cmx \
html.cmx SCLpaths.cmx web.cmx cgi.cmx mk_reader_page.cmx -o mk_reader_page
$(LINK) gen.cmx paths.cmx version.cmx date.cmx control.cmx \
html.cmx SCLpaths.cmx web.cmx cgi.cmx \
interfaceParams.cmx mk_reader_page.cmx \
-o mk_reader_page
mk_nyaaya_page: mk_nyaaya_page.cmx
$(LINK) gen.cmx paths.cmx version.cmx date.cmx \
......
(*i module Dir = struct i*)
(* Return the list of files in the given directory with their absolute
name. *)
value abs_files dir =
......@@ -8,18 +6,14 @@ value abs_files dir =
;
value basenames files = List.map Filename.basename files
;
value sort l = List.sort String.compare l
;
value subdirs dir =
value subdirs dir cmp =
let subdirs = List.filter Sys.is_directory (abs_files dir) in
subdirs |> basenames |> sort
subdirs |> basenames |> List.sort cmp
;
value file_with_ext ext file =
not (Sys.is_directory file) && Filename.check_suffix file ("." ^ ext)
;
value files_with_ext ext dirname =
let files = List.filter (file_with_ext ext) (abs_files dirname) in
files |> basenames |> sort
value files_with_ext ext dir cmp =
let files = List.filter (file_with_ext ext) (abs_files dir) in
files |> basenames |> List.sort cmp
;
(*i end; i*)
(* Directory operations *)
(*i module Dir : sig i*)
(* [subdirs dirname] returns the list of subdirectories of [dirname].
This list is sorted alphabetically. *)
value subdirs : string -> list string
(* [subdirs dir cmp] returns the list of subdirectories of [dir] sorted
according to the function [cmp]. *)
value subdirs : string -> (string -> string -> int) -> list string
;
(* [files_with_ext ext dirname] returns the list of files in [dirname]
with the extension [ext]. This list is sorted alphabetically. *)
value files_with_ext : string -> string -> list string
(* [files_with_ext ext dir cmp] returns the list of files in [dir] with
the extension [ext] sorted according to the function [cmp]. *)
value files_with_ext :
string -> string -> (string -> string -> int) -> list string
;
(*i end; i*)
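A minimal usage sketch of the new comparator parameter (the corpus path and the numeric comparator below are hypothetical; the actual caller, manager.ml further down, passes cmp_section_file):
(* Hypothetical sketch: sections sorted alphabetically, sentence files numerically. *)
value numeric_cmp file file' =
  compare (int_of_string (Filename.chop_extension file))
          (int_of_string (Filename.chop_extension file'))
;
value sections = Dir.subdirs "CORPUS/" String.compare
and sentences = Dir.files_with_ext "html" "CORPUS/gretil/" numeric_cmp
;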
......@@ -109,6 +109,28 @@ value option_select_default_id id label list_options =
value text_input id control =
xml_empty_with_att "input" [ ("id",id); ("type","text"); ("name",control) ]
;
type int_input_attrs = {
step : int;
min : int;
max : int;
default : int
}
;
value int_input id control extra_attrs =
let attrs = [ ("id", id); ("type", "number"); ("name", control) ] in
let extra_attrs =
match extra_attrs with
[ None -> []
| Some attrs ->
[ ("step", string_of_int attrs.step);
("min", string_of_int attrs.min) ;
("max", string_of_int attrs.max);
("value", string_of_int attrs.default) ]]
in
xml_empty_with_att "input" (attrs @ extra_attrs)
;
value int_input' control extra_attrs = int_input control control extra_attrs
;
value radio_input control v label =
let attrs = [ ("type","radio"); ("name",control); ("value",v) ] in
(xml_empty_with_att "input" attrs) ^ label
......
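As a rough illustration of the new number input above (rendering assumed from [xml_empty_with_att]; the bounds below are hypothetical), a call such as
Html.int_input' "outfile" (Some { Html.step = 1; Html.min = 1; Html.max = 10; Html.default = 4 })
should emit an element along the lines of
<input id="outfile" type="number" name="outfile" step="1" min="1" max="10" value="4">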
......@@ -473,7 +473,8 @@ value check_sentence translit us text_orig checkpoints sentence
; max_col.val := 0
}
;
value arguments trans lex cache st us cp input topic abs sol_num corpus id ln =
value arguments trans lex cache st us cp input topic abs sol_num corpus id ln
outdir outfile =
"t=" ^ trans ^ ";lex=" ^ lex ^ ";cache=" ^ cache ^ ";st=" ^ st ^ ";us=" ^ us ^
";cp=" ^ cp ^ ";text=" ^ input ^ ";topic=" ^ topic ^ ";abs=" ^ abs ^
match sol_num with
......@@ -483,7 +484,9 @@ value arguments trans lex cache st us cp input topic abs sol_num corpus id ln =
match corpus with
[ "" -> ""
| c -> ";corpus=" ^ c ^ ";sentenceNumber=" ^ id ^ ";linkNumber=" ^ ln
]
] ^
";" ^ InterfaceParams.outdir ^ "=" ^ outdir ^
";" ^ InterfaceParams.outfile ^ "=" ^ outfile
;
(* Cache management *)
......@@ -527,8 +530,11 @@ value graph_engine () = do
and sent_id = get "sentenceNumber" env "0"
and link_num = get "linkNumber" env "0" (* is there a better default? *)
and sol_num = get "allSol" env "0" in (* Needed for Validate mode *)
let outdir = Cgi.get InterfaceParams.outdir env "" in
let outfile = Cgi.get InterfaceParams.outfile env "" in
let text = arguments translit lex cache st us cp url_encoded_input
url_encoded_topic abs sol_num corpus sent_id link_num
outdir outfile
and checkpoints =
try let url_encoded_cpts = List.assoc "cpts" env in (* do not use get *)
parse_cpts (decode_url url_encoded_cpts)
......@@ -578,6 +584,7 @@ value graph_engine () = do
List.map revise checkpoints
and updated_text = arguments translit lex cache st us cp updated_input
url_encoded_topic abs sol_num corpus sent_id link_num
outdir outfile
and new_input = decode_url updated_input in
check_sentence translit uns updated_text revised_check
new_input sol_num corpus sent_id link_num
......
value outdir = "outdir"
;
value outfile = "outfile"
;
(* Parameters of CGI [interface] *)
(* Parameter to pass to CGI [interface] for specifying the output
directory when the corpus mode is enabled. *)
value outdir : string
;
(* Parameter to pass to CGI [interface] for specifying the output file
when the corpus mode is enabled. *)
value outfile : string
;
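Concretely (values hypothetical), the [arguments] function above now appends a fragment of the form ;outdir=gretil/;outfile=4 to the query string passed to the [interface] CGI, and the reader page carries the same two fields along as hidden inputs.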
......@@ -2,27 +2,81 @@
sentences of the corpus and calling [add_corpus] to add a sentence to
the corpus. *)
(*i module Manager = struct i*)
(*************)
(* Utilities *)
(*************)
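(* [int_of_file file] reads the basename of [file], minus its extension,
   as a sentence number, e.g. ".../4.html" yields 4. *)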
value int_of_file file =
let chop_extension file =
try Filename.chop_extension file with [ Invalid_argument _ -> file ]
in
file
|> Filename.basename
|> chop_extension
|> int_of_string
;
value cmp_section_file file file' =
compare (int_of_file file) (int_of_file file')
;
value split file = Str.split (Str.regexp Filename.dir_sep) file
;
value rec first_gap = fun
[ [] -> 1
| [ h ] -> h
| [ x ; y :: t ] -> if y = x + 1 then first_gap [ y :: t ] else x + 1
]
;
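(* Illustration with hypothetical sentence numbers: [first_gap [1; 2; 4; 5]]
   evaluates to 3 and [first_gap []] to 1; the result pre-fills the sentence
   number of the "Add sentence" form below. *)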
value dir_key = "q"
(******************)
(* CGI parameters *)
(******************)
value dir_param = "q"
;
value sentence_links dir =
let to_anchor_ref file = Html.anchor_ref (Web.corpus_url ^ dir ^ file) file in
let files = Dir.files_with_ext "html" (Web.corpus_dir ^ dir) in
(*******************)
(* Page generation *)
(*******************)
value sentence_links dir files =
let to_anchor_ref file =
Html.anchor_ref (Web.corpus_url ^ dir ^ file) (Filename.chop_extension file)
in
List.map to_anchor_ref files
;
value subdir_selection dir subdirs =
let prefixed_subdirs = List.map (fun x -> dir ^ x ^ "/") subdirs in
Html.option_select_label dir_key (List.combine prefixed_subdirs subdirs)
let prefixed_subdirs =
List.map (fun x -> dir ^ x ^ Filename.dir_sep) subdirs
in
Html.option_select_label dir_param (List.combine prefixed_subdirs subdirs)
;
value body dir subdirs =
value body dir =
let subdirs =
let cmp_subdir =
let section path = List.length (split path) > 1 in
if section dir then cmp_section_file else String.compare
in
Dir.subdirs (Web.corpus_dir ^ dir) cmp_subdir
in
match subdirs with
[ [] ->
let files =
Dir.files_with_ext "html" (Web.corpus_dir ^ dir) cmp_section_file
in
let attrs =
let first_missing_sentence files =
first_gap (List.map int_of_file files)
in
{ Html.step = 1;
Html.min = 1;
Html.max = max_int;
Html.default = first_missing_sentence files }
in
do
{ sentence_links dir |> List.iter Web.pl
; Web.cgi_begin Web.add_corpus_cgi "" |> Web.pl
; Html.submit_input "Add" |> Web.pl
{ sentence_links dir files |> List.iter Web.pl
; Web.cgi_begin (Web.cgi_bin "skt_heritage") "" |> Web.pl
; Html.hidden_input InterfaceParams.outdir dir |> Web.pl
; Html.int_input' InterfaceParams.outfile (Some attrs) |> Web.pl
; Html.submit_input "Add sentence" |> Web.pl
; Web.cgi_end |> Web.pl }
| _ ->
do
{ Web.cgi_begin Web.manager_cgi "" |> Web.pl
......@@ -36,14 +90,17 @@ value make () =
let style = Html.background Html.Chamois in
let query = Cgi.query_string () in
let env = Cgi.create_env query in
let dir = Cgi.decode_url (Cgi.get dir_key env "") in
let subdirs = Dir.subdirs (Web.corpus_dir ^ dir) in
let dir = Cgi.decode_url (Cgi.get dir_param env "") in
do
{ Web.http_header |> Web.pl
; Web.page_begin meta_title
; Html.body_begin style |> Web.pl
; Html.h1_title title |> Web.pl
; body dir subdirs
; Html.center_begin |> Web.pl
(* add links at the top to navigate quickly in the corpus *)
(* ; split dir |> List.iter Web.pl *)
; body dir
; Html.center_end |> Web.pl
; Web.page_end Html.default_language True }
;
......@@ -52,5 +109,3 @@ value make () =
(***************)
value main = make ()
;
(*i end; i*)
......@@ -49,7 +49,7 @@ value reader_page () = do
[ Some lang -> do
{ open_html_file (reader_page lang) reader_meta_title; (lang,"") }
| None -> do
{ reader_prelude ""; (English,Sys.getenv "QUERY_STRING") }
{ reader_prelude ""; (Html.default_language, Sys.getenv "QUERY_STRING") }
] in try
let env = create_env query in
let url_encoded_input = get "text" env ""
......@@ -62,7 +62,9 @@ value reader_page () = do
and translit = get "t" env Paths.default_transliteration in
(* Contextual information from past discourse *)
let topic_mark = decode_url url_encoded_topic
and text = decode_url url_encoded_input in do
and text = decode_url url_encoded_input in
let outdir = Cgi.get InterfaceParams.outdir env "" in
let outfile = Cgi.get InterfaceParams.outfile env "" in do
{ pl (body_begin back_ground)
; print_title (Some lang) reader_title
; pl center_begin
......@@ -101,6 +103,8 @@ value reader_page () = do
; pl " Mode "
; pl (option_select_default_id "mode_id" "mode"
(interaction_modes_default url_encoded_mode))
; Html.hidden_input InterfaceParams.outdir outdir |> Web.pl
; Html.hidden_input InterfaceParams.outfile outfile |> Web.pl
; pl html_break
; pl (submit_input "Read")
; pl (reset_input "Reset")
......@@ -108,7 +112,8 @@ value reader_page () = do
; pl center_end
; match out_mode.val with
[ Some lang -> close_html_file lang True
| None -> do { close_page_with_margin (); page_end English True }
| None ->
do { close_page_with_margin (); page_end Html.default_language True }
]
}
with
......
......@@ -521,6 +521,7 @@ value print_site_map dyn lang = (* the various Web services of the site *)
; ps (anchor_ref (grammar_page_url lang) (emph "Grammar")); pl " | "
; ps (anchor_ref (sandhi_page_url lang) (emph "Sandhi")); pl " | "
; ps (anchor_ref (reader_page_url lang) (emph "Reader")); pl " | "
; ps (anchor_ref manager_cgi (emph "Manager")); pl " | "
; ps (anchor_ref (faq_page_url lang) (emph "Help")); pl " | "
; pl (anchor_ref (portal_page_url lang) (emph "Portal"))
}
......@@ -531,6 +532,7 @@ value print_site_map dyn lang = (* the various Web services of the site *)
; ps (anchor_ref (dico_grammar_page lang) (emph "Grammar")); pl " | "
; ps (anchor_ref (dico_sandhi_page lang) (emph "Sandhi")); pl " | "
; ps (anchor_ref (dico_reader_page lang) (emph "Reader")); pl " | "
; ps (anchor_ref manager_cgi (emph "Manager")); pl " | "
; ps (anchor_ref (rel_faq_page_url lang) (emph "Help")); pl " | "
; pl (anchor_ref (rel_portal_page_url lang) (emph "Portal"))
}
......
......@@ -33,6 +33,12 @@ MW=$(SKTRESOURCES)MW# hypertext Monier-Williams
GOLD=$(SKTRESOURCES)GOLD# Heritage in Goldendict format databases
XMLBANKS=$(SKTRESOURCES)XML# Heritage Sanskrit morphology in XML databases
# Path to global Sanskrit corpus downloaded from Git.
SKTCORPUS = #SKTCORPUS
# Path to the corpus in the server area.
SERVERCORPUS = $(SERVERPUBLICDIR)CORPUS
# Virtual path for make
VPATH=ML
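For instance (hypothetical local path), after configuration the placeholder above could become SKTCORPUS = /home/user/SKT_CORPUS/, which the releasedata target further down copies into $(SERVERPUBLICDIR)CORPUS for the Web server.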
......@@ -238,7 +244,7 @@ ML/rank.ml ML/scl_parser.ml ML/reader.ml ML/parser.ml ML/constraints.mli \
ML/constraints.ml ML/multilingual.ml ML/paraphrase.mli ML/paraphrase.ml \
ML/bank_lexer.ml ML/regression.ml ML/checkpoints.ml \
ML/graph_segmenter.ml ML/interface.mli ML/interface.ml ML/user_aid.ml \
ML/reset_caches.ml
ML/reset_caches.ml ML/interfaceParams.mli ML/interfaceParams.ml
# WEB package - HTML, HTTP, CGI ad-hoc utilities for Web interface
WEB=ML/html.ml ML/web.ml ML/css.ml ML/cgi.ml ML/morpho_html.ml ML/chapters.ml \
......@@ -398,6 +404,7 @@ echo 'and default_lexicon = "$(LEXICON)"'; \
echo 'and default_display_font = "$(DISPLAY)"'; \
echo 'and skt_install_dir = "$(SKTINSTALLDIR)"'; \
echo 'and skt_resources_dir = "$(SKTRESOURCES)"'; \
echo 'and skt_corpus_dir = "$(SKTCORPUS)"'; \
echo 'and public_skt_dir = "$(SERVERPUBLICDIR)"'; \
echo 'and skt_dir_url = "$(SKTDIRURL)"'; \
echo 'and server_host = "$(SERVERHOST)"'; \
......@@ -497,6 +504,10 @@ releasedata: abrevs
cp $(SITEPAGE_EN) $(SERVERPUBLICDIR)index.html; \
fi # default language from lexicon parameter
cp -R $(DICO) $(SERVERPUBLICDIR) # hypertext Heritage dictionary
if [ $(SKTCORPUS) ]; then \
cp -R $(SKTCORPUS) $(SERVERCORPUS); \
chown -R www-data $(SERVERCORPUS); \
fi # Sanskrit corpus
# so that cgis invocations in DICO pages are relocated consistently with config:
cd $(SERVERPUBLICDIR) && for i in `ls DICO/*.html`; do (\
sed '1,$$s|!CGIDECL|$(CGIBINURL)$(CGIDECL)|g' < $$i >temp; \
......
......@@ -487,6 +487,7 @@ Artwork credits</h2>
<a href="DICO/grammar.#LANG.html"><strong>Grammar</strong></a> |
<a href="DICO/sandhi.#LANG.html"><strong>Sandhi</strong></a> |
<a href="DICO/reader.#LANG.html"><strong>Reader</strong></a> |
<a href="/cgi-bin/heritage-platform/manager.cgi"><strong>Manager</strong></a> |
<a href="faq.#LANG.html"><strong>Help</strong></a> |
<a href="portal.#LANG.html"><strong>Portal</strong></a>
</td></tr><tr><td>© Gérard Huet 1994-2017</td></tr></table></td><td>
......
......@@ -22,8 +22,8 @@ goldendict_path="SITE/goldendict.html"
meta_docintro_path="DOC/docintro"
docintro_path="DOC/docintro.tex"
makefile_keys=["TEMPLATE","PLATFORM","TRANSLIT","LEXICON","DISPLAY","WARN",
"SERVERHOST","SERVERPUBLICDIR","SKTDIRURL","SKTRESOURCES","CGIBINURL","CGIDIR",
"CGIEXT","MOUSEACTION"]
"SERVERHOST","SERVERPUBLICDIR","SKTDIRURL","SKTRESOURCES",
"SKTCORPUS","CGIBINURL","CGIDIR","CGIEXT","MOUSEACTION"]
#"SCLURL","SCLINSTALLDIR","TEMPAREA","OUTPUTFONT",
counter_keys=["CAPTION"]
version_keys=["VERSION","DATE"]
......