Commit 23dd2736 authored by Idir Lankri's avatar Idir Lankri

Corpus_manager: Add the possibility to create a new corpus subdirectory

parent b31f82b5
......@@ -22,6 +22,7 @@ ML/mk_grammar_page
ML/mk_index_page
ML/mk_reader_page
ML/mk_sandhi_page
ML/mkdir_corpus
ML/parser
ML/reader
ML/regression
......
......@@ -292,15 +292,24 @@ corpus.cmo : ../ZEN/word.cmo web.cmo sanskrit.cmi params.cmi interface.cmi \
corpus.cmx : ../ZEN/word.cmx web.cmx sanskrit.cmx params.cmx interface.cmx \
../ZEN/gen.cmx encode.cmx dir.cmx cgi.cmx corpus.cmi
corpus_manager.cmi :
corpus_manager.cmo : web.cmo paths.cmo params.cmi multilingual.cmo html.cmo \
dir.cmi corpus.cmi canon.cmo corpus_manager.cmi
corpus_manager.cmx : web.cmx paths.cmx params.cmx multilingual.cmx html.cmx \
dir.cmx corpus.cmx canon.cmx corpus_manager.cmi
corpus_manager.cmo : web.cmo paths.cmo params.cmi multilingual.cmo \
mkdir_corpus_params.cmi html.cmo dir.cmi corpus.cmi canon.cmo \
corpus_manager.cmi
corpus_manager.cmx : web.cmx paths.cmx params.cmx multilingual.cmx \
mkdir_corpus_params.cmx html.cmx dir.cmx corpus.cmx canon.cmx \
corpus_manager.cmi
corpus_manager_cgi.cmo : params.cmi corpus_manager.cmi cgi.cmo
corpus_manager_cgi.cmx : params.cmx corpus_manager.cmx cgi.cmx
save_corpus_cgi.cmo : web.cmo params.cmi corpus_manager.cmi corpus.cmi \
cgi.cmo
save_corpus_cgi.cmx : web.cmx params.cmx corpus_manager.cmx corpus.cmx \
cgi.cmx
mkdir_corpus_params.cmi :
mkdir_corpus_params.cmo : params.cmi mkdir_corpus_params.cmi
mkdir_corpus_params.cmx : params.cmx mkdir_corpus_params.cmi
mkdir_corpus_cgi.cmo : web.cmo mkdir_corpus_params.cmi corpus_manager.cmi \
corpus.cmi cgi.cmo
mkdir_corpus_cgi.cmx : web.cmx mkdir_corpus_params.cmx corpus_manager.cmx \
corpus.cmx cgi.cmx
mk_corpus.cmo : paths.cmo params.cmi corpus.cmi
mk_corpus.cmx : paths.cmx params.cmx corpus.cmx
......@@ -56,7 +56,9 @@ mk_corpus_page.ml
# CORPUS package - corpus manager
CORPUS = corpus.mli corpus.ml corpus_manager.mli corpus_manager.ml \
corpus_manager_cgi.ml save_corpus_cgi.ml mk_corpus.ml
corpus_manager_cgi.ml save_corpus_cgi.ml \
mkdir_corpus_params.mli mkdir_corpus_params.ml \
mkdir_corpus_cgi.ml mk_corpus.ml
# extra file SCLpaths.ml for SCL interfacing - not distributed.
......@@ -147,7 +149,7 @@ bank_lexer.cmo tag_tree.cmo tag_apte.cmo
cgis: skt_morph.cmi auto.cmi indexer indexerd declension conjugation \
lemmatizer reader parser sandhier reader interface user_aid \
corpus_manager save_corpus
corpus_manager save_corpus mkdir_corpus
indexer: indexer.cmx
$(LINK) list2.cmx gen.cmx paths.cmx version.cmx date.cmx \
......@@ -242,7 +244,7 @@ corpus_manager: corpus_manager_cgi.cmx
share.cmx minimap.cmx mini.cmx nums.cmxa \
graph_segmenter.cmx checkpoints.cmx automaton.cmx \
params.cmx interface.cmx multilingual.cmx dir.cmx \
corpus.cmx corpus_manager.cmx $< -o $@
corpus.cmx mkdir_corpus_params.cmx corpus_manager.cmx $< -o $@
save_corpus: save_corpus_cgi.cmx
$(LINK) str.cmxa unix.cmxa list2.cmx gen.cmx paths.cmx \
......@@ -257,7 +259,22 @@ save_corpus: save_corpus_cgi.cmx
share.cmx minimap.cmx mini.cmx nums.cmxa \
graph_segmenter.cmx checkpoints.cmx automaton.cmx \
params.cmx interface.cmx multilingual.cmx dir.cmx \
corpus.cmx corpus_manager.cmx $< -o $@
corpus.cmx mkdir_corpus_params.cmx corpus_manager.cmx $< -o $@
# Link the mkdir_corpus CGI executable. The only prerequisite,
# mkdir_corpus_cgi.cmx, is passed last on the link line as $<, after the
# library archives and module objects it is linked with; $@ names the output.
mkdir_corpus: mkdir_corpus_cgi.cmx
$(LINK) str.cmxa unix.cmxa list2.cmx gen.cmx paths.cmx \
version.cmx date.cmx control.cmx word.cmx canon.cmx \
zen_lexer.cmx phonetics.cmx transduction.cmx encode.cmx \
skt_lexer.cmx padapatha.cmx sanskrit.cmx deco.cmx \
lexmap.cmx inflected.cmx html.cmx SCLpaths.cmx web.cmx \
naming.cmx morpho_string.cmx morpho.cmx \
load_transducers.cmx pada.cmx phases.cmx dispatcher.cmx \
order.cmx chapters.cmx morpho_html.cmx bank_lexer.cmx \
cgi.cmx load_morphs.cmx int_sandhi.cmx nouns.cmx \
share.cmx minimap.cmx mini.cmx nums.cmxa \
graph_segmenter.cmx checkpoints.cmx automaton.cmx \
params.cmx interface.cmx multilingual.cmx dir.cmx \
corpus.cmx mkdir_corpus_params.cmx corpus_manager.cmx $< -o $@
mk_corpus: mk_corpus.cmx
$(LINK) str.cmxa unix.cmxa list2.cmx gen.cmx paths.cmx \
......@@ -272,7 +289,7 @@ mk_corpus: mk_corpus.cmx
share.cmx minimap.cmx mini.cmx nums.cmxa \
graph_segmenter.cmx checkpoints.cmx automaton.cmx \
params.cmx interface.cmx multilingual.cmx dir.cmx \
corpus.cmx corpus_manager.cmx $< -o $@
corpus.cmx mkdir_corpus_params.cmx corpus_manager.cmx $< -o $@
css: css.cmx
$(LINK) gen.cmx paths.cmx version.cmx date.cmx html.cmx \
......@@ -377,7 +394,7 @@ clean:
rm -f css indexer indexerd sandhier reader parser lemmatizer \
declension conjugation mk_index_page mk_grammar_page mk_reader_page regression \
mk_sandhi_page mk_corpus_page sandhi_test reset_caches interface user_aid \
parse_apte tag_apte corpus_manager save_corpus mk_corpus
parse_apte tag_apte corpus_manager save_corpus mkdir_corpus mk_corpus
# make dico.cmi auto.cmi conj_infos.cmi # needed to get dependencies right
.SUFFIXES: .ml .mli .cmx .cmo .cmi
......
......@@ -61,3 +61,6 @@ value save_sentence ~corpus_location ~query =
; Web.output_channel.val := stdout
}
;
(* Create the directory whose path is the plain concatenation
   [corpus_location ^ dirname], with permission bits 0o755.  No separator is
   inserted between the two parts, so [corpus_location] is expected to carry
   its own trailing separator.  Errors from [Unix.mkdir] are not caught
   here. *)
value mkdir ~corpus_location ~dirname =
Unix.mkdir (corpus_location ^ dirname) 0o755
;
......@@ -21,8 +21,7 @@ value dump_sentence_metadata : sentence_metadata -> string -> string -> unit
(* Return the identifier of the sentence stored in the given file. *)
value sentence_id : string -> int
;
value save_sentence :
~corpus_location:string -> ~query:string -> unit
(* ~corpus_dir:string -> ~sentence_no:int -> *)
(* ~translit:string -> ~unsandhied:bool -> ~text:string -> unit *)
value save_sentence : ~corpus_location:string -> ~query:string -> unit
;
(* Create the directory whose path is [corpus_location ^ dirname]. *)
value mkdir : ~corpus_location:string -> ~dirname:string -> unit
;
......@@ -92,7 +92,7 @@ value uplinks' dir =
in
List.map uplink updirs
;
value uplinks dir final_sep =
value uplinks dir =
let dir_sep = " / " in
let links = uplinks' dir in
let cur_dir =
......@@ -101,7 +101,7 @@ value uplinks dir final_sep =
| _ -> dir_sep ^ Filename.basename dir
]
in
String.concat dir_sep links ^ cur_dir ^ if final_sep then dir_sep else ""
String.concat dir_sep links ^ cur_dir
;
(* Display sentences with format "sentence || sentno" like in citations
file. *)
......@@ -141,7 +141,7 @@ value subdir_selection dir subdirs =
;
value add_sentence_form dir gap =
Web.cgi_begin (Web.cgi_bin "skt_heritage") "" ^
uplinks dir True ^ "Sentence number: " ^
uplinks dir ^ " / Sentence number: " ^
Html.hidden_input Params.corpus_dir dir ^
Html.int_input
~name:Params.sentence_no
......@@ -187,26 +187,39 @@ value group_sentences dir files =
;
value body dir =
match Corpus.content (Web.corpus_dir ^ dir) with
(* When files = [], it is possible to create a subdir or add a sentence... *)
[ Corpus.Sentences files ->
let groups = group_sentences dir files in
do
{ Html.h2_begin Html.B2 |> Web.pl
; uplinks dir False |> Web.pl
; uplinks dir |> Web.pl
; Html.h2_end |> Web.pl
; groups |> List.map (htmlify_group dir) |> List.iter Web.pl
; Html.html_break |> Web.pl
}
| Corpus.Sections subdirs ->
let selection_prompt =
uplinks dir (dir <> "") ^ subdir_selection dir subdirs ^ " " ^
Html.submit_input "Select"
"Explore " ^ subdir_selection dir subdirs ^ " " ^ Html.submit_input "Go"
in
do
{ Web.cgi_begin Web.corpus_manager_cgi "" |> Web.pl
; Html.h2_begin Html.C2 |> Web.pl
; selection_prompt |> Web.pl
{ Html.center_begin |> Web.pl
; Html.h2_begin Html.B2 |> Web.pl
; uplinks dir |> Web.pl
; Html.h2_end |> Web.pl
; Web.cgi_begin Web.corpus_manager_cgi "" |> Web.pl
; Html.h3_begin Html.B3 |> Web.pl
; selection_prompt |> Web.pl
; Html.h3_end |> Web.pl
; Web.cgi_end |> Web.pl
; Web.cgi_begin Web.mkdir_corpus_cgi "" |> Web.pl
; Html.h3_begin Html.B3 |> Web.pl
; "New heading: " |> Web.pl
; Html.hidden_input Mkdir_corpus_params.parent_dir dir |> Web.pl
; Html.text_input "foo" Mkdir_corpus_params.dirname |> Web.pl
; Html.submit_input "Create" |> Web.pl
; Html.h3_end |> Web.pl
; Web.cgi_end |> Web.pl
; Html.center_end |> Web.pl
}
]
;
......
(* CGI script [mkdir_corpus] for creating a new corpus subdirectory. *)
(* Reads the [dirname] and [parent_dir] CGI parameters, creates the
   subdirectory [parent_dir ^ dirname] under [Web.corpus_dir], then displays
   the corpus manager page for [parent_dir].

   [dirname] comes straight from the query string, so it is checked before
   being used in a filesystem path: it must be non-empty, must not be "." or
   "..", and must not contain a slash.  When the check fails no directory is
   created and the parent page is simply displayed again. *)
value main =
  let query = Cgi.query_string () in
  let env = Cgi.create_env query in
  let dirname = Cgi.decoded_get Mkdir_corpus_params.dirname "" env in
  let parent_dir = Cgi.decoded_get Mkdir_corpus_params.parent_dir "" env in
  let valid_dirname =
    dirname <> "" && dirname <> "." && dirname <> ".."
    && not (String.contains dirname '/')
  in
  do
  { if valid_dirname then
      try
        Corpus.mkdir ~corpus_location:Web.corpus_dir
          ~dirname:(parent_dir ^ dirname)
      with
      (* The directory is already there: the requested state is reached, so
         carry on and display the parent page.  Other errors propagate. *)
      [ Unix.Unix_error Unix.EEXIST _ _ -> () ]
    else ()
  ; Corpus_manager.make parent_dir
  }
;
(* Name of the CGI parameter holding the name of the directory to create. *)
value dirname = "dirname"
;
(* Name of the CGI parameter holding the parent directory; it is the same
   parameter name as [Params.corpus_dir]. *)
value parent_dir = Params.corpus_dir
;
(* Name of the CGI parameter holding the name of the directory to create. *)
value dirname : string
;
(* Name of the CGI parameter holding the parent directory. *)
value parent_dir : string
;
......@@ -53,6 +53,7 @@ and user_aid_cgi = cgi_bin Paths.cgi_user_aid (* unknown chunks processing *
and sandhier_cgi = cgi_bin Paths.cgi_sandhier (* sandhier *)
and corpus_manager_cgi = cgi_bin Paths.cgi_corpus_manager (* Corpus manager *)
and save_corpus_cgi = cgi_bin Paths.cgi_save_corpus
and mkdir_corpus_cgi = cgi_bin Paths.cgi_mkdir_corpus
;
(* Absolute paths on development site *)
value resources name = Paths.skt_resources_dir ^ name ^ "/"
......
......@@ -199,6 +199,9 @@ CGIMANAGER = sktcorpus$(CGIEXT)
# CGI name under which the program that enriches the corpus is installed
# on the server.
CGISAVECORPUS = savecorpus$(CGIEXT)
# CGI name under which the program that creates a corpus subdirectory is
# installed on the server.
CGIMKDIRCORPUS = mkdircorpus$(CGIEXT)
# what mouse action to use for viewing lemmatization
MOUSEACTION=#MOUSEACTION
......@@ -267,7 +270,8 @@ ML/tag_apte.ml
# CORPUS package - corpus manager
CORPUS = ML/corpus.mli ML/corpus.ml ML/corpus_manager.mli \
ML/corpus_manager.ml ML/corpus_manager_cgi.ml \
ML/save_corpus_cgi.ml ML/mk_corpus.ml
ML/save_corpus_cgi.ml ML/mkdir_corpus_cgi.ml \
ML/mk_corpus.ml
DEBUG= ML/morpho_debug.ml ML/debug.ml
......@@ -436,6 +440,7 @@ echo 'and cgi_graph = "$(CGIGRAPH)"'; \
echo 'and cgi_user_aid = "$(CGIUSER)"'; \
echo 'and cgi_corpus_manager = "$(CGIMANAGER)"'; \
echo 'and cgi_save_corpus = "$(CGISAVECORPUS)"'; \
echo 'and cgi_mkdir_corpus = "$(CGIMKDIRCORPUS)"'; \
echo 'and mouse_action = "$(MOUSEACTION)";') > ML/paths.ml
$(MAKE) version
......@@ -504,6 +509,7 @@ releasecgi:
$(MAKE) sandhi
$(MAKE) corpus_manager
$(MAKE) save_corpus
$(MAKE) mkdir_corpus
abrevs:
cp $(BOOK)/abrevs.pdf SITE # for documenting the Website with abbreviations
......@@ -599,6 +605,10 @@ corpus_manager: ML/corpus_manager
save_corpus: ML/save_corpus
cp $< $(CGIDIR)$(CGISAVECORPUS)
# Install the ML/mkdir_corpus executable into $(CGIDIR) under the CGI name
# $(CGIMKDIRCORPUS).  Declared phony because the recipe does not produce a
# file named after the target.
.PHONY: mkdir_corpus
mkdir_corpus: ML/mkdir_corpus
cp $< $(CGIDIR)$(CGIMKDIRCORPUS)
#### Version Management and Distribution are now managed by Git ####
##############################
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment