Commit 195d876b authored by Idir Lankri

mk_corpus: Make the program more robust

parent 23dd2736
......@@ -37,60 +37,81 @@ value url_encode s =
(* [query_of_env env] renders the association list [env] as a query string:
   each pair [(k, v)] becomes [k=<url_encode v>] and the pairs are joined
   with ["&"].  Only the values go through [url_encode]; keys are emitted
   verbatim. *)
value query_of_env env =
  let binding (key, v) = key ^ "=" ^ url_encode v in
  String.concat "&" (List.map binding env)
;
(* [abort report_error status] calls [report_error ()] and then terminates
   the program with exit code [status].  Because it ends in [exit], the
   result type is unconstrained and it can be used in any branch. *)
value abort report_error status =
  let _ = report_error () in
  exit status
;
(* Regular expression for a citation macro: a literal backslash followed by
   [citation{ ... }].  Group 1 captures the text between the opening brace
   and the last closing brace on the line ([.*] is greedy). *)
value citation_regexp =
  let pattern = "\\\\citation{\\(.*\\)}" in
  Str.regexp pattern
;
(* [extract_citation env corpus_location line line_no] handles one line of
   the citation file.

   If [line] starts with a citation macro (see [citation_regexp]), the text
   captured by the macro is prepended to [env] under the key ["text"], the
   whole environment is turned into a query string, and the sentence is
   stored with [Corpus.save_sentence ~corpus_location ~query].

   Otherwise the program is aborted with exit code 1 and a "wrong input
   format" message mentioning [line_no].

   Any exception raised while building the query or saving the sentence also
   aborts with exit code 1, but is reported with the exception's own
   description rather than being misreported as a format error. *)
value extract_citation env corpus_location line line_no =
  if not (Str.string_match citation_regexp line 0) then
    abort (fun () ->
      Printf.eprintf
        "Line %d: \
         Wrong input format (expect one citation macro per line)\n" line_no
    ) 1
  else
    try
      (* [Str.matched_group] reads the state left by the [string_match]
         above, so no other [Str] call may come in between. *)
      let query = query_of_env [ ("text", Str.matched_group 1 line) :: env ] in
      Corpus.save_sentence ~corpus_location ~query
    with
    [ exn ->
        abort (fun () ->
          Printf.eprintf "Line %d: %s\n" line_no (Printexc.to_string exn)
        ) 1
    ]
;
(* [populate_corpus dirname file] reads [file] line by line and hands every
   line, together with its 1-based line number, to [extract_citation].

   [dirname] is a reference holding the destination directory.  If it is
   empty the program aborts with exit code 1 and a hint to use [--help].
   An absolute destination is split into its parent directory (used as
   [corpus_location], with a trailing separator) and its base name; a
   relative destination is used as is, with an empty [corpus_location].
   The destination is created with [Corpus.mkdir] before any line is
   processed.

   The input channel is closed on normal completion and also when an
   exception escapes; the exception is then re-raised. *)
value populate_corpus dirname file =
  if dirname.val = "" then
    abort (fun () ->
      Printf.eprintf
        "Please specify the destination directory. \
         See %s --help.\n" (Filename.basename Sys.argv.(0))
    ) 1
  else
    let ch = open_in file in
    let (corpus_location, dirname) =
      if Filename.is_relative dirname.val then
        ("", dirname.val)
      else
        (Filename.dirname dirname.val ^ Filename.dir_sep,
         Filename.basename dirname.val)
    in
    let dirname = dirname ^ Filename.dir_sep in
    (* Reading is kept out of the loop body so that the recursive call below
       is a genuine tail call: a call made inside [try ... with] is not, and
       would grow the stack with the number of lines. *)
    let next_line () =
      try Some (input_line ch) with [ End_of_file -> None ]
    in
    let rec aux i =
      match next_line () with
      [ Some line ->
          let env =
            [ (Params.corpus_dir, dirname)
            ; (Params.sentence_no, string_of_int i)
            ; ("t", Paths.default_transliteration)
            ]
          in
          do
            { extract_citation env corpus_location line i
            ; aux (i + 1)
            }
      | None -> () ]
    in
    try
      do
        { Corpus.mkdir ~corpus_location ~dirname
        ; aux 1
        ; close_in ch
        }
    with
    [ exn ->
        do
          { close_in_noerr ch
          ; raise exn
          }
    ]
;
(***************)
(* Entry point *)
(***************)
(* Program entry point: parses the command line and populates the corpus.

   [-d <dest_dir>] stores the destination directory in [dirname]; every
   anonymous argument is treated as a citation file and passed to
   [populate_corpus].  [Arg.parse] handles arguments in command-line order,
   so [-d] has to appear before the citation file; otherwise
   [populate_corpus] sees an empty destination and aborts. *)
value main =
  (* -d is a mandatory option! *)
  let dirname = ref "" in
  let opts =
    Arg.align
      [ ("-d", Arg.Set_string dirname,
         " Specify the destination directory") ]
  in
  let usage_msg =
    Filename.basename Sys.argv.(0) ^ " -d <dest_dir> <citation_file>"
  in
  Arg.parse opts (populate_corpus dirname) usage_msg
;
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment