Attention une mise à jour du service Gitlab va être effectuée le mardi 30 novembre entre 17h30 et 18h00. Cette mise à jour va générer une interruption du service dont nous ne maîtrisons pas complètement la durée mais qui ne devrait pas excéder quelques minutes. Cette mise à jour intermédiaire en version 14.0.12 nous permettra de rapidement pouvoir mettre à votre disposition une version plus récente.

Commit a6abecdc authored by Bruno Guillaume's avatar Bruno Guillaume
Browse files

add “valid” sub-command

parent 1d4249a1
......@@ -14,7 +14,7 @@ open Libgrew
module Grew_args = struct
type mode = Undefined | Gui of string | Transform | Grep | Compile | Clean | Test
type mode = Undefined | Gui of string | Transform | Grep | Count | Valid | Compile | Clean | Test
let mode = ref Undefined
let grs = ref None
......@@ -28,10 +28,10 @@ module Grew_args = struct
let strat = ref "main"
let quiet = ref false
let timeout = ref None
let (pattern : string option ref) = ref None
let (patterns : string list ref) = ref []
let html = ref false
let grew_match = ref None
let grew_match_server = ref None
let help () = List.iter (fun x -> Printf.printf "%s\n" x) [
"----------------------------------------------------------";
......@@ -107,7 +107,9 @@ module Grew_args = struct
input_data := (Str.split (Str.regexp " ") files) @ !input_data; loop args
| "-o" :: file :: args -> output_file := Some file; loop args
| "-strat" :: s :: args -> strat := s; loop args
| "-pattern" :: file :: args -> pattern := Some file; loop args
| "-pattern" :: files :: args -> patterns := (Str.split (Str.regexp " ") files); loop args
| "-patterns" :: files :: args ->
patterns := (Str.split (Str.regexp " ") files) @ !patterns; loop args
| "-html" :: args -> html := true; loop args
| "-timeout" :: f :: args -> timeout := Some (float_of_string f); Rewrite.set_timeout (Some (float_of_string f)); loop args
......@@ -118,7 +120,7 @@ module Grew_args = struct
| "-gr" :: args -> output := Gr; loop args
| "-dot" :: args -> output := Dot; loop args
| "-grew_match" :: dir :: args -> grew_match := Some dir; loop args
| "-grew_match_server" :: dir :: args -> grew_match_server := Some dir; loop args
| "-safe_commands" :: args -> Libgrew.set_safe_commands true; loop args
| "-track_rules" :: args -> Libgrew.set_track_rules true; loop args
......@@ -133,6 +135,8 @@ module Grew_args = struct
| _ :: "gui" :: args -> mode := Gui (String.concat " " args)
| _ :: "transform" :: args -> mode := Transform; loop args
| _ :: "grep" :: args -> mode := Grep; loop args
| _ :: "count" :: args -> mode := Count; loop args
| _ :: "valid" :: args -> mode := Valid; loop args
| _ :: "compile" :: args -> mode := Compile; loop args
| _ :: "clean" :: args -> mode := Clean; loop args
| _ :: "version" :: _ -> Printf.printf "%s\n" VERSION;
......
......@@ -34,6 +34,7 @@ let handle fct () =
| Corpus_.Fail msg -> fail msg
| Sys_error msg -> fail (sprintf "System error: %s" msg)
| Yojson.Json_error msg -> fail (sprintf "Json error: %s" msg)
| Validation.Error msg -> fail (sprintf "Validation error: %s" msg)
| Libgrew.Bug msg -> fail (sprintf "Libgrew.bug, please report: %s" msg)
| exc -> fail (sprintf "Uncaught exception, please report: %s" (Printexc.to_string exc))
......@@ -158,19 +159,35 @@ let grep () = handle
let compile () =
handle
(fun () ->
List.iter (Corpus.compile ?grew_match:!Grew_args.grew_match) !Grew_args.input_data
let grew_match = match !Grew_args.grew_match_server with
| Some dir -> Some (Filename.concat dir "_meta")
| None -> None in
List.iter
(fun json_file ->
let corpus_desc_list = Corpus_desc.load_json json_file in
List.iter
(fun corpus_desc ->
Corpus_desc.compile ?grew_match corpus_desc
) corpus_desc_list
) !Grew_args.input_data
) ()
(* -------------------------------------------------------------------------------- *)
let clean () =
handle
(fun () ->
List.iter Corpus.clean !Grew_args.input_data
List.iter
(fun json_file ->
let corpus_desc_list = Corpus_desc.load_json json_file in
List.iter
(fun corpus_desc ->
Corpus_desc.clean corpus_desc
) corpus_desc_list
) !Grew_args.input_data
) ()
(* -------------------------------------------------------------------------------- *)
let count () =
let conf_files = !Grew_args.input_data in
match !Grew_args.patterns with
| [] -> Log.fwarning "No pattern given (use option -patterns)"
| l ->
......@@ -181,11 +198,11 @@ let count () =
let patterns = List.map Pattern.load l in
List.iter (
fun conf_file ->
let conf = Corpus.load_json conf_file in
let conf = Corpus_desc.load_json conf_file in
List.iter
(fun corpus_desc (*{Corpus.kind; id; dom_file; directory; urls_opt; files}*) ->
let id = Corpus.get_id corpus_desc in
let directory = Corpus.get_directory corpus_desc in
(fun corpus_desc ->
let id = Corpus_desc.get_id corpus_desc in
let directory = Corpus_desc.get_directory corpus_desc in
let marshal_file = (Filename.concat directory id) ^ ".marshal" in
let in_ch = open_in_bin marshal_file in
let data = (Marshal.from_channel in_ch : Corpus.t) in
......@@ -204,7 +221,19 @@ let count () =
) patterns;
printf "\n%!"
) conf
) conf_files
) (!Grew_args.input_data)
(* -------------------------------------------------------------------------------- *)
let valid () =
let validator_list = List.map Validation.load_json !Grew_args.patterns in
List.iter
(fun conf_file ->
List.iter
(fun corpus_desc ->
Validation.check validator_list corpus_desc
) (Corpus_desc.load_json conf_file)
) !Grew_args.input_data
(* -------------------------------------------------------------------------------- *)
(* -------------------------------------------------------------------------------- *)
......@@ -220,6 +249,7 @@ let _ =
| Grew_args.Undefined -> ()
| Grew_args.Transform -> transform ()
| Grew_args.Count-> count ()
| Grew_args.Valid-> valid ()
| Grew_args.Compile -> compile ()
| Grew_args.Clean -> clean ()
| Grew_args.Grep -> grep ()
......
......@@ -70,7 +70,7 @@ module File = struct
exception Found of int
let get_suffix file_name =
let len = String.length file_name in
let len = String.length file_name in
try
for i = len-1 downto 0 do
if file_name.[i] = '.'
......@@ -88,8 +88,8 @@ module File = struct
incr cpt;
res := (!cpt, input_line stdin) :: !res
done;
assert false
with End_of_file -> List.rev !res
assert false
with End_of_file -> List.rev !res
end (* module File *)
......@@ -119,17 +119,17 @@ module List_ = struct
let rec opt_map fct = function
| [] -> []
| h::t ->
match (fct h) with
| Some x -> x::(opt_map fct t)
| None -> opt_map fct t
match (fct h) with
| Some x -> x::(opt_map fct t)
| None -> opt_map fct t
let opt_mapi fct =
let rec loop i = function
| [] -> []
| h::t ->
match fct i h with
| Some x -> x::(loop (i+1) t)
| None -> loop (i+1) t
match fct i h with
| Some x -> x::(loop (i+1) t)
| None -> loop (i+1) t
in loop 0
end (* module List_ *)
......@@ -149,12 +149,12 @@ module Corpus_ = struct
let brown_list =
List_.opt_map
(fun (i,line) -> match Str.split (Str.regexp "#") line with
| [] -> None
| [line] -> let sentid = sprintf "%05d" i in Some (sentid, Graph.of_brown ?domain ~sentid line)
| [sentid; line] -> Some (sentid, Graph.of_brown ?domain ~sentid line)
| _ -> raise (Fail (sprintf "[line %d] Illegal Brown line >>>%s<<<<\n%!" i line))
| [] -> None
| [line] -> let sentid = sprintf "%05d" i in Some (sentid, Graph.of_brown ?domain ~sentid line)
| [sentid; line] -> Some (sentid, Graph.of_brown ?domain ~sentid line)
| _ -> raise (Fail (sprintf "[line %d] Illegal Brown line >>>%s<<<<\n%!" i line))
) lines in
Array.of_list brown_list
Array.of_list brown_list
let load_brown ?domain file =
let lines = File.read file in
......@@ -167,46 +167,46 @@ module Corpus_ = struct
let get_graphs ?domain source_list =
match source_list with
| [source] ->
begin
if not (Sys.file_exists source)
then raise (File_not_found source);
if Sys.is_directory source
then (* if [source] is a folder *)
begin
let files_array = Sys.readdir source in
let graph_list =
Array.fold_right
(fun file acc ->
if Filename.check_suffix file ".gr"
then (Filename.chop_extension file, Graph.load ?domain (Filename.concat source file)) :: acc
else if (Filename.check_suffix file ".conll" || Filename.check_suffix file ".conllu")
then
let conll = Conll.load (Filename.concat source file) in
let graph = Graph.of_conll ?domain conll in
match Conll.get_sentid conll with
| Some sentid -> (sentid, graph) :: acc
| None -> (file, graph) :: acc
else acc
) files_array [] in
Array.of_list graph_list
if not (Sys.file_exists source)
then raise (File_not_found source);
if Sys.is_directory source
then (* if [source] is a folder *)
begin
let files_array = Sys.readdir source in
let graph_list =
Array.fold_right
(fun file acc ->
if Filename.check_suffix file ".gr"
then (Filename.chop_extension file, Graph.load ?domain (Filename.concat source file)) :: acc
else if (Filename.check_suffix file ".conll" || Filename.check_suffix file ".conllu")
then
let conll = Conll.load (Filename.concat source file) in
let graph = Graph.of_conll ?domain conll in
match Conll.get_sentid conll with
| Some sentid -> (sentid, graph) :: acc
| None -> (file, graph) :: acc
else acc
) files_array [] in
Array.of_list graph_list
end
else (* if [source] is a file *)
match File.get_suffix source with
| Some s when String_.contains "conll" s -> load_conll ?domain source
| Some s when String_.contains "cupt" s -> load_conll ?domain source
| Some s when String_.contains "melt" s -> load_brown ?domain source
| Some s when String_.contains "brown" s -> load_brown ?domain source
| Some s when String_.contains "gr" s -> [| (source, Graph.load ?domain source) |]
| _ ->
Log.fwarning "Unknown suffix for file \"%s\", trying to guess format..." source;
try load_conll ?domain source
with _ ->
try load_brown ?domain source
with _ -> raise (Fail (sprintf "Cannot load file \"%s\", unknown format" source))
end
else (* if [source] is a file *)
match File.get_suffix source with
| Some s when String_.contains "conll" s -> load_conll ?domain source
| Some s when String_.contains "cupt" s -> load_conll ?domain source
| Some s when String_.contains "melt" s -> load_brown ?domain source
| Some s when String_.contains "brown" s -> load_brown ?domain source
| Some s when String_.contains "gr" s -> [| (source, Graph.load ?domain source) |]
| _ ->
Log.fwarning "Unknown suffix for file \"%s\", trying to guess format..." source;
try load_conll ?domain source
with _ ->
try load_brown ?domain source
with _ -> raise (Fail (sprintf "Cannot load file \"%s\", unknown format" source))
end
| [] -> raise (Fail ( "Empty input list\n%!"))
| _ ->
let conll_corpus = Conll_corpus.load_list source_list in
......@@ -228,10 +228,10 @@ end (* module Corpus *)
(* ==================================================================================================== *)
module Int =
struct
type t = int
let compare = Stdlib.compare
end
struct
type t = int
let compare = Stdlib.compare
end
module Int_set = Set.Make (Int)
module Int_map = Map.Make (Int)
......@@ -260,3 +260,89 @@ module Timer = struct
table := Int_map.remove !cpt !table;
diff
end
(* ==================================================================================================== *)
module Validation = struct
exception Error of string
type item = {
pattern: string list;
description: string;
}
type modul = {
title: string;
items: item list
}
let load_json json_file =
let open Yojson.Basic.Util in
let json = Yojson.Basic.from_file json_file in
let parse_one json =
let pattern =
try json |> member "pattern" |> to_string |> (fun x -> [x])
with Type_error _ ->
try json
|> member "pattern"
|> to_list
|> (List.map to_string)
with Type_error _ -> raise (Error (sprintf "[Validation.load_json, file \"%s\"] \"pattern\" field is mandatory and must be a string or a list of strings" json_file)) in
let description =
try json |> member "description" |> to_string
with Type_error _ -> "No description" in
{ pattern; description } in
let title =
try json |> member "title" |> to_string
with Type_error _ -> raise (Error (sprintf "[Validation.load_json, file \"%s\"] \"title\"_desc field is mandatory and must be a string" json_file)) in
let items = List.map parse_one (json |> member "items" |> to_list) in
{ title; items }
let check modul_list (corpus_desc:Corpus_desc.t) =
let corpus = Corpus_desc.build_corpus corpus_desc in
let date =
let tm = Unix.localtime (Unix.time ()) in
sprintf "%d/%02d/%02d - %02d:%02d"
(tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday
tm.Unix.tm_hour tm.Unix.tm_min in
let modules =
`List
(List.map
(fun modul ->
let (out_items : Yojson.Basic.t) =
`List
(List.map
(fun item ->
let grew_pattern = Pattern.parse (String.concat " " item.pattern) in
let count =
Corpus.fold_left (fun acc graph ->
acc + (List.length (Graph.search_pattern grew_pattern graph))
) 0 corpus in
`Assoc [
"count", `Int count;
"pattern", `List (List.map (fun x -> `String x) item.pattern);
"description", `String item.description
]
) modul.items) in
`Assoc ["title", `String modul.title; "items", out_items]
) modul_list) in
let json = `Assoc [
"corpus", `String (Corpus_desc.get_id corpus_desc);
"date", `String date;
"modules", modules
] in
printf "%s\n" (Yojson.Basic.pretty_to_string json)
end
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment