TestGen.ml 2.91 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
(* This testing infrastructure has been contributed by Gabriel Radanne.
   It is based on regenerate: https://github.com/regex-generate/regenerate *)

let bos = '^'
let eos = '$'
let alphabet = ['a'; 'b'; 'c']

module Char = struct
  include Char
  let hash = Hashtbl.hash
  let foreach f =
    List.iter f (bos :: eos :: alphabet)
  let print c =
    Printf.sprintf "%c" c
  let pp = Format.pp_print_char
end

module B =
  Brzozowski.Make(Char)

open B

23 24 25 26 27 28 29 30
let remove l x =
  let rec remove' x acc l = match l with
    | [] -> List.rev acc
    | y :: tail when Char.equal x y -> remove' x acc tail
    | y :: tail -> remove' x (y::acc) tail
  in
  remove' x [] l

31
let compl l =
32
  List.fold_left remove alphabet l
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
let set b l = disjunction (List.map char (if b then l else compl l))


(* Turn a regular expression from Regenerate into a [B] expression. *)
let rec to_re = let open Regenerate.Regex in function
    | One -> B.epsilon
    | Set (b, l) -> set b l
    | Seq (re, re') -> to_re re @@ to_re re'
    | Or (re, re') -> to_re re ||| to_re re'
    | And (re, re') -> to_re re &&& to_re re'
    | Not re -> B.neg (to_re re)
    | Rep (0,None,re) -> B.star (to_re re)
    | Rep (_,_,_re) -> assert false

let is_match re input =
  match B.exec re input with None -> false | Some _ -> true

(* Check positive and negative samples. *)
let check (re, pos, neg) =
  (* 1. Compile the regular expression. *)
  let cre =
    try
      B.dfa (to_re re)
    with _ ->
      (* Discard regular expressions that we do not handle. *)
      QCheck.assume_fail ()
  in
  (* 2. Test! *)
  List.for_all (fun s -> is_match cre s) pos &&
  List.for_all (fun s -> not (is_match cre s)) neg



module WordSeq (C : sig include Set.OrderedType val pp : t Fmt.t end) = struct
  type char = C.t
  type t = C.t Seq.t
  let empty = Seq.empty
  let singleton = OSeq.return
  let length = OSeq.length
  let append = OSeq.append
  let cons = OSeq.cons
  let pp fmt w = Seq.iter (C.pp fmt) w
  let compare = OSeq.compare ~cmp:C.compare
end

(* Ensure that the whole word is matched *)
let add_boundaries =
  let bounds s =
    let (@) = OSeq.append in
    OSeq.(return bos @ s @ return eos)
  in
  let whole_string re =
    Regenerate.Regex.(seq [char bos; re; char eos])
  in
  QCheck.map_same_type
    (fun (re, pos, neg) ->
       whole_string re, List.map bounds pos, List.map bounds neg)

let test =
  let module Word = WordSeq(Char) in
  let module Stream = Segments.ThunkList(Word) in
  let generator =
    Regenerate.arbitrary
      (module Word) (* Datastructure for words *)
      (module Stream) (* Datastructure for streams of words *)
      ~compl:false (* Should we generate complement operations? *)
      ~pp:Fmt.char (* Printer for characters *)
      ~samples:100 (* Average number of samples for each regular expression *)
      alphabet (* Alphabet *)
  in
  let generator = add_boundaries generator in
  QCheck.Test.make generator check

let () = QCheck_runner.run_tests_main [test]