-
OUEDRAOGO Wendlasida authoredOUEDRAOGO Wendlasida authored
RegexpSimpl.v 8.78 KiB
(* *********************************************************************)
(* *)
(* Coqlex verified lexer generator *)
(* *)
(* Copyright 2022 Siemens Mobility SAS and Institut National de *)
(* Recherche en Informatique et en Automatique. *)
(* All rights reserved. This file is distributed under *)
(* the terms of the INRIA Non-Commercial License Agreement (see the *)
(* LICENSE file). *)
(* *)
(* *********************************************************************)
(** * Coqlex regex simplification *)
(** In this file, we use smart-constructors to simplify regular expressions.
The optimizations we implemented are based on the following results:
The Kleene Algebra is an idempotent semiring; therefore for any elements $x, y:$ #x,y:#
- $ x + 0 = 0 + x = x $ # x + 0 = 0 + x = x # ([Or_left_id], [Or_right_id], [simp_or_correct])
- $ 1 x = x 1 = 1 $ # 1x = x1 = 1 # ([Cat_left_id], [Cat_right_id], [simp_cat_correct])
- $ 0 x = x 0 = 0 $ # 0x = x0 = 0 # ([Cat_left_zero], [Cat_right_zero], [simp_cat_correct]).
- $ 0^{\ast} = 1^{\ast} = 1 $ # 0* = 1* = 1 # ([simp_star_correct]).
- $ (x^{\ast})^{\ast} = x^{\ast} $ # ( r* ) * = r* # ([simp_star_correct]).
- $ x - 0 = x $ # x - 0 = x # ([simp_minus_correct]).
- $ 0 - x = 0 $ # 0 - x = 0 # ([simp_minus_correct]).
*)
Add LoadPath "regexp_opt" as RegExp.
Require Import RegExp.Definitions.
Require Import RegExp.Boolean.
Require Import RegExp.Char.
Require Import RegExp.Concat.
Require Import RegExp.Star.
Definition simp_cat r s :=
match r with
| Empty => Empty
| Eps => s
| x => (match s with
| Empty => Empty
| Eps => x
| x0 => Cat x x0
end)
end.
Lemma simp_cat_correct : forall l r, simp_cat l r =R= Cat l r.
Proof.
induction l; induction r; simpl;
try first [rewrite Cat_left_zero | rewrite Cat_left_id | rewrite Cat_right_zero | rewrite Cat_right_id ];
apply re_eq_refl.
Qed.
Definition simp_or r s :=
match r with
| Empty => s
| x => (match s with
| Empty => x
| x0 => Or x x0
end)
end.
Lemma simp_or_correct : forall l r, simp_or l r =R= Or l r.
Proof.
induction l; simpl;
induction r; simpl; try apply re_eq_refl;
first [ rewrite Or_left_id | rewrite Or_right_id ];
apply re_eq_refl.
Qed.
Fixpoint simp_star r :=
match r with
| Empty => Eps
| Eps => Eps
| Star x => simp_star x
| x => Star x
end.
Lemma append_empty_str_r: forall s, s = (s ++ "")%string.
Proof.
induction s; simpl; eauto; f_equal; auto.
Qed.
Lemma string_app_assoc: forall s s1 s2, ((s++s1)++s2)%string = (s++(s1++s2))%string.
induction s; simpl; auto.
intros.
f_equal; auto.
Qed.
Lemma concat_list_string_list_assoc: forall x1 x2, ((concat_list_string x1) ++ (concat_list_string x2))%string = concat_list_string (x1 ++ x2)%list.
Proof.
induction x1; simpl; auto.
intros.
rewrite string_app_assoc.
f_equal; auto.
Qed.
Lemma star_cat_star_star: forall r s, matches (Star r) s = true <-> matches (Cat (Star r) (Star r)) s = true.
Proof.
split; intros.
replace s with (s ++ "")%string; [|apply eq_sym; apply append_empty_str_r].
apply matches_Cat; auto.
apply divide_Cat in H.
destruct H.
destruct s0.
destruct a.
destruct H0.
apply Star_to_list in H0.
destruct H0, a, H2.
apply Star_to_list in H1.
destruct H1, a, H4.
assert (forallb (fun s : string => r ~= s) (x1 ++ x2)%list = true).
rewrite forallb_app.
apply Bool.andb_true_iff.
split; eauto.
rewrite H.
rewrite <- H2, <- H4.
rewrite concat_list_string_list_assoc.
apply list_to_Star.
auto.
Qed.
Lemma starlist_implies_star_concat: forall ss r, forallb (fun s => Star r ~= s) ss = true -> matches (Star r) (concat_list_string ss) = true.
induction ss; simpl; intros; auto.
apply Bool.andb_true_iff in H.
destruct H.
apply star_cat_star_star.
apply matches_Cat; auto.
Qed.
Lemma star_star_matches_iff_star_matches: forall s r, matches (Star (Star r)) s = true <-> matches (Star r) s = true.
Proof.
split; intros.
apply Star_to_list in H.
destruct H, a, H0.
rewrite <- H0.
apply starlist_implies_star_concat; auto.
apply matches_Star_r; auto.
Qed.
Theorem star_star_equiv_star: forall r, Star r =R= Star (Star r).
Proof.
unfold re_eq.
intros.
case_eq (Star (Star r) ~= s); intros.
apply star_star_matches_iff_star_matches; auto.
case_eq (Star r ~= s); intros; auto.
apply star_star_matches_iff_star_matches in H0.
rewrite H in H0; auto.
Qed.
Lemma simp_star_correct : forall r, simp_star r =R= Star r.
Proof.
{ induction r; simpl; try apply re_eq_refl.
- rewrite matches_Star_right.
rewrite Cat_left_zero.
rewrite Or_right_id.
apply re_eq_refl.
- unfold re_eq.
induction s; simpl; auto.
rewrite Cat_left_zero.
apply re_eq_refl.
- rewrite IHr.
apply star_star_equiv_star.
}
Qed.
Definition simp_and r s :=
match r with
| Empty => Empty
| x => (match s with
| Empty => Empty
| x0 => And x x0
end)
end.
Lemma simp_and_correct : forall l r, simp_and l r =R= And l r.
Proof.
induction l; simpl.
unfold re_eq.
intros.
generalize dependent r.
generalize dependent s.
induction s; simpl; auto.
induction r; intros; try apply re_eq_refl.
unfold re_eq.
induction s; simpl; auto.
induction s; simpl; auto.
induction r; intros; try apply re_eq_refl.
unfold re_eq.
intros.
rewrite matches_And.
rewrite Empty_false.
induction (Char a ~= s); auto.
induction r; intros; try apply re_eq_refl.
unfold re_eq.
intros.
rewrite matches_And.
rewrite Empty_false.
induction (Cat l1 l2 ~= s); auto.
induction r; intros; try apply re_eq_refl.
unfold re_eq.
intros.
rewrite matches_And.
rewrite Empty_false.
induction (Or l1 l2 ~= s); auto.
induction r; intros; try apply re_eq_refl.
unfold re_eq.
intros.
rewrite matches_And.
rewrite Empty_false.
induction (Star l ~= s); auto.
induction r; intros; try apply re_eq_refl.
unfold re_eq.
intros.
rewrite matches_And.
rewrite Empty_false.
induction (And l1 l2 ~= s); auto.
induction r; intros; try apply re_eq_refl.
unfold re_eq.
intros.
rewrite matches_And.
rewrite Empty_false.
induction (AnyChar ~= s); auto.
induction r; intros; try apply re_eq_refl.
unfold re_eq.
intros.
rewrite matches_And.
rewrite Empty_false.
induction (CharExcept a ~= s); auto.
induction r; intros; try apply re_eq_refl.
unfold re_eq.
intros.
rewrite matches_And.
rewrite Empty_false.
induction (CharRange a a0 ~= s); auto.
induction r; intros; try apply re_eq_refl.
unfold re_eq.
intros.
rewrite matches_And.
rewrite Empty_false.
induction (CharRangeExcept a a0 ~= s); auto.
induction r; intros; try apply re_eq_refl.
unfold re_eq.
intros.
rewrite matches_And.
rewrite Empty_false.
induction (Minus l1 l2 ~= s); auto.
Qed.
Definition simp_minus r s :=
match r with
| Empty => Empty
| x => (match s with
| Empty => r
| x0 => Minus x x0
end)
end.
Lemma simp_minus_zero_left: forall l, Minus Empty l =R= Empty.
Proof.
unfold re_eq.
intros.
rewrite matches_Minus.
rewrite Empty_false.
simpl; auto.
Qed.
Lemma simp_minus_zero_right: forall l, Minus l Empty =R= l.
Proof.
unfold re_eq.
intros.
rewrite matches_Minus.
rewrite Empty_false.
simpl.
induction (l ~= s); simpl; auto.
Qed.
Lemma simp_minus_correct : forall l r, simp_minus l r =R= Minus l r.
Proof.
intros.
induction l; induction r; try rewrite simp_minus_zero_left; try rewrite simp_minus_zero_right; simpl; apply re_eq_refl.
Qed.
Fixpoint derive a (re:RegExp):RegExp :=
match re with
| Char c => match (ascii_dec c a) with
| left _ => Eps
| right _ => Empty
end
| Cat r s =>
if nu r
then simp_or (simp_cat (derive a r) s) (derive a s)
else simp_cat (derive a r) s
| Or r s => simp_or (derive a r) (derive a s)
| Star r => simp_cat (derive a r) (simp_star r)
| And r s => simp_and (derive a r) (derive a s)
| AnyChar => Eps
| CharExcept c => match (ascii_dec c a) with
| left _ => Empty
| right _ => Eps
end
| CharRange l h => if ((l <=? a)%char && (a <=? h)%char)%bool then Eps else Empty
| CharRangeExcept l h => if ((l <=? a)%char && (a <=? h)%char)%bool then Empty else Eps
| Minus r s => simp_minus (derive a r) (derive a s)
| _ => Empty
end.
Theorem derive_simpl_correct : forall a r, derive a r =R= RegExp.Definitions.derive a r.
Proof.
{ induction r; simpl; try apply re_eq_refl.
- case_eq (nu r1); intros.
rewrite simp_or_correct.
rewrite simp_cat_correct.
rewrite IHr1.
rewrite IHr2.
apply re_eq_refl.
rewrite simp_cat_correct.
rewrite IHr1.
apply re_eq_refl.
- rewrite simp_or_correct.
rewrite IHr1.
rewrite IHr2.
apply re_eq_refl.
- rewrite simp_cat_correct.
rewrite simp_star_correct.
rewrite IHr.
apply re_eq_refl.
- rewrite simp_and_correct.
rewrite IHr1.
rewrite IHr2.
apply re_eq_refl.
- rewrite simp_minus_correct.
rewrite IHr1.
rewrite IHr2.
apply re_eq_refl.
}
Qed.