kmp.mlw 4.77 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
(**************************************************************************)
(*                                                                        *)
(* Proof of the Knuth-Morris-Pratt Algorithm.                             *)
(*                                                                        *)
(* Jean-Christophe Filliâtre (LRI, Université Paris Sud)                  *)
(* November 1998                                                          *)
(*                                                                        *)
(**************************************************************************)

module KnuthMorrisPratt

  use import int.Int
13 14
  use import ref.Ref
  use import array.Array
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86

  type char

  predicate matches (a1: array char) (i1: int)
                    (a2: array char) (i2: int) (n: int) =
    0 <= i1 <= length a1 - n /\
    0 <= i2 <= length a2 - n /\
    forall i: int. 0 <= i < n -> a1[i1 + i] = a2[i2 + i]

  lemma matches_empty:
    forall a1 a2: array char, i1 i2: int.
    0 <= i1 <= length a1 ->
    0 <= i2 <= length a2 ->
    matches a1 i1 a2 i2 0

  lemma matches_right_extension:
    forall a1 a2: array char, i1 i2 n: int.
    matches a1 i1 a2 i2 n ->
    i1 <= length a1 - n - 1 ->
    i2 <= length a2 - n - 1 ->
    a1[i1 + n] = a2[i2 + n] ->
    matches a1 i1 a2 i2 (n + 1)

  lemma matches_contradiction_at_first:
    forall a1 a2: array char, i1 i2 n: int.
    0 < n -> a1[i1] <> a2[i2] -> not (matches a1 i1 a2 i2 n)

  lemma matches_contradiction_at_i :
    forall a1 a2: array char, i1 i2 i n: int.
    0 < n ->
    0 <= i < n ->
    a1[i1 + i] <> a2[i2 + i] -> not (matches a1 i1 a2 i2 n)

  lemma matches_right_weakening:
    forall a1 a2: array char, i1 i2 n n': int.
    matches a1 i1 a2 i2 n -> n' < n -> matches a1 i1 a2 i2 n'

  lemma matches_left_weakening:
    forall a1 a2: array char, i1 i2 n n': int.
    matches a1 (i1 - (n - n')) a2 (i2 - (n - n')) n ->
    n' < n -> matches a1 i1 a2 i2 n'

  lemma matches_sym:
    forall a1 a2: array char, i1 i2 n: int.
    matches a1 i1 a2 i2 n -> matches a2 i2 a1 i1 n

  lemma matches_trans:
    forall a1 a2 a3: array char, i1 i2 i3 n: int.
    matches a1 i1 a2 i2 n -> matches a2 i2 a3 i3 n -> matches a1 i1 a3 i3 n

  predicate is_next (p: array char) (j n: int) =
    0 <= n < j /\
    matches p (j - n) p 0 n /\
    forall z: int. n < z < j -> not (matches p (j - z) p 0 z)

  lemma next_iteration:
    forall p a: array char, i j n: int.
    0 < j < length p ->
    j <= i <= length a ->
    matches a (i - j) p 0 j -> is_next p j n -> matches a (i - n) p 0 n

  lemma next_is_maximal:
    forall p a: array char, i j n k: int.
    0 < j < length p ->
    j <= i <= length a ->
    i - j < k < i - n ->
    matches a (i - j) p 0 j ->
    is_next p j n -> not (matches a k p 0 (length p))

  lemma next_1_0:
    forall p: array char. 1 <= length p -> is_next p 1 0

87 88
  (* We first compute a table next with the procedure initnext.
   * That table only depends on the pattern. *)
89

90 91 92 93 94 95
  let initnext (p: array char)
    requires { 1 <= length p }
    ensures  { length result = length p &&
      forall j:int. 0 < j < p.length -> is_next p j result[j] }
  = let m = length p in
    let next = make m 0 in
96 97
    if 1 < m then begin
      next[1] <- 0;
98 99
      let i = ref 1 in
      let j = ref 0 in
100
      while !i < m - 1 do
101 102 103 104 105 106
        invariant { 0 <= !j < !i <= m }
        invariant { matches p (!i - !j) p 0 !j }
        invariant { forall z:int.
          !j+1 < z < !i+1 -> not matches p (!i + 1 - z) p 0 z }
        invariant { forall k:int. 0 < k <= !i -> is_next p k next[k] }
        variant { m - !i, !j }
107
        if p[!i] = p[!j] then begin
108
          i := !i + 1; j := !j + 1; next[!i] <- !j
109
        end else
110
          if !j = 0 then begin i := !i + 1; next[!i] <- 0 end else j := next[!j]
111
      done
112 113 114
    end;
    next

115 116 117 118 119 120 121 122 123 124

  (* The algorithm looks for an occurrence of the pattern p in a text a
   * which is an array of length N.
   * The function kmp returns an index i within 0..N-1 if there is an occurrence
   * at the position i and N otherwise. *)

  predicate first_occur (p a: array char) (r: int) =
    (0 <= r < length a -> matches a r p 0 (length p)) /\
    (forall k: int. 0 <= k < r -> not (matches a k p 0 (length p)))

125
  let kmp (p a: array char)
126
    requires { 1 <= length p }
127 128
    ensures  { first_occur p a result }
  = let m = length p in
129 130 131
    let n = length a in
    let i = ref 0 in
    let j = ref 0 in
132
    let next = initnext p in
133
    while !j < m && !i < n do
134 135 136 137
      invariant { 0 <= !j <= m /\ !j <= !i <= n }
      invariant { matches a (!i - !j) p 0 !j }
      invariant { forall k:int. 0 <= k < !i - !j -> not (matches a k p 0 m) }
      variant { n - !i, !j }
138 139 140 141 142 143 144 145
      if a[!i] = p[!j] then begin
        i := !i+1; j := !j+1
      end else
        if !j = 0 then i := !i+1 else j := next[!j]
    done;
    if !j = m then !i - m else !i

end