(* Correctness of a program computing the minimal distance between two words (code by Claude MarchÃ©). This program computes a variant of the Levenshtein distance. Given two strings [w1] and [w2] of respective lengths [n1] and [n2], it computes the minimal numbers of insertions and deletions to perform in one of the strings to get the other one. (The traditional edit distance also includes substitutions.) The nice point about this algorithm, due to Claude MarchÃ©, is to work in linear space, in an array of min(n1,n2) integers. Time complexity is O(n1 * n2), as usual. *) theory Word use export int.Int use export int.MinMax use export list.List use export list.Length type char type word = list char inductive dist word word int = | dist_eps : dist Nil Nil 0 | dist_add_left : forall w1 w2: word, n: int. dist w1 w2 n -> forall a: char. dist (Cons a w1) w2 (n + 1) | dist_add_right : forall w1 w2: word, n: int. dist w1 w2 n -> forall a: char. dist w1 (Cons a w2) (n + 1) | dist_context : forall w1 w2:word, n: int. dist w1 w2 n -> forall a: char. dist (Cons a w1) (Cons a w2) n predicate min_dist (w1 w2: word) (n: int) = dist w1 w2 n /\ forall m: int. dist w1 w2 m -> n <= m (* intermediate lemmas *) use export list.Append function last_char (a: char) (u: word) : char = match u with | Nil -> a | Cons c u' -> last_char c u' end function but_last (a: char) (u: word) : word = match u with | Nil -> Nil | Cons c u' -> Cons a (but_last c u') end lemma first_last_explicit: forall u: word, a: char. but_last a u ++ Cons (last_char a u) Nil = Cons a u lemma first_last: forall a: char, u: word. exists v: word, b: char. v ++ Cons b Nil = Cons a u /\ length v = length u lemma key_lemma_right: forall w1 w'2: word, m: int, a: char. dist w1 w'2 m -> forall w2: word. w'2 = Cons a w2 -> exists u1 v1: word, k: int. w1 = u1 ++ v1 /\ dist v1 w2 k /\ k + length u1 <= m + 1 lemma dist_symetry: forall w1 w2: word, n: int. dist w1 w2 n -> dist w2 w1 n lemma key_lemma_left: forall w1 w2: word, m: int, a: char. dist (Cons a w1) w2 m -> exists u2 v2: word, k: int. w2 = u2 ++ v2 /\ dist w1 v2 k /\ k + length u2 <= m + 1 lemma dist_concat_left: forall u v w: word, n: int. dist v w n -> dist (u ++ v) w (length u + n) lemma dist_concat_right: forall u v w: word, n: int. dist v w n -> dist v (u ++ w) (length u + n) (* main lemmas *) lemma min_dist_equal: forall w1 w2: word, a: char, n: int. min_dist w1 w2 n -> min_dist (Cons a w1) (Cons a w2) n lemma min_dist_diff: forall w1 w2: word, a b: char, m p: int. a <> b -> min_dist (Cons a w1) w2 p -> min_dist w1 (Cons b w2) m -> min_dist (Cons a w1) (Cons b w2) (min m p + 1) lemma min_dist_eps: forall w: word, a: char, n: int. min_dist w Nil n -> min_dist (Cons a w) Nil (n + 1) lemma min_dist_eps_length: forall w: word. min_dist Nil w (length w) end module EditDistance use import Word use import list.Length as L use import ref.Ref use import array.Array (* Auxiliary definitions for the program and its specification. *) function suffix (a: array char) (i: int) : word axiom suffix_nil: forall a: array char. suffix a a.length = Nil axiom suffix_cons: forall a: array char, i: int. 0 <= i < a.length -> suffix a i = Cons a[i] (suffix a (i+1)) lemma suffix_length: forall a: array char, i: int. 0 <= i <= a.length -> L.length (suffix a i) = (a.length - i) predicate min_suffix (a1 a2: array char) (i j n: int) = min_dist (suffix a1 i) (suffix a2 j) n function word_of (a: array char) : word = suffix a 0 (* The program. *) let distance (w1: array char) (w2: array char) ensures { min_dist (word_of w1) (word_of w2) result } = let n1 = length w1 in let n2 = length w2 in let t = Array.make (n2+1) 0 in (* initialization of t *) for i = 0 to n2 do invariant { forall j:int. 0 <= j < i -> t[j] = n2 - j } t[i] <- n2 - i done; (* loop over w1 *) for i = n1 - 1 downto 0 do invariant { forall j:int. 0 <= j <= n2 -> min_suffix w1 w2 (i+1) j t[j] } let oldt = ref t[n2] in t[n2] <- t[n2] + 1; (* loop over w2 *) for j = n2 - 1 downto 0 do invariant { forall k:int. j < k <= n2 -> min_suffix w1 w2 i k t[k] } invariant { forall k:int. 0 <= k <= j -> min_suffix w1 w2 (i+1) k t[k] } invariant { min_suffix w1 w2 (i+1) (j+1) !oldt } let temp = !oldt in oldt := t[j]; if w1[i] = w2[j] then t[j] <- temp else t[j] <- (min t[j] t[j + 1]) + 1 done done; t[0] end