(* Correctness of a program computing the minimal distance between two words (code by Claude Marché). This program computes a variant of the Levenshtein distance. Given two strings [w1] and [w2] of respective lengths [n1] and [n2], it computes the minimal numbers of insertions and deletions to perform in one of the strings to get the other one. (The traditional edit distance also includes substitutions.) The nice point about this code is to work in linear space, in an array of min(n1,n2) integers. Time complexity is O(n1 * n2), as usual. *) module Distance use import int.Int use import int.MinMax use import list.List use import module ref.Ref use import module array.Array (* Parameters. Input of the program is composed of two arrays of characters, [w1] of size [n1] and [w2] of size [n2]. *) function n1 : int function n2 : int type a type word = list a val w1 : array a val w2 : array a (* Global variables of the program. The program uses an auxiliary array [t] of integers of size [n2+1] and three auxiliary integer variables [i], [j] and [old]. *) val t : array int val i : ref int val j : ref int val o : ref int (* Auxiliary definitions for the program and its specification. *) inductive dist word word int = | dist_eps : dist Nil Nil 0 | dist_add_left : forall w1 w2: word, n:int. dist w1 w2 n -> forall a:a. dist (Cons a w1) w2 (n + 1) | dist_add_right : forall w1 w2: word, n:int. dist w1 w2 n -> forall a:a. dist w1 (Cons a w2) (n + 1) | dist_context : forall w1 w2: word, n:int. dist w1 w2 n -> forall a:a. dist (Cons a w1) (Cons a w2) n predicate min_dist (w1 w2:word) (n:int) = dist w1 w2 n /\ forall m:int. dist w1 w2 m -> n <= m function suffix (array a) int : word axiom suffix_def_1: forall m: array a. suffix m (length m) = Nil axiom suffix_def_2: forall m: array a, i: int. 0 <= i < length m -> suffix m i = Cons m[i] (suffix m (i+1)) predicate min_suffix (w1 w2: array a) (i j n: int) = min_dist (suffix w1 i) (suffix w2 j) n function word_of_array (m: array a) : word = suffix m 0 (* The code. *) let distance () = { length w1 = n1 /\ length w2 = n2 /\ length t = n2+1 } begin (* initialization of t *) for i = 0 to n2 do invariant { length t = n2+1 /\ forall j:int. 0 <= j < i -> t[j] = n2-j } t[i] <- n2 - i done; (* loop over w1 *) for i = n1-1 downto 0 do invariant { length t = n2+1 /\ forall j:int. 0 <= j <= n2 -> min_suffix w1 w2 (i+1) j t[j] } o := t[n2]; t[n2] <- t[n2] + 1; (* loop over w2 *) for j = n2-1 downto 0 do invariant { length t = n2+1 /\ (forall k:int. j < k <= n2 -> min_suffix w1 w2 i k t[k]) /\ (forall k:int. 0 <= k <= j -> min_suffix w1 w2 (i+1) k t[k]) /\ min_suffix w1 w2 (i+1) (j+1) !o } begin let temp = !o in o := t[j]; if w1[i] = w2[j] then t[j] <- temp else t[j] <- (min t[j] t[j+1]) + 1 end done done; t[0] end { min_dist (word_of_array w1) (word_of_array w2) result } end (* Local Variables: compile-command: "unset LANG; make -C ../.. examples/programs/distance.gui" End: *)