edit_distance.mlw 4.71 KB
 Jean-Christophe Filliatre committed Jul 15, 2011 1 2 3 4 5 6 7 8 9 10 `````` (* Correctness of a program computing the minimal distance between two words (code by Claude Marché). This program computes a variant of the Levenshtein distance. Given two strings [w1] and [w2] of respective lengths [n1] and [n2], it computes the minimal numbers of insertions and deletions to perform in one of the strings to get the other one. (The traditional edit distance also includes substitutions.) `````` Jean-Christophe Filliatre committed Aug 17, 2011 11 `````` The nice point about this algorithm, due to Claude Marché, is to `````` Jean-Christophe Filliatre committed Jul 15, 2011 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 `````` work in linear space, in an array of min(n1,n2) integers. Time complexity is O(n1 * n2), as usual. *) theory Word use export int.Int use export int.MinMax use export list.List use export list.Length type char type word = list char inductive dist word word int = | dist_eps : dist Nil Nil 0 | dist_add_left : forall w1 w2: word, n: int. dist w1 w2 n -> forall a: char. dist (Cons a w1) w2 (n + 1) | dist_add_right : forall w1 w2: word, n: int. dist w1 w2 n -> forall a: char. dist w1 (Cons a w2) (n + 1) | dist_context : forall w1 w2:word, n: int. dist w1 w2 n -> forall a: char. dist (Cons a w1) (Cons a w2) n predicate min_dist (w1 w2: word) (n: int) = dist w1 w2 n /\ forall m: int. dist w1 w2 m -> n <= m `````` Jean-Christophe Filliatre committed Jul 26, 2011 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 `````` (* intermediate lemmas *) use export list.Append function last_char (a: char) (u: word) : char = match u with | Nil -> a | Cons c u' -> last_char c u' end function but_last (a: char) (u: word) : word = match u with | Nil -> Nil | Cons c u' -> Cons a (but_last c u') end lemma first_last_explicit: forall u: word, a: char. but_last a u ++ Cons (last_char a u) Nil = Cons a u lemma first_last: forall a: char, u: word. exists v: word, b: char. v ++ Cons b Nil = Cons a u /\ length v = length u lemma key_lemma_right: forall w1 w'2: word, m: int, a: char. dist w1 w'2 m -> forall w2: word. w'2 = Cons a w2 -> exists u1 v1: word, k: int. w1 = u1 ++ v1 /\ dist v1 w2 k /\ k + length u1 <= m + 1 lemma dist_symetry: forall w1 w2: word, n: int. dist w1 w2 n -> dist w2 w1 n lemma key_lemma_left: forall w1 w2: word, m: int, a: char. dist (Cons a w1) w2 m -> exists u2 v2: word, k: int. w2 = u2 ++ v2 /\ dist w1 v2 k /\ k + length u2 <= m + 1 lemma dist_concat_left: forall u v w: word, n: int. dist v w n -> dist (u ++ v) w (length u + n) lemma dist_concat_right: forall u v w: word, n: int. dist v w n -> dist v (u ++ w) (length u + n) `````` Jean-Christophe Filliatre committed Aug 17, 2011 87 `````` `````` Jean-Christophe Filliatre committed Aug 17, 2011 88 `````` (* main lemmas *) `````` Jean-Christophe Filliatre committed Jul 26, 2011 89 `````` `````` Jean-Christophe Filliatre committed Jul 15, 2011 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 `````` lemma min_dist_equal: forall w1 w2: word, a: char, n: int. min_dist w1 w2 n -> min_dist (Cons a w1) (Cons a w2) n lemma min_dist_diff: forall w1 w2: word, a b: char, m p: int. a <> b -> min_dist (Cons a w1) w2 p -> min_dist w1 (Cons b w2) m -> min_dist (Cons a w1) (Cons b w2) (min m p + 1) lemma min_dist_eps: forall w: word, a: char, n: int. min_dist w Nil n -> min_dist (Cons a w) Nil (n + 1) lemma min_dist_eps_length: forall w: word. min_dist Nil w (length w) end module EditDistance use import Word use import list.Length as L `````` Andrei Paskevich committed Oct 13, 2012 114 115 `````` use import ref.Ref use import array.Array `````` Jean-Christophe Filliatre committed Jul 15, 2011 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 `````` (* Auxiliary definitions for the program and its specification. *) function suffix (a: array char) (i: int) : word axiom suffix_nil: forall a: array char. suffix a a.length = Nil axiom suffix_cons: forall a: array char, i: int. 0 <= i < a.length -> suffix a i = Cons a[i] (suffix a (i+1)) lemma suffix_length: forall a: array char, i: int. 0 <= i <= a.length -> L.length (suffix a i) = (a.length - i) predicate min_suffix (a1 a2: array char) (i j n: int) = min_dist (suffix a1 i) (suffix a2 j) n function word_of (a: array char) : word = suffix a 0 (* The program. *) `````` Andrei Paskevich committed Oct 13, 2012 139 140 141 `````` let distance (w1: array char) (w2: array char) ensures { min_dist (word_of w1) (word_of w2) result } = let n1 = length w1 in `````` Jean-Christophe Filliatre committed Jul 15, 2011 142 143 144 145 146 147 148 149 150 151 152 153 154 155 `````` let n2 = length w2 in let t = Array.make (n2+1) 0 in (* initialization of t *) for i = 0 to n2 do invariant { forall j:int. 0 <= j < i -> t[j] = n2 - j } t[i] <- n2 - i done; (* loop over w1 *) for i = n1 - 1 downto 0 do invariant { forall j:int. 0 <= j <= n2 -> min_suffix w1 w2 (i+1) j t[j] } let oldt = ref t[n2] in t[n2] <- t[n2] + 1; (* loop over w2 *) for j = n2 - 1 downto 0 do `````` Andrei Paskevich committed Oct 13, 2012 156 157 158 `````` invariant { forall k:int. j < k <= n2 -> min_suffix w1 w2 i k t[k] } invariant { forall k:int. 0 <= k <= j -> min_suffix w1 w2 (i+1) k t[k] } invariant { min_suffix w1 w2 (i+1) (j+1) !oldt } `````` Jean-Christophe Filliatre committed Jul 15, 2011 159 160 161 162 163 164 165 166 167 168 169 `````` let temp = !oldt in oldt := t[j]; if w1[i] = w2[j] then t[j] <- temp else t[j] <- (min t[j] t[j + 1]) + 1 done done; t[0] end``````