1// Copyright (c) 2017 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package levenshtein 16 17import ( 18 "testing" 19) 20 21func TestLevenshtein(t *testing.T) { 22 23 tests := []struct { 24 desc string 25 query string 26 distance int 27 seq []byte 28 isMatch bool 29 canMatch bool 30 }{ 31 { 32 desc: "cat/0 - c a t", 33 query: "cat", 34 distance: 0, 35 seq: []byte{'c', 'a', 't'}, 36 isMatch: true, 37 canMatch: true, 38 }, 39 { 40 desc: "cat/1 - c a", 41 query: "cat", 42 distance: 1, 43 seq: []byte{'c', 'a'}, 44 isMatch: true, 45 canMatch: true, 46 }, 47 { 48 desc: "cat/1 - c a t s", 49 query: "cat", 50 distance: 1, 51 seq: []byte{'c', 'a', 't', 's'}, 52 isMatch: true, 53 canMatch: true, 54 }, 55 { 56 desc: "cat/0 - c a", 57 query: "cat", 58 distance: 0, 59 seq: []byte{'c', 'a'}, 60 isMatch: false, 61 canMatch: true, 62 }, 63 { 64 desc: "cat/0 - c a t s", 65 query: "cat", 66 distance: 0, 67 seq: []byte{'c', 'a', 't', 's'}, 68 isMatch: false, 69 canMatch: false, 70 }, 71 // this section contains cases where the sequence 72 // of bytes encountered contains utf-8 encoded 73 // multi-byte characters, which should count as 1 74 // for the purposes of the levenshtein edit distance 75 { 76 desc: "cat/0 - c 0xc3 0xa1 t (cát)", 77 query: "cat", 78 distance: 0, 79 seq: []byte{'c', 0xc3, 0xa1, 't'}, 80 isMatch: false, 81 canMatch: false, 82 }, 83 { 84 desc: "cat/1 - c 0xc3 0xa1 t (cát)", 85 query: "cat", 86 distance: 1, 87 seq: []byte{'c', 0xc3, 0xa1, 't'}, 88 isMatch: true, 89 canMatch: true, 90 }, 91 { 92 desc: "cat/1 - c 0xc3 0xa1 t (cáts)", 93 query: "cat", 94 distance: 1, 95 seq: []byte{'c', 0xc3, 0xa1, 't', 's'}, 96 isMatch: false, 97 canMatch: false, 98 }, 99 { 100 desc: "cat/1 - 0xc3 0xa1 (á)", 101 query: "cat", 102 distance: 1, 103 seq: []byte{0xc3, 0xa1}, 104 isMatch: false, 105 canMatch: true, 106 }, 107 { 108 desc: "cat/1 - c 0xc3 0xa1 t (ácat)", 109 query: "cat", 110 distance: 1, 111 seq: []byte{0xc3, 0xa1, 'c', 'a', 't'}, 112 isMatch: true, 113 canMatch: true, 114 }, 115 // this section has utf-8 encoded multi-byte characters 116 // in the query, which should still just count as 1 117 // for the purposes of the levenshtein edit distance 118 { 119 desc: "cát/0 - c a t (cat)", 120 query: "cát", 121 distance: 0, 122 seq: []byte{'c', 'a', 't'}, 123 isMatch: false, 124 canMatch: false, 125 }, 126 { 127 desc: "cát/1 - c 0xc3 0xa1 (cá)", 128 query: "cát", 129 distance: 1, 130 seq: []byte{'c', 0xc3, 0xa1}, 131 isMatch: true, 132 canMatch: true, 133 }, 134 { 135 desc: "cát/1 - c 0xc3 0xa1 s (cás)", 136 query: "cát", 137 distance: 1, 138 seq: []byte{'c', 0xc3, 0xa1, 's'}, 139 isMatch: true, 140 canMatch: true, 141 }, 142 { 143 desc: "cát/1 - c 0xc3 0xa1 t a (cáta)", 144 query: "cát", 145 distance: 1, 146 seq: []byte{'c', 0xc3, 0xa1, 't', 'a'}, 147 isMatch: true, 148 canMatch: true, 149 }, 150 { 151 desc: "cát/1 - d 0xc3 0xa1 (dát)", 152 query: "cát", 153 distance: 1, 154 seq: []byte{'d', 0xc3, 0xa1, 't'}, 155 isMatch: true, 156 canMatch: true, 157 }, 158 { 159 desc: "cát/1 - c a t (cat)", 160 query: "cát", 161 distance: 1, 162 seq: []byte{'c', 'a', 't'}, 163 isMatch: true, 164 canMatch: true, 165 }, 166 167 { 168 desc: "cát/1 - c a t (cats)", 169 query: "cát", 170 distance: 1, 171 seq: []byte{'c', 'a', 't', 's'}, 172 isMatch: false, 173 canMatch: false, 174 }, 175 { 176 desc: "cát/1 - 0xc3, 0xa (á)", 177 query: "cát", 178 distance: 1, 179 seq: []byte{0xc3, 0xa1}, 180 isMatch: false, 181 canMatch: true, 182 }, 183 { 184 desc: "cát/1 - a c 0xc3 0xa1 t (acát)", 185 query: "cát", 186 distance: 1, 187 seq: []byte{'a', 'c', 0xc3, 0xa1, 't'}, 188 isMatch: true, 189 canMatch: true, 190 }, 191 } 192 193 for _, test := range tests { 194 t.Run(test.desc, func(t *testing.T) { 195 l, err := New(test.query, test.distance) 196 if err != nil { 197 t.Fatal(err) 198 } 199 200 s := l.Start() 201 for _, b := range test.seq { 202 s = l.Accept(s, b) 203 } 204 205 isMatch := l.IsMatch(s) 206 if isMatch != test.isMatch { 207 t.Errorf("expected isMatch %t, got %t", test.isMatch, isMatch) 208 } 209 210 canMatch := l.CanMatch(s) 211 if canMatch != test.canMatch { 212 t.Errorf("expectec canMatch %t, got %t", test.canMatch, canMatch) 213 } 214 }) 215 } 216} 217 218func BenchmarkNewMarty1(b *testing.B) { 219 for i := 0; i < b.N; i++ { 220 New("marty", 1) 221 } 222} 223 224func BenchmarkNewMarty2(b *testing.B) { 225 for i := 0; i < b.N; i++ { 226 New("marty", 2) 227 } 228} 229