1//  Copyright (c) 2017 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package levenshtein
16
17import (
18	"testing"
19)
20
21func TestLevenshtein(t *testing.T) {
22
23	tests := []struct {
24		desc     string
25		query    string
26		distance int
27		seq      []byte
28		isMatch  bool
29		canMatch bool
30	}{
31		{
32			desc:     "cat/0 - c a t",
33			query:    "cat",
34			distance: 0,
35			seq:      []byte{'c', 'a', 't'},
36			isMatch:  true,
37			canMatch: true,
38		},
39		{
40			desc:     "cat/1 - c a",
41			query:    "cat",
42			distance: 1,
43			seq:      []byte{'c', 'a'},
44			isMatch:  true,
45			canMatch: true,
46		},
47		{
48			desc:     "cat/1 - c a t s",
49			query:    "cat",
50			distance: 1,
51			seq:      []byte{'c', 'a', 't', 's'},
52			isMatch:  true,
53			canMatch: true,
54		},
55		{
56			desc:     "cat/0 - c a",
57			query:    "cat",
58			distance: 0,
59			seq:      []byte{'c', 'a'},
60			isMatch:  false,
61			canMatch: true,
62		},
63		{
64			desc:     "cat/0 - c a t s",
65			query:    "cat",
66			distance: 0,
67			seq:      []byte{'c', 'a', 't', 's'},
68			isMatch:  false,
69			canMatch: false,
70		},
71		// this section contains cases where the sequence
72		// of bytes encountered contains utf-8 encoded
73		// multi-byte characters, which should count as 1
74		// for the purposes of the levenshtein edit distance
75		{
76			desc:     "cat/0 - c 0xc3 0xa1 t (cát)",
77			query:    "cat",
78			distance: 0,
79			seq:      []byte{'c', 0xc3, 0xa1, 't'},
80			isMatch:  false,
81			canMatch: false,
82		},
83		{
84			desc:     "cat/1 - c 0xc3 0xa1 t (cát)",
85			query:    "cat",
86			distance: 1,
87			seq:      []byte{'c', 0xc3, 0xa1, 't'},
88			isMatch:  true,
89			canMatch: true,
90		},
91		{
92			desc:     "cat/1 - c 0xc3 0xa1 t (cáts)",
93			query:    "cat",
94			distance: 1,
95			seq:      []byte{'c', 0xc3, 0xa1, 't', 's'},
96			isMatch:  false,
97			canMatch: false,
98		},
99		{
100			desc:     "cat/1 - 0xc3 0xa1 (á)",
101			query:    "cat",
102			distance: 1,
103			seq:      []byte{0xc3, 0xa1},
104			isMatch:  false,
105			canMatch: true,
106		},
107		{
108			desc:     "cat/1 - c 0xc3 0xa1 t (ácat)",
109			query:    "cat",
110			distance: 1,
111			seq:      []byte{0xc3, 0xa1, 'c', 'a', 't'},
112			isMatch:  true,
113			canMatch: true,
114		},
115		// this section has utf-8 encoded multi-byte characters
116		// in the query, which should still just count as 1
117		// for the purposes of the levenshtein edit distance
118		{
119			desc:     "cát/0 - c a t (cat)",
120			query:    "cát",
121			distance: 0,
122			seq:      []byte{'c', 'a', 't'},
123			isMatch:  false,
124			canMatch: false,
125		},
126		{
127			desc:     "cát/1 - c 0xc3 0xa1 (cá)",
128			query:    "cát",
129			distance: 1,
130			seq:      []byte{'c', 0xc3, 0xa1},
131			isMatch:  true,
132			canMatch: true,
133		},
134		{
135			desc:     "cát/1 - c 0xc3 0xa1 s (cás)",
136			query:    "cát",
137			distance: 1,
138			seq:      []byte{'c', 0xc3, 0xa1, 's'},
139			isMatch:  true,
140			canMatch: true,
141		},
142		{
143			desc:     "cát/1 - c 0xc3 0xa1 t a (cáta)",
144			query:    "cát",
145			distance: 1,
146			seq:      []byte{'c', 0xc3, 0xa1, 't', 'a'},
147			isMatch:  true,
148			canMatch: true,
149		},
150		{
151			desc:     "cát/1 - d 0xc3 0xa1 (dát)",
152			query:    "cát",
153			distance: 1,
154			seq:      []byte{'d', 0xc3, 0xa1, 't'},
155			isMatch:  true,
156			canMatch: true,
157		},
158		{
159			desc:     "cát/1 - c a t (cat)",
160			query:    "cát",
161			distance: 1,
162			seq:      []byte{'c', 'a', 't'},
163			isMatch:  true,
164			canMatch: true,
165		},
166
167		{
168			desc:     "cát/1 - c a t (cats)",
169			query:    "cát",
170			distance: 1,
171			seq:      []byte{'c', 'a', 't', 's'},
172			isMatch:  false,
173			canMatch: false,
174		},
175		{
176			desc:     "cát/1 - 0xc3, 0xa (á)",
177			query:    "cát",
178			distance: 1,
179			seq:      []byte{0xc3, 0xa1},
180			isMatch:  false,
181			canMatch: true,
182		},
183		{
184			desc:     "cát/1 - a c 0xc3 0xa1 t (acát)",
185			query:    "cát",
186			distance: 1,
187			seq:      []byte{'a', 'c', 0xc3, 0xa1, 't'},
188			isMatch:  true,
189			canMatch: true,
190		},
191	}
192
193	for _, test := range tests {
194		t.Run(test.desc, func(t *testing.T) {
195			l, err := New(test.query, test.distance)
196			if err != nil {
197				t.Fatal(err)
198			}
199
200			s := l.Start()
201			for _, b := range test.seq {
202				s = l.Accept(s, b)
203			}
204
205			isMatch := l.IsMatch(s)
206			if isMatch != test.isMatch {
207				t.Errorf("expected isMatch %t, got %t", test.isMatch, isMatch)
208			}
209
210			canMatch := l.CanMatch(s)
211			if canMatch != test.canMatch {
212				t.Errorf("expectec canMatch %t, got %t", test.canMatch, canMatch)
213			}
214		})
215	}
216}
217
218func BenchmarkNewMarty1(b *testing.B) {
219	for i := 0; i < b.N; i++ {
220		New("marty", 1)
221	}
222}
223
224func BenchmarkNewMarty2(b *testing.B) {
225	for i := 0; i < b.N; i++ {
226		New("marty", 2)
227	}
228}
229