1/*
2Copyright 2015 The Perkeep Authors
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8     http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17package bytereplacer
18
19import (
20	"bytes"
21	"strings"
22	"testing"
23)
24
25var htmlEscaper = New(
26	"&", "&",
27	"<", "&lt;",
28	">", "&gt;",
29	`"`, "&quot;",
30	"'", "&apos;",
31)
32
33var htmlUnescaper = New(
34	"&amp;", "&",
35	"&lt;", "<",
36	"&gt;", ">",
37	"&quot;", `"`,
38	"&apos;", "'",
39)
40
41var capitalLetters = New("a", "A", "b", "B")
42
43func TestReplacer(t *testing.T) {
44	type testCase struct {
45		r       *Replacer
46		in, out string
47	}
48	var testCases []testCase
49
50	// str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
51	str := func(b byte) string {
52		return string([]byte{b})
53	}
54	var s []string
55
56	// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
57	s = nil
58	for i := 0; i < 256; i++ {
59		s = append(s, str(byte(i)), str(byte(i+1)))
60	}
61	inc := New(s...)
62
63	// Test cases with 1-byte old strings, 1-byte new strings.
64	testCases = append(testCases,
65		testCase{capitalLetters, "brad", "BrAd"},
66		testCase{capitalLetters, strings.Repeat("a", (32<<10)+123), strings.Repeat("A", (32<<10)+123)},
67		testCase{capitalLetters, "", ""},
68
69		testCase{inc, "brad", "csbe"},
70		testCase{inc, "\x00\xff", "\x01\x00"},
71		testCase{inc, "", ""},
72
73		testCase{New("a", "1", "a", "2"), "brad", "br1d"},
74	)
75
76	// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
77	s = nil
78	for i := 0; i < 256; i++ {
79		n := i + 1 - 'a'
80		if n < 1 {
81			n = 1
82		}
83		s = append(s, str(byte(i)), strings.Repeat(str(byte(i)), n))
84	}
85	repeat := New(s...)
86
87	// Test cases with 1-byte old strings, variable length new strings.
88	testCases = append(testCases,
89		testCase{htmlEscaper, "No changes", "No changes"},
90		testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
91		testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
92		testCase{htmlEscaper, "", ""},
93
94		testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
95		testCase{repeat, "abba", "abbbba"},
96		testCase{repeat, "", ""},
97
98		testCase{New("a", "11", "a", "22"), "brad", "br11d"},
99	)
100
101	// The remaining test cases have variable length old strings.
102
103	testCases = append(testCases,
104		testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
105		testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
106		testCase{htmlUnescaper, "", ""},
107
108		testCase{New("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
109
110		testCase{New("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
111
112		testCase{New("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
113	)
114
115	// gen1 has multiple old strings of variable length. There is no
116	// overall non-empty common prefix, but some pairwise common prefixes.
117	gen1 := New(
118		"aaa", "3[aaa]",
119		"aa", "2[aa]",
120		"a", "1[a]",
121		"i", "i",
122		"longerst", "most long",
123		"longer", "medium",
124		"long", "short",
125		"xx", "xx",
126		"x", "X",
127		"X", "Y",
128		"Y", "Z",
129	)
130	testCases = append(testCases,
131		testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
132		testCase{gen1, "long, longerst, longer", "short, most long, medium"},
133		testCase{gen1, "xxxxx", "xxxxX"},
134		testCase{gen1, "XiX", "YiY"},
135		testCase{gen1, "", ""},
136	)
137
138	// gen2 has multiple old strings with no pairwise common prefix.
139	gen2 := New(
140		"roses", "red",
141		"violets", "blue",
142		"sugar", "sweet",
143	)
144	testCases = append(testCases,
145		testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
146		testCase{gen2, "", ""},
147	)
148
149	// gen3 has multiple old strings with an overall common prefix.
150	gen3 := New(
151		"abracadabra", "poof",
152		"abracadabrakazam", "splat",
153		"abraham", "lincoln",
154		"abrasion", "scrape",
155		"abraham", "isaac",
156	)
157	testCases = append(testCases,
158		testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
159		testCase{gen3, "abrasion abracad", "scrape abracad"},
160		testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
161		testCase{gen3, "", ""},
162	)
163
164	// foo{1,2,3,4} have multiple old strings with an overall common prefix
165	// and 1- or 2- byte extensions from the common prefix.
166	foo1 := New(
167		"foo1", "A",
168		"foo2", "B",
169		"foo3", "C",
170	)
171	foo2 := New(
172		"foo1", "A",
173		"foo2", "B",
174		"foo31", "C",
175		"foo32", "D",
176	)
177	foo3 := New(
178		"foo11", "A",
179		"foo12", "B",
180		"foo31", "C",
181		"foo32", "D",
182	)
183	foo4 := New(
184		"foo12", "B",
185		"foo32", "D",
186	)
187	testCases = append(testCases,
188		testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
189		testCase{foo1, "", ""},
190
191		testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
192		testCase{foo2, "", ""},
193
194		testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
195		testCase{foo3, "", ""},
196
197		testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
198		testCase{foo4, "", ""},
199	)
200
201	// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
202	allBytes := make([]byte, 256)
203	for i := range allBytes {
204		allBytes[i] = byte(i)
205	}
206	allString := string(allBytes)
207	genAll := New(
208		allString, "[all]",
209		"\xff", "[ff]",
210		"\x00", "[00]",
211	)
212	testCases = append(testCases,
213		testCase{genAll, allString, "[all]"},
214		testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
215		testCase{genAll, "", ""},
216	)
217
218	// Test cases with empty old strings.
219
220	blankToX1 := New("", "X")
221	blankToX2 := New("", "X", "", "")
222	blankHighPriority := New("", "X", "o", "O")
223	blankLowPriority := New("o", "O", "", "X")
224	blankNoOp1 := New("", "")
225	blankNoOp2 := New("", "", "", "A")
226	blankFoo := New("", "X", "foobar", "R", "foobaz", "Z")
227	testCases = append(testCases,
228		testCase{blankToX1, "foo", "XfXoXoX"},
229		testCase{blankToX1, "", "X"},
230
231		testCase{blankToX2, "foo", "XfXoXoX"},
232		testCase{blankToX2, "", "X"},
233
234		testCase{blankHighPriority, "oo", "XOXOX"},
235		testCase{blankHighPriority, "ii", "XiXiX"},
236		testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
237		testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
238		testCase{blankHighPriority, "", "X"},
239
240		testCase{blankLowPriority, "oo", "OOX"},
241		testCase{blankLowPriority, "ii", "XiXiX"},
242		testCase{blankLowPriority, "oiio", "OXiXiOX"},
243		testCase{blankLowPriority, "iooi", "XiOOXiX"},
244		testCase{blankLowPriority, "", "X"},
245
246		testCase{blankNoOp1, "foo", "foo"},
247		testCase{blankNoOp1, "", ""},
248
249		testCase{blankNoOp2, "foo", "foo"},
250		testCase{blankNoOp2, "", ""},
251
252		testCase{blankFoo, "foobarfoobaz", "XRXZX"},
253		testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
254		testCase{blankFoo, "", "X"},
255	)
256
257	// single string replacer
258
259	abcMatcher := New("abc", "[match]")
260
261	testCases = append(testCases,
262		testCase{abcMatcher, "", ""},
263		testCase{abcMatcher, "ab", "ab"},
264		testCase{abcMatcher, "abc", "[match]"},
265		testCase{abcMatcher, "abcd", "[match]d"},
266		testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
267	)
268
269	// Issue 6659 cases (more single string replacer)
270
271	noHello := New("Hello", "")
272	testCases = append(testCases,
273		testCase{noHello, "Hello", ""},
274		testCase{noHello, "Hellox", "x"},
275		testCase{noHello, "xHello", "x"},
276		testCase{noHello, "xHellox", "xx"},
277	)
278
279	// No-arg test cases.
280
281	nop := New()
282	testCases = append(testCases,
283		testCase{nop, "abc", "abc"},
284		testCase{nop, "", ""},
285	)
286
287	// Run the test cases.
288
289	for i, tc := range testCases {
290		{
291			// Replace with len(in) == cap(in)
292			in := make([]byte, len(tc.in))
293			copy(in, tc.in)
294			if s := string(tc.r.Replace(in)); s != tc.out {
295				t.Errorf("%d. Replace(%q /* len == cap */) = %q, want %q", i, tc.in, s, tc.out)
296			}
297		}
298
299		{
300			// Replace with len(in) < cap(in)
301			in := make([]byte, len(tc.in), len(tc.in)*2)
302			copy(in, tc.in)
303			if s := string(tc.r.Replace(in)); s != tc.out {
304				t.Errorf("%d. Replace(%q /* len < cap */) = %q, want %q", i, tc.in, s, tc.out)
305			}
306		}
307	}
308}
309
310func BenchmarkGenericNoMatch(b *testing.B) {
311	str := []byte(strings.Repeat("A", 100) + strings.Repeat("B", 100))
312	generic := New("a", "A", "b", "B", "12", "123") // varying lengths forces generic
313	for i := 0; i < b.N; i++ {
314		generic.Replace(str)
315	}
316}
317
318func BenchmarkGenericMatch1(b *testing.B) {
319	str := []byte(strings.Repeat("a", 100) + strings.Repeat("b", 100))
320	generic := New("a", "A", "b", "B", "12", "123")
321	for i := 0; i < b.N; i++ {
322		generic.Replace(str)
323	}
324}
325
326func BenchmarkGenericMatch2(b *testing.B) {
327	str := bytes.Repeat([]byte("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!"), 100)
328	for i := 0; i < b.N; i++ {
329		htmlUnescaper.Replace(str)
330	}
331}
332
333func benchmarkSingleString(b *testing.B, pattern, text string) {
334	r := New(pattern, "[match]")
335	buf := make([]byte, len(text), len(text)*7)
336	b.SetBytes(int64(len(text)))
337	b.ResetTimer()
338	for i := 0; i < b.N; i++ {
339		copy(buf, text)
340		r.Replace(buf)
341	}
342}
343
344func BenchmarkSingleMaxSkipping(b *testing.B) {
345	benchmarkSingleString(b, strings.Repeat("b", 25), strings.Repeat("a", 10000))
346}
347
348func BenchmarkSingleLongSuffixFail(b *testing.B) {
349	benchmarkSingleString(b, "b"+strings.Repeat("a", 500), strings.Repeat("a", 1002))
350}
351
352func BenchmarkSingleMatch(b *testing.B) {
353	benchmarkSingleString(b, "abcdef", strings.Repeat("abcdefghijklmno", 1000))
354}
355
356func benchmarkReplacer(b *testing.B, r *Replacer, str string) {
357	buf := make([]byte, len(str))
358	b.ResetTimer()
359	for i := 0; i < b.N; i++ {
360		copy(buf, str)
361		r.Replace(buf)
362	}
363}
364
365func BenchmarkByteByteNoMatch(b *testing.B) {
366	benchmarkReplacer(b, capitalLetters, strings.Repeat("A", 100)+strings.Repeat("B", 100))
367}
368
369func BenchmarkByteByteMatch(b *testing.B) {
370	benchmarkReplacer(b, capitalLetters, strings.Repeat("a", 100)+strings.Repeat("b", 100))
371}
372
373func BenchmarkByteStringMatch(b *testing.B) {
374	benchmarkReplacer(b, htmlEscaper, "<"+strings.Repeat("a", 99)+strings.Repeat("b", 99)+">")
375}
376
377func BenchmarkHTMLEscapeNew(b *testing.B) {
378	benchmarkReplacer(b, htmlEscaper, "I <3 to escape HTML & other text too.")
379}
380
381func BenchmarkHTMLEscapeOld(b *testing.B) {
382	str := "I <3 to escape HTML & other text too."
383	buf := make([]byte, len(str))
384	for i := 0; i < b.N; i++ {
385		copy(buf, str)
386		oldHTMLEscape(buf)
387	}
388}
389
390// The http package's old HTML escaping function in bytes form.
391func oldHTMLEscape(s []byte) []byte {
392	s = bytes.Replace(s, []byte("&"), []byte("&amp;"), -1)
393	s = bytes.Replace(s, []byte("<"), []byte("&lt;"), -1)
394	s = bytes.Replace(s, []byte(">"), []byte("&gt;"), -1)
395	s = bytes.Replace(s, []byte(`"`), []byte("&quot;"), -1)
396	s = bytes.Replace(s, []byte("'"), []byte("&apos;"), -1)
397	return s
398}
399
400// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
401func BenchmarkByteByteReplaces(b *testing.B) {
402	str := strings.Repeat("a", 100) + strings.Repeat("b", 100)
403	for i := 0; i < b.N; i++ {
404		bytes.Replace(bytes.Replace([]byte(str), []byte{'a'}, []byte{'A'}, -1), []byte{'b'}, []byte{'B'}, -1)
405	}
406}
407
408// BenchmarkByteByteMap compares byteByteImpl against Map.
409func BenchmarkByteByteMap(b *testing.B) {
410	str := strings.Repeat("a", 100) + strings.Repeat("b", 100)
411	fn := func(r rune) rune {
412		switch r {
413		case 'a':
414			return 'A'
415		case 'b':
416			return 'B'
417		}
418		return r
419	}
420	for i := 0; i < b.N; i++ {
421		bytes.Map(fn, []byte(str))
422	}
423}
424