1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package strings_test
6
7import (
8	"bytes"
9	"fmt"
10	. "strings"
11	"testing"
12)
13
// htmlEscaper replaces the five characters &, <, >, " and ' with their
// HTML entity references. Every old string is a single byte, while the
// replacements are longer strings.
var htmlEscaper = NewReplacer(
	"&", "&amp;",
	"<", "&lt;",
	">", "&gt;",
	`"`, "&quot;",
	"'", "&apos;",
)
21
// htmlUnescaper maps the entity references &amp;, &lt;, &gt;, &quot; and
// &apos; back to the single characters they stand for.
var htmlUnescaper = NewReplacer([]string{
	"&amp;", "&",
	"&lt;", "<",
	"&gt;", ">",
	"&quot;", "\"",
	"&apos;", "'",
}...)
29
// oldHTMLEscape is the http package's old HTML escaping function. It
// escapes s by running one full Replace pass per special character.
func oldHTMLEscape(s string) string {
	// "&" is handled first so that the ampersands introduced by the
	// later entity references are not escaped a second time.
	passes := [...][2]string{
		{"&", "&amp;"},
		{"<", "&lt;"},
		{">", "&gt;"},
		{`"`, "&quot;"},
		{"'", "&apos;"},
	}
	for _, pass := range passes {
		s = Replace(s, pass[0], pass[1], -1)
	}
	return s
}
39
// capitalLetters upper-cases the bytes "a" and "b"; both the old and the
// new strings are a single byte long.
var capitalLetters = NewReplacer(
	"a", "A",
	"b", "B",
)
41
// TestReplacer tests the replacer implementations.
//
// It accumulates a single table of (Replacer, input, expected output)
// cases and then checks every case through both Replace and WriteString.
// Failure messages identify a case by its position in the table.
func TestReplacer(t *testing.T) {
	type testCase struct {
		r       *Replacer
		in, out string
	}
	var testCases []testCase

	// str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
	str := func(b byte) string {
		return string([]byte{b})
	}
	var s []string

	// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
	s = nil
	for i := 0; i < 256; i++ {
		s = append(s, str(byte(i)), str(byte(i+1)))
	}
	inc := NewReplacer(s...)

	// Test cases with 1-byte old strings, 1-byte new strings.
	testCases = append(testCases,
		testCase{capitalLetters, "brad", "BrAd"},
		testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
		testCase{capitalLetters, "", ""},

		testCase{inc, "brad", "csbe"},
		testCase{inc, "\x00\xff", "\x01\x00"},
		testCase{inc, "", ""},

		// A duplicated old string: the first pair is the one expected to apply.
		testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
	)

	// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
	s = nil
	for i := 0; i < 256; i++ {
		// n is the repeat count: 1 for 'a', 2 for 'b', and so on. Bytes
		// below 'a' would give a count under 1 and are clamped to 1.
		n := i + 1 - 'a'
		if n < 1 {
			n = 1
		}
		s = append(s, str(byte(i)), Repeat(str(byte(i)), n))
	}
	repeat := NewReplacer(s...)

	// Test cases with 1-byte old strings, variable length new strings.
	testCases = append(testCases,
		testCase{htmlEscaper, "No changes", "No changes"},
		testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
		testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
		testCase{htmlEscaper, "", ""},

		testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
		testCase{repeat, "abba", "abbbba"},
		testCase{repeat, "", ""},

		testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
	)

	// The remaining test cases have variable length old strings.

	testCases = append(testCases,
		testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
		testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
		testCase{htmlUnescaper, "", ""},

		testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},

		// The next two cases use the same pairs in opposite argument order;
		// the expected outputs show that the earlier pair wins, not the
		// longer match.
		testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},

		testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
	)

	// gen1 has multiple old strings of variable length. There is no
	// overall non-empty common prefix, but some pairwise common prefixes.
	gen1 := NewReplacer(
		"aaa", "3[aaa]",
		"aa", "2[aa]",
		"a", "1[a]",
		"i", "i",
		"longerst", "most long",
		"longer", "medium",
		"long", "short",
		"xx", "xx",
		"x", "X",
		"X", "Y",
		"Y", "Z",
	)
	testCases = append(testCases,
		testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
		testCase{gen1, "long, longerst, longer", "short, most long, medium"},
		testCase{gen1, "xxxxx", "xxxxX"},
		testCase{gen1, "XiX", "YiY"},
		testCase{gen1, "", ""},
	)

	// gen2 has multiple old strings with no pairwise common prefix.
	gen2 := NewReplacer(
		"roses", "red",
		"violets", "blue",
		"sugar", "sweet",
	)
	testCases = append(testCases,
		testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
		testCase{gen2, "", ""},
	)

	// gen3 has multiple old strings with an overall common prefix.
	gen3 := NewReplacer(
		"abracadabra", "poof",
		"abracadabrakazam", "splat",
		"abraham", "lincoln",
		"abrasion", "scrape",
		"abraham", "isaac",
	)
	testCases = append(testCases,
		testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
		testCase{gen3, "abrasion abracad", "scrape abracad"},
		testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
		testCase{gen3, "", ""},
	)

	// foo{1,2,3,4} have multiple old strings with an overall common prefix
	// and 1- or 2- byte extensions from the common prefix.
	foo1 := NewReplacer(
		"foo1", "A",
		"foo2", "B",
		"foo3", "C",
	)
	foo2 := NewReplacer(
		"foo1", "A",
		"foo2", "B",
		"foo31", "C",
		"foo32", "D",
	)
	foo3 := NewReplacer(
		"foo11", "A",
		"foo12", "B",
		"foo31", "C",
		"foo32", "D",
	)
	foo4 := NewReplacer(
		"foo12", "B",
		"foo32", "D",
	)
	testCases = append(testCases,
		testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
		testCase{foo1, "", ""},

		testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
		testCase{foo2, "", ""},

		testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
		testCase{foo3, "", ""},

		testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
		testCase{foo4, "", ""},
	)

	// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
	allBytes := make([]byte, 256)
	for i := range allBytes {
		allBytes[i] = byte(i)
	}
	allString := string(allBytes)
	genAll := NewReplacer(
		allString, "[all]",
		"\xff", "[ff]",
		"\x00", "[00]",
	)
	testCases = append(testCases,
		testCase{genAll, allString, "[all]"},
		testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
		testCase{genAll, "", ""},
	)

	// Test cases with empty old strings.

	blankToX1 := NewReplacer("", "X")
	blankToX2 := NewReplacer("", "X", "", "")
	blankHighPriority := NewReplacer("", "X", "o", "O")
	blankLowPriority := NewReplacer("o", "O", "", "X")
	blankNoOp1 := NewReplacer("", "")
	blankNoOp2 := NewReplacer("", "", "", "A")
	blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
	testCases = append(testCases,
		testCase{blankToX1, "foo", "XfXoXoX"},
		testCase{blankToX1, "", "X"},

		testCase{blankToX2, "foo", "XfXoXoX"},
		testCase{blankToX2, "", "X"},

		testCase{blankHighPriority, "oo", "XOXOX"},
		testCase{blankHighPriority, "ii", "XiXiX"},
		testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
		testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
		testCase{blankHighPriority, "", "X"},

		testCase{blankLowPriority, "oo", "OOX"},
		testCase{blankLowPriority, "ii", "XiXiX"},
		testCase{blankLowPriority, "oiio", "OXiXiOX"},
		testCase{blankLowPriority, "iooi", "XiOOXiX"},
		testCase{blankLowPriority, "", "X"},

		testCase{blankNoOp1, "foo", "foo"},
		testCase{blankNoOp1, "", ""},

		testCase{blankNoOp2, "foo", "foo"},
		testCase{blankNoOp2, "", ""},

		testCase{blankFoo, "foobarfoobaz", "XRXZX"},
		testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
		testCase{blankFoo, "", "X"},
	)

	// single string replacer

	abcMatcher := NewReplacer("abc", "[match]")

	testCases = append(testCases,
		testCase{abcMatcher, "", ""},
		testCase{abcMatcher, "ab", "ab"},
		testCase{abcMatcher, "abc", "[match]"},
		testCase{abcMatcher, "abcd", "[match]d"},
		testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
	)

	// Issue 6659 cases (more single string replacer)

	noHello := NewReplacer("Hello", "")
	testCases = append(testCases,
		testCase{noHello, "Hello", ""},
		testCase{noHello, "Hellox", "x"},
		testCase{noHello, "xHello", "x"},
		testCase{noHello, "xHellox", "xx"},
	)

	// No-arg test cases.

	nop := NewReplacer()
	testCases = append(testCases,
		testCase{nop, "abc", "abc"},
		testCase{nop, "", ""},
	)

	// Run the test cases.

	for i, tc := range testCases {
		// A Replace mismatch is reported but does not skip the WriteString
		// checks below for the same case.
		if s := tc.r.Replace(tc.in); s != tc.out {
			t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
		}
		var buf bytes.Buffer
		n, err := tc.r.WriteString(&buf, tc.in)
		if err != nil {
			t.Errorf("%d. WriteString: %v", i, err)
			continue
		}
		got := buf.String()
		if got != tc.out {
			t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
			continue
		}
		// The reported byte count is only checked once the written text is
		// known to be correct.
		if n != len(tc.out) {
			t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
				i, tc.in, n, len(tc.out), tc.out)
		}
	}
}
310
311var algorithmTestCases = []struct {
312	r    *Replacer
313	want string
314}{
315	{capitalLetters, "*strings.byteReplacer"},
316	{htmlEscaper, "*strings.byteStringReplacer"},
317	{NewReplacer("12", "123"), "*strings.singleStringReplacer"},
318	{NewReplacer("1", "12"), "*strings.byteStringReplacer"},
319	{NewReplacer("", "X"), "*strings.genericReplacer"},
320	{NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
321}
322
323// TestPickAlgorithm tests that NewReplacer picks the correct algorithm.
324func TestPickAlgorithm(t *testing.T) {
325	for i, tc := range algorithmTestCases {
326		got := fmt.Sprintf("%T", tc.r.Replacer())
327		if got != tc.want {
328			t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want)
329		}
330	}
331}
332
// errWriter is a writer whose Write method always fails.
type errWriter struct{}

// Write ignores p and reports zero bytes written together with an
// "unwritable" error.
func (errWriter) Write(p []byte) (n int, err error) {
	err = fmt.Errorf("unwritable")
	return 0, err
}
338
339// TestWriteStringError tests that WriteString returns an error
340// received from the underlying io.Writer.
341func TestWriteStringError(t *testing.T) {
342	for i, tc := range algorithmTestCases {
343		n, err := tc.r.WriteString(errWriter{}, "abc")
344		if n != 0 || err == nil || err.Error() != "unwritable" {
345			t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", i, n, err)
346		}
347	}
348}
349
350// TestGenericTrieBuilding verifies the structure of the generated trie. There
351// is one node per line, and the key ending with the current line is in the
352// trie if it ends with a "+".
353func TestGenericTrieBuilding(t *testing.T) {
354	testCases := []struct{ in, out string }{
355		{"abc;abdef;abdefgh;xx;xy;z", `-
356			a-
357			.b-
358			..c+
359			..d-
360			...ef+
361			.....gh+
362			x-
363			.x+
364			.y+
365			z+
366			`},
367		{"abracadabra;abracadabrakazam;abraham;abrasion", `-
368			a-
369			.bra-
370			....c-
371			.....adabra+
372			...........kazam+
373			....h-
374			.....am+
375			....s-
376			.....ion+
377			`},
378		{"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
379			X+
380			Y+
381			a+
382			.a+
383			..a+
384			i+
385			l-
386			.ong+
387			....er+
388			......st+
389			x+
390			.x+
391			`},
392		{"foo;;foo;foo1", `+
393			f-
394			.oo+
395			...1+
396			`},
397	}
398
399	for _, tc := range testCases {
400		keys := Split(tc.in, ";")
401		args := make([]string, len(keys)*2)
402		for i, key := range keys {
403			args[i*2] = key
404		}
405
406		got := NewReplacer(args...).PrintTrie()
407		// Remove tabs from tc.out
408		wantbuf := make([]byte, 0, len(tc.out))
409		for i := 0; i < len(tc.out); i++ {
410			if tc.out[i] != '\t' {
411				wantbuf = append(wantbuf, tc.out[i])
412			}
413		}
414		want := string(wantbuf)
415
416		if got != want {
417			t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
418		}
419	}
420}
421
// BenchmarkGenericNoMatch runs a multi-pattern Replacer over 200 bytes of
// input that contains none of its old strings.
func BenchmarkGenericNoMatch(b *testing.B) {
	input := Repeat("A", 100) + Repeat("B", 100)
	// varying lengths forces generic
	r := NewReplacer("a", "A", "b", "B", "12", "123")
	for n := 0; n < b.N; n++ {
		r.Replace(input)
	}
}
429
// BenchmarkGenericMatch1 runs the same multi-pattern Replacer as
// BenchmarkGenericNoMatch over 200 bytes of input in which every byte
// matches one of the single-byte old strings.
func BenchmarkGenericMatch1(b *testing.B) {
	input := Repeat("a", 100) + Repeat("b", 100)
	r := NewReplacer("a", "A", "b", "B", "12", "123")
	for n := 0; n < b.N; n++ {
		r.Replace(input)
	}
}
437
438func BenchmarkGenericMatch2(b *testing.B) {
439	str := Repeat("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!", 100)
440	for i := 0; i < b.N; i++ {
441		htmlUnescaper.Replace(str)
442	}
443}
444
// benchmarkSingleString measures replacing every occurrence of pattern in
// text with "[match]". The throughput is based on len(text), and the timer
// is reset after the Replacer has been built.
func benchmarkSingleString(b *testing.B, pattern, text string) {
	replacer := NewReplacer(pattern, "[match]")
	size := int64(len(text))
	b.SetBytes(size)
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		replacer.Replace(text)
	}
}
453
454func BenchmarkSingleMaxSkipping(b *testing.B) {
455	benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000))
456}
457
458func BenchmarkSingleLongSuffixFail(b *testing.B) {
459	benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002))
460}
461
462func BenchmarkSingleMatch(b *testing.B) {
463	benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000))
464}
465
466func BenchmarkByteByteNoMatch(b *testing.B) {
467	str := Repeat("A", 100) + Repeat("B", 100)
468	for i := 0; i < b.N; i++ {
469		capitalLetters.Replace(str)
470	}
471}
472
473func BenchmarkByteByteMatch(b *testing.B) {
474	str := Repeat("a", 100) + Repeat("b", 100)
475	for i := 0; i < b.N; i++ {
476		capitalLetters.Replace(str)
477	}
478}
479
480func BenchmarkByteStringMatch(b *testing.B) {
481	str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
482	for i := 0; i < b.N; i++ {
483		htmlEscaper.Replace(str)
484	}
485}
486
487func BenchmarkHTMLEscapeNew(b *testing.B) {
488	str := "I <3 to escape HTML & other text too."
489	for i := 0; i < b.N; i++ {
490		htmlEscaper.Replace(str)
491	}
492}
493
494func BenchmarkHTMLEscapeOld(b *testing.B) {
495	str := "I <3 to escape HTML & other text too."
496	for i := 0; i < b.N; i++ {
497		oldHTMLEscape(str)
498	}
499}
500
501func BenchmarkByteStringReplacerWriteString(b *testing.B) {
502	str := Repeat("I <3 to escape HTML & other text too.", 100)
503	buf := new(bytes.Buffer)
504	for i := 0; i < b.N; i++ {
505		htmlEscaper.WriteString(buf, str)
506		buf.Reset()
507	}
508}
509
510func BenchmarkByteReplacerWriteString(b *testing.B) {
511	str := Repeat("abcdefghijklmnopqrstuvwxyz", 100)
512	buf := new(bytes.Buffer)
513	for i := 0; i < b.N; i++ {
514		capitalLetters.WriteString(buf, str)
515		buf.Reset()
516	}
517}
518
// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces:
// it performs the "a"->"A" and "b"->"B" substitutions as two chained Replace
// calls.
func BenchmarkByteByteReplaces(b *testing.B) {
	input := Repeat("a", 100) + Repeat("b", 100)
	for n := 0; n < b.N; n++ {
		upperA := Replace(input, "a", "A", -1)
		Replace(upperA, "b", "B", -1)
	}
}
526
// BenchmarkByteByteMap compares byteByteImpl against Map: it performs the
// 'a'->'A' and 'b'->'B' substitutions through a rune-mapping function.
func BenchmarkByteByteMap(b *testing.B) {
	upper := func(r rune) rune {
		if r == 'a' {
			return 'A'
		}
		if r == 'b' {
			return 'B'
		}
		return r
	}
	input := Repeat("a", 100) + Repeat("b", 100)
	for n := 0; n < b.N; n++ {
		Map(upper, input)
	}
}
543
// mapdata holds the named inputs for BenchmarkMap: one made of ASCII
// lower-case letters and one made of Greek lower-case letters, each
// separated by spaces.
var mapdata = []struct{ name, data string }{
	{name: "ASCII", data: "a b c d e f g h i j k l m n o p q r s t u v w x y z"},
	{name: "Greek", data: "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω"},
}
548
549func BenchmarkMap(b *testing.B) {
550	mapidentity := func(r rune) rune {
551		return r
552	}
553
554	b.Run("identity", func(b *testing.B) {
555		for _, md := range mapdata {
556			b.Run(md.name, func(b *testing.B) {
557				for i := 0; i < b.N; i++ {
558					Map(mapidentity, md.data)
559				}
560			})
561		}
562	})
563
564	mapchange := func(r rune) rune {
565		if 'a' <= r && r <= 'z' {
566			return r + 'A' - 'a'
567		}
568		if 'α' <= r && r <= 'ω' {
569			return r + 'Α' - 'α'
570		}
571		return r
572	}
573
574	b.Run("change", func(b *testing.B) {
575		for _, md := range mapdata {
576			b.Run(md.name, func(b *testing.B) {
577				for i := 0; i < b.N; i++ {
578					Map(mapchange, md.data)
579				}
580			})
581		}
582	})
583}
584