1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package strings_test 6 7import ( 8 "bytes" 9 "fmt" 10 . "strings" 11 "testing" 12) 13 14var htmlEscaper = NewReplacer( 15 "&", "&", 16 "<", "<", 17 ">", ">", 18 `"`, """, 19 "'", "'", 20) 21 22var htmlUnescaper = NewReplacer( 23 "&", "&", 24 "<", "<", 25 ">", ">", 26 """, `"`, 27 "'", "'", 28) 29 30// The http package's old HTML escaping function. 31func oldHTMLEscape(s string) string { 32 s = Replace(s, "&", "&", -1) 33 s = Replace(s, "<", "<", -1) 34 s = Replace(s, ">", ">", -1) 35 s = Replace(s, `"`, """, -1) 36 s = Replace(s, "'", "'", -1) 37 return s 38} 39 40var capitalLetters = NewReplacer("a", "A", "b", "B") 41 42// TestReplacer tests the replacer implementations. 43func TestReplacer(t *testing.T) { 44 type testCase struct { 45 r *Replacer 46 in, out string 47 } 48 var testCases []testCase 49 50 // str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8. 51 str := func(b byte) string { 52 return string([]byte{b}) 53 } 54 var s []string 55 56 // inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00". 57 s = nil 58 for i := 0; i < 256; i++ { 59 s = append(s, str(byte(i)), str(byte(i+1))) 60 } 61 inc := NewReplacer(s...) 62 63 // Test cases with 1-byte old strings, 1-byte new strings. 64 testCases = append(testCases, 65 testCase{capitalLetters, "brad", "BrAd"}, 66 testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)}, 67 testCase{capitalLetters, "", ""}, 68 69 testCase{inc, "brad", "csbe"}, 70 testCase{inc, "\x00\xff", "\x01\x00"}, 71 testCase{inc, "", ""}, 72 73 testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"}, 74 ) 75 76 // repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ... 77 s = nil 78 for i := 0; i < 256; i++ { 79 n := i + 1 - 'a' 80 if n < 1 { 81 n = 1 82 } 83 s = append(s, str(byte(i)), Repeat(str(byte(i)), n)) 84 } 85 repeat := NewReplacer(s...) 86 87 // Test cases with 1-byte old strings, variable length new strings. 88 testCases = append(testCases, 89 testCase{htmlEscaper, "No changes", "No changes"}, 90 testCase{htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"}, 91 testCase{htmlEscaper, "&&&", "&&&"}, 92 testCase{htmlEscaper, "", ""}, 93 94 testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"}, 95 testCase{repeat, "abba", "abbbba"}, 96 testCase{repeat, "", ""}, 97 98 testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"}, 99 ) 100 101 // The remaining test cases have variable length old strings. 102 103 testCases = append(testCases, 104 testCase{htmlUnescaper, "&amp;", "&"}, 105 testCase{htmlUnescaper, "<b>HTML's neat</b>", "<b>HTML's neat</b>"}, 106 testCase{htmlUnescaper, "", ""}, 107 108 testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"}, 109 110 testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"}, 111 112 testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"}, 113 ) 114 115 // gen1 has multiple old strings of variable length. There is no 116 // overall non-empty common prefix, but some pairwise common prefixes. 117 gen1 := NewReplacer( 118 "aaa", "3[aaa]", 119 "aa", "2[aa]", 120 "a", "1[a]", 121 "i", "i", 122 "longerst", "most long", 123 "longer", "medium", 124 "long", "short", 125 "xx", "xx", 126 "x", "X", 127 "X", "Y", 128 "Y", "Z", 129 ) 130 testCases = append(testCases, 131 testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"}, 132 testCase{gen1, "long, longerst, longer", "short, most long, medium"}, 133 testCase{gen1, "xxxxx", "xxxxX"}, 134 testCase{gen1, "XiX", "YiY"}, 135 testCase{gen1, "", ""}, 136 ) 137 138 // gen2 has multiple old strings with no pairwise common prefix. 139 gen2 := NewReplacer( 140 "roses", "red", 141 "violets", "blue", 142 "sugar", "sweet", 143 ) 144 testCases = append(testCases, 145 testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."}, 146 testCase{gen2, "", ""}, 147 ) 148 149 // gen3 has multiple old strings with an overall common prefix. 150 gen3 := NewReplacer( 151 "abracadabra", "poof", 152 "abracadabrakazam", "splat", 153 "abraham", "lincoln", 154 "abrasion", "scrape", 155 "abraham", "isaac", 156 ) 157 testCases = append(testCases, 158 testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"}, 159 testCase{gen3, "abrasion abracad", "scrape abracad"}, 160 testCase{gen3, "abba abram abrasive", "abba abram abrasive"}, 161 testCase{gen3, "", ""}, 162 ) 163 164 // foo{1,2,3,4} have multiple old strings with an overall common prefix 165 // and 1- or 2- byte extensions from the common prefix. 166 foo1 := NewReplacer( 167 "foo1", "A", 168 "foo2", "B", 169 "foo3", "C", 170 ) 171 foo2 := NewReplacer( 172 "foo1", "A", 173 "foo2", "B", 174 "foo31", "C", 175 "foo32", "D", 176 ) 177 foo3 := NewReplacer( 178 "foo11", "A", 179 "foo12", "B", 180 "foo31", "C", 181 "foo32", "D", 182 ) 183 foo4 := NewReplacer( 184 "foo12", "B", 185 "foo32", "D", 186 ) 187 testCases = append(testCases, 188 testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"}, 189 testCase{foo1, "", ""}, 190 191 testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"}, 192 testCase{foo2, "", ""}, 193 194 testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"}, 195 testCase{foo3, "", ""}, 196 197 testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"}, 198 testCase{foo4, "", ""}, 199 ) 200 201 // genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things. 202 allBytes := make([]byte, 256) 203 for i := range allBytes { 204 allBytes[i] = byte(i) 205 } 206 allString := string(allBytes) 207 genAll := NewReplacer( 208 allString, "[all]", 209 "\xff", "[ff]", 210 "\x00", "[00]", 211 ) 212 testCases = append(testCases, 213 testCase{genAll, allString, "[all]"}, 214 testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"}, 215 testCase{genAll, "", ""}, 216 ) 217 218 // Test cases with empty old strings. 219 220 blankToX1 := NewReplacer("", "X") 221 blankToX2 := NewReplacer("", "X", "", "") 222 blankHighPriority := NewReplacer("", "X", "o", "O") 223 blankLowPriority := NewReplacer("o", "O", "", "X") 224 blankNoOp1 := NewReplacer("", "") 225 blankNoOp2 := NewReplacer("", "", "", "A") 226 blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z") 227 testCases = append(testCases, 228 testCase{blankToX1, "foo", "XfXoXoX"}, 229 testCase{blankToX1, "", "X"}, 230 231 testCase{blankToX2, "foo", "XfXoXoX"}, 232 testCase{blankToX2, "", "X"}, 233 234 testCase{blankHighPriority, "oo", "XOXOX"}, 235 testCase{blankHighPriority, "ii", "XiXiX"}, 236 testCase{blankHighPriority, "oiio", "XOXiXiXOX"}, 237 testCase{blankHighPriority, "iooi", "XiXOXOXiX"}, 238 testCase{blankHighPriority, "", "X"}, 239 240 testCase{blankLowPriority, "oo", "OOX"}, 241 testCase{blankLowPriority, "ii", "XiXiX"}, 242 testCase{blankLowPriority, "oiio", "OXiXiOX"}, 243 testCase{blankLowPriority, "iooi", "XiOOXiX"}, 244 testCase{blankLowPriority, "", "X"}, 245 246 testCase{blankNoOp1, "foo", "foo"}, 247 testCase{blankNoOp1, "", ""}, 248 249 testCase{blankNoOp2, "foo", "foo"}, 250 testCase{blankNoOp2, "", ""}, 251 252 testCase{blankFoo, "foobarfoobaz", "XRXZX"}, 253 testCase{blankFoo, "foobar-foobaz", "XRX-XZX"}, 254 testCase{blankFoo, "", "X"}, 255 ) 256 257 // single string replacer 258 259 abcMatcher := NewReplacer("abc", "[match]") 260 261 testCases = append(testCases, 262 testCase{abcMatcher, "", ""}, 263 testCase{abcMatcher, "ab", "ab"}, 264 testCase{abcMatcher, "abcd", "[match]d"}, 265 testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"}, 266 ) 267 268 // No-arg test cases. 269 270 nop := NewReplacer() 271 testCases = append(testCases, 272 testCase{nop, "abc", "abc"}, 273 testCase{nop, "", ""}, 274 ) 275 276 // Run the test cases. 277 278 for i, tc := range testCases { 279 if s := tc.r.Replace(tc.in); s != tc.out { 280 t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out) 281 } 282 var buf bytes.Buffer 283 n, err := tc.r.WriteString(&buf, tc.in) 284 if err != nil { 285 t.Errorf("%d. WriteString: %v", i, err) 286 continue 287 } 288 got := buf.String() 289 if got != tc.out { 290 t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out) 291 continue 292 } 293 if n != len(tc.out) { 294 t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)", 295 i, tc.in, n, len(tc.out), tc.out) 296 } 297 } 298} 299 300// TestPickAlgorithm tests that NewReplacer picks the correct algorithm. 301func TestPickAlgorithm(t *testing.T) { 302 testCases := []struct { 303 r *Replacer 304 want string 305 }{ 306 {capitalLetters, "*strings.byteReplacer"}, 307 {htmlEscaper, "*strings.byteStringReplacer"}, 308 {NewReplacer("12", "123"), "*strings.singleStringReplacer"}, 309 {NewReplacer("1", "12"), "*strings.byteStringReplacer"}, 310 {NewReplacer("", "X"), "*strings.genericReplacer"}, 311 {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"}, 312 } 313 for i, tc := range testCases { 314 got := fmt.Sprintf("%T", tc.r.Replacer()) 315 if got != tc.want { 316 t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want) 317 } 318 } 319} 320 321// TestGenericTrieBuilding verifies the structure of the generated trie. There 322// is one node per line, and the key ending with the current line is in the 323// trie if it ends with a "+". 324func TestGenericTrieBuilding(t *testing.T) { 325 testCases := []struct{ in, out string }{ 326 {"abc;abdef;abdefgh;xx;xy;z", `- 327 a- 328 .b- 329 ..c+ 330 ..d- 331 ...ef+ 332 .....gh+ 333 x- 334 .x+ 335 .y+ 336 z+ 337 `}, 338 {"abracadabra;abracadabrakazam;abraham;abrasion", `- 339 a- 340 .bra- 341 ....c- 342 .....adabra+ 343 ...........kazam+ 344 ....h- 345 .....am+ 346 ....s- 347 .....ion+ 348 `}, 349 {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `- 350 X+ 351 Y+ 352 a+ 353 .a+ 354 ..a+ 355 i+ 356 l- 357 .ong+ 358 ....er+ 359 ......st+ 360 x+ 361 .x+ 362 `}, 363 {"foo;;foo;foo1", `+ 364 f- 365 .oo+ 366 ...1+ 367 `}, 368 } 369 370 for _, tc := range testCases { 371 keys := Split(tc.in, ";") 372 args := make([]string, len(keys)*2) 373 for i, key := range keys { 374 args[i*2] = key 375 } 376 377 got := NewReplacer(args...).PrintTrie() 378 // Remove tabs from tc.out 379 wantbuf := make([]byte, 0, len(tc.out)) 380 for i := 0; i < len(tc.out); i++ { 381 if tc.out[i] != '\t' { 382 wantbuf = append(wantbuf, tc.out[i]) 383 } 384 } 385 want := string(wantbuf) 386 387 if got != want { 388 t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want) 389 } 390 } 391} 392 393func BenchmarkGenericNoMatch(b *testing.B) { 394 str := Repeat("A", 100) + Repeat("B", 100) 395 generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic 396 for i := 0; i < b.N; i++ { 397 generic.Replace(str) 398 } 399} 400 401func BenchmarkGenericMatch1(b *testing.B) { 402 str := Repeat("a", 100) + Repeat("b", 100) 403 generic := NewReplacer("a", "A", "b", "B", "12", "123") 404 for i := 0; i < b.N; i++ { 405 generic.Replace(str) 406 } 407} 408 409func BenchmarkGenericMatch2(b *testing.B) { 410 str := Repeat("It's <b>HTML</b>!", 100) 411 for i := 0; i < b.N; i++ { 412 htmlUnescaper.Replace(str) 413 } 414} 415 416func benchmarkSingleString(b *testing.B, pattern, text string) { 417 r := NewReplacer(pattern, "[match]") 418 b.SetBytes(int64(len(text))) 419 b.ResetTimer() 420 for i := 0; i < b.N; i++ { 421 r.Replace(text) 422 } 423} 424 425func BenchmarkSingleMaxSkipping(b *testing.B) { 426 benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000)) 427} 428 429func BenchmarkSingleLongSuffixFail(b *testing.B) { 430 benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002)) 431} 432 433func BenchmarkSingleMatch(b *testing.B) { 434 benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000)) 435} 436 437func BenchmarkByteByteNoMatch(b *testing.B) { 438 str := Repeat("A", 100) + Repeat("B", 100) 439 for i := 0; i < b.N; i++ { 440 capitalLetters.Replace(str) 441 } 442} 443 444func BenchmarkByteByteMatch(b *testing.B) { 445 str := Repeat("a", 100) + Repeat("b", 100) 446 for i := 0; i < b.N; i++ { 447 capitalLetters.Replace(str) 448 } 449} 450 451func BenchmarkByteStringMatch(b *testing.B) { 452 str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">" 453 for i := 0; i < b.N; i++ { 454 htmlEscaper.Replace(str) 455 } 456} 457 458func BenchmarkHTMLEscapeNew(b *testing.B) { 459 str := "I <3 to escape HTML & other text too." 460 for i := 0; i < b.N; i++ { 461 htmlEscaper.Replace(str) 462 } 463} 464 465func BenchmarkHTMLEscapeOld(b *testing.B) { 466 str := "I <3 to escape HTML & other text too." 467 for i := 0; i < b.N; i++ { 468 oldHTMLEscape(str) 469 } 470} 471 472// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces. 473func BenchmarkByteByteReplaces(b *testing.B) { 474 str := Repeat("a", 100) + Repeat("b", 100) 475 for i := 0; i < b.N; i++ { 476 Replace(Replace(str, "a", "A", -1), "b", "B", -1) 477 } 478} 479 480// BenchmarkByteByteMap compares byteByteImpl against Map. 481func BenchmarkByteByteMap(b *testing.B) { 482 str := Repeat("a", 100) + Repeat("b", 100) 483 fn := func(r rune) rune { 484 switch r { 485 case 'a': 486 return 'A' 487 case 'b': 488 return 'B' 489 } 490 return r 491 } 492 for i := 0; i < b.N; i++ { 493 Map(fn, str) 494 } 495} 496