1/* 2Copyright 2015 The Perkeep Authors 3 4Licensed under the Apache License, Version 2.0 (the "License"); 5you may not use this file except in compliance with the License. 6You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10Unless required by applicable law or agreed to in writing, software 11distributed under the License is distributed on an "AS IS" BASIS, 12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13See the License for the specific language governing permissions and 14limitations under the License. 15*/ 16 17package bytereplacer 18 19import ( 20 "bytes" 21 "strings" 22 "testing" 23) 24 25var htmlEscaper = New( 26 "&", "&", 27 "<", "<", 28 ">", ">", 29 `"`, """, 30 "'", "'", 31) 32 33var htmlUnescaper = New( 34 "&", "&", 35 "<", "<", 36 ">", ">", 37 """, `"`, 38 "'", "'", 39) 40 41var capitalLetters = New("a", "A", "b", "B") 42 43func TestReplacer(t *testing.T) { 44 type testCase struct { 45 r *Replacer 46 in, out string 47 } 48 var testCases []testCase 49 50 // str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8. 51 str := func(b byte) string { 52 return string([]byte{b}) 53 } 54 var s []string 55 56 // inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00". 57 s = nil 58 for i := 0; i < 256; i++ { 59 s = append(s, str(byte(i)), str(byte(i+1))) 60 } 61 inc := New(s...) 62 63 // Test cases with 1-byte old strings, 1-byte new strings. 64 testCases = append(testCases, 65 testCase{capitalLetters, "brad", "BrAd"}, 66 testCase{capitalLetters, strings.Repeat("a", (32<<10)+123), strings.Repeat("A", (32<<10)+123)}, 67 testCase{capitalLetters, "", ""}, 68 69 testCase{inc, "brad", "csbe"}, 70 testCase{inc, "\x00\xff", "\x01\x00"}, 71 testCase{inc, "", ""}, 72 73 testCase{New("a", "1", "a", "2"), "brad", "br1d"}, 74 ) 75 76 // repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ... 77 s = nil 78 for i := 0; i < 256; i++ { 79 n := i + 1 - 'a' 80 if n < 1 { 81 n = 1 82 } 83 s = append(s, str(byte(i)), strings.Repeat(str(byte(i)), n)) 84 } 85 repeat := New(s...) 86 87 // Test cases with 1-byte old strings, variable length new strings. 88 testCases = append(testCases, 89 testCase{htmlEscaper, "No changes", "No changes"}, 90 testCase{htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"}, 91 testCase{htmlEscaper, "&&&", "&&&"}, 92 testCase{htmlEscaper, "", ""}, 93 94 testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"}, 95 testCase{repeat, "abba", "abbbba"}, 96 testCase{repeat, "", ""}, 97 98 testCase{New("a", "11", "a", "22"), "brad", "br11d"}, 99 ) 100 101 // The remaining test cases have variable length old strings. 102 103 testCases = append(testCases, 104 testCase{htmlUnescaper, "&amp;", "&"}, 105 testCase{htmlUnescaper, "<b>HTML's neat</b>", "<b>HTML's neat</b>"}, 106 testCase{htmlUnescaper, "", ""}, 107 108 testCase{New("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"}, 109 110 testCase{New("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"}, 111 112 testCase{New("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"}, 113 ) 114 115 // gen1 has multiple old strings of variable length. There is no 116 // overall non-empty common prefix, but some pairwise common prefixes. 117 gen1 := New( 118 "aaa", "3[aaa]", 119 "aa", "2[aa]", 120 "a", "1[a]", 121 "i", "i", 122 "longerst", "most long", 123 "longer", "medium", 124 "long", "short", 125 "xx", "xx", 126 "x", "X", 127 "X", "Y", 128 "Y", "Z", 129 ) 130 testCases = append(testCases, 131 testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"}, 132 testCase{gen1, "long, longerst, longer", "short, most long, medium"}, 133 testCase{gen1, "xxxxx", "xxxxX"}, 134 testCase{gen1, "XiX", "YiY"}, 135 testCase{gen1, "", ""}, 136 ) 137 138 // gen2 has multiple old strings with no pairwise common prefix. 139 gen2 := New( 140 "roses", "red", 141 "violets", "blue", 142 "sugar", "sweet", 143 ) 144 testCases = append(testCases, 145 testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."}, 146 testCase{gen2, "", ""}, 147 ) 148 149 // gen3 has multiple old strings with an overall common prefix. 150 gen3 := New( 151 "abracadabra", "poof", 152 "abracadabrakazam", "splat", 153 "abraham", "lincoln", 154 "abrasion", "scrape", 155 "abraham", "isaac", 156 ) 157 testCases = append(testCases, 158 testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"}, 159 testCase{gen3, "abrasion abracad", "scrape abracad"}, 160 testCase{gen3, "abba abram abrasive", "abba abram abrasive"}, 161 testCase{gen3, "", ""}, 162 ) 163 164 // foo{1,2,3,4} have multiple old strings with an overall common prefix 165 // and 1- or 2- byte extensions from the common prefix. 166 foo1 := New( 167 "foo1", "A", 168 "foo2", "B", 169 "foo3", "C", 170 ) 171 foo2 := New( 172 "foo1", "A", 173 "foo2", "B", 174 "foo31", "C", 175 "foo32", "D", 176 ) 177 foo3 := New( 178 "foo11", "A", 179 "foo12", "B", 180 "foo31", "C", 181 "foo32", "D", 182 ) 183 foo4 := New( 184 "foo12", "B", 185 "foo32", "D", 186 ) 187 testCases = append(testCases, 188 testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"}, 189 testCase{foo1, "", ""}, 190 191 testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"}, 192 testCase{foo2, "", ""}, 193 194 testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"}, 195 testCase{foo3, "", ""}, 196 197 testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"}, 198 testCase{foo4, "", ""}, 199 ) 200 201 // genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things. 202 allBytes := make([]byte, 256) 203 for i := range allBytes { 204 allBytes[i] = byte(i) 205 } 206 allString := string(allBytes) 207 genAll := New( 208 allString, "[all]", 209 "\xff", "[ff]", 210 "\x00", "[00]", 211 ) 212 testCases = append(testCases, 213 testCase{genAll, allString, "[all]"}, 214 testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"}, 215 testCase{genAll, "", ""}, 216 ) 217 218 // Test cases with empty old strings. 219 220 blankToX1 := New("", "X") 221 blankToX2 := New("", "X", "", "") 222 blankHighPriority := New("", "X", "o", "O") 223 blankLowPriority := New("o", "O", "", "X") 224 blankNoOp1 := New("", "") 225 blankNoOp2 := New("", "", "", "A") 226 blankFoo := New("", "X", "foobar", "R", "foobaz", "Z") 227 testCases = append(testCases, 228 testCase{blankToX1, "foo", "XfXoXoX"}, 229 testCase{blankToX1, "", "X"}, 230 231 testCase{blankToX2, "foo", "XfXoXoX"}, 232 testCase{blankToX2, "", "X"}, 233 234 testCase{blankHighPriority, "oo", "XOXOX"}, 235 testCase{blankHighPriority, "ii", "XiXiX"}, 236 testCase{blankHighPriority, "oiio", "XOXiXiXOX"}, 237 testCase{blankHighPriority, "iooi", "XiXOXOXiX"}, 238 testCase{blankHighPriority, "", "X"}, 239 240 testCase{blankLowPriority, "oo", "OOX"}, 241 testCase{blankLowPriority, "ii", "XiXiX"}, 242 testCase{blankLowPriority, "oiio", "OXiXiOX"}, 243 testCase{blankLowPriority, "iooi", "XiOOXiX"}, 244 testCase{blankLowPriority, "", "X"}, 245 246 testCase{blankNoOp1, "foo", "foo"}, 247 testCase{blankNoOp1, "", ""}, 248 249 testCase{blankNoOp2, "foo", "foo"}, 250 testCase{blankNoOp2, "", ""}, 251 252 testCase{blankFoo, "foobarfoobaz", "XRXZX"}, 253 testCase{blankFoo, "foobar-foobaz", "XRX-XZX"}, 254 testCase{blankFoo, "", "X"}, 255 ) 256 257 // single string replacer 258 259 abcMatcher := New("abc", "[match]") 260 261 testCases = append(testCases, 262 testCase{abcMatcher, "", ""}, 263 testCase{abcMatcher, "ab", "ab"}, 264 testCase{abcMatcher, "abc", "[match]"}, 265 testCase{abcMatcher, "abcd", "[match]d"}, 266 testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"}, 267 ) 268 269 // Issue 6659 cases (more single string replacer) 270 271 noHello := New("Hello", "") 272 testCases = append(testCases, 273 testCase{noHello, "Hello", ""}, 274 testCase{noHello, "Hellox", "x"}, 275 testCase{noHello, "xHello", "x"}, 276 testCase{noHello, "xHellox", "xx"}, 277 ) 278 279 // No-arg test cases. 280 281 nop := New() 282 testCases = append(testCases, 283 testCase{nop, "abc", "abc"}, 284 testCase{nop, "", ""}, 285 ) 286 287 // Run the test cases. 288 289 for i, tc := range testCases { 290 { 291 // Replace with len(in) == cap(in) 292 in := make([]byte, len(tc.in)) 293 copy(in, tc.in) 294 if s := string(tc.r.Replace(in)); s != tc.out { 295 t.Errorf("%d. Replace(%q /* len == cap */) = %q, want %q", i, tc.in, s, tc.out) 296 } 297 } 298 299 { 300 // Replace with len(in) < cap(in) 301 in := make([]byte, len(tc.in), len(tc.in)*2) 302 copy(in, tc.in) 303 if s := string(tc.r.Replace(in)); s != tc.out { 304 t.Errorf("%d. Replace(%q /* len < cap */) = %q, want %q", i, tc.in, s, tc.out) 305 } 306 } 307 } 308} 309 310func BenchmarkGenericNoMatch(b *testing.B) { 311 str := []byte(strings.Repeat("A", 100) + strings.Repeat("B", 100)) 312 generic := New("a", "A", "b", "B", "12", "123") // varying lengths forces generic 313 for i := 0; i < b.N; i++ { 314 generic.Replace(str) 315 } 316} 317 318func BenchmarkGenericMatch1(b *testing.B) { 319 str := []byte(strings.Repeat("a", 100) + strings.Repeat("b", 100)) 320 generic := New("a", "A", "b", "B", "12", "123") 321 for i := 0; i < b.N; i++ { 322 generic.Replace(str) 323 } 324} 325 326func BenchmarkGenericMatch2(b *testing.B) { 327 str := bytes.Repeat([]byte("It's <b>HTML</b>!"), 100) 328 for i := 0; i < b.N; i++ { 329 htmlUnescaper.Replace(str) 330 } 331} 332 333func benchmarkSingleString(b *testing.B, pattern, text string) { 334 r := New(pattern, "[match]") 335 buf := make([]byte, len(text), len(text)*7) 336 b.SetBytes(int64(len(text))) 337 b.ResetTimer() 338 for i := 0; i < b.N; i++ { 339 copy(buf, text) 340 r.Replace(buf) 341 } 342} 343 344func BenchmarkSingleMaxSkipping(b *testing.B) { 345 benchmarkSingleString(b, strings.Repeat("b", 25), strings.Repeat("a", 10000)) 346} 347 348func BenchmarkSingleLongSuffixFail(b *testing.B) { 349 benchmarkSingleString(b, "b"+strings.Repeat("a", 500), strings.Repeat("a", 1002)) 350} 351 352func BenchmarkSingleMatch(b *testing.B) { 353 benchmarkSingleString(b, "abcdef", strings.Repeat("abcdefghijklmno", 1000)) 354} 355 356func benchmarkReplacer(b *testing.B, r *Replacer, str string) { 357 buf := make([]byte, len(str)) 358 b.ResetTimer() 359 for i := 0; i < b.N; i++ { 360 copy(buf, str) 361 r.Replace(buf) 362 } 363} 364 365func BenchmarkByteByteNoMatch(b *testing.B) { 366 benchmarkReplacer(b, capitalLetters, strings.Repeat("A", 100)+strings.Repeat("B", 100)) 367} 368 369func BenchmarkByteByteMatch(b *testing.B) { 370 benchmarkReplacer(b, capitalLetters, strings.Repeat("a", 100)+strings.Repeat("b", 100)) 371} 372 373func BenchmarkByteStringMatch(b *testing.B) { 374 benchmarkReplacer(b, htmlEscaper, "<"+strings.Repeat("a", 99)+strings.Repeat("b", 99)+">") 375} 376 377func BenchmarkHTMLEscapeNew(b *testing.B) { 378 benchmarkReplacer(b, htmlEscaper, "I <3 to escape HTML & other text too.") 379} 380 381func BenchmarkHTMLEscapeOld(b *testing.B) { 382 str := "I <3 to escape HTML & other text too." 383 buf := make([]byte, len(str)) 384 for i := 0; i < b.N; i++ { 385 copy(buf, str) 386 oldHTMLEscape(buf) 387 } 388} 389 390// The http package's old HTML escaping function in bytes form. 391func oldHTMLEscape(s []byte) []byte { 392 s = bytes.Replace(s, []byte("&"), []byte("&"), -1) 393 s = bytes.Replace(s, []byte("<"), []byte("<"), -1) 394 s = bytes.Replace(s, []byte(">"), []byte(">"), -1) 395 s = bytes.Replace(s, []byte(`"`), []byte("""), -1) 396 s = bytes.Replace(s, []byte("'"), []byte("'"), -1) 397 return s 398} 399 400// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces. 401func BenchmarkByteByteReplaces(b *testing.B) { 402 str := strings.Repeat("a", 100) + strings.Repeat("b", 100) 403 for i := 0; i < b.N; i++ { 404 bytes.Replace(bytes.Replace([]byte(str), []byte{'a'}, []byte{'A'}, -1), []byte{'b'}, []byte{'B'}, -1) 405 } 406} 407 408// BenchmarkByteByteMap compares byteByteImpl against Map. 409func BenchmarkByteByteMap(b *testing.B) { 410 str := strings.Repeat("a", 100) + strings.Repeat("b", 100) 411 fn := func(r rune) rune { 412 switch r { 413 case 'a': 414 return 'A' 415 case 'b': 416 return 'B' 417 } 418 return r 419 } 420 for i := 0; i < b.N; i++ { 421 bytes.Map(fn, []byte(str)) 422 } 423} 424