1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package regexp 6 7import ( 8 "reflect" 9 "regexp/syntax" 10 "strings" 11 "testing" 12 "unicode/utf8" 13) 14 15var goodRe = []string{ 16 ``, 17 `.`, 18 `^.$`, 19 `a`, 20 `a*`, 21 `a+`, 22 `a?`, 23 `a|b`, 24 `a*|b*`, 25 `(a*|b)(c*|d)`, 26 `[a-z]`, 27 `[a-abc-c\-\]\[]`, 28 `[a-z]+`, 29 `[abc]`, 30 `[^1234]`, 31 `[^\n]`, 32 `\!\\`, 33} 34 35type stringError struct { 36 re string 37 err string 38} 39 40var badRe = []stringError{ 41 {`*`, "missing argument to repetition operator: `*`"}, 42 {`+`, "missing argument to repetition operator: `+`"}, 43 {`?`, "missing argument to repetition operator: `?`"}, 44 {`(abc`, "missing closing ): `(abc`"}, 45 {`abc)`, "unexpected ): `abc)`"}, 46 {`x[a-z`, "missing closing ]: `[a-z`"}, 47 {`[z-a]`, "invalid character class range: `z-a`"}, 48 {`abc\`, "trailing backslash at end of expression"}, 49 {`a**`, "invalid nested repetition operator: `**`"}, 50 {`a*+`, "invalid nested repetition operator: `*+`"}, 51 {`\x`, "invalid escape sequence: `\\x`"}, 52} 53 54func compileTest(t *testing.T, expr string, error string) *Regexp { 55 re, err := Compile(expr) 56 if error == "" && err != nil { 57 t.Error("compiling `", expr, "`; unexpected error: ", err.Error()) 58 } 59 if error != "" && err == nil { 60 t.Error("compiling `", expr, "`; missing error") 61 } else if error != "" && !strings.Contains(err.Error(), error) { 62 t.Error("compiling `", expr, "`; wrong error: ", err.Error(), "; want ", error) 63 } 64 return re 65} 66 67func TestGoodCompile(t *testing.T) { 68 for i := 0; i < len(goodRe); i++ { 69 compileTest(t, goodRe[i], "") 70 } 71} 72 73func TestBadCompile(t *testing.T) { 74 for i := 0; i < len(badRe); i++ { 75 compileTest(t, badRe[i].re, badRe[i].err) 76 } 77} 78 79func matchTest(t *testing.T, test *FindTest) { 80 re := compileTest(t, test.pat, "") 81 if re == nil { 82 return 83 } 84 m := re.MatchString(test.text) 85 if m != (len(test.matches) > 0) { 86 t.Errorf("MatchString failure on %s: %t should be %t", test, m, len(test.matches) > 0) 87 } 88 // now try bytes 89 m = re.Match([]byte(test.text)) 90 if m != (len(test.matches) > 0) { 91 t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0) 92 } 93} 94 95func TestMatch(t *testing.T) { 96 for _, test := range findTests { 97 matchTest(t, &test) 98 } 99} 100 101func matchFunctionTest(t *testing.T, test *FindTest) { 102 m, err := MatchString(test.pat, test.text) 103 if err == nil { 104 return 105 } 106 if m != (len(test.matches) > 0) { 107 t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0) 108 } 109} 110 111func TestMatchFunction(t *testing.T) { 112 for _, test := range findTests { 113 matchFunctionTest(t, &test) 114 } 115} 116 117func copyMatchTest(t *testing.T, test *FindTest) { 118 re := compileTest(t, test.pat, "") 119 if re == nil { 120 return 121 } 122 m1 := re.MatchString(test.text) 123 m2 := re.Copy().MatchString(test.text) 124 if m1 != m2 { 125 t.Errorf("Copied Regexp match failure on %s: original gave %t; copy gave %t; should be %t", 126 test, m1, m2, len(test.matches) > 0) 127 } 128} 129 130func TestCopyMatch(t *testing.T) { 131 for _, test := range findTests { 132 copyMatchTest(t, &test) 133 } 134} 135 136type ReplaceTest struct { 137 pattern, replacement, input, output string 138} 139 140var replaceTests = []ReplaceTest{ 141 // Test empty input and/or replacement, with pattern that matches the empty string. 142 {"", "", "", ""}, 143 {"", "x", "", "x"}, 144 {"", "", "abc", "abc"}, 145 {"", "x", "abc", "xaxbxcx"}, 146 147 // Test empty input and/or replacement, with pattern that does not match the empty string. 148 {"b", "", "", ""}, 149 {"b", "x", "", ""}, 150 {"b", "", "abc", "ac"}, 151 {"b", "x", "abc", "axc"}, 152 {"y", "", "", ""}, 153 {"y", "x", "", ""}, 154 {"y", "", "abc", "abc"}, 155 {"y", "x", "abc", "abc"}, 156 157 // Multibyte characters -- verify that we don't try to match in the middle 158 // of a character. 159 {"[a-c]*", "x", "\u65e5", "x\u65e5x"}, 160 {"[^\u65e5]", "x", "abc\u65e5def", "xxx\u65e5xxx"}, 161 162 // Start and end of a string. 163 {"^[a-c]*", "x", "abcdabc", "xdabc"}, 164 {"[a-c]*$", "x", "abcdabc", "abcdx"}, 165 {"^[a-c]*$", "x", "abcdabc", "abcdabc"}, 166 {"^[a-c]*", "x", "abc", "x"}, 167 {"[a-c]*$", "x", "abc", "x"}, 168 {"^[a-c]*$", "x", "abc", "x"}, 169 {"^[a-c]*", "x", "dabce", "xdabce"}, 170 {"[a-c]*$", "x", "dabce", "dabcex"}, 171 {"^[a-c]*$", "x", "dabce", "dabce"}, 172 {"^[a-c]*", "x", "", "x"}, 173 {"[a-c]*$", "x", "", "x"}, 174 {"^[a-c]*$", "x", "", "x"}, 175 176 {"^[a-c]+", "x", "abcdabc", "xdabc"}, 177 {"[a-c]+$", "x", "abcdabc", "abcdx"}, 178 {"^[a-c]+$", "x", "abcdabc", "abcdabc"}, 179 {"^[a-c]+", "x", "abc", "x"}, 180 {"[a-c]+$", "x", "abc", "x"}, 181 {"^[a-c]+$", "x", "abc", "x"}, 182 {"^[a-c]+", "x", "dabce", "dabce"}, 183 {"[a-c]+$", "x", "dabce", "dabce"}, 184 {"^[a-c]+$", "x", "dabce", "dabce"}, 185 {"^[a-c]+", "x", "", ""}, 186 {"[a-c]+$", "x", "", ""}, 187 {"^[a-c]+$", "x", "", ""}, 188 189 // Other cases. 190 {"abc", "def", "abcdefg", "defdefg"}, 191 {"bc", "BC", "abcbcdcdedef", "aBCBCdcdedef"}, 192 {"abc", "", "abcdabc", "d"}, 193 {"x", "xXx", "xxxXxxx", "xXxxXxxXxXxXxxXxxXx"}, 194 {"abc", "d", "", ""}, 195 {"abc", "d", "abc", "d"}, 196 {".+", "x", "abc", "x"}, 197 {"[a-c]*", "x", "def", "xdxexfx"}, 198 {"[a-c]+", "x", "abcbcdcdedef", "xdxdedef"}, 199 {"[a-c]*", "x", "abcbcdcdedef", "xdxdxexdxexfx"}, 200 201 // Substitutions 202 {"a+", "($0)", "banana", "b(a)n(a)n(a)"}, 203 {"a+", "(${0})", "banana", "b(a)n(a)n(a)"}, 204 {"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"}, 205 {"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"}, 206 {"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, world"}, 207 {"hello, (.+)", "goodbye, $1x", "hello, world", "goodbye, "}, 208 {"hello, (.+)", "goodbye, ${1}x", "hello, world", "goodbye, worldx"}, 209 {"hello, (.+)", "<$0><$1><$2><$3>", "hello, world", "<hello, world><world><><>"}, 210 {"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, world!"}, 211 {"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, world"}, 212 {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "hihihi"}, 213 {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "byebyebye"}, 214 {"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", ""}, 215 {"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "hiyz"}, 216 {"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $x"}, 217 {"a+", "${oops", "aaa", "${oops"}, 218 {"a+", "$$", "aaa", "$"}, 219 {"a+", "$", "aaa", "$"}, 220 221 // Substitution when subexpression isn't found 222 {"(x)?", "$1", "123", "123"}, 223 {"abc", "$1", "123", "123"}, 224 225 // Substitutions involving a (x){0} 226 {"(a)(b){0}(c)", ".$1|$3.", "xacxacx", "x.a|c.x.a|c.x"}, 227 {"(a)(((b))){0}c", ".$1.", "xacxacx", "x.a.x.a.x"}, 228 {"((a(b){0}){3}){5}(h)", "y caramb$2", "say aaaaaaaaaaaaaaaah", "say ay caramba"}, 229 {"((a(b){0}){3}){5}h", "y caramb$2", "say aaaaaaaaaaaaaaaah", "say ay caramba"}, 230} 231 232var replaceLiteralTests = []ReplaceTest{ 233 // Substitutions 234 {"a+", "($0)", "banana", "b($0)n($0)n($0)"}, 235 {"a+", "(${0})", "banana", "b(${0})n(${0})n(${0})"}, 236 {"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"}, 237 {"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"}, 238 {"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, ${1}"}, 239 {"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, $noun!"}, 240 {"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, ${noun}"}, 241 {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "$x$x$x"}, 242 {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "$x$x$x"}, 243 {"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", "$xyz"}, 244 {"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "${x}yz"}, 245 {"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $$x"}, 246 {"a+", "${oops", "aaa", "${oops"}, 247 {"a+", "$$", "aaa", "$$"}, 248 {"a+", "$", "aaa", "$"}, 249} 250 251type ReplaceFuncTest struct { 252 pattern string 253 replacement func(string) string 254 input, output string 255} 256 257var replaceFuncTests = []ReplaceFuncTest{ 258 {"[a-c]", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxayxbyxcydef"}, 259 {"[a-c]+", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxabcydef"}, 260 {"[a-c]*", func(s string) string { return "x" + s + "y" }, "defabcdef", "xydxyexyfxabcydxyexyfxy"}, 261} 262 263func TestReplaceAll(t *testing.T) { 264 for _, tc := range replaceTests { 265 re, err := Compile(tc.pattern) 266 if err != nil { 267 t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) 268 continue 269 } 270 actual := re.ReplaceAllString(tc.input, tc.replacement) 271 if actual != tc.output { 272 t.Errorf("%q.ReplaceAllString(%q,%q) = %q; want %q", 273 tc.pattern, tc.input, tc.replacement, actual, tc.output) 274 } 275 // now try bytes 276 actual = string(re.ReplaceAll([]byte(tc.input), []byte(tc.replacement))) 277 if actual != tc.output { 278 t.Errorf("%q.ReplaceAll(%q,%q) = %q; want %q", 279 tc.pattern, tc.input, tc.replacement, actual, tc.output) 280 } 281 } 282} 283 284func TestReplaceAllLiteral(t *testing.T) { 285 // Run ReplaceAll tests that do not have $ expansions. 286 for _, tc := range replaceTests { 287 if strings.Contains(tc.replacement, "$") { 288 continue 289 } 290 re, err := Compile(tc.pattern) 291 if err != nil { 292 t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) 293 continue 294 } 295 actual := re.ReplaceAllLiteralString(tc.input, tc.replacement) 296 if actual != tc.output { 297 t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q", 298 tc.pattern, tc.input, tc.replacement, actual, tc.output) 299 } 300 // now try bytes 301 actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement))) 302 if actual != tc.output { 303 t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q", 304 tc.pattern, tc.input, tc.replacement, actual, tc.output) 305 } 306 } 307 308 // Run literal-specific tests. 309 for _, tc := range replaceLiteralTests { 310 re, err := Compile(tc.pattern) 311 if err != nil { 312 t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) 313 continue 314 } 315 actual := re.ReplaceAllLiteralString(tc.input, tc.replacement) 316 if actual != tc.output { 317 t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q", 318 tc.pattern, tc.input, tc.replacement, actual, tc.output) 319 } 320 // now try bytes 321 actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement))) 322 if actual != tc.output { 323 t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q", 324 tc.pattern, tc.input, tc.replacement, actual, tc.output) 325 } 326 } 327} 328 329func TestReplaceAllFunc(t *testing.T) { 330 for _, tc := range replaceFuncTests { 331 re, err := Compile(tc.pattern) 332 if err != nil { 333 t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) 334 continue 335 } 336 actual := re.ReplaceAllStringFunc(tc.input, tc.replacement) 337 if actual != tc.output { 338 t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q", 339 tc.pattern, tc.input, actual, tc.output) 340 } 341 // now try bytes 342 actual = string(re.ReplaceAllFunc([]byte(tc.input), func(s []byte) []byte { return []byte(tc.replacement(string(s))) })) 343 if actual != tc.output { 344 t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q", 345 tc.pattern, tc.input, actual, tc.output) 346 } 347 } 348} 349 350type MetaTest struct { 351 pattern, output, literal string 352 isLiteral bool 353} 354 355var metaTests = []MetaTest{ 356 {``, ``, ``, true}, 357 {`foo`, `foo`, `foo`, true}, 358 {`日本語+`, `日本語\+`, `日本語`, false}, 359 {`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator 360 {`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators 361 {`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false}, 362} 363 364var literalPrefixTests = []MetaTest{ 365 // See golang.org/issue/11175. 366 // output is unused. 367 {`^0^0$`, ``, `0`, false}, 368 {`^0^`, ``, ``, false}, 369 {`^0$`, ``, `0`, true}, 370 {`$0^`, ``, ``, false}, 371 {`$0$`, ``, ``, false}, 372 {`^^0$$`, ``, ``, false}, 373 {`^$^$`, ``, ``, false}, 374 {`$$0^^`, ``, ``, false}, 375} 376 377func TestQuoteMeta(t *testing.T) { 378 for _, tc := range metaTests { 379 // Verify that QuoteMeta returns the expected string. 380 quoted := QuoteMeta(tc.pattern) 381 if quoted != tc.output { 382 t.Errorf("QuoteMeta(`%s`) = `%s`; want `%s`", 383 tc.pattern, quoted, tc.output) 384 continue 385 } 386 387 // Verify that the quoted string is in fact treated as expected 388 // by Compile -- i.e. that it matches the original, unquoted string. 389 if tc.pattern != "" { 390 re, err := Compile(quoted) 391 if err != nil { 392 t.Errorf("Unexpected error compiling QuoteMeta(`%s`): %v", tc.pattern, err) 393 continue 394 } 395 src := "abc" + tc.pattern + "def" 396 repl := "xyz" 397 replaced := re.ReplaceAllString(src, repl) 398 expected := "abcxyzdef" 399 if replaced != expected { 400 t.Errorf("QuoteMeta(`%s`).Replace(`%s`,`%s`) = `%s`; want `%s`", 401 tc.pattern, src, repl, replaced, expected) 402 } 403 } 404 } 405} 406 407func TestLiteralPrefix(t *testing.T) { 408 for _, tc := range append(metaTests, literalPrefixTests...) { 409 // Literal method needs to scan the pattern. 410 re := MustCompile(tc.pattern) 411 str, complete := re.LiteralPrefix() 412 if complete != tc.isLiteral { 413 t.Errorf("LiteralPrefix(`%s`) = %t; want %t", tc.pattern, complete, tc.isLiteral) 414 } 415 if str != tc.literal { 416 t.Errorf("LiteralPrefix(`%s`) = `%s`; want `%s`", tc.pattern, str, tc.literal) 417 } 418 } 419} 420 421type subexpIndex struct { 422 name string 423 index int 424} 425 426type subexpCase struct { 427 input string 428 num int 429 names []string 430 indices []subexpIndex 431} 432 433var emptySubexpIndices = []subexpIndex{{"", -1}, {"missing", -1}} 434 435var subexpCases = []subexpCase{ 436 {``, 0, nil, emptySubexpIndices}, 437 {`.*`, 0, nil, emptySubexpIndices}, 438 {`abba`, 0, nil, emptySubexpIndices}, 439 {`ab(b)a`, 1, []string{"", ""}, emptySubexpIndices}, 440 {`ab(.*)a`, 1, []string{"", ""}, emptySubexpIndices}, 441 {`(.*)ab(.*)a`, 2, []string{"", "", ""}, emptySubexpIndices}, 442 {`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices}, 443 {`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}, emptySubexpIndices}, 444 {`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices}, 445 {`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices}, 446 {`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}, []subexpIndex{{"", -1}, {"missing", -1}, {"foo", 1}, {"bar", 2}}}, 447} 448 449func TestSubexp(t *testing.T) { 450 for _, c := range subexpCases { 451 re := MustCompile(c.input) 452 n := re.NumSubexp() 453 if n != c.num { 454 t.Errorf("%q: NumSubexp = %d, want %d", c.input, n, c.num) 455 continue 456 } 457 names := re.SubexpNames() 458 if len(names) != 1+n { 459 t.Errorf("%q: len(SubexpNames) = %d, want %d", c.input, len(names), n) 460 continue 461 } 462 if c.names != nil { 463 for i := 0; i < 1+n; i++ { 464 if names[i] != c.names[i] { 465 t.Errorf("%q: SubexpNames[%d] = %q, want %q", c.input, i, names[i], c.names[i]) 466 } 467 } 468 } 469 for _, subexp := range c.indices { 470 index := re.SubexpIndex(subexp.name) 471 if index != subexp.index { 472 t.Errorf("%q: SubexpIndex(%q) = %d, want %d", c.input, subexp.name, index, subexp.index) 473 } 474 } 475 } 476} 477 478var splitTests = []struct { 479 s string 480 r string 481 n int 482 out []string 483}{ 484 {"foo:and:bar", ":", -1, []string{"foo", "and", "bar"}}, 485 {"foo:and:bar", ":", 1, []string{"foo:and:bar"}}, 486 {"foo:and:bar", ":", 2, []string{"foo", "and:bar"}}, 487 {"foo:and:bar", "foo", -1, []string{"", ":and:bar"}}, 488 {"foo:and:bar", "bar", -1, []string{"foo:and:", ""}}, 489 {"foo:and:bar", "baz", -1, []string{"foo:and:bar"}}, 490 {"baabaab", "a", -1, []string{"b", "", "b", "", "b"}}, 491 {"baabaab", "a*", -1, []string{"b", "b", "b"}}, 492 {"baabaab", "ba*", -1, []string{"", "", "", ""}}, 493 {"foobar", "f*b*", -1, []string{"", "o", "o", "a", "r"}}, 494 {"foobar", "f+.*b+", -1, []string{"", "ar"}}, 495 {"foobooboar", "o{2}", -1, []string{"f", "b", "boar"}}, 496 {"a,b,c,d,e,f", ",", 3, []string{"a", "b", "c,d,e,f"}}, 497 {"a,b,c,d,e,f", ",", 0, nil}, 498 {",", ",", -1, []string{"", ""}}, 499 {",,,", ",", -1, []string{"", "", "", ""}}, 500 {"", ",", -1, []string{""}}, 501 {"", ".*", -1, []string{""}}, 502 {"", ".+", -1, []string{""}}, 503 {"", "", -1, []string{}}, 504 {"foobar", "", -1, []string{"f", "o", "o", "b", "a", "r"}}, 505 {"abaabaccadaaae", "a*", 5, []string{"", "b", "b", "c", "cadaaae"}}, 506 {":x:y:z:", ":", -1, []string{"", "x", "y", "z", ""}}, 507} 508 509func TestSplit(t *testing.T) { 510 for i, test := range splitTests { 511 re, err := Compile(test.r) 512 if err != nil { 513 t.Errorf("#%d: %q: compile error: %s", i, test.r, err.Error()) 514 continue 515 } 516 517 split := re.Split(test.s, test.n) 518 if !reflect.DeepEqual(split, test.out) { 519 t.Errorf("#%d: %q: got %q; want %q", i, test.r, split, test.out) 520 } 521 522 if QuoteMeta(test.r) == test.r { 523 strsplit := strings.SplitN(test.s, test.r, test.n) 524 if !reflect.DeepEqual(split, strsplit) { 525 t.Errorf("#%d: Split(%q, %q, %d): regexp vs strings mismatch\nregexp=%q\nstrings=%q", i, test.s, test.r, test.n, split, strsplit) 526 } 527 } 528 } 529} 530 531// The following sequence of Match calls used to panic. See issue #12980. 532func TestParseAndCompile(t *testing.T) { 533 expr := "a$" 534 s := "a\nb" 535 536 for i, tc := range []struct { 537 reFlags syntax.Flags 538 expMatch bool 539 }{ 540 {syntax.Perl | syntax.OneLine, false}, 541 {syntax.Perl &^ syntax.OneLine, true}, 542 } { 543 parsed, err := syntax.Parse(expr, tc.reFlags) 544 if err != nil { 545 t.Fatalf("%d: parse: %v", i, err) 546 } 547 re, err := Compile(parsed.String()) 548 if err != nil { 549 t.Fatalf("%d: compile: %v", i, err) 550 } 551 if match := re.MatchString(s); match != tc.expMatch { 552 t.Errorf("%d: %q.MatchString(%q)=%t; expected=%t", i, re, s, match, tc.expMatch) 553 } 554 } 555} 556 557// Check that one-pass cutoff does trigger. 558func TestOnePassCutoff(t *testing.T) { 559 re, err := syntax.Parse(`^x{1,1000}y{1,1000}$`, syntax.Perl) 560 if err != nil { 561 t.Fatalf("parse: %v", err) 562 } 563 p, err := syntax.Compile(re.Simplify()) 564 if err != nil { 565 t.Fatalf("compile: %v", err) 566 } 567 if compileOnePass(p) != nil { 568 t.Fatalf("makeOnePass succeeded; wanted nil") 569 } 570} 571 572// Check that the same machine can be used with the standard matcher 573// and then the backtracker when there are no captures. 574func TestSwitchBacktrack(t *testing.T) { 575 re := MustCompile(`a|b`) 576 long := make([]byte, maxBacktrackVector+1) 577 578 // The following sequence of Match calls used to panic. See issue #10319. 579 re.Match(long) // triggers standard matcher 580 re.Match(long[:1]) // triggers backtracker 581} 582 583func BenchmarkFind(b *testing.B) { 584 b.StopTimer() 585 re := MustCompile("a+b+") 586 wantSubs := "aaabb" 587 s := []byte("acbb" + wantSubs + "dd") 588 b.StartTimer() 589 b.ReportAllocs() 590 for i := 0; i < b.N; i++ { 591 subs := re.Find(s) 592 if string(subs) != wantSubs { 593 b.Fatalf("Find(%q) = %q; want %q", s, subs, wantSubs) 594 } 595 } 596} 597 598func BenchmarkFindAllNoMatches(b *testing.B) { 599 re := MustCompile("a+b+") 600 s := []byte("acddee") 601 b.ReportAllocs() 602 b.ResetTimer() 603 for i := 0; i < b.N; i++ { 604 all := re.FindAll(s, -1) 605 if all != nil { 606 b.Fatalf("FindAll(%q) = %q; want nil", s, all) 607 } 608 } 609} 610 611func BenchmarkFindString(b *testing.B) { 612 b.StopTimer() 613 re := MustCompile("a+b+") 614 wantSubs := "aaabb" 615 s := "acbb" + wantSubs + "dd" 616 b.StartTimer() 617 b.ReportAllocs() 618 for i := 0; i < b.N; i++ { 619 subs := re.FindString(s) 620 if subs != wantSubs { 621 b.Fatalf("FindString(%q) = %q; want %q", s, subs, wantSubs) 622 } 623 } 624} 625 626func BenchmarkFindSubmatch(b *testing.B) { 627 b.StopTimer() 628 re := MustCompile("a(a+b+)b") 629 wantSubs := "aaabb" 630 s := []byte("acbb" + wantSubs + "dd") 631 b.StartTimer() 632 b.ReportAllocs() 633 for i := 0; i < b.N; i++ { 634 subs := re.FindSubmatch(s) 635 if string(subs[0]) != wantSubs { 636 b.Fatalf("FindSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs) 637 } 638 if string(subs[1]) != "aab" { 639 b.Fatalf("FindSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab") 640 } 641 } 642} 643 644func BenchmarkFindStringSubmatch(b *testing.B) { 645 b.StopTimer() 646 re := MustCompile("a(a+b+)b") 647 wantSubs := "aaabb" 648 s := "acbb" + wantSubs + "dd" 649 b.StartTimer() 650 b.ReportAllocs() 651 for i := 0; i < b.N; i++ { 652 subs := re.FindStringSubmatch(s) 653 if subs[0] != wantSubs { 654 b.Fatalf("FindStringSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs) 655 } 656 if subs[1] != "aab" { 657 b.Fatalf("FindStringSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab") 658 } 659 } 660} 661 662func BenchmarkLiteral(b *testing.B) { 663 x := strings.Repeat("x", 50) + "y" 664 b.StopTimer() 665 re := MustCompile("y") 666 b.StartTimer() 667 for i := 0; i < b.N; i++ { 668 if !re.MatchString(x) { 669 b.Fatalf("no match!") 670 } 671 } 672} 673 674func BenchmarkNotLiteral(b *testing.B) { 675 x := strings.Repeat("x", 50) + "y" 676 b.StopTimer() 677 re := MustCompile(".y") 678 b.StartTimer() 679 for i := 0; i < b.N; i++ { 680 if !re.MatchString(x) { 681 b.Fatalf("no match!") 682 } 683 } 684} 685 686func BenchmarkMatchClass(b *testing.B) { 687 b.StopTimer() 688 x := strings.Repeat("xxxx", 20) + "w" 689 re := MustCompile("[abcdw]") 690 b.StartTimer() 691 for i := 0; i < b.N; i++ { 692 if !re.MatchString(x) { 693 b.Fatalf("no match!") 694 } 695 } 696} 697 698func BenchmarkMatchClass_InRange(b *testing.B) { 699 b.StopTimer() 700 // 'b' is between 'a' and 'c', so the charclass 701 // range checking is no help here. 702 x := strings.Repeat("bbbb", 20) + "c" 703 re := MustCompile("[ac]") 704 b.StartTimer() 705 for i := 0; i < b.N; i++ { 706 if !re.MatchString(x) { 707 b.Fatalf("no match!") 708 } 709 } 710} 711 712func BenchmarkReplaceAll(b *testing.B) { 713 x := "abcdefghijklmnopqrstuvwxyz" 714 b.StopTimer() 715 re := MustCompile("[cjrw]") 716 b.StartTimer() 717 for i := 0; i < b.N; i++ { 718 re.ReplaceAllString(x, "") 719 } 720} 721 722func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) { 723 b.StopTimer() 724 x := []byte("abcdefghijklmnopqrstuvwxyz") 725 re := MustCompile("^zbc(d|e)") 726 b.StartTimer() 727 for i := 0; i < b.N; i++ { 728 re.Match(x) 729 } 730} 731 732func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) { 733 b.StopTimer() 734 x := []byte("abcdefghijklmnopqrstuvwxyz") 735 for i := 0; i < 15; i++ { 736 x = append(x, x...) 737 } 738 re := MustCompile("^zbc(d|e)") 739 b.StartTimer() 740 for i := 0; i < b.N; i++ { 741 re.Match(x) 742 } 743} 744 745func BenchmarkAnchoredShortMatch(b *testing.B) { 746 b.StopTimer() 747 x := []byte("abcdefghijklmnopqrstuvwxyz") 748 re := MustCompile("^.bc(d|e)") 749 b.StartTimer() 750 for i := 0; i < b.N; i++ { 751 re.Match(x) 752 } 753} 754 755func BenchmarkAnchoredLongMatch(b *testing.B) { 756 b.StopTimer() 757 x := []byte("abcdefghijklmnopqrstuvwxyz") 758 for i := 0; i < 15; i++ { 759 x = append(x, x...) 760 } 761 re := MustCompile("^.bc(d|e)") 762 b.StartTimer() 763 for i := 0; i < b.N; i++ { 764 re.Match(x) 765 } 766} 767 768func BenchmarkOnePassShortA(b *testing.B) { 769 b.StopTimer() 770 x := []byte("abcddddddeeeededd") 771 re := MustCompile("^.bc(d|e)*$") 772 b.StartTimer() 773 for i := 0; i < b.N; i++ { 774 re.Match(x) 775 } 776} 777 778func BenchmarkNotOnePassShortA(b *testing.B) { 779 b.StopTimer() 780 x := []byte("abcddddddeeeededd") 781 re := MustCompile(".bc(d|e)*$") 782 b.StartTimer() 783 for i := 0; i < b.N; i++ { 784 re.Match(x) 785 } 786} 787 788func BenchmarkOnePassShortB(b *testing.B) { 789 b.StopTimer() 790 x := []byte("abcddddddeeeededd") 791 re := MustCompile("^.bc(?:d|e)*$") 792 b.StartTimer() 793 for i := 0; i < b.N; i++ { 794 re.Match(x) 795 } 796} 797 798func BenchmarkNotOnePassShortB(b *testing.B) { 799 b.StopTimer() 800 x := []byte("abcddddddeeeededd") 801 re := MustCompile(".bc(?:d|e)*$") 802 b.StartTimer() 803 for i := 0; i < b.N; i++ { 804 re.Match(x) 805 } 806} 807 808func BenchmarkOnePassLongPrefix(b *testing.B) { 809 b.StopTimer() 810 x := []byte("abcdefghijklmnopqrstuvwxyz") 811 re := MustCompile("^abcdefghijklmnopqrstuvwxyz.*$") 812 b.StartTimer() 813 for i := 0; i < b.N; i++ { 814 re.Match(x) 815 } 816} 817 818func BenchmarkOnePassLongNotPrefix(b *testing.B) { 819 b.StopTimer() 820 x := []byte("abcdefghijklmnopqrstuvwxyz") 821 re := MustCompile("^.bcdefghijklmnopqrstuvwxyz.*$") 822 b.StartTimer() 823 for i := 0; i < b.N; i++ { 824 re.Match(x) 825 } 826} 827 828func BenchmarkMatchParallelShared(b *testing.B) { 829 x := []byte("this is a long line that contains foo bar baz") 830 re := MustCompile("foo (ba+r)? baz") 831 b.ResetTimer() 832 b.RunParallel(func(pb *testing.PB) { 833 for pb.Next() { 834 re.Match(x) 835 } 836 }) 837} 838 839func BenchmarkMatchParallelCopied(b *testing.B) { 840 x := []byte("this is a long line that contains foo bar baz") 841 re := MustCompile("foo (ba+r)? baz") 842 b.ResetTimer() 843 b.RunParallel(func(pb *testing.PB) { 844 re := re.Copy() 845 for pb.Next() { 846 re.Match(x) 847 } 848 }) 849} 850 851var sink string 852 853func BenchmarkQuoteMetaAll(b *testing.B) { 854 specials := make([]byte, 0) 855 for i := byte(0); i < utf8.RuneSelf; i++ { 856 if special(i) { 857 specials = append(specials, i) 858 } 859 } 860 s := string(specials) 861 b.SetBytes(int64(len(s))) 862 b.ResetTimer() 863 for i := 0; i < b.N; i++ { 864 sink = QuoteMeta(s) 865 } 866} 867 868func BenchmarkQuoteMetaNone(b *testing.B) { 869 s := "abcdefghijklmnopqrstuvwxyz" 870 b.SetBytes(int64(len(s))) 871 b.ResetTimer() 872 for i := 0; i < b.N; i++ { 873 sink = QuoteMeta(s) 874 } 875} 876 877var compileBenchData = []struct{ name, re string }{ 878 {"Onepass", `^a.[l-nA-Cg-j]?e$`}, 879 {"Medium", `^((a|b|[d-z0-9])*(日){4,5}.)+$`}, 880 {"Hard", strings.Repeat(`((abc)*|`, 50) + strings.Repeat(`)`, 50)}, 881} 882 883func BenchmarkCompile(b *testing.B) { 884 for _, data := range compileBenchData { 885 b.Run(data.name, func(b *testing.B) { 886 b.ReportAllocs() 887 for i := 0; i < b.N; i++ { 888 if _, err := Compile(data.re); err != nil { 889 b.Fatal(err) 890 } 891 } 892 }) 893 } 894} 895 896func TestDeepEqual(t *testing.T) { 897 re1 := MustCompile("a.*b.*c.*d") 898 re2 := MustCompile("a.*b.*c.*d") 899 if !reflect.DeepEqual(re1, re2) { // has always been true, since Go 1. 900 t.Errorf("DeepEqual(re1, re2) = false, want true") 901 } 902 903 re1.MatchString("abcdefghijklmn") 904 if !reflect.DeepEqual(re1, re2) { 905 t.Errorf("DeepEqual(re1, re2) = false, want true") 906 } 907 908 re2.MatchString("abcdefghijklmn") 909 if !reflect.DeepEqual(re1, re2) { 910 t.Errorf("DeepEqual(re1, re2) = false, want true") 911 } 912 913 re2.MatchString(strings.Repeat("abcdefghijklmn", 100)) 914 if !reflect.DeepEqual(re1, re2) { 915 t.Errorf("DeepEqual(re1, re2) = false, want true") 916 } 917} 918 919var minInputLenTests = []struct { 920 Regexp string 921 min int 922}{ 923 {``, 0}, 924 {`a`, 1}, 925 {`aa`, 2}, 926 {`(aa)a`, 3}, 927 {`(?:aa)a`, 3}, 928 {`a?a`, 1}, 929 {`(aaa)|(aa)`, 2}, 930 {`(aa)+a`, 3}, 931 {`(aa)*a`, 1}, 932 {`(aa){3,5}`, 6}, 933 {`[a-z]`, 1}, 934 {`日`, 3}, 935} 936 937func TestMinInputLen(t *testing.T) { 938 for _, tt := range minInputLenTests { 939 re, _ := syntax.Parse(tt.Regexp, syntax.Perl) 940 m := minInputLen(re) 941 if m != tt.min { 942 t.Errorf("regexp %#q has minInputLen %d, should be %d", tt.Regexp, m, tt.min) 943 } 944 } 945} 946