1package regexp2 2 3import ( 4 "reflect" 5 "strings" 6 "testing" 7 "time" 8 9 "github.com/dlclark/regexp2/syntax" 10) 11 12func TestBacktrack_CatastrophicTimeout(t *testing.T) { 13 r, err := Compile("(.+)*\\?", 0) 14 r.MatchTimeout = time.Millisecond * 1 15 t.Logf("code dump: %v", r.code.Dump()) 16 m, err := r.FindStringMatch("Do you think you found the problem string!") 17 if err == nil { 18 t.Errorf("expected timeout err") 19 } 20 if m != nil { 21 t.Errorf("Expected no match") 22 } 23} 24 25func TestSetPrefix(t *testing.T) { 26 r := MustCompile(`^\s*-TEST`, 0) 27 if r.code.FcPrefix == nil { 28 t.Fatalf("Expected prefix set [-\\s] but was nil") 29 } 30 if r.code.FcPrefix.PrefixSet.String() != "[-\\s]" { 31 t.Fatalf("Expected prefix set [\\s-] but was %v", r.code.FcPrefix.PrefixSet.String()) 32 } 33} 34 35func TestSetInCode(t *testing.T) { 36 r := MustCompile(`(?<body>\s*(?<name>.+))`, 0) 37 t.Logf("code dump: %v", r.code.Dump()) 38 if want, got := 1, len(r.code.Sets); want != got { 39 t.Fatalf("r.code.Sets wanted %v, got %v", want, got) 40 } 41 if want, got := "[\\s]", r.code.Sets[0].String(); want != got { 42 t.Fatalf("first set wanted %v, got %v", want, got) 43 } 44} 45 46func TestRegexp_Basic(t *testing.T) { 47 r, err := Compile("test(?<named>ing)?", 0) 48 //t.Logf("code dump: %v", r.code.Dump()) 49 50 if err != nil { 51 t.Errorf("unexpected compile err: %v", err) 52 } 53 m, err := r.FindStringMatch("this is a testing stuff") 54 if err != nil { 55 t.Errorf("unexpected match err: %v", err) 56 } 57 if m == nil { 58 t.Error("Nil match, expected success") 59 } else { 60 //t.Logf("Match: %v", m.dump()) 61 } 62} 63 64// check all our functions and properties around basic capture groups and referential for Group 0 65func TestCapture_Basic(t *testing.T) { 66 r := MustCompile(`.*\B(SUCCESS)\B.*`, 0) 67 m, err := r.FindStringMatch("adfadsfSUCCESSadsfadsf") 68 if err != nil { 69 t.Fatalf("Unexpected match error: %v", err) 70 } 71 72 if m == nil { 73 t.Fatalf("Should have matched") 74 } 75 if want, got := "adfadsfSUCCESSadsfadsf", m.String(); want != got { 76 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 77 } 78 if want, got := 0, m.Index; want != got { 79 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 80 } 81 if want, got := 22, m.Length; want != got { 82 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 83 } 84 if want, got := 1, len(m.Captures); want != got { 85 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 86 } 87 88 if want, got := m.String(), m.Captures[0].String(); want != got { 89 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 90 } 91 if want, got := 0, m.Captures[0].Index; want != got { 92 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 93 } 94 if want, got := 22, m.Captures[0].Length; want != got { 95 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 96 } 97 98 g := m.Groups() 99 if want, got := 2, len(g); want != got { 100 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 101 } 102 // group 0 is always the match 103 if want, got := m.String(), g[0].String(); want != got { 104 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 105 } 106 if want, got := 1, len(g[0].Captures); want != got { 107 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 108 } 109 // group 0's capture is always the match 110 if want, got := m.Captures[0].String(), g[0].Captures[0].String(); want != got { 111 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 112 } 113 114 // group 1 is our first explicit group (unnamed) 115 if want, got := 7, g[1].Index; want != got { 116 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 117 } 118 if want, got := 7, g[1].Length; want != got { 119 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 120 } 121 if want, got := "SUCCESS", g[1].String(); want != got { 122 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 123 } 124} 125 126func TestEscapeUnescape_Basic(t *testing.T) { 127 s1 := "#$^*+(){}<>\\|. " 128 s2 := Escape(s1) 129 s3, err := Unescape(s2) 130 if err != nil { 131 t.Fatalf("Unexpected error during unescape: %v", err) 132 } 133 134 //confirm one way 135 if want, got := `\#\$\^\*\+\(\)\{\}<>\\\|\.\ `, s2; want != got { 136 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 137 } 138 139 //confirm round-trip 140 if want, got := s1, s3; want != got { 141 t.Fatalf("Wanted '%v'\nGot '%v'", want, got) 142 } 143 144} 145 146func TestGroups_Basic(t *testing.T) { 147 type d struct { 148 p string 149 s string 150 name []string 151 num []int 152 strs []string 153 } 154 data := []d{ 155 d{"(?<first_name>\\S+)\\s(?<last_name>\\S+)", // example 156 "Ryan Byington", 157 []string{"0", "first_name", "last_name"}, 158 []int{0, 1, 2}, 159 []string{"Ryan Byington", "Ryan", "Byington"}}, 160 d{"((?<One>abc)\\d+)?(?<Two>xyz)(.*)", // example 161 "abc208923xyzanqnakl", 162 []string{"0", "1", "2", "One", "Two"}, 163 []int{0, 1, 2, 3, 4}, 164 []string{"abc208923xyzanqnakl", "abc208923", "anqnakl", "abc", "xyz"}}, 165 d{"((?<256>abc)\\d+)?(?<16>xyz)(.*)", // numeric names 166 "0272saasdabc8978xyz][]12_+-", 167 []string{"0", "1", "2", "16", "256"}, 168 []int{0, 1, 2, 16, 256}, 169 []string{"abc8978xyz][]12_+-", "abc8978", "][]12_+-", "xyz", "abc"}}, 170 d{"((?<4>abc)(?<digits>\\d+))?(?<2>xyz)(?<everything_else>.*)", // mix numeric and string names 171 "0272saasdabc8978xyz][]12_+-", 172 []string{"0", "1", "2", "digits", "4", "everything_else"}, 173 []int{0, 1, 2, 3, 4, 5}, 174 []string{"abc8978xyz][]12_+-", "abc8978", "xyz", "8978", "abc", "][]12_+-"}}, 175 d{"(?<first_name>\\S+)\\s(?<first_name>\\S+)", // dupe string names 176 "Ryan Byington", 177 []string{"0", "first_name"}, 178 []int{0, 1}, 179 []string{"Ryan Byington", "Byington"}}, 180 d{"(?<15>\\S+)\\s(?<15>\\S+)", // dupe numeric names 181 "Ryan Byington", 182 []string{"0", "15"}, 183 []int{0, 15}, 184 []string{"Ryan Byington", "Byington"}}, 185 // *** repeated from above, but with alt cap syntax *** 186 d{"(?'first_name'\\S+)\\s(?'last_name'\\S+)", //example 187 "Ryan Byington", 188 []string{"0", "first_name", "last_name"}, 189 []int{0, 1, 2}, 190 []string{"Ryan Byington", "Ryan", "Byington"}}, 191 d{"((?'One'abc)\\d+)?(?'Two'xyz)(.*)", // example 192 "abc208923xyzanqnakl", 193 []string{"0", "1", "2", "One", "Two"}, 194 []int{0, 1, 2, 3, 4}, 195 []string{"abc208923xyzanqnakl", "abc208923", "anqnakl", "abc", "xyz"}}, 196 d{"((?'256'abc)\\d+)?(?'16'xyz)(.*)", // numeric names 197 "0272saasdabc8978xyz][]12_+-", 198 []string{"0", "1", "2", "16", "256"}, 199 []int{0, 1, 2, 16, 256}, 200 []string{"abc8978xyz][]12_+-", "abc8978", "][]12_+-", "xyz", "abc"}}, 201 d{"((?'4'abc)(?'digits'\\d+))?(?'2'xyz)(?'everything_else'.*)", // mix numeric and string names 202 "0272saasdabc8978xyz][]12_+-", 203 []string{"0", "1", "2", "digits", "4", "everything_else"}, 204 []int{0, 1, 2, 3, 4, 5}, 205 []string{"abc8978xyz][]12_+-", "abc8978", "xyz", "8978", "abc", "][]12_+-"}}, 206 d{"(?'first_name'\\S+)\\s(?'first_name'\\S+)", // dupe string names 207 "Ryan Byington", 208 []string{"0", "first_name"}, 209 []int{0, 1}, 210 []string{"Ryan Byington", "Byington"}}, 211 d{"(?'15'\\S+)\\s(?'15'\\S+)", // dupe numeric names 212 "Ryan Byington", 213 []string{"0", "15"}, 214 []int{0, 15}, 215 []string{"Ryan Byington", "Byington"}}, 216 } 217 218 fatalf := func(re *Regexp, v d, format string, args ...interface{}) { 219 args = append(args, v, re.code.Dump()) 220 221 t.Fatalf(format+" using test data: %#v\ndump:%v", args...) 222 } 223 224 validateGroupNamesNumbers := func(re *Regexp, v d) { 225 if len(v.name) != len(v.num) { 226 fatalf(re, v, "Invalid data, group name count and number count must match") 227 } 228 229 groupNames := re.GetGroupNames() 230 if !reflect.DeepEqual(groupNames, v.name) { 231 fatalf(re, v, "group names expected: %v, actual: %v", v.name, groupNames) 232 } 233 groupNums := re.GetGroupNumbers() 234 if !reflect.DeepEqual(groupNums, v.num) { 235 fatalf(re, v, "group numbers expected: %v, actual: %v", v.num, groupNums) 236 } 237 // make sure we can freely get names and numbers from eachother 238 for i := range groupNums { 239 if want, got := groupNums[i], re.GroupNumberFromName(groupNames[i]); want != got { 240 fatalf(re, v, "group num from name Wanted '%v'\nGot '%v'", want, got) 241 } 242 if want, got := groupNames[i], re.GroupNameFromNumber(groupNums[i]); want != got { 243 fatalf(re, v, "group name from num Wanted '%v'\nGot '%v'", want, got) 244 } 245 } 246 } 247 248 for _, v := range data { 249 // compile the regex 250 re := MustCompile(v.p, 0) 251 252 // validate our group name/num info before execute 253 validateGroupNamesNumbers(re, v) 254 255 m, err := re.FindStringMatch(v.s) 256 if err != nil { 257 fatalf(re, v, "Unexpected error in match: %v", err) 258 } 259 if m == nil { 260 fatalf(re, v, "Match is nil") 261 } 262 if want, got := len(v.strs), m.GroupCount(); want != got { 263 fatalf(re, v, "GroupCount() Wanted '%v'\nGot '%v'", want, got) 264 } 265 g := m.Groups() 266 if want, got := len(v.strs), len(g); want != got { 267 fatalf(re, v, "len(m.Groups()) Wanted '%v'\nGot '%v'", want, got) 268 } 269 // validate each group's value from the execute 270 for i := range v.name { 271 grp1 := m.GroupByName(v.name[i]) 272 grp2 := m.GroupByNumber(v.num[i]) 273 // should be identical reference 274 if grp1 != grp2 { 275 fatalf(re, v, "Expected GroupByName and GroupByNumber to return same result for %v, %v", v.name[i], v.num[i]) 276 } 277 if want, got := v.strs[i], grp1.String(); want != got { 278 fatalf(re, v, "Value[%v] Wanted '%v'\nGot '%v'", i, want, got) 279 } 280 } 281 282 // validate our group name/num info after execute 283 validateGroupNamesNumbers(re, v) 284 } 285} 286 287func TestErr_GroupName(t *testing.T) { 288 // group 0 is off limits 289 if _, err := Compile("foo(?<0>bar)", 0); err == nil { 290 t.Fatalf("zero group, expected error during compile") 291 } else if want, got := "error parsing regexp: capture number cannot be zero in `foo(?<0>bar)`", err.Error(); want != got { 292 t.Fatalf("invalid error text, want '%v', got '%v'", want, got) 293 } 294 if _, err := Compile("foo(?'0'bar)", 0); err == nil { 295 t.Fatalf("zero group, expected error during compile") 296 } else if want, got := "error parsing regexp: capture number cannot be zero in `foo(?'0'bar)`", err.Error(); want != got { 297 t.Fatalf("invalid error text, want '%v', got '%v'", want, got) 298 } 299 300 // group tag can't start with a num 301 if _, err := Compile("foo(?<1bar>)", 0); err == nil { 302 t.Fatalf("invalid group name, expected error during compile") 303 } else if want, got := "error parsing regexp: invalid group name: group names must begin with a word character and have a matching terminator in `foo(?<1bar>)`", err.Error(); want != got { 304 t.Fatalf("invalid error text, want '%v', got '%v'", want, got) 305 } 306 if _, err := Compile("foo(?'1bar')", 0); err == nil { 307 t.Fatalf("invalid group name, expected error during compile") 308 } else if want, got := "error parsing regexp: invalid group name: group names must begin with a word character and have a matching terminator in `foo(?'1bar')`", err.Error(); want != got { 309 t.Fatalf("invalid error text, want '%v', got '%v'", want, got) 310 } 311 312 // missing closing group tag 313 if _, err := Compile("foo(?<bar)", 0); err == nil { 314 t.Fatalf("invalid group name, expected error during compile") 315 } else if want, got := "error parsing regexp: invalid group name: group names must begin with a word character and have a matching terminator in `foo(?<bar)`", err.Error(); want != got { 316 t.Fatalf("invalid error text, want '%v', got '%v'", want, got) 317 } 318 if _, err := Compile("foo(?'bar)", 0); err == nil { 319 t.Fatalf("invalid group name, expected error during compile") 320 } else if want, got := "error parsing regexp: invalid group name: group names must begin with a word character and have a matching terminator in `foo(?'bar)`", err.Error(); want != got { 321 t.Fatalf("invalid error text, want '%v', got '%v'", want, got) 322 } 323 324} 325 326func TestConstantUneffected(t *testing.T) { 327 // had a bug where "constant" sets would get modified with alternations and be broken in memory until restart 328 // this meant that if you used a known-set (like \s) in a larger set it would "poison" \s for the process 329 re := MustCompile(`(\s|\*)test\s`, 0) 330 if want, got := 2, len(re.code.Sets); want != got { 331 t.Fatalf("wanted %v sets, got %v", want, got) 332 } 333 if want, got := "[\\*\\s]", re.code.Sets[0].String(); want != got { 334 t.Fatalf("wanted set 0 %v, got %v", want, got) 335 } 336 if want, got := "[\\s]", re.code.Sets[1].String(); want != got { 337 t.Fatalf("wanted set 1 %v, got %v", want, got) 338 } 339} 340 341func TestAlternationConstAndEscape(t *testing.T) { 342 re := MustCompile(`\:|\s`, 0) 343 if want, got := 1, len(re.code.Sets); want != got { 344 t.Fatalf("wanted %v sets, got %v", want, got) 345 } 346 if want, got := "[:\\s]", re.code.Sets[0].String(); want != got { 347 t.Fatalf("wanted set 0 %v, got %v", want, got) 348 } 349} 350 351func TestStartingCharsOptionalNegate(t *testing.T) { 352 // to maintain matching with the corefx we've made the negative char classes be negative and the 353 // categories they contain positive. This means they're not combinable or suitable for prefixes. 354 // In general this could be a fine thing since negatives are extremely wide groups and not 355 // missing much on prefix optimizations. 356 357 // the below expression *could* have a prefix of [\S\d] but 358 // this requires a change in charclass.go when setting 359 // NotSpaceClass = getCharSetFromCategoryString() 360 // to negate the individual categories rather than the CharSet itself 361 // this would deviate from corefx 362 363 re := MustCompile(`(^(\S{2} )?\S{2}(\d+|/) *\S{3}\S{3} ?\d{2,4}[A-Z] ?\d{2}[A-Z]{3}|(\S{2} )?\d{2,4})`, 0) 364 if re.code.FcPrefix != nil { 365 t.Fatalf("FcPrefix wanted nil, got %v", re.code.FcPrefix) 366 } 367} 368 369func TestParseNegativeDigit(t *testing.T) { 370 re := MustCompile(`\D`, 0) 371 if want, got := 1, len(re.code.Sets); want != got { 372 t.Fatalf("wanted %v sets, got %v", want, got) 373 } 374 375 if want, got := "[\\P{Nd}]", re.code.Sets[0].String(); want != got { 376 t.Fatalf("wanted set 0 %v, got %v", want, got) 377 } 378} 379 380func TestRunNegativeDigit(t *testing.T) { 381 re := MustCompile(`\D`, 0) 382 m, err := re.MatchString("this is a test") 383 if err != nil { 384 t.Fatalf("Unexpected error: %v", err) 385 } 386 if !m { 387 t.Fatalf("Expected match") 388 } 389} 390 391func TestCancellingClasses(t *testing.T) { 392 // [\w\W\s] should become "." because it means "anything" 393 re := MustCompile(`[\w\W\s]`, 0) 394 if want, got := 1, len(re.code.Sets); want != got { 395 t.Fatalf("wanted %v sets, got %v", want, got) 396 } 397 if want, got := syntax.AnyClass().String(), re.code.Sets[0].String(); want != got { 398 t.Fatalf("wanted set 0 %v, got %v", want, got) 399 } 400} 401 402func TestConcatLoopCaptureSet(t *testing.T) { 403 //(A|B)*?CD different Concat/Loop/Capture/Set (had [A-Z] should be [AB]) 404 // we were not copying the Sets in the prefix FC stack, so the underlying sets were unexpectedly mutating 405 // so set [AB] becomes [ABC] when we see the the static C in FC stack generation (which are the valid start chars), 406 // but that was mutating the tree node's original set [AB] because even though we copied the slie header, 407 // the two header's pointed to the same underlying byte array...which was mutated. 408 409 re := MustCompile(`(A|B)*CD`, 0) 410 if want, got := 1, len(re.code.Sets); want != got { 411 t.Fatalf("wanted %v sets, got %v", want, got) 412 } 413 if want, got := "[AB]", re.code.Sets[0].String(); want != got { 414 t.Fatalf("wanted set 0 %v, got %v", want, got) 415 } 416} 417 418func TestFirstcharsIgnoreCase(t *testing.T) { 419 //((?i)AB(?-i)C|D)E different Firstchars (had [da] should be [ad]) 420 // we were not canonicalizing when converting the prefix set to lower case 421 // so our set's were potentially not searching properly 422 re := MustCompile(`((?i)AB(?-i)C|D)E`, 0) 423 424 if re.code.FcPrefix == nil { 425 t.Fatalf("wanted prefix, got nil") 426 } 427 428 if want, got := "[ad]", re.code.FcPrefix.PrefixSet.String(); want != got { 429 t.Fatalf("wanted prefix %v, got %v", want, got) 430 } 431} 432 433func TestRepeatingGroup(t *testing.T) { 434 re := MustCompile(`(data?)+`, 0) 435 436 m, err := re.FindStringMatch("datadat") 437 if err != nil { 438 t.Fatalf("Unexpected err: %v", err) 439 } 440 441 if m == nil { 442 t.Fatalf("Expected match") 443 } 444 445 g := m.GroupByNumber(1) 446 if g == nil { 447 t.Fatalf("Expected group") 448 } 449 450 if want, got := 2, len(g.Captures); want != got { 451 t.Fatalf("wanted cap count %v, got %v", want, got) 452 } 453 454 if want, got := g.Captures[1].String(), g.Capture.String(); want != got { 455 t.Fatalf("expected last capture of the group to be embedded") 456 } 457 458 if want, got := "data", g.Captures[0].String(); want != got { 459 t.Fatalf("expected cap 0 to be %v, got %v", want, got) 460 } 461 if want, got := "dat", g.Captures[1].String(); want != got { 462 t.Fatalf("expected cap 1 to be %v, got %v", want, got) 463 } 464 465} 466 467func TestFindNextMatch_Basic(t *testing.T) { 468 re := MustCompile(`(T|E)(?=h|E|S|$)`, 0) 469 m, err := re.FindStringMatch(`This is a TEST`) 470 if err != nil { 471 t.Fatalf("Unexpected err 0: %v", err) 472 } 473 if m == nil { 474 t.Fatalf("Expected match 0") 475 } 476 if want, got := 0, m.Index; want != got { 477 t.Fatalf("expected match 0 to start at %v, got %v", want, got) 478 } 479 480 m, err = re.FindNextMatch(m) 481 if err != nil { 482 t.Fatalf("Unexpected err 1: %v", err) 483 } 484 if m == nil { 485 t.Fatalf("Expected match 1") 486 } 487 if want, got := 10, m.Index; want != got { 488 t.Fatalf("expected match 1 to start at %v, got %v", want, got) 489 } 490 491 m, err = re.FindNextMatch(m) 492 if err != nil { 493 t.Fatalf("Unexpected err 2: %v", err) 494 } 495 if m == nil { 496 t.Fatalf("Expected match 2") 497 } 498 if want, got := 11, m.Index; want != got { 499 t.Fatalf("expected match 2 to start at %v, got %v", want, got) 500 } 501 502 m, err = re.FindNextMatch(m) 503 if err != nil { 504 t.Fatalf("Unexpected err 3: %v", err) 505 } 506 if m == nil { 507 t.Fatalf("Expected match 3") 508 } 509 if want, got := 13, m.Index; want != got { 510 t.Fatalf("expected match 3 to start at %v, got %v", want, got) 511 } 512} 513 514func TestUnicodeSupplementaryCharSetMatch(t *testing.T) { 515 //0x2070E 0x20731 0x20779 516 re := MustCompile("[-]", 0) 517 518 if m, err := re.MatchString("\u2070"); err != nil { 519 t.Fatalf("Unexpected err: %v", err) 520 } else if m { 521 t.Fatalf("Unexpected match") 522 } 523 524 if m, err := re.MatchString(""); err != nil { 525 t.Fatalf("Unexpected err: %v", err) 526 } else if !m { 527 t.Fatalf("Expected match") 528 } 529} 530 531func TestUnicodeSupplementaryCharInRange(t *testing.T) { 532 //0x2070E 0x20731 0x20779 533 re := MustCompile(".", 0) 534 535 if m, err := re.MatchString("\u2070"); err != nil { 536 t.Fatalf("Unexpected err: %v", err) 537 } else if !m { 538 t.Fatalf("Expected match") 539 } 540 541 if m, err := re.MatchString(""); err != nil { 542 t.Fatalf("Unexpected err: %v", err) 543 } else if !m { 544 t.Fatalf("Expected match") 545 } 546} 547 548func TestUnicodeScriptSets(t *testing.T) { 549 re := MustCompile(`\p{Katakana}+`, 0) 550 if m, err := re.MatchString("\u30A0\u30FF"); err != nil { 551 t.Fatalf("Unexpected err: %v", err) 552 } else if !m { 553 t.Fatalf("Expected match") 554 } 555} 556 557func TestHexadecimalCurlyBraces(t *testing.T) { 558 re := MustCompile(`\x20`, 0) 559 if m, err := re.MatchString(" "); err != nil { 560 t.Fatalf("Unexpected err: %v", err) 561 } else if !m { 562 t.Fatalf("Expected match") 563 } 564 565 re = MustCompile(`\x{C4}`, 0) 566 if m, err := re.MatchString("Ä"); err != nil { 567 t.Fatalf("Unexpected err: %v", err) 568 } else if !m { 569 t.Fatalf("Expected match") 570 } 571 572 re = MustCompile(`\x{0C5}`, 0) 573 if m, err := re.MatchString("Å"); err != nil { 574 t.Fatalf("Unexpected err: %v", err) 575 } else if !m { 576 t.Fatalf("Expected match") 577 } 578 579 re = MustCompile(`\x{00C6}`, 0) 580 if m, err := re.MatchString("Æ"); err != nil { 581 t.Fatalf("Unexpected err: %v", err) 582 } else if !m { 583 t.Fatalf("Expected match") 584 } 585 586 re = MustCompile(`\x{1FF}`, 0) 587 if m, err := re.MatchString("ǿ"); err != nil { 588 t.Fatalf("Unexpected err: %v", err) 589 } else if !m { 590 t.Fatalf("Expected match") 591 } 592 593 re = MustCompile(`\x{02FF}`, 0) 594 if m, err := re.MatchString("˿"); err != nil { 595 t.Fatalf("Unexpected err: %v", err) 596 } else if !m { 597 t.Fatalf("Expected match") 598 } 599 600 re = MustCompile(`\x{1392}`, 0) 601 if m, err := re.MatchString("᎒"); err != nil { 602 t.Fatalf("Unexpected err: %v", err) 603 } else if !m { 604 t.Fatalf("Expected match") 605 } 606 607 re = MustCompile(`\x{0010ffff}`, 0) 608 if m, err := re.MatchString(string(rune(0x10ffff))); err != nil { 609 t.Fatalf("Unexpected err: %v", err) 610 } else if !m { 611 t.Fatalf("Expected match") 612 } 613 614 if _, err := Compile(`\x2R`, 0); err == nil { 615 t.Fatal("Expected error") 616 } 617 if _, err := Compile(`\x0`, 0); err == nil { 618 t.Fatal("Expected error") 619 } 620 if _, err := Compile(`\x`, 0); err == nil { 621 t.Fatal("Expected error") 622 } 623 if _, err := Compile(`\x{`, 0); err == nil { 624 t.Fatal("Expected error") 625 } 626 if _, err := Compile(`\x{2`, 0); err == nil { 627 t.Fatal("Expected error") 628 } 629 if _, err := Compile(`\x{2R`, 0); err == nil { 630 t.Fatal("Expected error") 631 } 632 if _, err := Compile(`\x{2R}`, 0); err == nil { 633 t.Fatal("Expected error") 634 } 635 if _, err := Compile(`\x{}`, 0); err == nil { 636 t.Fatalf("Expected error") 637 } 638 if _, err := Compile(`\x{10000`, 0); err == nil { 639 t.Fatal("Expected error") 640 } 641 if _, err := Compile(`\x{1234`, 0); err == nil { 642 t.Fatal("Expected error") 643 } 644 if _, err := Compile(`\x{123456789}`, 0); err == nil { 645 t.Fatal("Expected error") 646 } 647 648} 649 650func TestEmptyCharClass(t *testing.T) { 651 if _, err := Compile("[]", 0); err == nil { 652 t.Fatal("Empty char class isn't valid outside of ECMAScript mode") 653 } 654} 655 656func TestECMAEmptyCharClass(t *testing.T) { 657 re := MustCompile("[]", ECMAScript) 658 if m, err := re.MatchString("a"); err != nil { 659 t.Fatal(err) 660 } else if m { 661 t.Fatal("Expected no match") 662 } 663} 664 665func TestDot(t *testing.T) { 666 re := MustCompile(".", 0) 667 if m, err := re.MatchString("\r"); err != nil { 668 t.Fatal(err) 669 } else if !m { 670 t.Fatal("Expected match") 671 } 672} 673 674func TestECMADot(t *testing.T) { 675 re := MustCompile(".", ECMAScript) 676 if m, err := re.MatchString("\r"); err != nil { 677 t.Fatal(err) 678 } else if m { 679 t.Fatal("Expected no match") 680 } 681} 682 683func TestDecimalLookahead(t *testing.T) { 684 re := MustCompile(`\1(A)`, 0) 685 m, err := re.FindStringMatch("AA") 686 if err != nil { 687 t.Fatal(err) 688 } else if m != nil { 689 t.Fatal("Expected no match") 690 } 691} 692 693func TestECMADecimalLookahead(t *testing.T) { 694 re := MustCompile(`\1(A)`, ECMAScript) 695 m, err := re.FindStringMatch("AA") 696 if err != nil { 697 t.Fatal(err) 698 } 699 700 if c := m.GroupCount(); c != 2 { 701 t.Fatalf("Group count !=2 (%d)", c) 702 } 703 704 if s := m.GroupByNumber(0).String(); s != "A" { 705 t.Fatalf("Group0 != 'A' ('%s')", s) 706 } 707 708 if s := m.GroupByNumber(1).String(); s != "A" { 709 t.Fatalf("Group1 != 'A' ('%s')", s) 710 } 711} 712 713func TestECMAOctal(t *testing.T) { 714 re := MustCompile(`\100`, ECMAScript) 715 if m, err := re.MatchString("@"); err != nil { 716 t.Fatal(err) 717 } else if !m { 718 t.Fatal("Expected match") 719 } 720 721 if m, err := re.MatchString("x"); err != nil { 722 t.Fatal(err) 723 } else if m { 724 t.Fatal("Expected no match") 725 } 726 727 re = MustCompile(`\377`, ECMAScript) 728 if m, err := re.MatchString("\u00ff"); err != nil { 729 t.Fatal(err) 730 } else if !m { 731 t.Fatal("Expected match") 732 } 733 734 re = MustCompile(`\400`, ECMAScript) 735 if m, err := re.MatchString(" 0"); err != nil { 736 t.Fatal(err) 737 } else if !m { 738 t.Fatal("Expected match") 739 } 740 741} 742 743func TestECMAInvalidEscape(t *testing.T) { 744 re := MustCompile(`\x0`, ECMAScript) 745 if m, err := re.MatchString("x0"); err != nil { 746 t.Fatal(err) 747 } else if !m { 748 t.Fatal("Expected match") 749 } 750 751 re = MustCompile(`\x0z`, ECMAScript) 752 if m, err := re.MatchString("x0z"); err != nil { 753 t.Fatal(err) 754 } else if !m { 755 t.Fatal("Expected match") 756 } 757} 758 759func TestECMAInvalidEscapeCharClass(t *testing.T) { 760 re := MustCompile(`[\x0]`, ECMAScript) 761 if m, err := re.MatchString("x"); err != nil { 762 t.Fatal(err) 763 } else if !m { 764 t.Fatal("Expected match") 765 } 766 767 if m, err := re.MatchString("0"); err != nil { 768 t.Fatal(err) 769 } else if !m { 770 t.Fatal("Expected match") 771 } 772 773 if m, err := re.MatchString("z"); err != nil { 774 t.Fatal(err) 775 } else if m { 776 t.Fatal("Expected no match") 777 } 778} 779 780func TestECMAScriptXCurlyBraceEscape(t *testing.T) { 781 re := MustCompile(`\x{20}`, ECMAScript) 782 if m, err := re.MatchString(" "); err != nil { 783 t.Fatal(err) 784 } else if m { 785 t.Fatal("Expected no match") 786 } 787 788 if m, err := re.MatchString("xxxxxxxxxxxxxxxxxxxx"); err != nil { 789 t.Fatal(err) 790 } else if !m { 791 t.Fatal("Expected match") 792 } 793} 794 795func TestNegateRange(t *testing.T) { 796 re := MustCompile(`[\D]`, 0) 797 if m, err := re.MatchString("A"); err != nil { 798 t.Fatal(err) 799 } else if !m { 800 t.Fatal("Expected match") 801 } 802} 803 804func TestECMANegateRange(t *testing.T) { 805 re := MustCompile(`[\D]`, ECMAScript) 806 if m, err := re.MatchString("A"); err != nil { 807 t.Fatal(err) 808 } else if !m { 809 t.Fatal("Expected match") 810 } 811} 812 813func TestDollar(t *testing.T) { 814 // PCRE/C# allow \n to match to $ at end-of-string in singleline mode... 815 // a weird edge-case kept for compatibility, ECMAScript/RE2 mode don't allow it 816 re := MustCompile(`ac$`, 0) 817 if m, err := re.MatchString("ac\n"); err != nil { 818 t.Fatal(err) 819 } else if !m { 820 t.Fatal("Expected match") 821 } 822} 823func TestECMADollar(t *testing.T) { 824 re := MustCompile(`ac$`, ECMAScript) 825 if m, err := re.MatchString("ac\n"); err != nil { 826 t.Fatal(err) 827 } else if m { 828 t.Fatal("Expected no match") 829 } 830} 831 832func TestThreeByteUnicode_InputOnly(t *testing.T) { 833 // confirm the bmprefix properly ignores 3-byte unicode in the input value 834 // this used to panic 835 re := MustCompile("高", 0) 836 if m, err := re.MatchString("Test高"); err != nil { 837 t.Fatal(err) 838 } else if !m { 839 t.Fatal("Expected match") 840 } 841} 842 843func TestMultibyteUnicode_MatchPartialPattern(t *testing.T) { 844 re := MustCompile("猟な", 0) 845 if m, err := re.MatchString("なあな"); err != nil { 846 t.Fatal(err) 847 } else if m { 848 t.Fatal("Expected no match") 849 } 850} 851 852func TestMultibyteUnicode_Match(t *testing.T) { 853 re := MustCompile("猟な", 0) 854 if m, err := re.MatchString("なあ猟な"); err != nil { 855 t.Fatal(err) 856 } else if !m { 857 t.Fatal("Expected match") 858 } 859} 860 861func TestAlternationNamedOptions_Errors(t *testing.T) { 862 // all of these should give an error "error parsing regexp:" 863 data := []string{ 864 "(?(?e))", "(?(?a)", "(?(?", "(?(", "?(a:b)", "?(a)", "?(a|b)", "?((a)", "?((a)a", "?((a)a|", "?((a)a|b", 865 "(?(?i))", "(?(?I))", "(?(?m))", "(?(?M))", "(?(?s))", "(?(?S))", "(?(?x))", "(?(?X))", "(?(?n))", "(?(?N))", " (?(?n))", 866 } 867 for _, p := range data { 868 re, err := Compile(p, 0) 869 if err == nil { 870 t.Fatal("Expected error, got nil") 871 } 872 if re != nil { 873 t.Fatal("Expected unparsed regexp, got non-nil") 874 } 875 876 if !strings.HasPrefix(err.Error(), "error parsing regexp: ") { 877 t.Fatalf("Wanted parse error, got '%v'", err) 878 } 879 } 880} 881 882func TestAlternationNamedOptions_Success(t *testing.T) { 883 data := []struct { 884 pattern string 885 input string 886 expectSuccess bool 887 matchVal string 888 }{ 889 {"(?(cat)|dog)", "cat", true, ""}, 890 {"(?(cat)|dog)", "catdog", true, ""}, 891 {"(?(cat)dog1|dog2)", "catdog1", false, ""}, 892 {"(?(cat)dog1|dog2)", "catdog2", true, "dog2"}, 893 {"(?(cat)dog1|dog2)", "catdog1dog2", true, "dog2"}, 894 {"(?(dog2))", "dog2", true, ""}, 895 {"(?(cat)|dog)", "oof", false, ""}, 896 {"(?(a:b))", "a", true, ""}, 897 {"(?(a:))", "a", true, ""}, 898 } 899 for _, p := range data { 900 re := MustCompile(p.pattern, 0) 901 m, err := re.FindStringMatch(p.input) 902 903 if err != nil { 904 t.Fatalf("Unexpected error during match: %v", err) 905 } 906 if want, got := p.expectSuccess, m != nil; want != got { 907 t.Fatalf("Success mismatch for %v, wanted %v, got %v", p.pattern, want, got) 908 } 909 if m != nil { 910 if want, got := p.matchVal, m.String(); want != got { 911 t.Fatalf("Match val mismatch for %v, wanted %v, got %v", p.pattern, want, got) 912 } 913 } 914 } 915} 916 917func TestAlternationConstruct_Matches(t *testing.T) { 918 re := MustCompile("(?(A)A123|C789)", 0) 919 m, err := re.FindStringMatch("A123 B456 C789") 920 if err != nil { 921 t.Fatalf("Unexpected err: %v", err) 922 } 923 if m == nil { 924 t.Fatal("Expected match, got nil") 925 } 926 927 if want, got := "A123", m.String(); want != got { 928 t.Fatalf("Wanted %v, got %v", want, got) 929 } 930 931 m, err = re.FindNextMatch(m) 932 if err != nil { 933 t.Fatalf("Unexpected err in second match: %v", err) 934 } 935 if m == nil { 936 t.Fatal("Expected second match, got nil") 937 } 938 if want, got := "C789", m.String(); want != got { 939 t.Fatalf("Wanted %v, got %v", want, got) 940 } 941 942 m, err = re.FindNextMatch(m) 943 if err != nil { 944 t.Fatalf("Unexpected err in third match: %v", err) 945 } 946 if m != nil { 947 t.Fatal("Did not expect third match") 948 } 949} 950 951func TestStartAtEnd(t *testing.T) { 952 re := MustCompile("(?:)", 0) 953 m, err := re.FindStringMatchStartingAt("t", 1) 954 if err != nil { 955 t.Fatal(err) 956 } 957 if m == nil { 958 t.Fatal("Expected match") 959 } 960} 961 962func TestParserFuzzCrashes(t *testing.T) { 963 var crashes = []string{ 964 "(?'-", "(\\c0)", "(\\00(?())", "[\\p{0}", "(\x00?.*.()?(()?)?)*.x\xcb?&(\\s\x80)", "\\p{0}", "[0-[\\p{0}", 965 } 966 967 for _, c := range crashes { 968 t.Log(c) 969 Compile(c, 0) 970 } 971} 972 973func TestParserFuzzHangs(t *testing.T) { 974 var hangs = []string{ 975 "\r{865720113}z\xd5{\r{861o", "\r{915355}\r{9153}", "\r{525005}", "\x01{19765625}", "(\r{068828256})", "\r{677525005}", 976 } 977 978 for _, c := range hangs { 979 t.Log(c) 980 Compile(c, 0) 981 } 982} 983 984func BenchmarkParserPrefixLongLen(b *testing.B) { 985 re := MustCompile("\r{100001}T+", 0) 986 inp := strings.Repeat("testing", 10000) + strings.Repeat("\r", 100000) + "TTTT" 987 988 b.ResetTimer() 989 for i := 0; i < b.N; i++ { 990 if m, err := re.MatchString(inp); err != nil { 991 b.Fatalf("Unexpected err: %v", err) 992 } else if m { 993 b.Fatalf("Expected no match") 994 } 995 } 996} 997 998/* 999func TestPcreStuff(t *testing.T) { 1000 re := MustCompile(`(?(?=(a))a)`, Debug) 1001 inp := unEscapeToMatch(`a`) 1002 fmt.Printf("Inp %q\n", inp) 1003 m, err := re.FindStringMatch(inp) 1004 1005 if err != nil { 1006 t.Fatalf("Unexpected error: %v", err) 1007 } 1008 if m == nil { 1009 t.Fatalf("Expected match") 1010 } 1011 1012 fmt.Printf("Match %s\n", m.dump()) 1013 fmt.Printf("Text: %v\n", unEscapeGroup(m.String())) 1014 1015} 1016*/ 1017 1018//(.*)(\d+) different FirstChars ([\x00-\t\v-\x08] OR [\x00-\t\v-\uffff\p{Nd}] 1019 1020func TestControlBracketFail(t *testing.T) { 1021 re := MustCompile(`(cat)(\c[*)(dog)`, 0) 1022 inp := "asdlkcat\u00FFdogiwod" 1023 1024 if m, _ := re.MatchString(inp); m { 1025 t.Fatal("expected no match") 1026 } 1027} 1028 1029func TestControlBracketGroups(t *testing.T) { 1030 re := MustCompile(`(cat)(\c[*)(dog)`, 0) 1031 inp := "asdlkcat\u001bdogiwod" 1032 1033 if want, got := 4, re.capsize; want != got { 1034 t.Fatalf("Capsize wrong, want %v, got %v", want, got) 1035 } 1036 1037 m, _ := re.FindStringMatch(inp) 1038 if m == nil { 1039 t.Fatal("expected match") 1040 } 1041 1042 g := m.Groups() 1043 want := []string{"cat\u001bdog", "cat", "\u001b", "dog"} 1044 for i := 0; i < len(g); i++ { 1045 if want[i] != g[i].String() { 1046 t.Fatalf("Bad group num %v, want %v, got %v", i, want[i], g[i].String()) 1047 } 1048 } 1049} 1050 1051func TestBadGroupConstruct(t *testing.T) { 1052 bad := []string{"(?>-", "(?<", "(?<=", "(?<!", "(?>", "(?)", "(?<)", "(?')", "(?<-"} 1053 1054 for _, b := range bad { 1055 _, err := Compile(b, 0) 1056 if err == nil { 1057 t.Fatalf("Wanted error, but got no error for pattern: %v", b) 1058 } 1059 } 1060} 1061 1062func TestEmptyCaptureLargeRepeat(t *testing.T) { 1063 // a bug would cause our track to not grow and eventually panic 1064 // with large numbers of repeats of a non-capturing group (>16) 1065 1066 // the issue was that the jump occured to the same statement over and over 1067 // and the "grow stack/track" logic only triggered on jumps that moved 1068 // backwards 1069 1070 r := MustCompile(`(?:){40}`, 0) 1071 m, err := r.FindStringMatch("1") 1072 if err != nil { 1073 t.Fatalf("Unexpected error: %v", err) 1074 } 1075 if want, got := 0, m.Index; want != got { 1076 t.Errorf("First Match Index wanted %v got %v", want, got) 1077 } 1078 if want, got := 0, m.Length; want != got { 1079 t.Errorf("First Match Length wanted %v got %v", want, got) 1080 } 1081 1082 m, _ = r.FindNextMatch(m) 1083 if want, got := 1, m.Index; want != got { 1084 t.Errorf("Second Match Index wanted %v got %v", want, got) 1085 } 1086 if want, got := 0, m.Length; want != got { 1087 t.Errorf("Second Match Length wanted %v got %v", want, got) 1088 } 1089 1090 m, _ = r.FindNextMatch(m) 1091 if m != nil { 1092 t.Fatal("Expected 2 matches, got more") 1093 } 1094} 1095 1096func TestFuzzBytes(t *testing.T) { 1097 //some crash cases found from fuzzing 1098 1099 var testCases = []struct { 1100 r, s []byte 1101 }{ 1102 { 1103 r: []byte{0x28, 0x28, 0x29, 0x5c, 0x37, 0x28, 0x3f, 0x28, 0x29, 0x29}, 1104 s: []byte{0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, 1105 }, 1106 { 1107 r: []byte{0x28, 0x5c, 0x32, 0x28, 0x3f, 0x28, 0x30, 0x29, 0x29}, 1108 s: []byte{0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, 1109 }, 1110 { 1111 r: []byte{0x28, 0x3f, 0x28, 0x29, 0x29, 0x5c, 0x31, 0x30, 0x28, 0x3f, 0x28, 0x30, 0x29}, 1112 s: []byte{0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, 1113 }, 1114 { 1115 r: []byte{0x28, 0x29, 0x28, 0x28, 0x29, 0x5c, 0x37, 0x28, 0x3f, 0x28, 0x29, 0x29}, 1116 s: []byte{0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, 1117 }, 1118 } 1119 1120 for _, c := range testCases { 1121 r := string(c.r) 1122 t.Run(r, func(t *testing.T) { 1123 _, err := Compile(r, Multiline|ECMAScript|Debug) 1124 // should fail compiling 1125 if err == nil { 1126 t.Fatal("should fail compile, but didn't") 1127 } 1128 }) 1129 } 1130 1131} 1132