1 /* 2 * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 37 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 38 * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812 39 * 8216332 8214245 8237599 8241055 8247546 8258259 8037397 40 * 41 * @library /test/lib 42 * @library /lib/testlibrary/java/lang 43 * @build jdk.test.lib.RandomFactory 44 * @run main RegExTest 45 * @key randomness 46 */ 47 48 import java.io.BufferedReader; 49 import java.io.ByteArrayInputStream; 50 import java.io.ByteArrayOutputStream; 51 import java.io.File; 52 import java.io.FileInputStream; 53 import java.io.InputStreamReader; 54 import java.io.ObjectInputStream; 55 import java.io.ObjectOutputStream; 56 import java.math.BigInteger; 57 import java.nio.CharBuffer; 58 import java.nio.file.Files; 59 import java.nio.file.Path; 60 import java.nio.file.Paths; 61 import java.util.ArrayList; 62 import java.util.Arrays; 63 import java.util.HashMap; 64 import java.util.List; 65 import java.util.Map; 66 import java.util.Random; 67 import java.util.Scanner; 68 import java.util.function.Function; 69 import java.util.function.Predicate; 70 import java.util.regex.Matcher; 71 import java.util.regex.MatchResult; 72 import java.util.regex.Pattern; 73 import java.util.regex.PatternSyntaxException; 74 import java.util.stream.IntStream; 75 import java.util.stream.Stream; 76 77 import jdk.test.lib.RandomFactory; 78 79 /** 80 * This is a test class created to check the operation of 81 * the Pattern and Matcher classes. 82 */ 83 public class RegExTest { 84 85 private static Random generator = RandomFactory.getRandom(); 86 private static boolean failure = false; 87 private static int failCount = 0; 88 private static String firstFailure = null; 89 90 /** 91 * Main to interpret arguments and run several tests. 92 * 93 */ main(String[] args)94 public static void main(String[] args) throws Exception { 95 // Most of the tests are in a file 96 processFile("TestCases.txt"); 97 //processFile("PerlCases.txt"); 98 processFile("BMPTestCases.txt"); 99 processFile("SupplementaryTestCases.txt"); 100 101 // These test many randomly generated char patterns 102 bm(); 103 slice(); 104 105 // These are hard to put into the file 106 escapes(); 107 blankInput(); 108 109 // Substitition tests on randomly generated sequences 110 globalSubstitute(); 111 stringbufferSubstitute(); 112 stringbuilderSubstitute(); 113 114 substitutionBasher(); 115 substitutionBasher2(); 116 117 // Canonical Equivalence 118 ceTest(); 119 120 // Anchors 121 anchorTest(); 122 123 // boolean match calls 124 matchesTest(); 125 lookingAtTest(); 126 127 // Pattern API 128 patternMatchesTest(); 129 130 // Misc 131 lookbehindTest(); 132 nullArgumentTest(); 133 backRefTest(); 134 groupCaptureTest(); 135 caretTest(); 136 charClassTest(); 137 emptyPatternTest(); 138 findIntTest(); 139 group0Test(); 140 longPatternTest(); 141 octalTest(); 142 ampersandTest(); 143 negationTest(); 144 splitTest(); 145 appendTest(); 146 caseFoldingTest(); 147 commentsTest(); 148 unixLinesTest(); 149 replaceFirstTest(); 150 gTest(); 151 zTest(); 152 serializeTest(); 153 reluctantRepetitionTest(); 154 multilineDollarTest(); 155 dollarAtEndTest(); 156 caretBetweenTerminatorsTest(); 157 // This RFE rejected in Tiger numOccurrencesTest(); 158 javaCharClassTest(); 159 nonCaptureRepetitionTest(); 160 notCapturedGroupCurlyMatchTest(); 161 escapedSegmentTest(); 162 literalPatternTest(); 163 literalReplacementTest(); 164 regionTest(); 165 toStringTest(); 166 negatedCharClassTest(); 167 findFromTest(); 168 boundsTest(); 169 unicodeWordBoundsTest(); 170 caretAtEndTest(); 171 wordSearchTest(); 172 hitEndTest(); 173 toMatchResultTest(); 174 toMatchResultTest2(); 175 surrogatesInClassTest(); 176 removeQEQuotingTest(); 177 namedGroupCaptureTest(); 178 nonBmpClassComplementTest(); 179 unicodePropertiesTest(); 180 unicodeHexNotationTest(); 181 unicodeClassesTest(); 182 unicodeCharacterNameTest(); 183 horizontalAndVerticalWSTest(); 184 linebreakTest(); 185 branchTest(); 186 groupCurlyNotFoundSuppTest(); 187 groupCurlyBackoffTest(); 188 patternAsPredicate(); 189 patternAsMatchPredicate(); 190 invalidFlags(); 191 embeddedFlags(); 192 grapheme(); 193 expoBacktracking(); 194 invalidGroupName(); 195 illegalRepetitionRange(); 196 surrogatePairWithCanonEq(); 197 lineBreakWithQuantifier(); 198 caseInsensitivePMatch(); 199 surrogatePairOverlapRegion(); 200 droppedClassesWithIntersection(); 201 202 203 if (failure) { 204 throw new 205 RuntimeException("RegExTest failed, 1st failure: " + 206 firstFailure); 207 } else { 208 System.err.println("OKAY: All tests passed."); 209 } 210 } 211 212 // Utility functions 213 getRandomAlphaString(int length)214 private static String getRandomAlphaString(int length) { 215 StringBuffer buf = new StringBuffer(length); 216 for (int i=0; i<length; i++) { 217 char randChar = (char)(97 + generator.nextInt(26)); 218 buf.append(randChar); 219 } 220 return buf.toString(); 221 } 222 check(Matcher m, String expected)223 private static void check(Matcher m, String expected) { 224 m.find(); 225 if (!m.group().equals(expected)) 226 failCount++; 227 } 228 check(Matcher m, String result, boolean expected)229 private static void check(Matcher m, String result, boolean expected) { 230 m.find(); 231 if (m.group().equals(result) != expected) 232 failCount++; 233 } 234 check(Pattern p, String s, boolean expected)235 private static void check(Pattern p, String s, boolean expected) { 236 if (p.matcher(s).find() != expected) 237 failCount++; 238 } 239 check(String p, String s, boolean expected)240 private static void check(String p, String s, boolean expected) { 241 Matcher matcher = Pattern.compile(p).matcher(s); 242 if (matcher.find() != expected) 243 failCount++; 244 } 245 check(String p, char c, boolean expected)246 private static void check(String p, char c, boolean expected) { 247 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 248 Pattern pattern = Pattern.compile(propertyPattern); 249 char[] ca = new char[1]; ca[0] = c; 250 Matcher matcher = pattern.matcher(new String(ca)); 251 if (!matcher.find()) 252 failCount++; 253 } 254 check(String p, int codePoint, boolean expected)255 private static void check(String p, int codePoint, boolean expected) { 256 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 257 Pattern pattern = Pattern.compile(propertyPattern); 258 char[] ca = Character.toChars(codePoint); 259 Matcher matcher = pattern.matcher(new String(ca)); 260 if (!matcher.find()) 261 failCount++; 262 } 263 check(String p, int flag, String input, String s, boolean expected)264 private static void check(String p, int flag, String input, String s, 265 boolean expected) 266 { 267 Pattern pattern = Pattern.compile(p, flag); 268 Matcher matcher = pattern.matcher(input); 269 if (expected) 270 check(matcher, s, expected); 271 else 272 check(pattern, input, false); 273 } 274 report(String testName)275 private static void report(String testName) { 276 int spacesToAdd = 30 - testName.length(); 277 StringBuffer paddedNameBuffer = new StringBuffer(testName); 278 for (int i=0; i<spacesToAdd; i++) 279 paddedNameBuffer.append(" "); 280 String paddedName = paddedNameBuffer.toString(); 281 System.err.println(paddedName + ": " + 282 (failCount==0 ? "Passed":"Failed("+failCount+")")); 283 if (failCount > 0) { 284 failure = true; 285 286 if (firstFailure == null) { 287 firstFailure = testName; 288 } 289 } 290 291 failCount = 0; 292 } 293 294 /** 295 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 296 * supplementary characters. This method does NOT fully take care 297 * of the regex syntax. 298 */ toSupplementaries(String s)299 private static String toSupplementaries(String s) { 300 int length = s.length(); 301 StringBuffer sb = new StringBuffer(length * 2); 302 303 for (int i = 0; i < length; ) { 304 char c = s.charAt(i++); 305 if (c == '\\') { 306 sb.append(c); 307 if (i < length) { 308 c = s.charAt(i++); 309 sb.append(c); 310 if (c == 'u') { 311 // assume no syntax error 312 sb.append(s.charAt(i++)); 313 sb.append(s.charAt(i++)); 314 sb.append(s.charAt(i++)); 315 sb.append(s.charAt(i++)); 316 } 317 } 318 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 319 sb.append('\ud800').append((char)('\udc00'+c)); 320 } else { 321 sb.append(c); 322 } 323 } 324 return sb.toString(); 325 } 326 327 // Regular expression tests 328 329 // This is for bug 6178785 330 // Test if an expected NPE gets thrown when passing in a null argument check(Runnable test)331 private static boolean check(Runnable test) { 332 try { 333 test.run(); 334 failCount++; 335 return false; 336 } catch (NullPointerException npe) { 337 return true; 338 } 339 } 340 nullArgumentTest()341 private static void nullArgumentTest() { 342 check(() -> Pattern.compile(null)); 343 check(() -> Pattern.matches(null, null)); 344 check(() -> Pattern.matches("xyz", null)); 345 check(() -> Pattern.quote(null)); 346 check(() -> Pattern.compile("xyz").split(null)); 347 check(() -> Pattern.compile("xyz").matcher(null)); 348 349 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 350 m.matches(); 351 check(() -> m.appendTail((StringBuffer) null)); 352 check(() -> m.appendTail((StringBuilder)null)); 353 check(() -> m.replaceAll((String) null)); 354 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 355 check(() -> m.replaceFirst((String)null)); 356 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 357 check(() -> m.appendReplacement((StringBuffer)null, null)); 358 check(() -> m.appendReplacement((StringBuilder)null, null)); 359 check(() -> m.reset(null)); 360 check(() -> Matcher.quoteReplacement(null)); 361 //check(() -> m.usePattern(null)); 362 363 report("Null Argument"); 364 } 365 366 // This is for bug6635133 367 // Test if surrogate pair in Unicode escapes can be handled correctly. surrogatesInClassTest()368 private static void surrogatesInClassTest() throws Exception { 369 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 370 Matcher matcher = pattern.matcher("\ud834\udd22"); 371 if (!matcher.find()) 372 failCount++; 373 374 report("Surrogate pair in Unicode escape"); 375 } 376 377 // This is for bug6990617 378 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 379 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 380 // char is an octal digit. removeQEQuotingTest()381 private static void removeQEQuotingTest() throws Exception { 382 Pattern pattern = 383 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 384 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 385 if (!matcher.find()) 386 failCount++; 387 388 report("Remove Q/E Quoting"); 389 } 390 391 // This is for bug 4988891 392 // Test toMatchResult to see that it is a copy of the Matcher 393 // that is not affected by subsequent operations on the original toMatchResultTest()394 private static void toMatchResultTest() throws Exception { 395 Pattern pattern = Pattern.compile("squid"); 396 Matcher matcher = pattern.matcher( 397 "agiantsquidofdestinyasmallsquidoffate"); 398 matcher.find(); 399 int matcherStart1 = matcher.start(); 400 MatchResult mr = matcher.toMatchResult(); 401 if (mr == matcher) 402 failCount++; 403 int resultStart1 = mr.start(); 404 if (matcherStart1 != resultStart1) 405 failCount++; 406 matcher.find(); 407 int matcherStart2 = matcher.start(); 408 int resultStart2 = mr.start(); 409 if (matcherStart2 == resultStart2) 410 failCount++; 411 if (resultStart1 != resultStart2) 412 failCount++; 413 MatchResult mr2 = matcher.toMatchResult(); 414 if (mr == mr2) 415 failCount++; 416 if (mr2.start() != matcherStart2) 417 failCount++; 418 report("toMatchResult is a copy"); 419 } 420 checkExpectedISE(Runnable test)421 private static void checkExpectedISE(Runnable test) { 422 try { 423 test.run(); 424 failCount++; 425 } catch (IllegalStateException x) { 426 } catch (IndexOutOfBoundsException xx) { 427 failCount++; 428 } 429 } 430 checkExpectedIOOE(Runnable test)431 private static void checkExpectedIOOE(Runnable test) { 432 try { 433 test.run(); 434 failCount++; 435 } catch (IndexOutOfBoundsException x) {} 436 } 437 438 // This is for bug 8074678 439 // Test the result of toMatchResult throws ISE if no match is availble toMatchResultTest2()440 private static void toMatchResultTest2() throws Exception { 441 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 442 matcher.find(); 443 MatchResult mr = matcher.toMatchResult(); 444 445 checkExpectedISE(() -> mr.start()); 446 checkExpectedISE(() -> mr.start(2)); 447 checkExpectedISE(() -> mr.end()); 448 checkExpectedISE(() -> mr.end(2)); 449 checkExpectedISE(() -> mr.group()); 450 checkExpectedISE(() -> mr.group(2)); 451 452 matcher = Pattern.compile("(match)").matcher("there is a match"); 453 matcher.find(); 454 MatchResult mr2 = matcher.toMatchResult(); 455 checkExpectedIOOE(() -> mr2.start(2)); 456 checkExpectedIOOE(() -> mr2.end(2)); 457 checkExpectedIOOE(() -> mr2.group(2)); 458 459 report("toMatchResult2 appropriate exceptions"); 460 } 461 462 // This is for bug 5013885 463 // Must test a slice to see if it reports hitEnd correctly hitEndTest()464 private static void hitEndTest() throws Exception { 465 // Basic test of Slice node 466 Pattern p = Pattern.compile("^squidattack"); 467 Matcher m = p.matcher("squack"); 468 m.find(); 469 if (m.hitEnd()) 470 failCount++; 471 m.reset("squid"); 472 m.find(); 473 if (!m.hitEnd()) 474 failCount++; 475 476 // Test Slice, SliceA and SliceU nodes 477 for (int i=0; i<3; i++) { 478 int flags = 0; 479 if (i==1) flags = Pattern.CASE_INSENSITIVE; 480 if (i==2) flags = Pattern.UNICODE_CASE; 481 p = Pattern.compile("^abc", flags); 482 m = p.matcher("ad"); 483 m.find(); 484 if (m.hitEnd()) 485 failCount++; 486 m.reset("ab"); 487 m.find(); 488 if (!m.hitEnd()) 489 failCount++; 490 } 491 492 // Test Boyer-Moore node 493 p = Pattern.compile("catattack"); 494 m = p.matcher("attack"); 495 m.find(); 496 if (!m.hitEnd()) 497 failCount++; 498 499 p = Pattern.compile("catattack"); 500 m = p.matcher("attackattackattackcatatta"); 501 m.find(); 502 if (!m.hitEnd()) 503 failCount++; 504 505 // 8184706: Matching u+0d at EOL against \R should hit-end 506 p = Pattern.compile("...\\R"); 507 m = p.matcher("cat" + (char)0x0a); 508 m.find(); 509 if (m.hitEnd()) 510 failCount++; 511 512 m = p.matcher("cat" + (char)0x0d); 513 m.find(); 514 if (!m.hitEnd()) 515 failCount++; 516 517 m = p.matcher("cat" + (char)0x0d + (char)0x0a); 518 m.find(); 519 if (m.hitEnd()) 520 failCount++; 521 522 report("hitEnd"); 523 } 524 525 // This is for bug 4997476 526 // It is weird code submitted by customer demonstrating a regression wordSearchTest()527 private static void wordSearchTest() throws Exception { 528 String testString = new String("word1 word2 word3"); 529 Pattern p = Pattern.compile("\\b"); 530 Matcher m = p.matcher(testString); 531 int position = 0; 532 int start = 0; 533 while (m.find(position)) { 534 start = m.start(); 535 if (start == testString.length()) 536 break; 537 if (m.find(start+1)) { 538 position = m.start(); 539 } else { 540 position = testString.length(); 541 } 542 if (testString.substring(start, position).equals(" ")) 543 continue; 544 if (!testString.substring(start, position-1).startsWith("word")) 545 failCount++; 546 } 547 report("Customer word search"); 548 } 549 550 // This is for bug 4994840 caretAtEndTest()551 private static void caretAtEndTest() throws Exception { 552 // Problem only occurs with multiline patterns 553 // containing a beginning-of-line caret "^" followed 554 // by an expression that also matches the empty string. 555 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 556 Matcher matcher = pattern.matcher("\r"); 557 matcher.find(); 558 matcher.find(); 559 report("Caret at end"); 560 } 561 562 // This test is for 4979006 563 // Check to see if word boundary construct properly handles unicode 564 // non spacing marks unicodeWordBoundsTest()565 private static void unicodeWordBoundsTest() throws Exception { 566 String spaces = " "; 567 String wordChar = "a"; 568 String nsm = "\u030a"; 569 570 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 571 572 Pattern pattern = Pattern.compile("\\b"); 573 Matcher matcher = pattern.matcher(""); 574 // S=other B=word character N=non spacing mark .=word boundary 575 // SS.BB.SS 576 String input = spaces + wordChar + wordChar + spaces; 577 twoFindIndexes(input, matcher, 2, 4); 578 // SS.BBN.SS 579 input = spaces + wordChar +wordChar + nsm + spaces; 580 twoFindIndexes(input, matcher, 2, 5); 581 // SS.BN.SS 582 input = spaces + wordChar + nsm + spaces; 583 twoFindIndexes(input, matcher, 2, 4); 584 // SS.BNN.SS 585 input = spaces + wordChar + nsm + nsm + spaces; 586 twoFindIndexes(input, matcher, 2, 5); 587 // SSN.BB.SS 588 input = spaces + nsm + wordChar + wordChar + spaces; 589 twoFindIndexes(input, matcher, 3, 5); 590 // SS.BNB.SS 591 input = spaces + wordChar + nsm + wordChar + spaces; 592 twoFindIndexes(input, matcher, 2, 5); 593 // SSNNSS 594 input = spaces + nsm + nsm + spaces; 595 matcher.reset(input); 596 if (matcher.find()) 597 failCount++; 598 // SSN.BBN.SS 599 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 600 twoFindIndexes(input, matcher, 3, 6); 601 602 report("Unicode word boundary"); 603 } 604 twoFindIndexes(String input, Matcher matcher, int a, int b)605 private static void twoFindIndexes(String input, Matcher matcher, int a, 606 int b) throws Exception 607 { 608 matcher.reset(input); 609 matcher.find(); 610 if (matcher.start() != a) 611 failCount++; 612 matcher.find(); 613 if (matcher.start() != b) 614 failCount++; 615 } 616 617 // This test is for 6284152 check(String regex, String input, String[] expected)618 static void check(String regex, String input, String[] expected) { 619 List<String> result = new ArrayList<String>(); 620 Pattern p = Pattern.compile(regex); 621 Matcher m = p.matcher(input); 622 while (m.find()) { 623 result.add(m.group()); 624 } 625 if (!Arrays.asList(expected).equals(result)) 626 failCount++; 627 } 628 lookbehindTest()629 private static void lookbehindTest() throws Exception { 630 //Positive 631 check("(?<=%.{0,5})foo\\d", 632 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 633 new String[]{"foo1", "foo2", "foo3"}); 634 635 //boundary at end of the lookbehind sub-regex should work consistently 636 //with the boundary just after the lookbehind sub-regex 637 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 638 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 639 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 640 check("(?<!abc \\b)foo", "abc foo", new String[0]); 641 642 //Negative 643 check("(?<!%.{0,5})foo\\d", 644 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 645 new String[] {"foo4", "foo5"}); 646 647 //Positive greedy 648 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 649 650 //Positive reluctant 651 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 652 653 //supplementary 654 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 655 new String[] {"fo\ud800\udc00o"}); 656 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 657 new String[] {"fo\ud800\udc00o"}); 658 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 659 new String[] {"fo\ud800\udc00o"}); 660 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 661 new String[] {"fo\ud800\udc00o"}); 662 report("Lookbehind"); 663 } 664 665 // This test is for 4938995 666 // Check to see if weak region boundaries are transparent to 667 // lookahead and lookbehind constructs boundsTest()668 private static void boundsTest() throws Exception { 669 String fullMessage = "catdogcat"; 670 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 671 Matcher matcher = pattern.matcher("catdogca"); 672 matcher.useTransparentBounds(true); 673 if (matcher.find()) 674 failCount++; 675 matcher.reset("atdogcat"); 676 if (matcher.find()) 677 failCount++; 678 matcher.reset(fullMessage); 679 if (!matcher.find()) 680 failCount++; 681 matcher.reset(fullMessage); 682 matcher.region(0,9); 683 if (!matcher.find()) 684 failCount++; 685 matcher.reset(fullMessage); 686 matcher.region(0,6); 687 if (!matcher.find()) 688 failCount++; 689 matcher.reset(fullMessage); 690 matcher.region(3,6); 691 if (!matcher.find()) 692 failCount++; 693 matcher.useTransparentBounds(false); 694 if (matcher.find()) 695 failCount++; 696 697 // Negative lookahead/lookbehind 698 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 699 matcher = pattern.matcher("dogcat"); 700 matcher.useTransparentBounds(true); 701 matcher.region(0,3); 702 if (matcher.find()) 703 failCount++; 704 matcher.reset("catdog"); 705 matcher.region(3,6); 706 if (matcher.find()) 707 failCount++; 708 matcher.useTransparentBounds(false); 709 matcher.reset("dogcat"); 710 matcher.region(0,3); 711 if (!matcher.find()) 712 failCount++; 713 matcher.reset("catdog"); 714 matcher.region(3,6); 715 if (!matcher.find()) 716 failCount++; 717 718 report("Region bounds transparency"); 719 } 720 721 // This test is for 4945394 findFromTest()722 private static void findFromTest() throws Exception { 723 String message = "This is 40 $0 message."; 724 Pattern pat = Pattern.compile("\\$0"); 725 Matcher match = pat.matcher(message); 726 if (!match.find()) 727 failCount++; 728 if (match.find()) 729 failCount++; 730 if (match.find()) 731 failCount++; 732 report("Check for alternating find"); 733 } 734 735 // This test is for 4872664 and 4892980 negatedCharClassTest()736 private static void negatedCharClassTest() throws Exception { 737 Pattern pattern = Pattern.compile("[^>]"); 738 Matcher matcher = pattern.matcher("\u203A"); 739 if (!matcher.matches()) 740 failCount++; 741 pattern = Pattern.compile("[^fr]"); 742 matcher = pattern.matcher("a"); 743 if (!matcher.find()) 744 failCount++; 745 matcher.reset("\u203A"); 746 if (!matcher.find()) 747 failCount++; 748 String s = "for"; 749 String result[] = s.split("[^fr]"); 750 if (!result[0].equals("f")) 751 failCount++; 752 if (!result[1].equals("r")) 753 failCount++; 754 s = "f\u203Ar"; 755 result = s.split("[^fr]"); 756 if (!result[0].equals("f")) 757 failCount++; 758 if (!result[1].equals("r")) 759 failCount++; 760 761 // Test adding to bits, subtracting a node, then adding to bits again 762 pattern = Pattern.compile("[^f\u203Ar]"); 763 matcher = pattern.matcher("a"); 764 if (!matcher.find()) 765 failCount++; 766 matcher.reset("f"); 767 if (matcher.find()) 768 failCount++; 769 matcher.reset("\u203A"); 770 if (matcher.find()) 771 failCount++; 772 matcher.reset("r"); 773 if (matcher.find()) 774 failCount++; 775 matcher.reset("\u203B"); 776 if (!matcher.find()) 777 failCount++; 778 779 // Test subtracting a node, adding to bits, subtracting again 780 pattern = Pattern.compile("[^\u203Ar\u203B]"); 781 matcher = pattern.matcher("a"); 782 if (!matcher.find()) 783 failCount++; 784 matcher.reset("\u203A"); 785 if (matcher.find()) 786 failCount++; 787 matcher.reset("r"); 788 if (matcher.find()) 789 failCount++; 790 matcher.reset("\u203B"); 791 if (matcher.find()) 792 failCount++; 793 matcher.reset("\u203C"); 794 if (!matcher.find()) 795 failCount++; 796 797 report("Negated Character Class"); 798 } 799 800 // This test is for 4628291 toStringTest()801 private static void toStringTest() throws Exception { 802 Pattern pattern = Pattern.compile("b+"); 803 if (pattern.toString() != "b+") 804 failCount++; 805 Matcher matcher = pattern.matcher("aaabbbccc"); 806 String matcherString = matcher.toString(); // unspecified 807 matcher.find(); 808 matcherString = matcher.toString(); // unspecified 809 matcher.region(0,3); 810 matcherString = matcher.toString(); // unspecified 811 matcher.reset(); 812 matcherString = matcher.toString(); // unspecified 813 report("toString"); 814 } 815 816 // This test is for 4808962 literalPatternTest()817 private static void literalPatternTest() throws Exception { 818 int flags = Pattern.LITERAL; 819 820 Pattern pattern = Pattern.compile("abc\\t$^", flags); 821 check(pattern, "abc\\t$^", true); 822 823 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 824 check(pattern, "abc\\t$^", true); 825 826 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 827 check(pattern, "\\Qa^$bcabc\\E", true); 828 check(pattern, "a^$bcabc", false); 829 830 pattern = Pattern.compile("\\\\Q\\\\E"); 831 check(pattern, "\\Q\\E", true); 832 833 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 834 check(pattern, "abcefg\\Q\\Ehij", true); 835 836 pattern = Pattern.compile("\\\\\\Q\\\\E"); 837 check(pattern, "\\\\\\\\", true); 838 839 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 840 check(pattern, "\\Qa^$bcabc\\E", true); 841 check(pattern, "a^$bcabc", false); 842 843 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 844 check(pattern, "\\Qabc\\Edef", true); 845 check(pattern, "abcdef", false); 846 847 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 848 check(pattern, "abc\\Edef", true); 849 check(pattern, "abcdef", false); 850 851 pattern = Pattern.compile(Pattern.quote("\\E")); 852 check(pattern, "\\E", true); 853 854 pattern = Pattern.compile("((((abc.+?:)", flags); 855 check(pattern, "((((abc.+?:)", true); 856 857 flags |= Pattern.MULTILINE; 858 859 pattern = Pattern.compile("^cat$", flags); 860 check(pattern, "abc^cat$def", true); 861 check(pattern, "cat", false); 862 863 flags |= Pattern.CASE_INSENSITIVE; 864 865 pattern = Pattern.compile("abcdef", flags); 866 check(pattern, "ABCDEF", true); 867 check(pattern, "AbCdEf", true); 868 869 flags |= Pattern.DOTALL; 870 871 pattern = Pattern.compile("a...b", flags); 872 check(pattern, "A...b", true); 873 check(pattern, "Axxxb", false); 874 875 flags |= Pattern.CANON_EQ; 876 877 Pattern p = Pattern.compile("testa\u030a", flags); 878 check(pattern, "testa\u030a", false); 879 check(pattern, "test\u00e5", false); 880 881 // Supplementary character test 882 flags = Pattern.LITERAL; 883 884 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 885 check(pattern, toSupplementaries("abc\\t$^"), true); 886 887 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 888 check(pattern, toSupplementaries("abc\\t$^"), true); 889 890 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 891 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 892 check(pattern, toSupplementaries("a^$bcabc"), false); 893 894 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 895 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 896 check(pattern, toSupplementaries("a^$bcabc"), false); 897 898 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 899 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 900 check(pattern, toSupplementaries("abcdef"), false); 901 902 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 903 check(pattern, toSupplementaries("abc\\Edef"), true); 904 check(pattern, toSupplementaries("abcdef"), false); 905 906 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 907 check(pattern, toSupplementaries("((((abc.+?:)"), true); 908 909 flags |= Pattern.MULTILINE; 910 911 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 912 check(pattern, toSupplementaries("abc^cat$def"), true); 913 check(pattern, toSupplementaries("cat"), false); 914 915 flags |= Pattern.DOTALL; 916 917 // note: this is case-sensitive. 918 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 919 check(pattern, toSupplementaries("a...b"), true); 920 check(pattern, toSupplementaries("axxxb"), false); 921 922 flags |= Pattern.CANON_EQ; 923 924 String t = toSupplementaries("test"); 925 p = Pattern.compile(t + "a\u030a", flags); 926 check(pattern, t + "a\u030a", false); 927 check(pattern, t + "\u00e5", false); 928 929 report("Literal pattern"); 930 } 931 932 // This test is for 4803179 933 // This test is also for 4808962, replacement parts literalReplacementTest()934 private static void literalReplacementTest() throws Exception { 935 int flags = Pattern.LITERAL; 936 937 Pattern pattern = Pattern.compile("abc", flags); 938 Matcher matcher = pattern.matcher("zzzabczzz"); 939 String replaceTest = "$0"; 940 String result = matcher.replaceAll(replaceTest); 941 if (!result.equals("zzzabczzz")) 942 failCount++; 943 944 matcher.reset(); 945 String literalReplacement = matcher.quoteReplacement(replaceTest); 946 result = matcher.replaceAll(literalReplacement); 947 if (!result.equals("zzz$0zzz")) 948 failCount++; 949 950 matcher.reset(); 951 replaceTest = "\\t$\\$"; 952 literalReplacement = matcher.quoteReplacement(replaceTest); 953 result = matcher.replaceAll(literalReplacement); 954 if (!result.equals("zzz\\t$\\$zzz")) 955 failCount++; 956 957 // Supplementary character test 958 pattern = Pattern.compile(toSupplementaries("abc"), flags); 959 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 960 replaceTest = "$0"; 961 result = matcher.replaceAll(replaceTest); 962 if (!result.equals(toSupplementaries("zzzabczzz"))) 963 failCount++; 964 965 matcher.reset(); 966 literalReplacement = matcher.quoteReplacement(replaceTest); 967 result = matcher.replaceAll(literalReplacement); 968 if (!result.equals(toSupplementaries("zzz$0zzz"))) 969 failCount++; 970 971 matcher.reset(); 972 replaceTest = "\\t$\\$"; 973 literalReplacement = matcher.quoteReplacement(replaceTest); 974 result = matcher.replaceAll(literalReplacement); 975 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 976 failCount++; 977 978 // IAE should be thrown if backslash or '$' is the last character 979 // in replacement string 980 try { 981 "\uac00".replaceAll("\uac00", "$"); 982 failCount++; 983 } catch (IllegalArgumentException iie) { 984 } catch (Exception e) { 985 failCount++; 986 } 987 try { 988 "\uac00".replaceAll("\uac00", "\\"); 989 failCount++; 990 } catch (IllegalArgumentException iie) { 991 } catch (Exception e) { 992 failCount++; 993 } 994 report("Literal replacement"); 995 } 996 997 // This test is for 4757029 regionTest()998 private static void regionTest() throws Exception { 999 Pattern pattern = Pattern.compile("abc"); 1000 Matcher matcher = pattern.matcher("abcdefabc"); 1001 1002 matcher.region(0,9); 1003 if (!matcher.find()) 1004 failCount++; 1005 if (!matcher.find()) 1006 failCount++; 1007 matcher.region(0,3); 1008 if (!matcher.find()) 1009 failCount++; 1010 matcher.region(3,6); 1011 if (matcher.find()) 1012 failCount++; 1013 matcher.region(0,2); 1014 if (matcher.find()) 1015 failCount++; 1016 1017 expectRegionFail(matcher, 1, -1); 1018 expectRegionFail(matcher, -1, -1); 1019 expectRegionFail(matcher, -1, 1); 1020 expectRegionFail(matcher, 5, 3); 1021 expectRegionFail(matcher, 5, 12); 1022 expectRegionFail(matcher, 12, 12); 1023 1024 pattern = Pattern.compile("^abc$"); 1025 matcher = pattern.matcher("zzzabczzz"); 1026 matcher.region(0,9); 1027 if (matcher.find()) 1028 failCount++; 1029 matcher.region(3,6); 1030 if (!matcher.find()) 1031 failCount++; 1032 matcher.region(3,6); 1033 matcher.useAnchoringBounds(false); 1034 if (matcher.find()) 1035 failCount++; 1036 1037 // Supplementary character test 1038 pattern = Pattern.compile(toSupplementaries("abc")); 1039 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 1040 matcher.region(0,9*2); 1041 if (!matcher.find()) 1042 failCount++; 1043 if (!matcher.find()) 1044 failCount++; 1045 matcher.region(0,3*2); 1046 if (!matcher.find()) 1047 failCount++; 1048 matcher.region(1,3*2); 1049 if (matcher.find()) 1050 failCount++; 1051 matcher.region(3*2,6*2); 1052 if (matcher.find()) 1053 failCount++; 1054 matcher.region(0,2*2); 1055 if (matcher.find()) 1056 failCount++; 1057 matcher.region(0,2*2+1); 1058 if (matcher.find()) 1059 failCount++; 1060 1061 expectRegionFail(matcher, 1*2, -1); 1062 expectRegionFail(matcher, -1, -1); 1063 expectRegionFail(matcher, -1, 1*2); 1064 expectRegionFail(matcher, 5*2, 3*2); 1065 expectRegionFail(matcher, 5*2, 12*2); 1066 expectRegionFail(matcher, 12*2, 12*2); 1067 1068 pattern = Pattern.compile(toSupplementaries("^abc$")); 1069 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1070 matcher.region(0,9*2); 1071 if (matcher.find()) 1072 failCount++; 1073 matcher.region(3*2,6*2); 1074 if (!matcher.find()) 1075 failCount++; 1076 matcher.region(3*2+1,6*2); 1077 if (matcher.find()) 1078 failCount++; 1079 matcher.region(3*2,6*2-1); 1080 if (matcher.find()) 1081 failCount++; 1082 matcher.region(3*2,6*2); 1083 matcher.useAnchoringBounds(false); 1084 if (matcher.find()) 1085 failCount++; 1086 1087 // JDK-8230829 1088 pattern = Pattern.compile("\\ud800\\udc61"); 1089 matcher = pattern.matcher("\ud800\udc61"); 1090 matcher.region(0, 1); 1091 if (matcher.find()) { 1092 failCount++; 1093 System.out.println("Matched a surrogate pair" + 1094 " that crosses border of region"); 1095 } 1096 if (!matcher.hitEnd()) { 1097 failCount++; 1098 System.out.println("Expected to hit the end when" + 1099 " matching a surrogate pair crossing region"); 1100 } 1101 1102 report("Regions"); 1103 } 1104 expectRegionFail(Matcher matcher, int index1, int index2)1105 private static void expectRegionFail(Matcher matcher, int index1, 1106 int index2) 1107 { 1108 try { 1109 matcher.region(index1, index2); 1110 failCount++; 1111 } catch (IndexOutOfBoundsException ioobe) { 1112 // Correct result 1113 } catch (IllegalStateException ise) { 1114 // Correct result 1115 } 1116 } 1117 1118 // This test is for 4803197 escapedSegmentTest()1119 private static void escapedSegmentTest() throws Exception { 1120 1121 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1122 check(pattern, "dir1\\dir2", true); 1123 1124 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1125 check(pattern, "dir1\\dir2\\", true); 1126 1127 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1128 check(pattern, "dir1\\dir2\\", true); 1129 1130 // Supplementary character test 1131 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1132 check(pattern, toSupplementaries("dir1\\dir2"), true); 1133 1134 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1135 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1136 1137 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1138 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1139 1140 report("Escaped segment"); 1141 } 1142 1143 // This test is for 4792284 nonCaptureRepetitionTest()1144 private static void nonCaptureRepetitionTest() throws Exception { 1145 String input = "abcdefgh;"; 1146 1147 String[] patterns = new String[] { 1148 "(?:\\w{4})+;", 1149 "(?:\\w{8})*;", 1150 "(?:\\w{2}){2,4};", 1151 "(?:\\w{4}){2,};", // only matches the 1152 ".*?(?:\\w{5})+;", // specified minimum 1153 ".*?(?:\\w{9})*;", // number of reps - OK 1154 "(?:\\w{4})+?;", // lazy repetition - OK 1155 "(?:\\w{4})++;", // possessive repetition - OK 1156 "(?:\\w{2,}?)+;", // non-deterministic - OK 1157 "(\\w{4})+;", // capturing group - OK 1158 }; 1159 1160 for (int i = 0; i < patterns.length; i++) { 1161 // Check find() 1162 check(patterns[i], 0, input, input, true); 1163 // Check matches() 1164 Pattern p = Pattern.compile(patterns[i]); 1165 Matcher m = p.matcher(input); 1166 1167 if (m.matches()) { 1168 if (!m.group(0).equals(input)) 1169 failCount++; 1170 } else { 1171 failCount++; 1172 } 1173 } 1174 1175 report("Non capturing repetition"); 1176 } 1177 1178 // This test is for 6358731 notCapturedGroupCurlyMatchTest()1179 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1180 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1181 Matcher matcher = pattern.matcher("abcd"); 1182 if (!matcher.matches() || 1183 matcher.group(1) != null || 1184 !matcher.group(2).equals("abcd")) { 1185 failCount++; 1186 } 1187 report("Not captured GroupCurly"); 1188 } 1189 1190 // This test is for 4706545 javaCharClassTest()1191 private static void javaCharClassTest() throws Exception { 1192 for (int i=0; i<1000; i++) { 1193 char c = (char)generator.nextInt(); 1194 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1195 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1196 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1197 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1198 check("{javaDigit}", c, Character.isDigit(c)); 1199 check("{javaDefined}", c, Character.isDefined(c)); 1200 check("{javaLetter}", c, Character.isLetter(c)); 1201 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1202 check("{javaJavaIdentifierStart}", c, 1203 Character.isJavaIdentifierStart(c)); 1204 check("{javaJavaIdentifierPart}", c, 1205 Character.isJavaIdentifierPart(c)); 1206 check("{javaUnicodeIdentifierStart}", c, 1207 Character.isUnicodeIdentifierStart(c)); 1208 check("{javaUnicodeIdentifierPart}", c, 1209 Character.isUnicodeIdentifierPart(c)); 1210 check("{javaIdentifierIgnorable}", c, 1211 Character.isIdentifierIgnorable(c)); 1212 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1213 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1214 check("{javaISOControl}", c, Character.isISOControl(c)); 1215 check("{javaMirrored}", c, Character.isMirrored(c)); 1216 1217 } 1218 1219 // Supplementary character test 1220 for (int i=0; i<1000; i++) { 1221 int c = generator.nextInt(Character.MAX_CODE_POINT 1222 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1223 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1224 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1225 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1226 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1227 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1228 check("{javaDigit}", c, Character.isDigit(c)); 1229 check("{javaDefined}", c, Character.isDefined(c)); 1230 check("{javaLetter}", c, Character.isLetter(c)); 1231 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1232 check("{javaJavaIdentifierStart}", c, 1233 Character.isJavaIdentifierStart(c)); 1234 check("{javaJavaIdentifierPart}", c, 1235 Character.isJavaIdentifierPart(c)); 1236 check("{javaUnicodeIdentifierStart}", c, 1237 Character.isUnicodeIdentifierStart(c)); 1238 check("{javaUnicodeIdentifierPart}", c, 1239 Character.isUnicodeIdentifierPart(c)); 1240 check("{javaIdentifierIgnorable}", c, 1241 Character.isIdentifierIgnorable(c)); 1242 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1243 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1244 check("{javaISOControl}", c, Character.isISOControl(c)); 1245 check("{javaMirrored}", c, Character.isMirrored(c)); 1246 } 1247 1248 report("Java character classes"); 1249 } 1250 1251 // This test is for 4523620 1252 /* 1253 private static void numOccurrencesTest() throws Exception { 1254 Pattern pattern = Pattern.compile("aaa"); 1255 1256 if (pattern.numOccurrences("aaaaaa", false) != 2) 1257 failCount++; 1258 if (pattern.numOccurrences("aaaaaa", true) != 4) 1259 failCount++; 1260 1261 pattern = Pattern.compile("^"); 1262 if (pattern.numOccurrences("aaaaaa", false) != 1) 1263 failCount++; 1264 if (pattern.numOccurrences("aaaaaa", true) != 1) 1265 failCount++; 1266 1267 report("Number of Occurrences"); 1268 } 1269 */ 1270 1271 // This test is for 4776374 caretBetweenTerminatorsTest()1272 private static void caretBetweenTerminatorsTest() throws Exception { 1273 int flags1 = Pattern.DOTALL; 1274 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1275 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1276 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1277 1278 check("^....", flags1, "test\ntest", "test", true); 1279 check(".....^", flags1, "test\ntest", "test", false); 1280 check(".....^", flags1, "test\n", "test", false); 1281 check("....^", flags1, "test\r\n", "test", false); 1282 1283 check("^....", flags2, "test\ntest", "test", true); 1284 check("....^", flags2, "test\ntest", "test", false); 1285 check(".....^", flags2, "test\n", "test", false); 1286 check("....^", flags2, "test\r\n", "test", false); 1287 1288 check("^....", flags3, "test\ntest", "test", true); 1289 check(".....^", flags3, "test\ntest", "test\n", true); 1290 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1291 check(".....^", flags3, "test\n", "test", false); 1292 check(".....^", flags3, "test\r\n", "test", false); 1293 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1294 1295 check("^....", flags4, "test\ntest", "test", true); 1296 check(".....^", flags3, "test\ntest", "test\n", true); 1297 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1298 check(".....^", flags4, "test\n", "test\n", false); 1299 check(".....^", flags4, "test\r\n", "test\r", false); 1300 1301 // Supplementary character test 1302 String t = toSupplementaries("test"); 1303 check("^....", flags1, t+"\n"+t, t, true); 1304 check(".....^", flags1, t+"\n"+t, t, false); 1305 check(".....^", flags1, t+"\n", t, false); 1306 check("....^", flags1, t+"\r\n", t, false); 1307 1308 check("^....", flags2, t+"\n"+t, t, true); 1309 check("....^", flags2, t+"\n"+t, t, false); 1310 check(".....^", flags2, t+"\n", t, false); 1311 check("....^", flags2, t+"\r\n", t, false); 1312 1313 check("^....", flags3, t+"\n"+t, t, true); 1314 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1315 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1316 check(".....^", flags3, t+"\n", t, false); 1317 check(".....^", flags3, t+"\r\n", t, false); 1318 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1319 1320 check("^....", flags4, t+"\n"+t, t, true); 1321 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1322 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1323 check(".....^", flags4, t+"\n", t+"\n", false); 1324 check(".....^", flags4, t+"\r\n", t+"\r", false); 1325 1326 report("Caret between terminators"); 1327 } 1328 1329 // This test is for 4727935 dollarAtEndTest()1330 private static void dollarAtEndTest() throws Exception { 1331 int flags1 = Pattern.DOTALL; 1332 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1333 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1334 1335 check("....$", flags1, "test\n", "test", true); 1336 check("....$", flags1, "test\r\n", "test", true); 1337 check(".....$", flags1, "test\n", "test\n", true); 1338 check(".....$", flags1, "test\u0085", "test\u0085", true); 1339 check("....$", flags1, "test\u0085", "test", true); 1340 1341 check("....$", flags2, "test\n", "test", true); 1342 check(".....$", flags2, "test\n", "test\n", true); 1343 check(".....$", flags2, "test\u0085", "test\u0085", true); 1344 check("....$", flags2, "test\u0085", "est\u0085", true); 1345 1346 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1347 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1348 check("....$blah", flags3, "test\nblah", "!!!!", false); 1349 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1350 1351 // Supplementary character test 1352 String t = toSupplementaries("test"); 1353 String b = toSupplementaries("blah"); 1354 check("....$", flags1, t+"\n", t, true); 1355 check("....$", flags1, t+"\r\n", t, true); 1356 check(".....$", flags1, t+"\n", t+"\n", true); 1357 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1358 check("....$", flags1, t+"\u0085", t, true); 1359 1360 check("....$", flags2, t+"\n", t, true); 1361 check(".....$", flags2, t+"\n", t+"\n", true); 1362 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1363 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1364 1365 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1366 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1367 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1368 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1369 1370 report("Dollar at End"); 1371 } 1372 1373 // This test is for 4711773 multilineDollarTest()1374 private static void multilineDollarTest() throws Exception { 1375 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1376 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1377 matcher.find(); 1378 if (matcher.start(0) != 9) 1379 failCount++; 1380 matcher.find(); 1381 if (matcher.start(0) != 20) 1382 failCount++; 1383 1384 // Supplementary character test 1385 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1386 matcher.find(); 1387 if (matcher.start(0) != 9*2) 1388 failCount++; 1389 matcher.find(); 1390 if (matcher.start(0) != 20*2) 1391 failCount++; 1392 1393 report("Multiline Dollar"); 1394 } 1395 reluctantRepetitionTest()1396 private static void reluctantRepetitionTest() throws Exception { 1397 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1398 check(p, "1 word word word 2", true); 1399 check(p, "1 wor wo w 2", true); 1400 check(p, "1 word word 2", true); 1401 check(p, "1 word 2", true); 1402 check(p, "1 wo w w 2", true); 1403 check(p, "1 wo w 2", true); 1404 check(p, "1 wor w 2", true); 1405 1406 p = Pattern.compile("([a-z])+?c"); 1407 Matcher m = p.matcher("ababcdefdec"); 1408 check(m, "ababc"); 1409 1410 // Supplementary character test 1411 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1412 m = p.matcher(toSupplementaries("ababcdefdec")); 1413 check(m, toSupplementaries("ababc")); 1414 1415 report("Reluctant Repetition"); 1416 } 1417 serializedPattern(Pattern p)1418 private static Pattern serializedPattern(Pattern p) throws Exception { 1419 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1420 ObjectOutputStream oos = new ObjectOutputStream(baos); 1421 oos.writeObject(p); 1422 oos.close(); 1423 try (ObjectInputStream ois = new ObjectInputStream( 1424 new ByteArrayInputStream(baos.toByteArray()))) { 1425 return (Pattern)ois.readObject(); 1426 } 1427 } 1428 serializeTest()1429 private static void serializeTest() throws Exception { 1430 String patternStr = "(b)"; 1431 String matchStr = "b"; 1432 Pattern pattern = Pattern.compile(patternStr); 1433 Pattern serializedPattern = serializedPattern(pattern); 1434 Matcher matcher = serializedPattern.matcher(matchStr); 1435 if (!matcher.matches()) 1436 failCount++; 1437 if (matcher.groupCount() != 1) 1438 failCount++; 1439 1440 pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE); 1441 serializedPattern = serializedPattern(pattern); 1442 if (!serializedPattern.matcher("Ab").matches()) 1443 failCount++; 1444 if (serializedPattern.matcher("AB").matches()) 1445 failCount++; 1446 1447 report("Serialization"); 1448 } 1449 gTest()1450 private static void gTest() { 1451 Pattern pattern = Pattern.compile("\\G\\w"); 1452 Matcher matcher = pattern.matcher("abc#x#x"); 1453 matcher.find(); 1454 matcher.find(); 1455 matcher.find(); 1456 if (matcher.find()) 1457 failCount++; 1458 1459 pattern = Pattern.compile("\\GA*"); 1460 matcher = pattern.matcher("1A2AA3"); 1461 matcher.find(); 1462 if (matcher.find()) 1463 failCount++; 1464 1465 pattern = Pattern.compile("\\GA*"); 1466 matcher = pattern.matcher("1A2AA3"); 1467 if (!matcher.find(1)) 1468 failCount++; 1469 matcher.find(); 1470 if (matcher.find()) 1471 failCount++; 1472 1473 report("\\G"); 1474 } 1475 zTest()1476 private static void zTest() { 1477 Pattern pattern = Pattern.compile("foo\\Z"); 1478 // Positives 1479 check(pattern, "foo\u0085", true); 1480 check(pattern, "foo\u2028", true); 1481 check(pattern, "foo\u2029", true); 1482 check(pattern, "foo\n", true); 1483 check(pattern, "foo\r", true); 1484 check(pattern, "foo\r\n", true); 1485 // Negatives 1486 check(pattern, "fooo", false); 1487 check(pattern, "foo\n\r", false); 1488 1489 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1490 // Positives 1491 check(pattern, "foo", true); 1492 check(pattern, "foo\n", true); 1493 // Negatives 1494 check(pattern, "foo\r", false); 1495 check(pattern, "foo\u0085", false); 1496 check(pattern, "foo\u2028", false); 1497 check(pattern, "foo\u2029", false); 1498 1499 report("\\Z"); 1500 } 1501 replaceFirstTest()1502 private static void replaceFirstTest() { 1503 Pattern pattern = Pattern.compile("(ab)(c*)"); 1504 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1505 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1506 failCount++; 1507 1508 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1509 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1510 failCount++; 1511 1512 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1513 String result = matcher.replaceFirst("$1"); 1514 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1515 failCount++; 1516 1517 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1518 result = matcher.replaceFirst("$2"); 1519 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1520 failCount++; 1521 1522 pattern = Pattern.compile("a*"); 1523 matcher = pattern.matcher("aaaaaaaaaa"); 1524 if (!matcher.replaceFirst("test").equals("test")) 1525 failCount++; 1526 1527 pattern = Pattern.compile("a+"); 1528 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1529 if (!matcher.replaceFirst("test").equals("zzztest")) 1530 failCount++; 1531 1532 // Supplementary character test 1533 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1534 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1535 if (!matcher.replaceFirst(toSupplementaries("test")) 1536 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1537 failCount++; 1538 1539 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1540 if (!matcher.replaceFirst(toSupplementaries("test")). 1541 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1542 failCount++; 1543 1544 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1545 result = matcher.replaceFirst("$1"); 1546 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1547 failCount++; 1548 1549 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1550 result = matcher.replaceFirst("$2"); 1551 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1552 failCount++; 1553 1554 pattern = Pattern.compile(toSupplementaries("a*")); 1555 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1556 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1557 failCount++; 1558 1559 pattern = Pattern.compile(toSupplementaries("a+")); 1560 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1561 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1562 failCount++; 1563 1564 report("Replace First"); 1565 } 1566 unixLinesTest()1567 private static void unixLinesTest() { 1568 Pattern pattern = Pattern.compile(".*"); 1569 Matcher matcher = pattern.matcher("aa\u2028blah"); 1570 matcher.find(); 1571 if (!matcher.group(0).equals("aa")) 1572 failCount++; 1573 1574 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1575 matcher = pattern.matcher("aa\u2028blah"); 1576 matcher.find(); 1577 if (!matcher.group(0).equals("aa\u2028blah")) 1578 failCount++; 1579 1580 pattern = Pattern.compile("[az]$", 1581 Pattern.MULTILINE | Pattern.UNIX_LINES); 1582 matcher = pattern.matcher("aa\u2028zz"); 1583 check(matcher, "a\u2028", false); 1584 1585 // Supplementary character test 1586 pattern = Pattern.compile(".*"); 1587 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1588 matcher.find(); 1589 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1590 failCount++; 1591 1592 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1593 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1594 matcher.find(); 1595 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1596 failCount++; 1597 1598 pattern = Pattern.compile(toSupplementaries("[az]$"), 1599 Pattern.MULTILINE | Pattern.UNIX_LINES); 1600 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1601 check(matcher, toSupplementaries("a\u2028"), false); 1602 1603 report("Unix Lines"); 1604 } 1605 commentsTest()1606 private static void commentsTest() { 1607 int flags = Pattern.COMMENTS; 1608 1609 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1610 Matcher matcher = pattern.matcher("aa#aa"); 1611 if (!matcher.matches()) 1612 failCount++; 1613 1614 pattern = Pattern.compile("aa # blah", flags); 1615 matcher = pattern.matcher("aa"); 1616 if (!matcher.matches()) 1617 failCount++; 1618 1619 pattern = Pattern.compile("aa blah", flags); 1620 matcher = pattern.matcher("aablah"); 1621 if (!matcher.matches()) 1622 failCount++; 1623 1624 pattern = Pattern.compile("aa # blah blech ", flags); 1625 matcher = pattern.matcher("aa"); 1626 if (!matcher.matches()) 1627 failCount++; 1628 1629 pattern = Pattern.compile("aa # blah\n ", flags); 1630 matcher = pattern.matcher("aa"); 1631 if (!matcher.matches()) 1632 failCount++; 1633 1634 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1635 matcher = pattern.matcher("aabc"); 1636 if (!matcher.matches()) 1637 failCount++; 1638 1639 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1640 matcher = pattern.matcher("aabc"); 1641 if (!matcher.matches()) 1642 failCount++; 1643 1644 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1645 matcher = pattern.matcher("aabc#blech"); 1646 if (!matcher.matches()) 1647 failCount++; 1648 1649 // Supplementary character test 1650 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1651 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1652 if (!matcher.matches()) 1653 failCount++; 1654 1655 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1656 matcher = pattern.matcher(toSupplementaries("aa")); 1657 if (!matcher.matches()) 1658 failCount++; 1659 1660 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1661 matcher = pattern.matcher(toSupplementaries("aablah")); 1662 if (!matcher.matches()) 1663 failCount++; 1664 1665 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1666 matcher = pattern.matcher(toSupplementaries("aa")); 1667 if (!matcher.matches()) 1668 failCount++; 1669 1670 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1671 matcher = pattern.matcher(toSupplementaries("aa")); 1672 if (!matcher.matches()) 1673 failCount++; 1674 1675 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1676 matcher = pattern.matcher(toSupplementaries("aabc")); 1677 if (!matcher.matches()) 1678 failCount++; 1679 1680 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1681 matcher = pattern.matcher(toSupplementaries("aabc")); 1682 if (!matcher.matches()) 1683 failCount++; 1684 1685 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1686 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1687 if (!matcher.matches()) 1688 failCount++; 1689 1690 report("Comments"); 1691 } 1692 caseFoldingTest()1693 private static void caseFoldingTest() { // bug 4504687 1694 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1695 Pattern pattern = Pattern.compile("aa", flags); 1696 Matcher matcher = pattern.matcher("ab"); 1697 if (matcher.matches()) 1698 failCount++; 1699 1700 pattern = Pattern.compile("aA", flags); 1701 matcher = pattern.matcher("ab"); 1702 if (matcher.matches()) 1703 failCount++; 1704 1705 pattern = Pattern.compile("aa", flags); 1706 matcher = pattern.matcher("aB"); 1707 if (matcher.matches()) 1708 failCount++; 1709 matcher = pattern.matcher("Ab"); 1710 if (matcher.matches()) 1711 failCount++; 1712 1713 // ASCII "a" 1714 // Latin-1 Supplement "a" + grave 1715 // Cyrillic "a" 1716 String[] patterns = new String[] { 1717 //single 1718 "a", "\u00e0", "\u0430", 1719 //slice 1720 "ab", "\u00e0\u00e1", "\u0430\u0431", 1721 //class single 1722 "[a]", "[\u00e0]", "[\u0430]", 1723 //class range 1724 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1725 //back reference 1726 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1727 }; 1728 1729 String[] texts = new String[] { 1730 "A", "\u00c0", "\u0410", 1731 "AB", "\u00c0\u00c1", "\u0410\u0411", 1732 "A", "\u00c0", "\u0410", 1733 "B", "\u00c2", "\u0411", 1734 "aA", "\u00e0\u00c0", "\u0430\u0410" 1735 }; 1736 1737 boolean[] expected = new boolean[] { 1738 true, false, false, 1739 true, false, false, 1740 true, false, false, 1741 true, false, false, 1742 true, false, false 1743 }; 1744 1745 flags = Pattern.CASE_INSENSITIVE; 1746 for (int i = 0; i < patterns.length; i++) { 1747 pattern = Pattern.compile(patterns[i], flags); 1748 matcher = pattern.matcher(texts[i]); 1749 if (matcher.matches() != expected[i]) { 1750 System.out.println("<1> Failed at " + i); 1751 failCount++; 1752 } 1753 } 1754 1755 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1756 for (int i = 0; i < patterns.length; i++) { 1757 pattern = Pattern.compile(patterns[i], flags); 1758 matcher = pattern.matcher(texts[i]); 1759 if (!matcher.matches()) { 1760 System.out.println("<2> Failed at " + i); 1761 failCount++; 1762 } 1763 } 1764 // flag unicode_case alone should do nothing 1765 flags = Pattern.UNICODE_CASE; 1766 for (int i = 0; i < patterns.length; i++) { 1767 pattern = Pattern.compile(patterns[i], flags); 1768 matcher = pattern.matcher(texts[i]); 1769 if (matcher.matches()) { 1770 System.out.println("<3> Failed at " + i); 1771 failCount++; 1772 } 1773 } 1774 1775 // Special cases: i, I, u+0131 and u+0130 1776 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1777 pattern = Pattern.compile("[h-j]+", flags); 1778 if (!pattern.matcher("\u0131\u0130").matches()) 1779 failCount++; 1780 report("Case Folding"); 1781 } 1782 appendTest()1783 private static void appendTest() { 1784 Pattern pattern = Pattern.compile("(ab)(cd)"); 1785 Matcher matcher = pattern.matcher("abcd"); 1786 String result = matcher.replaceAll("$2$1"); 1787 if (!result.equals("cdab")) 1788 failCount++; 1789 1790 String s1 = "Swap all: first = 123, second = 456"; 1791 String s2 = "Swap one: first = 123, second = 456"; 1792 String r = "$3$2$1"; 1793 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1794 matcher = pattern.matcher(s1); 1795 1796 result = matcher.replaceAll(r); 1797 if (!result.equals("Swap all: 123 = first, 456 = second")) 1798 failCount++; 1799 1800 matcher = pattern.matcher(s2); 1801 1802 if (matcher.find()) { 1803 StringBuffer sb = new StringBuffer(); 1804 matcher.appendReplacement(sb, r); 1805 matcher.appendTail(sb); 1806 result = sb.toString(); 1807 if (!result.equals("Swap one: 123 = first, second = 456")) 1808 failCount++; 1809 } 1810 1811 // Supplementary character test 1812 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1813 matcher = pattern.matcher(toSupplementaries("abcd")); 1814 result = matcher.replaceAll("$2$1"); 1815 if (!result.equals(toSupplementaries("cdab"))) 1816 failCount++; 1817 1818 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1819 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1820 r = toSupplementaries("$3$2$1"); 1821 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1822 matcher = pattern.matcher(s1); 1823 1824 result = matcher.replaceAll(r); 1825 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1826 failCount++; 1827 1828 matcher = pattern.matcher(s2); 1829 1830 if (matcher.find()) { 1831 StringBuffer sb = new StringBuffer(); 1832 matcher.appendReplacement(sb, r); 1833 matcher.appendTail(sb); 1834 result = sb.toString(); 1835 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1836 failCount++; 1837 } 1838 report("Append"); 1839 } 1840 splitTest()1841 private static void splitTest() { 1842 Pattern pattern = Pattern.compile(":"); 1843 String[] result = pattern.split("foo:and:boo", 2); 1844 if (!result[0].equals("foo")) 1845 failCount++; 1846 if (!result[1].equals("and:boo")) 1847 failCount++; 1848 // Supplementary character test 1849 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1850 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1851 if (!result[0].equals(toSupplementaries("foo"))) 1852 failCount++; 1853 if (!result[1].equals(toSupplementaries("andXboo"))) 1854 failCount++; 1855 1856 CharBuffer cb = CharBuffer.allocate(100); 1857 cb.put("foo:and:boo"); 1858 cb.flip(); 1859 result = pattern.split(cb); 1860 if (!result[0].equals("foo")) 1861 failCount++; 1862 if (!result[1].equals("and")) 1863 failCount++; 1864 if (!result[2].equals("boo")) 1865 failCount++; 1866 1867 // Supplementary character test 1868 CharBuffer cbs = CharBuffer.allocate(100); 1869 cbs.put(toSupplementaries("fooXandXboo")); 1870 cbs.flip(); 1871 result = patternX.split(cbs); 1872 if (!result[0].equals(toSupplementaries("foo"))) 1873 failCount++; 1874 if (!result[1].equals(toSupplementaries("and"))) 1875 failCount++; 1876 if (!result[2].equals(toSupplementaries("boo"))) 1877 failCount++; 1878 1879 String source = "0123456789"; 1880 for (int limit=-2; limit<3; limit++) { 1881 for (int x=0; x<10; x++) { 1882 result = source.split(Integer.toString(x), limit); 1883 int expectedLength = limit < 1 ? 2 : limit; 1884 1885 if ((limit == 0) && (x == 9)) { 1886 // expected dropping of "" 1887 if (result.length != 1) 1888 failCount++; 1889 if (!result[0].equals("012345678")) { 1890 failCount++; 1891 } 1892 } else { 1893 if (result.length != expectedLength) { 1894 failCount++; 1895 } 1896 if (!result[0].equals(source.substring(0,x))) { 1897 if (limit != 1) { 1898 failCount++; 1899 } else { 1900 if (!result[0].equals(source.substring(0,10))) { 1901 failCount++; 1902 } 1903 } 1904 } 1905 if (expectedLength > 1) { // Check segment 2 1906 if (!result[1].equals(source.substring(x+1,10))) 1907 failCount++; 1908 } 1909 } 1910 } 1911 } 1912 // Check the case for no match found 1913 for (int limit=-2; limit<3; limit++) { 1914 result = source.split("e", limit); 1915 if (result.length != 1) 1916 failCount++; 1917 if (!result[0].equals(source)) 1918 failCount++; 1919 } 1920 // Check the case for limit == 0, source = ""; 1921 // split() now returns 0-length for empty source "" see #6559590 1922 source = ""; 1923 result = source.split("e", 0); 1924 if (result.length != 1) 1925 failCount++; 1926 if (!result[0].equals(source)) 1927 failCount++; 1928 1929 // Check both split() and splitAsStraem(), especially for zero-lenth 1930 // input and zero-lenth match cases 1931 String[][] input = new String[][] { 1932 { " ", "Abc Efg Hij" }, // normal non-zero-match 1933 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1934 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1935 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1936 { "(?=\\p{Lu})", "AbcEfg" }, 1937 { "(?=\\p{Lu})", "Abc" }, 1938 { " ", "" }, // zero-length input 1939 { ".*", "" }, 1940 1941 // some tests from PatternStreamTest.java 1942 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1943 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1944 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1945 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1946 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1947 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1948 { "\u56da", "" }, 1949 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1950 { "o", "boo:and:foo" }, 1951 { "o", "booooo:and:fooooo" }, 1952 { "o", "fooooo:" }, 1953 }; 1954 1955 String[][] expected = new String[][] { 1956 { "Abc", "Efg", "Hij" }, 1957 { "", "Abc", "Efg", "Hij" }, 1958 { "Abc", "", "Efg", "Hij" }, 1959 { "Abc", "Efg", "Hij" }, 1960 { "Abc", "Efg" }, 1961 { "Abc" }, 1962 { "" }, 1963 { "" }, 1964 1965 { "awgqwefg1fefw", "vssv1vvv1" }, 1966 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1967 { "awgqwefg", "fefw4vssv", "vvv" }, 1968 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1969 { "1", "23", "456", "7890" }, 1970 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1971 { "" }, 1972 { "This", "is", "testing", "", "with", "different", "separators" }, 1973 { "b", "", ":and:f" }, 1974 { "b", "", "", "", "", ":and:f" }, 1975 { "f", "", "", "", "", ":" }, 1976 }; 1977 for (int i = 0; i < input.length; i++) { 1978 pattern = Pattern.compile(input[i][0]); 1979 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1980 failCount++; 1981 } 1982 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1983 // array for zero-length input for now 1984 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1985 expected[i])) { 1986 failCount++; 1987 } 1988 } 1989 report("Split"); 1990 } 1991 negationTest()1992 private static void negationTest() { 1993 Pattern pattern = Pattern.compile("[\\[@^]+"); 1994 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1995 if (!matcher.find()) 1996 failCount++; 1997 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1998 failCount++; 1999 pattern = Pattern.compile("[@\\[^]+"); 2000 matcher = pattern.matcher("@@@@[[[[^^^^"); 2001 if (!matcher.find()) 2002 failCount++; 2003 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 2004 failCount++; 2005 pattern = Pattern.compile("[@\\[^@]+"); 2006 matcher = pattern.matcher("@@@@[[[[^^^^"); 2007 if (!matcher.find()) 2008 failCount++; 2009 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 2010 failCount++; 2011 2012 pattern = Pattern.compile("\\)"); 2013 matcher = pattern.matcher("xxx)xxx"); 2014 if (!matcher.find()) 2015 failCount++; 2016 2017 report("Negation"); 2018 } 2019 ampersandTest()2020 private static void ampersandTest() { 2021 Pattern pattern = Pattern.compile("[&@]+"); 2022 check(pattern, "@@@@&&&&", true); 2023 2024 pattern = Pattern.compile("[@&]+"); 2025 check(pattern, "@@@@&&&&", true); 2026 2027 pattern = Pattern.compile("[@\\&]+"); 2028 check(pattern, "@@@@&&&&", true); 2029 2030 report("Ampersand"); 2031 } 2032 octalTest()2033 private static void octalTest() throws Exception { 2034 Pattern pattern = Pattern.compile("\\u0007"); 2035 Matcher matcher = pattern.matcher("\u0007"); 2036 if (!matcher.matches()) 2037 failCount++; 2038 pattern = Pattern.compile("\\07"); 2039 matcher = pattern.matcher("\u0007"); 2040 if (!matcher.matches()) 2041 failCount++; 2042 pattern = Pattern.compile("\\007"); 2043 matcher = pattern.matcher("\u0007"); 2044 if (!matcher.matches()) 2045 failCount++; 2046 pattern = Pattern.compile("\\0007"); 2047 matcher = pattern.matcher("\u0007"); 2048 if (!matcher.matches()) 2049 failCount++; 2050 pattern = Pattern.compile("\\040"); 2051 matcher = pattern.matcher("\u0020"); 2052 if (!matcher.matches()) 2053 failCount++; 2054 pattern = Pattern.compile("\\0403"); 2055 matcher = pattern.matcher("\u00203"); 2056 if (!matcher.matches()) 2057 failCount++; 2058 pattern = Pattern.compile("\\0103"); 2059 matcher = pattern.matcher("\u0043"); 2060 if (!matcher.matches()) 2061 failCount++; 2062 2063 report("Octal"); 2064 } 2065 longPatternTest()2066 private static void longPatternTest() throws Exception { 2067 try { 2068 Pattern pattern = Pattern.compile( 2069 "a 32-character-long pattern xxxx"); 2070 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 2071 pattern = Pattern.compile("a thirty four character long regex"); 2072 StringBuffer patternToBe = new StringBuffer(101); 2073 for (int i=0; i<100; i++) 2074 patternToBe.append((char)(97 + i%26)); 2075 pattern = Pattern.compile(patternToBe.toString()); 2076 } catch (PatternSyntaxException e) { 2077 failCount++; 2078 } 2079 2080 // Supplementary character test 2081 try { 2082 Pattern pattern = Pattern.compile( 2083 toSupplementaries("a 32-character-long pattern xxxx")); 2084 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2085 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2086 StringBuffer patternToBe = new StringBuffer(101*2); 2087 for (int i=0; i<100; i++) 2088 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2089 + 97 + i%26)); 2090 pattern = Pattern.compile(patternToBe.toString()); 2091 } catch (PatternSyntaxException e) { 2092 failCount++; 2093 } 2094 report("LongPattern"); 2095 } 2096 group0Test()2097 private static void group0Test() throws Exception { 2098 Pattern pattern = Pattern.compile("(tes)ting"); 2099 Matcher matcher = pattern.matcher("testing"); 2100 check(matcher, "testing"); 2101 2102 matcher.reset("testing"); 2103 if (matcher.lookingAt()) { 2104 if (!matcher.group(0).equals("testing")) 2105 failCount++; 2106 } else { 2107 failCount++; 2108 } 2109 2110 matcher.reset("testing"); 2111 if (matcher.matches()) { 2112 if (!matcher.group(0).equals("testing")) 2113 failCount++; 2114 } else { 2115 failCount++; 2116 } 2117 2118 pattern = Pattern.compile("(tes)ting"); 2119 matcher = pattern.matcher("testing"); 2120 if (matcher.lookingAt()) { 2121 if (!matcher.group(0).equals("testing")) 2122 failCount++; 2123 } else { 2124 failCount++; 2125 } 2126 2127 pattern = Pattern.compile("^(tes)ting"); 2128 matcher = pattern.matcher("testing"); 2129 if (matcher.matches()) { 2130 if (!matcher.group(0).equals("testing")) 2131 failCount++; 2132 } else { 2133 failCount++; 2134 } 2135 2136 // Supplementary character test 2137 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2138 matcher = pattern.matcher(toSupplementaries("testing")); 2139 check(matcher, toSupplementaries("testing")); 2140 2141 matcher.reset(toSupplementaries("testing")); 2142 if (matcher.lookingAt()) { 2143 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2144 failCount++; 2145 } else { 2146 failCount++; 2147 } 2148 2149 matcher.reset(toSupplementaries("testing")); 2150 if (matcher.matches()) { 2151 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2152 failCount++; 2153 } else { 2154 failCount++; 2155 } 2156 2157 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2158 matcher = pattern.matcher(toSupplementaries("testing")); 2159 if (matcher.lookingAt()) { 2160 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2161 failCount++; 2162 } else { 2163 failCount++; 2164 } 2165 2166 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2167 matcher = pattern.matcher(toSupplementaries("testing")); 2168 if (matcher.matches()) { 2169 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2170 failCount++; 2171 } else { 2172 failCount++; 2173 } 2174 2175 report("Group0"); 2176 } 2177 findIntTest()2178 private static void findIntTest() throws Exception { 2179 Pattern p = Pattern.compile("blah"); 2180 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2181 boolean result = m.find(2); 2182 if (!result) 2183 failCount++; 2184 2185 p = Pattern.compile("$"); 2186 m = p.matcher("1234567890"); 2187 result = m.find(10); 2188 if (!result) 2189 failCount++; 2190 try { 2191 result = m.find(11); 2192 failCount++; 2193 } catch (IndexOutOfBoundsException e) { 2194 // correct result 2195 } 2196 2197 // Supplementary character test 2198 p = Pattern.compile(toSupplementaries("blah")); 2199 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2200 result = m.find(2); 2201 if (!result) 2202 failCount++; 2203 2204 report("FindInt"); 2205 } 2206 emptyPatternTest()2207 private static void emptyPatternTest() throws Exception { 2208 Pattern p = Pattern.compile(""); 2209 Matcher m = p.matcher("foo"); 2210 2211 // Should find empty pattern at beginning of input 2212 boolean result = m.find(); 2213 if (result != true) 2214 failCount++; 2215 if (m.start() != 0) 2216 failCount++; 2217 2218 // Should not match entire input if input is not empty 2219 m.reset(); 2220 result = m.matches(); 2221 if (result == true) 2222 failCount++; 2223 2224 try { 2225 m.start(0); 2226 failCount++; 2227 } catch (IllegalStateException e) { 2228 // Correct result 2229 } 2230 2231 // Should match entire input if input is empty 2232 m.reset(""); 2233 result = m.matches(); 2234 if (result != true) 2235 failCount++; 2236 2237 result = Pattern.matches("", ""); 2238 if (result != true) 2239 failCount++; 2240 2241 result = Pattern.matches("", "foo"); 2242 if (result == true) 2243 failCount++; 2244 report("EmptyPattern"); 2245 } 2246 charClassTest()2247 private static void charClassTest() throws Exception { 2248 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2249 check(pattern, "blahb]blech", true); 2250 2251 pattern = Pattern.compile("[abc[def]]"); 2252 check(pattern, "b", true); 2253 2254 // Supplementary character tests 2255 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2256 check(pattern, toSupplementaries("blahb]blech"), true); 2257 2258 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2259 check(pattern, toSupplementaries("b"), true); 2260 2261 try { 2262 // u00ff when UNICODE_CASE 2263 pattern = Pattern.compile("[ab\u00ffcd]", 2264 Pattern.CASE_INSENSITIVE| 2265 Pattern.UNICODE_CASE); 2266 check(pattern, "ab\u00ffcd", true); 2267 check(pattern, "Ab\u0178Cd", true); 2268 2269 // u00b5 when UNICODE_CASE 2270 pattern = Pattern.compile("[ab\u00b5cd]", 2271 Pattern.CASE_INSENSITIVE| 2272 Pattern.UNICODE_CASE); 2273 check(pattern, "ab\u00b5cd", true); 2274 check(pattern, "Ab\u039cCd", true); 2275 } catch (Exception e) { failCount++; } 2276 2277 /* Special cases 2278 (1)LatinSmallLetterLongS u+017f 2279 (2)LatinSmallLetterDotlessI u+0131 2280 (3)LatineCapitalLetterIWithDotAbove u+0130 2281 (4)KelvinSign u+212a 2282 (5)AngstromSign u+212b 2283 */ 2284 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2285 pattern = Pattern.compile("[sik\u00c5]+", flags); 2286 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2287 failCount++; 2288 2289 report("CharClass"); 2290 } 2291 caretTest()2292 private static void caretTest() throws Exception { 2293 Pattern pattern = Pattern.compile("\\w*"); 2294 Matcher matcher = pattern.matcher("a#bc#def##g"); 2295 check(matcher, "a"); 2296 check(matcher, ""); 2297 check(matcher, "bc"); 2298 check(matcher, ""); 2299 check(matcher, "def"); 2300 check(matcher, ""); 2301 check(matcher, ""); 2302 check(matcher, "g"); 2303 check(matcher, ""); 2304 if (matcher.find()) 2305 failCount++; 2306 2307 pattern = Pattern.compile("^\\w*"); 2308 matcher = pattern.matcher("a#bc#def##g"); 2309 check(matcher, "a"); 2310 if (matcher.find()) 2311 failCount++; 2312 2313 pattern = Pattern.compile("\\w"); 2314 matcher = pattern.matcher("abc##x"); 2315 check(matcher, "a"); 2316 check(matcher, "b"); 2317 check(matcher, "c"); 2318 check(matcher, "x"); 2319 if (matcher.find()) 2320 failCount++; 2321 2322 pattern = Pattern.compile("^\\w"); 2323 matcher = pattern.matcher("abc##x"); 2324 check(matcher, "a"); 2325 if (matcher.find()) 2326 failCount++; 2327 2328 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2329 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2330 check(matcher, "abc"); 2331 if (matcher.find()) 2332 failCount++; 2333 2334 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2335 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2336 check(matcher, "abc"); 2337 check(matcher, "jkl"); 2338 if (matcher.find()) 2339 failCount++; 2340 2341 pattern = Pattern.compile("^", Pattern.MULTILINE); 2342 matcher = pattern.matcher("this is some text"); 2343 String result = matcher.replaceAll("X"); 2344 if (!result.equals("Xthis is some text")) 2345 failCount++; 2346 2347 pattern = Pattern.compile("^"); 2348 matcher = pattern.matcher("this is some text"); 2349 result = matcher.replaceAll("X"); 2350 if (!result.equals("Xthis is some text")) 2351 failCount++; 2352 2353 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2354 matcher = pattern.matcher("this is some text\n"); 2355 result = matcher.replaceAll("X"); 2356 if (!result.equals("Xthis is some text\n")) 2357 failCount++; 2358 2359 report("Caret"); 2360 } 2361 groupCaptureTest()2362 private static void groupCaptureTest() throws Exception { 2363 // Independent group 2364 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2365 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2366 matcher.find(); 2367 try { 2368 String blah = matcher.group(1); 2369 failCount++; 2370 } catch (IndexOutOfBoundsException ioobe) { 2371 // Good result 2372 } 2373 // Pure group 2374 pattern = Pattern.compile("x+(?:y+)z+"); 2375 matcher = pattern.matcher("xxxyyyzzz"); 2376 matcher.find(); 2377 try { 2378 String blah = matcher.group(1); 2379 failCount++; 2380 } catch (IndexOutOfBoundsException ioobe) { 2381 // Good result 2382 } 2383 2384 // Supplementary character tests 2385 // Independent group 2386 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2387 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2388 matcher.find(); 2389 try { 2390 String blah = matcher.group(1); 2391 failCount++; 2392 } catch (IndexOutOfBoundsException ioobe) { 2393 // Good result 2394 } 2395 // Pure group 2396 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2397 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2398 matcher.find(); 2399 try { 2400 String blah = matcher.group(1); 2401 failCount++; 2402 } catch (IndexOutOfBoundsException ioobe) { 2403 // Good result 2404 } 2405 2406 report("GroupCapture"); 2407 } 2408 backRefTest()2409 private static void backRefTest() throws Exception { 2410 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2411 check(pattern, "zzzaabcazzz", true); 2412 2413 pattern = Pattern.compile("(a*)bc\\1"); 2414 check(pattern, "zzzaabcaazzz", true); 2415 2416 pattern = Pattern.compile("(abc)(def)\\1"); 2417 check(pattern, "abcdefabc", true); 2418 2419 pattern = Pattern.compile("(abc)(def)\\3"); 2420 check(pattern, "abcdefabc", false); 2421 2422 try { 2423 for (int i = 1; i < 10; i++) { 2424 // Make sure backref 1-9 are always accepted 2425 pattern = Pattern.compile("abcdef\\" + i); 2426 // and fail to match if the target group does not exit 2427 check(pattern, "abcdef", false); 2428 } 2429 } catch(PatternSyntaxException e) { 2430 failCount++; 2431 } 2432 2433 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2434 check(pattern, "abcdefghija", false); 2435 check(pattern, "abcdefghija1", true); 2436 2437 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2438 check(pattern, "abcdefghijkk", true); 2439 2440 pattern = Pattern.compile("(a)bcdefghij\\11"); 2441 check(pattern, "abcdefghija1", true); 2442 2443 // Supplementary character tests 2444 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2445 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2446 2447 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2448 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2449 2450 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2451 check(pattern, toSupplementaries("abcdefabc"), true); 2452 2453 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2454 check(pattern, toSupplementaries("abcdefabc"), false); 2455 2456 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2457 check(pattern, toSupplementaries("abcdefghija"), false); 2458 check(pattern, toSupplementaries("abcdefghija1"), true); 2459 2460 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2461 check(pattern, toSupplementaries("abcdefghijkk"), true); 2462 2463 report("BackRef"); 2464 } 2465 2466 /** 2467 * Unicode Technical Report #18, section 2.6 End of Line 2468 * There is no empty line to be matched in the sequence \u000D\u000A 2469 * but there is an empty line in the sequence \u000A\u000D. 2470 */ anchorTest()2471 private static void anchorTest() throws Exception { 2472 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2473 Matcher m = p.matcher("blah1\r\nblah2"); 2474 m.find(); 2475 m.find(); 2476 if (!m.group().equals("blah2")) 2477 failCount++; 2478 2479 m.reset("blah1\n\rblah2"); 2480 m.find(); 2481 m.find(); 2482 m.find(); 2483 if (!m.group().equals("blah2")) 2484 failCount++; 2485 2486 // Test behavior of $ with \r\n at end of input 2487 p = Pattern.compile(".+$"); 2488 m = p.matcher("blah1\r\n"); 2489 if (!m.find()) 2490 failCount++; 2491 if (!m.group().equals("blah1")) 2492 failCount++; 2493 if (m.find()) 2494 failCount++; 2495 2496 // Test behavior of $ with \r\n at end of input in multiline 2497 p = Pattern.compile(".+$", Pattern.MULTILINE); 2498 m = p.matcher("blah1\r\n"); 2499 if (!m.find()) 2500 failCount++; 2501 if (m.find()) 2502 failCount++; 2503 2504 // Test for $ recognition of \u0085 for bug 4527731 2505 p = Pattern.compile(".+$", Pattern.MULTILINE); 2506 m = p.matcher("blah1\u0085"); 2507 if (!m.find()) 2508 failCount++; 2509 2510 // Supplementary character test 2511 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2512 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2513 m.find(); 2514 m.find(); 2515 if (!m.group().equals(toSupplementaries("blah2"))) 2516 failCount++; 2517 2518 m.reset(toSupplementaries("blah1\n\rblah2")); 2519 m.find(); 2520 m.find(); 2521 m.find(); 2522 if (!m.group().equals(toSupplementaries("blah2"))) 2523 failCount++; 2524 2525 // Test behavior of $ with \r\n at end of input 2526 p = Pattern.compile(".+$"); 2527 m = p.matcher(toSupplementaries("blah1\r\n")); 2528 if (!m.find()) 2529 failCount++; 2530 if (!m.group().equals(toSupplementaries("blah1"))) 2531 failCount++; 2532 if (m.find()) 2533 failCount++; 2534 2535 // Test behavior of $ with \r\n at end of input in multiline 2536 p = Pattern.compile(".+$", Pattern.MULTILINE); 2537 m = p.matcher(toSupplementaries("blah1\r\n")); 2538 if (!m.find()) 2539 failCount++; 2540 if (m.find()) 2541 failCount++; 2542 2543 // Test for $ recognition of \u0085 for bug 4527731 2544 p = Pattern.compile(".+$", Pattern.MULTILINE); 2545 m = p.matcher(toSupplementaries("blah1\u0085")); 2546 if (!m.find()) 2547 failCount++; 2548 2549 report("Anchors"); 2550 } 2551 2552 /** 2553 * A basic sanity test of Matcher.lookingAt(). 2554 */ lookingAtTest()2555 private static void lookingAtTest() throws Exception { 2556 Pattern p = Pattern.compile("(ab)(c*)"); 2557 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2558 2559 if (!m.lookingAt()) 2560 failCount++; 2561 2562 if (!m.group().equals(m.group(0))) 2563 failCount++; 2564 2565 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2566 if (m.lookingAt()) 2567 failCount++; 2568 2569 // Supplementary character test 2570 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2571 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2572 2573 if (!m.lookingAt()) 2574 failCount++; 2575 2576 if (!m.group().equals(m.group(0))) 2577 failCount++; 2578 2579 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2580 if (m.lookingAt()) 2581 failCount++; 2582 2583 report("Looking At"); 2584 } 2585 2586 /** 2587 * A basic sanity test of Matcher.matches(). 2588 */ matchesTest()2589 private static void matchesTest() throws Exception { 2590 // matches() 2591 Pattern p = Pattern.compile("ulb(c*)"); 2592 Matcher m = p.matcher("ulbcccccc"); 2593 if (!m.matches()) 2594 failCount++; 2595 2596 // find() but not matches() 2597 m.reset("zzzulbcccccc"); 2598 if (m.matches()) 2599 failCount++; 2600 2601 // lookingAt() but not matches() 2602 m.reset("ulbccccccdef"); 2603 if (m.matches()) 2604 failCount++; 2605 2606 // matches() 2607 p = Pattern.compile("a|ad"); 2608 m = p.matcher("ad"); 2609 if (!m.matches()) 2610 failCount++; 2611 2612 // Supplementary character test 2613 // matches() 2614 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2615 m = p.matcher(toSupplementaries("ulbcccccc")); 2616 if (!m.matches()) 2617 failCount++; 2618 2619 // find() but not matches() 2620 m.reset(toSupplementaries("zzzulbcccccc")); 2621 if (m.matches()) 2622 failCount++; 2623 2624 // lookingAt() but not matches() 2625 m.reset(toSupplementaries("ulbccccccdef")); 2626 if (m.matches()) 2627 failCount++; 2628 2629 // matches() 2630 p = Pattern.compile(toSupplementaries("a|ad")); 2631 m = p.matcher(toSupplementaries("ad")); 2632 if (!m.matches()) 2633 failCount++; 2634 2635 report("Matches"); 2636 } 2637 2638 /** 2639 * A basic sanity test of Pattern.matches(). 2640 */ patternMatchesTest()2641 private static void patternMatchesTest() throws Exception { 2642 // matches() 2643 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2644 toSupplementaries("ulbcccccc"))) 2645 failCount++; 2646 2647 // find() but not matches() 2648 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2649 toSupplementaries("zzzulbcccccc"))) 2650 failCount++; 2651 2652 // lookingAt() but not matches() 2653 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2654 toSupplementaries("ulbccccccdef"))) 2655 failCount++; 2656 2657 // Supplementary character test 2658 // matches() 2659 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2660 toSupplementaries("ulbcccccc"))) 2661 failCount++; 2662 2663 // find() but not matches() 2664 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2665 toSupplementaries("zzzulbcccccc"))) 2666 failCount++; 2667 2668 // lookingAt() but not matches() 2669 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2670 toSupplementaries("ulbccccccdef"))) 2671 failCount++; 2672 2673 report("Pattern Matches"); 2674 } 2675 2676 /** 2677 * Canonical equivalence testing. Tests the ability of the engine 2678 * to match sequences that are not explicitly specified in the 2679 * pattern when they are considered equivalent by the Unicode Standard. 2680 */ ceTest()2681 private static void ceTest() throws Exception { 2682 // Decomposed char outside char classes 2683 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2684 Matcher m = p.matcher("test\u00e5"); 2685 if (!m.matches()) 2686 failCount++; 2687 2688 m.reset("testa\u030a"); 2689 if (!m.matches()) 2690 failCount++; 2691 2692 // Composed char outside char classes 2693 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2694 m = p.matcher("test\u00e5"); 2695 if (!m.matches()) 2696 failCount++; 2697 2698 m.reset("testa\u030a"); 2699 if (!m.find()) 2700 failCount++; 2701 2702 // Decomposed char inside a char class 2703 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2704 m = p.matcher("test\u00e5"); 2705 if (!m.find()) 2706 failCount++; 2707 2708 m.reset("testa\u030a"); 2709 if (!m.find()) 2710 failCount++; 2711 2712 // Composed char inside a char class 2713 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2714 m = p.matcher("test\u00e5"); 2715 if (!m.find()) 2716 failCount++; 2717 2718 m.reset("testa\u0300"); 2719 if (!m.find()) 2720 failCount++; 2721 2722 m.reset("testa\u030a"); 2723 if (!m.find()) 2724 failCount++; 2725 2726 // Marks that cannot legally change order and be equivalent 2727 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2728 check(p, "testa\u0308\u0300", true); 2729 check(p, "testa\u0300\u0308", false); 2730 2731 // Marks that can legally change order and be equivalent 2732 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2733 check(p, "testa\u0308\u0323", true); 2734 check(p, "testa\u0323\u0308", true); 2735 2736 // Test all equivalences of the sequence a\u0308\u0323\u0300 2737 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2738 check(p, "testa\u0308\u0323\u0300", true); 2739 check(p, "testa\u0323\u0308\u0300", true); 2740 check(p, "testa\u0308\u0300\u0323", true); 2741 check(p, "test\u00e4\u0323\u0300", true); 2742 check(p, "test\u00e4\u0300\u0323", true); 2743 2744 Object[][] data = new Object[][] { 2745 2746 // JDK-4867170 2747 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2748 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2749 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2750 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2751 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2752 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2753 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2754 2755 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2756 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2757 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2758 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2759 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2760 2761 // backtracking, force to match "\u1f80", instead of \u1f82" 2762 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2763 2764 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2765 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2766 2767 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2768 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2769 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2770 2771 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2772 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2773 { "\u1f80", "ab\u1f80cd", "f", true }, 2774 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2775 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2776 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2777 { "\u1f82", "\u1f80\u0300", "m", true }, 2778 2779 // JDK-7080302 # compile failed 2780 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2781 2782 // JDK-6728861, same cause as above one 2783 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2784 2785 // JDK-6995635 2786 { "(\u00e9)", "e\u0301", "m", true }, 2787 2788 // JDK-6736245 2789 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2790 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2791 { "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD 2792 2793 // 4916384. 2794 // Decomposed hangul (jamos) works inside clazz 2795 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2796 { "[\u1100\u1161]", "\uac00", "m", true}, 2797 2798 { "[\uac00]", "\u1100\u1161", "m", true}, 2799 { "[\uac00]", "\uac00", "m", true}, 2800 2801 // Decomposed hangul (jamos) 2802 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2803 { "\u1100\u1161", "\uac00", "m", true}, 2804 2805 // Composed hangul 2806 { "\uac00", "\u1100\u1161", "m", true }, 2807 { "\uac00", "\uac00", "m", true }, 2808 2809 /* Need a NFDSlice to nfd the source to solve this issue 2810 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2811 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2812 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2813 2814 // Decomposed supplementary outside char classes 2815 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2816 // Composed supplementary outside char classes 2817 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2818 */ 2819 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2820 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2821 2822 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2823 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2824 }; 2825 2826 int failCount = 0; 2827 for (Object[] d : data) { 2828 String pn = (String)d[0]; 2829 String tt = (String)d[1]; 2830 boolean isFind = "f".equals(((String)d[2])); 2831 boolean expected = (boolean)d[3]; 2832 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2833 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2834 if (ret != expected) { 2835 failCount++; 2836 continue; 2837 } 2838 } 2839 report("Canonical Equivalence"); 2840 } 2841 2842 /** 2843 * A basic sanity test of Matcher.replaceAll(). 2844 */ globalSubstitute()2845 private static void globalSubstitute() throws Exception { 2846 // Global substitution with a literal 2847 Pattern p = Pattern.compile("(ab)(c*)"); 2848 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2849 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2850 failCount++; 2851 2852 m.reset("zzzabccczzzabcczzzabccczzz"); 2853 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2854 failCount++; 2855 2856 // Global substitution with groups 2857 m.reset("zzzabccczzzabcczzzabccczzz"); 2858 String result = m.replaceAll("$1"); 2859 if (!result.equals("zzzabzzzabzzzabzzz")) 2860 failCount++; 2861 2862 // Supplementary character test 2863 // Global substitution with a literal 2864 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2865 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2866 if (!m.replaceAll(toSupplementaries("test")). 2867 equals(toSupplementaries("testzzztestzzztest"))) 2868 failCount++; 2869 2870 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2871 if (!m.replaceAll(toSupplementaries("test")). 2872 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2873 failCount++; 2874 2875 // Global substitution with groups 2876 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2877 result = m.replaceAll("$1"); 2878 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2879 failCount++; 2880 2881 report("Global Substitution"); 2882 } 2883 2884 /** 2885 * Tests the usage of Matcher.appendReplacement() with literal 2886 * and group substitutions. 2887 */ stringbufferSubstitute()2888 private static void stringbufferSubstitute() throws Exception { 2889 // SB substitution with literal 2890 String blah = "zzzblahzzz"; 2891 Pattern p = Pattern.compile("blah"); 2892 Matcher m = p.matcher(blah); 2893 StringBuffer result = new StringBuffer(); 2894 try { 2895 m.appendReplacement(result, "blech"); 2896 failCount++; 2897 } catch (IllegalStateException e) { 2898 } 2899 m.find(); 2900 m.appendReplacement(result, "blech"); 2901 if (!result.toString().equals("zzzblech")) 2902 failCount++; 2903 2904 m.appendTail(result); 2905 if (!result.toString().equals("zzzblechzzz")) 2906 failCount++; 2907 2908 // SB substitution with groups 2909 blah = "zzzabcdzzz"; 2910 p = Pattern.compile("(ab)(cd)*"); 2911 m = p.matcher(blah); 2912 result = new StringBuffer(); 2913 try { 2914 m.appendReplacement(result, "$1"); 2915 failCount++; 2916 } catch (IllegalStateException e) { 2917 } 2918 m.find(); 2919 m.appendReplacement(result, "$1"); 2920 if (!result.toString().equals("zzzab")) 2921 failCount++; 2922 2923 m.appendTail(result); 2924 if (!result.toString().equals("zzzabzzz")) 2925 failCount++; 2926 2927 // SB substitution with 3 groups 2928 blah = "zzzabcdcdefzzz"; 2929 p = Pattern.compile("(ab)(cd)*(ef)"); 2930 m = p.matcher(blah); 2931 result = new StringBuffer(); 2932 try { 2933 m.appendReplacement(result, "$1w$2w$3"); 2934 failCount++; 2935 } catch (IllegalStateException e) { 2936 } 2937 m.find(); 2938 m.appendReplacement(result, "$1w$2w$3"); 2939 if (!result.toString().equals("zzzabwcdwef")) 2940 failCount++; 2941 2942 m.appendTail(result); 2943 if (!result.toString().equals("zzzabwcdwefzzz")) 2944 failCount++; 2945 2946 // SB substitution with groups and three matches 2947 // skipping middle match 2948 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2949 p = Pattern.compile("(ab)(cd*)"); 2950 m = p.matcher(blah); 2951 result = new StringBuffer(); 2952 try { 2953 m.appendReplacement(result, "$1"); 2954 failCount++; 2955 } catch (IllegalStateException e) { 2956 } 2957 m.find(); 2958 m.appendReplacement(result, "$1"); 2959 if (!result.toString().equals("zzzab")) 2960 failCount++; 2961 2962 m.find(); 2963 m.find(); 2964 m.appendReplacement(result, "$2"); 2965 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2966 failCount++; 2967 2968 m.appendTail(result); 2969 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2970 failCount++; 2971 2972 // Check to make sure escaped $ is ignored 2973 blah = "zzzabcdcdefzzz"; 2974 p = Pattern.compile("(ab)(cd)*(ef)"); 2975 m = p.matcher(blah); 2976 result = new StringBuffer(); 2977 m.find(); 2978 m.appendReplacement(result, "$1w\\$2w$3"); 2979 if (!result.toString().equals("zzzabw$2wef")) 2980 failCount++; 2981 2982 m.appendTail(result); 2983 if (!result.toString().equals("zzzabw$2wefzzz")) 2984 failCount++; 2985 2986 // Check to make sure a reference to nonexistent group causes error 2987 blah = "zzzabcdcdefzzz"; 2988 p = Pattern.compile("(ab)(cd)*(ef)"); 2989 m = p.matcher(blah); 2990 result = new StringBuffer(); 2991 m.find(); 2992 try { 2993 m.appendReplacement(result, "$1w$5w$3"); 2994 failCount++; 2995 } catch (IndexOutOfBoundsException ioobe) { 2996 // Correct result 2997 } 2998 2999 // Check double digit group references 3000 blah = "zzz123456789101112zzz"; 3001 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3002 m = p.matcher(blah); 3003 result = new StringBuffer(); 3004 m.find(); 3005 m.appendReplacement(result, "$1w$11w$3"); 3006 if (!result.toString().equals("zzz1w11w3")) 3007 failCount++; 3008 3009 // Check to make sure it backs off $15 to $1 if only three groups 3010 blah = "zzzabcdcdefzzz"; 3011 p = Pattern.compile("(ab)(cd)*(ef)"); 3012 m = p.matcher(blah); 3013 result = new StringBuffer(); 3014 m.find(); 3015 m.appendReplacement(result, "$1w$15w$3"); 3016 if (!result.toString().equals("zzzabwab5wef")) 3017 failCount++; 3018 3019 3020 // Supplementary character test 3021 // SB substitution with literal 3022 blah = toSupplementaries("zzzblahzzz"); 3023 p = Pattern.compile(toSupplementaries("blah")); 3024 m = p.matcher(blah); 3025 result = new StringBuffer(); 3026 try { 3027 m.appendReplacement(result, toSupplementaries("blech")); 3028 failCount++; 3029 } catch (IllegalStateException e) { 3030 } 3031 m.find(); 3032 m.appendReplacement(result, toSupplementaries("blech")); 3033 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3034 failCount++; 3035 3036 m.appendTail(result); 3037 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3038 failCount++; 3039 3040 // SB substitution with groups 3041 blah = toSupplementaries("zzzabcdzzz"); 3042 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3043 m = p.matcher(blah); 3044 result = new StringBuffer(); 3045 try { 3046 m.appendReplacement(result, "$1"); 3047 failCount++; 3048 } catch (IllegalStateException e) { 3049 } 3050 m.find(); 3051 m.appendReplacement(result, "$1"); 3052 if (!result.toString().equals(toSupplementaries("zzzab"))) 3053 failCount++; 3054 3055 m.appendTail(result); 3056 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3057 failCount++; 3058 3059 // SB substitution with 3 groups 3060 blah = toSupplementaries("zzzabcdcdefzzz"); 3061 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3062 m = p.matcher(blah); 3063 result = new StringBuffer(); 3064 try { 3065 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3066 failCount++; 3067 } catch (IllegalStateException e) { 3068 } 3069 m.find(); 3070 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3071 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3072 failCount++; 3073 3074 m.appendTail(result); 3075 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3076 failCount++; 3077 3078 // SB substitution with groups and three matches 3079 // skipping middle match 3080 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3081 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3082 m = p.matcher(blah); 3083 result = new StringBuffer(); 3084 try { 3085 m.appendReplacement(result, "$1"); 3086 failCount++; 3087 } catch (IllegalStateException e) { 3088 } 3089 m.find(); 3090 m.appendReplacement(result, "$1"); 3091 if (!result.toString().equals(toSupplementaries("zzzab"))) 3092 failCount++; 3093 3094 m.find(); 3095 m.find(); 3096 m.appendReplacement(result, "$2"); 3097 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3098 failCount++; 3099 3100 m.appendTail(result); 3101 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3102 failCount++; 3103 3104 // Check to make sure escaped $ is ignored 3105 blah = toSupplementaries("zzzabcdcdefzzz"); 3106 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3107 m = p.matcher(blah); 3108 result = new StringBuffer(); 3109 m.find(); 3110 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3111 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3112 failCount++; 3113 3114 m.appendTail(result); 3115 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3116 failCount++; 3117 3118 // Check to make sure a reference to nonexistent group causes error 3119 blah = toSupplementaries("zzzabcdcdefzzz"); 3120 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3121 m = p.matcher(blah); 3122 result = new StringBuffer(); 3123 m.find(); 3124 try { 3125 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3126 failCount++; 3127 } catch (IndexOutOfBoundsException ioobe) { 3128 // Correct result 3129 } 3130 3131 // Check double digit group references 3132 blah = toSupplementaries("zzz123456789101112zzz"); 3133 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3134 m = p.matcher(blah); 3135 result = new StringBuffer(); 3136 m.find(); 3137 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3138 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3139 failCount++; 3140 3141 // Check to make sure it backs off $15 to $1 if only three groups 3142 blah = toSupplementaries("zzzabcdcdefzzz"); 3143 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3144 m = p.matcher(blah); 3145 result = new StringBuffer(); 3146 m.find(); 3147 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3148 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3149 failCount++; 3150 3151 // Check nothing has been appended into the output buffer if 3152 // the replacement string triggers IllegalArgumentException. 3153 p = Pattern.compile("(abc)"); 3154 m = p.matcher("abcd"); 3155 result = new StringBuffer(); 3156 m.find(); 3157 try { 3158 m.appendReplacement(result, ("xyz$g")); 3159 failCount++; 3160 } catch (IllegalArgumentException iae) { 3161 if (result.length() != 0) 3162 failCount++; 3163 } 3164 3165 report("SB Substitution"); 3166 } 3167 3168 /** 3169 * Tests the usage of Matcher.appendReplacement() with literal 3170 * and group substitutions. 3171 */ stringbuilderSubstitute()3172 private static void stringbuilderSubstitute() throws Exception { 3173 // SB substitution with literal 3174 String blah = "zzzblahzzz"; 3175 Pattern p = Pattern.compile("blah"); 3176 Matcher m = p.matcher(blah); 3177 StringBuilder result = new StringBuilder(); 3178 try { 3179 m.appendReplacement(result, "blech"); 3180 failCount++; 3181 } catch (IllegalStateException e) { 3182 } 3183 m.find(); 3184 m.appendReplacement(result, "blech"); 3185 if (!result.toString().equals("zzzblech")) 3186 failCount++; 3187 3188 m.appendTail(result); 3189 if (!result.toString().equals("zzzblechzzz")) 3190 failCount++; 3191 3192 // SB substitution with groups 3193 blah = "zzzabcdzzz"; 3194 p = Pattern.compile("(ab)(cd)*"); 3195 m = p.matcher(blah); 3196 result = new StringBuilder(); 3197 try { 3198 m.appendReplacement(result, "$1"); 3199 failCount++; 3200 } catch (IllegalStateException e) { 3201 } 3202 m.find(); 3203 m.appendReplacement(result, "$1"); 3204 if (!result.toString().equals("zzzab")) 3205 failCount++; 3206 3207 m.appendTail(result); 3208 if (!result.toString().equals("zzzabzzz")) 3209 failCount++; 3210 3211 // SB substitution with 3 groups 3212 blah = "zzzabcdcdefzzz"; 3213 p = Pattern.compile("(ab)(cd)*(ef)"); 3214 m = p.matcher(blah); 3215 result = new StringBuilder(); 3216 try { 3217 m.appendReplacement(result, "$1w$2w$3"); 3218 failCount++; 3219 } catch (IllegalStateException e) { 3220 } 3221 m.find(); 3222 m.appendReplacement(result, "$1w$2w$3"); 3223 if (!result.toString().equals("zzzabwcdwef")) 3224 failCount++; 3225 3226 m.appendTail(result); 3227 if (!result.toString().equals("zzzabwcdwefzzz")) 3228 failCount++; 3229 3230 // SB substitution with groups and three matches 3231 // skipping middle match 3232 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3233 p = Pattern.compile("(ab)(cd*)"); 3234 m = p.matcher(blah); 3235 result = new StringBuilder(); 3236 try { 3237 m.appendReplacement(result, "$1"); 3238 failCount++; 3239 } catch (IllegalStateException e) { 3240 } 3241 m.find(); 3242 m.appendReplacement(result, "$1"); 3243 if (!result.toString().equals("zzzab")) 3244 failCount++; 3245 3246 m.find(); 3247 m.find(); 3248 m.appendReplacement(result, "$2"); 3249 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3250 failCount++; 3251 3252 m.appendTail(result); 3253 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3254 failCount++; 3255 3256 // Check to make sure escaped $ is ignored 3257 blah = "zzzabcdcdefzzz"; 3258 p = Pattern.compile("(ab)(cd)*(ef)"); 3259 m = p.matcher(blah); 3260 result = new StringBuilder(); 3261 m.find(); 3262 m.appendReplacement(result, "$1w\\$2w$3"); 3263 if (!result.toString().equals("zzzabw$2wef")) 3264 failCount++; 3265 3266 m.appendTail(result); 3267 if (!result.toString().equals("zzzabw$2wefzzz")) 3268 failCount++; 3269 3270 // Check to make sure a reference to nonexistent group causes error 3271 blah = "zzzabcdcdefzzz"; 3272 p = Pattern.compile("(ab)(cd)*(ef)"); 3273 m = p.matcher(blah); 3274 result = new StringBuilder(); 3275 m.find(); 3276 try { 3277 m.appendReplacement(result, "$1w$5w$3"); 3278 failCount++; 3279 } catch (IndexOutOfBoundsException ioobe) { 3280 // Correct result 3281 } 3282 3283 // Check double digit group references 3284 blah = "zzz123456789101112zzz"; 3285 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3286 m = p.matcher(blah); 3287 result = new StringBuilder(); 3288 m.find(); 3289 m.appendReplacement(result, "$1w$11w$3"); 3290 if (!result.toString().equals("zzz1w11w3")) 3291 failCount++; 3292 3293 // Check to make sure it backs off $15 to $1 if only three groups 3294 blah = "zzzabcdcdefzzz"; 3295 p = Pattern.compile("(ab)(cd)*(ef)"); 3296 m = p.matcher(blah); 3297 result = new StringBuilder(); 3298 m.find(); 3299 m.appendReplacement(result, "$1w$15w$3"); 3300 if (!result.toString().equals("zzzabwab5wef")) 3301 failCount++; 3302 3303 3304 // Supplementary character test 3305 // SB substitution with literal 3306 blah = toSupplementaries("zzzblahzzz"); 3307 p = Pattern.compile(toSupplementaries("blah")); 3308 m = p.matcher(blah); 3309 result = new StringBuilder(); 3310 try { 3311 m.appendReplacement(result, toSupplementaries("blech")); 3312 failCount++; 3313 } catch (IllegalStateException e) { 3314 } 3315 m.find(); 3316 m.appendReplacement(result, toSupplementaries("blech")); 3317 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3318 failCount++; 3319 m.appendTail(result); 3320 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3321 failCount++; 3322 3323 // SB substitution with groups 3324 blah = toSupplementaries("zzzabcdzzz"); 3325 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3326 m = p.matcher(blah); 3327 result = new StringBuilder(); 3328 try { 3329 m.appendReplacement(result, "$1"); 3330 failCount++; 3331 } catch (IllegalStateException e) { 3332 } 3333 m.find(); 3334 m.appendReplacement(result, "$1"); 3335 if (!result.toString().equals(toSupplementaries("zzzab"))) 3336 failCount++; 3337 3338 m.appendTail(result); 3339 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3340 failCount++; 3341 3342 // SB substitution with 3 groups 3343 blah = toSupplementaries("zzzabcdcdefzzz"); 3344 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3345 m = p.matcher(blah); 3346 result = new StringBuilder(); 3347 try { 3348 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3349 failCount++; 3350 } catch (IllegalStateException e) { 3351 } 3352 m.find(); 3353 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3354 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3355 failCount++; 3356 3357 m.appendTail(result); 3358 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3359 failCount++; 3360 3361 // SB substitution with groups and three matches 3362 // skipping middle match 3363 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3364 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3365 m = p.matcher(blah); 3366 result = new StringBuilder(); 3367 try { 3368 m.appendReplacement(result, "$1"); 3369 failCount++; 3370 } catch (IllegalStateException e) { 3371 } 3372 m.find(); 3373 m.appendReplacement(result, "$1"); 3374 if (!result.toString().equals(toSupplementaries("zzzab"))) 3375 failCount++; 3376 3377 m.find(); 3378 m.find(); 3379 m.appendReplacement(result, "$2"); 3380 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3381 failCount++; 3382 3383 m.appendTail(result); 3384 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3385 failCount++; 3386 3387 // Check to make sure escaped $ is ignored 3388 blah = toSupplementaries("zzzabcdcdefzzz"); 3389 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3390 m = p.matcher(blah); 3391 result = new StringBuilder(); 3392 m.find(); 3393 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3394 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3395 failCount++; 3396 3397 m.appendTail(result); 3398 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3399 failCount++; 3400 3401 // Check to make sure a reference to nonexistent group causes error 3402 blah = toSupplementaries("zzzabcdcdefzzz"); 3403 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3404 m = p.matcher(blah); 3405 result = new StringBuilder(); 3406 m.find(); 3407 try { 3408 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3409 failCount++; 3410 } catch (IndexOutOfBoundsException ioobe) { 3411 // Correct result 3412 } 3413 // Check double digit group references 3414 blah = toSupplementaries("zzz123456789101112zzz"); 3415 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3416 m = p.matcher(blah); 3417 result = new StringBuilder(); 3418 m.find(); 3419 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3420 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3421 failCount++; 3422 3423 // Check to make sure it backs off $15 to $1 if only three groups 3424 blah = toSupplementaries("zzzabcdcdefzzz"); 3425 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3426 m = p.matcher(blah); 3427 result = new StringBuilder(); 3428 m.find(); 3429 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3430 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3431 failCount++; 3432 // Check nothing has been appended into the output buffer if 3433 // the replacement string triggers IllegalArgumentException. 3434 p = Pattern.compile("(abc)"); 3435 m = p.matcher("abcd"); 3436 result = new StringBuilder(); 3437 m.find(); 3438 try { 3439 m.appendReplacement(result, ("xyz$g")); 3440 failCount++; 3441 } catch (IllegalArgumentException iae) { 3442 if (result.length() != 0) 3443 failCount++; 3444 } 3445 report("SB Substitution 2"); 3446 } 3447 3448 /* 3449 * 5 groups of characters are created to make a substitution string. 3450 * A base string will be created including random lead chars, the 3451 * substitution string, and random trailing chars. 3452 * A pattern containing the 5 groups is searched for and replaced with: 3453 * random group + random string + random group. 3454 * The results are checked for correctness. 3455 */ substitutionBasher()3456 private static void substitutionBasher() { 3457 for (int runs = 0; runs<1000; runs++) { 3458 // Create a base string to work in 3459 int leadingChars = generator.nextInt(10); 3460 StringBuffer baseBuffer = new StringBuffer(100); 3461 String leadingString = getRandomAlphaString(leadingChars); 3462 baseBuffer.append(leadingString); 3463 3464 // Create 5 groups of random number of random chars 3465 // Create the string to substitute 3466 // Create the pattern string to search for 3467 StringBuffer bufferToSub = new StringBuffer(25); 3468 StringBuffer bufferToPat = new StringBuffer(50); 3469 String[] groups = new String[5]; 3470 for(int i=0; i<5; i++) { 3471 int aGroupSize = generator.nextInt(5)+1; 3472 groups[i] = getRandomAlphaString(aGroupSize); 3473 bufferToSub.append(groups[i]); 3474 bufferToPat.append('('); 3475 bufferToPat.append(groups[i]); 3476 bufferToPat.append(')'); 3477 } 3478 String stringToSub = bufferToSub.toString(); 3479 String pattern = bufferToPat.toString(); 3480 3481 // Place sub string into working string at random index 3482 baseBuffer.append(stringToSub); 3483 3484 // Append random chars to end 3485 int trailingChars = generator.nextInt(10); 3486 String trailingString = getRandomAlphaString(trailingChars); 3487 baseBuffer.append(trailingString); 3488 String baseString = baseBuffer.toString(); 3489 3490 // Create test pattern and matcher 3491 Pattern p = Pattern.compile(pattern); 3492 Matcher m = p.matcher(baseString); 3493 3494 // Reject candidate if pattern happens to start early 3495 m.find(); 3496 if (m.start() < leadingChars) 3497 continue; 3498 3499 // Reject candidate if more than one match 3500 if (m.find()) 3501 continue; 3502 3503 // Construct a replacement string with : 3504 // random group + random string + random group 3505 StringBuffer bufferToRep = new StringBuffer(); 3506 int groupIndex1 = generator.nextInt(5); 3507 bufferToRep.append("$" + (groupIndex1 + 1)); 3508 String randomMidString = getRandomAlphaString(5); 3509 bufferToRep.append(randomMidString); 3510 int groupIndex2 = generator.nextInt(5); 3511 bufferToRep.append("$" + (groupIndex2 + 1)); 3512 String replacement = bufferToRep.toString(); 3513 3514 // Do the replacement 3515 String result = m.replaceAll(replacement); 3516 3517 // Construct expected result 3518 StringBuffer bufferToRes = new StringBuffer(); 3519 bufferToRes.append(leadingString); 3520 bufferToRes.append(groups[groupIndex1]); 3521 bufferToRes.append(randomMidString); 3522 bufferToRes.append(groups[groupIndex2]); 3523 bufferToRes.append(trailingString); 3524 String expectedResult = bufferToRes.toString(); 3525 3526 // Check results 3527 if (!result.equals(expectedResult)) 3528 failCount++; 3529 } 3530 3531 report("Substitution Basher"); 3532 } 3533 3534 /* 3535 * 5 groups of characters are created to make a substitution string. 3536 * A base string will be created including random lead chars, the 3537 * substitution string, and random trailing chars. 3538 * A pattern containing the 5 groups is searched for and replaced with: 3539 * random group + random string + random group. 3540 * The results are checked for correctness. 3541 */ substitutionBasher2()3542 private static void substitutionBasher2() { 3543 for (int runs = 0; runs<1000; runs++) { 3544 // Create a base string to work in 3545 int leadingChars = generator.nextInt(10); 3546 StringBuilder baseBuffer = new StringBuilder(100); 3547 String leadingString = getRandomAlphaString(leadingChars); 3548 baseBuffer.append(leadingString); 3549 3550 // Create 5 groups of random number of random chars 3551 // Create the string to substitute 3552 // Create the pattern string to search for 3553 StringBuilder bufferToSub = new StringBuilder(25); 3554 StringBuilder bufferToPat = new StringBuilder(50); 3555 String[] groups = new String[5]; 3556 for(int i=0; i<5; i++) { 3557 int aGroupSize = generator.nextInt(5)+1; 3558 groups[i] = getRandomAlphaString(aGroupSize); 3559 bufferToSub.append(groups[i]); 3560 bufferToPat.append('('); 3561 bufferToPat.append(groups[i]); 3562 bufferToPat.append(')'); 3563 } 3564 String stringToSub = bufferToSub.toString(); 3565 String pattern = bufferToPat.toString(); 3566 3567 // Place sub string into working string at random index 3568 baseBuffer.append(stringToSub); 3569 3570 // Append random chars to end 3571 int trailingChars = generator.nextInt(10); 3572 String trailingString = getRandomAlphaString(trailingChars); 3573 baseBuffer.append(trailingString); 3574 String baseString = baseBuffer.toString(); 3575 3576 // Create test pattern and matcher 3577 Pattern p = Pattern.compile(pattern); 3578 Matcher m = p.matcher(baseString); 3579 3580 // Reject candidate if pattern happens to start early 3581 m.find(); 3582 if (m.start() < leadingChars) 3583 continue; 3584 3585 // Reject candidate if more than one match 3586 if (m.find()) 3587 continue; 3588 3589 // Construct a replacement string with : 3590 // random group + random string + random group 3591 StringBuilder bufferToRep = new StringBuilder(); 3592 int groupIndex1 = generator.nextInt(5); 3593 bufferToRep.append("$" + (groupIndex1 + 1)); 3594 String randomMidString = getRandomAlphaString(5); 3595 bufferToRep.append(randomMidString); 3596 int groupIndex2 = generator.nextInt(5); 3597 bufferToRep.append("$" + (groupIndex2 + 1)); 3598 String replacement = bufferToRep.toString(); 3599 3600 // Do the replacement 3601 String result = m.replaceAll(replacement); 3602 3603 // Construct expected result 3604 StringBuilder bufferToRes = new StringBuilder(); 3605 bufferToRes.append(leadingString); 3606 bufferToRes.append(groups[groupIndex1]); 3607 bufferToRes.append(randomMidString); 3608 bufferToRes.append(groups[groupIndex2]); 3609 bufferToRes.append(trailingString); 3610 String expectedResult = bufferToRes.toString(); 3611 3612 // Check results 3613 if (!result.equals(expectedResult)) { 3614 failCount++; 3615 } 3616 } 3617 3618 report("Substitution Basher 2"); 3619 } 3620 3621 /** 3622 * Checks the handling of some escape sequences that the Pattern 3623 * class should process instead of the java compiler. These are 3624 * not in the file because the escapes should be be processed 3625 * by the Pattern class when the regex is compiled. 3626 */ escapes()3627 private static void escapes() throws Exception { 3628 Pattern p = Pattern.compile("\\043"); 3629 Matcher m = p.matcher("#"); 3630 if (!m.find()) 3631 failCount++; 3632 3633 p = Pattern.compile("\\x23"); 3634 m = p.matcher("#"); 3635 if (!m.find()) 3636 failCount++; 3637 3638 p = Pattern.compile("\\u0023"); 3639 m = p.matcher("#"); 3640 if (!m.find()) 3641 failCount++; 3642 3643 report("Escape sequences"); 3644 } 3645 3646 /** 3647 * Checks the handling of blank input situations. These 3648 * tests are incompatible with my test file format. 3649 */ blankInput()3650 private static void blankInput() throws Exception { 3651 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3652 Matcher m = p.matcher(""); 3653 if (m.find()) 3654 failCount++; 3655 3656 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3657 m = p.matcher(""); 3658 if (!m.find()) 3659 failCount++; 3660 3661 p = Pattern.compile("abc"); 3662 m = p.matcher(""); 3663 if (m.find()) 3664 failCount++; 3665 3666 p = Pattern.compile("a*"); 3667 m = p.matcher(""); 3668 if (!m.find()) 3669 failCount++; 3670 3671 report("Blank input"); 3672 } 3673 3674 /** 3675 * Tests the Boyer-Moore pattern matching of a character sequence 3676 * on randomly generated patterns. 3677 */ bm()3678 private static void bm() throws Exception { 3679 doBnM('a'); 3680 report("Boyer Moore (ASCII)"); 3681 3682 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3683 report("Boyer Moore (Supplementary)"); 3684 } 3685 doBnM(int baseCharacter)3686 private static void doBnM(int baseCharacter) throws Exception { 3687 int achar=0; 3688 3689 for (int i=0; i<100; i++) { 3690 // Create a short pattern to search for 3691 int patternLength = generator.nextInt(7) + 4; 3692 StringBuffer patternBuffer = new StringBuffer(patternLength); 3693 String pattern; 3694 retry: for (;;) { 3695 for (int x=0; x<patternLength; x++) { 3696 int ch = baseCharacter + generator.nextInt(26); 3697 if (Character.isSupplementaryCodePoint(ch)) { 3698 patternBuffer.append(Character.toChars(ch)); 3699 } else { 3700 patternBuffer.append((char)ch); 3701 } 3702 } 3703 pattern = patternBuffer.toString(); 3704 3705 // Avoid patterns that start and end with the same substring 3706 // See JDK-6854417 3707 for (int x=1; x < pattern.length(); x++) { 3708 if (pattern.startsWith(pattern.substring(x))) 3709 continue retry; 3710 } 3711 break; 3712 } 3713 Pattern p = Pattern.compile(pattern); 3714 3715 // Create a buffer with random ASCII chars that does 3716 // not match the sample 3717 String toSearch = null; 3718 StringBuffer s = null; 3719 Matcher m = p.matcher(""); 3720 do { 3721 s = new StringBuffer(100); 3722 for (int x=0; x<100; x++) { 3723 int ch = baseCharacter + generator.nextInt(26); 3724 if (Character.isSupplementaryCodePoint(ch)) { 3725 s.append(Character.toChars(ch)); 3726 } else { 3727 s.append((char)ch); 3728 } 3729 } 3730 toSearch = s.toString(); 3731 m.reset(toSearch); 3732 } while (m.find()); 3733 3734 // Insert the pattern at a random spot 3735 int insertIndex = generator.nextInt(99); 3736 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3737 insertIndex++; 3738 s = s.insert(insertIndex, pattern); 3739 toSearch = s.toString(); 3740 3741 // Make sure that the pattern is found 3742 m.reset(toSearch); 3743 if (!m.find()) 3744 failCount++; 3745 3746 // Make sure that the match text is the pattern 3747 if (!m.group().equals(pattern)) 3748 failCount++; 3749 3750 // Make sure match occured at insertion point 3751 if (m.start() != insertIndex) 3752 failCount++; 3753 } 3754 } 3755 3756 /** 3757 * Tests the matching of slices on randomly generated patterns. 3758 * The Boyer-Moore optimization is not done on these patterns 3759 * because it uses unicode case folding. 3760 */ slice()3761 private static void slice() throws Exception { 3762 doSlice(Character.MAX_VALUE); 3763 report("Slice"); 3764 3765 doSlice(Character.MAX_CODE_POINT); 3766 report("Slice (Supplementary)"); 3767 } 3768 doSlice(int maxCharacter)3769 private static void doSlice(int maxCharacter) throws Exception { 3770 Random generator = new Random(); 3771 int achar=0; 3772 3773 for (int i=0; i<100; i++) { 3774 // Create a short pattern to search for 3775 int patternLength = generator.nextInt(7) + 4; 3776 StringBuffer patternBuffer = new StringBuffer(patternLength); 3777 for (int x=0; x<patternLength; x++) { 3778 int randomChar = 0; 3779 while (!Character.isLetterOrDigit(randomChar)) 3780 randomChar = generator.nextInt(maxCharacter); 3781 if (Character.isSupplementaryCodePoint(randomChar)) { 3782 patternBuffer.append(Character.toChars(randomChar)); 3783 } else { 3784 patternBuffer.append((char) randomChar); 3785 } 3786 } 3787 String pattern = patternBuffer.toString(); 3788 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3789 3790 // Create a buffer with random chars that does not match the sample 3791 String toSearch = null; 3792 StringBuffer s = null; 3793 Matcher m = p.matcher(""); 3794 do { 3795 s = new StringBuffer(100); 3796 for (int x=0; x<100; x++) { 3797 int randomChar = 0; 3798 while (!Character.isLetterOrDigit(randomChar)) 3799 randomChar = generator.nextInt(maxCharacter); 3800 if (Character.isSupplementaryCodePoint(randomChar)) { 3801 s.append(Character.toChars(randomChar)); 3802 } else { 3803 s.append((char) randomChar); 3804 } 3805 } 3806 toSearch = s.toString(); 3807 m.reset(toSearch); 3808 } while (m.find()); 3809 3810 // Insert the pattern at a random spot 3811 int insertIndex = generator.nextInt(99); 3812 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3813 insertIndex++; 3814 s = s.insert(insertIndex, pattern); 3815 toSearch = s.toString(); 3816 3817 // Make sure that the pattern is found 3818 m.reset(toSearch); 3819 if (!m.find()) 3820 failCount++; 3821 3822 // Make sure that the match text is the pattern 3823 if (!m.group().equals(pattern)) 3824 failCount++; 3825 3826 // Make sure match occured at insertion point 3827 if (m.start() != insertIndex) 3828 failCount++; 3829 } 3830 } 3831 explainFailure(String pattern, String data, String expected, String actual)3832 private static void explainFailure(String pattern, String data, 3833 String expected, String actual) { 3834 System.err.println("----------------------------------------"); 3835 System.err.println("Pattern = "+pattern); 3836 System.err.println("Data = "+data); 3837 System.err.println("Expected = " + expected); 3838 System.err.println("Actual = " + actual); 3839 } 3840 explainFailure(String pattern, String data, Throwable t)3841 private static void explainFailure(String pattern, String data, 3842 Throwable t) { 3843 System.err.println("----------------------------------------"); 3844 System.err.println("Pattern = "+pattern); 3845 System.err.println("Data = "+data); 3846 t.printStackTrace(System.err); 3847 } 3848 3849 // Testing examples from a file 3850 3851 /** 3852 * Goes through the file "TestCases.txt" and creates many patterns 3853 * described in the file, matching the patterns against input lines in 3854 * the file, and comparing the results against the correct results 3855 * also found in the file. The file format is described in comments 3856 * at the head of the file. 3857 */ processFile(String fileName)3858 private static void processFile(String fileName) throws Exception { 3859 File testCases = new File(System.getProperty("test.src", "."), 3860 fileName); 3861 FileInputStream in = new FileInputStream(testCases); 3862 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3863 3864 // Process next test case. 3865 String aLine; 3866 while((aLine = r.readLine()) != null) { 3867 // Read a line for pattern 3868 String patternString = grabLine(r); 3869 Pattern p = null; 3870 try { 3871 p = compileTestPattern(patternString); 3872 } catch (PatternSyntaxException e) { 3873 String dataString = grabLine(r); 3874 String expectedResult = grabLine(r); 3875 if (expectedResult.startsWith("error")) 3876 continue; 3877 explainFailure(patternString, dataString, e); 3878 failCount++; 3879 continue; 3880 } 3881 3882 // Read a line for input string 3883 String dataString = grabLine(r); 3884 Matcher m = p.matcher(dataString); 3885 StringBuffer result = new StringBuffer(); 3886 3887 // Check for IllegalStateExceptions before a match 3888 failCount += preMatchInvariants(m); 3889 3890 boolean found = m.find(); 3891 3892 if (found) 3893 failCount += postTrueMatchInvariants(m); 3894 else 3895 failCount += postFalseMatchInvariants(m); 3896 3897 if (found) { 3898 result.append("true "); 3899 result.append(m.group(0) + " "); 3900 } else { 3901 result.append("false "); 3902 } 3903 3904 result.append(m.groupCount()); 3905 3906 if (found) { 3907 for (int i=1; i<m.groupCount()+1; i++) 3908 if (m.group(i) != null) 3909 result.append(" " +m.group(i)); 3910 } 3911 3912 // Read a line for the expected result 3913 String expectedResult = grabLine(r); 3914 3915 if (!result.toString().equals(expectedResult)) { 3916 explainFailure(patternString, dataString, expectedResult, result.toString()); 3917 failCount++; 3918 } 3919 } 3920 3921 report(fileName); 3922 } 3923 preMatchInvariants(Matcher m)3924 private static int preMatchInvariants(Matcher m) { 3925 int failCount = 0; 3926 try { 3927 m.start(); 3928 failCount++; 3929 } catch (IllegalStateException ise) {} 3930 try { 3931 m.end(); 3932 failCount++; 3933 } catch (IllegalStateException ise) {} 3934 try { 3935 m.group(); 3936 failCount++; 3937 } catch (IllegalStateException ise) {} 3938 return failCount; 3939 } 3940 postFalseMatchInvariants(Matcher m)3941 private static int postFalseMatchInvariants(Matcher m) { 3942 int failCount = 0; 3943 try { 3944 m.group(); 3945 failCount++; 3946 } catch (IllegalStateException ise) {} 3947 try { 3948 m.start(); 3949 failCount++; 3950 } catch (IllegalStateException ise) {} 3951 try { 3952 m.end(); 3953 failCount++; 3954 } catch (IllegalStateException ise) {} 3955 return failCount; 3956 } 3957 postTrueMatchInvariants(Matcher m)3958 private static int postTrueMatchInvariants(Matcher m) { 3959 int failCount = 0; 3960 //assert(m.start() = m.start(0); 3961 if (m.start() != m.start(0)) 3962 failCount++; 3963 //assert(m.end() = m.end(0); 3964 if (m.start() != m.start(0)) 3965 failCount++; 3966 //assert(m.group() = m.group(0); 3967 if (!m.group().equals(m.group(0))) 3968 failCount++; 3969 try { 3970 m.group(50); 3971 failCount++; 3972 } catch (IndexOutOfBoundsException ise) {} 3973 3974 return failCount; 3975 } 3976 compileTestPattern(String patternString)3977 private static Pattern compileTestPattern(String patternString) { 3978 if (!patternString.startsWith("'")) { 3979 return Pattern.compile(patternString); 3980 } 3981 int break1 = patternString.lastIndexOf("'"); 3982 String flagString = patternString.substring( 3983 break1+1, patternString.length()); 3984 patternString = patternString.substring(1, break1); 3985 3986 if (flagString.equals("i")) 3987 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3988 3989 if (flagString.equals("m")) 3990 return Pattern.compile(patternString, Pattern.MULTILINE); 3991 3992 return Pattern.compile(patternString); 3993 } 3994 3995 /** 3996 * Reads a line from the input file. Keeps reading lines until a non 3997 * empty non comment line is read. If the line contains a \n then 3998 * these two characters are replaced by a newline char. If a \\uxxxx 3999 * sequence is read then the sequence is replaced by the unicode char. 4000 */ grabLine(BufferedReader r)4001 private static String grabLine(BufferedReader r) throws Exception { 4002 int index = 0; 4003 String line = r.readLine(); 4004 while (line.startsWith("//") || line.length() < 1) 4005 line = r.readLine(); 4006 while ((index = line.indexOf("\\n")) != -1) { 4007 StringBuffer temp = new StringBuffer(line); 4008 temp.replace(index, index+2, "\n"); 4009 line = temp.toString(); 4010 } 4011 while ((index = line.indexOf("\\u")) != -1) { 4012 StringBuffer temp = new StringBuffer(line); 4013 String value = temp.substring(index+2, index+6); 4014 char aChar = (char)Integer.parseInt(value, 16); 4015 String unicodeChar = "" + aChar; 4016 temp.replace(index, index+6, unicodeChar); 4017 line = temp.toString(); 4018 } 4019 4020 return line; 4021 } 4022 check(Pattern p, String s, String g, String expected)4023 private static void check(Pattern p, String s, String g, String expected) { 4024 Matcher m = p.matcher(s); 4025 m.find(); 4026 if (!m.group(g).equals(expected) || 4027 s.charAt(m.start(g)) != expected.charAt(0) || 4028 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 4029 failCount++; 4030 } 4031 checkReplaceFirst(String p, String s, String r, String expected)4032 private static void checkReplaceFirst(String p, String s, String r, String expected) 4033 { 4034 if (!expected.equals(Pattern.compile(p) 4035 .matcher(s) 4036 .replaceFirst(r))) 4037 failCount++; 4038 } 4039 checkReplaceAll(String p, String s, String r, String expected)4040 private static void checkReplaceAll(String p, String s, String r, String expected) 4041 { 4042 if (!expected.equals(Pattern.compile(p) 4043 .matcher(s) 4044 .replaceAll(r))) 4045 failCount++; 4046 } 4047 checkExpectedFail(String p)4048 private static void checkExpectedFail(String p) { 4049 try { 4050 Pattern.compile(p); 4051 } catch (PatternSyntaxException pse) { 4052 //pse.printStackTrace(); 4053 return; 4054 } 4055 failCount++; 4056 } 4057 checkExpectedIAE(Matcher m, String g)4058 private static void checkExpectedIAE(Matcher m, String g) { 4059 m.find(); 4060 try { 4061 m.group(g); 4062 } catch (IllegalArgumentException x) { 4063 //iae.printStackTrace(); 4064 try { 4065 m.start(g); 4066 } catch (IllegalArgumentException xx) { 4067 try { 4068 m.start(g); 4069 } catch (IllegalArgumentException xxx) { 4070 return; 4071 } 4072 } 4073 } 4074 failCount++; 4075 } 4076 checkExpectedNPE(Matcher m)4077 private static void checkExpectedNPE(Matcher m) { 4078 m.find(); 4079 try { 4080 m.group(null); 4081 } catch (NullPointerException x) { 4082 try { 4083 m.start(null); 4084 } catch (NullPointerException xx) { 4085 try { 4086 m.end(null); 4087 } catch (NullPointerException xxx) { 4088 return; 4089 } 4090 } 4091 } 4092 failCount++; 4093 } 4094 namedGroupCaptureTest()4095 private static void namedGroupCaptureTest() throws Exception { 4096 check(Pattern.compile("x+(?<gname>y+)z+"), 4097 "xxxyyyzzz", 4098 "gname", 4099 "yyy"); 4100 4101 check(Pattern.compile("x+(?<gname8>y+)z+"), 4102 "xxxyyyzzz", 4103 "gname8", 4104 "yyy"); 4105 4106 //backref 4107 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4108 check(pattern, "zzzaabcazzz", true); // found "abca" 4109 4110 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4111 "zzzaabcaazzz", true); 4112 4113 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4114 "abcdefabc", true); 4115 4116 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4117 "abcdefghijkk", true); 4118 4119 // Supplementary character tests 4120 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4121 toSupplementaries("zzzaabcazzz"), true); 4122 4123 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4124 toSupplementaries("zzzaabcaazzz"), true); 4125 4126 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4127 toSupplementaries("abcdefabc"), true); 4128 4129 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4130 "(?<gname>" + 4131 toSupplementaries("k)") + "\\k<gname>"), 4132 toSupplementaries("abcdefghijkk"), true); 4133 4134 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4135 "xxxyyyzzzyyy", 4136 "gname", 4137 "yyy"); 4138 4139 //replaceFirst/All 4140 checkReplaceFirst("(?<gn>ab)(c*)", 4141 "abccczzzabcczzzabccc", 4142 "${gn}", 4143 "abzzzabcczzzabccc"); 4144 4145 checkReplaceAll("(?<gn>ab)(c*)", 4146 "abccczzzabcczzzabccc", 4147 "${gn}", 4148 "abzzzabzzzab"); 4149 4150 4151 checkReplaceFirst("(?<gn>ab)(c*)", 4152 "zzzabccczzzabcczzzabccczzz", 4153 "${gn}", 4154 "zzzabzzzabcczzzabccczzz"); 4155 4156 checkReplaceAll("(?<gn>ab)(c*)", 4157 "zzzabccczzzabcczzzabccczzz", 4158 "${gn}", 4159 "zzzabzzzabzzzabzzz"); 4160 4161 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4162 "zzzabccczzzabcczzzabccczzz", 4163 "${gn2}", 4164 "zzzccczzzabcczzzabccczzz"); 4165 4166 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4167 "zzzabccczzzabcczzzabccczzz", 4168 "${gn2}", 4169 "zzzccczzzcczzzccczzz"); 4170 4171 //toSupplementaries("(ab)(c*)")); 4172 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4173 ")(?<gn2>" + toSupplementaries("c") + "*)", 4174 toSupplementaries("abccczzzabcczzzabccc"), 4175 "${gn1}", 4176 toSupplementaries("abzzzabcczzzabccc")); 4177 4178 4179 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4180 ")(?<gn2>" + toSupplementaries("c") + "*)", 4181 toSupplementaries("abccczzzabcczzzabccc"), 4182 "${gn1}", 4183 toSupplementaries("abzzzabzzzab")); 4184 4185 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4186 ")(?<gn2>" + toSupplementaries("c") + "*)", 4187 toSupplementaries("abccczzzabcczzzabccc"), 4188 "${gn2}", 4189 toSupplementaries("ccczzzabcczzzabccc")); 4190 4191 4192 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4193 ")(?<gn2>" + toSupplementaries("c") + "*)", 4194 toSupplementaries("abccczzzabcczzzabccc"), 4195 "${gn2}", 4196 toSupplementaries("ccczzzcczzzccc")); 4197 4198 checkReplaceFirst("(?<dog>Dog)AndCat", 4199 "zzzDogAndCatzzzDogAndCatzzz", 4200 "${dog}", 4201 "zzzDogzzzDogAndCatzzz"); 4202 4203 4204 checkReplaceAll("(?<dog>Dog)AndCat", 4205 "zzzDogAndCatzzzDogAndCatzzz", 4206 "${dog}", 4207 "zzzDogzzzDogzzz"); 4208 4209 // backref in Matcher & String 4210 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 4211 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4212 failCount++; 4213 4214 // negative 4215 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4216 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4217 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4218 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4219 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4220 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4221 "gnameX"); 4222 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4223 report("NamedGroupCapture"); 4224 } 4225 4226 // This is for bug 6919132 nonBmpClassComplementTest()4227 private static void nonBmpClassComplementTest() throws Exception { 4228 Pattern p = Pattern.compile("\\P{Lu}"); 4229 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4230 4231 if (m.find() && m.start() == 1) 4232 failCount++; 4233 4234 // from a unicode category 4235 p = Pattern.compile("\\P{Lu}"); 4236 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4237 if (m.find()) 4238 failCount++; 4239 if (!m.hitEnd()) 4240 failCount++; 4241 4242 // block 4243 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4244 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4245 if (m.find() && m.start() == 1) 4246 failCount++; 4247 4248 p = Pattern.compile("\\P{sc=GRANTHA}"); 4249 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4250 if (m.find() && m.start() == 1) 4251 failCount++; 4252 4253 report("NonBmpClassComplement"); 4254 } 4255 unicodePropertiesTest()4256 private static void unicodePropertiesTest() throws Exception { 4257 // different forms 4258 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4259 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4260 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4261 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4262 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4263 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4264 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4265 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4266 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4267 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4268 failCount++; 4269 4270 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4271 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4272 Matcher lastSM = common; 4273 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4274 4275 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4276 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4277 Matcher lastBM = latin; 4278 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4279 4280 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4281 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4282 continue; // only pick couple code points, they are the same 4283 } 4284 4285 // Unicode Script 4286 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4287 Matcher m; 4288 String str = new String(Character.toChars(cp)); 4289 if (script == lastScript) { 4290 m = lastSM; 4291 m.reset(str); 4292 } else { 4293 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4294 } 4295 if (!m.matches()) { 4296 failCount++; 4297 } 4298 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 4299 other.reset(str); 4300 if (other.matches()) { 4301 failCount++; 4302 } 4303 lastSM = m; 4304 lastScript = script; 4305 4306 // Unicode Block 4307 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4308 if (block == null) { 4309 //System.out.printf("Not a Block: cp=%x%n", cp); 4310 continue; 4311 } 4312 if (block == lastBlock) { 4313 m = lastBM; 4314 m.reset(str); 4315 } else { 4316 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4317 } 4318 if (!m.matches()) { 4319 failCount++; 4320 } 4321 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4322 other.reset(str); 4323 if (other.matches()) { 4324 failCount++; 4325 } 4326 lastBM = m; 4327 lastBlock = block; 4328 } 4329 report("unicodeProperties"); 4330 } 4331 unicodeHexNotationTest()4332 private static void unicodeHexNotationTest() throws Exception { 4333 4334 // negative 4335 checkExpectedFail("\\x{-23}"); 4336 checkExpectedFail("\\x{110000}"); 4337 checkExpectedFail("\\x{}"); 4338 checkExpectedFail("\\x{AB[ef]"); 4339 4340 // codepoint 4341 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4342 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4343 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4344 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4345 4346 // in class 4347 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4348 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4349 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4350 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4351 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4352 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4353 4354 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4355 String s = "A" + new String(Character.toChars(cp)) + "B"; 4356 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp) 4357 : String.format("\\u%04x\\u%04x", 4358 (int) Character.toChars(cp)[0], 4359 (int) Character.toChars(cp)[1]); 4360 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4361 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4362 failCount++; 4363 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4364 failCount++; 4365 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4366 failCount++; 4367 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4368 failCount++; 4369 } 4370 report("unicodeHexNotation"); 4371 } 4372 unicodeClassesTest()4373 private static void unicodeClassesTest() throws Exception { 4374 4375 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4376 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4377 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4378 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4379 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4380 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4381 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4382 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4383 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4384 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4385 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4386 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4387 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4388 Matcher bound = Pattern.compile("\\b").matcher(""); 4389 Matcher word = Pattern.compile("\\w++").matcher(""); 4390 // UNICODE_CHARACTER_CLASS 4391 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4392 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4393 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4394 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4395 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4396 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4397 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4398 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4399 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4400 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4401 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4402 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4403 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4404 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4405 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4406 // embedded flag (?U) 4407 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4408 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4409 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4410 4411 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4412 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4413 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4414 // properties 4415 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4416 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4417 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4418 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4419 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4420 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4421 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4422 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4423 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4424 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4425 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4426 // javaMethod 4427 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4428 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4429 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4430 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4431 // GC/C 4432 Matcher gcC = Pattern.compile("\\p{C}").matcher(""); 4433 4434 for (int cp = 1; cp < 0x30000; cp++) { 4435 String str = new String(Character.toChars(cp)); 4436 int type = Character.getType(cp); 4437 if (// lower 4438 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4439 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4440 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4441 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4442 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4443 // upper 4444 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4445 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4446 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4447 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4448 // alpha 4449 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4450 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4451 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4452 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4453 // digit 4454 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4455 Character.isDigit(cp) != digitU.reset(str).matches() || 4456 // alnum 4457 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4458 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4459 // punct 4460 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4461 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4462 // graph 4463 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4464 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4465 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4466 // blank 4467 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4468 != blank.reset(str).matches() || 4469 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4470 // print 4471 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4472 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4473 // cntrl 4474 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4475 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4476 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4477 // hexdigit 4478 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4479 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4480 // space 4481 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4482 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4483 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4484 // word 4485 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4486 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4487 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4488 // bwordb 4489 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4490 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4491 // properties 4492 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4493 Character.isLetter(cp) != letterP.reset(str).matches()|| 4494 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4495 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 4496 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4497 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4498 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || 4499 // gc_C 4500 (Character.CONTROL == type || Character.FORMAT == type || 4501 Character.PRIVATE_USE == type || Character.SURROGATE == type || 4502 Character.UNASSIGNED == type) 4503 != gcC.reset(str).matches()) { 4504 failCount++; 4505 } 4506 } 4507 4508 // bounds/word align 4509 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4510 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4511 failCount++; 4512 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4513 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4514 failCount++; 4515 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4516 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4517 failCount++; 4518 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4519 failCount++; 4520 report("unicodePredefinedClasses"); 4521 } 4522 unicodeCharacterNameTest()4523 private static void unicodeCharacterNameTest() throws Exception { 4524 4525 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4526 if (!Character.isValidCodePoint(cp) || 4527 Character.getType(cp) == Character.UNASSIGNED) 4528 continue; 4529 String str = new String(Character.toChars(cp)); 4530 // single 4531 String p = "\\N{" + Character.getName(cp) + "}"; 4532 if (!Pattern.compile(p).matcher(str).matches()) { 4533 failCount++; 4534 } 4535 // class[c] 4536 p = "[\\N{" + Character.getName(cp) + "}]"; 4537 if (!Pattern.compile(p).matcher(str).matches()) { 4538 failCount++; 4539 } 4540 } 4541 4542 // range 4543 for (int i = 0; i < 10; i++) { 4544 int start = generator.nextInt(20); 4545 int end = start + generator.nextInt(200); 4546 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4547 String str; 4548 for (int cp = start; cp < end; cp++) { 4549 str = new String(Character.toChars(cp)); 4550 if (!Pattern.compile(p).matcher(str).matches()) { 4551 failCount++; 4552 } 4553 } 4554 str = new String(Character.toChars(end + 10)); 4555 if (Pattern.compile(p).matcher(str).matches()) { 4556 failCount++; 4557 } 4558 } 4559 4560 // slice 4561 for (int i = 0; i < 10; i++) { 4562 int n = generator.nextInt(256); 4563 int[] buf = new int[n]; 4564 StringBuffer sb = new StringBuffer(1024); 4565 for (int j = 0; j < n; j++) { 4566 int cp = generator.nextInt(1000); 4567 if (!Character.isValidCodePoint(cp) || 4568 Character.getType(cp) == Character.UNASSIGNED) 4569 cp = 0x4e00; // just use 4e00 4570 sb.append("\\N{" + Character.getName(cp) + "}"); 4571 buf[j] = cp; 4572 } 4573 String p = sb.toString(); 4574 String str = new String(buf, 0, buf.length); 4575 if (!Pattern.compile(p).matcher(str).matches()) { 4576 failCount++; 4577 } 4578 } 4579 report("unicodeCharacterName"); 4580 } 4581 horizontalAndVerticalWSTest()4582 private static void horizontalAndVerticalWSTest() throws Exception { 4583 String hws = new String (new char[] { 4584 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4585 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4586 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4587 0x202f, 0x205f, 0x3000 }); 4588 String vws = new String (new char[] { 4589 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4590 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4591 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4592 failCount++; 4593 if (Pattern.compile("\\H").matcher(hws).find() || 4594 Pattern.compile("[\\H]").matcher(hws).find()) 4595 failCount++; 4596 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4597 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4598 failCount++; 4599 if (Pattern.compile("\\V").matcher(vws).find() || 4600 Pattern.compile("[\\V]").matcher(vws).find()) 4601 failCount++; 4602 String prefix = "abcd"; 4603 String suffix = "efgh"; 4604 String ng = "A"; 4605 for (int i = 0; i < hws.length(); i++) { 4606 String c = String.valueOf(hws.charAt(i)); 4607 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4608 if (!m.find() || !c.equals(m.group())) 4609 failCount++; 4610 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4611 if (!m.find() || !c.equals(m.group())) 4612 failCount++; 4613 4614 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4615 if (!m.find() || !ng.equals(m.group())) 4616 failCount++; 4617 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4618 if (!m.find() || !ng.equals(m.group())) 4619 failCount++; 4620 } 4621 for (int i = 0; i < vws.length(); i++) { 4622 String c = String.valueOf(vws.charAt(i)); 4623 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4624 if (!m.find() || !c.equals(m.group())) 4625 failCount++; 4626 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4627 if (!m.find() || !c.equals(m.group())) 4628 failCount++; 4629 4630 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4631 if (!m.find() || !ng.equals(m.group())) 4632 failCount++; 4633 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4634 if (!m.find() || !ng.equals(m.group())) 4635 failCount++; 4636 } 4637 // \v in range is interpreted as 0x0B. This is the undocumented behavior 4638 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4639 failCount++; 4640 report("horizontalAndVerticalWSTest"); 4641 } 4642 linebreakTest()4643 private static void linebreakTest() throws Exception { 4644 String linebreaks = new String (new char[] { 4645 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4646 String crnl = "\r\n"; 4647 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() && 4648 Pattern.compile("\\R").matcher(crnl).matches() && 4649 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() && 4650 Pattern.compile("\\Rabc").matcher("\rabc").matches() && 4651 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking 4652 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking 4653 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029 4654 failCount++; 4655 } 4656 report("linebreakTest"); 4657 } 4658 4659 // #7189363 branchTest()4660 private static void branchTest() throws Exception { 4661 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4662 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4663 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4664 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4665 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4666 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4667 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4668 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4669 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4670 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4671 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4672 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4673 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4674 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4675 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4676 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4677 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4678 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4679 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4680 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4681 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4682 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4683 failCount++; 4684 report("branchTest"); 4685 } 4686 4687 // This test is for 8007395 groupCurlyNotFoundSuppTest()4688 private static void groupCurlyNotFoundSuppTest() throws Exception { 4689 String input = "test this as \ud83d\ude0d"; 4690 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4691 "test(.)*(@[a-zA-Z.]+)", 4692 "test([^B])+(@[a-zA-Z.]+)", 4693 "test([^B])*(@[a-zA-Z.]+)", 4694 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4695 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4696 }) { 4697 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4698 .matcher(input); 4699 try { 4700 if (m.find()) { 4701 failCount++; 4702 } 4703 } catch (Exception x) { 4704 failCount++; 4705 } 4706 } 4707 report("GroupCurly NotFoundSupp"); 4708 } 4709 4710 // This test is for 8023647 groupCurlyBackoffTest()4711 private static void groupCurlyBackoffTest() throws Exception { 4712 if (!"abc1c".matches("(\\w)+1\\1") || 4713 "abc11".matches("(\\w)+1\\1")) { 4714 failCount++; 4715 } 4716 report("GroupCurly backoff"); 4717 } 4718 4719 // This test is for 8012646 patternAsPredicate()4720 private static void patternAsPredicate() throws Exception { 4721 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4722 4723 if (p.test("")) { 4724 failCount++; 4725 } 4726 if (!p.test("word")) { 4727 failCount++; 4728 } 4729 if (p.test("1234")) { 4730 failCount++; 4731 } 4732 if (!p.test("word1234")) { 4733 failCount++; 4734 } 4735 report("Pattern.asPredicate"); 4736 } 4737 4738 // This test is for 8184692 patternAsMatchPredicate()4739 private static void patternAsMatchPredicate() throws Exception { 4740 Predicate<String> p = Pattern.compile("[a-z]+").asMatchPredicate(); 4741 4742 if (p.test("")) { 4743 failCount++; 4744 } 4745 if (!p.test("word")) { 4746 failCount++; 4747 } 4748 if (p.test("1234word")) { 4749 failCount++; 4750 } 4751 if (p.test("1234")) { 4752 failCount++; 4753 } 4754 report("Pattern.asMatchPredicate"); 4755 } 4756 4757 4758 // This test is for 8035975 invalidFlags()4759 private static void invalidFlags() throws Exception { 4760 for (int flag = 1; flag != 0; flag <<= 1) { 4761 switch (flag) { 4762 case Pattern.CASE_INSENSITIVE: 4763 case Pattern.MULTILINE: 4764 case Pattern.DOTALL: 4765 case Pattern.UNICODE_CASE: 4766 case Pattern.CANON_EQ: 4767 case Pattern.UNIX_LINES: 4768 case Pattern.LITERAL: 4769 case Pattern.UNICODE_CHARACTER_CLASS: 4770 case Pattern.COMMENTS: 4771 // valid flag, continue 4772 break; 4773 default: 4774 try { 4775 Pattern.compile(".", flag); 4776 failCount++; 4777 } catch (IllegalArgumentException expected) { 4778 } 4779 } 4780 } 4781 report("Invalid compile flags"); 4782 } 4783 4784 // This test is for 8158482 embeddedFlags()4785 private static void embeddedFlags() throws Exception { 4786 try { 4787 Pattern.compile("(?i).(?-i)."); 4788 Pattern.compile("(?m).(?-m)."); 4789 Pattern.compile("(?s).(?-s)."); 4790 Pattern.compile("(?d).(?-d)."); 4791 Pattern.compile("(?u).(?-u)."); 4792 Pattern.compile("(?c).(?-c)."); 4793 Pattern.compile("(?x).(?-x)."); 4794 Pattern.compile("(?U).(?-U)."); 4795 Pattern.compile("(?imsducxU).(?-imsducxU)."); 4796 } catch (PatternSyntaxException x) { 4797 failCount++; 4798 } 4799 report("Embedded flags"); 4800 } 4801 grapheme()4802 private static void grapheme() throws Exception { 4803 final int[] lineNumber = new int[1]; 4804 Stream.concat(Files.lines(UCDFiles.GRAPHEME_BREAK_TEST), 4805 Files.lines(Paths.get(System.getProperty("test.src", "."), "GraphemeTestCases.txt"))) 4806 .forEach( ln -> { 4807 lineNumber[0]++; 4808 if (ln.length() == 0 || ln.startsWith("#")) { 4809 return; 4810 } 4811 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4812 // System.out.println(str); 4813 String[] strs = ln.split("\u00f7|\u00d7"); 4814 StringBuilder src = new StringBuilder(); 4815 ArrayList<String> graphemes = new ArrayList<>(); 4816 StringBuilder buf = new StringBuilder(); 4817 int offBk = 0; 4818 for (String str : strs) { 4819 if (str.length() == 0) // first empty str 4820 continue; 4821 int cp = Integer.parseInt(str, 16); 4822 src.appendCodePoint(cp); 4823 buf.appendCodePoint(cp); 4824 offBk += (str.length() + 1); 4825 if (ln.charAt(offBk) == '\u00f7') { // DIV 4826 graphemes.add(buf.toString()); 4827 buf = new StringBuilder(); 4828 } 4829 } 4830 Pattern p = Pattern.compile("\\X"); 4831 // (1) test \X directly 4832 Matcher m = p.matcher(src.toString()); 4833 for (String g : graphemes) { 4834 // System.out.printf(" grapheme:=[%s]%n", g); 4835 String group = null; 4836 if (!m.find() || !(group = m.group()).equals(g)) { 4837 System.out.println("Failed pattern \\X [" + ln + "] : " 4838 + "expected: " + g + " - actual: " + group 4839 + "(line " + lineNumber[0] + ")"); 4840 failCount++; 4841 } 4842 } 4843 if (m.find()) { 4844 failCount++; 4845 } 4846 // test \b{g} without \X via Pattern 4847 Pattern pbg = Pattern.compile("\\b{g}"); 4848 m = pbg.matcher(src.toString()); 4849 m.find(); 4850 int prev = m.end(); 4851 for (String g : graphemes) { 4852 String group = null; 4853 if (!m.find() || !(group = src.substring(prev, m.end())).equals(g)) { 4854 System.out.println("Failed pattern \\b{g} [" + ln + "] : " 4855 + "expected: " + g + " - actual: " + group 4856 + "(line " + lineNumber[0] + ")"); 4857 failCount++; 4858 } 4859 if (!"".equals(m.group())) { 4860 failCount++; 4861 } 4862 prev = m.end(); 4863 } 4864 if (m.find()) { 4865 failCount++; 4866 } 4867 // (2) test \b{g} + \X via Scanner 4868 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4869 for (String g : graphemes) { 4870 String next = null; 4871 if (!s.hasNext(p) || !(next = s.next(p)).equals(g)) { 4872 System.out.println("Failed \\b{g} [" + ln + "] : " 4873 + "expected: " + g + " - actual: " + next 4874 + " (line " + lineNumber[0] + ")"); 4875 failCount++; 4876 } 4877 } 4878 if (s.hasNext(p)) { 4879 failCount++; 4880 } 4881 // test \b{g} without \X via Scanner 4882 s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4883 for (String g : graphemes) { 4884 String next = null; 4885 if (!s.hasNext() || !(next = s.next()).equals(g)) { 4886 System.out.println("Failed \\b{g} [" + ln + "] : " 4887 + "expected: " + g + " - actual: " + next 4888 + " (line " + lineNumber[0] + ")"); 4889 failCount++; 4890 } 4891 } 4892 if (s.hasNext()) { 4893 failCount++; 4894 } 4895 }); 4896 // some sanity checks 4897 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4898 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4899 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4900 failCount++; 4901 // make sure "\b{n}" still works 4902 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4903 failCount++; 4904 report("Unicode extended grapheme cluster"); 4905 } 4906 4907 // hangup/timeout if go into exponential backtracking expoBacktracking()4908 private static void expoBacktracking() throws Exception { 4909 4910 Object[][] patternMatchers = { 4911 // 6328855 4912 { "(.*\n*)*", 4913 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)", 4914 false }, 4915 // 6192895 4916 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4917 "Hello World this is a test this is a test this is a test A", 4918 true }, 4919 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4920 "Hello World this is a test this is a test this is a test \u4e00 ", 4921 false }, 4922 { " *([a-z0-9]+ *)+", 4923 "hello world this is a test this is a test this is a test A", 4924 false }, 4925 // 4771934 [FIXED] #5013651? 4926 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4927 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com", 4928 true }, 4929 // 4866249 [FIXED] 4930 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>", 4931 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">", 4932 true }, 4933 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4934 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com", 4935 false }, 4936 // 6345469 4937 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4938 " < br/> < / p> <p> <html> <adfasfdasdf> </p>", 4939 true }, // --> matched 4940 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4941 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>", 4942 false }, 4943 // 5026912 4944 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$", 4945 "156580451111112225588087755221111111566969655555555", 4946 false}, 4947 // 6988218 4948 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')", 4949 "'%)) order by ANGEBOT.ID", 4950 false}, // find 4951 // 6693451 4952 { "^(\\s*foo\\s*)*$", 4953 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo", 4954 true }, 4955 { "^(\\s*foo\\s*)*$", 4956 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo", 4957 false 4958 }, 4959 // 7006761 4960 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4961 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4962 // 8140212 4963 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4964 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4965 false 4966 }, 4967 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4968 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4969 4970 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4971 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4972 4973 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4974 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4975 4976 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4977 4978 /* not fixed 4979 //8132141 ---> second level exponential backtracking 4980 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4981 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4982 */ 4983 }; 4984 4985 for (Object[] pm : patternMatchers) { 4986 String p = (String)pm[0]; 4987 String s = (String)pm[1]; 4988 boolean r = (Boolean)pm[2]; 4989 if (r != Pattern.compile(p).matcher(s).matches()) { 4990 failCount++; 4991 } 4992 } 4993 } 4994 invalidGroupName()4995 private static void invalidGroupName() { 4996 // Invalid start of a group name 4997 for (String groupName : List.of("", ".", "0", "\u0040", "\u005b", 4998 "\u0060", "\u007b", "\u0416")) { 4999 for (String pat : List.of("(?<" + groupName + ">)", 5000 "\\k<" + groupName + ">")) { 5001 try { 5002 Pattern.compile(pat); 5003 failCount++; 5004 } catch (PatternSyntaxException e) { 5005 if (!e.getMessage().startsWith( 5006 "capturing group name does not start with a" 5007 + " Latin letter")) { 5008 failCount++; 5009 } 5010 } 5011 } 5012 } 5013 // Invalid char in a group name 5014 for (String groupName : List.of("a.", "b\u0040", "c\u005b", 5015 "d\u0060", "e\u007b", "f\u0416")) { 5016 for (String pat : List.of("(?<" + groupName + ">)", 5017 "\\k<" + groupName + ">")) { 5018 try { 5019 Pattern.compile(pat); 5020 failCount++; 5021 } catch (PatternSyntaxException e) { 5022 if (!e.getMessage().startsWith( 5023 "named capturing group is missing trailing '>'")) { 5024 failCount++; 5025 } 5026 } 5027 } 5028 } 5029 report("Invalid capturing group names"); 5030 } 5031 illegalRepetitionRange()5032 private static void illegalRepetitionRange() { 5033 // huge integers > (2^31 - 1) 5034 String n = BigInteger.valueOf(1L << 32) 5035 .toString(); 5036 String m = BigInteger.valueOf(1L << 31) 5037 .add(new BigInteger(80, generator)) 5038 .toString(); 5039 for (String rep : List.of("", "x", ".", ",", "-1", "2,1", 5040 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) { 5041 String pat = ".{" + rep + "}"; 5042 try { 5043 Pattern.compile(pat); 5044 failCount++; 5045 System.out.println("Expected to fail. Pattern: " + pat); 5046 } catch (PatternSyntaxException e) { 5047 if (!e.getMessage().startsWith("Illegal repetition")) { 5048 failCount++; 5049 System.out.println("Unexpected error message: " + e.getMessage()); 5050 } 5051 } catch (Throwable t) { 5052 failCount++; 5053 System.out.println("Unexpected exception: " + t); 5054 } 5055 } 5056 report("illegalRepetitionRange"); 5057 } 5058 surrogatePairWithCanonEq()5059 private static void surrogatePairWithCanonEq() { 5060 try { 5061 Pattern.compile("\ud834\udd21", Pattern.CANON_EQ); 5062 } catch (Throwable t) { 5063 failCount++; 5064 System.out.println("Unexpected exception: " + t); 5065 } 5066 report("surrogatePairWithCanonEq"); 5067 } 5068 s2x(String s)5069 private static String s2x(String s) { 5070 StringBuilder sb = new StringBuilder(); 5071 for (char ch : s.toCharArray()) { 5072 sb.append(String.format("\\u%04x", (int)ch)); 5073 } 5074 return sb.toString(); 5075 } 5076 5077 // This test is for 8235812, with cases excluded by 8258259 lineBreakWithQuantifier()5078 private static void lineBreakWithQuantifier() { 5079 // key: pattern 5080 // value: lengths of input that must match the pattern 5081 Map<String, List<Integer>> cases = Map.ofEntries( 5082 Map.entry("\\R?", List.of(0, 1)), 5083 Map.entry("\\R*", List.of(0, 1, 2, 3)), 5084 Map.entry("\\R+", List.of(1, 2, 3)), 5085 Map.entry("\\R{0}", List.of(0)), 5086 Map.entry("\\R{1}", List.of(1)), 5087 // Map.entry("\\R{2}", List.of(2)), // 8258259 5088 // Map.entry("\\R{3}", List.of(3)), // 8258259 5089 Map.entry("\\R{0,}", List.of(0, 1, 2, 3)), 5090 Map.entry("\\R{1,}", List.of(1, 2, 3)), 5091 // Map.entry("\\R{2,}", List.of(2, 3)), // 8258259 5092 // Map.entry("\\R{3,}", List.of(3)), // 8258259 5093 Map.entry("\\R{0,0}", List.of(0)), 5094 Map.entry("\\R{0,1}", List.of(0, 1)), 5095 Map.entry("\\R{0,2}", List.of(0, 1, 2)), 5096 Map.entry("\\R{0,3}", List.of(0, 1, 2, 3)), 5097 Map.entry("\\R{1,1}", List.of(1)), 5098 Map.entry("\\R{1,2}", List.of(1, 2)), 5099 Map.entry("\\R{1,3}", List.of(1, 2, 3)), 5100 // Map.entry("\\R{2,2}", List.of(2)), // 8258259 5101 // Map.entry("\\R{2,3}", List.of(2, 3)), // 8258259 5102 // Map.entry("\\R{3,3}", List.of(3)), // 8258259 5103 Map.entry("\\R", List.of(1)), 5104 Map.entry("\\R\\R", List.of(2)), 5105 Map.entry("\\R\\R\\R", List.of(3)) 5106 ); 5107 5108 // key: length of input 5109 // value: all possible inputs of given length 5110 Map<Integer, List<String>> inputs = new HashMap<>(); 5111 String[] Rs = { "\r\n", "\r", "\n", 5112 "\u000B", "\u000C", "\u0085", "\u2028", "\u2029" }; 5113 StringBuilder sb = new StringBuilder(); 5114 for (int len = 0; len <= 3; ++len) { 5115 int[] idx = new int[len + 1]; 5116 do { 5117 sb.setLength(0); 5118 for (int j = 0; j < len; ++j) 5119 sb.append(Rs[idx[j]]); 5120 inputs.computeIfAbsent(len, ArrayList::new).add(sb.toString()); 5121 idx[0]++; 5122 for (int j = 0; j < len; ++j) { 5123 if (idx[j] < Rs.length) 5124 break; 5125 idx[j] = 0; 5126 idx[j+1]++; 5127 } 5128 } while (idx[len] == 0); 5129 } 5130 5131 // exhaustive testing 5132 for (String patStr : cases.keySet()) { 5133 Pattern[] pats = patStr.endsWith("R") 5134 ? new Pattern[] { Pattern.compile(patStr) } // no quantifiers 5135 : new Pattern[] { Pattern.compile(patStr), // greedy 5136 Pattern.compile(patStr + "?") }; // reluctant 5137 Matcher m = pats[0].matcher(""); 5138 for (Pattern p : pats) { 5139 m.usePattern(p); 5140 for (int len : cases.get(patStr)) { 5141 for (String in : inputs.get(len)) { 5142 if (!m.reset(in).matches()) { 5143 failCount++; 5144 System.err.println("Expected to match '" + 5145 s2x(in) + "' =~ /" + p + "/"); 5146 } 5147 } 5148 } 5149 } 5150 } 5151 report("lineBreakWithQuantifier"); 5152 } 5153 5154 // This test is for 8214245 caseInsensitivePMatch()5155 private static void caseInsensitivePMatch() { 5156 for (String input : List.of("abcd", "AbCd", "ABCD")) { 5157 for (String pattern : List.of("abcd", "aBcD", "[a-d]{4}", 5158 "(?:a|b|c|d){4}", "\\p{Lower}{4}", "\\p{Ll}{4}", 5159 "\\p{IsLl}{4}", "\\p{gc=Ll}{4}", 5160 "\\p{general_category=Ll}{4}", "\\p{IsLowercase}{4}", 5161 "\\p{javaLowerCase}{4}", "\\p{Upper}{4}", "\\p{Lu}{4}", 5162 "\\p{IsLu}{4}", "\\p{gc=Lu}{4}", "\\p{general_category=Lu}{4}", 5163 "\\p{IsUppercase}{4}", "\\p{javaUpperCase}{4}", 5164 "\\p{Lt}{4}", "\\p{IsLt}{4}", "\\p{gc=Lt}{4}", 5165 "\\p{general_category=Lt}{4}", "\\p{IsTitlecase}{4}", 5166 "\\p{javaTitleCase}{4}", "[\\p{Lower}]{4}", "[\\p{Ll}]{4}", 5167 "[\\p{IsLl}]{4}", "[\\p{gc=Ll}]{4}", 5168 "[\\p{general_category=Ll}]{4}", "[\\p{IsLowercase}]{4}", 5169 "[\\p{javaLowerCase}]{4}", "[\\p{Upper}]{4}", "[\\p{Lu}]{4}", 5170 "[\\p{IsLu}]{4}", "[\\p{gc=Lu}]{4}", 5171 "[\\p{general_category=Lu}]{4}", "[\\p{IsUppercase}]{4}", 5172 "[\\p{javaUpperCase}]{4}", "[\\p{Lt}]{4}", "[\\p{IsLt}]{4}", 5173 "[\\p{gc=Lt}]{4}", "[\\p{general_category=Lt}]{4}", 5174 "[\\p{IsTitlecase}]{4}", "[\\p{javaTitleCase}]{4}")) 5175 { 5176 if (!Pattern.compile(pattern, Pattern.CASE_INSENSITIVE) 5177 .matcher(input) 5178 .matches()) 5179 { 5180 failCount++; 5181 System.err.println("Expected to match: " + 5182 "'" + input + "' =~ /" + pattern + "/"); 5183 } 5184 } 5185 } 5186 5187 for (String input : List.of("\u01c7", "\u01c8", "\u01c9")) { 5188 for (String pattern : List.of("\u01c7", "\u01c8", "\u01c9", 5189 "[\u01c7\u01c8]", "[\u01c7\u01c9]", "[\u01c8\u01c9]", 5190 "[\u01c7-\u01c8]", "[\u01c8-\u01c9]", "[\u01c7-\u01c9]", 5191 "\\p{Lower}", "\\p{Ll}", "\\p{IsLl}", "\\p{gc=Ll}", 5192 "\\p{general_category=Ll}", "\\p{IsLowercase}", 5193 "\\p{javaLowerCase}", "\\p{Upper}", "\\p{Lu}", 5194 "\\p{IsLu}", "\\p{gc=Lu}", "\\p{general_category=Lu}", 5195 "\\p{IsUppercase}", "\\p{javaUpperCase}", 5196 "\\p{Lt}", "\\p{IsLt}", "\\p{gc=Lt}", 5197 "\\p{general_category=Lt}", "\\p{IsTitlecase}", 5198 "\\p{javaTitleCase}", "[\\p{Lower}]", "[\\p{Ll}]", 5199 "[\\p{IsLl}]", "[\\p{gc=Ll}]", 5200 "[\\p{general_category=Ll}]", "[\\p{IsLowercase}]", 5201 "[\\p{javaLowerCase}]", "[\\p{Upper}]", "[\\p{Lu}]", 5202 "[\\p{IsLu}]", "[\\p{gc=Lu}]", 5203 "[\\p{general_category=Lu}]", "[\\p{IsUppercase}]", 5204 "[\\p{javaUpperCase}]", "[\\p{Lt}]", "[\\p{IsLt}]", 5205 "[\\p{gc=Lt}]", "[\\p{general_category=Lt}]", 5206 "[\\p{IsTitlecase}]", "[\\p{javaTitleCase}]")) 5207 { 5208 if (!Pattern.compile(pattern, Pattern.CASE_INSENSITIVE 5209 | Pattern.UNICODE_CHARACTER_CLASS) 5210 .matcher(input) 5211 .matches()) 5212 { 5213 failCount++; 5214 System.err.println("Expected to match: " + 5215 "'" + input + "' =~ /" + pattern + "/"); 5216 } 5217 } 5218 } 5219 report("caseInsensitivePMatch"); 5220 } 5221 5222 // This test is for 8237599 surrogatePairOverlapRegion()5223 private static void surrogatePairOverlapRegion() { 5224 String input = "\ud801\udc37"; 5225 5226 Pattern p = Pattern.compile(".+"); 5227 Matcher m = p.matcher(input); 5228 m.region(0, 1); 5229 5230 boolean ok = m.find(); 5231 if (!ok || !m.group(0).equals(input.substring(0, 1))) 5232 { 5233 failCount++; 5234 System.out.println("Input \"" + input + "\".substr(0, 1)" + 5235 " expected to match pattern \"" + p + "\""); 5236 if (ok) { 5237 System.out.println("group(0): \"" + m.group(0) + "\""); 5238 } 5239 } else if (!m.hitEnd()) { 5240 failCount++; 5241 System.out.println("Expected m.hitEnd() == true"); 5242 } 5243 5244 p = Pattern.compile(".*(.)"); 5245 m = p.matcher(input); 5246 m.region(1, 2); 5247 5248 ok = m.find(); 5249 if (!ok || !m.group(0).equals(input.substring(1, 2)) 5250 || !m.group(1).equals(input.substring(1, 2))) 5251 { 5252 failCount++; 5253 System.out.println("Input \"" + input + "\".substr(1, 2)" + 5254 " expected to match pattern \"" + p + "\""); 5255 if (ok) { 5256 System.out.println("group(0): \"" + m.group(0) + "\""); 5257 System.out.println("group(1): \"" + m.group(1) + "\""); 5258 } 5259 } 5260 report("surrogatePairOverlapRegion"); 5261 } 5262 5263 //This test is for 8037397 droppedClassesWithIntersection()5264 private static void droppedClassesWithIntersection() { 5265 String rx = "[A-Z&&[A-Z]0-9]"; 5266 String ry = "[A-Z&&[A-F][G-Z]0-9]"; 5267 5268 Stream<Character> letterChars = IntStream.range('A', 'Z').mapToObj((i) -> (char) i); 5269 Stream<Character> digitChars = IntStream.range('0', '9').mapToObj((i) -> (char) i); 5270 5271 boolean letterCharsMatch = letterChars.allMatch((ch) -> { 5272 String chString = ch.toString(); 5273 return chString.matches(rx) && chString.matches(ry); 5274 }); 5275 5276 boolean digitCharsDontMatch = digitChars.noneMatch((ch) -> { 5277 String chString = ch.toString(); 5278 return chString.matches(rx) && chString.matches(ry); 5279 }); 5280 5281 5282 if (!letterCharsMatch) { 5283 failCount++; 5284 System.out.println("Compiling intersection pattern is dropping a character class in its matcher"); 5285 } 5286 5287 if (!digitCharsDontMatch) { 5288 failCount++; 5289 System.out.println("Compiling intersection pattern is matching digits where it should not"); 5290 } 5291 5292 } 5293 } 5294