1 /*
2 * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 /**
25 * @test
26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
27 * @author Mike McCloskey
28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
37 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
38 * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
39 * 8216332 8214245 8237599 8241055 8247546 8258259
40 *
41 * @library /test/lib
42 * @library /lib/testlibrary/java/lang
43 * @build jdk.test.lib.RandomFactory
44 * @run main RegExTest
45 * @key randomness
46 */
47
48 import java.io.BufferedReader;
49 import java.io.ByteArrayInputStream;
50 import java.io.ByteArrayOutputStream;
51 import java.io.File;
52 import java.io.FileInputStream;
53 import java.io.InputStreamReader;
54 import java.io.ObjectInputStream;
55 import java.io.ObjectOutputStream;
56 import java.math.BigInteger;
makeStreamTestData()57 import java.nio.CharBuffer;
58 import java.nio.file.Files;
59 import java.nio.file.Path;
60 import java.nio.file.Paths;
61 import java.util.ArrayList;
62 import java.util.Arrays;
63 import java.util.HashMap;
64 import java.util.List;
65 import java.util.Map;
66 import java.util.Random;
67 import java.util.Scanner;
68 import java.util.function.Function;
69 import java.util.function.Predicate;
70 import java.util.regex.Matcher;
71 import java.util.regex.MatchResult;
72 import java.util.regex.Pattern;
73 import java.util.regex.PatternSyntaxException;
74 import java.util.stream.Stream;
75
76 import jdk.test.lib.RandomFactory;
77
78 /**
79 * This is a test class created to check the operation of
80 * the Pattern and Matcher classes.
81 */
82 public class RegExTest {
83
// Shared pseudo-random source for the randomized tests, obtained from RandomFactory.
private static Random generator = RandomFactory.getRandom();
// Set by report() as soon as any test has recorded a failure; main() checks it at the end.
private static boolean failure = false;
// Failures recorded by the test currently running; report() resets it to zero.
private static int failCount = 0;
// Name of the first test for which report() saw a non-zero failCount.
private static String firstFailure = null;
88
89 /**
90 * Main to interpret arguments and run several tests.
91 *
92 */
93 public static void main(String[] args) throws Exception {
94 // Most of the tests are in a file
95 processFile("TestCases.txt");
96 //processFile("PerlCases.txt");
97 processFile("BMPTestCases.txt");
98 processFile("SupplementaryTestCases.txt");
99
100 // These test many randomly generated char patterns
101 bm();
102 slice();
103
104 // These are hard to put into the file
105 escapes();
106 blankInput();
107
108 // Substitition tests on randomly generated sequences
109 globalSubstitute();
110 stringbufferSubstitute();
111 stringbuilderSubstitute();
112
113 substitutionBasher();
114 substitutionBasher2();
115
116 // Canonical Equivalence
117 ceTest();
118
119 // Anchors
120 anchorTest();
121
122 // boolean match calls
123 matchesTest();
124 lookingAtTest();
125
126 // Pattern API
127 patternMatchesTest();
128
129 // Misc
130 lookbehindTest();
131 nullArgumentTest();
132 backRefTest();
133 groupCaptureTest();
134 caretTest();
135 charClassTest();
testPatternSplitAsStream(String description, String input, Pattern pattern)136 emptyPatternTest();
137 findIntTest();
138 group0Test();
139 longPatternTest();
140 octalTest();
141 ampersandTest();
142 negationTest();
143 splitTest();
144 appendTest();
145 caseFoldingTest();
146 commentsTest();
147 unixLinesTest();
testReplaceFirst(String description, String input, Pattern pattern)148 replaceFirstTest();
149 gTest();
150 zTest();
151 serializeTest();
152 reluctantRepetitionTest();
153 multilineDollarTest();
154 dollarAtEndTest();
155 caretBetweenTerminatorsTest();
testReplaceAll(String description, String input, Pattern pattern)156 // This RFE rejected in Tiger numOccurrencesTest();
157 javaCharClassTest();
158 nonCaptureRepetitionTest();
159 notCapturedGroupCurlyMatchTest();
160 escapedSegmentTest();
161 literalPatternTest();
162 literalReplacementTest();
163 regionTest();
164 toStringTest();
165 negatedCharClassTest();
166 findFromTest();
167 boundsTest();
168 unicodeWordBoundsTest();
169 caretAtEndTest();
170 wordSearchTest();
171 hitEndTest();
172 toMatchResultTest();
173 toMatchResultTest2();
testMatchResults(String description, String input, Pattern pattern)174 surrogatesInClassTest();
175 removeQEQuotingTest();
176 namedGroupCaptureTest();
177 nonBmpClassComplementTest();
178 unicodePropertiesTest();
179 unicodeHexNotationTest();
180 unicodeClassesTest();
181 unicodeCharacterNameTest();
182 horizontalAndVerticalWSTest();
183 linebreakTest();
184 branchTest();
185 groupCurlyNotFoundSuppTest();
186 groupCurlyBackoffTest();
187 patternAsPredicate();
188 patternAsMatchPredicate();
189 invalidFlags();
testLateBinding()190 embeddedFlags();
191 grapheme();
192 expoBacktracking();
193 invalidGroupName();
194 illegalRepetitionRange();
195 surrogatePairWithCanonEq();
196 lineBreakWithQuantifier();
197 caseInsensitivePMatch();
198 surrogatePairOverlapRegion();
199
200 if (failure) {
201 throw new
202 RuntimeException("RegExTest failed, 1st failure: " +
testFailfastMatchResults()203 firstFailure);
204 } else {
205 System.err.println("OKAY: All tests passed.");
206 }
207 }
208
209 // Utility functions
210
211 private static String getRandomAlphaString(int length) {
212 StringBuffer buf = new StringBuffer(length);
213 for (int i=0; i<length; i++) {
214 char randChar = (char)(97 + generator.nextInt(26));
215 buf.append(randChar);
216 }
217 return buf.toString();
218 }
219
220 private static void check(Matcher m, String expected) {
221 m.find();
222 if (!m.group().equals(expected))
223 failCount++;
224 }
225
226 private static void check(Matcher m, String result, boolean expected) {
227 m.find();
228 if (m.group().equals(result) != expected)
229 failCount++;
230 }
231
232 private static void check(Pattern p, String s, boolean expected) {
233 if (p.matcher(s).find() != expected)
234 failCount++;
235 }
236
237 private static void check(String p, String s, boolean expected) {
238 Matcher matcher = Pattern.compile(p).matcher(s);
239 if (matcher.find() != expected)
240 failCount++;
241 }
242
243 private static void check(String p, char c, boolean expected) {
244 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
245 Pattern pattern = Pattern.compile(propertyPattern);
246 char[] ca = new char[1]; ca[0] = c;
247 Matcher matcher = pattern.matcher(new String(ca));
248 if (!matcher.find())
testFailfastReplace()249 failCount++;
250 }
251
252 private static void check(String p, int codePoint, boolean expected) {
253 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
254 Pattern pattern = Pattern.compile(propertyPattern);
255 char[] ca = Character.toChars(codePoint);
256 Matcher matcher = pattern.matcher(new String(ca));
257 if (!matcher.find())
258 failCount++;
259 }
260
261 private static void check(String p, int flag, String input, String s,
262 boolean expected)
263 {
264 Pattern pattern = Pattern.compile(p, flag);
265 Matcher matcher = pattern.matcher(input);
266 if (expected)
267 check(matcher, s, expected);
268 else
269 check(pattern, input, false);
270 }
271
272 private static void report(String testName) {
273 int spacesToAdd = 30 - testName.length();
274 StringBuffer paddedNameBuffer = new StringBuffer(testName);
275 for (int i=0; i<spacesToAdd; i++)
MatchResultHolder(Matcher m)276 paddedNameBuffer.append(" ");
277 String paddedName = paddedNameBuffer.toString();
278 System.err.println(paddedName + ": " +
279 (failCount==0 ? "Passed":"Failed("+failCount+")"));
MatchResultHolder(MatchResult mr)280 if (failCount > 0) {
281 failure = true;
282
283 if (firstFailure == null) {
284 firstFailure = testName;
285 }
286 }
287
288 failCount = 0;
289 }
290
291 /**
292 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
293 * supplementary characters. This method does NOT fully take care
294 * of the regex syntax.
295 */
296 private static String toSupplementaries(String s) {
297 int length = s.length();
298 StringBuffer sb = new StringBuffer(length * 2);
299
300 for (int i = 0; i < length; ) {
301 char c = s.charAt(i++);
302 if (c == '\\') {
303 sb.append(c);
304 if (i < length) {
305 c = s.charAt(i++);
306 sb.append(c);
307 if (c == 'u') {
308 // assume no syntax error
309 sb.append(s.charAt(i++));
310 sb.append(s.charAt(i++));
311 sb.append(s.charAt(i++));
312 sb.append(s.charAt(i++));
313 }
314 }
315 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
316 sb.append('\ud800').append((char)('\udc00'+c));
317 } else {
318 sb.append(c);
equals(Object that)319 }
320 }
321 return sb.toString();
322 }
323
324 // Regular expression tests
325
// This is for bug 6178785
// Test if an expected NPE gets thrown when passing in a null argument
328 private static boolean check(Runnable test) {
329 try {
330 test.run();
331 failCount++;
332 return false;
333 } catch (NullPointerException npe) {
334 return true;
335 }
336 }
337
338 private static void nullArgumentTest() {
339 check(() -> Pattern.compile(null));
340 check(() -> Pattern.matches(null, null));
341 check(() -> Pattern.matches("xyz", null));
342 check(() -> Pattern.quote(null));
343 check(() -> Pattern.compile("xyz").split(null));
344 check(() -> Pattern.compile("xyz").matcher(null));
345
346 final Matcher m = Pattern.compile("xyz").matcher("xyz");
347 m.matches();
348 check(() -> m.appendTail((StringBuffer) null));
349 check(() -> m.appendTail((StringBuilder)null));
350 check(() -> m.replaceAll((String) null));
351 check(() -> m.replaceAll((Function<MatchResult, String>)null));
352 check(() -> m.replaceFirst((String)null));
353 check(() -> m.replaceFirst((Function<MatchResult, String>) null));
354 check(() -> m.appendReplacement((StringBuffer)null, null));
355 check(() -> m.appendReplacement((StringBuilder)null, null));
356 check(() -> m.reset(null));
357 check(() -> Matcher.quoteReplacement(null));
358 //check(() -> m.usePattern(null));
359
360 report("Null Argument");
361 }
362
363 // This is for bug6635133
364 // Test if surrogate pair in Unicode escapes can be handled correctly.
365 private static void surrogatesInClassTest() throws Exception {
366 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
367 Matcher matcher = pattern.matcher("\ud834\udd22");
368 if (!matcher.find())
369 failCount++;
370
371 report("Surrogate pair in Unicode escape");
372 }
373
374 // This is for bug6990617
375 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
376 // char encoding is only 2 or 3 digits instead of 4 and the first quoted
377 // char is an octal digit.
378 private static void removeQEQuotingTest() throws Exception {
379 Pattern pattern =
380 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
381 Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
382 if (!matcher.find())
383 failCount++;
384
385 report("Remove Q/E Quoting");
386 }
387
388 // This is for bug 4988891
389 // Test toMatchResult to see that it is a copy of the Matcher
390 // that is not affected by subsequent operations on the original
391 private static void toMatchResultTest() throws Exception {
392 Pattern pattern = Pattern.compile("squid");
393 Matcher matcher = pattern.matcher(
394 "agiantsquidofdestinyasmallsquidoffate");
395 matcher.find();
396 int matcherStart1 = matcher.start();
397 MatchResult mr = matcher.toMatchResult();
398 if (mr == matcher)
399 failCount++;
400 int resultStart1 = mr.start();
401 if (matcherStart1 != resultStart1)
402 failCount++;
403 matcher.find();
404 int matcherStart2 = matcher.start();
405 int resultStart2 = mr.start();
406 if (matcherStart2 == resultStart2)
407 failCount++;
408 if (resultStart1 != resultStart2)
409 failCount++;
410 MatchResult mr2 = matcher.toMatchResult();
411 if (mr == mr2)
412 failCount++;
413 if (mr2.start() != matcherStart2)
414 failCount++;
415 report("toMatchResult is a copy");
416 }
417
418 private static void checkExpectedISE(Runnable test) {
419 try {
420 test.run();
421 failCount++;
422 } catch (IllegalStateException x) {
423 } catch (IndexOutOfBoundsException xx) {
424 failCount++;
425 }
426 }
427
428 private static void checkExpectedIOOE(Runnable test) {
429 try {
430 test.run();
431 failCount++;
432 } catch (IndexOutOfBoundsException x) {}
433 }
434
435 // This is for bug 8074678
// Test that the result of toMatchResult throws ISE if no match is available
437 private static void toMatchResultTest2() throws Exception {
438 Matcher matcher = Pattern.compile("nomatch").matcher("hello world");
439 matcher.find();
440 MatchResult mr = matcher.toMatchResult();
441
442 checkExpectedISE(() -> mr.start());
443 checkExpectedISE(() -> mr.start(2));
444 checkExpectedISE(() -> mr.end());
445 checkExpectedISE(() -> mr.end(2));
446 checkExpectedISE(() -> mr.group());
447 checkExpectedISE(() -> mr.group(2));
448
449 matcher = Pattern.compile("(match)").matcher("there is a match");
450 matcher.find();
451 MatchResult mr2 = matcher.toMatchResult();
452 checkExpectedIOOE(() -> mr2.start(2));
453 checkExpectedIOOE(() -> mr2.end(2));
454 checkExpectedIOOE(() -> mr2.group(2));
455
456 report("toMatchResult2 appropriate exceptions");
457 }
458
459 // This is for bug 5013885
460 // Must test a slice to see if it reports hitEnd correctly
461 private static void hitEndTest() throws Exception {
462 // Basic test of Slice node
463 Pattern p = Pattern.compile("^squidattack");
464 Matcher m = p.matcher("squack");
465 m.find();
466 if (m.hitEnd())
467 failCount++;
468 m.reset("squid");
469 m.find();
470 if (!m.hitEnd())
471 failCount++;
472
473 // Test Slice, SliceA and SliceU nodes
474 for (int i=0; i<3; i++) {
475 int flags = 0;
476 if (i==1) flags = Pattern.CASE_INSENSITIVE;
477 if (i==2) flags = Pattern.UNICODE_CASE;
478 p = Pattern.compile("^abc", flags);
479 m = p.matcher("ad");
480 m.find();
481 if (m.hitEnd())
482 failCount++;
483 m.reset("ab");
484 m.find();
485 if (!m.hitEnd())
486 failCount++;
487 }
488
489 // Test Boyer-Moore node
490 p = Pattern.compile("catattack");
491 m = p.matcher("attack");
492 m.find();
493 if (!m.hitEnd())
494 failCount++;
495
496 p = Pattern.compile("catattack");
497 m = p.matcher("attackattackattackcatatta");
498 m.find();
499 if (!m.hitEnd())
500 failCount++;
501
502 // 8184706: Matching u+0d at EOL against \R should hit-end
503 p = Pattern.compile("...\\R");
504 m = p.matcher("cat" + (char)0x0a);
505 m.find();
506 if (m.hitEnd())
507 failCount++;
508
509 m = p.matcher("cat" + (char)0x0d);
510 m.find();
511 if (!m.hitEnd())
512 failCount++;
513
514 m = p.matcher("cat" + (char)0x0d + (char)0x0a);
515 m.find();
516 if (m.hitEnd())
517 failCount++;
518
519 report("hitEnd");
520 }
521
522 // This is for bug 4997476
523 // It is weird code submitted by customer demonstrating a regression
524 private static void wordSearchTest() throws Exception {
525 String testString = new String("word1 word2 word3");
526 Pattern p = Pattern.compile("\\b");
527 Matcher m = p.matcher(testString);
528 int position = 0;
529 int start = 0;
530 while (m.find(position)) {
531 start = m.start();
532 if (start == testString.length())
533 break;
534 if (m.find(start+1)) {
535 position = m.start();
536 } else {
537 position = testString.length();
538 }
539 if (testString.substring(start, position).equals(" "))
540 continue;
541 if (!testString.substring(start, position-1).startsWith("word"))
542 failCount++;
543 }
544 report("Customer word search");
545 }
546
547 // This is for bug 4994840
548 private static void caretAtEndTest() throws Exception {
549 // Problem only occurs with multiline patterns
550 // containing a beginning-of-line caret "^" followed
551 // by an expression that also matches the empty string.
552 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
553 Matcher matcher = pattern.matcher("\r");
554 matcher.find();
555 matcher.find();
556 report("Caret at end");
557 }
558
559 // This test is for 4979006
560 // Check to see if word boundary construct properly handles unicode
561 // non spacing marks
562 private static void unicodeWordBoundsTest() throws Exception {
563 String spaces = " ";
564 String wordChar = "a";
565 String nsm = "\u030a";
566
567 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
568
569 Pattern pattern = Pattern.compile("\\b");
570 Matcher matcher = pattern.matcher("");
571 // S=other B=word character N=non spacing mark .=word boundary
572 // SS.BB.SS
573 String input = spaces + wordChar + wordChar + spaces;
574 twoFindIndexes(input, matcher, 2, 4);
575 // SS.BBN.SS
576 input = spaces + wordChar +wordChar + nsm + spaces;
577 twoFindIndexes(input, matcher, 2, 5);
578 // SS.BN.SS
579 input = spaces + wordChar + nsm + spaces;
580 twoFindIndexes(input, matcher, 2, 4);
581 // SS.BNN.SS
582 input = spaces + wordChar + nsm + nsm + spaces;
583 twoFindIndexes(input, matcher, 2, 5);
584 // SSN.BB.SS
585 input = spaces + nsm + wordChar + wordChar + spaces;
586 twoFindIndexes(input, matcher, 3, 5);
587 // SS.BNB.SS
588 input = spaces + wordChar + nsm + wordChar + spaces;
589 twoFindIndexes(input, matcher, 2, 5);
590 // SSNNSS
591 input = spaces + nsm + nsm + spaces;
592 matcher.reset(input);
593 if (matcher.find())
594 failCount++;
595 // SSN.BBN.SS
596 input = spaces + nsm + wordChar + wordChar + nsm + spaces;
597 twoFindIndexes(input, matcher, 3, 6);
598
599 report("Unicode word boundary");
600 }
601
602 private static void twoFindIndexes(String input, Matcher matcher, int a,
603 int b) throws Exception
604 {
605 matcher.reset(input);
606 matcher.find();
607 if (matcher.start() != a)
608 failCount++;
609 matcher.find();
610 if (matcher.start() != b)
611 failCount++;
612 }
613
614 // This test is for 6284152
615 static void check(String regex, String input, String[] expected) {
616 List<String> result = new ArrayList<String>();
617 Pattern p = Pattern.compile(regex);
618 Matcher m = p.matcher(input);
619 while (m.find()) {
620 result.add(m.group());
621 }
622 if (!Arrays.asList(expected).equals(result))
623 failCount++;
624 }
625
626 private static void lookbehindTest() throws Exception {
627 //Positive
628 check("(?<=%.{0,5})foo\\d",
629 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
630 new String[]{"foo1", "foo2", "foo3"});
631
632 //boundary at end of the lookbehind sub-regex should work consistently
633 //with the boundary just after the lookbehind sub-regex
634 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
635 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
636 check("(?<!abc )\\bfoo", "abc foo", new String[0]);
637 check("(?<!abc \\b)foo", "abc foo", new String[0]);
638
639 //Negative
640 check("(?<!%.{0,5})foo\\d",
641 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
642 new String[] {"foo4", "foo5"});
643
644 //Positive greedy
645 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
646
647 //Positive reluctant
648 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
649
650 //supplementary
651 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
652 new String[] {"fo\ud800\udc00o"});
653 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
654 new String[] {"fo\ud800\udc00o"});
655 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
656 new String[] {"fo\ud800\udc00o"});
657 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
658 new String[] {"fo\ud800\udc00o"});
659 report("Lookbehind");
660 }
661
662 // This test is for 4938995
663 // Check to see if weak region boundaries are transparent to
664 // lookahead and lookbehind constructs
/**
 * Drives one matcher through a series of reset/region/useTransparentBounds
 * calls and records a failure whenever find() disagrees with the expected
 * outcome for lookbehind/lookahead around "dog". The statements are
 * order-dependent because the matcher state carries over between steps.
 */
private static void boundsTest() throws Exception {
    String fullMessage = "catdogcat";
    Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
    // Input missing the final 't': the lookahead must fail
    Matcher matcher = pattern.matcher("catdogca");
    matcher.useTransparentBounds(true);
    if (matcher.find())
        failCount++;
    // Input missing the leading 'c': the lookbehind must fail
    matcher.reset("atdogcat");
    if (matcher.find())
        failCount++;
    matcher.reset(fullMessage);
    if (!matcher.find())
        failCount++;
    // With transparent bounds, a match is expected even when the region
    // excludes the text the lookaround constructs need to inspect
    matcher.reset(fullMessage);
    matcher.region(0,9);
    if (!matcher.find())
        failCount++;
    matcher.reset(fullMessage);
    matcher.region(0,6);
    if (!matcher.find())
        failCount++;
    matcher.reset(fullMessage);
    matcher.region(3,6);
    if (!matcher.find())
        failCount++;
    // Continues on the same matcher without a reset; no further match is expected
    matcher.useTransparentBounds(false);
    if (matcher.find())
        failCount++;

    // Negative lookahead/lookbehind
    pattern = Pattern.compile("(?<!cat)dog(?!cat)");
    matcher = pattern.matcher("dogcat");
    matcher.useTransparentBounds(true);
    matcher.region(0,3);
    if (matcher.find())
        failCount++;
    matcher.reset("catdog");
    matcher.region(3,6);
    if (matcher.find())
        failCount++;
    // With opaque bounds the surrounding "cat" is expected to be invisible,
    // so the negative lookarounds succeed
    matcher.useTransparentBounds(false);
    matcher.reset("dogcat");
    matcher.region(0,3);
    if (!matcher.find())
        failCount++;
    matcher.reset("catdog");
    matcher.region(3,6);
    if (!matcher.find())
        failCount++;

    report("Region bounds transparency");
}
717
718 // This test is for 4945394
719 private static void findFromTest() throws Exception {
720 String message = "This is 40 $0 message.";
721 Pattern pat = Pattern.compile("\\$0");
722 Matcher match = pat.matcher(message);
723 if (!match.find())
724 failCount++;
725 if (match.find())
726 failCount++;
727 if (match.find())
728 failCount++;
729 report("Check for alternating find");
730 }
731
732 // This test is for 4872664 and 4892980
/**
 * Checks negated character classes that mix ASCII members with non-ASCII
 * members, both through Matcher and through String.split. Each expectation
 * that does not hold records one failure.
 */
private static void negatedCharClassTest() throws Exception {
    // A non-ASCII character is not '>' and so must match the negated class
    Pattern pattern = Pattern.compile("[^>]");
    Matcher matcher = pattern.matcher("\u203A");
    if (!matcher.matches())
        failCount++;
    pattern = Pattern.compile("[^fr]");
    matcher = pattern.matcher("a");
    if (!matcher.find())
        failCount++;
    matcher.reset("\u203A");
    if (!matcher.find())
        failCount++;
    // The same negated class used as a split delimiter
    String s = "for";
    String result[] = s.split("[^fr]");
    if (!result[0].equals("f"))
        failCount++;
    if (!result[1].equals("r"))
        failCount++;
    s = "f\u203Ar";
    result = s.split("[^fr]");
    if (!result[0].equals("f"))
        failCount++;
    if (!result[1].equals("r"))
        failCount++;

    // Test adding to bits, subtracting a node, then adding to bits again
    pattern = Pattern.compile("[^f\u203Ar]");
    matcher = pattern.matcher("a");
    if (!matcher.find())
        failCount++;
    matcher.reset("f");
    if (matcher.find())
        failCount++;
    matcher.reset("\u203A");
    if (matcher.find())
        failCount++;
    matcher.reset("r");
    if (matcher.find())
        failCount++;
    matcher.reset("\u203B");
    if (!matcher.find())
        failCount++;

    // Test subtracting a node, adding to bits, subtracting again
    pattern = Pattern.compile("[^\u203Ar\u203B]");
    matcher = pattern.matcher("a");
    if (!matcher.find())
        failCount++;
    matcher.reset("\u203A");
    if (matcher.find())
        failCount++;
    matcher.reset("r");
    if (matcher.find())
        failCount++;
    matcher.reset("\u203B");
    if (matcher.find())
        failCount++;
    matcher.reset("\u203C");
    if (!matcher.find())
        failCount++;

    report("Negated Character Class");
}
796
797 // This test is for 4628291
798 private static void toStringTest() throws Exception {
799 Pattern pattern = Pattern.compile("b+");
800 if (pattern.toString() != "b+")
801 failCount++;
802 Matcher matcher = pattern.matcher("aaabbbccc");
803 String matcherString = matcher.toString(); // unspecified
804 matcher.find();
805 matcherString = matcher.toString(); // unspecified
806 matcher.region(0,3);
807 matcherString = matcher.toString(); // unspecified
808 matcher.reset();
809 matcherString = matcher.toString(); // unspecified
810 report("toString");
811 }
812
813 // This test is for 4808962
/**
 * Checks literal matching, both via the LITERAL flag and via Pattern.quote,
 * for BMP input first and then for the same cases converted with
 * toSupplementaries. Further flags are OR-ed into {@code flags} step by step
 * as the test progresses.
 */
private static void literalPatternTest() throws Exception {
    int flags = Pattern.LITERAL;

    // Regex metacharacters and escapes must be taken literally
    Pattern pattern = Pattern.compile("abc\\t$^", flags);
    check(pattern, "abc\\t$^", true);

    pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
    check(pattern, "abc\\t$^", true);

    // Under LITERAL the quoting markers themselves are ordinary text
    pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
    check(pattern, "\\Qa^$bcabc\\E", true);
    check(pattern, "a^$bcabc", false);

    // Escaped backslashes next to Q/E must not start or end a quoted section
    pattern = Pattern.compile("\\\\Q\\\\E");
    check(pattern, "\\Q\\E", true);

    pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
    check(pattern, "abcefg\\Q\\Ehij", true);

    pattern = Pattern.compile("\\\\\\Q\\\\E");
    check(pattern, "\\\\\\\\", true);

    // Pattern.quote on text that already contains quoting markers
    pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
    check(pattern, "\\Qa^$bcabc\\E", true);
    check(pattern, "a^$bcabc", false);

    pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
    check(pattern, "\\Qabc\\Edef", true);
    check(pattern, "abcdef", false);

    pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
    check(pattern, "abc\\Edef", true);
    check(pattern, "abcdef", false);

    pattern = Pattern.compile(Pattern.quote("\\E"));
    check(pattern, "\\E", true);

    // Unbalanced parentheses are fine when the pattern is literal
    pattern = Pattern.compile("((((abc.+?:)", flags);
    check(pattern, "((((abc.+?:)", true);

    flags |= Pattern.MULTILINE;

    // Anchors stay literal characters even with MULTILINE set
    pattern = Pattern.compile("^cat$", flags);
    check(pattern, "abc^cat$def", true);
    check(pattern, "cat", false);

    flags |= Pattern.CASE_INSENSITIVE;

    pattern = Pattern.compile("abcdef", flags);
    check(pattern, "ABCDEF", true);
    check(pattern, "AbCdEf", true);

    flags |= Pattern.DOTALL;

    // Dots stay literal dots even with DOTALL set
    pattern = Pattern.compile("a...b", flags);
    check(pattern, "A...b", true);
    check(pattern, "Axxxb", false);

    flags |= Pattern.CANON_EQ;

    // NOTE(review): the pattern compiled here is assigned to 'p', but the two
    // checks below run against 'pattern', which is still the "a...b" pattern
    // from the DOTALL step. Looks unintended - confirm which pattern and which
    // expected values were meant before changing it.
    Pattern p = Pattern.compile("testa\u030a", flags);
    check(pattern, "testa\u030a", false);
    check(pattern, "test\u00e5", false);

    // Supplementary character test
    flags = Pattern.LITERAL;

    pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
    check(pattern, toSupplementaries("abc\\t$^"), true);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
    check(pattern, toSupplementaries("abc\\t$^"), true);

    pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
    check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
    check(pattern, toSupplementaries("a^$bcabc"), false);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
    check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
    check(pattern, toSupplementaries("a^$bcabc"), false);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
    check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
    check(pattern, toSupplementaries("abcdef"), false);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
    check(pattern, toSupplementaries("abc\\Edef"), true);
    check(pattern, toSupplementaries("abcdef"), false);

    pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
    check(pattern, toSupplementaries("((((abc.+?:)"), true);

    flags |= Pattern.MULTILINE;

    pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
    check(pattern, toSupplementaries("abc^cat$def"), true);
    check(pattern, toSupplementaries("cat"), false);

    flags |= Pattern.DOTALL;

    // note: this is case-sensitive.
    pattern = Pattern.compile(toSupplementaries("a...b"), flags);
    check(pattern, toSupplementaries("a...b"), true);
    check(pattern, toSupplementaries("axxxb"), false);

    flags |= Pattern.CANON_EQ;

    // NOTE(review): same situation as in the BMP section above - 'p' is
    // assigned but the checks use 'pattern' (the supplementary "a...b"
    // pattern). Confirm intent.
    String t = toSupplementaries("test");
    p = Pattern.compile(t + "a\u030a", flags);
    check(pattern, t + "a\u030a", false);
    check(pattern, t + "\u00e5", false);

    report("Literal pattern");
}
928
929 // This test is for 4803179
930 // This test is also for 4808962, replacement parts
931 private static void literalReplacementTest() throws Exception {
932 int flags = Pattern.LITERAL;
933
934 Pattern pattern = Pattern.compile("abc", flags);
935 Matcher matcher = pattern.matcher("zzzabczzz");
936 String replaceTest = "$0";
937 String result = matcher.replaceAll(replaceTest);
938 if (!result.equals("zzzabczzz"))
939 failCount++;
940
941 matcher.reset();
942 String literalReplacement = matcher.quoteReplacement(replaceTest);
943 result = matcher.replaceAll(literalReplacement);
944 if (!result.equals("zzz$0zzz"))
945 failCount++;
946
947 matcher.reset();
948 replaceTest = "\\t$\\$";
949 literalReplacement = matcher.quoteReplacement(replaceTest);
950 result = matcher.replaceAll(literalReplacement);
951 if (!result.equals("zzz\\t$\\$zzz"))
952 failCount++;
953
954 // Supplementary character test
955 pattern = Pattern.compile(toSupplementaries("abc"), flags);
956 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
957 replaceTest = "$0";
958 result = matcher.replaceAll(replaceTest);
959 if (!result.equals(toSupplementaries("zzzabczzz")))
960 failCount++;
961
962 matcher.reset();
963 literalReplacement = matcher.quoteReplacement(replaceTest);
964 result = matcher.replaceAll(literalReplacement);
965 if (!result.equals(toSupplementaries("zzz$0zzz")))
966 failCount++;
967
968 matcher.reset();
969 replaceTest = "\\t$\\$";
970 literalReplacement = matcher.quoteReplacement(replaceTest);
971 result = matcher.replaceAll(literalReplacement);
972 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
973 failCount++;
974
975 // IAE should be thrown if backslash or '$' is the last character
976 // in replacement string
977 try {
978 "\uac00".replaceAll("\uac00", "$");
979 failCount++;
980 } catch (IllegalArgumentException iie) {
981 } catch (Exception e) {
982 failCount++;
983 }
984 try {
985 "\uac00".replaceAll("\uac00", "\\");
986 failCount++;
987 } catch (IllegalArgumentException iie) {
988 } catch (Exception e) {
989 failCount++;
990 }
991 report("Literal replacement");
992 }
993
994 // This test is for 4757029
995 private static void regionTest() throws Exception {
996 Pattern pattern = Pattern.compile("abc");
997 Matcher matcher = pattern.matcher("abcdefabc");
998
999 matcher.region(0,9);
1000 if (!matcher.find())
1001 failCount++;
1002 if (!matcher.find())
1003 failCount++;
1004 matcher.region(0,3);
1005 if (!matcher.find())
1006 failCount++;
1007 matcher.region(3,6);
1008 if (matcher.find())
1009 failCount++;
1010 matcher.region(0,2);
1011 if (matcher.find())
1012 failCount++;
1013
1014 expectRegionFail(matcher, 1, -1);
1015 expectRegionFail(matcher, -1, -1);
1016 expectRegionFail(matcher, -1, 1);
1017 expectRegionFail(matcher, 5, 3);
1018 expectRegionFail(matcher, 5, 12);
1019 expectRegionFail(matcher, 12, 12);
1020
1021 pattern = Pattern.compile("^abc$");
1022 matcher = pattern.matcher("zzzabczzz");
1023 matcher.region(0,9);
1024 if (matcher.find())
1025 failCount++;
1026 matcher.region(3,6);
1027 if (!matcher.find())
1028 failCount++;
1029 matcher.region(3,6);
1030 matcher.useAnchoringBounds(false);
1031 if (matcher.find())
1032 failCount++;
1033
1034 // Supplementary character test
1035 pattern = Pattern.compile(toSupplementaries("abc"));
1036 matcher = pattern.matcher(toSupplementaries("abcdefabc"));
1037 matcher.region(0,9*2);
1038 if (!matcher.find())
1039 failCount++;
1040 if (!matcher.find())
1041 failCount++;
1042 matcher.region(0,3*2);
1043 if (!matcher.find())
1044 failCount++;
1045 matcher.region(1,3*2);
1046 if (matcher.find())
1047 failCount++;
1048 matcher.region(3*2,6*2);
1049 if (matcher.find())
1050 failCount++;
1051 matcher.region(0,2*2);
1052 if (matcher.find())
1053 failCount++;
1054 matcher.region(0,2*2+1);
1055 if (matcher.find())
1056 failCount++;
1057
1058 expectRegionFail(matcher, 1*2, -1);
1059 expectRegionFail(matcher, -1, -1);
1060 expectRegionFail(matcher, -1, 1*2);
1061 expectRegionFail(matcher, 5*2, 3*2);
1062 expectRegionFail(matcher, 5*2, 12*2);
1063 expectRegionFail(matcher, 12*2, 12*2);
1064
1065 pattern = Pattern.compile(toSupplementaries("^abc$"));
1066 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
1067 matcher.region(0,9*2);
1068 if (matcher.find())
1069 failCount++;
1070 matcher.region(3*2,6*2);
1071 if (!matcher.find())
1072 failCount++;
1073 matcher.region(3*2+1,6*2);
1074 if (matcher.find())
1075 failCount++;
1076 matcher.region(3*2,6*2-1);
1077 if (matcher.find())
1078 failCount++;
1079 matcher.region(3*2,6*2);
1080 matcher.useAnchoringBounds(false);
1081 if (matcher.find())
1082 failCount++;
1083
1084 // JDK-8230829
1085 pattern = Pattern.compile("\\ud800\\udc61");
1086 matcher = pattern.matcher("\ud800\udc61");
1087 matcher.region(0, 1);
1088 if (matcher.find()) {
1089 failCount++;
1090 System.out.println("Matched a surrogate pair" +
1091 " that crosses border of region");
1092 }
1093 if (!matcher.hitEnd()) {
1094 failCount++;
1095 System.out.println("Expected to hit the end when" +
1096 " matching a surrogate pair crossing region");
1097 }
1098
1099 report("Regions");
1100 }
1101
1102 private static void expectRegionFail(Matcher matcher, int index1,
1103 int index2)
1104 {
1105 try {
1106 matcher.region(index1, index2);
1107 failCount++;
1108 } catch (IndexOutOfBoundsException ioobe) {
1109 // Correct result
1110 } catch (IllegalStateException ise) {
1111 // Correct result
1112 }
1113 }
1114
1115 // This test is for 4803197
1116 private static void escapedSegmentTest() throws Exception {
1117
1118 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
1119 check(pattern, "dir1\\dir2", true);
1120
1121 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
1122 check(pattern, "dir1\\dir2\\", true);
1123
1124 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
1125 check(pattern, "dir1\\dir2\\", true);
1126
1127 // Supplementary character test
1128 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1129 check(pattern, toSupplementaries("dir1\\dir2"), true);
1130
1131 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1132 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1133
1134 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1135 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1136
1137 report("Escaped segment");
1138 }
1139
1140 // This test is for 4792284
1141 private static void nonCaptureRepetitionTest() throws Exception {
1142 String input = "abcdefgh;";
1143
1144 String[] patterns = new String[] {
1145 "(?:\\w{4})+;",
1146 "(?:\\w{8})*;",
1147 "(?:\\w{2}){2,4};",
1148 "(?:\\w{4}){2,};", // only matches the
1149 ".*?(?:\\w{5})+;", // specified minimum
1150 ".*?(?:\\w{9})*;", // number of reps - OK
1151 "(?:\\w{4})+?;", // lazy repetition - OK
1152 "(?:\\w{4})++;", // possessive repetition - OK
1153 "(?:\\w{2,}?)+;", // non-deterministic - OK
1154 "(\\w{4})+;", // capturing group - OK
1155 };
1156
1157 for (int i = 0; i < patterns.length; i++) {
1158 // Check find()
1159 check(patterns[i], 0, input, input, true);
1160 // Check matches()
1161 Pattern p = Pattern.compile(patterns[i]);
1162 Matcher m = p.matcher(input);
1163
1164 if (m.matches()) {
1165 if (!m.group(0).equals(input))
1166 failCount++;
1167 } else {
1168 failCount++;
1169 }
1170 }
1171
1172 report("Non capturing repetition");
1173 }
1174
1175 // This test is for 6358731
1176 private static void notCapturedGroupCurlyMatchTest() throws Exception {
1177 Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1178 Matcher matcher = pattern.matcher("abcd");
1179 if (!matcher.matches() ||
1180 matcher.group(1) != null ||
1181 !matcher.group(2).equals("abcd")) {
1182 failCount++;
1183 }
1184 report("Not captured GroupCurly");
1185 }
1186
1187 // This test is for 4706545
1188 private static void javaCharClassTest() throws Exception {
1189 for (int i=0; i<1000; i++) {
1190 char c = (char)generator.nextInt();
1191 check("{javaLowerCase}", c, Character.isLowerCase(c));
1192 check("{javaUpperCase}", c, Character.isUpperCase(c));
1193 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1194 check("{javaTitleCase}", c, Character.isTitleCase(c));
1195 check("{javaDigit}", c, Character.isDigit(c));
1196 check("{javaDefined}", c, Character.isDefined(c));
1197 check("{javaLetter}", c, Character.isLetter(c));
1198 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1199 check("{javaJavaIdentifierStart}", c,
1200 Character.isJavaIdentifierStart(c));
1201 check("{javaJavaIdentifierPart}", c,
1202 Character.isJavaIdentifierPart(c));
1203 check("{javaUnicodeIdentifierStart}", c,
1204 Character.isUnicodeIdentifierStart(c));
1205 check("{javaUnicodeIdentifierPart}", c,
1206 Character.isUnicodeIdentifierPart(c));
1207 check("{javaIdentifierIgnorable}", c,
1208 Character.isIdentifierIgnorable(c));
1209 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1210 check("{javaWhitespace}", c, Character.isWhitespace(c));
1211 check("{javaISOControl}", c, Character.isISOControl(c));
1212 check("{javaMirrored}", c, Character.isMirrored(c));
1213
1214 }
1215
1216 // Supplementary character test
1217 for (int i=0; i<1000; i++) {
1218 int c = generator.nextInt(Character.MAX_CODE_POINT
1219 - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1220 + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1221 check("{javaLowerCase}", c, Character.isLowerCase(c));
1222 check("{javaUpperCase}", c, Character.isUpperCase(c));
1223 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1224 check("{javaTitleCase}", c, Character.isTitleCase(c));
1225 check("{javaDigit}", c, Character.isDigit(c));
1226 check("{javaDefined}", c, Character.isDefined(c));
1227 check("{javaLetter}", c, Character.isLetter(c));
1228 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1229 check("{javaJavaIdentifierStart}", c,
1230 Character.isJavaIdentifierStart(c));
1231 check("{javaJavaIdentifierPart}", c,
1232 Character.isJavaIdentifierPart(c));
1233 check("{javaUnicodeIdentifierStart}", c,
1234 Character.isUnicodeIdentifierStart(c));
1235 check("{javaUnicodeIdentifierPart}", c,
1236 Character.isUnicodeIdentifierPart(c));
1237 check("{javaIdentifierIgnorable}", c,
1238 Character.isIdentifierIgnorable(c));
1239 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1240 check("{javaWhitespace}", c, Character.isWhitespace(c));
1241 check("{javaISOControl}", c, Character.isISOControl(c));
1242 check("{javaMirrored}", c, Character.isMirrored(c));
1243 }
1244
1245 report("Java character classes");
1246 }
1247
1248 // This test is for 4523620
1249 /*
1250 private static void numOccurrencesTest() throws Exception {
1251 Pattern pattern = Pattern.compile("aaa");
1252
1253 if (pattern.numOccurrences("aaaaaa", false) != 2)
1254 failCount++;
1255 if (pattern.numOccurrences("aaaaaa", true) != 4)
1256 failCount++;
1257
1258 pattern = Pattern.compile("^");
1259 if (pattern.numOccurrences("aaaaaa", false) != 1)
1260 failCount++;
1261 if (pattern.numOccurrences("aaaaaa", true) != 1)
1262 failCount++;
1263
1264 report("Number of Occurrences");
1265 }
1266 */
1267
1268 // This test is for 4776374
1269 private static void caretBetweenTerminatorsTest() throws Exception {
1270 int flags1 = Pattern.DOTALL;
1271 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1272 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1273 int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1274
1275 check("^....", flags1, "test\ntest", "test", true);
1276 check(".....^", flags1, "test\ntest", "test", false);
1277 check(".....^", flags1, "test\n", "test", false);
1278 check("....^", flags1, "test\r\n", "test", false);
1279
1280 check("^....", flags2, "test\ntest", "test", true);
1281 check("....^", flags2, "test\ntest", "test", false);
1282 check(".....^", flags2, "test\n", "test", false);
1283 check("....^", flags2, "test\r\n", "test", false);
1284
1285 check("^....", flags3, "test\ntest", "test", true);
1286 check(".....^", flags3, "test\ntest", "test\n", true);
1287 check(".....^", flags3, "test\u0085test", "test\u0085", false);
1288 check(".....^", flags3, "test\n", "test", false);
1289 check(".....^", flags3, "test\r\n", "test", false);
1290 check("......^", flags3, "test\r\ntest", "test\r\n", true);
1291
1292 check("^....", flags4, "test\ntest", "test", true);
1293 check(".....^", flags3, "test\ntest", "test\n", true);
1294 check(".....^", flags4, "test\u0085test", "test\u0085", true);
1295 check(".....^", flags4, "test\n", "test\n", false);
1296 check(".....^", flags4, "test\r\n", "test\r", false);
1297
1298 // Supplementary character test
1299 String t = toSupplementaries("test");
1300 check("^....", flags1, t+"\n"+t, t, true);
1301 check(".....^", flags1, t+"\n"+t, t, false);
1302 check(".....^", flags1, t+"\n", t, false);
1303 check("....^", flags1, t+"\r\n", t, false);
1304
1305 check("^....", flags2, t+"\n"+t, t, true);
1306 check("....^", flags2, t+"\n"+t, t, false);
1307 check(".....^", flags2, t+"\n", t, false);
1308 check("....^", flags2, t+"\r\n", t, false);
1309
1310 check("^....", flags3, t+"\n"+t, t, true);
1311 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1312 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1313 check(".....^", flags3, t+"\n", t, false);
1314 check(".....^", flags3, t+"\r\n", t, false);
1315 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1316
1317 check("^....", flags4, t+"\n"+t, t, true);
1318 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1319 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1320 check(".....^", flags4, t+"\n", t+"\n", false);
1321 check(".....^", flags4, t+"\r\n", t+"\r", false);
1322
1323 report("Caret between terminators");
1324 }
1325
1326 // This test is for 4727935
1327 private static void dollarAtEndTest() throws Exception {
1328 int flags1 = Pattern.DOTALL;
1329 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1330 int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1331
1332 check("....$", flags1, "test\n", "test", true);
1333 check("....$", flags1, "test\r\n", "test", true);
1334 check(".....$", flags1, "test\n", "test\n", true);
1335 check(".....$", flags1, "test\u0085", "test\u0085", true);
1336 check("....$", flags1, "test\u0085", "test", true);
1337
1338 check("....$", flags2, "test\n", "test", true);
1339 check(".....$", flags2, "test\n", "test\n", true);
1340 check(".....$", flags2, "test\u0085", "test\u0085", true);
1341 check("....$", flags2, "test\u0085", "est\u0085", true);
1342
1343 check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1344 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1345 check("....$blah", flags3, "test\nblah", "!!!!", false);
1346 check(".....$blah", flags3, "test\nblah", "!!!!", false);
1347
1348 // Supplementary character test
1349 String t = toSupplementaries("test");
1350 String b = toSupplementaries("blah");
1351 check("....$", flags1, t+"\n", t, true);
1352 check("....$", flags1, t+"\r\n", t, true);
1353 check(".....$", flags1, t+"\n", t+"\n", true);
1354 check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1355 check("....$", flags1, t+"\u0085", t, true);
1356
1357 check("....$", flags2, t+"\n", t, true);
1358 check(".....$", flags2, t+"\n", t+"\n", true);
1359 check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1360 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1361
1362 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1363 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1364 check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1365 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1366
1367 report("Dollar at End");
1368 }
1369
1370 // This test is for 4711773
1371 private static void multilineDollarTest() throws Exception {
1372 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1373 Matcher matcher = findCR.matcher("first bit\nsecond bit");
1374 matcher.find();
1375 if (matcher.start(0) != 9)
1376 failCount++;
1377 matcher.find();
1378 if (matcher.start(0) != 20)
1379 failCount++;
1380
1381 // Supplementary character test
1382 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars
1383 matcher.find();
1384 if (matcher.start(0) != 9*2)
1385 failCount++;
1386 matcher.find();
1387 if (matcher.start(0) != 20*2)
1388 failCount++;
1389
1390 report("Multiline Dollar");
1391 }
1392
1393 private static void reluctantRepetitionTest() throws Exception {
1394 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1395 check(p, "1 word word word 2", true);
1396 check(p, "1 wor wo w 2", true);
1397 check(p, "1 word word 2", true);
1398 check(p, "1 word 2", true);
1399 check(p, "1 wo w w 2", true);
1400 check(p, "1 wo w 2", true);
1401 check(p, "1 wor w 2", true);
1402
1403 p = Pattern.compile("([a-z])+?c");
1404 Matcher m = p.matcher("ababcdefdec");
1405 check(m, "ababc");
1406
1407 // Supplementary character test
1408 p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1409 m = p.matcher(toSupplementaries("ababcdefdec"));
1410 check(m, toSupplementaries("ababc"));
1411
1412 report("Reluctant Repetition");
1413 }
1414
1415 private static Pattern serializedPattern(Pattern p) throws Exception {
1416 ByteArrayOutputStream baos = new ByteArrayOutputStream();
1417 ObjectOutputStream oos = new ObjectOutputStream(baos);
1418 oos.writeObject(p);
1419 oos.close();
1420 try (ObjectInputStream ois = new ObjectInputStream(
1421 new ByteArrayInputStream(baos.toByteArray()))) {
1422 return (Pattern)ois.readObject();
1423 }
1424 }
1425
1426 private static void serializeTest() throws Exception {
1427 String patternStr = "(b)";
1428 String matchStr = "b";
1429 Pattern pattern = Pattern.compile(patternStr);
1430 Pattern serializedPattern = serializedPattern(pattern);
1431 Matcher matcher = serializedPattern.matcher(matchStr);
1432 if (!matcher.matches())
1433 failCount++;
1434 if (matcher.groupCount() != 1)
1435 failCount++;
1436
1437 pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE);
1438 serializedPattern = serializedPattern(pattern);
1439 if (!serializedPattern.matcher("Ab").matches())
1440 failCount++;
1441 if (serializedPattern.matcher("AB").matches())
1442 failCount++;
1443
1444 report("Serialization");
1445 }
1446
1447 private static void gTest() {
1448 Pattern pattern = Pattern.compile("\\G\\w");
1449 Matcher matcher = pattern.matcher("abc#x#x");
1450 matcher.find();
1451 matcher.find();
1452 matcher.find();
1453 if (matcher.find())
1454 failCount++;
1455
1456 pattern = Pattern.compile("\\GA*");
1457 matcher = pattern.matcher("1A2AA3");
1458 matcher.find();
1459 if (matcher.find())
1460 failCount++;
1461
1462 pattern = Pattern.compile("\\GA*");
1463 matcher = pattern.matcher("1A2AA3");
1464 if (!matcher.find(1))
1465 failCount++;
1466 matcher.find();
1467 if (matcher.find())
1468 failCount++;
1469
1470 report("\\G");
1471 }
1472
1473 private static void zTest() {
1474 Pattern pattern = Pattern.compile("foo\\Z");
1475 // Positives
1476 check(pattern, "foo\u0085", true);
1477 check(pattern, "foo\u2028", true);
1478 check(pattern, "foo\u2029", true);
1479 check(pattern, "foo\n", true);
1480 check(pattern, "foo\r", true);
1481 check(pattern, "foo\r\n", true);
1482 // Negatives
1483 check(pattern, "fooo", false);
1484 check(pattern, "foo\n\r", false);
1485
1486 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1487 // Positives
1488 check(pattern, "foo", true);
1489 check(pattern, "foo\n", true);
1490 // Negatives
1491 check(pattern, "foo\r", false);
1492 check(pattern, "foo\u0085", false);
1493 check(pattern, "foo\u2028", false);
1494 check(pattern, "foo\u2029", false);
1495
1496 report("\\Z");
1497 }
1498
1499 private static void replaceFirstTest() {
1500 Pattern pattern = Pattern.compile("(ab)(c*)");
1501 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1502 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1503 failCount++;
1504
1505 matcher.reset("zzzabccczzzabcczzzabccczzz");
1506 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1507 failCount++;
1508
1509 matcher.reset("zzzabccczzzabcczzzabccczzz");
1510 String result = matcher.replaceFirst("$1");
1511 if (!result.equals("zzzabzzzabcczzzabccczzz"))
1512 failCount++;
1513
1514 matcher.reset("zzzabccczzzabcczzzabccczzz");
1515 result = matcher.replaceFirst("$2");
1516 if (!result.equals("zzzccczzzabcczzzabccczzz"))
1517 failCount++;
1518
1519 pattern = Pattern.compile("a*");
1520 matcher = pattern.matcher("aaaaaaaaaa");
1521 if (!matcher.replaceFirst("test").equals("test"))
1522 failCount++;
1523
1524 pattern = Pattern.compile("a+");
1525 matcher = pattern.matcher("zzzaaaaaaaaaa");
1526 if (!matcher.replaceFirst("test").equals("zzztest"))
1527 failCount++;
1528
1529 // Supplementary character test
1530 pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1531 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1532 if (!matcher.replaceFirst(toSupplementaries("test"))
1533 .equals(toSupplementaries("testzzzabcczzzabccc")))
1534 failCount++;
1535
1536 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1537 if (!matcher.replaceFirst(toSupplementaries("test")).
1538 equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1539 failCount++;
1540
1541 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1542 result = matcher.replaceFirst("$1");
1543 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1544 failCount++;
1545
1546 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1547 result = matcher.replaceFirst("$2");
1548 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1549 failCount++;
1550
1551 pattern = Pattern.compile(toSupplementaries("a*"));
1552 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1553 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1554 failCount++;
1555
1556 pattern = Pattern.compile(toSupplementaries("a+"));
1557 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1558 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1559 failCount++;
1560
1561 report("Replace First");
1562 }
1563
1564 private static void unixLinesTest() {
1565 Pattern pattern = Pattern.compile(".*");
1566 Matcher matcher = pattern.matcher("aa\u2028blah");
1567 matcher.find();
1568 if (!matcher.group(0).equals("aa"))
1569 failCount++;
1570
1571 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1572 matcher = pattern.matcher("aa\u2028blah");
1573 matcher.find();
1574 if (!matcher.group(0).equals("aa\u2028blah"))
1575 failCount++;
1576
1577 pattern = Pattern.compile("[az]$",
1578 Pattern.MULTILINE | Pattern.UNIX_LINES);
1579 matcher = pattern.matcher("aa\u2028zz");
1580 check(matcher, "a\u2028", false);
1581
1582 // Supplementary character test
1583 pattern = Pattern.compile(".*");
1584 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1585 matcher.find();
1586 if (!matcher.group(0).equals(toSupplementaries("aa")))
1587 failCount++;
1588
1589 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1590 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1591 matcher.find();
1592 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1593 failCount++;
1594
1595 pattern = Pattern.compile(toSupplementaries("[az]$"),
1596 Pattern.MULTILINE | Pattern.UNIX_LINES);
1597 matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1598 check(matcher, toSupplementaries("a\u2028"), false);
1599
1600 report("Unix Lines");
1601 }
1602
1603 private static void commentsTest() {
1604 int flags = Pattern.COMMENTS;
1605
1606 Pattern pattern = Pattern.compile("aa \\# aa", flags);
1607 Matcher matcher = pattern.matcher("aa#aa");
1608 if (!matcher.matches())
1609 failCount++;
1610
1611 pattern = Pattern.compile("aa # blah", flags);
1612 matcher = pattern.matcher("aa");
1613 if (!matcher.matches())
1614 failCount++;
1615
1616 pattern = Pattern.compile("aa blah", flags);
1617 matcher = pattern.matcher("aablah");
1618 if (!matcher.matches())
1619 failCount++;
1620
1621 pattern = Pattern.compile("aa # blah blech ", flags);
1622 matcher = pattern.matcher("aa");
1623 if (!matcher.matches())
1624 failCount++;
1625
1626 pattern = Pattern.compile("aa # blah\n ", flags);
1627 matcher = pattern.matcher("aa");
1628 if (!matcher.matches())
1629 failCount++;
1630
1631 pattern = Pattern.compile("aa # blah\nbc # blech", flags);
1632 matcher = pattern.matcher("aabc");
1633 if (!matcher.matches())
1634 failCount++;
1635
1636 pattern = Pattern.compile("aa # blah\nbc# blech", flags);
1637 matcher = pattern.matcher("aabc");
1638 if (!matcher.matches())
1639 failCount++;
1640
1641 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags);
1642 matcher = pattern.matcher("aabc#blech");
1643 if (!matcher.matches())
1644 failCount++;
1645
1646 // Supplementary character test
1647 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1648 matcher = pattern.matcher(toSupplementaries("aa#aa"));
1649 if (!matcher.matches())
1650 failCount++;
1651
1652 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags);
1653 matcher = pattern.matcher(toSupplementaries("aa"));
1654 if (!matcher.matches())
1655 failCount++;
1656
1657 pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1658 matcher = pattern.matcher(toSupplementaries("aablah"));
1659 if (!matcher.matches())
1660 failCount++;
1661
1662 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags);
1663 matcher = pattern.matcher(toSupplementaries("aa"));
1664 if (!matcher.matches())
1665 failCount++;
1666
1667 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags);
1668 matcher = pattern.matcher(toSupplementaries("aa"));
1669 if (!matcher.matches())
1670 failCount++;
1671
1672 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags);
1673 matcher = pattern.matcher(toSupplementaries("aabc"));
1674 if (!matcher.matches())
1675 failCount++;
1676
1677 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags);
1678 matcher = pattern.matcher(toSupplementaries("aabc"));
1679 if (!matcher.matches())
1680 failCount++;
1681
1682 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags);
1683 matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1684 if (!matcher.matches())
1685 failCount++;
1686
1687 report("Comments");
1688 }
1689
1690 private static void caseFoldingTest() { // bug 4504687
1691 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1692 Pattern pattern = Pattern.compile("aa", flags);
1693 Matcher matcher = pattern.matcher("ab");
1694 if (matcher.matches())
1695 failCount++;
1696
1697 pattern = Pattern.compile("aA", flags);
1698 matcher = pattern.matcher("ab");
1699 if (matcher.matches())
1700 failCount++;
1701
1702 pattern = Pattern.compile("aa", flags);
1703 matcher = pattern.matcher("aB");
1704 if (matcher.matches())
1705 failCount++;
1706 matcher = pattern.matcher("Ab");
1707 if (matcher.matches())
1708 failCount++;
1709
1710 // ASCII "a"
1711 // Latin-1 Supplement "a" + grave
1712 // Cyrillic "a"
1713 String[] patterns = new String[] {
1714 //single
1715 "a", "\u00e0", "\u0430",
1716 //slice
1717 "ab", "\u00e0\u00e1", "\u0430\u0431",
1718 //class single
1719 "[a]", "[\u00e0]", "[\u0430]",
1720 //class range
1721 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1722 //back reference
1723 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1724 };
1725
1726 String[] texts = new String[] {
1727 "A", "\u00c0", "\u0410",
1728 "AB", "\u00c0\u00c1", "\u0410\u0411",
1729 "A", "\u00c0", "\u0410",
1730 "B", "\u00c2", "\u0411",
1731 "aA", "\u00e0\u00c0", "\u0430\u0410"
1732 };
1733
1734 boolean[] expected = new boolean[] {
1735 true, false, false,
1736 true, false, false,
1737 true, false, false,
1738 true, false, false,
1739 true, false, false
1740 };
1741
1742 flags = Pattern.CASE_INSENSITIVE;
1743 for (int i = 0; i < patterns.length; i++) {
1744 pattern = Pattern.compile(patterns[i], flags);
1745 matcher = pattern.matcher(texts[i]);
1746 if (matcher.matches() != expected[i]) {
1747 System.out.println("<1> Failed at " + i);
1748 failCount++;
1749 }
1750 }
1751
1752 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1753 for (int i = 0; i < patterns.length; i++) {
1754 pattern = Pattern.compile(patterns[i], flags);
1755 matcher = pattern.matcher(texts[i]);
1756 if (!matcher.matches()) {
1757 System.out.println("<2> Failed at " + i);
1758 failCount++;
1759 }
1760 }
1761 // flag unicode_case alone should do nothing
1762 flags = Pattern.UNICODE_CASE;
1763 for (int i = 0; i < patterns.length; i++) {
1764 pattern = Pattern.compile(patterns[i], flags);
1765 matcher = pattern.matcher(texts[i]);
1766 if (matcher.matches()) {
1767 System.out.println("<3> Failed at " + i);
1768 failCount++;
1769 }
1770 }
1771
1772 // Special cases: i, I, u+0131 and u+0130
1773 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1774 pattern = Pattern.compile("[h-j]+", flags);
1775 if (!pattern.matcher("\u0131\u0130").matches())
1776 failCount++;
1777 report("Case Folding");
1778 }
1779
1780 private static void appendTest() {
1781 Pattern pattern = Pattern.compile("(ab)(cd)");
1782 Matcher matcher = pattern.matcher("abcd");
1783 String result = matcher.replaceAll("$2$1");
1784 if (!result.equals("cdab"))
1785 failCount++;
1786
1787 String s1 = "Swap all: first = 123, second = 456";
1788 String s2 = "Swap one: first = 123, second = 456";
1789 String r = "$3$2$1";
1790 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1791 matcher = pattern.matcher(s1);
1792
1793 result = matcher.replaceAll(r);
1794 if (!result.equals("Swap all: 123 = first, 456 = second"))
1795 failCount++;
1796
1797 matcher = pattern.matcher(s2);
1798
1799 if (matcher.find()) {
1800 StringBuffer sb = new StringBuffer();
1801 matcher.appendReplacement(sb, r);
1802 matcher.appendTail(sb);
1803 result = sb.toString();
1804 if (!result.equals("Swap one: 123 = first, second = 456"))
1805 failCount++;
1806 }
1807
1808 // Supplementary character test
1809 pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1810 matcher = pattern.matcher(toSupplementaries("abcd"));
1811 result = matcher.replaceAll("$2$1");
1812 if (!result.equals(toSupplementaries("cdab")))
1813 failCount++;
1814
1815 s1 = toSupplementaries("Swap all: first = 123, second = 456");
1816 s2 = toSupplementaries("Swap one: first = 123, second = 456");
1817 r = toSupplementaries("$3$2$1");
1818 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1819 matcher = pattern.matcher(s1);
1820
1821 result = matcher.replaceAll(r);
1822 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1823 failCount++;
1824
1825 matcher = pattern.matcher(s2);
1826
1827 if (matcher.find()) {
1828 StringBuffer sb = new StringBuffer();
1829 matcher.appendReplacement(sb, r);
1830 matcher.appendTail(sb);
1831 result = sb.toString();
1832 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1833 failCount++;
1834 }
1835 report("Append");
1836 }
1837
1838 private static void splitTest() {
1839 Pattern pattern = Pattern.compile(":");
1840 String[] result = pattern.split("foo:and:boo", 2);
1841 if (!result[0].equals("foo"))
1842 failCount++;
1843 if (!result[1].equals("and:boo"))
1844 failCount++;
1845 // Supplementary character test
1846 Pattern patternX = Pattern.compile(toSupplementaries("X"));
1847 result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1848 if (!result[0].equals(toSupplementaries("foo")))
1849 failCount++;
1850 if (!result[1].equals(toSupplementaries("andXboo")))
1851 failCount++;
1852
1853 CharBuffer cb = CharBuffer.allocate(100);
1854 cb.put("foo:and:boo");
1855 cb.flip();
1856 result = pattern.split(cb);
1857 if (!result[0].equals("foo"))
1858 failCount++;
1859 if (!result[1].equals("and"))
1860 failCount++;
1861 if (!result[2].equals("boo"))
1862 failCount++;
1863
1864 // Supplementary character test
1865 CharBuffer cbs = CharBuffer.allocate(100);
1866 cbs.put(toSupplementaries("fooXandXboo"));
1867 cbs.flip();
1868 result = patternX.split(cbs);
1869 if (!result[0].equals(toSupplementaries("foo")))
1870 failCount++;
1871 if (!result[1].equals(toSupplementaries("and")))
1872 failCount++;
1873 if (!result[2].equals(toSupplementaries("boo")))
1874 failCount++;
1875
1876 String source = "0123456789";
1877 for (int limit=-2; limit<3; limit++) {
1878 for (int x=0; x<10; x++) {
1879 result = source.split(Integer.toString(x), limit);
1880 int expectedLength = limit < 1 ? 2 : limit;
1881
1882 if ((limit == 0) && (x == 9)) {
1883 // expected dropping of ""
1884 if (result.length != 1)
1885 failCount++;
1886 if (!result[0].equals("012345678")) {
1887 failCount++;
1888 }
1889 } else {
1890 if (result.length != expectedLength) {
1891 failCount++;
1892 }
1893 if (!result[0].equals(source.substring(0,x))) {
1894 if (limit != 1) {
1895 failCount++;
1896 } else {
1897 if (!result[0].equals(source.substring(0,10))) {
1898 failCount++;
1899 }
1900 }
1901 }
1902 if (expectedLength > 1) { // Check segment 2
1903 if (!result[1].equals(source.substring(x+1,10)))
1904 failCount++;
1905 }
1906 }
1907 }
1908 }
1909 // Check the case for no match found
1910 for (int limit=-2; limit<3; limit++) {
1911 result = source.split("e", limit);
1912 if (result.length != 1)
1913 failCount++;
1914 if (!result[0].equals(source))
1915 failCount++;
1916 }
1917 // Check the case for limit == 0, source = "";
1918 // split() now returns 0-length for empty source "" see #6559590
1919 source = "";
1920 result = source.split("e", 0);
1921 if (result.length != 1)
1922 failCount++;
1923 if (!result[0].equals(source))
1924 failCount++;
1925
1926 // Check both split() and splitAsStraem(), especially for zero-lenth
1927 // input and zero-lenth match cases
1928 String[][] input = new String[][] {
1929 { " ", "Abc Efg Hij" }, // normal non-zero-match
1930 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match
1931 { " ", "Abc Efg Hij" }, // non-zero-match in the middle
1932 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match
1933 { "(?=\\p{Lu})", "AbcEfg" },
1934 { "(?=\\p{Lu})", "Abc" },
1935 { " ", "" }, // zero-length input
1936 { ".*", "" },
1937
1938 // some tests from PatternStreamTest.java
1939 { "4", "awgqwefg1fefw4vssv1vvv1" },
1940 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
1941 { "1", "awgqwefg1fefw4vssv1vvv1" },
1942 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
1943 { "\u56da", "1\u56da23\u56da456\u56da7890" },
1944 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
1945 { "\u56da", "" },
1946 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
1947 { "o", "boo:and:foo" },
1948 { "o", "booooo:and:fooooo" },
1949 { "o", "fooooo:" },
1950 };
1951
1952 String[][] expected = new String[][] {
1953 { "Abc", "Efg", "Hij" },
1954 { "", "Abc", "Efg", "Hij" },
1955 { "Abc", "", "Efg", "Hij" },
1956 { "Abc", "Efg", "Hij" },
1957 { "Abc", "Efg" },
1958 { "Abc" },
1959 { "" },
1960 { "" },
1961
1962 { "awgqwefg1fefw", "vssv1vvv1" },
1963 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
1964 { "awgqwefg", "fefw4vssv", "vvv" },
1965 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
1966 { "1", "23", "456", "7890" },
1967 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
1968 { "" },
1969 { "This", "is", "testing", "", "with", "different", "separators" },
1970 { "b", "", ":and:f" },
1971 { "b", "", "", "", "", ":and:f" },
1972 { "f", "", "", "", "", ":" },
1973 };
1974 for (int i = 0; i < input.length; i++) {
1975 pattern = Pattern.compile(input[i][0]);
1976 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) {
1977 failCount++;
1978 }
1979 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting
1980 // array for zero-length input for now
1981 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
1982 expected[i])) {
1983 failCount++;
1984 }
1985 }
1986 report("Split");
1987 }
1988
1989 private static void negationTest() {
1990 Pattern pattern = Pattern.compile("[\\[@^]+");
1991 Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1992 if (!matcher.find())
1993 failCount++;
1994 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1995 failCount++;
1996 pattern = Pattern.compile("[@\\[^]+");
1997 matcher = pattern.matcher("@@@@[[[[^^^^");
1998 if (!matcher.find())
1999 failCount++;
2000 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
2001 failCount++;
2002 pattern = Pattern.compile("[@\\[^@]+");
2003 matcher = pattern.matcher("@@@@[[[[^^^^");
2004 if (!matcher.find())
2005 failCount++;
2006 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
2007 failCount++;
2008
2009 pattern = Pattern.compile("\\)");
2010 matcher = pattern.matcher("xxx)xxx");
2011 if (!matcher.find())
2012 failCount++;
2013
2014 report("Negation");
2015 }
2016
2017 private static void ampersandTest() {
2018 Pattern pattern = Pattern.compile("[&@]+");
2019 check(pattern, "@@@@&&&&", true);
2020
2021 pattern = Pattern.compile("[@&]+");
2022 check(pattern, "@@@@&&&&", true);
2023
2024 pattern = Pattern.compile("[@\\&]+");
2025 check(pattern, "@@@@&&&&", true);
2026
2027 report("Ampersand");
2028 }
2029
2030 private static void octalTest() throws Exception {
2031 Pattern pattern = Pattern.compile("\\u0007");
2032 Matcher matcher = pattern.matcher("\u0007");
2033 if (!matcher.matches())
2034 failCount++;
2035 pattern = Pattern.compile("\\07");
2036 matcher = pattern.matcher("\u0007");
2037 if (!matcher.matches())
2038 failCount++;
2039 pattern = Pattern.compile("\\007");
2040 matcher = pattern.matcher("\u0007");
2041 if (!matcher.matches())
2042 failCount++;
2043 pattern = Pattern.compile("\\0007");
2044 matcher = pattern.matcher("\u0007");
2045 if (!matcher.matches())
2046 failCount++;
2047 pattern = Pattern.compile("\\040");
2048 matcher = pattern.matcher("\u0020");
2049 if (!matcher.matches())
2050 failCount++;
2051 pattern = Pattern.compile("\\0403");
2052 matcher = pattern.matcher("\u00203");
2053 if (!matcher.matches())
2054 failCount++;
2055 pattern = Pattern.compile("\\0103");
2056 matcher = pattern.matcher("\u0043");
2057 if (!matcher.matches())
2058 failCount++;
2059
2060 report("Octal");
2061 }
2062
2063 private static void longPatternTest() throws Exception {
2064 try {
2065 Pattern pattern = Pattern.compile(
2066 "a 32-character-long pattern xxxx");
2067 pattern = Pattern.compile("a 33-character-long pattern xxxxx");
2068 pattern = Pattern.compile("a thirty four character long regex");
2069 StringBuffer patternToBe = new StringBuffer(101);
2070 for (int i=0; i<100; i++)
2071 patternToBe.append((char)(97 + i%26));
2072 pattern = Pattern.compile(patternToBe.toString());
2073 } catch (PatternSyntaxException e) {
2074 failCount++;
2075 }
2076
2077 // Supplementary character test
2078 try {
2079 Pattern pattern = Pattern.compile(
2080 toSupplementaries("a 32-character-long pattern xxxx"));
2081 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
2082 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
2083 StringBuffer patternToBe = new StringBuffer(101*2);
2084 for (int i=0; i<100; i++)
2085 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
2086 + 97 + i%26));
2087 pattern = Pattern.compile(patternToBe.toString());
2088 } catch (PatternSyntaxException e) {
2089 failCount++;
2090 }
2091 report("LongPattern");
2092 }
2093
2094 private static void group0Test() throws Exception {
2095 Pattern pattern = Pattern.compile("(tes)ting");
2096 Matcher matcher = pattern.matcher("testing");
2097 check(matcher, "testing");
2098
2099 matcher.reset("testing");
2100 if (matcher.lookingAt()) {
2101 if (!matcher.group(0).equals("testing"))
2102 failCount++;
2103 } else {
2104 failCount++;
2105 }
2106
2107 matcher.reset("testing");
2108 if (matcher.matches()) {
2109 if (!matcher.group(0).equals("testing"))
2110 failCount++;
2111 } else {
2112 failCount++;
2113 }
2114
2115 pattern = Pattern.compile("(tes)ting");
2116 matcher = pattern.matcher("testing");
2117 if (matcher.lookingAt()) {
2118 if (!matcher.group(0).equals("testing"))
2119 failCount++;
2120 } else {
2121 failCount++;
2122 }
2123
2124 pattern = Pattern.compile("^(tes)ting");
2125 matcher = pattern.matcher("testing");
2126 if (matcher.matches()) {
2127 if (!matcher.group(0).equals("testing"))
2128 failCount++;
2129 } else {
2130 failCount++;
2131 }
2132
2133 // Supplementary character test
2134 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2135 matcher = pattern.matcher(toSupplementaries("testing"));
2136 check(matcher, toSupplementaries("testing"));
2137
2138 matcher.reset(toSupplementaries("testing"));
2139 if (matcher.lookingAt()) {
2140 if (!matcher.group(0).equals(toSupplementaries("testing")))
2141 failCount++;
2142 } else {
2143 failCount++;
2144 }
2145
2146 matcher.reset(toSupplementaries("testing"));
2147 if (matcher.matches()) {
2148 if (!matcher.group(0).equals(toSupplementaries("testing")))
2149 failCount++;
2150 } else {
2151 failCount++;
2152 }
2153
2154 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2155 matcher = pattern.matcher(toSupplementaries("testing"));
2156 if (matcher.lookingAt()) {
2157 if (!matcher.group(0).equals(toSupplementaries("testing")))
2158 failCount++;
2159 } else {
2160 failCount++;
2161 }
2162
2163 pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
2164 matcher = pattern.matcher(toSupplementaries("testing"));
2165 if (matcher.matches()) {
2166 if (!matcher.group(0).equals(toSupplementaries("testing")))
2167 failCount++;
2168 } else {
2169 failCount++;
2170 }
2171
2172 report("Group0");
2173 }
2174
2175 private static void findIntTest() throws Exception {
2176 Pattern p = Pattern.compile("blah");
2177 Matcher m = p.matcher("zzzzblahzzzzzblah");
2178 boolean result = m.find(2);
2179 if (!result)
2180 failCount++;
2181
2182 p = Pattern.compile("$");
2183 m = p.matcher("1234567890");
2184 result = m.find(10);
2185 if (!result)
2186 failCount++;
2187 try {
2188 result = m.find(11);
2189 failCount++;
2190 } catch (IndexOutOfBoundsException e) {
2191 // correct result
2192 }
2193
2194 // Supplementary character test
2195 p = Pattern.compile(toSupplementaries("blah"));
2196 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
2197 result = m.find(2);
2198 if (!result)
2199 failCount++;
2200
2201 report("FindInt");
2202 }
2203
2204 private static void emptyPatternTest() throws Exception {
2205 Pattern p = Pattern.compile("");
2206 Matcher m = p.matcher("foo");
2207
2208 // Should find empty pattern at beginning of input
2209 boolean result = m.find();
2210 if (result != true)
2211 failCount++;
2212 if (m.start() != 0)
2213 failCount++;
2214
2215 // Should not match entire input if input is not empty
2216 m.reset();
2217 result = m.matches();
2218 if (result == true)
2219 failCount++;
2220
2221 try {
2222 m.start(0);
2223 failCount++;
2224 } catch (IllegalStateException e) {
2225 // Correct result
2226 }
2227
2228 // Should match entire input if input is empty
2229 m.reset("");
2230 result = m.matches();
2231 if (result != true)
2232 failCount++;
2233
2234 result = Pattern.matches("", "");
2235 if (result != true)
2236 failCount++;
2237
2238 result = Pattern.matches("", "foo");
2239 if (result == true)
2240 failCount++;
2241 report("EmptyPattern");
2242 }
2243
2244 private static void charClassTest() throws Exception {
2245 Pattern pattern = Pattern.compile("blah[ab]]blech");
2246 check(pattern, "blahb]blech", true);
2247
2248 pattern = Pattern.compile("[abc[def]]");
2249 check(pattern, "b", true);
2250
2251 // Supplementary character tests
2252 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2253 check(pattern, toSupplementaries("blahb]blech"), true);
2254
2255 pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2256 check(pattern, toSupplementaries("b"), true);
2257
2258 try {
2259 // u00ff when UNICODE_CASE
2260 pattern = Pattern.compile("[ab\u00ffcd]",
2261 Pattern.CASE_INSENSITIVE|
2262 Pattern.UNICODE_CASE);
2263 check(pattern, "ab\u00ffcd", true);
2264 check(pattern, "Ab\u0178Cd", true);
2265
2266 // u00b5 when UNICODE_CASE
2267 pattern = Pattern.compile("[ab\u00b5cd]",
2268 Pattern.CASE_INSENSITIVE|
2269 Pattern.UNICODE_CASE);
2270 check(pattern, "ab\u00b5cd", true);
2271 check(pattern, "Ab\u039cCd", true);
2272 } catch (Exception e) { failCount++; }
2273
2274 /* Special cases
2275 (1)LatinSmallLetterLongS u+017f
2276 (2)LatinSmallLetterDotlessI u+0131
2277 (3)LatineCapitalLetterIWithDotAbove u+0130
2278 (4)KelvinSign u+212a
2279 (5)AngstromSign u+212b
2280 */
2281 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2282 pattern = Pattern.compile("[sik\u00c5]+", flags);
2283 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2284 failCount++;
2285
2286 report("CharClass");
2287 }
2288
2289 private static void caretTest() throws Exception {
2290 Pattern pattern = Pattern.compile("\\w*");
2291 Matcher matcher = pattern.matcher("a#bc#def##g");
2292 check(matcher, "a");
2293 check(matcher, "");
2294 check(matcher, "bc");
2295 check(matcher, "");
2296 check(matcher, "def");
2297 check(matcher, "");
2298 check(matcher, "");
2299 check(matcher, "g");
2300 check(matcher, "");
2301 if (matcher.find())
2302 failCount++;
2303
2304 pattern = Pattern.compile("^\\w*");
2305 matcher = pattern.matcher("a#bc#def##g");
2306 check(matcher, "a");
2307 if (matcher.find())
2308 failCount++;
2309
2310 pattern = Pattern.compile("\\w");
2311 matcher = pattern.matcher("abc##x");
2312 check(matcher, "a");
2313 check(matcher, "b");
2314 check(matcher, "c");
2315 check(matcher, "x");
2316 if (matcher.find())
2317 failCount++;
2318
2319 pattern = Pattern.compile("^\\w");
2320 matcher = pattern.matcher("abc##x");
2321 check(matcher, "a");
2322 if (matcher.find())
2323 failCount++;
2324
2325 pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2326 matcher = pattern.matcher("abcdef-ghi\njklmno");
2327 check(matcher, "abc");
2328 if (matcher.find())
2329 failCount++;
2330
2331 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2332 matcher = pattern.matcher("abcdef-ghi\njklmno");
2333 check(matcher, "abc");
2334 check(matcher, "jkl");
2335 if (matcher.find())
2336 failCount++;
2337
2338 pattern = Pattern.compile("^", Pattern.MULTILINE);
2339 matcher = pattern.matcher("this is some text");
2340 String result = matcher.replaceAll("X");
2341 if (!result.equals("Xthis is some text"))
2342 failCount++;
2343
2344 pattern = Pattern.compile("^");
2345 matcher = pattern.matcher("this is some text");
2346 result = matcher.replaceAll("X");
2347 if (!result.equals("Xthis is some text"))
2348 failCount++;
2349
2350 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2351 matcher = pattern.matcher("this is some text\n");
2352 result = matcher.replaceAll("X");
2353 if (!result.equals("Xthis is some text\n"))
2354 failCount++;
2355
2356 report("Caret");
2357 }
2358
2359 private static void groupCaptureTest() throws Exception {
2360 // Independent group
2361 Pattern pattern = Pattern.compile("x+(?>y+)z+");
2362 Matcher matcher = pattern.matcher("xxxyyyzzz");
2363 matcher.find();
2364 try {
2365 String blah = matcher.group(1);
2366 failCount++;
2367 } catch (IndexOutOfBoundsException ioobe) {
2368 // Good result
2369 }
2370 // Pure group
2371 pattern = Pattern.compile("x+(?:y+)z+");
2372 matcher = pattern.matcher("xxxyyyzzz");
2373 matcher.find();
2374 try {
2375 String blah = matcher.group(1);
2376 failCount++;
2377 } catch (IndexOutOfBoundsException ioobe) {
2378 // Good result
2379 }
2380
2381 // Supplementary character tests
2382 // Independent group
2383 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2384 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2385 matcher.find();
2386 try {
2387 String blah = matcher.group(1);
2388 failCount++;
2389 } catch (IndexOutOfBoundsException ioobe) {
2390 // Good result
2391 }
2392 // Pure group
2393 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2394 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2395 matcher.find();
2396 try {
2397 String blah = matcher.group(1);
2398 failCount++;
2399 } catch (IndexOutOfBoundsException ioobe) {
2400 // Good result
2401 }
2402
2403 report("GroupCapture");
2404 }
2405
2406 private static void backRefTest() throws Exception {
2407 Pattern pattern = Pattern.compile("(a*)bc\\1");
2408 check(pattern, "zzzaabcazzz", true);
2409
2410 pattern = Pattern.compile("(a*)bc\\1");
2411 check(pattern, "zzzaabcaazzz", true);
2412
2413 pattern = Pattern.compile("(abc)(def)\\1");
2414 check(pattern, "abcdefabc", true);
2415
2416 pattern = Pattern.compile("(abc)(def)\\3");
2417 check(pattern, "abcdefabc", false);
2418
2419 try {
2420 for (int i = 1; i < 10; i++) {
2421 // Make sure backref 1-9 are always accepted
2422 pattern = Pattern.compile("abcdef\\" + i);
2423 // and fail to match if the target group does not exit
2424 check(pattern, "abcdef", false);
2425 }
2426 } catch(PatternSyntaxException e) {
2427 failCount++;
2428 }
2429
2430 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2431 check(pattern, "abcdefghija", false);
2432 check(pattern, "abcdefghija1", true);
2433
2434 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2435 check(pattern, "abcdefghijkk", true);
2436
2437 pattern = Pattern.compile("(a)bcdefghij\\11");
2438 check(pattern, "abcdefghija1", true);
2439
2440 // Supplementary character tests
2441 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2442 check(pattern, toSupplementaries("zzzaabcazzz"), true);
2443
2444 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2445 check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2446
2447 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2448 check(pattern, toSupplementaries("abcdefabc"), true);
2449
2450 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2451 check(pattern, toSupplementaries("abcdefabc"), false);
2452
2453 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2454 check(pattern, toSupplementaries("abcdefghija"), false);
2455 check(pattern, toSupplementaries("abcdefghija1"), true);
2456
2457 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2458 check(pattern, toSupplementaries("abcdefghijkk"), true);
2459
2460 report("BackRef");
2461 }
2462
2463 /**
2464 * Unicode Technical Report #18, section 2.6 End of Line
2465 * There is no empty line to be matched in the sequence \u000D\u000A
2466 * but there is an empty line in the sequence \u000A\u000D.
2467 */
2468 private static void anchorTest() throws Exception {
2469 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2470 Matcher m = p.matcher("blah1\r\nblah2");
2471 m.find();
2472 m.find();
2473 if (!m.group().equals("blah2"))
2474 failCount++;
2475
2476 m.reset("blah1\n\rblah2");
2477 m.find();
2478 m.find();
2479 m.find();
2480 if (!m.group().equals("blah2"))
2481 failCount++;
2482
2483 // Test behavior of $ with \r\n at end of input
2484 p = Pattern.compile(".+$");
2485 m = p.matcher("blah1\r\n");
2486 if (!m.find())
2487 failCount++;
2488 if (!m.group().equals("blah1"))
2489 failCount++;
2490 if (m.find())
2491 failCount++;
2492
2493 // Test behavior of $ with \r\n at end of input in multiline
2494 p = Pattern.compile(".+$", Pattern.MULTILINE);
2495 m = p.matcher("blah1\r\n");
2496 if (!m.find())
2497 failCount++;
2498 if (m.find())
2499 failCount++;
2500
2501 // Test for $ recognition of \u0085 for bug 4527731
2502 p = Pattern.compile(".+$", Pattern.MULTILINE);
2503 m = p.matcher("blah1\u0085");
2504 if (!m.find())
2505 failCount++;
2506
2507 // Supplementary character test
2508 p = Pattern.compile("^.*$", Pattern.MULTILINE);
2509 m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2510 m.find();
2511 m.find();
2512 if (!m.group().equals(toSupplementaries("blah2")))
2513 failCount++;
2514
2515 m.reset(toSupplementaries("blah1\n\rblah2"));
2516 m.find();
2517 m.find();
2518 m.find();
2519 if (!m.group().equals(toSupplementaries("blah2")))
2520 failCount++;
2521
2522 // Test behavior of $ with \r\n at end of input
2523 p = Pattern.compile(".+$");
2524 m = p.matcher(toSupplementaries("blah1\r\n"));
2525 if (!m.find())
2526 failCount++;
2527 if (!m.group().equals(toSupplementaries("blah1")))
2528 failCount++;
2529 if (m.find())
2530 failCount++;
2531
2532 // Test behavior of $ with \r\n at end of input in multiline
2533 p = Pattern.compile(".+$", Pattern.MULTILINE);
2534 m = p.matcher(toSupplementaries("blah1\r\n"));
2535 if (!m.find())
2536 failCount++;
2537 if (m.find())
2538 failCount++;
2539
2540 // Test for $ recognition of \u0085 for bug 4527731
2541 p = Pattern.compile(".+$", Pattern.MULTILINE);
2542 m = p.matcher(toSupplementaries("blah1\u0085"));
2543 if (!m.find())
2544 failCount++;
2545
2546 report("Anchors");
2547 }
2548
2549 /**
2550 * A basic sanity test of Matcher.lookingAt().
2551 */
2552 private static void lookingAtTest() throws Exception {
2553 Pattern p = Pattern.compile("(ab)(c*)");
2554 Matcher m = p.matcher("abccczzzabcczzzabccc");
2555
2556 if (!m.lookingAt())
2557 failCount++;
2558
2559 if (!m.group().equals(m.group(0)))
2560 failCount++;
2561
2562 m = p.matcher("zzzabccczzzabcczzzabccczzz");
2563 if (m.lookingAt())
2564 failCount++;
2565
2566 // Supplementary character test
2567 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2568 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2569
2570 if (!m.lookingAt())
2571 failCount++;
2572
2573 if (!m.group().equals(m.group(0)))
2574 failCount++;
2575
2576 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2577 if (m.lookingAt())
2578 failCount++;
2579
2580 report("Looking At");
2581 }
2582
2583 /**
2584 * A basic sanity test of Matcher.matches().
2585 */
2586 private static void matchesTest() throws Exception {
2587 // matches()
2588 Pattern p = Pattern.compile("ulb(c*)");
2589 Matcher m = p.matcher("ulbcccccc");
2590 if (!m.matches())
2591 failCount++;
2592
2593 // find() but not matches()
2594 m.reset("zzzulbcccccc");
2595 if (m.matches())
2596 failCount++;
2597
2598 // lookingAt() but not matches()
2599 m.reset("ulbccccccdef");
2600 if (m.matches())
2601 failCount++;
2602
2603 // matches()
2604 p = Pattern.compile("a|ad");
2605 m = p.matcher("ad");
2606 if (!m.matches())
2607 failCount++;
2608
2609 // Supplementary character test
2610 // matches()
2611 p = Pattern.compile(toSupplementaries("ulb(c*)"));
2612 m = p.matcher(toSupplementaries("ulbcccccc"));
2613 if (!m.matches())
2614 failCount++;
2615
2616 // find() but not matches()
2617 m.reset(toSupplementaries("zzzulbcccccc"));
2618 if (m.matches())
2619 failCount++;
2620
2621 // lookingAt() but not matches()
2622 m.reset(toSupplementaries("ulbccccccdef"));
2623 if (m.matches())
2624 failCount++;
2625
2626 // matches()
2627 p = Pattern.compile(toSupplementaries("a|ad"));
2628 m = p.matcher(toSupplementaries("ad"));
2629 if (!m.matches())
2630 failCount++;
2631
2632 report("Matches");
2633 }
2634
2635 /**
2636 * A basic sanity test of Pattern.matches().
2637 */
2638 private static void patternMatchesTest() throws Exception {
2639 // matches()
2640 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2641 toSupplementaries("ulbcccccc")))
2642 failCount++;
2643
2644 // find() but not matches()
2645 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2646 toSupplementaries("zzzulbcccccc")))
2647 failCount++;
2648
2649 // lookingAt() but not matches()
2650 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2651 toSupplementaries("ulbccccccdef")))
2652 failCount++;
2653
2654 // Supplementary character test
2655 // matches()
2656 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2657 toSupplementaries("ulbcccccc")))
2658 failCount++;
2659
2660 // find() but not matches()
2661 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2662 toSupplementaries("zzzulbcccccc")))
2663 failCount++;
2664
2665 // lookingAt() but not matches()
2666 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2667 toSupplementaries("ulbccccccdef")))
2668 failCount++;
2669
2670 report("Pattern Matches");
2671 }
2672
2673 /**
2674 * Canonical equivalence testing. Tests the ability of the engine
2675 * to match sequences that are not explicitly specified in the
2676 * pattern when they are considered equivalent by the Unicode Standard.
2677 */
2678 private static void ceTest() throws Exception {
2679 // Decomposed char outside char classes
2680 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2681 Matcher m = p.matcher("test\u00e5");
2682 if (!m.matches())
2683 failCount++;
2684
2685 m.reset("testa\u030a");
2686 if (!m.matches())
2687 failCount++;
2688
2689 // Composed char outside char classes
2690 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2691 m = p.matcher("test\u00e5");
2692 if (!m.matches())
2693 failCount++;
2694
2695 m.reset("testa\u030a");
2696 if (!m.find())
2697 failCount++;
2698
2699 // Decomposed char inside a char class
2700 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2701 m = p.matcher("test\u00e5");
2702 if (!m.find())
2703 failCount++;
2704
2705 m.reset("testa\u030a");
2706 if (!m.find())
2707 failCount++;
2708
2709 // Composed char inside a char class
2710 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2711 m = p.matcher("test\u00e5");
2712 if (!m.find())
2713 failCount++;
2714
2715 m.reset("testa\u0300");
2716 if (!m.find())
2717 failCount++;
2718
2719 m.reset("testa\u030a");
2720 if (!m.find())
2721 failCount++;
2722
2723 // Marks that cannot legally change order and be equivalent
2724 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2725 check(p, "testa\u0308\u0300", true);
2726 check(p, "testa\u0300\u0308", false);
2727
2728 // Marks that can legally change order and be equivalent
2729 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2730 check(p, "testa\u0308\u0323", true);
2731 check(p, "testa\u0323\u0308", true);
2732
2733 // Test all equivalences of the sequence a\u0308\u0323\u0300
2734 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2735 check(p, "testa\u0308\u0323\u0300", true);
2736 check(p, "testa\u0323\u0308\u0300", true);
2737 check(p, "testa\u0308\u0300\u0323", true);
2738 check(p, "test\u00e4\u0323\u0300", true);
2739 check(p, "test\u00e4\u0300\u0323", true);
2740
2741 Object[][] data = new Object[][] {
2742
2743 // JDK-4867170
2744 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true },
2745 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true },
2746 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true },
2747 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true },
2748 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true },
2749 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true },
2750 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true },
2751
2752 { "\\p{IsGreek}", "ab\u1f80cd", "f", true },
2753 { "\\p{IsGreek}", "ab\u1f81cd", "f", true },
2754 { "\\p{IsGreek}", "ab\u1f82cd", "f", true },
2755 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true },
2756 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true },
2757
2758 // backtracking, force to match "\u1f80", instead of \u1f82"
2759 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true },
2760
2761 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true },
2762 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true },
2763
2764 { "[^\u1f80-\u1f82]","\u1f81", "m", false },
2765 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false },
2766 { "[^\u1f01\u0345]", "\u1f81", "f", false },
2767
2768 { "[^\u1f81]+", "\u1f80\u1f82", "f", true },
2769 { "[\u1f80]", "ab\u1f80cd", "f", true },
2770 { "\u1f80", "ab\u1f80cd", "f", true },
2771 { "\u1f00\u0345\u0300", "\u1f82", "m", true },
2772 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true },
2773 { "\u1f82", "\u1f00\u0345\u0300", "m", true },
2774 { "\u1f82", "\u1f80\u0300", "m", true },
2775
2776 // JDK-7080302 # compile failed
2777 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true},
2778
2779 // JDK-6728861, same cause as above one
2780 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true},
2781
2782 // JDK-6995635
2783 { "(\u00e9)", "e\u0301", "m", true },
2784
2785 // JDK-6736245
2786 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc
2787 { "\u2ADC", "\u2ADC", "m", true}, // NFC
2788 { "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD
2789
2790 // 4916384.
2791 // Decomposed hangul (jamos) works inside clazz
2792 { "[\u1100\u1161]", "\u1100\u1161", "m", true},
2793 { "[\u1100\u1161]", "\uac00", "m", true},
2794
2795 { "[\uac00]", "\u1100\u1161", "m", true},
2796 { "[\uac00]", "\uac00", "m", true},
2797
2798 // Decomposed hangul (jamos)
2799 { "\u1100\u1161", "\u1100\u1161", "m", true},
2800 { "\u1100\u1161", "\uac00", "m", true},
2801
2802 // Composed hangul
2803 { "\uac00", "\u1100\u1161", "m", true },
2804 { "\uac00", "\uac00", "m", true },
2805
2806 /* Need a NFDSlice to nfd the source to solve this issue
2807 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2808 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165>
2809 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2810
2811 // Decomposed supplementary outside char classes
2812 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
2813 // Composed supplementary outside char classes
2814 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
2815 */
2816 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true },
2817 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
2818
2819 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true },
2820 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
2821 };
2822
2823 int failCount = 0;
2824 for (Object[] d : data) {
2825 String pn = (String)d[0];
2826 String tt = (String)d[1];
2827 boolean isFind = "f".equals(((String)d[2]));
2828 boolean expected = (boolean)d[3];
2829 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find()
2830 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches();
2831 if (ret != expected) {
2832 failCount++;
2833 continue;
2834 }
2835 }
2836 report("Canonical Equivalence");
2837 }
2838
2839 /**
2840 * A basic sanity test of Matcher.replaceAll().
2841 */
2842 private static void globalSubstitute() throws Exception {
2843 // Global substitution with a literal
2844 Pattern p = Pattern.compile("(ab)(c*)");
2845 Matcher m = p.matcher("abccczzzabcczzzabccc");
2846 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2847 failCount++;
2848
2849 m.reset("zzzabccczzzabcczzzabccczzz");
2850 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2851 failCount++;
2852
2853 // Global substitution with groups
2854 m.reset("zzzabccczzzabcczzzabccczzz");
2855 String result = m.replaceAll("$1");
2856 if (!result.equals("zzzabzzzabzzzabzzz"))
2857 failCount++;
2858
2859 // Supplementary character test
2860 // Global substitution with a literal
2861 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2862 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2863 if (!m.replaceAll(toSupplementaries("test")).
2864 equals(toSupplementaries("testzzztestzzztest")))
2865 failCount++;
2866
2867 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2868 if (!m.replaceAll(toSupplementaries("test")).
2869 equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2870 failCount++;
2871
2872 // Global substitution with groups
2873 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2874 result = m.replaceAll("$1");
2875 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2876 failCount++;
2877
2878 report("Global Substitution");
2879 }
2880
2881 /**
2882 * Tests the usage of Matcher.appendReplacement() with literal
2883 * and group substitutions.
2884 */
2885 private static void stringbufferSubstitute() throws Exception {
2886 // SB substitution with literal
2887 String blah = "zzzblahzzz";
2888 Pattern p = Pattern.compile("blah");
2889 Matcher m = p.matcher(blah);
2890 StringBuffer result = new StringBuffer();
2891 try {
2892 m.appendReplacement(result, "blech");
2893 failCount++;
2894 } catch (IllegalStateException e) {
2895 }
2896 m.find();
2897 m.appendReplacement(result, "blech");
2898 if (!result.toString().equals("zzzblech"))
2899 failCount++;
2900
2901 m.appendTail(result);
2902 if (!result.toString().equals("zzzblechzzz"))
2903 failCount++;
2904
2905 // SB substitution with groups
2906 blah = "zzzabcdzzz";
2907 p = Pattern.compile("(ab)(cd)*");
2908 m = p.matcher(blah);
2909 result = new StringBuffer();
2910 try {
2911 m.appendReplacement(result, "$1");
2912 failCount++;
2913 } catch (IllegalStateException e) {
2914 }
2915 m.find();
2916 m.appendReplacement(result, "$1");
2917 if (!result.toString().equals("zzzab"))
2918 failCount++;
2919
2920 m.appendTail(result);
2921 if (!result.toString().equals("zzzabzzz"))
2922 failCount++;
2923
2924 // SB substitution with 3 groups
2925 blah = "zzzabcdcdefzzz";
2926 p = Pattern.compile("(ab)(cd)*(ef)");
2927 m = p.matcher(blah);
2928 result = new StringBuffer();
2929 try {
2930 m.appendReplacement(result, "$1w$2w$3");
2931 failCount++;
2932 } catch (IllegalStateException e) {
2933 }
2934 m.find();
2935 m.appendReplacement(result, "$1w$2w$3");
2936 if (!result.toString().equals("zzzabwcdwef"))
2937 failCount++;
2938
2939 m.appendTail(result);
2940 if (!result.toString().equals("zzzabwcdwefzzz"))
2941 failCount++;
2942
2943 // SB substitution with groups and three matches
2944 // skipping middle match
2945 blah = "zzzabcdzzzabcddzzzabcdzzz";
2946 p = Pattern.compile("(ab)(cd*)");
2947 m = p.matcher(blah);
2948 result = new StringBuffer();
2949 try {
2950 m.appendReplacement(result, "$1");
2951 failCount++;
2952 } catch (IllegalStateException e) {
2953 }
2954 m.find();
2955 m.appendReplacement(result, "$1");
2956 if (!result.toString().equals("zzzab"))
2957 failCount++;
2958
2959 m.find();
2960 m.find();
2961 m.appendReplacement(result, "$2");
2962 if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2963 failCount++;
2964
2965 m.appendTail(result);
2966 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2967 failCount++;
2968
2969 // Check to make sure escaped $ is ignored
2970 blah = "zzzabcdcdefzzz";
2971 p = Pattern.compile("(ab)(cd)*(ef)");
2972 m = p.matcher(blah);
2973 result = new StringBuffer();
2974 m.find();
2975 m.appendReplacement(result, "$1w\\$2w$3");
2976 if (!result.toString().equals("zzzabw$2wef"))
2977 failCount++;
2978
2979 m.appendTail(result);
2980 if (!result.toString().equals("zzzabw$2wefzzz"))
2981 failCount++;
2982
2983 // Check to make sure a reference to nonexistent group causes error
2984 blah = "zzzabcdcdefzzz";
2985 p = Pattern.compile("(ab)(cd)*(ef)");
2986 m = p.matcher(blah);
2987 result = new StringBuffer();
2988 m.find();
2989 try {
2990 m.appendReplacement(result, "$1w$5w$3");
2991 failCount++;
2992 } catch (IndexOutOfBoundsException ioobe) {
2993 // Correct result
2994 }
2995
2996 // Check double digit group references
2997 blah = "zzz123456789101112zzz";
2998 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2999 m = p.matcher(blah);
3000 result = new StringBuffer();
3001 m.find();
3002 m.appendReplacement(result, "$1w$11w$3");
3003 if (!result.toString().equals("zzz1w11w3"))
3004 failCount++;
3005
3006 // Check to make sure it backs off $15 to $1 if only three groups
3007 blah = "zzzabcdcdefzzz";
3008 p = Pattern.compile("(ab)(cd)*(ef)");
3009 m = p.matcher(blah);
3010 result = new StringBuffer();
3011 m.find();
3012 m.appendReplacement(result, "$1w$15w$3");
3013 if (!result.toString().equals("zzzabwab5wef"))
3014 failCount++;
3015
3016
3017 // Supplementary character test
3018 // SB substitution with literal
3019 blah = toSupplementaries("zzzblahzzz");
3020 p = Pattern.compile(toSupplementaries("blah"));
3021 m = p.matcher(blah);
3022 result = new StringBuffer();
3023 try {
3024 m.appendReplacement(result, toSupplementaries("blech"));
3025 failCount++;
3026 } catch (IllegalStateException e) {
3027 }
3028 m.find();
3029 m.appendReplacement(result, toSupplementaries("blech"));
3030 if (!result.toString().equals(toSupplementaries("zzzblech")))
3031 failCount++;
3032
3033 m.appendTail(result);
3034 if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
3035 failCount++;
3036
3037 // SB substitution with groups
3038 blah = toSupplementaries("zzzabcdzzz");
3039 p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3040 m = p.matcher(blah);
3041 result = new StringBuffer();
3042 try {
3043 m.appendReplacement(result, "$1");
3044 failCount++;
3045 } catch (IllegalStateException e) {
3046 }
3047 m.find();
3048 m.appendReplacement(result, "$1");
3049 if (!result.toString().equals(toSupplementaries("zzzab")))
3050 failCount++;
3051
3052 m.appendTail(result);
3053 if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3054 failCount++;
3055
3056 // SB substitution with 3 groups
3057 blah = toSupplementaries("zzzabcdcdefzzz");
3058 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3059 m = p.matcher(blah);
3060 result = new StringBuffer();
3061 try {
3062 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3063 failCount++;
3064 } catch (IllegalStateException e) {
3065 }
3066 m.find();
3067 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3068 if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3069 failCount++;
3070
3071 m.appendTail(result);
3072 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3073 failCount++;
3074
3075 // SB substitution with groups and three matches
3076 // skipping middle match
3077 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3078 p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3079 m = p.matcher(blah);
3080 result = new StringBuffer();
3081 try {
3082 m.appendReplacement(result, "$1");
3083 failCount++;
3084 } catch (IllegalStateException e) {
3085 }
3086 m.find();
3087 m.appendReplacement(result, "$1");
3088 if (!result.toString().equals(toSupplementaries("zzzab")))
3089 failCount++;
3090
3091 m.find();
3092 m.find();
3093 m.appendReplacement(result, "$2");
3094 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3095 failCount++;
3096
3097 m.appendTail(result);
3098 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3099 failCount++;
3100
3101 // Check to make sure escaped $ is ignored
3102 blah = toSupplementaries("zzzabcdcdefzzz");
3103 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3104 m = p.matcher(blah);
3105 result = new StringBuffer();
3106 m.find();
3107 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3108 if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3109 failCount++;
3110
3111 m.appendTail(result);
3112 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3113 failCount++;
3114
3115 // Check to make sure a reference to nonexistent group causes error
3116 blah = toSupplementaries("zzzabcdcdefzzz");
3117 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3118 m = p.matcher(blah);
3119 result = new StringBuffer();
3120 m.find();
3121 try {
3122 m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3123 failCount++;
3124 } catch (IndexOutOfBoundsException ioobe) {
3125 // Correct result
3126 }
3127
3128 // Check double digit group references
3129 blah = toSupplementaries("zzz123456789101112zzz");
3130 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3131 m = p.matcher(blah);
3132 result = new StringBuffer();
3133 m.find();
3134 m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3135 if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3136 failCount++;
3137
3138 // Check to make sure it backs off $15 to $1 if only three groups
3139 blah = toSupplementaries("zzzabcdcdefzzz");
3140 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3141 m = p.matcher(blah);
3142 result = new StringBuffer();
3143 m.find();
3144 m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3145 if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3146 failCount++;
3147
3148 // Check nothing has been appended into the output buffer if
3149 // the replacement string triggers IllegalArgumentException.
3150 p = Pattern.compile("(abc)");
3151 m = p.matcher("abcd");
3152 result = new StringBuffer();
3153 m.find();
3154 try {
3155 m.appendReplacement(result, ("xyz$g"));
3156 failCount++;
3157 } catch (IllegalArgumentException iae) {
3158 if (result.length() != 0)
3159 failCount++;
3160 }
3161
3162 report("SB Substitution");
3163 }
3164
3165 /**
3166 * Tests the usage of Matcher.appendReplacement() with literal
3167 * and group substitutions.
3168 */
3169 private static void stringbuilderSubstitute() throws Exception {
3170 // SB substitution with literal
3171 String blah = "zzzblahzzz";
3172 Pattern p = Pattern.compile("blah");
3173 Matcher m = p.matcher(blah);
3174 StringBuilder result = new StringBuilder();
3175 try {
3176 m.appendReplacement(result, "blech");
3177 failCount++;
3178 } catch (IllegalStateException e) {
3179 }
3180 m.find();
3181 m.appendReplacement(result, "blech");
3182 if (!result.toString().equals("zzzblech"))
3183 failCount++;
3184
3185 m.appendTail(result);
3186 if (!result.toString().equals("zzzblechzzz"))
3187 failCount++;
3188
3189 // SB substitution with groups
3190 blah = "zzzabcdzzz";
3191 p = Pattern.compile("(ab)(cd)*");
3192 m = p.matcher(blah);
3193 result = new StringBuilder();
3194 try {
3195 m.appendReplacement(result, "$1");
3196 failCount++;
3197 } catch (IllegalStateException e) {
3198 }
3199 m.find();
3200 m.appendReplacement(result, "$1");
3201 if (!result.toString().equals("zzzab"))
3202 failCount++;
3203
3204 m.appendTail(result);
3205 if (!result.toString().equals("zzzabzzz"))
3206 failCount++;
3207
3208 // SB substitution with 3 groups
3209 blah = "zzzabcdcdefzzz";
3210 p = Pattern.compile("(ab)(cd)*(ef)");
3211 m = p.matcher(blah);
3212 result = new StringBuilder();
3213 try {
3214 m.appendReplacement(result, "$1w$2w$3");
3215 failCount++;
3216 } catch (IllegalStateException e) {
3217 }
3218 m.find();
3219 m.appendReplacement(result, "$1w$2w$3");
3220 if (!result.toString().equals("zzzabwcdwef"))
3221 failCount++;
3222
3223 m.appendTail(result);
3224 if (!result.toString().equals("zzzabwcdwefzzz"))
3225 failCount++;
3226
3227 // SB substitution with groups and three matches
3228 // skipping middle match
3229 blah = "zzzabcdzzzabcddzzzabcdzzz";
3230 p = Pattern.compile("(ab)(cd*)");
3231 m = p.matcher(blah);
3232 result = new StringBuilder();
3233 try {
3234 m.appendReplacement(result, "$1");
3235 failCount++;
3236 } catch (IllegalStateException e) {
3237 }
3238 m.find();
3239 m.appendReplacement(result, "$1");
3240 if (!result.toString().equals("zzzab"))
3241 failCount++;
3242
3243 m.find();
3244 m.find();
3245 m.appendReplacement(result, "$2");
3246 if (!result.toString().equals("zzzabzzzabcddzzzcd"))
3247 failCount++;
3248
3249 m.appendTail(result);
3250 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
3251 failCount++;
3252
3253 // Check to make sure escaped $ is ignored
3254 blah = "zzzabcdcdefzzz";
3255 p = Pattern.compile("(ab)(cd)*(ef)");
3256 m = p.matcher(blah);
3257 result = new StringBuilder();
3258 m.find();
3259 m.appendReplacement(result, "$1w\\$2w$3");
3260 if (!result.toString().equals("zzzabw$2wef"))
3261 failCount++;
3262
3263 m.appendTail(result);
3264 if (!result.toString().equals("zzzabw$2wefzzz"))
3265 failCount++;
3266
3267 // Check to make sure a reference to nonexistent group causes error
3268 blah = "zzzabcdcdefzzz";
3269 p = Pattern.compile("(ab)(cd)*(ef)");
3270 m = p.matcher(blah);
3271 result = new StringBuilder();
3272 m.find();
3273 try {
3274 m.appendReplacement(result, "$1w$5w$3");
3275 failCount++;
3276 } catch (IndexOutOfBoundsException ioobe) {
3277 // Correct result
3278 }
3279
3280 // Check double digit group references
3281 blah = "zzz123456789101112zzz";
3282 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3283 m = p.matcher(blah);
3284 result = new StringBuilder();
3285 m.find();
3286 m.appendReplacement(result, "$1w$11w$3");
3287 if (!result.toString().equals("zzz1w11w3"))
3288 failCount++;
3289
3290 // Check to make sure it backs off $15 to $1 if only three groups
3291 blah = "zzzabcdcdefzzz";
3292 p = Pattern.compile("(ab)(cd)*(ef)");
3293 m = p.matcher(blah);
3294 result = new StringBuilder();
3295 m.find();
3296 m.appendReplacement(result, "$1w$15w$3");
3297 if (!result.toString().equals("zzzabwab5wef"))
3298 failCount++;
3299
3300
3301 // Supplementary character test
3302 // SB substitution with literal
3303 blah = toSupplementaries("zzzblahzzz");
3304 p = Pattern.compile(toSupplementaries("blah"));
3305 m = p.matcher(blah);
3306 result = new StringBuilder();
3307 try {
3308 m.appendReplacement(result, toSupplementaries("blech"));
3309 failCount++;
3310 } catch (IllegalStateException e) {
3311 }
3312 m.find();
3313 m.appendReplacement(result, toSupplementaries("blech"));
3314 if (!result.toString().equals(toSupplementaries("zzzblech")))
3315 failCount++;
3316 m.appendTail(result);
3317 if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
3318 failCount++;
3319
3320 // SB substitution with groups
3321 blah = toSupplementaries("zzzabcdzzz");
3322 p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3323 m = p.matcher(blah);
3324 result = new StringBuilder();
3325 try {
3326 m.appendReplacement(result, "$1");
3327 failCount++;
3328 } catch (IllegalStateException e) {
3329 }
3330 m.find();
3331 m.appendReplacement(result, "$1");
3332 if (!result.toString().equals(toSupplementaries("zzzab")))
3333 failCount++;
3334
3335 m.appendTail(result);
3336 if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3337 failCount++;
3338
3339 // SB substitution with 3 groups
3340 blah = toSupplementaries("zzzabcdcdefzzz");
3341 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3342 m = p.matcher(blah);
3343 result = new StringBuilder();
3344 try {
3345 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3346 failCount++;
3347 } catch (IllegalStateException e) {
3348 }
3349 m.find();
3350 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3351 if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3352 failCount++;
3353
3354 m.appendTail(result);
3355 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3356 failCount++;
3357
3358 // SB substitution with groups and three matches
3359 // skipping middle match
3360 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3361 p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3362 m = p.matcher(blah);
3363 result = new StringBuilder();
3364 try {
3365 m.appendReplacement(result, "$1");
3366 failCount++;
3367 } catch (IllegalStateException e) {
3368 }
3369 m.find();
3370 m.appendReplacement(result, "$1");
3371 if (!result.toString().equals(toSupplementaries("zzzab")))
3372 failCount++;
3373
3374 m.find();
3375 m.find();
3376 m.appendReplacement(result, "$2");
3377 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3378 failCount++;
3379
3380 m.appendTail(result);
3381 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3382 failCount++;
3383
3384 // Check to make sure escaped $ is ignored
3385 blah = toSupplementaries("zzzabcdcdefzzz");
3386 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3387 m = p.matcher(blah);
3388 result = new StringBuilder();
3389 m.find();
3390 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3391 if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3392 failCount++;
3393
3394 m.appendTail(result);
3395 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3396 failCount++;
3397
3398 // Check to make sure a reference to nonexistent group causes error
3399 blah = toSupplementaries("zzzabcdcdefzzz");
3400 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3401 m = p.matcher(blah);
3402 result = new StringBuilder();
3403 m.find();
3404 try {
3405 m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3406 failCount++;
3407 } catch (IndexOutOfBoundsException ioobe) {
3408 // Correct result
3409 }
3410 // Check double digit group references
3411 blah = toSupplementaries("zzz123456789101112zzz");
3412 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3413 m = p.matcher(blah);
3414 result = new StringBuilder();
3415 m.find();
3416 m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3417 if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3418 failCount++;
3419
3420 // Check to make sure it backs off $15 to $1 if only three groups
3421 blah = toSupplementaries("zzzabcdcdefzzz");
3422 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3423 m = p.matcher(blah);
3424 result = new StringBuilder();
3425 m.find();
3426 m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3427 if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3428 failCount++;
3429 // Check nothing has been appended into the output buffer if
3430 // the replacement string triggers IllegalArgumentException.
3431 p = Pattern.compile("(abc)");
3432 m = p.matcher("abcd");
3433 result = new StringBuilder();
3434 m.find();
3435 try {
3436 m.appendReplacement(result, ("xyz$g"));
3437 failCount++;
3438 } catch (IllegalArgumentException iae) {
3439 if (result.length() != 0)
3440 failCount++;
3441 }
3442 report("SB Substitution 2");
3443 }
3444
3445 /*
3446 * 5 groups of characters are created to make a substitution string.
3447 * A base string will be created including random lead chars, the
3448 * substitution string, and random trailing chars.
3449 * A pattern containing the 5 groups is searched for and replaced with:
3450 * random group + random string + random group.
3451 * The results are checked for correctness.
3452 */
3453 private static void substitutionBasher() {
3454 for (int runs = 0; runs<1000; runs++) {
3455 // Create a base string to work in
3456 int leadingChars = generator.nextInt(10);
3457 StringBuffer baseBuffer = new StringBuffer(100);
3458 String leadingString = getRandomAlphaString(leadingChars);
3459 baseBuffer.append(leadingString);
3460
3461 // Create 5 groups of random number of random chars
3462 // Create the string to substitute
3463 // Create the pattern string to search for
3464 StringBuffer bufferToSub = new StringBuffer(25);
3465 StringBuffer bufferToPat = new StringBuffer(50);
3466 String[] groups = new String[5];
3467 for(int i=0; i<5; i++) {
3468 int aGroupSize = generator.nextInt(5)+1;
3469 groups[i] = getRandomAlphaString(aGroupSize);
3470 bufferToSub.append(groups[i]);
3471 bufferToPat.append('(');
3472 bufferToPat.append(groups[i]);
3473 bufferToPat.append(')');
3474 }
3475 String stringToSub = bufferToSub.toString();
3476 String pattern = bufferToPat.toString();
3477
3478 // Place sub string into working string at random index
3479 baseBuffer.append(stringToSub);
3480
3481 // Append random chars to end
3482 int trailingChars = generator.nextInt(10);
3483 String trailingString = getRandomAlphaString(trailingChars);
3484 baseBuffer.append(trailingString);
3485 String baseString = baseBuffer.toString();
3486
3487 // Create test pattern and matcher
3488 Pattern p = Pattern.compile(pattern);
3489 Matcher m = p.matcher(baseString);
3490
3491 // Reject candidate if pattern happens to start early
3492 m.find();
3493 if (m.start() < leadingChars)
3494 continue;
3495
3496 // Reject candidate if more than one match
3497 if (m.find())
3498 continue;
3499
3500 // Construct a replacement string with :
3501 // random group + random string + random group
3502 StringBuffer bufferToRep = new StringBuffer();
3503 int groupIndex1 = generator.nextInt(5);
3504 bufferToRep.append("$" + (groupIndex1 + 1));
3505 String randomMidString = getRandomAlphaString(5);
3506 bufferToRep.append(randomMidString);
3507 int groupIndex2 = generator.nextInt(5);
3508 bufferToRep.append("$" + (groupIndex2 + 1));
3509 String replacement = bufferToRep.toString();
3510
3511 // Do the replacement
3512 String result = m.replaceAll(replacement);
3513
3514 // Construct expected result
3515 StringBuffer bufferToRes = new StringBuffer();
3516 bufferToRes.append(leadingString);
3517 bufferToRes.append(groups[groupIndex1]);
3518 bufferToRes.append(randomMidString);
3519 bufferToRes.append(groups[groupIndex2]);
3520 bufferToRes.append(trailingString);
3521 String expectedResult = bufferToRes.toString();
3522
3523 // Check results
3524 if (!result.equals(expectedResult))
3525 failCount++;
3526 }
3527
3528 report("Substitution Basher");
3529 }
3530
3531 /*
3532 * 5 groups of characters are created to make a substitution string.
3533 * A base string will be created including random lead chars, the
3534 * substitution string, and random trailing chars.
3535 * A pattern containing the 5 groups is searched for and replaced with:
3536 * random group + random string + random group.
3537 * The results are checked for correctness.
3538 */
3539 private static void substitutionBasher2() {
3540 for (int runs = 0; runs<1000; runs++) {
3541 // Create a base string to work in
3542 int leadingChars = generator.nextInt(10);
3543 StringBuilder baseBuffer = new StringBuilder(100);
3544 String leadingString = getRandomAlphaString(leadingChars);
3545 baseBuffer.append(leadingString);
3546
3547 // Create 5 groups of random number of random chars
3548 // Create the string to substitute
3549 // Create the pattern string to search for
3550 StringBuilder bufferToSub = new StringBuilder(25);
3551 StringBuilder bufferToPat = new StringBuilder(50);
3552 String[] groups = new String[5];
3553 for(int i=0; i<5; i++) {
3554 int aGroupSize = generator.nextInt(5)+1;
3555 groups[i] = getRandomAlphaString(aGroupSize);
3556 bufferToSub.append(groups[i]);
3557 bufferToPat.append('(');
3558 bufferToPat.append(groups[i]);
3559 bufferToPat.append(')');
3560 }
3561 String stringToSub = bufferToSub.toString();
3562 String pattern = bufferToPat.toString();
3563
3564 // Place sub string into working string at random index
3565 baseBuffer.append(stringToSub);
3566
3567 // Append random chars to end
3568 int trailingChars = generator.nextInt(10);
3569 String trailingString = getRandomAlphaString(trailingChars);
3570 baseBuffer.append(trailingString);
3571 String baseString = baseBuffer.toString();
3572
3573 // Create test pattern and matcher
3574 Pattern p = Pattern.compile(pattern);
3575 Matcher m = p.matcher(baseString);
3576
3577 // Reject candidate if pattern happens to start early
3578 m.find();
3579 if (m.start() < leadingChars)
3580 continue;
3581
3582 // Reject candidate if more than one match
3583 if (m.find())
3584 continue;
3585
3586 // Construct a replacement string with :
3587 // random group + random string + random group
3588 StringBuilder bufferToRep = new StringBuilder();
3589 int groupIndex1 = generator.nextInt(5);
3590 bufferToRep.append("$" + (groupIndex1 + 1));
3591 String randomMidString = getRandomAlphaString(5);
3592 bufferToRep.append(randomMidString);
3593 int groupIndex2 = generator.nextInt(5);
3594 bufferToRep.append("$" + (groupIndex2 + 1));
3595 String replacement = bufferToRep.toString();
3596
3597 // Do the replacement
3598 String result = m.replaceAll(replacement);
3599
3600 // Construct expected result
3601 StringBuilder bufferToRes = new StringBuilder();
3602 bufferToRes.append(leadingString);
3603 bufferToRes.append(groups[groupIndex1]);
3604 bufferToRes.append(randomMidString);
3605 bufferToRes.append(groups[groupIndex2]);
3606 bufferToRes.append(trailingString);
3607 String expectedResult = bufferToRes.toString();
3608
3609 // Check results
3610 if (!result.equals(expectedResult)) {
3611 failCount++;
3612 }
3613 }
3614
3615 report("Substitution Basher 2");
3616 }
3617
3618 /**
3619 * Checks the handling of some escape sequences that the Pattern
3620 * class should process instead of the java compiler. These are
3621 * not in the file because the escapes should be be processed
3622 * by the Pattern class when the regex is compiled.
3623 */
3624 private static void escapes() throws Exception {
3625 Pattern p = Pattern.compile("\\043");
3626 Matcher m = p.matcher("#");
3627 if (!m.find())
3628 failCount++;
3629
3630 p = Pattern.compile("\\x23");
3631 m = p.matcher("#");
3632 if (!m.find())
3633 failCount++;
3634
3635 p = Pattern.compile("\\u0023");
3636 m = p.matcher("#");
3637 if (!m.find())
3638 failCount++;
3639
3640 report("Escape sequences");
3641 }
3642
3643 /**
3644 * Checks the handling of blank input situations. These
3645 * tests are incompatible with my test file format.
3646 */
3647 private static void blankInput() throws Exception {
3648 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3649 Matcher m = p.matcher("");
3650 if (m.find())
3651 failCount++;
3652
3653 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3654 m = p.matcher("");
3655 if (!m.find())
3656 failCount++;
3657
3658 p = Pattern.compile("abc");
3659 m = p.matcher("");
3660 if (m.find())
3661 failCount++;
3662
3663 p = Pattern.compile("a*");
3664 m = p.matcher("");
3665 if (!m.find())
3666 failCount++;
3667
3668 report("Blank input");
3669 }
3670
3671 /**
3672 * Tests the Boyer-Moore pattern matching of a character sequence
3673 * on randomly generated patterns.
3674 */
3675 private static void bm() throws Exception {
3676 doBnM('a');
3677 report("Boyer Moore (ASCII)");
3678
3679 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3680 report("Boyer Moore (Supplementary)");
3681 }
3682
3683 private static void doBnM(int baseCharacter) throws Exception {
3684 int achar=0;
3685
3686 for (int i=0; i<100; i++) {
3687 // Create a short pattern to search for
3688 int patternLength = generator.nextInt(7) + 4;
3689 StringBuffer patternBuffer = new StringBuffer(patternLength);
3690 String pattern;
3691 retry: for (;;) {
3692 for (int x=0; x<patternLength; x++) {
3693 int ch = baseCharacter + generator.nextInt(26);
3694 if (Character.isSupplementaryCodePoint(ch)) {
3695 patternBuffer.append(Character.toChars(ch));
3696 } else {
3697 patternBuffer.append((char)ch);
3698 }
3699 }
3700 pattern = patternBuffer.toString();
3701
3702 // Avoid patterns that start and end with the same substring
3703 // See JDK-6854417
3704 for (int x=1; x < pattern.length(); x++) {
3705 if (pattern.startsWith(pattern.substring(x)))
3706 continue retry;
3707 }
3708 break;
3709 }
3710 Pattern p = Pattern.compile(pattern);
3711
3712 // Create a buffer with random ASCII chars that does
3713 // not match the sample
3714 String toSearch = null;
3715 StringBuffer s = null;
3716 Matcher m = p.matcher("");
3717 do {
3718 s = new StringBuffer(100);
3719 for (int x=0; x<100; x++) {
3720 int ch = baseCharacter + generator.nextInt(26);
3721 if (Character.isSupplementaryCodePoint(ch)) {
3722 s.append(Character.toChars(ch));
3723 } else {
3724 s.append((char)ch);
3725 }
3726 }
3727 toSearch = s.toString();
3728 m.reset(toSearch);
3729 } while (m.find());
3730
3731 // Insert the pattern at a random spot
3732 int insertIndex = generator.nextInt(99);
3733 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3734 insertIndex++;
3735 s = s.insert(insertIndex, pattern);
3736 toSearch = s.toString();
3737
3738 // Make sure that the pattern is found
3739 m.reset(toSearch);
3740 if (!m.find())
3741 failCount++;
3742
3743 // Make sure that the match text is the pattern
3744 if (!m.group().equals(pattern))
3745 failCount++;
3746
3747 // Make sure match occured at insertion point
3748 if (m.start() != insertIndex)
3749 failCount++;
3750 }
3751 }
3752
3753 /**
3754 * Tests the matching of slices on randomly generated patterns.
3755 * The Boyer-Moore optimization is not done on these patterns
3756 * because it uses unicode case folding.
3757 */
3758 private static void slice() throws Exception {
3759 doSlice(Character.MAX_VALUE);
3760 report("Slice");
3761
3762 doSlice(Character.MAX_CODE_POINT);
3763 report("Slice (Supplementary)");
3764 }
3765
3766 private static void doSlice(int maxCharacter) throws Exception {
3767 Random generator = new Random();
3768 int achar=0;
3769
3770 for (int i=0; i<100; i++) {
3771 // Create a short pattern to search for
3772 int patternLength = generator.nextInt(7) + 4;
3773 StringBuffer patternBuffer = new StringBuffer(patternLength);
3774 for (int x=0; x<patternLength; x++) {
3775 int randomChar = 0;
3776 while (!Character.isLetterOrDigit(randomChar))
3777 randomChar = generator.nextInt(maxCharacter);
3778 if (Character.isSupplementaryCodePoint(randomChar)) {
3779 patternBuffer.append(Character.toChars(randomChar));
3780 } else {
3781 patternBuffer.append((char) randomChar);
3782 }
3783 }
3784 String pattern = patternBuffer.toString();
3785 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3786
3787 // Create a buffer with random chars that does not match the sample
3788 String toSearch = null;
3789 StringBuffer s = null;
3790 Matcher m = p.matcher("");
3791 do {
3792 s = new StringBuffer(100);
3793 for (int x=0; x<100; x++) {
3794 int randomChar = 0;
3795 while (!Character.isLetterOrDigit(randomChar))
3796 randomChar = generator.nextInt(maxCharacter);
3797 if (Character.isSupplementaryCodePoint(randomChar)) {
3798 s.append(Character.toChars(randomChar));
3799 } else {
3800 s.append((char) randomChar);
3801 }
3802 }
3803 toSearch = s.toString();
3804 m.reset(toSearch);
3805 } while (m.find());
3806
3807 // Insert the pattern at a random spot
3808 int insertIndex = generator.nextInt(99);
3809 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3810 insertIndex++;
3811 s = s.insert(insertIndex, pattern);
3812 toSearch = s.toString();
3813
3814 // Make sure that the pattern is found
3815 m.reset(toSearch);
3816 if (!m.find())
3817 failCount++;
3818
3819 // Make sure that the match text is the pattern
3820 if (!m.group().equals(pattern))
3821 failCount++;
3822
3823 // Make sure match occured at insertion point
3824 if (m.start() != insertIndex)
3825 failCount++;
3826 }
3827 }
3828
3829 private static void explainFailure(String pattern, String data,
3830 String expected, String actual) {
3831 System.err.println("----------------------------------------");
3832 System.err.println("Pattern = "+pattern);
3833 System.err.println("Data = "+data);
3834 System.err.println("Expected = " + expected);
3835 System.err.println("Actual = " + actual);
3836 }
3837
3838 private static void explainFailure(String pattern, String data,
3839 Throwable t) {
3840 System.err.println("----------------------------------------");
3841 System.err.println("Pattern = "+pattern);
3842 System.err.println("Data = "+data);
3843 t.printStackTrace(System.err);
3844 }
3845
3846 // Testing examples from a file
3847
3848 /**
3849 * Goes through the file "TestCases.txt" and creates many patterns
3850 * described in the file, matching the patterns against input lines in
3851 * the file, and comparing the results against the correct results
3852 * also found in the file. The file format is described in comments
3853 * at the head of the file.
3854 */
3855 private static void processFile(String fileName) throws Exception {
3856 File testCases = new File(System.getProperty("test.src", "."),
3857 fileName);
3858 FileInputStream in = new FileInputStream(testCases);
3859 BufferedReader r = new BufferedReader(new InputStreamReader(in));
3860
3861 // Process next test case.
3862 String aLine;
3863 while((aLine = r.readLine()) != null) {
3864 // Read a line for pattern
3865 String patternString = grabLine(r);
3866 Pattern p = null;
3867 try {
3868 p = compileTestPattern(patternString);
3869 } catch (PatternSyntaxException e) {
3870 String dataString = grabLine(r);
3871 String expectedResult = grabLine(r);
3872 if (expectedResult.startsWith("error"))
3873 continue;
3874 explainFailure(patternString, dataString, e);
3875 failCount++;
3876 continue;
3877 }
3878
3879 // Read a line for input string
3880 String dataString = grabLine(r);
3881 Matcher m = p.matcher(dataString);
3882 StringBuffer result = new StringBuffer();
3883
3884 // Check for IllegalStateExceptions before a match
3885 failCount += preMatchInvariants(m);
3886
3887 boolean found = m.find();
3888
3889 if (found)
3890 failCount += postTrueMatchInvariants(m);
3891 else
3892 failCount += postFalseMatchInvariants(m);
3893
3894 if (found) {
3895 result.append("true ");
3896 result.append(m.group(0) + " ");
3897 } else {
3898 result.append("false ");
3899 }
3900
3901 result.append(m.groupCount());
3902
3903 if (found) {
3904 for (int i=1; i<m.groupCount()+1; i++)
3905 if (m.group(i) != null)
3906 result.append(" " +m.group(i));
3907 }
3908
3909 // Read a line for the expected result
3910 String expectedResult = grabLine(r);
3911
3912 if (!result.toString().equals(expectedResult)) {
3913 explainFailure(patternString, dataString, expectedResult, result.toString());
3914 failCount++;
3915 }
3916 }
3917
3918 report(fileName);
3919 }
3920
3921 private static int preMatchInvariants(Matcher m) {
3922 int failCount = 0;
3923 try {
3924 m.start();
3925 failCount++;
3926 } catch (IllegalStateException ise) {}
3927 try {
3928 m.end();
3929 failCount++;
3930 } catch (IllegalStateException ise) {}
3931 try {
3932 m.group();
3933 failCount++;
3934 } catch (IllegalStateException ise) {}
3935 return failCount;
3936 }
3937
3938 private static int postFalseMatchInvariants(Matcher m) {
3939 int failCount = 0;
3940 try {
3941 m.group();
3942 failCount++;
3943 } catch (IllegalStateException ise) {}
3944 try {
3945 m.start();
3946 failCount++;
3947 } catch (IllegalStateException ise) {}
3948 try {
3949 m.end();
3950 failCount++;
3951 } catch (IllegalStateException ise) {}
3952 return failCount;
3953 }
3954
3955 private static int postTrueMatchInvariants(Matcher m) {
3956 int failCount = 0;
3957 //assert(m.start() = m.start(0);
3958 if (m.start() != m.start(0))
3959 failCount++;
3960 //assert(m.end() = m.end(0);
3961 if (m.start() != m.start(0))
3962 failCount++;
3963 //assert(m.group() = m.group(0);
3964 if (!m.group().equals(m.group(0)))
3965 failCount++;
3966 try {
3967 m.group(50);
3968 failCount++;
3969 } catch (IndexOutOfBoundsException ise) {}
3970
3971 return failCount;
3972 }
3973
3974 private static Pattern compileTestPattern(String patternString) {
3975 if (!patternString.startsWith("'")) {
3976 return Pattern.compile(patternString);
3977 }
3978 int break1 = patternString.lastIndexOf("'");
3979 String flagString = patternString.substring(
3980 break1+1, patternString.length());
3981 patternString = patternString.substring(1, break1);
3982
3983 if (flagString.equals("i"))
3984 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3985
3986 if (flagString.equals("m"))
3987 return Pattern.compile(patternString, Pattern.MULTILINE);
3988
3989 return Pattern.compile(patternString);
3990 }
3991
3992 /**
3993 * Reads a line from the input file. Keeps reading lines until a non
3994 * empty non comment line is read. If the line contains a \n then
3995 * these two characters are replaced by a newline char. If a \\uxxxx
3996 * sequence is read then the sequence is replaced by the unicode char.
3997 */
3998 private static String grabLine(BufferedReader r) throws Exception {
3999 int index = 0;
4000 String line = r.readLine();
4001 while (line.startsWith("//") || line.length() < 1)
4002 line = r.readLine();
4003 while ((index = line.indexOf("\\n")) != -1) {
4004 StringBuffer temp = new StringBuffer(line);
4005 temp.replace(index, index+2, "\n");
4006 line = temp.toString();
4007 }
4008 while ((index = line.indexOf("\\u")) != -1) {
4009 StringBuffer temp = new StringBuffer(line);
4010 String value = temp.substring(index+2, index+6);
4011 char aChar = (char)Integer.parseInt(value, 16);
4012 String unicodeChar = "" + aChar;
4013 temp.replace(index, index+6, unicodeChar);
4014 line = temp.toString();
4015 }
4016
4017 return line;
4018 }
4019
4020 private static void check(Pattern p, String s, String g, String expected) {
4021 Matcher m = p.matcher(s);
4022 m.find();
4023 if (!m.group(g).equals(expected) ||
4024 s.charAt(m.start(g)) != expected.charAt(0) ||
4025 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
4026 failCount++;
4027 }
4028
4029 private static void checkReplaceFirst(String p, String s, String r, String expected)
4030 {
4031 if (!expected.equals(Pattern.compile(p)
4032 .matcher(s)
4033 .replaceFirst(r)))
4034 failCount++;
4035 }
4036
4037 private static void checkReplaceAll(String p, String s, String r, String expected)
4038 {
4039 if (!expected.equals(Pattern.compile(p)
4040 .matcher(s)
4041 .replaceAll(r)))
4042 failCount++;
4043 }
4044
4045 private static void checkExpectedFail(String p) {
4046 try {
4047 Pattern.compile(p);
4048 } catch (PatternSyntaxException pse) {
4049 //pse.printStackTrace();
4050 return;
4051 }
4052 failCount++;
4053 }
4054
4055 private static void checkExpectedIAE(Matcher m, String g) {
4056 m.find();
4057 try {
4058 m.group(g);
4059 } catch (IllegalArgumentException x) {
4060 //iae.printStackTrace();
4061 try {
4062 m.start(g);
4063 } catch (IllegalArgumentException xx) {
4064 try {
4065 m.start(g);
4066 } catch (IllegalArgumentException xxx) {
4067 return;
4068 }
4069 }
4070 }
4071 failCount++;
4072 }
4073
4074 private static void checkExpectedNPE(Matcher m) {
4075 m.find();
4076 try {
4077 m.group(null);
4078 } catch (NullPointerException x) {
4079 try {
4080 m.start(null);
4081 } catch (NullPointerException xx) {
4082 try {
4083 m.end(null);
4084 } catch (NullPointerException xxx) {
4085 return;
4086 }
4087 }
4088 }
4089 failCount++;
4090 }
4091
4092 private static void namedGroupCaptureTest() throws Exception {
4093 check(Pattern.compile("x+(?<gname>y+)z+"),
4094 "xxxyyyzzz",
4095 "gname",
4096 "yyy");
4097
4098 check(Pattern.compile("x+(?<gname8>y+)z+"),
4099 "xxxyyyzzz",
4100 "gname8",
4101 "yyy");
4102
4103 //backref
4104 Pattern pattern = Pattern.compile("(a*)bc\\1");
4105 check(pattern, "zzzaabcazzz", true); // found "abca"
4106
4107 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
4108 "zzzaabcaazzz", true);
4109
4110 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
4111 "abcdefabc", true);
4112
4113 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
4114 "abcdefghijkk", true);
4115
4116 // Supplementary character tests
4117 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4118 toSupplementaries("zzzaabcazzz"), true);
4119
4120 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4121 toSupplementaries("zzzaabcaazzz"), true);
4122
4123 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
4124 toSupplementaries("abcdefabc"), true);
4125
4126 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
4127 "(?<gname>" +
4128 toSupplementaries("k)") + "\\k<gname>"),
4129 toSupplementaries("abcdefghijkk"), true);
4130
4131 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
4132 "xxxyyyzzzyyy",
4133 "gname",
4134 "yyy");
4135
4136 //replaceFirst/All
4137 checkReplaceFirst("(?<gn>ab)(c*)",
4138 "abccczzzabcczzzabccc",
4139 "${gn}",
4140 "abzzzabcczzzabccc");
4141
4142 checkReplaceAll("(?<gn>ab)(c*)",
4143 "abccczzzabcczzzabccc",
4144 "${gn}",
4145 "abzzzabzzzab");
4146
4147
4148 checkReplaceFirst("(?<gn>ab)(c*)",
4149 "zzzabccczzzabcczzzabccczzz",
4150 "${gn}",
4151 "zzzabzzzabcczzzabccczzz");
4152
4153 checkReplaceAll("(?<gn>ab)(c*)",
4154 "zzzabccczzzabcczzzabccczzz",
4155 "${gn}",
4156 "zzzabzzzabzzzabzzz");
4157
4158 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
4159 "zzzabccczzzabcczzzabccczzz",
4160 "${gn2}",
4161 "zzzccczzzabcczzzabccczzz");
4162
4163 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
4164 "zzzabccczzzabcczzzabccczzz",
4165 "${gn2}",
4166 "zzzccczzzcczzzccczzz");
4167
4168 //toSupplementaries("(ab)(c*)"));
4169 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4170 ")(?<gn2>" + toSupplementaries("c") + "*)",
4171 toSupplementaries("abccczzzabcczzzabccc"),
4172 "${gn1}",
4173 toSupplementaries("abzzzabcczzzabccc"));
4174
4175
4176 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4177 ")(?<gn2>" + toSupplementaries("c") + "*)",
4178 toSupplementaries("abccczzzabcczzzabccc"),
4179 "${gn1}",
4180 toSupplementaries("abzzzabzzzab"));
4181
4182 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4183 ")(?<gn2>" + toSupplementaries("c") + "*)",
4184 toSupplementaries("abccczzzabcczzzabccc"),
4185 "${gn2}",
4186 toSupplementaries("ccczzzabcczzzabccc"));
4187
4188
4189 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4190 ")(?<gn2>" + toSupplementaries("c") + "*)",
4191 toSupplementaries("abccczzzabcczzzabccc"),
4192 "${gn2}",
4193 toSupplementaries("ccczzzcczzzccc"));
4194
4195 checkReplaceFirst("(?<dog>Dog)AndCat",
4196 "zzzDogAndCatzzzDogAndCatzzz",
4197 "${dog}",
4198 "zzzDogzzzDogAndCatzzz");
4199
4200
4201 checkReplaceAll("(?<dog>Dog)AndCat",
4202 "zzzDogAndCatzzzDogAndCatzzz",
4203 "${dog}",
4204 "zzzDogzzzDogzzz");
4205
4206 // backref in Matcher & String
4207 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4208 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4209 failCount++;
4210
4211 // negative
4212 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4213 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4214 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4215 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4216 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4217 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4218 "gnameX");
4219 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4220 report("NamedGroupCapture");
4221 }
4222
4223 // This is for bug 6919132
4224 private static void nonBmpClassComplementTest() throws Exception {
4225 Pattern p = Pattern.compile("\\P{Lu}");
4226 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4227
4228 if (m.find() && m.start() == 1)
4229 failCount++;
4230
4231 // from a unicode category
4232 p = Pattern.compile("\\P{Lu}");
4233 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4234 if (m.find())
4235 failCount++;
4236 if (!m.hitEnd())
4237 failCount++;
4238
4239 // block
4240 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4241 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4242 if (m.find() && m.start() == 1)
4243 failCount++;
4244
4245 p = Pattern.compile("\\P{sc=GRANTHA}");
4246 m = p.matcher(new String(new int[] {0x11350}, 0, 1));
4247 if (m.find() && m.start() == 1)
4248 failCount++;
4249
4250 report("NonBmpClassComplement");
4251 }
4252
4253 private static void unicodePropertiesTest() throws Exception {
4254 // different forms
4255 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
4256 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
4257 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
4258 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
4259 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
4260 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
4261 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
4262 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
4263 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
4264 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
4265 failCount++;
4266
4267 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
4268 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
4269 Matcher lastSM = common;
4270 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
4271
4272 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher("");
4273 Matcher greek = Pattern.compile("\\p{InGreek}").matcher("");
4274 Matcher lastBM = latin;
4275 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
4276
4277 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
4278 if (cp >= 0x30000 && (cp & 0x70) == 0){
4279 continue; // only pick couple code points, they are the same
4280 }
4281
4282 // Unicode Script
4283 Character.UnicodeScript script = Character.UnicodeScript.of(cp);
4284 Matcher m;
4285 String str = new String(Character.toChars(cp));
4286 if (script == lastScript) {
4287 m = lastSM;
4288 m.reset(str);
4289 } else {
4290 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
4291 }
4292 if (!m.matches()) {
4293 failCount++;
4294 }
4295 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
4296 other.reset(str);
4297 if (other.matches()) {
4298 failCount++;
4299 }
4300 lastSM = m;
4301 lastScript = script;
4302
4303 // Unicode Block
4304 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
4305 if (block == null) {
4306 //System.out.printf("Not a Block: cp=%x%n", cp);
4307 continue;
4308 }
4309 if (block == lastBlock) {
4310 m = lastBM;
4311 m.reset(str);
4312 } else {
4313 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
4314 }
4315 if (!m.matches()) {
4316 failCount++;
4317 }
4318 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
4319 other.reset(str);
4320 if (other.matches()) {
4321 failCount++;
4322 }
4323 lastBM = m;
4324 lastBlock = block;
4325 }
4326 report("unicodeProperties");
4327 }
4328
4329 private static void unicodeHexNotationTest() throws Exception {
4330
4331 // negative
4332 checkExpectedFail("\\x{-23}");
4333 checkExpectedFail("\\x{110000}");
4334 checkExpectedFail("\\x{}");
4335 checkExpectedFail("\\x{AB[ef]");
4336
4337 // codepoint
4338 check("^\\x{1033c}$", "\uD800\uDF3C", true);
4339 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
4340 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false);
4341 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
4342
4343 // in class
4344 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false);
4345 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
4346 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false);
4347 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false);
4348 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true);
4349 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true);
4350
4351 for (int cp = 0; cp <= 0x10FFFF; cp++) {
4352 String s = "A" + new String(Character.toChars(cp)) + "B";
4353 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
4354 : String.format("\\u%04x\\u%04x",
4355 (int) Character.toChars(cp)[0],
4356 (int) Character.toChars(cp)[1]);
4357 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
4358 if (!Pattern.matches("A" + hexUTF16 + "B", s))
4359 failCount++;
4360 if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
4361 failCount++;
4362 if (!Pattern.matches("A" + hexCodePoint + "B", s))
4363 failCount++;
4364 if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
4365 failCount++;
4366 }
4367 report("unicodeHexNotation");
4368 }
4369
4370 private static void unicodeClassesTest() throws Exception {
4371
4372 Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
4373 Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
4374 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher("");
4375 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
4376 Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
4377 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
4378 Matcher punct = Pattern.compile("\\p{Punct}").matcher("");
4379 Matcher graph = Pattern.compile("\\p{Graph}").matcher("");
4380 Matcher print = Pattern.compile("\\p{Print}").matcher("");
4381 Matcher blank = Pattern.compile("\\p{Blank}").matcher("");
4382 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
4383 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
4384 Matcher space = Pattern.compile("\\p{Space}").matcher("");
4385 Matcher bound = Pattern.compile("\\b").matcher("");
4386 Matcher word = Pattern.compile("\\w++").matcher("");
4387 // UNICODE_CHARACTER_CLASS
4388 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4389 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4390 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4391 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4392 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4393 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4394 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4395 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4396 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4397 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4398 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4399 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4400 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4401 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4402 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4403 // embedded flag (?U)
4404 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4405 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4406 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4407
4408 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher("");
4409 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4410 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4411 // properties
4412 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher("");
4413 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher("");
4414 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher("");
4415 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
4416 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher("");
4417 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher("");
4418 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher("");
4419 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
4420 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
4421 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
4422 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
4423 // javaMethod
4424 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
4425 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
4426 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher("");
4427 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher("");
4428 // GC/C
4429 Matcher gcC = Pattern.compile("\\p{C}").matcher("");
4430
4431 for (int cp = 1; cp < 0x30000; cp++) {
4432 String str = new String(Character.toChars(cp));
4433 int type = Character.getType(cp);
4434 if (// lower
4435 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() ||
4436 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
4437 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
4438 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
4439 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
4440 // upper
4441 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() ||
4442 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
4443 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
4444 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
4445 // alpha
4446 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() ||
4447 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
4448 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
4449 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
4450 // digit
4451 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() ||
4452 Character.isDigit(cp) != digitU.reset(str).matches() ||
4453 // alnum
4454 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() ||
4455 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
4456 // punct
4457 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() ||
4458 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
4459 // graph
4460 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() ||
4461 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
4462 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
4463 // blank
4464 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
4465 != blank.reset(str).matches() ||
4466 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
4467 // print
4468 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() ||
4469 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
4470 // cntrl
4471 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() ||
4472 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
4473 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
4474 // hexdigit
4475 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() ||
4476 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
4477 // space
4478 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() ||
4479 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
4480 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
4481 // word
4482 POSIX_ASCII.isWord(cp) != word.reset(str).matches() ||
4483 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
4484 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
4485 // bwordb
4486 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
4487 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
4488 // properties
4489 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
4490 Character.isLetter(cp) != letterP.reset(str).matches()||
4491 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
4492 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
4493 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
4494 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
4495 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() ||
4496 // gc_C
4497 (Character.CONTROL == type || Character.FORMAT == type ||
4498 Character.PRIVATE_USE == type || Character.SURROGATE == type ||
4499 Character.UNASSIGNED == type)
4500 != gcC.reset(str).matches()) {
4501 failCount++;
4502 }
4503 }
4504
4505 // bounds/word align
4506 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
4507 if (!bwbU.reset("\u0180sherman\u0400").matches())
4508 failCount++;
4509 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
4510 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
4511 failCount++;
4512 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
4513 if (!bwbU.reset("\u0724\u0739\u0724").matches())
4514 failCount++;
4515 if (!bwbEU.reset("\u0724\u0739\u0724").matches())
4516 failCount++;
4517 report("unicodePredefinedClasses");
4518 }
4519
4520 private static void unicodeCharacterNameTest() throws Exception {
4521
4522 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) {
4523 if (!Character.isValidCodePoint(cp) ||
4524 Character.getType(cp) == Character.UNASSIGNED)
4525 continue;
4526 String str = new String(Character.toChars(cp));
4527 // single
4528 String p = "\\N{" + Character.getName(cp) + "}";
4529 if (!Pattern.compile(p).matcher(str).matches()) {
4530 failCount++;
4531 }
4532 // class[c]
4533 p = "[\\N{" + Character.getName(cp) + "}]";
4534 if (!Pattern.compile(p).matcher(str).matches()) {
4535 failCount++;
4536 }
4537 }
4538
4539 // range
4540 for (int i = 0; i < 10; i++) {
4541 int start = generator.nextInt(20);
4542 int end = start + generator.nextInt(200);
4543 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]";
4544 String str;
4545 for (int cp = start; cp < end; cp++) {
4546 str = new String(Character.toChars(cp));
4547 if (!Pattern.compile(p).matcher(str).matches()) {
4548 failCount++;
4549 }
4550 }
4551 str = new String(Character.toChars(end + 10));
4552 if (Pattern.compile(p).matcher(str).matches()) {
4553 failCount++;
4554 }
4555 }
4556
4557 // slice
4558 for (int i = 0; i < 10; i++) {
4559 int n = generator.nextInt(256);
4560 int[] buf = new int[n];
4561 StringBuffer sb = new StringBuffer(1024);
4562 for (int j = 0; j < n; j++) {
4563 int cp = generator.nextInt(1000);
4564 if (!Character.isValidCodePoint(cp) ||
4565 Character.getType(cp) == Character.UNASSIGNED)
4566 cp = 0x4e00; // just use 4e00
4567 sb.append("\\N{" + Character.getName(cp) + "}");
4568 buf[j] = cp;
4569 }
4570 String p = sb.toString();
4571 String str = new String(buf, 0, buf.length);
4572 if (!Pattern.compile(p).matcher(str).matches()) {
4573 failCount++;
4574 }
4575 }
4576 report("unicodeCharacterName");
4577 }
4578
4579 private static void horizontalAndVerticalWSTest() throws Exception {
4580 String hws = new String (new char[] {
4581 0x09, 0x20, 0xa0, 0x1680, 0x180e,
4582 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
4583 0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
4584 0x202f, 0x205f, 0x3000 });
4585 String vws = new String (new char[] {
4586 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
4587 if (!Pattern.compile("\\h+").matcher(hws).matches() ||
4588 !Pattern.compile("[\\h]+").matcher(hws).matches())
4589 failCount++;
4590 if (Pattern.compile("\\H").matcher(hws).find() ||
4591 Pattern.compile("[\\H]").matcher(hws).find())
4592 failCount++;
4593 if (!Pattern.compile("\\v+").matcher(vws).matches() ||
4594 !Pattern.compile("[\\v]+").matcher(vws).matches())
4595 failCount++;
4596 if (Pattern.compile("\\V").matcher(vws).find() ||
4597 Pattern.compile("[\\V]").matcher(vws).find())
4598 failCount++;
4599 String prefix = "abcd";
4600 String suffix = "efgh";
4601 String ng = "A";
4602 for (int i = 0; i < hws.length(); i++) {
4603 String c = String.valueOf(hws.charAt(i));
4604 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
4605 if (!m.find() || !c.equals(m.group()))
4606 failCount++;
4607 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
4608 if (!m.find() || !c.equals(m.group()))
4609 failCount++;
4610
4611 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
4612 if (!m.find() || !ng.equals(m.group()))
4613 failCount++;
4614 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
4615 if (!m.find() || !ng.equals(m.group()))
4616 failCount++;
4617 }
4618 for (int i = 0; i < vws.length(); i++) {
4619 String c = String.valueOf(vws.charAt(i));
4620 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
4621 if (!m.find() || !c.equals(m.group()))
4622 failCount++;
4623 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
4624 if (!m.find() || !c.equals(m.group()))
4625 failCount++;
4626
4627 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
4628 if (!m.find() || !ng.equals(m.group()))
4629 failCount++;
4630 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
4631 if (!m.find() || !ng.equals(m.group()))
4632 failCount++;
4633 }
4634 // \v in range is interpreted as 0x0B. This is the undocumented behavior
4635 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
4636 failCount++;
4637 report("horizontalAndVerticalWSTest");
4638 }
4639
4640 private static void linebreakTest() throws Exception {
4641 String linebreaks = new String (new char[] {
4642 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
4643 String crnl = "\r\n";
4644 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() &&
4645 Pattern.compile("\\R").matcher(crnl).matches() &&
4646 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() &&
4647 Pattern.compile("\\Rabc").matcher("\rabc").matches() &&
4648 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking
4649 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking
4650 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029
4651 failCount++;
4652 }
4653 report("linebreakTest");
4654 }
4655
4656 // #7189363
4657 private static void branchTest() throws Exception {
4658 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy
4659 !Pattern.compile("(a)+bc|d").matcher("d").find() ||
4660 !Pattern.compile("(a)*bc|d").matcher("d").find() ||
4661 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant
4662 !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
4663 !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
4664 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive
4665 !Pattern.compile("(a)++bc|d").matcher("d").find() ||
4666 !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
4667 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy
4668 !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
4669 !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
4670 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
4671 !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
4672 !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
4673 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
4674 !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
4675 !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
4676 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others
4677 !Pattern.compile("(a)??bc|de").matcher("de").find() ||
4678 !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
4679 !Pattern.compile("(a)??bc|de").matcher("de").matches())
4680 failCount++;
4681 report("branchTest");
4682 }
4683
4684 // This test is for 8007395
4685 private static void groupCurlyNotFoundSuppTest() throws Exception {
4686 String input = "test this as \ud83d\ude0d";
4687 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
4688 "test(.)*(@[a-zA-Z.]+)",
4689 "test([^B])+(@[a-zA-Z.]+)",
4690 "test([^B])*(@[a-zA-Z.]+)",
4691 "test(\\P{IsControl})+(@[a-zA-Z.]+)",
4692 "test(\\P{IsControl})*(@[a-zA-Z.]+)",
4693 }) {
4694 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
4695 .matcher(input);
4696 try {
4697 if (m.find()) {
4698 failCount++;
4699 }
4700 } catch (Exception x) {
4701 failCount++;
4702 }
4703 }
4704 report("GroupCurly NotFoundSupp");
4705 }
4706
4707 // This test is for 8023647
4708 private static void groupCurlyBackoffTest() throws Exception {
4709 if (!"abc1c".matches("(\\w)+1\\1") ||
4710 "abc11".matches("(\\w)+1\\1")) {
4711 failCount++;
4712 }
4713 report("GroupCurly backoff");
4714 }
4715
4716 // This test is for 8012646
4717 private static void patternAsPredicate() throws Exception {
4718 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4719
4720 if (p.test("")) {
4721 failCount++;
4722 }
4723 if (!p.test("word")) {
4724 failCount++;
4725 }
4726 if (p.test("1234")) {
4727 failCount++;
4728 }
4729 if (!p.test("word1234")) {
4730 failCount++;
4731 }
4732 report("Pattern.asPredicate");
4733 }
4734
4735 // This test is for 8184692
4736 private static void patternAsMatchPredicate() throws Exception {
4737 Predicate<String> p = Pattern.compile("[a-z]+").asMatchPredicate();
4738
4739 if (p.test("")) {
4740 failCount++;
4741 }
4742 if (!p.test("word")) {
4743 failCount++;
4744 }
4745 if (p.test("1234word")) {
4746 failCount++;
4747 }
4748 if (p.test("1234")) {
4749 failCount++;
4750 }
4751 report("Pattern.asMatchPredicate");
4752 }
4753
4754
4755 // This test is for 8035975
4756 private static void invalidFlags() throws Exception {
4757 for (int flag = 1; flag != 0; flag <<= 1) {
4758 switch (flag) {
4759 case Pattern.CASE_INSENSITIVE:
4760 case Pattern.MULTILINE:
4761 case Pattern.DOTALL:
4762 case Pattern.UNICODE_CASE:
4763 case Pattern.CANON_EQ:
4764 case Pattern.UNIX_LINES:
4765 case Pattern.LITERAL:
4766 case Pattern.UNICODE_CHARACTER_CLASS:
4767 case Pattern.COMMENTS:
4768 // valid flag, continue
4769 break;
4770 default:
4771 try {
4772 Pattern.compile(".", flag);
4773 failCount++;
4774 } catch (IllegalArgumentException expected) {
4775 }
4776 }
4777 }
4778 report("Invalid compile flags");
4779 }
4780
4781 // This test is for 8158482
4782 private static void embeddedFlags() throws Exception {
4783 try {
4784 Pattern.compile("(?i).(?-i).");
4785 Pattern.compile("(?m).(?-m).");
4786 Pattern.compile("(?s).(?-s).");
4787 Pattern.compile("(?d).(?-d).");
4788 Pattern.compile("(?u).(?-u).");
4789 Pattern.compile("(?c).(?-c).");
4790 Pattern.compile("(?x).(?-x).");
4791 Pattern.compile("(?U).(?-U).");
4792 Pattern.compile("(?imsducxU).(?-imsducxU).");
4793 } catch (PatternSyntaxException x) {
4794 failCount++;
4795 }
4796 report("Embedded flags");
4797 }
4798
4799 private static void grapheme() throws Exception {
4800 final int[] lineNumber = new int[1];
4801 Stream.concat(Files.lines(UCDFiles.GRAPHEME_BREAK_TEST),
4802 Files.lines(Paths.get(System.getProperty("test.src", "."), "GraphemeTestCases.txt")))
4803 .forEach( ln -> {
4804 lineNumber[0]++;
4805 if (ln.length() == 0 || ln.startsWith("#")) {
4806 return;
4807 }
4808 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
4809 // System.out.println(str);
4810 String[] strs = ln.split("\u00f7|\u00d7");
4811 StringBuilder src = new StringBuilder();
4812 ArrayList<String> graphemes = new ArrayList<>();
4813 StringBuilder buf = new StringBuilder();
4814 int offBk = 0;
4815 for (String str : strs) {
4816 if (str.length() == 0) // first empty str
4817 continue;
4818 int cp = Integer.parseInt(str, 16);
4819 src.appendCodePoint(cp);
4820 buf.appendCodePoint(cp);
4821 offBk += (str.length() + 1);
4822 if (ln.charAt(offBk) == '\u00f7') { // DIV
4823 graphemes.add(buf.toString());
4824 buf = new StringBuilder();
4825 }
4826 }
4827 Pattern p = Pattern.compile("\\X");
4828 // (1) test \X directly
4829 Matcher m = p.matcher(src.toString());
4830 for (String g : graphemes) {
4831 // System.out.printf(" grapheme:=[%s]%n", g);
4832 String group = null;
4833 if (!m.find() || !(group = m.group()).equals(g)) {
4834 System.out.println("Failed pattern \\X [" + ln + "] : "
4835 + "expected: " + g + " - actual: " + group
4836 + "(line " + lineNumber[0] + ")");
4837 failCount++;
4838 }
4839 }
4840 if (m.find()) {
4841 failCount++;
4842 }
4843 // test \b{g} without \X via Pattern
4844 Pattern pbg = Pattern.compile("\\b{g}");
4845 m = pbg.matcher(src.toString());
4846 m.find();
4847 int prev = m.end();
4848 for (String g : graphemes) {
4849 String group = null;
4850 if (!m.find() || !(group = src.substring(prev, m.end())).equals(g)) {
4851 System.out.println("Failed pattern \\b{g} [" + ln + "] : "
4852 + "expected: " + g + " - actual: " + group
4853 + "(line " + lineNumber[0] + ")");
4854 failCount++;
4855 }
4856 if (!"".equals(m.group())) {
4857 failCount++;
4858 }
4859 prev = m.end();
4860 }
4861 if (m.find()) {
4862 failCount++;
4863 }
4864 // (2) test \b{g} + \X via Scanner
4865 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4866 for (String g : graphemes) {
4867 String next = null;
4868 if (!s.hasNext(p) || !(next = s.next(p)).equals(g)) {
4869 System.out.println("Failed \\b{g} [" + ln + "] : "
4870 + "expected: " + g + " - actual: " + next
4871 + " (line " + lineNumber[0] + ")");
4872 failCount++;
4873 }
4874 }
4875 if (s.hasNext(p)) {
4876 failCount++;
4877 }
4878 // test \b{g} without \X via Scanner
4879 s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4880 for (String g : graphemes) {
4881 String next = null;
4882 if (!s.hasNext() || !(next = s.next()).equals(g)) {
4883 System.out.println("Failed \\b{g} [" + ln + "] : "
4884 + "expected: " + g + " - actual: " + next
4885 + " (line " + lineNumber[0] + ")");
4886 failCount++;
4887 }
4888 }
4889 if (s.hasNext()) {
4890 failCount++;
4891 }
4892 });
4893 // some sanity checks
4894 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
4895 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
4896 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
4897 failCount++;
4898 // make sure "\b{n}" still works
4899 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
4900 failCount++;
4901 report("Unicode extended grapheme cluster");
4902 }
4903
4904 // hangup/timeout if go into exponential backtracking
4905 private static void expoBacktracking() throws Exception {
4906
4907 Object[][] patternMatchers = {
4908 // 6328855
4909 { "(.*\n*)*",
4910 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)",
4911 false },
4912 // 6192895
4913 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4914 "Hello World this is a test this is a test this is a test A",
4915 true },
4916 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4917 "Hello World this is a test this is a test this is a test \u4e00 ",
4918 false },
4919 { " *([a-z0-9]+ *)+",
4920 "hello world this is a test this is a test this is a test A",
4921 false },
4922 // 4771934 [FIXED] #5013651?
4923 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4924 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com",
4925 true },
4926 // 4866249 [FIXED]
4927 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>",
4928 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">",
4929 true },
4930 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4931 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com",
4932 false },
4933 // 6345469
4934 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+",
4935 " < br/> < / p> <p> <html> <adfasfdasdf> </p>",
4936 true }, // --> matched
4937 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+",
4938 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>",
4939 false },
4940 // 5026912
4941 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$",
4942 "156580451111112225588087755221111111566969655555555",
4943 false},
4944 // 6988218
4945 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')",
4946 "'%)) order by ANGEBOT.ID",
4947 false}, // find
4948 // 6693451
4949 { "^(\\s*foo\\s*)*$",
4950 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo",
4951 true },
4952 { "^(\\s*foo\\s*)*$",
4953 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo",
4954 false
4955 },
4956 // 7006761
4957 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true},
4958 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false},
4959 // 8140212
4960 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)",
4961 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()",
4962 false
4963 },
4964 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true},
4965 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false},
4966
4967 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true },
4968 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4969
4970 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true},
4971 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4972
4973 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false},
4974
4975 /* not fixed
4976 //8132141 ---> second level exponential backtracking
4977 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*",
4978 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" },
4979 */
4980 };
4981
4982 for (Object[] pm : patternMatchers) {
4983 String p = (String)pm[0];
4984 String s = (String)pm[1];
4985 boolean r = (Boolean)pm[2];
4986 if (r != Pattern.compile(p).matcher(s).matches()) {
4987 failCount++;
4988 }
4989 }
4990 }
4991
4992 private static void invalidGroupName() {
4993 // Invalid start of a group name
4994 for (String groupName : List.of("", ".", "0", "\u0040", "\u005b",
4995 "\u0060", "\u007b", "\u0416")) {
4996 for (String pat : List.of("(?<" + groupName + ">)",
4997 "\\k<" + groupName + ">")) {
4998 try {
4999 Pattern.compile(pat);
5000 failCount++;
5001 } catch (PatternSyntaxException e) {
5002 if (!e.getMessage().startsWith(
5003 "capturing group name does not start with a"
5004 + " Latin letter")) {
5005 failCount++;
5006 }
5007 }
5008 }
5009 }
5010 // Invalid char in a group name
5011 for (String groupName : List.of("a.", "b\u0040", "c\u005b",
5012 "d\u0060", "e\u007b", "f\u0416")) {
5013 for (String pat : List.of("(?<" + groupName + ">)",
5014 "\\k<" + groupName + ">")) {
5015 try {
5016 Pattern.compile(pat);
5017 failCount++;
5018 } catch (PatternSyntaxException e) {
5019 if (!e.getMessage().startsWith(
5020 "named capturing group is missing trailing '>'")) {
5021 failCount++;
5022 }
5023 }
5024 }
5025 }
5026 report("Invalid capturing group names");
5027 }
5028
5029 private static void illegalRepetitionRange() {
5030 // huge integers > (2^31 - 1)
5031 String n = BigInteger.valueOf(1L << 32)
5032 .toString();
5033 String m = BigInteger.valueOf(1L << 31)
5034 .add(new BigInteger(80, generator))
5035 .toString();
5036 for (String rep : List.of("", "x", ".", ",", "-1", "2,1",
5037 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) {
5038 String pat = ".{" + rep + "}";
5039 try {
5040 Pattern.compile(pat);
5041 failCount++;
5042 System.out.println("Expected to fail. Pattern: " + pat);
5043 } catch (PatternSyntaxException e) {
5044 if (!e.getMessage().startsWith("Illegal repetition")) {
5045 failCount++;
5046 System.out.println("Unexpected error message: " + e.getMessage());
5047 }
5048 } catch (Throwable t) {
5049 failCount++;
5050 System.out.println("Unexpected exception: " + t);
5051 }
5052 }
5053 report("illegalRepetitionRange");
5054 }
5055
5056 private static void surrogatePairWithCanonEq() {
5057 try {
5058 Pattern.compile("\ud834\udd21", Pattern.CANON_EQ);
5059 } catch (Throwable t) {
5060 failCount++;
5061 System.out.println("Unexpected exception: " + t);
5062 }
5063 report("surrogatePairWithCanonEq");
5064 }
5065
5066 private static String s2x(String s) {
5067 StringBuilder sb = new StringBuilder();
5068 for (char ch : s.toCharArray()) {
5069 sb.append(String.format("\\u%04x", (int)ch));
5070 }
5071 return sb.toString();
5072 }
5073
5074 // This test is for 8235812, with cases excluded by 8258259
5075 private static void lineBreakWithQuantifier() {
5076 // key: pattern
5077 // value: lengths of input that must match the pattern
5078 Map<String, List<Integer>> cases = Map.ofEntries(
5079 Map.entry("\\R?", List.of(0, 1)),
5080 Map.entry("\\R*", List.of(0, 1, 2, 3)),
5081 Map.entry("\\R+", List.of(1, 2, 3)),
5082 Map.entry("\\R{0}", List.of(0)),
5083 Map.entry("\\R{1}", List.of(1)),
5084 // Map.entry("\\R{2}", List.of(2)), // 8258259
5085 // Map.entry("\\R{3}", List.of(3)), // 8258259
5086 Map.entry("\\R{0,}", List.of(0, 1, 2, 3)),
5087 Map.entry("\\R{1,}", List.of(1, 2, 3)),
5088 // Map.entry("\\R{2,}", List.of(2, 3)), // 8258259
5089 // Map.entry("\\R{3,}", List.of(3)), // 8258259
5090 Map.entry("\\R{0,0}", List.of(0)),
5091 Map.entry("\\R{0,1}", List.of(0, 1)),
5092 Map.entry("\\R{0,2}", List.of(0, 1, 2)),
5093 Map.entry("\\R{0,3}", List.of(0, 1, 2, 3)),
5094 Map.entry("\\R{1,1}", List.of(1)),
5095 Map.entry("\\R{1,2}", List.of(1, 2)),
5096 Map.entry("\\R{1,3}", List.of(1, 2, 3)),
5097 // Map.entry("\\R{2,2}", List.of(2)), // 8258259
5098 // Map.entry("\\R{2,3}", List.of(2, 3)), // 8258259
5099 // Map.entry("\\R{3,3}", List.of(3)), // 8258259
5100 Map.entry("\\R", List.of(1)),
5101 Map.entry("\\R\\R", List.of(2)),
5102 Map.entry("\\R\\R\\R", List.of(3))
5103 );
5104
5105 // key: length of input
5106 // value: all possible inputs of given length
5107 Map<Integer, List<String>> inputs = new HashMap<>();
5108 String[] Rs = { "\r\n", "\r", "\n",
5109 "\u000B", "\u000C", "\u0085", "\u2028", "\u2029" };
5110 StringBuilder sb = new StringBuilder();
5111 for (int len = 0; len <= 3; ++len) {
5112 int[] idx = new int[len + 1];
5113 do {
5114 sb.setLength(0);
5115 for (int j = 0; j < len; ++j)
5116 sb.append(Rs[idx[j]]);
5117 inputs.computeIfAbsent(len, ArrayList::new).add(sb.toString());
5118 idx[0]++;
5119 for (int j = 0; j < len; ++j) {
5120 if (idx[j] < Rs.length)
5121 break;
5122 idx[j] = 0;
5123 idx[j+1]++;
5124 }
5125 } while (idx[len] == 0);
5126 }
5127
5128 // exhaustive testing
5129 for (String patStr : cases.keySet()) {
5130 Pattern[] pats = patStr.endsWith("R")
5131 ? new Pattern[] { Pattern.compile(patStr) } // no quantifiers
5132 : new Pattern[] { Pattern.compile(patStr), // greedy
5133 Pattern.compile(patStr + "?") }; // reluctant
5134 Matcher m = pats[0].matcher("");
5135 for (Pattern p : pats) {
5136 m.usePattern(p);
5137 for (int len : cases.get(patStr)) {
5138 for (String in : inputs.get(len)) {
5139 if (!m.reset(in).matches()) {
5140 failCount++;
5141 System.err.println("Expected to match '" +
5142 s2x(in) + "' =~ /" + p + "/");
5143 }
5144 }
5145 }
5146 }
5147 }
5148 report("lineBreakWithQuantifier");
5149 }
5150
5151 // This test is for 8214245
5152 private static void caseInsensitivePMatch() {
5153 for (String input : List.of("abcd", "AbCd", "ABCD")) {
5154 for (String pattern : List.of("abcd", "aBcD", "[a-d]{4}",
5155 "(?:a|b|c|d){4}", "\\p{Lower}{4}", "\\p{Ll}{4}",
5156 "\\p{IsLl}{4}", "\\p{gc=Ll}{4}",
5157 "\\p{general_category=Ll}{4}", "\\p{IsLowercase}{4}",
5158 "\\p{javaLowerCase}{4}", "\\p{Upper}{4}", "\\p{Lu}{4}",
5159 "\\p{IsLu}{4}", "\\p{gc=Lu}{4}", "\\p{general_category=Lu}{4}",
5160 "\\p{IsUppercase}{4}", "\\p{javaUpperCase}{4}",
5161 "\\p{Lt}{4}", "\\p{IsLt}{4}", "\\p{gc=Lt}{4}",
5162 "\\p{general_category=Lt}{4}", "\\p{IsTitlecase}{4}",
5163 "\\p{javaTitleCase}{4}", "[\\p{Lower}]{4}", "[\\p{Ll}]{4}",
5164 "[\\p{IsLl}]{4}", "[\\p{gc=Ll}]{4}",
5165 "[\\p{general_category=Ll}]{4}", "[\\p{IsLowercase}]{4}",
5166 "[\\p{javaLowerCase}]{4}", "[\\p{Upper}]{4}", "[\\p{Lu}]{4}",
5167 "[\\p{IsLu}]{4}", "[\\p{gc=Lu}]{4}",
5168 "[\\p{general_category=Lu}]{4}", "[\\p{IsUppercase}]{4}",
5169 "[\\p{javaUpperCase}]{4}", "[\\p{Lt}]{4}", "[\\p{IsLt}]{4}",
5170 "[\\p{gc=Lt}]{4}", "[\\p{general_category=Lt}]{4}",
5171 "[\\p{IsTitlecase}]{4}", "[\\p{javaTitleCase}]{4}"))
5172 {
5173 if (!Pattern.compile(pattern, Pattern.CASE_INSENSITIVE)
5174 .matcher(input)
5175 .matches())
5176 {
5177 failCount++;
5178 System.err.println("Expected to match: " +
5179 "'" + input + "' =~ /" + pattern + "/");
5180 }
5181 }
5182 }
5183
5184 for (String input : List.of("\u01c7", "\u01c8", "\u01c9")) {
5185 for (String pattern : List.of("\u01c7", "\u01c8", "\u01c9",
5186 "[\u01c7\u01c8]", "[\u01c7\u01c9]", "[\u01c8\u01c9]",
5187 "[\u01c7-\u01c8]", "[\u01c8-\u01c9]", "[\u01c7-\u01c9]",
5188 "\\p{Lower}", "\\p{Ll}", "\\p{IsLl}", "\\p{gc=Ll}",
5189 "\\p{general_category=Ll}", "\\p{IsLowercase}",
5190 "\\p{javaLowerCase}", "\\p{Upper}", "\\p{Lu}",
5191 "\\p{IsLu}", "\\p{gc=Lu}", "\\p{general_category=Lu}",
5192 "\\p{IsUppercase}", "\\p{javaUpperCase}",
5193 "\\p{Lt}", "\\p{IsLt}", "\\p{gc=Lt}",
5194 "\\p{general_category=Lt}", "\\p{IsTitlecase}",
5195 "\\p{javaTitleCase}", "[\\p{Lower}]", "[\\p{Ll}]",
5196 "[\\p{IsLl}]", "[\\p{gc=Ll}]",
5197 "[\\p{general_category=Ll}]", "[\\p{IsLowercase}]",
5198 "[\\p{javaLowerCase}]", "[\\p{Upper}]", "[\\p{Lu}]",
5199 "[\\p{IsLu}]", "[\\p{gc=Lu}]",
5200 "[\\p{general_category=Lu}]", "[\\p{IsUppercase}]",
5201 "[\\p{javaUpperCase}]", "[\\p{Lt}]", "[\\p{IsLt}]",
5202 "[\\p{gc=Lt}]", "[\\p{general_category=Lt}]",
5203 "[\\p{IsTitlecase}]", "[\\p{javaTitleCase}]"))
5204 {
5205 if (!Pattern.compile(pattern, Pattern.CASE_INSENSITIVE
5206 | Pattern.UNICODE_CHARACTER_CLASS)
5207 .matcher(input)
5208 .matches())
5209 {
5210 failCount++;
5211 System.err.println("Expected to match: " +
5212 "'" + input + "' =~ /" + pattern + "/");
5213 }
5214 }
5215 }
5216 report("caseInsensitivePMatch");
5217 }
5218
5219 // This test is for 8237599
5220 private static void surrogatePairOverlapRegion() {
5221 String input = "\ud801\udc37";
5222
5223 Pattern p = Pattern.compile(".+");
5224 Matcher m = p.matcher(input);
5225 m.region(0, 1);
5226
5227 boolean ok = m.find();
5228 if (!ok || !m.group(0).equals(input.substring(0, 1)))
5229 {
5230 failCount++;
5231 System.out.println("Input \"" + input + "\".substr(0, 1)" +
5232 " expected to match pattern \"" + p + "\"");
5233 if (ok) {
5234 System.out.println("group(0): \"" + m.group(0) + "\"");
5235 }
5236 } else if (!m.hitEnd()) {
5237 failCount++;
5238 System.out.println("Expected m.hitEnd() == true");
5239 }
5240
5241 p = Pattern.compile(".*(.)");
5242 m = p.matcher(input);
5243 m.region(1, 2);
5244
5245 ok = m.find();
5246 if (!ok || !m.group(0).equals(input.substring(1, 2))
5247 || !m.group(1).equals(input.substring(1, 2)))
5248 {
5249 failCount++;
5250 System.out.println("Input \"" + input + "\".substr(1, 2)" +
5251 " expected to match pattern \"" + p + "\"");
5252 if (ok) {
5253 System.out.println("group(0): \"" + m.group(0) + "\"");
5254 System.out.println("group(1): \"" + m.group(1) + "\"");
5255 }
5256 }
5257 report("surrogatePairOverlapRegion");
5258 }
5259 }
5260