1 /* Matcher.java -- Instance of a regular expression applied to a char sequence. 2 Copyright (C) 2002, 2004, 2006 Free Software Foundation, Inc. 3 4 This file is part of GNU Classpath. 5 6 GNU Classpath is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 GNU Classpath is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GNU Classpath; see the file COPYING. If not, write to the 18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19 02110-1301 USA. 20 21 Linking this library statically or dynamically with other modules is 22 making a combined work based on this library. Thus, the terms and 23 conditions of the GNU General Public License cover the whole 24 combination. 25 26 As a special exception, the copyright holders of this library give you 27 permission to link this library with independent modules to produce an 28 executable, regardless of the license terms of these independent 29 modules, and to copy and distribute the resulting executable under 30 terms of your choice, provided that you also meet, for each linked 31 independent module, the terms and conditions of the license of that 32 module. An independent module is a module which is not derived from 33 or based on this library. If you modify this library, you may extend 34 this exception to your version of the library, but you are not 35 obligated to do so. If you do not wish to do so, delete this 36 exception statement from your version. */ 37 38 39 package java.util.regex; 40 41 import gnu.java.lang.CPStringBuilder; 42 43 import gnu.java.util.regex.CharIndexed; 44 import gnu.java.util.regex.RE; 45 import gnu.java.util.regex.REMatch; 46 47 /** 48 * Instance of a regular expression applied to a char sequence. 49 * 50 * @since 1.4 51 */ 52 public final class Matcher implements MatchResult 53 { 54 private Pattern pattern; 55 private CharSequence input; 56 // We use CharIndexed as an input object to the getMatch method in order 57 // that /\G/ (the end of the previous match) may work. The information 58 // of the previous match is stored in the CharIndexed object. 59 private CharIndexed inputCharIndexed; 60 private int position; 61 private int appendPosition; 62 private REMatch match; 63 64 /** 65 * The start of the region of the input on which to match. 66 */ 67 private int regionStart; 68 69 /** 70 * The end of the region of the input on which to match. 71 */ 72 private int regionEnd; 73 74 /** 75 * True if the match process should look beyond the 76 * region marked by regionStart to regionEnd when 77 * performing lookAhead, lookBehind and boundary 78 * matching. 79 */ 80 private boolean transparentBounds; 81 82 /** 83 * The flags that affect the anchoring bounds. 84 * If {@link #hasAnchoringBounds()} is {@code true}, 85 * the match process will honour the 86 * anchoring bounds: ^, \A, \Z, \z and $. If 87 * {@link #hasAnchoringBounds()} is {@code false}, 88 * the anchors are ignored and appropriate flags, 89 * stored in this variable, are used to provide this 90 * behaviour. 91 */ 92 private int anchoringBounds; 93 Matcher(Pattern pattern, CharSequence input)94 Matcher(Pattern pattern, CharSequence input) 95 { 96 this.pattern = pattern; 97 this.input = input; 98 this.inputCharIndexed = RE.makeCharIndexed(input, 0); 99 regionStart = 0; 100 regionEnd = input.length(); 101 transparentBounds = false; 102 anchoringBounds = 0; 103 } 104 105 /** 106 * Changes the pattern used by the {@link Matcher} to 107 * the one specified. Existing match information is lost, 108 * but the input and the matcher's position within it is 109 * retained. 110 * 111 * @param newPattern the new pattern to use. 112 * @return this matcher. 113 * @throws IllegalArgumentException if {@code newPattern} is 114 * {@code null}. 115 * @since 1.5 116 */ usePattern(Pattern newPattern)117 public Matcher usePattern(Pattern newPattern) 118 { 119 if (newPattern == null) 120 throw new IllegalArgumentException("The new pattern was null."); 121 pattern = newPattern; 122 match = null; 123 124 return this; 125 } 126 127 /** 128 * @param sb The target string buffer 129 * @param replacement The replacement string 130 * 131 * @exception IllegalStateException If no match has yet been attempted, 132 * or if the previous match operation failed 133 * @exception IndexOutOfBoundsException If the replacement string refers 134 * to a capturing group that does not exist in the pattern 135 */ appendReplacement(StringBuffer sb, String replacement)136 public Matcher appendReplacement (StringBuffer sb, String replacement) 137 throws IllegalStateException 138 { 139 assertMatchOp(); 140 sb.append(input.subSequence(appendPosition, 141 match.getStartIndex()).toString()); 142 sb.append(RE.getReplacement(replacement, match, 143 RE.REG_REPLACE_USE_BACKSLASHESCAPE)); 144 appendPosition = match.getEndIndex(); 145 return this; 146 } 147 148 /** 149 * @param sb The target string buffer 150 */ appendTail(StringBuffer sb)151 public StringBuffer appendTail (StringBuffer sb) 152 { 153 sb.append(input.subSequence(appendPosition, input.length()).toString()); 154 return sb; 155 } 156 157 /** 158 * @exception IllegalStateException If no match has yet been attempted, 159 * or if the previous match operation failed 160 */ end()161 public int end () 162 throws IllegalStateException 163 { 164 assertMatchOp(); 165 return match.getEndIndex(); 166 } 167 168 /** 169 * @param group The index of a capturing group in this matcher's pattern 170 * 171 * @exception IllegalStateException If no match has yet been attempted, 172 * or if the previous match operation failed 173 * @exception IndexOutOfBoundsException If the replacement string refers 174 * to a capturing group that does not exist in the pattern 175 */ end(int group)176 public int end (int group) 177 throws IllegalStateException 178 { 179 assertMatchOp(); 180 return match.getEndIndex(group); 181 } 182 find()183 public boolean find () 184 { 185 boolean first = (match == null); 186 if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) 187 match = pattern.getRE().getMatch(inputCharIndexed, position, anchoringBounds); 188 else 189 match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 190 position, anchoringBounds); 191 if (match != null) 192 { 193 int endIndex = match.getEndIndex(); 194 // Is the match within input limits? 195 if (endIndex > input.length()) 196 { 197 match = null; 198 return false; 199 } 200 // Are we stuck at the same position? 201 if (!first && endIndex == position) 202 { 203 match = null; 204 // Not at the end of the input yet? 205 if (position < input.length() - 1) 206 { 207 position++; 208 return find(position); 209 } 210 else 211 return false; 212 } 213 position = endIndex; 214 return true; 215 } 216 return false; 217 } 218 219 /** 220 * @param start The index to start the new pattern matching 221 * 222 * @exception IndexOutOfBoundsException If the replacement string refers 223 * to a capturing group that does not exist in the pattern 224 */ find(int start)225 public boolean find (int start) 226 { 227 if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) 228 match = pattern.getRE().getMatch(inputCharIndexed, start, anchoringBounds); 229 else 230 match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 231 start, anchoringBounds); 232 if (match != null) 233 { 234 position = match.getEndIndex(); 235 return true; 236 } 237 return false; 238 } 239 240 /** 241 * @exception IllegalStateException If no match has yet been attempted, 242 * or if the previous match operation failed 243 */ group()244 public String group () 245 { 246 assertMatchOp(); 247 return match.toString(); 248 } 249 250 /** 251 * @param group The index of a capturing group in this matcher's pattern 252 * 253 * @exception IllegalStateException If no match has yet been attempted, 254 * or if the previous match operation failed 255 * @exception IndexOutOfBoundsException If the replacement string refers 256 * to a capturing group that does not exist in the pattern 257 */ group(int group)258 public String group (int group) 259 throws IllegalStateException 260 { 261 assertMatchOp(); 262 return match.toString(group); 263 } 264 265 /** 266 * @param replacement The replacement string 267 */ replaceFirst(String replacement)268 public String replaceFirst (String replacement) 269 { 270 reset(); 271 // Semantics might not quite match 272 return pattern.getRE().substitute(input, replacement, position, 273 RE.REG_REPLACE_USE_BACKSLASHESCAPE); 274 } 275 276 /** 277 * @param replacement The replacement string 278 */ replaceAll(String replacement)279 public String replaceAll (String replacement) 280 { 281 reset(); 282 return pattern.getRE().substituteAll(input, replacement, position, 283 RE.REG_REPLACE_USE_BACKSLASHESCAPE); 284 } 285 groupCount()286 public int groupCount () 287 { 288 return pattern.getRE().getNumSubs(); 289 } 290 lookingAt()291 public boolean lookingAt () 292 { 293 if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) 294 match = pattern.getRE().getMatch(inputCharIndexed, regionStart, 295 anchoringBounds|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX); 296 else 297 match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0, 298 anchoringBounds|RE.REG_FIX_STARTING_POSITION); 299 if (match != null) 300 { 301 if (match.getStartIndex() == 0) 302 { 303 position = match.getEndIndex(); 304 return true; 305 } 306 match = null; 307 } 308 return false; 309 } 310 311 /** 312 * Attempts to match the entire input sequence against the pattern. 313 * 314 * If the match succeeds then more information can be obtained via the 315 * start, end, and group methods. 316 * 317 * @see #start() 318 * @see #end() 319 * @see #group() 320 */ matches()321 public boolean matches () 322 { 323 if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) 324 match = pattern.getRE().getMatch(inputCharIndexed, regionStart, 325 anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX); 326 else 327 match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0, 328 anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION); 329 if (match != null) 330 { 331 if (match.getStartIndex() == 0) 332 { 333 position = match.getEndIndex(); 334 if (position == input.length()) 335 return true; 336 } 337 match = null; 338 } 339 return false; 340 } 341 342 /** 343 * Returns the Pattern that is interpreted by this Matcher 344 */ pattern()345 public Pattern pattern () 346 { 347 return pattern; 348 } 349 350 /** 351 * Resets the internal state of the matcher, including 352 * resetting the region to its default state of encompassing 353 * the whole input. The state of {@link #hasTransparentBounds()} 354 * and {@link #hasAnchoringBounds()} are unaffected. 355 * 356 * @return a reference to this matcher. 357 * @see #regionStart() 358 * @see #regionEnd() 359 * @see #hasTransparentBounds() 360 * @see #hasAnchoringBounds() 361 */ reset()362 public Matcher reset () 363 { 364 position = 0; 365 match = null; 366 regionStart = 0; 367 regionEnd = input.length(); 368 appendPosition = 0; 369 return this; 370 } 371 372 /** 373 * Resets the internal state of the matcher, including 374 * resetting the region to its default state of encompassing 375 * the whole input. The state of {@link #hasTransparentBounds()} 376 * and {@link #hasAnchoringBounds()} are unaffected. 377 * 378 * @param input The new input character sequence. 379 * @return a reference to this matcher. 380 * @see #regionStart() 381 * @see #regionEnd() 382 * @see #hasTransparentBounds() 383 * @see #hasAnchoringBounds() 384 */ reset(CharSequence input)385 public Matcher reset (CharSequence input) 386 { 387 this.input = input; 388 this.inputCharIndexed = RE.makeCharIndexed(input, 0); 389 return reset(); 390 } 391 392 /** 393 * @return the index of a capturing group in this matcher's pattern 394 * 395 * @exception IllegalStateException If no match has yet been attempted, 396 * or if the previous match operation failed 397 */ start()398 public int start () 399 throws IllegalStateException 400 { 401 assertMatchOp(); 402 return match.getStartIndex(); 403 } 404 405 /** 406 * @param group The index of a capturing group in this matcher's pattern 407 * 408 * @exception IllegalStateException If no match has yet been attempted, 409 * or if the previous match operation failed 410 * @exception IndexOutOfBoundsException If the replacement string refers 411 * to a capturing group that does not exist in the pattern 412 */ start(int group)413 public int start (int group) 414 throws IllegalStateException 415 { 416 assertMatchOp(); 417 return match.getStartIndex(group); 418 } 419 420 /** 421 * @return True if and only if the matcher hit the end of input. 422 * @since 1.5 423 */ hitEnd()424 public boolean hitEnd() 425 { 426 return inputCharIndexed.hitEnd(); 427 } 428 429 /** 430 * @return A string expression of this matcher. 431 */ toString()432 public String toString() 433 { 434 CPStringBuilder sb = new CPStringBuilder(); 435 sb.append(this.getClass().getName()) 436 .append("[pattern=").append(pattern.pattern()) 437 .append(" region=").append(regionStart).append(",").append(regionEnd) 438 .append(" anchoringBounds=").append(anchoringBounds == 0) 439 .append(" transparentBounds=").append(transparentBounds) 440 .append(" lastmatch=").append(match == null ? "" : match.toString()) 441 .append("]"); 442 return sb.toString(); 443 } 444 assertMatchOp()445 private void assertMatchOp() 446 { 447 if (match == null) throw new IllegalStateException(); 448 } 449 450 /** 451 * <p> 452 * Defines the region of the input on which to match. 453 * By default, the {@link Matcher} attempts to match 454 * the whole string (from 0 to the length of the input), 455 * but a region between {@code start} (inclusive) and 456 * {@code end} (exclusive) on which to match may instead 457 * be defined using this method. 458 * </p> 459 * <p> 460 * The behaviour of region matching is further affected 461 * by the use of transparent or opaque bounds (see 462 * {@link #useTransparentBounds(boolean)}) and whether or not 463 * anchors ({@code ^} and {@code $}) are in use 464 * (see {@link #useAnchoringBounds(boolean)}). With transparent 465 * bounds, the matcher is aware of input outside the bounds 466 * set by this method, whereas, with opaque bounds (the default) 467 * only the input within the bounds is used. The use of 468 * anchors are affected by this setting; with transparent 469 * bounds, anchors will match the beginning of the real input, 470 * while with opaque bounds they match the beginning of the 471 * region. {@link #useAnchoringBounds(boolean)} can be used 472 * to turn on or off the matching of anchors. 473 * </p> 474 * 475 * @param start the start of the region (inclusive). 476 * @param end the end of the region (exclusive). 477 * @return a reference to this matcher. 478 * @throws IndexOutOfBoundsException if either {@code start} or 479 * {@code end} are less than zero, 480 * if either {@code start} or 481 * {@code end} are greater than the 482 * length of the input, or if 483 * {@code start} is greater than 484 * {@code end}. 485 * @see #regionStart() 486 * @see #regionEnd() 487 * @see #hasTransparentBounds() 488 * @see #useTransparentBounds(boolean) 489 * @see #hasAnchoringBounds() 490 * @see #useAnchoringBounds(boolean) 491 * @since 1.5 492 */ region(int start, int end)493 public Matcher region(int start, int end) 494 { 495 int length = input.length(); 496 if (start < 0) 497 throw new IndexOutOfBoundsException("The start position was less than zero."); 498 if (start >= length) 499 throw new IndexOutOfBoundsException("The start position is after the end of the input."); 500 if (end < 0) 501 throw new IndexOutOfBoundsException("The end position was less than zero."); 502 if (end > length) 503 throw new IndexOutOfBoundsException("The end position is after the end of the input."); 504 if (start > end) 505 throw new IndexOutOfBoundsException("The start position is after the end position."); 506 reset(); 507 regionStart = start; 508 regionEnd = end; 509 return this; 510 } 511 512 /** 513 * The start of the region on which to perform matches (inclusive). 514 * 515 * @return the start index of the region. 516 * @see #region(int,int) 517 * #see #regionEnd() 518 * @since 1.5 519 */ regionStart()520 public int regionStart() 521 { 522 return regionStart; 523 } 524 525 /** 526 * The end of the region on which to perform matches (exclusive). 527 * 528 * @return the end index of the region. 529 * @see #region(int,int) 530 * @see #regionStart() 531 * @since 1.5 532 */ regionEnd()533 public int regionEnd() 534 { 535 return regionEnd; 536 } 537 538 /** 539 * Returns true if the bounds of the region marked by 540 * {@link #regionStart()} and {@link #regionEnd()} are 541 * transparent. When these bounds are transparent, the 542 * matching process can look beyond them to perform 543 * lookahead, lookbehind and boundary matching operations. 544 * By default, the bounds are opaque. 545 * 546 * @return true if the bounds of the matching region are 547 * transparent. 548 * @see #useTransparentBounds(boolean) 549 * @see #region(int,int) 550 * @see #regionStart() 551 * @see #regionEnd() 552 * @since 1.5 553 */ hasTransparentBounds()554 public boolean hasTransparentBounds() 555 { 556 return transparentBounds; 557 } 558 559 /** 560 * Sets the transparency of the bounds of the region 561 * marked by {@link #regionStart()} and {@link #regionEnd()}. 562 * A value of {@code true} makes the bounds transparent, 563 * so the matcher can see beyond them to perform lookahead, 564 * lookbehind and boundary matching operations. A value 565 * of {@code false} (the default) makes the bounds opaque, 566 * restricting the match to the input region denoted 567 * by {@link #regionStart()} and {@link #regionEnd()}. 568 * 569 * @param transparent true if the bounds should be transparent. 570 * @return a reference to this matcher. 571 * @see #hasTransparentBounds() 572 * @see #region(int,int) 573 * @see #regionStart() 574 * @see #regionEnd() 575 * @since 1.5 576 */ useTransparentBounds(boolean transparent)577 public Matcher useTransparentBounds(boolean transparent) 578 { 579 transparentBounds = transparent; 580 return this; 581 } 582 583 /** 584 * Returns true if the matcher will honour the use of 585 * the anchoring bounds: {@code ^}, {@code \A}, {@code \Z}, 586 * {@code \z} and {@code $}. By default, the anchors 587 * are used. Note that the effect of the anchors is 588 * also affected by {@link #hasTransparentBounds()}. 589 * 590 * @return true if the matcher will attempt to match 591 * the anchoring bounds. 592 * @see #useAnchoringBounds(boolean) 593 * @see #hasTransparentBounds() 594 * @since 1.5 595 */ hasAnchoringBounds()596 public boolean hasAnchoringBounds() 597 { 598 return anchoringBounds == 0; 599 } 600 601 /** 602 * Enables or disables the use of the anchoring bounds: 603 * {@code ^}, {@code \A}, {@code \Z}, {@code \z} and 604 * {@code $}. By default, their use is enabled. When 605 * disabled, the matcher will not attempt to match 606 * the anchors. 607 * 608 * @param useAnchors true if anchoring bounds should be used. 609 * @return a reference to this matcher. 610 * @since 1.5 611 * @see #hasAnchoringBounds() 612 */ useAnchoringBounds(boolean useAnchors)613 public Matcher useAnchoringBounds(boolean useAnchors) 614 { 615 if (useAnchors) 616 anchoringBounds = 0; 617 else 618 anchoringBounds = RE.REG_NOTBOL|RE.REG_NOTEOL; 619 return this; 620 } 621 622 /** 623 * Returns a read-only snapshot of the current state of 624 * the {@link Matcher} as a {@link MatchResult}. Any 625 * subsequent changes to this instance are not reflected 626 * in the returned {@link MatchResult}. 627 * 628 * @return a {@link MatchResult} instance representing the 629 * current state of the {@link Matcher}. 630 */ toMatchResult()631 public MatchResult toMatchResult() 632 { 633 Matcher snapshot = new Matcher(pattern, input); 634 if (match != null) 635 snapshot.match = (REMatch) match.clone(); 636 return snapshot; 637 } 638 639 /** 640 * Returns a literalized string of s where characters {@code $} and {@code 641 * \\} are escaped. 642 * 643 * @param s the string to literalize. 644 * @return the literalized string. 645 * @since 1.5 646 */ quoteReplacement(String s)647 public static String quoteReplacement(String s) 648 { 649 if (s == null) 650 throw new NullPointerException(); 651 CPStringBuilder sb = new CPStringBuilder(); 652 for (int i = 0; i < s.length(); i++) 653 { 654 char ch = s.charAt(i); 655 if (ch == '$' || ch == '\\') 656 sb.append('\\'); 657 sb.append(ch); 658 } 659 return sb.toString(); 660 } 661 662 } 663