1 /******************************************************************************* 2 * Copyright (c) 2000, 2020 IBM Corporation and others. 3 * 4 * This program and the accompanying materials 5 * are made available under the terms of the Eclipse Public License 2.0 6 * which accompanies this distribution, and is available at 7 * https://www.eclipse.org/legal/epl-2.0/ 8 * 9 * SPDX-License-Identifier: EPL-2.0 10 * 11 * Contributors: 12 * IBM Corporation - initial API and implementation 13 * Lucas Bullen (Red Hat Inc.) - [Bug 203792] filter should support multiple keywords 14 * Mickael Istria (Red Hat Inc.) - [534277] erroneous filtering with multiple words 15 *******************************************************************************/ 16 package org.eclipse.ui.internal.misc; 17 18 import java.util.ArrayList; 19 import java.util.regex.Pattern; 20 21 /** 22 * A string pattern matcher, supporting "*" and "?" wildcards. 23 */ 24 public class StringMatcher { 25 protected String fPattern; 26 27 protected int fLength; // pattern length 28 29 protected boolean fIgnoreWildCards; 30 31 protected boolean fIgnoreCase; 32 33 protected String[] patternWords; 34 35 protected Word wholePatternWord; 36 protected Word[] splittedPatternWords; 37 38 protected static final char fSingleWildCard = '\u0000'; 39 private static final Pattern NON_WORD = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS); //$NON-NLS-1$ 40 41 class Word { 42 private boolean hasTrailingStar = false; 43 private boolean hasLeadingStar = false; 44 private int bound = 0; 45 private String[] fragments = null; 46 private final String pattern; 47 Word(String pattern)48 Word(String pattern) { 49 this.pattern = pattern; 50 } 51 Word(String pattern, int fLength, String[] wordsSplitted)52 public Word(String pattern, int fLength, String[] wordsSplitted) { 53 this(pattern); 54 this.bound = fLength; 55 this.fragments = wordsSplitted; 56 } 57 parseWildcards()58 private void parseWildcards() { 59 if (this.pattern.startsWith("*")) { //$NON-NLS-1$ 60 this.hasLeadingStar = true; 61 } 62 if (this.pattern.endsWith("*")) {//$NON-NLS-1$ 63 /* make sure it's not an escaped wildcard */ 64 if (this.pattern.length() > 1 && this.pattern.charAt(this.pattern.length() - 2) != '\\') { 65 this.hasTrailingStar = true; 66 } 67 } 68 69 ArrayList<String> temp = new ArrayList<>(); 70 71 int pos = 0; 72 StringBuilder buf = new StringBuilder(); 73 while (pos < this.pattern.length()) { 74 char c = this.pattern.charAt(pos++); 75 switch (c) { 76 case '\\': 77 if (pos >= this.pattern.length()) { 78 buf.append(c); 79 } else { 80 char next = this.pattern.charAt(pos++); 81 /* if it's an escape sequence */ 82 if (next == '*' || next == '?' || next == '\\') { 83 buf.append(next); 84 } else { 85 /* not an escape sequence, just insert literally */ 86 buf.append(c); 87 buf.append(next); 88 } 89 } 90 break; 91 case '*': 92 if (buf.length() > 0) { 93 /* new segment */ 94 temp.add(buf.toString()); 95 this.bound += buf.length(); 96 buf.setLength(0); 97 } 98 break; 99 case '?': 100 /* append special character representing single match wildcard */ 101 buf.append(fSingleWildCard); 102 break; 103 default: 104 buf.append(c); 105 } 106 } 107 108 /* add last buffer to segment list */ 109 if (buf.length() > 0) { 110 temp.add(buf.toString()); 111 this.bound += buf.length(); 112 } 113 this.fragments = temp.toArray(new String[temp.size()]); 114 } 115 match(String text, int start, int end)116 boolean match(String text, int start, int end) { 117 boolean found = true; 118 if (fIgnoreWildCards) { 119 if ((end - start == this.pattern.length()) 120 && this.pattern.regionMatches(fIgnoreCase, 0, text, start, this.pattern.length())) 121 return true; 122 return false; 123 } 124 String[] segments = null; 125 segments = this.fragments; 126 int segCount = segments.length; 127 if (segCount == 0 && (this.hasLeadingStar || this.hasTrailingStar)) { 128 return true; 129 } 130 if (start == end) { 131 if (this.pattern.length() == 0) 132 return true; 133 return false; 134 } 135 if (this.pattern.length() == 0) { 136 if (start == end) 137 return true; 138 return false; 139 } 140 141 int tCurPos = start; 142 int bound = end - this.bound; 143 if (bound < 0) { 144 return false; 145 } 146 int i = 0; 147 String current = segments[i]; 148 int segLength = current.length(); 149 150 /* process first segment */ 151 if (!hasLeadingStar) { 152 if (!regExpRegionMatches(text, start, current, 0, segLength)) { 153 return false; 154 } 155 ++i; 156 tCurPos = tCurPos + segLength; 157 } 158 if ((segments.length == 1) && (!hasLeadingStar) && (!hasTrailingStar)) { 159 // only one segment to match, no wildcards specified 160 if (tCurPos == end) 161 return true; 162 return false; 163 } 164 /* process middle segments */ 165 while (i < segCount && found) { 166 current = segments[i]; 167 int currentMatch; 168 int k = current.indexOf(fSingleWildCard); 169 if (k < 0) { 170 currentMatch = textPosIn(text, tCurPos, end, current); 171 if (currentMatch < 0) { 172 found = false; 173 } 174 } else { 175 currentMatch = regExpPosIn(text, tCurPos, end, current); 176 if (currentMatch < 0) { 177 found = false; 178 } 179 } 180 if (!found) 181 return false; 182 tCurPos = currentMatch + current.length(); 183 i++; 184 } 185 186 /* process final segment */ 187 if (!hasTrailingStar && tCurPos != end) { 188 int clen = current.length(); 189 if (regExpRegionMatches(text, end - clen, current, 0, clen)) 190 return true; 191 return false; 192 } 193 if (i == segCount) 194 return true; 195 return false; 196 } 197 198 /** 199 * @param text 200 * @param start 201 * @param end 202 * @return whether the current pattern word matches at least one word in the 203 * given text 204 */ matchTextWord(String text, int start, int end)205 public boolean matchTextWord(String text, int start, int end) { 206 String[] textWords = getWords(text.substring(start, end)); 207 if (textWords.length == 0) { 208 return pattern.isEmpty(); 209 } 210 for (String subword : textWords) { 211 if (match(subword, 0, subword.length())) { 212 return true; 213 } 214 } 215 return false; 216 } 217 218 } 219 220 public static class Position { 221 int start; // inclusive 222 223 int end; // exclusive 224 Position(int start, int end)225 public Position(int start, int end) { 226 this.start = start; 227 this.end = end; 228 } 229 getStart()230 public int getStart() { 231 return start; 232 } 233 getEnd()234 public int getEnd() { 235 return end; 236 } 237 } 238 239 /** 240 * StringMatcher constructor takes in a String object that is a simple pattern 241 * which may contain '*' for 0 and many characters and '?' for exactly one 242 * character. 243 * 244 * Literal '*' and '?' characters must be escaped in the pattern e.g., "\*" 245 * means literal "*", etc. 246 * 247 * Escaping any other character (including the escape character itself), just 248 * results in that character in the pattern. e.g., "\a" means "a" and "\\" means 249 * "\" 250 * 251 * If invoking the StringMatcher with string literals in Java, don't forget 252 * escape characters are represented by "\\". 253 * 254 * @param pattern the pattern to match text against 255 * @param ignoreCase if true, case is ignored 256 * @param ignoreWildCards if true, wild cards and their escape sequences are 257 * ignored (everything is taken literally). 258 */ StringMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards)259 public StringMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) { 260 if (pattern == null) { 261 throw new IllegalArgumentException(); 262 } 263 fIgnoreCase = ignoreCase; 264 fIgnoreWildCards = ignoreWildCards; 265 fPattern = pattern; 266 fLength = pattern.length(); 267 268 parsePatternIntoWords(); 269 270 if (fIgnoreWildCards) { 271 parseNoWildCards(); 272 } else { 273 if (wholePatternWord != null) { 274 wholePatternWord.parseWildcards(); 275 } 276 if (splittedPatternWords != null && splittedPatternWords.length > 1) { 277 for (Word word : splittedPatternWords) { 278 word.parseWildcards(); 279 } 280 } 281 } 282 } 283 284 /** 285 * match the given <code>text</code> with the pattern 286 * 287 * @return true if matched otherwise false 288 * @param text a String object 289 */ match(String text)290 public boolean match(String text) { 291 if (text == null) { 292 return false; 293 } 294 return match(text, 0, text.length()); 295 } 296 297 /** 298 * Given the starting (inclusive) and the ending (exclusive) positions in the 299 * <code>text</code>, determine if the given substring matches with aPattern 300 * 301 * @return true if the specified portion of the text matches the pattern 302 * @param text a String object that contains the substring to match 303 * @param start marks the starting position (inclusive) of the substring 304 * @param end marks the ending index (exclusive) of the substring 305 */ match(String text, int start, int end)306 public boolean match(String text, int start, int end) { 307 if (null == text) { 308 throw new IllegalArgumentException(); 309 } 310 if (start > end) { 311 return false; 312 } 313 int tlen = text.length(); 314 start = Math.max(0, start); 315 end = Math.min(end, tlen); 316 317 if (wholePatternWord != null 318 && (wholePatternWord.match(text, start, end) || wholePatternWord.matchTextWord(text, start, end))) { 319 return true; 320 } 321 if (splittedPatternWords != null && splittedPatternWords.length > 0) { 322 for (Word word : splittedPatternWords) { 323 if (!word.match(text, start, end) && !word.matchTextWord(text, start, end)) { 324 return false; 325 } 326 } 327 return true; 328 } 329 return false; 330 } 331 332 /** 333 * This method parses the given pattern into words separated by spaces 334 * characters. Since wildcards are not being used in this case, the pattern 335 * consists of a single segment. 336 */ parsePatternIntoWords()337 private void parsePatternIntoWords() { 338 String trimedPattern = fPattern.trim(); 339 if (!trimedPattern.isEmpty()) { 340 this.wholePatternWord = new Word(trimedPattern); 341 patternWords = trimedPattern.split("\\s+"); //$NON-NLS-1$ 342 if (patternWords.length > 1) { 343 this.splittedPatternWords = new Word[patternWords.length]; 344 for (int i = 0; i < patternWords.length; i++) { 345 String patternWord = patternWords[i]; 346 if (!patternWord.endsWith("*")) { //$NON-NLS-1$ 347 patternWord += '*'; 348 } 349 this.splittedPatternWords[i] = new Word(patternWord); 350 // words may be found anywhere in the line 351 } 352 } 353 } 354 } 355 356 /** 357 * This method parses the given pattern into segments seperated by wildcard '*' 358 * characters. Since wildcards are not being used in this case, the pattern 359 * consists of a single segment. 360 */ parseNoWildCards()361 private void parseNoWildCards() { 362 this.wholePatternWord = new Word(fPattern, fLength, patternWords); 363 this.wholePatternWord.bound = fLength; 364 this.wholePatternWord.fragments = patternWords; 365 } 366 367 /** 368 * @param text a string which contains no wildcard 369 * @param start the starting index in the text for search, inclusive 370 * @param end the stopping point of search, exclusive 371 * @return the starting index in the text of the pattern , or -1 if not found 372 */ posIn(String text, int start, int end)373 protected int posIn(String text, int start, int end) {// no wild card in pattern 374 int max = end - fLength; 375 376 if (!fIgnoreCase) { 377 int i = text.indexOf(fPattern, start); 378 if (i == -1 || i > max) { 379 return -1; 380 } 381 return i; 382 } 383 384 for (int i = start; i <= max; ++i) { 385 if (text.regionMatches(true, i, fPattern, 0, fLength)) { 386 return i; 387 } 388 } 389 390 return -1; 391 } 392 393 /** 394 * @param text a simple regular expression that may only contain '?'(s) 395 * @param start the starting index in the text for search, inclusive 396 * @param end the stopping point of search, exclusive 397 * @param p a simple regular expression that may contains '?' 398 * @return the starting index in the text of the pattern , or -1 if not found 399 */ regExpPosIn(String text, int start, int end, String p)400 protected int regExpPosIn(String text, int start, int end, String p) { 401 int plen = p.length(); 402 403 int max = end - plen; 404 for (int i = start; i <= max; ++i) { 405 if (regExpRegionMatches(text, i, p, 0, plen)) { 406 return i; 407 } 408 } 409 return -1; 410 } 411 412 /** 413 * 414 * @return boolean 415 * @param text a String to match 416 * @param start int that indicates the starting index of match, inclusive 417 * @param end int that indicates the ending index of match, exclusive 418 * @param p String, String, a simple regular expression that may 419 * contain '?' 420 * @param ignoreCase boolean indicating whether <code>p</code> is case sensitive 421 */ regExpRegionMatches(String text, int tStart, String p, int pStart, int plen)422 protected boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) { 423 while (plen-- > 0) { 424 char tchar = text.charAt(tStart++); 425 char pchar = p.charAt(pStart++); 426 427 /* process wild cards */ 428 if (!fIgnoreWildCards) { 429 /* skip single wild cards */ 430 if (pchar == fSingleWildCard) { 431 continue; 432 } 433 } 434 if (pchar == tchar) { 435 continue; 436 } 437 if (fIgnoreCase) { 438 if (Character.toUpperCase(tchar) == Character.toUpperCase(pchar)) { 439 continue; 440 } 441 // comparing after converting to upper case doesn't handle all cases; 442 // also compare after converting to lower case 443 if (Character.toLowerCase(tchar) == Character.toLowerCase(pchar)) { 444 continue; 445 } 446 } 447 return false; 448 } 449 return true; 450 } 451 452 /** 453 * @param text the string to match 454 * @param start the starting index in the text for search, inclusive 455 * @param end the stopping point of search, exclusive 456 * @param p a pattern string that has no wildcard 457 * @return the starting index in the text of the pattern , or -1 if not found 458 */ textPosIn(String text, int start, int end, String p)459 protected int textPosIn(String text, int start, int end, String p) { 460 461 int plen = p.length(); 462 int max = end - plen; 463 464 if (!fIgnoreCase) { 465 int i = text.indexOf(p, start); 466 if (i == -1 || i > max) { 467 return -1; 468 } 469 return i; 470 } 471 472 for (int i = start; i <= max; ++i) { 473 if (text.regionMatches(true, i, p, 0, plen)) { 474 return i; 475 } 476 } 477 478 return -1; 479 } 480 481 /** 482 * Take the given filter text and break it down into words using a 483 * BreakIterator. 484 * 485 * @param text 486 * @return an array of words 487 */ getWords(String text)488 public static String[] getWords(String text) { 489 490 return NON_WORD.split(text, 0); 491 } 492 493 } 494