1 /*******************************************************************************
2  * Copyright (c) 2000, 2020 IBM Corporation and others.
3  *
4  * This program and the accompanying materials
5  * are made available under the terms of the Eclipse Public License 2.0
6  * which accompanies this distribution, and is available at
7  * https://www.eclipse.org/legal/epl-2.0/
8  *
9  * SPDX-License-Identifier: EPL-2.0
10  *
11  * Contributors:
12  *     IBM Corporation - initial API and implementation
13  *     Lucas Bullen (Red Hat Inc.) - [Bug 203792] filter should support multiple keywords
14  *     Mickael Istria (Red Hat Inc.) - [534277] erroneous filtering with multiple words
15  *******************************************************************************/
16 package org.eclipse.ui.internal.misc;
17 
18 import java.util.ArrayList;
19 import java.util.regex.Pattern;
20 
21 /**
22  * A string pattern matcher, supporting "*" and "?" wildcards.
23  */
24 public class StringMatcher {
25 	protected String fPattern;
26 
27 	protected int fLength; // pattern length
28 
29 	protected boolean fIgnoreWildCards;
30 
31 	protected boolean fIgnoreCase;
32 
33 	protected String[] patternWords;
34 
35 	protected Word wholePatternWord;
36 	protected Word[] splittedPatternWords;
37 
38 	protected static final char fSingleWildCard = '\u0000';
39 	private static final Pattern NON_WORD = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS); //$NON-NLS-1$
40 
41 	class Word {
42 		private boolean hasTrailingStar = false;
43 		private boolean hasLeadingStar = false;
44 		private int bound = 0;
45 		private String[] fragments = null;
46 		private final String pattern;
47 
Word(String pattern)48 		Word(String pattern) {
49 			this.pattern = pattern;
50 		}
51 
Word(String pattern, int fLength, String[] wordsSplitted)52 		public Word(String pattern, int fLength, String[] wordsSplitted) {
53 			this(pattern);
54 			this.bound = fLength;
55 			this.fragments = wordsSplitted;
56 		}
57 
parseWildcards()58 		private void parseWildcards() {
59 			if (this.pattern.startsWith("*")) { //$NON-NLS-1$
60 				this.hasLeadingStar = true;
61 			}
62 			if (this.pattern.endsWith("*")) {//$NON-NLS-1$
63 				/* make sure it's not an escaped wildcard */
64 				if (this.pattern.length() > 1 && this.pattern.charAt(this.pattern.length() - 2) != '\\') {
65 					this.hasTrailingStar = true;
66 				}
67 			}
68 
69 			ArrayList<String> temp = new ArrayList<>();
70 
71 			int pos = 0;
72 			StringBuilder buf = new StringBuilder();
73 			while (pos < this.pattern.length()) {
74 				char c = this.pattern.charAt(pos++);
75 				switch (c) {
76 				case '\\':
77 					if (pos >= this.pattern.length()) {
78 						buf.append(c);
79 					} else {
80 						char next = this.pattern.charAt(pos++);
81 						/* if it's an escape sequence */
82 						if (next == '*' || next == '?' || next == '\\') {
83 							buf.append(next);
84 						} else {
85 							/* not an escape sequence, just insert literally */
86 							buf.append(c);
87 							buf.append(next);
88 						}
89 					}
90 					break;
91 				case '*':
92 					if (buf.length() > 0) {
93 						/* new segment */
94 						temp.add(buf.toString());
95 						this.bound += buf.length();
96 						buf.setLength(0);
97 					}
98 					break;
99 				case '?':
100 					/* append special character representing single match wildcard */
101 					buf.append(fSingleWildCard);
102 					break;
103 				default:
104 					buf.append(c);
105 				}
106 			}
107 
108 			/* add last buffer to segment list */
109 			if (buf.length() > 0) {
110 				temp.add(buf.toString());
111 				this.bound += buf.length();
112 			}
113 			this.fragments = temp.toArray(new String[temp.size()]);
114 		}
115 
match(String text, int start, int end)116 		boolean match(String text, int start, int end) {
117 			boolean found = true;
118 			if (fIgnoreWildCards) {
119 				if ((end - start == this.pattern.length())
120 						&& this.pattern.regionMatches(fIgnoreCase, 0, text, start, this.pattern.length()))
121 					return true;
122 				return false;
123 			}
124 			String[] segments = null;
125 			segments = this.fragments;
126 			int segCount = segments.length;
127 			if (segCount == 0 && (this.hasLeadingStar || this.hasTrailingStar)) {
128 				return true;
129 			}
130 			if (start == end) {
131 				if (this.pattern.length() == 0)
132 					return true;
133 				return false;
134 			}
135 			if (this.pattern.length() == 0) {
136 				if (start == end)
137 					return true;
138 				return false;
139 			}
140 
141 			int tCurPos = start;
142 			int bound = end - this.bound;
143 			if (bound < 0) {
144 				return false;
145 			}
146 			int i = 0;
147 			String current = segments[i];
148 			int segLength = current.length();
149 
150 			/* process first segment */
151 			if (!hasLeadingStar) {
152 				if (!regExpRegionMatches(text, start, current, 0, segLength)) {
153 					return false;
154 				}
155 				++i;
156 				tCurPos = tCurPos + segLength;
157 			}
158 			if ((segments.length == 1) && (!hasLeadingStar) && (!hasTrailingStar)) {
159 				// only one segment to match, no wildcards specified
160 				if (tCurPos == end)
161 					return true;
162 				return false;
163 			}
164 			/* process middle segments */
165 			while (i < segCount && found) {
166 				current = segments[i];
167 				int currentMatch;
168 				int k = current.indexOf(fSingleWildCard);
169 				if (k < 0) {
170 					currentMatch = textPosIn(text, tCurPos, end, current);
171 					if (currentMatch < 0) {
172 						found = false;
173 					}
174 				} else {
175 					currentMatch = regExpPosIn(text, tCurPos, end, current);
176 					if (currentMatch < 0) {
177 						found = false;
178 					}
179 				}
180 				if (!found)
181 					return false;
182 				tCurPos = currentMatch + current.length();
183 				i++;
184 			}
185 
186 			/* process final segment */
187 			if (!hasTrailingStar && tCurPos != end) {
188 				int clen = current.length();
189 				if (regExpRegionMatches(text, end - clen, current, 0, clen))
190 					return true;
191 				return false;
192 			}
193 			if (i == segCount)
194 				return true;
195 			return false;
196 		}
197 
198 		/**
199 		 * @param text
200 		 * @param start
201 		 * @param end
202 		 * @return whether the current pattern word matches at least one word in the
203 		 *         given text
204 		 */
matchTextWord(String text, int start, int end)205 		public boolean matchTextWord(String text, int start, int end) {
206 			String[] textWords = getWords(text.substring(start, end));
207 			if (textWords.length == 0) {
208 				return pattern.isEmpty();
209 			}
210 			for (String subword : textWords) {
211 				if (match(subword, 0, subword.length())) {
212 					return true;
213 				}
214 			}
215 			return false;
216 		}
217 
218 	}
219 
220 	public static class Position {
221 		int start; // inclusive
222 
223 		int end; // exclusive
224 
Position(int start, int end)225 		public Position(int start, int end) {
226 			this.start = start;
227 			this.end = end;
228 		}
229 
getStart()230 		public int getStart() {
231 			return start;
232 		}
233 
getEnd()234 		public int getEnd() {
235 			return end;
236 		}
237 	}
238 
239 	/**
240 	 * StringMatcher constructor takes in a String object that is a simple pattern
241 	 * which may contain '*' for 0 and many characters and '?' for exactly one
242 	 * character.
243 	 *
244 	 * Literal '*' and '?' characters must be escaped in the pattern e.g., "\*"
245 	 * means literal "*", etc.
246 	 *
247 	 * Escaping any other character (including the escape character itself), just
248 	 * results in that character in the pattern. e.g., "\a" means "a" and "\\" means
249 	 * "\"
250 	 *
251 	 * If invoking the StringMatcher with string literals in Java, don't forget
252 	 * escape characters are represented by "\\".
253 	 *
254 	 * @param pattern         the pattern to match text against
255 	 * @param ignoreCase      if true, case is ignored
256 	 * @param ignoreWildCards if true, wild cards and their escape sequences are
257 	 *                        ignored (everything is taken literally).
258 	 */
StringMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards)259 	public StringMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
260 		if (pattern == null) {
261 			throw new IllegalArgumentException();
262 		}
263 		fIgnoreCase = ignoreCase;
264 		fIgnoreWildCards = ignoreWildCards;
265 		fPattern = pattern;
266 		fLength = pattern.length();
267 
268 		parsePatternIntoWords();
269 
270 		if (fIgnoreWildCards) {
271 			parseNoWildCards();
272 		} else {
273 			if (wholePatternWord != null) {
274 				wholePatternWord.parseWildcards();
275 			}
276 			if (splittedPatternWords != null && splittedPatternWords.length > 1) {
277 				for (Word word : splittedPatternWords) {
278 					word.parseWildcards();
279 				}
280 			}
281 		}
282 	}
283 
284 	/**
285 	 * match the given <code>text</code> with the pattern
286 	 *
287 	 * @return true if matched otherwise false
288 	 * @param text a String object
289 	 */
match(String text)290 	public boolean match(String text) {
291 		if (text == null) {
292 			return false;
293 		}
294 		return match(text, 0, text.length());
295 	}
296 
297 	/**
298 	 * Given the starting (inclusive) and the ending (exclusive) positions in the
299 	 * <code>text</code>, determine if the given substring matches with aPattern
300 	 *
301 	 * @return true if the specified portion of the text matches the pattern
302 	 * @param text  a String object that contains the substring to match
303 	 * @param start marks the starting position (inclusive) of the substring
304 	 * @param end   marks the ending index (exclusive) of the substring
305 	 */
match(String text, int start, int end)306 	public boolean match(String text, int start, int end) {
307 		if (null == text) {
308 			throw new IllegalArgumentException();
309 		}
310 		if (start > end) {
311 			return false;
312 		}
313 		int tlen = text.length();
314 		start = Math.max(0, start);
315 		end = Math.min(end, tlen);
316 
317 		if (wholePatternWord != null
318 				&& (wholePatternWord.match(text, start, end) || wholePatternWord.matchTextWord(text, start, end))) {
319 			return true;
320 		}
321 		if (splittedPatternWords != null && splittedPatternWords.length > 0) {
322 			for (Word word : splittedPatternWords) {
323 				if (!word.match(text, start, end) && !word.matchTextWord(text, start, end)) {
324 					return false;
325 				}
326 			}
327 			return true;
328 		}
329 		return false;
330 	}
331 
332 	/**
333 	 * This method parses the given pattern into words separated by spaces
334 	 * characters. Since wildcards are not being used in this case, the pattern
335 	 * consists of a single segment.
336 	 */
parsePatternIntoWords()337 	private void parsePatternIntoWords() {
338 		String trimedPattern = fPattern.trim();
339 		if (!trimedPattern.isEmpty()) {
340 			this.wholePatternWord = new Word(trimedPattern);
341 			patternWords = trimedPattern.split("\\s+"); //$NON-NLS-1$
342 			if (patternWords.length > 1) {
343 				this.splittedPatternWords = new Word[patternWords.length];
344 				for (int i = 0; i < patternWords.length; i++) {
345 					String patternWord = patternWords[i];
346 					if (!patternWord.endsWith("*")) { //$NON-NLS-1$
347 						patternWord += '*';
348 					}
349 					this.splittedPatternWords[i] = new Word(patternWord);
350 					// words may be found anywhere in the line
351 				}
352 			}
353 		}
354 	}
355 
356 	/**
357 	 * This method parses the given pattern into segments seperated by wildcard '*'
358 	 * characters. Since wildcards are not being used in this case, the pattern
359 	 * consists of a single segment.
360 	 */
parseNoWildCards()361 	private void parseNoWildCards() {
362 		this.wholePatternWord = new Word(fPattern, fLength, patternWords);
363 		this.wholePatternWord.bound = fLength;
364 		this.wholePatternWord.fragments = patternWords;
365 	}
366 
367 	/**
368 	 * @param text  a string which contains no wildcard
369 	 * @param start the starting index in the text for search, inclusive
370 	 * @param end   the stopping point of search, exclusive
371 	 * @return the starting index in the text of the pattern , or -1 if not found
372 	 */
posIn(String text, int start, int end)373 	protected int posIn(String text, int start, int end) {// no wild card in pattern
374 		int max = end - fLength;
375 
376 		if (!fIgnoreCase) {
377 			int i = text.indexOf(fPattern, start);
378 			if (i == -1 || i > max) {
379 				return -1;
380 			}
381 			return i;
382 		}
383 
384 		for (int i = start; i <= max; ++i) {
385 			if (text.regionMatches(true, i, fPattern, 0, fLength)) {
386 				return i;
387 			}
388 		}
389 
390 		return -1;
391 	}
392 
393 	/**
394 	 * @param text  a simple regular expression that may only contain '?'(s)
395 	 * @param start the starting index in the text for search, inclusive
396 	 * @param end   the stopping point of search, exclusive
397 	 * @param p     a simple regular expression that may contains '?'
398 	 * @return the starting index in the text of the pattern , or -1 if not found
399 	 */
regExpPosIn(String text, int start, int end, String p)400 	protected int regExpPosIn(String text, int start, int end, String p) {
401 		int plen = p.length();
402 
403 		int max = end - plen;
404 		for (int i = start; i <= max; ++i) {
405 			if (regExpRegionMatches(text, i, p, 0, plen)) {
406 				return i;
407 			}
408 		}
409 		return -1;
410 	}
411 
412 	/**
413 	 *
414 	 * @return boolean
415 	 * @param text       a String to match
416 	 * @param start      int that indicates the starting index of match, inclusive
417 	 * @param end        int that indicates the ending index of match, exclusive
418 	 * @param p          String, String, a simple regular expression that may
419 	 *                   contain '?'
420 	 * @param ignoreCase boolean indicating whether <code>p</code> is case sensitive
421 	 */
regExpRegionMatches(String text, int tStart, String p, int pStart, int plen)422 	protected boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) {
423 		while (plen-- > 0) {
424 			char tchar = text.charAt(tStart++);
425 			char pchar = p.charAt(pStart++);
426 
427 			/* process wild cards */
428 			if (!fIgnoreWildCards) {
429 				/* skip single wild cards */
430 				if (pchar == fSingleWildCard) {
431 					continue;
432 				}
433 			}
434 			if (pchar == tchar) {
435 				continue;
436 			}
437 			if (fIgnoreCase) {
438 				if (Character.toUpperCase(tchar) == Character.toUpperCase(pchar)) {
439 					continue;
440 				}
441 				// comparing after converting to upper case doesn't handle all cases;
442 				// also compare after converting to lower case
443 				if (Character.toLowerCase(tchar) == Character.toLowerCase(pchar)) {
444 					continue;
445 				}
446 			}
447 			return false;
448 		}
449 		return true;
450 	}
451 
452 	/**
453 	 * @param text  the string to match
454 	 * @param start the starting index in the text for search, inclusive
455 	 * @param end   the stopping point of search, exclusive
456 	 * @param p     a pattern string that has no wildcard
457 	 * @return the starting index in the text of the pattern , or -1 if not found
458 	 */
textPosIn(String text, int start, int end, String p)459 	protected int textPosIn(String text, int start, int end, String p) {
460 
461 		int plen = p.length();
462 		int max = end - plen;
463 
464 		if (!fIgnoreCase) {
465 			int i = text.indexOf(p, start);
466 			if (i == -1 || i > max) {
467 				return -1;
468 			}
469 			return i;
470 		}
471 
472 		for (int i = start; i <= max; ++i) {
473 			if (text.regionMatches(true, i, p, 0, plen)) {
474 				return i;
475 			}
476 		}
477 
478 		return -1;
479 	}
480 
481 	/**
482 	 * Take the given filter text and break it down into words using a
483 	 * BreakIterator.
484 	 *
485 	 * @param text
486 	 * @return an array of words
487 	 */
getWords(String text)488 	public static String[] getWords(String text) {
489 
490 		return NON_WORD.split(text, 0);
491 	}
492 
493 }
494