1 /*******************************************************************************
2  * Copyright (c) 2000, 2015 IBM Corporation and others.
3  *
4  * This program and the accompanying materials
5  * are made available under the terms of the Eclipse Public License 2.0
6  * which accompanies this distribution, and is available at
7  * https://www.eclipse.org/legal/epl-2.0/
8  *
9  * SPDX-License-Identifier: EPL-2.0
10  *
11  * Contributors:
12  *     IBM Corporation - initial API and implementation
13  *     Christopher Lenz (cmlenz@gmx.de) - support for line continuation
14  *******************************************************************************/
15 package org.eclipse.jface.text.rules;
16 
17 import java.util.Arrays;
18 import java.util.Comparator;
19 
20 import org.eclipse.core.runtime.Assert;
21 
22 
23 /**
24  * Standard implementation of <code>IPredicateRule</code>.
25  * Is is capable of detecting a pattern which begins with a given start
26  * sequence and ends with a given end sequence. If the end sequence is
27  * not specified, it can be either end of line, end or file, or both. Additionally,
28  * the pattern can be constrained to begin in a certain column. The rule can also
29  * be used to check whether the text to scan covers half of the pattern, i.e. contains
30  * the end sequence required by the rule.
31  */
32 public class PatternRule implements IPredicateRule {
33 
34 	/**
35 	 * Comparator that orders <code>char[]</code> in decreasing array lengths.
36 	 *
37 	 * @since 3.1
38 	 */
39 	private static class DecreasingCharArrayLengthComparator implements Comparator<char[]> {
40 		@Override
compare(char[] o1, char[] o2)41 		public int compare(char[] o1, char[] o2) {
42 			return o2.length - o1.length;
43 		}
44 	}
45 
46 	/** Internal setting for the un-initialized column constraint */
47 	protected static final int UNDEFINED= -1;
48 
49 	/** The token to be returned on success */
50 	protected IToken fToken;
51 	/** The pattern's start sequence */
52 	protected char[] fStartSequence;
53 	/** The pattern's end sequence */
54 	protected char[] fEndSequence;
55 	/** The pattern's column constrain */
56 	protected int fColumn= UNDEFINED;
57 	/** The pattern's escape character */
58 	protected char fEscapeCharacter;
59 	/**
60 	 * Indicates whether the escape character continues a line
61 	 * @since 3.0
62 	 */
63 	protected boolean fEscapeContinuesLine;
64 	/** Indicates whether end of line terminates the pattern */
65 	protected boolean fBreaksOnEOL;
66 	/** Indicates whether end of file terminates the pattern */
67 	protected boolean fBreaksOnEOF;
68 
69 	/**
70 	 * Line delimiter comparator which orders according to decreasing delimiter length.
71 	 * @since 3.1
72 	 */
73 	private Comparator<char[]> fLineDelimiterComparator= new DecreasingCharArrayLengthComparator();
74 	/**
75 	 * Cached line delimiters.
76 	 * @since 3.1
77 	 */
78 	private char[][] fLineDelimiters;
79 	/**
80 	 * Cached sorted {@linkplain #fLineDelimiters}.
81 	 * @since 3.1
82 	 */
83 	private char[][] fSortedLineDelimiters;
84 
85 	/**
86 	 * Creates a rule for the given starting and ending sequence.
87 	 * When these sequences are detected the rule will return the specified token.
88 	 * Alternatively, the sequence can also be ended by the end of the line.
89 	 * Any character which follows the given escapeCharacter will be ignored.
90 	 *
91 	 * @param startSequence the pattern's start sequence
92 	 * @param endSequence the pattern's end sequence, <code>null</code> is a legal value
93 	 * @param token the token which will be returned on success
94 	 * @param escapeCharacter any character following this one will be ignored
95 	 * @param breaksOnEOL indicates whether the end of the line also terminates the pattern
96 	 */
PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL)97 	public PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL) {
98 		Assert.isTrue(startSequence != null && !startSequence.isEmpty());
99 		Assert.isTrue(endSequence != null || breaksOnEOL);
100 		Assert.isNotNull(token);
101 
102 		fStartSequence= startSequence.toCharArray();
103 		fEndSequence= (endSequence == null ? new char[0] : endSequence.toCharArray());
104 		fToken= token;
105 		fEscapeCharacter= escapeCharacter;
106 		fBreaksOnEOL= breaksOnEOL;
107 	}
108 
109 	/**
110 	 * Creates a rule for the given starting and ending sequence.
111 	 * When these sequences are detected the rule will return the specified token.
112 	 * Alternatively, the sequence can also be ended by the end of the line or the end of the file.
113 	 * Any character which follows the given escapeCharacter will be ignored.
114 	 *
115 	 * @param startSequence the pattern's start sequence
116 	 * @param endSequence the pattern's end sequence, <code>null</code> is a legal value
117 	 * @param token the token which will be returned on success
118 	 * @param escapeCharacter any character following this one will be ignored
119 	 * @param breaksOnEOL indicates whether the end of the line also terminates the pattern
120 	 * @param breaksOnEOF indicates whether the end of the file also terminates the pattern
121 	 * @since 2.1
122 	 */
PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL, boolean breaksOnEOF)123 	public PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL, boolean breaksOnEOF) {
124 		this(startSequence, endSequence, token, escapeCharacter, breaksOnEOL);
125 		fBreaksOnEOF= breaksOnEOF;
126 	}
127 
128 	/**
129 	 * Creates a rule for the given starting and ending sequence.
130 	 * When these sequences are detected the rule will return the specified token.
131 	 * Alternatively, the sequence can also be ended by the end of the line or the end of the file.
132 	 * Any character which follows the given escapeCharacter will be ignored. An end of line
133 	 * immediately after the given <code>lineContinuationCharacter</code> will not cause the
134 	 * pattern to terminate even if <code>breakOnEOL</code> is set to true.
135 	 *
136 	 * @param startSequence the pattern's start sequence
137 	 * @param endSequence the pattern's end sequence, <code>null</code> is a legal value
138 	 * @param token the token which will be returned on success
139 	 * @param escapeCharacter any character following this one will be ignored
140 	 * @param breaksOnEOL indicates whether the end of the line also terminates the pattern
141 	 * @param breaksOnEOF indicates whether the end of the file also terminates the pattern
142 	 * @param escapeContinuesLine indicates whether the specified escape character is used for line
143 	 *        continuation, so that an end of line immediately after the escape character does not
144 	 *        terminate the pattern, even if <code>breakOnEOL</code> is set
145 	 * @since 3.0
146 	 */
PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL, boolean breaksOnEOF, boolean escapeContinuesLine)147 	public PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL, boolean breaksOnEOF, boolean escapeContinuesLine) {
148 		this(startSequence, endSequence, token, escapeCharacter, breaksOnEOL, breaksOnEOF);
149 		fEscapeContinuesLine= escapeContinuesLine;
150 	}
151 
152 	/**
153 	 * Sets a column constraint for this rule. If set, the rule's token
154 	 * will only be returned if the pattern is detected starting at the
155 	 * specified column. If the column is smaller then 0, the column
156 	 * constraint is considered removed.
157 	 *
158 	 * @param column the column in which the pattern starts
159 	 */
setColumnConstraint(int column)160 	public void setColumnConstraint(int column) {
161 		if (column < 0)
162 			column= UNDEFINED;
163 		fColumn= column;
164 	}
165 
166 
167 	/**
168 	 * Evaluates this rules without considering any column constraints.
169 	 *
170 	 * @param scanner the character scanner to be used
171 	 * @return the token resulting from this evaluation
172 	 */
doEvaluate(ICharacterScanner scanner)173 	protected IToken doEvaluate(ICharacterScanner scanner) {
174 		return doEvaluate(scanner, false);
175 	}
176 
177 	/**
178 	 * Evaluates this rules without considering any column constraints. Resumes
179 	 * detection, i.e. look sonly for the end sequence required by this rule if the
180 	 * <code>resume</code> flag is set.
181 	 *
182 	 * @param scanner the character scanner to be used
183 	 * @param resume <code>true</code> if detection should be resumed, <code>false</code> otherwise
184 	 * @return the token resulting from this evaluation
185 	 * @since 2.0
186 	 */
doEvaluate(ICharacterScanner scanner, boolean resume)187 	protected IToken doEvaluate(ICharacterScanner scanner, boolean resume) {
188 
189 		if (resume) {
190 
191 			if (endSequenceDetected(scanner))
192 				return fToken;
193 
194 		} else {
195 
196 			int c= scanner.read();
197 			if (c == fStartSequence[0]) {
198 				if (sequenceDetected(scanner, fStartSequence, false)) {
199 					if (endSequenceDetected(scanner))
200 						return fToken;
201 				}
202 			}
203 		}
204 
205 		scanner.unread();
206 		return Token.UNDEFINED;
207 	}
208 
209 	@Override
evaluate(ICharacterScanner scanner)210 	public IToken evaluate(ICharacterScanner scanner) {
211 		return evaluate(scanner, false);
212 	}
213 
214 	/**
215 	 * Returns whether the end sequence was detected. As the pattern can be considered
216 	 * ended by a line delimiter, the result of this method is <code>true</code> if the
217 	 * rule breaks on the end of the line, or if the EOF character is read.
218 	 *
219 	 * @param scanner the character scanner to be used
220 	 * @return <code>true</code> if the end sequence has been detected
221 	 */
endSequenceDetected(ICharacterScanner scanner)222 	protected boolean endSequenceDetected(ICharacterScanner scanner) {
223 
224 		char[][] originalDelimiters= scanner.getLegalLineDelimiters();
225 		int count= originalDelimiters.length;
226 		if (fLineDelimiters == null || fLineDelimiters.length != count) {
227 			fSortedLineDelimiters= new char[count][];
228 		} else {
229 			while (count > 0 && Arrays.equals(fLineDelimiters[count - 1], originalDelimiters[count - 1]))
230 				count--;
231 		}
232 		if (count != 0) {
233 			fLineDelimiters= originalDelimiters;
234 			System.arraycopy(fLineDelimiters, 0, fSortedLineDelimiters, 0, fLineDelimiters.length);
235 			Arrays.sort(fSortedLineDelimiters, fLineDelimiterComparator);
236 		}
237 
238 		int readCount= 1;
239 		int c;
240 		while ((c= scanner.read()) != ICharacterScanner.EOF) {
241 			if (c == fEscapeCharacter) {
242 				// Skip escaped character(s)
243 				if (fEscapeContinuesLine) {
244 					c= scanner.read();
245 					for (char[] fSortedLineDelimiter : fSortedLineDelimiters) {
246 						if (c == fSortedLineDelimiter[0] && sequenceDetected(scanner, fSortedLineDelimiter, fBreaksOnEOF))
247 							break;
248 					}
249 				} else
250 					scanner.read();
251 
252 			} else if (fEndSequence.length > 0 && c == fEndSequence[0]) {
253 				// Check if the specified end sequence has been found.
254 				if (sequenceDetected(scanner, fEndSequence, fBreaksOnEOF))
255 					return true;
256 			} else if (fBreaksOnEOL) {
257 				// Check for end of line since it can be used to terminate the pattern.
258 				for (char[] fSortedLineDelimiter : fSortedLineDelimiters) {
259 					if (c == fSortedLineDelimiter[0] && sequenceDetected(scanner, fSortedLineDelimiter, fBreaksOnEOF))
260 						return true;
261 				}
262 			}
263 			readCount++;
264 		}
265 
266 		if (fBreaksOnEOF)
267 			return true;
268 
269 		for (; readCount > 0; readCount--)
270 			scanner.unread();
271 
272 		return false;
273 	}
274 
275 	/**
276 	 * Returns whether the next characters to be read by the character scanner
277 	 * are an exact match with the given sequence. No escape characters are allowed
278 	 * within the sequence. If specified the sequence is considered to be found
279 	 * when reading the EOF character.
280 	 *
281 	 * @param scanner the character scanner to be used
282 	 * @param sequence the sequence to be detected
283 	 * @param eofAllowed indicated whether EOF terminates the pattern
284 	 * @return <code>true</code> if the given sequence has been detected
285 	 */
sequenceDetected(ICharacterScanner scanner, char[] sequence, boolean eofAllowed)286 	protected boolean sequenceDetected(ICharacterScanner scanner, char[] sequence, boolean eofAllowed) {
287 		for (int i= 1; i < sequence.length; i++) {
288 			int c= scanner.read();
289 			if (c == ICharacterScanner.EOF && eofAllowed) {
290 				return true;
291 			} else if (c != sequence[i]) {
292 				// Non-matching character detected, rewind the scanner back to the start.
293 				// Do not unread the first character.
294 				scanner.unread();
295 				for (int j= i-1; j > 0; j--)
296 					scanner.unread();
297 				return false;
298 			}
299 		}
300 
301 		return true;
302 	}
303 
304 	@Override
evaluate(ICharacterScanner scanner, boolean resume)305 	public IToken evaluate(ICharacterScanner scanner, boolean resume) {
306 		if (fColumn == UNDEFINED)
307 			return doEvaluate(scanner, resume);
308 
309 		int c= scanner.read();
310 		scanner.unread();
311 		if (c == fStartSequence[0])
312 			return (fColumn == scanner.getColumn() ? doEvaluate(scanner, resume) : Token.UNDEFINED);
313 		return Token.UNDEFINED;
314 	}
315 
316 	@Override
getSuccessToken()317 	public IToken getSuccessToken() {
318 		return fToken;
319 	}
320 }
321