1 /******************************************************************************* 2 * Copyright (c) 2000, 2015 IBM Corporation and others. 3 * 4 * This program and the accompanying materials 5 * are made available under the terms of the Eclipse Public License 2.0 6 * which accompanies this distribution, and is available at 7 * https://www.eclipse.org/legal/epl-2.0/ 8 * 9 * SPDX-License-Identifier: EPL-2.0 10 * 11 * Contributors: 12 * IBM Corporation - initial API and implementation 13 * Christopher Lenz (cmlenz@gmx.de) - support for line continuation 14 *******************************************************************************/ 15 package org.eclipse.jface.text.rules; 16 17 import java.util.Arrays; 18 import java.util.Comparator; 19 20 import org.eclipse.core.runtime.Assert; 21 22 23 /** 24 * Standard implementation of <code>IPredicateRule</code>. 25 * Is is capable of detecting a pattern which begins with a given start 26 * sequence and ends with a given end sequence. If the end sequence is 27 * not specified, it can be either end of line, end or file, or both. Additionally, 28 * the pattern can be constrained to begin in a certain column. The rule can also 29 * be used to check whether the text to scan covers half of the pattern, i.e. contains 30 * the end sequence required by the rule. 31 */ 32 public class PatternRule implements IPredicateRule { 33 34 /** 35 * Comparator that orders <code>char[]</code> in decreasing array lengths. 36 * 37 * @since 3.1 38 */ 39 private static class DecreasingCharArrayLengthComparator implements Comparator<char[]> { 40 @Override compare(char[] o1, char[] o2)41 public int compare(char[] o1, char[] o2) { 42 return o2.length - o1.length; 43 } 44 } 45 46 /** Internal setting for the un-initialized column constraint */ 47 protected static final int UNDEFINED= -1; 48 49 /** The token to be returned on success */ 50 protected IToken fToken; 51 /** The pattern's start sequence */ 52 protected char[] fStartSequence; 53 /** The pattern's end sequence */ 54 protected char[] fEndSequence; 55 /** The pattern's column constrain */ 56 protected int fColumn= UNDEFINED; 57 /** The pattern's escape character */ 58 protected char fEscapeCharacter; 59 /** 60 * Indicates whether the escape character continues a line 61 * @since 3.0 62 */ 63 protected boolean fEscapeContinuesLine; 64 /** Indicates whether end of line terminates the pattern */ 65 protected boolean fBreaksOnEOL; 66 /** Indicates whether end of file terminates the pattern */ 67 protected boolean fBreaksOnEOF; 68 69 /** 70 * Line delimiter comparator which orders according to decreasing delimiter length. 71 * @since 3.1 72 */ 73 private Comparator<char[]> fLineDelimiterComparator= new DecreasingCharArrayLengthComparator(); 74 /** 75 * Cached line delimiters. 76 * @since 3.1 77 */ 78 private char[][] fLineDelimiters; 79 /** 80 * Cached sorted {@linkplain #fLineDelimiters}. 81 * @since 3.1 82 */ 83 private char[][] fSortedLineDelimiters; 84 85 /** 86 * Creates a rule for the given starting and ending sequence. 87 * When these sequences are detected the rule will return the specified token. 88 * Alternatively, the sequence can also be ended by the end of the line. 89 * Any character which follows the given escapeCharacter will be ignored. 90 * 91 * @param startSequence the pattern's start sequence 92 * @param endSequence the pattern's end sequence, <code>null</code> is a legal value 93 * @param token the token which will be returned on success 94 * @param escapeCharacter any character following this one will be ignored 95 * @param breaksOnEOL indicates whether the end of the line also terminates the pattern 96 */ PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL)97 public PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL) { 98 Assert.isTrue(startSequence != null && !startSequence.isEmpty()); 99 Assert.isTrue(endSequence != null || breaksOnEOL); 100 Assert.isNotNull(token); 101 102 fStartSequence= startSequence.toCharArray(); 103 fEndSequence= (endSequence == null ? new char[0] : endSequence.toCharArray()); 104 fToken= token; 105 fEscapeCharacter= escapeCharacter; 106 fBreaksOnEOL= breaksOnEOL; 107 } 108 109 /** 110 * Creates a rule for the given starting and ending sequence. 111 * When these sequences are detected the rule will return the specified token. 112 * Alternatively, the sequence can also be ended by the end of the line or the end of the file. 113 * Any character which follows the given escapeCharacter will be ignored. 114 * 115 * @param startSequence the pattern's start sequence 116 * @param endSequence the pattern's end sequence, <code>null</code> is a legal value 117 * @param token the token which will be returned on success 118 * @param escapeCharacter any character following this one will be ignored 119 * @param breaksOnEOL indicates whether the end of the line also terminates the pattern 120 * @param breaksOnEOF indicates whether the end of the file also terminates the pattern 121 * @since 2.1 122 */ PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL, boolean breaksOnEOF)123 public PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL, boolean breaksOnEOF) { 124 this(startSequence, endSequence, token, escapeCharacter, breaksOnEOL); 125 fBreaksOnEOF= breaksOnEOF; 126 } 127 128 /** 129 * Creates a rule for the given starting and ending sequence. 130 * When these sequences are detected the rule will return the specified token. 131 * Alternatively, the sequence can also be ended by the end of the line or the end of the file. 132 * Any character which follows the given escapeCharacter will be ignored. An end of line 133 * immediately after the given <code>lineContinuationCharacter</code> will not cause the 134 * pattern to terminate even if <code>breakOnEOL</code> is set to true. 135 * 136 * @param startSequence the pattern's start sequence 137 * @param endSequence the pattern's end sequence, <code>null</code> is a legal value 138 * @param token the token which will be returned on success 139 * @param escapeCharacter any character following this one will be ignored 140 * @param breaksOnEOL indicates whether the end of the line also terminates the pattern 141 * @param breaksOnEOF indicates whether the end of the file also terminates the pattern 142 * @param escapeContinuesLine indicates whether the specified escape character is used for line 143 * continuation, so that an end of line immediately after the escape character does not 144 * terminate the pattern, even if <code>breakOnEOL</code> is set 145 * @since 3.0 146 */ PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL, boolean breaksOnEOF, boolean escapeContinuesLine)147 public PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL, boolean breaksOnEOF, boolean escapeContinuesLine) { 148 this(startSequence, endSequence, token, escapeCharacter, breaksOnEOL, breaksOnEOF); 149 fEscapeContinuesLine= escapeContinuesLine; 150 } 151 152 /** 153 * Sets a column constraint for this rule. If set, the rule's token 154 * will only be returned if the pattern is detected starting at the 155 * specified column. If the column is smaller then 0, the column 156 * constraint is considered removed. 157 * 158 * @param column the column in which the pattern starts 159 */ setColumnConstraint(int column)160 public void setColumnConstraint(int column) { 161 if (column < 0) 162 column= UNDEFINED; 163 fColumn= column; 164 } 165 166 167 /** 168 * Evaluates this rules without considering any column constraints. 169 * 170 * @param scanner the character scanner to be used 171 * @return the token resulting from this evaluation 172 */ doEvaluate(ICharacterScanner scanner)173 protected IToken doEvaluate(ICharacterScanner scanner) { 174 return doEvaluate(scanner, false); 175 } 176 177 /** 178 * Evaluates this rules without considering any column constraints. Resumes 179 * detection, i.e. look sonly for the end sequence required by this rule if the 180 * <code>resume</code> flag is set. 181 * 182 * @param scanner the character scanner to be used 183 * @param resume <code>true</code> if detection should be resumed, <code>false</code> otherwise 184 * @return the token resulting from this evaluation 185 * @since 2.0 186 */ doEvaluate(ICharacterScanner scanner, boolean resume)187 protected IToken doEvaluate(ICharacterScanner scanner, boolean resume) { 188 189 if (resume) { 190 191 if (endSequenceDetected(scanner)) 192 return fToken; 193 194 } else { 195 196 int c= scanner.read(); 197 if (c == fStartSequence[0]) { 198 if (sequenceDetected(scanner, fStartSequence, false)) { 199 if (endSequenceDetected(scanner)) 200 return fToken; 201 } 202 } 203 } 204 205 scanner.unread(); 206 return Token.UNDEFINED; 207 } 208 209 @Override evaluate(ICharacterScanner scanner)210 public IToken evaluate(ICharacterScanner scanner) { 211 return evaluate(scanner, false); 212 } 213 214 /** 215 * Returns whether the end sequence was detected. As the pattern can be considered 216 * ended by a line delimiter, the result of this method is <code>true</code> if the 217 * rule breaks on the end of the line, or if the EOF character is read. 218 * 219 * @param scanner the character scanner to be used 220 * @return <code>true</code> if the end sequence has been detected 221 */ endSequenceDetected(ICharacterScanner scanner)222 protected boolean endSequenceDetected(ICharacterScanner scanner) { 223 224 char[][] originalDelimiters= scanner.getLegalLineDelimiters(); 225 int count= originalDelimiters.length; 226 if (fLineDelimiters == null || fLineDelimiters.length != count) { 227 fSortedLineDelimiters= new char[count][]; 228 } else { 229 while (count > 0 && Arrays.equals(fLineDelimiters[count - 1], originalDelimiters[count - 1])) 230 count--; 231 } 232 if (count != 0) { 233 fLineDelimiters= originalDelimiters; 234 System.arraycopy(fLineDelimiters, 0, fSortedLineDelimiters, 0, fLineDelimiters.length); 235 Arrays.sort(fSortedLineDelimiters, fLineDelimiterComparator); 236 } 237 238 int readCount= 1; 239 int c; 240 while ((c= scanner.read()) != ICharacterScanner.EOF) { 241 if (c == fEscapeCharacter) { 242 // Skip escaped character(s) 243 if (fEscapeContinuesLine) { 244 c= scanner.read(); 245 for (char[] fSortedLineDelimiter : fSortedLineDelimiters) { 246 if (c == fSortedLineDelimiter[0] && sequenceDetected(scanner, fSortedLineDelimiter, fBreaksOnEOF)) 247 break; 248 } 249 } else 250 scanner.read(); 251 252 } else if (fEndSequence.length > 0 && c == fEndSequence[0]) { 253 // Check if the specified end sequence has been found. 254 if (sequenceDetected(scanner, fEndSequence, fBreaksOnEOF)) 255 return true; 256 } else if (fBreaksOnEOL) { 257 // Check for end of line since it can be used to terminate the pattern. 258 for (char[] fSortedLineDelimiter : fSortedLineDelimiters) { 259 if (c == fSortedLineDelimiter[0] && sequenceDetected(scanner, fSortedLineDelimiter, fBreaksOnEOF)) 260 return true; 261 } 262 } 263 readCount++; 264 } 265 266 if (fBreaksOnEOF) 267 return true; 268 269 for (; readCount > 0; readCount--) 270 scanner.unread(); 271 272 return false; 273 } 274 275 /** 276 * Returns whether the next characters to be read by the character scanner 277 * are an exact match with the given sequence. No escape characters are allowed 278 * within the sequence. If specified the sequence is considered to be found 279 * when reading the EOF character. 280 * 281 * @param scanner the character scanner to be used 282 * @param sequence the sequence to be detected 283 * @param eofAllowed indicated whether EOF terminates the pattern 284 * @return <code>true</code> if the given sequence has been detected 285 */ sequenceDetected(ICharacterScanner scanner, char[] sequence, boolean eofAllowed)286 protected boolean sequenceDetected(ICharacterScanner scanner, char[] sequence, boolean eofAllowed) { 287 for (int i= 1; i < sequence.length; i++) { 288 int c= scanner.read(); 289 if (c == ICharacterScanner.EOF && eofAllowed) { 290 return true; 291 } else if (c != sequence[i]) { 292 // Non-matching character detected, rewind the scanner back to the start. 293 // Do not unread the first character. 294 scanner.unread(); 295 for (int j= i-1; j > 0; j--) 296 scanner.unread(); 297 return false; 298 } 299 } 300 301 return true; 302 } 303 304 @Override evaluate(ICharacterScanner scanner, boolean resume)305 public IToken evaluate(ICharacterScanner scanner, boolean resume) { 306 if (fColumn == UNDEFINED) 307 return doEvaluate(scanner, resume); 308 309 int c= scanner.read(); 310 scanner.unread(); 311 if (c == fStartSequence[0]) 312 return (fColumn == scanner.getColumn() ? doEvaluate(scanner, resume) : Token.UNDEFINED); 313 return Token.UNDEFINED; 314 } 315 316 @Override getSuccessToken()317 public IToken getSuccessToken() { 318 return fToken; 319 } 320 } 321