1 /******************************************************************************* 2 * Copyright (c) 2010, 2011 IBM Corporation and others. 3 * 4 * This program and the accompanying materials 5 * are made available under the terms of the Eclipse Public License 2.0 6 * which accompanies this distribution, and is available at 7 * https://www.eclipse.org/legal/epl-2.0/ 8 * 9 * SPDX-License-Identifier: EPL-2.0 10 * 11 * Contributors: 12 * IBM Corporation - initial API and implementation 13 ******************************************************************************/ 14 package org.eclipse.equinox.bidi.internal.consumable; 15 16 import org.eclipse.equinox.bidi.advanced.IStructuredTextExpert; 17 import org.eclipse.equinox.bidi.advanced.StructuredTextEnvironment; 18 import org.eclipse.equinox.bidi.custom.*; 19 20 /** 21 * Handler for regular expressions. 22 * Such expressions may span multiple lines. 23 * <p> 24 * In applications like an editor where parts of the text might be modified 25 * while other parts are not, the user may want to call 26 * {@link IStructuredTextExpert#leanToFullText} 27 * separately on each line and save the initial state of each line (this is 28 * the final state of the previous line which can be retrieved using 29 * {@link IStructuredTextExpert#getState()}. 30 * If both the content 31 * of a line and its initial state have not changed, the user can be sure that 32 * the last <i>full</i> text computed for this line has not changed either. 33 * 34 * @see IStructuredTextExpert explanation of state 35 */ 36 public class StructuredTextRegex extends StructuredTextTypeHandler { 37 static final String[] startStrings = {"", /* 0 *//* dummy *///$NON-NLS-1$ 38 "(?#", /* 1 *//* comment (?#...) *///$NON-NLS-1$ 39 "(?<", /* 2 *//* named group (?<name> *///$NON-NLS-1$ 40 "(?'", /* 3 *//* named group (?'name' *///$NON-NLS-1$ 41 "(?(<", /* 4 *//* conditional named back reference (?(<name>) *///$NON-NLS-1$ 42 "(?('", /* 5 *//* conditional named back reference (?('name') *///$NON-NLS-1$ 43 "(?(", /* 6 *//* conditional named back reference (?(name) *///$NON-NLS-1$ 44 "(?&", /* 7 *//* named parentheses reference (?&name) *///$NON-NLS-1$ 45 "(?P<", /* 8 *//* named group (?P<name> *///$NON-NLS-1$ 46 "\\k<", /* 9 *//* named back reference \k<name> *///$NON-NLS-1$ 47 "\\k'", /* 10 *//* named back reference \k'name' *///$NON-NLS-1$ 48 "\\k{", /* 11 *//* named back reference \k{name} *///$NON-NLS-1$ 49 "(?P=", /* 12 *//* named back reference (?P=name) *///$NON-NLS-1$ 50 "\\g{", /* 13 *//* named back reference \g{name} *///$NON-NLS-1$ 51 "\\g<", /* 14 *//* subroutine call \g<name> *///$NON-NLS-1$ 52 "\\g'", /* 15 *//* subroutine call \g'name' *///$NON-NLS-1$ 53 "(?(R&", /* 16 *//* named back reference recursion (?(R&name) *///$NON-NLS-1$ 54 "\\Q" /* 17 *//* quoted sequence \Q...\E *///$NON-NLS-1$ 55 }; 56 static final char[] endChars = { 57 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 58 '.', ')', '>', '\'', ')', ')', ')', ')', '>', '>', '\'', '}', ')', '}', '>', '\'', ')'}; 59 static final int numberOfStrings = startStrings.length; /* 18 */ 60 static final int maxSpecial = numberOfStrings; 61 static final byte L = Character.DIRECTIONALITY_LEFT_TO_RIGHT; 62 static final byte R = Character.DIRECTIONALITY_RIGHT_TO_LEFT; 63 static final byte AL = Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC; 64 static final byte AN = Character.DIRECTIONALITY_ARABIC_NUMBER; 65 static final byte EN = Character.DIRECTIONALITY_EUROPEAN_NUMBER; 66 private static final Integer STATE_COMMENT = Integer.valueOf(1); 67 private static final Integer STATE_QUOTED_SEQUENCE = Integer.valueOf(17); 68 69 /** 70 * Retrieves the number of special cases handled by this handler. 71 * 72 * @return the number of special cases for this handler. 73 */ 74 @Override getSpecialsCount(IStructuredTextExpert expert)75 public int getSpecialsCount(IStructuredTextExpert expert) { 76 return maxSpecial; 77 } 78 79 /** 80 * Locates occurrences of the syntactic strings and of 81 * R, AL, EN, AN characters. 82 */ 83 @Override indexOfSpecial(IStructuredTextExpert expert, String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int caseNumber, int fromIndex)84 public int indexOfSpecial(IStructuredTextExpert expert, String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int caseNumber, int fromIndex) { 85 // In this method, L, R, AL, AN and EN represent bidi categories 86 // as defined in the Unicode Bidirectional Algorithm 87 // ( http://www.unicode.org/reports/tr9/ ). 88 // L represents the category Left to Right character. 89 // R represents the category Right to Left character. 90 // AL represents the category Arabic Letter. 91 // AN represents the category Arabic Number. 92 // EN represents the category European Number. 93 byte charType; 94 95 if (caseNumber < numberOfStrings) { 96 /* 1 *//* comment (?#...) */ 97 /* 2 *//* named group (?<name> */ 98 /* 3 *//* named group (?'name' */ 99 /* 4 *//* conditional named back reference (?(name) */ 100 /* 5 *//* conditional named back reference (?(<name>) */ 101 /* 6 *//* conditional named back reference (?('name') */ 102 /* 7 *//* named parentheses reference (?&name) */ 103 /* 8 *//* named group (?P<name> */ 104 /* 9 *//* named back reference \k<name> */ 105 /* 10 *//* named back reference \k'name' */ 106 /* 11 *//* named back reference \k{name} */ 107 /* 12 *//* named back reference (?P=name) */ 108 /* 13 *//* named back reference \g{name} */ 109 /* 14 *//* subroutine call \g<name> */ 110 /* 15 *//* subroutine call \g'name' */ 111 /* 16 *//* named back reference recursion (?(R&name) */ 112 /* 17 *//* quoted sequence \Q...\E */ 113 return text.indexOf(startStrings[caseNumber], fromIndex); 114 } 115 // there never is a need for a mark before the first char 116 if (fromIndex <= 0) 117 fromIndex = 1; 118 // look for R, AL, AN, EN which are potentially needing a mark 119 for (; fromIndex < text.length(); fromIndex++) { 120 charType = charTypes.getBidiTypeAt(fromIndex); 121 // R and AL will always be examined using processSeparator() 122 if (charType == R || charType == AL) 123 return fromIndex; 124 125 if (charType == EN || charType == AN) { 126 // no need for a mark after the first digit in a number 127 if (charTypes.getBidiTypeAt(fromIndex - 1) == charType) 128 continue; 129 130 for (int i = fromIndex - 1; i >= 0; i--) { 131 charType = charTypes.getBidiTypeAt(i); 132 // after a L char, no need for a mark 133 if (charType == L) 134 continue; 135 136 // digit after R or AL or AN need a mark, except for EN 137 // following AN, but this is a contrived case, so we 138 // don't check for it (and calling processSeparator() 139 // for it will do no harm) 140 if (charType == R || charType == AL || charType == AN) 141 return fromIndex; 142 } 143 continue; 144 } 145 } 146 return -1; 147 } 148 149 /** 150 * Processes the special cases. 151 */ 152 @Override processSpecial(IStructuredTextExpert expert, String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int caseNumber, int separLocation)153 public int processSpecial(IStructuredTextExpert expert, String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int caseNumber, int separLocation) { 154 int location; 155 156 if (separLocation < 0) { 157 caseNumber = ((Integer) expert.getState()).intValue(); // TBD guard against "undefined" 158 expert.clearState(); 159 } 160 switch (caseNumber) { 161 case 1 : /* comment (?#...) */ 162 if (separLocation < 0) { 163 // initial state from previous line 164 location = 0; 165 } else { 166 StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation); 167 // skip the opening "(?#" 168 location = separLocation + 3; 169 } 170 location = text.indexOf(')', location); 171 if (location < 0) { 172 expert.setState(STATE_COMMENT); 173 return text.length(); 174 } 175 return location + 1; 176 case 2 : /* named group (?<name> */ 177 case 3 : /* named group (?'name' */ 178 case 4 : /* conditional named back reference (?(name) */ 179 case 5 : /* conditional named back reference (?(<name>) */ 180 case 6 : /* conditional named back reference (?('name') */ 181 case 7 : /* named parentheses reference (?&name) */ 182 StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation); 183 // no need for calling processSeparator() for the following cases 184 // since the starting string contains a L char 185 case 8 : /* named group (?P<name> */ 186 case 9 : /* named back reference \k<name> */ 187 case 10 : /* named back reference \k'name' */ 188 case 11 : /* named back reference \k{name} */ 189 case 12 : /* named back reference (?P=name) */ 190 case 13 : /* named back reference \g{name} */ 191 case 14 : /* subroutine call \g<name> */ 192 case 15 : /* subroutine call \g'name' */ 193 case 16 : /* named back reference recursion (?(R&name) */ 194 // skip the opening string 195 location = separLocation + startStrings[caseNumber].length(); 196 // look for ending character 197 location = text.indexOf(endChars[caseNumber], location); 198 if (location < 0) 199 return text.length(); 200 return location + 1; 201 case 17 : /* quoted sequence \Q...\E */ 202 if (separLocation < 0) { 203 // initial state from previous line 204 location = 0; 205 } else { 206 StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation); 207 // skip the opening "\Q" 208 location = separLocation + 2; 209 } 210 location = text.indexOf("\\E", location); //$NON-NLS-1$ 211 if (location < 0) { 212 expert.setState(STATE_QUOTED_SEQUENCE); 213 return text.length(); 214 } 215 // set the charType for the "E" to L (Left to Right character) 216 charTypes.setBidiTypeAt(location + 1, L); 217 return location + 2; 218 case 18 : /* R, AL, AN, EN */ 219 StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation); 220 return separLocation + 1; 221 222 } 223 // we should never get here 224 return text.length(); 225 } 226 227 @Override getDirection(IStructuredTextExpert expert, String text)228 public int getDirection(IStructuredTextExpert expert, String text) { 229 return getDirection(expert, text, new StructuredTextCharTypes(expert, text)); 230 } 231 232 /** 233 * @return {@link IStructuredTextExpert#DIR_RTL DIR_RTL} if the following 234 * conditions are satisfied: 235 * <ul> 236 * <li>The current locale (as expressed by the environment 237 * language) is Arabic.</li> 238 * <li>The first strong character has an RTL direction.</li> 239 * <li>If there is no strong character in the text, the 240 * GUI is mirrored. 241 * </ul> 242 * Otherwise, returns {@link IStructuredTextExpert#DIR_LTR DIR_LTR}. 243 */ 244 @Override getDirection(IStructuredTextExpert expert, String text, StructuredTextCharTypes charTypes)245 public int getDirection(IStructuredTextExpert expert, String text, StructuredTextCharTypes charTypes) { 246 StructuredTextEnvironment environment = expert.getEnvironment(); 247 String language = environment.getLanguage(); 248 if (!language.equals("ar")) //$NON-NLS-1$ 249 return IStructuredTextExpert.DIR_LTR; 250 for (int i = 0; i < text.length(); i++) { 251 byte charType = charTypes.getBidiTypeAt(i); 252 if (charType == AL || charType == R) 253 return IStructuredTextExpert.DIR_RTL; 254 if (charType == L) 255 return IStructuredTextExpert.DIR_LTR; 256 } 257 if (environment.getMirrored()) 258 return IStructuredTextExpert.DIR_RTL; 259 return IStructuredTextExpert.DIR_LTR; 260 } 261 262 } 263