/* gnu/regexp/RETokenNamedProperty.java
   Copyright (C) 2006 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */


package gnu.java.util.regex;

import gnu.java.lang.CPStringBuilder;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * Regular-expression token that matches exactly one character against a
 * named property, i.e. the construct dumped as {@code \p{name}} or, when
 * negated, {@code \P{name}}.
 *
 * <p>The property name is resolved once, in the constructor, to a
 * {@link Handler}; matching then only asks that handler whether it
 * includes the character under test.</p>
 */
final class RETokenNamedProperty extends REToken
{
  /** Property name exactly as passed to the constructor; used by dump. */
  String name;

  /** If true, the upper- and lower-case forms of a character are tried too. */
  boolean insens;

  /** If true, the result of the property test is inverted. */
  boolean negate;

  /** Strategy resolved from {@link #name} that performs the property test. */
  Handler handler;

  // Grouped properties
  static final byte[] LETTER = new byte[]
    {
      Character.LOWERCASE_LETTER,
      Character.UPPERCASE_LETTER,
      Character.TITLECASE_LETTER,
      Character.MODIFIER_LETTER,
      Character.OTHER_LETTER
    };

  static final byte[] MARK = new byte[]
    {
      Character.NON_SPACING_MARK,
      Character.COMBINING_SPACING_MARK,
      Character.ENCLOSING_MARK
    };

  static final byte[] SEPARATOR = new byte[]
    {
      Character.SPACE_SEPARATOR,
      Character.LINE_SEPARATOR,
      Character.PARAGRAPH_SEPARATOR
    };

  static final byte[] SYMBOL = new byte[]
    {
      Character.MATH_SYMBOL,
      Character.CURRENCY_SYMBOL,
      Character.MODIFIER_SYMBOL,
      Character.OTHER_SYMBOL
    };

  static final byte[] NUMBER = new byte[]
    {
      Character.DECIMAL_DIGIT_NUMBER,
      Character.LETTER_NUMBER,
      Character.OTHER_NUMBER
    };

  static final byte[] PUNCTUATION = new byte[]
    {
      Character.DASH_PUNCTUATION,
      Character.START_PUNCTUATION,
      Character.END_PUNCTUATION,
      Character.CONNECTOR_PUNCTUATION,
      Character.OTHER_PUNCTUATION,
      Character.INITIAL_QUOTE_PUNCTUATION,
      Character.FINAL_QUOTE_PUNCTUATION
    };

  static final byte[] OTHER = new byte[]
    {
      Character.CONTROL,
      Character.FORMAT,
      Character.PRIVATE_USE,
      Character.SURROGATE,
      Character.UNASSIGNED
    };

  /**
   * Names that are delegated to {@link POSIXHandler}.  The comparison is
   * case-sensitive.  "ASCII" is intentionally absent: it was commented
   * out in the original list of accepted names.
   */
  private static final Set<String> POSIX_NAMES =
    new HashSet<String> (Arrays.asList ("Lower", "Upper", "Alpha", "Digit",
                                        "Alnum", "Punct", "Graph", "Print",
                                        "Blank", "Cntrl", "XDigit", "Space"));

  /** One-letter "grouped" property names mapped to their category sets. */
  private static final Map<String, byte[]> GROUPED_CATEGORIES =
    new HashMap<String, byte[]> ();

  /** Two-letter property names mapped to a single Character type constant. */
  private static final Map<String, Byte> SINGLE_CATEGORIES =
    new HashMap<String, Byte> ();

  static
  {
    GROUPED_CATEGORIES.put ("L", LETTER);
    GROUPED_CATEGORIES.put ("M", MARK);
    GROUPED_CATEGORIES.put ("Z", SEPARATOR);
    GROUPED_CATEGORIES.put ("S", SYMBOL);
    GROUPED_CATEGORIES.put ("N", NUMBER);
    GROUPED_CATEGORIES.put ("P", PUNCTUATION);
    GROUPED_CATEGORIES.put ("C", OTHER);

    SINGLE_CATEGORIES.put ("Mc", Character.COMBINING_SPACING_MARK);
    SINGLE_CATEGORIES.put ("Pc", Character.CONNECTOR_PUNCTUATION);
    SINGLE_CATEGORIES.put ("Cc", Character.CONTROL);
    SINGLE_CATEGORIES.put ("Sc", Character.CURRENCY_SYMBOL);
    SINGLE_CATEGORIES.put ("Pd", Character.DASH_PUNCTUATION);
    SINGLE_CATEGORIES.put ("Nd", Character.DECIMAL_DIGIT_NUMBER);
    SINGLE_CATEGORIES.put ("Me", Character.ENCLOSING_MARK);
    SINGLE_CATEGORIES.put ("Pe", Character.END_PUNCTUATION);
    SINGLE_CATEGORIES.put ("Pf", Character.FINAL_QUOTE_PUNCTUATION);
    SINGLE_CATEGORIES.put ("Cf", Character.FORMAT);
    SINGLE_CATEGORIES.put ("Pi", Character.INITIAL_QUOTE_PUNCTUATION);
    SINGLE_CATEGORIES.put ("Nl", Character.LETTER_NUMBER);
    SINGLE_CATEGORIES.put ("Zl", Character.LINE_SEPARATOR);
    SINGLE_CATEGORIES.put ("Ll", Character.LOWERCASE_LETTER);
    SINGLE_CATEGORIES.put ("Sm", Character.MATH_SYMBOL);
    SINGLE_CATEGORIES.put ("Lm", Character.MODIFIER_LETTER);
    SINGLE_CATEGORIES.put ("Sk", Character.MODIFIER_SYMBOL);
    SINGLE_CATEGORIES.put ("Mn", Character.NON_SPACING_MARK);
    SINGLE_CATEGORIES.put ("Lo", Character.OTHER_LETTER);
    SINGLE_CATEGORIES.put ("No", Character.OTHER_NUMBER);
    SINGLE_CATEGORIES.put ("Po", Character.OTHER_PUNCTUATION);
    SINGLE_CATEGORIES.put ("So", Character.OTHER_SYMBOL);
    SINGLE_CATEGORIES.put ("Zp", Character.PARAGRAPH_SEPARATOR);
    SINGLE_CATEGORIES.put ("Co", Character.PRIVATE_USE);
    SINGLE_CATEGORIES.put ("Zs", Character.SPACE_SEPARATOR);
    SINGLE_CATEGORIES.put ("Ps", Character.START_PUNCTUATION);
    SINGLE_CATEGORIES.put ("Cs", Character.SURROGATE);
    SINGLE_CATEGORIES.put ("Lt", Character.TITLECASE_LETTER);
    SINGLE_CATEGORIES.put ("Cn", Character.UNASSIGNED);
    SINGLE_CATEGORIES.put ("Lu", Character.UPPERCASE_LETTER);
  }

  /** Stateless handler for the property name "all": includes every char. */
  private static final Handler ALL_HANDLER = new Handler ()
    {
      @Override
      public boolean includes (char c)
      {
        return true;
      }
    };

  /**
   * Creates a token for the named property.
   *
   * @param subIndex passed unchanged to the REToken constructor
   * @param name the property name (the text between the braces)
   * @param insens whether the upper/lower-case forms of a character are
   *        also tested
   * @param negate whether the test result is inverted
   * @throws REException if the name cannot be resolved to a handler
   */
  RETokenNamedProperty (int subIndex, String name, boolean insens,
                        boolean negate) throws REException
  {
    super (subIndex);
    this.name = name;
    this.insens = insens;
    this.negate = negate;
    handler = getHandler (name);
  }

  /** @return always 1: this token consumes exactly one char. */
  int getMinimumLength ()
  {
    return 1;
  }

  /** @return always 1: this token consumes exactly one char. */
  int getMaximumLength ()
  {
    return 1;
  }

  /**
   * Tests the char at {@code mymatch.index}; on success advances that
   * index by one and returns the same match object.
   *
   * @param input the text being matched
   * @param mymatch the match in progress; its index is incremented on
   *        success
   * @return {@code mymatch} if the char matched, otherwise null
   */
  REMatch matchThis (CharIndexed input, REMatch mymatch)
  {
    if (!matchOneChar (input.charAt (mymatch.index)))
      return null;

    ++mymatch.index;
    return mymatch;
  }

  /**
   * Applies the handler, the case-insensitivity rule and the negation
   * flag to one char.
   *
   * @param ch the char to test
   * @return whether this token matches {@code ch}
   */
  private boolean matchOneChar (char ch)
  {
    // Checked before negation, so a negated property never matches
    // the out-of-bounds marker either.
    if (ch == CharIndexed.OUT_OF_BOUNDS)
      return false;

    boolean included = handler.includes (ch);
    if (insens && !included)
      {
        // toUpperCase, toLowerCase and unicodeAware are not declared in
        // this class; presumably they are inherited from REToken.
        included = handler.includes (toUpperCase (ch, unicodeAware))
          || handler.includes (toLowerCase (ch, unicodeAware));
      }

    return negate ? !included : included;
  }

  /** @return always true. */
  boolean returnsFixedLengthMatches ()
  {
    return true;
  }

  /**
   * Counts how many consecutive chars, starting at {@code mymatch.index},
   * match this token.  Neither argument is modified.
   *
   * @param input the text being matched
   * @param mymatch supplies the starting index
   * @param max upper bound on the returned count
   * @return the number of consecutive matching chars, at most {@code max}
   */
  int findFixedLengthMatches (CharIndexed input, REMatch mymatch, int max)
  {
    int index = mymatch.index;
    int numRepeats = 0;
    // The bound is tested first, so no char is read once max is reached.
    while (numRepeats < max && matchOneChar (input.charAt (index++)))
      numRepeats++;
    return numRepeats;
  }

  /**
   * Appends the textual form of this token: a backslash, "P" if negated
   * or "p" otherwise, and the name in braces.
   *
   * @param os the builder to append to
   */
  void dump (CPStringBuilder os)
  {
    os.append ("\\").append (negate ? "P" : "p").append ("{" + name + "}");
  }

  /** Membership test for one named property. */
  private abstract static class Handler
  {
    /**
     * @param c the char to test
     * @return whether {@code c} has the property
     */
    public abstract boolean includes (char c);
  }

  /**
   * Resolves a property name to its handler.  The checks are made in
   * this order:
   * <ol>
   * <li>one of the POSIX names (exact, case-sensitive);</li>
   * <li>prefix "In": the remainder is a Unicode block name;</li>
   * <li>an optional prefix "Is" is removed, then the remainder is
   *     looked up as a one-letter grouped category, a two-letter
   *     category, the literal "all", or a name with prefix "java" that
   *     maps to a public {@code Character.isXxx(char)} method.</li>
   * </ol>
   *
   * @param name the property name
   * @return the handler for the name
   * @throws REException if the name matches none of the above
   */
  private static Handler getHandler (String name) throws REException
  {
    if (POSIX_NAMES.contains (name))
      return new POSIXHandler (name);

    if (name.startsWith ("In"))
      {
        String blockName = name.substring (2);
        try
          {
            return new UnicodeBlockHandler
              (Character.UnicodeBlock.forName (blockName));
          }
        catch (IllegalArgumentException e)
          {
            // Keep the cause, as the "java" branch below already does.
            throw new REException ("Invalid Unicode block name: " + blockName,
                                   e, REException.REG_ESCAPE, 0);
          }
      }

    String property = name.startsWith ("Is") ? name.substring (2) : name;

    // "grouped properties"
    byte[] group = GROUPED_CATEGORIES.get (property);
    if (group != null)
      return new UnicodeCategoriesHandler (group);

    Byte category = SINGLE_CATEGORIES.get (property);
    if (category != null)
      return new UnicodeCategoryHandler (category.byteValue ());

    if (property.equals ("all"))
      return ALL_HANDLER;

    if (property.startsWith ("java"))
      {
        try
          {
            Method m = Character.class.getMethod ("is"
                                                  + property.substring (4),
                                                  Character.TYPE);
            return new JavaCategoryHandler (m);
          }
        catch (NoSuchMethodException e)
          {
            throw new REException ("Unsupported Java handler: " + property, e,
                                   REException.REG_ESCAPE, 0);
          }
      }

    throw new REException ("unsupported name " + property,
                           REException.REG_ESCAPE, 0);
  }

  /** Delegates the test to a RETokenPOSIX built from the lower-cased name. */
  private static class POSIXHandler extends Handler
  {
    private RETokenPOSIX retoken;

    /**
     * @param name a POSIX class name; it is lower-cased before lookup
     * @throws RuntimeException if RETokenPOSIX.intValue returns -1 for it
     */
    public POSIXHandler (String name)
    {
      int posixId = RETokenPOSIX.intValue (name.toLowerCase ());
      if (posixId == -1)
        throw new RuntimeException ("Unknown posix ID: " + name);
      retoken = new RETokenPOSIX (0, posixId, false, false);
    }

    @Override
    public boolean includes (char c)
    {
      return retoken.matchOneChar (c);
    }
  }

  /** Includes chars whose Character.getType equals one category. */
  private static class UnicodeCategoryHandler extends Handler
  {
    private int category;

    /** @param category a Character general-category constant */
    public UnicodeCategoryHandler (byte category)
    {
      this.category = (int) category;
    }

    @Override
    public boolean includes (char c)
    {
      return Character.getType (c) == category;
    }
  }

  /** Includes chars whose Character.getType is any of several categories. */
  private static class UnicodeCategoriesHandler extends Handler
  {
    private byte[] categories;

    /** @param categories the accepted categories; the array is not copied */
    public UnicodeCategoriesHandler (byte[] categories)
    {
      this.categories = categories;
    }

    @Override
    public boolean includes (char c)
    {
      int category = Character.getType (c);
      for (int i = 0; i < categories.length; i++)
        if (category == categories[i])
          return true;
      return false;
    }
  }

  /** Includes chars that Character.UnicodeBlock.of places in one block. */
  private static class UnicodeBlockHandler extends Handler
  {
    private Character.UnicodeBlock block;

    /** @param block the block a char must belong to */
    public UnicodeBlockHandler (Character.UnicodeBlock block)
    {
      this.block = block;
    }

    @Override
    public boolean includes (char c)
    {
      Character.UnicodeBlock cblock = Character.UnicodeBlock.of (c);
      return (cblock != null && cblock.equals (block));
    }
  }

  /**
   * Handle the Java-specific extensions \p{javaX} where X
   * is a method from Character of the form isX
   *
   * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
   */
  private static class JavaCategoryHandler extends Handler
  {
    private Method method;

    /** @param m the method to invoke; it is called with a null receiver */
    public JavaCategoryHandler (Method m)
    {
      this.method = m;
    }

    /**
     * Invokes the wrapped method on {@code c}.
     *
     * @throws InternalError if the reflective call fails; the original
     *         exception is attached as the cause
     */
    @Override
    public boolean includes (char c)
    {
      try
        {
          return (Boolean) method.invoke (null, c);
        }
      catch (IllegalAccessException e)
        {
          InternalError error =
            new InternalError ("Unable to access method " + method);
          error.initCause (e);
          throw error;
        }
      catch (InvocationTargetException e)
        {
          InternalError error = new InternalError ("Error invoking " + method);
          error.initCause (e);
          throw error;
        }
    }
  }

}