/*
 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.sun.org.apache.xerces.internal.impl.xpath.regex;

import java.text.CharacterIterator;

/**
 * Static helper routines for the regular expression package: surrogate-pair
 * arithmetic, option-flag parsing and formatting, extended-comment stripping,
 * a small cache of compiled expressions, and quoting of literal text.
 *
 * @xerces.internal
 *
 */
public final class REUtil {

    /** Not instantiable; every member is static. */
    private REUtil() {
    }

    /**
     * Combines a surrogate pair into a single code point.
     * The arguments are not validated.
     *
     * @param high the high (leading) surrogate code unit
     * @param low the low (trailing) surrogate code unit
     * @return {@code 0x10000 + ((high - 0xd800) << 10) + low - 0xdc00}
     */
    static final int composeFromSurrogates(int high, int low) {
        return 0x10000 + ((high-0xd800)<<10) + low-0xdc00;
    }

    /**
     * Tests whether {@code ch} lies in the low-surrogate range.
     *
     * @param ch the value to test
     * @return {@code true} if {@code (ch & 0xfc00) == 0xdc00}
     */
    static final boolean isLowSurrogate(int ch) {
        return (ch & 0xfc00) == 0xdc00;
    }

    /**
     * Tests whether {@code ch} lies in the high-surrogate range.
     *
     * @param ch the value to test
     * @return {@code true} if {@code (ch & 0xfc00) == 0xd800}
     */
    static final boolean isHighSurrogate(int ch) {
        return (ch & 0xfc00) == 0xd800;
    }

    /**
     * Splits a code point into a two-character string holding its
     * surrogate pair. The argument is not validated.
     *
     * @param ch the code point to split
     * @return a string of length 2: high surrogate followed by low surrogate
     */
    static final String decomposeToSurrogates(int ch) {
        char[] chs = new char[2];
        ch -= 0x10000;
        chs[0] = (char)((ch>>10)+0xd800);
        chs[1] = (char)((ch&0x3ff)+0xdc00);
        return new String(chs);
    }

    /**
     * Copies the characters in {@code [begin, end)} out of an iterator.
     * Each character is read with {@code setIndex}, so the iterator's
     * current index is moved by this call.
     *
     * @param iterator the character source
     * @param begin index of the first character to copy
     * @param end index one past the last character to copy
     * @return the copied characters as a string
     */
    static final String substring(CharacterIterator iterator, int begin, int end) {
        char[] src = new char[end-begin];
        for (int i = 0; i < src.length; i ++) {
            // setIndex() repositions the iterator and returns the character there.
            src[i] = iterator.setIndex(i+begin);
        }
        return new String(src);
    }

    // ================================================================

    /**
     * Maps a single option character to its {@link RegularExpression}
     * option flag.
     *
     * @param ch the option character
     * @return the corresponding flag, or {@code 0} if the character is not
     *         a recognized option
     */
    static final int getOptionValue(int ch) {
        int ret = 0;
        switch (ch) {
          case 'i':
            ret = RegularExpression.IGNORE_CASE;
            break;
          case 'm':
            ret = RegularExpression.MULTIPLE_LINES;
            break;
          case 's':
            ret = RegularExpression.SINGLE_LINE;
            break;
          case 'x':
            ret = RegularExpression.EXTENDED_COMMENT;
            break;
          case 'u':
            ret = RegularExpression.USE_UNICODE_CATEGORY;
            break;
          case 'w':
            ret = RegularExpression.UNICODE_WORD_BOUNDARY;
            break;
          case 'F':
            ret = RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION;
            break;
          case 'H':
            ret = RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
            break;
          case 'X':
            ret = RegularExpression.XMLSCHEMA_MODE;
            break;
          case ',':
            ret = RegularExpression.SPECIAL_COMMA;
            break;
          default:
        }
        return ret;
    }

    /**
     * Converts a string of option characters into the OR of their flags.
     *
     * @param opts the option characters; {@code null} is treated as "no options"
     * @return the combined option flags, {@code 0} when {@code opts} is {@code null}
     * @throws ParseException if a character is not a recognized option; the
     *         message contains the remainder of {@code opts} starting at that
     *         character and the location is {@code -1}
     */
    static final int parseOptions(String opts) throws ParseException {
        if (opts == null) return 0;
        int options = 0;
        for (int i = 0; i < opts.length(); i ++) {
            int v = getOptionValue(opts.charAt(i));
            if (v == 0)
                throw new ParseException("Unknown Option: "+opts.substring(i), -1);
            options |= v;
        }
        return options;
    }

    /**
     * Formats option flags as a string of option characters. Characters are
     * always emitted in the fixed order {@code F H X i m s u w x ,}.
     *
     * @param options the option flags
     * @return the interned option string; empty when no known flag is set
     */
    static final String createOptionString(int options) {
        StringBuilder sb = new StringBuilder(9);
        if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0)
            sb.append('F');
        if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0)
            sb.append('H');
        if ((options & RegularExpression.XMLSCHEMA_MODE) != 0)
            sb.append('X');
        if ((options & RegularExpression.IGNORE_CASE) != 0)
            sb.append('i');
        if ((options & RegularExpression.MULTIPLE_LINES) != 0)
            sb.append('m');
        if ((options & RegularExpression.SINGLE_LINE) != 0)
            sb.append('s');
        if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0)
            sb.append('u');
        if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0)
            sb.append('w');
        if ((options & RegularExpression.EXTENDED_COMMENT) != 0)
            sb.append('x');
        if ((options & RegularExpression.SPECIAL_COMMA) != 0)
            sb.append(',');
        return sb.toString().intern();
    }

    // ================================================================

    /**
     * Removes extended-syntax decoration from a pattern: unescaped white
     * space outside character classes is dropped, and everything from a
     * {@code #} up to and including the next line terminator is dropped.
     * A backslash followed by {@code #} or a white space character is
     * replaced by that character alone; any other escape is copied as is.
     *
     * @param regex the pattern text
     * @return the pattern with comments and insignificant white space removed
     */
    static String stripExtendedComment(String regex) {
        int len = regex.length();
        StringBuilder buffer = new StringBuilder(len);
        int offset = 0;
        // Nesting depth of '[' ... ']' at the current position.
        int charClass = 0;
        while (offset < len) {
            int ch = regex.charAt(offset++);
                                                // Skips a white space.
            if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ') {
                // if we are inside a character class, we keep the white space
                if (charClass > 0) {
                    buffer.append((char)ch);
                }
                continue;
            }

            if (ch == '#') {                    // Skips characters between '#' and a line end.
                while (offset < len) {
                    ch = regex.charAt(offset++);
                    if (ch == '\r' || ch == '\n')
                        break;
                }
                continue;
            }

            int next;                           // Strips an escaped white space.
            if (ch == '\\' && offset < len) {
                if ((next = regex.charAt(offset)) == '#'
                    || next == '\t' || next == '\n' || next == '\f'
                    || next == '\r' || next == ' ') {
                    // The backslash is dropped; only the escaped character is kept.
                    buffer.append((char)next);
                    offset ++;
                } else {                        // Other escaped character.
                    buffer.append('\\');
                    buffer.append((char)next);
                    offset ++;
                }
            }
            else if (ch == '[') {
                charClass++;
                buffer.append((char)ch);
                if (offset < len) {
                    // A '[' or ']' directly after the opening bracket (optionally
                    // preceded by '^') is copied here without changing the depth.
                    next = regex.charAt(offset);
                    if (next == '[' || next ==']') {
                        buffer.append((char)next);
                        offset ++;
                    }
                    else if (next == '^' && offset + 1 < len) {
                        next = regex.charAt(offset + 1);
                        if (next == '[' || next ==']') {
                            buffer.append('^');
                            buffer.append((char)next);
                            offset += 2;
                        }
                    }
                }
            }
            else {
                if (charClass > 0 && ch == ']') {
                    --charClass;
                }
                buffer.append((char)ch);
            }
        }
        return buffer.toString();
    }

    // ================================================================

    /**
     * Sample entry.
     * <div>Usage: <KBD>com.sun.org.apache.xerces.internal.impl.xpath.regex.REUtil
     * [-i|-m|-s|-u|-w|-X] &lt;regex&gt; &lt;string&gt;</KBD></div>
     *
     * <p>Compiles the pattern with the given options, matches it against the
     * string, and prints the range and text of every group. Both a pattern
     * and a target string are required; if either is missing the usage
     * message is printed and the program exits.
     *
     * @param argv option flags, the regular expression, and the target string
     */
    public static void main(String[] argv) {
        final String usage = "Error:Usage: java REUtil -i|-m|-s|-u|-w|-X regularExpression String";
        String pattern = null;
        try {
            StringBuilder options = new StringBuilder();
            String target = null;
            if( argv.length == 0 ) {
                System.out.println( usage );
                System.exit( 0 );
            }
            for (int i = 0; i < argv.length; i ++) {
                if (argv[i].length() == 0 || argv[i].charAt(0) != '-') {
                    // First non-option argument is the pattern, second is the target.
                    if (pattern == null)
                        pattern = argv[i];
                    else if (target == null)
                        target = argv[i];
                    else
                        System.err.println("Unnecessary: "+argv[i]);
                } else if (argv[i].equals("-i")) {
                    options.append('i');
                } else if (argv[i].equals("-m")) {
                    options.append('m');
                } else if (argv[i].equals("-s")) {
                    options.append('s');
                } else if (argv[i].equals("-u")) {
                    options.append('u');
                } else if (argv[i].equals("-w")) {
                    options.append('w');
                } else if (argv[i].equals("-X")) {
                    options.append('X');
                } else {
                    System.err.println("Unknown option: "+argv[i]);
                }
            }
            // Only flags, or only a pattern, were supplied: report it the same
            // way as the no-argument case instead of handing null to the matcher.
            if (pattern == null || target == null) {
                System.out.println( usage );
                System.exit( 0 );
            }
            RegularExpression reg = new RegularExpression(pattern, options.toString());
            System.out.println("RegularExpression: "+reg);
            Match match = new Match();
            reg.matches(target, match);
            for (int i = 0; i < match.getNumberOfGroups(); i ++) {
                if (i == 0 ) System.out.print("Matched range for the whole pattern: ");
                else System.out.print("["+i+"]: ");
                if (match.getBeginning(i) < 0)
                    System.out.println("-1");
                else {
                    System.out.print(match.getBeginning(i)+", "+match.getEnd(i)+", ");
                    System.out.println("\""+match.getCapturedText(i)+"\"");
                }
            }
        } catch (ParseException pe) {
            if (pattern == null) {
                pe.printStackTrace();
            } else {
                System.err.println("com.sun.org.apache.xerces.internal.utils.regex.ParseException: "+pe.getMessage());
                String indent = " ";
                System.err.println(indent+pattern);
                int loc = pe.getLocation();
                if (loc >= 0) {
                    // Draw "----^" under the pattern, pointing at the error location.
                    System.err.print(indent);
                    for (int i = 0; i < loc; i ++) System.err.print("-");
                    System.err.println("^");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Number of compiled expressions kept by {@link #createRegex}. */
    static final int CACHESIZE = 20;

    /**
     * Cache used by {@link #createRegex}; the most recently used entry is at
     * index 0. Accessed while holding this array's monitor.
     */
    static final RegularExpression[] regexCache = new RegularExpression[CACHESIZE];

    /**
     * Creates a RegularExpression instance.
     * This method caches created instances: a cached instance whose pattern
     * and options match is moved to the front of the cache and returned;
     * otherwise a new instance is created and inserted at the front, pushing
     * the other entries back by one slot (the last slot's entry is dropped).
     *
     * @param pattern the regular expression
     * @param options the option characters, or {@code null} for none
     * @return a cached or newly created instance
     * @throws ParseException if {@code options} contains an unknown option
     *         character, or as thrown by the RegularExpression constructor
     * @see RegularExpression#RegularExpression(java.lang.String, java.lang.String)
     */
    public static RegularExpression createRegex(String pattern, String options)
        throws ParseException {
        RegularExpression re = null;
        int intOptions = REUtil.parseOptions(options);
        synchronized (REUtil.regexCache) {
            int i;
            for (i = 0; i < REUtil.CACHESIZE; i ++) {
                RegularExpression cached = REUtil.regexCache[i];
                if (cached == null) {
                    // Entries are packed from the front, so an empty slot ends the search.
                    i = -1;
                    break;
                }
                if (cached.equals(pattern, intOptions)) {
                    re = cached;
                    break;
                }
            }
            if (re != null) {
                if (i != 0) {
                    // Shift entries 0..i-1 back by one and move the hit to the front.
                    System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, i);
                    REUtil.regexCache[0] = re;
                }
            } else {
                re = new RegularExpression(pattern, options);
                System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, REUtil.CACHESIZE-1);
                REUtil.regexCache[0] = re;
            }
        }
        return re;
    }

    /**
     * Matches {@code target} against {@code regex} with no options, using an
     * instance obtained from {@link #createRegex}.
     *
     * @param regex the regular expression
     * @param target the string to match
     * @return the result of {@code RegularExpression.matches(target)}
     * @throws ParseException as thrown by {@link #createRegex}
     * @see RegularExpression#matches(java.lang.String)
     */
    public static boolean matches(String regex, String target) throws ParseException {
        return REUtil.createRegex(regex, null).matches(target);
    }

    /**
     * Matches {@code target} against {@code regex} with the given options,
     * using an instance obtained from {@link #createRegex}.
     *
     * @param regex the regular expression
     * @param options the option characters, or {@code null} for none
     * @param target the string to match
     * @return the result of {@code RegularExpression.matches(target)}
     * @throws ParseException as thrown by {@link #createRegex}
     * @see RegularExpression#matches(java.lang.String)
     */
    public static boolean matches(String regex, String options, String target) throws ParseException {
        return REUtil.createRegex(regex, options).matches(target);
    }

    // ================================================================

    /**
     * Escapes pattern metacharacters in a literal string by prefixing each of
     * <code>. * + ? { [ ( ) | \ ^ $</code> with a backslash.
     *
     * @param literal the text to quote
     * @return the quoted text, or {@code literal} itself when it contains
     *         none of those characters
     */
    public static String quoteMeta(String literal) {
        int len = literal.length();
        // Allocated lazily, on the first character that needs quoting.
        StringBuilder buffer = null;
        for (int i = 0; i < len; i ++) {
            int ch = literal.charAt(i);
            if (".*+?{[()|\\^$".indexOf(ch) >= 0) {
                if (buffer == null) {
                    buffer = new StringBuilder(i+(len-i)*2);
                    if (i > 0) buffer.append(literal.substring(0, i));
                }
                buffer.append('\\');
                buffer.append((char)ch);
            } else if (buffer != null)
                buffer.append((char)ch);
        }
        return buffer != null ? buffer.toString() : literal;
    }

    // ================================================================

    /**
     * Prints each character of {@code v} to {@code System.out} as a
     * hexadecimal number followed by a space, then ends the line.
     *
     * @param v the string to dump
     */
    static void dumpString(String v) {
        for (int i = 0; i < v.length(); i ++) {
            System.out.print(Integer.toHexString(v.charAt(i)));
            System.out.print(" ");
        }
        System.out.println();
    }
}