1 /* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20 package jdk.nashorn.internal.runtime.regexp.joni; 21 22 import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; 23 import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState; 24 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; 25 import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; 26 27 @SuppressWarnings("javadoc") 28 public final class Regex implements RegexState { 29 30 int[] code; /* compiled pattern */ 31 int codeLength; 32 boolean stackNeeded; 33 Object[] operands; /* e.g. shared CClassNode */ 34 int operandLength; 35 36 int numMem; /* used memory(...) num counted from 1 */ 37 int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ 38 int numNullCheck; /* OP_NULL_CHECK_START/END id counter */ 39 int captureHistory; /* (?@...) flag (1-31) */ 40 int btMemStart; /* need backtrack flag */ 41 int btMemEnd; /* need backtrack flag */ 42 43 int stackPopLevel; 44 45 int[] repeatRangeLo; 46 int[] repeatRangeHi; 47 48 WarnCallback warnings; 49 MatcherFactory factory; 50 protected Analyser analyser; 51 52 int options; 53 final int caseFoldFlag; 54 55 /* optimization info (string search, char-map and anchors) */ 56 SearchAlgorithm searchAlgorithm; /* optimize flag */ 57 int thresholdLength; /* search str-length for apply optimize */ 58 int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ 59 int anchorDmin; /* (SEMI_)END_BUF anchor distance */ 60 int anchorDmax; /* (SEMI_)END_BUF anchor distance */ 61 int subAnchor; /* start-anchor for exact or map */ 62 63 char[] exact; 64 int exactP; 65 int exactEnd; 66 67 byte[] map; /* used as BM skip or char-map */ 68 int[] intMap; /* BM skip for exact_len > 255 */ 69 int[] intMapBackward; /* BM skip for backward search */ 70 int dMin; /* min-distance of exact or map */ 71 int dMax; /* max-distance of exact or map */ 72 73 char[][] templates; 74 int templateNum; 75 Regex(final CharSequence cs)76 public Regex(final CharSequence cs) { 77 this(cs.toString()); 78 } 79 Regex(final String str)80 public Regex(final String str) { 81 this(str.toCharArray(), 0, str.length(), 0); 82 } 83 Regex(final char[] chars)84 public Regex(final char[] chars) { 85 this(chars, 0, chars.length, 0); 86 } 87 Regex(final char[] chars, final int p, final int end)88 public Regex(final char[] chars, final int p, final int end) { 89 this(chars, p, end, 0); 90 } 91 Regex(final char[] chars, final int p, final int end, final int option)92 public Regex(final char[] chars, final int p, final int end, final int option) { 93 this(chars, p, end, option, Syntax.RUBY, WarnCallback.DEFAULT); 94 } 95 96 // onig_new Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax)97 public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax) { 98 this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, WarnCallback.DEFAULT); 99 } 100 Regex(final char[]chars, final int p, final int end, final int option, final WarnCallback warnings)101 public Regex(final char[]chars, final int p, final int end, final int option, final WarnCallback warnings) { 102 this(chars, p, end, option, Syntax.RUBY, warnings); 103 } 104 105 // onig_new Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax, final WarnCallback warnings)106 public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax, final WarnCallback warnings) { 107 this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, warnings); 108 } 109 110 // onig_alloc_init Regex(final char[] chars, final int p, final int end, final int optionp, final int caseFoldFlag, final Syntax syntax, final WarnCallback warnings)111 public Regex(final char[] chars, final int p, final int end, final int optionp, final int caseFoldFlag, final Syntax syntax, final WarnCallback warnings) { 112 int option = optionp; 113 114 if ((option & (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) == 115 (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) { 116 throw new ValueException(ErrorMessages.ERR_INVALID_COMBINATION_OF_OPTIONS); 117 } 118 119 if ((option & Option.NEGATE_SINGLELINE) != 0) { 120 option |= syntax.options; 121 option &= ~Option.SINGLELINE; 122 } else { 123 option |= syntax.options; 124 } 125 126 this.options = option; 127 this.caseFoldFlag = caseFoldFlag; 128 this.warnings = warnings; 129 130 this.analyser = new Analyser(new ScanEnvironment(this, syntax), chars, p, end); 131 this.analyser.compile(); 132 133 this.warnings = null; 134 } 135 compile()136 public synchronized MatcherFactory compile() { 137 if (factory == null && analyser != null) { 138 new ArrayCompiler(analyser).compile(); 139 analyser = null; // only do this once 140 } 141 assert factory != null; 142 return factory; 143 } 144 matcher(final char[] chars)145 public Matcher matcher(final char[] chars) { 146 return matcher(chars, 0, chars.length); 147 } 148 matcher(final char[] chars, final int p, final int end)149 public Matcher matcher(final char[] chars, final int p, final int end) { 150 MatcherFactory matcherFactory = factory; 151 if (matcherFactory == null) { 152 matcherFactory = compile(); 153 } 154 return matcherFactory.create(this, chars, p, end); 155 } 156 getWarnings()157 public WarnCallback getWarnings() { 158 return warnings; 159 } 160 numberOfCaptures()161 public int numberOfCaptures() { 162 return numMem; 163 } 164 165 /* set skip map for Boyer-Moor search */ setupBMSkipMap()166 void setupBMSkipMap() { 167 final char[] chars = exact; 168 final int p = exactP; 169 final int end = exactEnd; 170 final int len = end - p; 171 172 if (len < Config.CHAR_TABLE_SIZE) { 173 // map/skip 174 if (map == null) { 175 map = new byte[Config.CHAR_TABLE_SIZE]; 176 } 177 178 for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { 179 map[i] = (byte)len; 180 } 181 for (int i=0; i<len-1; i++) 182 { 183 map[chars[p + i] & 0xff] = (byte)(len - 1 -i); // oxff ?? 184 } 185 } else { 186 if (intMap == null) { 187 intMap = new int[Config.CHAR_TABLE_SIZE]; 188 } 189 190 for (int i=0; i<len-1; i++) 191 { 192 intMap[chars[p + i] & 0xff] = len - 1 - i; // oxff ?? 193 } 194 } 195 } 196 setExactInfo(final OptExactInfo e)197 void setExactInfo(final OptExactInfo e) { 198 if (e.length == 0) { 199 return; 200 } 201 202 // shall we copy that ? 203 exact = e.chars; 204 exactP = 0; 205 exactEnd = e.length; 206 207 if (e.ignoreCase) { 208 searchAlgorithm = new SearchAlgorithm.SLOW_IC(this); 209 } else { 210 if (e.length >= 2) { 211 setupBMSkipMap(); 212 searchAlgorithm = SearchAlgorithm.BM; 213 } else { 214 searchAlgorithm = SearchAlgorithm.SLOW; 215 } 216 } 217 218 dMin = e.mmd.min; 219 dMax = e.mmd.max; 220 221 if (dMin != MinMaxLen.INFINITE_DISTANCE) { 222 thresholdLength = dMin + (exactEnd - exactP); 223 } 224 } 225 setOptimizeMapInfo(final OptMapInfo m)226 void setOptimizeMapInfo(final OptMapInfo m) { 227 map = m.map; 228 229 searchAlgorithm = SearchAlgorithm.MAP; 230 dMin = m.mmd.min; 231 dMax = m.mmd.max; 232 233 if (dMin != MinMaxLen.INFINITE_DISTANCE) { 234 thresholdLength = dMin + 1; 235 } 236 } 237 setSubAnchor(final OptAnchorInfo anc)238 void setSubAnchor(final OptAnchorInfo anc) { 239 subAnchor |= anc.leftAnchor & AnchorType.BEGIN_LINE; 240 subAnchor |= anc.rightAnchor & AnchorType.END_LINE; 241 } 242 clearOptimizeInfo()243 void clearOptimizeInfo() { 244 searchAlgorithm = SearchAlgorithm.NONE; 245 anchor = 0; 246 anchorDmax = 0; 247 anchorDmin = 0; 248 subAnchor = 0; 249 250 exact = null; 251 exactP = exactEnd = 0; 252 } 253 optimizeInfoToString()254 public String optimizeInfoToString() { 255 final StringBuilder s = new StringBuilder(); 256 s.append("optimize: ").append(searchAlgorithm.getName()).append("\n"); 257 s.append(" anchor: ").append(OptAnchorInfo.anchorToString(anchor)); 258 259 if ((anchor & AnchorType.END_BUF_MASK) != 0) { 260 s.append(MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax)); 261 } 262 263 s.append("\n"); 264 265 if (searchAlgorithm != SearchAlgorithm.NONE) { 266 s.append(" sub anchor: ").append(OptAnchorInfo.anchorToString(subAnchor)).append("\n"); 267 } 268 269 s.append("dmin: ").append(dMin).append(" dmax: ").append(dMax).append("\n"); 270 s.append("threshold length: ").append(thresholdLength).append("\n"); 271 272 if (exact != null) { 273 s.append("exact: [").append(exact, exactP, exactEnd - exactP).append("]: length: ").append(exactEnd - exactP).append("\n"); 274 } else if (searchAlgorithm == SearchAlgorithm.MAP) { 275 int n=0; 276 for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { 277 if (map[i] != 0) { 278 n++; 279 } 280 } 281 282 s.append("map: n = ").append(n).append("\n"); 283 if (n > 0) { 284 int c=0; 285 s.append("["); 286 for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { 287 if (map[i] != 0) { 288 if (c > 0) { 289 s.append(", "); 290 } 291 c++; 292 // TODO if (enc.isPrint(i) 293 s.append((char)i); 294 } 295 } 296 s.append("]\n"); 297 } 298 } 299 300 return s.toString(); 301 } 302 getOptions()303 public int getOptions() { 304 return options; 305 } 306 dumpTree()307 public String dumpTree() { 308 return analyser == null ? null : analyser.root.toString(); 309 } 310 dumpByteCode()311 public String dumpByteCode() { 312 compile(); 313 return new ByteCodePrinter(this).byteCodeListToString(); 314 } 315 316 } 317