1 /* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20 package jdk.nashorn.internal.runtime.regexp.joni; 21 22 import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt; 23 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDynamic; 24 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase; 25 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isMultiline; 26 import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite; 27 import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode; 28 import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode; 29 import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; 30 import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; 31 import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode; 32 import jdk.nashorn.internal.runtime.regexp.joni.ast.Node; 33 import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; 34 import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode; 35 import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; 36 import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType; 37 import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType; 38 import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode; 39 import jdk.nashorn.internal.runtime.regexp.joni.constants.OPSize; 40 import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo; 41 42 final class ArrayCompiler extends Compiler { 43 private int[] code; 44 private int codeLength; 45 46 private char[][] templates; 47 private int templateNum; 48 ArrayCompiler(final Analyser analyser)49 ArrayCompiler(final Analyser analyser) { 50 super(analyser); 51 } 52 53 @Override prepare()54 protected final void prepare() { 55 final int codeSize = Config.USE_STRING_TEMPLATES ? 8 : ((analyser.getEnd() - analyser.getBegin()) * 2 + 2); 56 code = new int[codeSize]; 57 codeLength = 0; 58 } 59 60 @Override finish()61 protected final void finish() { 62 addOpcode(OPCode.END); 63 addOpcode(OPCode.FINISH); // for stack bottom 64 65 regex.code = code; 66 regex.codeLength = codeLength; 67 regex.templates = templates; 68 regex.templateNum = templateNum; 69 regex.factory = MatcherFactory.DEFAULT; 70 } 71 72 @Override compileAltNode(final ConsAltNode node)73 protected void compileAltNode(final ConsAltNode node) { 74 ConsAltNode aln = node; 75 int len = 0; 76 77 do { 78 len += compileLengthTree(aln.car); 79 if (aln.cdr != null) { 80 len += OPSize.PUSH + OPSize.JUMP; 81 } 82 } while ((aln = aln.cdr) != null); 83 84 final int pos = codeLength + len; /* goal position */ 85 86 aln = node; 87 do { 88 len = compileLengthTree(aln.car); 89 if (aln.cdr != null) { 90 addOpcodeRelAddr(OPCode.PUSH, len + OPSize.JUMP); 91 } 92 compileTree(aln.car); 93 if (aln.cdr != null) { 94 len = pos - (codeLength + OPSize.JUMP); 95 addOpcodeRelAddr(OPCode.JUMP, len); 96 } 97 } while ((aln = aln.cdr) != null); 98 } 99 isNeedStrLenOpExact(final int op)100 private static boolean isNeedStrLenOpExact(final int op) { 101 return op == OPCode.EXACTN || op == OPCode.EXACTN_IC; 102 } 103 opTemplated(final int op)104 private static boolean opTemplated(final int op) { 105 return isNeedStrLenOpExact(op); 106 } 107 selectStrOpcode(final int strLength, final boolean ignoreCase)108 private static int selectStrOpcode(final int strLength, final boolean ignoreCase) { 109 int op; 110 111 if (ignoreCase) { 112 switch(strLength) { 113 case 1: op = OPCode.EXACT1_IC; break; 114 default:op = OPCode.EXACTN_IC; break; 115 } // switch 116 } else { 117 switch (strLength) { 118 case 1: op = OPCode.EXACT1; break; 119 case 2: op = OPCode.EXACT2; break; 120 case 3: op = OPCode.EXACT3; break; 121 case 4: op = OPCode.EXACT4; break; 122 case 5: op = OPCode.EXACT5; break; 123 default:op = OPCode.EXACTN; break; 124 } // inner switch 125 } 126 return op; 127 } 128 compileTreeEmptyCheck(final Node node, final int emptyInfo)129 private void compileTreeEmptyCheck(final Node node, final int emptyInfo) { 130 final int savedNumNullCheck = regex.numNullCheck; 131 132 if (emptyInfo != 0) { 133 addOpcode(OPCode.NULL_CHECK_START); 134 addMemNum(regex.numNullCheck); /* NULL CHECK ID */ 135 regex.numNullCheck++; 136 } 137 138 compileTree(node); 139 140 if (emptyInfo != 0) { 141 switch (emptyInfo) { 142 case TargetInfo.IS_EMPTY: 143 addOpcode(OPCode.NULL_CHECK_END); 144 break; 145 case TargetInfo.IS_EMPTY_MEM: 146 addOpcode(OPCode.NULL_CHECK_END_MEMST); 147 break; 148 default: 149 break; 150 } // switch 151 152 addMemNum(savedNumNullCheck); /* NULL CHECK ID */ 153 } 154 } 155 addCompileStringlength(final char[] chars, final int p, final int strLength, final boolean ignoreCase)156 private static int addCompileStringlength(final char[] chars, final int p, final int strLength, final boolean ignoreCase) { 157 final int op = selectStrOpcode(strLength, ignoreCase); 158 int len = OPSize.OPCODE; 159 160 if (Config.USE_STRING_TEMPLATES && opTemplated(op)) { 161 // string length, template index, template string pointer 162 len += OPSize.LENGTH + OPSize.INDEX + OPSize.INDEX; 163 } else { 164 if (isNeedStrLenOpExact(op)) { 165 len += OPSize.LENGTH; 166 } 167 len += strLength; 168 } 169 return len; 170 } 171 172 @Override addCompileString(final char[] chars, final int p, final int strLength, final boolean ignoreCase)173 protected final void addCompileString(final char[] chars, final int p, final int strLength, final boolean ignoreCase) { 174 final int op = selectStrOpcode(strLength, ignoreCase); 175 addOpcode(op); 176 177 if (isNeedStrLenOpExact(op)) { 178 addLength(strLength); 179 } 180 181 if (Config.USE_STRING_TEMPLATES && opTemplated(op)) { 182 addInt(templateNum); 183 addInt(p); 184 addTemplate(chars); 185 } else { 186 addChars(chars, p, strLength); 187 } 188 } 189 compileLengthStringNode(final Node node)190 private static int compileLengthStringNode(final Node node) { 191 final StringNode sn = (StringNode)node; 192 if (sn.length() <= 0) { 193 return 0; 194 } 195 final boolean ambig = sn.isAmbig(); 196 197 int p, prev; 198 p = prev = sn.p; 199 final int end = sn.end; 200 final char[] chars = sn.chars; 201 p++; 202 203 int slen = 1; 204 int rlen = 0; 205 206 while (p < end) { 207 slen++; 208 p++; 209 } 210 final int r = addCompileStringlength(chars, prev, slen, ambig); 211 rlen += r; 212 return rlen; 213 } 214 compileLengthStringRawNode(final StringNode sn)215 private static int compileLengthStringRawNode(final StringNode sn) { 216 if (sn.length() <= 0) { 217 return 0; 218 } 219 return addCompileStringlength(sn.chars, sn.p, sn.length(), false); 220 } 221 addMultiByteCClass(final CodeRangeBuffer mbuf)222 private void addMultiByteCClass(final CodeRangeBuffer mbuf) { 223 addLength(mbuf.used); 224 addInts(mbuf.p, mbuf.used); 225 } 226 compileLengthCClassNode(final CClassNode cc)227 private static int compileLengthCClassNode(final CClassNode cc) { 228 if (cc.isShare()) { 229 return OPSize.OPCODE + OPSize.POINTER; 230 } 231 232 int len; 233 if (cc.mbuf == null) { 234 len = OPSize.OPCODE + BitSet.BITSET_SIZE; 235 } else { 236 if (cc.bs.isEmpty()) { 237 len = OPSize.OPCODE; 238 } else { 239 len = OPSize.OPCODE + BitSet.BITSET_SIZE; 240 } 241 242 len += OPSize.LENGTH + cc.mbuf.used; 243 } 244 return len; 245 } 246 247 @Override compileCClassNode(final CClassNode cc)248 protected void compileCClassNode(final CClassNode cc) { 249 if (cc.isShare()) { // shared char class 250 addOpcode(OPCode.CCLASS_NODE); 251 addPointer(cc); 252 return; 253 } 254 255 if (cc.mbuf == null) { 256 if (cc.isNot()) { 257 addOpcode(OPCode.CCLASS_NOT); 258 } else { 259 addOpcode(OPCode.CCLASS); 260 } 261 addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset 262 } else { 263 if (cc.bs.isEmpty()) { 264 if (cc.isNot()) { 265 addOpcode(OPCode.CCLASS_MB_NOT); 266 } else { 267 addOpcode(OPCode.CCLASS_MB); 268 } 269 addMultiByteCClass(cc.mbuf); 270 } else { 271 if (cc.isNot()) { 272 addOpcode(OPCode.CCLASS_MIX_NOT); 273 } else { 274 addOpcode(OPCode.CCLASS_MIX); 275 } 276 // store the bit set and mbuf themself! 277 addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset 278 addMultiByteCClass(cc.mbuf); 279 } 280 } 281 } 282 283 @Override compileAnyCharNode()284 protected void compileAnyCharNode() { 285 if (isMultiline(regex.options)) { 286 addOpcode(OPCode.ANYCHAR_ML); 287 } else { 288 addOpcode(OPCode.ANYCHAR); 289 } 290 } 291 292 @Override compileBackrefNode(final BackRefNode node)293 protected void compileBackrefNode(final BackRefNode node) { 294 if (isIgnoreCase(regex.options)) { 295 addOpcode(OPCode.BACKREFN_IC); 296 addMemNum(node.backRef); 297 } else { 298 switch (node.backRef) { 299 case 1: 300 addOpcode(OPCode.BACKREF1); 301 break; 302 case 2: 303 addOpcode(OPCode.BACKREF2); 304 break; 305 default: 306 addOpcode(OPCode.BACKREFN); 307 addOpcode(node.backRef); 308 break; 309 } // switch 310 } 311 } 312 313 private static final int REPEAT_RANGE_ALLOC = 8; entryRepeatRange(final int id, final int lower, final int upper)314 private void entryRepeatRange(final int id, final int lower, final int upper) { 315 if (regex.repeatRangeLo == null) { 316 regex.repeatRangeLo = new int[REPEAT_RANGE_ALLOC]; 317 regex.repeatRangeHi = new int[REPEAT_RANGE_ALLOC]; 318 } else if (id >= regex.repeatRangeLo.length){ 319 int[]tmp = new int[regex.repeatRangeLo.length + REPEAT_RANGE_ALLOC]; 320 System.arraycopy(regex.repeatRangeLo, 0, tmp, 0, regex.repeatRangeLo.length); 321 regex.repeatRangeLo = tmp; 322 tmp = new int[regex.repeatRangeHi.length + REPEAT_RANGE_ALLOC]; 323 System.arraycopy(regex.repeatRangeHi, 0, tmp, 0, regex.repeatRangeHi.length); 324 regex.repeatRangeHi = tmp; 325 } 326 327 regex.repeatRangeLo[id] = lower; 328 regex.repeatRangeHi[id] = isRepeatInfinite(upper) ? 0x7fffffff : upper; 329 } 330 compileRangeRepeatNode(final QuantifierNode qn, final int targetLen, final int emptyInfo)331 private void compileRangeRepeatNode(final QuantifierNode qn, final int targetLen, final int emptyInfo) { 332 final int numRepeat = regex.numRepeat; 333 addOpcode(qn.greedy ? OPCode.REPEAT : OPCode.REPEAT_NG); 334 addMemNum(numRepeat); /* OP_REPEAT ID */ 335 regex.numRepeat++; 336 addRelAddr(targetLen + OPSize.REPEAT_INC); 337 338 entryRepeatRange(numRepeat, qn.lower, qn.upper); 339 340 compileTreeEmptyCheck(qn.target, emptyInfo); 341 342 if (qn.isInRepeat()) { 343 addOpcode(qn.greedy ? OPCode.REPEAT_INC_SG : OPCode.REPEAT_INC_NG_SG); 344 } else { 345 addOpcode(qn.greedy ? OPCode.REPEAT_INC : OPCode.REPEAT_INC_NG); 346 } 347 348 addMemNum(numRepeat); /* OP_REPEAT ID */ 349 } 350 351 private static final int QUANTIFIER_EXPAND_LIMIT_SIZE = 50; // was 50 352 353 @SuppressWarnings("unused") cknOn(final int ckn)354 private static boolean cknOn(final int ckn) { 355 return ckn > 0; 356 } 357 compileNonCECLengthQuantifierNode(final QuantifierNode qn)358 private int compileNonCECLengthQuantifierNode(final QuantifierNode qn) { 359 final boolean infinite = isRepeatInfinite(qn.upper); 360 final int emptyInfo = qn.targetEmptyInfo; 361 362 final int tlen = compileLengthTree(qn.target); 363 364 /* anychar repeat */ 365 if (qn.target.getType() == NodeType.CANY) { 366 if (qn.greedy && infinite) { 367 if (qn.nextHeadExact != null) { 368 return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower; 369 } 370 return OPSize.ANYCHAR_STAR + tlen * qn.lower; 371 } 372 } 373 374 int modTLen = 0; 375 if (emptyInfo != 0) { 376 modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END); 377 } else { 378 modTLen = tlen; 379 } 380 381 int len; 382 if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 383 if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { 384 len = OPSize.JUMP; 385 } else { 386 len = tlen * qn.lower; 387 } 388 389 if (qn.greedy) { 390 if (qn.headExact != null) { 391 len += OPSize.PUSH_OR_JUMP_EXACT1 + modTLen + OPSize.JUMP; 392 } else if (qn.nextHeadExact != null) { 393 len += OPSize.PUSH_IF_PEEK_NEXT + modTLen + OPSize.JUMP; 394 } else { 395 len += OPSize.PUSH + modTLen + OPSize.JUMP; 396 } 397 } else { 398 len += OPSize.JUMP + modTLen + OPSize.PUSH; 399 } 400 401 } else if (qn.upper == 0 && qn.isRefered) { /* /(?<n>..){0}/ */ 402 len = OPSize.JUMP + tlen; 403 } else if (!infinite && qn.greedy && 404 (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE )) { 405 len = tlen * qn.lower; 406 len += (OPSize.PUSH + tlen) * (qn.upper - qn.lower); 407 } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */ 408 len = OPSize.PUSH + OPSize.JUMP + tlen; 409 } else { 410 len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM; 411 } 412 return len; 413 } 414 415 @Override compileNonCECQuantifierNode(final QuantifierNode qn)416 protected void compileNonCECQuantifierNode(final QuantifierNode qn) { 417 final boolean infinite = isRepeatInfinite(qn.upper); 418 final int emptyInfo = qn.targetEmptyInfo; 419 420 final int tlen = compileLengthTree(qn.target); 421 422 if (qn.isAnyCharStar()) { 423 compileTreeNTimes(qn.target, qn.lower); 424 if (qn.nextHeadExact != null) { 425 if (isMultiline(regex.options)) { 426 addOpcode(OPCode.ANYCHAR_ML_STAR_PEEK_NEXT); 427 } else { 428 addOpcode(OPCode.ANYCHAR_STAR_PEEK_NEXT); 429 } 430 final StringNode sn = (StringNode)qn.nextHeadExact; 431 addChars(sn.chars, sn.p, 1); 432 return; 433 } 434 if (isMultiline(regex.options)) { 435 addOpcode(OPCode.ANYCHAR_ML_STAR); 436 } else { 437 addOpcode(OPCode.ANYCHAR_STAR); 438 } 439 return; 440 } 441 442 int modTLen; 443 if (emptyInfo != 0) { 444 modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END); 445 } else { 446 modTLen = tlen; 447 } 448 if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 449 if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { 450 if (qn.greedy) { 451 if (qn.headExact != null) { 452 addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_OR_JUMP_EXACT1); 453 } else if (qn.nextHeadExact != null) { 454 addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_IF_PEEK_NEXT); 455 } else { 456 addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH); 457 } 458 } else { 459 addOpcodeRelAddr(OPCode.JUMP, OPSize.JUMP); 460 } 461 } else { 462 compileTreeNTimes(qn.target, qn.lower); 463 } 464 465 if (qn.greedy) { 466 if (qn.headExact != null) { 467 addOpcodeRelAddr(OPCode.PUSH_OR_JUMP_EXACT1, modTLen + OPSize.JUMP); 468 final StringNode sn = (StringNode)qn.headExact; 469 addChars(sn.chars, sn.p, 1); 470 compileTreeEmptyCheck(qn.target, emptyInfo); 471 addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_OR_JUMP_EXACT1)); 472 } else if (qn.nextHeadExact != null) { 473 addOpcodeRelAddr(OPCode.PUSH_IF_PEEK_NEXT, modTLen + OPSize.JUMP); 474 final StringNode sn = (StringNode)qn.nextHeadExact; 475 addChars(sn.chars, sn.p, 1); 476 compileTreeEmptyCheck(qn.target, emptyInfo); 477 addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_IF_PEEK_NEXT)); 478 } else { 479 addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP); 480 compileTreeEmptyCheck(qn.target, emptyInfo); 481 addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH)); 482 } 483 } else { 484 addOpcodeRelAddr(OPCode.JUMP, modTLen); 485 compileTreeEmptyCheck(qn.target, emptyInfo); 486 addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH)); 487 } 488 } else if (qn.upper == 0 && qn.isRefered) { /* /(?<n>..){0}/ */ 489 addOpcodeRelAddr(OPCode.JUMP, tlen); 490 compileTree(qn.target); 491 } else if (!infinite && qn.greedy && 492 (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 493 final int n = qn.upper - qn.lower; 494 compileTreeNTimes(qn.target, qn.lower); 495 496 for (int i=0; i<n; i++) { 497 addOpcodeRelAddr(OPCode.PUSH, (n - i) * tlen + (n - i - 1) * OPSize.PUSH); 498 compileTree(qn.target); 499 } 500 } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */ 501 addOpcodeRelAddr(OPCode.PUSH, OPSize.JUMP); 502 addOpcodeRelAddr(OPCode.JUMP, tlen); 503 compileTree(qn.target); 504 } else { 505 compileRangeRepeatNode(qn, modTLen, emptyInfo); 506 } 507 } 508 compileLengthOptionNode(final EncloseNode node)509 private int compileLengthOptionNode(final EncloseNode node) { 510 final int prev = regex.options; 511 regex.options = node.option; 512 final int tlen = compileLengthTree(node.target); 513 regex.options = prev; 514 515 if (isDynamic(prev ^ node.option)) { 516 return OPSize.SET_OPTION_PUSH + OPSize.SET_OPTION + OPSize.FAIL + tlen + OPSize.SET_OPTION; 517 } 518 return tlen; 519 } 520 521 @Override compileOptionNode(final EncloseNode node)522 protected void compileOptionNode(final EncloseNode node) { 523 final int prev = regex.options; 524 525 if (isDynamic(prev ^ node.option)) { 526 addOpcodeOption(OPCode.SET_OPTION_PUSH, node.option); 527 addOpcodeOption(OPCode.SET_OPTION, prev); 528 addOpcode(OPCode.FAIL); 529 } 530 531 regex.options = node.option; 532 compileTree(node.target); 533 regex.options = prev; 534 535 if (isDynamic(prev ^ node.option)) { 536 addOpcodeOption(OPCode.SET_OPTION, prev); 537 } 538 } 539 compileLengthEncloseNode(final EncloseNode node)540 private int compileLengthEncloseNode(final EncloseNode node) { 541 if (node.isOption()) { 542 return compileLengthOptionNode(node); 543 } 544 545 int tlen; 546 if (node.target != null) { 547 tlen = compileLengthTree(node.target); 548 } else { 549 tlen = 0; 550 } 551 552 int len; 553 switch (node.type) { 554 case EncloseType.MEMORY: 555 if (bsAt(regex.btMemStart, node.regNum)) { 556 len = OPSize.MEMORY_START_PUSH; 557 } else { 558 len = OPSize.MEMORY_START; 559 } 560 len += tlen + (bsAt(regex.btMemEnd, node.regNum) ? OPSize.MEMORY_END_PUSH : OPSize.MEMORY_END); 561 break; 562 563 case EncloseType.STOP_BACKTRACK: 564 if (node.isStopBtSimpleRepeat()) { 565 final QuantifierNode qn = (QuantifierNode)node.target; 566 tlen = compileLengthTree(qn.target); 567 len = tlen * qn.lower + OPSize.PUSH + tlen + OPSize.POP + OPSize.JUMP; 568 } else { 569 len = OPSize.PUSH_STOP_BT + tlen + OPSize.POP_STOP_BT; 570 } 571 break; 572 573 default: 574 newInternalException(ERR_PARSER_BUG); 575 return 0; // not reached 576 } // switch 577 return len; 578 } 579 580 @Override compileEncloseNode(final EncloseNode node)581 protected void compileEncloseNode(final EncloseNode node) { 582 int len; 583 switch (node.type) { 584 case EncloseType.MEMORY: 585 if (bsAt(regex.btMemStart, node.regNum)) { 586 addOpcode(OPCode.MEMORY_START_PUSH); 587 } else { 588 addOpcode(OPCode.MEMORY_START); 589 } 590 591 addMemNum(node.regNum); 592 compileTree(node.target); 593 594 if (bsAt(regex.btMemEnd, node.regNum)) { 595 addOpcode(OPCode.MEMORY_END_PUSH); 596 } else { 597 addOpcode(OPCode.MEMORY_END); 598 } 599 addMemNum(node.regNum); 600 break; 601 602 case EncloseType.STOP_BACKTRACK: 603 if (node.isStopBtSimpleRepeat()) { 604 final QuantifierNode qn = (QuantifierNode)node.target; 605 606 compileTreeNTimes(qn.target, qn.lower); 607 608 len = compileLengthTree(qn.target); 609 addOpcodeRelAddr(OPCode.PUSH, len + OPSize.POP + OPSize.JUMP); 610 compileTree(qn.target); 611 addOpcode(OPCode.POP); 612 addOpcodeRelAddr(OPCode.JUMP, -(OPSize.PUSH + len + OPSize.POP + OPSize.JUMP)); 613 } else { 614 addOpcode(OPCode.PUSH_STOP_BT); 615 compileTree(node.target); 616 addOpcode(OPCode.POP_STOP_BT); 617 } 618 break; 619 620 default: 621 newInternalException(ERR_PARSER_BUG); 622 break; 623 } // switch 624 } 625 compileLengthAnchorNode(final AnchorNode node)626 private int compileLengthAnchorNode(final AnchorNode node) { 627 int tlen; 628 if (node.target != null) { 629 tlen = compileLengthTree(node.target); 630 } else { 631 tlen = 0; 632 } 633 634 int len; 635 switch (node.type) { 636 case AnchorType.PREC_READ: 637 len = OPSize.PUSH_POS + tlen + OPSize.POP_POS; 638 break; 639 640 case AnchorType.PREC_READ_NOT: 641 len = OPSize.PUSH_POS_NOT + tlen + OPSize.FAIL_POS; 642 break; 643 644 case AnchorType.LOOK_BEHIND: 645 len = OPSize.LOOK_BEHIND + tlen; 646 break; 647 648 case AnchorType.LOOK_BEHIND_NOT: 649 len = OPSize.PUSH_LOOK_BEHIND_NOT + tlen + OPSize.FAIL_LOOK_BEHIND_NOT; 650 break; 651 652 default: 653 len = OPSize.OPCODE; 654 break; 655 } // switch 656 return len; 657 } 658 659 @Override compileAnchorNode(final AnchorNode node)660 protected void compileAnchorNode(final AnchorNode node) { 661 int len; 662 int n; 663 664 switch (node.type) { 665 case AnchorType.BEGIN_BUF: addOpcode(OPCode.BEGIN_BUF); break; 666 case AnchorType.END_BUF: addOpcode(OPCode.END_BUF); break; 667 case AnchorType.BEGIN_LINE: addOpcode(OPCode.BEGIN_LINE); break; 668 case AnchorType.END_LINE: addOpcode(OPCode.END_LINE); break; 669 case AnchorType.SEMI_END_BUF: addOpcode(OPCode.SEMI_END_BUF); break; 670 case AnchorType.BEGIN_POSITION: addOpcode(OPCode.BEGIN_POSITION); break; 671 672 case AnchorType.WORD_BOUND: 673 addOpcode(OPCode.WORD_BOUND); 674 break; 675 676 case AnchorType.NOT_WORD_BOUND: 677 addOpcode(OPCode.NOT_WORD_BOUND); 678 break; 679 680 case AnchorType.WORD_BEGIN: 681 if (Config.USE_WORD_BEGIN_END) { 682 addOpcode(OPCode.WORD_BEGIN); 683 } 684 break; 685 686 case AnchorType.WORD_END: 687 if (Config.USE_WORD_BEGIN_END) { 688 addOpcode(OPCode.WORD_END); 689 } 690 break; 691 692 case AnchorType.PREC_READ: 693 addOpcode(OPCode.PUSH_POS); 694 compileTree(node.target); 695 addOpcode(OPCode.POP_POS); 696 break; 697 698 case AnchorType.PREC_READ_NOT: 699 len = compileLengthTree(node.target); 700 addOpcodeRelAddr(OPCode.PUSH_POS_NOT, len + OPSize.FAIL_POS); 701 compileTree(node.target); 702 addOpcode(OPCode.FAIL_POS); 703 break; 704 705 case AnchorType.LOOK_BEHIND: 706 addOpcode(OPCode.LOOK_BEHIND); 707 if (node.charLength < 0) { 708 n = analyser.getCharLengthTree(node.target); 709 if (analyser.returnCode != 0) { 710 newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 711 } 712 } else { 713 n = node.charLength; 714 } 715 addLength(n); 716 compileTree(node.target); 717 break; 718 719 case AnchorType.LOOK_BEHIND_NOT: 720 len = compileLengthTree(node.target); 721 addOpcodeRelAddr(OPCode.PUSH_LOOK_BEHIND_NOT, len + OPSize.FAIL_LOOK_BEHIND_NOT); 722 if (node.charLength < 0) { 723 n = analyser.getCharLengthTree(node.target); 724 if (analyser.returnCode != 0) { 725 newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 726 } 727 } else { 728 n = node.charLength; 729 } 730 addLength(n); 731 compileTree(node.target); 732 addOpcode(OPCode.FAIL_LOOK_BEHIND_NOT); 733 break; 734 735 default: 736 newInternalException(ERR_PARSER_BUG); 737 } // switch 738 } 739 compileLengthTree(final Node node)740 private int compileLengthTree(final Node node) { 741 int len = 0; 742 743 switch (node.getType()) { 744 case NodeType.LIST: 745 ConsAltNode lin = (ConsAltNode)node; 746 do { 747 len += compileLengthTree(lin.car); 748 } while ((lin = lin.cdr) != null); 749 break; 750 751 case NodeType.ALT: 752 ConsAltNode aln = (ConsAltNode)node; 753 int n = 0; 754 do { 755 len += compileLengthTree(aln.car); 756 n++; 757 } while ((aln = aln.cdr) != null); 758 len += (OPSize.PUSH + OPSize.JUMP) * (n - 1); 759 break; 760 761 case NodeType.STR: 762 final StringNode sn = (StringNode)node; 763 if (sn.isRaw()) { 764 len = compileLengthStringRawNode(sn); 765 } else { 766 len = compileLengthStringNode(sn); 767 } 768 break; 769 770 case NodeType.CCLASS: 771 len = compileLengthCClassNode((CClassNode)node); 772 break; 773 774 case NodeType.CTYPE: 775 case NodeType.CANY: 776 len = OPSize.OPCODE; 777 break; 778 779 case NodeType.BREF: 780 final BackRefNode br = (BackRefNode)node; 781 782 len = ((!isIgnoreCase(regex.options) && br.backRef <= 2) 783 ? OPSize.OPCODE : (OPSize.OPCODE + OPSize.MEMNUM)); 784 break; 785 786 case NodeType.QTFR: 787 len = compileNonCECLengthQuantifierNode((QuantifierNode)node); 788 break; 789 790 case NodeType.ENCLOSE: 791 len = compileLengthEncloseNode((EncloseNode)node); 792 break; 793 794 case NodeType.ANCHOR: 795 len = compileLengthAnchorNode((AnchorNode)node); 796 break; 797 798 default: 799 newInternalException(ERR_PARSER_BUG); 800 801 } //switch 802 return len; 803 } 804 ensure(final int size)805 private void ensure(final int size) { 806 if (size >= code.length) { 807 int length = code.length << 1; 808 while (length <= size) { 809 length <<= 1; 810 } 811 final int[]tmp = new int[length]; 812 System.arraycopy(code, 0, tmp, 0, code.length); 813 code = tmp; 814 } 815 } 816 addInt(final int i)817 private void addInt(final int i) { 818 if (codeLength >= code.length) { 819 final int[]tmp = new int[code.length << 1]; 820 System.arraycopy(code, 0, tmp, 0, code.length); 821 code = tmp; 822 } 823 code[codeLength++] = i; 824 } 825 setInt(final int i, final int offset)826 void setInt(final int i, final int offset) { 827 ensure(offset); 828 regex.code[offset] = i; 829 } 830 addObject(final Object o)831 private void addObject(final Object o) { 832 if (regex.operands == null) { 833 regex.operands = new Object[4]; 834 } else if (regex.operandLength >= regex.operands.length) { 835 final Object[]tmp = new Object[regex.operands.length << 1]; 836 System.arraycopy(regex.operands, 0, tmp, 0, regex.operands.length); 837 regex.operands = tmp; 838 } 839 addInt(regex.operandLength); 840 regex.operands[regex.operandLength++] = o; 841 } 842 addChars(final char[] chars, final int pp ,final int length)843 private void addChars(final char[] chars, final int pp ,final int length) { 844 ensure(codeLength + length); 845 int p = pp; 846 final int end = p + length; 847 848 while (p < end) { 849 code[codeLength++] = chars[p++]; 850 } 851 } 852 addInts(final int[]ints, final int length)853 private void addInts(final int[]ints, final int length) { 854 ensure(codeLength + length); 855 System.arraycopy(ints, 0, code, codeLength, length); 856 codeLength += length; 857 } 858 addOpcode(final int opcode)859 private void addOpcode(final int opcode) { 860 addInt(opcode); 861 862 switch(opcode) { 863 case OPCode.ANYCHAR_STAR: 864 case OPCode.ANYCHAR_ML_STAR: 865 case OPCode.ANYCHAR_STAR_PEEK_NEXT: 866 case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: 867 case OPCode.STATE_CHECK_ANYCHAR_STAR: 868 case OPCode.STATE_CHECK_ANYCHAR_ML_STAR: 869 case OPCode.MEMORY_START_PUSH: 870 case OPCode.MEMORY_END_PUSH: 871 case OPCode.MEMORY_END_PUSH_REC: 872 case OPCode.MEMORY_END_REC: 873 case OPCode.NULL_CHECK_START: 874 case OPCode.NULL_CHECK_END_MEMST_PUSH: 875 case OPCode.PUSH: 876 case OPCode.STATE_CHECK_PUSH: 877 case OPCode.STATE_CHECK_PUSH_OR_JUMP: 878 case OPCode.STATE_CHECK: 879 case OPCode.PUSH_OR_JUMP_EXACT1: 880 case OPCode.PUSH_IF_PEEK_NEXT: 881 case OPCode.REPEAT: 882 case OPCode.REPEAT_NG: 883 case OPCode.REPEAT_INC_SG: 884 case OPCode.REPEAT_INC_NG: 885 case OPCode.REPEAT_INC_NG_SG: 886 case OPCode.PUSH_POS: 887 case OPCode.PUSH_POS_NOT: 888 case OPCode.PUSH_STOP_BT: 889 case OPCode.PUSH_LOOK_BEHIND_NOT: 890 case OPCode.CALL: 891 case OPCode.RETURN: // it will appear only with CALL though 892 regex.stackNeeded = true; 893 break; 894 default: 895 break; 896 } 897 } 898 899 @SuppressWarnings("unused") addStateCheckNum(final int num)900 private void addStateCheckNum(final int num) { 901 addInt(num); 902 } 903 addRelAddr(final int addr)904 private void addRelAddr(final int addr) { 905 addInt(addr); 906 } 907 908 @SuppressWarnings("unused") addAbsAddr(final int addr)909 private void addAbsAddr(final int addr) { 910 addInt(addr); 911 } 912 addLength(final int length)913 private void addLength(final int length) { 914 addInt(length); 915 } 916 addMemNum(final int num)917 private void addMemNum(final int num) { 918 addInt(num); 919 } 920 addPointer(final Object o)921 private void addPointer(final Object o) { 922 addObject(o); 923 } 924 addOption(final int option)925 private void addOption(final int option) { 926 addInt(option); 927 } 928 addOpcodeRelAddr(final int opcode, final int addr)929 private void addOpcodeRelAddr(final int opcode, final int addr) { 930 addOpcode(opcode); 931 addRelAddr(addr); 932 } 933 addOpcodeOption(final int opcode, final int option)934 private void addOpcodeOption(final int opcode, final int option) { 935 addOpcode(opcode); 936 addOption(option); 937 } 938 addTemplate(final char[] chars)939 private void addTemplate(final char[] chars) { 940 if (templateNum == 0) { 941 templates = new char[2][]; 942 } else if (templateNum == templates.length) { 943 final char[][] tmp = new char[templateNum * 2][]; 944 System.arraycopy(templates, 0, tmp, 0, templateNum); 945 templates = tmp; 946 } 947 templates[templateNum++] = chars; 948 } 949 } 950