1 /* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20 package jdk.nashorn.internal.runtime.regexp.joni; 21 22 import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt; 23 import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isNewLine; 24 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition; 25 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest; 26 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindNotEmpty; 27 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotBol; 28 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotEol; 29 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isPosixRegion; 30 import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; 31 import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode; 32 import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder; 33 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; 34 import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; 35 36 class ByteCodeMachine extends StackMachine { 37 private int bestLen; // return value 38 private int s = 0; // current char 39 40 private int range; // right range 41 private int sprev; 42 private int sstart; 43 private int sbegin; 44 45 private final int[] code; // byte code 46 private int ip; // instruction pointer 47 ByteCodeMachine(final Regex regex, final char[] chars, final int p, final int end)48 ByteCodeMachine(final Regex regex, final char[] chars, final int p, final int end) { 49 super(regex, chars, p, end); 50 this.code = regex.code; 51 } 52 stringCmpIC(final int caseFlodFlag, final int s1p, final IntHolder ps2, final int mbLen, final int textEnd)53 private boolean stringCmpIC(final int caseFlodFlag, final int s1p, final IntHolder ps2, final int mbLen, final int textEnd) { 54 int s1 = s1p; 55 int s2 = ps2.value; 56 final int end1 = s1 + mbLen; 57 58 while (s1 < end1) { 59 final char c1 = EncodingHelper.toLowerCase(chars[s1++]); 60 final char c2 = EncodingHelper.toLowerCase(chars[s2++]); 61 62 if (c1 != c2) { 63 return false; 64 } 65 } 66 ps2.value = s2; 67 return true; 68 } 69 debugMatchBegin()70 private void debugMatchBegin() { 71 Config.log.println("match_at: " + 72 "str: " + str + 73 ", end: " + end + 74 ", start: " + this.sstart + 75 ", sprev: " + this.sprev); 76 Config.log.println("size: " + (end - str) + ", start offset: " + (this.sstart - str)); 77 } 78 debugMatchLoop()79 private void debugMatchLoop() { 80 if (Config.DEBUG_MATCH) { 81 Config.log.printf("%4d", (s - str)).print("> \""); 82 int q, i; 83 for (i=0, q=s; i<7 && q<end && s>=0; i++) { 84 if (q < end) { 85 Config.log.print(new String(new char[]{chars[q++]})); 86 } 87 } 88 final String string = q < end ? "...\"" : "\""; 89 q += string.length(); 90 Config.log.print(string); 91 for (i=0; i<20-(q-s);i++) { 92 Config.log.print(" "); 93 } 94 final StringBuilder sb = new StringBuilder(); 95 new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip); 96 Config.log.println(sb.toString()); 97 } 98 } 99 100 @Override 101 protected final int matchAt(final int r, final int ss, final int sp) { 102 this.range = r; 103 this.sstart = ss; 104 this.sprev = sp; 105 106 stk = 0; 107 ip = 0; 108 109 if (Config.DEBUG_MATCH) { 110 debugMatchBegin(); 111 } 112 113 init(); 114 115 bestLen = -1; 116 s = ss; 117 118 final int[] c = this.code; 119 while (true) { 120 if (Config.DEBUG_MATCH) { 121 debugMatchLoop(); 122 } 123 124 sbegin = s; 125 switch (c[ip++]) { 126 case OPCode.END: if (opEnd()) { 127 return finish(); 128 } break; 129 case OPCode.EXACT1: opExact1(); break; 130 case OPCode.EXACT2: opExact2(); continue; 131 case OPCode.EXACT3: opExact3(); continue; 132 case OPCode.EXACT4: opExact4(); continue; 133 case OPCode.EXACT5: opExact5(); continue; 134 case OPCode.EXACTN: opExactN(); continue; 135 136 case OPCode.EXACT1_IC: opExact1IC(); break; 137 case OPCode.EXACTN_IC: opExactNIC(); continue; 138 139 case OPCode.CCLASS: opCClass(); break; 140 case OPCode.CCLASS_MB: opCClassMB(); break; 141 case OPCode.CCLASS_MIX: opCClassMIX(); break; 142 case OPCode.CCLASS_NOT: opCClassNot(); break; 143 case OPCode.CCLASS_MB_NOT: opCClassMBNot(); break; 144 case OPCode.CCLASS_MIX_NOT: opCClassMIXNot(); break; 145 case OPCode.CCLASS_NODE: opCClassNode(); break; 146 147 case OPCode.ANYCHAR: opAnyChar(); break; 148 case OPCode.ANYCHAR_ML: opAnyCharML(); break; 149 case OPCode.ANYCHAR_STAR: opAnyCharStar(); break; 150 case OPCode.ANYCHAR_ML_STAR: opAnyCharMLStar(); break; 151 case OPCode.ANYCHAR_STAR_PEEK_NEXT: opAnyCharStarPeekNext(); break; 152 case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: opAnyCharMLStarPeekNext(); break; 153 154 case OPCode.WORD: opWord(); break; 155 case OPCode.NOT_WORD: opNotWord(); break; 156 case OPCode.WORD_BOUND: opWordBound(); continue; 157 case OPCode.NOT_WORD_BOUND: opNotWordBound(); continue; 158 case OPCode.WORD_BEGIN: opWordBegin(); continue; 159 case OPCode.WORD_END: opWordEnd(); continue; 160 161 case OPCode.BEGIN_BUF: opBeginBuf(); continue; 162 case OPCode.END_BUF: opEndBuf(); continue; 163 case OPCode.BEGIN_LINE: opBeginLine(); continue; 164 case OPCode.END_LINE: opEndLine(); continue; 165 case OPCode.SEMI_END_BUF: opSemiEndBuf(); continue; 166 case OPCode.BEGIN_POSITION: opBeginPosition(); continue; 167 168 case OPCode.MEMORY_START_PUSH: opMemoryStartPush(); continue; 169 case OPCode.MEMORY_START: opMemoryStart(); continue; 170 case OPCode.MEMORY_END_PUSH: opMemoryEndPush(); continue; 171 case OPCode.MEMORY_END: opMemoryEnd(); continue; 172 case OPCode.MEMORY_END_PUSH_REC: opMemoryEndPushRec(); continue; 173 case OPCode.MEMORY_END_REC: opMemoryEndRec(); continue; 174 175 case OPCode.BACKREF1: opBackRef1(); continue; 176 case OPCode.BACKREF2: opBackRef2(); continue; 177 case OPCode.BACKREFN: opBackRefN(); continue; 178 case OPCode.BACKREFN_IC: opBackRefNIC(); continue; 179 case OPCode.BACKREF_MULTI: opBackRefMulti(); continue; 180 case OPCode.BACKREF_MULTI_IC: opBackRefMultiIC(); continue; 181 case OPCode.BACKREF_WITH_LEVEL: opBackRefAtLevel(); continue; 182 183 case OPCode.NULL_CHECK_START: opNullCheckStart(); continue; 184 case OPCode.NULL_CHECK_END: opNullCheckEnd(); continue; 185 case OPCode.NULL_CHECK_END_MEMST: opNullCheckEndMemST(); continue; 186 187 case OPCode.JUMP: opJump(); continue; 188 case OPCode.PUSH: opPush(); continue; 189 190 case OPCode.POP: opPop(); continue; 191 case OPCode.PUSH_OR_JUMP_EXACT1: opPushOrJumpExact1(); continue; 192 case OPCode.PUSH_IF_PEEK_NEXT: opPushIfPeekNext(); continue; 193 194 case OPCode.REPEAT: opRepeat(); continue; 195 case OPCode.REPEAT_NG: opRepeatNG(); continue; 196 case OPCode.REPEAT_INC: opRepeatInc(); continue; 197 case OPCode.REPEAT_INC_SG: opRepeatIncSG(); continue; 198 case OPCode.REPEAT_INC_NG: opRepeatIncNG(); continue; 199 case OPCode.REPEAT_INC_NG_SG: opRepeatIncNGSG(); continue; 200 201 case OPCode.PUSH_POS: opPushPos(); continue; 202 case OPCode.POP_POS: opPopPos(); continue; 203 case OPCode.PUSH_POS_NOT: opPushPosNot(); continue; 204 case OPCode.FAIL_POS: opFailPos(); continue; 205 case OPCode.PUSH_STOP_BT: opPushStopBT(); continue; 206 case OPCode.POP_STOP_BT: opPopStopBT(); continue; 207 208 case OPCode.LOOK_BEHIND: opLookBehind(); continue; 209 case OPCode.PUSH_LOOK_BEHIND_NOT: opPushLookBehindNot(); continue; 210 case OPCode.FAIL_LOOK_BEHIND_NOT: opFailLookBehindNot(); continue; 211 212 case OPCode.FINISH: 213 return finish(); 214 215 case OPCode.FAIL: opFail(); continue; 216 217 default: 218 throw new InternalException(ErrorMessages.ERR_UNDEFINED_BYTECODE); 219 220 } // main switch 221 } // main while 222 } 223 224 private boolean opEnd() { 225 final int n = s - sstart; 226 227 if (n > bestLen) { 228 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { 229 if (isFindLongest(regex.options)) { 230 if (n > msaBestLen) { 231 msaBestLen = n; 232 msaBestS = sstart; 233 } else { 234 // goto end_best_len; 235 return endBestLength(); 236 } 237 } 238 } // USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 239 240 bestLen = n; 241 final Region region = msaRegion; 242 if (region != null) { 243 // USE_POSIX_REGION_OPTION ... else ... 244 region.beg[0] = msaBegin = sstart - str; 245 region.end[0] = msaEnd = s - str; 246 for (int i = 1; i <= regex.numMem; i++) { 247 // opt! 248 if (repeatStk[memEndStk + i] != INVALID_INDEX) { 249 region.beg[i] = bsAt(regex.btMemStart, i) ? 250 stack[repeatStk[memStartStk + i]].getMemPStr() - str : 251 repeatStk[memStartStk + i] - str; 252 253 254 region.end[i] = bsAt(regex.btMemEnd, i) ? 255 stack[repeatStk[memEndStk + i]].getMemPStr() : 256 repeatStk[memEndStk + i] - str; 257 258 } else { 259 region.beg[i] = region.end[i] = Region.REGION_NOTPOS; 260 } 261 262 } 263 264 } else { 265 msaBegin = sstart - str; 266 msaEnd = s - str; 267 } 268 } else { 269 final Region region = msaRegion; 270 if (Config.USE_POSIX_API_REGION_OPTION) { 271 if (!isPosixRegion(regex.options)) { 272 if (region != null) { 273 region.clear(); 274 } else { 275 msaBegin = msaEnd = 0; 276 } 277 } 278 } else { 279 if (region != null) { 280 region.clear(); 281 } else { 282 msaBegin = msaEnd = 0; 283 } 284 } // USE_POSIX_REGION_OPTION 285 } 286 // end_best_len: 287 /* default behavior: return first-matching result. */ 288 return endBestLength(); 289 } 290 endBestLength()291 private boolean endBestLength() { 292 if (isFindCondition(regex.options)) { 293 if (isFindNotEmpty(regex.options) && s == sstart) { 294 bestLen = -1; 295 {opFail(); return false;} /* for retry */ 296 } 297 if (isFindLongest(regex.options) && s < range) { 298 {opFail(); return false;} /* for retry */ 299 } 300 } 301 // goto finish; 302 return true; 303 } 304 opExact1()305 private void opExact1() { 306 if (s >= range || code[ip] != chars[s++]) {opFail(); return;} 307 //if (s > range) {opFail(); return;} 308 ip++; 309 sprev = sbegin; // break; 310 } 311 opExact2()312 private void opExact2() { 313 if (s + 2 > range) {opFail(); return;} 314 if (code[ip] != chars[s]) {opFail(); return;} 315 ip++; s++; 316 if (code[ip] != chars[s]) {opFail(); return;} 317 sprev = s; 318 ip++; s++; 319 } 320 opExact3()321 private void opExact3() { 322 if (s + 3 > range) {opFail(); return;} 323 if (code[ip] != chars[s]) {opFail(); return;} 324 ip++; s++; 325 if (code[ip] != chars[s]) {opFail(); return;} 326 ip++; s++; 327 if (code[ip] != chars[s]) {opFail(); return;} 328 sprev = s; 329 ip++; s++; 330 } 331 opExact4()332 private void opExact4() { 333 if (s + 4 > range) {opFail(); return;} 334 if (code[ip] != chars[s]) {opFail(); return;} 335 ip++; s++; 336 if (code[ip] != chars[s]) {opFail(); return;} 337 ip++; s++; 338 if (code[ip] != chars[s]) {opFail(); return;} 339 ip++; s++; 340 if (code[ip] != chars[s]) {opFail(); return;} 341 sprev = s; 342 ip++; s++; 343 } 344 opExact5()345 private void opExact5() { 346 if (s + 5 > range) {opFail(); return;} 347 if (code[ip] != chars[s]) {opFail(); return;} 348 ip++; s++; 349 if (code[ip] != chars[s]) {opFail(); return;} 350 ip++; s++; 351 if (code[ip] != chars[s]) {opFail(); return;} 352 ip++; s++; 353 if (code[ip] != chars[s]) {opFail(); return;} 354 ip++; s++; 355 if (code[ip] != chars[s]) {opFail(); return;} 356 sprev = s; 357 ip++; s++; 358 } 359 opExactN()360 private void opExactN() { 361 int tlen = code[ip++]; 362 if (s + tlen > range) {opFail(); return;} 363 364 if (Config.USE_STRING_TEMPLATES) { 365 final char[] bs = regex.templates[code[ip++]]; 366 int ps = code[ip++]; 367 368 while (tlen-- > 0) { 369 if (bs[ps++] != chars[s++]) {opFail(); return;} 370 } 371 372 } else { 373 while (tlen-- > 0) { 374 if (code[ip++] != chars[s++]) {opFail(); return;} 375 } 376 } 377 sprev = s - 1; 378 } 379 opExact1IC()380 private void opExact1IC() { 381 if (s >= range || code[ip] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;} 382 ip++; 383 sprev = sbegin; // break; 384 } 385 opExactNIC()386 private void opExactNIC() { 387 int tlen = code[ip++]; 388 if (s + tlen > range) {opFail(); return;} 389 390 if (Config.USE_STRING_TEMPLATES) { 391 final char[] bs = regex.templates[code[ip++]]; 392 int ps = code[ip++]; 393 394 while (tlen-- > 0) { 395 if (bs[ps++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;} 396 } 397 } else { 398 399 while (tlen-- > 0) { 400 if (code[ip++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;} 401 } 402 } 403 sprev = s - 1; 404 } 405 isInBitSet()406 private boolean isInBitSet() { 407 final int c = chars[s]; 408 return (c <= 0xff && (code[ip + (c >>> BitSet.ROOM_SHIFT)] & (1 << c)) != 0); 409 } 410 opCClass()411 private void opCClass() { 412 if (s >= range || !isInBitSet()) {opFail(); return;} 413 ip += BitSet.BITSET_SIZE; 414 s++; 415 sprev = sbegin; // break; 416 } 417 isInClassMB()418 private boolean isInClassMB() { 419 final int tlen = code[ip++]; 420 if (s >= range) { 421 return false; 422 } 423 final int ss = s; 424 s++; 425 final int c = chars[ss]; 426 if (!EncodingHelper.isInCodeRange(code, ip, c)) { 427 return false; 428 } 429 ip += tlen; 430 return true; 431 } 432 opCClassMB()433 private void opCClassMB() { 434 // beyond string check 435 if (s >= range || chars[s] <= 0xff) {opFail(); return;} 436 if (!isInClassMB()) {opFail(); return;} // not!!! 437 sprev = sbegin; // break; 438 } 439 opCClassMIX()440 private void opCClassMIX() { 441 if (s >= range) {opFail(); return;} 442 if (chars[s] > 0xff) { 443 ip += BitSet.BITSET_SIZE; 444 if (!isInClassMB()) {opFail(); return;} 445 } else { 446 if (!isInBitSet()) {opFail(); return;} 447 ip += BitSet.BITSET_SIZE; 448 final int tlen = code[ip++]; // by code range length 449 ip += tlen; 450 s++; 451 } 452 sprev = sbegin; // break; 453 } 454 opCClassNot()455 private void opCClassNot() { 456 if (s >= range || isInBitSet()) {opFail(); return;} 457 ip += BitSet.BITSET_SIZE; 458 s++; 459 sprev = sbegin; // break; 460 } 461 isNotInClassMB()462 private boolean isNotInClassMB() { 463 final int tlen = code[ip++]; 464 465 if (!(s + 1 <= range)) { 466 if (s >= range) { 467 return false; 468 } 469 s = end; 470 ip += tlen; 471 return true; 472 } 473 474 final int ss = s; 475 s++; 476 final int c = chars[ss]; 477 478 if (EncodingHelper.isInCodeRange(code, ip, c)) { 479 return false; 480 } 481 ip += tlen; 482 return true; 483 } 484 opCClassMBNot()485 private void opCClassMBNot() { 486 if (s >= range) {opFail(); return;} 487 if (chars[s] <= 0xff) { 488 s++; 489 final int tlen = code[ip++]; 490 ip += tlen; 491 sprev = sbegin; // break; 492 return; 493 } 494 if (!isNotInClassMB()) {opFail(); return;} 495 sprev = sbegin; // break; 496 } 497 opCClassMIXNot()498 private void opCClassMIXNot() { 499 if (s >= range) {opFail(); return;} 500 if (chars[s] > 0xff) { 501 ip += BitSet.BITSET_SIZE; 502 if (!isNotInClassMB()) {opFail(); return;} 503 } else { 504 if (isInBitSet()) {opFail(); return;} 505 ip += BitSet.BITSET_SIZE; 506 final int tlen = code[ip++]; 507 ip += tlen; 508 s++; 509 } 510 sprev = sbegin; // break; 511 } 512 opCClassNode()513 private void opCClassNode() { 514 if (s >= range) {opFail(); return;} 515 final CClassNode cc = (CClassNode)regex.operands[code[ip++]]; 516 final int ss = s; 517 s++; 518 final int c = chars[ss]; 519 if (!cc.isCodeInCCLength(c)) {opFail(); return;} 520 sprev = sbegin; // break; 521 } 522 opAnyChar()523 private void opAnyChar() { 524 if (s >= range) {opFail(); return;} 525 if (isNewLine(chars[s])) {opFail(); return;} 526 s++; 527 sprev = sbegin; // break; 528 } 529 opAnyCharML()530 private void opAnyCharML() { 531 if (s >= range) {opFail(); return;} 532 s++; 533 sprev = sbegin; // break; 534 } 535 opAnyCharStar()536 private void opAnyCharStar() { 537 final char[] ch = this.chars; 538 while (s < range) { 539 pushAlt(ip, s, sprev); 540 if (isNewLine(ch, s, end)) {opFail(); return;} 541 sprev = s; 542 s++; 543 } 544 } 545 opAnyCharMLStar()546 private void opAnyCharMLStar() { 547 while (s < range) { 548 pushAlt(ip, s, sprev); 549 sprev = s; 550 s++; 551 } 552 } 553 opAnyCharStarPeekNext()554 private void opAnyCharStarPeekNext() { 555 final char c = (char)code[ip]; 556 final char[] ch = this.chars; 557 558 while (s < range) { 559 final char b = ch[s]; 560 if (c == b) { 561 pushAlt(ip + 1, s, sprev); 562 } 563 if (isNewLine(b)) {opFail(); return;} 564 sprev = s; 565 s++; 566 } 567 ip++; 568 sprev = sbegin; // break; 569 } 570 opAnyCharMLStarPeekNext()571 private void opAnyCharMLStarPeekNext() { 572 final char c = (char)code[ip]; 573 final char[] ch = this.chars; 574 575 while (s < range) { 576 if (c == ch[s]) { 577 pushAlt(ip + 1, s, sprev); 578 } 579 sprev = s; 580 s++; 581 } 582 ip++; 583 sprev = sbegin; // break; 584 } 585 opWord()586 private void opWord() { 587 if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;} 588 s++; 589 sprev = sbegin; // break; 590 } 591 opNotWord()592 private void opNotWord() { 593 if (s >= range || EncodingHelper.isWord(chars[s])) {opFail(); return;} 594 s++; 595 sprev = sbegin; // break; 596 } 597 opWordBound()598 private void opWordBound() { 599 if (s == str) { 600 if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;} 601 } else if (s == end) { 602 if (sprev >= end || !EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 603 } else { 604 if (EncodingHelper.isWord(chars[s]) == EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 605 } 606 } 607 opNotWordBound()608 private void opNotWordBound() { 609 if (s == str) { 610 if (s < range && EncodingHelper.isWord(chars[s])) {opFail(); return;} 611 } else if (s == end) { 612 if (sprev < end && EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 613 } else { 614 if (EncodingHelper.isWord(chars[s]) != EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 615 } 616 } 617 opWordBegin()618 private void opWordBegin() { 619 if (s < range && EncodingHelper.isWord(chars[s])) { 620 if (s == str || !EncodingHelper.isWord(chars[sprev])) { 621 return; 622 } 623 } 624 opFail(); 625 } 626 opWordEnd()627 private void opWordEnd() { 628 if (s != str && EncodingHelper.isWord(chars[sprev])) { 629 if (s == end || !EncodingHelper.isWord(chars[s])) { 630 return; 631 } 632 } 633 opFail(); 634 } 635 opBeginBuf()636 private void opBeginBuf() { 637 if (s != str) { 638 opFail(); 639 } 640 } 641 opEndBuf()642 private void opEndBuf() { 643 if (s != end) { 644 opFail(); 645 } 646 } 647 opBeginLine()648 private void opBeginLine() { 649 if (s == str) { 650 if (isNotBol(msaOptions)) { 651 opFail(); 652 } 653 return; 654 } else if (isNewLine(chars, sprev, end) && s != end) { 655 return; 656 } 657 opFail(); 658 } 659 opEndLine()660 private void opEndLine() { 661 if (s == end) { 662 if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 663 if (str == end || !isNewLine(chars, sprev, end)) { 664 if (isNotEol(msaOptions)) { 665 opFail(); 666 } 667 } 668 return; 669 } 670 if (isNotEol(msaOptions)) { 671 opFail(); 672 } 673 return; 674 } else if (isNewLine(chars, s, end)) { 675 return; 676 } 677 opFail(); 678 } 679 opSemiEndBuf()680 private void opSemiEndBuf() { 681 if (s == end) { 682 if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 683 if (str == end || !isNewLine(chars, sprev, end)) { 684 if (isNotEol(msaOptions)) { 685 opFail(); 686 } 687 } 688 return; 689 } 690 if (isNotEol(msaOptions)) { 691 opFail(); 692 } 693 return; 694 } else if (isNewLine(chars, s, end) && s + 1 == end) { 695 return; 696 } 697 opFail(); 698 } 699 opBeginPosition()700 private void opBeginPosition() { 701 if (s != msaStart) { 702 opFail(); 703 } 704 } 705 opMemoryStartPush()706 private void opMemoryStartPush() { 707 final int mem = code[ip++]; 708 pushMemStart(mem, s); 709 } 710 opMemoryStart()711 private void opMemoryStart() { 712 final int mem = code[ip++]; 713 repeatStk[memStartStk + mem] = s; 714 } 715 opMemoryEndPush()716 private void opMemoryEndPush() { 717 final int mem = code[ip++]; 718 pushMemEnd(mem, s); 719 } 720 opMemoryEnd()721 private void opMemoryEnd() { 722 final int mem = code[ip++]; 723 repeatStk[memEndStk + mem] = s; 724 } 725 opMemoryEndPushRec()726 private void opMemoryEndPushRec() { 727 final int mem = code[ip++]; 728 final int stkp = getMemStart(mem); /* should be before push mem-end. */ 729 pushMemEnd(mem, s); 730 repeatStk[memStartStk + mem] = stkp; 731 } 732 opMemoryEndRec()733 private void opMemoryEndRec() { 734 final int mem = code[ip++]; 735 repeatStk[memEndStk + mem] = s; 736 final int stkp = getMemStart(mem); 737 738 if (BitStatus.bsAt(regex.btMemStart, mem)) { 739 repeatStk[memStartStk + mem] = stkp; 740 } else { 741 repeatStk[memStartStk + mem] = stack[stkp].getMemPStr(); 742 } 743 744 pushMemEndMark(mem); 745 } 746 backrefInvalid(final int mem)747 private boolean backrefInvalid(final int mem) { 748 return repeatStk[memEndStk + mem] == INVALID_INDEX || repeatStk[memStartStk + mem] == INVALID_INDEX; 749 } 750 backrefStart(final int mem)751 private int backrefStart(final int mem) { 752 return bsAt(regex.btMemStart, mem) ? stack[repeatStk[memStartStk + mem]].getMemPStr() : repeatStk[memStartStk + mem]; 753 } 754 backrefEnd(final int mem)755 private int backrefEnd(final int mem) { 756 return bsAt(regex.btMemEnd, mem) ? stack[repeatStk[memEndStk + mem]].getMemPStr() : repeatStk[memEndStk + mem]; 757 } 758 backref(final int mem)759 private void backref(final int mem) { 760 /* if you want to remove following line, 761 you should check in parse and compile time. (numMem) */ 762 if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} 763 764 int pstart = backrefStart(mem); 765 final int pend = backrefEnd(mem); 766 767 int n = pend - pstart; 768 if (s + n > range) {opFail(); return;} 769 sprev = s; 770 771 // STRING_CMP 772 while(n-- > 0) { 773 if (chars[pstart++] != chars[s++]) {opFail(); return;} 774 } 775 776 // beyond string check 777 if (sprev < range) { 778 while (sprev + 1 < s) { 779 sprev++; 780 } 781 } 782 } 783 opBackRef1()784 private void opBackRef1() { 785 backref(1); 786 } 787 opBackRef2()788 private void opBackRef2() { 789 backref(2); 790 } 791 opBackRefN()792 private void opBackRefN() { 793 backref(code[ip++]); 794 } 795 opBackRefNIC()796 private void opBackRefNIC() { 797 final int mem = code[ip++]; 798 /* if you want to remove following line, 799 you should check in parse and compile time. (numMem) */ 800 if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} 801 802 final int pstart = backrefStart(mem); 803 final int pend = backrefEnd(mem); 804 805 final int n = pend - pstart; 806 if (s + n > range) {opFail(); return;} 807 sprev = s; 808 809 value = s; 810 if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) {opFail(); return;} 811 s = value; 812 813 // if (sprev < chars.length) 814 while (sprev + 1 < s) { 815 sprev++; 816 } 817 } 818 opBackRefMulti()819 private void opBackRefMulti() { 820 final int tlen = code[ip++]; 821 822 int i; 823 loop:for (i=0; i<tlen; i++) { 824 final int mem = code[ip++]; 825 if (backrefInvalid(mem)) { 826 continue; 827 } 828 829 int pstart = backrefStart(mem); 830 final int pend = backrefEnd(mem); 831 832 int n = pend - pstart; 833 if (s + n > range) {opFail(); return;} 834 835 sprev = s; 836 int swork = s; 837 838 while (n-- > 0) { 839 if (chars[pstart++] != chars[swork++]) { 840 continue loop; 841 } 842 } 843 844 s = swork; 845 846 // beyond string check 847 if (sprev < range) { 848 while (sprev + 1 < s) { 849 sprev++; 850 } 851 } 852 853 ip += tlen - i - 1; // * SIZE_MEMNUM (1) 854 break; /* success */ 855 } 856 if (i == tlen) {opFail(); return;} 857 } 858 opBackRefMultiIC()859 private void opBackRefMultiIC() { 860 final int tlen = code[ip++]; 861 862 int i; 863 loop:for (i=0; i<tlen; i++) { 864 final int mem = code[ip++]; 865 if (backrefInvalid(mem)) { 866 continue; 867 } 868 869 final int pstart = backrefStart(mem); 870 final int pend = backrefEnd(mem); 871 872 final int n = pend - pstart; 873 if (s + n > range) {opFail(); return;} 874 875 sprev = s; 876 877 value = s; 878 if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) 879 { 880 continue loop; // STRING_CMP_VALUE_IC 881 } 882 s = value; 883 884 // if (sprev < chars.length) 885 while (sprev + 1 < s) { 886 sprev++; 887 } 888 889 ip += tlen - i - 1; // * SIZE_MEMNUM (1) 890 break; /* success */ 891 } 892 if (i == tlen) {opFail(); return;} 893 } 894 memIsInMemp(final int mem, final int num, final int mempp)895 private boolean memIsInMemp(final int mem, final int num, final int mempp) { 896 for (int i=0, memp = mempp; i<num; i++) { 897 final int m = code[memp++]; 898 if (mem == m) { 899 return true; 900 } 901 } 902 return false; 903 } 904 905 // USE_BACKREF_AT_LEVEL // (s) and (end) implicit backrefMatchAtNestedLevel(final boolean ignoreCase, final int caseFoldFlag, final int nest, final int memNum, final int memp)906 private boolean backrefMatchAtNestedLevel(final boolean ignoreCase, final int caseFoldFlag, 907 final int nest, final int memNum, final int memp) { 908 int pend = -1; 909 int level = 0; 910 int k = stk - 1; 911 912 while (k >= 0) { 913 final StackEntry e = stack[k]; 914 915 if (e.type == CALL_FRAME) { 916 level--; 917 } else if (e.type == RETURN) { 918 level++; 919 } else if (level == nest) { 920 if (e.type == MEM_START) { 921 if (memIsInMemp(e.getMemNum(), memNum, memp)) { 922 final int pstart = e.getMemPStr(); 923 if (pend != -1) { 924 if (pend - pstart > end - s) { 925 return false; /* or goto next_mem; */ 926 } 927 int p = pstart; 928 929 value = s; 930 if (ignoreCase) { 931 if (!stringCmpIC(caseFoldFlag, pstart, this, pend - pstart, end)) { 932 return false; /* or goto next_mem; */ 933 } 934 } else { 935 while (p < pend) { 936 if (chars[p++] != chars[value++]) { 937 return false; /* or goto next_mem; */ 938 } 939 } 940 } 941 s = value; 942 943 return true; 944 } 945 } 946 } else if (e.type == MEM_END) { 947 if (memIsInMemp(e.getMemNum(), memNum, memp)) { 948 pend = e.getMemPStr(); 949 } 950 } 951 } 952 k--; 953 } 954 return false; 955 } 956 opBackRefAtLevel()957 private void opBackRefAtLevel() { 958 final int ic = code[ip++]; 959 final int level = code[ip++]; 960 final int tlen = code[ip++]; 961 962 sprev = s; 963 if (backrefMatchAtNestedLevel(ic != 0, regex.caseFoldFlag, level, tlen, ip)) { // (s) and (end) implicit 964 while (sprev + 1 < s) { 965 sprev++; 966 } 967 ip += tlen; // * SIZE_MEMNUM 968 } else { 969 {opFail(); return;} 970 } 971 } 972 opNullCheckStart()973 private void opNullCheckStart() { 974 final int mem = code[ip++]; 975 pushNullCheckStart(mem, s); 976 } 977 nullCheckFound()978 private void nullCheckFound() { 979 // null_check_found: 980 /* empty loop founded, skip next instruction */ 981 switch(code[ip++]) { 982 case OPCode.JUMP: 983 case OPCode.PUSH: 984 ip++; // p += SIZE_RELADDR; 985 break; 986 case OPCode.REPEAT_INC: 987 case OPCode.REPEAT_INC_NG: 988 case OPCode.REPEAT_INC_SG: 989 case OPCode.REPEAT_INC_NG_SG: 990 ip++; // p += SIZE_MEMNUM; 991 break; 992 default: 993 throw new InternalException(ErrorMessages.ERR_UNEXPECTED_BYTECODE); 994 } // switch 995 } 996 opNullCheckEnd()997 private void opNullCheckEnd() { 998 final int mem = code[ip++]; 999 final int isNull = nullCheck(mem, s); /* mem: null check id */ 1000 1001 if (isNull != 0) { 1002 if (Config.DEBUG_MATCH) { 1003 Config.log.println("NULL_CHECK_END: skip id:" + mem + ", s:" + s); 1004 } 1005 1006 nullCheckFound(); 1007 } 1008 } 1009 1010 // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK opNullCheckEndMemST()1011 private void opNullCheckEndMemST() { 1012 final int mem = code[ip++]; /* mem: null check id */ 1013 final int isNull = nullCheckMemSt(mem, s); 1014 1015 if (isNull != 0) { 1016 if (Config.DEBUG_MATCH) { 1017 Config.log.println("NULL_CHECK_END_MEMST: skip id:" + mem + ", s:" + s); 1018 } 1019 1020 if (isNull == -1) {opFail(); return;} 1021 nullCheckFound(); 1022 } 1023 } 1024 opJump()1025 private void opJump() { 1026 ip += code[ip] + 1; 1027 } 1028 opPush()1029 private void opPush() { 1030 final int addr = code[ip++]; 1031 pushAlt(ip + addr, s, sprev); 1032 } 1033 opPop()1034 private void opPop() { 1035 popOne(); 1036 } 1037 opPushOrJumpExact1()1038 private void opPushOrJumpExact1() { 1039 final int addr = code[ip++]; 1040 // beyond string check 1041 if (s < range && code[ip] == chars[s]) { 1042 ip++; 1043 pushAlt(ip + addr, s, sprev); 1044 return; 1045 } 1046 ip += addr + 1; 1047 } 1048 opPushIfPeekNext()1049 private void opPushIfPeekNext() { 1050 final int addr = code[ip++]; 1051 // beyond string check 1052 if (s < range && code[ip] == chars[s]) { 1053 ip++; 1054 pushAlt(ip + addr, s, sprev); 1055 return; 1056 } 1057 ip++; 1058 } 1059 opRepeat()1060 private void opRepeat() { 1061 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1062 final int addr= code[ip++]; 1063 1064 // ensure1(); 1065 repeatStk[mem] = stk; 1066 pushRepeat(mem, ip); 1067 1068 if (regex.repeatRangeLo[mem] == 0) { // lower 1069 pushAlt(ip + addr, s, sprev); 1070 } 1071 } 1072 opRepeatNG()1073 private void opRepeatNG() { 1074 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1075 final int addr= code[ip++]; 1076 1077 // ensure1(); 1078 repeatStk[mem] = stk; 1079 pushRepeat(mem, ip); 1080 1081 if (regex.repeatRangeLo[mem] == 0) { 1082 pushAlt(ip, s, sprev); 1083 ip += addr; 1084 } 1085 } 1086 repeatInc(final int mem, final int si)1087 private void repeatInc(final int mem, final int si) { 1088 final StackEntry e = stack[si]; 1089 1090 e.increaseRepeatCount(); 1091 1092 if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) { 1093 /* end of repeat. Nothing to do. */ 1094 } else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { 1095 pushAlt(ip, s, sprev); 1096 ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */ 1097 } else { 1098 ip = e.getRepeatPCode(); 1099 } 1100 pushRepeatInc(si); 1101 } 1102 opRepeatInc()1103 private void opRepeatInc() { 1104 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1105 final int si = repeatStk[mem]; 1106 repeatInc(mem, si); 1107 } 1108 opRepeatIncSG()1109 private void opRepeatIncSG() { 1110 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1111 final int si = getRepeat(mem); 1112 repeatInc(mem, si); 1113 } 1114 repeatIncNG(final int mem, final int si)1115 private void repeatIncNG(final int mem, final int si) { 1116 final StackEntry e = stack[si]; 1117 1118 e.increaseRepeatCount(); 1119 1120 if (e.getRepeatCount() < regex.repeatRangeHi[mem]) { 1121 if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { 1122 final int pcode = e.getRepeatPCode(); 1123 pushRepeatInc(si); 1124 pushAlt(pcode, s, sprev); 1125 } else { 1126 ip = e.getRepeatPCode(); 1127 pushRepeatInc(si); 1128 } 1129 } else if (e.getRepeatCount() == regex.repeatRangeHi[mem]) { 1130 pushRepeatInc(si); 1131 } 1132 } 1133 opRepeatIncNG()1134 private void opRepeatIncNG() { 1135 final int mem = code[ip++]; 1136 final int si = repeatStk[mem]; 1137 repeatIncNG(mem, si); 1138 } 1139 opRepeatIncNGSG()1140 private void opRepeatIncNGSG() { 1141 final int mem = code[ip++]; 1142 final int si = getRepeat(mem); 1143 repeatIncNG(mem, si); 1144 } 1145 opPushPos()1146 private void opPushPos() { 1147 pushPos(s, sprev); 1148 } 1149 opPopPos()1150 private void opPopPos() { 1151 final StackEntry e = stack[posEnd()]; 1152 s = e.getStatePStr(); 1153 sprev= e.getStatePStrPrev(); 1154 } 1155 opPushPosNot()1156 private void opPushPosNot() { 1157 final int addr = code[ip++]; 1158 pushPosNot(ip + addr, s, sprev); 1159 } 1160 opFailPos()1161 private void opFailPos() { 1162 popTilPosNot(); 1163 opFail(); 1164 } 1165 opPushStopBT()1166 private void opPushStopBT() { 1167 pushStopBT(); 1168 } 1169 opPopStopBT()1170 private void opPopStopBT() { 1171 stopBtEnd(); 1172 } 1173 opLookBehind()1174 private void opLookBehind() { 1175 final int tlen = code[ip++]; 1176 s = EncodingHelper.stepBack(str, s, tlen); 1177 if (s == -1) {opFail(); return;} 1178 sprev = EncodingHelper.prevCharHead(str, s); 1179 } 1180 opPushLookBehindNot()1181 private void opPushLookBehindNot() { 1182 final int addr = code[ip++]; 1183 final int tlen = code[ip++]; 1184 final int q = EncodingHelper.stepBack(str, s, tlen); 1185 if (q == -1) { 1186 /* too short case -> success. ex. /(?<!XXX)a/.match("a") 1187 If you want to change to fail, replace following line. */ 1188 ip += addr; 1189 // return FAIL; 1190 } else { 1191 pushLookBehindNot(ip + addr, s, sprev); 1192 s = q; 1193 sprev = EncodingHelper.prevCharHead(str, s); 1194 } 1195 } 1196 opFailLookBehindNot()1197 private void opFailLookBehindNot() { 1198 popTilLookBehindNot(); 1199 opFail(); 1200 } 1201 opFail()1202 private void opFail() { 1203 if (stack == null) { 1204 ip = regex.codeLength - 1; 1205 return; 1206 } 1207 1208 1209 final StackEntry e = pop(); 1210 ip = e.getStatePCode(); 1211 s = e.getStatePStr(); 1212 sprev = e.getStatePStrPrev(); 1213 } 1214 finish()1215 private int finish() { 1216 return bestLen; 1217 } 1218 } 1219