1 /*
2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
3  * this software and associated documentation files (the "Software"), to deal in
4  * the Software without restriction, including without limitation the rights to
5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6  * of the Software, and to permit persons to whom the Software is furnished to do
7  * so, subject to the following conditions:
8  *
9  * The above copyright notice and this permission notice shall be included in all
10  * copies or substantial portions of the Software.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18  * SOFTWARE.
19  */
20 package jdk.nashorn.internal.runtime.regexp.joni;
21 
22 import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
23 import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isNewLine;
24 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition;
25 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest;
26 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindNotEmpty;
27 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotBol;
28 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotEol;
29 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isPosixRegion;
30 import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
31 import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode;
32 import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
33 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
34 import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
35 
36 class ByteCodeMachine extends StackMachine {
    private int bestLen;          // return value: length of the best match recorded by opEnd (reset to -1 in matchAt)
    private int s = 0;            // current char (index into chars)

    private int range;            // right range: ops check s against this bound before reading chars[s]
    private int sprev;            // position tracked as "previous" to s; seeded from matchAt's sp argument
    private int sstart;           // position the current attempt started at (matchAt's ss argument)
    private int sbegin;           // value of s captured just before each instruction is dispatched

    private final int[] code;       // byte code
    private int ip;                 // instruction pointer
47 
ByteCodeMachine(final Regex regex, final char[] chars, final int p, final int end)48     ByteCodeMachine(final Regex regex, final char[] chars, final int p, final int end) {
49         super(regex, chars, p, end);
50         this.code = regex.code;
51     }
52 
stringCmpIC(final int caseFlodFlag, final int s1p, final IntHolder ps2, final int mbLen, final int textEnd)53     private boolean stringCmpIC(final int caseFlodFlag, final int s1p, final IntHolder ps2, final int mbLen, final int textEnd) {
54         int s1 = s1p;
55         int s2 = ps2.value;
56         final int end1 = s1 + mbLen;
57 
58         while (s1 < end1) {
59             final char c1 = EncodingHelper.toLowerCase(chars[s1++]);
60             final char c2 = EncodingHelper.toLowerCase(chars[s2++]);
61 
62             if (c1 != c2) {
63                 return false;
64             }
65         }
66         ps2.value = s2;
67         return true;
68     }
69 
debugMatchBegin()70     private void debugMatchBegin() {
71         Config.log.println("match_at: " +
72                 "str: " + str +
73                 ", end: " + end +
74                 ", start: " + this.sstart +
75                 ", sprev: " + this.sprev);
76         Config.log.println("size: " + (end - str) + ", start offset: " + (this.sstart - str));
77     }
78 
debugMatchLoop()79     private void debugMatchLoop() {
80         if (Config.DEBUG_MATCH) {
81             Config.log.printf("%4d", (s - str)).print("> \"");
82             int q, i;
83             for (i=0, q=s; i<7 && q<end && s>=0; i++) {
84                 if (q < end) {
85                     Config.log.print(new String(new char[]{chars[q++]}));
86                 }
87             }
88             final String string = q < end ? "...\"" : "\"";
89             q += string.length();
90             Config.log.print(string);
91             for (i=0; i<20-(q-s);i++) {
92                 Config.log.print(" ");
93             }
94             final StringBuilder sb = new StringBuilder();
95             new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip);
96             Config.log.println(sb.toString());
97         }
98     }
99 
100     @Override
101     protected final int matchAt(final int r, final int ss, final int sp) {
102         this.range = r;
103         this.sstart = ss;
104         this.sprev = sp;
105 
106         stk = 0;
107         ip = 0;
108 
109         if (Config.DEBUG_MATCH) {
110             debugMatchBegin();
111         }
112 
113         init();
114 
115         bestLen = -1;
116         s = ss;
117 
118         final int[] c = this.code;
119         while (true) {
120             if (Config.DEBUG_MATCH) {
121                 debugMatchLoop();
122             }
123 
124             sbegin = s;
125             switch (c[ip++]) {
126                 case OPCode.END:    if (opEnd()) {
127                     return finish();
128                 }                  break;
129                 case OPCode.EXACT1:                     opExact1();                break;
130                 case OPCode.EXACT2:                     opExact2();                continue;
131                 case OPCode.EXACT3:                     opExact3();                continue;
132                 case OPCode.EXACT4:                     opExact4();                continue;
133                 case OPCode.EXACT5:                     opExact5();                continue;
134                 case OPCode.EXACTN:                     opExactN();                continue;
135 
136                 case OPCode.EXACT1_IC:                  opExact1IC();              break;
137                 case OPCode.EXACTN_IC:                  opExactNIC();              continue;
138 
139                 case OPCode.CCLASS:                     opCClass();                break;
140                 case OPCode.CCLASS_MB:                  opCClassMB();              break;
141                 case OPCode.CCLASS_MIX:                 opCClassMIX();             break;
142                 case OPCode.CCLASS_NOT:                 opCClassNot();             break;
143                 case OPCode.CCLASS_MB_NOT:              opCClassMBNot();           break;
144                 case OPCode.CCLASS_MIX_NOT:             opCClassMIXNot();          break;
145                 case OPCode.CCLASS_NODE:                opCClassNode();            break;
146 
147                 case OPCode.ANYCHAR:                    opAnyChar();               break;
148                 case OPCode.ANYCHAR_ML:                 opAnyCharML();             break;
149                 case OPCode.ANYCHAR_STAR:               opAnyCharStar();           break;
150                 case OPCode.ANYCHAR_ML_STAR:            opAnyCharMLStar();         break;
151                 case OPCode.ANYCHAR_STAR_PEEK_NEXT:     opAnyCharStarPeekNext();   break;
152                 case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:  opAnyCharMLStarPeekNext(); break;
153 
154                 case OPCode.WORD:                       opWord();                  break;
155                 case OPCode.NOT_WORD:                   opNotWord();               break;
156                 case OPCode.WORD_BOUND:                 opWordBound();             continue;
157                 case OPCode.NOT_WORD_BOUND:             opNotWordBound();          continue;
158                 case OPCode.WORD_BEGIN:                 opWordBegin();             continue;
159                 case OPCode.WORD_END:                   opWordEnd();               continue;
160 
161                 case OPCode.BEGIN_BUF:                  opBeginBuf();              continue;
162                 case OPCode.END_BUF:                    opEndBuf();                continue;
163                 case OPCode.BEGIN_LINE:                 opBeginLine();             continue;
164                 case OPCode.END_LINE:                   opEndLine();               continue;
165                 case OPCode.SEMI_END_BUF:               opSemiEndBuf();            continue;
166                 case OPCode.BEGIN_POSITION:             opBeginPosition();         continue;
167 
168                 case OPCode.MEMORY_START_PUSH:          opMemoryStartPush();       continue;
169                 case OPCode.MEMORY_START:               opMemoryStart();           continue;
170                 case OPCode.MEMORY_END_PUSH:            opMemoryEndPush();         continue;
171                 case OPCode.MEMORY_END:                 opMemoryEnd();             continue;
172                 case OPCode.MEMORY_END_PUSH_REC:        opMemoryEndPushRec();      continue;
173                 case OPCode.MEMORY_END_REC:             opMemoryEndRec();          continue;
174 
175                 case OPCode.BACKREF1:                   opBackRef1();              continue;
176                 case OPCode.BACKREF2:                   opBackRef2();              continue;
177                 case OPCode.BACKREFN:                   opBackRefN();              continue;
178                 case OPCode.BACKREFN_IC:                opBackRefNIC();            continue;
179                 case OPCode.BACKREF_MULTI:              opBackRefMulti();          continue;
180                 case OPCode.BACKREF_MULTI_IC:           opBackRefMultiIC();        continue;
181                 case OPCode.BACKREF_WITH_LEVEL:         opBackRefAtLevel();        continue;
182 
183                 case OPCode.NULL_CHECK_START:           opNullCheckStart();        continue;
184                 case OPCode.NULL_CHECK_END:             opNullCheckEnd();          continue;
185                 case OPCode.NULL_CHECK_END_MEMST:       opNullCheckEndMemST();     continue;
186 
187                 case OPCode.JUMP:                       opJump();                  continue;
188                 case OPCode.PUSH:                       opPush();                  continue;
189 
190                 case OPCode.POP:                        opPop();                   continue;
191                 case OPCode.PUSH_OR_JUMP_EXACT1:        opPushOrJumpExact1();      continue;
192                 case OPCode.PUSH_IF_PEEK_NEXT:          opPushIfPeekNext();        continue;
193 
194                 case OPCode.REPEAT:                     opRepeat();                continue;
195                 case OPCode.REPEAT_NG:                  opRepeatNG();              continue;
196                 case OPCode.REPEAT_INC:                 opRepeatInc();             continue;
197                 case OPCode.REPEAT_INC_SG:              opRepeatIncSG();           continue;
198                 case OPCode.REPEAT_INC_NG:              opRepeatIncNG();           continue;
199                 case OPCode.REPEAT_INC_NG_SG:           opRepeatIncNGSG();         continue;
200 
201                 case OPCode.PUSH_POS:                   opPushPos();               continue;
202                 case OPCode.POP_POS:                    opPopPos();                continue;
203                 case OPCode.PUSH_POS_NOT:               opPushPosNot();            continue;
204                 case OPCode.FAIL_POS:                   opFailPos();               continue;
205                 case OPCode.PUSH_STOP_BT:               opPushStopBT();            continue;
206                 case OPCode.POP_STOP_BT:                opPopStopBT();             continue;
207 
208                 case OPCode.LOOK_BEHIND:                opLookBehind();            continue;
209                 case OPCode.PUSH_LOOK_BEHIND_NOT:       opPushLookBehindNot();     continue;
210                 case OPCode.FAIL_LOOK_BEHIND_NOT:       opFailLookBehindNot();     continue;
211 
212                 case OPCode.FINISH:
213                     return finish();
214 
215                 case OPCode.FAIL:                       opFail();                  continue;
216 
217                 default:
218                     throw new InternalException(ErrorMessages.ERR_UNDEFINED_BYTECODE);
219 
220             } // main switch
221         } // main while
222     }
223 
224     private boolean opEnd() {
225         final int n = s - sstart;
226 
227         if (n > bestLen) {
228             if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
229                 if (isFindLongest(regex.options)) {
230                     if (n > msaBestLen) {
231                         msaBestLen = n;
232                         msaBestS = sstart;
233                     } else {
234                         // goto end_best_len;
235                         return endBestLength();
236                     }
237                 }
238             } // USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
239 
240             bestLen = n;
241             final Region region = msaRegion;
242             if (region != null) {
243                 // USE_POSIX_REGION_OPTION ... else ...
244                 region.beg[0] = msaBegin = sstart - str;
245                 region.end[0] = msaEnd   = s      - str;
246                 for (int i = 1; i <= regex.numMem; i++) {
247                     // opt!
248                     if (repeatStk[memEndStk + i] != INVALID_INDEX) {
249                         region.beg[i] = bsAt(regex.btMemStart, i) ?
250                                         stack[repeatStk[memStartStk + i]].getMemPStr() - str :
251                                         repeatStk[memStartStk + i] - str;
252 
253 
254                         region.end[i] = bsAt(regex.btMemEnd, i) ?
255                                         stack[repeatStk[memEndStk + i]].getMemPStr() :
256                                         repeatStk[memEndStk + i] - str;
257 
258                     } else {
259                         region.beg[i] = region.end[i] = Region.REGION_NOTPOS;
260                     }
261 
262                 }
263 
264             } else {
265                 msaBegin = sstart - str;
266                 msaEnd   = s      - str;
267             }
268         } else {
269             final Region region = msaRegion;
270             if (Config.USE_POSIX_API_REGION_OPTION) {
271                 if (!isPosixRegion(regex.options)) {
272                     if (region != null) {
273                         region.clear();
274                     } else {
275                         msaBegin = msaEnd = 0;
276                     }
277                 }
278             } else {
279                 if (region != null) {
280                     region.clear();
281                 } else {
282                     msaBegin = msaEnd = 0;
283                 }
284             } // USE_POSIX_REGION_OPTION
285         }
286         // end_best_len:
287         /* default behavior: return first-matching result. */
288         return endBestLength();
289     }
290 
endBestLength()291     private boolean endBestLength() {
292         if (isFindCondition(regex.options)) {
293             if (isFindNotEmpty(regex.options) && s == sstart) {
294                 bestLen = -1;
295                 {opFail(); return false;} /* for retry */
296             }
297             if (isFindLongest(regex.options) && s < range) {
298                 {opFail(); return false;} /* for retry */
299             }
300         }
301         // goto finish;
302         return true;
303     }
304 
opExact1()305     private void opExact1() {
306         if (s >= range || code[ip] != chars[s++]) {opFail(); return;}
307         //if (s > range) {opFail(); return;}
308         ip++;
309         sprev = sbegin; // break;
310     }
311 
opExact2()312     private void opExact2() {
313         if (s + 2 > range) {opFail(); return;}
314         if (code[ip] != chars[s]) {opFail(); return;}
315         ip++; s++;
316         if (code[ip] != chars[s]) {opFail(); return;}
317         sprev = s;
318         ip++; s++;
319     }
320 
opExact3()321     private void opExact3() {
322         if (s + 3 > range) {opFail(); return;}
323         if (code[ip] != chars[s]) {opFail(); return;}
324         ip++; s++;
325         if (code[ip] != chars[s]) {opFail(); return;}
326         ip++; s++;
327         if (code[ip] != chars[s]) {opFail(); return;}
328         sprev = s;
329         ip++; s++;
330     }
331 
opExact4()332     private void opExact4() {
333         if (s + 4 > range) {opFail(); return;}
334         if (code[ip] != chars[s]) {opFail(); return;}
335         ip++; s++;
336         if (code[ip] != chars[s]) {opFail(); return;}
337         ip++; s++;
338         if (code[ip] != chars[s]) {opFail(); return;}
339         ip++; s++;
340         if (code[ip] != chars[s]) {opFail(); return;}
341         sprev = s;
342         ip++; s++;
343     }
344 
opExact5()345     private void opExact5() {
346         if (s + 5 > range) {opFail(); return;}
347         if (code[ip] != chars[s]) {opFail(); return;}
348         ip++; s++;
349         if (code[ip] != chars[s]) {opFail(); return;}
350         ip++; s++;
351         if (code[ip] != chars[s]) {opFail(); return;}
352         ip++; s++;
353         if (code[ip] != chars[s]) {opFail(); return;}
354         ip++; s++;
355         if (code[ip] != chars[s]) {opFail(); return;}
356         sprev = s;
357         ip++; s++;
358     }
359 
opExactN()360     private void opExactN() {
361         int tlen = code[ip++];
362         if (s + tlen > range) {opFail(); return;}
363 
364         if (Config.USE_STRING_TEMPLATES) {
365             final char[] bs = regex.templates[code[ip++]];
366             int ps = code[ip++];
367 
368             while (tlen-- > 0) {
369                 if (bs[ps++] != chars[s++]) {opFail(); return;}
370             }
371 
372         } else {
373             while (tlen-- > 0) {
374                 if (code[ip++] != chars[s++]) {opFail(); return;}
375             }
376         }
377         sprev = s - 1;
378     }
379 
opExact1IC()380     private void opExact1IC() {
381         if (s >= range || code[ip] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;}
382         ip++;
383         sprev = sbegin; // break;
384     }
385 
opExactNIC()386     private void opExactNIC() {
387         int tlen = code[ip++];
388         if (s + tlen > range) {opFail(); return;}
389 
390         if (Config.USE_STRING_TEMPLATES) {
391             final char[] bs = regex.templates[code[ip++]];
392             int ps = code[ip++];
393 
394             while (tlen-- > 0) {
395                 if (bs[ps++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;}
396             }
397         } else {
398 
399             while (tlen-- > 0) {
400                 if (code[ip++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;}
401             }
402         }
403         sprev = s - 1;
404     }
405 
isInBitSet()406     private boolean isInBitSet() {
407         final int c = chars[s];
408         return (c <= 0xff && (code[ip + (c >>> BitSet.ROOM_SHIFT)] & (1 << c)) != 0);
409     }
410 
opCClass()411     private void opCClass() {
412         if (s >= range || !isInBitSet()) {opFail(); return;}
413         ip += BitSet.BITSET_SIZE;
414         s++;
415         sprev = sbegin; // break;
416     }
417 
isInClassMB()418     private boolean isInClassMB() {
419         final int tlen = code[ip++];
420         if (s >= range) {
421             return false;
422         }
423         final int ss = s;
424         s++;
425         final int c = chars[ss];
426         if (!EncodingHelper.isInCodeRange(code, ip, c)) {
427             return false;
428         }
429         ip += tlen;
430         return true;
431     }
432 
opCClassMB()433     private void opCClassMB() {
434         // beyond string check
435         if (s >= range || chars[s] <= 0xff) {opFail(); return;}
436         if (!isInClassMB()) {opFail(); return;} // not!!!
437         sprev = sbegin; // break;
438     }
439 
opCClassMIX()440     private void opCClassMIX() {
441         if (s >= range) {opFail(); return;}
442         if (chars[s] > 0xff) {
443             ip += BitSet.BITSET_SIZE;
444             if (!isInClassMB()) {opFail(); return;}
445         } else {
446             if (!isInBitSet()) {opFail(); return;}
447             ip += BitSet.BITSET_SIZE;
448             final int tlen = code[ip++]; // by code range length
449             ip += tlen;
450             s++;
451         }
452         sprev = sbegin; // break;
453     }
454 
opCClassNot()455     private void opCClassNot() {
456         if (s >= range || isInBitSet()) {opFail(); return;}
457         ip += BitSet.BITSET_SIZE;
458         s++;
459         sprev = sbegin; // break;
460     }
461 
isNotInClassMB()462     private boolean isNotInClassMB() {
463         final int tlen = code[ip++];
464 
465         if (!(s + 1 <= range)) {
466             if (s >= range) {
467                 return false;
468             }
469             s = end;
470             ip += tlen;
471             return true;
472         }
473 
474         final int ss = s;
475         s++;
476         final int c = chars[ss];
477 
478         if (EncodingHelper.isInCodeRange(code, ip, c)) {
479             return false;
480         }
481         ip += tlen;
482         return true;
483     }
484 
opCClassMBNot()485     private void opCClassMBNot() {
486         if (s >= range) {opFail(); return;}
487         if (chars[s] <= 0xff) {
488             s++;
489             final int tlen = code[ip++];
490             ip += tlen;
491             sprev = sbegin; // break;
492             return;
493         }
494         if (!isNotInClassMB()) {opFail(); return;}
495         sprev = sbegin; // break;
496     }
497 
opCClassMIXNot()498     private void opCClassMIXNot() {
499         if (s >= range) {opFail(); return;}
500         if (chars[s] > 0xff) {
501             ip += BitSet.BITSET_SIZE;
502             if (!isNotInClassMB()) {opFail(); return;}
503         } else {
504             if (isInBitSet()) {opFail(); return;}
505             ip += BitSet.BITSET_SIZE;
506             final int tlen = code[ip++];
507             ip += tlen;
508             s++;
509         }
510         sprev = sbegin; // break;
511     }
512 
opCClassNode()513     private void opCClassNode() {
514         if (s >= range) {opFail(); return;}
515         final CClassNode cc = (CClassNode)regex.operands[code[ip++]];
516         final int ss = s;
517         s++;
518         final int c = chars[ss];
519         if (!cc.isCodeInCCLength(c)) {opFail(); return;}
520         sprev = sbegin; // break;
521     }
522 
opAnyChar()523     private void opAnyChar() {
524         if (s >= range) {opFail(); return;}
525         if (isNewLine(chars[s])) {opFail(); return;}
526         s++;
527         sprev = sbegin; // break;
528     }
529 
opAnyCharML()530     private void opAnyCharML() {
531         if (s >= range) {opFail(); return;}
532         s++;
533         sprev = sbegin; // break;
534     }
535 
opAnyCharStar()536     private void opAnyCharStar() {
537         final char[] ch = this.chars;
538         while (s < range) {
539             pushAlt(ip, s, sprev);
540             if (isNewLine(ch, s, end)) {opFail(); return;}
541             sprev = s;
542             s++;
543         }
544     }
545 
opAnyCharMLStar()546     private void opAnyCharMLStar() {
547         while (s < range) {
548             pushAlt(ip, s, sprev);
549             sprev = s;
550             s++;
551         }
552     }
553 
opAnyCharStarPeekNext()554     private void opAnyCharStarPeekNext() {
555         final char c = (char)code[ip];
556         final char[] ch = this.chars;
557 
558         while (s < range) {
559             final char b = ch[s];
560             if (c == b) {
561                 pushAlt(ip + 1, s, sprev);
562             }
563             if (isNewLine(b)) {opFail(); return;}
564             sprev = s;
565             s++;
566         }
567         ip++;
568         sprev = sbegin; // break;
569     }
570 
opAnyCharMLStarPeekNext()571     private void opAnyCharMLStarPeekNext() {
572         final char c = (char)code[ip];
573         final char[] ch = this.chars;
574 
575         while (s < range) {
576             if (c == ch[s]) {
577                 pushAlt(ip + 1, s, sprev);
578             }
579             sprev = s;
580             s++;
581         }
582         ip++;
583         sprev = sbegin; // break;
584     }
585 
opWord()586     private void opWord() {
587         if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;}
588         s++;
589         sprev = sbegin; // break;
590     }
591 
opNotWord()592     private void opNotWord() {
593         if (s >= range || EncodingHelper.isWord(chars[s])) {opFail(); return;}
594         s++;
595         sprev = sbegin; // break;
596     }
597 
opWordBound()598     private void opWordBound() {
599         if (s == str) {
600             if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;}
601         } else if (s == end) {
602             if (sprev >= end || !EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
603         } else {
604             if (EncodingHelper.isWord(chars[s]) == EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
605         }
606     }
607 
opNotWordBound()608     private void opNotWordBound() {
609         if (s == str) {
610             if (s < range && EncodingHelper.isWord(chars[s])) {opFail(); return;}
611         } else if (s == end) {
612             if (sprev < end && EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
613         } else {
614             if (EncodingHelper.isWord(chars[s]) != EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
615         }
616     }
617 
opWordBegin()618     private void opWordBegin() {
619         if (s < range && EncodingHelper.isWord(chars[s])) {
620             if (s == str || !EncodingHelper.isWord(chars[sprev])) {
621                 return;
622             }
623         }
624         opFail();
625     }
626 
opWordEnd()627     private void opWordEnd() {
628         if (s != str && EncodingHelper.isWord(chars[sprev])) {
629             if (s == end || !EncodingHelper.isWord(chars[s])) {
630                 return;
631             }
632         }
633         opFail();
634     }
635 
opBeginBuf()636     private void opBeginBuf() {
637         if (s != str) {
638             opFail();
639         }
640     }
641 
opEndBuf()642     private void opEndBuf() {
643         if (s != end) {
644             opFail();
645         }
646     }
647 
opBeginLine()648     private void opBeginLine() {
649         if (s == str) {
650             if (isNotBol(msaOptions)) {
651                 opFail();
652             }
653             return;
654         } else if (isNewLine(chars, sprev, end) && s != end) {
655             return;
656         }
657         opFail();
658     }
659 
opEndLine()660     private void opEndLine()  {
661         if (s == end) {
662             if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
663                 if (str == end || !isNewLine(chars, sprev, end)) {
664                     if (isNotEol(msaOptions)) {
665                         opFail();
666                     }
667                 }
668                 return;
669             }
670             if (isNotEol(msaOptions)) {
671                 opFail();
672             }
673             return;
674         } else if (isNewLine(chars, s, end)) {
675             return;
676         }
677         opFail();
678     }
679 
opSemiEndBuf()680     private void opSemiEndBuf() {
681         if (s == end) {
682             if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
683                 if (str == end || !isNewLine(chars, sprev, end)) {
684                     if (isNotEol(msaOptions)) {
685                         opFail();
686                     }
687                 }
688                 return;
689             }
690             if (isNotEol(msaOptions)) {
691                 opFail();
692             }
693             return;
694         } else if (isNewLine(chars, s, end) && s + 1 == end) {
695             return;
696         }
697         opFail();
698     }
699 
opBeginPosition()700     private void opBeginPosition() {
701         if (s != msaStart) {
702             opFail();
703         }
704     }
705 
opMemoryStartPush()706     private void opMemoryStartPush() {
707         final int mem = code[ip++];
708         pushMemStart(mem, s);
709     }
710 
opMemoryStart()711     private void opMemoryStart() {
712         final int mem = code[ip++];
713         repeatStk[memStartStk + mem] = s;
714     }
715 
opMemoryEndPush()716     private void opMemoryEndPush() {
717         final int mem = code[ip++];
718         pushMemEnd(mem, s);
719     }
720 
opMemoryEnd()721     private void opMemoryEnd() {
722         final int mem = code[ip++];
723         repeatStk[memEndStk + mem] = s;
724     }
725 
opMemoryEndPushRec()726     private void opMemoryEndPushRec() {
727         final int mem = code[ip++];
728         final int stkp = getMemStart(mem); /* should be before push mem-end. */
729         pushMemEnd(mem, s);
730         repeatStk[memStartStk + mem] = stkp;
731     }
732 
opMemoryEndRec()733     private void opMemoryEndRec() {
734         final int mem = code[ip++];
735         repeatStk[memEndStk + mem] = s;
736         final int stkp = getMemStart(mem);
737 
738         if (BitStatus.bsAt(regex.btMemStart, mem)) {
739             repeatStk[memStartStk + mem] = stkp;
740         } else {
741             repeatStk[memStartStk + mem] = stack[stkp].getMemPStr();
742         }
743 
744         pushMemEndMark(mem);
745     }
746 
backrefInvalid(final int mem)747     private boolean backrefInvalid(final int mem) {
748         return repeatStk[memEndStk + mem] == INVALID_INDEX || repeatStk[memStartStk + mem] == INVALID_INDEX;
749     }
750 
backrefStart(final int mem)751     private int backrefStart(final int mem) {
752         return bsAt(regex.btMemStart, mem) ? stack[repeatStk[memStartStk + mem]].getMemPStr() : repeatStk[memStartStk + mem];
753     }
754 
backrefEnd(final int mem)755     private int backrefEnd(final int mem) {
756         return bsAt(regex.btMemEnd, mem) ? stack[repeatStk[memEndStk + mem]].getMemPStr() : repeatStk[memEndStk + mem];
757     }
758 
backref(final int mem)759     private void backref(final int mem) {
760         /* if you want to remove following line,
761         you should check in parse and compile time. (numMem) */
762         if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;}
763 
764         int pstart = backrefStart(mem);
765         final int pend = backrefEnd(mem);
766 
767         int n = pend - pstart;
768         if (s + n > range) {opFail(); return;}
769         sprev = s;
770 
771         // STRING_CMP
772         while(n-- > 0) {
773             if (chars[pstart++] != chars[s++]) {opFail(); return;}
774         }
775 
776         // beyond string check
777         if (sprev < range) {
778             while (sprev + 1 < s) {
779                 sprev++;
780             }
781         }
782     }
783 
opBackRef1()784     private void opBackRef1() {
785         backref(1);
786     }
787 
opBackRef2()788     private void opBackRef2() {
789         backref(2);
790     }
791 
opBackRefN()792     private void opBackRefN() {
793         backref(code[ip++]);
794     }
795 
opBackRefNIC()796     private void opBackRefNIC() {
797         final int mem = code[ip++];
798         /* if you want to remove following line,
799         you should check in parse and compile time. (numMem) */
800         if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;}
801 
802         final int pstart = backrefStart(mem);
803         final int pend = backrefEnd(mem);
804 
805         final int n = pend - pstart;
806         if (s + n > range) {opFail(); return;}
807         sprev = s;
808 
809         value = s;
810         if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) {opFail(); return;}
811         s = value;
812 
813         // if (sprev < chars.length)
814         while (sprev + 1 < s) {
815             sprev++;
816         }
817     }
818 
opBackRefMulti()819     private void opBackRefMulti() {
820         final int tlen = code[ip++];
821 
822         int i;
823         loop:for (i=0; i<tlen; i++) {
824             final int mem = code[ip++];
825             if (backrefInvalid(mem)) {
826                 continue;
827             }
828 
829             int pstart = backrefStart(mem);
830             final int pend = backrefEnd(mem);
831 
832             int n = pend - pstart;
833             if (s + n > range) {opFail(); return;}
834 
835             sprev = s;
836             int swork = s;
837 
838             while (n-- > 0) {
839                 if (chars[pstart++] != chars[swork++]) {
840                     continue loop;
841                 }
842             }
843 
844             s = swork;
845 
846             // beyond string check
847             if (sprev < range) {
848                 while (sprev + 1 < s) {
849                     sprev++;
850                 }
851             }
852 
853             ip += tlen - i  - 1; // * SIZE_MEMNUM (1)
854             break; /* success */
855         }
856         if (i == tlen) {opFail(); return;}
857     }
858 
opBackRefMultiIC()859     private void opBackRefMultiIC() {
860         final int tlen = code[ip++];
861 
862         int i;
863         loop:for (i=0; i<tlen; i++) {
864             final int mem = code[ip++];
865             if (backrefInvalid(mem)) {
866                 continue;
867             }
868 
869             final int pstart = backrefStart(mem);
870             final int pend = backrefEnd(mem);
871 
872             final int n = pend - pstart;
873             if (s + n > range) {opFail(); return;}
874 
875             sprev = s;
876 
877             value = s;
878             if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end))
879              {
880                 continue loop; // STRING_CMP_VALUE_IC
881             }
882             s = value;
883 
884             // if (sprev < chars.length)
885             while (sprev + 1 < s) {
886                 sprev++;
887             }
888 
889             ip += tlen - i  - 1; // * SIZE_MEMNUM (1)
890             break;  /* success */
891         }
892         if (i == tlen) {opFail(); return;}
893     }
894 
memIsInMemp(final int mem, final int num, final int mempp)895     private boolean memIsInMemp(final int mem, final int num, final int mempp) {
896         for (int i=0, memp = mempp; i<num; i++) {
897             final int m = code[memp++];
898             if (mem == m) {
899                 return true;
900             }
901         }
902         return false;
903     }
904 
905     // USE_BACKREF_AT_LEVEL // (s) and (end) implicit
backrefMatchAtNestedLevel(final boolean ignoreCase, final int caseFoldFlag, final int nest, final int memNum, final int memp)906     private boolean backrefMatchAtNestedLevel(final boolean ignoreCase, final int caseFoldFlag,
907                                               final int nest, final int memNum, final int memp) {
908         int pend = -1;
909         int level = 0;
910         int k = stk - 1;
911 
912         while (k >= 0) {
913             final StackEntry e = stack[k];
914 
915             if (e.type == CALL_FRAME) {
916                 level--;
917             } else if (e.type == RETURN) {
918                 level++;
919             } else if (level == nest) {
920                 if (e.type == MEM_START) {
921                     if (memIsInMemp(e.getMemNum(), memNum, memp)) {
922                         final int pstart = e.getMemPStr();
923                         if (pend != -1) {
924                             if (pend - pstart > end - s) {
925                                 return false; /* or goto next_mem; */
926                             }
927                             int p = pstart;
928 
929                             value = s;
930                             if (ignoreCase) {
931                                 if (!stringCmpIC(caseFoldFlag, pstart, this, pend - pstart, end)) {
932                                     return false; /* or goto next_mem; */
933                                 }
934                             } else {
935                                 while (p < pend) {
936                                     if (chars[p++] != chars[value++]) {
937                                         return false; /* or goto next_mem; */
938                                     }
939                                 }
940                             }
941                             s = value;
942 
943                             return true;
944                         }
945                     }
946                 } else if (e.type == MEM_END) {
947                     if (memIsInMemp(e.getMemNum(), memNum, memp)) {
948                         pend = e.getMemPStr();
949                     }
950                 }
951             }
952             k--;
953         }
954         return false;
955     }
956 
opBackRefAtLevel()957     private void opBackRefAtLevel() {
958         final int ic      = code[ip++];
959         final int level   = code[ip++];
960         final int tlen    = code[ip++];
961 
962         sprev = s;
963         if (backrefMatchAtNestedLevel(ic != 0, regex.caseFoldFlag, level, tlen, ip)) { // (s) and (end) implicit
964             while (sprev + 1 < s) {
965                 sprev++;
966             }
967             ip += tlen; // * SIZE_MEMNUM
968         } else {
969             {opFail(); return;}
970         }
971     }
972 
opNullCheckStart()973     private void opNullCheckStart() {
974         final int mem = code[ip++];
975         pushNullCheckStart(mem, s);
976     }
977 
nullCheckFound()978     private void nullCheckFound() {
979         // null_check_found:
980         /* empty loop founded, skip next instruction */
981         switch(code[ip++]) {
982         case OPCode.JUMP:
983         case OPCode.PUSH:
984             ip++;       // p += SIZE_RELADDR;
985             break;
986         case OPCode.REPEAT_INC:
987         case OPCode.REPEAT_INC_NG:
988         case OPCode.REPEAT_INC_SG:
989         case OPCode.REPEAT_INC_NG_SG:
990             ip++;        // p += SIZE_MEMNUM;
991             break;
992         default:
993             throw new InternalException(ErrorMessages.ERR_UNEXPECTED_BYTECODE);
994         } // switch
995     }
996 
opNullCheckEnd()997     private void opNullCheckEnd() {
998         final int mem = code[ip++];
999         final int isNull = nullCheck(mem, s); /* mem: null check id */
1000 
1001         if (isNull != 0) {
1002             if (Config.DEBUG_MATCH) {
1003                 Config.log.println("NULL_CHECK_END: skip  id:" + mem + ", s:" + s);
1004             }
1005 
1006             nullCheckFound();
1007         }
1008     }
1009 
1010     // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
opNullCheckEndMemST()1011     private void opNullCheckEndMemST() {
1012         final int mem = code[ip++];   /* mem: null check id */
1013         final int isNull = nullCheckMemSt(mem, s);
1014 
1015         if (isNull != 0) {
1016             if (Config.DEBUG_MATCH) {
1017                 Config.log.println("NULL_CHECK_END_MEMST: skip  id:" + mem + ", s:" + s);
1018             }
1019 
1020             if (isNull == -1) {opFail(); return;}
1021             nullCheckFound();
1022         }
1023     }
1024 
opJump()1025     private void opJump() {
1026         ip += code[ip] + 1;
1027     }
1028 
opPush()1029     private void opPush() {
1030         final int addr = code[ip++];
1031         pushAlt(ip + addr, s, sprev);
1032     }
1033 
opPop()1034     private void opPop() {
1035         popOne();
1036     }
1037 
opPushOrJumpExact1()1038     private void opPushOrJumpExact1() {
1039         final int addr = code[ip++];
1040         // beyond string check
1041         if (s < range && code[ip] == chars[s]) {
1042             ip++;
1043             pushAlt(ip + addr, s, sprev);
1044             return;
1045         }
1046         ip += addr + 1;
1047     }
1048 
opPushIfPeekNext()1049     private void opPushIfPeekNext() {
1050         final int addr = code[ip++];
1051         // beyond string check
1052         if (s < range && code[ip] == chars[s]) {
1053             ip++;
1054             pushAlt(ip + addr, s, sprev);
1055             return;
1056         }
1057         ip++;
1058     }
1059 
opRepeat()1060     private void opRepeat() {
1061         final int mem = code[ip++];   /* mem: OP_REPEAT ID */
1062         final int addr= code[ip++];
1063 
1064         // ensure1();
1065         repeatStk[mem] = stk;
1066         pushRepeat(mem, ip);
1067 
1068         if (regex.repeatRangeLo[mem] == 0) { // lower
1069             pushAlt(ip + addr, s, sprev);
1070         }
1071     }
1072 
opRepeatNG()1073     private void opRepeatNG() {
1074         final int mem = code[ip++];   /* mem: OP_REPEAT ID */
1075         final int addr= code[ip++];
1076 
1077         // ensure1();
1078         repeatStk[mem] = stk;
1079         pushRepeat(mem, ip);
1080 
1081         if (regex.repeatRangeLo[mem] == 0) {
1082             pushAlt(ip, s, sprev);
1083             ip += addr;
1084         }
1085     }
1086 
repeatInc(final int mem, final int si)1087     private void repeatInc(final int mem, final int si) {
1088         final StackEntry e = stack[si];
1089 
1090         e.increaseRepeatCount();
1091 
1092         if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) {
1093             /* end of repeat. Nothing to do. */
1094         } else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) {
1095             pushAlt(ip, s, sprev);
1096             ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */
1097         } else {
1098             ip = e.getRepeatPCode();
1099         }
1100         pushRepeatInc(si);
1101     }
1102 
opRepeatInc()1103     private void opRepeatInc() {
1104         final int mem = code[ip++];   /* mem: OP_REPEAT ID */
1105         final int si = repeatStk[mem];
1106         repeatInc(mem, si);
1107     }
1108 
opRepeatIncSG()1109     private void opRepeatIncSG() {
1110         final int mem = code[ip++];   /* mem: OP_REPEAT ID */
1111         final int si = getRepeat(mem);
1112         repeatInc(mem, si);
1113     }
1114 
repeatIncNG(final int mem, final int si)1115     private void repeatIncNG(final int mem, final int si) {
1116         final StackEntry e = stack[si];
1117 
1118         e.increaseRepeatCount();
1119 
1120         if (e.getRepeatCount() < regex.repeatRangeHi[mem]) {
1121             if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) {
1122                 final int pcode = e.getRepeatPCode();
1123                 pushRepeatInc(si);
1124                 pushAlt(pcode, s, sprev);
1125             } else {
1126                 ip = e.getRepeatPCode();
1127                 pushRepeatInc(si);
1128             }
1129         } else if (e.getRepeatCount() == regex.repeatRangeHi[mem]) {
1130             pushRepeatInc(si);
1131         }
1132     }
1133 
opRepeatIncNG()1134     private void opRepeatIncNG() {
1135         final int mem = code[ip++];
1136         final int si = repeatStk[mem];
1137         repeatIncNG(mem, si);
1138     }
1139 
opRepeatIncNGSG()1140     private void opRepeatIncNGSG() {
1141         final int mem = code[ip++];
1142         final int si = getRepeat(mem);
1143         repeatIncNG(mem, si);
1144     }
1145 
opPushPos()1146     private void opPushPos() {
1147         pushPos(s, sprev);
1148     }
1149 
opPopPos()1150     private void opPopPos() {
1151         final StackEntry e = stack[posEnd()];
1152         s    = e.getStatePStr();
1153         sprev= e.getStatePStrPrev();
1154     }
1155 
opPushPosNot()1156     private void opPushPosNot() {
1157         final int addr = code[ip++];
1158         pushPosNot(ip + addr, s, sprev);
1159     }
1160 
opFailPos()1161     private void opFailPos() {
1162         popTilPosNot();
1163         opFail();
1164     }
1165 
opPushStopBT()1166     private void opPushStopBT() {
1167         pushStopBT();
1168     }
1169 
opPopStopBT()1170     private void opPopStopBT() {
1171         stopBtEnd();
1172     }
1173 
opLookBehind()1174     private void opLookBehind() {
1175         final int tlen = code[ip++];
1176         s = EncodingHelper.stepBack(str, s, tlen);
1177         if (s == -1) {opFail(); return;}
1178         sprev = EncodingHelper.prevCharHead(str, s);
1179     }
1180 
opPushLookBehindNot()1181     private void opPushLookBehindNot() {
1182         final int addr = code[ip++];
1183         final int tlen = code[ip++];
1184         final int q = EncodingHelper.stepBack(str, s, tlen);
1185         if (q == -1) {
1186             /* too short case -> success. ex. /(?<!XXX)a/.match("a")
1187             If you want to change to fail, replace following line. */
1188             ip += addr;
1189             // return FAIL;
1190         } else {
1191             pushLookBehindNot(ip + addr, s, sprev);
1192             s = q;
1193             sprev = EncodingHelper.prevCharHead(str, s);
1194         }
1195     }
1196 
opFailLookBehindNot()1197     private void opFailLookBehindNot() {
1198         popTilLookBehindNot();
1199         opFail();
1200     }
1201 
opFail()1202     private void opFail() {
1203         if (stack == null) {
1204             ip = regex.codeLength - 1;
1205             return;
1206         }
1207 
1208 
1209         final StackEntry e = pop();
1210         ip    = e.getStatePCode();
1211         s     = e.getStatePStr();
1212         sprev = e.getStatePStrPrev();
1213     }
1214 
finish()1215     private int finish() {
1216         return bestLen;
1217     }
1218 }
1219