1 /*
2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
3  * this software and associated documentation files (the "Software"), to deal in
4  * the Software without restriction, including without limitation the rights to
5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6  * of the Software, and to permit persons to whom the Software is furnished to do
7  * so, subject to the following conditions:
8  *
9  * The above copyright notice and this permission notice shall be included in all
10  * copies or substantial portions of the Software.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18  * SOFTWARE.
19  */
20 package jdk.nashorn.internal.runtime.regexp.joni;
21 
22 import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
23 import jdk.nashorn.internal.runtime.regexp.joni.constants.Arguments;
24 import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode;
25 import jdk.nashorn.internal.runtime.regexp.joni.constants.OPSize;
26 import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
27 
28 class ByteCodePrinter {
29     final int[] code;
30     final int codeLength;
31     final char[][] templates;
32 
33     Object[] operands;
34 
35     private final static String OpCodeNames[] = new String[] {
36             "finish", /*OP_FINISH*/
37             "end", /*OP_END*/
38             "exact1", /*OP_EXACT1*/
39             "exact2", /*OP_EXACT2*/
40             "exact3", /*OP_EXACT3*/
41             "exact4", /*OP_EXACT4*/
42             "exact5", /*OP_EXACT5*/
43             "exactn", /*OP_EXACTN*/
44             "exactmb2-n1", /*OP_EXACTMB2N1*/
45             "exactmb2-n2", /*OP_EXACTMB2N2*/
46             "exactmb2-n3", /*OP_EXACTMB2N3*/
47             "exactmb2-n", /*OP_EXACTMB2N*/
48             "exactmb3n", /*OP_EXACTMB3N*/
49             "exactmbn", /*OP_EXACTMBN*/
50             "exact1-ic", /*OP_EXACT1_IC*/
51             "exactn-ic", /*OP_EXACTN_IC*/
52             "cclass", /*OP_CCLASS*/
53             "cclass-mb", /*OP_CCLASS_MB*/
54             "cclass-mix", /*OP_CCLASS_MIX*/
55             "cclass-not", /*OP_CCLASS_NOT*/
56             "cclass-mb-not", /*OP_CCLASS_MB_NOT*/
57             "cclass-mix-not", /*OP_CCLASS_MIX_NOT*/
58             "cclass-node", /*OP_CCLASS_NODE*/
59             "anychar", /*OP_ANYCHAR*/
60             "anychar-ml", /*OP_ANYCHAR_ML*/
61             "anychar*", /*OP_ANYCHAR_STAR*/
62             "anychar-ml*", /*OP_ANYCHAR_ML_STAR*/
63             "anychar*-peek-next", /*OP_ANYCHAR_STAR_PEEK_NEXT*/
64             "anychar-ml*-peek-next", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
65             "word", /*OP_WORD*/
66             "not-word", /*OP_NOT_WORD*/
67             "word-bound", /*OP_WORD_BOUND*/
68             "not-word-bound", /*OP_NOT_WORD_BOUND*/
69             "word-begin", /*OP_WORD_BEGIN*/
70             "word-end", /*OP_WORD_END*/
71             "begin-buf", /*OP_BEGIN_BUF*/
72             "end-buf", /*OP_END_BUF*/
73             "begin-line", /*OP_BEGIN_LINE*/
74             "end-line", /*OP_END_LINE*/
75             "semi-end-buf", /*OP_SEMI_END_BUF*/
76             "begin-position", /*OP_BEGIN_POSITION*/
77             "backref1", /*OP_BACKREF1*/
78             "backref2", /*OP_BACKREF2*/
79             "backrefn", /*OP_BACKREFN*/
80             "backrefn-ic", /*OP_BACKREFN_IC*/
81             "backref_multi", /*OP_BACKREF_MULTI*/
82             "backref_multi-ic", /*OP_BACKREF_MULTI_IC*/
83             "backref_at_level", /*OP_BACKREF_AT_LEVEL*/
84             "mem-start", /*OP_MEMORY_START*/
85             "mem-start-push", /*OP_MEMORY_START_PUSH*/
86             "mem-end-push", /*OP_MEMORY_END_PUSH*/
87             "mem-end-push-rec", /*OP_MEMORY_END_PUSH_REC*/
88             "mem-end", /*OP_MEMORY_END*/
89             "mem-end-rec", /*OP_MEMORY_END_REC*/
90             "fail", /*OP_FAIL*/
91             "jump", /*OP_JUMP*/
92             "push", /*OP_PUSH*/
93             "pop", /*OP_POP*/
94             "push-or-jump-e1", /*OP_PUSH_OR_JUMP_EXACT1*/
95             "push-if-peek-next", /*OP_PUSH_IF_PEEK_NEXT*/
96             "repeat", /*OP_REPEAT*/
97             "repeat-ng", /*OP_REPEAT_NG*/
98             "repeat-inc", /*OP_REPEAT_INC*/
99             "repeat-inc-ng", /*OP_REPEAT_INC_NG*/
100             "repeat-inc-sg", /*OP_REPEAT_INC_SG*/
101             "repeat-inc-ng-sg", /*OP_REPEAT_INC_NG_SG*/
102             "null-check-start", /*OP_NULL_CHECK_START*/
103             "null-check-end", /*OP_NULL_CHECK_END*/
104             "null-check-end-memst", /*OP_NULL_CHECK_END_MEMST*/
105             "null-check-end-memst-push", /*OP_NULL_CHECK_END_MEMST_PUSH*/
106             "push-pos", /*OP_PUSH_POS*/
107             "pop-pos", /*OP_POP_POS*/
108             "push-pos-not", /*OP_PUSH_POS_NOT*/
109             "fail-pos", /*OP_FAIL_POS*/
110             "push-stop-bt", /*OP_PUSH_STOP_BT*/
111             "pop-stop-bt", /*OP_POP_STOP_BT*/
112             "look-behind", /*OP_LOOK_BEHIND*/
113             "push-look-behind-not", /*OP_PUSH_LOOK_BEHIND_NOT*/
114             "fail-look-behind-not", /*OP_FAIL_LOOK_BEHIND_NOT*/
115             "call", /*OP_CALL*/
116             "return", /*OP_RETURN*/
117             "state-check-push", /*OP_STATE_CHECK_PUSH*/
118             "state-check-push-or-jump", /*OP_STATE_CHECK_PUSH_OR_JUMP*/
119             "state-check", /*OP_STATE_CHECK*/
120             "state-check-anychar*", /*OP_STATE_CHECK_ANYCHAR_STAR*/
121             "state-check-anychar-ml*", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
122             "set-option-push", /*OP_SET_OPTION_PUSH*/
123             "set-option", /*OP_SET_OPTION*/
124     };
125 
126     private final static int OpCodeArgTypes[] = new int[] {
127             Arguments.NON, /*OP_FINISH*/
128             Arguments.NON, /*OP_END*/
129             Arguments.SPECIAL, /*OP_EXACT1*/
130             Arguments.SPECIAL, /*OP_EXACT2*/
131             Arguments.SPECIAL, /*OP_EXACT3*/
132             Arguments.SPECIAL, /*OP_EXACT4*/
133             Arguments.SPECIAL, /*OP_EXACT5*/
134             Arguments.SPECIAL, /*OP_EXACTN*/
135             Arguments.SPECIAL, /*OP_EXACTMB2N1*/
136             Arguments.SPECIAL, /*OP_EXACTMB2N2*/
137             Arguments.SPECIAL, /*OP_EXACTMB2N3*/
138             Arguments.SPECIAL, /*OP_EXACTMB2N*/
139             Arguments.SPECIAL, /*OP_EXACTMB3N*/
140             Arguments.SPECIAL, /*OP_EXACTMBN*/
141             Arguments.SPECIAL, /*OP_EXACT1_IC*/
142             Arguments.SPECIAL, /*OP_EXACTN_IC*/
143             Arguments.SPECIAL, /*OP_CCLASS*/
144             Arguments.SPECIAL, /*OP_CCLASS_MB*/
145             Arguments.SPECIAL, /*OP_CCLASS_MIX*/
146             Arguments.SPECIAL, /*OP_CCLASS_NOT*/
147             Arguments.SPECIAL, /*OP_CCLASS_MB_NOT*/
148             Arguments.SPECIAL, /*OP_CCLASS_MIX_NOT*/
149             Arguments.SPECIAL, /*OP_CCLASS_NODE*/
150             Arguments.NON, /*OP_ANYCHAR*/
151             Arguments.NON, /*OP_ANYCHAR_ML*/
152             Arguments.NON, /*OP_ANYCHAR_STAR*/
153             Arguments.NON, /*OP_ANYCHAR_ML_STAR*/
154             Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT*/
155             Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
156             Arguments.NON, /*OP_WORD*/
157             Arguments.NON, /*OP_NOT_WORD*/
158             Arguments.NON, /*OP_WORD_BOUND*/
159             Arguments.NON, /*OP_NOT_WORD_BOUND*/
160             Arguments.NON, /*OP_WORD_BEGIN*/
161             Arguments.NON, /*OP_WORD_END*/
162             Arguments.NON, /*OP_BEGIN_BUF*/
163             Arguments.NON, /*OP_END_BUF*/
164             Arguments.NON, /*OP_BEGIN_LINE*/
165             Arguments.NON, /*OP_END_LINE*/
166             Arguments.NON, /*OP_SEMI_END_BUF*/
167             Arguments.NON, /*OP_BEGIN_POSITION*/
168             Arguments.NON, /*OP_BACKREF1*/
169             Arguments.NON, /*OP_BACKREF2*/
170             Arguments.MEMNUM, /*OP_BACKREFN*/
171             Arguments.SPECIAL, /*OP_BACKREFN_IC*/
172             Arguments.SPECIAL, /*OP_BACKREF_MULTI*/
173             Arguments.SPECIAL, /*OP_BACKREF_MULTI_IC*/
174             Arguments.SPECIAL, /*OP_BACKREF_AT_LEVEL*/
175             Arguments.MEMNUM, /*OP_MEMORY_START*/
176             Arguments.MEMNUM, /*OP_MEMORY_START_PUSH*/
177             Arguments.MEMNUM, /*OP_MEMORY_END_PUSH*/
178             Arguments.MEMNUM, /*OP_MEMORY_END_PUSH_REC*/
179             Arguments.MEMNUM, /*OP_MEMORY_END*/
180             Arguments.MEMNUM, /*OP_MEMORY_END_REC*/
181             Arguments.NON, /*OP_FAIL*/
182             Arguments.RELADDR, /*OP_JUMP*/
183             Arguments.RELADDR, /*OP_PUSH*/
184             Arguments.NON, /*OP_POP*/
185             Arguments.SPECIAL, /*OP_PUSH_OR_JUMP_EXACT1*/
186             Arguments.SPECIAL, /*OP_PUSH_IF_PEEK_NEXT*/
187             Arguments.SPECIAL, /*OP_REPEAT*/
188             Arguments.SPECIAL, /*OP_REPEAT_NG*/
189             Arguments.MEMNUM, /*OP_REPEAT_INC*/
190             Arguments.MEMNUM, /*OP_REPEAT_INC_NG*/
191             Arguments.MEMNUM, /*OP_REPEAT_INC_SG*/
192             Arguments.MEMNUM, /*OP_REPEAT_INC_NG_SG*/
193             Arguments.MEMNUM, /*OP_NULL_CHECK_START*/
194             Arguments.MEMNUM, /*OP_NULL_CHECK_END*/
195             Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST*/
196             Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST_PUSH*/
197             Arguments.NON, /*OP_PUSH_POS*/
198             Arguments.NON, /*OP_POP_POS*/
199             Arguments.RELADDR, /*OP_PUSH_POS_NOT*/
200             Arguments.NON, /*OP_FAIL_POS*/
201             Arguments.NON, /*OP_PUSH_STOP_BT*/
202             Arguments.NON, /*OP_POP_STOP_BT*/
203             Arguments.SPECIAL, /*OP_LOOK_BEHIND*/
204             Arguments.SPECIAL, /*OP_PUSH_LOOK_BEHIND_NOT*/
205             Arguments.NON, /*OP_FAIL_LOOK_BEHIND_NOT*/
206             Arguments.ABSADDR, /*OP_CALL*/
207             Arguments.NON, /*OP_RETURN*/
208             Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH*/
209             Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH_OR_JUMP*/
210             Arguments.STATE_CHECK, /*OP_STATE_CHECK*/
211             Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR*/
212             Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
213             Arguments.OPTION, /*OP_SET_OPTION_PUSH*/
214             Arguments.OPTION, /*OP_SET_OPTION*/
215     };
216 
ByteCodePrinter(final Regex regex)217     public ByteCodePrinter(final Regex regex) {
218         code = regex.code;
219         codeLength = regex.codeLength;
220         operands = regex.operands;
221 
222         templates = regex.templates;
223     }
224 
byteCodeListToString()225     public String byteCodeListToString() {
226         return compiledByteCodeListToString();
227     }
228 
pString(final StringBuilder sb, final int len, final int s)229     private void pString(final StringBuilder sb, final int len, final int s) {
230         sb.append(":");
231         sb.append(new String(code, s, len));
232     }
233 
pLenString(final StringBuilder sb, final int len, final int s)234     private void pLenString(final StringBuilder sb, final int len, final int s) {
235         sb.append(":").append(len).append(":");
236         sb.append(new String(code, s, len));
237     }
238 
pLenStringFromTemplate(final StringBuilder sb, final int len, final char[] tm, final int idx)239     private static void pLenStringFromTemplate(final StringBuilder sb, final int len, final char[] tm, final int idx) {
240         sb.append(":T:").append(len).append(":");
241         sb.append(tm, idx, len);
242     }
243 
compiledByteCodeToString(final StringBuilder sb, final int bptr)244     public int compiledByteCodeToString(final StringBuilder sb, final int bptr) {
245         int len, n, mem, addr, scn, cod;
246         BitSet bs;
247         CClassNode cc;
248         int tm, idx;
249         int bp = bptr;
250 
251         sb.append("[").append(OpCodeNames[code[bp]]);
252         final int argType = OpCodeArgTypes[code[bp]];
253         final int ip = bp;
254         if (argType != Arguments.SPECIAL) {
255             bp++;
256             switch (argType) {
257             default:
258             case Arguments.NON:
259                 break;
260 
261             case Arguments.RELADDR:
262                 sb.append(":(").append(code[bp]).append(")");
263                 bp += OPSize.RELADDR;
264                 break;
265 
266             case Arguments.ABSADDR:
267                 sb.append(":(").append(code[bp]).append(")");
268                 bp += OPSize.ABSADDR;
269                 break;
270 
271             case Arguments.LENGTH:
272                 sb.append(":").append(code[bp]);
273                 bp += OPSize.LENGTH;
274                 break;
275 
276             case Arguments.MEMNUM:
277                 sb.append(":").append(code[bp]);
278                 bp += OPSize.MEMNUM;
279                 break;
280 
281             case Arguments.OPTION:
282                 sb.append(":").append(code[bp]);
283                 bp += OPSize.OPTION;
284                 break;
285 
286             case Arguments.STATE_CHECK:
287                 sb.append(":").append(code[bp]);
288                 bp += OPSize.STATE_CHECK;
289                 break;
290             }
291         } else {
292             switch (code[bp++]) {
293             case OPCode.EXACT1:
294             case OPCode.ANYCHAR_STAR_PEEK_NEXT:
295             case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
296                 pString(sb, 1, bp++);
297                 break;
298 
299             case OPCode.EXACT2:
300                 pString(sb, 2, bp);
301                 bp += 2;
302                 break;
303 
304             case OPCode.EXACT3:
305                 pString(sb, 3, bp);
306                 bp += 3;
307                 break;
308 
309             case OPCode.EXACT4:
310                 pString(sb, 4, bp);
311                 bp += 4;
312                 break;
313 
314             case OPCode.EXACT5:
315                 pString(sb, 5, bp);
316                 bp += 5;
317                 break;
318 
319             case OPCode.EXACTN:
320                 len = code[bp];
321                 bp += OPSize.LENGTH;
322                 if (Config.USE_STRING_TEMPLATES) {
323                     tm = code[bp];
324                     bp += OPSize.INDEX;
325                     idx = code[bp];
326                     bp += OPSize.INDEX;
327                     pLenStringFromTemplate(sb, len, templates[tm], idx);
328                 } else {
329                     pLenString(sb, len, bp);
330                     bp += len;
331                 }
332                 break;
333 
334             case OPCode.EXACT1_IC:
335                 pString(sb, 1, bp);
336                 bp++;
337                 break;
338 
339             case OPCode.EXACTN_IC:
340                 len = code[bp];
341                 bp += OPSize.LENGTH;
342                 if (Config.USE_STRING_TEMPLATES) {
343                     tm = code[bp];
344                     bp += OPSize.INDEX;
345                     idx = code[bp];
346                     bp += OPSize.INDEX;
347                     pLenStringFromTemplate(sb, len, templates[tm], idx);
348                 } else {
349                     pLenString(sb, len, bp);
350                     bp += len;
351                 }
352                 break;
353 
354             case OPCode.CCLASS:
355                 bs = new BitSet();
356                 System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
357                 n = bs.numOn();
358                 bp += BitSet.BITSET_SIZE;
359                 sb.append(":").append(n);
360                 break;
361 
362             case OPCode.CCLASS_NOT:
363                 bs = new BitSet();
364                 System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
365                 n = bs.numOn();
366                 bp += BitSet.BITSET_SIZE;
367                 sb.append(":").append(n);
368                 break;
369 
370             case OPCode.CCLASS_MB:
371             case OPCode.CCLASS_MB_NOT:
372                 len = code[bp];
373                 bp += OPSize.LENGTH;
374                 cod = code[bp];
375                 //bp += OPSize.CODE_POINT;
376                 bp += len;
377                 sb.append(":").append(cod).append(":").append(len);
378                 break;
379 
380             case OPCode.CCLASS_MIX:
381             case OPCode.CCLASS_MIX_NOT:
382                 bs = new BitSet();
383                 System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
384                 n = bs.numOn();
385                 bp += BitSet.BITSET_SIZE;
386                 len = code[bp];
387                 bp += OPSize.LENGTH;
388                 cod = code[bp];
389                 //bp += OPSize.CODE_POINT;
390                 bp += len;
391                 sb.append(":").append(n).append(":").append(cod).append(":").append(len);
392                 break;
393 
394             case OPCode.CCLASS_NODE:
395                 cc = (CClassNode)operands[code[bp]];
396                 bp += OPSize.POINTER;
397                 n = cc.bs.numOn();
398                 sb.append(":").append(cc).append(":").append(n);
399                 break;
400 
401             case OPCode.BACKREFN_IC:
402                 mem = code[bp];
403                 bp += OPSize.MEMNUM;
404                 sb.append(":").append(mem);
405                 break;
406 
407             case OPCode.BACKREF_MULTI_IC:
408             case OPCode.BACKREF_MULTI:
409                 sb.append(" ");
410                 len = code[bp];
411                 bp += OPSize.LENGTH;
412                 for (int i=0; i<len; i++) {
413                     mem = code[bp];
414                     bp += OPSize.MEMNUM;
415                     if (i > 0) {
416                         sb.append(", ");
417                     }
418                     sb.append(mem);
419                 }
420                 break;
421 
422             case OPCode.BACKREF_WITH_LEVEL: {
423                 final int option = code[bp];
424                 bp += OPSize.OPTION;
425                 sb.append(":").append(option);
426                 final int level = code[bp];
427                 bp += OPSize.LENGTH;
428                 sb.append(":").append(level);
429                 sb.append(" ");
430                 len = code[bp];
431                 bp += OPSize.LENGTH;
432                 for (int i=0; i<len; i++) {
433                     mem = code[bp];
434                     bp += OPSize.MEMNUM;
435                     if (i > 0) {
436                         sb.append(", ");
437                     }
438                     sb.append(mem);
439                 }
440                 break;
441             }
442 
443             case OPCode.REPEAT:
444             case OPCode.REPEAT_NG:
445                 mem = code[bp];
446                 bp += OPSize.MEMNUM;
447                 addr = code[bp];
448                 bp += OPSize.RELADDR;
449                 sb.append(":").append(mem).append(":").append(addr);
450                 break;
451 
452             case OPCode.PUSH_OR_JUMP_EXACT1:
453             case OPCode.PUSH_IF_PEEK_NEXT:
454                 addr = code[bp];
455                 bp += OPSize.RELADDR;
456                 sb.append(":(").append(addr).append(")");
457                 pString(sb, 1, bp);
458                 bp++;
459                 break;
460 
461             case OPCode.LOOK_BEHIND:
462                 len = code[bp];
463                 bp += OPSize.LENGTH;
464                 sb.append(":").append(len);
465                 break;
466 
467             case OPCode.PUSH_LOOK_BEHIND_NOT:
468                 addr = code[bp];
469                 bp += OPSize.RELADDR;
470                 len = code[bp];
471                 bp += OPSize.LENGTH;
472                 sb.append(":").append(len).append(":(").append(addr).append(")");
473                 break;
474 
475             case OPCode.STATE_CHECK_PUSH:
476             case OPCode.STATE_CHECK_PUSH_OR_JUMP:
477                 scn = code[bp];
478                 bp += OPSize.STATE_CHECK_NUM;
479                 addr = code[bp];
480                 bp += OPSize.RELADDR;
481                 sb.append(":").append(scn).append(":(").append(addr).append(")");
482                 break;
483 
484             default:
485                 throw new InternalException("undefined code: " + code[--bp]);
486             }
487         }
488 
489         sb.append("]");
490 
491         // @opcode_address(opcode_size)
492         if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) {
493             sb.append("@").append(ip).append("(").append((bp - ip)).append(")");
494         }
495 
496         return bp;
497     }
498 
compiledByteCodeListToString()499     private String compiledByteCodeListToString() {
500         final StringBuilder sb = new StringBuilder();
501         sb.append("code length: ").append(codeLength).append("\n");
502 
503         int ncode = 0;
504         int bp = 0;
505         final int end = codeLength;
506 
507         while (bp < end) {
508             ncode++;
509 
510             if (bp > 0) {
511                 sb.append(ncode % 5 == 0 ? "\n" : " ");
512             }
513 
514             bp = compiledByteCodeToString(sb, bp);
515         }
516         sb.append("\n");
517         return sb.toString();
518     }
519 }
520