1 /*
2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
3  * this software and associated documentation files (the "Software"), to deal in
4  * the Software without restriction, including without limitation the rights to
5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6  * of the Software, and to permit persons to whom the Software is furnished to do
7  * so, subject to the following conditions:
8  *
9  * The above copyright notice and this permission notice shall be included in all
10  * copies or substantial portions of the Software.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18  * SOFTWARE.
19  */
20 package jdk.nashorn.internal.runtime.regexp.joni.ast;
21 
22 import jdk.nashorn.internal.runtime.regexp.joni.BitSet;
23 import jdk.nashorn.internal.runtime.regexp.joni.CodeRangeBuffer;
24 import jdk.nashorn.internal.runtime.regexp.joni.Config;
25 import jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper;
26 import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment;
27 import jdk.nashorn.internal.runtime.regexp.joni.Syntax;
28 import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE;
29 import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE;
30 import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
31 import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
32 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
33 import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
34 import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
35 import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
36 
37 @SuppressWarnings("javadoc")
38 public final class CClassNode extends Node {
39     private static final int FLAG_NCCLASS_NOT = 1<<0;
40     private static final int FLAG_NCCLASS_SHARE = 1<<1;
41 
42     int flags;
43     public final BitSet bs = new BitSet();  // conditional creation ?
44     public CodeRangeBuffer mbuf;            /* multi-byte info or NULL */
45 
46     private int ctype;                      // for hashing purposes
47 
48     private final static short AsciiCtypeTable[] = {
49             0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
50             0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
51             0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
52             0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
53             0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
54             0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
55             0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
56             0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
57             0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
58             0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
59             0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
60             0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
61             0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
62             0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
63             0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
64             0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
65             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
66             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
67             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
68             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
69             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
70             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
71             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
72             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
73             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
74             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
75             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
76             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
77             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
78             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
79             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
80             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
81     };
82 
83     // node_new_cclass
CClassNode()84     public CClassNode() {}
85 
clear()86     public void clear() {
87         bs.clear();
88         flags = 0;
89         mbuf = null;
90     }
91 
92     @Override
getType()93     public int getType() {
94         return CCLASS;
95     }
96 
97     @Override
getName()98     public String getName() {
99         return "Character Class";
100     }
101 
102     @Override
equals(final Object other)103     public boolean equals(final Object other) {
104         if (!(other instanceof CClassNode)) {
105             return false;
106         }
107         final CClassNode cc = (CClassNode)other;
108         return ctype == cc.ctype && isNot() == cc.isNot();
109     }
110 
111     @Override
hashCode()112     public int hashCode() {
113         if (Config.USE_SHARED_CCLASS_TABLE) {
114             int hash = 0;
115             hash += ctype;
116             if (isNot()) {
117                 hash++;
118             }
119             return hash + (hash >> 5);
120         }
121         return super.hashCode();
122     }
123 
124     @Override
toString(final int level)125     public String toString(final int level) {
126         final StringBuilder value = new StringBuilder();
127         value.append("\n  flags: " + flagsToString());
128         value.append("\n  bs: " + pad(bs, level + 1));
129         value.append("\n  mbuf: " + pad(mbuf, level + 1));
130 
131         return value.toString();
132     }
133 
flagsToString()134     public String flagsToString() {
135         final StringBuilder f = new StringBuilder();
136         if (isNot()) {
137             f.append("NOT ");
138         }
139         if (isShare()) {
140             f.append("SHARE ");
141         }
142         return f.toString();
143     }
144 
isEmpty()145     public boolean isEmpty() {
146         return mbuf == null && bs.isEmpty();
147     }
148 
addCodeRangeToBuf(final int from, final int to)149     public void addCodeRangeToBuf(final int from, final int to) {
150         mbuf = CodeRangeBuffer.addCodeRangeToBuff(mbuf, from, to);
151     }
152 
addCodeRange(final ScanEnvironment env, final int from, final int to)153     public void addCodeRange(final ScanEnvironment env, final int from, final int to) {
154         mbuf = CodeRangeBuffer.addCodeRange(mbuf, env, from, to);
155     }
156 
addAllMultiByteRange()157     public void addAllMultiByteRange() {
158         mbuf = CodeRangeBuffer.addAllMultiByteRange(mbuf);
159     }
160 
clearNotFlag()161     public void clearNotFlag() {
162         if (isNot()) {
163             bs.invert();
164 
165             mbuf = CodeRangeBuffer.notCodeRangeBuff(mbuf);
166             clearNot();
167         }
168     }
169 
170     // and_cclass
and(final CClassNode other)171     public void and(final CClassNode other) {
172         final boolean not1 = isNot();
173         BitSet bsr1 = bs;
174         final CodeRangeBuffer buf1 = mbuf;
175         final boolean not2 = other.isNot();
176         BitSet bsr2 = other.bs;
177         final CodeRangeBuffer buf2 = other.mbuf;
178 
179         if (not1) {
180             final BitSet bs1 = new BitSet();
181             bsr1.invertTo(bs1);
182             bsr1 = bs1;
183         }
184 
185         if (not2) {
186             final BitSet bs2 = new BitSet();
187             bsr2.invertTo(bs2);
188             bsr2 = bs2;
189         }
190 
191         bsr1.and(bsr2);
192 
193         if (bsr1 != bs) {
194             bs.copy(bsr1);
195             bsr1 = bs;
196         }
197 
198         if (not1) {
199             bs.invert();
200         }
201 
202         CodeRangeBuffer pbuf = null;
203 
204         if (not1 && not2) {
205             pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, false, buf2, false);
206         } else {
207             pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, not1, buf2, not2);
208 
209             if (not1) {
210                 pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf);
211             }
212         }
213         mbuf = pbuf;
214 
215     }
216 
217     // or_cclass
or(final CClassNode other)218     public void or(final CClassNode other) {
219         final boolean not1 = isNot();
220         BitSet bsr1 = bs;
221         final CodeRangeBuffer buf1 = mbuf;
222         final boolean not2 = other.isNot();
223         BitSet bsr2 = other.bs;
224         final CodeRangeBuffer buf2 = other.mbuf;
225 
226         if (not1) {
227             final BitSet bs1 = new BitSet();
228             bsr1.invertTo(bs1);
229             bsr1 = bs1;
230         }
231 
232         if (not2) {
233             final BitSet bs2 = new BitSet();
234             bsr2.invertTo(bs2);
235             bsr2 = bs2;
236         }
237 
238         bsr1.or(bsr2);
239 
240         if (bsr1 != bs) {
241             bs.copy(bsr1);
242             bsr1 = bs;
243         }
244 
245         if (not1) {
246             bs.invert();
247         }
248 
249         CodeRangeBuffer pbuf = null;
250         if (not1 && not2) {
251             pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, false, buf2, false);
252         } else {
253             pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, not1, buf2, not2);
254             if (not1) {
255                 pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf);
256             }
257         }
258         mbuf = pbuf;
259     }
260 
261     // add_ctype_to_cc_by_range // Encoding out!
addCTypeByRange(final int ct, final boolean not, final int sbOut, final int mbr[])262     public void addCTypeByRange(final int ct, final boolean not, final int sbOut, final int mbr[]) {
263         final int n = mbr[0];
264 
265         if (!not) {
266             for (int i=0; i<n; i++) {
267                 for (int j=mbr[i * 2 + 1]; j<=mbr[i * 2 + 2]; j++) {
268                     if (j >= sbOut) {
269                         if (Config.VANILLA) {
270                             if (j == mbr[i * 2 + 2]) {
271                                 i++;
272                             } else if (j > mbr[i * 2 + 1]) {
273                                 addCodeRangeToBuf(j, mbr[i * 2 + 2]);
274                                 i++;
275                             }
276                         } else {
277                             if (j >= mbr[i * 2 + 1]) {
278                                 addCodeRangeToBuf(j, mbr[i * 2 + 2]);
279                                 i++;
280                             }
281                         }
282                         // !goto sb_end!, remove duplication!
283                         for (; i<n; i++) {
284                             addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
285                         }
286                         return;
287                     }
288                     bs.set(j);
289                 }
290             }
291             // !sb_end:!
292             for (int i=0; i<n; i++) {
293                 addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
294             }
295 
296         } else {
297             int prev = 0;
298 
299             for (int i=0; i<n; i++) {
300                 for (int j=prev; j < mbr[2 * i + 1]; j++) {
301                     if (j >= sbOut) {
302                         // !goto sb_end2!, remove duplication
303                         prev = sbOut;
304                         for (i=0; i<n; i++) {
305                             if (prev < mbr[2 * i + 1]) {
306                                 addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
307                             }
308                             prev = mbr[i * 2 + 2] + 1;
309                         }
310                         if (prev < 0x7fffffff/*!!!*/) {
311                             addCodeRangeToBuf(prev, 0x7fffffff);
312                         }
313                         return;
314                     }
315                     bs.set(j);
316                 }
317                 prev = mbr[2 * i + 2] + 1;
318             }
319 
320             for (int j=prev; j<sbOut; j++) {
321                 bs.set(j);
322             }
323 
324             // !sb_end2:!
325             prev = sbOut;
326             for (int i=0; i<n; i++) {
327                 if (prev < mbr[2 * i + 1]) {
328                     addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
329                 }
330                 prev = mbr[i * 2 + 2] + 1;
331             }
332             if (prev < 0x7fffffff/*!!!*/) {
333                 addCodeRangeToBuf(prev, 0x7fffffff);
334             }
335         }
336     }
337 
addCType(final int ctp, final boolean not, final ScanEnvironment env, final IntHolder sbOut)338     public void addCType(final int ctp, final boolean not, final ScanEnvironment env, final IntHolder sbOut) {
339         int ct = ctp;
340         if (Config.NON_UNICODE_SDW) {
341             switch (ct) {
342             case CharacterType.D:
343             case CharacterType.S:
344             case CharacterType.W:
345                 ct ^= CharacterType.SPECIAL_MASK;
346 
347                 if (env.syntax == Syntax.JAVASCRIPT && ct == CharacterType.SPACE) {
348                     // \s in JavaScript includes unicode characters.
349                     break;
350                 }
351 
352                 if (not) {
353                     for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
354                         // if (!ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
355                         if ((AsciiCtypeTable[c] & (1 << ct)) == 0) {
356                             bs.set(c);
357                         }
358                     }
359                     addAllMultiByteRange();
360                 } else {
361                     for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
362                         // if (ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
363                         if ((AsciiCtypeTable[c] & (1 << ct)) != 0) {
364                             bs.set(c);
365                         }
366                     }
367                 }
368                 return;
369             default:
370                 break;
371             }
372         }
373 
374         final int[] ranges = EncodingHelper.ctypeCodeRange(ct, sbOut);
375         if (ranges != null) {
376             addCTypeByRange(ct, not, sbOut.value, ranges);
377             return;
378         }
379 
380         switch(ct) {
381         case CharacterType.ALPHA:
382         case CharacterType.BLANK:
383         case CharacterType.CNTRL:
384         case CharacterType.DIGIT:
385         case CharacterType.LOWER:
386         case CharacterType.PUNCT:
387         case CharacterType.SPACE:
388         case CharacterType.UPPER:
389         case CharacterType.XDIGIT:
390         case CharacterType.ASCII:
391         case CharacterType.ALNUM:
392             if (not) {
393                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
394                     if (!EncodingHelper.isCodeCType(c, ct)) {
395                         bs.set(c);
396                     }
397                 }
398                 addAllMultiByteRange();
399             } else {
400                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
401                     if (EncodingHelper.isCodeCType(c, ct)) {
402                         bs.set(c);
403                     }
404                 }
405             }
406             break;
407 
408         case CharacterType.GRAPH:
409         case CharacterType.PRINT:
410             if (not) {
411                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
412                     if (!EncodingHelper.isCodeCType(c, ct)) {
413                         bs.set(c);
414                     }
415                 }
416             } else {
417                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
418                     if (EncodingHelper.isCodeCType(c, ct)) {
419                         bs.set(c);
420                     }
421                 }
422                 addAllMultiByteRange();
423             }
424             break;
425 
426         case CharacterType.WORD:
427             if (!not) {
428                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
429                     if (EncodingHelper.isWord(c)) {
430                         bs.set(c);
431                     }
432                 }
433 
434                 addAllMultiByteRange();
435             } else {
436                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
437                     if (!EncodingHelper.isWord(c)) {
438                         bs.set(c);
439                     }
440                 }
441             }
442             break;
443 
444         default:
445             throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
446         } // switch
447     }
448 
449     public static final class CCStateArg {
450         public int v;
451         public int vs;
452         public boolean vsIsRaw;
453         public boolean vIsRaw;
454         public CCVALTYPE inType;
455         public CCVALTYPE type;
456         public CCSTATE state;
457     }
458 
nextStateClass(final CCStateArg arg, final ScanEnvironment env)459     public void nextStateClass(final CCStateArg arg, final ScanEnvironment env) {
460         if (arg.state == CCSTATE.RANGE) {
461             throw new SyntaxException(ErrorMessages.ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE);
462         }
463 
464         if (arg.state == CCSTATE.VALUE && arg.type != CCVALTYPE.CLASS) {
465             if (arg.type == CCVALTYPE.SB) {
466                 bs.set(arg.vs);
467             } else if (arg.type == CCVALTYPE.CODE_POINT) {
468                 addCodeRange(env, arg.vs, arg.vs);
469             }
470         }
471         arg.state = CCSTATE.VALUE;
472         arg.type = CCVALTYPE.CLASS;
473     }
474 
nextStateValue(final CCStateArg arg, final ScanEnvironment env)475     public void nextStateValue(final CCStateArg arg, final ScanEnvironment env) {
476 
477         switch(arg.state) {
478         case VALUE:
479             if (arg.type == CCVALTYPE.SB) {
480                 if (arg.vs > 0xff) {
481                     throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
482                 }
483                 bs.set(arg.vs);
484             } else if (arg.type == CCVALTYPE.CODE_POINT) {
485                 addCodeRange(env, arg.vs, arg.vs);
486             }
487             break;
488 
489         case RANGE:
490             if (arg.inType == arg.type) {
491                 if (arg.inType == CCVALTYPE.SB) {
492                     if (arg.vs > 0xff || arg.v > 0xff) {
493                         throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
494                     }
495 
496                     if (arg.vs > arg.v) {
497                         if (env.syntax.allowEmptyRangeInCC()) {
498                             // goto ccs_range_end
499                             arg.state = CCSTATE.COMPLETE;
500                             break;
501                         }
502                         throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
503                     }
504                     bs.setRange(arg.vs, arg.v);
505                 } else {
506                     addCodeRange(env, arg.vs, arg.v);
507                 }
508             } else {
509                 if (arg.vs > arg.v) {
510                     if (env.syntax.allowEmptyRangeInCC()) {
511                         // goto ccs_range_end
512                         arg.state = CCSTATE.COMPLETE;
513                         break;
514                     }
515                     throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
516                 }
517                 bs.setRange(arg.vs, arg.v < 0xff ? arg.v : 0xff);
518                 addCodeRange(env, arg.vs, arg.v);
519             }
520             // ccs_range_end:
521             arg.state = CCSTATE.COMPLETE;
522             break;
523 
524         case COMPLETE:
525         case START:
526             arg.state = CCSTATE.VALUE;
527             break;
528 
529         default:
530             break;
531 
532         } // switch
533 
534         arg.vsIsRaw = arg.vIsRaw;
535         arg.vs = arg.v;
536         arg.type = arg.inType;
537     }
538 
539     // onig_is_code_in_cc_len
isCodeInCCLength(final int code)540     public boolean isCodeInCCLength(final int code) {
541         boolean found;
542 
543         if (code > 0xff) {
544             found = mbuf != null && mbuf.isInCodeRange(code);
545         } else {
546             found = bs.at(code);
547         }
548 
549         if (isNot()) {
550             return !found;
551         }
552         return found;
553     }
554 
555     // onig_is_code_in_cc
isCodeInCC(final int code)556     public boolean isCodeInCC(final int code) {
557          return isCodeInCCLength(code);
558     }
559 
setNot()560     public void setNot() {
561         flags |= FLAG_NCCLASS_NOT;
562     }
563 
clearNot()564     public void clearNot() {
565         flags &= ~FLAG_NCCLASS_NOT;
566     }
567 
isNot()568     public boolean isNot() {
569         return (flags & FLAG_NCCLASS_NOT) != 0;
570     }
571 
setShare()572     public void setShare() {
573         flags |= FLAG_NCCLASS_SHARE;
574     }
575 
clearShare()576     public void clearShare() {
577         flags &= ~FLAG_NCCLASS_SHARE;
578     }
579 
isShare()580     public boolean isShare() {
581         return (flags & FLAG_NCCLASS_SHARE) != 0;
582     }
583 
584 }
585