1 /*
2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
3  * this software and associated documentation files (the "Software"), to deal in
4  * the Software without restriction, including without limitation the rights to
5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6  * of the Software, and to permit persons to whom the Software is furnished to do
7  * so, subject to the following conditions:
8  *
9  * The above copyright notice and this permission notice shall be included in all
10  * copies or substantial portions of the Software.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18  * SOFTWARE.
19  */
20 package jdk.nashorn.internal.runtime.regexp.joni;
21 
22 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
23 import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
24 
25 @SuppressWarnings("javadoc")
26 public final class CodeRangeBuffer implements Cloneable {
27     private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5;
28     private static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff;
29 
30     int[] p;
31     int used;
32 
33 
CodeRangeBuffer()34     public CodeRangeBuffer() {
35         p = new int[INIT_MULTI_BYTE_RANGE_SIZE];
36         writeCodePoint(0, 0);
37     }
38 
39     // CodeRange.isInCodeRange
isInCodeRange(final int code)40     public boolean isInCodeRange(final int code) {
41         int low = 0;
42         final int n = p[0];
43         int high = n;
44 
45         while (low < high) {
46             final int x = (low + high) >> 1;
47             if (code > p[(x << 1) + 2]) {
48                 low = x + 1;
49             } else {
50                 high = x;
51             }
52         }
53         return low < n && code >= p[(low << 1) + 1];
54     }
55 
CodeRangeBuffer(final CodeRangeBuffer orig)56     private CodeRangeBuffer(final CodeRangeBuffer orig) {
57         p = new int[orig.p.length];
58         System.arraycopy(orig.p, 0, p, 0, p.length);
59         used = orig.used;
60     }
61 
62     @Override
toString()63     public String toString() {
64         final StringBuilder buf = new StringBuilder();
65         buf.append("CodeRange");
66         buf.append("\n  used: ").append(used);
67         buf.append("\n  code point: ").append(p[0]);
68         buf.append("\n  ranges: ");
69 
70         for (int i=0; i<p[0]; i++) {
71             buf.append("[").append(rangeNumToString(p[i * 2 + 1])).append("..").append(rangeNumToString(p[i * 2 + 2])).append("]");
72             if (i > 0 && i % 6 == 0) {
73                 buf.append("\n          ");
74             }
75         }
76 
77         return buf.toString();
78     }
79 
rangeNumToString(final int num)80     private static String rangeNumToString(final int num){
81         return "0x" + Integer.toString(num, 16);
82     }
83 
expand(final int low)84     public void expand(final int low) {
85         int length = p.length;
86         do { length <<= 1; } while (length < low);
87         final int[]tmp = new int[length];
88         System.arraycopy(p, 0, tmp, 0, used);
89         p = tmp;
90     }
91 
ensureSize(final int size)92     public void ensureSize(final int size) {
93         int length = p.length;
94         while (length < size ) { length <<= 1; }
95         if (p.length != length) {
96             final int[]tmp = new int[length];
97             System.arraycopy(p, 0, tmp, 0, used);
98             p = tmp;
99         }
100     }
101 
moveRight(final int from, final int to, final int n)102     private void moveRight(final int from, final int to, final int n) {
103         if (to + n > p.length) {
104             expand(to + n);
105         }
106         System.arraycopy(p, from, p, to, n);
107         if (to + n > used) {
108             used = to + n;
109         }
110     }
111 
moveLeft(final int from, final int to, final int n)112     protected void moveLeft(final int from, final int to, final int n) {
113         System.arraycopy(p, from, p, to, n);
114     }
115 
moveLeftAndReduce(final int from, final int to)116     private void moveLeftAndReduce(final int from, final int to) {
117         System.arraycopy(p, from, p, to, used - from);
118         used -= from - to;
119     }
120 
writeCodePoint(final int pos, final int b)121     public void writeCodePoint(final int pos, final int b) {
122         final int u = pos + 1;
123         if (p.length < u) {
124             expand(u);
125         }
126         p[pos] = b;
127         if (used < u) {
128             used = u;
129         }
130     }
131 
132     @Override
clone()133     public CodeRangeBuffer clone() {
134         return new CodeRangeBuffer(this);
135     }
136 
137     // ugly part: these methods should be made OO
138     // add_code_range_to_buf
addCodeRangeToBuff(final CodeRangeBuffer pbufp, final int fromp, final int top)139     public static CodeRangeBuffer addCodeRangeToBuff(final CodeRangeBuffer pbufp, final int fromp, final int top) {
140         int from = fromp, to = top;
141         CodeRangeBuffer pbuf = pbufp;
142 
143         if (from > to) {
144             final int n = from;
145             from = to;
146             to = n;
147         }
148 
149         if (pbuf == null) {
150             pbuf = new CodeRangeBuffer(); // move to CClassNode
151         }
152 
153         final int[]p = pbuf.p;
154         int n = p[0];
155 
156         int low = 0;
157         int bound = n;
158 
159         while (low < bound) {
160             final int x = (low + bound) >>> 1;
161             if (from > p[x * 2 + 2]) {
162                 low = x + 1;
163             } else {
164                 bound = x;
165             }
166         }
167 
168         int high = low;
169         bound = n;
170 
171         while (high < bound) {
172             final int x = (high + bound) >>> 1;
173             if (to >= p[x * 2 + 1] - 1) {
174                 high = x + 1;
175             } else {
176                 bound = x;
177             }
178         }
179 
180         final int incN = low + 1 - high;
181 
182         if (n + incN > Config.MAX_MULTI_BYTE_RANGES_NUM) {
183             throw new ValueException(ErrorMessages.ERR_TOO_MANY_MULTI_BYTE_RANGES);
184         }
185 
186         if (incN != 1) {
187             if (from > p[low * 2 + 1]) {
188                 from = p[low * 2 + 1];
189             }
190             if (to < p[(high - 1) * 2 + 2]) {
191                 to = p[(high - 1) * 2 + 2];
192             }
193         }
194 
195         if (incN != 0 && high < n) {
196             final int fromPos = 1 + high * 2;
197             final int toPos = 1 + (low + 1) * 2;
198             final int size = (n - high) * 2;
199 
200             if (incN > 0) {
201                 pbuf.moveRight(fromPos, toPos, size);
202             } else {
203                 pbuf.moveLeftAndReduce(fromPos, toPos);
204             }
205         }
206 
207         final int pos = 1 + low * 2;
208         // pbuf.ensureSize(pos + 2);
209         pbuf.writeCodePoint(pos, from);
210         pbuf.writeCodePoint(pos + 1, to);
211         n += incN;
212         pbuf.writeCodePoint(0, n);
213 
214         return pbuf;
215     }
216 
217     // add_code_range, be aware of it returning null!
addCodeRange(final CodeRangeBuffer pbuf, final ScanEnvironment env, final int from, final int to)218     public static CodeRangeBuffer addCodeRange(final CodeRangeBuffer pbuf, final ScanEnvironment env, final int from, final int to) {
219         if (from > to) {
220             if (env.syntax.allowEmptyRangeInCC()) {
221                 return pbuf;
222             }
223             throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
224         }
225         return addCodeRangeToBuff(pbuf, from, to);
226     }
227 
228     // SET_ALL_MULTI_BYTE_RANGE
setAllMultiByteRange(final CodeRangeBuffer pbuf)229     protected static CodeRangeBuffer setAllMultiByteRange(final CodeRangeBuffer pbuf) {
230         return addCodeRangeToBuff(pbuf, EncodingHelper.mbcodeStartPosition(), ALL_MULTI_BYTE_RANGE);
231     }
232 
233     // ADD_ALL_MULTI_BYTE_RANGE
addAllMultiByteRange(final CodeRangeBuffer pbuf)234     public static CodeRangeBuffer addAllMultiByteRange(final CodeRangeBuffer pbuf) {
235         return setAllMultiByteRange(pbuf);
236     }
237 
238     // not_code_range_buf
notCodeRangeBuff(final CodeRangeBuffer bbuf)239     public static CodeRangeBuffer notCodeRangeBuff(final CodeRangeBuffer bbuf) {
240         CodeRangeBuffer pbuf = null;
241 
242         if (bbuf == null) {
243             return setAllMultiByteRange(pbuf);
244         }
245 
246         final int[]p = bbuf.p;
247         final int n = p[0];
248 
249         if (n <= 0) {
250             return setAllMultiByteRange(pbuf);
251         }
252 
253         int pre = EncodingHelper.mbcodeStartPosition();
254 
255         int from;
256         int to = 0;
257         for (int i=0; i<n; i++) {
258             from = p[i * 2 + 1];
259             to = p[i * 2 + 2];
260             if (pre <= from - 1) {
261                 pbuf = addCodeRangeToBuff(pbuf, pre, from - 1);
262             }
263             if (to == ALL_MULTI_BYTE_RANGE) {
264                 break;
265             }
266             pre = to + 1;
267         }
268 
269         if (to < ALL_MULTI_BYTE_RANGE) {
270             pbuf = addCodeRangeToBuff(pbuf, to + 1, ALL_MULTI_BYTE_RANGE);
271         }
272         return pbuf;
273     }
274 
275     // or_code_range_buf
orCodeRangeBuff(final CodeRangeBuffer bbuf1p, final boolean not1p, final CodeRangeBuffer bbuf2p, final boolean not2p)276     public static CodeRangeBuffer orCodeRangeBuff(final CodeRangeBuffer bbuf1p, final boolean not1p,
277                                                   final CodeRangeBuffer bbuf2p, final boolean not2p) {
278         CodeRangeBuffer pbuf = null;
279         CodeRangeBuffer bbuf1 = bbuf1p;
280         CodeRangeBuffer bbuf2 = bbuf2p;
281         boolean not1 = not1p;
282         boolean not2 = not2p;
283 
284         if (bbuf1 == null && bbuf2 == null) {
285             if (not1 || not2) {
286                 return setAllMultiByteRange(pbuf);
287             }
288             return null;
289         }
290 
291         if (bbuf2 == null) {
292             CodeRangeBuffer tbuf;
293             boolean tnot;
294             // swap
295             tnot = not1; not1 = not2; not2 = tnot;
296             tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf;
297         }
298 
299         if (bbuf1 == null) {
300             if (not1) {
301                 return setAllMultiByteRange(pbuf);
302             }
303             if (!not2) {
304                 return bbuf2.clone();
305             }
306             return notCodeRangeBuff(bbuf2);
307         }
308 
309         if (not1) {
310             CodeRangeBuffer tbuf;
311             boolean tnot;
312             // swap
313             tnot = not1; not1 = not2; not2 = tnot;
314             tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf;
315         }
316 
317         if (!not2 && !not1) { /* 1 OR 2 */
318             pbuf = bbuf2.clone();
319         } else if (!not1) { /* 1 OR (not 2) */
320             pbuf = notCodeRangeBuff(bbuf2);
321         }
322 
323         final int[]p1 = bbuf1.p;
324         final int n1 = p1[0];
325 
326         for (int i=0; i<n1; i++) {
327             final int from = p1[i * 2 + 1];
328             final int to = p1[i * 2 + 2];
329             pbuf = addCodeRangeToBuff(pbuf, from, to);
330         }
331 
332         return pbuf;
333     }
334 
335     // and_code_range1
andCodeRange1(final CodeRangeBuffer pbufp, final int from1p, final int to1p, final int[]data, final int n)336     public static CodeRangeBuffer andCodeRange1(final CodeRangeBuffer pbufp, final int from1p, final int to1p, final int[]data, final int n) {
337         CodeRangeBuffer pbuf = pbufp;
338         int from1 = from1p, to1 = to1p;
339 
340         for (int i=0; i<n; i++) {
341             final int from2 = data[i * 2 + 1];
342             final int to2 = data[i * 2 + 2];
343             if (from2 < from1) {
344                 if (to2 < from1) {
345                     continue;
346                 }
347                 from1 = to2 + 1;
348             } else if (from2 <= to1) {
349                 if (to2 < to1) {
350                     if (from1 <= from2 - 1) {
351                         pbuf = addCodeRangeToBuff(pbuf, from1, from2 - 1);
352                     }
353                     from1 = to2 + 1;
354                 } else {
355                     to1 = from2 - 1;
356                 }
357             } else {
358                 from1 = from2;
359             }
360             if (from1 > to1) {
361                 break;
362             }
363         }
364 
365         if (from1 <= to1) {
366             pbuf = addCodeRangeToBuff(pbuf, from1, to1);
367         }
368 
369         return pbuf;
370     }
371 
372     // and_code_range_buf
andCodeRangeBuff(final CodeRangeBuffer bbuf1p, final boolean not1p, final CodeRangeBuffer bbuf2p, final boolean not2p)373     public static CodeRangeBuffer andCodeRangeBuff(final CodeRangeBuffer bbuf1p, final boolean not1p,
374                                                    final CodeRangeBuffer bbuf2p, final boolean not2p) {
375         CodeRangeBuffer pbuf = null;
376         CodeRangeBuffer bbuf1 = bbuf1p;
377         CodeRangeBuffer bbuf2 = bbuf2p;
378         boolean not1 = not1p, not2 = not2p;
379 
380         if (bbuf1 == null) {
381             if (not1 && bbuf2 != null) {
382                 return bbuf2.clone(); /* not1 != 0 -> not2 == 0 */
383             }
384             return null;
385         } else if (bbuf2 == null) {
386             if (not2) {
387                 return bbuf1.clone();
388             }
389             return null;
390         }
391 
392         if (not1) {
393             CodeRangeBuffer tbuf;
394             boolean tnot;
395             // swap
396             tnot = not1; not1 = not2; not2 = tnot;
397             tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf;
398         }
399 
400         final int[]p1 = bbuf1.p;
401         final int n1 = p1[0];
402         final int[]p2 = bbuf2.p;
403         final int n2 = p2[0];
404 
405         if (!not2 && !not1) { /* 1 AND 2 */
406             for (int i=0; i<n1; i++) {
407                 final int from1 = p1[i * 2 + 1];
408                 final int to1 = p1[i * 2 + 2];
409 
410                 for (int j=0; j<n2; j++) {
411                     final int from2 = p2[j * 2 + 1];
412                     final int to2 = p2[j * 2 + 2];
413 
414                     if (from2 > to1) {
415                         break;
416                     }
417                     if (to2 < from1) {
418                         continue;
419                     }
420                     final int from = from1 > from2 ? from1 : from2;
421                     final int to = to1 < to2 ? to1 : to2;
422                     pbuf = addCodeRangeToBuff(pbuf, from, to);
423                 }
424             }
425         } else if (!not1) { /* 1 AND (not 2) */
426             for (int i=0; i<n1; i++) {
427                 final int from1 = p1[i * 2 + 1];
428                 final int to1 = p1[i * 2 + 2];
429                 pbuf = andCodeRange1(pbuf, from1, to1, p2, n2);
430             }
431         }
432 
433         return pbuf;
434     }
435 }
436