1 /*
2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
3  * this software and associated documentation files (the "Software"), to deal in
4  * the Software without restriction, including without limitation the rights to
5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6  * of the Software, and to permit persons to whom the Software is furnished to do
7  * so, subject to the following conditions:
8  *
9  * The above copyright notice and this permission notice shall be included in all
10  * copies or substantial portions of the Software.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18  * SOFTWARE.
19  */
20 package jdk.nashorn.internal.runtime.regexp.joni;
21 
22 import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
23 import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState;
24 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
25 import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
26 
27 @SuppressWarnings("javadoc")
28 public final class Regex implements RegexState {
29 
30     int[] code;             /* compiled pattern */
31     int codeLength;
32     boolean stackNeeded;
33     Object[] operands;       /* e.g. shared CClassNode */
34     int operandLength;
35 
36     int numMem;             /* used memory(...) num counted from 1 */
37     int numRepeat;          /* OP_REPEAT/OP_REPEAT_NG id-counter */
38     int numNullCheck;       /* OP_NULL_CHECK_START/END id counter */
39     int captureHistory;     /* (?@...) flag (1-31) */
40     int btMemStart;         /* need backtrack flag */
41     int btMemEnd;           /* need backtrack flag */
42 
43     int stackPopLevel;
44 
45     int[] repeatRangeLo;
46     int[] repeatRangeHi;
47 
48     WarnCallback warnings;
49     MatcherFactory factory;
50     protected Analyser analyser;
51 
52     int options;
53     final int caseFoldFlag;
54 
55     /* optimization info (string search, char-map and anchors) */
56     SearchAlgorithm searchAlgorithm;        /* optimize flag */
57     int thresholdLength;                    /* search str-length for apply optimize */
58     int anchor;                             /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
59     int anchorDmin;                         /* (SEMI_)END_BUF anchor distance */
60     int anchorDmax;                         /* (SEMI_)END_BUF anchor distance */
61     int subAnchor;                          /* start-anchor for exact or map */
62 
63     char[] exact;
64     int exactP;
65     int exactEnd;
66 
67     byte[] map;                              /* used as BM skip or char-map */
68     int[] intMap;                            /* BM skip for exact_len > 255 */
69     int[] intMapBackward;                    /* BM skip for backward search */
70     int dMin;                               /* min-distance of exact or map */
71     int dMax;                               /* max-distance of exact or map */
72 
73     char[][] templates;
74     int templateNum;
75 
Regex(final CharSequence cs)76     public Regex(final CharSequence cs) {
77         this(cs.toString());
78     }
79 
Regex(final String str)80     public Regex(final String str) {
81         this(str.toCharArray(), 0, str.length(), 0);
82     }
83 
Regex(final char[] chars)84     public Regex(final char[] chars) {
85         this(chars, 0, chars.length, 0);
86     }
87 
Regex(final char[] chars, final int p, final int end)88     public Regex(final char[] chars, final int p, final int end) {
89         this(chars, p, end, 0);
90     }
91 
Regex(final char[] chars, final int p, final int end, final int option)92     public Regex(final char[] chars, final int p, final int end, final int option) {
93         this(chars, p, end, option, Syntax.RUBY, WarnCallback.DEFAULT);
94     }
95 
96     // onig_new
Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax)97     public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax) {
98         this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, WarnCallback.DEFAULT);
99     }
100 
Regex(final char[]chars, final int p, final int end, final int option, final WarnCallback warnings)101     public Regex(final char[]chars, final int p, final int end, final int option, final WarnCallback warnings) {
102         this(chars, p, end, option, Syntax.RUBY, warnings);
103     }
104 
105     // onig_new
Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax, final WarnCallback warnings)106     public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax, final WarnCallback warnings) {
107         this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, warnings);
108     }
109 
110     // onig_alloc_init
Regex(final char[] chars, final int p, final int end, final int optionp, final int caseFoldFlag, final Syntax syntax, final WarnCallback warnings)111     public Regex(final char[] chars, final int p, final int end, final int optionp, final int caseFoldFlag, final Syntax syntax, final WarnCallback warnings) {
112         int option = optionp;
113 
114         if ((option & (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) ==
115             (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) {
116             throw new ValueException(ErrorMessages.ERR_INVALID_COMBINATION_OF_OPTIONS);
117         }
118 
119         if ((option & Option.NEGATE_SINGLELINE) != 0) {
120             option |= syntax.options;
121             option &= ~Option.SINGLELINE;
122         } else {
123             option |= syntax.options;
124         }
125 
126         this.options = option;
127         this.caseFoldFlag = caseFoldFlag;
128         this.warnings = warnings;
129 
130         this.analyser = new Analyser(new ScanEnvironment(this, syntax), chars, p, end);
131         this.analyser.compile();
132 
133         this.warnings = null;
134     }
135 
compile()136     public synchronized MatcherFactory compile() {
137         if (factory == null && analyser != null) {
138             new ArrayCompiler(analyser).compile();
139             analyser = null; // only do this once
140         }
141         assert factory != null;
142         return factory;
143     }
144 
matcher(final char[] chars)145     public Matcher matcher(final char[] chars) {
146         return matcher(chars, 0, chars.length);
147     }
148 
matcher(final char[] chars, final int p, final int end)149     public Matcher matcher(final char[] chars, final int p, final int end) {
150         MatcherFactory matcherFactory = factory;
151         if (matcherFactory == null) {
152             matcherFactory = compile();
153         }
154         return matcherFactory.create(this, chars, p, end);
155     }
156 
getWarnings()157     public WarnCallback getWarnings() {
158         return warnings;
159     }
160 
numberOfCaptures()161     public int numberOfCaptures() {
162         return numMem;
163     }
164 
165     /* set skip map for Boyer-Moor search */
setupBMSkipMap()166     void setupBMSkipMap() {
167         final char[] chars = exact;
168         final int p = exactP;
169         final int end = exactEnd;
170         final int len = end - p;
171 
172         if (len < Config.CHAR_TABLE_SIZE) {
173             // map/skip
174             if (map == null) {
175                 map = new byte[Config.CHAR_TABLE_SIZE];
176             }
177 
178             for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
179                 map[i] = (byte)len;
180             }
181             for (int i=0; i<len-1; i++)
182              {
183                 map[chars[p + i] & 0xff] = (byte)(len - 1 -i); // oxff ??
184             }
185         } else {
186             if (intMap == null) {
187                 intMap = new int[Config.CHAR_TABLE_SIZE];
188             }
189 
190             for (int i=0; i<len-1; i++)
191              {
192                 intMap[chars[p + i] & 0xff] = len - 1 - i; // oxff ??
193             }
194         }
195     }
196 
setExactInfo(final OptExactInfo e)197     void setExactInfo(final OptExactInfo e) {
198         if (e.length == 0) {
199             return;
200         }
201 
202         // shall we copy that ?
203         exact = e.chars;
204         exactP = 0;
205         exactEnd = e.length;
206 
207         if (e.ignoreCase) {
208             searchAlgorithm = new SearchAlgorithm.SLOW_IC(this);
209         } else {
210             if (e.length >= 2) {
211                 setupBMSkipMap();
212                 searchAlgorithm = SearchAlgorithm.BM;
213             } else {
214                 searchAlgorithm = SearchAlgorithm.SLOW;
215             }
216         }
217 
218         dMin = e.mmd.min;
219         dMax = e.mmd.max;
220 
221         if (dMin != MinMaxLen.INFINITE_DISTANCE) {
222             thresholdLength = dMin + (exactEnd - exactP);
223         }
224     }
225 
setOptimizeMapInfo(final OptMapInfo m)226     void setOptimizeMapInfo(final OptMapInfo m) {
227         map = m.map;
228 
229         searchAlgorithm = SearchAlgorithm.MAP;
230         dMin = m.mmd.min;
231         dMax = m.mmd.max;
232 
233         if (dMin != MinMaxLen.INFINITE_DISTANCE) {
234             thresholdLength = dMin + 1;
235         }
236     }
237 
setSubAnchor(final OptAnchorInfo anc)238     void setSubAnchor(final OptAnchorInfo anc) {
239         subAnchor |= anc.leftAnchor & AnchorType.BEGIN_LINE;
240         subAnchor |= anc.rightAnchor & AnchorType.END_LINE;
241     }
242 
clearOptimizeInfo()243     void clearOptimizeInfo() {
244         searchAlgorithm = SearchAlgorithm.NONE;
245         anchor = 0;
246         anchorDmax = 0;
247         anchorDmin = 0;
248         subAnchor = 0;
249 
250         exact = null;
251         exactP = exactEnd = 0;
252     }
253 
optimizeInfoToString()254     public String optimizeInfoToString() {
255         final StringBuilder s = new StringBuilder();
256         s.append("optimize: ").append(searchAlgorithm.getName()).append("\n");
257         s.append("  anchor:     ").append(OptAnchorInfo.anchorToString(anchor));
258 
259         if ((anchor & AnchorType.END_BUF_MASK) != 0) {
260             s.append(MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax));
261         }
262 
263         s.append("\n");
264 
265         if (searchAlgorithm != SearchAlgorithm.NONE) {
266             s.append("  sub anchor: ").append(OptAnchorInfo.anchorToString(subAnchor)).append("\n");
267         }
268 
269         s.append("dmin: ").append(dMin).append(" dmax: ").append(dMax).append("\n");
270         s.append("threshold length: ").append(thresholdLength).append("\n");
271 
272         if (exact != null) {
273             s.append("exact: [").append(exact, exactP, exactEnd - exactP).append("]: length: ").append(exactEnd - exactP).append("\n");
274         } else if (searchAlgorithm == SearchAlgorithm.MAP) {
275             int n=0;
276             for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
277                 if (map[i] != 0) {
278                     n++;
279                 }
280             }
281 
282             s.append("map: n = ").append(n).append("\n");
283             if (n > 0) {
284                 int c=0;
285                 s.append("[");
286                 for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
287                     if (map[i] != 0) {
288                         if (c > 0) {
289                             s.append(", ");
290                         }
291                         c++;
292                         // TODO if (enc.isPrint(i)
293                         s.append((char)i);
294                     }
295                 }
296                 s.append("]\n");
297             }
298         }
299 
300         return s.toString();
301     }
302 
getOptions()303     public int getOptions() {
304         return options;
305     }
306 
dumpTree()307     public String dumpTree() {
308         return analyser == null ? null : analyser.root.toString();
309     }
310 
dumpByteCode()311     public String dumpByteCode() {
312         compile();
313         return new ByteCodePrinter(this).byteCodeListToString();
314     }
315 
316 }
317