1c2c66affSColin Finck /*
2c2c66affSColin Finck * Copyright 2008 Jacek Caban for CodeWeavers
3c2c66affSColin Finck *
4c2c66affSColin Finck * This library is free software; you can redistribute it and/or
5c2c66affSColin Finck * modify it under the terms of the GNU Lesser General Public
6c2c66affSColin Finck * License as published by the Free Software Foundation; either
7c2c66affSColin Finck * version 2.1 of the License, or (at your option) any later version.
8c2c66affSColin Finck *
9c2c66affSColin Finck * This library is distributed in the hope that it will be useful,
10c2c66affSColin Finck * but WITHOUT ANY WARRANTY; without even the implied warranty of
11c2c66affSColin Finck * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12c2c66affSColin Finck * Lesser General Public License for more details.
13c2c66affSColin Finck *
14c2c66affSColin Finck * You should have received a copy of the GNU Lesser General Public
15c2c66affSColin Finck * License along with this library; if not, write to the Free Software
16c2c66affSColin Finck * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17c2c66affSColin Finck */
18c2c66affSColin Finck
19c2c66affSColin Finck /*
20c2c66affSColin Finck * Code in this file is based on files:
21c2c66affSColin Finck * js/src/jsregexp.h
22c2c66affSColin Finck * js/src/jsregexp.c
23c2c66affSColin Finck * from Mozilla project, released under LGPL 2.1 or later.
24c2c66affSColin Finck *
25c2c66affSColin Finck * The Original Code is Mozilla Communicator client code, released
26c2c66affSColin Finck * March 31, 1998.
27c2c66affSColin Finck *
28c2c66affSColin Finck * The Initial Developer of the Original Code is
29c2c66affSColin Finck * Netscape Communications Corporation.
30c2c66affSColin Finck * Portions created by the Initial Developer are Copyright (C) 1998
31c2c66affSColin Finck * the Initial Developer. All Rights Reserved.
32c2c66affSColin Finck */
33c2c66affSColin Finck
348dba275bSAmine Khaldi #include <assert.h>
358dba275bSAmine Khaldi
36c2c66affSColin Finck #include "jscript.h"
378dba275bSAmine Khaldi #include "regexp.h"
388dba275bSAmine Khaldi
398dba275bSAmine Khaldi #include "wine/debug.h"
408dba275bSAmine Khaldi
418dba275bSAmine Khaldi WINE_DEFAULT_DEBUG_CHANNEL(jscript);
42c2c66affSColin Finck
43c2c66affSColin Finck /* FIXME: Better error handling */
44c2c66affSColin Finck #define ReportRegExpError(a,b,c)
45c2c66affSColin Finck #define ReportRegExpErrorHelper(a,b,c,d)
46c2c66affSColin Finck #define JS_ReportErrorNumber(a,b,c,d)
47c2c66affSColin Finck #define JS_ReportErrorFlagsAndNumber(a,b,c,d,e,f)
48c2c66affSColin Finck #define js_ReportOutOfScriptQuota(a)
49c2c66affSColin Finck #define JS_ReportOutOfMemory(a)
50c2c66affSColin Finck #define JS_COUNT_OPERATION(a,b)
51c2c66affSColin Finck
52c2c66affSColin Finck
53c2c66affSColin Finck typedef BYTE JSPackedBool;
54c2c66affSColin Finck
55c2c66affSColin Finck /*
56c2c66affSColin Finck * This struct holds a bitmap representation of a class from a regexp.
57c2c66affSColin Finck * There's a list of these referenced by the classList field in the regexp_t
58c2c66affSColin Finck * struct below. The initial state has startIndex set to the offset in the
59c2c66affSColin Finck * original regexp source of the beginning of the class contents. The first
60c2c66affSColin Finck * use of the class converts the source representation into a bitmap.
61c2c66affSColin Finck *
62c2c66affSColin Finck */
63c2c66affSColin Finck typedef struct RECharSet {
64c2c66affSColin Finck JSPackedBool converted;
65c2c66affSColin Finck JSPackedBool sense;
66c2c66affSColin Finck WORD length;
67c2c66affSColin Finck union {
68c2c66affSColin Finck BYTE *bits;
69c2c66affSColin Finck struct {
70c2c66affSColin Finck size_t startIndex;
71c2c66affSColin Finck size_t length;
72c2c66affSColin Finck } src;
73c2c66affSColin Finck } u;
74c2c66affSColin Finck } RECharSet;
75c2c66affSColin Finck
76c2c66affSColin Finck #define JSMSG_MIN_TOO_BIG 47
77c2c66affSColin Finck #define JSMSG_MAX_TOO_BIG 48
78c2c66affSColin Finck #define JSMSG_OUT_OF_ORDER 49
79c2c66affSColin Finck #define JSMSG_OUT_OF_MEMORY 137
80c2c66affSColin Finck
81c2c66affSColin Finck #define LINE_SEPARATOR 0x2028
82c2c66affSColin Finck #define PARA_SEPARATOR 0x2029
83c2c66affSColin Finck
84c2c66affSColin Finck #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
85c2c66affSColin Finck ((c >= 'a') && (c <= 'z')) )
86c2c66affSColin Finck #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
87c2c66affSColin Finck (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
88c2c66affSColin Finck
89c2c66affSColin Finck #define JS_ISWORD(c) ((c) < 128 && (isalnum(c) || (c) == '_'))
90c2c66affSColin Finck
91c2c66affSColin Finck #define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9)
92c2c66affSColin Finck #define JS7_UNDEC(c) ((c) - '0')
93c2c66affSColin Finck
94c2c66affSColin Finck typedef enum REOp {
95c2c66affSColin Finck REOP_EMPTY,
96c2c66affSColin Finck REOP_BOL,
97c2c66affSColin Finck REOP_EOL,
98c2c66affSColin Finck REOP_WBDRY,
99c2c66affSColin Finck REOP_WNONBDRY,
100c2c66affSColin Finck REOP_DOT,
101c2c66affSColin Finck REOP_DIGIT,
102c2c66affSColin Finck REOP_NONDIGIT,
103c2c66affSColin Finck REOP_ALNUM,
104c2c66affSColin Finck REOP_NONALNUM,
105c2c66affSColin Finck REOP_SPACE,
106c2c66affSColin Finck REOP_NONSPACE,
107c2c66affSColin Finck REOP_BACKREF,
108c2c66affSColin Finck REOP_FLAT,
109c2c66affSColin Finck REOP_FLAT1,
110c2c66affSColin Finck REOP_FLATi,
111c2c66affSColin Finck REOP_FLAT1i,
112c2c66affSColin Finck REOP_UCFLAT1,
113c2c66affSColin Finck REOP_UCFLAT1i,
114c2c66affSColin Finck REOP_UCFLAT,
115c2c66affSColin Finck REOP_UCFLATi,
116c2c66affSColin Finck REOP_CLASS,
117c2c66affSColin Finck REOP_NCLASS,
118c2c66affSColin Finck REOP_ALT,
119c2c66affSColin Finck REOP_QUANT,
120c2c66affSColin Finck REOP_STAR,
121c2c66affSColin Finck REOP_PLUS,
122c2c66affSColin Finck REOP_OPT,
123c2c66affSColin Finck REOP_LPAREN,
124c2c66affSColin Finck REOP_RPAREN,
125c2c66affSColin Finck REOP_JUMP,
126c2c66affSColin Finck REOP_DOTSTAR,
127c2c66affSColin Finck REOP_LPARENNON,
128c2c66affSColin Finck REOP_ASSERT,
129c2c66affSColin Finck REOP_ASSERT_NOT,
130c2c66affSColin Finck REOP_ASSERTTEST,
131c2c66affSColin Finck REOP_ASSERTNOTTEST,
132c2c66affSColin Finck REOP_MINIMALSTAR,
133c2c66affSColin Finck REOP_MINIMALPLUS,
134c2c66affSColin Finck REOP_MINIMALOPT,
135c2c66affSColin Finck REOP_MINIMALQUANT,
136c2c66affSColin Finck REOP_ENDCHILD,
137c2c66affSColin Finck REOP_REPEAT,
138c2c66affSColin Finck REOP_MINIMALREPEAT,
139c2c66affSColin Finck REOP_ALTPREREQ,
140c2c66affSColin Finck REOP_ALTPREREQ2,
141c2c66affSColin Finck REOP_ENDALT,
142c2c66affSColin Finck REOP_CONCAT,
143c2c66affSColin Finck REOP_END,
144c2c66affSColin Finck REOP_LIMIT /* META: no operator >= to this */
145c2c66affSColin Finck } REOp;
146c2c66affSColin Finck
147c2c66affSColin Finck #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
148c2c66affSColin Finck
149c2c66affSColin Finck static const char *reop_names[] = {
150c2c66affSColin Finck "empty",
151c2c66affSColin Finck "bol",
152c2c66affSColin Finck "eol",
153c2c66affSColin Finck "wbdry",
154c2c66affSColin Finck "wnonbdry",
155c2c66affSColin Finck "dot",
156c2c66affSColin Finck "digit",
157c2c66affSColin Finck "nondigit",
158c2c66affSColin Finck "alnum",
159c2c66affSColin Finck "nonalnum",
160c2c66affSColin Finck "space",
161c2c66affSColin Finck "nonspace",
162c2c66affSColin Finck "backref",
163c2c66affSColin Finck "flat",
164c2c66affSColin Finck "flat1",
165c2c66affSColin Finck "flati",
166c2c66affSColin Finck "flat1i",
167c2c66affSColin Finck "ucflat1",
168c2c66affSColin Finck "ucflat1i",
169c2c66affSColin Finck "ucflat",
170c2c66affSColin Finck "ucflati",
171c2c66affSColin Finck "class",
172c2c66affSColin Finck "nclass",
173c2c66affSColin Finck "alt",
174c2c66affSColin Finck "quant",
175c2c66affSColin Finck "star",
176c2c66affSColin Finck "plus",
177c2c66affSColin Finck "opt",
178c2c66affSColin Finck "lparen",
179c2c66affSColin Finck "rparen",
180c2c66affSColin Finck "jump",
181c2c66affSColin Finck "dotstar",
182c2c66affSColin Finck "lparennon",
183c2c66affSColin Finck "assert",
184c2c66affSColin Finck "assert_not",
185c2c66affSColin Finck "asserttest",
186c2c66affSColin Finck "assertnottest",
187c2c66affSColin Finck "minimalstar",
188c2c66affSColin Finck "minimalplus",
189c2c66affSColin Finck "minimalopt",
190c2c66affSColin Finck "minimalquant",
191c2c66affSColin Finck "endchild",
192c2c66affSColin Finck "repeat",
193c2c66affSColin Finck "minimalrepeat",
194c2c66affSColin Finck "altprereq",
195c2c66affSColin Finck "alrprereq2",
196c2c66affSColin Finck "endalt",
197c2c66affSColin Finck "concat",
198c2c66affSColin Finck "end",
199c2c66affSColin Finck NULL
200c2c66affSColin Finck };
201c2c66affSColin Finck
202c2c66affSColin Finck typedef struct REProgState {
203c2c66affSColin Finck jsbytecode *continue_pc; /* current continuation data */
204c2c66affSColin Finck jsbytecode continue_op;
205c2c66affSColin Finck ptrdiff_t index; /* progress in text */
206c2c66affSColin Finck size_t parenSoFar; /* highest indexed paren started */
207c2c66affSColin Finck union {
208c2c66affSColin Finck struct {
209c2c66affSColin Finck UINT min; /* current quantifier limits */
210c2c66affSColin Finck UINT max;
211c2c66affSColin Finck } quantifier;
212c2c66affSColin Finck struct {
213c2c66affSColin Finck size_t top; /* backtrack stack state */
214c2c66affSColin Finck size_t sz;
215c2c66affSColin Finck } assertion;
216c2c66affSColin Finck } u;
217c2c66affSColin Finck } REProgState;
218c2c66affSColin Finck
219c2c66affSColin Finck typedef struct REBackTrackData {
220c2c66affSColin Finck size_t sz; /* size of previous stack entry */
221c2c66affSColin Finck jsbytecode *backtrack_pc; /* where to backtrack to */
222c2c66affSColin Finck jsbytecode backtrack_op;
223c2c66affSColin Finck const WCHAR *cp; /* index in text of match at backtrack */
224c2c66affSColin Finck size_t parenIndex; /* start index of saved paren contents */
225c2c66affSColin Finck size_t parenCount; /* # of saved paren contents */
226c2c66affSColin Finck size_t saveStateStackTop; /* number of parent states */
227c2c66affSColin Finck /* saved parent states follow */
228c2c66affSColin Finck /* saved paren contents follow */
229c2c66affSColin Finck } REBackTrackData;
230c2c66affSColin Finck
231c2c66affSColin Finck #define INITIAL_STATESTACK 100
232c2c66affSColin Finck #define INITIAL_BACKTRACK 8000
233c2c66affSColin Finck
234c2c66affSColin Finck typedef struct REGlobalData {
235c2c66affSColin Finck void *cx;
236c2c66affSColin Finck regexp_t *regexp; /* the RE in execution */
237c2c66affSColin Finck BOOL ok; /* runtime error (out_of_memory only?) */
238c2c66affSColin Finck size_t start; /* offset to start at */
239c2c66affSColin Finck ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
240c2c66affSColin Finck const WCHAR *cpbegin; /* text base address */
241c2c66affSColin Finck const WCHAR *cpend; /* text limit address */
242c2c66affSColin Finck
243c2c66affSColin Finck REProgState *stateStack; /* stack of state of current parents */
244c2c66affSColin Finck size_t stateStackTop;
245c2c66affSColin Finck size_t stateStackLimit;
246c2c66affSColin Finck
247c2c66affSColin Finck REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
248c2c66affSColin Finck REBackTrackData *backTrackSP;
249c2c66affSColin Finck size_t backTrackStackSize;
250c2c66affSColin Finck size_t cursz; /* size of current stack entry */
251c2c66affSColin Finck size_t backTrackCount; /* how many times we've backtracked */
252c2c66affSColin Finck size_t backTrackLimit; /* upper limit on backtrack states */
253c2c66affSColin Finck
254c2c66affSColin Finck heap_pool_t *pool; /* It's faster to use one malloc'd pool
255c2c66affSColin Finck than to malloc/free the three items
256c2c66affSColin Finck that are allocated from this pool */
257c2c66affSColin Finck } REGlobalData;
258c2c66affSColin Finck
259c2c66affSColin Finck typedef struct RENode RENode;
260c2c66affSColin Finck struct RENode {
261c2c66affSColin Finck REOp op; /* r.e. op bytecode */
262c2c66affSColin Finck RENode *next; /* next in concatenation order */
263c2c66affSColin Finck void *kid; /* first operand */
264c2c66affSColin Finck union {
265c2c66affSColin Finck void *kid2; /* second operand */
266c2c66affSColin Finck INT num; /* could be a number */
267c2c66affSColin Finck size_t parenIndex; /* or a parenthesis index */
268c2c66affSColin Finck struct { /* or a quantifier range */
269c2c66affSColin Finck UINT min;
270c2c66affSColin Finck UINT max;
271c2c66affSColin Finck JSPackedBool greedy;
272c2c66affSColin Finck } range;
273c2c66affSColin Finck struct { /* or a character class */
274c2c66affSColin Finck size_t startIndex;
275c2c66affSColin Finck size_t kidlen; /* length of string at kid, in jschars */
276c2c66affSColin Finck size_t index; /* index into class list */
277c2c66affSColin Finck WORD bmsize; /* bitmap size, based on max char code */
278c2c66affSColin Finck JSPackedBool sense;
279c2c66affSColin Finck } ucclass;
280c2c66affSColin Finck struct { /* or a literal sequence */
281c2c66affSColin Finck WCHAR chr; /* of one character */
282c2c66affSColin Finck size_t length; /* or many (via the kid) */
283c2c66affSColin Finck } flat;
284c2c66affSColin Finck struct {
285c2c66affSColin Finck RENode *kid2; /* second operand from ALT */
286c2c66affSColin Finck WCHAR ch1; /* match char for ALTPREREQ */
287c2c66affSColin Finck WCHAR ch2; /* ditto, or class index for ALTPREREQ2 */
288c2c66affSColin Finck } altprereq;
289c2c66affSColin Finck } u;
290c2c66affSColin Finck };
291c2c66affSColin Finck
292c2c66affSColin Finck #define CLASS_CACHE_SIZE 4
293c2c66affSColin Finck
294c2c66affSColin Finck typedef struct CompilerState {
295c2c66affSColin Finck void *context;
296c2c66affSColin Finck const WCHAR *cpbegin;
297c2c66affSColin Finck const WCHAR *cpend;
298c2c66affSColin Finck const WCHAR *cp;
299c2c66affSColin Finck size_t parenCount;
300c2c66affSColin Finck size_t classCount; /* number of [] encountered */
301c2c66affSColin Finck size_t treeDepth; /* maximum depth of parse tree */
302c2c66affSColin Finck size_t progLength; /* estimated bytecode length */
303c2c66affSColin Finck RENode *result;
304c2c66affSColin Finck size_t classBitmapsMem; /* memory to hold all class bitmaps */
305c2c66affSColin Finck struct {
306c2c66affSColin Finck const WCHAR *start; /* small cache of class strings */
307c2c66affSColin Finck size_t length; /* since they're often the same */
308c2c66affSColin Finck size_t index;
309c2c66affSColin Finck } classCache[CLASS_CACHE_SIZE];
310c2c66affSColin Finck WORD flags;
311c2c66affSColin Finck
312c2c66affSColin Finck heap_pool_t *pool; /* It's faster to use one malloc'd pool
313c2c66affSColin Finck than to malloc/free */
314c2c66affSColin Finck } CompilerState;
315c2c66affSColin Finck
316c2c66affSColin Finck typedef struct EmitStateStackEntry {
317c2c66affSColin Finck jsbytecode *altHead; /* start of REOP_ALT* opcode */
318c2c66affSColin Finck jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
319c2c66affSColin Finck jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
320c2c66affSColin Finck jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
321c2c66affSColin Finck RENode *continueNode; /* original REOP_ALT* node being stacked */
322c2c66affSColin Finck jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
323c2c66affSColin Finck JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
324c2c66affSColin Finck avoid 16-bit unsigned offset overflow */
325c2c66affSColin Finck } EmitStateStackEntry;
326c2c66affSColin Finck
327c2c66affSColin Finck /*
328c2c66affSColin Finck * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
329c2c66affSColin Finck * the getters and setters take the pc of the offset, not of the opcode before
330c2c66affSColin Finck * the offset.
331c2c66affSColin Finck */
332c2c66affSColin Finck #define ARG_LEN 2
333c2c66affSColin Finck #define GET_ARG(pc) ((WORD)(((pc)[0] << 8) | (pc)[1]))
334c2c66affSColin Finck #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
335c2c66affSColin Finck (pc)[1] = (jsbytecode) (arg))
336c2c66affSColin Finck
337c2c66affSColin Finck #define OFFSET_LEN ARG_LEN
338c2c66affSColin Finck #define OFFSET_MAX ((1 << (ARG_LEN * 8)) - 1)
339c2c66affSColin Finck #define GET_OFFSET(pc) GET_ARG(pc)
340c2c66affSColin Finck
341c2c66affSColin Finck static BOOL ParseRegExp(CompilerState*);
342c2c66affSColin Finck
343c2c66affSColin Finck /*
344c2c66affSColin Finck * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
345c2c66affSColin Finck * For sanity, we limit it to 2^24 bytes.
346c2c66affSColin Finck */
347c2c66affSColin Finck #define TREE_DEPTH_MAX ((1 << 24) / sizeof(EmitStateStackEntry))
348c2c66affSColin Finck
349c2c66affSColin Finck /*
350c2c66affSColin Finck * The maximum memory that can be allocated for class bitmaps.
351c2c66affSColin Finck * For sanity, we limit it to 2^24 bytes.
352c2c66affSColin Finck */
353c2c66affSColin Finck #define CLASS_BITMAPS_MEM_LIMIT (1 << 24)
354c2c66affSColin Finck
355c2c66affSColin Finck /*
356c2c66affSColin Finck * Functions to get size and write/read bytecode that represent small indexes
357c2c66affSColin Finck * compactly.
358c2c66affSColin Finck * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
359c2c66affSColin Finck * indicates that the following byte brings more bits to the index. Otherwise
360c2c66affSColin Finck * this is the last byte in the index bytecode representing highest index bits.
361c2c66affSColin Finck */
362c2c66affSColin Finck static size_t
GetCompactIndexWidth(size_t index)363c2c66affSColin Finck GetCompactIndexWidth(size_t index)
364c2c66affSColin Finck {
365c2c66affSColin Finck size_t width;
366c2c66affSColin Finck
367c2c66affSColin Finck for (width = 1; (index >>= 7) != 0; ++width) { }
368c2c66affSColin Finck return width;
369c2c66affSColin Finck }
370c2c66affSColin Finck
371c2c66affSColin Finck static inline jsbytecode *
WriteCompactIndex(jsbytecode * pc,size_t index)372c2c66affSColin Finck WriteCompactIndex(jsbytecode *pc, size_t index)
373c2c66affSColin Finck {
374c2c66affSColin Finck size_t next;
375c2c66affSColin Finck
376c2c66affSColin Finck while ((next = index >> 7) != 0) {
377c2c66affSColin Finck *pc++ = (jsbytecode)(index | 0x80);
378c2c66affSColin Finck index = next;
379c2c66affSColin Finck }
380c2c66affSColin Finck *pc++ = (jsbytecode)index;
381c2c66affSColin Finck return pc;
382c2c66affSColin Finck }
383c2c66affSColin Finck
384c2c66affSColin Finck static inline jsbytecode *
ReadCompactIndex(jsbytecode * pc,size_t * result)385c2c66affSColin Finck ReadCompactIndex(jsbytecode *pc, size_t *result)
386c2c66affSColin Finck {
387c2c66affSColin Finck size_t nextByte;
388c2c66affSColin Finck
389c2c66affSColin Finck nextByte = *pc++;
390c2c66affSColin Finck if ((nextByte & 0x80) == 0) {
391c2c66affSColin Finck /*
392c2c66affSColin Finck * Short-circuit the most common case when compact index <= 127.
393c2c66affSColin Finck */
394c2c66affSColin Finck *result = nextByte;
395c2c66affSColin Finck } else {
396c2c66affSColin Finck size_t shift = 7;
397c2c66affSColin Finck *result = 0x7F & nextByte;
398c2c66affSColin Finck do {
399c2c66affSColin Finck nextByte = *pc++;
400c2c66affSColin Finck *result |= (nextByte & 0x7F) << shift;
401c2c66affSColin Finck shift += 7;
402c2c66affSColin Finck } while ((nextByte & 0x80) != 0);
403c2c66affSColin Finck }
404c2c66affSColin Finck return pc;
405c2c66affSColin Finck }
406c2c66affSColin Finck
407c2c66affSColin Finck /* Construct and initialize an RENode, returning NULL for out-of-memory */
408c2c66affSColin Finck static RENode *
NewRENode(CompilerState * state,REOp op)409c2c66affSColin Finck NewRENode(CompilerState *state, REOp op)
410c2c66affSColin Finck {
411c2c66affSColin Finck RENode *ren;
412c2c66affSColin Finck
413c2c66affSColin Finck ren = heap_pool_alloc(state->pool, sizeof(*ren));
414c2c66affSColin Finck if (!ren) {
415c2c66affSColin Finck /* js_ReportOutOfScriptQuota(cx); */
416c2c66affSColin Finck return NULL;
417c2c66affSColin Finck }
418c2c66affSColin Finck ren->op = op;
419c2c66affSColin Finck ren->next = NULL;
420c2c66affSColin Finck ren->kid = NULL;
421c2c66affSColin Finck return ren;
422c2c66affSColin Finck }
423c2c66affSColin Finck
424c2c66affSColin Finck /*
425c2c66affSColin Finck * Validates and converts hex ascii value.
426c2c66affSColin Finck */
427c2c66affSColin Finck static BOOL
isASCIIHexDigit(WCHAR c,UINT * digit)428c2c66affSColin Finck isASCIIHexDigit(WCHAR c, UINT *digit)
429c2c66affSColin Finck {
430c2c66affSColin Finck UINT cv = c;
431c2c66affSColin Finck
432c2c66affSColin Finck if (cv < '0')
433c2c66affSColin Finck return FALSE;
434c2c66affSColin Finck if (cv <= '9') {
435c2c66affSColin Finck *digit = cv - '0';
436c2c66affSColin Finck return TRUE;
437c2c66affSColin Finck }
438c2c66affSColin Finck cv |= 0x20;
439c2c66affSColin Finck if (cv >= 'a' && cv <= 'f') {
440c2c66affSColin Finck *digit = cv - 'a' + 10;
441c2c66affSColin Finck return TRUE;
442c2c66affSColin Finck }
443c2c66affSColin Finck return FALSE;
444c2c66affSColin Finck }
445c2c66affSColin Finck
446c2c66affSColin Finck typedef struct {
447c2c66affSColin Finck REOp op;
448c2c66affSColin Finck const WCHAR *errPos;
449c2c66affSColin Finck size_t parenIndex;
450c2c66affSColin Finck } REOpData;
451c2c66affSColin Finck
452c2c66affSColin Finck #define JUMP_OFFSET_HI(off) ((jsbytecode)((off) >> 8))
453c2c66affSColin Finck #define JUMP_OFFSET_LO(off) ((jsbytecode)(off))
454c2c66affSColin Finck
455c2c66affSColin Finck static BOOL
SetForwardJumpOffset(jsbytecode * jump,jsbytecode * target)456c2c66affSColin Finck SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
457c2c66affSColin Finck {
458c2c66affSColin Finck ptrdiff_t offset = target - jump;
459c2c66affSColin Finck
460c2c66affSColin Finck /* Check that target really points forward. */
461c2c66affSColin Finck assert(offset >= 2);
462c2c66affSColin Finck if ((size_t)offset > OFFSET_MAX)
463c2c66affSColin Finck return FALSE;
464c2c66affSColin Finck
465c2c66affSColin Finck jump[0] = JUMP_OFFSET_HI(offset);
466c2c66affSColin Finck jump[1] = JUMP_OFFSET_LO(offset);
467c2c66affSColin Finck return TRUE;
468c2c66affSColin Finck }
469c2c66affSColin Finck
470c2c66affSColin Finck /*
471c2c66affSColin Finck * Generate bytecode for the tree rooted at t using an explicit stack instead
472c2c66affSColin Finck * of recursion.
473c2c66affSColin Finck */
474c2c66affSColin Finck static jsbytecode *
EmitREBytecode(CompilerState * state,regexp_t * re,size_t treeDepth,jsbytecode * pc,RENode * t)475c2c66affSColin Finck EmitREBytecode(CompilerState *state, regexp_t *re, size_t treeDepth,
476c2c66affSColin Finck jsbytecode *pc, RENode *t)
477c2c66affSColin Finck {
478c2c66affSColin Finck EmitStateStackEntry *emitStateSP, *emitStateStack;
479c2c66affSColin Finck RECharSet *charSet;
480c2c66affSColin Finck REOp op;
481c2c66affSColin Finck
482c2c66affSColin Finck if (treeDepth == 0) {
483c2c66affSColin Finck emitStateStack = NULL;
484c2c66affSColin Finck } else {
485c2c66affSColin Finck emitStateStack = heap_alloc(sizeof(EmitStateStackEntry) * treeDepth);
486c2c66affSColin Finck if (!emitStateStack)
487c2c66affSColin Finck return NULL;
488c2c66affSColin Finck }
489c2c66affSColin Finck emitStateSP = emitStateStack;
490c2c66affSColin Finck op = t->op;
491c2c66affSColin Finck assert(op < REOP_LIMIT);
492c2c66affSColin Finck
493c2c66affSColin Finck for (;;) {
494c2c66affSColin Finck *pc++ = op;
495c2c66affSColin Finck switch (op) {
496c2c66affSColin Finck case REOP_EMPTY:
497c2c66affSColin Finck --pc;
498c2c66affSColin Finck break;
499c2c66affSColin Finck
500c2c66affSColin Finck case REOP_ALTPREREQ2:
501c2c66affSColin Finck case REOP_ALTPREREQ:
502c2c66affSColin Finck assert(emitStateSP);
503c2c66affSColin Finck emitStateSP->altHead = pc - 1;
504c2c66affSColin Finck emitStateSP->endTermFixup = pc;
505c2c66affSColin Finck pc += OFFSET_LEN;
506c2c66affSColin Finck SET_ARG(pc, t->u.altprereq.ch1);
507c2c66affSColin Finck pc += ARG_LEN;
508c2c66affSColin Finck SET_ARG(pc, t->u.altprereq.ch2);
509c2c66affSColin Finck pc += ARG_LEN;
510c2c66affSColin Finck
511c2c66affSColin Finck emitStateSP->nextAltFixup = pc; /* offset to next alternate */
512c2c66affSColin Finck pc += OFFSET_LEN;
513c2c66affSColin Finck
514c2c66affSColin Finck emitStateSP->continueNode = t;
515c2c66affSColin Finck emitStateSP->continueOp = REOP_JUMP;
516c2c66affSColin Finck emitStateSP->jumpToJumpFlag = FALSE;
517c2c66affSColin Finck ++emitStateSP;
518c2c66affSColin Finck assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
519c2c66affSColin Finck t = t->kid;
520c2c66affSColin Finck op = t->op;
521c2c66affSColin Finck assert(op < REOP_LIMIT);
522c2c66affSColin Finck continue;
523c2c66affSColin Finck
524c2c66affSColin Finck case REOP_JUMP:
525c2c66affSColin Finck emitStateSP->nextTermFixup = pc; /* offset to following term */
526c2c66affSColin Finck pc += OFFSET_LEN;
527c2c66affSColin Finck if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
528c2c66affSColin Finck goto jump_too_big;
529c2c66affSColin Finck emitStateSP->continueOp = REOP_ENDALT;
530c2c66affSColin Finck ++emitStateSP;
531c2c66affSColin Finck assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
532c2c66affSColin Finck t = t->u.kid2;
533c2c66affSColin Finck op = t->op;
534c2c66affSColin Finck assert(op < REOP_LIMIT);
535c2c66affSColin Finck continue;
536c2c66affSColin Finck
537c2c66affSColin Finck case REOP_ENDALT:
538c2c66affSColin Finck /*
539c2c66affSColin Finck * If we already patched emitStateSP->nextTermFixup to jump to
540c2c66affSColin Finck * a nearer jump, to avoid 16-bit immediate offset overflow, we
541c2c66affSColin Finck * are done here.
542c2c66affSColin Finck */
543c2c66affSColin Finck if (emitStateSP->jumpToJumpFlag)
544c2c66affSColin Finck break;
545c2c66affSColin Finck
546c2c66affSColin Finck /*
547c2c66affSColin Finck * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
548c2c66affSColin Finck * REOP_ENDALT is executed only on successful match of the last
549c2c66affSColin Finck * alternate in a group.
550c2c66affSColin Finck */
551c2c66affSColin Finck if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
552c2c66affSColin Finck goto jump_too_big;
553c2c66affSColin Finck if (t->op != REOP_ALT) {
554c2c66affSColin Finck if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
555c2c66affSColin Finck goto jump_too_big;
556c2c66affSColin Finck }
557c2c66affSColin Finck
558c2c66affSColin Finck /*
559c2c66affSColin Finck * If the program is bigger than the REOP_JUMP offset range, then
560c2c66affSColin Finck * we must check for alternates before this one that are part of
561c2c66affSColin Finck * the same group, and fix up their jump offsets to target jumps
562c2c66affSColin Finck * close enough to fit in a 16-bit unsigned offset immediate.
563c2c66affSColin Finck */
564c2c66affSColin Finck if ((size_t)(pc - re->program) > OFFSET_MAX &&
565c2c66affSColin Finck emitStateSP > emitStateStack) {
566c2c66affSColin Finck EmitStateStackEntry *esp, *esp2;
567c2c66affSColin Finck jsbytecode *alt, *jump;
568c2c66affSColin Finck ptrdiff_t span, header;
569c2c66affSColin Finck
570c2c66affSColin Finck esp2 = emitStateSP;
571c2c66affSColin Finck alt = esp2->altHead;
572c2c66affSColin Finck for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
573c2c66affSColin Finck if (esp->continueOp == REOP_ENDALT &&
574c2c66affSColin Finck !esp->jumpToJumpFlag &&
575c2c66affSColin Finck esp->nextTermFixup + OFFSET_LEN == alt &&
576c2c66affSColin Finck (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
577c2c66affSColin Finck ? esp->endTermFixup
578c2c66affSColin Finck : esp->nextTermFixup)) > OFFSET_MAX) {
579c2c66affSColin Finck alt = esp->altHead;
580c2c66affSColin Finck jump = esp->nextTermFixup;
581c2c66affSColin Finck
582c2c66affSColin Finck /*
583c2c66affSColin Finck * The span must be 1 less than the distance from
584c2c66affSColin Finck * jump offset to jump offset, so we actually jump
585c2c66affSColin Finck * to a REOP_JUMP bytecode, not to its offset!
586c2c66affSColin Finck */
587c2c66affSColin Finck for (;;) {
588c2c66affSColin Finck assert(jump < esp2->nextTermFixup);
589c2c66affSColin Finck span = esp2->nextTermFixup - jump - 1;
590c2c66affSColin Finck if ((size_t)span <= OFFSET_MAX)
591c2c66affSColin Finck break;
592c2c66affSColin Finck do {
593c2c66affSColin Finck if (--esp2 == esp)
594c2c66affSColin Finck goto jump_too_big;
595c2c66affSColin Finck } while (esp2->continueOp != REOP_ENDALT);
596c2c66affSColin Finck }
597c2c66affSColin Finck
598c2c66affSColin Finck jump[0] = JUMP_OFFSET_HI(span);
599c2c66affSColin Finck jump[1] = JUMP_OFFSET_LO(span);
600c2c66affSColin Finck
601c2c66affSColin Finck if (esp->continueNode->op != REOP_ALT) {
602c2c66affSColin Finck /*
603c2c66affSColin Finck * We must patch the offset at esp->endTermFixup
604c2c66affSColin Finck * as well, for the REOP_ALTPREREQ{,2} opcodes.
605c2c66affSColin Finck * If we're unlucky and endTermFixup is more than
606c2c66affSColin Finck * OFFSET_MAX bytes from its target, we cheat by
607c2c66affSColin Finck * jumping 6 bytes to the jump whose offset is at
608c2c66affSColin Finck * esp->nextTermFixup, which has the same target.
609c2c66affSColin Finck */
610c2c66affSColin Finck jump = esp->endTermFixup;
611c2c66affSColin Finck header = esp->nextTermFixup - jump;
612c2c66affSColin Finck span += header;
613c2c66affSColin Finck if ((size_t)span > OFFSET_MAX)
614c2c66affSColin Finck span = header;
615c2c66affSColin Finck
616c2c66affSColin Finck jump[0] = JUMP_OFFSET_HI(span);
617c2c66affSColin Finck jump[1] = JUMP_OFFSET_LO(span);
618c2c66affSColin Finck }
619c2c66affSColin Finck
620c2c66affSColin Finck esp->jumpToJumpFlag = TRUE;
621c2c66affSColin Finck }
622c2c66affSColin Finck }
623c2c66affSColin Finck }
624c2c66affSColin Finck break;
625c2c66affSColin Finck
626c2c66affSColin Finck case REOP_ALT:
627c2c66affSColin Finck assert(emitStateSP);
628c2c66affSColin Finck emitStateSP->altHead = pc - 1;
629c2c66affSColin Finck emitStateSP->nextAltFixup = pc; /* offset to next alternate */
630c2c66affSColin Finck pc += OFFSET_LEN;
631c2c66affSColin Finck emitStateSP->continueNode = t;
632c2c66affSColin Finck emitStateSP->continueOp = REOP_JUMP;
633c2c66affSColin Finck emitStateSP->jumpToJumpFlag = FALSE;
634c2c66affSColin Finck ++emitStateSP;
635c2c66affSColin Finck assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
636c2c66affSColin Finck t = t->kid;
637c2c66affSColin Finck op = t->op;
638c2c66affSColin Finck assert(op < REOP_LIMIT);
639c2c66affSColin Finck continue;
640c2c66affSColin Finck
641c2c66affSColin Finck case REOP_FLAT:
642c2c66affSColin Finck /*
643c2c66affSColin Finck * Coalesce FLATs if possible and if it would not increase bytecode
644c2c66affSColin Finck * beyond preallocated limit. The latter happens only when bytecode
645c2c66affSColin Finck * size for coalesced string with offset p and length 2 exceeds 6
646c2c66affSColin Finck * bytes preallocated for 2 single char nodes, i.e. when
647c2c66affSColin Finck * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
648c2c66affSColin Finck * GetCompactIndexWidth(p) > 4.
649c2c66affSColin Finck * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
650c2c66affSColin Finck * nodes strictly decreases bytecode size, the check has to be
651c2c66affSColin Finck * done only for the first coalescing.
652c2c66affSColin Finck */
653c2c66affSColin Finck if (t->kid &&
654c2c66affSColin Finck GetCompactIndexWidth((WCHAR*)t->kid - state->cpbegin) <= 4)
655c2c66affSColin Finck {
656c2c66affSColin Finck while (t->next &&
657c2c66affSColin Finck t->next->op == REOP_FLAT &&
658c2c66affSColin Finck (WCHAR*)t->kid + t->u.flat.length ==
659c2c66affSColin Finck t->next->kid) {
660c2c66affSColin Finck t->u.flat.length += t->next->u.flat.length;
661c2c66affSColin Finck t->next = t->next->next;
662c2c66affSColin Finck }
663c2c66affSColin Finck }
664c2c66affSColin Finck if (t->kid && t->u.flat.length > 1) {
665c2c66affSColin Finck pc[-1] = (state->flags & REG_FOLD) ? REOP_FLATi : REOP_FLAT;
666c2c66affSColin Finck pc = WriteCompactIndex(pc, (WCHAR*)t->kid - state->cpbegin);
667c2c66affSColin Finck pc = WriteCompactIndex(pc, t->u.flat.length);
668c2c66affSColin Finck } else if (t->u.flat.chr < 256) {
669c2c66affSColin Finck pc[-1] = (state->flags & REG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
670c2c66affSColin Finck *pc++ = (jsbytecode) t->u.flat.chr;
671c2c66affSColin Finck } else {
672c2c66affSColin Finck pc[-1] = (state->flags & REG_FOLD)
673c2c66affSColin Finck ? REOP_UCFLAT1i
674c2c66affSColin Finck : REOP_UCFLAT1;
675c2c66affSColin Finck SET_ARG(pc, t->u.flat.chr);
676c2c66affSColin Finck pc += ARG_LEN;
677c2c66affSColin Finck }
678c2c66affSColin Finck break;
679c2c66affSColin Finck
680c2c66affSColin Finck case REOP_LPAREN:
681c2c66affSColin Finck assert(emitStateSP);
682c2c66affSColin Finck pc = WriteCompactIndex(pc, t->u.parenIndex);
683c2c66affSColin Finck emitStateSP->continueNode = t;
684c2c66affSColin Finck emitStateSP->continueOp = REOP_RPAREN;
685c2c66affSColin Finck ++emitStateSP;
686c2c66affSColin Finck assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
687c2c66affSColin Finck t = t->kid;
688c2c66affSColin Finck op = t->op;
689c2c66affSColin Finck continue;
690c2c66affSColin Finck
691c2c66affSColin Finck case REOP_RPAREN:
692c2c66affSColin Finck pc = WriteCompactIndex(pc, t->u.parenIndex);
693c2c66affSColin Finck break;
694c2c66affSColin Finck
695c2c66affSColin Finck case REOP_BACKREF:
696c2c66affSColin Finck pc = WriteCompactIndex(pc, t->u.parenIndex);
697c2c66affSColin Finck break;
698c2c66affSColin Finck
699c2c66affSColin Finck case REOP_ASSERT:
700c2c66affSColin Finck assert(emitStateSP);
701c2c66affSColin Finck emitStateSP->nextTermFixup = pc;
702c2c66affSColin Finck pc += OFFSET_LEN;
703c2c66affSColin Finck emitStateSP->continueNode = t;
704c2c66affSColin Finck emitStateSP->continueOp = REOP_ASSERTTEST;
705c2c66affSColin Finck ++emitStateSP;
706c2c66affSColin Finck assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
707c2c66affSColin Finck t = t->kid;
708c2c66affSColin Finck op = t->op;
709c2c66affSColin Finck continue;
710c2c66affSColin Finck
711c2c66affSColin Finck case REOP_ASSERTTEST:
712c2c66affSColin Finck case REOP_ASSERTNOTTEST:
713c2c66affSColin Finck if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
714c2c66affSColin Finck goto jump_too_big;
715c2c66affSColin Finck break;
716c2c66affSColin Finck
717c2c66affSColin Finck case REOP_ASSERT_NOT:
718c2c66affSColin Finck assert(emitStateSP);
719c2c66affSColin Finck emitStateSP->nextTermFixup = pc;
720c2c66affSColin Finck pc += OFFSET_LEN;
721c2c66affSColin Finck emitStateSP->continueNode = t;
722c2c66affSColin Finck emitStateSP->continueOp = REOP_ASSERTNOTTEST;
723c2c66affSColin Finck ++emitStateSP;
724c2c66affSColin Finck assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
725c2c66affSColin Finck t = t->kid;
726c2c66affSColin Finck op = t->op;
727c2c66affSColin Finck continue;
728c2c66affSColin Finck
729c2c66affSColin Finck case REOP_QUANT:
730c2c66affSColin Finck assert(emitStateSP);
731c2c66affSColin Finck if (t->u.range.min == 0 && t->u.range.max == (UINT)-1) {
732c2c66affSColin Finck pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
733c2c66affSColin Finck } else if (t->u.range.min == 0 && t->u.range.max == 1) {
734c2c66affSColin Finck pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
735c2c66affSColin Finck } else if (t->u.range.min == 1 && t->u.range.max == (UINT) -1) {
736c2c66affSColin Finck pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
737c2c66affSColin Finck } else {
738c2c66affSColin Finck if (!t->u.range.greedy)
739c2c66affSColin Finck pc[-1] = REOP_MINIMALQUANT;
740c2c66affSColin Finck pc = WriteCompactIndex(pc, t->u.range.min);
741c2c66affSColin Finck /*
742c2c66affSColin Finck * Write max + 1 to avoid using size_t(max) + 1 bytes
743c2c66affSColin Finck * for (UINT)-1 sentinel.
744c2c66affSColin Finck */
745c2c66affSColin Finck pc = WriteCompactIndex(pc, t->u.range.max + 1);
746c2c66affSColin Finck }
747c2c66affSColin Finck emitStateSP->nextTermFixup = pc;
748c2c66affSColin Finck pc += OFFSET_LEN;
749c2c66affSColin Finck emitStateSP->continueNode = t;
750c2c66affSColin Finck emitStateSP->continueOp = REOP_ENDCHILD;
751c2c66affSColin Finck ++emitStateSP;
752c2c66affSColin Finck assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
753c2c66affSColin Finck t = t->kid;
754c2c66affSColin Finck op = t->op;
755c2c66affSColin Finck continue;
756c2c66affSColin Finck
757c2c66affSColin Finck case REOP_ENDCHILD:
758c2c66affSColin Finck if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
759c2c66affSColin Finck goto jump_too_big;
760c2c66affSColin Finck break;
761c2c66affSColin Finck
762c2c66affSColin Finck case REOP_CLASS:
763c2c66affSColin Finck if (!t->u.ucclass.sense)
764c2c66affSColin Finck pc[-1] = REOP_NCLASS;
765c2c66affSColin Finck pc = WriteCompactIndex(pc, t->u.ucclass.index);
766c2c66affSColin Finck charSet = &re->classList[t->u.ucclass.index];
767c2c66affSColin Finck charSet->converted = FALSE;
768c2c66affSColin Finck charSet->length = t->u.ucclass.bmsize;
769c2c66affSColin Finck charSet->u.src.startIndex = t->u.ucclass.startIndex;
770c2c66affSColin Finck charSet->u.src.length = t->u.ucclass.kidlen;
771c2c66affSColin Finck charSet->sense = t->u.ucclass.sense;
772c2c66affSColin Finck break;
773c2c66affSColin Finck
774c2c66affSColin Finck default:
775c2c66affSColin Finck break;
776c2c66affSColin Finck }
777c2c66affSColin Finck
778c2c66affSColin Finck t = t->next;
779c2c66affSColin Finck if (t) {
780c2c66affSColin Finck op = t->op;
781c2c66affSColin Finck } else {
782c2c66affSColin Finck if (emitStateSP == emitStateStack)
783c2c66affSColin Finck break;
784c2c66affSColin Finck --emitStateSP;
785c2c66affSColin Finck t = emitStateSP->continueNode;
786c2c66affSColin Finck op = (REOp) emitStateSP->continueOp;
787c2c66affSColin Finck }
788c2c66affSColin Finck }
789c2c66affSColin Finck
790c2c66affSColin Finck cleanup:
791c2c66affSColin Finck heap_free(emitStateStack);
792c2c66affSColin Finck return pc;
793c2c66affSColin Finck
794c2c66affSColin Finck jump_too_big:
795c2c66affSColin Finck ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
796c2c66affSColin Finck pc = NULL;
797c2c66affSColin Finck goto cleanup;
798c2c66affSColin Finck }
799c2c66affSColin Finck
800c2c66affSColin Finck /*
801c2c66affSColin Finck * Process the op against the two top operands, reducing them to a single
802c2c66affSColin Finck * operand in the penultimate slot. Update progLength and treeDepth.
803c2c66affSColin Finck */
804c2c66affSColin Finck static BOOL
ProcessOp(CompilerState * state,REOpData * opData,RENode ** operandStack,INT operandSP)805c2c66affSColin Finck ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
806c2c66affSColin Finck INT operandSP)
807c2c66affSColin Finck {
808c2c66affSColin Finck RENode *result;
809c2c66affSColin Finck
810c2c66affSColin Finck switch (opData->op) {
811c2c66affSColin Finck case REOP_ALT:
812c2c66affSColin Finck result = NewRENode(state, REOP_ALT);
813c2c66affSColin Finck if (!result)
814c2c66affSColin Finck return FALSE;
815c2c66affSColin Finck result->kid = operandStack[operandSP - 2];
816c2c66affSColin Finck result->u.kid2 = operandStack[operandSP - 1];
817c2c66affSColin Finck operandStack[operandSP - 2] = result;
818c2c66affSColin Finck
819c2c66affSColin Finck if (state->treeDepth == TREE_DEPTH_MAX) {
820c2c66affSColin Finck ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
821c2c66affSColin Finck return FALSE;
822c2c66affSColin Finck }
823c2c66affSColin Finck ++state->treeDepth;
824c2c66affSColin Finck
825c2c66affSColin Finck /*
826c2c66affSColin Finck * Look at both alternates to see if there's a FLAT or a CLASS at
827c2c66affSColin Finck * the start of each. If so, use a prerequisite match.
828c2c66affSColin Finck */
829c2c66affSColin Finck if (((RENode *) result->kid)->op == REOP_FLAT &&
830c2c66affSColin Finck ((RENode *) result->u.kid2)->op == REOP_FLAT &&
831c2c66affSColin Finck (state->flags & REG_FOLD) == 0) {
832c2c66affSColin Finck result->op = REOP_ALTPREREQ;
833c2c66affSColin Finck result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
834c2c66affSColin Finck result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
835c2c66affSColin Finck /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
836c2c66affSColin Finck JUMP, <end> ... ENDALT */
837c2c66affSColin Finck state->progLength += 13;
838c2c66affSColin Finck }
839c2c66affSColin Finck else
840c2c66affSColin Finck if (((RENode *) result->kid)->op == REOP_CLASS &&
841c2c66affSColin Finck ((RENode *) result->kid)->u.ucclass.index < 256 &&
842c2c66affSColin Finck ((RENode *) result->u.kid2)->op == REOP_FLAT &&
843c2c66affSColin Finck (state->flags & REG_FOLD) == 0) {
844c2c66affSColin Finck result->op = REOP_ALTPREREQ2;
845c2c66affSColin Finck result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
846c2c66affSColin Finck result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
847c2c66affSColin Finck /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
848c2c66affSColin Finck JUMP, <end> ... ENDALT */
849c2c66affSColin Finck state->progLength += 13;
850c2c66affSColin Finck }
851c2c66affSColin Finck else
852c2c66affSColin Finck if (((RENode *) result->kid)->op == REOP_FLAT &&
853c2c66affSColin Finck ((RENode *) result->u.kid2)->op == REOP_CLASS &&
854c2c66affSColin Finck ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
855c2c66affSColin Finck (state->flags & REG_FOLD) == 0) {
856c2c66affSColin Finck result->op = REOP_ALTPREREQ2;
857c2c66affSColin Finck result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
858c2c66affSColin Finck result->u.altprereq.ch2 =
859c2c66affSColin Finck ((RENode *) result->u.kid2)->u.ucclass.index;
860c2c66affSColin Finck /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
861c2c66affSColin Finck JUMP, <end> ... ENDALT */
862c2c66affSColin Finck state->progLength += 13;
863c2c66affSColin Finck }
864c2c66affSColin Finck else {
865c2c66affSColin Finck /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
866c2c66affSColin Finck state->progLength += 7;
867c2c66affSColin Finck }
868c2c66affSColin Finck break;
869c2c66affSColin Finck
870c2c66affSColin Finck case REOP_CONCAT:
871c2c66affSColin Finck result = operandStack[operandSP - 2];
872c2c66affSColin Finck while (result->next)
873c2c66affSColin Finck result = result->next;
874c2c66affSColin Finck result->next = operandStack[operandSP - 1];
875c2c66affSColin Finck break;
876c2c66affSColin Finck
877c2c66affSColin Finck case REOP_ASSERT:
878c2c66affSColin Finck case REOP_ASSERT_NOT:
879c2c66affSColin Finck case REOP_LPARENNON:
880c2c66affSColin Finck case REOP_LPAREN:
881c2c66affSColin Finck /* These should have been processed by a close paren. */
882c2c66affSColin Finck ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
883c2c66affSColin Finck opData->errPos);
884c2c66affSColin Finck return FALSE;
885c2c66affSColin Finck
886c2c66affSColin Finck default:;
887c2c66affSColin Finck }
888c2c66affSColin Finck return TRUE;
889c2c66affSColin Finck }
890c2c66affSColin Finck
891c2c66affSColin Finck /*
892c2c66affSColin Finck * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
893c2c66affSColin Finck * it being on the stack, and to propagate errors to its callers.
894c2c66affSColin Finck */
895c2c66affSColin Finck #define JSREG_FIND_PAREN_COUNT 0x8000
896c2c66affSColin Finck #define JSREG_FIND_PAREN_ERROR 0x4000
897c2c66affSColin Finck
898c2c66affSColin Finck /*
899c2c66affSColin Finck * Magic return value from FindParenCount and GetDecimalValue, to indicate
900c2c66affSColin Finck * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
901c2c66affSColin Finck * its findMax parameter is non-null.
902c2c66affSColin Finck */
903c2c66affSColin Finck #define OVERFLOW_VALUE ((UINT)-1)
904c2c66affSColin Finck
905c2c66affSColin Finck static UINT
FindParenCount(CompilerState * state)906c2c66affSColin Finck FindParenCount(CompilerState *state)
907c2c66affSColin Finck {
908c2c66affSColin Finck CompilerState temp;
909c2c66affSColin Finck int i;
910c2c66affSColin Finck
911c2c66affSColin Finck if (state->flags & JSREG_FIND_PAREN_COUNT)
912c2c66affSColin Finck return OVERFLOW_VALUE;
913c2c66affSColin Finck
914c2c66affSColin Finck /*
915c2c66affSColin Finck * Copy state into temp, flag it so we never report an invalid backref,
916c2c66affSColin Finck * and reset its members to parse the entire regexp. This is obviously
917c2c66affSColin Finck * suboptimal, but GetDecimalValue calls us only if a backref appears to
918c2c66affSColin Finck * refer to a forward parenthetical, which is rare.
919c2c66affSColin Finck */
920c2c66affSColin Finck temp = *state;
921c2c66affSColin Finck temp.flags |= JSREG_FIND_PAREN_COUNT;
922c2c66affSColin Finck temp.cp = temp.cpbegin;
923c2c66affSColin Finck temp.parenCount = 0;
924c2c66affSColin Finck temp.classCount = 0;
925c2c66affSColin Finck temp.progLength = 0;
926c2c66affSColin Finck temp.treeDepth = 0;
927c2c66affSColin Finck temp.classBitmapsMem = 0;
928c2c66affSColin Finck for (i = 0; i < CLASS_CACHE_SIZE; i++)
929c2c66affSColin Finck temp.classCache[i].start = NULL;
930c2c66affSColin Finck
931c2c66affSColin Finck if (!ParseRegExp(&temp)) {
932c2c66affSColin Finck state->flags |= JSREG_FIND_PAREN_ERROR;
933c2c66affSColin Finck return OVERFLOW_VALUE;
934c2c66affSColin Finck }
935c2c66affSColin Finck return temp.parenCount;
936c2c66affSColin Finck }
937c2c66affSColin Finck
938c2c66affSColin Finck /*
939c2c66affSColin Finck * Extract and return a decimal value at state->cp. The initial character c
940c2c66affSColin Finck * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
941c2c66affSColin Finck * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
942c2c66affSColin Finck * state->flags to discover whether an error occurred under findMax.
943c2c66affSColin Finck */
944c2c66affSColin Finck static UINT
GetDecimalValue(WCHAR c,UINT max,UINT (* findMax)(CompilerState * state),CompilerState * state)945c2c66affSColin Finck GetDecimalValue(WCHAR c, UINT max, UINT (*findMax)(CompilerState *state),
946c2c66affSColin Finck CompilerState *state)
947c2c66affSColin Finck {
948c2c66affSColin Finck UINT value = JS7_UNDEC(c);
949c2c66affSColin Finck BOOL overflow = (value > max && (!findMax || value > findMax(state)));
950c2c66affSColin Finck
951c2c66affSColin Finck /* The following restriction allows simpler overflow checks. */
952c2c66affSColin Finck assert(max <= ((UINT)-1 - 9) / 10);
953c2c66affSColin Finck while (state->cp < state->cpend) {
954c2c66affSColin Finck c = *state->cp;
955c2c66affSColin Finck if (!JS7_ISDEC(c))
956c2c66affSColin Finck break;
957c2c66affSColin Finck value = 10 * value + JS7_UNDEC(c);
958c2c66affSColin Finck if (!overflow && value > max && (!findMax || value > findMax(state)))
959c2c66affSColin Finck overflow = TRUE;
960c2c66affSColin Finck ++state->cp;
961c2c66affSColin Finck }
962c2c66affSColin Finck return overflow ? OVERFLOW_VALUE : value;
963c2c66affSColin Finck }
964c2c66affSColin Finck
965c2c66affSColin Finck /*
966c2c66affSColin Finck * Calculate the total size of the bitmap required for a class expression.
967c2c66affSColin Finck */
968c2c66affSColin Finck static BOOL
CalculateBitmapSize(CompilerState * state,RENode * target,const WCHAR * src,const WCHAR * end)969c2c66affSColin Finck CalculateBitmapSize(CompilerState *state, RENode *target, const WCHAR *src,
970c2c66affSColin Finck const WCHAR *end)
971c2c66affSColin Finck {
972c2c66affSColin Finck UINT max = 0;
973c2c66affSColin Finck BOOL inRange = FALSE;
974c2c66affSColin Finck WCHAR c, rangeStart = 0;
975c2c66affSColin Finck UINT n, digit, nDigits, i;
976c2c66affSColin Finck
977c2c66affSColin Finck target->u.ucclass.bmsize = 0;
978c2c66affSColin Finck target->u.ucclass.sense = TRUE;
979c2c66affSColin Finck
980c2c66affSColin Finck if (src == end)
981c2c66affSColin Finck return TRUE;
982c2c66affSColin Finck
983c2c66affSColin Finck if (*src == '^') {
984c2c66affSColin Finck ++src;
985c2c66affSColin Finck target->u.ucclass.sense = FALSE;
986c2c66affSColin Finck }
987c2c66affSColin Finck
988c2c66affSColin Finck while (src != end) {
989c2c66affSColin Finck BOOL canStartRange = TRUE;
990c2c66affSColin Finck UINT localMax = 0;
991c2c66affSColin Finck
992c2c66affSColin Finck switch (*src) {
993c2c66affSColin Finck case '\\':
994c2c66affSColin Finck ++src;
995c2c66affSColin Finck c = *src++;
996c2c66affSColin Finck switch (c) {
997c2c66affSColin Finck case 'b':
998c2c66affSColin Finck localMax = 0x8;
999c2c66affSColin Finck break;
1000c2c66affSColin Finck case 'f':
1001c2c66affSColin Finck localMax = 0xC;
1002c2c66affSColin Finck break;
1003c2c66affSColin Finck case 'n':
1004c2c66affSColin Finck localMax = 0xA;
1005c2c66affSColin Finck break;
1006c2c66affSColin Finck case 'r':
1007c2c66affSColin Finck localMax = 0xD;
1008c2c66affSColin Finck break;
1009c2c66affSColin Finck case 't':
1010c2c66affSColin Finck localMax = 0x9;
1011c2c66affSColin Finck break;
1012c2c66affSColin Finck case 'v':
1013c2c66affSColin Finck localMax = 0xB;
1014c2c66affSColin Finck break;
1015c2c66affSColin Finck case 'c':
1016c2c66affSColin Finck if (src < end && RE_IS_LETTER(*src)) {
1017c2c66affSColin Finck localMax = (UINT) (*src++) & 0x1F;
1018c2c66affSColin Finck } else {
1019c2c66affSColin Finck --src;
1020c2c66affSColin Finck localMax = '\\';
1021c2c66affSColin Finck }
1022c2c66affSColin Finck break;
1023c2c66affSColin Finck case 'x':
1024c2c66affSColin Finck nDigits = 2;
1025c2c66affSColin Finck goto lexHex;
1026c2c66affSColin Finck case 'u':
1027c2c66affSColin Finck nDigits = 4;
1028c2c66affSColin Finck lexHex:
1029c2c66affSColin Finck n = 0;
1030c2c66affSColin Finck for (i = 0; (i < nDigits) && (src < end); i++) {
1031c2c66affSColin Finck c = *src++;
1032c2c66affSColin Finck if (!isASCIIHexDigit(c, &digit)) {
1033c2c66affSColin Finck /*
1034c2c66affSColin Finck * Back off to accepting the original
1035c2c66affSColin Finck *'\' as a literal.
1036c2c66affSColin Finck */
1037c2c66affSColin Finck src -= i + 1;
1038c2c66affSColin Finck n = '\\';
1039c2c66affSColin Finck break;
1040c2c66affSColin Finck }
1041c2c66affSColin Finck n = (n << 4) | digit;
1042c2c66affSColin Finck }
1043c2c66affSColin Finck localMax = n;
1044c2c66affSColin Finck break;
1045c2c66affSColin Finck case 'd':
1046c2c66affSColin Finck canStartRange = FALSE;
1047c2c66affSColin Finck if (inRange) {
1048c2c66affSColin Finck JS_ReportErrorNumber(state->context,
1049c2c66affSColin Finck js_GetErrorMessage, NULL,
1050c2c66affSColin Finck JSMSG_BAD_CLASS_RANGE);
1051c2c66affSColin Finck return FALSE;
1052c2c66affSColin Finck }
1053c2c66affSColin Finck localMax = '9';
1054c2c66affSColin Finck break;
1055c2c66affSColin Finck case 'D':
1056c2c66affSColin Finck case 's':
1057c2c66affSColin Finck case 'S':
1058c2c66affSColin Finck case 'w':
1059c2c66affSColin Finck case 'W':
1060c2c66affSColin Finck canStartRange = FALSE;
1061c2c66affSColin Finck if (inRange) {
1062c2c66affSColin Finck JS_ReportErrorNumber(state->context,
1063c2c66affSColin Finck js_GetErrorMessage, NULL,
1064c2c66affSColin Finck JSMSG_BAD_CLASS_RANGE);
1065c2c66affSColin Finck return FALSE;
1066c2c66affSColin Finck }
1067c2c66affSColin Finck max = 65535;
1068c2c66affSColin Finck
1069c2c66affSColin Finck /*
1070c2c66affSColin Finck * If this is the start of a range, ensure that it's less than
1071c2c66affSColin Finck * the end.
1072c2c66affSColin Finck */
1073c2c66affSColin Finck localMax = 0;
1074c2c66affSColin Finck break;
1075c2c66affSColin Finck case '0':
1076c2c66affSColin Finck case '1':
1077c2c66affSColin Finck case '2':
1078c2c66affSColin Finck case '3':
1079c2c66affSColin Finck case '4':
1080c2c66affSColin Finck case '5':
1081c2c66affSColin Finck case '6':
1082c2c66affSColin Finck case '7':
1083c2c66affSColin Finck /*
1084c2c66affSColin Finck * This is a non-ECMA extension - decimal escapes (in this
1085c2c66affSColin Finck * case, octal!) are supposed to be an error inside class
1086c2c66affSColin Finck * ranges, but supported here for backwards compatibility.
1087c2c66affSColin Finck *
1088c2c66affSColin Finck */
1089c2c66affSColin Finck n = JS7_UNDEC(c);
1090c2c66affSColin Finck c = *src;
1091c2c66affSColin Finck if ('0' <= c && c <= '7') {
1092c2c66affSColin Finck src++;
1093c2c66affSColin Finck n = 8 * n + JS7_UNDEC(c);
1094c2c66affSColin Finck c = *src;
1095c2c66affSColin Finck if ('0' <= c && c <= '7') {
1096c2c66affSColin Finck src++;
1097c2c66affSColin Finck i = 8 * n + JS7_UNDEC(c);
1098c2c66affSColin Finck if (i <= 0377)
1099c2c66affSColin Finck n = i;
1100c2c66affSColin Finck else
1101c2c66affSColin Finck src--;
1102c2c66affSColin Finck }
1103c2c66affSColin Finck }
1104c2c66affSColin Finck localMax = n;
1105c2c66affSColin Finck break;
1106c2c66affSColin Finck
1107c2c66affSColin Finck default:
1108c2c66affSColin Finck localMax = c;
1109c2c66affSColin Finck break;
1110c2c66affSColin Finck }
1111c2c66affSColin Finck break;
1112c2c66affSColin Finck default:
1113c2c66affSColin Finck localMax = *src++;
1114c2c66affSColin Finck break;
1115c2c66affSColin Finck }
1116c2c66affSColin Finck
1117c2c66affSColin Finck if (inRange) {
1118c2c66affSColin Finck /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1119c2c66affSColin Finck if (rangeStart > localMax) {
1120c2c66affSColin Finck JS_ReportErrorNumber(state->context,
1121c2c66affSColin Finck js_GetErrorMessage, NULL,
1122c2c66affSColin Finck JSMSG_BAD_CLASS_RANGE);
1123c2c66affSColin Finck return FALSE;
1124c2c66affSColin Finck }
1125c2c66affSColin Finck inRange = FALSE;
1126c2c66affSColin Finck } else {
1127c2c66affSColin Finck if (canStartRange && src < end - 1) {
1128c2c66affSColin Finck if (*src == '-') {
1129c2c66affSColin Finck ++src;
1130c2c66affSColin Finck inRange = TRUE;
1131c2c66affSColin Finck rangeStart = (WCHAR)localMax;
1132c2c66affSColin Finck continue;
1133c2c66affSColin Finck }
1134c2c66affSColin Finck }
1135c2c66affSColin Finck if (state->flags & REG_FOLD)
1136c2c66affSColin Finck rangeStart = localMax; /* one run of the uc/dc loop below */
1137c2c66affSColin Finck }
1138c2c66affSColin Finck
1139c2c66affSColin Finck if (state->flags & REG_FOLD) {
1140c2c66affSColin Finck WCHAR maxch = localMax;
1141c2c66affSColin Finck
1142c2c66affSColin Finck for (i = rangeStart; i <= localMax; i++) {
1143c2c66affSColin Finck WCHAR uch, dch;
1144c2c66affSColin Finck
1145*3e2d6582SAmine Khaldi uch = towupper(i);
1146*3e2d6582SAmine Khaldi dch = towlower(i);
1147c2c66affSColin Finck if(maxch < uch)
1148c2c66affSColin Finck maxch = uch;
1149c2c66affSColin Finck if(maxch < dch)
1150c2c66affSColin Finck maxch = dch;
1151c2c66affSColin Finck }
1152c2c66affSColin Finck localMax = maxch;
1153c2c66affSColin Finck }
1154c2c66affSColin Finck
1155c2c66affSColin Finck if (localMax > max)
1156c2c66affSColin Finck max = localMax;
1157c2c66affSColin Finck }
1158c2c66affSColin Finck target->u.ucclass.bmsize = max;
1159c2c66affSColin Finck return TRUE;
1160c2c66affSColin Finck }
1161c2c66affSColin Finck
1162c2c66affSColin Finck static INT
ParseMinMaxQuantifier(CompilerState * state,BOOL ignoreValues)1163c2c66affSColin Finck ParseMinMaxQuantifier(CompilerState *state, BOOL ignoreValues)
1164c2c66affSColin Finck {
1165c2c66affSColin Finck UINT min, max;
1166c2c66affSColin Finck WCHAR c;
1167c2c66affSColin Finck const WCHAR *errp = state->cp++;
1168c2c66affSColin Finck
1169c2c66affSColin Finck c = *state->cp;
1170c2c66affSColin Finck if (JS7_ISDEC(c)) {
1171c2c66affSColin Finck ++state->cp;
1172c2c66affSColin Finck min = GetDecimalValue(c, 0xFFFF, NULL, state);
1173c2c66affSColin Finck c = *state->cp;
1174c2c66affSColin Finck
1175c2c66affSColin Finck if (!ignoreValues && min == OVERFLOW_VALUE)
1176c2c66affSColin Finck return JSMSG_MIN_TOO_BIG;
1177c2c66affSColin Finck
1178c2c66affSColin Finck if (c == ',') {
1179c2c66affSColin Finck c = *++state->cp;
1180c2c66affSColin Finck if (JS7_ISDEC(c)) {
1181c2c66affSColin Finck ++state->cp;
1182c2c66affSColin Finck max = GetDecimalValue(c, 0xFFFF, NULL, state);
1183c2c66affSColin Finck c = *state->cp;
1184c2c66affSColin Finck if (!ignoreValues && max == OVERFLOW_VALUE)
1185c2c66affSColin Finck return JSMSG_MAX_TOO_BIG;
1186c2c66affSColin Finck if (!ignoreValues && min > max)
1187c2c66affSColin Finck return JSMSG_OUT_OF_ORDER;
1188c2c66affSColin Finck } else {
1189c2c66affSColin Finck max = (UINT)-1;
1190c2c66affSColin Finck }
1191c2c66affSColin Finck } else {
1192c2c66affSColin Finck max = min;
1193c2c66affSColin Finck }
1194c2c66affSColin Finck if (c == '}') {
1195c2c66affSColin Finck state->result = NewRENode(state, REOP_QUANT);
1196c2c66affSColin Finck if (!state->result)
1197c2c66affSColin Finck return JSMSG_OUT_OF_MEMORY;
1198c2c66affSColin Finck state->result->u.range.min = min;
1199c2c66affSColin Finck state->result->u.range.max = max;
1200c2c66affSColin Finck /*
1201c2c66affSColin Finck * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1202c2c66affSColin Finck * where <max> is written as compact(max+1) to make
1203c2c66affSColin Finck * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1204c2c66affSColin Finck */
1205c2c66affSColin Finck state->progLength += (1 + GetCompactIndexWidth(min)
1206c2c66affSColin Finck + GetCompactIndexWidth(max + 1)
1207c2c66affSColin Finck +3);
1208c2c66affSColin Finck return 0;
1209c2c66affSColin Finck }
1210c2c66affSColin Finck }
1211c2c66affSColin Finck
1212c2c66affSColin Finck state->cp = errp;
1213c2c66affSColin Finck return -1;
1214c2c66affSColin Finck }
1215c2c66affSColin Finck
1216c2c66affSColin Finck static BOOL
ParseQuantifier(CompilerState * state)1217c2c66affSColin Finck ParseQuantifier(CompilerState *state)
1218c2c66affSColin Finck {
1219c2c66affSColin Finck RENode *term;
1220c2c66affSColin Finck term = state->result;
1221c2c66affSColin Finck if (state->cp < state->cpend) {
1222c2c66affSColin Finck switch (*state->cp) {
1223c2c66affSColin Finck case '+':
1224c2c66affSColin Finck state->result = NewRENode(state, REOP_QUANT);
1225c2c66affSColin Finck if (!state->result)
1226c2c66affSColin Finck return FALSE;
1227c2c66affSColin Finck state->result->u.range.min = 1;
1228c2c66affSColin Finck state->result->u.range.max = (UINT)-1;
1229c2c66affSColin Finck /* <PLUS>, <next> ... <ENDCHILD> */
1230c2c66affSColin Finck state->progLength += 4;
1231c2c66affSColin Finck goto quantifier;
1232c2c66affSColin Finck case '*':
1233c2c66affSColin Finck state->result = NewRENode(state, REOP_QUANT);
1234c2c66affSColin Finck if (!state->result)
1235c2c66affSColin Finck return FALSE;
1236c2c66affSColin Finck state->result->u.range.min = 0;
1237c2c66affSColin Finck state->result->u.range.max = (UINT)-1;
1238c2c66affSColin Finck /* <STAR>, <next> ... <ENDCHILD> */
1239c2c66affSColin Finck state->progLength += 4;
1240c2c66affSColin Finck goto quantifier;
1241c2c66affSColin Finck case '?':
1242c2c66affSColin Finck state->result = NewRENode(state, REOP_QUANT);
1243c2c66affSColin Finck if (!state->result)
1244c2c66affSColin Finck return FALSE;
1245c2c66affSColin Finck state->result->u.range.min = 0;
1246c2c66affSColin Finck state->result->u.range.max = 1;
1247c2c66affSColin Finck /* <OPT>, <next> ... <ENDCHILD> */
1248c2c66affSColin Finck state->progLength += 4;
1249c2c66affSColin Finck goto quantifier;
1250c2c66affSColin Finck case '{': /* balance '}' */
1251c2c66affSColin Finck {
1252c2c66affSColin Finck INT err;
1253c2c66affSColin Finck
1254c2c66affSColin Finck err = ParseMinMaxQuantifier(state, FALSE);
1255c2c66affSColin Finck if (err == 0)
1256c2c66affSColin Finck goto quantifier;
1257c2c66affSColin Finck if (err == -1)
1258c2c66affSColin Finck return TRUE;
1259c2c66affSColin Finck
1260c2c66affSColin Finck ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1261c2c66affSColin Finck return FALSE;
1262c2c66affSColin Finck }
1263c2c66affSColin Finck default:;
1264c2c66affSColin Finck }
1265c2c66affSColin Finck }
1266c2c66affSColin Finck return TRUE;
1267c2c66affSColin Finck
1268c2c66affSColin Finck quantifier:
1269c2c66affSColin Finck if (state->treeDepth == TREE_DEPTH_MAX) {
1270c2c66affSColin Finck ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1271c2c66affSColin Finck return FALSE;
1272c2c66affSColin Finck }
1273c2c66affSColin Finck
1274c2c66affSColin Finck ++state->treeDepth;
1275c2c66affSColin Finck ++state->cp;
1276c2c66affSColin Finck state->result->kid = term;
1277c2c66affSColin Finck if (state->cp < state->cpend && *state->cp == '?') {
1278c2c66affSColin Finck ++state->cp;
1279c2c66affSColin Finck state->result->u.range.greedy = FALSE;
1280c2c66affSColin Finck } else {
1281c2c66affSColin Finck state->result->u.range.greedy = TRUE;
1282c2c66affSColin Finck }
1283c2c66affSColin Finck return TRUE;
1284c2c66affSColin Finck }
1285c2c66affSColin Finck
1286c2c66affSColin Finck /*
1287c2c66affSColin Finck * item: assertion An item is either an assertion or
1288c2c66affSColin Finck * quantatom a quantified atom.
1289c2c66affSColin Finck *
1290c2c66affSColin Finck * assertion: '^' Assertions match beginning of string
1291c2c66affSColin Finck * (or line if the class static property
1292c2c66affSColin Finck * RegExp.multiline is true).
1293c2c66affSColin Finck * '$' End of string (or line if the class
1294c2c66affSColin Finck * static property RegExp.multiline is
1295c2c66affSColin Finck * true).
1296c2c66affSColin Finck * '\b' Word boundary (between \w and \W).
1297c2c66affSColin Finck * '\B' Word non-boundary.
1298c2c66affSColin Finck *
1299c2c66affSColin Finck * quantatom: atom An unquantified atom.
1300c2c66affSColin Finck * quantatom '{' n ',' m '}'
1301c2c66affSColin Finck * Atom must occur between n and m times.
1302c2c66affSColin Finck * quantatom '{' n ',' '}' Atom must occur at least n times.
1303c2c66affSColin Finck * quantatom '{' n '}' Atom must occur exactly n times.
1304c2c66affSColin Finck * quantatom '*' Zero or more times (same as {0,}).
1305c2c66affSColin Finck * quantatom '+' One or more times (same as {1,}).
1306c2c66affSColin Finck * quantatom '?' Zero or one time (same as {0,1}).
1307c2c66affSColin Finck *
1308c2c66affSColin Finck * any of which can be optionally followed by '?' for ungreedy
1309c2c66affSColin Finck *
1310c2c66affSColin Finck * atom: '(' regexp ')' A parenthesized regexp (what matched
1311c2c66affSColin Finck * can be addressed using a backreference,
1312c2c66affSColin Finck * see '\' n below).
1313c2c66affSColin Finck * '.' Matches any char except '\n'.
1314c2c66affSColin Finck * '[' classlist ']' A character class.
1315c2c66affSColin Finck * '[' '^' classlist ']' A negated character class.
1316c2c66affSColin Finck * '\f' Form Feed.
1317c2c66affSColin Finck * '\n' Newline (Line Feed).
1318c2c66affSColin Finck * '\r' Carriage Return.
1319c2c66affSColin Finck * '\t' Horizontal Tab.
1320c2c66affSColin Finck * '\v' Vertical Tab.
1321c2c66affSColin Finck * '\d' A digit (same as [0-9]).
1322c2c66affSColin Finck * '\D' A non-digit.
1323c2c66affSColin Finck * '\w' A word character, [0-9a-z_A-Z].
1324c2c66affSColin Finck * '\W' A non-word character.
1325c2c66affSColin Finck * '\s' A whitespace character, [ \b\f\n\r\t\v].
1326c2c66affSColin Finck * '\S' A non-whitespace character.
1327c2c66affSColin Finck * '\' n A backreference to the nth (n decimal
1328c2c66affSColin Finck * and positive) parenthesized expression.
1329c2c66affSColin Finck * '\' octal An octal escape sequence (octal must be
1330c2c66affSColin Finck * two or three digits long, unless it is
1331c2c66affSColin Finck * 0 for the null character).
1332c2c66affSColin Finck * '\x' hex A hex escape (hex must be two digits).
1333c2c66affSColin Finck * '\u' unicode A unicode escape (must be four digits).
1334c2c66affSColin Finck * '\c' ctrl A control character, ctrl is a letter.
1335c2c66affSColin Finck * '\' literalatomchar Any character except one of the above
1336c2c66affSColin Finck * that follow '\' in an atom.
1337c2c66affSColin Finck * otheratomchar Any character not first among the other
1338c2c66affSColin Finck * atom right-hand sides.
1339c2c66affSColin Finck */
1340c2c66affSColin Finck static BOOL
ParseTerm(CompilerState * state)1341c2c66affSColin Finck ParseTerm(CompilerState *state)
1342c2c66affSColin Finck {
1343c2c66affSColin Finck WCHAR c = *state->cp++;
1344c2c66affSColin Finck UINT nDigits;
1345c2c66affSColin Finck UINT num, tmp, n, i;
1346c2c66affSColin Finck const WCHAR *termStart;
1347c2c66affSColin Finck
1348c2c66affSColin Finck switch (c) {
1349c2c66affSColin Finck /* assertions and atoms */
1350c2c66affSColin Finck case '^':
1351c2c66affSColin Finck state->result = NewRENode(state, REOP_BOL);
1352c2c66affSColin Finck if (!state->result)
1353c2c66affSColin Finck return FALSE;
1354c2c66affSColin Finck state->progLength++;
1355c2c66affSColin Finck return TRUE;
1356c2c66affSColin Finck case '$':
1357c2c66affSColin Finck state->result = NewRENode(state, REOP_EOL);
1358c2c66affSColin Finck if (!state->result)
1359c2c66affSColin Finck return FALSE;
1360c2c66affSColin Finck state->progLength++;
1361c2c66affSColin Finck return TRUE;
1362c2c66affSColin Finck case '\\':
1363c2c66affSColin Finck if (state->cp >= state->cpend) {
1364c2c66affSColin Finck /* a trailing '\' is an error */
1365c2c66affSColin Finck ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1366c2c66affSColin Finck return FALSE;
1367c2c66affSColin Finck }
1368c2c66affSColin Finck c = *state->cp++;
1369c2c66affSColin Finck switch (c) {
1370c2c66affSColin Finck /* assertion escapes */
1371c2c66affSColin Finck case 'b' :
1372c2c66affSColin Finck state->result = NewRENode(state, REOP_WBDRY);
1373c2c66affSColin Finck if (!state->result)
1374c2c66affSColin Finck return FALSE;
1375c2c66affSColin Finck state->progLength++;
1376c2c66affSColin Finck return TRUE;
1377c2c66affSColin Finck case 'B':
1378c2c66affSColin Finck state->result = NewRENode(state, REOP_WNONBDRY);
1379c2c66affSColin Finck if (!state->result)
1380c2c66affSColin Finck return FALSE;
1381c2c66affSColin Finck state->progLength++;
1382c2c66affSColin Finck return TRUE;
1383c2c66affSColin Finck /* Decimal escape */
1384c2c66affSColin Finck case '0':
1385c2c66affSColin Finck /* Give a strict warning. See also the note below. */
1386c2c66affSColin Finck WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n");
1387c2c66affSColin Finck doOctal:
1388c2c66affSColin Finck num = 0;
1389c2c66affSColin Finck while (state->cp < state->cpend) {
1390c2c66affSColin Finck c = *state->cp;
1391c2c66affSColin Finck if (c < '0' || '7' < c)
1392c2c66affSColin Finck break;
1393c2c66affSColin Finck state->cp++;
1394c2c66affSColin Finck tmp = 8 * num + (UINT)JS7_UNDEC(c);
1395c2c66affSColin Finck if (tmp > 0377)
1396c2c66affSColin Finck break;
1397c2c66affSColin Finck num = tmp;
1398c2c66affSColin Finck }
1399c2c66affSColin Finck c = (WCHAR)num;
1400c2c66affSColin Finck doFlat:
1401c2c66affSColin Finck state->result = NewRENode(state, REOP_FLAT);
1402c2c66affSColin Finck if (!state->result)
1403c2c66affSColin Finck return FALSE;
1404c2c66affSColin Finck state->result->u.flat.chr = c;
1405c2c66affSColin Finck state->result->u.flat.length = 1;
1406c2c66affSColin Finck state->progLength += 3;
1407c2c66affSColin Finck break;
1408c2c66affSColin Finck case '1':
1409c2c66affSColin Finck case '2':
1410c2c66affSColin Finck case '3':
1411c2c66affSColin Finck case '4':
1412c2c66affSColin Finck case '5':
1413c2c66affSColin Finck case '6':
1414c2c66affSColin Finck case '7':
1415c2c66affSColin Finck case '8':
1416c2c66affSColin Finck case '9':
1417c2c66affSColin Finck termStart = state->cp - 1;
1418c2c66affSColin Finck num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1419c2c66affSColin Finck if (state->flags & JSREG_FIND_PAREN_ERROR)
1420c2c66affSColin Finck return FALSE;
1421c2c66affSColin Finck if (num == OVERFLOW_VALUE) {
1422c2c66affSColin Finck /* Give a strict mode warning. */
1423c2c66affSColin Finck WARN("back-reference exceeds number of capturing parentheses\n");
1424c2c66affSColin Finck
1425c2c66affSColin Finck /*
1426c2c66affSColin Finck * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1427c2c66affSColin Finck * error here. However, for compatibility with IE, we treat the
1428c2c66affSColin Finck * whole backref as flat if the first character in it is not a
1429c2c66affSColin Finck * valid octal character, and as an octal escape otherwise.
1430c2c66affSColin Finck */
1431c2c66affSColin Finck state->cp = termStart;
1432c2c66affSColin Finck if (c >= '8') {
1433c2c66affSColin Finck /* Treat this as flat. termStart - 1 is the \. */
1434c2c66affSColin Finck c = '\\';
1435c2c66affSColin Finck goto asFlat;
1436c2c66affSColin Finck }
1437c2c66affSColin Finck
1438c2c66affSColin Finck /* Treat this as an octal escape. */
1439c2c66affSColin Finck goto doOctal;
1440c2c66affSColin Finck }
1441c2c66affSColin Finck assert(1 <= num && num <= 0x10000);
1442c2c66affSColin Finck state->result = NewRENode(state, REOP_BACKREF);
1443c2c66affSColin Finck if (!state->result)
1444c2c66affSColin Finck return FALSE;
1445c2c66affSColin Finck state->result->u.parenIndex = num - 1;
1446c2c66affSColin Finck state->progLength
1447c2c66affSColin Finck += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1448c2c66affSColin Finck break;
1449c2c66affSColin Finck /* Control escape */
1450c2c66affSColin Finck case 'f':
1451c2c66affSColin Finck c = 0xC;
1452c2c66affSColin Finck goto doFlat;
1453c2c66affSColin Finck case 'n':
1454c2c66affSColin Finck c = 0xA;
1455c2c66affSColin Finck goto doFlat;
1456c2c66affSColin Finck case 'r':
1457c2c66affSColin Finck c = 0xD;
1458c2c66affSColin Finck goto doFlat;
1459c2c66affSColin Finck case 't':
1460c2c66affSColin Finck c = 0x9;
1461c2c66affSColin Finck goto doFlat;
1462c2c66affSColin Finck case 'v':
1463c2c66affSColin Finck c = 0xB;
1464c2c66affSColin Finck goto doFlat;
1465c2c66affSColin Finck /* Control letter */
1466c2c66affSColin Finck case 'c':
1467c2c66affSColin Finck if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1468c2c66affSColin Finck c = (WCHAR) (*state->cp++ & 0x1F);
1469c2c66affSColin Finck } else {
1470c2c66affSColin Finck /* back off to accepting the original '\' as a literal */
1471c2c66affSColin Finck --state->cp;
1472c2c66affSColin Finck c = '\\';
1473c2c66affSColin Finck }
1474c2c66affSColin Finck goto doFlat;
1475c2c66affSColin Finck /* HexEscapeSequence */
1476c2c66affSColin Finck case 'x':
1477c2c66affSColin Finck nDigits = 2;
1478c2c66affSColin Finck goto lexHex;
1479c2c66affSColin Finck /* UnicodeEscapeSequence */
1480c2c66affSColin Finck case 'u':
1481c2c66affSColin Finck nDigits = 4;
1482c2c66affSColin Finck lexHex:
1483c2c66affSColin Finck n = 0;
1484c2c66affSColin Finck for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1485c2c66affSColin Finck UINT digit;
1486c2c66affSColin Finck c = *state->cp++;
1487c2c66affSColin Finck if (!isASCIIHexDigit(c, &digit)) {
1488c2c66affSColin Finck /*
1489c2c66affSColin Finck * Back off to accepting the original 'u' or 'x' as a
1490c2c66affSColin Finck * literal.
1491c2c66affSColin Finck */
1492c2c66affSColin Finck state->cp -= i + 2;
1493c2c66affSColin Finck n = *state->cp++;
1494c2c66affSColin Finck break;
1495c2c66affSColin Finck }
1496c2c66affSColin Finck n = (n << 4) | digit;
1497c2c66affSColin Finck }
1498c2c66affSColin Finck c = (WCHAR) n;
1499c2c66affSColin Finck goto doFlat;
1500c2c66affSColin Finck /* Character class escapes */
1501c2c66affSColin Finck case 'd':
1502c2c66affSColin Finck state->result = NewRENode(state, REOP_DIGIT);
1503c2c66affSColin Finck doSimple:
1504c2c66affSColin Finck if (!state->result)
1505c2c66affSColin Finck return FALSE;
1506c2c66affSColin Finck state->progLength++;
1507c2c66affSColin Finck break;
1508c2c66affSColin Finck case 'D':
1509c2c66affSColin Finck state->result = NewRENode(state, REOP_NONDIGIT);
1510c2c66affSColin Finck goto doSimple;
1511c2c66affSColin Finck case 's':
1512c2c66affSColin Finck state->result = NewRENode(state, REOP_SPACE);
1513c2c66affSColin Finck goto doSimple;
1514c2c66affSColin Finck case 'S':
1515c2c66affSColin Finck state->result = NewRENode(state, REOP_NONSPACE);
1516c2c66affSColin Finck goto doSimple;
1517c2c66affSColin Finck case 'w':
1518c2c66affSColin Finck state->result = NewRENode(state, REOP_ALNUM);
1519c2c66affSColin Finck goto doSimple;
1520c2c66affSColin Finck case 'W':
1521c2c66affSColin Finck state->result = NewRENode(state, REOP_NONALNUM);
1522c2c66affSColin Finck goto doSimple;
1523c2c66affSColin Finck /* IdentityEscape */
1524c2c66affSColin Finck default:
1525c2c66affSColin Finck state->result = NewRENode(state, REOP_FLAT);
1526c2c66affSColin Finck if (!state->result)
1527c2c66affSColin Finck return FALSE;
1528c2c66affSColin Finck state->result->u.flat.chr = c;
1529c2c66affSColin Finck state->result->u.flat.length = 1;
1530c2c66affSColin Finck state->result->kid = (void *) (state->cp - 1);
1531c2c66affSColin Finck state->progLength += 3;
1532c2c66affSColin Finck break;
1533c2c66affSColin Finck }
1534c2c66affSColin Finck break;
1535c2c66affSColin Finck case '[':
1536c2c66affSColin Finck state->result = NewRENode(state, REOP_CLASS);
1537c2c66affSColin Finck if (!state->result)
1538c2c66affSColin Finck return FALSE;
1539c2c66affSColin Finck termStart = state->cp;
1540c2c66affSColin Finck state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1541c2c66affSColin Finck for (;;) {
1542c2c66affSColin Finck if (state->cp == state->cpend) {
1543c2c66affSColin Finck ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1544c2c66affSColin Finck JSMSG_UNTERM_CLASS, termStart);
1545c2c66affSColin Finck
1546c2c66affSColin Finck return FALSE;
1547c2c66affSColin Finck }
1548c2c66affSColin Finck if (*state->cp == '\\') {
1549c2c66affSColin Finck state->cp++;
1550c2c66affSColin Finck if (state->cp != state->cpend)
1551c2c66affSColin Finck state->cp++;
1552c2c66affSColin Finck continue;
1553c2c66affSColin Finck }
1554c2c66affSColin Finck if (*state->cp == ']') {
1555c2c66affSColin Finck state->result->u.ucclass.kidlen = state->cp - termStart;
1556c2c66affSColin Finck break;
1557c2c66affSColin Finck }
1558c2c66affSColin Finck state->cp++;
1559c2c66affSColin Finck }
1560c2c66affSColin Finck for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1561c2c66affSColin Finck if (!state->classCache[i].start) {
1562c2c66affSColin Finck state->classCache[i].start = termStart;
1563c2c66affSColin Finck state->classCache[i].length = state->result->u.ucclass.kidlen;
1564c2c66affSColin Finck state->classCache[i].index = state->classCount;
1565c2c66affSColin Finck break;
1566c2c66affSColin Finck }
1567c2c66affSColin Finck if (state->classCache[i].length ==
1568c2c66affSColin Finck state->result->u.ucclass.kidlen) {
1569c2c66affSColin Finck for (n = 0; ; n++) {
1570c2c66affSColin Finck if (n == state->classCache[i].length) {
1571c2c66affSColin Finck state->result->u.ucclass.index
1572c2c66affSColin Finck = state->classCache[i].index;
1573c2c66affSColin Finck goto claim;
1574c2c66affSColin Finck }
1575c2c66affSColin Finck if (state->classCache[i].start[n] != termStart[n])
1576c2c66affSColin Finck break;
1577c2c66affSColin Finck }
1578c2c66affSColin Finck }
1579c2c66affSColin Finck }
1580c2c66affSColin Finck state->result->u.ucclass.index = state->classCount++;
1581c2c66affSColin Finck
1582c2c66affSColin Finck claim:
1583c2c66affSColin Finck /*
1584c2c66affSColin Finck * Call CalculateBitmapSize now as we want any errors it finds
1585c2c66affSColin Finck * to be reported during the parse phase, not at execution.
1586c2c66affSColin Finck */
1587c2c66affSColin Finck if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1588c2c66affSColin Finck return FALSE;
1589c2c66affSColin Finck /*
1590c2c66affSColin Finck * Update classBitmapsMem with number of bytes to hold bmsize bits,
1591c2c66affSColin Finck * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1592c2c66affSColin Finck * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1593c2c66affSColin Finck */
1594c2c66affSColin Finck n = (state->result->u.ucclass.bmsize >> 3) + 1;
1595c2c66affSColin Finck if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1596c2c66affSColin Finck ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1597c2c66affSColin Finck return FALSE;
1598c2c66affSColin Finck }
1599c2c66affSColin Finck state->classBitmapsMem += n;
1600c2c66affSColin Finck /* CLASS, <index> */
1601c2c66affSColin Finck state->progLength
1602c2c66affSColin Finck += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1603c2c66affSColin Finck break;
1604c2c66affSColin Finck
1605c2c66affSColin Finck case '.':
1606c2c66affSColin Finck state->result = NewRENode(state, REOP_DOT);
1607c2c66affSColin Finck goto doSimple;
1608c2c66affSColin Finck
1609c2c66affSColin Finck case '{':
1610c2c66affSColin Finck {
1611c2c66affSColin Finck const WCHAR *errp = state->cp--;
1612c2c66affSColin Finck INT err;
1613c2c66affSColin Finck
1614c2c66affSColin Finck err = ParseMinMaxQuantifier(state, TRUE);
1615c2c66affSColin Finck state->cp = errp;
1616c2c66affSColin Finck
1617c2c66affSColin Finck if (err < 0)
1618c2c66affSColin Finck goto asFlat;
1619c2c66affSColin Finck
1620c2c66affSColin Finck /* FALL THROUGH */
1621c2c66affSColin Finck }
1622c2c66affSColin Finck case '*':
1623c2c66affSColin Finck case '+':
1624c2c66affSColin Finck case '?':
1625c2c66affSColin Finck ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1626c2c66affSColin Finck JSMSG_BAD_QUANTIFIER, state->cp - 1);
1627c2c66affSColin Finck return FALSE;
1628c2c66affSColin Finck default:
1629c2c66affSColin Finck asFlat:
1630c2c66affSColin Finck state->result = NewRENode(state, REOP_FLAT);
1631c2c66affSColin Finck if (!state->result)
1632c2c66affSColin Finck return FALSE;
1633c2c66affSColin Finck state->result->u.flat.chr = c;
1634c2c66affSColin Finck state->result->u.flat.length = 1;
1635c2c66affSColin Finck state->result->kid = (void *) (state->cp - 1);
1636c2c66affSColin Finck state->progLength += 3;
1637c2c66affSColin Finck break;
1638c2c66affSColin Finck }
1639c2c66affSColin Finck return ParseQuantifier(state);
1640c2c66affSColin Finck }
1641c2c66affSColin Finck
1642c2c66affSColin Finck /*
1643c2c66affSColin Finck * Top-down regular expression grammar, based closely on Perl4.
1644c2c66affSColin Finck *
1645c2c66affSColin Finck * regexp: altern A regular expression is one or more
1646c2c66affSColin Finck * altern '|' regexp alternatives separated by vertical bar.
1647c2c66affSColin Finck */
1648c2c66affSColin Finck #define INITIAL_STACK_SIZE 128
1649c2c66affSColin Finck
1650c2c66affSColin Finck static BOOL
ParseRegExp(CompilerState * state)1651c2c66affSColin Finck ParseRegExp(CompilerState *state)
1652c2c66affSColin Finck {
1653c2c66affSColin Finck size_t parenIndex;
1654c2c66affSColin Finck RENode *operand;
1655c2c66affSColin Finck REOpData *operatorStack;
1656c2c66affSColin Finck RENode **operandStack;
1657c2c66affSColin Finck REOp op;
1658c2c66affSColin Finck INT i;
1659c2c66affSColin Finck BOOL result = FALSE;
1660c2c66affSColin Finck
1661c2c66affSColin Finck INT operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
1662c2c66affSColin Finck INT operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
1663c2c66affSColin Finck
1664c2c66affSColin Finck /* Watch out for empty regexp */
1665c2c66affSColin Finck if (state->cp == state->cpend) {
1666c2c66affSColin Finck state->result = NewRENode(state, REOP_EMPTY);
1667c2c66affSColin Finck return (state->result != NULL);
1668c2c66affSColin Finck }
1669c2c66affSColin Finck
1670c2c66affSColin Finck operatorStack = heap_alloc(sizeof(REOpData) * operatorStackSize);
1671c2c66affSColin Finck if (!operatorStack)
1672c2c66affSColin Finck return FALSE;
1673c2c66affSColin Finck
1674c2c66affSColin Finck operandStack = heap_alloc(sizeof(RENode *) * operandStackSize);
1675c2c66affSColin Finck if (!operandStack)
1676c2c66affSColin Finck goto out;
1677c2c66affSColin Finck
1678c2c66affSColin Finck for (;;) {
1679c2c66affSColin Finck parenIndex = state->parenCount;
1680c2c66affSColin Finck if (state->cp == state->cpend) {
1681c2c66affSColin Finck /*
1682c2c66affSColin Finck * If we are at the end of the regexp and we're short one or more
1683c2c66affSColin Finck * operands, the regexp must have the form /x|/ or some such, with
1684c2c66affSColin Finck * left parentheses making us short more than one operand.
1685c2c66affSColin Finck */
1686c2c66affSColin Finck if (operatorSP >= operandSP) {
1687c2c66affSColin Finck operand = NewRENode(state, REOP_EMPTY);
1688c2c66affSColin Finck if (!operand)
1689c2c66affSColin Finck goto out;
1690c2c66affSColin Finck goto pushOperand;
1691c2c66affSColin Finck }
1692c2c66affSColin Finck } else {
1693c2c66affSColin Finck switch (*state->cp) {
1694c2c66affSColin Finck case '(':
1695c2c66affSColin Finck ++state->cp;
1696c2c66affSColin Finck if (state->cp + 1 < state->cpend &&
1697c2c66affSColin Finck *state->cp == '?' &&
1698c2c66affSColin Finck (state->cp[1] == '=' ||
1699c2c66affSColin Finck state->cp[1] == '!' ||
1700c2c66affSColin Finck state->cp[1] == ':')) {
1701c2c66affSColin Finck switch (state->cp[1]) {
1702c2c66affSColin Finck case '=':
1703c2c66affSColin Finck op = REOP_ASSERT;
1704c2c66affSColin Finck /* ASSERT, <next>, ... ASSERTTEST */
1705c2c66affSColin Finck state->progLength += 4;
1706c2c66affSColin Finck break;
1707c2c66affSColin Finck case '!':
1708c2c66affSColin Finck op = REOP_ASSERT_NOT;
1709c2c66affSColin Finck /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
1710c2c66affSColin Finck state->progLength += 4;
1711c2c66affSColin Finck break;
1712c2c66affSColin Finck default:
1713c2c66affSColin Finck op = REOP_LPARENNON;
1714c2c66affSColin Finck break;
1715c2c66affSColin Finck }
1716c2c66affSColin Finck state->cp += 2;
1717c2c66affSColin Finck } else {
1718c2c66affSColin Finck op = REOP_LPAREN;
1719c2c66affSColin Finck /* LPAREN, <index>, ... RPAREN, <index> */
1720c2c66affSColin Finck state->progLength
1721c2c66affSColin Finck += 2 * (1 + GetCompactIndexWidth(parenIndex));
1722c2c66affSColin Finck state->parenCount++;
1723c2c66affSColin Finck if (state->parenCount == 65535) {
1724c2c66affSColin Finck ReportRegExpError(state, JSREPORT_ERROR,
1725c2c66affSColin Finck JSMSG_TOO_MANY_PARENS);
1726c2c66affSColin Finck goto out;
1727c2c66affSColin Finck }
1728c2c66affSColin Finck }
1729c2c66affSColin Finck goto pushOperator;
1730c2c66affSColin Finck
1731c2c66affSColin Finck case ')':
1732c2c66affSColin Finck /*
1733c2c66affSColin Finck * If there's no stacked open parenthesis, throw syntax error.
1734c2c66affSColin Finck */
1735c2c66affSColin Finck for (i = operatorSP - 1; ; i--) {
1736c2c66affSColin Finck if (i < 0) {
1737c2c66affSColin Finck ReportRegExpError(state, JSREPORT_ERROR,
1738c2c66affSColin Finck JSMSG_UNMATCHED_RIGHT_PAREN);
1739c2c66affSColin Finck goto out;
1740c2c66affSColin Finck }
1741c2c66affSColin Finck if (operatorStack[i].op == REOP_ASSERT ||
1742c2c66affSColin Finck operatorStack[i].op == REOP_ASSERT_NOT ||
1743c2c66affSColin Finck operatorStack[i].op == REOP_LPARENNON ||
1744c2c66affSColin Finck operatorStack[i].op == REOP_LPAREN) {
1745c2c66affSColin Finck break;
1746c2c66affSColin Finck }
1747c2c66affSColin Finck }
1748c2c66affSColin Finck /* FALL THROUGH */
1749c2c66affSColin Finck
1750c2c66affSColin Finck case '|':
1751c2c66affSColin Finck /* Expected an operand before these, so make an empty one */
1752c2c66affSColin Finck operand = NewRENode(state, REOP_EMPTY);
1753c2c66affSColin Finck if (!operand)
1754c2c66affSColin Finck goto out;
1755c2c66affSColin Finck goto pushOperand;
1756c2c66affSColin Finck
1757c2c66affSColin Finck default:
1758c2c66affSColin Finck if (!ParseTerm(state))
1759c2c66affSColin Finck goto out;
1760c2c66affSColin Finck operand = state->result;
1761c2c66affSColin Finck pushOperand:
1762c2c66affSColin Finck if (operandSP == operandStackSize) {
1763c2c66affSColin Finck RENode **tmp;
1764c2c66affSColin Finck operandStackSize += operandStackSize;
1765c2c66affSColin Finck tmp = heap_realloc(operandStack, sizeof(RENode *) * operandStackSize);
1766c2c66affSColin Finck if (!tmp)
1767c2c66affSColin Finck goto out;
1768c2c66affSColin Finck operandStack = tmp;
1769c2c66affSColin Finck }
1770c2c66affSColin Finck operandStack[operandSP++] = operand;
1771c2c66affSColin Finck break;
1772c2c66affSColin Finck }
1773c2c66affSColin Finck }
1774c2c66affSColin Finck
1775c2c66affSColin Finck /* At the end; process remaining operators. */
1776c2c66affSColin Finck restartOperator:
1777c2c66affSColin Finck if (state->cp == state->cpend) {
1778c2c66affSColin Finck while (operatorSP) {
1779c2c66affSColin Finck --operatorSP;
1780c2c66affSColin Finck if (!ProcessOp(state, &operatorStack[operatorSP],
1781c2c66affSColin Finck operandStack, operandSP))
1782c2c66affSColin Finck goto out;
1783c2c66affSColin Finck --operandSP;
1784c2c66affSColin Finck }
1785c2c66affSColin Finck assert(operandSP == 1);
1786c2c66affSColin Finck state->result = operandStack[0];
1787c2c66affSColin Finck result = TRUE;
1788c2c66affSColin Finck goto out;
1789c2c66affSColin Finck }
1790c2c66affSColin Finck
1791c2c66affSColin Finck switch (*state->cp) {
1792c2c66affSColin Finck case '|':
1793c2c66affSColin Finck /* Process any stacked 'concat' operators */
1794c2c66affSColin Finck ++state->cp;
1795c2c66affSColin Finck while (operatorSP &&
1796c2c66affSColin Finck operatorStack[operatorSP - 1].op == REOP_CONCAT) {
1797c2c66affSColin Finck --operatorSP;
1798c2c66affSColin Finck if (!ProcessOp(state, &operatorStack[operatorSP],
1799c2c66affSColin Finck operandStack, operandSP)) {
1800c2c66affSColin Finck goto out;
1801c2c66affSColin Finck }
1802c2c66affSColin Finck --operandSP;
1803c2c66affSColin Finck }
1804c2c66affSColin Finck op = REOP_ALT;
1805c2c66affSColin Finck goto pushOperator;
1806c2c66affSColin Finck
1807c2c66affSColin Finck case ')':
1808c2c66affSColin Finck /*
1809c2c66affSColin Finck * If there's no stacked open parenthesis, throw syntax error.
1810c2c66affSColin Finck */
1811c2c66affSColin Finck for (i = operatorSP - 1; ; i--) {
1812c2c66affSColin Finck if (i < 0) {
1813c2c66affSColin Finck ReportRegExpError(state, JSREPORT_ERROR,
1814c2c66affSColin Finck JSMSG_UNMATCHED_RIGHT_PAREN);
1815c2c66affSColin Finck goto out;
1816c2c66affSColin Finck }
1817c2c66affSColin Finck if (operatorStack[i].op == REOP_ASSERT ||
1818c2c66affSColin Finck operatorStack[i].op == REOP_ASSERT_NOT ||
1819c2c66affSColin Finck operatorStack[i].op == REOP_LPARENNON ||
1820c2c66affSColin Finck operatorStack[i].op == REOP_LPAREN) {
1821c2c66affSColin Finck break;
1822c2c66affSColin Finck }
1823c2c66affSColin Finck }
1824c2c66affSColin Finck ++state->cp;
1825c2c66affSColin Finck
1826c2c66affSColin Finck /* Process everything on the stack until the open parenthesis. */
1827c2c66affSColin Finck for (;;) {
1828c2c66affSColin Finck assert(operatorSP);
1829c2c66affSColin Finck --operatorSP;
1830c2c66affSColin Finck switch (operatorStack[operatorSP].op) {
1831c2c66affSColin Finck case REOP_ASSERT:
1832c2c66affSColin Finck case REOP_ASSERT_NOT:
1833c2c66affSColin Finck case REOP_LPAREN:
1834c2c66affSColin Finck operand = NewRENode(state, operatorStack[operatorSP].op);
1835c2c66affSColin Finck if (!operand)
1836c2c66affSColin Finck goto out;
1837c2c66affSColin Finck operand->u.parenIndex =
1838c2c66affSColin Finck operatorStack[operatorSP].parenIndex;
1839c2c66affSColin Finck assert(operandSP);
1840c2c66affSColin Finck operand->kid = operandStack[operandSP - 1];
1841c2c66affSColin Finck operandStack[operandSP - 1] = operand;
1842c2c66affSColin Finck if (state->treeDepth == TREE_DEPTH_MAX) {
1843c2c66affSColin Finck ReportRegExpError(state, JSREPORT_ERROR,
1844c2c66affSColin Finck JSMSG_REGEXP_TOO_COMPLEX);
1845c2c66affSColin Finck goto out;
1846c2c66affSColin Finck }
1847c2c66affSColin Finck ++state->treeDepth;
1848c2c66affSColin Finck /* FALL THROUGH */
1849c2c66affSColin Finck
1850c2c66affSColin Finck case REOP_LPARENNON:
1851c2c66affSColin Finck state->result = operandStack[operandSP - 1];
1852c2c66affSColin Finck if (!ParseQuantifier(state))
1853c2c66affSColin Finck goto out;
1854c2c66affSColin Finck operandStack[operandSP - 1] = state->result;
1855c2c66affSColin Finck goto restartOperator;
1856c2c66affSColin Finck default:
1857c2c66affSColin Finck if (!ProcessOp(state, &operatorStack[operatorSP],
1858c2c66affSColin Finck operandStack, operandSP))
1859c2c66affSColin Finck goto out;
1860c2c66affSColin Finck --operandSP;
1861c2c66affSColin Finck break;
1862c2c66affSColin Finck }
1863c2c66affSColin Finck }
1864c2c66affSColin Finck break;
1865c2c66affSColin Finck
1866c2c66affSColin Finck case '{':
1867c2c66affSColin Finck {
1868c2c66affSColin Finck const WCHAR *errp = state->cp;
1869c2c66affSColin Finck
1870c2c66affSColin Finck if (ParseMinMaxQuantifier(state, TRUE) < 0) {
1871c2c66affSColin Finck /*
1872c2c66affSColin Finck * This didn't even scan correctly as a quantifier, so we should
1873c2c66affSColin Finck * treat it as flat.
1874c2c66affSColin Finck */
1875c2c66affSColin Finck op = REOP_CONCAT;
1876c2c66affSColin Finck goto pushOperator;
1877c2c66affSColin Finck }
1878c2c66affSColin Finck
1879c2c66affSColin Finck state->cp = errp;
1880c2c66affSColin Finck /* FALL THROUGH */
1881c2c66affSColin Finck }
1882c2c66affSColin Finck
1883c2c66affSColin Finck case '+':
1884c2c66affSColin Finck case '*':
1885c2c66affSColin Finck case '?':
1886c2c66affSColin Finck ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
1887c2c66affSColin Finck state->cp);
1888c2c66affSColin Finck result = FALSE;
1889c2c66affSColin Finck goto out;
1890c2c66affSColin Finck
1891c2c66affSColin Finck default:
1892c2c66affSColin Finck /* Anything else is the start of the next term. */
1893c2c66affSColin Finck op = REOP_CONCAT;
1894c2c66affSColin Finck pushOperator:
1895c2c66affSColin Finck if (operatorSP == operatorStackSize) {
1896c2c66affSColin Finck REOpData *tmp;
1897c2c66affSColin Finck operatorStackSize += operatorStackSize;
1898c2c66affSColin Finck tmp = heap_realloc(operatorStack, sizeof(REOpData) * operatorStackSize);
1899c2c66affSColin Finck if (!tmp)
1900c2c66affSColin Finck goto out;
1901c2c66affSColin Finck operatorStack = tmp;
1902c2c66affSColin Finck }
1903c2c66affSColin Finck operatorStack[operatorSP].op = op;
1904c2c66affSColin Finck operatorStack[operatorSP].errPos = state->cp;
1905c2c66affSColin Finck operatorStack[operatorSP++].parenIndex = parenIndex;
1906c2c66affSColin Finck break;
1907c2c66affSColin Finck }
1908c2c66affSColin Finck }
1909c2c66affSColin Finck out:
1910c2c66affSColin Finck heap_free(operatorStack);
1911c2c66affSColin Finck heap_free(operandStack);
1912c2c66affSColin Finck return result;
1913c2c66affSColin Finck }
1914c2c66affSColin Finck
1915c2c66affSColin Finck /*
1916c2c66affSColin Finck * Save the current state of the match - the position in the input
1917c2c66affSColin Finck * text as well as the position in the bytecode. The state of any
1918c2c66affSColin Finck * parent expressions is also saved (preceding state).
1919c2c66affSColin Finck * Contents of parenCount parentheses from parenIndex are also saved.
1920c2c66affSColin Finck */
1921c2c66affSColin Finck static REBackTrackData *
PushBackTrackState(REGlobalData * gData,REOp op,jsbytecode * target,match_state_t * x,const WCHAR * cp,size_t parenIndex,size_t parenCount)1922c2c66affSColin Finck PushBackTrackState(REGlobalData *gData, REOp op,
1923c2c66affSColin Finck jsbytecode *target, match_state_t *x, const WCHAR *cp,
1924c2c66affSColin Finck size_t parenIndex, size_t parenCount)
1925c2c66affSColin Finck {
1926c2c66affSColin Finck size_t i;
1927c2c66affSColin Finck REBackTrackData *result =
1928c2c66affSColin Finck (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
1929c2c66affSColin Finck
1930c2c66affSColin Finck size_t sz = sizeof(REBackTrackData) +
1931c2c66affSColin Finck gData->stateStackTop * sizeof(REProgState) +
1932c2c66affSColin Finck parenCount * sizeof(RECapture);
1933c2c66affSColin Finck
1934c2c66affSColin Finck ptrdiff_t btsize = gData->backTrackStackSize;
1935c2c66affSColin Finck ptrdiff_t btincr = ((char *)result + sz) -
1936c2c66affSColin Finck ((char *)gData->backTrackStack + btsize);
1937c2c66affSColin Finck
1938c2c66affSColin Finck TRACE("\tBT_Push: %lu,%lu\n", (ULONG_PTR)parenIndex, (ULONG_PTR)parenCount);
1939c2c66affSColin Finck
1940c2c66affSColin Finck JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
1941c2c66affSColin Finck if (btincr > 0) {
1942c2c66affSColin Finck ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
1943c2c66affSColin Finck
1944c2c66affSColin Finck JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
1945c2c66affSColin Finck btincr = ((btincr+btsize-1)/btsize)*btsize;
1946c2c66affSColin Finck gData->backTrackStack = heap_pool_grow(gData->pool, gData->backTrackStack, btsize, btincr);
1947c2c66affSColin Finck if (!gData->backTrackStack) {
1948c2c66affSColin Finck js_ReportOutOfScriptQuota(gData->cx);
1949c2c66affSColin Finck gData->ok = FALSE;
1950c2c66affSColin Finck return NULL;
1951c2c66affSColin Finck }
1952c2c66affSColin Finck gData->backTrackStackSize = btsize + btincr;
1953c2c66affSColin Finck result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
1954c2c66affSColin Finck }
1955c2c66affSColin Finck gData->backTrackSP = result;
1956c2c66affSColin Finck result->sz = gData->cursz;
1957c2c66affSColin Finck gData->cursz = sz;
1958c2c66affSColin Finck
1959c2c66affSColin Finck result->backtrack_op = op;
1960c2c66affSColin Finck result->backtrack_pc = target;
1961c2c66affSColin Finck result->cp = cp;
1962c2c66affSColin Finck result->parenCount = parenCount;
1963c2c66affSColin Finck result->parenIndex = parenIndex;
1964c2c66affSColin Finck
1965c2c66affSColin Finck result->saveStateStackTop = gData->stateStackTop;
1966c2c66affSColin Finck assert(gData->stateStackTop);
1967c2c66affSColin Finck memcpy(result + 1, gData->stateStack,
1968c2c66affSColin Finck sizeof(REProgState) * result->saveStateStackTop);
1969c2c66affSColin Finck
1970c2c66affSColin Finck if (parenCount != 0) {
1971c2c66affSColin Finck memcpy((char *)(result + 1) +
1972c2c66affSColin Finck sizeof(REProgState) * result->saveStateStackTop,
1973c2c66affSColin Finck &x->parens[parenIndex],
1974c2c66affSColin Finck sizeof(RECapture) * parenCount);
1975c2c66affSColin Finck for (i = 0; i != parenCount; i++)
1976c2c66affSColin Finck x->parens[parenIndex + i].index = -1;
1977c2c66affSColin Finck }
1978c2c66affSColin Finck
1979c2c66affSColin Finck return result;
1980c2c66affSColin Finck }
1981c2c66affSColin Finck
1982c2c66affSColin Finck static inline match_state_t *
FlatNIMatcher(REGlobalData * gData,match_state_t * x,const WCHAR * matchChars,size_t length)1983c2c66affSColin Finck FlatNIMatcher(REGlobalData *gData, match_state_t *x, const WCHAR *matchChars,
1984c2c66affSColin Finck size_t length)
1985c2c66affSColin Finck {
1986c2c66affSColin Finck size_t i;
1987c2c66affSColin Finck assert(gData->cpend >= x->cp);
1988c2c66affSColin Finck if (length > (size_t)(gData->cpend - x->cp))
1989c2c66affSColin Finck return NULL;
1990c2c66affSColin Finck for (i = 0; i != length; i++) {
1991*3e2d6582SAmine Khaldi if (towupper(matchChars[i]) != towupper(x->cp[i]))
1992c2c66affSColin Finck return NULL;
1993c2c66affSColin Finck }
1994c2c66affSColin Finck x->cp += length;
1995c2c66affSColin Finck return x;
1996c2c66affSColin Finck }
1997c2c66affSColin Finck
1998c2c66affSColin Finck /*
1999c2c66affSColin Finck * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2000c2c66affSColin Finck * 2. If E is not a character then go to step 6.
2001c2c66affSColin Finck * 3. Let ch be E's character.
2002c2c66affSColin Finck * 4. Let A be a one-element RECharSet containing the character ch.
2003c2c66affSColin Finck * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2004c2c66affSColin Finck * 6. E must be an integer. Let n be that integer.
2005c2c66affSColin Finck * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2006c2c66affSColin Finck * 8. Return an internal Matcher closure that takes two arguments, a State x
2007c2c66affSColin Finck * and a Continuation c, and performs the following:
2008c2c66affSColin Finck * 1. Let cap be x's captures internal array.
2009c2c66affSColin Finck * 2. Let s be cap[n].
2010c2c66affSColin Finck * 3. If s is undefined, then call c(x) and return its result.
2011c2c66affSColin Finck * 4. Let e be x's endIndex.
2012c2c66affSColin Finck * 5. Let len be s's length.
2013c2c66affSColin Finck * 6. Let f be e+len.
2014c2c66affSColin Finck * 7. If f>InputLength, return failure.
2015c2c66affSColin Finck * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2016c2c66affSColin Finck * such that Canonicalize(s[i]) is not the same character as
2017c2c66affSColin Finck * Canonicalize(Input [e+i]), then return failure.
2018c2c66affSColin Finck * 9. Let y be the State (f, cap).
2019c2c66affSColin Finck * 10. Call c(y) and return its result.
2020c2c66affSColin Finck */
2021c2c66affSColin Finck static match_state_t *
BackrefMatcher(REGlobalData * gData,match_state_t * x,size_t parenIndex)2022c2c66affSColin Finck BackrefMatcher(REGlobalData *gData, match_state_t *x, size_t parenIndex)
2023c2c66affSColin Finck {
2024c2c66affSColin Finck size_t len, i;
2025c2c66affSColin Finck const WCHAR *parenContent;
2026c2c66affSColin Finck RECapture *cap = &x->parens[parenIndex];
2027c2c66affSColin Finck
2028c2c66affSColin Finck if (cap->index == -1)
2029c2c66affSColin Finck return x;
2030c2c66affSColin Finck
2031c2c66affSColin Finck len = cap->length;
2032c2c66affSColin Finck if (x->cp + len > gData->cpend)
2033c2c66affSColin Finck return NULL;
2034c2c66affSColin Finck
2035c2c66affSColin Finck parenContent = &gData->cpbegin[cap->index];
2036c2c66affSColin Finck if (gData->regexp->flags & REG_FOLD) {
2037c2c66affSColin Finck for (i = 0; i < len; i++) {
2038*3e2d6582SAmine Khaldi if (towupper(parenContent[i]) != towupper(x->cp[i]))
2039c2c66affSColin Finck return NULL;
2040c2c66affSColin Finck }
2041c2c66affSColin Finck } else {
2042c2c66affSColin Finck for (i = 0; i < len; i++) {
2043c2c66affSColin Finck if (parenContent[i] != x->cp[i])
2044c2c66affSColin Finck return NULL;
2045c2c66affSColin Finck }
2046c2c66affSColin Finck }
2047c2c66affSColin Finck x->cp += len;
2048c2c66affSColin Finck return x;
2049c2c66affSColin Finck }
2050c2c66affSColin Finck
2051c2c66affSColin Finck /* Add a single character to the RECharSet */
2052c2c66affSColin Finck static void
AddCharacterToCharSet(RECharSet * cs,WCHAR c)2053c2c66affSColin Finck AddCharacterToCharSet(RECharSet *cs, WCHAR c)
2054c2c66affSColin Finck {
2055c2c66affSColin Finck UINT byteIndex = (UINT)(c >> 3);
2056c2c66affSColin Finck assert(c <= cs->length);
2057c2c66affSColin Finck cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2058c2c66affSColin Finck }
2059c2c66affSColin Finck
2060c2c66affSColin Finck
2061c2c66affSColin Finck /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2062c2c66affSColin Finck static void
AddCharacterRangeToCharSet(RECharSet * cs,UINT c1,UINT c2)2063c2c66affSColin Finck AddCharacterRangeToCharSet(RECharSet *cs, UINT c1, UINT c2)
2064c2c66affSColin Finck {
2065c2c66affSColin Finck UINT i;
2066c2c66affSColin Finck
2067c2c66affSColin Finck UINT byteIndex1 = c1 >> 3;
2068c2c66affSColin Finck UINT byteIndex2 = c2 >> 3;
2069c2c66affSColin Finck
2070c2c66affSColin Finck assert(c2 <= cs->length && c1 <= c2);
2071c2c66affSColin Finck
2072c2c66affSColin Finck c1 &= 0x7;
2073c2c66affSColin Finck c2 &= 0x7;
2074c2c66affSColin Finck
2075c2c66affSColin Finck if (byteIndex1 == byteIndex2) {
2076c2c66affSColin Finck cs->u.bits[byteIndex1] |= ((BYTE)0xFF >> (7 - (c2 - c1))) << c1;
2077c2c66affSColin Finck } else {
2078c2c66affSColin Finck cs->u.bits[byteIndex1] |= 0xFF << c1;
2079c2c66affSColin Finck for (i = byteIndex1 + 1; i < byteIndex2; i++)
2080c2c66affSColin Finck cs->u.bits[i] = 0xFF;
2081c2c66affSColin Finck cs->u.bits[byteIndex2] |= (BYTE)0xFF >> (7 - c2);
2082c2c66affSColin Finck }
2083c2c66affSColin Finck }
2084c2c66affSColin Finck
2085c2c66affSColin Finck /* Compile the source of the class into a RECharSet */
2086c2c66affSColin Finck static BOOL
ProcessCharSet(REGlobalData * gData,RECharSet * charSet)2087c2c66affSColin Finck ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
2088c2c66affSColin Finck {
2089c2c66affSColin Finck const WCHAR *src, *end;
2090c2c66affSColin Finck BOOL inRange = FALSE;
2091c2c66affSColin Finck WCHAR rangeStart = 0;
2092c2c66affSColin Finck UINT byteLength, n;
2093c2c66affSColin Finck WCHAR c, thisCh;
2094c2c66affSColin Finck INT nDigits, i;
2095c2c66affSColin Finck
2096c2c66affSColin Finck assert(!charSet->converted);
2097c2c66affSColin Finck /*
2098c2c66affSColin Finck * Assert that startIndex and length points to chars inside [] inside
2099c2c66affSColin Finck * source string.
2100c2c66affSColin Finck */
2101c2c66affSColin Finck assert(1 <= charSet->u.src.startIndex);
2102c2c66affSColin Finck assert(charSet->u.src.startIndex < gData->regexp->source_len);
2103c2c66affSColin Finck assert(charSet->u.src.length <= gData->regexp->source_len
2104c2c66affSColin Finck - 1 - charSet->u.src.startIndex);
2105c2c66affSColin Finck
2106c2c66affSColin Finck charSet->converted = TRUE;
2107c2c66affSColin Finck src = gData->regexp->source + charSet->u.src.startIndex;
2108c2c66affSColin Finck
2109c2c66affSColin Finck end = src + charSet->u.src.length;
2110c2c66affSColin Finck
2111c2c66affSColin Finck assert(src[-1] == '[' && end[0] == ']');
2112c2c66affSColin Finck
2113c2c66affSColin Finck byteLength = (charSet->length >> 3) + 1;
2114c2c66affSColin Finck charSet->u.bits = heap_alloc(byteLength);
2115c2c66affSColin Finck if (!charSet->u.bits) {
2116c2c66affSColin Finck JS_ReportOutOfMemory(gData->cx);
2117c2c66affSColin Finck gData->ok = FALSE;
2118c2c66affSColin Finck return FALSE;
2119c2c66affSColin Finck }
2120c2c66affSColin Finck memset(charSet->u.bits, 0, byteLength);
2121c2c66affSColin Finck
2122c2c66affSColin Finck if (src == end)
2123c2c66affSColin Finck return TRUE;
2124c2c66affSColin Finck
2125c2c66affSColin Finck if (*src == '^') {
2126c2c66affSColin Finck assert(charSet->sense == FALSE);
2127c2c66affSColin Finck ++src;
2128c2c66affSColin Finck } else {
2129c2c66affSColin Finck assert(charSet->sense == TRUE);
2130c2c66affSColin Finck }
2131c2c66affSColin Finck
2132c2c66affSColin Finck while (src != end) {
2133c2c66affSColin Finck switch (*src) {
2134c2c66affSColin Finck case '\\':
2135c2c66affSColin Finck ++src;
2136c2c66affSColin Finck c = *src++;
2137c2c66affSColin Finck switch (c) {
2138c2c66affSColin Finck case 'b':
2139c2c66affSColin Finck thisCh = 0x8;
2140c2c66affSColin Finck break;
2141c2c66affSColin Finck case 'f':
2142c2c66affSColin Finck thisCh = 0xC;
2143c2c66affSColin Finck break;
2144c2c66affSColin Finck case 'n':
2145c2c66affSColin Finck thisCh = 0xA;
2146c2c66affSColin Finck break;
2147c2c66affSColin Finck case 'r':
2148c2c66affSColin Finck thisCh = 0xD;
2149c2c66affSColin Finck break;
2150c2c66affSColin Finck case 't':
2151c2c66affSColin Finck thisCh = 0x9;
2152c2c66affSColin Finck break;
2153c2c66affSColin Finck case 'v':
2154c2c66affSColin Finck thisCh = 0xB;
2155c2c66affSColin Finck break;
2156c2c66affSColin Finck case 'c':
2157c2c66affSColin Finck if (src < end && JS_ISWORD(*src)) {
2158c2c66affSColin Finck thisCh = (WCHAR)(*src++ & 0x1F);
2159c2c66affSColin Finck } else {
2160c2c66affSColin Finck --src;
2161c2c66affSColin Finck thisCh = '\\';
2162c2c66affSColin Finck }
2163c2c66affSColin Finck break;
2164c2c66affSColin Finck case 'x':
2165c2c66affSColin Finck nDigits = 2;
2166c2c66affSColin Finck goto lexHex;
2167c2c66affSColin Finck case 'u':
2168c2c66affSColin Finck nDigits = 4;
2169c2c66affSColin Finck lexHex:
2170c2c66affSColin Finck n = 0;
2171c2c66affSColin Finck for (i = 0; (i < nDigits) && (src < end); i++) {
2172c2c66affSColin Finck UINT digit;
2173c2c66affSColin Finck c = *src++;
2174c2c66affSColin Finck if (!isASCIIHexDigit(c, &digit)) {
2175c2c66affSColin Finck /*
2176c2c66affSColin Finck * Back off to accepting the original '\'
2177c2c66affSColin Finck * as a literal
2178c2c66affSColin Finck */
2179c2c66affSColin Finck src -= i + 1;
2180c2c66affSColin Finck n = '\\';
2181c2c66affSColin Finck break;
2182c2c66affSColin Finck }
2183c2c66affSColin Finck n = (n << 4) | digit;
2184c2c66affSColin Finck }
2185c2c66affSColin Finck thisCh = (WCHAR)n;
2186c2c66affSColin Finck break;
2187c2c66affSColin Finck case '0':
2188c2c66affSColin Finck case '1':
2189c2c66affSColin Finck case '2':
2190c2c66affSColin Finck case '3':
2191c2c66affSColin Finck case '4':
2192c2c66affSColin Finck case '5':
2193c2c66affSColin Finck case '6':
2194c2c66affSColin Finck case '7':
2195c2c66affSColin Finck /*
2196c2c66affSColin Finck * This is a non-ECMA extension - decimal escapes (in this
2197c2c66affSColin Finck * case, octal!) are supposed to be an error inside class
2198c2c66affSColin Finck * ranges, but supported here for backwards compatibility.
2199c2c66affSColin Finck */
2200c2c66affSColin Finck n = JS7_UNDEC(c);
2201c2c66affSColin Finck c = *src;
2202c2c66affSColin Finck if ('0' <= c && c <= '7') {
2203c2c66affSColin Finck src++;
2204c2c66affSColin Finck n = 8 * n + JS7_UNDEC(c);
2205c2c66affSColin Finck c = *src;
2206c2c66affSColin Finck if ('0' <= c && c <= '7') {
2207c2c66affSColin Finck src++;
2208c2c66affSColin Finck i = 8 * n + JS7_UNDEC(c);
2209c2c66affSColin Finck if (i <= 0377)
2210c2c66affSColin Finck n = i;
2211c2c66affSColin Finck else
2212c2c66affSColin Finck src--;
2213c2c66affSColin Finck }
2214c2c66affSColin Finck }
2215c2c66affSColin Finck thisCh = (WCHAR)n;
2216c2c66affSColin Finck break;
2217c2c66affSColin Finck
2218c2c66affSColin Finck case 'd':
2219c2c66affSColin Finck AddCharacterRangeToCharSet(charSet, '0', '9');
2220c2c66affSColin Finck continue; /* don't need range processing */
2221c2c66affSColin Finck case 'D':
2222c2c66affSColin Finck AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2223c2c66affSColin Finck AddCharacterRangeToCharSet(charSet,
2224c2c66affSColin Finck (WCHAR)('9' + 1),
2225c2c66affSColin Finck (WCHAR)charSet->length);
2226c2c66affSColin Finck continue;
2227c2c66affSColin Finck case 's':
2228c2c66affSColin Finck for (i = (INT)charSet->length; i >= 0; i--)
2229*3e2d6582SAmine Khaldi if (iswspace(i))
2230c2c66affSColin Finck AddCharacterToCharSet(charSet, (WCHAR)i);
2231c2c66affSColin Finck continue;
2232c2c66affSColin Finck case 'S':
2233c2c66affSColin Finck for (i = (INT)charSet->length; i >= 0; i--)
2234*3e2d6582SAmine Khaldi if (!iswspace(i))
2235c2c66affSColin Finck AddCharacterToCharSet(charSet, (WCHAR)i);
2236c2c66affSColin Finck continue;
2237c2c66affSColin Finck case 'w':
2238c2c66affSColin Finck for (i = (INT)charSet->length; i >= 0; i--)
2239c2c66affSColin Finck if (JS_ISWORD(i))
2240c2c66affSColin Finck AddCharacterToCharSet(charSet, (WCHAR)i);
2241c2c66affSColin Finck continue;
2242c2c66affSColin Finck case 'W':
2243c2c66affSColin Finck for (i = (INT)charSet->length; i >= 0; i--)
2244c2c66affSColin Finck if (!JS_ISWORD(i))
2245c2c66affSColin Finck AddCharacterToCharSet(charSet, (WCHAR)i);
2246c2c66affSColin Finck continue;
2247c2c66affSColin Finck default:
2248c2c66affSColin Finck thisCh = c;
2249c2c66affSColin Finck break;
2250c2c66affSColin Finck
2251c2c66affSColin Finck }
2252c2c66affSColin Finck break;
2253c2c66affSColin Finck
2254c2c66affSColin Finck default:
2255c2c66affSColin Finck thisCh = *src++;
2256c2c66affSColin Finck break;
2257c2c66affSColin Finck
2258c2c66affSColin Finck }
2259c2c66affSColin Finck if (inRange) {
2260c2c66affSColin Finck if (gData->regexp->flags & REG_FOLD) {
2261c2c66affSColin Finck assert(rangeStart <= thisCh);
2262c2c66affSColin Finck for (i = rangeStart; i <= thisCh; i++) {
2263c2c66affSColin Finck WCHAR uch, dch;
2264c2c66affSColin Finck
2265c2c66affSColin Finck AddCharacterToCharSet(charSet, i);
2266*3e2d6582SAmine Khaldi uch = towupper(i);
2267*3e2d6582SAmine Khaldi dch = towlower(i);
2268c2c66affSColin Finck if (i != uch)
2269c2c66affSColin Finck AddCharacterToCharSet(charSet, uch);
2270c2c66affSColin Finck if (i != dch)
2271c2c66affSColin Finck AddCharacterToCharSet(charSet, dch);
2272c2c66affSColin Finck }
2273c2c66affSColin Finck } else {
2274c2c66affSColin Finck AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2275c2c66affSColin Finck }
2276c2c66affSColin Finck inRange = FALSE;
2277c2c66affSColin Finck } else {
2278c2c66affSColin Finck if (gData->regexp->flags & REG_FOLD) {
2279*3e2d6582SAmine Khaldi AddCharacterToCharSet(charSet, towupper(thisCh));
2280*3e2d6582SAmine Khaldi AddCharacterToCharSet(charSet, towlower(thisCh));
2281c2c66affSColin Finck } else {
2282c2c66affSColin Finck AddCharacterToCharSet(charSet, thisCh);
2283c2c66affSColin Finck }
2284c2c66affSColin Finck if (src < end - 1) {
2285c2c66affSColin Finck if (*src == '-') {
2286c2c66affSColin Finck ++src;
2287c2c66affSColin Finck inRange = TRUE;
2288c2c66affSColin Finck rangeStart = thisCh;
2289c2c66affSColin Finck }
2290c2c66affSColin Finck }
2291c2c66affSColin Finck }
2292c2c66affSColin Finck }
2293c2c66affSColin Finck return TRUE;
2294c2c66affSColin Finck }
2295c2c66affSColin Finck
2296c2c66affSColin Finck static BOOL
ReallocStateStack(REGlobalData * gData)2297c2c66affSColin Finck ReallocStateStack(REGlobalData *gData)
2298c2c66affSColin Finck {
2299c2c66affSColin Finck size_t limit = gData->stateStackLimit;
2300c2c66affSColin Finck size_t sz = sizeof(REProgState) * limit;
2301c2c66affSColin Finck
2302c2c66affSColin Finck gData->stateStack = heap_pool_grow(gData->pool, gData->stateStack, sz, sz);
2303c2c66affSColin Finck if (!gData->stateStack) {
2304c2c66affSColin Finck js_ReportOutOfScriptQuota(gData->cx);
2305c2c66affSColin Finck gData->ok = FALSE;
2306c2c66affSColin Finck return FALSE;
2307c2c66affSColin Finck }
2308c2c66affSColin Finck gData->stateStackLimit = limit + limit;
2309c2c66affSColin Finck return TRUE;
2310c2c66affSColin Finck }
2311c2c66affSColin Finck
2312c2c66affSColin Finck #define PUSH_STATE_STACK(data) \
2313c2c66affSColin Finck do { \
2314c2c66affSColin Finck ++(data)->stateStackTop; \
2315c2c66affSColin Finck if ((data)->stateStackTop == (data)->stateStackLimit && \
2316c2c66affSColin Finck !ReallocStateStack((data))) { \
2317c2c66affSColin Finck return NULL; \
2318c2c66affSColin Finck } \
2319c2c66affSColin Finck }while(0)
2320c2c66affSColin Finck
2321c2c66affSColin Finck /*
2322c2c66affSColin Finck * Apply the current op against the given input to see if it's going to match
2323c2c66affSColin Finck * or fail. Return false if we don't get a match, true if we do. If updatecp is
2324c2c66affSColin Finck * true, then update the current state's cp. Always update startpc to the next
2325c2c66affSColin Finck * op.
2326c2c66affSColin Finck */
2327c2c66affSColin Finck static inline match_state_t *
SimpleMatch(REGlobalData * gData,match_state_t * x,REOp op,jsbytecode ** startpc,BOOL updatecp)2328c2c66affSColin Finck SimpleMatch(REGlobalData *gData, match_state_t *x, REOp op,
2329c2c66affSColin Finck jsbytecode **startpc, BOOL updatecp)
2330c2c66affSColin Finck {
2331c2c66affSColin Finck match_state_t *result = NULL;
2332c2c66affSColin Finck WCHAR matchCh;
2333c2c66affSColin Finck size_t parenIndex;
2334c2c66affSColin Finck size_t offset, length, index;
2335c2c66affSColin Finck jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2336c2c66affSColin Finck const WCHAR *source;
2337c2c66affSColin Finck const WCHAR *startcp = x->cp;
2338c2c66affSColin Finck WCHAR ch;
2339c2c66affSColin Finck RECharSet *charSet;
2340c2c66affSColin Finck
2341c2c66affSColin Finck const char *opname = reop_names[op];
2342c2c66affSColin Finck TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2343c2c66affSColin Finck (int)gData->stateStackTop * 2, "", opname);
2344c2c66affSColin Finck
2345c2c66affSColin Finck switch (op) {
2346c2c66affSColin Finck case REOP_EMPTY:
2347c2c66affSColin Finck result = x;
2348c2c66affSColin Finck break;
2349c2c66affSColin Finck case REOP_BOL:
2350c2c66affSColin Finck if (x->cp != gData->cpbegin) {
2351c2c66affSColin Finck if (/*!gData->cx->regExpStatics.multiline && FIXME !!! */
2352c2c66affSColin Finck !(gData->regexp->flags & REG_MULTILINE)) {
2353c2c66affSColin Finck break;
2354c2c66affSColin Finck }
2355c2c66affSColin Finck if (!RE_IS_LINE_TERM(x->cp[-1]))
2356c2c66affSColin Finck break;
2357c2c66affSColin Finck }
2358c2c66affSColin Finck result = x;
2359c2c66affSColin Finck break;
2360c2c66affSColin Finck case REOP_EOL:
2361c2c66affSColin Finck if (x->cp != gData->cpend) {
2362c2c66affSColin Finck if (/*!gData->cx->regExpStatics.multiline &&*/
2363c2c66affSColin Finck !(gData->regexp->flags & REG_MULTILINE)) {
2364c2c66affSColin Finck break;
2365c2c66affSColin Finck }
2366c2c66affSColin Finck if (!RE_IS_LINE_TERM(*x->cp))
2367c2c66affSColin Finck break;
2368c2c66affSColin Finck }
2369c2c66affSColin Finck result = x;
2370c2c66affSColin Finck break;
2371c2c66affSColin Finck case REOP_WBDRY:
2372c2c66affSColin Finck if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2373c2c66affSColin Finck !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2374c2c66affSColin Finck result = x;
2375c2c66affSColin Finck }
2376c2c66affSColin Finck break;
2377c2c66affSColin Finck case REOP_WNONBDRY:
2378c2c66affSColin Finck if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2379c2c66affSColin Finck (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2380c2c66affSColin Finck result = x;
2381c2c66affSColin Finck }
2382c2c66affSColin Finck break;
2383c2c66affSColin Finck case REOP_DOT:
2384c2c66affSColin Finck if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2385c2c66affSColin Finck result = x;
2386c2c66affSColin Finck result->cp++;
2387c2c66affSColin Finck }
2388c2c66affSColin Finck break;
2389c2c66affSColin Finck case REOP_DIGIT:
2390c2c66affSColin Finck if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
2391c2c66affSColin Finck result = x;
2392c2c66affSColin Finck result->cp++;
2393c2c66affSColin Finck }
2394c2c66affSColin Finck break;
2395c2c66affSColin Finck case REOP_NONDIGIT:
2396c2c66affSColin Finck if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
2397c2c66affSColin Finck result = x;
2398c2c66affSColin Finck result->cp++;
2399c2c66affSColin Finck }
2400c2c66affSColin Finck break;
2401c2c66affSColin Finck case REOP_ALNUM:
2402c2c66affSColin Finck if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2403c2c66affSColin Finck result = x;
2404c2c66affSColin Finck result->cp++;
2405c2c66affSColin Finck }
2406c2c66affSColin Finck break;
2407c2c66affSColin Finck case REOP_NONALNUM:
2408c2c66affSColin Finck if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2409c2c66affSColin Finck result = x;
2410c2c66affSColin Finck result->cp++;
2411c2c66affSColin Finck }
2412c2c66affSColin Finck break;
2413c2c66affSColin Finck case REOP_SPACE:
2414*3e2d6582SAmine Khaldi if (x->cp != gData->cpend && iswspace(*x->cp)) {
2415c2c66affSColin Finck result = x;
2416c2c66affSColin Finck result->cp++;
2417c2c66affSColin Finck }
2418c2c66affSColin Finck break;
2419c2c66affSColin Finck case REOP_NONSPACE:
2420*3e2d6582SAmine Khaldi if (x->cp != gData->cpend && !iswspace(*x->cp)) {
2421c2c66affSColin Finck result = x;
2422c2c66affSColin Finck result->cp++;
2423c2c66affSColin Finck }
2424c2c66affSColin Finck break;
2425c2c66affSColin Finck case REOP_BACKREF:
2426c2c66affSColin Finck pc = ReadCompactIndex(pc, &parenIndex);
2427c2c66affSColin Finck assert(parenIndex < gData->regexp->parenCount);
2428c2c66affSColin Finck result = BackrefMatcher(gData, x, parenIndex);
2429c2c66affSColin Finck break;
2430c2c66affSColin Finck case REOP_FLAT:
2431c2c66affSColin Finck pc = ReadCompactIndex(pc, &offset);
2432c2c66affSColin Finck assert(offset < gData->regexp->source_len);
2433c2c66affSColin Finck pc = ReadCompactIndex(pc, &length);
2434c2c66affSColin Finck assert(1 <= length);
2435c2c66affSColin Finck assert(length <= gData->regexp->source_len - offset);
2436c2c66affSColin Finck if (length <= (size_t)(gData->cpend - x->cp)) {
2437c2c66affSColin Finck source = gData->regexp->source + offset;
2438c2c66affSColin Finck TRACE("%s\n", debugstr_wn(source, length));
2439c2c66affSColin Finck for (index = 0; index != length; index++) {
2440c2c66affSColin Finck if (source[index] != x->cp[index])
2441c2c66affSColin Finck return NULL;
2442c2c66affSColin Finck }
2443c2c66affSColin Finck x->cp += length;
2444c2c66affSColin Finck result = x;
2445c2c66affSColin Finck }
2446c2c66affSColin Finck break;
2447c2c66affSColin Finck case REOP_FLAT1:
2448c2c66affSColin Finck matchCh = *pc++;
2449c2c66affSColin Finck TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2450c2c66affSColin Finck if (x->cp != gData->cpend && *x->cp == matchCh) {
2451c2c66affSColin Finck result = x;
2452c2c66affSColin Finck result->cp++;
2453c2c66affSColin Finck }
2454c2c66affSColin Finck break;
2455c2c66affSColin Finck case REOP_FLATi:
2456c2c66affSColin Finck pc = ReadCompactIndex(pc, &offset);
2457c2c66affSColin Finck assert(offset < gData->regexp->source_len);
2458c2c66affSColin Finck pc = ReadCompactIndex(pc, &length);
2459c2c66affSColin Finck assert(1 <= length);
2460c2c66affSColin Finck assert(length <= gData->regexp->source_len - offset);
2461c2c66affSColin Finck source = gData->regexp->source;
2462c2c66affSColin Finck result = FlatNIMatcher(gData, x, source + offset, length);
2463c2c66affSColin Finck break;
2464c2c66affSColin Finck case REOP_FLAT1i:
2465c2c66affSColin Finck matchCh = *pc++;
2466*3e2d6582SAmine Khaldi if (x->cp != gData->cpend && towupper(*x->cp) == towupper(matchCh)) {
2467c2c66affSColin Finck result = x;
2468c2c66affSColin Finck result->cp++;
2469c2c66affSColin Finck }
2470c2c66affSColin Finck break;
2471c2c66affSColin Finck case REOP_UCFLAT1:
2472c2c66affSColin Finck matchCh = GET_ARG(pc);
2473c2c66affSColin Finck TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2474c2c66affSColin Finck pc += ARG_LEN;
2475c2c66affSColin Finck if (x->cp != gData->cpend && *x->cp == matchCh) {
2476c2c66affSColin Finck result = x;
2477c2c66affSColin Finck result->cp++;
2478c2c66affSColin Finck }
2479c2c66affSColin Finck break;
2480c2c66affSColin Finck case REOP_UCFLAT1i:
2481c2c66affSColin Finck matchCh = GET_ARG(pc);
2482c2c66affSColin Finck pc += ARG_LEN;
2483*3e2d6582SAmine Khaldi if (x->cp != gData->cpend && towupper(*x->cp) == towupper(matchCh)) {
2484c2c66affSColin Finck result = x;
2485c2c66affSColin Finck result->cp++;
2486c2c66affSColin Finck }
2487c2c66affSColin Finck break;
2488c2c66affSColin Finck case REOP_CLASS:
2489c2c66affSColin Finck pc = ReadCompactIndex(pc, &index);
2490c2c66affSColin Finck assert(index < gData->regexp->classCount);
2491c2c66affSColin Finck if (x->cp != gData->cpend) {
2492c2c66affSColin Finck charSet = &gData->regexp->classList[index];
2493c2c66affSColin Finck assert(charSet->converted);
2494c2c66affSColin Finck ch = *x->cp;
2495c2c66affSColin Finck index = ch >> 3;
2496c2c66affSColin Finck if (charSet->length != 0 &&
2497c2c66affSColin Finck ch <= charSet->length &&
2498c2c66affSColin Finck (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2499c2c66affSColin Finck result = x;
2500c2c66affSColin Finck result->cp++;
2501c2c66affSColin Finck }
2502c2c66affSColin Finck }
2503c2c66affSColin Finck break;
2504c2c66affSColin Finck case REOP_NCLASS:
2505c2c66affSColin Finck pc = ReadCompactIndex(pc, &index);
2506c2c66affSColin Finck assert(index < gData->regexp->classCount);
2507c2c66affSColin Finck if (x->cp != gData->cpend) {
2508c2c66affSColin Finck charSet = &gData->regexp->classList[index];
2509c2c66affSColin Finck assert(charSet->converted);
2510c2c66affSColin Finck ch = *x->cp;
2511c2c66affSColin Finck index = ch >> 3;
2512c2c66affSColin Finck if (charSet->length == 0 ||
2513c2c66affSColin Finck ch > charSet->length ||
2514c2c66affSColin Finck !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2515c2c66affSColin Finck result = x;
2516c2c66affSColin Finck result->cp++;
2517c2c66affSColin Finck }
2518c2c66affSColin Finck }
2519c2c66affSColin Finck break;
2520c2c66affSColin Finck
2521c2c66affSColin Finck default:
2522c2c66affSColin Finck assert(FALSE);
2523c2c66affSColin Finck }
2524c2c66affSColin Finck if (result) {
2525c2c66affSColin Finck if (!updatecp)
2526c2c66affSColin Finck x->cp = startcp;
2527c2c66affSColin Finck *startpc = pc;
2528c2c66affSColin Finck TRACE(" *\n");
2529c2c66affSColin Finck return result;
2530c2c66affSColin Finck }
2531c2c66affSColin Finck x->cp = startcp;
2532c2c66affSColin Finck return NULL;
2533c2c66affSColin Finck }
2534c2c66affSColin Finck
2535c2c66affSColin Finck static inline match_state_t *
ExecuteREBytecode(REGlobalData * gData,match_state_t * x)2536c2c66affSColin Finck ExecuteREBytecode(REGlobalData *gData, match_state_t *x)
2537c2c66affSColin Finck {
2538c2c66affSColin Finck match_state_t *result = NULL;
2539c2c66affSColin Finck REBackTrackData *backTrackData;
2540c2c66affSColin Finck jsbytecode *nextpc, *testpc;
2541c2c66affSColin Finck REOp nextop;
2542c2c66affSColin Finck RECapture *cap;
2543c2c66affSColin Finck REProgState *curState;
2544c2c66affSColin Finck const WCHAR *startcp;
2545c2c66affSColin Finck size_t parenIndex, k;
2546c2c66affSColin Finck size_t parenSoFar = 0;
2547c2c66affSColin Finck
2548c2c66affSColin Finck WCHAR matchCh1, matchCh2;
2549c2c66affSColin Finck RECharSet *charSet;
2550c2c66affSColin Finck
2551c2c66affSColin Finck BOOL anchor;
2552c2c66affSColin Finck jsbytecode *pc = gData->regexp->program;
2553c2c66affSColin Finck REOp op = (REOp) *pc++;
2554c2c66affSColin Finck
2555c2c66affSColin Finck /*
2556c2c66affSColin Finck * If the first node is a simple match, step the index into the string
2557c2c66affSColin Finck * until that match is made, or fail if it can't be found at all.
2558c2c66affSColin Finck */
2559c2c66affSColin Finck if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & REG_STICKY)) {
2560c2c66affSColin Finck anchor = FALSE;
2561c2c66affSColin Finck while (x->cp <= gData->cpend) {
2562c2c66affSColin Finck nextpc = pc; /* reset back to start each time */
2563c2c66affSColin Finck result = SimpleMatch(gData, x, op, &nextpc, TRUE);
2564c2c66affSColin Finck if (result) {
2565c2c66affSColin Finck anchor = TRUE;
2566c2c66affSColin Finck x = result;
2567c2c66affSColin Finck pc = nextpc; /* accept skip to next opcode */
2568c2c66affSColin Finck op = (REOp) *pc++;
2569c2c66affSColin Finck assert(op < REOP_LIMIT);
2570c2c66affSColin Finck break;
2571c2c66affSColin Finck }
2572c2c66affSColin Finck gData->skipped++;
2573c2c66affSColin Finck x->cp++;
2574c2c66affSColin Finck }
2575c2c66affSColin Finck if (!anchor)
2576c2c66affSColin Finck goto bad;
2577c2c66affSColin Finck }
2578c2c66affSColin Finck
2579c2c66affSColin Finck for (;;) {
2580c2c66affSColin Finck const char *opname = reop_names[op];
2581c2c66affSColin Finck TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2582c2c66affSColin Finck (int)gData->stateStackTop * 2, "", opname);
2583c2c66affSColin Finck
2584c2c66affSColin Finck if (REOP_IS_SIMPLE(op)) {
2585c2c66affSColin Finck result = SimpleMatch(gData, x, op, &pc, TRUE);
2586c2c66affSColin Finck } else {
2587c2c66affSColin Finck curState = &gData->stateStack[gData->stateStackTop];
2588c2c66affSColin Finck switch (op) {
2589c2c66affSColin Finck case REOP_END:
2590c2c66affSColin Finck goto good;
2591c2c66affSColin Finck case REOP_ALTPREREQ2:
2592c2c66affSColin Finck nextpc = pc + GET_OFFSET(pc); /* start of next op */
2593c2c66affSColin Finck pc += ARG_LEN;
2594c2c66affSColin Finck matchCh2 = GET_ARG(pc);
2595c2c66affSColin Finck pc += ARG_LEN;
2596c2c66affSColin Finck k = GET_ARG(pc);
2597c2c66affSColin Finck pc += ARG_LEN;
2598c2c66affSColin Finck
2599c2c66affSColin Finck if (x->cp != gData->cpend) {
2600c2c66affSColin Finck if (*x->cp == matchCh2)
2601c2c66affSColin Finck goto doAlt;
2602c2c66affSColin Finck
2603c2c66affSColin Finck charSet = &gData->regexp->classList[k];
2604c2c66affSColin Finck if (!charSet->converted && !ProcessCharSet(gData, charSet))
2605c2c66affSColin Finck goto bad;
2606c2c66affSColin Finck matchCh1 = *x->cp;
2607c2c66affSColin Finck k = matchCh1 >> 3;
2608c2c66affSColin Finck if ((charSet->length == 0 ||
2609c2c66affSColin Finck matchCh1 > charSet->length ||
2610c2c66affSColin Finck !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2611c2c66affSColin Finck charSet->sense) {
2612c2c66affSColin Finck goto doAlt;
2613c2c66affSColin Finck }
2614c2c66affSColin Finck }
2615c2c66affSColin Finck result = NULL;
2616c2c66affSColin Finck break;
2617c2c66affSColin Finck
2618c2c66affSColin Finck case REOP_ALTPREREQ:
2619c2c66affSColin Finck nextpc = pc + GET_OFFSET(pc); /* start of next op */
2620c2c66affSColin Finck pc += ARG_LEN;
2621c2c66affSColin Finck matchCh1 = GET_ARG(pc);
2622c2c66affSColin Finck pc += ARG_LEN;
2623c2c66affSColin Finck matchCh2 = GET_ARG(pc);
2624c2c66affSColin Finck pc += ARG_LEN;
2625c2c66affSColin Finck if (x->cp == gData->cpend ||
2626c2c66affSColin Finck (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2627c2c66affSColin Finck result = NULL;
2628c2c66affSColin Finck break;
2629c2c66affSColin Finck }
2630c2c66affSColin Finck /* else fall through... */
2631c2c66affSColin Finck
2632c2c66affSColin Finck case REOP_ALT:
2633c2c66affSColin Finck doAlt:
2634c2c66affSColin Finck nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2635c2c66affSColin Finck pc += ARG_LEN; /* start of this alternate */
2636c2c66affSColin Finck curState->parenSoFar = parenSoFar;
2637c2c66affSColin Finck PUSH_STATE_STACK(gData);
2638c2c66affSColin Finck op = (REOp) *pc++;
2639c2c66affSColin Finck startcp = x->cp;
2640c2c66affSColin Finck if (REOP_IS_SIMPLE(op)) {
2641c2c66affSColin Finck if (!SimpleMatch(gData, x, op, &pc, TRUE)) {
2642c2c66affSColin Finck op = (REOp) *nextpc++;
2643c2c66affSColin Finck pc = nextpc;
2644c2c66affSColin Finck continue;
2645c2c66affSColin Finck }
2646c2c66affSColin Finck result = x;
2647c2c66affSColin Finck op = (REOp) *pc++;
2648c2c66affSColin Finck }
2649c2c66affSColin Finck nextop = (REOp) *nextpc++;
2650c2c66affSColin Finck if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2651c2c66affSColin Finck goto bad;
2652c2c66affSColin Finck continue;
2653c2c66affSColin Finck
2654c2c66affSColin Finck /*
2655c2c66affSColin Finck * Occurs at (successful) end of REOP_ALT,
2656c2c66affSColin Finck */
2657c2c66affSColin Finck case REOP_JUMP:
2658c2c66affSColin Finck /*
2659c2c66affSColin Finck * If we have not gotten a result here, it is because of an
2660c2c66affSColin Finck * empty match. Do the same thing REOP_EMPTY would do.
2661c2c66affSColin Finck */
2662c2c66affSColin Finck if (!result)
2663c2c66affSColin Finck result = x;
2664c2c66affSColin Finck
2665c2c66affSColin Finck --gData->stateStackTop;
2666c2c66affSColin Finck pc += GET_OFFSET(pc);
2667c2c66affSColin Finck op = (REOp) *pc++;
2668c2c66affSColin Finck continue;
2669c2c66affSColin Finck
2670c2c66affSColin Finck /*
2671c2c66affSColin Finck * Occurs at last (successful) end of REOP_ALT,
2672c2c66affSColin Finck */
2673c2c66affSColin Finck case REOP_ENDALT:
2674c2c66affSColin Finck /*
2675c2c66affSColin Finck * If we have not gotten a result here, it is because of an
2676c2c66affSColin Finck * empty match. Do the same thing REOP_EMPTY would do.
2677c2c66affSColin Finck */
2678c2c66affSColin Finck if (!result)
2679c2c66affSColin Finck result = x;
2680c2c66affSColin Finck
2681c2c66affSColin Finck --gData->stateStackTop;
2682c2c66affSColin Finck op = (REOp) *pc++;
2683c2c66affSColin Finck continue;
2684c2c66affSColin Finck
2685c2c66affSColin Finck case REOP_LPAREN:
2686c2c66affSColin Finck pc = ReadCompactIndex(pc, &parenIndex);
2687c2c66affSColin Finck TRACE("[ %lu ]\n", (ULONG_PTR)parenIndex);
2688c2c66affSColin Finck assert(parenIndex < gData->regexp->parenCount);
2689c2c66affSColin Finck if (parenIndex + 1 > parenSoFar)
2690c2c66affSColin Finck parenSoFar = parenIndex + 1;
2691c2c66affSColin Finck x->parens[parenIndex].index = x->cp - gData->cpbegin;
2692c2c66affSColin Finck x->parens[parenIndex].length = 0;
2693c2c66affSColin Finck op = (REOp) *pc++;
2694c2c66affSColin Finck continue;
2695c2c66affSColin Finck
2696c2c66affSColin Finck case REOP_RPAREN:
2697c2c66affSColin Finck {
2698c2c66affSColin Finck ptrdiff_t delta;
2699c2c66affSColin Finck
2700c2c66affSColin Finck pc = ReadCompactIndex(pc, &parenIndex);
2701c2c66affSColin Finck assert(parenIndex < gData->regexp->parenCount);
2702c2c66affSColin Finck cap = &x->parens[parenIndex];
2703c2c66affSColin Finck delta = x->cp - (gData->cpbegin + cap->index);
2704c2c66affSColin Finck cap->length = (delta < 0) ? 0 : (size_t) delta;
2705c2c66affSColin Finck op = (REOp) *pc++;
2706c2c66affSColin Finck
2707c2c66affSColin Finck if (!result)
2708c2c66affSColin Finck result = x;
2709c2c66affSColin Finck continue;
2710c2c66affSColin Finck }
2711c2c66affSColin Finck case REOP_ASSERT:
2712c2c66affSColin Finck nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2713c2c66affSColin Finck pc += ARG_LEN; /* start of ASSERT child */
2714c2c66affSColin Finck op = (REOp) *pc++;
2715c2c66affSColin Finck testpc = pc;
2716c2c66affSColin Finck if (REOP_IS_SIMPLE(op) &&
2717c2c66affSColin Finck !SimpleMatch(gData, x, op, &testpc, FALSE)) {
2718c2c66affSColin Finck result = NULL;
2719c2c66affSColin Finck break;
2720c2c66affSColin Finck }
2721c2c66affSColin Finck curState->u.assertion.top =
2722c2c66affSColin Finck (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2723c2c66affSColin Finck curState->u.assertion.sz = gData->cursz;
2724c2c66affSColin Finck curState->index = x->cp - gData->cpbegin;
2725c2c66affSColin Finck curState->parenSoFar = parenSoFar;
2726c2c66affSColin Finck PUSH_STATE_STACK(gData);
2727c2c66affSColin Finck if (!PushBackTrackState(gData, REOP_ASSERTTEST,
2728c2c66affSColin Finck nextpc, x, x->cp, 0, 0)) {
2729c2c66affSColin Finck goto bad;
2730c2c66affSColin Finck }
2731c2c66affSColin Finck continue;
2732c2c66affSColin Finck
2733c2c66affSColin Finck case REOP_ASSERT_NOT:
2734c2c66affSColin Finck nextpc = pc + GET_OFFSET(pc);
2735c2c66affSColin Finck pc += ARG_LEN;
2736c2c66affSColin Finck op = (REOp) *pc++;
2737c2c66affSColin Finck testpc = pc;
2738c2c66affSColin Finck if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2739c2c66affSColin Finck SimpleMatch(gData, x, op, &testpc, FALSE) &&
2740c2c66affSColin Finck *testpc == REOP_ASSERTNOTTEST) {
2741c2c66affSColin Finck result = NULL;
2742c2c66affSColin Finck break;
2743c2c66affSColin Finck }
2744c2c66affSColin Finck curState->u.assertion.top
2745c2c66affSColin Finck = (char *)gData->backTrackSP -
2746c2c66affSColin Finck (char *)gData->backTrackStack;
2747c2c66affSColin Finck curState->u.assertion.sz = gData->cursz;
2748c2c66affSColin Finck curState->index = x->cp - gData->cpbegin;
2749c2c66affSColin Finck curState->parenSoFar = parenSoFar;
2750c2c66affSColin Finck PUSH_STATE_STACK(gData);
2751c2c66affSColin Finck if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
2752c2c66affSColin Finck nextpc, x, x->cp, 0, 0)) {
2753c2c66affSColin Finck goto bad;
2754c2c66affSColin Finck }
2755c2c66affSColin Finck continue;
2756c2c66affSColin Finck
2757c2c66affSColin Finck case REOP_ASSERTTEST:
2758c2c66affSColin Finck --gData->stateStackTop;
2759c2c66affSColin Finck --curState;
2760c2c66affSColin Finck x->cp = gData->cpbegin + curState->index;
2761c2c66affSColin Finck gData->backTrackSP =
2762c2c66affSColin Finck (REBackTrackData *) ((char *)gData->backTrackStack +
2763c2c66affSColin Finck curState->u.assertion.top);
2764c2c66affSColin Finck gData->cursz = curState->u.assertion.sz;
2765c2c66affSColin Finck if (result)
2766c2c66affSColin Finck result = x;
2767c2c66affSColin Finck break;
2768c2c66affSColin Finck
2769c2c66affSColin Finck case REOP_ASSERTNOTTEST:
2770c2c66affSColin Finck --gData->stateStackTop;
2771c2c66affSColin Finck --curState;
2772c2c66affSColin Finck x->cp = gData->cpbegin + curState->index;
2773c2c66affSColin Finck gData->backTrackSP =
2774c2c66affSColin Finck (REBackTrackData *) ((char *)gData->backTrackStack +
2775c2c66affSColin Finck curState->u.assertion.top);
2776c2c66affSColin Finck gData->cursz = curState->u.assertion.sz;
2777c2c66affSColin Finck result = (!result) ? x : NULL;
2778c2c66affSColin Finck break;
2779c2c66affSColin Finck case REOP_STAR:
2780c2c66affSColin Finck curState->u.quantifier.min = 0;
2781c2c66affSColin Finck curState->u.quantifier.max = (UINT)-1;
2782c2c66affSColin Finck goto quantcommon;
2783c2c66affSColin Finck case REOP_PLUS:
2784c2c66affSColin Finck curState->u.quantifier.min = 1;
2785c2c66affSColin Finck curState->u.quantifier.max = (UINT)-1;
2786c2c66affSColin Finck goto quantcommon;
2787c2c66affSColin Finck case REOP_OPT:
2788c2c66affSColin Finck curState->u.quantifier.min = 0;
2789c2c66affSColin Finck curState->u.quantifier.max = 1;
2790c2c66affSColin Finck goto quantcommon;
2791c2c66affSColin Finck case REOP_QUANT:
2792c2c66affSColin Finck pc = ReadCompactIndex(pc, &k);
2793c2c66affSColin Finck curState->u.quantifier.min = k;
2794c2c66affSColin Finck pc = ReadCompactIndex(pc, &k);
2795c2c66affSColin Finck /* max is k - 1 to use one byte for (UINT)-1 sentinel. */
2796c2c66affSColin Finck curState->u.quantifier.max = k - 1;
2797c2c66affSColin Finck assert(curState->u.quantifier.min <= curState->u.quantifier.max);
2798c2c66affSColin Finck quantcommon:
2799c2c66affSColin Finck if (curState->u.quantifier.max == 0) {
2800c2c66affSColin Finck pc = pc + GET_OFFSET(pc);
2801c2c66affSColin Finck op = (REOp) *pc++;
2802c2c66affSColin Finck result = x;
2803c2c66affSColin Finck continue;
2804c2c66affSColin Finck }
2805c2c66affSColin Finck /* Step over <next> */
2806c2c66affSColin Finck nextpc = pc + ARG_LEN;
2807c2c66affSColin Finck op = (REOp) *nextpc++;
2808c2c66affSColin Finck startcp = x->cp;
2809c2c66affSColin Finck if (REOP_IS_SIMPLE(op)) {
2810c2c66affSColin Finck if (!SimpleMatch(gData, x, op, &nextpc, TRUE)) {
2811c2c66affSColin Finck if (curState->u.quantifier.min == 0)
2812c2c66affSColin Finck result = x;
2813c2c66affSColin Finck else
2814c2c66affSColin Finck result = NULL;
2815c2c66affSColin Finck pc = pc + GET_OFFSET(pc);
2816c2c66affSColin Finck break;
2817c2c66affSColin Finck }
2818c2c66affSColin Finck op = (REOp) *nextpc++;
2819c2c66affSColin Finck result = x;
2820c2c66affSColin Finck }
2821c2c66affSColin Finck curState->index = startcp - gData->cpbegin;
2822c2c66affSColin Finck curState->continue_op = REOP_REPEAT;
2823c2c66affSColin Finck curState->continue_pc = pc;
2824c2c66affSColin Finck curState->parenSoFar = parenSoFar;
2825c2c66affSColin Finck PUSH_STATE_STACK(gData);
2826c2c66affSColin Finck if (curState->u.quantifier.min == 0 &&
2827c2c66affSColin Finck !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2828c2c66affSColin Finck 0, 0)) {
2829c2c66affSColin Finck goto bad;
2830c2c66affSColin Finck }
2831c2c66affSColin Finck pc = nextpc;
2832c2c66affSColin Finck continue;
2833c2c66affSColin Finck
2834c2c66affSColin Finck case REOP_ENDCHILD: /* marks the end of a quantifier child */
2835c2c66affSColin Finck pc = curState[-1].continue_pc;
2836c2c66affSColin Finck op = (REOp) curState[-1].continue_op;
2837c2c66affSColin Finck
2838c2c66affSColin Finck if (!result)
2839c2c66affSColin Finck result = x;
2840c2c66affSColin Finck continue;
2841c2c66affSColin Finck
2842c2c66affSColin Finck case REOP_REPEAT:
2843c2c66affSColin Finck --curState;
2844c2c66affSColin Finck do {
2845c2c66affSColin Finck --gData->stateStackTop;
2846c2c66affSColin Finck if (!result) {
2847c2c66affSColin Finck /* Failed, see if we have enough children. */
2848c2c66affSColin Finck if (curState->u.quantifier.min == 0)
2849c2c66affSColin Finck goto repeatDone;
2850c2c66affSColin Finck goto break_switch;
2851c2c66affSColin Finck }
2852c2c66affSColin Finck if (curState->u.quantifier.min == 0 &&
2853c2c66affSColin Finck x->cp == gData->cpbegin + curState->index) {
2854c2c66affSColin Finck /* matched an empty string, that'll get us nowhere */
2855c2c66affSColin Finck result = NULL;
2856c2c66affSColin Finck goto break_switch;
2857c2c66affSColin Finck }
2858c2c66affSColin Finck if (curState->u.quantifier.min != 0)
2859c2c66affSColin Finck curState->u.quantifier.min--;
2860c2c66affSColin Finck if (curState->u.quantifier.max != (UINT) -1)
2861c2c66affSColin Finck curState->u.quantifier.max--;
2862c2c66affSColin Finck if (curState->u.quantifier.max == 0)
2863c2c66affSColin Finck goto repeatDone;
2864c2c66affSColin Finck nextpc = pc + ARG_LEN;
2865c2c66affSColin Finck nextop = (REOp) *nextpc;
2866c2c66affSColin Finck startcp = x->cp;
2867c2c66affSColin Finck if (REOP_IS_SIMPLE(nextop)) {
2868c2c66affSColin Finck nextpc++;
2869c2c66affSColin Finck if (!SimpleMatch(gData, x, nextop, &nextpc, TRUE)) {
2870c2c66affSColin Finck if (curState->u.quantifier.min == 0)
2871c2c66affSColin Finck goto repeatDone;
2872c2c66affSColin Finck result = NULL;
2873c2c66affSColin Finck goto break_switch;
2874c2c66affSColin Finck }
2875c2c66affSColin Finck result = x;
2876c2c66affSColin Finck }
2877c2c66affSColin Finck curState->index = startcp - gData->cpbegin;
2878c2c66affSColin Finck PUSH_STATE_STACK(gData);
2879c2c66affSColin Finck if (curState->u.quantifier.min == 0 &&
2880c2c66affSColin Finck !PushBackTrackState(gData, REOP_REPEAT,
2881c2c66affSColin Finck pc, x, startcp,
2882c2c66affSColin Finck curState->parenSoFar,
2883c2c66affSColin Finck parenSoFar -
2884c2c66affSColin Finck curState->parenSoFar)) {
2885c2c66affSColin Finck goto bad;
2886c2c66affSColin Finck }
2887c2c66affSColin Finck } while (*nextpc == REOP_ENDCHILD);
2888c2c66affSColin Finck pc = nextpc;
2889c2c66affSColin Finck op = (REOp) *pc++;
2890c2c66affSColin Finck parenSoFar = curState->parenSoFar;
2891c2c66affSColin Finck continue;
2892c2c66affSColin Finck
2893c2c66affSColin Finck repeatDone:
2894c2c66affSColin Finck result = x;
2895c2c66affSColin Finck pc += GET_OFFSET(pc);
2896c2c66affSColin Finck goto break_switch;
2897c2c66affSColin Finck
2898c2c66affSColin Finck case REOP_MINIMALSTAR:
2899c2c66affSColin Finck curState->u.quantifier.min = 0;
2900c2c66affSColin Finck curState->u.quantifier.max = (UINT)-1;
2901c2c66affSColin Finck goto minimalquantcommon;
2902c2c66affSColin Finck case REOP_MINIMALPLUS:
2903c2c66affSColin Finck curState->u.quantifier.min = 1;
2904c2c66affSColin Finck curState->u.quantifier.max = (UINT)-1;
2905c2c66affSColin Finck goto minimalquantcommon;
2906c2c66affSColin Finck case REOP_MINIMALOPT:
2907c2c66affSColin Finck curState->u.quantifier.min = 0;
2908c2c66affSColin Finck curState->u.quantifier.max = 1;
2909c2c66affSColin Finck goto minimalquantcommon;
2910c2c66affSColin Finck case REOP_MINIMALQUANT:
2911c2c66affSColin Finck pc = ReadCompactIndex(pc, &k);
2912c2c66affSColin Finck curState->u.quantifier.min = k;
2913c2c66affSColin Finck pc = ReadCompactIndex(pc, &k);
2914c2c66affSColin Finck /* See REOP_QUANT comments about k - 1. */
2915c2c66affSColin Finck curState->u.quantifier.max = k - 1;
2916c2c66affSColin Finck assert(curState->u.quantifier.min
2917c2c66affSColin Finck <= curState->u.quantifier.max);
2918c2c66affSColin Finck minimalquantcommon:
2919c2c66affSColin Finck curState->index = x->cp - gData->cpbegin;
2920c2c66affSColin Finck curState->parenSoFar = parenSoFar;
2921c2c66affSColin Finck PUSH_STATE_STACK(gData);
2922c2c66affSColin Finck if (curState->u.quantifier.min != 0) {
2923c2c66affSColin Finck curState->continue_op = REOP_MINIMALREPEAT;
2924c2c66affSColin Finck curState->continue_pc = pc;
2925c2c66affSColin Finck /* step over <next> */
2926c2c66affSColin Finck pc += OFFSET_LEN;
2927c2c66affSColin Finck op = (REOp) *pc++;
2928c2c66affSColin Finck } else {
2929c2c66affSColin Finck if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
2930c2c66affSColin Finck pc, x, x->cp, 0, 0)) {
2931c2c66affSColin Finck goto bad;
2932c2c66affSColin Finck }
2933c2c66affSColin Finck --gData->stateStackTop;
2934c2c66affSColin Finck pc = pc + GET_OFFSET(pc);
2935c2c66affSColin Finck op = (REOp) *pc++;
2936c2c66affSColin Finck }
2937c2c66affSColin Finck continue;
2938c2c66affSColin Finck
2939c2c66affSColin Finck case REOP_MINIMALREPEAT:
2940c2c66affSColin Finck --gData->stateStackTop;
2941c2c66affSColin Finck --curState;
2942c2c66affSColin Finck
2943c2c66affSColin Finck TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max);
2944c2c66affSColin Finck #define PREPARE_REPEAT() \
2945c2c66affSColin Finck do { \
2946c2c66affSColin Finck curState->index = x->cp - gData->cpbegin; \
2947c2c66affSColin Finck curState->continue_op = REOP_MINIMALREPEAT; \
2948c2c66affSColin Finck curState->continue_pc = pc; \
2949c2c66affSColin Finck pc += ARG_LEN; \
2950c2c66affSColin Finck for (k = curState->parenSoFar; k < parenSoFar; k++) \
2951c2c66affSColin Finck x->parens[k].index = -1; \
2952c2c66affSColin Finck PUSH_STATE_STACK(gData); \
2953c2c66affSColin Finck op = (REOp) *pc++; \
2954c2c66affSColin Finck assert(op < REOP_LIMIT); \
2955c2c66affSColin Finck }while(0)
2956c2c66affSColin Finck
2957c2c66affSColin Finck if (!result) {
2958c2c66affSColin Finck TRACE(" -\n");
2959c2c66affSColin Finck /*
2960c2c66affSColin Finck * Non-greedy failure - try to consume another child.
2961c2c66affSColin Finck */
2962c2c66affSColin Finck if (curState->u.quantifier.max == (UINT) -1 ||
2963c2c66affSColin Finck curState->u.quantifier.max > 0) {
2964c2c66affSColin Finck PREPARE_REPEAT();
2965c2c66affSColin Finck continue;
2966c2c66affSColin Finck }
2967c2c66affSColin Finck /* Don't need to adjust pc since we're going to pop. */
2968c2c66affSColin Finck break;
2969c2c66affSColin Finck }
2970c2c66affSColin Finck if (curState->u.quantifier.min == 0 &&
2971c2c66affSColin Finck x->cp == gData->cpbegin + curState->index) {
2972c2c66affSColin Finck /* Matched an empty string, that'll get us nowhere. */
2973c2c66affSColin Finck result = NULL;
2974c2c66affSColin Finck break;
2975c2c66affSColin Finck }
2976c2c66affSColin Finck if (curState->u.quantifier.min != 0)
2977c2c66affSColin Finck curState->u.quantifier.min--;
2978c2c66affSColin Finck if (curState->u.quantifier.max != (UINT) -1)
2979c2c66affSColin Finck curState->u.quantifier.max--;
2980c2c66affSColin Finck if (curState->u.quantifier.min != 0) {
2981c2c66affSColin Finck PREPARE_REPEAT();
2982c2c66affSColin Finck continue;
2983c2c66affSColin Finck }
2984c2c66affSColin Finck curState->index = x->cp - gData->cpbegin;
2985c2c66affSColin Finck curState->parenSoFar = parenSoFar;
2986c2c66affSColin Finck PUSH_STATE_STACK(gData);
2987c2c66affSColin Finck if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
2988c2c66affSColin Finck pc, x, x->cp,
2989c2c66affSColin Finck curState->parenSoFar,
2990c2c66affSColin Finck parenSoFar - curState->parenSoFar)) {
2991c2c66affSColin Finck goto bad;
2992c2c66affSColin Finck }
2993c2c66affSColin Finck --gData->stateStackTop;
2994c2c66affSColin Finck pc = pc + GET_OFFSET(pc);
2995c2c66affSColin Finck op = (REOp) *pc++;
2996c2c66affSColin Finck assert(op < REOP_LIMIT);
2997c2c66affSColin Finck continue;
2998c2c66affSColin Finck default:
2999c2c66affSColin Finck assert(FALSE);
3000c2c66affSColin Finck result = NULL;
3001c2c66affSColin Finck }
3002c2c66affSColin Finck break_switch:;
3003c2c66affSColin Finck }
3004c2c66affSColin Finck
3005c2c66affSColin Finck /*
3006c2c66affSColin Finck * If the match failed and there's a backtrack option, take it.
3007c2c66affSColin Finck * Otherwise this is a complete and utter failure.
3008c2c66affSColin Finck */
3009c2c66affSColin Finck if (!result) {
3010c2c66affSColin Finck if (gData->cursz == 0)
3011c2c66affSColin Finck return NULL;
3012c2c66affSColin Finck
3013c2c66affSColin Finck /* Potentially detect explosive regex here. */
3014c2c66affSColin Finck gData->backTrackCount++;
3015c2c66affSColin Finck if (gData->backTrackLimit &&
3016c2c66affSColin Finck gData->backTrackCount >= gData->backTrackLimit) {
3017c2c66affSColin Finck JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3018c2c66affSColin Finck JSMSG_REGEXP_TOO_COMPLEX);
3019c2c66affSColin Finck gData->ok = FALSE;
3020c2c66affSColin Finck return NULL;
3021c2c66affSColin Finck }
3022c2c66affSColin Finck
3023c2c66affSColin Finck backTrackData = gData->backTrackSP;
3024c2c66affSColin Finck gData->cursz = backTrackData->sz;
3025c2c66affSColin Finck gData->backTrackSP =
3026c2c66affSColin Finck (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3027c2c66affSColin Finck x->cp = backTrackData->cp;
3028c2c66affSColin Finck pc = backTrackData->backtrack_pc;
3029c2c66affSColin Finck op = (REOp) backTrackData->backtrack_op;
3030c2c66affSColin Finck assert(op < REOP_LIMIT);
3031c2c66affSColin Finck gData->stateStackTop = backTrackData->saveStateStackTop;
3032c2c66affSColin Finck assert(gData->stateStackTop);
3033c2c66affSColin Finck
3034c2c66affSColin Finck memcpy(gData->stateStack, backTrackData + 1,
3035c2c66affSColin Finck sizeof(REProgState) * backTrackData->saveStateStackTop);
3036c2c66affSColin Finck curState = &gData->stateStack[gData->stateStackTop - 1];
3037c2c66affSColin Finck
3038c2c66affSColin Finck if (backTrackData->parenCount) {
3039c2c66affSColin Finck memcpy(&x->parens[backTrackData->parenIndex],
3040c2c66affSColin Finck (char *)(backTrackData + 1) +
3041c2c66affSColin Finck sizeof(REProgState) * backTrackData->saveStateStackTop,
3042c2c66affSColin Finck sizeof(RECapture) * backTrackData->parenCount);
3043c2c66affSColin Finck parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3044c2c66affSColin Finck } else {
3045c2c66affSColin Finck for (k = curState->parenSoFar; k < parenSoFar; k++)
3046c2c66affSColin Finck x->parens[k].index = -1;
3047c2c66affSColin Finck parenSoFar = curState->parenSoFar;
3048c2c66affSColin Finck }
3049c2c66affSColin Finck
3050c2c66affSColin Finck TRACE("\tBT_Pop: %ld,%ld\n",
3051c2c66affSColin Finck (ULONG_PTR)backTrackData->parenIndex,
3052c2c66affSColin Finck (ULONG_PTR)backTrackData->parenCount);
3053c2c66affSColin Finck continue;
3054c2c66affSColin Finck }
3055c2c66affSColin Finck x = result;
3056c2c66affSColin Finck
3057c2c66affSColin Finck /*
3058c2c66affSColin Finck * Continue with the expression.
3059c2c66affSColin Finck */
3060c2c66affSColin Finck op = (REOp)*pc++;
3061c2c66affSColin Finck assert(op < REOP_LIMIT);
3062c2c66affSColin Finck }
3063c2c66affSColin Finck
3064c2c66affSColin Finck bad:
3065c2c66affSColin Finck TRACE("\n");
3066c2c66affSColin Finck return NULL;
3067c2c66affSColin Finck
3068c2c66affSColin Finck good:
3069c2c66affSColin Finck TRACE("\n");
3070c2c66affSColin Finck return x;
3071c2c66affSColin Finck }
3072c2c66affSColin Finck
MatchRegExp(REGlobalData * gData,match_state_t * x)3073c2c66affSColin Finck static match_state_t *MatchRegExp(REGlobalData *gData, match_state_t *x)
3074c2c66affSColin Finck {
3075c2c66affSColin Finck match_state_t *result;
3076c2c66affSColin Finck const WCHAR *cp = x->cp;
3077c2c66affSColin Finck const WCHAR *cp2;
3078c2c66affSColin Finck UINT j;
3079c2c66affSColin Finck
3080c2c66affSColin Finck /*
3081c2c66affSColin Finck * Have to include the position beyond the last character
3082c2c66affSColin Finck * in order to detect end-of-input/line condition.
3083c2c66affSColin Finck */
3084c2c66affSColin Finck for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3085c2c66affSColin Finck gData->skipped = cp2 - cp;
3086c2c66affSColin Finck x->cp = cp2;
3087c2c66affSColin Finck for (j = 0; j < gData->regexp->parenCount; j++)
3088c2c66affSColin Finck x->parens[j].index = -1;
3089c2c66affSColin Finck result = ExecuteREBytecode(gData, x);
3090c2c66affSColin Finck if (!gData->ok || result || (gData->regexp->flags & REG_STICKY))
3091c2c66affSColin Finck return result;
3092c2c66affSColin Finck gData->backTrackSP = gData->backTrackStack;
3093c2c66affSColin Finck gData->cursz = 0;
3094c2c66affSColin Finck gData->stateStackTop = 0;
3095c2c66affSColin Finck cp2 = cp + gData->skipped;
3096c2c66affSColin Finck }
3097c2c66affSColin Finck return NULL;
3098c2c66affSColin Finck }
3099c2c66affSColin Finck
InitMatch(regexp_t * re,void * cx,heap_pool_t * pool,REGlobalData * gData)3100c2c66affSColin Finck static HRESULT InitMatch(regexp_t *re, void *cx, heap_pool_t *pool, REGlobalData *gData)
3101c2c66affSColin Finck {
3102c2c66affSColin Finck UINT i;
3103c2c66affSColin Finck
3104c2c66affSColin Finck gData->backTrackStackSize = INITIAL_BACKTRACK;
3105c2c66affSColin Finck gData->backTrackStack = heap_pool_alloc(gData->pool, INITIAL_BACKTRACK);
3106c2c66affSColin Finck if (!gData->backTrackStack)
3107c2c66affSColin Finck goto bad;
3108c2c66affSColin Finck
3109c2c66affSColin Finck gData->backTrackSP = gData->backTrackStack;
3110c2c66affSColin Finck gData->cursz = 0;
3111c2c66affSColin Finck gData->backTrackCount = 0;
3112c2c66affSColin Finck gData->backTrackLimit = 0;
3113c2c66affSColin Finck
3114c2c66affSColin Finck gData->stateStackLimit = INITIAL_STATESTACK;
3115c2c66affSColin Finck gData->stateStack = heap_pool_alloc(gData->pool, sizeof(REProgState) * INITIAL_STATESTACK);
3116c2c66affSColin Finck if (!gData->stateStack)
3117c2c66affSColin Finck goto bad;
3118c2c66affSColin Finck
3119c2c66affSColin Finck gData->stateStackTop = 0;
3120c2c66affSColin Finck gData->cx = cx;
3121c2c66affSColin Finck gData->pool = pool;
3122c2c66affSColin Finck gData->regexp = re;
3123c2c66affSColin Finck gData->ok = TRUE;
3124c2c66affSColin Finck
3125c2c66affSColin Finck for (i = 0; i < re->classCount; i++) {
3126c2c66affSColin Finck if (!re->classList[i].converted &&
3127c2c66affSColin Finck !ProcessCharSet(gData, &re->classList[i])) {
3128c2c66affSColin Finck return E_FAIL;
3129c2c66affSColin Finck }
3130c2c66affSColin Finck }
3131c2c66affSColin Finck
3132c2c66affSColin Finck return S_OK;
3133c2c66affSColin Finck
3134c2c66affSColin Finck bad:
3135c2c66affSColin Finck js_ReportOutOfScriptQuota(cx);
3136c2c66affSColin Finck gData->ok = FALSE;
3137c2c66affSColin Finck return E_OUTOFMEMORY;
3138c2c66affSColin Finck }
3139c2c66affSColin Finck
regexp_execute(regexp_t * regexp,void * cx,heap_pool_t * pool,const WCHAR * str,DWORD str_len,match_state_t * result)3140c2c66affSColin Finck HRESULT regexp_execute(regexp_t *regexp, void *cx, heap_pool_t *pool,
3141c2c66affSColin Finck const WCHAR *str, DWORD str_len, match_state_t *result)
3142c2c66affSColin Finck {
3143c2c66affSColin Finck match_state_t *res;
3144c2c66affSColin Finck REGlobalData gData;
3145c2c66affSColin Finck heap_pool_t *mark = heap_pool_mark(pool);
3146c2c66affSColin Finck const WCHAR *str_beg = result->cp;
3147c2c66affSColin Finck HRESULT hres;
3148c2c66affSColin Finck
3149c2c66affSColin Finck assert(result->cp != NULL);
3150c2c66affSColin Finck
3151c2c66affSColin Finck gData.cpbegin = str;
3152c2c66affSColin Finck gData.cpend = str+str_len;
3153c2c66affSColin Finck gData.start = result->cp-str;
3154c2c66affSColin Finck gData.skipped = 0;
3155c2c66affSColin Finck gData.pool = pool;
3156c2c66affSColin Finck
3157c2c66affSColin Finck hres = InitMatch(regexp, cx, pool, &gData);
3158c2c66affSColin Finck if(FAILED(hres)) {
3159c2c66affSColin Finck WARN("InitMatch failed\n");
3160c2c66affSColin Finck heap_pool_clear(mark);
3161c2c66affSColin Finck return hres;
3162c2c66affSColin Finck }
3163c2c66affSColin Finck
3164c2c66affSColin Finck res = MatchRegExp(&gData, result);
3165c2c66affSColin Finck heap_pool_clear(mark);
3166c2c66affSColin Finck if(!gData.ok) {
3167c2c66affSColin Finck WARN("MatchRegExp failed\n");
3168c2c66affSColin Finck return E_FAIL;
3169c2c66affSColin Finck }
3170c2c66affSColin Finck
3171c2c66affSColin Finck if(!res) {
3172c2c66affSColin Finck result->match_len = 0;
3173c2c66affSColin Finck return S_FALSE;
3174c2c66affSColin Finck }
3175c2c66affSColin Finck
3176c2c66affSColin Finck result->match_len = (result->cp-str_beg) - gData.skipped;
3177c2c66affSColin Finck result->paren_count = regexp->parenCount;
3178c2c66affSColin Finck return S_OK;
3179c2c66affSColin Finck }
3180c2c66affSColin Finck
regexp_destroy(regexp_t * re)3181c2c66affSColin Finck void regexp_destroy(regexp_t *re)
3182c2c66affSColin Finck {
3183c2c66affSColin Finck if (re->classList) {
3184c2c66affSColin Finck UINT i;
3185c2c66affSColin Finck for (i = 0; i < re->classCount; i++) {
3186c2c66affSColin Finck if (re->classList[i].converted)
3187c2c66affSColin Finck heap_free(re->classList[i].u.bits);
3188c2c66affSColin Finck re->classList[i].u.bits = NULL;
3189c2c66affSColin Finck }
3190c2c66affSColin Finck heap_free(re->classList);
3191c2c66affSColin Finck }
3192c2c66affSColin Finck heap_free(re);
3193c2c66affSColin Finck }
3194c2c66affSColin Finck
regexp_new(void * cx,heap_pool_t * pool,const WCHAR * str,DWORD str_len,WORD flags,BOOL flat)3195c2c66affSColin Finck regexp_t* regexp_new(void *cx, heap_pool_t *pool, const WCHAR *str,
3196c2c66affSColin Finck DWORD str_len, WORD flags, BOOL flat)
3197c2c66affSColin Finck {
3198c2c66affSColin Finck regexp_t *re;
3199c2c66affSColin Finck heap_pool_t *mark;
3200c2c66affSColin Finck CompilerState state;
3201c2c66affSColin Finck size_t resize;
3202c2c66affSColin Finck jsbytecode *endPC;
3203c2c66affSColin Finck UINT i;
3204c2c66affSColin Finck size_t len;
3205c2c66affSColin Finck
3206c2c66affSColin Finck re = NULL;
3207c2c66affSColin Finck mark = heap_pool_mark(pool);
3208c2c66affSColin Finck len = str_len;
3209c2c66affSColin Finck
3210c2c66affSColin Finck state.context = cx;
3211c2c66affSColin Finck state.pool = pool;
3212c2c66affSColin Finck state.cp = str;
3213c2c66affSColin Finck if (!state.cp)
3214c2c66affSColin Finck goto out;
3215c2c66affSColin Finck state.cpbegin = state.cp;
3216c2c66affSColin Finck state.cpend = state.cp + len;
3217c2c66affSColin Finck state.flags = flags;
3218c2c66affSColin Finck state.parenCount = 0;
3219c2c66affSColin Finck state.classCount = 0;
3220c2c66affSColin Finck state.progLength = 0;
3221c2c66affSColin Finck state.treeDepth = 0;
3222c2c66affSColin Finck state.classBitmapsMem = 0;
3223c2c66affSColin Finck for (i = 0; i < CLASS_CACHE_SIZE; i++)
3224c2c66affSColin Finck state.classCache[i].start = NULL;
3225c2c66affSColin Finck
3226c2c66affSColin Finck if (len != 0 && flat) {
3227c2c66affSColin Finck state.result = NewRENode(&state, REOP_FLAT);
3228c2c66affSColin Finck if (!state.result)
3229c2c66affSColin Finck goto out;
3230c2c66affSColin Finck state.result->u.flat.chr = *state.cpbegin;
3231c2c66affSColin Finck state.result->u.flat.length = len;
3232c2c66affSColin Finck state.result->kid = (void *) state.cpbegin;
3233c2c66affSColin Finck /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
3234c2c66affSColin Finck state.progLength += 1 + GetCompactIndexWidth(0)
3235c2c66affSColin Finck + GetCompactIndexWidth(len);
3236c2c66affSColin Finck } else {
3237c2c66affSColin Finck if (!ParseRegExp(&state))
3238c2c66affSColin Finck goto out;
3239c2c66affSColin Finck }
3240c2c66affSColin Finck resize = offsetof(regexp_t, program) + state.progLength + 1;
3241c2c66affSColin Finck re = heap_alloc(resize);
3242c2c66affSColin Finck if (!re)
3243c2c66affSColin Finck goto out;
3244c2c66affSColin Finck
3245c2c66affSColin Finck assert(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
3246c2c66affSColin Finck re->classCount = state.classCount;
3247c2c66affSColin Finck if (re->classCount) {
3248c2c66affSColin Finck re->classList = heap_alloc(re->classCount * sizeof(RECharSet));
3249c2c66affSColin Finck if (!re->classList) {
3250c2c66affSColin Finck regexp_destroy(re);
3251c2c66affSColin Finck re = NULL;
3252c2c66affSColin Finck goto out;
3253c2c66affSColin Finck }
3254c2c66affSColin Finck for (i = 0; i < re->classCount; i++)
3255c2c66affSColin Finck re->classList[i].converted = FALSE;
3256c2c66affSColin Finck } else {
3257c2c66affSColin Finck re->classList = NULL;
3258c2c66affSColin Finck }
3259c2c66affSColin Finck endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
3260c2c66affSColin Finck if (!endPC) {
3261c2c66affSColin Finck regexp_destroy(re);
3262c2c66affSColin Finck re = NULL;
3263c2c66affSColin Finck goto out;
3264c2c66affSColin Finck }
3265c2c66affSColin Finck *endPC++ = REOP_END;
3266c2c66affSColin Finck /*
3267c2c66affSColin Finck * Check whether size was overestimated and shrink using realloc.
3268c2c66affSColin Finck * This is safe since no pointers to newly parsed regexp or its parts
3269c2c66affSColin Finck * besides re exist here.
3270c2c66affSColin Finck */
3271c2c66affSColin Finck if ((size_t)(endPC - re->program) != state.progLength + 1) {
3272c2c66affSColin Finck regexp_t *tmp;
3273c2c66affSColin Finck assert((size_t)(endPC - re->program) < state.progLength + 1);
3274c2c66affSColin Finck resize = offsetof(regexp_t, program) + (endPC - re->program);
3275c2c66affSColin Finck tmp = heap_realloc(re, resize);
3276c2c66affSColin Finck if (tmp)
3277c2c66affSColin Finck re = tmp;
3278c2c66affSColin Finck }
3279c2c66affSColin Finck
3280c2c66affSColin Finck re->flags = flags;
3281c2c66affSColin Finck re->parenCount = state.parenCount;
3282c2c66affSColin Finck re->source = str;
3283c2c66affSColin Finck re->source_len = str_len;
3284c2c66affSColin Finck
3285c2c66affSColin Finck out:
3286c2c66affSColin Finck heap_pool_clear(mark);
3287c2c66affSColin Finck return re;
3288c2c66affSColin Finck }
3289