xref: /reactos/dll/win32/jscript/regexp.c (revision 3e2d6582)
1c2c66affSColin Finck /*
2c2c66affSColin Finck  * Copyright 2008 Jacek Caban for CodeWeavers
3c2c66affSColin Finck  *
4c2c66affSColin Finck  * This library is free software; you can redistribute it and/or
5c2c66affSColin Finck  * modify it under the terms of the GNU Lesser General Public
6c2c66affSColin Finck  * License as published by the Free Software Foundation; either
7c2c66affSColin Finck  * version 2.1 of the License, or (at your option) any later version.
8c2c66affSColin Finck  *
9c2c66affSColin Finck  * This library is distributed in the hope that it will be useful,
10c2c66affSColin Finck  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11c2c66affSColin Finck  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12c2c66affSColin Finck  * Lesser General Public License for more details.
13c2c66affSColin Finck  *
14c2c66affSColin Finck  * You should have received a copy of the GNU Lesser General Public
15c2c66affSColin Finck  * License along with this library; if not, write to the Free Software
16c2c66affSColin Finck  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17c2c66affSColin Finck  */
18c2c66affSColin Finck 
19c2c66affSColin Finck /*
20c2c66affSColin Finck  * Code in this file is based on files:
21c2c66affSColin Finck  * js/src/jsregexp.h
22c2c66affSColin Finck  * js/src/jsregexp.c
23c2c66affSColin Finck  * from Mozilla project, released under LGPL 2.1 or later.
24c2c66affSColin Finck  *
25c2c66affSColin Finck  * The Original Code is Mozilla Communicator client code, released
26c2c66affSColin Finck  * March 31, 1998.
27c2c66affSColin Finck  *
28c2c66affSColin Finck  * The Initial Developer of the Original Code is
29c2c66affSColin Finck  * Netscape Communications Corporation.
30c2c66affSColin Finck  * Portions created by the Initial Developer are Copyright (C) 1998
31c2c66affSColin Finck  * the Initial Developer. All Rights Reserved.
32c2c66affSColin Finck  */
33c2c66affSColin Finck 
348dba275bSAmine Khaldi #include <assert.h>
358dba275bSAmine Khaldi 
36c2c66affSColin Finck #include "jscript.h"
378dba275bSAmine Khaldi #include "regexp.h"
388dba275bSAmine Khaldi 
398dba275bSAmine Khaldi #include "wine/debug.h"
408dba275bSAmine Khaldi 
418dba275bSAmine Khaldi WINE_DEFAULT_DEBUG_CHANNEL(jscript);
42c2c66affSColin Finck 
43c2c66affSColin Finck /* FIXME: Better error handling */
44c2c66affSColin Finck #define ReportRegExpError(a,b,c)
45c2c66affSColin Finck #define ReportRegExpErrorHelper(a,b,c,d)
46c2c66affSColin Finck #define JS_ReportErrorNumber(a,b,c,d)
47c2c66affSColin Finck #define JS_ReportErrorFlagsAndNumber(a,b,c,d,e,f)
48c2c66affSColin Finck #define js_ReportOutOfScriptQuota(a)
49c2c66affSColin Finck #define JS_ReportOutOfMemory(a)
50c2c66affSColin Finck #define JS_COUNT_OPERATION(a,b)
51c2c66affSColin Finck 
52c2c66affSColin Finck 
53c2c66affSColin Finck typedef BYTE JSPackedBool;
54c2c66affSColin Finck 
55c2c66affSColin Finck /*
56c2c66affSColin Finck  * This struct holds a bitmap representation of a class from a regexp.
57c2c66affSColin Finck  * There's a list of these referenced by the classList field in the regexp_t
58c2c66affSColin Finck  * struct below. The initial state has startIndex set to the offset in the
59c2c66affSColin Finck  * original regexp source of the beginning of the class contents. The first
60c2c66affSColin Finck  * use of the class converts the source representation into a bitmap.
61c2c66affSColin Finck  *
62c2c66affSColin Finck  */
63c2c66affSColin Finck typedef struct RECharSet {
64c2c66affSColin Finck     JSPackedBool    converted;
65c2c66affSColin Finck     JSPackedBool    sense;
66c2c66affSColin Finck     WORD            length;
67c2c66affSColin Finck     union {
68c2c66affSColin Finck         BYTE        *bits;
69c2c66affSColin Finck         struct {
70c2c66affSColin Finck             size_t  startIndex;
71c2c66affSColin Finck             size_t  length;
72c2c66affSColin Finck         } src;
73c2c66affSColin Finck     } u;
74c2c66affSColin Finck } RECharSet;
75c2c66affSColin Finck 
76c2c66affSColin Finck #define JSMSG_MIN_TOO_BIG 47
77c2c66affSColin Finck #define JSMSG_MAX_TOO_BIG 48
78c2c66affSColin Finck #define JSMSG_OUT_OF_ORDER 49
79c2c66affSColin Finck #define JSMSG_OUT_OF_MEMORY 137
80c2c66affSColin Finck 
81c2c66affSColin Finck #define LINE_SEPARATOR  0x2028
82c2c66affSColin Finck #define PARA_SEPARATOR  0x2029
83c2c66affSColin Finck 
84c2c66affSColin Finck #define RE_IS_LETTER(c)     (((c >= 'A') && (c <= 'Z')) ||                    \
85c2c66affSColin Finck                              ((c >= 'a') && (c <= 'z')) )
86c2c66affSColin Finck #define RE_IS_LINE_TERM(c)  ((c == '\n') || (c == '\r') ||                    \
87c2c66affSColin Finck                              (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
88c2c66affSColin Finck 
89c2c66affSColin Finck #define JS_ISWORD(c)    ((c) < 128 && (isalnum(c) || (c) == '_'))
90c2c66affSColin Finck 
91c2c66affSColin Finck #define JS7_ISDEC(c)    ((((unsigned)(c)) - '0') <= 9)
92c2c66affSColin Finck #define JS7_UNDEC(c)    ((c) - '0')
93c2c66affSColin Finck 
94c2c66affSColin Finck typedef enum REOp {
95c2c66affSColin Finck     REOP_EMPTY,
96c2c66affSColin Finck     REOP_BOL,
97c2c66affSColin Finck     REOP_EOL,
98c2c66affSColin Finck     REOP_WBDRY,
99c2c66affSColin Finck     REOP_WNONBDRY,
100c2c66affSColin Finck     REOP_DOT,
101c2c66affSColin Finck     REOP_DIGIT,
102c2c66affSColin Finck     REOP_NONDIGIT,
103c2c66affSColin Finck     REOP_ALNUM,
104c2c66affSColin Finck     REOP_NONALNUM,
105c2c66affSColin Finck     REOP_SPACE,
106c2c66affSColin Finck     REOP_NONSPACE,
107c2c66affSColin Finck     REOP_BACKREF,
108c2c66affSColin Finck     REOP_FLAT,
109c2c66affSColin Finck     REOP_FLAT1,
110c2c66affSColin Finck     REOP_FLATi,
111c2c66affSColin Finck     REOP_FLAT1i,
112c2c66affSColin Finck     REOP_UCFLAT1,
113c2c66affSColin Finck     REOP_UCFLAT1i,
114c2c66affSColin Finck     REOP_UCFLAT,
115c2c66affSColin Finck     REOP_UCFLATi,
116c2c66affSColin Finck     REOP_CLASS,
117c2c66affSColin Finck     REOP_NCLASS,
118c2c66affSColin Finck     REOP_ALT,
119c2c66affSColin Finck     REOP_QUANT,
120c2c66affSColin Finck     REOP_STAR,
121c2c66affSColin Finck     REOP_PLUS,
122c2c66affSColin Finck     REOP_OPT,
123c2c66affSColin Finck     REOP_LPAREN,
124c2c66affSColin Finck     REOP_RPAREN,
125c2c66affSColin Finck     REOP_JUMP,
126c2c66affSColin Finck     REOP_DOTSTAR,
127c2c66affSColin Finck     REOP_LPARENNON,
128c2c66affSColin Finck     REOP_ASSERT,
129c2c66affSColin Finck     REOP_ASSERT_NOT,
130c2c66affSColin Finck     REOP_ASSERTTEST,
131c2c66affSColin Finck     REOP_ASSERTNOTTEST,
132c2c66affSColin Finck     REOP_MINIMALSTAR,
133c2c66affSColin Finck     REOP_MINIMALPLUS,
134c2c66affSColin Finck     REOP_MINIMALOPT,
135c2c66affSColin Finck     REOP_MINIMALQUANT,
136c2c66affSColin Finck     REOP_ENDCHILD,
137c2c66affSColin Finck     REOP_REPEAT,
138c2c66affSColin Finck     REOP_MINIMALREPEAT,
139c2c66affSColin Finck     REOP_ALTPREREQ,
140c2c66affSColin Finck     REOP_ALTPREREQ2,
141c2c66affSColin Finck     REOP_ENDALT,
142c2c66affSColin Finck     REOP_CONCAT,
143c2c66affSColin Finck     REOP_END,
144c2c66affSColin Finck     REOP_LIMIT /* META: no operator >= to this */
145c2c66affSColin Finck } REOp;
146c2c66affSColin Finck 
147c2c66affSColin Finck #define REOP_IS_SIMPLE(op)  ((op) <= REOP_NCLASS)
148c2c66affSColin Finck 
149c2c66affSColin Finck static const char *reop_names[] = {
150c2c66affSColin Finck     "empty",
151c2c66affSColin Finck     "bol",
152c2c66affSColin Finck     "eol",
153c2c66affSColin Finck     "wbdry",
154c2c66affSColin Finck     "wnonbdry",
155c2c66affSColin Finck     "dot",
156c2c66affSColin Finck     "digit",
157c2c66affSColin Finck     "nondigit",
158c2c66affSColin Finck     "alnum",
159c2c66affSColin Finck     "nonalnum",
160c2c66affSColin Finck     "space",
161c2c66affSColin Finck     "nonspace",
162c2c66affSColin Finck     "backref",
163c2c66affSColin Finck     "flat",
164c2c66affSColin Finck     "flat1",
165c2c66affSColin Finck     "flati",
166c2c66affSColin Finck     "flat1i",
167c2c66affSColin Finck     "ucflat1",
168c2c66affSColin Finck     "ucflat1i",
169c2c66affSColin Finck     "ucflat",
170c2c66affSColin Finck     "ucflati",
171c2c66affSColin Finck     "class",
172c2c66affSColin Finck     "nclass",
173c2c66affSColin Finck     "alt",
174c2c66affSColin Finck     "quant",
175c2c66affSColin Finck     "star",
176c2c66affSColin Finck     "plus",
177c2c66affSColin Finck     "opt",
178c2c66affSColin Finck     "lparen",
179c2c66affSColin Finck     "rparen",
180c2c66affSColin Finck     "jump",
181c2c66affSColin Finck     "dotstar",
182c2c66affSColin Finck     "lparennon",
183c2c66affSColin Finck     "assert",
184c2c66affSColin Finck     "assert_not",
185c2c66affSColin Finck     "asserttest",
186c2c66affSColin Finck     "assertnottest",
187c2c66affSColin Finck     "minimalstar",
188c2c66affSColin Finck     "minimalplus",
189c2c66affSColin Finck     "minimalopt",
190c2c66affSColin Finck     "minimalquant",
191c2c66affSColin Finck     "endchild",
192c2c66affSColin Finck     "repeat",
193c2c66affSColin Finck     "minimalrepeat",
194c2c66affSColin Finck     "altprereq",
195c2c66affSColin Finck     "alrprereq2",
196c2c66affSColin Finck     "endalt",
197c2c66affSColin Finck     "concat",
198c2c66affSColin Finck     "end",
199c2c66affSColin Finck     NULL
200c2c66affSColin Finck };
201c2c66affSColin Finck 
202c2c66affSColin Finck typedef struct REProgState {
203c2c66affSColin Finck     jsbytecode *continue_pc;        /* current continuation data */
204c2c66affSColin Finck     jsbytecode continue_op;
205c2c66affSColin Finck     ptrdiff_t index;                /* progress in text */
206c2c66affSColin Finck     size_t parenSoFar;              /* highest indexed paren started */
207c2c66affSColin Finck     union {
208c2c66affSColin Finck         struct {
209c2c66affSColin Finck             UINT min;               /* current quantifier limits */
210c2c66affSColin Finck             UINT max;
211c2c66affSColin Finck         } quantifier;
212c2c66affSColin Finck         struct {
213c2c66affSColin Finck             size_t top;             /* backtrack stack state */
214c2c66affSColin Finck             size_t sz;
215c2c66affSColin Finck         } assertion;
216c2c66affSColin Finck     } u;
217c2c66affSColin Finck } REProgState;
218c2c66affSColin Finck 
219c2c66affSColin Finck typedef struct REBackTrackData {
220c2c66affSColin Finck     size_t sz;                      /* size of previous stack entry */
221c2c66affSColin Finck     jsbytecode *backtrack_pc;       /* where to backtrack to */
222c2c66affSColin Finck     jsbytecode backtrack_op;
223c2c66affSColin Finck     const WCHAR *cp;                /* index in text of match at backtrack */
224c2c66affSColin Finck     size_t parenIndex;              /* start index of saved paren contents */
225c2c66affSColin Finck     size_t parenCount;              /* # of saved paren contents */
226c2c66affSColin Finck     size_t saveStateStackTop;       /* number of parent states */
227c2c66affSColin Finck     /* saved parent states follow */
228c2c66affSColin Finck     /* saved paren contents follow */
229c2c66affSColin Finck } REBackTrackData;
230c2c66affSColin Finck 
231c2c66affSColin Finck #define INITIAL_STATESTACK  100
232c2c66affSColin Finck #define INITIAL_BACKTRACK   8000
233c2c66affSColin Finck 
234c2c66affSColin Finck typedef struct REGlobalData {
235c2c66affSColin Finck     void *cx;
236c2c66affSColin Finck     regexp_t *regexp;               /* the RE in execution */
237c2c66affSColin Finck     BOOL ok;                        /* runtime error (out_of_memory only?) */
238c2c66affSColin Finck     size_t start;                   /* offset to start at */
239c2c66affSColin Finck     ptrdiff_t skipped;              /* chars skipped anchoring this r.e. */
240c2c66affSColin Finck     const WCHAR    *cpbegin;        /* text base address */
241c2c66affSColin Finck     const WCHAR    *cpend;          /* text limit address */
242c2c66affSColin Finck 
243c2c66affSColin Finck     REProgState *stateStack;        /* stack of state of current parents */
244c2c66affSColin Finck     size_t stateStackTop;
245c2c66affSColin Finck     size_t stateStackLimit;
246c2c66affSColin Finck 
247c2c66affSColin Finck     REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
248c2c66affSColin Finck     REBackTrackData *backTrackSP;
249c2c66affSColin Finck     size_t backTrackStackSize;
250c2c66affSColin Finck     size_t cursz;                   /* size of current stack entry */
251c2c66affSColin Finck     size_t backTrackCount;          /* how many times we've backtracked */
252c2c66affSColin Finck     size_t backTrackLimit;          /* upper limit on backtrack states */
253c2c66affSColin Finck 
254c2c66affSColin Finck     heap_pool_t *pool;              /* It's faster to use one malloc'd pool
255c2c66affSColin Finck                                        than to malloc/free the three items
256c2c66affSColin Finck                                        that are allocated from this pool */
257c2c66affSColin Finck } REGlobalData;
258c2c66affSColin Finck 
259c2c66affSColin Finck typedef struct RENode RENode;
260c2c66affSColin Finck struct RENode {
261c2c66affSColin Finck     REOp            op;         /* r.e. op bytecode */
262c2c66affSColin Finck     RENode          *next;      /* next in concatenation order */
263c2c66affSColin Finck     void            *kid;       /* first operand */
264c2c66affSColin Finck     union {
265c2c66affSColin Finck         void        *kid2;      /* second operand */
266c2c66affSColin Finck         INT         num;        /* could be a number */
267c2c66affSColin Finck         size_t      parenIndex; /* or a parenthesis index */
268c2c66affSColin Finck         struct {                /* or a quantifier range */
269c2c66affSColin Finck             UINT  min;
270c2c66affSColin Finck             UINT  max;
271c2c66affSColin Finck             JSPackedBool greedy;
272c2c66affSColin Finck         } range;
273c2c66affSColin Finck         struct {                /* or a character class */
274c2c66affSColin Finck             size_t  startIndex;
275c2c66affSColin Finck             size_t  kidlen;     /* length of string at kid, in jschars */
276c2c66affSColin Finck             size_t  index;      /* index into class list */
277c2c66affSColin Finck             WORD  bmsize;       /* bitmap size, based on max char code */
278c2c66affSColin Finck             JSPackedBool sense;
279c2c66affSColin Finck         } ucclass;
280c2c66affSColin Finck         struct {                /* or a literal sequence */
281c2c66affSColin Finck             WCHAR   chr;        /* of one character */
282c2c66affSColin Finck             size_t  length;     /* or many (via the kid) */
283c2c66affSColin Finck         } flat;
284c2c66affSColin Finck         struct {
285c2c66affSColin Finck             RENode  *kid2;      /* second operand from ALT */
286c2c66affSColin Finck             WCHAR   ch1;        /* match char for ALTPREREQ */
287c2c66affSColin Finck             WCHAR   ch2;        /* ditto, or class index for ALTPREREQ2 */
288c2c66affSColin Finck         } altprereq;
289c2c66affSColin Finck     } u;
290c2c66affSColin Finck };
291c2c66affSColin Finck 
292c2c66affSColin Finck #define CLASS_CACHE_SIZE    4
293c2c66affSColin Finck 
294c2c66affSColin Finck typedef struct CompilerState {
295c2c66affSColin Finck     void            *context;
296c2c66affSColin Finck     const WCHAR     *cpbegin;
297c2c66affSColin Finck     const WCHAR     *cpend;
298c2c66affSColin Finck     const WCHAR     *cp;
299c2c66affSColin Finck     size_t          parenCount;
300c2c66affSColin Finck     size_t          classCount;   /* number of [] encountered */
301c2c66affSColin Finck     size_t          treeDepth;    /* maximum depth of parse tree */
302c2c66affSColin Finck     size_t          progLength;   /* estimated bytecode length */
303c2c66affSColin Finck     RENode          *result;
304c2c66affSColin Finck     size_t          classBitmapsMem; /* memory to hold all class bitmaps */
305c2c66affSColin Finck     struct {
306c2c66affSColin Finck         const WCHAR *start;         /* small cache of class strings */
307c2c66affSColin Finck         size_t length;              /* since they're often the same */
308c2c66affSColin Finck         size_t index;
309c2c66affSColin Finck     } classCache[CLASS_CACHE_SIZE];
310c2c66affSColin Finck     WORD          flags;
311c2c66affSColin Finck 
312c2c66affSColin Finck     heap_pool_t *pool;              /* It's faster to use one malloc'd pool
313c2c66affSColin Finck                                        than to malloc/free */
314c2c66affSColin Finck } CompilerState;
315c2c66affSColin Finck 
316c2c66affSColin Finck typedef struct EmitStateStackEntry {
317c2c66affSColin Finck     jsbytecode      *altHead;       /* start of REOP_ALT* opcode */
318c2c66affSColin Finck     jsbytecode      *nextAltFixup;  /* fixup pointer to next-alt offset */
319c2c66affSColin Finck     jsbytecode      *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
320c2c66affSColin Finck     jsbytecode      *endTermFixup;  /* fixup ptr. to REOPT_ALTPREREQ* offset */
321c2c66affSColin Finck     RENode          *continueNode;  /* original REOP_ALT* node being stacked */
322c2c66affSColin Finck     jsbytecode      continueOp;     /* REOP_JUMP or REOP_ENDALT continuation */
323c2c66affSColin Finck     JSPackedBool    jumpToJumpFlag; /* true if we've patched jump-to-jump to
324c2c66affSColin Finck                                        avoid 16-bit unsigned offset overflow */
325c2c66affSColin Finck } EmitStateStackEntry;
326c2c66affSColin Finck 
327c2c66affSColin Finck /*
328c2c66affSColin Finck  * Immediate operand sizes and getter/setters.  Unlike the ones in jsopcode.h,
329c2c66affSColin Finck  * the getters and setters take the pc of the offset, not of the opcode before
330c2c66affSColin Finck  * the offset.
331c2c66affSColin Finck  */
332c2c66affSColin Finck #define ARG_LEN             2
333c2c66affSColin Finck #define GET_ARG(pc)         ((WORD)(((pc)[0] << 8) | (pc)[1]))
334c2c66affSColin Finck #define SET_ARG(pc, arg)    ((pc)[0] = (jsbytecode) ((arg) >> 8),       \
335c2c66affSColin Finck                              (pc)[1] = (jsbytecode) (arg))
336c2c66affSColin Finck 
337c2c66affSColin Finck #define OFFSET_LEN          ARG_LEN
338c2c66affSColin Finck #define OFFSET_MAX          ((1 << (ARG_LEN * 8)) - 1)
339c2c66affSColin Finck #define GET_OFFSET(pc)      GET_ARG(pc)
340c2c66affSColin Finck 
341c2c66affSColin Finck static BOOL ParseRegExp(CompilerState*);
342c2c66affSColin Finck 
343c2c66affSColin Finck /*
344c2c66affSColin Finck  * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
345c2c66affSColin Finck  * For sanity, we limit it to 2^24 bytes.
346c2c66affSColin Finck  */
347c2c66affSColin Finck #define TREE_DEPTH_MAX  ((1 << 24) / sizeof(EmitStateStackEntry))
348c2c66affSColin Finck 
349c2c66affSColin Finck /*
350c2c66affSColin Finck  * The maximum memory that can be allocated for class bitmaps.
351c2c66affSColin Finck  * For sanity, we limit it to 2^24 bytes.
352c2c66affSColin Finck  */
353c2c66affSColin Finck #define CLASS_BITMAPS_MEM_LIMIT (1 << 24)
354c2c66affSColin Finck 
355c2c66affSColin Finck /*
356c2c66affSColin Finck  * Functions to get size and write/read bytecode that represent small indexes
357c2c66affSColin Finck  * compactly.
358c2c66affSColin Finck  * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
359c2c66affSColin Finck  * indicates that the following byte brings more bits to the index. Otherwise
360c2c66affSColin Finck  * this is the last byte in the index bytecode representing highest index bits.
361c2c66affSColin Finck  */
362c2c66affSColin Finck static size_t
GetCompactIndexWidth(size_t index)363c2c66affSColin Finck GetCompactIndexWidth(size_t index)
364c2c66affSColin Finck {
365c2c66affSColin Finck     size_t width;
366c2c66affSColin Finck 
367c2c66affSColin Finck     for (width = 1; (index >>= 7) != 0; ++width) { }
368c2c66affSColin Finck     return width;
369c2c66affSColin Finck }
370c2c66affSColin Finck 
371c2c66affSColin Finck static inline jsbytecode *
WriteCompactIndex(jsbytecode * pc,size_t index)372c2c66affSColin Finck WriteCompactIndex(jsbytecode *pc, size_t index)
373c2c66affSColin Finck {
374c2c66affSColin Finck     size_t next;
375c2c66affSColin Finck 
376c2c66affSColin Finck     while ((next = index >> 7) != 0) {
377c2c66affSColin Finck         *pc++ = (jsbytecode)(index | 0x80);
378c2c66affSColin Finck         index = next;
379c2c66affSColin Finck     }
380c2c66affSColin Finck     *pc++ = (jsbytecode)index;
381c2c66affSColin Finck     return pc;
382c2c66affSColin Finck }
383c2c66affSColin Finck 
384c2c66affSColin Finck static inline jsbytecode *
ReadCompactIndex(jsbytecode * pc,size_t * result)385c2c66affSColin Finck ReadCompactIndex(jsbytecode *pc, size_t *result)
386c2c66affSColin Finck {
387c2c66affSColin Finck     size_t nextByte;
388c2c66affSColin Finck 
389c2c66affSColin Finck     nextByte = *pc++;
390c2c66affSColin Finck     if ((nextByte & 0x80) == 0) {
391c2c66affSColin Finck         /*
392c2c66affSColin Finck          * Short-circuit the most common case when compact index <= 127.
393c2c66affSColin Finck          */
394c2c66affSColin Finck         *result = nextByte;
395c2c66affSColin Finck     } else {
396c2c66affSColin Finck         size_t shift = 7;
397c2c66affSColin Finck         *result = 0x7F & nextByte;
398c2c66affSColin Finck         do {
399c2c66affSColin Finck             nextByte = *pc++;
400c2c66affSColin Finck             *result |= (nextByte & 0x7F) << shift;
401c2c66affSColin Finck             shift += 7;
402c2c66affSColin Finck         } while ((nextByte & 0x80) != 0);
403c2c66affSColin Finck     }
404c2c66affSColin Finck     return pc;
405c2c66affSColin Finck }
406c2c66affSColin Finck 
407c2c66affSColin Finck /* Construct and initialize an RENode, returning NULL for out-of-memory */
408c2c66affSColin Finck static RENode *
NewRENode(CompilerState * state,REOp op)409c2c66affSColin Finck NewRENode(CompilerState *state, REOp op)
410c2c66affSColin Finck {
411c2c66affSColin Finck     RENode *ren;
412c2c66affSColin Finck 
413c2c66affSColin Finck     ren = heap_pool_alloc(state->pool, sizeof(*ren));
414c2c66affSColin Finck     if (!ren) {
415c2c66affSColin Finck         /* js_ReportOutOfScriptQuota(cx); */
416c2c66affSColin Finck         return NULL;
417c2c66affSColin Finck     }
418c2c66affSColin Finck     ren->op = op;
419c2c66affSColin Finck     ren->next = NULL;
420c2c66affSColin Finck     ren->kid = NULL;
421c2c66affSColin Finck     return ren;
422c2c66affSColin Finck }
423c2c66affSColin Finck 
424c2c66affSColin Finck /*
425c2c66affSColin Finck  * Validates and converts hex ascii value.
426c2c66affSColin Finck  */
427c2c66affSColin Finck static BOOL
isASCIIHexDigit(WCHAR c,UINT * digit)428c2c66affSColin Finck isASCIIHexDigit(WCHAR c, UINT *digit)
429c2c66affSColin Finck {
430c2c66affSColin Finck     UINT cv = c;
431c2c66affSColin Finck 
432c2c66affSColin Finck     if (cv < '0')
433c2c66affSColin Finck         return FALSE;
434c2c66affSColin Finck     if (cv <= '9') {
435c2c66affSColin Finck         *digit = cv - '0';
436c2c66affSColin Finck         return TRUE;
437c2c66affSColin Finck     }
438c2c66affSColin Finck     cv |= 0x20;
439c2c66affSColin Finck     if (cv >= 'a' && cv <= 'f') {
440c2c66affSColin Finck         *digit = cv - 'a' + 10;
441c2c66affSColin Finck         return TRUE;
442c2c66affSColin Finck     }
443c2c66affSColin Finck     return FALSE;
444c2c66affSColin Finck }
445c2c66affSColin Finck 
446c2c66affSColin Finck typedef struct {
447c2c66affSColin Finck     REOp op;
448c2c66affSColin Finck     const WCHAR *errPos;
449c2c66affSColin Finck     size_t parenIndex;
450c2c66affSColin Finck } REOpData;
451c2c66affSColin Finck 
452c2c66affSColin Finck #define JUMP_OFFSET_HI(off)     ((jsbytecode)((off) >> 8))
453c2c66affSColin Finck #define JUMP_OFFSET_LO(off)     ((jsbytecode)(off))
454c2c66affSColin Finck 
455c2c66affSColin Finck static BOOL
SetForwardJumpOffset(jsbytecode * jump,jsbytecode * target)456c2c66affSColin Finck SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
457c2c66affSColin Finck {
458c2c66affSColin Finck     ptrdiff_t offset = target - jump;
459c2c66affSColin Finck 
460c2c66affSColin Finck     /* Check that target really points forward. */
461c2c66affSColin Finck     assert(offset >= 2);
462c2c66affSColin Finck     if ((size_t)offset > OFFSET_MAX)
463c2c66affSColin Finck         return FALSE;
464c2c66affSColin Finck 
465c2c66affSColin Finck     jump[0] = JUMP_OFFSET_HI(offset);
466c2c66affSColin Finck     jump[1] = JUMP_OFFSET_LO(offset);
467c2c66affSColin Finck     return TRUE;
468c2c66affSColin Finck }
469c2c66affSColin Finck 
470c2c66affSColin Finck /*
471c2c66affSColin Finck  * Generate bytecode for the tree rooted at t using an explicit stack instead
472c2c66affSColin Finck  * of recursion.
473c2c66affSColin Finck  */
474c2c66affSColin Finck static jsbytecode *
EmitREBytecode(CompilerState * state,regexp_t * re,size_t treeDepth,jsbytecode * pc,RENode * t)475c2c66affSColin Finck EmitREBytecode(CompilerState *state, regexp_t *re, size_t treeDepth,
476c2c66affSColin Finck                jsbytecode *pc, RENode *t)
477c2c66affSColin Finck {
478c2c66affSColin Finck     EmitStateStackEntry *emitStateSP, *emitStateStack;
479c2c66affSColin Finck     RECharSet *charSet;
480c2c66affSColin Finck     REOp op;
481c2c66affSColin Finck 
482c2c66affSColin Finck     if (treeDepth == 0) {
483c2c66affSColin Finck         emitStateStack = NULL;
484c2c66affSColin Finck     } else {
485c2c66affSColin Finck         emitStateStack = heap_alloc(sizeof(EmitStateStackEntry) * treeDepth);
486c2c66affSColin Finck         if (!emitStateStack)
487c2c66affSColin Finck             return NULL;
488c2c66affSColin Finck     }
489c2c66affSColin Finck     emitStateSP = emitStateStack;
490c2c66affSColin Finck     op = t->op;
491c2c66affSColin Finck     assert(op < REOP_LIMIT);
492c2c66affSColin Finck 
493c2c66affSColin Finck     for (;;) {
494c2c66affSColin Finck         *pc++ = op;
495c2c66affSColin Finck         switch (op) {
496c2c66affSColin Finck           case REOP_EMPTY:
497c2c66affSColin Finck             --pc;
498c2c66affSColin Finck             break;
499c2c66affSColin Finck 
500c2c66affSColin Finck           case REOP_ALTPREREQ2:
501c2c66affSColin Finck           case REOP_ALTPREREQ:
502c2c66affSColin Finck             assert(emitStateSP);
503c2c66affSColin Finck             emitStateSP->altHead = pc - 1;
504c2c66affSColin Finck             emitStateSP->endTermFixup = pc;
505c2c66affSColin Finck             pc += OFFSET_LEN;
506c2c66affSColin Finck             SET_ARG(pc, t->u.altprereq.ch1);
507c2c66affSColin Finck             pc += ARG_LEN;
508c2c66affSColin Finck             SET_ARG(pc, t->u.altprereq.ch2);
509c2c66affSColin Finck             pc += ARG_LEN;
510c2c66affSColin Finck 
511c2c66affSColin Finck             emitStateSP->nextAltFixup = pc;    /* offset to next alternate */
512c2c66affSColin Finck             pc += OFFSET_LEN;
513c2c66affSColin Finck 
514c2c66affSColin Finck             emitStateSP->continueNode = t;
515c2c66affSColin Finck             emitStateSP->continueOp = REOP_JUMP;
516c2c66affSColin Finck             emitStateSP->jumpToJumpFlag = FALSE;
517c2c66affSColin Finck             ++emitStateSP;
518c2c66affSColin Finck             assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
519c2c66affSColin Finck             t = t->kid;
520c2c66affSColin Finck             op = t->op;
521c2c66affSColin Finck             assert(op < REOP_LIMIT);
522c2c66affSColin Finck             continue;
523c2c66affSColin Finck 
524c2c66affSColin Finck           case REOP_JUMP:
525c2c66affSColin Finck             emitStateSP->nextTermFixup = pc;    /* offset to following term */
526c2c66affSColin Finck             pc += OFFSET_LEN;
527c2c66affSColin Finck             if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
528c2c66affSColin Finck                 goto jump_too_big;
529c2c66affSColin Finck             emitStateSP->continueOp = REOP_ENDALT;
530c2c66affSColin Finck             ++emitStateSP;
531c2c66affSColin Finck             assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
532c2c66affSColin Finck             t = t->u.kid2;
533c2c66affSColin Finck             op = t->op;
534c2c66affSColin Finck             assert(op < REOP_LIMIT);
535c2c66affSColin Finck             continue;
536c2c66affSColin Finck 
537c2c66affSColin Finck           case REOP_ENDALT:
538c2c66affSColin Finck             /*
539c2c66affSColin Finck              * If we already patched emitStateSP->nextTermFixup to jump to
540c2c66affSColin Finck              * a nearer jump, to avoid 16-bit immediate offset overflow, we
541c2c66affSColin Finck              * are done here.
542c2c66affSColin Finck              */
543c2c66affSColin Finck             if (emitStateSP->jumpToJumpFlag)
544c2c66affSColin Finck                 break;
545c2c66affSColin Finck 
546c2c66affSColin Finck             /*
547c2c66affSColin Finck              * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
548c2c66affSColin Finck              * REOP_ENDALT is executed only on successful match of the last
549c2c66affSColin Finck              * alternate in a group.
550c2c66affSColin Finck              */
551c2c66affSColin Finck             if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
552c2c66affSColin Finck                 goto jump_too_big;
553c2c66affSColin Finck             if (t->op != REOP_ALT) {
554c2c66affSColin Finck                 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
555c2c66affSColin Finck                     goto jump_too_big;
556c2c66affSColin Finck             }
557c2c66affSColin Finck 
558c2c66affSColin Finck             /*
559c2c66affSColin Finck              * If the program is bigger than the REOP_JUMP offset range, then
560c2c66affSColin Finck              * we must check for alternates before this one that are part of
561c2c66affSColin Finck              * the same group, and fix up their jump offsets to target jumps
562c2c66affSColin Finck              * close enough to fit in a 16-bit unsigned offset immediate.
563c2c66affSColin Finck              */
564c2c66affSColin Finck             if ((size_t)(pc - re->program) > OFFSET_MAX &&
565c2c66affSColin Finck                 emitStateSP > emitStateStack) {
566c2c66affSColin Finck                 EmitStateStackEntry *esp, *esp2;
567c2c66affSColin Finck                 jsbytecode *alt, *jump;
568c2c66affSColin Finck                 ptrdiff_t span, header;
569c2c66affSColin Finck 
570c2c66affSColin Finck                 esp2 = emitStateSP;
571c2c66affSColin Finck                 alt = esp2->altHead;
572c2c66affSColin Finck                 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
573c2c66affSColin Finck                     if (esp->continueOp == REOP_ENDALT &&
574c2c66affSColin Finck                         !esp->jumpToJumpFlag &&
575c2c66affSColin Finck                         esp->nextTermFixup + OFFSET_LEN == alt &&
576c2c66affSColin Finck                         (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
577c2c66affSColin Finck                                        ? esp->endTermFixup
578c2c66affSColin Finck                                        : esp->nextTermFixup)) > OFFSET_MAX) {
579c2c66affSColin Finck                         alt = esp->altHead;
580c2c66affSColin Finck                         jump = esp->nextTermFixup;
581c2c66affSColin Finck 
582c2c66affSColin Finck                         /*
583c2c66affSColin Finck                          * The span must be 1 less than the distance from
584c2c66affSColin Finck                          * jump offset to jump offset, so we actually jump
585c2c66affSColin Finck                          * to a REOP_JUMP bytecode, not to its offset!
586c2c66affSColin Finck                          */
587c2c66affSColin Finck                         for (;;) {
588c2c66affSColin Finck                             assert(jump < esp2->nextTermFixup);
589c2c66affSColin Finck                             span = esp2->nextTermFixup - jump - 1;
590c2c66affSColin Finck                             if ((size_t)span <= OFFSET_MAX)
591c2c66affSColin Finck                                 break;
592c2c66affSColin Finck                             do {
593c2c66affSColin Finck                                 if (--esp2 == esp)
594c2c66affSColin Finck                                     goto jump_too_big;
595c2c66affSColin Finck                             } while (esp2->continueOp != REOP_ENDALT);
596c2c66affSColin Finck                         }
597c2c66affSColin Finck 
598c2c66affSColin Finck                         jump[0] = JUMP_OFFSET_HI(span);
599c2c66affSColin Finck                         jump[1] = JUMP_OFFSET_LO(span);
600c2c66affSColin Finck 
601c2c66affSColin Finck                         if (esp->continueNode->op != REOP_ALT) {
602c2c66affSColin Finck                             /*
603c2c66affSColin Finck                              * We must patch the offset at esp->endTermFixup
604c2c66affSColin Finck                              * as well, for the REOP_ALTPREREQ{,2} opcodes.
605c2c66affSColin Finck                              * If we're unlucky and endTermFixup is more than
606c2c66affSColin Finck                              * OFFSET_MAX bytes from its target, we cheat by
607c2c66affSColin Finck                              * jumping 6 bytes to the jump whose offset is at
608c2c66affSColin Finck                              * esp->nextTermFixup, which has the same target.
609c2c66affSColin Finck                              */
610c2c66affSColin Finck                             jump = esp->endTermFixup;
611c2c66affSColin Finck                             header = esp->nextTermFixup - jump;
612c2c66affSColin Finck                             span += header;
613c2c66affSColin Finck                             if ((size_t)span > OFFSET_MAX)
614c2c66affSColin Finck                                 span = header;
615c2c66affSColin Finck 
616c2c66affSColin Finck                             jump[0] = JUMP_OFFSET_HI(span);
617c2c66affSColin Finck                             jump[1] = JUMP_OFFSET_LO(span);
618c2c66affSColin Finck                         }
619c2c66affSColin Finck 
620c2c66affSColin Finck                         esp->jumpToJumpFlag = TRUE;
621c2c66affSColin Finck                     }
622c2c66affSColin Finck                 }
623c2c66affSColin Finck             }
624c2c66affSColin Finck             break;
625c2c66affSColin Finck 
626c2c66affSColin Finck           case REOP_ALT:
627c2c66affSColin Finck             assert(emitStateSP);
628c2c66affSColin Finck             emitStateSP->altHead = pc - 1;
629c2c66affSColin Finck             emitStateSP->nextAltFixup = pc;     /* offset to next alternate */
630c2c66affSColin Finck             pc += OFFSET_LEN;
631c2c66affSColin Finck             emitStateSP->continueNode = t;
632c2c66affSColin Finck             emitStateSP->continueOp = REOP_JUMP;
633c2c66affSColin Finck             emitStateSP->jumpToJumpFlag = FALSE;
634c2c66affSColin Finck             ++emitStateSP;
635c2c66affSColin Finck             assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
636c2c66affSColin Finck             t = t->kid;
637c2c66affSColin Finck             op = t->op;
638c2c66affSColin Finck             assert(op < REOP_LIMIT);
639c2c66affSColin Finck             continue;
640c2c66affSColin Finck 
641c2c66affSColin Finck           case REOP_FLAT:
642c2c66affSColin Finck             /*
643c2c66affSColin Finck              * Coalesce FLATs if possible and if it would not increase bytecode
644c2c66affSColin Finck              * beyond preallocated limit. The latter happens only when bytecode
645c2c66affSColin Finck              * size for coalesced string with offset p and length 2 exceeds 6
646c2c66affSColin Finck              * bytes preallocated for 2 single char nodes, i.e. when
647c2c66affSColin Finck              * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
648c2c66affSColin Finck              * GetCompactIndexWidth(p) > 4.
649c2c66affSColin Finck              * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
650c2c66affSColin Finck              * nodes strictly decreases bytecode size, the check has to be
651c2c66affSColin Finck              * done only for the first coalescing.
652c2c66affSColin Finck              */
653c2c66affSColin Finck             if (t->kid &&
654c2c66affSColin Finck                 GetCompactIndexWidth((WCHAR*)t->kid - state->cpbegin) <= 4)
655c2c66affSColin Finck             {
656c2c66affSColin Finck                 while (t->next &&
657c2c66affSColin Finck                        t->next->op == REOP_FLAT &&
658c2c66affSColin Finck                        (WCHAR*)t->kid + t->u.flat.length ==
659c2c66affSColin Finck                        t->next->kid) {
660c2c66affSColin Finck                     t->u.flat.length += t->next->u.flat.length;
661c2c66affSColin Finck                     t->next = t->next->next;
662c2c66affSColin Finck                 }
663c2c66affSColin Finck             }
664c2c66affSColin Finck             if (t->kid && t->u.flat.length > 1) {
665c2c66affSColin Finck                 pc[-1] = (state->flags & REG_FOLD) ? REOP_FLATi : REOP_FLAT;
666c2c66affSColin Finck                 pc = WriteCompactIndex(pc, (WCHAR*)t->kid - state->cpbegin);
667c2c66affSColin Finck                 pc = WriteCompactIndex(pc, t->u.flat.length);
668c2c66affSColin Finck             } else if (t->u.flat.chr < 256) {
669c2c66affSColin Finck                 pc[-1] = (state->flags & REG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
670c2c66affSColin Finck                 *pc++ = (jsbytecode) t->u.flat.chr;
671c2c66affSColin Finck             } else {
672c2c66affSColin Finck                 pc[-1] = (state->flags & REG_FOLD)
673c2c66affSColin Finck                          ? REOP_UCFLAT1i
674c2c66affSColin Finck                          : REOP_UCFLAT1;
675c2c66affSColin Finck                 SET_ARG(pc, t->u.flat.chr);
676c2c66affSColin Finck                 pc += ARG_LEN;
677c2c66affSColin Finck             }
678c2c66affSColin Finck             break;
679c2c66affSColin Finck 
680c2c66affSColin Finck           case REOP_LPAREN:
681c2c66affSColin Finck             assert(emitStateSP);
682c2c66affSColin Finck             pc = WriteCompactIndex(pc, t->u.parenIndex);
683c2c66affSColin Finck             emitStateSP->continueNode = t;
684c2c66affSColin Finck             emitStateSP->continueOp = REOP_RPAREN;
685c2c66affSColin Finck             ++emitStateSP;
686c2c66affSColin Finck             assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
687c2c66affSColin Finck             t = t->kid;
688c2c66affSColin Finck             op = t->op;
689c2c66affSColin Finck             continue;
690c2c66affSColin Finck 
691c2c66affSColin Finck           case REOP_RPAREN:
692c2c66affSColin Finck             pc = WriteCompactIndex(pc, t->u.parenIndex);
693c2c66affSColin Finck             break;
694c2c66affSColin Finck 
695c2c66affSColin Finck           case REOP_BACKREF:
696c2c66affSColin Finck             pc = WriteCompactIndex(pc, t->u.parenIndex);
697c2c66affSColin Finck             break;
698c2c66affSColin Finck 
699c2c66affSColin Finck           case REOP_ASSERT:
700c2c66affSColin Finck             assert(emitStateSP);
701c2c66affSColin Finck             emitStateSP->nextTermFixup = pc;
702c2c66affSColin Finck             pc += OFFSET_LEN;
703c2c66affSColin Finck             emitStateSP->continueNode = t;
704c2c66affSColin Finck             emitStateSP->continueOp = REOP_ASSERTTEST;
705c2c66affSColin Finck             ++emitStateSP;
706c2c66affSColin Finck             assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
707c2c66affSColin Finck             t = t->kid;
708c2c66affSColin Finck             op = t->op;
709c2c66affSColin Finck             continue;
710c2c66affSColin Finck 
711c2c66affSColin Finck           case REOP_ASSERTTEST:
712c2c66affSColin Finck           case REOP_ASSERTNOTTEST:
713c2c66affSColin Finck             if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
714c2c66affSColin Finck                 goto jump_too_big;
715c2c66affSColin Finck             break;
716c2c66affSColin Finck 
717c2c66affSColin Finck           case REOP_ASSERT_NOT:
718c2c66affSColin Finck             assert(emitStateSP);
719c2c66affSColin Finck             emitStateSP->nextTermFixup = pc;
720c2c66affSColin Finck             pc += OFFSET_LEN;
721c2c66affSColin Finck             emitStateSP->continueNode = t;
722c2c66affSColin Finck             emitStateSP->continueOp = REOP_ASSERTNOTTEST;
723c2c66affSColin Finck             ++emitStateSP;
724c2c66affSColin Finck             assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
725c2c66affSColin Finck             t = t->kid;
726c2c66affSColin Finck             op = t->op;
727c2c66affSColin Finck             continue;
728c2c66affSColin Finck 
729c2c66affSColin Finck           case REOP_QUANT:
730c2c66affSColin Finck             assert(emitStateSP);
731c2c66affSColin Finck             if (t->u.range.min == 0 && t->u.range.max == (UINT)-1) {
732c2c66affSColin Finck                 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
733c2c66affSColin Finck             } else if (t->u.range.min == 0 && t->u.range.max == 1) {
734c2c66affSColin Finck                 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
735c2c66affSColin Finck             } else if (t->u.range.min == 1 && t->u.range.max == (UINT) -1) {
736c2c66affSColin Finck                 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
737c2c66affSColin Finck             } else {
738c2c66affSColin Finck                 if (!t->u.range.greedy)
739c2c66affSColin Finck                     pc[-1] = REOP_MINIMALQUANT;
740c2c66affSColin Finck                 pc = WriteCompactIndex(pc, t->u.range.min);
741c2c66affSColin Finck                 /*
742c2c66affSColin Finck                  * Write max + 1 to avoid using size_t(max) + 1 bytes
743c2c66affSColin Finck                  * for (UINT)-1 sentinel.
744c2c66affSColin Finck                  */
745c2c66affSColin Finck                 pc = WriteCompactIndex(pc, t->u.range.max + 1);
746c2c66affSColin Finck             }
747c2c66affSColin Finck             emitStateSP->nextTermFixup = pc;
748c2c66affSColin Finck             pc += OFFSET_LEN;
749c2c66affSColin Finck             emitStateSP->continueNode = t;
750c2c66affSColin Finck             emitStateSP->continueOp = REOP_ENDCHILD;
751c2c66affSColin Finck             ++emitStateSP;
752c2c66affSColin Finck             assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
753c2c66affSColin Finck             t = t->kid;
754c2c66affSColin Finck             op = t->op;
755c2c66affSColin Finck             continue;
756c2c66affSColin Finck 
757c2c66affSColin Finck           case REOP_ENDCHILD:
758c2c66affSColin Finck             if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
759c2c66affSColin Finck                 goto jump_too_big;
760c2c66affSColin Finck             break;
761c2c66affSColin Finck 
762c2c66affSColin Finck           case REOP_CLASS:
763c2c66affSColin Finck             if (!t->u.ucclass.sense)
764c2c66affSColin Finck                 pc[-1] = REOP_NCLASS;
765c2c66affSColin Finck             pc = WriteCompactIndex(pc, t->u.ucclass.index);
766c2c66affSColin Finck             charSet = &re->classList[t->u.ucclass.index];
767c2c66affSColin Finck             charSet->converted = FALSE;
768c2c66affSColin Finck             charSet->length = t->u.ucclass.bmsize;
769c2c66affSColin Finck             charSet->u.src.startIndex = t->u.ucclass.startIndex;
770c2c66affSColin Finck             charSet->u.src.length = t->u.ucclass.kidlen;
771c2c66affSColin Finck             charSet->sense = t->u.ucclass.sense;
772c2c66affSColin Finck             break;
773c2c66affSColin Finck 
774c2c66affSColin Finck           default:
775c2c66affSColin Finck             break;
776c2c66affSColin Finck         }
777c2c66affSColin Finck 
778c2c66affSColin Finck         t = t->next;
779c2c66affSColin Finck         if (t) {
780c2c66affSColin Finck             op = t->op;
781c2c66affSColin Finck         } else {
782c2c66affSColin Finck             if (emitStateSP == emitStateStack)
783c2c66affSColin Finck                 break;
784c2c66affSColin Finck             --emitStateSP;
785c2c66affSColin Finck             t = emitStateSP->continueNode;
786c2c66affSColin Finck             op = (REOp) emitStateSP->continueOp;
787c2c66affSColin Finck         }
788c2c66affSColin Finck     }
789c2c66affSColin Finck 
790c2c66affSColin Finck   cleanup:
791c2c66affSColin Finck     heap_free(emitStateStack);
792c2c66affSColin Finck     return pc;
793c2c66affSColin Finck 
794c2c66affSColin Finck   jump_too_big:
795c2c66affSColin Finck     ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
796c2c66affSColin Finck     pc = NULL;
797c2c66affSColin Finck     goto cleanup;
798c2c66affSColin Finck }
799c2c66affSColin Finck 
800c2c66affSColin Finck /*
801c2c66affSColin Finck  * Process the op against the two top operands, reducing them to a single
802c2c66affSColin Finck  * operand in the penultimate slot. Update progLength and treeDepth.
803c2c66affSColin Finck  */
804c2c66affSColin Finck static BOOL
ProcessOp(CompilerState * state,REOpData * opData,RENode ** operandStack,INT operandSP)805c2c66affSColin Finck ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
806c2c66affSColin Finck           INT operandSP)
807c2c66affSColin Finck {
808c2c66affSColin Finck     RENode *result;
809c2c66affSColin Finck 
810c2c66affSColin Finck     switch (opData->op) {
811c2c66affSColin Finck       case REOP_ALT:
812c2c66affSColin Finck         result = NewRENode(state, REOP_ALT);
813c2c66affSColin Finck         if (!result)
814c2c66affSColin Finck             return FALSE;
815c2c66affSColin Finck         result->kid = operandStack[operandSP - 2];
816c2c66affSColin Finck         result->u.kid2 = operandStack[operandSP - 1];
817c2c66affSColin Finck         operandStack[operandSP - 2] = result;
818c2c66affSColin Finck 
819c2c66affSColin Finck         if (state->treeDepth == TREE_DEPTH_MAX) {
820c2c66affSColin Finck             ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
821c2c66affSColin Finck             return FALSE;
822c2c66affSColin Finck         }
823c2c66affSColin Finck         ++state->treeDepth;
824c2c66affSColin Finck 
825c2c66affSColin Finck         /*
826c2c66affSColin Finck          * Look at both alternates to see if there's a FLAT or a CLASS at
827c2c66affSColin Finck          * the start of each. If so, use a prerequisite match.
828c2c66affSColin Finck          */
829c2c66affSColin Finck         if (((RENode *) result->kid)->op == REOP_FLAT &&
830c2c66affSColin Finck             ((RENode *) result->u.kid2)->op == REOP_FLAT &&
831c2c66affSColin Finck             (state->flags & REG_FOLD) == 0) {
832c2c66affSColin Finck             result->op = REOP_ALTPREREQ;
833c2c66affSColin Finck             result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
834c2c66affSColin Finck             result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
835c2c66affSColin Finck             /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
836c2c66affSColin Finck                                             JUMP, <end> ... ENDALT */
837c2c66affSColin Finck             state->progLength += 13;
838c2c66affSColin Finck         }
839c2c66affSColin Finck         else
840c2c66affSColin Finck         if (((RENode *) result->kid)->op == REOP_CLASS &&
841c2c66affSColin Finck             ((RENode *) result->kid)->u.ucclass.index < 256 &&
842c2c66affSColin Finck             ((RENode *) result->u.kid2)->op == REOP_FLAT &&
843c2c66affSColin Finck             (state->flags & REG_FOLD) == 0) {
844c2c66affSColin Finck             result->op = REOP_ALTPREREQ2;
845c2c66affSColin Finck             result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
846c2c66affSColin Finck             result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
847c2c66affSColin Finck             /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
848c2c66affSColin Finck                                             JUMP, <end> ... ENDALT */
849c2c66affSColin Finck             state->progLength += 13;
850c2c66affSColin Finck         }
851c2c66affSColin Finck         else
852c2c66affSColin Finck         if (((RENode *) result->kid)->op == REOP_FLAT &&
853c2c66affSColin Finck             ((RENode *) result->u.kid2)->op == REOP_CLASS &&
854c2c66affSColin Finck             ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
855c2c66affSColin Finck             (state->flags & REG_FOLD) == 0) {
856c2c66affSColin Finck             result->op = REOP_ALTPREREQ2;
857c2c66affSColin Finck             result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
858c2c66affSColin Finck             result->u.altprereq.ch2 =
859c2c66affSColin Finck                 ((RENode *) result->u.kid2)->u.ucclass.index;
860c2c66affSColin Finck             /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
861c2c66affSColin Finck                                           JUMP, <end> ... ENDALT */
862c2c66affSColin Finck             state->progLength += 13;
863c2c66affSColin Finck         }
864c2c66affSColin Finck         else {
865c2c66affSColin Finck             /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
866c2c66affSColin Finck             state->progLength += 7;
867c2c66affSColin Finck         }
868c2c66affSColin Finck         break;
869c2c66affSColin Finck 
870c2c66affSColin Finck       case REOP_CONCAT:
871c2c66affSColin Finck         result = operandStack[operandSP - 2];
872c2c66affSColin Finck         while (result->next)
873c2c66affSColin Finck             result = result->next;
874c2c66affSColin Finck         result->next = operandStack[operandSP - 1];
875c2c66affSColin Finck         break;
876c2c66affSColin Finck 
877c2c66affSColin Finck       case REOP_ASSERT:
878c2c66affSColin Finck       case REOP_ASSERT_NOT:
879c2c66affSColin Finck       case REOP_LPARENNON:
880c2c66affSColin Finck       case REOP_LPAREN:
881c2c66affSColin Finck         /* These should have been processed by a close paren. */
882c2c66affSColin Finck         ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
883c2c66affSColin Finck                                 opData->errPos);
884c2c66affSColin Finck         return FALSE;
885c2c66affSColin Finck 
886c2c66affSColin Finck       default:;
887c2c66affSColin Finck     }
888c2c66affSColin Finck     return TRUE;
889c2c66affSColin Finck }
890c2c66affSColin Finck 
891c2c66affSColin Finck /*
892c2c66affSColin Finck  * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
893c2c66affSColin Finck  * it being on the stack, and to propagate errors to its callers.
894c2c66affSColin Finck  */
895c2c66affSColin Finck #define JSREG_FIND_PAREN_COUNT  0x8000
896c2c66affSColin Finck #define JSREG_FIND_PAREN_ERROR  0x4000
897c2c66affSColin Finck 
898c2c66affSColin Finck /*
899c2c66affSColin Finck  * Magic return value from FindParenCount and GetDecimalValue, to indicate
900c2c66affSColin Finck  * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
901c2c66affSColin Finck  * its findMax parameter is non-null.
902c2c66affSColin Finck  */
903c2c66affSColin Finck #define OVERFLOW_VALUE          ((UINT)-1)
904c2c66affSColin Finck 
905c2c66affSColin Finck static UINT
FindParenCount(CompilerState * state)906c2c66affSColin Finck FindParenCount(CompilerState *state)
907c2c66affSColin Finck {
908c2c66affSColin Finck     CompilerState temp;
909c2c66affSColin Finck     int i;
910c2c66affSColin Finck 
911c2c66affSColin Finck     if (state->flags & JSREG_FIND_PAREN_COUNT)
912c2c66affSColin Finck         return OVERFLOW_VALUE;
913c2c66affSColin Finck 
914c2c66affSColin Finck     /*
915c2c66affSColin Finck      * Copy state into temp, flag it so we never report an invalid backref,
916c2c66affSColin Finck      * and reset its members to parse the entire regexp.  This is obviously
917c2c66affSColin Finck      * suboptimal, but GetDecimalValue calls us only if a backref appears to
918c2c66affSColin Finck      * refer to a forward parenthetical, which is rare.
919c2c66affSColin Finck      */
920c2c66affSColin Finck     temp = *state;
921c2c66affSColin Finck     temp.flags |= JSREG_FIND_PAREN_COUNT;
922c2c66affSColin Finck     temp.cp = temp.cpbegin;
923c2c66affSColin Finck     temp.parenCount = 0;
924c2c66affSColin Finck     temp.classCount = 0;
925c2c66affSColin Finck     temp.progLength = 0;
926c2c66affSColin Finck     temp.treeDepth = 0;
927c2c66affSColin Finck     temp.classBitmapsMem = 0;
928c2c66affSColin Finck     for (i = 0; i < CLASS_CACHE_SIZE; i++)
929c2c66affSColin Finck         temp.classCache[i].start = NULL;
930c2c66affSColin Finck 
931c2c66affSColin Finck     if (!ParseRegExp(&temp)) {
932c2c66affSColin Finck         state->flags |= JSREG_FIND_PAREN_ERROR;
933c2c66affSColin Finck         return OVERFLOW_VALUE;
934c2c66affSColin Finck     }
935c2c66affSColin Finck     return temp.parenCount;
936c2c66affSColin Finck }
937c2c66affSColin Finck 
938c2c66affSColin Finck /*
939c2c66affSColin Finck  * Extract and return a decimal value at state->cp.  The initial character c
940c2c66affSColin Finck  * has already been read.  Return OVERFLOW_VALUE if the result exceeds max.
941c2c66affSColin Finck  * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
942c2c66affSColin Finck  * state->flags to discover whether an error occurred under findMax.
943c2c66affSColin Finck  */
944c2c66affSColin Finck static UINT
GetDecimalValue(WCHAR c,UINT max,UINT (* findMax)(CompilerState * state),CompilerState * state)945c2c66affSColin Finck GetDecimalValue(WCHAR c, UINT max, UINT (*findMax)(CompilerState *state),
946c2c66affSColin Finck                 CompilerState *state)
947c2c66affSColin Finck {
948c2c66affSColin Finck     UINT value = JS7_UNDEC(c);
949c2c66affSColin Finck     BOOL overflow = (value > max && (!findMax || value > findMax(state)));
950c2c66affSColin Finck 
951c2c66affSColin Finck     /* The following restriction allows simpler overflow checks. */
952c2c66affSColin Finck     assert(max <= ((UINT)-1 - 9) / 10);
953c2c66affSColin Finck     while (state->cp < state->cpend) {
954c2c66affSColin Finck         c = *state->cp;
955c2c66affSColin Finck         if (!JS7_ISDEC(c))
956c2c66affSColin Finck             break;
957c2c66affSColin Finck         value = 10 * value + JS7_UNDEC(c);
958c2c66affSColin Finck         if (!overflow && value > max && (!findMax || value > findMax(state)))
959c2c66affSColin Finck             overflow = TRUE;
960c2c66affSColin Finck         ++state->cp;
961c2c66affSColin Finck     }
962c2c66affSColin Finck     return overflow ? OVERFLOW_VALUE : value;
963c2c66affSColin Finck }
964c2c66affSColin Finck 
965c2c66affSColin Finck /*
966c2c66affSColin Finck  * Calculate the total size of the bitmap required for a class expression.
967c2c66affSColin Finck  */
968c2c66affSColin Finck static BOOL
CalculateBitmapSize(CompilerState * state,RENode * target,const WCHAR * src,const WCHAR * end)969c2c66affSColin Finck CalculateBitmapSize(CompilerState *state, RENode *target, const WCHAR *src,
970c2c66affSColin Finck                     const WCHAR *end)
971c2c66affSColin Finck {
972c2c66affSColin Finck     UINT max = 0;
973c2c66affSColin Finck     BOOL inRange = FALSE;
974c2c66affSColin Finck     WCHAR c, rangeStart = 0;
975c2c66affSColin Finck     UINT n, digit, nDigits, i;
976c2c66affSColin Finck 
977c2c66affSColin Finck     target->u.ucclass.bmsize = 0;
978c2c66affSColin Finck     target->u.ucclass.sense = TRUE;
979c2c66affSColin Finck 
980c2c66affSColin Finck     if (src == end)
981c2c66affSColin Finck         return TRUE;
982c2c66affSColin Finck 
983c2c66affSColin Finck     if (*src == '^') {
984c2c66affSColin Finck         ++src;
985c2c66affSColin Finck         target->u.ucclass.sense = FALSE;
986c2c66affSColin Finck     }
987c2c66affSColin Finck 
988c2c66affSColin Finck     while (src != end) {
989c2c66affSColin Finck         BOOL canStartRange = TRUE;
990c2c66affSColin Finck         UINT localMax = 0;
991c2c66affSColin Finck 
992c2c66affSColin Finck         switch (*src) {
993c2c66affSColin Finck           case '\\':
994c2c66affSColin Finck             ++src;
995c2c66affSColin Finck             c = *src++;
996c2c66affSColin Finck             switch (c) {
997c2c66affSColin Finck               case 'b':
998c2c66affSColin Finck                 localMax = 0x8;
999c2c66affSColin Finck                 break;
1000c2c66affSColin Finck               case 'f':
1001c2c66affSColin Finck                 localMax = 0xC;
1002c2c66affSColin Finck                 break;
1003c2c66affSColin Finck               case 'n':
1004c2c66affSColin Finck                 localMax = 0xA;
1005c2c66affSColin Finck                 break;
1006c2c66affSColin Finck               case 'r':
1007c2c66affSColin Finck                 localMax = 0xD;
1008c2c66affSColin Finck                 break;
1009c2c66affSColin Finck               case 't':
1010c2c66affSColin Finck                 localMax = 0x9;
1011c2c66affSColin Finck                 break;
1012c2c66affSColin Finck               case 'v':
1013c2c66affSColin Finck                 localMax = 0xB;
1014c2c66affSColin Finck                 break;
1015c2c66affSColin Finck               case 'c':
1016c2c66affSColin Finck                 if (src < end && RE_IS_LETTER(*src)) {
1017c2c66affSColin Finck                     localMax = (UINT) (*src++) & 0x1F;
1018c2c66affSColin Finck                 } else {
1019c2c66affSColin Finck                     --src;
1020c2c66affSColin Finck                     localMax = '\\';
1021c2c66affSColin Finck                 }
1022c2c66affSColin Finck                 break;
1023c2c66affSColin Finck               case 'x':
1024c2c66affSColin Finck                 nDigits = 2;
1025c2c66affSColin Finck                 goto lexHex;
1026c2c66affSColin Finck               case 'u':
1027c2c66affSColin Finck                 nDigits = 4;
1028c2c66affSColin Finck lexHex:
1029c2c66affSColin Finck                 n = 0;
1030c2c66affSColin Finck                 for (i = 0; (i < nDigits) && (src < end); i++) {
1031c2c66affSColin Finck                     c = *src++;
1032c2c66affSColin Finck                     if (!isASCIIHexDigit(c, &digit)) {
1033c2c66affSColin Finck                         /*
1034c2c66affSColin Finck                          * Back off to accepting the original
1035c2c66affSColin Finck                          *'\' as a literal.
1036c2c66affSColin Finck                          */
1037c2c66affSColin Finck                         src -= i + 1;
1038c2c66affSColin Finck                         n = '\\';
1039c2c66affSColin Finck                         break;
1040c2c66affSColin Finck                     }
1041c2c66affSColin Finck                     n = (n << 4) | digit;
1042c2c66affSColin Finck                 }
1043c2c66affSColin Finck                 localMax = n;
1044c2c66affSColin Finck                 break;
1045c2c66affSColin Finck               case 'd':
1046c2c66affSColin Finck                 canStartRange = FALSE;
1047c2c66affSColin Finck                 if (inRange) {
1048c2c66affSColin Finck                     JS_ReportErrorNumber(state->context,
1049c2c66affSColin Finck                                          js_GetErrorMessage, NULL,
1050c2c66affSColin Finck                                          JSMSG_BAD_CLASS_RANGE);
1051c2c66affSColin Finck                     return FALSE;
1052c2c66affSColin Finck                 }
1053c2c66affSColin Finck                 localMax = '9';
1054c2c66affSColin Finck                 break;
1055c2c66affSColin Finck               case 'D':
1056c2c66affSColin Finck               case 's':
1057c2c66affSColin Finck               case 'S':
1058c2c66affSColin Finck               case 'w':
1059c2c66affSColin Finck               case 'W':
1060c2c66affSColin Finck                 canStartRange = FALSE;
1061c2c66affSColin Finck                 if (inRange) {
1062c2c66affSColin Finck                     JS_ReportErrorNumber(state->context,
1063c2c66affSColin Finck                                          js_GetErrorMessage, NULL,
1064c2c66affSColin Finck                                          JSMSG_BAD_CLASS_RANGE);
1065c2c66affSColin Finck                     return FALSE;
1066c2c66affSColin Finck                 }
1067c2c66affSColin Finck                 max = 65535;
1068c2c66affSColin Finck 
1069c2c66affSColin Finck                 /*
1070c2c66affSColin Finck                  * If this is the start of a range, ensure that it's less than
1071c2c66affSColin Finck                  * the end.
1072c2c66affSColin Finck                  */
1073c2c66affSColin Finck                 localMax = 0;
1074c2c66affSColin Finck                 break;
1075c2c66affSColin Finck               case '0':
1076c2c66affSColin Finck               case '1':
1077c2c66affSColin Finck               case '2':
1078c2c66affSColin Finck               case '3':
1079c2c66affSColin Finck               case '4':
1080c2c66affSColin Finck               case '5':
1081c2c66affSColin Finck               case '6':
1082c2c66affSColin Finck               case '7':
1083c2c66affSColin Finck                 /*
1084c2c66affSColin Finck                  *  This is a non-ECMA extension - decimal escapes (in this
1085c2c66affSColin Finck                  *  case, octal!) are supposed to be an error inside class
1086c2c66affSColin Finck                  *  ranges, but supported here for backwards compatibility.
1087c2c66affSColin Finck                  *
1088c2c66affSColin Finck                  */
1089c2c66affSColin Finck                 n = JS7_UNDEC(c);
1090c2c66affSColin Finck                 c = *src;
1091c2c66affSColin Finck                 if ('0' <= c && c <= '7') {
1092c2c66affSColin Finck                     src++;
1093c2c66affSColin Finck                     n = 8 * n + JS7_UNDEC(c);
1094c2c66affSColin Finck                     c = *src;
1095c2c66affSColin Finck                     if ('0' <= c && c <= '7') {
1096c2c66affSColin Finck                         src++;
1097c2c66affSColin Finck                         i = 8 * n + JS7_UNDEC(c);
1098c2c66affSColin Finck                         if (i <= 0377)
1099c2c66affSColin Finck                             n = i;
1100c2c66affSColin Finck                         else
1101c2c66affSColin Finck                             src--;
1102c2c66affSColin Finck                     }
1103c2c66affSColin Finck                 }
1104c2c66affSColin Finck                 localMax = n;
1105c2c66affSColin Finck                 break;
1106c2c66affSColin Finck 
1107c2c66affSColin Finck               default:
1108c2c66affSColin Finck                 localMax = c;
1109c2c66affSColin Finck                 break;
1110c2c66affSColin Finck             }
1111c2c66affSColin Finck             break;
1112c2c66affSColin Finck           default:
1113c2c66affSColin Finck             localMax = *src++;
1114c2c66affSColin Finck             break;
1115c2c66affSColin Finck         }
1116c2c66affSColin Finck 
1117c2c66affSColin Finck         if (inRange) {
1118c2c66affSColin Finck             /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1119c2c66affSColin Finck             if (rangeStart > localMax) {
1120c2c66affSColin Finck                 JS_ReportErrorNumber(state->context,
1121c2c66affSColin Finck                                      js_GetErrorMessage, NULL,
1122c2c66affSColin Finck                                      JSMSG_BAD_CLASS_RANGE);
1123c2c66affSColin Finck                 return FALSE;
1124c2c66affSColin Finck             }
1125c2c66affSColin Finck             inRange = FALSE;
1126c2c66affSColin Finck         } else {
1127c2c66affSColin Finck             if (canStartRange && src < end - 1) {
1128c2c66affSColin Finck                 if (*src == '-') {
1129c2c66affSColin Finck                     ++src;
1130c2c66affSColin Finck                     inRange = TRUE;
1131c2c66affSColin Finck                     rangeStart = (WCHAR)localMax;
1132c2c66affSColin Finck                     continue;
1133c2c66affSColin Finck                 }
1134c2c66affSColin Finck             }
1135c2c66affSColin Finck             if (state->flags & REG_FOLD)
1136c2c66affSColin Finck                 rangeStart = localMax;   /* one run of the uc/dc loop below */
1137c2c66affSColin Finck         }
1138c2c66affSColin Finck 
1139c2c66affSColin Finck         if (state->flags & REG_FOLD) {
1140c2c66affSColin Finck             WCHAR maxch = localMax;
1141c2c66affSColin Finck 
1142c2c66affSColin Finck             for (i = rangeStart; i <= localMax; i++) {
1143c2c66affSColin Finck                 WCHAR uch, dch;
1144c2c66affSColin Finck 
1145*3e2d6582SAmine Khaldi                 uch = towupper(i);
1146*3e2d6582SAmine Khaldi                 dch = towlower(i);
1147c2c66affSColin Finck                 if(maxch < uch)
1148c2c66affSColin Finck                     maxch = uch;
1149c2c66affSColin Finck                 if(maxch < dch)
1150c2c66affSColin Finck                     maxch = dch;
1151c2c66affSColin Finck             }
1152c2c66affSColin Finck             localMax = maxch;
1153c2c66affSColin Finck         }
1154c2c66affSColin Finck 
1155c2c66affSColin Finck         if (localMax > max)
1156c2c66affSColin Finck             max = localMax;
1157c2c66affSColin Finck     }
1158c2c66affSColin Finck     target->u.ucclass.bmsize = max;
1159c2c66affSColin Finck     return TRUE;
1160c2c66affSColin Finck }
1161c2c66affSColin Finck 
1162c2c66affSColin Finck static INT
ParseMinMaxQuantifier(CompilerState * state,BOOL ignoreValues)1163c2c66affSColin Finck ParseMinMaxQuantifier(CompilerState *state, BOOL ignoreValues)
1164c2c66affSColin Finck {
1165c2c66affSColin Finck     UINT min, max;
1166c2c66affSColin Finck     WCHAR c;
1167c2c66affSColin Finck     const WCHAR *errp = state->cp++;
1168c2c66affSColin Finck 
1169c2c66affSColin Finck     c = *state->cp;
1170c2c66affSColin Finck     if (JS7_ISDEC(c)) {
1171c2c66affSColin Finck         ++state->cp;
1172c2c66affSColin Finck         min = GetDecimalValue(c, 0xFFFF, NULL, state);
1173c2c66affSColin Finck         c = *state->cp;
1174c2c66affSColin Finck 
1175c2c66affSColin Finck         if (!ignoreValues && min == OVERFLOW_VALUE)
1176c2c66affSColin Finck             return JSMSG_MIN_TOO_BIG;
1177c2c66affSColin Finck 
1178c2c66affSColin Finck         if (c == ',') {
1179c2c66affSColin Finck             c = *++state->cp;
1180c2c66affSColin Finck             if (JS7_ISDEC(c)) {
1181c2c66affSColin Finck                 ++state->cp;
1182c2c66affSColin Finck                 max = GetDecimalValue(c, 0xFFFF, NULL, state);
1183c2c66affSColin Finck                 c = *state->cp;
1184c2c66affSColin Finck                 if (!ignoreValues && max == OVERFLOW_VALUE)
1185c2c66affSColin Finck                     return JSMSG_MAX_TOO_BIG;
1186c2c66affSColin Finck                 if (!ignoreValues && min > max)
1187c2c66affSColin Finck                     return JSMSG_OUT_OF_ORDER;
1188c2c66affSColin Finck             } else {
1189c2c66affSColin Finck                 max = (UINT)-1;
1190c2c66affSColin Finck             }
1191c2c66affSColin Finck         } else {
1192c2c66affSColin Finck             max = min;
1193c2c66affSColin Finck         }
1194c2c66affSColin Finck         if (c == '}') {
1195c2c66affSColin Finck             state->result = NewRENode(state, REOP_QUANT);
1196c2c66affSColin Finck             if (!state->result)
1197c2c66affSColin Finck                 return JSMSG_OUT_OF_MEMORY;
1198c2c66affSColin Finck             state->result->u.range.min = min;
1199c2c66affSColin Finck             state->result->u.range.max = max;
1200c2c66affSColin Finck             /*
1201c2c66affSColin Finck              * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1202c2c66affSColin Finck              * where <max> is written as compact(max+1) to make
1203c2c66affSColin Finck              * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1204c2c66affSColin Finck              */
1205c2c66affSColin Finck             state->progLength += (1 + GetCompactIndexWidth(min)
1206c2c66affSColin Finck                                   + GetCompactIndexWidth(max + 1)
1207c2c66affSColin Finck                                   +3);
1208c2c66affSColin Finck             return 0;
1209c2c66affSColin Finck         }
1210c2c66affSColin Finck     }
1211c2c66affSColin Finck 
1212c2c66affSColin Finck     state->cp = errp;
1213c2c66affSColin Finck     return -1;
1214c2c66affSColin Finck }
1215c2c66affSColin Finck 
1216c2c66affSColin Finck static BOOL
ParseQuantifier(CompilerState * state)1217c2c66affSColin Finck ParseQuantifier(CompilerState *state)
1218c2c66affSColin Finck {
1219c2c66affSColin Finck     RENode *term;
1220c2c66affSColin Finck     term = state->result;
1221c2c66affSColin Finck     if (state->cp < state->cpend) {
1222c2c66affSColin Finck         switch (*state->cp) {
1223c2c66affSColin Finck           case '+':
1224c2c66affSColin Finck             state->result = NewRENode(state, REOP_QUANT);
1225c2c66affSColin Finck             if (!state->result)
1226c2c66affSColin Finck                 return FALSE;
1227c2c66affSColin Finck             state->result->u.range.min = 1;
1228c2c66affSColin Finck             state->result->u.range.max = (UINT)-1;
1229c2c66affSColin Finck             /* <PLUS>, <next> ... <ENDCHILD> */
1230c2c66affSColin Finck             state->progLength += 4;
1231c2c66affSColin Finck             goto quantifier;
1232c2c66affSColin Finck           case '*':
1233c2c66affSColin Finck             state->result = NewRENode(state, REOP_QUANT);
1234c2c66affSColin Finck             if (!state->result)
1235c2c66affSColin Finck                 return FALSE;
1236c2c66affSColin Finck             state->result->u.range.min = 0;
1237c2c66affSColin Finck             state->result->u.range.max = (UINT)-1;
1238c2c66affSColin Finck             /* <STAR>, <next> ... <ENDCHILD> */
1239c2c66affSColin Finck             state->progLength += 4;
1240c2c66affSColin Finck             goto quantifier;
1241c2c66affSColin Finck           case '?':
1242c2c66affSColin Finck             state->result = NewRENode(state, REOP_QUANT);
1243c2c66affSColin Finck             if (!state->result)
1244c2c66affSColin Finck                 return FALSE;
1245c2c66affSColin Finck             state->result->u.range.min = 0;
1246c2c66affSColin Finck             state->result->u.range.max = 1;
1247c2c66affSColin Finck             /* <OPT>, <next> ... <ENDCHILD> */
1248c2c66affSColin Finck             state->progLength += 4;
1249c2c66affSColin Finck             goto quantifier;
1250c2c66affSColin Finck           case '{':       /* balance '}' */
1251c2c66affSColin Finck           {
1252c2c66affSColin Finck             INT err;
1253c2c66affSColin Finck 
1254c2c66affSColin Finck             err = ParseMinMaxQuantifier(state, FALSE);
1255c2c66affSColin Finck             if (err == 0)
1256c2c66affSColin Finck                 goto quantifier;
1257c2c66affSColin Finck             if (err == -1)
1258c2c66affSColin Finck                 return TRUE;
1259c2c66affSColin Finck 
1260c2c66affSColin Finck             ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1261c2c66affSColin Finck             return FALSE;
1262c2c66affSColin Finck           }
1263c2c66affSColin Finck           default:;
1264c2c66affSColin Finck         }
1265c2c66affSColin Finck     }
1266c2c66affSColin Finck     return TRUE;
1267c2c66affSColin Finck 
1268c2c66affSColin Finck quantifier:
1269c2c66affSColin Finck     if (state->treeDepth == TREE_DEPTH_MAX) {
1270c2c66affSColin Finck         ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1271c2c66affSColin Finck         return FALSE;
1272c2c66affSColin Finck     }
1273c2c66affSColin Finck 
1274c2c66affSColin Finck     ++state->treeDepth;
1275c2c66affSColin Finck     ++state->cp;
1276c2c66affSColin Finck     state->result->kid = term;
1277c2c66affSColin Finck     if (state->cp < state->cpend && *state->cp == '?') {
1278c2c66affSColin Finck         ++state->cp;
1279c2c66affSColin Finck         state->result->u.range.greedy = FALSE;
1280c2c66affSColin Finck     } else {
1281c2c66affSColin Finck         state->result->u.range.greedy = TRUE;
1282c2c66affSColin Finck     }
1283c2c66affSColin Finck     return TRUE;
1284c2c66affSColin Finck }
1285c2c66affSColin Finck 
1286c2c66affSColin Finck /*
1287c2c66affSColin Finck  *  item:       assertion               An item is either an assertion or
1288c2c66affSColin Finck  *              quantatom               a quantified atom.
1289c2c66affSColin Finck  *
1290c2c66affSColin Finck  *  assertion:  '^'                     Assertions match beginning of string
1291c2c66affSColin Finck  *                                      (or line if the class static property
1292c2c66affSColin Finck  *                                      RegExp.multiline is true).
1293c2c66affSColin Finck  *              '$'                     End of string (or line if the class
1294c2c66affSColin Finck  *                                      static property RegExp.multiline is
1295c2c66affSColin Finck  *                                      true).
1296c2c66affSColin Finck  *              '\b'                    Word boundary (between \w and \W).
1297c2c66affSColin Finck  *              '\B'                    Word non-boundary.
1298c2c66affSColin Finck  *
1299c2c66affSColin Finck  *  quantatom:  atom                    An unquantified atom.
1300c2c66affSColin Finck  *              quantatom '{' n ',' m '}'
1301c2c66affSColin Finck  *                                      Atom must occur between n and m times.
1302c2c66affSColin Finck  *              quantatom '{' n ',' '}' Atom must occur at least n times.
1303c2c66affSColin Finck  *              quantatom '{' n '}'     Atom must occur exactly n times.
1304c2c66affSColin Finck  *              quantatom '*'           Zero or more times (same as {0,}).
1305c2c66affSColin Finck  *              quantatom '+'           One or more times (same as {1,}).
1306c2c66affSColin Finck  *              quantatom '?'           Zero or one time (same as {0,1}).
1307c2c66affSColin Finck  *
1308c2c66affSColin Finck  *              any of which can be optionally followed by '?' for ungreedy
1309c2c66affSColin Finck  *
1310c2c66affSColin Finck  *  atom:       '(' regexp ')'          A parenthesized regexp (what matched
1311c2c66affSColin Finck  *                                      can be addressed using a backreference,
1312c2c66affSColin Finck  *                                      see '\' n below).
1313c2c66affSColin Finck  *              '.'                     Matches any char except '\n'.
1314c2c66affSColin Finck  *              '[' classlist ']'       A character class.
1315c2c66affSColin Finck  *              '[' '^' classlist ']'   A negated character class.
1316c2c66affSColin Finck  *              '\f'                    Form Feed.
1317c2c66affSColin Finck  *              '\n'                    Newline (Line Feed).
1318c2c66affSColin Finck  *              '\r'                    Carriage Return.
1319c2c66affSColin Finck  *              '\t'                    Horizontal Tab.
1320c2c66affSColin Finck  *              '\v'                    Vertical Tab.
1321c2c66affSColin Finck  *              '\d'                    A digit (same as [0-9]).
1322c2c66affSColin Finck  *              '\D'                    A non-digit.
1323c2c66affSColin Finck  *              '\w'                    A word character, [0-9a-z_A-Z].
1324c2c66affSColin Finck  *              '\W'                    A non-word character.
1325c2c66affSColin Finck  *              '\s'                    A whitespace character, [ \b\f\n\r\t\v].
1326c2c66affSColin Finck  *              '\S'                    A non-whitespace character.
1327c2c66affSColin Finck  *              '\' n                   A backreference to the nth (n decimal
1328c2c66affSColin Finck  *                                      and positive) parenthesized expression.
1329c2c66affSColin Finck  *              '\' octal               An octal escape sequence (octal must be
1330c2c66affSColin Finck  *                                      two or three digits long, unless it is
1331c2c66affSColin Finck  *                                      0 for the null character).
1332c2c66affSColin Finck  *              '\x' hex                A hex escape (hex must be two digits).
1333c2c66affSColin Finck  *              '\u' unicode            A unicode escape (must be four digits).
1334c2c66affSColin Finck  *              '\c' ctrl               A control character, ctrl is a letter.
1335c2c66affSColin Finck  *              '\' literalatomchar     Any character except one of the above
1336c2c66affSColin Finck  *                                      that follow '\' in an atom.
1337c2c66affSColin Finck  *              otheratomchar           Any character not first among the other
1338c2c66affSColin Finck  *                                      atom right-hand sides.
1339c2c66affSColin Finck  */
1340c2c66affSColin Finck static BOOL
ParseTerm(CompilerState * state)1341c2c66affSColin Finck ParseTerm(CompilerState *state)
1342c2c66affSColin Finck {
1343c2c66affSColin Finck     WCHAR c = *state->cp++;
1344c2c66affSColin Finck     UINT nDigits;
1345c2c66affSColin Finck     UINT num, tmp, n, i;
1346c2c66affSColin Finck     const WCHAR *termStart;
1347c2c66affSColin Finck 
1348c2c66affSColin Finck     switch (c) {
1349c2c66affSColin Finck     /* assertions and atoms */
1350c2c66affSColin Finck       case '^':
1351c2c66affSColin Finck         state->result = NewRENode(state, REOP_BOL);
1352c2c66affSColin Finck         if (!state->result)
1353c2c66affSColin Finck             return FALSE;
1354c2c66affSColin Finck         state->progLength++;
1355c2c66affSColin Finck         return TRUE;
1356c2c66affSColin Finck       case '$':
1357c2c66affSColin Finck         state->result = NewRENode(state, REOP_EOL);
1358c2c66affSColin Finck         if (!state->result)
1359c2c66affSColin Finck             return FALSE;
1360c2c66affSColin Finck         state->progLength++;
1361c2c66affSColin Finck         return TRUE;
1362c2c66affSColin Finck       case '\\':
1363c2c66affSColin Finck         if (state->cp >= state->cpend) {
1364c2c66affSColin Finck             /* a trailing '\' is an error */
1365c2c66affSColin Finck             ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1366c2c66affSColin Finck             return FALSE;
1367c2c66affSColin Finck         }
1368c2c66affSColin Finck         c = *state->cp++;
1369c2c66affSColin Finck         switch (c) {
1370c2c66affSColin Finck         /* assertion escapes */
1371c2c66affSColin Finck           case 'b' :
1372c2c66affSColin Finck             state->result = NewRENode(state, REOP_WBDRY);
1373c2c66affSColin Finck             if (!state->result)
1374c2c66affSColin Finck                 return FALSE;
1375c2c66affSColin Finck             state->progLength++;
1376c2c66affSColin Finck             return TRUE;
1377c2c66affSColin Finck           case 'B':
1378c2c66affSColin Finck             state->result = NewRENode(state, REOP_WNONBDRY);
1379c2c66affSColin Finck             if (!state->result)
1380c2c66affSColin Finck                 return FALSE;
1381c2c66affSColin Finck             state->progLength++;
1382c2c66affSColin Finck             return TRUE;
1383c2c66affSColin Finck           /* Decimal escape */
1384c2c66affSColin Finck           case '0':
1385c2c66affSColin Finck               /* Give a strict warning. See also the note below. */
1386c2c66affSColin Finck               WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n");
1387c2c66affSColin Finck      doOctal:
1388c2c66affSColin Finck             num = 0;
1389c2c66affSColin Finck             while (state->cp < state->cpend) {
1390c2c66affSColin Finck                 c = *state->cp;
1391c2c66affSColin Finck                 if (c < '0' || '7' < c)
1392c2c66affSColin Finck                     break;
1393c2c66affSColin Finck                 state->cp++;
1394c2c66affSColin Finck                 tmp = 8 * num + (UINT)JS7_UNDEC(c);
1395c2c66affSColin Finck                 if (tmp > 0377)
1396c2c66affSColin Finck                     break;
1397c2c66affSColin Finck                 num = tmp;
1398c2c66affSColin Finck             }
1399c2c66affSColin Finck             c = (WCHAR)num;
1400c2c66affSColin Finck     doFlat:
1401c2c66affSColin Finck             state->result = NewRENode(state, REOP_FLAT);
1402c2c66affSColin Finck             if (!state->result)
1403c2c66affSColin Finck                 return FALSE;
1404c2c66affSColin Finck             state->result->u.flat.chr = c;
1405c2c66affSColin Finck             state->result->u.flat.length = 1;
1406c2c66affSColin Finck             state->progLength += 3;
1407c2c66affSColin Finck             break;
1408c2c66affSColin Finck           case '1':
1409c2c66affSColin Finck           case '2':
1410c2c66affSColin Finck           case '3':
1411c2c66affSColin Finck           case '4':
1412c2c66affSColin Finck           case '5':
1413c2c66affSColin Finck           case '6':
1414c2c66affSColin Finck           case '7':
1415c2c66affSColin Finck           case '8':
1416c2c66affSColin Finck           case '9':
1417c2c66affSColin Finck             termStart = state->cp - 1;
1418c2c66affSColin Finck             num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1419c2c66affSColin Finck             if (state->flags & JSREG_FIND_PAREN_ERROR)
1420c2c66affSColin Finck                 return FALSE;
1421c2c66affSColin Finck             if (num == OVERFLOW_VALUE) {
1422c2c66affSColin Finck                 /* Give a strict mode warning. */
1423c2c66affSColin Finck                 WARN("back-reference exceeds number of capturing parentheses\n");
1424c2c66affSColin Finck 
1425c2c66affSColin Finck                 /*
1426c2c66affSColin Finck                  * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1427c2c66affSColin Finck                  * error here. However, for compatibility with IE, we treat the
1428c2c66affSColin Finck                  * whole backref as flat if the first character in it is not a
1429c2c66affSColin Finck                  * valid octal character, and as an octal escape otherwise.
1430c2c66affSColin Finck                  */
1431c2c66affSColin Finck                 state->cp = termStart;
1432c2c66affSColin Finck                 if (c >= '8') {
1433c2c66affSColin Finck                     /* Treat this as flat. termStart - 1 is the \. */
1434c2c66affSColin Finck                     c = '\\';
1435c2c66affSColin Finck                     goto asFlat;
1436c2c66affSColin Finck                 }
1437c2c66affSColin Finck 
1438c2c66affSColin Finck                 /* Treat this as an octal escape. */
1439c2c66affSColin Finck                 goto doOctal;
1440c2c66affSColin Finck             }
1441c2c66affSColin Finck             assert(1 <= num && num <= 0x10000);
1442c2c66affSColin Finck             state->result = NewRENode(state, REOP_BACKREF);
1443c2c66affSColin Finck             if (!state->result)
1444c2c66affSColin Finck                 return FALSE;
1445c2c66affSColin Finck             state->result->u.parenIndex = num - 1;
1446c2c66affSColin Finck             state->progLength
1447c2c66affSColin Finck                 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1448c2c66affSColin Finck             break;
1449c2c66affSColin Finck           /* Control escape */
1450c2c66affSColin Finck           case 'f':
1451c2c66affSColin Finck             c = 0xC;
1452c2c66affSColin Finck             goto doFlat;
1453c2c66affSColin Finck           case 'n':
1454c2c66affSColin Finck             c = 0xA;
1455c2c66affSColin Finck             goto doFlat;
1456c2c66affSColin Finck           case 'r':
1457c2c66affSColin Finck             c = 0xD;
1458c2c66affSColin Finck             goto doFlat;
1459c2c66affSColin Finck           case 't':
1460c2c66affSColin Finck             c = 0x9;
1461c2c66affSColin Finck             goto doFlat;
1462c2c66affSColin Finck           case 'v':
1463c2c66affSColin Finck             c = 0xB;
1464c2c66affSColin Finck             goto doFlat;
1465c2c66affSColin Finck           /* Control letter */
1466c2c66affSColin Finck           case 'c':
1467c2c66affSColin Finck             if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1468c2c66affSColin Finck                 c = (WCHAR) (*state->cp++ & 0x1F);
1469c2c66affSColin Finck             } else {
1470c2c66affSColin Finck                 /* back off to accepting the original '\' as a literal */
1471c2c66affSColin Finck                 --state->cp;
1472c2c66affSColin Finck                 c = '\\';
1473c2c66affSColin Finck             }
1474c2c66affSColin Finck             goto doFlat;
1475c2c66affSColin Finck           /* HexEscapeSequence */
1476c2c66affSColin Finck           case 'x':
1477c2c66affSColin Finck             nDigits = 2;
1478c2c66affSColin Finck             goto lexHex;
1479c2c66affSColin Finck           /* UnicodeEscapeSequence */
1480c2c66affSColin Finck           case 'u':
1481c2c66affSColin Finck             nDigits = 4;
1482c2c66affSColin Finck lexHex:
1483c2c66affSColin Finck             n = 0;
1484c2c66affSColin Finck             for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1485c2c66affSColin Finck                 UINT digit;
1486c2c66affSColin Finck                 c = *state->cp++;
1487c2c66affSColin Finck                 if (!isASCIIHexDigit(c, &digit)) {
1488c2c66affSColin Finck                     /*
1489c2c66affSColin Finck                      * Back off to accepting the original 'u' or 'x' as a
1490c2c66affSColin Finck                      * literal.
1491c2c66affSColin Finck                      */
1492c2c66affSColin Finck                     state->cp -= i + 2;
1493c2c66affSColin Finck                     n = *state->cp++;
1494c2c66affSColin Finck                     break;
1495c2c66affSColin Finck                 }
1496c2c66affSColin Finck                 n = (n << 4) | digit;
1497c2c66affSColin Finck             }
1498c2c66affSColin Finck             c = (WCHAR) n;
1499c2c66affSColin Finck             goto doFlat;
1500c2c66affSColin Finck           /* Character class escapes */
1501c2c66affSColin Finck           case 'd':
1502c2c66affSColin Finck             state->result = NewRENode(state, REOP_DIGIT);
1503c2c66affSColin Finck doSimple:
1504c2c66affSColin Finck             if (!state->result)
1505c2c66affSColin Finck                 return FALSE;
1506c2c66affSColin Finck             state->progLength++;
1507c2c66affSColin Finck             break;
1508c2c66affSColin Finck           case 'D':
1509c2c66affSColin Finck             state->result = NewRENode(state, REOP_NONDIGIT);
1510c2c66affSColin Finck             goto doSimple;
1511c2c66affSColin Finck           case 's':
1512c2c66affSColin Finck             state->result = NewRENode(state, REOP_SPACE);
1513c2c66affSColin Finck             goto doSimple;
1514c2c66affSColin Finck           case 'S':
1515c2c66affSColin Finck             state->result = NewRENode(state, REOP_NONSPACE);
1516c2c66affSColin Finck             goto doSimple;
1517c2c66affSColin Finck           case 'w':
1518c2c66affSColin Finck             state->result = NewRENode(state, REOP_ALNUM);
1519c2c66affSColin Finck             goto doSimple;
1520c2c66affSColin Finck           case 'W':
1521c2c66affSColin Finck             state->result = NewRENode(state, REOP_NONALNUM);
1522c2c66affSColin Finck             goto doSimple;
1523c2c66affSColin Finck           /* IdentityEscape */
1524c2c66affSColin Finck           default:
1525c2c66affSColin Finck             state->result = NewRENode(state, REOP_FLAT);
1526c2c66affSColin Finck             if (!state->result)
1527c2c66affSColin Finck                 return FALSE;
1528c2c66affSColin Finck             state->result->u.flat.chr = c;
1529c2c66affSColin Finck             state->result->u.flat.length = 1;
1530c2c66affSColin Finck             state->result->kid = (void *) (state->cp - 1);
1531c2c66affSColin Finck             state->progLength += 3;
1532c2c66affSColin Finck             break;
1533c2c66affSColin Finck         }
1534c2c66affSColin Finck         break;
1535c2c66affSColin Finck       case '[':
1536c2c66affSColin Finck         state->result = NewRENode(state, REOP_CLASS);
1537c2c66affSColin Finck         if (!state->result)
1538c2c66affSColin Finck             return FALSE;
1539c2c66affSColin Finck         termStart = state->cp;
1540c2c66affSColin Finck         state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1541c2c66affSColin Finck         for (;;) {
1542c2c66affSColin Finck             if (state->cp == state->cpend) {
1543c2c66affSColin Finck                 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1544c2c66affSColin Finck                                         JSMSG_UNTERM_CLASS, termStart);
1545c2c66affSColin Finck 
1546c2c66affSColin Finck                 return FALSE;
1547c2c66affSColin Finck             }
1548c2c66affSColin Finck             if (*state->cp == '\\') {
1549c2c66affSColin Finck                 state->cp++;
1550c2c66affSColin Finck                 if (state->cp != state->cpend)
1551c2c66affSColin Finck                     state->cp++;
1552c2c66affSColin Finck                 continue;
1553c2c66affSColin Finck             }
1554c2c66affSColin Finck             if (*state->cp == ']') {
1555c2c66affSColin Finck                 state->result->u.ucclass.kidlen = state->cp - termStart;
1556c2c66affSColin Finck                 break;
1557c2c66affSColin Finck             }
1558c2c66affSColin Finck             state->cp++;
1559c2c66affSColin Finck         }
1560c2c66affSColin Finck         for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1561c2c66affSColin Finck             if (!state->classCache[i].start) {
1562c2c66affSColin Finck                 state->classCache[i].start = termStart;
1563c2c66affSColin Finck                 state->classCache[i].length = state->result->u.ucclass.kidlen;
1564c2c66affSColin Finck                 state->classCache[i].index = state->classCount;
1565c2c66affSColin Finck                 break;
1566c2c66affSColin Finck             }
1567c2c66affSColin Finck             if (state->classCache[i].length ==
1568c2c66affSColin Finck                 state->result->u.ucclass.kidlen) {
1569c2c66affSColin Finck                 for (n = 0; ; n++) {
1570c2c66affSColin Finck                     if (n == state->classCache[i].length) {
1571c2c66affSColin Finck                         state->result->u.ucclass.index
1572c2c66affSColin Finck                             = state->classCache[i].index;
1573c2c66affSColin Finck                         goto claim;
1574c2c66affSColin Finck                     }
1575c2c66affSColin Finck                     if (state->classCache[i].start[n] != termStart[n])
1576c2c66affSColin Finck                         break;
1577c2c66affSColin Finck                 }
1578c2c66affSColin Finck             }
1579c2c66affSColin Finck         }
1580c2c66affSColin Finck         state->result->u.ucclass.index = state->classCount++;
1581c2c66affSColin Finck 
1582c2c66affSColin Finck     claim:
1583c2c66affSColin Finck         /*
1584c2c66affSColin Finck          * Call CalculateBitmapSize now as we want any errors it finds
1585c2c66affSColin Finck          * to be reported during the parse phase, not at execution.
1586c2c66affSColin Finck          */
1587c2c66affSColin Finck         if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1588c2c66affSColin Finck             return FALSE;
1589c2c66affSColin Finck         /*
1590c2c66affSColin Finck          * Update classBitmapsMem with number of bytes to hold bmsize bits,
1591c2c66affSColin Finck          * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1592c2c66affSColin Finck          * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1593c2c66affSColin Finck          */
1594c2c66affSColin Finck         n = (state->result->u.ucclass.bmsize >> 3) + 1;
1595c2c66affSColin Finck         if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1596c2c66affSColin Finck             ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1597c2c66affSColin Finck             return FALSE;
1598c2c66affSColin Finck         }
1599c2c66affSColin Finck         state->classBitmapsMem += n;
1600c2c66affSColin Finck         /* CLASS, <index> */
1601c2c66affSColin Finck         state->progLength
1602c2c66affSColin Finck             += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1603c2c66affSColin Finck         break;
1604c2c66affSColin Finck 
1605c2c66affSColin Finck       case '.':
1606c2c66affSColin Finck         state->result = NewRENode(state, REOP_DOT);
1607c2c66affSColin Finck         goto doSimple;
1608c2c66affSColin Finck 
1609c2c66affSColin Finck       case '{':
1610c2c66affSColin Finck       {
1611c2c66affSColin Finck         const WCHAR *errp = state->cp--;
1612c2c66affSColin Finck         INT err;
1613c2c66affSColin Finck 
1614c2c66affSColin Finck         err = ParseMinMaxQuantifier(state, TRUE);
1615c2c66affSColin Finck         state->cp = errp;
1616c2c66affSColin Finck 
1617c2c66affSColin Finck         if (err < 0)
1618c2c66affSColin Finck             goto asFlat;
1619c2c66affSColin Finck 
1620c2c66affSColin Finck         /* FALL THROUGH */
1621c2c66affSColin Finck       }
1622c2c66affSColin Finck       case '*':
1623c2c66affSColin Finck       case '+':
1624c2c66affSColin Finck       case '?':
1625c2c66affSColin Finck         ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1626c2c66affSColin Finck                                 JSMSG_BAD_QUANTIFIER, state->cp - 1);
1627c2c66affSColin Finck         return FALSE;
1628c2c66affSColin Finck       default:
1629c2c66affSColin Finck asFlat:
1630c2c66affSColin Finck         state->result = NewRENode(state, REOP_FLAT);
1631c2c66affSColin Finck         if (!state->result)
1632c2c66affSColin Finck             return FALSE;
1633c2c66affSColin Finck         state->result->u.flat.chr = c;
1634c2c66affSColin Finck         state->result->u.flat.length = 1;
1635c2c66affSColin Finck         state->result->kid = (void *) (state->cp - 1);
1636c2c66affSColin Finck         state->progLength += 3;
1637c2c66affSColin Finck         break;
1638c2c66affSColin Finck     }
1639c2c66affSColin Finck     return ParseQuantifier(state);
1640c2c66affSColin Finck }
1641c2c66affSColin Finck 
1642c2c66affSColin Finck /*
1643c2c66affSColin Finck  * Top-down regular expression grammar, based closely on Perl4.
1644c2c66affSColin Finck  *
1645c2c66affSColin Finck  *  regexp:     altern                  A regular expression is one or more
1646c2c66affSColin Finck  *              altern '|' regexp       alternatives separated by vertical bar.
1647c2c66affSColin Finck  */
1648c2c66affSColin Finck #define INITIAL_STACK_SIZE  128
1649c2c66affSColin Finck 
1650c2c66affSColin Finck static BOOL
ParseRegExp(CompilerState * state)1651c2c66affSColin Finck ParseRegExp(CompilerState *state)
1652c2c66affSColin Finck {
1653c2c66affSColin Finck     size_t parenIndex;
1654c2c66affSColin Finck     RENode *operand;
1655c2c66affSColin Finck     REOpData *operatorStack;
1656c2c66affSColin Finck     RENode **operandStack;
1657c2c66affSColin Finck     REOp op;
1658c2c66affSColin Finck     INT i;
1659c2c66affSColin Finck     BOOL result = FALSE;
1660c2c66affSColin Finck 
1661c2c66affSColin Finck     INT operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
1662c2c66affSColin Finck     INT operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
1663c2c66affSColin Finck 
1664c2c66affSColin Finck     /* Watch out for empty regexp */
1665c2c66affSColin Finck     if (state->cp == state->cpend) {
1666c2c66affSColin Finck         state->result = NewRENode(state, REOP_EMPTY);
1667c2c66affSColin Finck         return (state->result != NULL);
1668c2c66affSColin Finck     }
1669c2c66affSColin Finck 
1670c2c66affSColin Finck     operatorStack = heap_alloc(sizeof(REOpData) * operatorStackSize);
1671c2c66affSColin Finck     if (!operatorStack)
1672c2c66affSColin Finck         return FALSE;
1673c2c66affSColin Finck 
1674c2c66affSColin Finck     operandStack = heap_alloc(sizeof(RENode *) * operandStackSize);
1675c2c66affSColin Finck     if (!operandStack)
1676c2c66affSColin Finck         goto out;
1677c2c66affSColin Finck 
1678c2c66affSColin Finck     for (;;) {
1679c2c66affSColin Finck         parenIndex = state->parenCount;
1680c2c66affSColin Finck         if (state->cp == state->cpend) {
1681c2c66affSColin Finck             /*
1682c2c66affSColin Finck              * If we are at the end of the regexp and we're short one or more
1683c2c66affSColin Finck              * operands, the regexp must have the form /x|/ or some such, with
1684c2c66affSColin Finck              * left parentheses making us short more than one operand.
1685c2c66affSColin Finck              */
1686c2c66affSColin Finck             if (operatorSP >= operandSP) {
1687c2c66affSColin Finck                 operand = NewRENode(state, REOP_EMPTY);
1688c2c66affSColin Finck                 if (!operand)
1689c2c66affSColin Finck                     goto out;
1690c2c66affSColin Finck                 goto pushOperand;
1691c2c66affSColin Finck             }
1692c2c66affSColin Finck         } else {
1693c2c66affSColin Finck             switch (*state->cp) {
1694c2c66affSColin Finck               case '(':
1695c2c66affSColin Finck                 ++state->cp;
1696c2c66affSColin Finck                 if (state->cp + 1 < state->cpend &&
1697c2c66affSColin Finck                     *state->cp == '?' &&
1698c2c66affSColin Finck                     (state->cp[1] == '=' ||
1699c2c66affSColin Finck                      state->cp[1] == '!' ||
1700c2c66affSColin Finck                      state->cp[1] == ':')) {
1701c2c66affSColin Finck                     switch (state->cp[1]) {
1702c2c66affSColin Finck                       case '=':
1703c2c66affSColin Finck                         op = REOP_ASSERT;
1704c2c66affSColin Finck                         /* ASSERT, <next>, ... ASSERTTEST */
1705c2c66affSColin Finck                         state->progLength += 4;
1706c2c66affSColin Finck                         break;
1707c2c66affSColin Finck                       case '!':
1708c2c66affSColin Finck                         op = REOP_ASSERT_NOT;
1709c2c66affSColin Finck                         /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
1710c2c66affSColin Finck                         state->progLength += 4;
1711c2c66affSColin Finck                         break;
1712c2c66affSColin Finck                       default:
1713c2c66affSColin Finck                         op = REOP_LPARENNON;
1714c2c66affSColin Finck                         break;
1715c2c66affSColin Finck                     }
1716c2c66affSColin Finck                     state->cp += 2;
1717c2c66affSColin Finck                 } else {
1718c2c66affSColin Finck                     op = REOP_LPAREN;
1719c2c66affSColin Finck                     /* LPAREN, <index>, ... RPAREN, <index> */
1720c2c66affSColin Finck                     state->progLength
1721c2c66affSColin Finck                         += 2 * (1 + GetCompactIndexWidth(parenIndex));
1722c2c66affSColin Finck                     state->parenCount++;
1723c2c66affSColin Finck                     if (state->parenCount == 65535) {
1724c2c66affSColin Finck                         ReportRegExpError(state, JSREPORT_ERROR,
1725c2c66affSColin Finck                                           JSMSG_TOO_MANY_PARENS);
1726c2c66affSColin Finck                         goto out;
1727c2c66affSColin Finck                     }
1728c2c66affSColin Finck                 }
1729c2c66affSColin Finck                 goto pushOperator;
1730c2c66affSColin Finck 
1731c2c66affSColin Finck               case ')':
1732c2c66affSColin Finck                 /*
1733c2c66affSColin Finck                  * If there's no stacked open parenthesis, throw syntax error.
1734c2c66affSColin Finck                  */
1735c2c66affSColin Finck                 for (i = operatorSP - 1; ; i--) {
1736c2c66affSColin Finck                     if (i < 0) {
1737c2c66affSColin Finck                         ReportRegExpError(state, JSREPORT_ERROR,
1738c2c66affSColin Finck                                           JSMSG_UNMATCHED_RIGHT_PAREN);
1739c2c66affSColin Finck                         goto out;
1740c2c66affSColin Finck                     }
1741c2c66affSColin Finck                     if (operatorStack[i].op == REOP_ASSERT ||
1742c2c66affSColin Finck                         operatorStack[i].op == REOP_ASSERT_NOT ||
1743c2c66affSColin Finck                         operatorStack[i].op == REOP_LPARENNON ||
1744c2c66affSColin Finck                         operatorStack[i].op == REOP_LPAREN) {
1745c2c66affSColin Finck                         break;
1746c2c66affSColin Finck                     }
1747c2c66affSColin Finck                 }
1748c2c66affSColin Finck                 /* FALL THROUGH */
1749c2c66affSColin Finck 
1750c2c66affSColin Finck               case '|':
1751c2c66affSColin Finck                 /* Expected an operand before these, so make an empty one */
1752c2c66affSColin Finck                 operand = NewRENode(state, REOP_EMPTY);
1753c2c66affSColin Finck                 if (!operand)
1754c2c66affSColin Finck                     goto out;
1755c2c66affSColin Finck                 goto pushOperand;
1756c2c66affSColin Finck 
1757c2c66affSColin Finck               default:
1758c2c66affSColin Finck                 if (!ParseTerm(state))
1759c2c66affSColin Finck                     goto out;
1760c2c66affSColin Finck                 operand = state->result;
1761c2c66affSColin Finck pushOperand:
1762c2c66affSColin Finck                 if (operandSP == operandStackSize) {
1763c2c66affSColin Finck                     RENode **tmp;
1764c2c66affSColin Finck                     operandStackSize += operandStackSize;
1765c2c66affSColin Finck                     tmp = heap_realloc(operandStack, sizeof(RENode *) * operandStackSize);
1766c2c66affSColin Finck                     if (!tmp)
1767c2c66affSColin Finck                         goto out;
1768c2c66affSColin Finck                     operandStack = tmp;
1769c2c66affSColin Finck                 }
1770c2c66affSColin Finck                 operandStack[operandSP++] = operand;
1771c2c66affSColin Finck                 break;
1772c2c66affSColin Finck             }
1773c2c66affSColin Finck         }
1774c2c66affSColin Finck 
1775c2c66affSColin Finck         /* At the end; process remaining operators. */
1776c2c66affSColin Finck restartOperator:
1777c2c66affSColin Finck         if (state->cp == state->cpend) {
1778c2c66affSColin Finck             while (operatorSP) {
1779c2c66affSColin Finck                 --operatorSP;
1780c2c66affSColin Finck                 if (!ProcessOp(state, &operatorStack[operatorSP],
1781c2c66affSColin Finck                                operandStack, operandSP))
1782c2c66affSColin Finck                     goto out;
1783c2c66affSColin Finck                 --operandSP;
1784c2c66affSColin Finck             }
1785c2c66affSColin Finck             assert(operandSP == 1);
1786c2c66affSColin Finck             state->result = operandStack[0];
1787c2c66affSColin Finck             result = TRUE;
1788c2c66affSColin Finck             goto out;
1789c2c66affSColin Finck         }
1790c2c66affSColin Finck 
1791c2c66affSColin Finck         switch (*state->cp) {
1792c2c66affSColin Finck           case '|':
1793c2c66affSColin Finck             /* Process any stacked 'concat' operators */
1794c2c66affSColin Finck             ++state->cp;
1795c2c66affSColin Finck             while (operatorSP &&
1796c2c66affSColin Finck                    operatorStack[operatorSP - 1].op == REOP_CONCAT) {
1797c2c66affSColin Finck                 --operatorSP;
1798c2c66affSColin Finck                 if (!ProcessOp(state, &operatorStack[operatorSP],
1799c2c66affSColin Finck                                operandStack, operandSP)) {
1800c2c66affSColin Finck                     goto out;
1801c2c66affSColin Finck                 }
1802c2c66affSColin Finck                 --operandSP;
1803c2c66affSColin Finck             }
1804c2c66affSColin Finck             op = REOP_ALT;
1805c2c66affSColin Finck             goto pushOperator;
1806c2c66affSColin Finck 
1807c2c66affSColin Finck           case ')':
1808c2c66affSColin Finck             /*
1809c2c66affSColin Finck              * If there's no stacked open parenthesis, throw syntax error.
1810c2c66affSColin Finck              */
1811c2c66affSColin Finck             for (i = operatorSP - 1; ; i--) {
1812c2c66affSColin Finck                 if (i < 0) {
1813c2c66affSColin Finck                     ReportRegExpError(state, JSREPORT_ERROR,
1814c2c66affSColin Finck                                       JSMSG_UNMATCHED_RIGHT_PAREN);
1815c2c66affSColin Finck                     goto out;
1816c2c66affSColin Finck                 }
1817c2c66affSColin Finck                 if (operatorStack[i].op == REOP_ASSERT ||
1818c2c66affSColin Finck                     operatorStack[i].op == REOP_ASSERT_NOT ||
1819c2c66affSColin Finck                     operatorStack[i].op == REOP_LPARENNON ||
1820c2c66affSColin Finck                     operatorStack[i].op == REOP_LPAREN) {
1821c2c66affSColin Finck                     break;
1822c2c66affSColin Finck                 }
1823c2c66affSColin Finck             }
1824c2c66affSColin Finck             ++state->cp;
1825c2c66affSColin Finck 
1826c2c66affSColin Finck             /* Process everything on the stack until the open parenthesis. */
1827c2c66affSColin Finck             for (;;) {
1828c2c66affSColin Finck                 assert(operatorSP);
1829c2c66affSColin Finck                 --operatorSP;
1830c2c66affSColin Finck                 switch (operatorStack[operatorSP].op) {
1831c2c66affSColin Finck                   case REOP_ASSERT:
1832c2c66affSColin Finck                   case REOP_ASSERT_NOT:
1833c2c66affSColin Finck                   case REOP_LPAREN:
1834c2c66affSColin Finck                     operand = NewRENode(state, operatorStack[operatorSP].op);
1835c2c66affSColin Finck                     if (!operand)
1836c2c66affSColin Finck                         goto out;
1837c2c66affSColin Finck                     operand->u.parenIndex =
1838c2c66affSColin Finck                         operatorStack[operatorSP].parenIndex;
1839c2c66affSColin Finck                     assert(operandSP);
1840c2c66affSColin Finck                     operand->kid = operandStack[operandSP - 1];
1841c2c66affSColin Finck                     operandStack[operandSP - 1] = operand;
1842c2c66affSColin Finck                     if (state->treeDepth == TREE_DEPTH_MAX) {
1843c2c66affSColin Finck                         ReportRegExpError(state, JSREPORT_ERROR,
1844c2c66affSColin Finck                                           JSMSG_REGEXP_TOO_COMPLEX);
1845c2c66affSColin Finck                         goto out;
1846c2c66affSColin Finck                     }
1847c2c66affSColin Finck                     ++state->treeDepth;
1848c2c66affSColin Finck                     /* FALL THROUGH */
1849c2c66affSColin Finck 
1850c2c66affSColin Finck                   case REOP_LPARENNON:
1851c2c66affSColin Finck                     state->result = operandStack[operandSP - 1];
1852c2c66affSColin Finck                     if (!ParseQuantifier(state))
1853c2c66affSColin Finck                         goto out;
1854c2c66affSColin Finck                     operandStack[operandSP - 1] = state->result;
1855c2c66affSColin Finck                     goto restartOperator;
1856c2c66affSColin Finck                   default:
1857c2c66affSColin Finck                     if (!ProcessOp(state, &operatorStack[operatorSP],
1858c2c66affSColin Finck                                    operandStack, operandSP))
1859c2c66affSColin Finck                         goto out;
1860c2c66affSColin Finck                     --operandSP;
1861c2c66affSColin Finck                     break;
1862c2c66affSColin Finck                 }
1863c2c66affSColin Finck             }
1864c2c66affSColin Finck             break;
1865c2c66affSColin Finck 
1866c2c66affSColin Finck           case '{':
1867c2c66affSColin Finck           {
1868c2c66affSColin Finck             const WCHAR *errp = state->cp;
1869c2c66affSColin Finck 
1870c2c66affSColin Finck             if (ParseMinMaxQuantifier(state, TRUE) < 0) {
1871c2c66affSColin Finck                 /*
1872c2c66affSColin Finck                  * This didn't even scan correctly as a quantifier, so we should
1873c2c66affSColin Finck                  * treat it as flat.
1874c2c66affSColin Finck                  */
1875c2c66affSColin Finck                 op = REOP_CONCAT;
1876c2c66affSColin Finck                 goto pushOperator;
1877c2c66affSColin Finck             }
1878c2c66affSColin Finck 
1879c2c66affSColin Finck             state->cp = errp;
1880c2c66affSColin Finck             /* FALL THROUGH */
1881c2c66affSColin Finck           }
1882c2c66affSColin Finck 
1883c2c66affSColin Finck           case '+':
1884c2c66affSColin Finck           case '*':
1885c2c66affSColin Finck           case '?':
1886c2c66affSColin Finck             ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
1887c2c66affSColin Finck                                     state->cp);
1888c2c66affSColin Finck             result = FALSE;
1889c2c66affSColin Finck             goto out;
1890c2c66affSColin Finck 
1891c2c66affSColin Finck           default:
1892c2c66affSColin Finck             /* Anything else is the start of the next term. */
1893c2c66affSColin Finck             op = REOP_CONCAT;
1894c2c66affSColin Finck pushOperator:
1895c2c66affSColin Finck             if (operatorSP == operatorStackSize) {
1896c2c66affSColin Finck                 REOpData *tmp;
1897c2c66affSColin Finck                 operatorStackSize += operatorStackSize;
1898c2c66affSColin Finck                 tmp = heap_realloc(operatorStack, sizeof(REOpData) * operatorStackSize);
1899c2c66affSColin Finck                 if (!tmp)
1900c2c66affSColin Finck                     goto out;
1901c2c66affSColin Finck                 operatorStack = tmp;
1902c2c66affSColin Finck             }
1903c2c66affSColin Finck             operatorStack[operatorSP].op = op;
1904c2c66affSColin Finck             operatorStack[operatorSP].errPos = state->cp;
1905c2c66affSColin Finck             operatorStack[operatorSP++].parenIndex = parenIndex;
1906c2c66affSColin Finck             break;
1907c2c66affSColin Finck         }
1908c2c66affSColin Finck     }
1909c2c66affSColin Finck out:
1910c2c66affSColin Finck     heap_free(operatorStack);
1911c2c66affSColin Finck     heap_free(operandStack);
1912c2c66affSColin Finck     return result;
1913c2c66affSColin Finck }
1914c2c66affSColin Finck 
1915c2c66affSColin Finck /*
1916c2c66affSColin Finck  * Save the current state of the match - the position in the input
1917c2c66affSColin Finck  * text as well as the position in the bytecode. The state of any
1918c2c66affSColin Finck  * parent expressions is also saved (preceding state).
1919c2c66affSColin Finck  * Contents of parenCount parentheses from parenIndex are also saved.
1920c2c66affSColin Finck  */
1921c2c66affSColin Finck static REBackTrackData *
PushBackTrackState(REGlobalData * gData,REOp op,jsbytecode * target,match_state_t * x,const WCHAR * cp,size_t parenIndex,size_t parenCount)1922c2c66affSColin Finck PushBackTrackState(REGlobalData *gData, REOp op,
1923c2c66affSColin Finck                    jsbytecode *target, match_state_t *x, const WCHAR *cp,
1924c2c66affSColin Finck                    size_t parenIndex, size_t parenCount)
1925c2c66affSColin Finck {
1926c2c66affSColin Finck     size_t i;
1927c2c66affSColin Finck     REBackTrackData *result =
1928c2c66affSColin Finck         (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
1929c2c66affSColin Finck 
1930c2c66affSColin Finck     size_t sz = sizeof(REBackTrackData) +
1931c2c66affSColin Finck                 gData->stateStackTop * sizeof(REProgState) +
1932c2c66affSColin Finck                 parenCount * sizeof(RECapture);
1933c2c66affSColin Finck 
1934c2c66affSColin Finck     ptrdiff_t btsize = gData->backTrackStackSize;
1935c2c66affSColin Finck     ptrdiff_t btincr = ((char *)result + sz) -
1936c2c66affSColin Finck                        ((char *)gData->backTrackStack + btsize);
1937c2c66affSColin Finck 
1938c2c66affSColin Finck     TRACE("\tBT_Push: %lu,%lu\n", (ULONG_PTR)parenIndex, (ULONG_PTR)parenCount);
1939c2c66affSColin Finck 
1940c2c66affSColin Finck     JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
1941c2c66affSColin Finck     if (btincr > 0) {
1942c2c66affSColin Finck         ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
1943c2c66affSColin Finck 
1944c2c66affSColin Finck         JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
1945c2c66affSColin Finck         btincr = ((btincr+btsize-1)/btsize)*btsize;
1946c2c66affSColin Finck         gData->backTrackStack = heap_pool_grow(gData->pool, gData->backTrackStack, btsize, btincr);
1947c2c66affSColin Finck         if (!gData->backTrackStack) {
1948c2c66affSColin Finck             js_ReportOutOfScriptQuota(gData->cx);
1949c2c66affSColin Finck             gData->ok = FALSE;
1950c2c66affSColin Finck             return NULL;
1951c2c66affSColin Finck         }
1952c2c66affSColin Finck         gData->backTrackStackSize = btsize + btincr;
1953c2c66affSColin Finck         result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
1954c2c66affSColin Finck     }
1955c2c66affSColin Finck     gData->backTrackSP = result;
1956c2c66affSColin Finck     result->sz = gData->cursz;
1957c2c66affSColin Finck     gData->cursz = sz;
1958c2c66affSColin Finck 
1959c2c66affSColin Finck     result->backtrack_op = op;
1960c2c66affSColin Finck     result->backtrack_pc = target;
1961c2c66affSColin Finck     result->cp = cp;
1962c2c66affSColin Finck     result->parenCount = parenCount;
1963c2c66affSColin Finck     result->parenIndex = parenIndex;
1964c2c66affSColin Finck 
1965c2c66affSColin Finck     result->saveStateStackTop = gData->stateStackTop;
1966c2c66affSColin Finck     assert(gData->stateStackTop);
1967c2c66affSColin Finck     memcpy(result + 1, gData->stateStack,
1968c2c66affSColin Finck            sizeof(REProgState) * result->saveStateStackTop);
1969c2c66affSColin Finck 
1970c2c66affSColin Finck     if (parenCount != 0) {
1971c2c66affSColin Finck         memcpy((char *)(result + 1) +
1972c2c66affSColin Finck                sizeof(REProgState) * result->saveStateStackTop,
1973c2c66affSColin Finck                &x->parens[parenIndex],
1974c2c66affSColin Finck                sizeof(RECapture) * parenCount);
1975c2c66affSColin Finck         for (i = 0; i != parenCount; i++)
1976c2c66affSColin Finck             x->parens[parenIndex + i].index = -1;
1977c2c66affSColin Finck     }
1978c2c66affSColin Finck 
1979c2c66affSColin Finck     return result;
1980c2c66affSColin Finck }
1981c2c66affSColin Finck 
1982c2c66affSColin Finck static inline match_state_t *
FlatNIMatcher(REGlobalData * gData,match_state_t * x,const WCHAR * matchChars,size_t length)1983c2c66affSColin Finck FlatNIMatcher(REGlobalData *gData, match_state_t *x, const WCHAR *matchChars,
1984c2c66affSColin Finck               size_t length)
1985c2c66affSColin Finck {
1986c2c66affSColin Finck     size_t i;
1987c2c66affSColin Finck     assert(gData->cpend >= x->cp);
1988c2c66affSColin Finck     if (length > (size_t)(gData->cpend - x->cp))
1989c2c66affSColin Finck         return NULL;
1990c2c66affSColin Finck     for (i = 0; i != length; i++) {
1991*3e2d6582SAmine Khaldi         if (towupper(matchChars[i]) != towupper(x->cp[i]))
1992c2c66affSColin Finck             return NULL;
1993c2c66affSColin Finck     }
1994c2c66affSColin Finck     x->cp += length;
1995c2c66affSColin Finck     return x;
1996c2c66affSColin Finck }
1997c2c66affSColin Finck 
1998c2c66affSColin Finck /*
1999c2c66affSColin Finck  * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2000c2c66affSColin Finck  * 2. If E is not a character then go to step 6.
2001c2c66affSColin Finck  * 3. Let ch be E's character.
2002c2c66affSColin Finck  * 4. Let A be a one-element RECharSet containing the character ch.
2003c2c66affSColin Finck  * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2004c2c66affSColin Finck  * 6. E must be an integer. Let n be that integer.
2005c2c66affSColin Finck  * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2006c2c66affSColin Finck  * 8. Return an internal Matcher closure that takes two arguments, a State x
2007c2c66affSColin Finck  *    and a Continuation c, and performs the following:
2008c2c66affSColin Finck  *     1. Let cap be x's captures internal array.
2009c2c66affSColin Finck  *     2. Let s be cap[n].
2010c2c66affSColin Finck  *     3. If s is undefined, then call c(x) and return its result.
2011c2c66affSColin Finck  *     4. Let e be x's endIndex.
2012c2c66affSColin Finck  *     5. Let len be s's length.
2013c2c66affSColin Finck  *     6. Let f be e+len.
2014c2c66affSColin Finck  *     7. If f>InputLength, return failure.
2015c2c66affSColin Finck  *     8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2016c2c66affSColin Finck  *        such that Canonicalize(s[i]) is not the same character as
2017c2c66affSColin Finck  *        Canonicalize(Input [e+i]), then return failure.
2018c2c66affSColin Finck  *     9. Let y be the State (f, cap).
2019c2c66affSColin Finck  *     10. Call c(y) and return its result.
2020c2c66affSColin Finck  */
2021c2c66affSColin Finck static match_state_t *
BackrefMatcher(REGlobalData * gData,match_state_t * x,size_t parenIndex)2022c2c66affSColin Finck BackrefMatcher(REGlobalData *gData, match_state_t *x, size_t parenIndex)
2023c2c66affSColin Finck {
2024c2c66affSColin Finck     size_t len, i;
2025c2c66affSColin Finck     const WCHAR *parenContent;
2026c2c66affSColin Finck     RECapture *cap = &x->parens[parenIndex];
2027c2c66affSColin Finck 
2028c2c66affSColin Finck     if (cap->index == -1)
2029c2c66affSColin Finck         return x;
2030c2c66affSColin Finck 
2031c2c66affSColin Finck     len = cap->length;
2032c2c66affSColin Finck     if (x->cp + len > gData->cpend)
2033c2c66affSColin Finck         return NULL;
2034c2c66affSColin Finck 
2035c2c66affSColin Finck     parenContent = &gData->cpbegin[cap->index];
2036c2c66affSColin Finck     if (gData->regexp->flags & REG_FOLD) {
2037c2c66affSColin Finck         for (i = 0; i < len; i++) {
2038*3e2d6582SAmine Khaldi             if (towupper(parenContent[i]) != towupper(x->cp[i]))
2039c2c66affSColin Finck                 return NULL;
2040c2c66affSColin Finck         }
2041c2c66affSColin Finck     } else {
2042c2c66affSColin Finck         for (i = 0; i < len; i++) {
2043c2c66affSColin Finck             if (parenContent[i] != x->cp[i])
2044c2c66affSColin Finck                 return NULL;
2045c2c66affSColin Finck         }
2046c2c66affSColin Finck     }
2047c2c66affSColin Finck     x->cp += len;
2048c2c66affSColin Finck     return x;
2049c2c66affSColin Finck }
2050c2c66affSColin Finck 
2051c2c66affSColin Finck /* Add a single character to the RECharSet */
2052c2c66affSColin Finck static void
AddCharacterToCharSet(RECharSet * cs,WCHAR c)2053c2c66affSColin Finck AddCharacterToCharSet(RECharSet *cs, WCHAR c)
2054c2c66affSColin Finck {
2055c2c66affSColin Finck     UINT byteIndex = (UINT)(c >> 3);
2056c2c66affSColin Finck     assert(c <= cs->length);
2057c2c66affSColin Finck     cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2058c2c66affSColin Finck }
2059c2c66affSColin Finck 
2060c2c66affSColin Finck 
2061c2c66affSColin Finck /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2062c2c66affSColin Finck static void
AddCharacterRangeToCharSet(RECharSet * cs,UINT c1,UINT c2)2063c2c66affSColin Finck AddCharacterRangeToCharSet(RECharSet *cs, UINT c1, UINT c2)
2064c2c66affSColin Finck {
2065c2c66affSColin Finck     UINT i;
2066c2c66affSColin Finck 
2067c2c66affSColin Finck     UINT byteIndex1 = c1 >> 3;
2068c2c66affSColin Finck     UINT byteIndex2 = c2 >> 3;
2069c2c66affSColin Finck 
2070c2c66affSColin Finck     assert(c2 <= cs->length && c1 <= c2);
2071c2c66affSColin Finck 
2072c2c66affSColin Finck     c1 &= 0x7;
2073c2c66affSColin Finck     c2 &= 0x7;
2074c2c66affSColin Finck 
2075c2c66affSColin Finck     if (byteIndex1 == byteIndex2) {
2076c2c66affSColin Finck         cs->u.bits[byteIndex1] |= ((BYTE)0xFF >> (7 - (c2 - c1))) << c1;
2077c2c66affSColin Finck     } else {
2078c2c66affSColin Finck         cs->u.bits[byteIndex1] |= 0xFF << c1;
2079c2c66affSColin Finck         for (i = byteIndex1 + 1; i < byteIndex2; i++)
2080c2c66affSColin Finck             cs->u.bits[i] = 0xFF;
2081c2c66affSColin Finck         cs->u.bits[byteIndex2] |= (BYTE)0xFF >> (7 - c2);
2082c2c66affSColin Finck     }
2083c2c66affSColin Finck }
2084c2c66affSColin Finck 
2085c2c66affSColin Finck /* Compile the source of the class into a RECharSet */
2086c2c66affSColin Finck static BOOL
ProcessCharSet(REGlobalData * gData,RECharSet * charSet)2087c2c66affSColin Finck ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
2088c2c66affSColin Finck {
2089c2c66affSColin Finck     const WCHAR *src, *end;
2090c2c66affSColin Finck     BOOL inRange = FALSE;
2091c2c66affSColin Finck     WCHAR rangeStart = 0;
2092c2c66affSColin Finck     UINT byteLength, n;
2093c2c66affSColin Finck     WCHAR c, thisCh;
2094c2c66affSColin Finck     INT nDigits, i;
2095c2c66affSColin Finck 
2096c2c66affSColin Finck     assert(!charSet->converted);
2097c2c66affSColin Finck     /*
2098c2c66affSColin Finck      * Assert that startIndex and length points to chars inside [] inside
2099c2c66affSColin Finck      * source string.
2100c2c66affSColin Finck      */
2101c2c66affSColin Finck     assert(1 <= charSet->u.src.startIndex);
2102c2c66affSColin Finck     assert(charSet->u.src.startIndex < gData->regexp->source_len);
2103c2c66affSColin Finck     assert(charSet->u.src.length <= gData->regexp->source_len
2104c2c66affSColin Finck             - 1 - charSet->u.src.startIndex);
2105c2c66affSColin Finck 
2106c2c66affSColin Finck     charSet->converted = TRUE;
2107c2c66affSColin Finck     src = gData->regexp->source + charSet->u.src.startIndex;
2108c2c66affSColin Finck 
2109c2c66affSColin Finck     end = src + charSet->u.src.length;
2110c2c66affSColin Finck 
2111c2c66affSColin Finck     assert(src[-1] == '[' && end[0] == ']');
2112c2c66affSColin Finck 
2113c2c66affSColin Finck     byteLength = (charSet->length >> 3) + 1;
2114c2c66affSColin Finck     charSet->u.bits = heap_alloc(byteLength);
2115c2c66affSColin Finck     if (!charSet->u.bits) {
2116c2c66affSColin Finck         JS_ReportOutOfMemory(gData->cx);
2117c2c66affSColin Finck         gData->ok = FALSE;
2118c2c66affSColin Finck         return FALSE;
2119c2c66affSColin Finck     }
2120c2c66affSColin Finck     memset(charSet->u.bits, 0, byteLength);
2121c2c66affSColin Finck 
2122c2c66affSColin Finck     if (src == end)
2123c2c66affSColin Finck         return TRUE;
2124c2c66affSColin Finck 
2125c2c66affSColin Finck     if (*src == '^') {
2126c2c66affSColin Finck         assert(charSet->sense == FALSE);
2127c2c66affSColin Finck         ++src;
2128c2c66affSColin Finck     } else {
2129c2c66affSColin Finck         assert(charSet->sense == TRUE);
2130c2c66affSColin Finck     }
2131c2c66affSColin Finck 
2132c2c66affSColin Finck     while (src != end) {
2133c2c66affSColin Finck         switch (*src) {
2134c2c66affSColin Finck           case '\\':
2135c2c66affSColin Finck             ++src;
2136c2c66affSColin Finck             c = *src++;
2137c2c66affSColin Finck             switch (c) {
2138c2c66affSColin Finck               case 'b':
2139c2c66affSColin Finck                 thisCh = 0x8;
2140c2c66affSColin Finck                 break;
2141c2c66affSColin Finck               case 'f':
2142c2c66affSColin Finck                 thisCh = 0xC;
2143c2c66affSColin Finck                 break;
2144c2c66affSColin Finck               case 'n':
2145c2c66affSColin Finck                 thisCh = 0xA;
2146c2c66affSColin Finck                 break;
2147c2c66affSColin Finck               case 'r':
2148c2c66affSColin Finck                 thisCh = 0xD;
2149c2c66affSColin Finck                 break;
2150c2c66affSColin Finck               case 't':
2151c2c66affSColin Finck                 thisCh = 0x9;
2152c2c66affSColin Finck                 break;
2153c2c66affSColin Finck               case 'v':
2154c2c66affSColin Finck                 thisCh = 0xB;
2155c2c66affSColin Finck                 break;
2156c2c66affSColin Finck               case 'c':
2157c2c66affSColin Finck                 if (src < end && JS_ISWORD(*src)) {
2158c2c66affSColin Finck                     thisCh = (WCHAR)(*src++ & 0x1F);
2159c2c66affSColin Finck                 } else {
2160c2c66affSColin Finck                     --src;
2161c2c66affSColin Finck                     thisCh = '\\';
2162c2c66affSColin Finck                 }
2163c2c66affSColin Finck                 break;
2164c2c66affSColin Finck               case 'x':
2165c2c66affSColin Finck                 nDigits = 2;
2166c2c66affSColin Finck                 goto lexHex;
2167c2c66affSColin Finck               case 'u':
2168c2c66affSColin Finck                 nDigits = 4;
2169c2c66affSColin Finck             lexHex:
2170c2c66affSColin Finck                 n = 0;
2171c2c66affSColin Finck                 for (i = 0; (i < nDigits) && (src < end); i++) {
2172c2c66affSColin Finck                     UINT digit;
2173c2c66affSColin Finck                     c = *src++;
2174c2c66affSColin Finck                     if (!isASCIIHexDigit(c, &digit)) {
2175c2c66affSColin Finck                         /*
2176c2c66affSColin Finck                          * Back off to accepting the original '\'
2177c2c66affSColin Finck                          * as a literal
2178c2c66affSColin Finck                          */
2179c2c66affSColin Finck                         src -= i + 1;
2180c2c66affSColin Finck                         n = '\\';
2181c2c66affSColin Finck                         break;
2182c2c66affSColin Finck                     }
2183c2c66affSColin Finck                     n = (n << 4) | digit;
2184c2c66affSColin Finck                 }
2185c2c66affSColin Finck                 thisCh = (WCHAR)n;
2186c2c66affSColin Finck                 break;
2187c2c66affSColin Finck               case '0':
2188c2c66affSColin Finck               case '1':
2189c2c66affSColin Finck               case '2':
2190c2c66affSColin Finck               case '3':
2191c2c66affSColin Finck               case '4':
2192c2c66affSColin Finck               case '5':
2193c2c66affSColin Finck               case '6':
2194c2c66affSColin Finck               case '7':
2195c2c66affSColin Finck                 /*
2196c2c66affSColin Finck                  *  This is a non-ECMA extension - decimal escapes (in this
2197c2c66affSColin Finck                  *  case, octal!) are supposed to be an error inside class
2198c2c66affSColin Finck                  *  ranges, but supported here for backwards compatibility.
2199c2c66affSColin Finck                  */
2200c2c66affSColin Finck                 n = JS7_UNDEC(c);
2201c2c66affSColin Finck                 c = *src;
2202c2c66affSColin Finck                 if ('0' <= c && c <= '7') {
2203c2c66affSColin Finck                     src++;
2204c2c66affSColin Finck                     n = 8 * n + JS7_UNDEC(c);
2205c2c66affSColin Finck                     c = *src;
2206c2c66affSColin Finck                     if ('0' <= c && c <= '7') {
2207c2c66affSColin Finck                         src++;
2208c2c66affSColin Finck                         i = 8 * n + JS7_UNDEC(c);
2209c2c66affSColin Finck                         if (i <= 0377)
2210c2c66affSColin Finck                             n = i;
2211c2c66affSColin Finck                         else
2212c2c66affSColin Finck                             src--;
2213c2c66affSColin Finck                     }
2214c2c66affSColin Finck                 }
2215c2c66affSColin Finck                 thisCh = (WCHAR)n;
2216c2c66affSColin Finck                 break;
2217c2c66affSColin Finck 
2218c2c66affSColin Finck               case 'd':
2219c2c66affSColin Finck                 AddCharacterRangeToCharSet(charSet, '0', '9');
2220c2c66affSColin Finck                 continue;   /* don't need range processing */
2221c2c66affSColin Finck               case 'D':
2222c2c66affSColin Finck                 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2223c2c66affSColin Finck                 AddCharacterRangeToCharSet(charSet,
2224c2c66affSColin Finck                                            (WCHAR)('9' + 1),
2225c2c66affSColin Finck                                            (WCHAR)charSet->length);
2226c2c66affSColin Finck                 continue;
2227c2c66affSColin Finck               case 's':
2228c2c66affSColin Finck                 for (i = (INT)charSet->length; i >= 0; i--)
2229*3e2d6582SAmine Khaldi                     if (iswspace(i))
2230c2c66affSColin Finck                         AddCharacterToCharSet(charSet, (WCHAR)i);
2231c2c66affSColin Finck                 continue;
2232c2c66affSColin Finck               case 'S':
2233c2c66affSColin Finck                 for (i = (INT)charSet->length; i >= 0; i--)
2234*3e2d6582SAmine Khaldi                     if (!iswspace(i))
2235c2c66affSColin Finck                         AddCharacterToCharSet(charSet, (WCHAR)i);
2236c2c66affSColin Finck                 continue;
2237c2c66affSColin Finck               case 'w':
2238c2c66affSColin Finck                 for (i = (INT)charSet->length; i >= 0; i--)
2239c2c66affSColin Finck                     if (JS_ISWORD(i))
2240c2c66affSColin Finck                         AddCharacterToCharSet(charSet, (WCHAR)i);
2241c2c66affSColin Finck                 continue;
2242c2c66affSColin Finck               case 'W':
2243c2c66affSColin Finck                 for (i = (INT)charSet->length; i >= 0; i--)
2244c2c66affSColin Finck                     if (!JS_ISWORD(i))
2245c2c66affSColin Finck                         AddCharacterToCharSet(charSet, (WCHAR)i);
2246c2c66affSColin Finck                 continue;
2247c2c66affSColin Finck               default:
2248c2c66affSColin Finck                 thisCh = c;
2249c2c66affSColin Finck                 break;
2250c2c66affSColin Finck 
2251c2c66affSColin Finck             }
2252c2c66affSColin Finck             break;
2253c2c66affSColin Finck 
2254c2c66affSColin Finck           default:
2255c2c66affSColin Finck             thisCh = *src++;
2256c2c66affSColin Finck             break;
2257c2c66affSColin Finck 
2258c2c66affSColin Finck         }
2259c2c66affSColin Finck         if (inRange) {
2260c2c66affSColin Finck             if (gData->regexp->flags & REG_FOLD) {
2261c2c66affSColin Finck                 assert(rangeStart <= thisCh);
2262c2c66affSColin Finck                 for (i = rangeStart; i <= thisCh; i++) {
2263c2c66affSColin Finck                     WCHAR uch, dch;
2264c2c66affSColin Finck 
2265c2c66affSColin Finck                     AddCharacterToCharSet(charSet, i);
2266*3e2d6582SAmine Khaldi                     uch = towupper(i);
2267*3e2d6582SAmine Khaldi                     dch = towlower(i);
2268c2c66affSColin Finck                     if (i != uch)
2269c2c66affSColin Finck                         AddCharacterToCharSet(charSet, uch);
2270c2c66affSColin Finck                     if (i != dch)
2271c2c66affSColin Finck                         AddCharacterToCharSet(charSet, dch);
2272c2c66affSColin Finck                 }
2273c2c66affSColin Finck             } else {
2274c2c66affSColin Finck                 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2275c2c66affSColin Finck             }
2276c2c66affSColin Finck             inRange = FALSE;
2277c2c66affSColin Finck         } else {
2278c2c66affSColin Finck             if (gData->regexp->flags & REG_FOLD) {
2279*3e2d6582SAmine Khaldi                 AddCharacterToCharSet(charSet, towupper(thisCh));
2280*3e2d6582SAmine Khaldi                 AddCharacterToCharSet(charSet, towlower(thisCh));
2281c2c66affSColin Finck             } else {
2282c2c66affSColin Finck                 AddCharacterToCharSet(charSet, thisCh);
2283c2c66affSColin Finck             }
2284c2c66affSColin Finck             if (src < end - 1) {
2285c2c66affSColin Finck                 if (*src == '-') {
2286c2c66affSColin Finck                     ++src;
2287c2c66affSColin Finck                     inRange = TRUE;
2288c2c66affSColin Finck                     rangeStart = thisCh;
2289c2c66affSColin Finck                 }
2290c2c66affSColin Finck             }
2291c2c66affSColin Finck         }
2292c2c66affSColin Finck     }
2293c2c66affSColin Finck     return TRUE;
2294c2c66affSColin Finck }
2295c2c66affSColin Finck 
2296c2c66affSColin Finck static BOOL
ReallocStateStack(REGlobalData * gData)2297c2c66affSColin Finck ReallocStateStack(REGlobalData *gData)
2298c2c66affSColin Finck {
2299c2c66affSColin Finck     size_t limit = gData->stateStackLimit;
2300c2c66affSColin Finck     size_t sz = sizeof(REProgState) * limit;
2301c2c66affSColin Finck 
2302c2c66affSColin Finck     gData->stateStack = heap_pool_grow(gData->pool, gData->stateStack, sz, sz);
2303c2c66affSColin Finck     if (!gData->stateStack) {
2304c2c66affSColin Finck         js_ReportOutOfScriptQuota(gData->cx);
2305c2c66affSColin Finck         gData->ok = FALSE;
2306c2c66affSColin Finck         return FALSE;
2307c2c66affSColin Finck     }
2308c2c66affSColin Finck     gData->stateStackLimit = limit + limit;
2309c2c66affSColin Finck     return TRUE;
2310c2c66affSColin Finck }
2311c2c66affSColin Finck 
2312c2c66affSColin Finck #define PUSH_STATE_STACK(data)                                                \
2313c2c66affSColin Finck     do {                                                                      \
2314c2c66affSColin Finck         ++(data)->stateStackTop;                                              \
2315c2c66affSColin Finck         if ((data)->stateStackTop == (data)->stateStackLimit &&               \
2316c2c66affSColin Finck             !ReallocStateStack((data))) {                                     \
2317c2c66affSColin Finck             return NULL;                                                      \
2318c2c66affSColin Finck         }                                                                     \
2319c2c66affSColin Finck     }while(0)
2320c2c66affSColin Finck 
2321c2c66affSColin Finck /*
2322c2c66affSColin Finck  * Apply the current op against the given input to see if it's going to match
2323c2c66affSColin Finck  * or fail. Return false if we don't get a match, true if we do. If updatecp is
2324c2c66affSColin Finck  * true, then update the current state's cp. Always update startpc to the next
2325c2c66affSColin Finck  * op.
2326c2c66affSColin Finck  */
2327c2c66affSColin Finck static inline match_state_t *
SimpleMatch(REGlobalData * gData,match_state_t * x,REOp op,jsbytecode ** startpc,BOOL updatecp)2328c2c66affSColin Finck SimpleMatch(REGlobalData *gData, match_state_t *x, REOp op,
2329c2c66affSColin Finck             jsbytecode **startpc, BOOL updatecp)
2330c2c66affSColin Finck {
2331c2c66affSColin Finck     match_state_t *result = NULL;
2332c2c66affSColin Finck     WCHAR matchCh;
2333c2c66affSColin Finck     size_t parenIndex;
2334c2c66affSColin Finck     size_t offset, length, index;
2335c2c66affSColin Finck     jsbytecode *pc = *startpc;  /* pc has already been incremented past op */
2336c2c66affSColin Finck     const WCHAR *source;
2337c2c66affSColin Finck     const WCHAR *startcp = x->cp;
2338c2c66affSColin Finck     WCHAR ch;
2339c2c66affSColin Finck     RECharSet *charSet;
2340c2c66affSColin Finck 
2341c2c66affSColin Finck     const char *opname = reop_names[op];
2342c2c66affSColin Finck     TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2343c2c66affSColin Finck           (int)gData->stateStackTop * 2, "", opname);
2344c2c66affSColin Finck 
2345c2c66affSColin Finck     switch (op) {
2346c2c66affSColin Finck       case REOP_EMPTY:
2347c2c66affSColin Finck         result = x;
2348c2c66affSColin Finck         break;
2349c2c66affSColin Finck       case REOP_BOL:
2350c2c66affSColin Finck         if (x->cp != gData->cpbegin) {
2351c2c66affSColin Finck             if (/*!gData->cx->regExpStatics.multiline &&  FIXME !!! */
2352c2c66affSColin Finck                 !(gData->regexp->flags & REG_MULTILINE)) {
2353c2c66affSColin Finck                 break;
2354c2c66affSColin Finck             }
2355c2c66affSColin Finck             if (!RE_IS_LINE_TERM(x->cp[-1]))
2356c2c66affSColin Finck                 break;
2357c2c66affSColin Finck         }
2358c2c66affSColin Finck         result = x;
2359c2c66affSColin Finck         break;
2360c2c66affSColin Finck       case REOP_EOL:
2361c2c66affSColin Finck         if (x->cp != gData->cpend) {
2362c2c66affSColin Finck             if (/*!gData->cx->regExpStatics.multiline &&*/
2363c2c66affSColin Finck                 !(gData->regexp->flags & REG_MULTILINE)) {
2364c2c66affSColin Finck                 break;
2365c2c66affSColin Finck             }
2366c2c66affSColin Finck             if (!RE_IS_LINE_TERM(*x->cp))
2367c2c66affSColin Finck                 break;
2368c2c66affSColin Finck         }
2369c2c66affSColin Finck         result = x;
2370c2c66affSColin Finck         break;
2371c2c66affSColin Finck       case REOP_WBDRY:
2372c2c66affSColin Finck         if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2373c2c66affSColin Finck             !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2374c2c66affSColin Finck             result = x;
2375c2c66affSColin Finck         }
2376c2c66affSColin Finck         break;
2377c2c66affSColin Finck       case REOP_WNONBDRY:
2378c2c66affSColin Finck         if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2379c2c66affSColin Finck             (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2380c2c66affSColin Finck             result = x;
2381c2c66affSColin Finck         }
2382c2c66affSColin Finck         break;
2383c2c66affSColin Finck       case REOP_DOT:
2384c2c66affSColin Finck         if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2385c2c66affSColin Finck             result = x;
2386c2c66affSColin Finck             result->cp++;
2387c2c66affSColin Finck         }
2388c2c66affSColin Finck         break;
2389c2c66affSColin Finck       case REOP_DIGIT:
2390c2c66affSColin Finck         if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
2391c2c66affSColin Finck             result = x;
2392c2c66affSColin Finck             result->cp++;
2393c2c66affSColin Finck         }
2394c2c66affSColin Finck         break;
2395c2c66affSColin Finck       case REOP_NONDIGIT:
2396c2c66affSColin Finck         if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
2397c2c66affSColin Finck             result = x;
2398c2c66affSColin Finck             result->cp++;
2399c2c66affSColin Finck         }
2400c2c66affSColin Finck         break;
2401c2c66affSColin Finck       case REOP_ALNUM:
2402c2c66affSColin Finck         if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2403c2c66affSColin Finck             result = x;
2404c2c66affSColin Finck             result->cp++;
2405c2c66affSColin Finck         }
2406c2c66affSColin Finck         break;
2407c2c66affSColin Finck       case REOP_NONALNUM:
2408c2c66affSColin Finck         if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2409c2c66affSColin Finck             result = x;
2410c2c66affSColin Finck             result->cp++;
2411c2c66affSColin Finck         }
2412c2c66affSColin Finck         break;
2413c2c66affSColin Finck       case REOP_SPACE:
2414*3e2d6582SAmine Khaldi         if (x->cp != gData->cpend && iswspace(*x->cp)) {
2415c2c66affSColin Finck             result = x;
2416c2c66affSColin Finck             result->cp++;
2417c2c66affSColin Finck         }
2418c2c66affSColin Finck         break;
2419c2c66affSColin Finck       case REOP_NONSPACE:
2420*3e2d6582SAmine Khaldi         if (x->cp != gData->cpend && !iswspace(*x->cp)) {
2421c2c66affSColin Finck             result = x;
2422c2c66affSColin Finck             result->cp++;
2423c2c66affSColin Finck         }
2424c2c66affSColin Finck         break;
2425c2c66affSColin Finck       case REOP_BACKREF:
2426c2c66affSColin Finck         pc = ReadCompactIndex(pc, &parenIndex);
2427c2c66affSColin Finck         assert(parenIndex < gData->regexp->parenCount);
2428c2c66affSColin Finck         result = BackrefMatcher(gData, x, parenIndex);
2429c2c66affSColin Finck         break;
2430c2c66affSColin Finck       case REOP_FLAT:
2431c2c66affSColin Finck         pc = ReadCompactIndex(pc, &offset);
2432c2c66affSColin Finck         assert(offset < gData->regexp->source_len);
2433c2c66affSColin Finck         pc = ReadCompactIndex(pc, &length);
2434c2c66affSColin Finck         assert(1 <= length);
2435c2c66affSColin Finck         assert(length <= gData->regexp->source_len - offset);
2436c2c66affSColin Finck         if (length <= (size_t)(gData->cpend - x->cp)) {
2437c2c66affSColin Finck             source = gData->regexp->source + offset;
2438c2c66affSColin Finck             TRACE("%s\n", debugstr_wn(source, length));
2439c2c66affSColin Finck             for (index = 0; index != length; index++) {
2440c2c66affSColin Finck                 if (source[index] != x->cp[index])
2441c2c66affSColin Finck                     return NULL;
2442c2c66affSColin Finck             }
2443c2c66affSColin Finck             x->cp += length;
2444c2c66affSColin Finck             result = x;
2445c2c66affSColin Finck         }
2446c2c66affSColin Finck         break;
2447c2c66affSColin Finck       case REOP_FLAT1:
2448c2c66affSColin Finck         matchCh = *pc++;
2449c2c66affSColin Finck         TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2450c2c66affSColin Finck         if (x->cp != gData->cpend && *x->cp == matchCh) {
2451c2c66affSColin Finck             result = x;
2452c2c66affSColin Finck             result->cp++;
2453c2c66affSColin Finck         }
2454c2c66affSColin Finck         break;
2455c2c66affSColin Finck       case REOP_FLATi:
2456c2c66affSColin Finck         pc = ReadCompactIndex(pc, &offset);
2457c2c66affSColin Finck         assert(offset < gData->regexp->source_len);
2458c2c66affSColin Finck         pc = ReadCompactIndex(pc, &length);
2459c2c66affSColin Finck         assert(1 <= length);
2460c2c66affSColin Finck         assert(length <= gData->regexp->source_len - offset);
2461c2c66affSColin Finck         source = gData->regexp->source;
2462c2c66affSColin Finck         result = FlatNIMatcher(gData, x, source + offset, length);
2463c2c66affSColin Finck         break;
2464c2c66affSColin Finck       case REOP_FLAT1i:
2465c2c66affSColin Finck         matchCh = *pc++;
2466*3e2d6582SAmine Khaldi         if (x->cp != gData->cpend && towupper(*x->cp) == towupper(matchCh)) {
2467c2c66affSColin Finck             result = x;
2468c2c66affSColin Finck             result->cp++;
2469c2c66affSColin Finck         }
2470c2c66affSColin Finck         break;
2471c2c66affSColin Finck       case REOP_UCFLAT1:
2472c2c66affSColin Finck         matchCh = GET_ARG(pc);
2473c2c66affSColin Finck         TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2474c2c66affSColin Finck         pc += ARG_LEN;
2475c2c66affSColin Finck         if (x->cp != gData->cpend && *x->cp == matchCh) {
2476c2c66affSColin Finck             result = x;
2477c2c66affSColin Finck             result->cp++;
2478c2c66affSColin Finck         }
2479c2c66affSColin Finck         break;
2480c2c66affSColin Finck       case REOP_UCFLAT1i:
2481c2c66affSColin Finck         matchCh = GET_ARG(pc);
2482c2c66affSColin Finck         pc += ARG_LEN;
2483*3e2d6582SAmine Khaldi         if (x->cp != gData->cpend && towupper(*x->cp) == towupper(matchCh)) {
2484c2c66affSColin Finck             result = x;
2485c2c66affSColin Finck             result->cp++;
2486c2c66affSColin Finck         }
2487c2c66affSColin Finck         break;
2488c2c66affSColin Finck       case REOP_CLASS:
2489c2c66affSColin Finck         pc = ReadCompactIndex(pc, &index);
2490c2c66affSColin Finck         assert(index < gData->regexp->classCount);
2491c2c66affSColin Finck         if (x->cp != gData->cpend) {
2492c2c66affSColin Finck             charSet = &gData->regexp->classList[index];
2493c2c66affSColin Finck             assert(charSet->converted);
2494c2c66affSColin Finck             ch = *x->cp;
2495c2c66affSColin Finck             index = ch >> 3;
2496c2c66affSColin Finck             if (charSet->length != 0 &&
2497c2c66affSColin Finck                 ch <= charSet->length &&
2498c2c66affSColin Finck                 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2499c2c66affSColin Finck                 result = x;
2500c2c66affSColin Finck                 result->cp++;
2501c2c66affSColin Finck             }
2502c2c66affSColin Finck         }
2503c2c66affSColin Finck         break;
2504c2c66affSColin Finck       case REOP_NCLASS:
2505c2c66affSColin Finck         pc = ReadCompactIndex(pc, &index);
2506c2c66affSColin Finck         assert(index < gData->regexp->classCount);
2507c2c66affSColin Finck         if (x->cp != gData->cpend) {
2508c2c66affSColin Finck             charSet = &gData->regexp->classList[index];
2509c2c66affSColin Finck             assert(charSet->converted);
2510c2c66affSColin Finck             ch = *x->cp;
2511c2c66affSColin Finck             index = ch >> 3;
2512c2c66affSColin Finck             if (charSet->length == 0 ||
2513c2c66affSColin Finck                 ch > charSet->length ||
2514c2c66affSColin Finck                 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2515c2c66affSColin Finck                 result = x;
2516c2c66affSColin Finck                 result->cp++;
2517c2c66affSColin Finck             }
2518c2c66affSColin Finck         }
2519c2c66affSColin Finck         break;
2520c2c66affSColin Finck 
2521c2c66affSColin Finck       default:
2522c2c66affSColin Finck         assert(FALSE);
2523c2c66affSColin Finck     }
2524c2c66affSColin Finck     if (result) {
2525c2c66affSColin Finck         if (!updatecp)
2526c2c66affSColin Finck             x->cp = startcp;
2527c2c66affSColin Finck         *startpc = pc;
2528c2c66affSColin Finck         TRACE(" *\n");
2529c2c66affSColin Finck         return result;
2530c2c66affSColin Finck     }
2531c2c66affSColin Finck     x->cp = startcp;
2532c2c66affSColin Finck     return NULL;
2533c2c66affSColin Finck }
2534c2c66affSColin Finck 
2535c2c66affSColin Finck static inline match_state_t *
ExecuteREBytecode(REGlobalData * gData,match_state_t * x)2536c2c66affSColin Finck ExecuteREBytecode(REGlobalData *gData, match_state_t *x)
2537c2c66affSColin Finck {
2538c2c66affSColin Finck     match_state_t *result = NULL;
2539c2c66affSColin Finck     REBackTrackData *backTrackData;
2540c2c66affSColin Finck     jsbytecode *nextpc, *testpc;
2541c2c66affSColin Finck     REOp nextop;
2542c2c66affSColin Finck     RECapture *cap;
2543c2c66affSColin Finck     REProgState *curState;
2544c2c66affSColin Finck     const WCHAR *startcp;
2545c2c66affSColin Finck     size_t parenIndex, k;
2546c2c66affSColin Finck     size_t parenSoFar = 0;
2547c2c66affSColin Finck 
2548c2c66affSColin Finck     WCHAR matchCh1, matchCh2;
2549c2c66affSColin Finck     RECharSet *charSet;
2550c2c66affSColin Finck 
2551c2c66affSColin Finck     BOOL anchor;
2552c2c66affSColin Finck     jsbytecode *pc = gData->regexp->program;
2553c2c66affSColin Finck     REOp op = (REOp) *pc++;
2554c2c66affSColin Finck 
2555c2c66affSColin Finck     /*
2556c2c66affSColin Finck      * If the first node is a simple match, step the index into the string
2557c2c66affSColin Finck      * until that match is made, or fail if it can't be found at all.
2558c2c66affSColin Finck      */
2559c2c66affSColin Finck     if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & REG_STICKY)) {
2560c2c66affSColin Finck         anchor = FALSE;
2561c2c66affSColin Finck         while (x->cp <= gData->cpend) {
2562c2c66affSColin Finck             nextpc = pc;    /* reset back to start each time */
2563c2c66affSColin Finck             result = SimpleMatch(gData, x, op, &nextpc, TRUE);
2564c2c66affSColin Finck             if (result) {
2565c2c66affSColin Finck                 anchor = TRUE;
2566c2c66affSColin Finck                 x = result;
2567c2c66affSColin Finck                 pc = nextpc;    /* accept skip to next opcode */
2568c2c66affSColin Finck                 op = (REOp) *pc++;
2569c2c66affSColin Finck                 assert(op < REOP_LIMIT);
2570c2c66affSColin Finck                 break;
2571c2c66affSColin Finck             }
2572c2c66affSColin Finck             gData->skipped++;
2573c2c66affSColin Finck             x->cp++;
2574c2c66affSColin Finck         }
2575c2c66affSColin Finck         if (!anchor)
2576c2c66affSColin Finck             goto bad;
2577c2c66affSColin Finck     }
2578c2c66affSColin Finck 
2579c2c66affSColin Finck     for (;;) {
2580c2c66affSColin Finck         const char *opname = reop_names[op];
2581c2c66affSColin Finck         TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2582c2c66affSColin Finck               (int)gData->stateStackTop * 2, "", opname);
2583c2c66affSColin Finck 
2584c2c66affSColin Finck         if (REOP_IS_SIMPLE(op)) {
2585c2c66affSColin Finck             result = SimpleMatch(gData, x, op, &pc, TRUE);
2586c2c66affSColin Finck         } else {
2587c2c66affSColin Finck             curState = &gData->stateStack[gData->stateStackTop];
2588c2c66affSColin Finck             switch (op) {
2589c2c66affSColin Finck               case REOP_END:
2590c2c66affSColin Finck                 goto good;
2591c2c66affSColin Finck               case REOP_ALTPREREQ2:
2592c2c66affSColin Finck                 nextpc = pc + GET_OFFSET(pc);   /* start of next op */
2593c2c66affSColin Finck                 pc += ARG_LEN;
2594c2c66affSColin Finck                 matchCh2 = GET_ARG(pc);
2595c2c66affSColin Finck                 pc += ARG_LEN;
2596c2c66affSColin Finck                 k = GET_ARG(pc);
2597c2c66affSColin Finck                 pc += ARG_LEN;
2598c2c66affSColin Finck 
2599c2c66affSColin Finck                 if (x->cp != gData->cpend) {
2600c2c66affSColin Finck                     if (*x->cp == matchCh2)
2601c2c66affSColin Finck                         goto doAlt;
2602c2c66affSColin Finck 
2603c2c66affSColin Finck                     charSet = &gData->regexp->classList[k];
2604c2c66affSColin Finck                     if (!charSet->converted && !ProcessCharSet(gData, charSet))
2605c2c66affSColin Finck                         goto bad;
2606c2c66affSColin Finck                     matchCh1 = *x->cp;
2607c2c66affSColin Finck                     k = matchCh1 >> 3;
2608c2c66affSColin Finck                     if ((charSet->length == 0 ||
2609c2c66affSColin Finck                          matchCh1 > charSet->length ||
2610c2c66affSColin Finck                          !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2611c2c66affSColin Finck                         charSet->sense) {
2612c2c66affSColin Finck                         goto doAlt;
2613c2c66affSColin Finck                     }
2614c2c66affSColin Finck                 }
2615c2c66affSColin Finck                 result = NULL;
2616c2c66affSColin Finck                 break;
2617c2c66affSColin Finck 
2618c2c66affSColin Finck               case REOP_ALTPREREQ:
2619c2c66affSColin Finck                 nextpc = pc + GET_OFFSET(pc);   /* start of next op */
2620c2c66affSColin Finck                 pc += ARG_LEN;
2621c2c66affSColin Finck                 matchCh1 = GET_ARG(pc);
2622c2c66affSColin Finck                 pc += ARG_LEN;
2623c2c66affSColin Finck                 matchCh2 = GET_ARG(pc);
2624c2c66affSColin Finck                 pc += ARG_LEN;
2625c2c66affSColin Finck                 if (x->cp == gData->cpend ||
2626c2c66affSColin Finck                     (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2627c2c66affSColin Finck                     result = NULL;
2628c2c66affSColin Finck                     break;
2629c2c66affSColin Finck                 }
2630c2c66affSColin Finck                 /* else fall through... */
2631c2c66affSColin Finck 
2632c2c66affSColin Finck               case REOP_ALT:
2633c2c66affSColin Finck               doAlt:
2634c2c66affSColin Finck                 nextpc = pc + GET_OFFSET(pc);   /* start of next alternate */
2635c2c66affSColin Finck                 pc += ARG_LEN;                  /* start of this alternate */
2636c2c66affSColin Finck                 curState->parenSoFar = parenSoFar;
2637c2c66affSColin Finck                 PUSH_STATE_STACK(gData);
2638c2c66affSColin Finck                 op = (REOp) *pc++;
2639c2c66affSColin Finck                 startcp = x->cp;
2640c2c66affSColin Finck                 if (REOP_IS_SIMPLE(op)) {
2641c2c66affSColin Finck                     if (!SimpleMatch(gData, x, op, &pc, TRUE)) {
2642c2c66affSColin Finck                         op = (REOp) *nextpc++;
2643c2c66affSColin Finck                         pc = nextpc;
2644c2c66affSColin Finck                         continue;
2645c2c66affSColin Finck                     }
2646c2c66affSColin Finck                     result = x;
2647c2c66affSColin Finck                     op = (REOp) *pc++;
2648c2c66affSColin Finck                 }
2649c2c66affSColin Finck                 nextop = (REOp) *nextpc++;
2650c2c66affSColin Finck                 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2651c2c66affSColin Finck                     goto bad;
2652c2c66affSColin Finck                 continue;
2653c2c66affSColin Finck 
2654c2c66affSColin Finck               /*
2655c2c66affSColin Finck                * Occurs at (successful) end of REOP_ALT,
2656c2c66affSColin Finck                */
2657c2c66affSColin Finck               case REOP_JUMP:
2658c2c66affSColin Finck                 /*
2659c2c66affSColin Finck                  * If we have not gotten a result here, it is because of an
2660c2c66affSColin Finck                  * empty match.  Do the same thing REOP_EMPTY would do.
2661c2c66affSColin Finck                  */
2662c2c66affSColin Finck                 if (!result)
2663c2c66affSColin Finck                     result = x;
2664c2c66affSColin Finck 
2665c2c66affSColin Finck                 --gData->stateStackTop;
2666c2c66affSColin Finck                 pc += GET_OFFSET(pc);
2667c2c66affSColin Finck                 op = (REOp) *pc++;
2668c2c66affSColin Finck                 continue;
2669c2c66affSColin Finck 
2670c2c66affSColin Finck               /*
2671c2c66affSColin Finck                * Occurs at last (successful) end of REOP_ALT,
2672c2c66affSColin Finck                */
2673c2c66affSColin Finck               case REOP_ENDALT:
2674c2c66affSColin Finck                 /*
2675c2c66affSColin Finck                  * If we have not gotten a result here, it is because of an
2676c2c66affSColin Finck                  * empty match.  Do the same thing REOP_EMPTY would do.
2677c2c66affSColin Finck                  */
2678c2c66affSColin Finck                 if (!result)
2679c2c66affSColin Finck                     result = x;
2680c2c66affSColin Finck 
2681c2c66affSColin Finck                 --gData->stateStackTop;
2682c2c66affSColin Finck                 op = (REOp) *pc++;
2683c2c66affSColin Finck                 continue;
2684c2c66affSColin Finck 
2685c2c66affSColin Finck               case REOP_LPAREN:
2686c2c66affSColin Finck                 pc = ReadCompactIndex(pc, &parenIndex);
2687c2c66affSColin Finck                 TRACE("[ %lu ]\n", (ULONG_PTR)parenIndex);
2688c2c66affSColin Finck                 assert(parenIndex < gData->regexp->parenCount);
2689c2c66affSColin Finck                 if (parenIndex + 1 > parenSoFar)
2690c2c66affSColin Finck                     parenSoFar = parenIndex + 1;
2691c2c66affSColin Finck                 x->parens[parenIndex].index = x->cp - gData->cpbegin;
2692c2c66affSColin Finck                 x->parens[parenIndex].length = 0;
2693c2c66affSColin Finck                 op = (REOp) *pc++;
2694c2c66affSColin Finck                 continue;
2695c2c66affSColin Finck 
2696c2c66affSColin Finck               case REOP_RPAREN:
2697c2c66affSColin Finck               {
2698c2c66affSColin Finck                 ptrdiff_t delta;
2699c2c66affSColin Finck 
2700c2c66affSColin Finck                 pc = ReadCompactIndex(pc, &parenIndex);
2701c2c66affSColin Finck                 assert(parenIndex < gData->regexp->parenCount);
2702c2c66affSColin Finck                 cap = &x->parens[parenIndex];
2703c2c66affSColin Finck                 delta = x->cp - (gData->cpbegin + cap->index);
2704c2c66affSColin Finck                 cap->length = (delta < 0) ? 0 : (size_t) delta;
2705c2c66affSColin Finck                 op = (REOp) *pc++;
2706c2c66affSColin Finck 
2707c2c66affSColin Finck                 if (!result)
2708c2c66affSColin Finck                     result = x;
2709c2c66affSColin Finck                 continue;
2710c2c66affSColin Finck               }
2711c2c66affSColin Finck               case REOP_ASSERT:
2712c2c66affSColin Finck                 nextpc = pc + GET_OFFSET(pc);  /* start of term after ASSERT */
2713c2c66affSColin Finck                 pc += ARG_LEN;                 /* start of ASSERT child */
2714c2c66affSColin Finck                 op = (REOp) *pc++;
2715c2c66affSColin Finck                 testpc = pc;
2716c2c66affSColin Finck                 if (REOP_IS_SIMPLE(op) &&
2717c2c66affSColin Finck                     !SimpleMatch(gData, x, op, &testpc, FALSE)) {
2718c2c66affSColin Finck                     result = NULL;
2719c2c66affSColin Finck                     break;
2720c2c66affSColin Finck                 }
2721c2c66affSColin Finck                 curState->u.assertion.top =
2722c2c66affSColin Finck                     (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2723c2c66affSColin Finck                 curState->u.assertion.sz = gData->cursz;
2724c2c66affSColin Finck                 curState->index = x->cp - gData->cpbegin;
2725c2c66affSColin Finck                 curState->parenSoFar = parenSoFar;
2726c2c66affSColin Finck                 PUSH_STATE_STACK(gData);
2727c2c66affSColin Finck                 if (!PushBackTrackState(gData, REOP_ASSERTTEST,
2728c2c66affSColin Finck                                         nextpc, x, x->cp, 0, 0)) {
2729c2c66affSColin Finck                     goto bad;
2730c2c66affSColin Finck                 }
2731c2c66affSColin Finck                 continue;
2732c2c66affSColin Finck 
2733c2c66affSColin Finck               case REOP_ASSERT_NOT:
2734c2c66affSColin Finck                 nextpc = pc + GET_OFFSET(pc);
2735c2c66affSColin Finck                 pc += ARG_LEN;
2736c2c66affSColin Finck                 op = (REOp) *pc++;
2737c2c66affSColin Finck                 testpc = pc;
2738c2c66affSColin Finck                 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2739c2c66affSColin Finck                     SimpleMatch(gData, x, op, &testpc, FALSE) &&
2740c2c66affSColin Finck                     *testpc == REOP_ASSERTNOTTEST) {
2741c2c66affSColin Finck                     result = NULL;
2742c2c66affSColin Finck                     break;
2743c2c66affSColin Finck                 }
2744c2c66affSColin Finck                 curState->u.assertion.top
2745c2c66affSColin Finck                     = (char *)gData->backTrackSP -
2746c2c66affSColin Finck                       (char *)gData->backTrackStack;
2747c2c66affSColin Finck                 curState->u.assertion.sz = gData->cursz;
2748c2c66affSColin Finck                 curState->index = x->cp - gData->cpbegin;
2749c2c66affSColin Finck                 curState->parenSoFar = parenSoFar;
2750c2c66affSColin Finck                 PUSH_STATE_STACK(gData);
2751c2c66affSColin Finck                 if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
2752c2c66affSColin Finck                                         nextpc, x, x->cp, 0, 0)) {
2753c2c66affSColin Finck                     goto bad;
2754c2c66affSColin Finck                 }
2755c2c66affSColin Finck                 continue;
2756c2c66affSColin Finck 
2757c2c66affSColin Finck               case REOP_ASSERTTEST:
2758c2c66affSColin Finck                 --gData->stateStackTop;
2759c2c66affSColin Finck                 --curState;
2760c2c66affSColin Finck                 x->cp = gData->cpbegin + curState->index;
2761c2c66affSColin Finck                 gData->backTrackSP =
2762c2c66affSColin Finck                     (REBackTrackData *) ((char *)gData->backTrackStack +
2763c2c66affSColin Finck                                          curState->u.assertion.top);
2764c2c66affSColin Finck                 gData->cursz = curState->u.assertion.sz;
2765c2c66affSColin Finck                 if (result)
2766c2c66affSColin Finck                     result = x;
2767c2c66affSColin Finck                 break;
2768c2c66affSColin Finck 
2769c2c66affSColin Finck               case REOP_ASSERTNOTTEST:
2770c2c66affSColin Finck                 --gData->stateStackTop;
2771c2c66affSColin Finck                 --curState;
2772c2c66affSColin Finck                 x->cp = gData->cpbegin + curState->index;
2773c2c66affSColin Finck                 gData->backTrackSP =
2774c2c66affSColin Finck                     (REBackTrackData *) ((char *)gData->backTrackStack +
2775c2c66affSColin Finck                                          curState->u.assertion.top);
2776c2c66affSColin Finck                 gData->cursz = curState->u.assertion.sz;
2777c2c66affSColin Finck                 result = (!result) ? x : NULL;
2778c2c66affSColin Finck                 break;
2779c2c66affSColin Finck               case REOP_STAR:
2780c2c66affSColin Finck                 curState->u.quantifier.min = 0;
2781c2c66affSColin Finck                 curState->u.quantifier.max = (UINT)-1;
2782c2c66affSColin Finck                 goto quantcommon;
2783c2c66affSColin Finck               case REOP_PLUS:
2784c2c66affSColin Finck                 curState->u.quantifier.min = 1;
2785c2c66affSColin Finck                 curState->u.quantifier.max = (UINT)-1;
2786c2c66affSColin Finck                 goto quantcommon;
2787c2c66affSColin Finck               case REOP_OPT:
2788c2c66affSColin Finck                 curState->u.quantifier.min = 0;
2789c2c66affSColin Finck                 curState->u.quantifier.max = 1;
2790c2c66affSColin Finck                 goto quantcommon;
2791c2c66affSColin Finck               case REOP_QUANT:
2792c2c66affSColin Finck                 pc = ReadCompactIndex(pc, &k);
2793c2c66affSColin Finck                 curState->u.quantifier.min = k;
2794c2c66affSColin Finck                 pc = ReadCompactIndex(pc, &k);
2795c2c66affSColin Finck                 /* max is k - 1 to use one byte for (UINT)-1 sentinel. */
2796c2c66affSColin Finck                 curState->u.quantifier.max = k - 1;
2797c2c66affSColin Finck                 assert(curState->u.quantifier.min <= curState->u.quantifier.max);
2798c2c66affSColin Finck               quantcommon:
2799c2c66affSColin Finck                 if (curState->u.quantifier.max == 0) {
2800c2c66affSColin Finck                     pc = pc + GET_OFFSET(pc);
2801c2c66affSColin Finck                     op = (REOp) *pc++;
2802c2c66affSColin Finck                     result = x;
2803c2c66affSColin Finck                     continue;
2804c2c66affSColin Finck                 }
2805c2c66affSColin Finck                 /* Step over <next> */
2806c2c66affSColin Finck                 nextpc = pc + ARG_LEN;
2807c2c66affSColin Finck                 op = (REOp) *nextpc++;
2808c2c66affSColin Finck                 startcp = x->cp;
2809c2c66affSColin Finck                 if (REOP_IS_SIMPLE(op)) {
2810c2c66affSColin Finck                     if (!SimpleMatch(gData, x, op, &nextpc, TRUE)) {
2811c2c66affSColin Finck                         if (curState->u.quantifier.min == 0)
2812c2c66affSColin Finck                             result = x;
2813c2c66affSColin Finck                         else
2814c2c66affSColin Finck                             result = NULL;
2815c2c66affSColin Finck                         pc = pc + GET_OFFSET(pc);
2816c2c66affSColin Finck                         break;
2817c2c66affSColin Finck                     }
2818c2c66affSColin Finck                     op = (REOp) *nextpc++;
2819c2c66affSColin Finck                     result = x;
2820c2c66affSColin Finck                 }
2821c2c66affSColin Finck                 curState->index = startcp - gData->cpbegin;
2822c2c66affSColin Finck                 curState->continue_op = REOP_REPEAT;
2823c2c66affSColin Finck                 curState->continue_pc = pc;
2824c2c66affSColin Finck                 curState->parenSoFar = parenSoFar;
2825c2c66affSColin Finck                 PUSH_STATE_STACK(gData);
2826c2c66affSColin Finck                 if (curState->u.quantifier.min == 0 &&
2827c2c66affSColin Finck                     !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2828c2c66affSColin Finck                                         0, 0)) {
2829c2c66affSColin Finck                     goto bad;
2830c2c66affSColin Finck                 }
2831c2c66affSColin Finck                 pc = nextpc;
2832c2c66affSColin Finck                 continue;
2833c2c66affSColin Finck 
2834c2c66affSColin Finck               case REOP_ENDCHILD: /* marks the end of a quantifier child */
2835c2c66affSColin Finck                 pc = curState[-1].continue_pc;
2836c2c66affSColin Finck                 op = (REOp) curState[-1].continue_op;
2837c2c66affSColin Finck 
2838c2c66affSColin Finck                 if (!result)
2839c2c66affSColin Finck                     result = x;
2840c2c66affSColin Finck                 continue;
2841c2c66affSColin Finck 
2842c2c66affSColin Finck               case REOP_REPEAT:
2843c2c66affSColin Finck                 --curState;
2844c2c66affSColin Finck                 do {
2845c2c66affSColin Finck                     --gData->stateStackTop;
2846c2c66affSColin Finck                     if (!result) {
2847c2c66affSColin Finck                         /* Failed, see if we have enough children. */
2848c2c66affSColin Finck                         if (curState->u.quantifier.min == 0)
2849c2c66affSColin Finck                             goto repeatDone;
2850c2c66affSColin Finck                         goto break_switch;
2851c2c66affSColin Finck                     }
2852c2c66affSColin Finck                     if (curState->u.quantifier.min == 0 &&
2853c2c66affSColin Finck                         x->cp == gData->cpbegin + curState->index) {
2854c2c66affSColin Finck                         /* matched an empty string, that'll get us nowhere */
2855c2c66affSColin Finck                         result = NULL;
2856c2c66affSColin Finck                         goto break_switch;
2857c2c66affSColin Finck                     }
2858c2c66affSColin Finck                     if (curState->u.quantifier.min != 0)
2859c2c66affSColin Finck                         curState->u.quantifier.min--;
2860c2c66affSColin Finck                     if (curState->u.quantifier.max != (UINT) -1)
2861c2c66affSColin Finck                         curState->u.quantifier.max--;
2862c2c66affSColin Finck                     if (curState->u.quantifier.max == 0)
2863c2c66affSColin Finck                         goto repeatDone;
2864c2c66affSColin Finck                     nextpc = pc + ARG_LEN;
2865c2c66affSColin Finck                     nextop = (REOp) *nextpc;
2866c2c66affSColin Finck                     startcp = x->cp;
2867c2c66affSColin Finck                     if (REOP_IS_SIMPLE(nextop)) {
2868c2c66affSColin Finck                         nextpc++;
2869c2c66affSColin Finck                         if (!SimpleMatch(gData, x, nextop, &nextpc, TRUE)) {
2870c2c66affSColin Finck                             if (curState->u.quantifier.min == 0)
2871c2c66affSColin Finck                                 goto repeatDone;
2872c2c66affSColin Finck                             result = NULL;
2873c2c66affSColin Finck                             goto break_switch;
2874c2c66affSColin Finck                         }
2875c2c66affSColin Finck                         result = x;
2876c2c66affSColin Finck                     }
2877c2c66affSColin Finck                     curState->index = startcp - gData->cpbegin;
2878c2c66affSColin Finck                     PUSH_STATE_STACK(gData);
2879c2c66affSColin Finck                     if (curState->u.quantifier.min == 0 &&
2880c2c66affSColin Finck                         !PushBackTrackState(gData, REOP_REPEAT,
2881c2c66affSColin Finck                                             pc, x, startcp,
2882c2c66affSColin Finck                                             curState->parenSoFar,
2883c2c66affSColin Finck                                             parenSoFar -
2884c2c66affSColin Finck                                             curState->parenSoFar)) {
2885c2c66affSColin Finck                         goto bad;
2886c2c66affSColin Finck                     }
2887c2c66affSColin Finck                 } while (*nextpc == REOP_ENDCHILD);
2888c2c66affSColin Finck                 pc = nextpc;
2889c2c66affSColin Finck                 op = (REOp) *pc++;
2890c2c66affSColin Finck                 parenSoFar = curState->parenSoFar;
2891c2c66affSColin Finck                 continue;
2892c2c66affSColin Finck 
2893c2c66affSColin Finck               repeatDone:
2894c2c66affSColin Finck                 result = x;
2895c2c66affSColin Finck                 pc += GET_OFFSET(pc);
2896c2c66affSColin Finck                 goto break_switch;
2897c2c66affSColin Finck 
2898c2c66affSColin Finck               case REOP_MINIMALSTAR:
2899c2c66affSColin Finck                 curState->u.quantifier.min = 0;
2900c2c66affSColin Finck                 curState->u.quantifier.max = (UINT)-1;
2901c2c66affSColin Finck                 goto minimalquantcommon;
2902c2c66affSColin Finck               case REOP_MINIMALPLUS:
2903c2c66affSColin Finck                 curState->u.quantifier.min = 1;
2904c2c66affSColin Finck                 curState->u.quantifier.max = (UINT)-1;
2905c2c66affSColin Finck                 goto minimalquantcommon;
2906c2c66affSColin Finck               case REOP_MINIMALOPT:
2907c2c66affSColin Finck                 curState->u.quantifier.min = 0;
2908c2c66affSColin Finck                 curState->u.quantifier.max = 1;
2909c2c66affSColin Finck                 goto minimalquantcommon;
2910c2c66affSColin Finck               case REOP_MINIMALQUANT:
2911c2c66affSColin Finck                 pc = ReadCompactIndex(pc, &k);
2912c2c66affSColin Finck                 curState->u.quantifier.min = k;
2913c2c66affSColin Finck                 pc = ReadCompactIndex(pc, &k);
2914c2c66affSColin Finck                 /* See REOP_QUANT comments about k - 1. */
2915c2c66affSColin Finck                 curState->u.quantifier.max = k - 1;
2916c2c66affSColin Finck                 assert(curState->u.quantifier.min
2917c2c66affSColin Finck                           <= curState->u.quantifier.max);
2918c2c66affSColin Finck               minimalquantcommon:
2919c2c66affSColin Finck                 curState->index = x->cp - gData->cpbegin;
2920c2c66affSColin Finck                 curState->parenSoFar = parenSoFar;
2921c2c66affSColin Finck                 PUSH_STATE_STACK(gData);
2922c2c66affSColin Finck                 if (curState->u.quantifier.min != 0) {
2923c2c66affSColin Finck                     curState->continue_op = REOP_MINIMALREPEAT;
2924c2c66affSColin Finck                     curState->continue_pc = pc;
2925c2c66affSColin Finck                     /* step over <next> */
2926c2c66affSColin Finck                     pc += OFFSET_LEN;
2927c2c66affSColin Finck                     op = (REOp) *pc++;
2928c2c66affSColin Finck                 } else {
2929c2c66affSColin Finck                     if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
2930c2c66affSColin Finck                                             pc, x, x->cp, 0, 0)) {
2931c2c66affSColin Finck                         goto bad;
2932c2c66affSColin Finck                     }
2933c2c66affSColin Finck                     --gData->stateStackTop;
2934c2c66affSColin Finck                     pc = pc + GET_OFFSET(pc);
2935c2c66affSColin Finck                     op = (REOp) *pc++;
2936c2c66affSColin Finck                 }
2937c2c66affSColin Finck                 continue;
2938c2c66affSColin Finck 
2939c2c66affSColin Finck               case REOP_MINIMALREPEAT:
2940c2c66affSColin Finck                 --gData->stateStackTop;
2941c2c66affSColin Finck                 --curState;
2942c2c66affSColin Finck 
2943c2c66affSColin Finck                 TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max);
2944c2c66affSColin Finck #define PREPARE_REPEAT()                                                      \
2945c2c66affSColin Finck     do {                                                                      \
2946c2c66affSColin Finck         curState->index = x->cp - gData->cpbegin;                          \
2947c2c66affSColin Finck         curState->continue_op = REOP_MINIMALREPEAT;                           \
2948c2c66affSColin Finck         curState->continue_pc = pc;                                           \
2949c2c66affSColin Finck         pc += ARG_LEN;                                                        \
2950c2c66affSColin Finck         for (k = curState->parenSoFar; k < parenSoFar; k++)                   \
2951c2c66affSColin Finck             x->parens[k].index = -1;                                          \
2952c2c66affSColin Finck         PUSH_STATE_STACK(gData);                                              \
2953c2c66affSColin Finck         op = (REOp) *pc++;                                                    \
2954c2c66affSColin Finck         assert(op < REOP_LIMIT);                                              \
2955c2c66affSColin Finck     }while(0)
2956c2c66affSColin Finck 
2957c2c66affSColin Finck                 if (!result) {
2958c2c66affSColin Finck                     TRACE(" -\n");
2959c2c66affSColin Finck                     /*
2960c2c66affSColin Finck                      * Non-greedy failure - try to consume another child.
2961c2c66affSColin Finck                      */
2962c2c66affSColin Finck                     if (curState->u.quantifier.max == (UINT) -1 ||
2963c2c66affSColin Finck                         curState->u.quantifier.max > 0) {
2964c2c66affSColin Finck                         PREPARE_REPEAT();
2965c2c66affSColin Finck                         continue;
2966c2c66affSColin Finck                     }
2967c2c66affSColin Finck                     /* Don't need to adjust pc since we're going to pop. */
2968c2c66affSColin Finck                     break;
2969c2c66affSColin Finck                 }
2970c2c66affSColin Finck                 if (curState->u.quantifier.min == 0 &&
2971c2c66affSColin Finck                     x->cp == gData->cpbegin + curState->index) {
2972c2c66affSColin Finck                     /* Matched an empty string, that'll get us nowhere. */
2973c2c66affSColin Finck                     result = NULL;
2974c2c66affSColin Finck                     break;
2975c2c66affSColin Finck                 }
2976c2c66affSColin Finck                 if (curState->u.quantifier.min != 0)
2977c2c66affSColin Finck                     curState->u.quantifier.min--;
2978c2c66affSColin Finck                 if (curState->u.quantifier.max != (UINT) -1)
2979c2c66affSColin Finck                     curState->u.quantifier.max--;
2980c2c66affSColin Finck                 if (curState->u.quantifier.min != 0) {
2981c2c66affSColin Finck                     PREPARE_REPEAT();
2982c2c66affSColin Finck                     continue;
2983c2c66affSColin Finck                 }
2984c2c66affSColin Finck                 curState->index = x->cp - gData->cpbegin;
2985c2c66affSColin Finck                 curState->parenSoFar = parenSoFar;
2986c2c66affSColin Finck                 PUSH_STATE_STACK(gData);
2987c2c66affSColin Finck                 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
2988c2c66affSColin Finck                                         pc, x, x->cp,
2989c2c66affSColin Finck                                         curState->parenSoFar,
2990c2c66affSColin Finck                                         parenSoFar - curState->parenSoFar)) {
2991c2c66affSColin Finck                     goto bad;
2992c2c66affSColin Finck                 }
2993c2c66affSColin Finck                 --gData->stateStackTop;
2994c2c66affSColin Finck                 pc = pc + GET_OFFSET(pc);
2995c2c66affSColin Finck                 op = (REOp) *pc++;
2996c2c66affSColin Finck                 assert(op < REOP_LIMIT);
2997c2c66affSColin Finck                 continue;
2998c2c66affSColin Finck               default:
2999c2c66affSColin Finck                 assert(FALSE);
3000c2c66affSColin Finck                 result = NULL;
3001c2c66affSColin Finck             }
3002c2c66affSColin Finck           break_switch:;
3003c2c66affSColin Finck         }
3004c2c66affSColin Finck 
3005c2c66affSColin Finck         /*
3006c2c66affSColin Finck          *  If the match failed and there's a backtrack option, take it.
3007c2c66affSColin Finck          *  Otherwise this is a complete and utter failure.
3008c2c66affSColin Finck          */
3009c2c66affSColin Finck         if (!result) {
3010c2c66affSColin Finck             if (gData->cursz == 0)
3011c2c66affSColin Finck                 return NULL;
3012c2c66affSColin Finck 
3013c2c66affSColin Finck             /* Potentially detect explosive regex here. */
3014c2c66affSColin Finck             gData->backTrackCount++;
3015c2c66affSColin Finck             if (gData->backTrackLimit &&
3016c2c66affSColin Finck                 gData->backTrackCount >= gData->backTrackLimit) {
3017c2c66affSColin Finck                 JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3018c2c66affSColin Finck                                      JSMSG_REGEXP_TOO_COMPLEX);
3019c2c66affSColin Finck                 gData->ok = FALSE;
3020c2c66affSColin Finck                 return NULL;
3021c2c66affSColin Finck             }
3022c2c66affSColin Finck 
3023c2c66affSColin Finck             backTrackData = gData->backTrackSP;
3024c2c66affSColin Finck             gData->cursz = backTrackData->sz;
3025c2c66affSColin Finck             gData->backTrackSP =
3026c2c66affSColin Finck                 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3027c2c66affSColin Finck             x->cp = backTrackData->cp;
3028c2c66affSColin Finck             pc = backTrackData->backtrack_pc;
3029c2c66affSColin Finck             op = (REOp) backTrackData->backtrack_op;
3030c2c66affSColin Finck             assert(op < REOP_LIMIT);
3031c2c66affSColin Finck             gData->stateStackTop = backTrackData->saveStateStackTop;
3032c2c66affSColin Finck             assert(gData->stateStackTop);
3033c2c66affSColin Finck 
3034c2c66affSColin Finck             memcpy(gData->stateStack, backTrackData + 1,
3035c2c66affSColin Finck                    sizeof(REProgState) * backTrackData->saveStateStackTop);
3036c2c66affSColin Finck             curState = &gData->stateStack[gData->stateStackTop - 1];
3037c2c66affSColin Finck 
3038c2c66affSColin Finck             if (backTrackData->parenCount) {
3039c2c66affSColin Finck                 memcpy(&x->parens[backTrackData->parenIndex],
3040c2c66affSColin Finck                        (char *)(backTrackData + 1) +
3041c2c66affSColin Finck                        sizeof(REProgState) * backTrackData->saveStateStackTop,
3042c2c66affSColin Finck                        sizeof(RECapture) * backTrackData->parenCount);
3043c2c66affSColin Finck                 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3044c2c66affSColin Finck             } else {
3045c2c66affSColin Finck                 for (k = curState->parenSoFar; k < parenSoFar; k++)
3046c2c66affSColin Finck                     x->parens[k].index = -1;
3047c2c66affSColin Finck                 parenSoFar = curState->parenSoFar;
3048c2c66affSColin Finck             }
3049c2c66affSColin Finck 
3050c2c66affSColin Finck             TRACE("\tBT_Pop: %ld,%ld\n",
3051c2c66affSColin Finck                      (ULONG_PTR)backTrackData->parenIndex,
3052c2c66affSColin Finck                      (ULONG_PTR)backTrackData->parenCount);
3053c2c66affSColin Finck             continue;
3054c2c66affSColin Finck         }
3055c2c66affSColin Finck         x = result;
3056c2c66affSColin Finck 
3057c2c66affSColin Finck         /*
3058c2c66affSColin Finck          *  Continue with the expression.
3059c2c66affSColin Finck          */
3060c2c66affSColin Finck         op = (REOp)*pc++;
3061c2c66affSColin Finck         assert(op < REOP_LIMIT);
3062c2c66affSColin Finck     }
3063c2c66affSColin Finck 
3064c2c66affSColin Finck bad:
3065c2c66affSColin Finck     TRACE("\n");
3066c2c66affSColin Finck     return NULL;
3067c2c66affSColin Finck 
3068c2c66affSColin Finck good:
3069c2c66affSColin Finck     TRACE("\n");
3070c2c66affSColin Finck     return x;
3071c2c66affSColin Finck }
3072c2c66affSColin Finck 
MatchRegExp(REGlobalData * gData,match_state_t * x)3073c2c66affSColin Finck static match_state_t *MatchRegExp(REGlobalData *gData, match_state_t *x)
3074c2c66affSColin Finck {
3075c2c66affSColin Finck     match_state_t *result;
3076c2c66affSColin Finck     const WCHAR *cp = x->cp;
3077c2c66affSColin Finck     const WCHAR *cp2;
3078c2c66affSColin Finck     UINT j;
3079c2c66affSColin Finck 
3080c2c66affSColin Finck     /*
3081c2c66affSColin Finck      * Have to include the position beyond the last character
3082c2c66affSColin Finck      * in order to detect end-of-input/line condition.
3083c2c66affSColin Finck      */
3084c2c66affSColin Finck     for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3085c2c66affSColin Finck         gData->skipped = cp2 - cp;
3086c2c66affSColin Finck         x->cp = cp2;
3087c2c66affSColin Finck         for (j = 0; j < gData->regexp->parenCount; j++)
3088c2c66affSColin Finck             x->parens[j].index = -1;
3089c2c66affSColin Finck         result = ExecuteREBytecode(gData, x);
3090c2c66affSColin Finck         if (!gData->ok || result || (gData->regexp->flags & REG_STICKY))
3091c2c66affSColin Finck             return result;
3092c2c66affSColin Finck         gData->backTrackSP = gData->backTrackStack;
3093c2c66affSColin Finck         gData->cursz = 0;
3094c2c66affSColin Finck         gData->stateStackTop = 0;
3095c2c66affSColin Finck         cp2 = cp + gData->skipped;
3096c2c66affSColin Finck     }
3097c2c66affSColin Finck     return NULL;
3098c2c66affSColin Finck }
3099c2c66affSColin Finck 
InitMatch(regexp_t * re,void * cx,heap_pool_t * pool,REGlobalData * gData)3100c2c66affSColin Finck static HRESULT InitMatch(regexp_t *re, void *cx, heap_pool_t *pool, REGlobalData *gData)
3101c2c66affSColin Finck {
3102c2c66affSColin Finck     UINT i;
3103c2c66affSColin Finck 
3104c2c66affSColin Finck     gData->backTrackStackSize = INITIAL_BACKTRACK;
3105c2c66affSColin Finck     gData->backTrackStack = heap_pool_alloc(gData->pool, INITIAL_BACKTRACK);
3106c2c66affSColin Finck     if (!gData->backTrackStack)
3107c2c66affSColin Finck         goto bad;
3108c2c66affSColin Finck 
3109c2c66affSColin Finck     gData->backTrackSP = gData->backTrackStack;
3110c2c66affSColin Finck     gData->cursz = 0;
3111c2c66affSColin Finck     gData->backTrackCount = 0;
3112c2c66affSColin Finck     gData->backTrackLimit = 0;
3113c2c66affSColin Finck 
3114c2c66affSColin Finck     gData->stateStackLimit = INITIAL_STATESTACK;
3115c2c66affSColin Finck     gData->stateStack = heap_pool_alloc(gData->pool, sizeof(REProgState) * INITIAL_STATESTACK);
3116c2c66affSColin Finck     if (!gData->stateStack)
3117c2c66affSColin Finck         goto bad;
3118c2c66affSColin Finck 
3119c2c66affSColin Finck     gData->stateStackTop = 0;
3120c2c66affSColin Finck     gData->cx = cx;
3121c2c66affSColin Finck     gData->pool = pool;
3122c2c66affSColin Finck     gData->regexp = re;
3123c2c66affSColin Finck     gData->ok = TRUE;
3124c2c66affSColin Finck 
3125c2c66affSColin Finck     for (i = 0; i < re->classCount; i++) {
3126c2c66affSColin Finck         if (!re->classList[i].converted &&
3127c2c66affSColin Finck                 !ProcessCharSet(gData, &re->classList[i])) {
3128c2c66affSColin Finck             return E_FAIL;
3129c2c66affSColin Finck         }
3130c2c66affSColin Finck     }
3131c2c66affSColin Finck 
3132c2c66affSColin Finck     return S_OK;
3133c2c66affSColin Finck 
3134c2c66affSColin Finck bad:
3135c2c66affSColin Finck     js_ReportOutOfScriptQuota(cx);
3136c2c66affSColin Finck     gData->ok = FALSE;
3137c2c66affSColin Finck     return E_OUTOFMEMORY;
3138c2c66affSColin Finck }
3139c2c66affSColin Finck 
regexp_execute(regexp_t * regexp,void * cx,heap_pool_t * pool,const WCHAR * str,DWORD str_len,match_state_t * result)3140c2c66affSColin Finck HRESULT regexp_execute(regexp_t *regexp, void *cx, heap_pool_t *pool,
3141c2c66affSColin Finck         const WCHAR *str, DWORD str_len, match_state_t *result)
3142c2c66affSColin Finck {
3143c2c66affSColin Finck     match_state_t *res;
3144c2c66affSColin Finck     REGlobalData gData;
3145c2c66affSColin Finck     heap_pool_t *mark = heap_pool_mark(pool);
3146c2c66affSColin Finck     const WCHAR *str_beg = result->cp;
3147c2c66affSColin Finck     HRESULT hres;
3148c2c66affSColin Finck 
3149c2c66affSColin Finck     assert(result->cp != NULL);
3150c2c66affSColin Finck 
3151c2c66affSColin Finck     gData.cpbegin = str;
3152c2c66affSColin Finck     gData.cpend = str+str_len;
3153c2c66affSColin Finck     gData.start = result->cp-str;
3154c2c66affSColin Finck     gData.skipped = 0;
3155c2c66affSColin Finck     gData.pool = pool;
3156c2c66affSColin Finck 
3157c2c66affSColin Finck     hres = InitMatch(regexp, cx, pool, &gData);
3158c2c66affSColin Finck     if(FAILED(hres)) {
3159c2c66affSColin Finck         WARN("InitMatch failed\n");
3160c2c66affSColin Finck         heap_pool_clear(mark);
3161c2c66affSColin Finck         return hres;
3162c2c66affSColin Finck     }
3163c2c66affSColin Finck 
3164c2c66affSColin Finck     res = MatchRegExp(&gData, result);
3165c2c66affSColin Finck     heap_pool_clear(mark);
3166c2c66affSColin Finck     if(!gData.ok) {
3167c2c66affSColin Finck         WARN("MatchRegExp failed\n");
3168c2c66affSColin Finck         return E_FAIL;
3169c2c66affSColin Finck     }
3170c2c66affSColin Finck 
3171c2c66affSColin Finck     if(!res) {
3172c2c66affSColin Finck         result->match_len = 0;
3173c2c66affSColin Finck         return S_FALSE;
3174c2c66affSColin Finck     }
3175c2c66affSColin Finck 
3176c2c66affSColin Finck     result->match_len = (result->cp-str_beg) - gData.skipped;
3177c2c66affSColin Finck     result->paren_count = regexp->parenCount;
3178c2c66affSColin Finck     return S_OK;
3179c2c66affSColin Finck }
3180c2c66affSColin Finck 
regexp_destroy(regexp_t * re)3181c2c66affSColin Finck void regexp_destroy(regexp_t *re)
3182c2c66affSColin Finck {
3183c2c66affSColin Finck     if (re->classList) {
3184c2c66affSColin Finck         UINT i;
3185c2c66affSColin Finck         for (i = 0; i < re->classCount; i++) {
3186c2c66affSColin Finck             if (re->classList[i].converted)
3187c2c66affSColin Finck                 heap_free(re->classList[i].u.bits);
3188c2c66affSColin Finck             re->classList[i].u.bits = NULL;
3189c2c66affSColin Finck         }
3190c2c66affSColin Finck         heap_free(re->classList);
3191c2c66affSColin Finck     }
3192c2c66affSColin Finck     heap_free(re);
3193c2c66affSColin Finck }
3194c2c66affSColin Finck 
regexp_new(void * cx,heap_pool_t * pool,const WCHAR * str,DWORD str_len,WORD flags,BOOL flat)3195c2c66affSColin Finck regexp_t* regexp_new(void *cx, heap_pool_t *pool, const WCHAR *str,
3196c2c66affSColin Finck         DWORD str_len, WORD flags, BOOL flat)
3197c2c66affSColin Finck {
3198c2c66affSColin Finck     regexp_t *re;
3199c2c66affSColin Finck     heap_pool_t *mark;
3200c2c66affSColin Finck     CompilerState state;
3201c2c66affSColin Finck     size_t resize;
3202c2c66affSColin Finck     jsbytecode *endPC;
3203c2c66affSColin Finck     UINT i;
3204c2c66affSColin Finck     size_t len;
3205c2c66affSColin Finck 
3206c2c66affSColin Finck     re = NULL;
3207c2c66affSColin Finck     mark = heap_pool_mark(pool);
3208c2c66affSColin Finck     len = str_len;
3209c2c66affSColin Finck 
3210c2c66affSColin Finck     state.context = cx;
3211c2c66affSColin Finck     state.pool = pool;
3212c2c66affSColin Finck     state.cp = str;
3213c2c66affSColin Finck     if (!state.cp)
3214c2c66affSColin Finck         goto out;
3215c2c66affSColin Finck     state.cpbegin = state.cp;
3216c2c66affSColin Finck     state.cpend = state.cp + len;
3217c2c66affSColin Finck     state.flags = flags;
3218c2c66affSColin Finck     state.parenCount = 0;
3219c2c66affSColin Finck     state.classCount = 0;
3220c2c66affSColin Finck     state.progLength = 0;
3221c2c66affSColin Finck     state.treeDepth = 0;
3222c2c66affSColin Finck     state.classBitmapsMem = 0;
3223c2c66affSColin Finck     for (i = 0; i < CLASS_CACHE_SIZE; i++)
3224c2c66affSColin Finck         state.classCache[i].start = NULL;
3225c2c66affSColin Finck 
3226c2c66affSColin Finck     if (len != 0 && flat) {
3227c2c66affSColin Finck         state.result = NewRENode(&state, REOP_FLAT);
3228c2c66affSColin Finck         if (!state.result)
3229c2c66affSColin Finck             goto out;
3230c2c66affSColin Finck         state.result->u.flat.chr = *state.cpbegin;
3231c2c66affSColin Finck         state.result->u.flat.length = len;
3232c2c66affSColin Finck         state.result->kid = (void *) state.cpbegin;
3233c2c66affSColin Finck         /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
3234c2c66affSColin Finck         state.progLength += 1 + GetCompactIndexWidth(0)
3235c2c66affSColin Finck                           + GetCompactIndexWidth(len);
3236c2c66affSColin Finck     } else {
3237c2c66affSColin Finck         if (!ParseRegExp(&state))
3238c2c66affSColin Finck             goto out;
3239c2c66affSColin Finck     }
3240c2c66affSColin Finck     resize = offsetof(regexp_t, program) + state.progLength + 1;
3241c2c66affSColin Finck     re = heap_alloc(resize);
3242c2c66affSColin Finck     if (!re)
3243c2c66affSColin Finck         goto out;
3244c2c66affSColin Finck 
3245c2c66affSColin Finck     assert(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
3246c2c66affSColin Finck     re->classCount = state.classCount;
3247c2c66affSColin Finck     if (re->classCount) {
3248c2c66affSColin Finck         re->classList = heap_alloc(re->classCount * sizeof(RECharSet));
3249c2c66affSColin Finck         if (!re->classList) {
3250c2c66affSColin Finck             regexp_destroy(re);
3251c2c66affSColin Finck             re = NULL;
3252c2c66affSColin Finck             goto out;
3253c2c66affSColin Finck         }
3254c2c66affSColin Finck         for (i = 0; i < re->classCount; i++)
3255c2c66affSColin Finck             re->classList[i].converted = FALSE;
3256c2c66affSColin Finck     } else {
3257c2c66affSColin Finck         re->classList = NULL;
3258c2c66affSColin Finck     }
3259c2c66affSColin Finck     endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
3260c2c66affSColin Finck     if (!endPC) {
3261c2c66affSColin Finck         regexp_destroy(re);
3262c2c66affSColin Finck         re = NULL;
3263c2c66affSColin Finck         goto out;
3264c2c66affSColin Finck     }
3265c2c66affSColin Finck     *endPC++ = REOP_END;
3266c2c66affSColin Finck     /*
3267c2c66affSColin Finck      * Check whether size was overestimated and shrink using realloc.
3268c2c66affSColin Finck      * This is safe since no pointers to newly parsed regexp or its parts
3269c2c66affSColin Finck      * besides re exist here.
3270c2c66affSColin Finck      */
3271c2c66affSColin Finck     if ((size_t)(endPC - re->program) != state.progLength + 1) {
3272c2c66affSColin Finck         regexp_t *tmp;
3273c2c66affSColin Finck         assert((size_t)(endPC - re->program) < state.progLength + 1);
3274c2c66affSColin Finck         resize = offsetof(regexp_t, program) + (endPC - re->program);
3275c2c66affSColin Finck         tmp = heap_realloc(re, resize);
3276c2c66affSColin Finck         if (tmp)
3277c2c66affSColin Finck             re = tmp;
3278c2c66affSColin Finck     }
3279c2c66affSColin Finck 
3280c2c66affSColin Finck     re->flags = flags;
3281c2c66affSColin Finck     re->parenCount = state.parenCount;
3282c2c66affSColin Finck     re->source = str;
3283c2c66affSColin Finck     re->source_len = str_len;
3284c2c66affSColin Finck 
3285c2c66affSColin Finck out:
3286c2c66affSColin Finck     heap_pool_clear(mark);
3287c2c66affSColin Finck     return re;
3288c2c66affSColin Finck }
3289