1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9            Copyright (c) 1997-2013 University of Cambridge
10 
11   The machine code generator part (this module) was written by Zoltan Herczeg
12                       Copyright (c) 2010-2013
13 
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17 
18     * Redistributions of source code must retain the above copyright notice,
19       this list of conditions and the following disclaimer.
20 
21     * Redistributions in binary form must reproduce the above copyright
22       notice, this list of conditions and the following disclaimer in the
23       documentation and/or other materials provided with the distribution.
24 
25     * Neither the name of the University of Cambridge nor the names of its
26       contributors may be used to endorse or promote products derived from
27       this software without specific prior written permission.
28 
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42 
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46 
47 #include "pcre_internal.h"
48 
49 #if defined SUPPORT_JIT
50 
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54 
/* Configuration of the sljit sources included below. The allocator hooks
forward to PUBL(malloc) and PUBL(free); the allocator_data argument is
accepted but ignored. */
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
/* Verbose and debug support are switched off. Because SLJIT_DEBUG is 0,
DESTROY_REGISTERS (see below) stays undefined as well. */
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61 
62 #include "sljit/sljitLir.c"
63 
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67 
68 /* Defines for debugging purposes. */
69 
70 /* 1 - Use unoptimized capturing brackets.
71    2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73 
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76 
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80 
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84 
85 /* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined and non-zero. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89 
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92 
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98 
99   'ab' - 'a' and 'b' regexps are concatenated
100   'a+' - 'a' is the sub-expression of the '+' operator
101 
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107 
108  Greedy star operator (*) :
109    Matching path: match happens.
110    Backtrack path: match failed.
111  Non-greedy star operator (*?) :
112    Matching path: no need to perform a match.
113    Backtrack path: match is required.
114 
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C and D be arbitrary regular expressions, and
117 suppose we have the following regular expression:
118 
119    A(B|C)D
120 
121 The generated code will be the following:
122 
123  A matching path
124  '(' matching path (pushing arguments to the stack)
125  B matching path
126  ')' matching path (pushing arguments to the stack)
127  D matching path
128  return with successful match
129 
130  D backtrack path
131  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132  B backtrack path
133  C expected path
134  jump to D matching path
135  C backtrack path
136  A backtrack path
137 
138  Notice that the order of the backtrack code paths is the opposite of the fast
139  code paths. This way the topmost value on the stack always belongs
140  to the current backtrack code path. The backtrack path must check
141  whether there is a next alternative. If so, it needs to jump back to
142  the matching path eventually. Otherwise it needs to clear out its own stack
143  frame and continue the execution on the backtrack code paths.
144 */
145 
146 /*
147 Saved stack frames:
148 
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153 
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156 
157 Thus we can restore the private data to a particular point in the stack.
158 */
159 
/* Argument block of the generated code: the jit_function type declared
further down takes a pointer to this structure. Pointer members are
grouped before the scalar members. */
160 typedef struct jit_arguments {
161   /* Pointers first. */
162   struct sljit_stack *stack;
      /* NOTE(review): str / begin / end presumably describe the subject
         string (current start, first character, end) - confirm at the
         call site. */
163   const pcre_uchar *str;
164   const pcre_uchar *begin;
165   const pcre_uchar *end;
166   int *offsets;
167   pcre_uchar *mark_ptr;
168   void *callout_data;
169   /* Everything else after. */
170   sljit_u32 limit_match;
171   int real_offset_count;
172   int offset_count;
      /* Byte-sized flags. NOTE(review): presumably mirror the PCRE_NOTBOL,
         PCRE_NOTEOL, PCRE_NOTEMPTY and PCRE_NOTEMPTY_ATSTART options -
         confirm. */
173   sljit_u8 notbol;
174   sljit_u8 noteol;
175   sljit_u8 notempty;
176   sljit_u8 notempty_atstart;
177 } jit_arguments;
178 
/* Bookkeeping for generated code. The three arrays hold one entry per
compile mode (JIT_NUMBER_OF_COMPILE_MODES entries each): the code pointer,
the head of its read-only data and the code size. */
179 typedef struct executable_functions {
180   void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
181   void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
182   sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
      /* User supplied callback together with its opaque argument. */
183   PUBL(jit_callback) callback;
184   void *userdata;
185   sljit_u32 top_bracket;
186   sljit_u32 limit_match;
187 } executable_functions;
188 
/* Singly linked list of sljit jumps. */
189 typedef struct jump_list {
190   struct sljit_jump *jump;
191   struct jump_list *next;
192 } jump_list;
193 
/* Singly linked list of stubs; each entry pairs a jump (start) with a
label (quit). NOTE(review): presumably the jump enters out-of-line code
and the label is where that code returns - confirm against the users. */
194 typedef struct stub_list {
195   struct sljit_jump *start;
196   struct sljit_label *quit;
197   struct stub_list *next;
198 } stub_list;
199 
/* Singly linked list pairing a label with the location of a machine word
(update_addr). NOTE(review): presumably the word receives the final
address of the label after code generation - confirm. */
200 typedef struct label_addr_list {
201   struct sljit_label *label;
202   sljit_uw *update_addr;
203   struct label_addr_list *next;
204 } label_addr_list;
205 
/* Negative sentinel values. NOTE(review): presumably used where a frame
size is expected (the framesize members below document values less than 0
as "not needed") - confirm against the users. */
206 enum frame_types {
207   no_frame = -1,
208   no_stack = -2
209 };
210 
/* NOTE(review): presumably tags for the entries of the control verb chain
(see control_head_ptr in compiler_common) - confirm against the users. */
211 enum control_types {
212   type_mark = 0,
213   type_then_trap = 1
214 };
215 
/* Pointer to a function that takes the jit_arguments block and returns an
int. SLJIT_FUNC is provided by sljit (presumably a calling convention
attribute - confirm). */
216 typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
217 
218 /* The following structure is the key data type for the recursive
219 code generator. It is allocated by compile_matchingpath, and contains
220 the arguments for compile_backtrackingpath. Must be the first member
221 of its descendants (the *_backtrack structures below all start with a
member of this type). */
222 typedef struct backtrack_common {
223   /* Concatenation stack. */
224   struct backtrack_common *prev;
225   jump_list *nextbacktracks;
226   /* Internal stack (for component operators). */
227   struct backtrack_common *top;
228   jump_list *topbacktracks;
229   /* Opcode pointer. */
230   pcre_uchar *cc;
231 } backtrack_common;
232 
/* Backtrack data of an assertion. */
233 typedef struct assert_backtrack {
234   backtrack_common common;
235   jump_list *condfailed;
236   /* Less than 0 if a frame is not needed. */
237   int framesize;
238   /* Points to our private memory word on the stack. */
239   int private_data_ptr;
240   /* For iterators. */
241   struct sljit_label *matchingpath;
242 } assert_backtrack;
243 
/* Backtrack data of a bracket. Which member of the union u is meaningful
depends on the opcode, as noted on each member. */
244 typedef struct bracket_backtrack {
245   backtrack_common common;
246   /* Where to continue if an alternative is successfully matched. */
247   struct sljit_label *alternative_matchingpath;
248   /* For rmin and rmax iterators. */
249   struct sljit_label *recursive_matchingpath;
250   /* For greedy ? operator. */
251   struct sljit_label *zero_matchingpath;
252   /* Contains the branches of a failed condition. */
253   union {
254     /* Both for OP_COND, OP_SCOND. */
255     jump_list *condfailed;
256     assert_backtrack *assert;
257     /* For OP_ONCE. Less than 0 if not needed. */
258     int framesize;
259   } u;
260   /* Points to our private memory word on the stack. */
261   int private_data_ptr;
262 } bracket_backtrack;
263 
/* Backtrack data of a bracketpos construct. NOTE(review): presumably the
possessive bracket opcodes (OP_BRAPOS and friends) - confirm. */
264 typedef struct bracketpos_backtrack {
265   backtrack_common common;
266   /* Points to our private memory word on the stack. */
267   int private_data_ptr;
268   /* Reverting stack is needed. */
269   int framesize;
270   /* Allocated stack size. */
271   int stacksize;
272 } bracketpos_backtrack;
273 
/* Backtrack data holding a single matching path label.
NOTE(review): presumably used for OP_BRAMINZERO - confirm. */
274 typedef struct braminzero_backtrack {
275   backtrack_common common;
276   struct sljit_label *matchingpath;
277 } braminzero_backtrack;
278 
/* Backtrack data of a character iterator. The union holds either a list
of backtrack jumps or the charpos record. */
279 typedef struct char_iterator_backtrack {
280   backtrack_common common;
281   /* Next iteration. */
282   struct sljit_label *matchingpath;
283   union {
284     jump_list *backtracks;
285     struct {
          /* NOTE(review): presumably describes a character (chr, plus the
             bit that differs in its other case) to look for, valid only
             when enabled is set - confirm against the users. */
286       unsigned int othercasebit;
287       pcre_uchar chr;
288       BOOL enabled;
289     } charpos;
290   } u;
291 } char_iterator_backtrack;
292 
/* Backtrack data of a ref iterator.
NOTE(review): presumably repeated back references - confirm. */
293 typedef struct ref_iterator_backtrack {
294   backtrack_common common;
295   /* Next iteration. */
296   struct sljit_label *matchingpath;
297 } ref_iterator_backtrack;
298 
/* One element of a singly linked list of recursion entry points. */
299 typedef struct recurse_entry {
300   struct recurse_entry *next;
301   /* Contains the function entry. */
302   struct sljit_label *entry;
303   /* Collects the calls until the function is created. */
304   jump_list *calls;
305   /* Points to the starting opcode. */
306   sljit_sw start;
307 } recurse_entry;
308 
/* Backtrack data of a recursion. */
309 typedef struct recurse_backtrack {
310   backtrack_common common;
      /* NOTE(review): presumably TRUE when the recursed pattern was
         compiled in place instead of being called - confirm. */
311   BOOL inlined_pattern;
312 } recurse_backtrack;
313 
/* Extra opcode value placed right after the regular opcode table
(it equals OP_TABLE_LENGTH). */
314 #define OP_THEN_TRAP OP_TABLE_LENGTH
315 
/* Backtrack data belonging to OP_THEN_TRAP. */
316 typedef struct then_trap_backtrack {
317   backtrack_common common;
318   /* If then_trap is not NULL, this structure contains the real
319   then_trap for the backtracking path. */
320   struct then_trap_backtrack *then_trap;
321   /* Points to the starting opcode. */
322   sljit_sw start;
323   /* Exit point for the then opcodes of this alternative. */
324   jump_list *quit;
325   /* Frame size of the current alternative. */
326   int framesize;
327 } then_trap_backtrack;
328 
/* NOTE(review): not used in the visible part of the file; presumably an
upper bound for the number of range boundaries handled by the character
class optimizations - confirm against the users. */
329 #define MAX_RANGE_SIZE 4
330 
/* Shared state of the code generator. The helpers receive it through a
parameter named 'common', and several macros defined below (OVECTOR_START,
OVECTOR_PRIV, PRIVATE_DATA, DEFINE_COMPILER) read its members through that
name. The sljit_s32 "*_ptr" members hold offsets; those assigned in
check_opcode_types are taken from ovector_start and stay 0 when unused. */
331 typedef struct compiler_common {
332   /* The sljit generic compiler. */
333   struct sljit_compiler *compiler;
334   /* First byte code. */
335   pcre_uchar *start;
336   /* Maps private data offset to each opcode. */
337   sljit_s32 *private_data_ptrs;
338   /* Chain list of read-only data ptrs. */
339   void *read_only_data_head;
340   /* Tells whether the capturing bracket is optimized. */
341   sljit_u8 *optimized_cbracket;
342   /* Tells whether the starting offset is a target of then. */
343   sljit_u8 *then_offsets;
344   /* Current position where a THEN must jump. */
345   then_trap_backtrack *then_trap;
346   /* Starting offset of private data for capturing brackets. */
347   sljit_s32 cbra_ptr;
348   /* Output vector starting point. Must be divisible by 2. */
349   sljit_s32 ovector_start;
350   /* Points to the starting character of the current match. */
351   sljit_s32 start_ptr;
352   /* Last known position of the requested byte. */
353   sljit_s32 req_char_ptr;
354   /* Head of the last recursion. */
355   sljit_s32 recursive_head_ptr;
356   /* First inspected character for partial matching.
357      (Needed for avoiding zero length partial matches.) */
358   sljit_s32 start_used_ptr;
359   /* Starting pointer for partial soft matches. */
360   sljit_s32 hit_start;
361   /* Pointer of the match end position. */
362   sljit_s32 match_end_ptr;
363   /* Points to the marked string. */
364   sljit_s32 mark_ptr;
365   /* Recursive control verb management chain. */
366   sljit_s32 control_head_ptr;
367   /* Points to the last matched capture block index. */
368   sljit_s32 capture_last_ptr;
369   /* Fast forward skipping byte code pointer. */
370   pcre_uchar *fast_forward_bc_ptr;
371   /* Locals used by fast fail optimization. */
372   sljit_s32 fast_fail_start_ptr;
373   sljit_s32 fast_fail_end_ptr;
374 
375   /* Flipped and lower case tables. */
376   const sljit_u8 *fcc;
377   sljit_sw lcc;
378   /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
379   int mode;
380   /* TRUE, when minlength is greater than 0.
         NOTE(review): the member name reads as the opposite condition
         (a minimum length of 0), and check_opcode_types also sets it to
         TRUE for OP_SET_SOM - confirm the intended meaning. */
381   BOOL might_be_empty;
382   /* \K is found in the pattern. */
383   BOOL has_set_som;
384   /* (*SKIP:arg) is found in the pattern. */
385   BOOL has_skip_arg;
386   /* (*THEN) is found in the pattern. */
387   BOOL has_then;
388   /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
389   BOOL has_skip_in_assert_back;
390   /* Currently in recurse or negative assert. */
391   BOOL local_exit;
392   /* Currently in a positive assert. */
393   BOOL positive_assert;
394   /* Newline control. */
395   int nltype;
396   sljit_u32 nlmax;
397   sljit_u32 nlmin;
398   int newline;
399   int bsr_nltype;
400   sljit_u32 bsr_nlmax;
401   sljit_u32 bsr_nlmin;
402   /* Dollar endonly. */
403   int endonly;
404   /* Tables. */
405   sljit_sw ctypes;
406   /* Named capturing brackets. */
407   pcre_uchar *name_table;
408   sljit_sw name_count;
409   sljit_sw name_entry_size;
410 
411   /* Labels and jump lists. */
412   struct sljit_label *partialmatchlabel;
413   struct sljit_label *quit_label;
414   struct sljit_label *forced_quit_label;
415   struct sljit_label *accept_label;
416   struct sljit_label *ff_newline_shortcut;
417   stub_list *stubs;
418   label_addr_list *label_addrs;
419   recurse_entry *entries;
420   recurse_entry *currententry;
421   jump_list *partialmatch;
422   jump_list *quit;
423   jump_list *positive_assert_quit;
424   jump_list *forced_quit;
425   jump_list *accept;
426   jump_list *calllimit;
427   jump_list *stackalloc;
428   jump_list *revertframes;
429   jump_list *wordboundary;
430   jump_list *anynewline;
431   jump_list *hspace;
432   jump_list *vspace;
433   jump_list *casefulcmp;
434   jump_list *caselesscmp;
435   jump_list *reset_match;
436   BOOL jscript_compat;
      /* The remaining members exist only in UTF builds; the utfread* jump
         lists additionally require the 8 bit library. */
437 #ifdef SUPPORT_UTF
438   BOOL utf;
439 #ifdef SUPPORT_UCP
440   BOOL use_ucp;
441   jump_list *getucd;
442 #endif
443 #ifdef COMPILE_PCRE8
444   jump_list *utfreadchar;
445   jump_list *utfreadchar16;
446   jump_list *utfreadtype8;
447 #endif
448 #endif /* SUPPORT_UTF */
449 } compiler_common;
450 
451 /* For byte_sequence_compare. When SLJIT_UNALIGNED is defined and non-zero,
the structure additionally carries two unions (c and oc) that overlay an
array of character units (4 x 8 bit, 2 x 16 bit or 1 x 32 bit, depending on
the library width) with 16 and 32 bit integer views of the same bytes. */
452 
453 typedef struct compare_context {
454   int length;
455   int sourcereg;
456 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
457   int ucharptr;
458   union {
459     sljit_s32 asint;
460     sljit_u16 asushort;
461 #if defined COMPILE_PCRE8
462     sljit_u8 asbyte;
463     sljit_u8 asuchars[4];
464 #elif defined COMPILE_PCRE16
465     sljit_u16 asuchars[2];
466 #elif defined COMPILE_PCRE32
467     sljit_u32 asuchars[1];
468 #endif
469   } c;
      /* Same layout as c. NOTE(review): presumably holds the other-case
         counterpart of the data in c - confirm in byte_sequence_compare. */
470   union {
471     sljit_s32 asint;
472     sljit_u16 asushort;
473 #if defined COMPILE_PCRE8
474     sljit_u8 asbyte;
475     sljit_u8 asuchars[4];
476 #elif defined COMPILE_PCRE16
477     sljit_u16 asuchars[2];
478 #elif defined COMPILE_PCRE32
479     sljit_u32 asuchars[1];
480 #endif
481   } oc;
482 #endif
483 } compare_context;
484 
485 /* Undefine sljit macros. CMP is defined again further down as a
shortcut for sljit_emit_cmp. */
486 #undef CMP
487 
488 /* Used for accessing the elements of the stack: STACK(i) is the byte
offset of the i-th machine word (sljit_sw), as an int. */
489 #define STACK(i)      ((i) * (int)sizeof(sljit_sw))
490 
/* Record whether sljit prefers R3 as its shift register; any preference
other than R2 or R3 is rejected at compile time. */
491 #ifdef SLJIT_PREF_SHIFT_REG
492 #if SLJIT_PREF_SHIFT_REG == SLJIT_R2
493 /* Nothing. */
494 #elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
495 #define SHIFT_REG_IS_R3
496 #else
497 #error "Unsupported shift register"
498 #endif
499 #endif
500 
/* Register assignment of the generated code. TMP2 and TMP3 swap between
R2 and R3 when SHIFT_REG_IS_R3 is defined, so that TMP2 is R3 in that
case. (In sljit naming the SLJIT_Rn registers are scratch registers and
the SLJIT_Sn registers are saved registers - see sljitLir.h.) */
501 #define TMP1          SLJIT_R0
502 #ifdef SHIFT_REG_IS_R3
503 #define TMP2          SLJIT_R3
504 #define TMP3          SLJIT_R2
505 #else
506 #define TMP2          SLJIT_R2
507 #define TMP3          SLJIT_R3
508 #endif
509 #define STR_PTR       SLJIT_S0
510 #define STR_END       SLJIT_S1
511 #define STACK_TOP     SLJIT_R1
512 #define STACK_LIMIT   SLJIT_S2
513 #define COUNT_MATCH   SLJIT_S3
514 #define ARGUMENTS     SLJIT_S4
515 #define RETURN_ADDR   SLJIT_R4
516 
517 /* Local space layout. All values are byte offsets, expressed as
multiples of sizeof(sljit_sw). */
518 /* These two locals can be used by the current opcode. */
519 #define LOCALS0          (0 * sizeof(sljit_sw))
520 #define LOCALS1          (1 * sizeof(sljit_sw))
521 /* Two local variables for possessive quantifiers (char1 cannot use them). */
522 #define POSSESSIVE0      (2 * sizeof(sljit_sw))
523 #define POSSESSIVE1      (3 * sizeof(sljit_sw))
524 /* Max limit of recursions. */
525 #define LIMIT_MATCH      (4 * sizeof(sljit_sw))
526 /* The output vector is stored on the stack, and contains pointers
527 to characters. The vector data is divided into two groups: the first
528 group contains the start / end character pointers, and the second is
529 the start pointers when the end of the capturing group has not yet been
reached. The macros below need a variable named 'common' in scope;
PRIVATE_DATA(cc) indexes private_data_ptrs by the distance of cc from the
first byte code. */
530 #define OVECTOR_START    (common->ovector_start)
531 #define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
532 #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
533 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
534 
/* Move operation matching the character unit width of the library being
built (8, 16 or 32 bit); any other configuration is a compile error. */
535 #if defined COMPILE_PCRE8
536 #define MOV_UCHAR  SLJIT_MOV_U8
537 #elif defined COMPILE_PCRE16
538 #define MOV_UCHAR  SLJIT_MOV_U16
539 #elif defined COMPILE_PCRE32
540 #define MOV_UCHAR  SLJIT_MOV_U32
541 #else
542 #error Unsupported compiling mode
543 #endif
544 
545 /* Shortcuts. Apart from SET_LABEL, every emitter shortcut below expands
to an sljit call on a local variable named 'compiler'; DEFINE_COMPILER
declares that variable from 'common', which must therefore be in scope. */
546 #define DEFINE_COMPILER \
547   struct sljit_compiler *compiler = common->compiler
548 #define OP1(op, dst, dstw, src, srcw) \
549   sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
550 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
551   sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
552 #define LABEL() \
553   sljit_emit_label(compiler)
554 #define JUMP(type) \
555   sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
556 #define JUMPTO(type, label) \
557   sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an earlier jump to a label emitted at the current position. */
558 #define JUMPHERE(jump) \
559   sljit_set_label((jump), sljit_emit_label(compiler))
560 #define SET_LABEL(jump, label) \
561   sljit_set_label((jump), (label))
562 #define CMP(type, src1, src1w, src2, src2w) \
563   sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an already existing label. */
564 #define CMPTO(type, src1, src1w, src2, src2w, label) \
565   sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
566 #define OP_FLAGS(op, dst, dstw, type) \
567   sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
568 #define GET_LOCAL_BASE(dst, dstw, offset) \
569   sljit_get_local_base(compiler, (dst), (dstw), (offset))
570 
/* NOTE(review): neither constant is used in the visible part of the file.
READ_CHAR_MAX is presumably the "no upper limit" argument of the character
readers, and INVALID_UTF_CHAR a placeholder value for invalid UTF input -
confirm against the users. */
571 #define READ_CHAR_MAX 0x7fffffff
572 
573 #define INVALID_UTF_CHAR 888
574 
/* Returns the address of the first opcode after the bracket (or
assertion) that starts at cc. Each alternative is stepped over by adding
the link value stored at offset 1; the closing ket opcode and its link
are skipped at the end. */
static pcre_uchar *bracketend(pcre_uchar *cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* The first hop is unconditional, further hops start from an OP_ALT. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + 1 + LINK_SIZE;
}
583 
/* Returns the number of alternatives of the bracket (or assertion) that
starts at cc. The result is at least 1: the first alternative is always
counted, and one more is added for every OP_ALT met while following the
links. */
static int no_alternatives(pcre_uchar *cc)
{
int alternatives = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

for (cc += GET(cc, 1); *cc == OP_ALT; cc += GET(cc, 1))
  alternatives++;

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
597 
598 /* Functions which might need modification for every newly supported opcode:
599  next_opcode
600  check_opcode_types
601  set_private_data_ptrs
602  get_framesize
603  init_frame
604  get_private_data_copy_length
605  copy_private_data
606  compile_matchingpath
607  compile_backtrackingpath
608 */
609 
/* Steps over the single opcode at cc and returns the address of the one
that follows it. NULL is returned for OP_ANYBYTE when UTF mode is active.
An opcode without a case below reaches SLJIT_UNREACHABLE() and also
yields NULL. The common argument is only read in UTF enabled builds. */
static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
{
pcre_uchar *next;

SLJIT_UNUSED_ARG(common);
switch(*cc)
  {
  /* Opcodes whose size is taken directly from the OP_lengths table. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS: case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE: case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ONCE: case OP_ONCE_NC:
  case OP_BRA: case OP_BRAPOS: case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS: case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF: case OP_RREF: case OP_DNRREF: case OP_DEF:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE: case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes that end with a character: in UTF mode the extra code units
  of that character are added to the table length. */
  case OP_CHAR: case OP_CHARI: case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(next[-1]))
    next += GET_EXTRALEN(next[-1]);
#endif
  return next;

  /* Type repeats: the result is one unit short of the table length, so
  the caller continues at the last unit of this opcode. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  case OP_ANYBYTE:
#ifdef SUPPORT_UTF
  if (common->utf)
    return NULL;
#endif
  return cc + 1;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  /* The size of an extended class is stored after the opcode. */
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: the size depends on the value in cc[1]. */
  case OP_MARK: case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  /* All opcodes are supported now! */
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
804 
check_opcode_types(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend)805 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
806 {
807 int count;
808 pcre_uchar *slot;
809 pcre_uchar *assert_back_end = cc - 1;
810 
811 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
812 while (cc < ccend)
813   {
814   switch(*cc)
815     {
816     case OP_SET_SOM:
817     common->has_set_som = TRUE;
818     common->might_be_empty = TRUE;
819     cc += 1;
820     break;
821 
822     case OP_REF:
823     case OP_REFI:
824     common->optimized_cbracket[GET2(cc, 1)] = 0;
825     cc += 1 + IMM2_SIZE;
826     break;
827 
828     case OP_CBRAPOS:
829     case OP_SCBRAPOS:
830     common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
831     cc += 1 + LINK_SIZE + IMM2_SIZE;
832     break;
833 
834     case OP_COND:
835     case OP_SCOND:
836     /* Only AUTO_CALLOUT can insert this opcode. We do
837        not intend to support this case. */
838     if (cc[1 + LINK_SIZE] == OP_CALLOUT)
839       return FALSE;
840     cc += 1 + LINK_SIZE;
841     break;
842 
843     case OP_CREF:
844     common->optimized_cbracket[GET2(cc, 1)] = 0;
845     cc += 1 + IMM2_SIZE;
846     break;
847 
848     case OP_DNREF:
849     case OP_DNREFI:
850     case OP_DNCREF:
851     count = GET2(cc, 1 + IMM2_SIZE);
852     slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
853     while (count-- > 0)
854       {
855       common->optimized_cbracket[GET2(slot, 0)] = 0;
856       slot += common->name_entry_size;
857       }
858     cc += 1 + 2 * IMM2_SIZE;
859     break;
860 
861     case OP_RECURSE:
862     /* Set its value only once. */
863     if (common->recursive_head_ptr == 0)
864       {
865       common->recursive_head_ptr = common->ovector_start;
866       common->ovector_start += sizeof(sljit_sw);
867       }
868     cc += 1 + LINK_SIZE;
869     break;
870 
871     case OP_CALLOUT:
872     if (common->capture_last_ptr == 0)
873       {
874       common->capture_last_ptr = common->ovector_start;
875       common->ovector_start += sizeof(sljit_sw);
876       }
877     cc += 2 + 2 * LINK_SIZE;
878     break;
879 
880     case OP_ASSERTBACK:
881     slot = bracketend(cc);
882     if (slot > assert_back_end)
883       assert_back_end = slot;
884     cc += 1 + LINK_SIZE;
885     break;
886 
887     case OP_THEN_ARG:
888     common->has_then = TRUE;
889     common->control_head_ptr = 1;
890     /* Fall through. */
891 
892     case OP_PRUNE_ARG:
893     case OP_MARK:
894     if (common->mark_ptr == 0)
895       {
896       common->mark_ptr = common->ovector_start;
897       common->ovector_start += sizeof(sljit_sw);
898       }
899     cc += 1 + 2 + cc[1];
900     break;
901 
902     case OP_THEN:
903     common->has_then = TRUE;
904     common->control_head_ptr = 1;
905     cc += 1;
906     break;
907 
908     case OP_SKIP:
909     if (cc < assert_back_end)
910       common->has_skip_in_assert_back = TRUE;
911     cc += 1;
912     break;
913 
914     case OP_SKIP_ARG:
915     common->control_head_ptr = 1;
916     common->has_skip_arg = TRUE;
917     if (cc < assert_back_end)
918       common->has_skip_in_assert_back = TRUE;
919     cc += 1 + 2 + cc[1];
920     break;
921 
922     default:
923     cc = next_opcode(common, cc);
924     if (cc == NULL)
925       return FALSE;
926     break;
927     }
928   }
929 return TRUE;
930 }
931 
is_accelerated_repeat(pcre_uchar * cc)932 static BOOL is_accelerated_repeat(pcre_uchar *cc)
933 {
934 switch(*cc)
935   {
936   case OP_TYPESTAR:
937   case OP_TYPEMINSTAR:
938   case OP_TYPEPLUS:
939   case OP_TYPEMINPLUS:
940   case OP_TYPEPOSSTAR:
941   case OP_TYPEPOSPLUS:
942   return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
943 
944   case OP_STAR:
945   case OP_MINSTAR:
946   case OP_PLUS:
947   case OP_MINPLUS:
948   case OP_POSSTAR:
949   case OP_POSPLUS:
950 
951   case OP_STARI:
952   case OP_MINSTARI:
953   case OP_PLUSI:
954   case OP_MINPLUSI:
955   case OP_POSSTARI:
956   case OP_POSPLUSI:
957 
958   case OP_NOTSTAR:
959   case OP_NOTMINSTAR:
960   case OP_NOTPLUS:
961   case OP_NOTMINPLUS:
962   case OP_NOTPOSSTAR:
963   case OP_NOTPOSPLUS:
964 
965   case OP_NOTSTARI:
966   case OP_NOTMINSTARI:
967   case OP_NOTPLUSI:
968   case OP_NOTMINPLUSI:
969   case OP_NOTPOSSTARI:
970   case OP_NOTPOSPLUSI:
971   return TRUE;
972 
973   case OP_CLASS:
974   case OP_NCLASS:
975 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
976   case OP_XCLASS:
977   cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
978 #else
979   cc += (1 + (32 / sizeof(pcre_uchar)));
980 #endif
981 
982   switch(*cc)
983     {
984     case OP_CRSTAR:
985     case OP_CRMINSTAR:
986     case OP_CRPLUS:
987     case OP_CRMINPLUS:
988     case OP_CRPOSSTAR:
989     case OP_CRPOSPLUS:
990     return TRUE;
991     }
992   break;
993   }
994 return FALSE;
995 }
996 
detect_fast_forward_skip(compiler_common * common,int * private_data_start)997 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
998 {
999 pcre_uchar *cc = common->start;
1000 pcre_uchar *end;
1001 
1002 /* Skip not repeated brackets. */
1003 while (TRUE)
1004   {
1005   switch(*cc)
1006     {
1007     case OP_SOD:
1008     case OP_SOM:
1009     case OP_SET_SOM:
1010     case OP_NOT_WORD_BOUNDARY:
1011     case OP_WORD_BOUNDARY:
1012     case OP_EODN:
1013     case OP_EOD:
1014     case OP_CIRC:
1015     case OP_CIRCM:
1016     case OP_DOLL:
1017     case OP_DOLLM:
1018     /* Zero width assertions. */
1019     cc++;
1020     continue;
1021     }
1022 
1023   if (*cc != OP_BRA && *cc != OP_CBRA)
1024     break;
1025 
1026   end = cc + GET(cc, 1);
1027   if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1028     return FALSE;
1029   if (*cc == OP_CBRA)
1030     {
1031     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1032       return FALSE;
1033     cc += IMM2_SIZE;
1034     }
1035   cc += 1 + LINK_SIZE;
1036   }
1037 
1038 if (is_accelerated_repeat(cc))
1039   {
1040   common->fast_forward_bc_ptr = cc;
1041   common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1042   *private_data_start += sizeof(sljit_sw);
1043   return TRUE;
1044   }
1045 return FALSE;
1046 }
1047 
detect_fast_fail(compiler_common * common,pcre_uchar * cc,int * private_data_start,sljit_s32 depth)1048 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
1049 {
1050   pcre_uchar *next_alt;
1051 
1052   SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1053 
1054   if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1055     return;
1056 
1057   next_alt = bracketend(cc) - (1 + LINK_SIZE);
1058   if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1059     return;
1060 
1061   do
1062     {
1063     next_alt = cc + GET(cc, 1);
1064 
1065     cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1066 
1067     while (TRUE)
1068       {
1069       switch(*cc)
1070         {
1071         case OP_SOD:
1072         case OP_SOM:
1073         case OP_SET_SOM:
1074         case OP_NOT_WORD_BOUNDARY:
1075         case OP_WORD_BOUNDARY:
1076         case OP_EODN:
1077         case OP_EOD:
1078         case OP_CIRC:
1079         case OP_CIRCM:
1080         case OP_DOLL:
1081         case OP_DOLLM:
1082         /* Zero width assertions. */
1083         cc++;
1084         continue;
1085         }
1086       break;
1087       }
1088 
1089     if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1090       detect_fast_fail(common, cc, private_data_start, depth - 1);
1091 
1092     if (is_accelerated_repeat(cc))
1093       {
1094       common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1095 
1096       if (common->fast_fail_start_ptr == 0)
1097         common->fast_fail_start_ptr = *private_data_start;
1098 
1099       *private_data_start += sizeof(sljit_sw);
1100       common->fast_fail_end_ptr = *private_data_start;
1101 
1102       if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1103         return;
1104       }
1105 
1106     cc = next_alt;
1107     }
1108   while (*cc == OP_ALT);
1109 }
1110 
/* Returns how many private words (0, 1 or 2) the class repeat opcode at cc
is given. The callers in this file pass the address just after the class data.

  OP_CRSTAR, OP_CRPLUS                              -> 2
  OP_CRMINSTAR, OP_CRMINPLUS, OP_CRQUERY,
  OP_CRMINQUERY                                     -> 1
  OP_CRRANGE, OP_CRMINRANGE with a stored max of 0  -> 2 (greedy), 1 (minimizing)
  OP_CRRANGE, OP_CRMINRANGE otherwise               -> max - min, capped at 2
  anything else                                     -> 0

A stored maximum of 0 presumably means "no upper limit" (not visible here). */
static int get_class_iterator_size(pcre_uchar *cc)
{
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 spread;

switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  /* Handled below, the limits have to be read first. */
  break;

  default:
  return 0;
  }

lower = GET2(cc, 1);
upper = GET2(cc, 1 + IMM2_SIZE);
if (upper == 0)
  return (*cc == OP_CRMINRANGE) ? 1 : 2;

/* Unsigned on purpose: same arithmetic as the limits themselves. */
spread = upper - lower;
return (spread > 2) ? 2 : (int)spread;
}
1142 
detect_repeat(compiler_common * common,pcre_uchar * begin)1143 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
1144 {
1145 pcre_uchar *end = bracketend(begin);
1146 pcre_uchar *next;
1147 pcre_uchar *next_end;
1148 pcre_uchar *max_end;
1149 pcre_uchar type;
1150 sljit_sw length = end - begin;
1151 int min, max, i;
1152 
1153 /* Detect fixed iterations first. */
1154 if (end[-(1 + LINK_SIZE)] != OP_KET)
1155   return FALSE;
1156 
1157 /* Already detected repeat. */
1158 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1159   return TRUE;
1160 
1161 next = end;
1162 min = 1;
1163 while (1)
1164   {
1165   if (*next != *begin)
1166     break;
1167   next_end = bracketend(next);
1168   if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1169     break;
1170   next = next_end;
1171   min++;
1172   }
1173 
1174 if (min == 2)
1175   return FALSE;
1176 
1177 max = 0;
1178 max_end = next;
1179 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1180   {
1181   type = *next;
1182   while (1)
1183     {
1184     if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1185       break;
1186     next_end = bracketend(next + 2 + LINK_SIZE);
1187     if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1188       break;
1189     next = next_end;
1190     max++;
1191     }
1192 
1193   if (next[0] == type && next[1] == *begin && max >= 1)
1194     {
1195     next_end = bracketend(next + 1);
1196     if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1197       {
1198       for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1199         if (*next_end != OP_KET)
1200           break;
1201 
1202       if (i == max)
1203         {
1204         common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1205         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1206         /* +2 the original and the last. */
1207         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1208         if (min == 1)
1209           return TRUE;
1210         min--;
1211         max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1212         }
1213       }
1214     }
1215   }
1216 
1217 if (min >= 3)
1218   {
1219   common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1220   common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1221   common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1222   return TRUE;
1223   }
1224 
1225 return FALSE;
1226 }
1227 
/* Case label groups shared by the private data scanners of this file
(set_private_data_ptrs, get_private_data_copy_length, copy_private_data).
Each row below holds one opcode family: plain, caseless (I), negated (NOT)
and negated caseless (NOT...I). */

/* Single character iterators which are given one private word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Single character iterators which are given two private words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

/* Two private words as well; the scanners skip an extra IMM2_SIZE
code units for these opcodes. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Character type iterators which are given one private word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Character type iterators which are given two private words, unless the
type is OP_ANYNL or OP_EXTUNI (see set_private_data_ptrs). */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Character type iterators counted as two private words; the scanners skip
an extra IMM2_SIZE code units for these opcodes. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1279 
/* Assigns private data offsets to the opcodes between common->start and
ccend.

Offsets are handed out from *private_data_start upwards, in sljit_sw units,
and stored in common->private_data_ptrs at the index of the opcode they
belong to. On return *private_data_start holds the first unused offset. The
scan stops early once the running offset exceeds SLJIT_MAX_LOCAL_SIZE.

What gets a slot:
  - assertions, atomic groups and the repeated / possessive bracket forms
    listed below: one word each
  - OP_COND only when its link leads to OP_KETRMAX / OP_KETRMIN
  - an OP_KET which starts a repeat recorded by detect_repeat(): one word,
    and the already recorded copies are skipped
  - character, character type and class iterators: one or two words, but
    only when they are not inside a bracket range tracked by the local
    variable "end" (see the comments in the loop). */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
{
pcre_uchar *cc = common->start;
pcre_uchar *alternative;
/* End of the bracket range in which iterators must not get a private slot.
NULL means there is no such range at the moment. */
pcre_uchar *end = NULL;
int private_data_ptr = *private_data_start;
/* space: private words for an iterator; size: code units to skip (negative
when a character follows the opcode); bracketlen: length of a bracket opener. */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  /* A bracket right after OP_BRAZERO & co. is skipped by the check above
  exactly once; the flag is re-armed here and cleared again by those cases. */
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* The entry after the ket is non-zero when detect_repeat() recorded a
    repeat here: it is the distance to the end of the repeated sequence. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    repeat_check = FALSE;
    size = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    space = 1;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    space = 2;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    space = 2;
    size = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    space = 1;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    size = 1;
    break;

    case OP_TYPEUPTO:
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_TYPEMINUPTO:
    space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The repeat opcode (if any) sits right after the class data, so the
    class length has to be known before get_class_iterator_size() is asked.
    Probing cc itself would only see the class opcode and always yield 0. */
    size = 1 + 32 / sizeof(pcre_uchar);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    /* Same ordering requirement as above; the length is stored in the
    link field of the extended class. */
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      /* A negative size marks an opcode followed by a character, which may
      have extra code units in UTF mode. */
      cc += -size;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* Outside of any tracked range: track this bracket unless it is
      closed by a plain OP_KET. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1460 
1461 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL recursive,BOOL * needs_control_head)1462 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1463 {
1464 int length = 0;
1465 int possessive = 0;
1466 BOOL stack_restore = FALSE;
1467 BOOL setsom_found = recursive;
1468 BOOL setmark_found = recursive;
1469 /* The last capture is a local variable even for recursions. */
1470 BOOL capture_last_found = FALSE;
1471 
1472 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1473 SLJIT_ASSERT(common->control_head_ptr != 0);
1474 *needs_control_head = TRUE;
1475 #else
1476 *needs_control_head = FALSE;
1477 #endif
1478 
1479 if (ccend == NULL)
1480   {
1481   ccend = bracketend(cc) - (1 + LINK_SIZE);
1482   if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1483     {
1484     possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1485     /* This is correct regardless of common->capture_last_ptr. */
1486     capture_last_found = TRUE;
1487     }
1488   cc = next_opcode(common, cc);
1489   }
1490 
1491 SLJIT_ASSERT(cc != NULL);
1492 while (cc < ccend)
1493   switch(*cc)
1494     {
1495     case OP_SET_SOM:
1496     SLJIT_ASSERT(common->has_set_som);
1497     stack_restore = TRUE;
1498     if (!setsom_found)
1499       {
1500       length += 2;
1501       setsom_found = TRUE;
1502       }
1503     cc += 1;
1504     break;
1505 
1506     case OP_MARK:
1507     case OP_PRUNE_ARG:
1508     case OP_THEN_ARG:
1509     SLJIT_ASSERT(common->mark_ptr != 0);
1510     stack_restore = TRUE;
1511     if (!setmark_found)
1512       {
1513       length += 2;
1514       setmark_found = TRUE;
1515       }
1516     if (common->control_head_ptr != 0)
1517       *needs_control_head = TRUE;
1518     cc += 1 + 2 + cc[1];
1519     break;
1520 
1521     case OP_RECURSE:
1522     stack_restore = TRUE;
1523     if (common->has_set_som && !setsom_found)
1524       {
1525       length += 2;
1526       setsom_found = TRUE;
1527       }
1528     if (common->mark_ptr != 0 && !setmark_found)
1529       {
1530       length += 2;
1531       setmark_found = TRUE;
1532       }
1533     if (common->capture_last_ptr != 0 && !capture_last_found)
1534       {
1535       length += 2;
1536       capture_last_found = TRUE;
1537       }
1538     cc += 1 + LINK_SIZE;
1539     break;
1540 
1541     case OP_CBRA:
1542     case OP_CBRAPOS:
1543     case OP_SCBRA:
1544     case OP_SCBRAPOS:
1545     stack_restore = TRUE;
1546     if (common->capture_last_ptr != 0 && !capture_last_found)
1547       {
1548       length += 2;
1549       capture_last_found = TRUE;
1550       }
1551     length += 3;
1552     cc += 1 + LINK_SIZE + IMM2_SIZE;
1553     break;
1554 
1555     case OP_THEN:
1556     stack_restore = TRUE;
1557     if (common->control_head_ptr != 0)
1558       *needs_control_head = TRUE;
1559     cc ++;
1560     break;
1561 
1562     default:
1563     stack_restore = TRUE;
1564     /* Fall through. */
1565 
1566     case OP_NOT_WORD_BOUNDARY:
1567     case OP_WORD_BOUNDARY:
1568     case OP_NOT_DIGIT:
1569     case OP_DIGIT:
1570     case OP_NOT_WHITESPACE:
1571     case OP_WHITESPACE:
1572     case OP_NOT_WORDCHAR:
1573     case OP_WORDCHAR:
1574     case OP_ANY:
1575     case OP_ALLANY:
1576     case OP_ANYBYTE:
1577     case OP_NOTPROP:
1578     case OP_PROP:
1579     case OP_ANYNL:
1580     case OP_NOT_HSPACE:
1581     case OP_HSPACE:
1582     case OP_NOT_VSPACE:
1583     case OP_VSPACE:
1584     case OP_EXTUNI:
1585     case OP_EODN:
1586     case OP_EOD:
1587     case OP_CIRC:
1588     case OP_CIRCM:
1589     case OP_DOLL:
1590     case OP_DOLLM:
1591     case OP_CHAR:
1592     case OP_CHARI:
1593     case OP_NOT:
1594     case OP_NOTI:
1595 
1596     case OP_EXACT:
1597     case OP_POSSTAR:
1598     case OP_POSPLUS:
1599     case OP_POSQUERY:
1600     case OP_POSUPTO:
1601 
1602     case OP_EXACTI:
1603     case OP_POSSTARI:
1604     case OP_POSPLUSI:
1605     case OP_POSQUERYI:
1606     case OP_POSUPTOI:
1607 
1608     case OP_NOTEXACT:
1609     case OP_NOTPOSSTAR:
1610     case OP_NOTPOSPLUS:
1611     case OP_NOTPOSQUERY:
1612     case OP_NOTPOSUPTO:
1613 
1614     case OP_NOTEXACTI:
1615     case OP_NOTPOSSTARI:
1616     case OP_NOTPOSPLUSI:
1617     case OP_NOTPOSQUERYI:
1618     case OP_NOTPOSUPTOI:
1619 
1620     case OP_TYPEEXACT:
1621     case OP_TYPEPOSSTAR:
1622     case OP_TYPEPOSPLUS:
1623     case OP_TYPEPOSQUERY:
1624     case OP_TYPEPOSUPTO:
1625 
1626     case OP_CLASS:
1627     case OP_NCLASS:
1628     case OP_XCLASS:
1629     case OP_CALLOUT:
1630 
1631     cc = next_opcode(common, cc);
1632     SLJIT_ASSERT(cc != NULL);
1633     break;
1634     }
1635 
1636 /* Possessive quantifiers can use a special case. */
1637 if (SLJIT_UNLIKELY(possessive == length))
1638   return stack_restore ? no_frame : no_stack;
1639 
1640 if (length > 0)
1641   return length + 1;
1642 return stack_restore ? no_frame : no_stack;
1643 }
1644 
init_frame(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,int stackpos,int stacktop,BOOL recursive)1645 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1646 {
1647 DEFINE_COMPILER;
1648 BOOL setsom_found = recursive;
1649 BOOL setmark_found = recursive;
1650 /* The last capture is a local variable even for recursions. */
1651 BOOL capture_last_found = FALSE;
1652 int offset;
1653 
1654 /* >= 1 + shortest item size (2) */
1655 SLJIT_UNUSED_ARG(stacktop);
1656 SLJIT_ASSERT(stackpos >= stacktop + 2);
1657 
1658 stackpos = STACK(stackpos);
1659 if (ccend == NULL)
1660   {
1661   ccend = bracketend(cc) - (1 + LINK_SIZE);
1662   if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1663     cc = next_opcode(common, cc);
1664   }
1665 
1666 SLJIT_ASSERT(cc != NULL);
1667 while (cc < ccend)
1668   switch(*cc)
1669     {
1670     case OP_SET_SOM:
1671     SLJIT_ASSERT(common->has_set_som);
1672     if (!setsom_found)
1673       {
1674       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1675       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1676       stackpos -= (int)sizeof(sljit_sw);
1677       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1678       stackpos -= (int)sizeof(sljit_sw);
1679       setsom_found = TRUE;
1680       }
1681     cc += 1;
1682     break;
1683 
1684     case OP_MARK:
1685     case OP_PRUNE_ARG:
1686     case OP_THEN_ARG:
1687     SLJIT_ASSERT(common->mark_ptr != 0);
1688     if (!setmark_found)
1689       {
1690       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1691       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1692       stackpos -= (int)sizeof(sljit_sw);
1693       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1694       stackpos -= (int)sizeof(sljit_sw);
1695       setmark_found = TRUE;
1696       }
1697     cc += 1 + 2 + cc[1];
1698     break;
1699 
1700     case OP_RECURSE:
1701     if (common->has_set_som && !setsom_found)
1702       {
1703       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1704       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1705       stackpos -= (int)sizeof(sljit_sw);
1706       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1707       stackpos -= (int)sizeof(sljit_sw);
1708       setsom_found = TRUE;
1709       }
1710     if (common->mark_ptr != 0 && !setmark_found)
1711       {
1712       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1713       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1714       stackpos -= (int)sizeof(sljit_sw);
1715       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1716       stackpos -= (int)sizeof(sljit_sw);
1717       setmark_found = TRUE;
1718       }
1719     if (common->capture_last_ptr != 0 && !capture_last_found)
1720       {
1721       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1722       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1723       stackpos -= (int)sizeof(sljit_sw);
1724       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1725       stackpos -= (int)sizeof(sljit_sw);
1726       capture_last_found = TRUE;
1727       }
1728     cc += 1 + LINK_SIZE;
1729     break;
1730 
1731     case OP_CBRA:
1732     case OP_CBRAPOS:
1733     case OP_SCBRA:
1734     case OP_SCBRAPOS:
1735     if (common->capture_last_ptr != 0 && !capture_last_found)
1736       {
1737       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1738       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1739       stackpos -= (int)sizeof(sljit_sw);
1740       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1741       stackpos -= (int)sizeof(sljit_sw);
1742       capture_last_found = TRUE;
1743       }
1744     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1745     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1746     stackpos -= (int)sizeof(sljit_sw);
1747     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1748     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1749     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1750     stackpos -= (int)sizeof(sljit_sw);
1751     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1752     stackpos -= (int)sizeof(sljit_sw);
1753 
1754     cc += 1 + LINK_SIZE + IMM2_SIZE;
1755     break;
1756 
1757     default:
1758     cc = next_opcode(common, cc);
1759     SLJIT_ASSERT(cc != NULL);
1760     break;
1761     }
1762 
1763 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1764 SLJIT_ASSERT(stackpos == STACK(stacktop));
1765 }
1766 
get_private_data_copy_length(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL needs_control_head)1767 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1768 {
1769 int private_data_length = needs_control_head ? 3 : 2;
1770 int size;
1771 pcre_uchar *alternative;
1772 /* Calculate the sum of the private machine words. */
1773 while (cc < ccend)
1774   {
1775   size = 0;
1776   switch(*cc)
1777     {
1778     case OP_KET:
1779     if (PRIVATE_DATA(cc) != 0)
1780       {
1781       private_data_length++;
1782       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1783       cc += PRIVATE_DATA(cc + 1);
1784       }
1785     cc += 1 + LINK_SIZE;
1786     break;
1787 
1788     case OP_ASSERT:
1789     case OP_ASSERT_NOT:
1790     case OP_ASSERTBACK:
1791     case OP_ASSERTBACK_NOT:
1792     case OP_ONCE:
1793     case OP_ONCE_NC:
1794     case OP_BRAPOS:
1795     case OP_SBRA:
1796     case OP_SBRAPOS:
1797     case OP_SCOND:
1798     private_data_length++;
1799     SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1800     cc += 1 + LINK_SIZE;
1801     break;
1802 
1803     case OP_CBRA:
1804     case OP_SCBRA:
1805     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1806       private_data_length++;
1807     cc += 1 + LINK_SIZE + IMM2_SIZE;
1808     break;
1809 
1810     case OP_CBRAPOS:
1811     case OP_SCBRAPOS:
1812     private_data_length += 2;
1813     cc += 1 + LINK_SIZE + IMM2_SIZE;
1814     break;
1815 
1816     case OP_COND:
1817     /* Might be a hidden SCOND. */
1818     alternative = cc + GET(cc, 1);
1819     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1820       private_data_length++;
1821     cc += 1 + LINK_SIZE;
1822     break;
1823 
1824     CASE_ITERATOR_PRIVATE_DATA_1
1825     if (PRIVATE_DATA(cc))
1826       private_data_length++;
1827     cc += 2;
1828 #ifdef SUPPORT_UTF
1829     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1830 #endif
1831     break;
1832 
1833     CASE_ITERATOR_PRIVATE_DATA_2A
1834     if (PRIVATE_DATA(cc))
1835       private_data_length += 2;
1836     cc += 2;
1837 #ifdef SUPPORT_UTF
1838     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1839 #endif
1840     break;
1841 
1842     CASE_ITERATOR_PRIVATE_DATA_2B
1843     if (PRIVATE_DATA(cc))
1844       private_data_length += 2;
1845     cc += 2 + IMM2_SIZE;
1846 #ifdef SUPPORT_UTF
1847     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1848 #endif
1849     break;
1850 
1851     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1852     if (PRIVATE_DATA(cc))
1853       private_data_length++;
1854     cc += 1;
1855     break;
1856 
1857     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1858     if (PRIVATE_DATA(cc))
1859       private_data_length += 2;
1860     cc += 1;
1861     break;
1862 
1863     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1864     if (PRIVATE_DATA(cc))
1865       private_data_length += 2;
1866     cc += 1 + IMM2_SIZE;
1867     break;
1868 
1869     case OP_CLASS:
1870     case OP_NCLASS:
1871 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1872     case OP_XCLASS:
1873     size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1874 #else
1875     size = 1 + 32 / (int)sizeof(pcre_uchar);
1876 #endif
1877     if (PRIVATE_DATA(cc))
1878       private_data_length += get_class_iterator_size(cc + size);
1879     cc += size;
1880     break;
1881 
1882     default:
1883     cc = next_opcode(common, cc);
1884     SLJIT_ASSERT(cc != NULL);
1885     break;
1886     }
1887   }
1888 SLJIT_ASSERT(cc == ccend);
1889 return private_data_length;
1890 }
1891 
copy_private_data(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL save,int stackptr,int stacktop,BOOL needs_control_head)1892 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1893   BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1894 {
1895 DEFINE_COMPILER;
1896 int srcw[2];
1897 int count, size;
1898 BOOL tmp1next = TRUE;
1899 BOOL tmp1empty = TRUE;
1900 BOOL tmp2empty = TRUE;
1901 pcre_uchar *alternative;
1902 enum {
1903   loop,
1904   end
1905 } status;
1906 
1907 status = loop;
1908 stackptr = STACK(stackptr);
1909 stacktop = STACK(stacktop - 1);
1910 
1911 if (!save)
1912   {
1913   stacktop -= (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1914   if (stackptr < stacktop)
1915     {
1916     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1917     stackptr += sizeof(sljit_sw);
1918     tmp1empty = FALSE;
1919     }
1920   if (stackptr < stacktop)
1921     {
1922     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1923     stackptr += sizeof(sljit_sw);
1924     tmp2empty = FALSE;
1925     }
1926   /* The tmp1next must be TRUE in either way. */
1927   }
1928 
1929 SLJIT_ASSERT(common->recursive_head_ptr != 0);
1930 
1931 do
1932   {
1933   count = 0;
1934   if (cc >= ccend)
1935     {
1936     if (!save)
1937       break;
1938 
1939     count = 1;
1940     srcw[0] = common->recursive_head_ptr;
1941     if (needs_control_head)
1942       {
1943       SLJIT_ASSERT(common->control_head_ptr != 0);
1944       count = 2;
1945       srcw[0] = common->control_head_ptr;
1946       srcw[1] = common->recursive_head_ptr;
1947       }
1948     status = end;
1949     }
1950   else switch(*cc)
1951     {
1952     case OP_KET:
1953     if (PRIVATE_DATA(cc) != 0)
1954       {
1955       count = 1;
1956       srcw[0] = PRIVATE_DATA(cc);
1957       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1958       cc += PRIVATE_DATA(cc + 1);
1959       }
1960     cc += 1 + LINK_SIZE;
1961     break;
1962 
1963     case OP_ASSERT:
1964     case OP_ASSERT_NOT:
1965     case OP_ASSERTBACK:
1966     case OP_ASSERTBACK_NOT:
1967     case OP_ONCE:
1968     case OP_ONCE_NC:
1969     case OP_BRAPOS:
1970     case OP_SBRA:
1971     case OP_SBRAPOS:
1972     case OP_SCOND:
1973     count = 1;
1974     srcw[0] = PRIVATE_DATA(cc);
1975     SLJIT_ASSERT(srcw[0] != 0);
1976     cc += 1 + LINK_SIZE;
1977     break;
1978 
1979     case OP_CBRA:
1980     case OP_SCBRA:
1981     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1982       {
1983       count = 1;
1984       srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1985       }
1986     cc += 1 + LINK_SIZE + IMM2_SIZE;
1987     break;
1988 
1989     case OP_CBRAPOS:
1990     case OP_SCBRAPOS:
1991     count = 2;
1992     srcw[0] = PRIVATE_DATA(cc);
1993     srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1994     SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1995     cc += 1 + LINK_SIZE + IMM2_SIZE;
1996     break;
1997 
1998     case OP_COND:
1999     /* Might be a hidden SCOND. */
2000     alternative = cc + GET(cc, 1);
2001     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2002       {
2003       count = 1;
2004       srcw[0] = PRIVATE_DATA(cc);
2005       SLJIT_ASSERT(srcw[0] != 0);
2006       }
2007     cc += 1 + LINK_SIZE;
2008     break;
2009 
2010     CASE_ITERATOR_PRIVATE_DATA_1
2011     if (PRIVATE_DATA(cc))
2012       {
2013       count = 1;
2014       srcw[0] = PRIVATE_DATA(cc);
2015       }
2016     cc += 2;
2017 #ifdef SUPPORT_UTF
2018     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2019 #endif
2020     break;
2021 
2022     CASE_ITERATOR_PRIVATE_DATA_2A
2023     if (PRIVATE_DATA(cc))
2024       {
2025       count = 2;
2026       srcw[0] = PRIVATE_DATA(cc);
2027       srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2028       }
2029     cc += 2;
2030 #ifdef SUPPORT_UTF
2031     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2032 #endif
2033     break;
2034 
2035     CASE_ITERATOR_PRIVATE_DATA_2B
2036     if (PRIVATE_DATA(cc))
2037       {
2038       count = 2;
2039       srcw[0] = PRIVATE_DATA(cc);
2040       srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2041       }
2042     cc += 2 + IMM2_SIZE;
2043 #ifdef SUPPORT_UTF
2044     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2045 #endif
2046     break;
2047 
2048     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2049     if (PRIVATE_DATA(cc))
2050       {
2051       count = 1;
2052       srcw[0] = PRIVATE_DATA(cc);
2053       }
2054     cc += 1;
2055     break;
2056 
2057     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2058     if (PRIVATE_DATA(cc))
2059       {
2060       count = 2;
2061       srcw[0] = PRIVATE_DATA(cc);
2062       srcw[1] = srcw[0] + sizeof(sljit_sw);
2063       }
2064     cc += 1;
2065     break;
2066 
2067     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2068     if (PRIVATE_DATA(cc))
2069       {
2070       count = 2;
2071       srcw[0] = PRIVATE_DATA(cc);
2072       srcw[1] = srcw[0] + sizeof(sljit_sw);
2073       }
2074     cc += 1 + IMM2_SIZE;
2075     break;
2076 
2077     case OP_CLASS:
2078     case OP_NCLASS:
2079 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2080     case OP_XCLASS:
2081     size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
2082 #else
2083     size = 1 + 32 / (int)sizeof(pcre_uchar);
2084 #endif
2085     if (PRIVATE_DATA(cc))
2086       switch(get_class_iterator_size(cc + size))
2087         {
2088         case 1:
2089         count = 1;
2090         srcw[0] = PRIVATE_DATA(cc);
2091         break;
2092 
2093         case 2:
2094         count = 2;
2095         srcw[0] = PRIVATE_DATA(cc);
2096         srcw[1] = srcw[0] + sizeof(sljit_sw);
2097         break;
2098 
2099         default:
2100         SLJIT_UNREACHABLE();
2101         break;
2102         }
2103     cc += size;
2104     break;
2105 
2106     default:
2107     cc = next_opcode(common, cc);
2108     SLJIT_ASSERT(cc != NULL);
2109     break;
2110     }
2111 
2112   while (count > 0)
2113     {
2114     count--;
2115     if (save)
2116       {
2117       if (tmp1next)
2118         {
2119         if (!tmp1empty)
2120           {
2121           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2122           stackptr += sizeof(sljit_sw);
2123           }
2124         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2125         tmp1empty = FALSE;
2126         tmp1next = FALSE;
2127         }
2128       else
2129         {
2130         if (!tmp2empty)
2131           {
2132           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2133           stackptr += sizeof(sljit_sw);
2134           }
2135         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2136         tmp2empty = FALSE;
2137         tmp1next = TRUE;
2138         }
2139       }
2140     else
2141       {
2142       if (tmp1next)
2143         {
2144         SLJIT_ASSERT(!tmp1empty);
2145         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
2146         tmp1empty = stackptr >= stacktop;
2147         if (!tmp1empty)
2148           {
2149           OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2150           stackptr += sizeof(sljit_sw);
2151           }
2152         tmp1next = FALSE;
2153         }
2154       else
2155         {
2156         SLJIT_ASSERT(!tmp2empty);
2157         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
2158         tmp2empty = stackptr >= stacktop;
2159         if (!tmp2empty)
2160           {
2161           OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2162           stackptr += sizeof(sljit_sw);
2163           }
2164         tmp1next = TRUE;
2165         }
2166       }
2167     }
2168   }
2169 while (status != end);
2170 
2171 if (save)
2172   {
2173   if (tmp1next)
2174     {
2175     if (!tmp1empty)
2176       {
2177       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2178       stackptr += sizeof(sljit_sw);
2179       }
2180     if (!tmp2empty)
2181       {
2182       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2183       stackptr += sizeof(sljit_sw);
2184       }
2185     }
2186   else
2187     {
2188     if (!tmp2empty)
2189       {
2190       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2191       stackptr += sizeof(sljit_sw);
2192       }
2193     if (!tmp1empty)
2194       {
2195       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2196       stackptr += sizeof(sljit_sw);
2197       }
2198     }
2199   }
2200 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
2201 }
2202 
set_then_offsets(compiler_common * common,pcre_uchar * cc,sljit_u8 * current_offset)2203 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset)
2204 {
2205 pcre_uchar *end = bracketend(cc);
2206 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2207 
2208 /* Assert captures then. */
2209 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2210   current_offset = NULL;
2211 /* Conditional block does not. */
2212 if (*cc == OP_COND || *cc == OP_SCOND)
2213   has_alternatives = FALSE;
2214 
2215 cc = next_opcode(common, cc);
2216 if (has_alternatives)
2217   current_offset = common->then_offsets + (cc - common->start);
2218 
2219 while (cc < end)
2220   {
2221   if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2222     cc = set_then_offsets(common, cc, current_offset);
2223   else
2224     {
2225     if (*cc == OP_ALT && has_alternatives)
2226       current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2227     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2228       *current_offset = 1;
2229     cc = next_opcode(common, cc);
2230     }
2231   }
2232 
2233 return end;
2234 }
2235 
2236 #undef CASE_ITERATOR_PRIVATE_DATA_1
2237 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2238 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2239 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2240 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2241 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2242 
is_powerof2(unsigned int value)2243 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2244 {
2245 return (value & (value - 1)) == 0;
2246 }
2247 
set_jumps(jump_list * list,struct sljit_label * label)2248 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2249 {
2250 while (list)
2251   {
2252   /* sljit_set_label is clever enough to do nothing
2253   if either the jump or the label is NULL. */
2254   SET_LABEL(list->jump, label);
2255   list = list->next;
2256   }
2257 }
2258 
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)2259 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2260 {
2261 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2262 if (list_item)
2263   {
2264   list_item->next = *list;
2265   list_item->jump = jump;
2266   *list = list_item;
2267   }
2268 }
2269 
/* Records a stub: start is the jump that enters the stub, and a label
created at the current position is stored as the point to continue from.
The stub is pushed onto common->stubs; nothing happens when the node
cannot be allocated. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
2283 
/* Emits the code of every pending stub and empties common->stubs. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  /* The stub starts here: fast-call the routine collected in
  common->stackalloc, then jump back to the recorded label. */
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
2298 
/* Creates a label at the current position and remembers it together with
update_addr on the common->label_addrs list. Does nothing when the list
node cannot be allocated. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
DEFINE_COMPILER;
label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

if (entry != NULL)
  {
  entry->label = LABEL();
  entry->update_addr = update_addr;
  entry->next = common->label_addrs;
  common->label_addrs = entry;
  }
}
2312 
count_match(compiler_common * common)2313 static SLJIT_INLINE void count_match(compiler_common *common)
2314 {
2315 DEFINE_COMPILER;
2316 
2317 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2318 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2319 }
2320 
/* Emits code that reserves 'size' machine words by moving STACK_TOP
downwards, followed by a check against STACK_LIMIT. The check jumps to a
stub (see add_stub) when STACK_TOP has dropped below STACK_LIMIT.
size must be positive. */
static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
{
/* May destroy all locals and registers except TMP2. */
DEFINE_COMPILER;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
#ifdef DESTROY_REGISTERS
/* Overwrites TMP1, TMP3, RETURN_ADDR, LOCALS0 and LOCALS1 with a marker
value - presumably a debugging aid that exposes callers which wrongly
rely on these surviving an allocation. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#endif
/* The stub is entered when the new top is below the limit. */
add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
}
2337 
/* Emits code that releases 'size' machine words by moving STACK_TOP
upwards; the counterpart of allocate_stack. size must be positive. */
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
DEFINE_COMPILER;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
}
2345 
/* Allocates 'size' bytes of data which is linked into the list headed by
common->read_only_data_head. One extra sljit_uw is reserved in front of the
returned area to hold the link to the previously allocated chunk.
Returns NULL if the compiler is already in an error state or if the
allocation fails (in the latter case the compiler memory error is set). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (chunk != NULL)
  {
  /* The first word stores the old list head; the caller gets the rest. */
  *(void**)chunk = common->read_only_data_head;
  common->read_only_data_head = (void *)chunk;
  return chunk + 1;
  }

sljit_set_compiler_memory_error(compiler);
return NULL;
}
2365 
free_read_only_data(void * current,void * allocator_data)2366 static void free_read_only_data(void *current, void *allocator_data)
2367 {
2368 void *next;
2369 
2370 SLJIT_UNUSED_ARG(allocator_data);
2371 
2372 while (current != NULL)
2373   {
2374   next = *(void**)current;
2375   SLJIT_FREE(current, allocator_data);
2376   current = next;
2377   }
2378 }
2379 
/* Emits code that stores "subject begin - 1 character" into the ovector
slots with index 1 .. length - 1. Slot 0 is not written. Short vectors are
written with unrolled stores, longer ones with a generated loop.
length must be greater than 1. */
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

/* At this point we can freely use all temporary registers. */
SLJIT_ASSERT(length > 1);
/* TMP1 returns with begin - 1. (The value is computed into SLJIT_R0;
NOTE(review): confirm that TMP1 is an alias of SLJIT_R0.) */
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
if (length < 8)
  {
  /* Few slots: one store instruction per slot. */
  for (i = 1; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
  }
else
  {
  /* The SLJIT_MEM_SUPP call presumably only queries whether a store with
  pre-update addressing is available on the target - confirm against the
  sljit documentation. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
    {
    /* Pre-update form: R1 starts at slot 0 and is advanced by one word
    by each store; R2 counts the length - 1 remaining slots. */
    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  else
    {
    /* Fallback: R1 starts one word past OVECTOR_START, store first,
    then advance the pointer explicitly. */
    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
    OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  }
}
2418 
reset_fast_fail(compiler_common * common)2419 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2420 {
2421 DEFINE_COMPILER;
2422 sljit_s32 i;
2423 
2424 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2425 
2426 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2427 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2428   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2429 }
2430 
/* Emits code that restores the matcher state before a new attempt:
 - ovector slots 2 .. length - 1 are overwritten with the value held in
   OVECTOR(1);
 - the mark_ptr and control_head_ptr locals are cleared when they are in use;
 - STACK_TOP is reloaded from the 'end' field of the stack referenced by the
   jit_arguments structure;
 - TMP1 is loaded with the start_ptr local.
TMP2 and STACK_TOP serve as scratch registers while the ovector is filled.
length must be greater than 1. */
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
if (length < 8)
  {
  /* Few slots: one store instruction per slot. */
  for (i = 2; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
  }
else
  {
  /* The SLJIT_MEM_SUPP call presumably only queries whether a store with
  pre-update addressing is available - confirm against the sljit
  documentation. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
    {
    /* Pre-update form: TMP2 starts at slot 1 and is advanced by each
    store. STACK_TOP is free here and is used as the loop counter. */
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  else
    {
    /* Fallback: TMP2 starts at slot 2; store, then advance explicitly. */
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  }

/* STACK_TOP temporarily holds the arguments pointer; it is followed in two
steps (arguments->stack, then stack->end) to obtain the fresh stack top. */
OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
}
2478 
do_search_mark(sljit_sw * current,const pcre_uchar * skip_arg)2479 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2480 {
2481 while (current != NULL)
2482   {
2483   switch (current[1])
2484     {
2485     case type_then_trap:
2486     break;
2487 
2488     case type_mark:
2489     if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[2]) == 0)
2490       return current[3];
2491     break;
2492 
2493     default:
2494     SLJIT_UNREACHABLE();
2495     break;
2496     }
2497   SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
2498   current = (sljit_sw*)current[0];
2499   }
2500 return 0;
2501 }
2502 
/* Emits the code that runs after a successful match:
 - OVECTOR(1) is set to STR_PTR (its previous value is kept in SLJIT_S2);
 - the mark pointer, when used, is copied into jit_arguments.mark_ptr;
 - offset_count ovector entries are converted from pointers to character
   offsets relative to 'begin' and stored as ints in jit_arguments.offsets;
 - the return register receives a count derived from the highest-numbered
   capture pair whose first slot differs from the old OVECTOR(1) value, or
   1 when topbracket is not greater than 1. */
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *early_quit;
BOOL has_pre;

/* At this point we can freely use all registers. */
/* S2 keeps the old OVECTOR(1) value - presumably the "unset" marker that
the reset code writes into the ovector slots; it is compared against
below. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

/* R0 = arguments, R1 = offset_count, R2 = offsets - sizeof(int) (it is
incremented before each store), and finally R0 = begin. */
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));

/* has_pre tells whether a load with pre-update addressing can be used
(the SLJIT_MEM_SUPP call presumably only queries support - confirm). With
pre-update the source pointer starts one word before the ovector. */
has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));

/* Unlikely, but possible */
early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
loop = LABEL();

/* S1 = next ovector entry; S0 moves forward by one word. */
if (has_pre)
  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(int));
/* Turn the pointer into a byte distance from the subject start. */
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Byte distance -> character (code unit) distance. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
JUMPHERE(early_quit);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* Scan the capture pairs downwards, two words at a time, until a first
  slot is found which differs from the value saved in S2. R1 starts at
  topbracket + 1 and is decremented once for every pair examined. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
    {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    }
  else
    {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    }
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
  }
else
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
2581 
/* Emits the code which finishes a partial match: the return register is set
to PCRE_ERROR_PARTIAL and, depending on real_offset_count, up to three
values are written to the caller's offsets array before jumping to 'quit':
 - offsets[0]: start of the partial match (needs at least 2 slots);
 - offsets[1]: end of the subject (STR_END);
 - offsets[2]: a third position, only when at least 3 slots are available.
All positions are character offsets relative to 'begin'. Which locals supply
the first and third value depends on hard versus soft partial mode. */
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
DEFINE_COMPILER;
struct sljit_jump *jump;

/* STR_END is overwritten below through its SLJIT_S1 name. */
SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));

OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Fewer than two output slots: nothing can be stored. */
CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);

/* Store match begin and end. */
/* S0 = subject begin, R1 = output array. */
OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));

/* The third slot is written only when it exists. Its source is start_ptr
in hard partial mode and the word following hit_start otherwise. */
jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
JUMPHERE(jump);

/* R2 = start position (start_used_ptr in hard mode, hit_start in soft
mode); it is stored last. The end offset is computed in S1 first. */
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);

OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);

JUMPTO(SLJIT_JUMP, quit);
}
2623 
check_start_used_ptr(compiler_common * common)2624 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2625 {
2626 /* May destroy TMP1. */
2627 DEFINE_COMPILER;
2628 struct sljit_jump *jump;
2629 
2630 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2631   {
2632   /* The value of -1 must be kept for start_used_ptr! */
2633   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2634   /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2635   is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2636   jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2637   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2638   JUMPHERE(jump);
2639   }
2640 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2641   {
2642   jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2643   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2644   JUMPHERE(jump);
2645   }
2646 }
2647 
/* Compile-time helper: tells whether the character starting at cc has a
case variant different from itself.
 - UTF mode, code point above 127: decided by UCD_OTHERCASE when SUPPORT_UCP
   is defined, otherwise the answer is FALSE.
 - Otherwise the common->fcc table is consulted; characters for which
   MAX_255() fails have no other case. */
static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
{
/* Detects if the character has an othercase. */
unsigned int c;

#ifdef SUPPORT_UTF
if (common->utf)
  {
  GETCHAR(c, cc);
  if (c > 127)
    {
#ifdef SUPPORT_UCP
    return c != UCD_OTHERCASE(c);
#else
    return FALSE;
#endif
    }
#ifndef COMPILE_PCRE8
  /* c <= 127 here, so the table lookup needs no range check. In the 8 bit
  library the same lookup is done by the final return statement. */
  return common->fcc[c] != c;
#endif
  }
else
#endif
  c = *cc;
return MAX_255(c) ? common->fcc[c] != c : FALSE;
}
2674 
/* Compile-time helper: returns the other case of c. In UTF mode code points
above 127 are mapped with UCD_OTHERCASE when SUPPORT_UCP is defined and are
returned unchanged otherwise. Everything else goes through the common->fcc
table via TABLE_GET, with c itself passed as the third (presumably default)
argument. */
static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
{
/* Returns with the othercase. */
#ifdef SUPPORT_UTF
if (common->utf && c > 127)
  {
#ifdef SUPPORT_UCP
  return UCD_OTHERCASE(c);
#else
  return c;
#endif
  }
#endif
return TABLE_GET(c, common->fcc, c);
}
2690 
/* Compile-time helper for caseless matching. The character at cc must have
another case (asserted). When the character and its other case differ in
exactly one bit, the result is (index << 8) | mask, where mask is the
differing bit within an 8 bit unit and index selects that unit inside the
encoded character. Returns 0 when more than one bit differs.
NOTE(review): index appears to be a byte position within the encoded
sequence; confirm the exact convention against the callers. */
static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
{
/* Detects if the character and its othercase has only 1 bit difference. */
unsigned int c, oc, bit;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
int n;
#endif

/* Fetch the character (c) and its other case (oc). */
#ifdef SUPPORT_UTF
if (common->utf)
  {
  GETCHAR(c, cc);
  if (c <= 127)
    oc = common->fcc[c];
  else
    {
#ifdef SUPPORT_UCP
    oc = UCD_OTHERCASE(c);
#else
    oc = c;
#endif
    }
  }
else
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if defined COMPILE_PCRE8

#ifdef SUPPORT_UTF
if (common->utf && c > 127)
  {
  /* Each step of the loop moves six bits up in the code point and one
  unit towards the start of the sequence, beginning at the last unit
  (n starts as GET_EXTRALEN of the first unit). */
  n = GET_EXTRALEN(*cc);
  while ((bit & 0x3f) == 0)
    {
    n--;
    bit >>= 6;
    }
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UTF */
return (0 << 8) | bit;

#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32

#ifdef SUPPORT_UTF
if (common->utf && c > 65535)
  {
  /* A difference at bit 10 or above is shifted down and handled by the
  final return (indices 0 and 1); a lower difference yields index 2 or 3. */
  if (bit >= (1 << 10))
    bit >>= 10;
  else
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  }
#endif /* SUPPORT_UTF */
return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));

#endif /* COMPILE_PCRE[8|16|32] */
}
2766 
/* Emits the partial match handling for a place where the end of the subject
has been reached. Nothing is emitted in normal (JIT_COMPILE) mode, and
'force' may only be set in a partial mode.
 - Without 'force' the handling is skipped at runtime when the
   start_used_ptr local is greater than or equal to STR_PTR.
 - With 'force' in soft partial mode it is skipped only when start_used_ptr
   is -1; with 'force' in hard partial mode it is unconditional.
The handling itself stores zero into the hit_start local (soft mode) or
jumps to the partial match exit (hard mode). */
static void check_partial(compiler_common *common, BOOL force)
{
/* Checks whether a partial match has occurred. Does not modify registers. */
DEFINE_COMPILER;
struct sljit_jump *jump = NULL;

SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);

if (common->mode == JIT_COMPILE)
  return;

/* 'jump' stays NULL when the handling below must always run. */
if (!force)
  jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else
  {
  /* Jump directly when the exit label already exists, otherwise queue
  the jump on common->partialmatch. */
  if (common->partialmatchlabel != NULL)
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  else
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

if (jump != NULL)
  JUMPHERE(jump);
}
2796 
/* Emits a check for the end of the subject. Jumps taken when STR_PTR has
reached STR_END are collected in end_reached. In the partial modes the
partial match is recorded (soft) or reported (hard) first, unless the
start_used_ptr local is greater than or equal to STR_PTR. */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching modes. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
/* Both partial modes begin with the same test. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
2826 
/* Emits a check for the end of the subject which backtracks when the end
has been reached. In the partial modes the partial match is recorded (soft)
or reported (hard) first, unless the start_used_ptr local is greater than
or equal to STR_PTR. */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *has_input;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
  /* Soft mode: note the hit and keep looking for a complete match. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(has_input);
}
2855 
/* Emits code that loads the character at STR_PTR into TMP1 without
consuming it. 'max' is the largest character value the caller is interested
in: when it is below the first value which needs a multi-unit encoding
(128 for UTF-8, 0xd800 for UTF-16) only the first code unit is loaded. */
static void peek_char(compiler_common *common, sljit_u32 max)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2 Destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif

SLJIT_UNUSED_ARG(max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  if (max < 128) return;

  /* Bytes below 0xc0 are complete characters. Otherwise step over the
  first byte, call the shared UTF-8 reader, and move STR_PTR back by the
  amount the reader leaves in TMP2. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max < 0xd800) return;

  /* Anything outside 0xd800..0xdbff is not a high surrogate and is
  already complete in TMP1. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  /* TMP2 contains the high surrogate. */
  /* STR_PTR still points to the high surrogate (nothing was consumed), so
  the low surrogate is the following code unit, at offset 1. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  /* code point = ((high - 0xd800 + 0x40) << 10) | (low & 0x3ff) */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif
}
2898 
2899 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2900 
/* Inspects the upper half (bytes 16..31, i.e. characters 128..255) of a
32 byte class bitmap. Returns TRUE when every one of those bytes is 0xff
(for nclass) or 0 (otherwise). */
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough
to determine a match. */
const sljit_u8 expected = nclass ? 0xff : 0;
int index;

for (index = 16; index < 32; index++)
  {
  if (bitset[index] != expected)
    return FALSE;
  }
return TRUE;
}
2917 
/* Emits code that consumes one character in UTF-8 mode and loads its entry
of the common->ctypes table into TMP1. TMP2 is destroyed. */
static void read_char7_type(compiler_common *common, BOOL full_read)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero (NOTE(review): the table is
indexed with the first byte whatever its value, so the zero result relies on
the ctypes entries for bytes >= 128 - confirm). Does not check STR_END. When
full_read is set and the first byte is 0xc0 or above, STR_PTR is additionally
advanced by the PRIV(utf8_table4) entry of that byte, presumably the number of
trailing bytes, so that the whole character is skipped. Without full_read only
one byte is consumed. */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

/* TMP2 = first byte; STR_PTR moves past it. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (full_read)
  {
  /* Bytes below 0xc0 need no further adjustment. The table is addressed
  with the byte value itself, hence the - 0xc0 bias on its address. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
}
2941 
2942 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2943 
static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
{
/* Generates code that loads one character into TMP1 and advances STR_PTR.
Only characters c with min <= c <= max have to be decoded exactly; for any
other character it is enough to leave some value outside of that range in
TMP1. update_str_ptr asks for STR_PTR to be moved over the whole character
even on the paths where the value is not decoded. STR_END is not checked. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *single_unit;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *not_in_range;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_ASSERT(min <= max);

/* Load the first code unit and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* A range below 128 needs nothing more, unless STR_PTR must be updated. */
  if (!update_str_ptr && max < 128)
    return;

  /* First bytes below 0xc0 take the short path. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);

  if (min >= 0x10000)
    {
    /* Only first bytes 0xf0..0xf7 are decoded: three more bytes are merged
    into TMP1. With update_str_ptr the utf8_table4 entry of the first byte is
    kept in RETURN_ADDR and added to STR_PTR on both paths. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    not_in_range = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(not_in_range);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only first bytes 0xe0..0xef are decoded: two more bytes are merged
    into TMP1. STR_PTR is handled the same way as in the branch above. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    not_in_range = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(not_in_range);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    {
    /* Wide range: call one of the shared decoder routines. */
    if (max < 0x10000)
      add_jump(compiler, &common->utfreadchar16, JUMP(SLJIT_FAST_CALL));
    else
      add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
    }
  else if (max < 128)
    {
    /* The value is not needed (update_str_ptr is set here, see the early
    return above): only add the utf8_table4 entry to STR_PTR. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* max is below 0x800: merge a single extra byte into TMP1. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    else
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }

  JUMPHERE(single_unit);
  }
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max >= 0x10000)
    {
    /* Units outside 0xd800..0xdbff are complete characters. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    single_unit = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    /* TMP2 holds the high surrogate (minus 0xd800): combine it with the
    low ten bits of the following unit. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(single_unit);
    return;
    }

  /* Nothing else to do when the range is below the surrogates and STR_PTR
  does not have to be updated. */
  if (!update_str_ptr && max < 0xd800)
    return;

  /* For a unit in 0xd800..0xdbff: skip the following unit when requested,
  and replace TMP1 by 0x10000 when max reaches the surrogate area. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  single_unit = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  if (max >= 0xd800)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(single_unit);
  }
#endif
}
3071 
read_char(compiler_common * common)3072 static SLJIT_INLINE void read_char(compiler_common *common)
3073 {
3074 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
3075 }
3076 
static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
{
/* Generates code that reads the next character and leaves its entry of the
ctypes table in TMP1 (0 is stored where the code finds a value above 255).
STR_PTR is advanced; TMP2 is used as scratch. STR_END is not checked. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
struct sljit_jump *skip;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *no_type;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);

/* First code unit goes into TMP2. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* The type is loaded before the first byte is inspected. This read is
  wasted for first bytes >= 0xc0, but those are expected to be rare. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  skip = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  if (update_str_ptr)
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  else
    {
    /* Inline variant: merge the first byte with one following byte, and
    look the result up only if it is not above 255. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    no_type = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(no_type);
    }
  JUMPHERE(skip);
  return;
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if !defined COMPILE_PCRE8
/* ctypes has only 256 entries: wider units above 255 get type 0. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
skip = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if !defined COMPILE_PCRE8
JUMPHERE(skip);
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (update_str_ptr && common->utf)
  {
  /* A unit in 0xd800..0xdbff is followed by a low surrogate: skip it. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  skip = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(skip);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
3141 
static void skip_char_back(compiler_common *common)
{
/* Generates code that moves STR_PTR back by one character. TMP1 is
overwritten in the UTF paths. The start of the subject is not checked. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
struct sljit_label *step_back;

if (common->utf)
  {
  /* Step back one byte at a time while the byte just passed has the
  form 10xxxxxx. */
  step_back = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, step_back);
  return;
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* If the unit just passed is a low surrogate (0xdc00..0xdfff), go back
  one more unit. This is done without a branch: the flag (0 or 1) is
  turned into a byte offset by the shift. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Every character is a single code unit here. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
3176 
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Generates a newline test for the character in TMP1. A jump is added to
the backtracks list which is taken when the character is a newline
(jumpifmatch is TRUE) or when it is not (jumpifmatch is FALSE). TMP2 may be
destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The shared anynewline routine is called, and its result is tested
  through the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A fixed, single unit newline: one comparison is enough. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: the character must be either CR or NL. */
if (jumpifmatch)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  return;
  }

/* CR jumps over the NL comparison, anything else than NL backtracks. */
is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
JUMPHERE(is_cr);
}
3209 
3210 #ifdef SUPPORT_UTF
3211 
3212 #if defined COMPILE_PCRE8
static void do_utfreadchar(compiler_common *common)
{
/* Generates the shared UTF-8 decoder routine (entered by a fast call).
On entry TMP1 holds the first byte of the character (>= 0xc0) and STR_PTR
points to the byte after it. On return TMP1 holds the character value,
TMP2 the length of the sequence, and STR_PTR has passed the character. */
DEFINE_COMPILER;
struct sljit_jump *three_or_more;
struct sljit_jump *four_bytes;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Merge the low six bits of the first and second bytes. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* The length is found by searching for the first zero bit of the first
byte. Its 0x20 bit is at 0x800 now. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
three_or_more = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Clear the length bit and append the third byte. */
JUMPHERE(three_or_more);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* The 0x10 bit of the first byte is at 0x10000 now. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
four_bytes = JUMP(SLJIT_NOT_ZERO);

/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four byte sequence: clear the length bit and append the fourth byte. */
JUMPHERE(four_bytes);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3260 
static void do_utfreadchar16(compiler_common *common)
{
/* Generates the shared UTF-8 decoder routine used when only values that fit
in 16 bits are of interest (entered by a fast call). On entry TMP1 holds the
first byte of the character (>= 0xc0) and STR_PTR points to the byte after
it. On return TMP1 holds the decoded value; TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *three_or_more;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Merge the low six bits of the first and second bytes. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* The length is found by searching for the first zero bit of the first
byte. Its 0x20 bit is at 0x800 now. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
three_or_more = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(three_or_more);
/* TMP2 becomes 1 when the 0x400 bit (the 0x10 bit of the first byte) is
set, and it is added to STR_PTR as is. This routine exists only in 8 bit
mode, so the flag does not have to be scaled to a code unit size. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Clear the length bit and append the low six bits of the byte at
STR_PTR + 1. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3296 
static void do_utfreadtype8(compiler_common *common)
{
/* Generates the shared routine (entered by a fast call) which computes the
ctypes entry of a multi byte UTF-8 character. On entry TMP2 holds the first
byte (>= 0xc0) and STR_PTR points to the byte after it. On return TMP1 holds
the type (0 for characters without a ctypes entry) and STR_PTR has passed
the character. */
DEFINE_COMPILER;
struct sljit_jump *long_sequence;
struct sljit_jump *above_255;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* A set 0x20 bit in the first byte means more than two bytes. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
long_sequence = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* TMP2 now holds the upper five bits of the character: when they are
above 3, the character is above 255. */
above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Two byte character above 255: no type. */
JUMPHERE(above_255);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Longer sequences never have a type (only characters below 256 do):
the utf8_table4 entry of the first byte is added to STR_PTR. */
JUMPHERE(long_sequence);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3332 
3333 #endif /* COMPILE_PCRE8 */
3334 
3335 #endif /* SUPPORT_UTF */
3336 
3337 #ifdef SUPPORT_UCP
3338 
3339 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3340 #define UCD_BLOCK_MASK 127
3341 #define UCD_BLOCK_SHIFT 7
3342 
do_getucd(compiler_common * common)3343 static void do_getucd(compiler_common *common)
3344 {
3345 /* Search the UCD record for the character comes in TMP1.
3346 Returns chartype in TMP1 and UCD offset in TMP2. */
3347 DEFINE_COMPILER;
3348 #ifdef COMPILE_PCRE32
3349 struct sljit_jump *jump;
3350 #endif
3351 
3352 #if defined SLJIT_DEBUG && SLJIT_DEBUG
3353 /* dummy_ucd_record */
3354 const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
3355 SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
3356 SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
3357 #endif
3358 
3359 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3360 
3361 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3362 
3363 #ifdef COMPILE_PCRE32
3364 if (!common->utf)
3365   {
3366   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
3367   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
3368   JUMPHERE(jump);
3369   }
3370 #endif
3371 
3372 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3373 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3374 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3375 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3376 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3377 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3378 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3379 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3380 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3381 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3382 }
3383 #endif
3384 
mainloop_entry(compiler_common * common,BOOL hascrorlf)3385 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
3386 {
3387 DEFINE_COMPILER;
3388 struct sljit_label *mainloop;
3389 struct sljit_label *newlinelabel = NULL;
3390 struct sljit_jump *start;
3391 struct sljit_jump *end = NULL;
3392 struct sljit_jump *end2 = NULL;
3393 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3394 struct sljit_jump *singlechar;
3395 #endif
3396 jump_list *newline = NULL;
3397 BOOL newlinecheck = FALSE;
3398 BOOL readuchar = FALSE;
3399 
3400 if (!(hascrorlf || (common->match_end_ptr != 0)) &&
3401     (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3402   newlinecheck = TRUE;
3403 
3404 if (common->match_end_ptr != 0)
3405   {
3406   /* Search for the end of the first line. */
3407   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3408 
3409   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3410     {
3411     mainloop = LABEL();
3412     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3413     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3414     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3415     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3416     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3417     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3418     JUMPHERE(end);
3419     OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3420     }
3421   else
3422     {
3423     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3424     mainloop = LABEL();
3425     /* Continual stores does not cause data dependency. */
3426     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3427     read_char_range(common, common->nlmin, common->nlmax, TRUE);
3428     check_newlinechar(common, common->nltype, &newline, TRUE);
3429     CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3430     JUMPHERE(end);
3431     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3432     set_jumps(newline, LABEL());
3433     }
3434 
3435   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3436   }
3437 
3438 start = JUMP(SLJIT_JUMP);
3439 
3440 if (newlinecheck)
3441   {
3442   newlinelabel = LABEL();
3443   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3444   end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3445   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3446   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3447   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3448 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3449   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3450 #endif
3451   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3452   end2 = JUMP(SLJIT_JUMP);
3453   }
3454 
3455 mainloop = LABEL();
3456 
3457 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3458 #ifdef SUPPORT_UTF
3459 if (common->utf) readuchar = TRUE;
3460 #endif
3461 if (newlinecheck) readuchar = TRUE;
3462 
3463 if (readuchar)
3464   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3465 
3466 if (newlinecheck)
3467   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3468 
3469 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3470 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3471 #if defined COMPILE_PCRE8
3472 if (common->utf)
3473   {
3474   singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3475   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3476   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3477   JUMPHERE(singlechar);
3478   }
3479 #elif defined COMPILE_PCRE16
3480 if (common->utf)
3481   {
3482   singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3483   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3484   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3485   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3486   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3487   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3488   JUMPHERE(singlechar);
3489   }
3490 #endif /* COMPILE_PCRE[8|16] */
3491 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3492 JUMPHERE(start);
3493 
3494 if (newlinecheck)
3495   {
3496   JUMPHERE(end);
3497   JUMPHERE(end2);
3498   }
3499 
3500 return mainloop;
3501 }
3502 
3503 #define MAX_N_CHARS 16
3504 #define MAX_DIFF_CHARS 6
3505 
add_prefix_char(pcre_uchar chr,pcre_uchar * chars)3506 static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
3507 {
3508 pcre_uchar i, len;
3509 
3510 len = chars[0];
3511 if (len == 255)
3512   return;
3513 
3514 if (len == 0)
3515   {
3516   chars[0] = 1;
3517   chars[1] = chr;
3518   return;
3519   }
3520 
3521 for (i = len; i > 0; i--)
3522   if (chars[i] == chr)
3523     return;
3524 
3525 if (len >= MAX_DIFF_CHARS - 1)
3526   {
3527   chars[0] = 255;
3528   return;
3529   }
3530 
3531 len++;
3532 chars[len] = chr;
3533 chars[0] = len;
3534 }
3535 
static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive scanner which collects the code units a match can start with.
The compiled pattern is followed from cc, and for each consecutive subject
position the possible units are added to a set in chars. Every position
owns MAX_DIFF_CHARS entries (see add_prefix_char() for the layout); the
value 255 in the first entry means that the position is not restricted. At
most max_chars positions are processed. Optional items and alternatives are
followed by recursive calls, whose result replaces max_chars. *rec_count is
decreased for every processed item, and 0 is returned when it runs out.
Otherwise the return value is the number of positions consumed. */
BOOL is_last, is_any, is_class, caseless;
int len, repeat, saved_len, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
pcre_uchar *alternative, *saved_cc, *oc;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
pcre_uchar othercase[8];
#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
pcre_uchar othercase[2];
#else
pcre_uchar othercase[1];
#endif

repeat = 1;
for (;;)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  is_last = TRUE;
  is_any = FALSE;
  is_class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through. */
    case OP_CHAR:
    is_last = FALSE;
    cc++;
    break;

    /* Zero width assertions are simply stepped over. */
    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    cc++;
    continue;

    /* Assertion groups are skipped as a whole. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through. */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through. */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    is_last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through. */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    /* The character is optional: scan what follows it first. */
    len = 1;
    cc++;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    is_last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* The other alternatives are scanned by recursive calls, and the
    first one is continued here. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    is_class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    is_class = TRUE;
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    is_any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    is_any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    is_any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    is_any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    is_any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UTF
    case OP_NOTPROP:
    case OP_PROP:
#ifndef COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    is_any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only the count is recorded, the type is handled next. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    is_any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return consumed;
    }

  if (is_any)
    {
    /* The next 'repeat' positions are not restricted. */
    do
      {
      chars[0] = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (is_class)
    {
    /* The 32 byte bitmap follows the opcode, and the optional repeat
    opcode follows the bitmap. */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(pcre_uchar);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may match nothing: scan what follows it as well. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* When the highest bit of the bitmap is set, the position is not
      restricted. Otherwise every set bit adds a character. */
      if (bytes[31] & 0x80)
        chars[0] = 255;
      else if (chars[0] != 255)
        {
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* Continue with the first character of the next byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars[0] != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Only exact repeats can be continued. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* A literal character: cc points to its first code unit. */
  len = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UTF
    if (common->utf)
      {
      /* The other case must be encoded in the same number of units. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
      othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Every code unit of the character takes one position, and the whole
  character is processed 'repeat' times. */
  saved_len = len;
  saved_cc = cc;
  do
    {
    len = saved_len;
    cc = saved_cc;
    oc = othercase;
    do
      {
      chr = *cc;
      add_prefix_char(*cc, chars);

      if (caseless)
        add_prefix_char(*oc, chars);

      len--;
      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      cc++;
      oc++;
      }
    while (len > 0);
    }
  while (--repeat != 0);

  repeat = 1;
  if (is_last)
    return consumed;
  }
}
3933 
3934 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
3935 
character_to_int32(pcre_uchar chr)3936 static sljit_s32 character_to_int32(pcre_uchar chr)
3937 {
3938 sljit_s32 value = (sljit_s32)chr;
3939 #if defined COMPILE_PCRE8
3940 #define SSE2_COMPARE_TYPE_INDEX 0
3941 return ((unsigned int)value << 24) | ((unsigned int)value << 16) | ((unsigned int)value << 8) | (unsigned int)value;
3942 #elif defined COMPILE_PCRE16
3943 #define SSE2_COMPARE_TYPE_INDEX 1
3944 return ((unsigned int)value << 16) | value;
3945 #elif defined COMPILE_PCRE32
3946 #define SSE2_COMPARE_TYPE_INDEX 2
3947 return value;
3948 #else
3949 #error "Unsupported unit width"
3950 #endif
3951 }
3952 
/* Emits an SSE2 based search which moves STR_PTR forward to the first code
unit that is equal to char1 or char2. The subject is inspected in 16 byte
aligned blocks, the instructions are emitted as raw x86 bytes through
sljit_emit_op_custom().

Arguments:
  common   the compiler state (provides the sljit compiler)
  char1    first accepted code unit
  char2    second accepted code unit (may be the same as char1)

The emitted code does not clamp STR_PTR to STR_END: when it finishes, STR_PTR
may be greater than or equal to STR_END, so the caller has to compare them.
TMP1 and TMP2 are overwritten, and RETURN_ADDR is used as a temporary when
two compares are needed. */
static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit[3];
struct sljit_jump *nomatch;
sljit_u8 instruction[8];
sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
BOOL load_twice = FALSE;
pcre_uchar bit;

/* When the two characters differ only in a single bit, that bit is set in
the subject data as well, and a single compare is enough. */
bit = char1 ^ char2;
if (!is_powerof2(bit))
  bit = 0;

/* Otherwise two copies of the data are loaded, and each of them is compared
to one of the characters. */
if ((char1 != char2) && bit == 0)
  load_twice = TRUE;

quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* First part (unaligned start) */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));

SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);

/* MOVD xmm, r/m32 */
/* The ModRM reg field is 2, so the destination is xmm2. */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;
instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (char1 != char2)
  {
  /* xmm3 receives either the differing bit or the second character. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));

  /* MOVD xmm, r/m32 */
  instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PSHUFD xmm1, xmm2/m128, imm8 */
/* An immediate of 0 copies the lowest dword of xmm2 to all four dwords. */
instruction[2] = 0x70;
instruction[3] = 0xc0 | (2 << 3) | 2;
instruction[4] = 0;
sljit_emit_op_custom(compiler, instruction, 5);

if (char1 != char2)
  {
  /* PSHUFD xmm1, xmm2/m128, imm8 */
  instruction[3] = 0xc0 | (3 << 3) | 3;
  instruction[4] = 0;
  sljit_emit_op_custom(compiler, instruction, 5);
  }

/* TMP2 keeps the misalignment of STR_PTR, and STR_PTR is rounded down
to a 16 byte boundary. */
OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);

/* MOVDQA xmm1, xmm2/m128 */
/* Loads the block at STR_PTR into xmm0 (and into xmm1 if load_twice). */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

if (str_ptr_ind < 8)
  {
  instruction[2] = 0x6f;
  instruction[3] = (0 << 3) | str_ptr_ind;
  sljit_emit_op_custom(compiler, instruction, 4);

  if (load_twice)
    {
    instruction[3] = (1 << 3) | str_ptr_ind;
    sljit_emit_op_custom(compiler, instruction, 4);
    }
  }
else
  {
  /* A 0x41 prefix byte is inserted after 0x66 for register indexes
  above 7, so the opcode bytes move up by one position. */
  instruction[1] = 0x41;
  instruction[2] = 0x0f;
  instruction[3] = 0x6f;
  instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
  sljit_emit_op_custom(compiler, instruction, 5);

  if (load_twice)
    {
    /* NOTE(review): str_ptr_ind is not masked by 0x7 here, unlike above.
    For an index between 8 and 15 bit 3 is set anyway, so the resulting
    byte is the same as (1 << 3) | (str_ptr_ind & 0x7). */
    instruction[4] = (1 << 3) | str_ptr_ind;
    sljit_emit_op_custom(compiler, instruction, 5);
    }
  /* Restore the byte shared by the following instructions. */
  instruction[1] = 0x0f;
  }

#else

instruction[2] = 0x6f;
instruction[3] = (0 << 3) | str_ptr_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  instruction[3] = (1 << 3) | str_ptr_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

#endif

if (bit != 0)
  {
  /* POR xmm1, xmm2/m128 */
  /* Sets the differing bit in every unit of the loaded data. */
  instruction[2] = 0xeb;
  instruction[3] = 0xc0 | (0 << 3) | 3;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PCMPEQB/W/D xmm1, xmm2/m128 */
/* The compare width is selected by SSE2_COMPARE_TYPE_INDEX. */
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
instruction[3] = 0xc0 | (0 << 3) | 2;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  instruction[3] = 0xc0 | (1 << 3) | 3;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PMOVMSKB reg, xmm */
/* TMP1 receives the compare result of xmm0 as a bit mask. */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  /* TMP2 holds the misalignment, so it is saved in RETURN_ADDR while
  TMP2 receives the mask of the second compare. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
  instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
  sljit_emit_op_custom(compiler, instruction, 4);

  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
  }

/* Drop the mask bits which belong to the bytes before the original
(unaligned) STR_PTR. */
OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);

/* BSF r32, r/m32 */
/* This overwrites instruction[0] and instruction[1], they are set again
before the second part. */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

nomatch = JUMP(SLJIT_ZERO);

/* Match in the first block: add back the misalignment and the bit index
computed by BSF. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
quit[1] = JUMP(SLJIT_JUMP);

JUMPHERE(nomatch);

/* Head of the aligned loop: step to the next 16 byte block. */
start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Second part (aligned) */

instruction[0] = 0x66;
instruction[1] = 0x0f;

/* MOVDQA xmm1, xmm2/m128 */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

if (str_ptr_ind < 8)
  {
  instruction[2] = 0x6f;
  instruction[3] = (0 << 3) | str_ptr_ind;
  sljit_emit_op_custom(compiler, instruction, 4);

  if (load_twice)
    {
    instruction[3] = (1 << 3) | str_ptr_ind;
    sljit_emit_op_custom(compiler, instruction, 4);
    }
  }
else
  {
  instruction[1] = 0x41;
  instruction[2] = 0x0f;
  instruction[3] = 0x6f;
  instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
  sljit_emit_op_custom(compiler, instruction, 5);

  if (load_twice)
    {
    /* NOTE(review): not masked by 0x7, the same byte is produced for
    indexes between 8 and 15 (see the first part). */
    instruction[4] = (1 << 3) | str_ptr_ind;
    sljit_emit_op_custom(compiler, instruction, 5);
    }
  instruction[1] = 0x0f;
  }

#else

instruction[2] = 0x6f;
instruction[3] = (0 << 3) | str_ptr_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  instruction[3] = (1 << 3) | str_ptr_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

#endif

if (bit != 0)
  {
  /* POR xmm1, xmm2/m128 */
  instruction[2] = 0xeb;
  instruction[3] = 0xc0 | (0 << 3) | 3;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PCMPEQB/W/D xmm1, xmm2/m128 */
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
instruction[3] = 0xc0 | (0 << 3) | 2;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  instruction[3] = 0xc0 | (1 << 3) | 3;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PMOVMSKB reg, xmm */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  /* The misalignment is not needed anymore, so TMP2 is not saved. */
  instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
  sljit_emit_op_custom(compiler, instruction, 4);

  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  }

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

/* An empty mask means no match in this block: continue with the next one. */
JUMPTO(SLJIT_ZERO, start);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

/* Every exit path continues here. */
start = LABEL();
SET_LABEL(quit[0], start);
SET_LABEL(quit[1], start);
SET_LABEL(quit[2], start);
}
4212 
4213 #undef SSE2_COMPARE_TYPE_INDEX
4214 
4215 #endif
4216 
/* Emits code which moves STR_PTR forward until the code unit at
STR_PTR + offset is equal to char1 or char2.

Arguments:
  common   the compiler state
  char1    first accepted code unit
  char2    second accepted code unit (may be the same as char1)
  offset   distance (in code units) of the inspected unit from STR_PTR

STR_PTR is increased by offset before the search and decreased by offset
after it. When common->match_end_ptr is set, STR_END is temporarily lowered
(its original value is kept in TMP3) and restored before the emitted code
ends. On x86 targets without SUPPORT_VALGRIND the SSE2 variant is used when
the CPU supports it. */
static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *found;
pcre_uchar mask;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_label *utf_start = NULL;
struct sljit_jump *utf_quit = NULL;
#endif
BOOL has_match_end = (common->match_end_ptr != 0);

if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (has_match_end)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  /* STR_END = min(match end + offset + 1, original STR_END). */
  OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
  sljit_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
  }

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
/* The search is restarted from here when the candidate start position
turns out to be inside a character. */
if (common->utf && offset > 0)
  utf_start = LABEL();
#endif

#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)

/* SSE2 accelerated first character search. */

if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
  {
  fast_forward_first_char2_sse2(common, char1, char2);

  SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
  if (common->mode == JIT_COMPILE)
    {
    /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
    SLJIT_ASSERT(common->forced_quit_label == NULL);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
    add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf && offset > 0)
      {
      SLJIT_ASSERT(common->mode == JIT_COMPILE);

      /* Load the unit at the candidate start position, and search again
      from the next unit if it is a UTF-8 trailing byte (10xxxxxx) or
      a UTF-16 low surrogate (0xdc00 - 0xdfff). */
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8
      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
#elif defined COMPILE_PCRE16
      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
#else
#error "Unknown code width"
#endif
      /* The position is accepted: undo the increment above. */
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      }
#endif

    if (offset > 0)
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
    }
  else
    {
    /* Other modes (offset is 0 here, see the assert above): when the
    search ran to or past STR_END, STR_PTR is set to the match end
    (if there is one) or to STR_END. */
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
    if (has_match_end)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
      sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, TMP1, 0);
      }
    else
      sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, STR_END, 0);
    }

  if (has_match_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

/* Generic search: one code unit is checked in each iteration. */
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

start = LABEL();
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (char1 == char2)
  found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
else
  {
  mask = char1 ^ char2;
  if (is_powerof2(mask))
    {
    /* The characters differ in a single bit: set that bit in the
    loaded unit and do a single compare. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
    found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
    }
  else
    {
    /* Two compares, TMP2 is non-zero if either of them is equal. */
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
    OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
    found = JUMP(SLJIT_NOT_ZERO);
    }
  }

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
/* Reached the end without a match: skip the check of the start position. */
if (common->utf && offset > 0)
  utf_quit = JUMP(SLJIT_JUMP);
#endif

JUMPHERE(found);

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && offset > 0)
  {
  /* Same start position check as in the SSE2 path: search again if the
  unit at the candidate start is a UTF-8 trailing byte or a UTF-16 low
  surrogate. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
#elif defined COMPILE_PCRE16
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
#else
#error "Unknown code width"
#endif
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(utf_quit);
  }
#endif

JUMPHERE(quit);

if (has_match_end)
  {
  /* If the lowered STR_END was reached, STR_PTR is set to the match end
  plus offset (the offset is subtracted below). */
  quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  if (offset > 0)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
  JUMPHERE(quit);
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  }

if (offset > 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
}
4374 
/* Emits a start position search which is based on the characters that can
appear at the first positions of a match (collected by scan_prefix).

Two things are derived from the collected data:
  - a run of at least four consecutive positions where the set of possible
    characters is limited; a 256 entry table is built from it which tells
    how far STR_PTR can be moved after a byte of the subject is inspected
  - a position (offset) with at most two possible characters, which is
    checked directly

Returns FALSE when no code was emitted because nothing usable was found.
Returns TRUE otherwise, including the case when the table allocation fails. */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *match;
/* Each group of MAX_DIFF_CHARS entries describes one position of the prefix:
the first entry of the group is the number of different characters recorded
for that position (the characters follow it), 255 represents any character. */
pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
sljit_s32 offset;
pcre_uchar mask;
pcre_uchar *char_set, *char_set_end;
int i, max, from;
int range_right = -1, range_len;
sljit_u8 *update_table = NULL;
BOOL in_range;
sljit_u32 rec_count;

/* Start with an empty character set at every position. */
for (i = 0; i < MAX_N_CHARS; i++)
  chars[i * MAX_DIFF_CHARS] = 0;

rec_count = 10000;
max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);

if (max < 1)
  return FALSE;

/* Find the longest run of consecutive positions whose count is below 255.
range_right is the index of the last position of the run, range_len is
the length of the run. Runs shorter than four positions are ignored. */
in_range = FALSE;
/* Prevent compiler "uninitialized" warning */
from = 0;
range_len = 4 /* minimum length */ - 1;
for (i = 0; i <= max; i++)
  {
  if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
    {
    range_len = i - from;
    range_right = i - 1;
    }

  if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
    {
    SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
    if (!in_range)
      {
      in_range = TRUE;
      from = i;
      }
    }
  else
    in_range = FALSE;
  }

if (range_right >= 0)
  {
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
  if (update_table == NULL)
    return TRUE;
  /* Bytes which do not occur in the run allow the largest step. */
  memset(update_table, IN_UCHARS(range_len), 256);

  /* For every character of the run, record the smallest distance (in
  bytes) of its position from range_right. The table is indexed by the
  low 8 bits of the character. */
  for (i = 0; i < range_len; i++)
    {
    char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
    SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
    char_set_end = char_set + char_set[0];
    char_set++;
    while (char_set <= char_set_end)
      {
      if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
        update_table[(*char_set) & 0xff] = IN_UCHARS(i);
      char_set++;
      }
    }
  }

offset = -1;
/* Scan forward. */
/* Select a position with at most two characters. A later position replaces
the current one only if the current one has two characters, and the later
one has either a single character, or two characters which differ in a
single bit while the current pair does not. */
for (i = 0; i < max; i++)
  {
  if (offset == -1)
    {
    if (chars[i * MAX_DIFF_CHARS] <= 2)
      offset = i;
    }
  else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
    {
    if (chars[i * MAX_DIFF_CHARS] == 1)
      offset = i;
    else
      {
      mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
      if (!is_powerof2(mask))
        {
        mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
        if (is_powerof2(mask))
          offset = i;
        }
      }
    }
  }

/* Without a usable run only the one or two character search remains. */
if (range_right < 0)
  {
  if (offset < 0)
    return FALSE;
  SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
  /* Works regardless the value is 1 or 2. */
  /* mask holds the last character of the set here (not a bit mask). */
  mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
  fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
  return TRUE;
  }

/* The table lookup already covers the range_right position. */
if (range_right == offset)
  offset = -1;

SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));

/* STR_END is lowered by max - 1 code units for the duration of the search.
If there is a match end, the original STR_END is kept in TMP3, and the
lowered STR_END is not allowed to be above the match end. */
max -= 1;
SLJIT_ASSERT(max > 0);
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
  OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
  JUMPHERE(quit);
  }
else
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));

SLJIT_ASSERT(range_right >= 0);

/* Except on 32 bit x86, the table address is kept in RETURN_ADDR. */
#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
#endif

start = LABEL();
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Load one byte of the code unit at range_right: the first byte of the unit
in 8 bit mode or on little endian targets, the last byte otherwise. */
#if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

/* Replace the byte with its table entry, and move STR_PTR forward by that
amount. The loop continues until the entry is zero. */
#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);

if (offset >= 0)
  {
  /* Check the unit at offset. STR_PTR is increased first, so a failed
  check continues the search from the next unit. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[offset * MAX_DIFF_CHARS] == 1)
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
  else
    {
    mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
    if (is_powerof2(mask))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
      }
    else
      {
      match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
      JUMPHERE(match);
      }
    }
  }

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
/* Continue the search if the unit at the candidate start position is a
UTF-8 trailing byte or a UTF-16 low surrogate. When offset is zero, the
check above has already compared that unit. */
if (common->utf && offset != 0)
  {
  if (offset < 0)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
#if defined COMPILE_PCRE8
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
#elif defined COMPILE_PCRE16
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
#else
#error "Unknown code width"
#endif
  if (offset < 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
#endif

/* Undo the increment of the offset check. */
if (offset >= 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

JUMPHERE(quit);

/* Restore STR_END. With a match end, STR_PTR is also limited to it. */
if (common->match_end_ptr != 0)
  {
  if (range_right >= 0)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  if (range_right >= 0)
    {
    quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
    JUMPHERE(quit);
    }
  }
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}
4597 
4598 #undef MAX_N_CHARS
4599 #undef MAX_DIFF_CHARS
4600 
fast_forward_first_char(compiler_common * common,pcre_uchar first_char,BOOL caseless)4601 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4602 {
4603 pcre_uchar oc;
4604 
4605 oc = first_char;
4606 if (caseless)
4607   {
4608   oc = TABLE_GET(first_char, common->fcc, first_char);
4609 #if defined SUPPORT_UCP && !defined COMPILE_PCRE8
4610   if (first_char > 127 && common->utf)
4611     oc = UCD_OTHERCASE(first_char);
4612 #endif
4613   }
4614 
4615 fast_forward_first_char2(common, first_char, oc, 0);
4616 }
4617 
/* Emits code which moves STR_PTR forward to a position that follows a
newline. No search is done when STR_PTR is at (or before) the str member of
the arguments. When common->match_end_ptr is set, the search is limited to
the match end: STR_END is saved in TMP3 and restored at the end. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *lastchar;
struct sljit_jump *firstchar;
struct sljit_jump *quit;
struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;

if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

/* Fixed newline of two code units: common->newline holds the first unit in
bits 8-15 and the second unit in bits 0-7. */
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

  /* Step back one code unit if STR_PTR is at least two units after the
  begin member: TMP2 is 0 or 1, scaled to the size of a code unit. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

  /* Advance until the two units before STR_PTR are equal to the newline. */
  loop = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);

  JUMPHERE(quit);
  JUMPHERE(firstchar);
  JUMPHERE(lastchar);

  if (common->match_end_ptr != 0)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

/* Every other newline type. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
skip_char_back(common);

/* The head of the loop is also stored in common->ff_newline_shortcut. */
loop = LABEL();
common->ff_newline_shortcut = loop;

read_char_range(common, common->nlmin, common->nlmax, TRUE);
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* NOTE(review): presumably the jumps collected in the newline list are taken
when the character is not a newline - confirm in check_newlinechar. They
continue the loop. */
check_newlinechar(common, common->nltype, &newline, FALSE);
set_jumps(newline, loop);

if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  {
  quit = JUMP(SLJIT_JUMP);
  JUMPHERE(foundcr);
  /* A CR was read: if the next unit is NL, it is skipped as well. TMP1 is
  0 or 1 after OP_FLAGS, scaled to the size of a code unit. */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(notfoundnl);
  JUMPHERE(quit);
  }
JUMPHERE(lastchar);
JUMPHERE(firstchar);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
4704 
4705 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4706 
/* Emits code which moves STR_PTR forward until the code unit at STR_PTR has
its bit set in the start_bits bit map (32 bytes, one bit for each value
between 0 and 255), or until STR_END is reached. In the 16 and 32 bit
libraries, values above 255 are tested as 255. When common->match_end_ptr is
set, the search is limited to the match end: STR_END is saved in RETURN_ADDR
and restored at the end. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *found = NULL;
jump_list *matches = NULL;
#ifndef COMPILE_PCRE8
struct sljit_jump *jump;
#endif

if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

start = LABEL();
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#ifdef SUPPORT_UTF
/* Keep a copy of the unit, it is needed later to skip the rest of the
character. */
if (common->utf)
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
#endif

/* NOTE(review): presumably check_class_ranges returns TRUE when it emitted
compare based checks and added the jumps of the matching case to the matches
list - confirm at its definition. The generic bit map test is emitted only
when it returns FALSE. */
if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
  {
#ifndef COMPILE_PCRE8
  /* Values of 255 and above are tested as 255. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
  JUMPHERE(jump);
#endif
  /* TMP1 = start_bits[c >> 3], TMP2 = 1 << (c & 0x7). */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  found = JUMP(SLJIT_NOT_ZERO);
  }

/* No match: step to the next character. */
#ifdef SUPPORT_UTF
if (common->utf)
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#ifdef SUPPORT_UTF
#if defined COMPILE_PCRE8
if (common->utf)
  {
  /* For units of 0xc0 and above, STR_PTR is also increased by the
  utf8_table4 entry which belongs to the unit. */
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  /* If (unit & 0xfc00) is 0xd800 (a high surrogate), one more unit is
  skipped: the 0 or 1 flag is shifted left by one to get 0 or 2 bytes. */
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF */
JUMPTO(SLJIT_JUMP, start);
/* The match exits and the end-of-subject exit all continue here. */
if (found != NULL)
  JUMPHERE(found);
if (matches != NULL)
  set_jumps(matches, LABEL());
JUMPHERE(quit);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
4782 
search_requested_char(compiler_common * common,pcre_uchar req_char,BOOL caseless,BOOL has_firstchar)4783 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4784 {
4785 DEFINE_COMPILER;
4786 struct sljit_label *loop;
4787 struct sljit_jump *toolong;
4788 struct sljit_jump *alreadyfound;
4789 struct sljit_jump *found;
4790 struct sljit_jump *foundoc = NULL;
4791 struct sljit_jump *notfound;
4792 sljit_u32 oc, bit;
4793 
4794 SLJIT_ASSERT(common->req_char_ptr != 0);
4795 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4796 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4797 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4798 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4799 
4800 if (has_firstchar)
4801   OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4802 else
4803   OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4804 
4805 loop = LABEL();
4806 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4807 
4808 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4809 oc = req_char;
4810 if (caseless)
4811   {
4812   oc = TABLE_GET(req_char, common->fcc, req_char);
4813 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4814   if (req_char > 127 && common->utf)
4815     oc = UCD_OTHERCASE(req_char);
4816 #endif
4817   }
4818 if (req_char == oc)
4819   found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4820 else
4821   {
4822   bit = req_char ^ oc;
4823   if (is_powerof2(bit))
4824     {
4825     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4826     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4827     }
4828   else
4829     {
4830     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4831     foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4832     }
4833   }
4834 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4835 JUMPTO(SLJIT_JUMP, loop);
4836 
4837 JUMPHERE(found);
4838 if (foundoc)
4839   JUMPHERE(foundoc);
4840 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4841 JUMPHERE(alreadyfound);
4842 JUMPHERE(toolong);
4843 return notfound;
4844 }
4845 
/* Emits a helper routine (entered by a fast call, its return address is kept
in RETURN_ADDR) which processes the entries found below STACK_TOP. The word
just below the current position selects the action:

  positive   offset of a local: the next two words are stored at that
             offset and at the following word, three words are consumed
  zero       end of the entries: STACK_TOP gets back the value it had on
             entry, and the routine returns
  negative   the negated value is the offset of a local: the next word is
             stored there, two words are consumed

TMP1 holds the base address of the locals, TMP2 is the selector, and TMP3
keeps the original STACK_TOP. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(SLJIT_MOV, TMP3, 0, STACK_TOP, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive selector: restore two words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -3 * sizeof(sljit_sw));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* The selector is zero or negative here, so non-zero means negative. */
jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
OP1(SLJIT_MOV, STACK_TOP, 0, TMP3, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Negative selector: restore a single word. */
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);
}
4880 
static void check_wordboundary(compiler_common *common)
{
/* Emits the shared word-boundary subroutine (fast call; the return address
is parked in LOCALS0). The emitted code computes a 0/1 "is word character"
value for the previous character (kept in LOCALS1, 0 when STR_PTR is at or
before the 'begin' field of jit_arguments) and for the next character (kept
in TMP2, 0 when check_str_end() takes its exit). The final XOR of the two
sets the zero flag: a non-zero result means exactly one of them is a word
character. TMP1 and TMP2 are overwritten. */
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
struct sljit_jump *jump;
#endif

/* The table lookups below extract the word bit with a shift by 4. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. LOCALS1 is zeroed
first so that every path which skips the test leaves "not a word char". */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

/* Testing char type. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  /* Underscore counts as a word character without a property lookup.
  Otherwise the getucd routine is called and TMP1 is range-checked against
  ucp_Ll..ucp_Lu and ucp_Nd..ucp_No (assumes getucd leaves the character
  type in TMP1 - confirm against that routine). */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  }
else
#endif
  {
  /* Table based test: characters above 255 skip the lookup and the store. */
#ifndef COMPILE_PCRE8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  /* Here LOCALS1 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* COMPILE_PCRE8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
JUMPHERE(skipread);

/* Now classify the next character into TMP2. TMP2 is zeroed before
check_str_end() so that its exit jumps arrive with "not a word char". */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif
  {
#ifndef COMPILE_PCRE8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  /* Same re-zeroing as above, for the same reason. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
set_jumps(skipread_list, LABEL());

/* Zero flag is set when both sides have the same word-ness. */
OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
4989 
static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Tries to replace a test against the 256 bit class bitmap 'bits' with a few
compare instructions on TMP1. The bitmap is scanned for positions where the
bit value changes; when at most four such boundaries exist, compares are
emitted and their jumps are appended to 'backtracks', and TRUE is returned.
Otherwise nothing is emitted and FALSE is returned. Without 'invert' the
jumps are taken for characters whose bit is clear, with 'invert' for
characters whose bit is set. May destroy TMP1. */
DEFINE_COMPILER;
int edges[MAX_RANGE_SIZE];
sljit_u8 state, sample, uniform;
int pos, slot, shift, span, count = 0;

/* 'state' is the bit value of the run being scanned, 'uniform' is the byte
made of eight copies of it (0x00 or 0xff). */
state = bits[0] & 0x1;
uniform = -state;

pos = 0;
while (pos < 256)
  {
  slot = pos >> 3;
  if ((pos & 0x7) == 0 && bits[slot] == uniform)
    {
    /* The whole byte continues the current run. */
    pos += 8;
    continue;
    }

  sample = (bits[slot] >> (pos & 0x7)) & 0x1;
  if (sample != state)
    {
    /* A new run starts at 'pos'. */
    if (count >= MAX_RANGE_SIZE)
      return FALSE;
    edges[count] = pos;
    count++;
    state = sample;
    uniform = -sample;
    }
  pos++;
  }

/* Depending on 'nclass', the last run may need a closing boundary at 256. */
if (nclass ? (state == 0) : (state == 1))
  {
  if (count >= MAX_RANGE_SIZE)
    return FALSE;
  edges[count] = 256;
  count++;
  }

if (count < 0 || count > 4)
  return FALSE;

/* From here on 'state' is the (possibly inverted) bit of character zero. */
state = bits[0] & 0x1;
if (invert)
  state ^= 0x1;

if (count == 0)
  {
  /* No boundary at all: either no character is accepted (unconditional
  jump) or all characters are accepted (nothing to emit). */
  if (state == 0)
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  return TRUE;
  }

switch(count)
  {
  case 1:
  add_jump(compiler, backtracks, CMP(state == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
  return TRUE;

  case 2:
  if (edges[0] + 1 == edges[1])
    /* Single character range. */
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
    }
  return TRUE;

  case 3:
  if (state != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[2]));
    if (edges[0] + 1 == edges[1])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
      }
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[0]));
  if (edges[1] + 1 == edges[2])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[1]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[2] - edges[1]));
    }
  return TRUE;

  case 4:
  /* Two ranges of equal length whose start values differ in exactly one bit
  can be merged by OR-ing that bit into the character first. */
  span = edges[2] - edges[0];
  if ((edges[1] - edges[0]) == (edges[3] - edges[2])
      && (edges[0] | span) == edges[2]
      && (edges[1] & span) == 0
      && is_powerof2(span))
    {
    SLJIT_ASSERT((edges[0] & span) == 0 && (edges[2] & edges[3] & span) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, span);
    if (edges[2] + 1 == edges[3])
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, edges[2]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[2]);
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[3] - edges[2]));
      }
    return TRUE;
    }

  if (state != 0)
    {
    /* 'shift' is the amount already subtracted from TMP1 by the first test. */
    shift = 0;
    if (edges[0] + 1 == edges[1])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
      shift = edges[0];
      }

    if (edges[2] + 1 == edges[3])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[2] - shift));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[2] - shift);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[3] - edges[2]));
      }
    return TRUE;
    }

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[3] - edges[0]));
  if (edges[1] + 1 == edges[2])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[2] - edges[1]));
    }
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
5141 
static void check_anynewline(compiler_common *common)
{
/* Emits a fast-call subroutine that checks whether TMP1 contains a newline
character: 0x0a-0x0d, 0x85 and, where the wide branch below is emitted,
0x2028-0x2029. The 0/1 answer is accumulated in TMP2 and the final OR also
sets the zero flag from it. TMP2 is destroyed and TMP1 is modified as well
(it is rebased by 0x0a and may get its lowest bit set). */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase so that 0x0a..0x0d becomes 0..3 and needs a single compare. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  /* Fold in the 0x85 result, then test 0x2028 and 0x2029 together: setting
  the lowest bit maps both rebased values onto 0x2029 - 0x0a. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the last comparison and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5168 
static void check_hspace(compiler_common *common)
{
/* Emits a fast-call subroutine that checks whether TMP1 contains a
horizontal space character: 0x09, 0x20, 0xa0 and, where the wide branch below
is emitted, 0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f and 0x3000. The 0/1
answer is accumulated in TMP2 and the final OR also sets the zero flag from
it. TMP2 is destroyed; the wide branch also rebases TMP1 by 0x2000. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Each comparison result is OR-ed into TMP2 by the OP_FLAGS that follows the
next comparison is set up, so the last one is folded in after the #if block. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Rebase by 0x2000 so the 0x2000..0x200a range needs a single compare;
  the remaining constants are expressed relative to the same base. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the last comparison and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5207 
static void check_vspace(compiler_common *common)
{
/* Emits a fast-call subroutine that checks whether TMP1 contains a vertical
space character: 0x0a-0x0d, 0x85 and, where the wide branch below is emitted,
0x2028-0x2029. The 0/1 answer is accumulated in TMP2 and the final OR also
sets the zero flag from it. TMP2 is destroyed and TMP1 is modified as well
(it is rebased by 0x0a and may get its lowest bit set). The emitted sequence
is the same as the one produced by check_anynewline(). */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase so that 0x0a..0x0d becomes 0..3 and needs a single compare. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  /* Fold in the 0x85 result, then test 0x2028 and 0x2029 together: setting
  the lowest bit maps both rebased values onto 0x2029 - 0x0a. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the last comparison and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5235 
static void do_casefulcmp(compiler_common *common)
{
/* Emits the shared case-sensitive compare subroutine (fast call; the return
address is parked in LOCALS0 and reloaded into TMP1 for the return). The
emitted code first moves STR_PTR back by TMP2, then compares the code units
at TMP1 and STR_PTR one by one, advancing both and decreasing TMP2 by
IN_UCHARS(1) per step, until a pair differs or TMP2 reaches zero. The count
is tested only after a comparison has been made. */
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *loop;
/* 0: plain loads with explicit pointer updates, 1: post-update loads,
2: pre-update loads. */
int load_mode = 0;
int char1_reg = TMP3;
int char2_reg = RETURN_ADDR;

/* When TMP3 has no register index, STR_END and STACK_TOP are borrowed as
character registers; their values are parked in TMP3 and RETURN_ADDR. */
if (sljit_get_register_index(TMP3) < 0)
  {
  char1_reg = STR_END;
  char2_reg = STACK_TOP;
  }

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* Ask which updating load form is available; post-update is preferred. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = 2;

/* Loop head: load one code unit from each side. */
switch (load_mode)
  {
  case 1:
  loop = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* Pre-update loads need both pointers one unit behind. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  loop = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  loop = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Loop tail, common to every load form. */
mismatch = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Undo the initial step back of the pre-update form. */
if (load_mode == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

sljit_emit_fast_return(compiler, TMP1, 0);
}
5315 
static void do_caselesscmp(compiler_common *common)
{
/* Emits the shared caseless compare subroutine (fast call; the return
address is parked in LOCALS0 and reloaded into TMP1 for the return). The
emitted code first moves STR_PTR back by TMP2, then compares the code units
at TMP1 and STR_PTR one by one after mapping each through the table at
common->lcc, advancing both pointers and decreasing TMP2 by IN_UCHARS(1) per
step, until a pair differs or TMP2 reaches zero. STR_END is used as the first
character register; its value is kept in LOCALS1 and restored at the end. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg = STR_END;
int char2_reg;
int lcc_table;
/* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
int opt_type = 0;

/* When TMP3 has no register index, STACK_TOP and STACK_LIMIT are borrowed
instead of RETURN_ADDR and TMP3. */
if (sljit_get_register_index(TMP3) < 0)
  {
  char2_reg = STACK_TOP;
  lcc_table = STACK_LIMIT;
  }
else
  {
  char2_reg = RETURN_ADDR;
  lcc_table = TMP3;
  }

/* Ask which updating load form is available; post-update is preferred. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 2;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Park STR_END, it is overwritten by the characters of the first string. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

if (char2_reg == STACK_TOP)
  {
  /* Park the borrowed STACK_TOP and STACK_LIMIT values. */
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

/* Loop head: load one code unit from each side. */
if (opt_type == 1)
  {
  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else if (opt_type == 2)
  {
  /* Pre-update loads need both pointers one unit behind. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else
  {
  /* Only TMP1 is advanced here; the STR_PTR increment is emitted after the
  table lookups below. */
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  }

/* Map both characters through the lcc table. Outside the 8 bit library,
values above 255 skip the lookup and are compared unchanged. */
#ifndef COMPILE_PCRE8
jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
#endif

if (opt_type == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Loop tail: leave on the first difference or when TMP2 reaches zero. */
jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Undo the initial step back of the pre-update form. */
if (opt_type == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char2_reg == STACK_TOP)
  {
  /* Give the borrowed registers their original values back. */
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, TMP1, 0);
}
5413 
5414 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5415 
do_utf_caselesscmp(pcre_uchar * src1,pcre_uchar * src2,pcre_uchar * end1,pcre_uchar * end2)5416 static const pcre_uchar * SLJIT_FUNC do_utf_caselesscmp(pcre_uchar *src1, pcre_uchar *src2, pcre_uchar *end1, pcre_uchar *end2)
5417 {
5418 /* This function would be ineffective to do in JIT level. */
5419 sljit_u32 c1, c2;
5420 const ucd_record *ur;
5421 const sljit_u32 *pp;
5422 
5423 while (src1 < end1)
5424   {
5425   if (src2 >= end2)
5426     return (pcre_uchar*)1;
5427   GETCHARINC(c1, src1);
5428   GETCHARINC(c2, src2);
5429   ur = GET_UCD(c2);
5430   if (c1 != c2 && c1 != c2 + ur->other_case)
5431     {
5432     pp = PRIV(ucd_caseless_sets) + ur->caseset;
5433     for (;;)
5434       {
5435       if (c1 < *pp) return NULL;
5436       if (c1 == *pp++) break;
5437       }
5438     }
5439   }
5440 return src2;
5441 }
5442 
5443 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5444 
static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
    compare_context *context, jump_list **backtracks)
{
/* Emits the comparison of one pattern character (one code unit, or all code
units of a multi-unit character in UTF mode) starting at cc against the
subject, which is addressed backwards from STR_PTR by context->length.
Mismatch jumps are appended to 'backtracks'. 'context' carries the state
shared by consecutive calls: the remaining length, the register that receives
the next load and, on the unaligned path, the code units collected so far.
Returns cc advanced past the code units that were handled. */
DEFINE_COMPILER;
unsigned int othercasebit = 0;
pcre_uchar *othercasechar = NULL;
#ifdef SUPPORT_UTF
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  /* The two cases differ in a single bit: compare with that bit forced to
  one on both sides. The value returned below packs the bit together with the
  index of the code unit that contains it. */
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. */
#if defined COMPILE_PCRE8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for COMPILE_PCRE32. */
  othercasechar = cc + (othercasebit >> 9);
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* COMPILE_PCRE[8|16|32] */
  }

if (context->sourcereg == -1)
  {
  /* First call for this context: emit the first load into TMP1. Later loads
  are always emitted one step ahead of the comparison that uses them, with
  TMP1 and TMP2 taking turns. */
#if defined COMPILE_PCRE8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE[8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UTF
/* Number of code units that belong to the character at cc. */
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)

  /* Unaligned read is supported. Code units are collected in context->c
  (expected value, with the case bit set) and context->oc (case bit mask)
  until they fill what the pending load has read. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* Flush when the collected units match the size of the pending load. */
#if defined COMPILE_PCRE8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Emit the load for the next chunk (if any) into the free register. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if defined COMPILE_PCRE8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE8 */
    /* Switch to the register that holds the chunk loaded earlier. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* Compare the whole chunk at once; the case labels are chunk sizes
    expressed in code units. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(pcre_uchar):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(pcre_uchar):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#ifdef COMPILE_PCRE8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. One code unit is
  compared per step; the load of the following unit is emitted first. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  /* Switch to the register that holds the unit loaded earlier. */
  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UTF
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
5597 
5598 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5599 
/* Changes the bias of typereg from typeoffset to (value): emits an ADD or a
SUB on typereg for the difference (nothing when the two are equal) and then
records (value) in typeoffset. Assuming typereg currently holds some quantity
minus typeoffset, it holds that quantity minus (value) afterwards. The macro
expands to more than one statement and evaluates (value) several times, so it
must not be used as the unbraced body of an if/else and its argument should
have no side effects. 'typereg' and 'typeoffset' are taken from the scope
where the macro is used. */
#define SET_TYPE_OFFSET(value) \
  if ((value) != typeoffset) \
    { \
    if ((value) < typeoffset) \
      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
    else \
      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    } \
  typeoffset = (value);
5609 
/* Changes the bias of TMP1 from charoffset to (value): emits an ADD or a SUB
on TMP1 for the difference (nothing when the two are equal) and then records
(value) in charoffset. Assuming TMP1 currently holds some quantity minus
charoffset, it holds that quantity minus (value) afterwards. The macro
expands to more than one statement and evaluates (value) several times, so it
must not be used as the unbraced body of an if/else and its argument should
have no side effects. 'charoffset' is taken from the scope where the macro is
used. */
#define SET_CHAR_OFFSET(value) \
  if ((value) != charoffset) \
    { \
    if ((value) < charoffset) \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
    else \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
    } \
  charoffset = (value);
5619 
5620 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5621 
compile_xclass_matchingpath(compiler_common * common,pcre_uchar * cc,jump_list ** backtracks)5622 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5623 {
5624 DEFINE_COMPILER;
5625 jump_list *found = NULL;
5626 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
5627 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
5628 struct sljit_jump *jump = NULL;
5629 pcre_uchar *ccbegin;
5630 int compares, invertcmp, numberofcmps;
5631 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5632 BOOL utf = common->utf;
5633 #endif
5634 
5635 #ifdef SUPPORT_UCP
5636 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
5637 BOOL charsaved = FALSE;
5638 int typereg = TMP1;
5639 const sljit_u32 *other_cases;
5640 sljit_uw typeoffset;
5641 #endif
5642 
5643 /* Scanning the necessary info. */
5644 cc++;
5645 ccbegin = cc;
5646 compares = 0;
5647 if (cc[-1] & XCL_MAP)
5648   {
5649   min = 0;
5650   cc += 32 / sizeof(pcre_uchar);
5651   }
5652 
5653 while (*cc != XCL_END)
5654   {
5655   compares++;
5656   if (*cc == XCL_SINGLE)
5657     {
5658     cc ++;
5659     GETCHARINCTEST(c, cc);
5660     if (c > max) max = c;
5661     if (c < min) min = c;
5662 #ifdef SUPPORT_UCP
5663     needschar = TRUE;
5664 #endif
5665     }
5666   else if (*cc == XCL_RANGE)
5667     {
5668     cc ++;
5669     GETCHARINCTEST(c, cc);
5670     if (c < min) min = c;
5671     GETCHARINCTEST(c, cc);
5672     if (c > max) max = c;
5673 #ifdef SUPPORT_UCP
5674     needschar = TRUE;
5675 #endif
5676     }
5677 #ifdef SUPPORT_UCP
5678   else
5679     {
5680     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5681     cc++;
5682     if (*cc == PT_CLIST)
5683       {
5684       other_cases = PRIV(ucd_caseless_sets) + cc[1];
5685       while (*other_cases != NOTACHAR)
5686         {
5687         if (*other_cases > max) max = *other_cases;
5688         if (*other_cases < min) min = *other_cases;
5689         other_cases++;
5690         }
5691       }
5692     else
5693       {
5694       max = READ_CHAR_MAX;
5695       min = 0;
5696       }
5697 
5698     switch(*cc)
5699       {
5700       case PT_ANY:
5701       /* Any either accepts everything or ignored. */
5702       if (cc[-1] == XCL_PROP)
5703         {
5704         compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
5705         if (list == backtracks)
5706           add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5707         return;
5708         }
5709       break;
5710 
5711       case PT_LAMP:
5712       case PT_GC:
5713       case PT_PC:
5714       case PT_ALNUM:
5715       needstype = TRUE;
5716       break;
5717 
5718       case PT_SC:
5719       needsscript = TRUE;
5720       break;
5721 
5722       case PT_SPACE:
5723       case PT_PXSPACE:
5724       case PT_WORD:
5725       case PT_PXGRAPH:
5726       case PT_PXPRINT:
5727       case PT_PXPUNCT:
5728       needstype = TRUE;
5729       needschar = TRUE;
5730       break;
5731 
5732       case PT_CLIST:
5733       case PT_UCNC:
5734       needschar = TRUE;
5735       break;
5736 
5737       default:
5738       SLJIT_UNREACHABLE();
5739       break;
5740       }
5741     cc += 2;
5742     }
5743 #endif
5744   }
5745 SLJIT_ASSERT(compares > 0);
5746 
5747 /* We are not necessary in utf mode even in 8 bit mode. */
5748 cc = ccbegin;
5749 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
5750 
5751 if ((cc[-1] & XCL_HASPROP) == 0)
5752   {
5753   if ((cc[-1] & XCL_MAP) != 0)
5754     {
5755     jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5756     if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
5757       {
5758       OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5759       OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5760       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5761       OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5762       OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5763       add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
5764       }
5765 
5766     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5767     JUMPHERE(jump);
5768 
5769     cc += 32 / sizeof(pcre_uchar);
5770     }
5771   else
5772     {
5773     OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
5774     add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
5775     }
5776   }
5777 else if ((cc[-1] & XCL_MAP) != 0)
5778   {
5779   OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5780 #ifdef SUPPORT_UCP
5781   charsaved = TRUE;
5782 #endif
5783   if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
5784     {
5785 #ifdef COMPILE_PCRE8
5786     jump = NULL;
5787     if (common->utf)
5788 #endif
5789       jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5790 
5791     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5792     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5793     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5794     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5795     OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5796     add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
5797 
5798 #ifdef COMPILE_PCRE8
5799     if (common->utf)
5800 #endif
5801       JUMPHERE(jump);
5802     }
5803 
5804   OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5805   cc += 32 / sizeof(pcre_uchar);
5806   }
5807 
5808 #ifdef SUPPORT_UCP
5809 if (needstype || needsscript)
5810   {
5811   if (needschar && !charsaved)
5812     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5813 
5814 #ifdef COMPILE_PCRE32
5815   if (!common->utf)
5816     {
5817     jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
5818     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5819     JUMPHERE(jump);
5820     }
5821 #endif
5822 
5823   OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5824   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5825   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5826   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5827   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5828   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5829   OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5830 
5831   /* Before anything else, we deal with scripts. */
5832   if (needsscript)
5833     {
5834     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
5835     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5836 
5837     ccbegin = cc;
5838 
5839     while (*cc != XCL_END)
5840       {
5841       if (*cc == XCL_SINGLE)
5842         {
5843         cc ++;
5844         GETCHARINCTEST(c, cc);
5845         }
5846       else if (*cc == XCL_RANGE)
5847         {
5848         cc ++;
5849         GETCHARINCTEST(c, cc);
5850         GETCHARINCTEST(c, cc);
5851         }
5852       else
5853         {
5854         SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5855         cc++;
5856         if (*cc == PT_SC)
5857           {
5858           compares--;
5859           invertcmp = (compares == 0 && list != backtracks);
5860           if (cc[-1] == XCL_NOTPROP)
5861             invertcmp ^= 0x1;
5862           jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5863           add_jump(compiler, compares > 0 ? list : backtracks, jump);
5864           }
5865         cc += 2;
5866         }
5867       }
5868 
5869     cc = ccbegin;
5870     }
5871 
5872   if (needschar)
5873     {
5874     OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5875     }
5876 
5877   if (needstype)
5878     {
5879     if (!needschar)
5880       {
5881       OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5882       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5883       }
5884     else
5885       {
5886       OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5887       OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5888       typereg = RETURN_ADDR;
5889       }
5890     }
5891   }
5892 #endif
5893 
5894 /* Generating code. */
5895 charoffset = 0;
5896 numberofcmps = 0;
5897 #ifdef SUPPORT_UCP
5898 typeoffset = 0;
5899 #endif
5900 
5901 while (*cc != XCL_END)
5902   {
5903   compares--;
5904   invertcmp = (compares == 0 && list != backtracks);
5905   jump = NULL;
5906 
5907   if (*cc == XCL_SINGLE)
5908     {
5909     cc ++;
5910     GETCHARINCTEST(c, cc);
5911 
5912     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5913       {
5914       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5915       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5916       numberofcmps++;
5917       }
5918     else if (numberofcmps > 0)
5919       {
5920       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5921       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5922       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5923       numberofcmps = 0;
5924       }
5925     else
5926       {
5927       jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5928       numberofcmps = 0;
5929       }
5930     }
5931   else if (*cc == XCL_RANGE)
5932     {
5933     cc ++;
5934     GETCHARINCTEST(c, cc);
5935     SET_CHAR_OFFSET(c);
5936     GETCHARINCTEST(c, cc);
5937 
5938     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5939       {
5940       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5941       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5942       numberofcmps++;
5943       }
5944     else if (numberofcmps > 0)
5945       {
5946       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5947       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
5948       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5949       numberofcmps = 0;
5950       }
5951     else
5952       {
5953       jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5954       numberofcmps = 0;
5955       }
5956     }
5957 #ifdef SUPPORT_UCP
5958   else
5959     {
5960     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5961     if (*cc == XCL_NOTPROP)
5962       invertcmp ^= 0x1;
5963     cc++;
5964     switch(*cc)
5965       {
5966       case PT_ANY:
5967       if (!invertcmp)
5968         jump = JUMP(SLJIT_JUMP);
5969       break;
5970 
5971       case PT_LAMP:
5972       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5973       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5974       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5975       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5976       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5977       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5978       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5979       break;
5980 
5981       case PT_GC:
5982       c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5983       SET_TYPE_OFFSET(c);
5984       jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5985       break;
5986 
5987       case PT_PC:
5988       jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5989       break;
5990 
5991       case PT_SC:
5992       compares++;
5993       /* Do nothing. */
5994       break;
5995 
5996       case PT_SPACE:
5997       case PT_PXSPACE:
5998       SET_CHAR_OFFSET(9);
5999       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
6000       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6001 
6002       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
6003       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6004 
6005       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
6006       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6007 
6008       SET_TYPE_OFFSET(ucp_Zl);
6009       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
6010       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6011       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6012       break;
6013 
6014       case PT_WORD:
6015       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
6016       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6017       /* Fall through. */
6018 
6019       case PT_ALNUM:
6020       SET_TYPE_OFFSET(ucp_Ll);
6021       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
6022       OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6023       SET_TYPE_OFFSET(ucp_Nd);
6024       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
6025       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6026       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6027       break;
6028 
6029       case PT_CLIST:
6030       other_cases = PRIV(ucd_caseless_sets) + cc[1];
6031 
6032       /* At least three characters are required.
6033          Otherwise this case would be handled by the normal code path. */
6034       SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
6035       SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
6036 
6037       /* Optimizing character pairs, if their difference is power of 2. */
6038       if (is_powerof2(other_cases[1] ^ other_cases[0]))
6039         {
6040         if (charoffset == 0)
6041           OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6042         else
6043           {
6044           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
6045           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6046           }
6047         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
6048         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6049         other_cases += 2;
6050         }
6051       else if (is_powerof2(other_cases[2] ^ other_cases[1]))
6052         {
6053         if (charoffset == 0)
6054           OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
6055         else
6056           {
6057           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
6058           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6059           }
6060         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
6061         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6062 
6063         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
6064         OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
6065 
6066         other_cases += 3;
6067         }
6068       else
6069         {
6070         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
6071         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6072         }
6073 
6074       while (*other_cases != NOTACHAR)
6075         {
6076         OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
6077         OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
6078         }
6079       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6080       break;
6081 
6082       case PT_UCNC:
6083       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
6084       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6085       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
6086       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6087       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
6088       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6089 
6090       SET_CHAR_OFFSET(0xa0);
6091       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
6092       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6093       SET_CHAR_OFFSET(0);
6094       OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
6095       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
6096       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6097       break;
6098 
6099       case PT_PXGRAPH:
6100       /* C and Z groups are the farthest two groups. */
6101       SET_TYPE_OFFSET(ucp_Ll);
6102       OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
6103       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
6104 
6105       jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
6106 
6107       /* In case of ucp_Cf, we overwrite the result. */
6108       SET_CHAR_OFFSET(0x2066);
6109       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
6110       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6111 
6112       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
6113       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6114 
6115       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
6116       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6117 
6118       JUMPHERE(jump);
6119       jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
6120       break;
6121 
6122       case PT_PXPRINT:
6123       /* C and Z groups are the farthest two groups. */
6124       SET_TYPE_OFFSET(ucp_Ll);
6125       OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
6126       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
6127 
6128       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
6129       OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
6130 
6131       jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
6132 
6133       /* In case of ucp_Cf, we overwrite the result. */
6134       SET_CHAR_OFFSET(0x2066);
6135       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
6136       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6137 
6138       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
6139       OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6140 
6141       JUMPHERE(jump);
6142       jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
6143       break;
6144 
6145       case PT_PXPUNCT:
6146       SET_TYPE_OFFSET(ucp_Sc);
6147       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
6148       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6149 
6150       SET_CHAR_OFFSET(0);
6151       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
6152       OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
6153 
6154       SET_TYPE_OFFSET(ucp_Pc);
6155       OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
6156       OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6157       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6158       break;
6159 
6160       default:
6161       SLJIT_UNREACHABLE();
6162       break;
6163       }
6164     cc += 2;
6165     }
6166 #endif
6167 
6168   if (jump != NULL)
6169     add_jump(compiler, compares > 0 ? list : backtracks, jump);
6170   }
6171 
6172 if (found != NULL)
6173   set_jumps(found, LABEL());
6174 }
6175 
6176 #undef SET_TYPE_OFFSET
6177 #undef SET_CHAR_OFFSET
6178 
6179 #endif
6180 
/* Helper for OP_EOD, and for OP_DOLL when common->endonly is set. Queues on
the backtrack list a jump taken when STR_PTR < STR_END, then calls
check_partial(). */
static void simple_assertion_eod(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;

add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
check_partial(common, FALSE);
}

/* Helper for OP_EODN, and for OP_DOLL when common->endonly is clear. The
generated code skips every check when STR_PTR >= STR_END. Otherwise the checks
that are emitted depend on common->nltype and common->newline; each failing
check is queued on the backtrack list. check_partial() is called last. */
static void simple_assertion_eodn(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
/* The two code units of a fixed newline whose value is above 255. */
const int nl_first = (common->newline >> 8) & 0xff;
const int nl_second = common->newline & 0xff;
struct sljit_jump *at_end;
struct sljit_jump *branch;
struct sljit_jump *cr_is_last;
struct sljit_jump *crlf_found;

at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

if (common->nltype != NLTYPE_FIXED)
  {
  /* Not a fixed newline: CR gets special treatment first. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  branch = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
  /* STR_PTR + 2 units is beyond STR_END. */
  cr_is_last = JUMP(SLJIT_GREATER);
  /* Not greater and not equal, that is: less. */
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
  /* STR_PTR + 2 units is exactly STR_END: the second unit must be NL. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  crlf_found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  /* The first unit is not CR. */
  JUMPHERE(branch);
  if (common->nltype == NLTYPE_ANYCRLF)
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
    }
  else
    {
    /* STR_PTR is parked in LOCALS1 while a character is read, and is
    reloaded from there after the checks. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
    read_char_range(common, common->nlmin, common->nlmax, TRUE);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
    sljit_set_current_flags(compiler, SLJIT_SET_Z);
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
    }
  JUMPHERE(cr_is_last);
  JUMPHERE(crlf_found);
  }
else if (common->newline <= 255)
  {
  /* Fixed newline of one code unit: it must be the final unit. */
  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  }
else
  {
  /* Fixed newline of two code units. */
  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  if (common->mode != JIT_COMPILE)
    {
    branch = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
    /* TMP2 becomes non-zero when STR_PTR + 2 units is below STR_END or when
    the current unit differs from the first newline unit. */
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, nl_first);
    OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
    check_partial(common, TRUE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(branch);
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
  }

JUMPHERE(at_end);
check_partial(common, FALSE);
}

/* Helper for OP_DOLLM. When STR_PTR >= STR_END the generated code tests the
noteol field of jit_arguments and calls check_partial(). Otherwise it tests
for a newline at STR_PTR; STR_PTR itself is not modified by the code emitted
directly in this helper. */
static void simple_assertion_dollm(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
const int nl_first = (common->newline >> 8) & 0xff;
const int nl_second = common->newline & 0xff;
struct sljit_jump *inside_subject;
struct sljit_jump *finished;
struct sljit_jump *both_available;

inside_subject = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
check_partial(common, FALSE);
finished = JUMP(SLJIT_JUMP);
JUMPHERE(inside_subject);

if (common->nltype != NLTYPE_FIXED || common->newline <= 255)
  {
  peek_char(common, common->nlmax);
  check_newlinechar(common, common->nltype, backtracks, FALSE);
  }
else
  {
  /* Fixed newline of two code units. */
  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  if (common->mode != JIT_COMPILE)
    {
    both_available = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
    /* Here STR_PTR = STR_END - IN_UCHARS(1). */
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    check_partial(common, TRUE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(both_available);
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
  }

JUMPHERE(finished);
}

/* Helper for OP_CIRCM. When STR_PTR is not above the begin field of
jit_arguments, only the notbol field is tested. Otherwise the generated code
backtracks when STR_PTR >= STR_END, and then tests for a newline in front of
STR_PTR. */
static void simple_assertion_circm(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *after_begin;
struct sljit_jump *finished;

OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
after_begin = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
finished = JUMP(SLJIT_JUMP);
JUMPHERE(after_begin);

add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
if (common->nltype != NLTYPE_FIXED || common->newline <= 255)
  {
  skip_char_back(common);
  read_char_range(common, common->nlmin, common->nlmax, TRUE);
  check_newlinechar(common, common->nltype, backtracks, FALSE);
  }
else
  {
  /* Fixed newline of two code units; TMP1 still holds the begin pointer
  for the range check. */
  OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
  }

JUMPHERE(finished);
}

/* Helper for OP_REVERSE. The count is read with GET(cc, 0). A zero count
emits nothing. Otherwise STR_PTR is moved backwards, with a backtrack jump
queued for running into the begin field of jit_arguments, and
check_start_used_ptr() is called. Returns cc + LINK_SIZE in every case. */
static pcre_uchar *simple_assertion_reverse(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int count = GET(cc, 0);
pcre_uchar *next = cc + LINK_SIZE;

if (count == 0)
  return next;

OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
#ifdef SUPPORT_UTF
if (common->utf)
  {
  struct sljit_label *again;

  /* One skip_char_back() per loop iteration, with TMP2 as the counter. */
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, count);
  again = LABEL();
  add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
  skip_char_back(common);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, again);
  }
else
#endif
  {
  /* The whole distance is subtracted in a single step. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(count));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
  }
check_start_used_ptr(common);
return next;
}

/* Generates the matching path of a simple assertion opcode.

Arguments:
  common      the compiler state
  type        the opcode: OP_SOD, OP_SOM, OP_NOT_WORD_BOUNDARY,
              OP_WORD_BOUNDARY, OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC,
              OP_CIRCM or OP_REVERSE
  cc          pointer into the compiled pattern; it is dereferenced only for
              OP_REVERSE, where GET(cc, 0) supplies the count
  backtracks  list that receives every jump taken when the assertion fails

Returns:      cc + LINK_SIZE for OP_REVERSE, cc unchanged for the other
              opcodes. Any other opcode reaches SLJIT_UNREACHABLE(). */
static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;

switch(type)
  {
  case OP_SOD:
  case OP_SOM:
  /* STR_PTR has to equal the begin field (OP_SOD) or the str field (OP_SOM)
  of jit_arguments. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  if (type == OP_SOD)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  /* The shared wordboundary routine is fast-called; the zero flag it leaves
  behind selects the outcome. */
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (type == OP_NOT_WORD_BOUNDARY)
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  else
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  return cc;

  case OP_EODN:
  simple_assertion_eodn(common, backtracks);
  return cc;

  case OP_EOD:
  simple_assertion_eod(common, backtracks);
  return cc;

  case OP_DOLL:
  /* A non-zero noteol field of jit_arguments always fails. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  /* The rest is the OP_EOD code with endonly, the OP_EODN code without. */
  if (common->endonly)
    simple_assertion_eod(common, backtracks);
  else
    simple_assertion_eodn(common, backtracks);
  return cc;

  case OP_DOLLM:
  simple_assertion_dollm(common, backtracks);
  return cc;

  case OP_CIRC:
  /* Fails when STR_PTR is above the begin field of jit_arguments, or when
  the notbol field is non-zero. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  return cc;

  case OP_CIRCM:
  simple_assertion_circm(common, backtracks);
  return cc;

  case OP_REVERSE:
  return simple_assertion_reverse(common, cc, backtracks);
  }

SLJIT_UNREACHABLE();
return cc;
}
6401 
compile_char1_matchingpath(compiler_common * common,pcre_uchar type,pcre_uchar * cc,jump_list ** backtracks,BOOL check_str_ptr)6402 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
6403 {
6404 DEFINE_COMPILER;
6405 int length;
6406 unsigned int c, oc, bit;
6407 compare_context context;
6408 struct sljit_jump *jump[3];
6409 jump_list *end_list;
6410 #ifdef SUPPORT_UTF
6411 struct sljit_label *label;
6412 #ifdef SUPPORT_UCP
6413 pcre_uchar propdata[5];
6414 #endif
6415 #endif /* SUPPORT_UTF */
6416 
6417 switch(type)
6418   {
6419   case OP_NOT_DIGIT:
6420   case OP_DIGIT:
6421   /* Digits are usually 0-9, so it is worth to optimize them. */
6422   if (check_str_ptr)
6423     detect_partial_match(common, backtracks);
6424 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6425   if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
6426     read_char7_type(common, type == OP_NOT_DIGIT);
6427   else
6428 #endif
6429     read_char8_type(common, type == OP_NOT_DIGIT);
6430     /* Flip the starting bit in the negative case. */
6431   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
6432   add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6433   return cc;
6434 
6435   case OP_NOT_WHITESPACE:
6436   case OP_WHITESPACE:
6437   if (check_str_ptr)
6438     detect_partial_match(common, backtracks);
6439 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6440   if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
6441     read_char7_type(common, type == OP_NOT_WHITESPACE);
6442   else
6443 #endif
6444     read_char8_type(common, type == OP_NOT_WHITESPACE);
6445   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
6446   add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6447   return cc;
6448 
6449   case OP_NOT_WORDCHAR:
6450   case OP_WORDCHAR:
6451   if (check_str_ptr)
6452     detect_partial_match(common, backtracks);
6453 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6454   if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
6455     read_char7_type(common, type == OP_NOT_WORDCHAR);
6456   else
6457 #endif
6458     read_char8_type(common, type == OP_NOT_WORDCHAR);
6459   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
6460   add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6461   return cc;
6462 
6463   case OP_ANY:
6464   if (check_str_ptr)
6465     detect_partial_match(common, backtracks);
6466   read_char_range(common, common->nlmin, common->nlmax, TRUE);
6467   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6468     {
6469     jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6470     end_list = NULL;
6471     if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6472       add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6473     else
6474       check_str_end(common, &end_list);
6475 
6476     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6477     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
6478     set_jumps(end_list, LABEL());
6479     JUMPHERE(jump[0]);
6480     }
6481   else
6482     check_newlinechar(common, common->nltype, backtracks, TRUE);
6483   return cc;
6484 
6485   case OP_ALLANY:
6486   if (check_str_ptr)
6487     detect_partial_match(common, backtracks);
6488 #ifdef SUPPORT_UTF
6489   if (common->utf)
6490     {
6491     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6492     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6493 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6494 #if defined COMPILE_PCRE8
6495     jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6496     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6497     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6498 #elif defined COMPILE_PCRE16
6499     jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
6500     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
6501     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
6502     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6503     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6504     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6505 #endif
6506     JUMPHERE(jump[0]);
6507 #endif /* COMPILE_PCRE[8|16] */
6508     return cc;
6509     }
6510 #endif
6511   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6512   return cc;
6513 
6514   case OP_ANYBYTE:
6515   if (check_str_ptr)
6516     detect_partial_match(common, backtracks);
6517   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6518   return cc;
6519 
6520 #ifdef SUPPORT_UTF
6521 #ifdef SUPPORT_UCP
6522   case OP_NOTPROP:
6523   case OP_PROP:
6524   propdata[0] = XCL_HASPROP;
6525   propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
6526   propdata[2] = cc[0];
6527   propdata[3] = cc[1];
6528   propdata[4] = XCL_END;
6529   if (check_str_ptr)
6530     detect_partial_match(common, backtracks);
6531   compile_xclass_matchingpath(common, propdata, backtracks);
6532   return cc + 2;
6533 #endif
6534 #endif
6535 
6536   case OP_ANYNL:
6537   if (check_str_ptr)
6538     detect_partial_match(common, backtracks);
6539   read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
6540   jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6541   /* We don't need to handle soft partial matching case. */
6542   end_list = NULL;
6543   if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6544     add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6545   else
6546     check_str_end(common, &end_list);
6547   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6548   jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6549   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6550   jump[2] = JUMP(SLJIT_JUMP);
6551   JUMPHERE(jump[0]);
6552   check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
6553   set_jumps(end_list, LABEL());
6554   JUMPHERE(jump[1]);
6555   JUMPHERE(jump[2]);
6556   return cc;
6557 
6558   case OP_NOT_HSPACE:
6559   case OP_HSPACE:
6560   if (check_str_ptr)
6561     detect_partial_match(common, backtracks);
6562   read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
6563   add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
6564   sljit_set_current_flags(compiler, SLJIT_SET_Z);
6565   add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6566   return cc;
6567 
6568   case OP_NOT_VSPACE:
6569   case OP_VSPACE:
6570   if (check_str_ptr)
6571     detect_partial_match(common, backtracks);
6572   read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
6573   add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
6574   sljit_set_current_flags(compiler, SLJIT_SET_Z);
6575   add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6576   return cc;
6577 
6578 #ifdef SUPPORT_UCP
6579   case OP_EXTUNI:
6580   if (check_str_ptr)
6581     detect_partial_match(common, backtracks);
6582   read_char(common);
6583   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6584   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6585   /* Optimize register allocation: use a real register. */
6586   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6587   OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6588 
6589   label = LABEL();
6590   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6591   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6592   read_char(common);
6593   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6594   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6595   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6596 
6597   OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
6598   OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
6599   OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
6600   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6601   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6602   JUMPTO(SLJIT_NOT_ZERO, label);
6603 
6604   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6605   JUMPHERE(jump[0]);
6606   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6607 
6608   if (common->mode == JIT_PARTIAL_HARD_COMPILE)
6609     {
6610     jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6611     /* Since we successfully read a char above, partial matching must occur. */
6612     check_partial(common, TRUE);
6613     JUMPHERE(jump[0]);
6614     }
6615   return cc;
6616 #endif
6617 
6618   case OP_CHAR:
6619   case OP_CHARI:
6620   length = 1;
6621 #ifdef SUPPORT_UTF
6622   if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
6623 #endif
6624   if (common->mode == JIT_COMPILE && check_str_ptr
6625       && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
6626     {
6627     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6628     add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6629 
6630     context.length = IN_UCHARS(length);
6631     context.sourcereg = -1;
6632 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6633     context.ucharptr = 0;
6634 #endif
6635     return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
6636     }
6637 
6638   if (check_str_ptr)
6639     detect_partial_match(common, backtracks);
6640 #ifdef SUPPORT_UTF
6641   if (common->utf)
6642     {
6643     GETCHAR(c, cc);
6644     }
6645   else
6646 #endif
6647     c = *cc;
6648 
6649   if (type == OP_CHAR || !char_has_othercase(common, cc))
6650     {
6651     read_char_range(common, c, c, FALSE);
6652     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6653     return cc + length;
6654     }
6655   oc = char_othercase(common, c);
6656   read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
6657   bit = c ^ oc;
6658   if (is_powerof2(bit))
6659     {
6660     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6661     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6662     return cc + length;
6663     }
6664   jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
6665   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6666   JUMPHERE(jump[0]);
6667   return cc + length;
6668 
6669   case OP_NOT:
6670   case OP_NOTI:
6671   if (check_str_ptr)
6672     detect_partial_match(common, backtracks);
6673   length = 1;
6674 #ifdef SUPPORT_UTF
6675   if (common->utf)
6676     {
6677 #ifdef COMPILE_PCRE8
6678     c = *cc;
6679     if (c < 128)
6680       {
6681       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6682       if (type == OP_NOT || !char_has_othercase(common, cc))
6683         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6684       else
6685         {
6686         /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
6687         OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
6688         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
6689         }
6690       /* Skip the variable-length character. */
6691       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6692       jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6693       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6694       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6695       JUMPHERE(jump[0]);
6696       return cc + 1;
6697       }
6698     else
6699 #endif /* COMPILE_PCRE8 */
6700       {
6701       GETCHARLEN(c, cc, length);
6702       }
6703     }
6704   else
6705 #endif /* SUPPORT_UTF */
6706     c = *cc;
6707 
6708   if (type == OP_NOT || !char_has_othercase(common, cc))
6709     {
6710     read_char_range(common, c, c, TRUE);
6711     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6712     }
6713   else
6714     {
6715     oc = char_othercase(common, c);
6716     read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
6717     bit = c ^ oc;
6718     if (is_powerof2(bit))
6719       {
6720       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6721       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6722       }
6723     else
6724       {
6725       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6726       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6727       }
6728     }
6729   return cc + length;
6730 
6731   case OP_CLASS:
6732   case OP_NCLASS:
6733   if (check_str_ptr)
6734     detect_partial_match(common, backtracks);
6735 
6736 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6737   bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
6738   read_char_range(common, 0, bit, type == OP_NCLASS);
6739 #else
6740   read_char_range(common, 0, 255, type == OP_NCLASS);
6741 #endif
6742 
6743   if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
6744     return cc + 32 / sizeof(pcre_uchar);
6745 
6746 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6747   jump[0] = NULL;
6748   if (common->utf)
6749     {
6750     jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
6751     if (type == OP_CLASS)
6752       {
6753       add_jump(compiler, backtracks, jump[0]);
6754       jump[0] = NULL;
6755       }
6756     }
6757 #elif !defined COMPILE_PCRE8
6758   jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6759   if (type == OP_CLASS)
6760     {
6761     add_jump(compiler, backtracks, jump[0]);
6762     jump[0] = NULL;
6763     }
6764 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
6765 
6766   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6767   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6768   OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
6769   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6770   OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6771   add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6772 
6773 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6774   if (jump[0] != NULL)
6775     JUMPHERE(jump[0]);
6776 #endif
6777   return cc + 32 / sizeof(pcre_uchar);
6778 
6779 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6780   case OP_XCLASS:
6781   if (check_str_ptr)
6782     detect_partial_match(common, backtracks);
6783   compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
6784   return cc + GET(cc, 0) - 1;
6785 #endif
6786   }
6787 SLJIT_UNREACHABLE();
6788 return cc;
6789 }
6790 
/* Emits matching code for the item(s) starting at cc.

A leading run of OP_CHAR / OP_CHARI items is first measured without emitting
anything. If the run has a non-zero length, STR_PTR is advanced over the whole
run with a single STR_END check and the items are compared through
byte_sequence_compare(). Otherwise the single item at cc is passed on to
compile_char1_matchingpath().

Arguments:
  common      compiler state
  cc          current position in the compiled pattern
  ccend       end of the pattern range that may be examined
  backtracks  list that receives the jumps taken when matching fails

Returns the value returned by the last byte_sequence_compare() or
compile_char1_matchingpath() call (presumably the position after the
consumed items). */
compile_charn_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,jump_list ** backtracks)6791 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
6792 {
6793 /* This function consumes at least one input character. */
6794 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6795 DEFINE_COMPILER;
6796 pcre_uchar *ccbegin = cc;
6797 compare_context context;
6798 int size;
6799
     /* First pass: measure the run. size is the number of code units of the
     current item, or 0 if the item cannot be part of the run. */
6800 context.length = 0;
6801 do
6802   {
6803   if (cc >= ccend)
6804     break;
6805
6806   if (*cc == OP_CHAR)
6807     {
6808     size = 1;
6809 #ifdef SUPPORT_UTF
6810     if (common->utf && HAS_EXTRALEN(cc[1]))
6811       size += GET_EXTRALEN(cc[1]);
6812 #endif
6813     }
6814   else if (*cc == OP_CHARI)
6815     {
       /* A caseless character joins the run only if it has no other case, or
       char_get_othercase_bit() returns a non-zero value for it. */
6816     size = 1;
6817 #ifdef SUPPORT_UTF
6818     if (common->utf)
6819       {
6820       if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6821         size = 0;
6822       else if (HAS_EXTRALEN(cc[1]))
6823         size += GET_EXTRALEN(cc[1]);
6824       }
6825     else
6826 #endif
6827     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6828       size = 0;
6829     }
6830   else
6831     size = 0;
6832
6833   cc += 1 + size;
6834   context.length += IN_UCHARS(size);
6835   }
     /* The run ends at the first item that cannot join it, or once the
     accumulated length has grown beyond 128. */
6836 while (size > 0 && context.length <= 128);
6837
     /* Second pass: rewind to the first item and emit the code. */
6838 cc = ccbegin;
6839 if (context.length > 0)
6840   {
6841   /* We have a fixed-length byte sequence. */
       /* STR_PTR is moved past the whole run before anything is compared; the
       match fails at once if that position is beyond STR_END. */
6842   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
6843   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6844
6845   context.sourcereg = -1;
6846 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6847   context.ucharptr = 0;
6848 #endif
       /* One call per item (cc + 1 skips the opcode). The loop relies on
       byte_sequence_compare() reducing context.length until the run is used up. */
6849   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6850   return cc;
6851   }
6852
6853 /* A non-fixed length character will be checked if length == 0. */
6854 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6855 }
6856 
6857 /* Forward declarations, so that the functions below can call them
     (compile_recurse_matchingpath() uses compile_matchingpath()). */
6858 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
6859 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
6860 
/* Allocates a backtrack record of `size` bytes with sljit_alloc_memory(),
zero-fills it, stores `ccstart` in its cc field and makes it the new
parent->top (the previous top is kept in backtrack->prev). If the compiler
reports an error after the allocation, the enclosing function returns `error`.
The macro uses the local names compiler, backtrack and parent of the function
it is expanded in. */
6861 #define PUSH_BACKTRACK(size, ccstart, error) \
6862   do \
6863     { \
6864     backtrack = sljit_alloc_memory(compiler, (size)); \
6865     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6866       return error; \
6867     memset(backtrack, 0, size); \
6868     backtrack->prev = parent->top; \
6869     backtrack->cc = (ccstart); \
6870     parent->top = backtrack; \
6871     } \
6872   while (0)
6873 
/* Variant of the backtrack-record push for functions returning void:
allocates `size` bytes with sljit_alloc_memory(), zero-fills the record, stores
`ccstart` in its cc field and links it in as the new parent->top. If the
compiler reports an error after the allocation, the enclosing function simply
returns. Uses the local names compiler, backtrack and parent. */
6874 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
6875   do \
6876     { \
6877     backtrack = sljit_alloc_memory(compiler, (size)); \
6878     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6879       return; \
6880     memset(backtrack, 0, size); \
6881     backtrack->prev = parent->top; \
6882     backtrack->cc = (ccstart); \
6883     parent->top = backtrack; \
6884     } \
6885   while (0)
6886 
/* Views the local `backtrack` pointer as a pointer to the given record type. */
6887 #define BACKTRACK_AS(type) ((type *)backtrack)
6888 
/* Emits the group selection for a duplicate-name back reference
(OP_DNREF / OP_DNREFI).

The opcode carries the index of the first name table entry (GET2(cc, 1)) and
the number of entries (GET2(cc, 1 + IMM2_SIZE)). For each candidate group TMP2
is loaded with the address of its OVECTOR slot; the emitted search stops at the
first candidate whose start value differs from OVECTOR(1), and otherwise ends
with the last candidate in TMP2.

Arguments:
  common      compiler state
  cc          points to the OP_DNREF / OP_DNREFI opcode
  backtracks  if not NULL (and jscript_compat is off), a last candidate whose
              start value equals OVECTOR(1) jumps to this list */
compile_dnref_search(compiler_common * common,pcre_uchar * cc,jump_list ** backtracks)6889 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
6890 {
6891 /* The OVECTOR offset goes to TMP2. */
6892 DEFINE_COMPILER;
6893 int count = GET2(cc, 1 + IMM2_SIZE);
6894 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
6895 unsigned int offset;
6896 jump_list *found = NULL;
6897
6898 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
6899
     /* TMP1 holds the value every candidate's start is compared against. */
6900 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
6901
     /* The last candidate is handled separately after the loop. */
6902 count--;
6903 while (count-- > 0)
6904   {
       /* Each group owns a pair of OVECTOR entries, hence the shift. */
6905   offset = GET2(slot, 0) << 1;
6906   GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6907   add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6908   slot += common->name_entry_size;
6909   }
6910
6911 offset = GET2(slot, 0) << 1;
6912 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6913 if (backtracks != NULL && !common->jscript_compat)
6914   add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6915
     /* All "found" exits continue here with TMP2 pointing to the chosen pair. */
6916 set_jumps(found, LABEL());
6917 }
6918 
/* Emits code that matches the text captured by a back reference at the
current subject position.

Arguments:
  common      compiler state
  cc          points to the reference opcode. For OP_REF / OP_REFI the group
              number is read from the opcode; for any other opcode TMP2 must
              already hold the address of the group's OVECTOR pair
  backtracks  list that receives the jumps taken when matching fails
  withchecks  when TRUE, also emit the test for a start value equal to
              OVECTOR(1) (OP_REF / OP_REFI only, and not with jscript_compat)
              and the test for an empty capture
  emptyfail   only meaningful with withchecks: when TRUE an empty capture
              jumps to backtracks, when FALSE it continues after the emitted
              code */
compile_ref_matchingpath(compiler_common * common,pcre_uchar * cc,jump_list ** backtracks,BOOL withchecks,BOOL emptyfail)6919 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
6920 {
6921 DEFINE_COMPILER;
6922 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6923 int offset = 0;
6924 struct sljit_jump *jump = NULL;
6925 struct sljit_jump *partial;
6926 struct sljit_jump *nopartial;
6927
     /* TMP1 = start of the captured text. */
6928 if (ref)
6929   {
6930   offset = GET2(cc, 1) << 1;
6931   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6932   /* OVECTOR(1) contains the "string begin - 1" constant. */
6933   if (withchecks && !common->jscript_compat)
6934     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6935   }
6936 else
6937   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6938
6939 #if defined SUPPORT_UTF && defined SUPPORT_UCP
6940 if (common->utf && *cc == OP_REFI)
6941   {
       /* Caseless comparison in UTF mode is done by calling
       do_utf_caselesscmp() with R0 = captured start (TMP1), R1 = STR_PTR,
       R2 = captured end and R3 = STR_END. */
6942   SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1);
6943   if (ref)
6944     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6945   else
6946     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6947
       /* start == end: the capture is empty. */
6948   if (withchecks)
6949     jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_R2, 0);
6950
6951   /* No free saved registers so save data on stack. */
       /* STACK_TOP lives in R1 (see the assert), which is needed as an
       argument register, so it is parked in LOCALS0 around the call. */
6952   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6953   OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
6954   OP1(SLJIT_MOV, SLJIT_R3, 0, STR_END, 0);
6955   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6956   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
       /* The return value becomes the new STR_PTR. Values 0 and 1 are not
       pointers: the code below treats a value below 1 as a failed match and,
       outside JIT_COMPILE mode, the value 1 as a possible partial match. */
6957   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6958
6959   if (common->mode == JIT_COMPILE)
6960     add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6961   else
6962     {
6963     OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6964
6965     add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
6966
6967     nopartial = JUMP(SLJIT_NOT_EQUAL);
6968     OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6969     check_partial(common, FALSE);
6970     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6971     JUMPHERE(nopartial);
6972     }
6973   }
6974 else
6975 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6976   {
       /* TMP2 = captured end - captured start, i.e. the length to compare.
       The zero flag of this subtraction drives the empty-capture jump. */
6977   if (ref)
6978     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6979   else
6980     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6981
6982   if (withchecks)
6983     jump = JUMP(SLJIT_ZERO);
6984
       /* STR_PTR is advanced by the full length before comparing. In
       JIT_COMPILE mode running past STR_END is a plain failure; in the
       partial matching modes that case is handled further down. */
6985   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6986   partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6987   if (common->mode == JIT_COMPILE)
6988     add_jump(compiler, backtracks, partial);
6989
       /* The comparison itself is a shared fast-call routine; a non-zero TMP2
       afterwards is treated as a mismatch.
       NOTE(review): the routine is chosen with *cc == OP_REF only, so a
       duplicate-name reference that is not OP_REF (including OP_DNREF) gets
       caselesscmp, and the UTF branch above is taken for OP_REFI only.
       Confirm that this is intended for OP_DNREF / OP_DNREFI. */
6990   add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6991   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6992
6993   if (common->mode != JIT_COMPILE)
6994     {
6995     nopartial = JUMP(SLJIT_JUMP);
6996     JUMPHERE(partial);
         /* The reference reaches beyond STR_END: compare only the part that
         is available, then report a partial match and backtrack. */
6997     /* TMP2 -= STR_PTR - STR_END */
6998     OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6999     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
         /* Nothing left to compare: skip the call. */
7000     partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
7001     OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
7002     add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
7003     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
7004     JUMPHERE(partial);
7005     check_partial(common, FALSE);
7006     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7007     JUMPHERE(nopartial);
7008     }
7009   }
7010
     /* Resolve the empty-capture jump (only created when withchecks is set). */
7011 if (jump != NULL)
7012   {
7013   if (emptyfail)
7014     add_jump(compiler, backtracks, jump);
7015   else
7016     JUMPHERE(jump);
7017   }
7018 }
7019 
/* Emits the matching path for a back reference that is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE).

A ref_iterator_backtrack record is pushed onto parent and its matchingpath
label is set. Inside this function max == 0 means "no upper limit".

Arguments:
  common  compiler state
  cc      points to the reference opcode (OP_REF / OP_REFI, or a
          duplicate-name reference)
  parent  backtrack record that receives the new record

Returns the pattern position after the repeat opcode, or NULL if the
backtrack record could not be allocated. */
compile_ref_iterator_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7020 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7021 {
7022 DEFINE_COMPILER;
7023 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
7024 backtrack_common *backtrack;
7025 pcre_uchar type;
7026 int offset = 0;
7027 struct sljit_label *label;
7028 struct sljit_jump *zerolength;
7029 struct sljit_jump *jump = NULL;
7030 pcre_uchar *ccbegin = cc;
7031 int min = 0, max = 0;
7032 BOOL minimize;
7033
7034 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
7035
     /* A duplicate-name reference has one more IMM2 operand than OP_REF (see
     the two GET2 reads in compile_dnref_search()); skipping it here lets the
     offsets used below serve both forms. */
7036 if (ref)
7037   offset = GET2(cc, 1) << 1;
7038 else
7039   cc += IMM2_SIZE;
7040 type = cc[1 + IMM2_SIZE];
7041
     /* The lowest bit of the repeat opcode selects the minimizing variant; the
     assert only pins OP_CRSTAR to an even value, the pairing of the other
     opcodes is assumed. */
7042 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
7043 minimize = (type & 0x1) != 0;
7044 switch(type)
7045   {
7046   case OP_CRSTAR:
7047   case OP_CRMINSTAR:
7048   min = 0;
7049   max = 0;
7050   cc += 1 + IMM2_SIZE + 1;
7051   break;
7052   case OP_CRPLUS:
7053   case OP_CRMINPLUS:
7054   min = 1;
7055   max = 0;
7056   cc += 1 + IMM2_SIZE + 1;
7057   break;
7058   case OP_CRQUERY:
7059   case OP_CRMINQUERY:
7060   min = 0;
7061   max = 1;
7062   cc += 1 + IMM2_SIZE + 1;
7063   break;
7064   case OP_CRRANGE:
7065   case OP_CRMINRANGE:
7066   min = GET2(cc, 1 + IMM2_SIZE + 1);
7067   max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
7068   cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
7069   break;
7070   default:
7071   SLJIT_UNREACHABLE();
7072   break;
7073   }
7074
     /* Greedy repeat: match as many times as allowed, saving STR_PTR on the
     stack for the iterations that may be given back. */
7075 if (!minimize)
7076   {
7077   if (min == 0)
7078     {
7079     allocate_stack(common, 2);
7080     if (ref)
7081       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7082     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7083     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
7084     /* Temporary release of STR_PTR. */
7085     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
7086     /* Handles both invalid and empty cases. Since the minimum repeat,
7087     is zero the invalid case is basically the same as an empty case. */
7088     if (ref)
7089       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7090     else
7091       {
7092       compile_dnref_search(common, ccbegin, NULL);
7093       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
           /* POSSESSIVE1 keeps the address of the selected OVECTOR pair; it
           is reloaded into TMP2 at the start of every iteration. */
7094       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
7095       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7096       }
7097     /* Restore if not zero length. */
7098     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
7099     }
7100   else
7101     {
7102     allocate_stack(common, 1);
7103     if (ref)
7104       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7105     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7106     if (ref)
7107       {
           /* At least one match is required, so a start value equal to
           OVECTOR(1) is a failure here. */
7108       add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
7109       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7110       }
7111     else
7112       {
7113       compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
7114       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
7115       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
7116       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7117       }
7118     }
7119
       /* POSSESSIVE0 is the iteration counter; it is only needed when a limit
       greater than one has to be checked. */
7120   if (min > 1 || max > 1)
7121     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
7122
7123   label = LABEL();
7124   if (!ref)
7125     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
7126   compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
7127
7128   if (min > 1 || max > 1)
7129     {
7130     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
7131     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7132     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
         /* Below the minimum: loop again without saving a position. */
7133     if (min > 1)
7134       CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
         /* Below the maximum: save STR_PTR and loop again; at the maximum the
         loop is left. */
7135     if (max > 1)
7136       {
7137       jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
7138       allocate_stack(common, 1);
7139       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7140       JUMPTO(SLJIT_JUMP, label);
7141       JUMPHERE(jump);
7142       }
7143     }
7144
7145   if (max == 0)
7146     {
7147     /* Includes min > 1 case as well. */
7148     allocate_stack(common, 1);
7149     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7150     JUMPTO(SLJIT_JUMP, label);
7151     }
7152
7153   JUMPHERE(zerolength);
7154   BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
7155
7156   count_match(common);
7157   return cc;
7158   }
7159
     /* Minimizing repeat. Stack slots: STACK(0) receives STR_PTR, STACK(1) is
     the iteration counter, and for a duplicate-name reference STACK(2) keeps
     the address of the selected OVECTOR pair. */
7160 allocate_stack(common, ref ? 2 : 3);
7161 if (ref)
7162   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7163 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7164 if (type != OP_CRMINSTAR)
7165   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
7166
7167 if (min == 0)
7168   {
7169   /* Handles both invalid and empty cases. Since the minimum repeat,
7170   is zero the invalid case is basically the same as an empty case. */
7171   if (ref)
7172     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7173   else
7174     {
7175     compile_dnref_search(common, ccbegin, NULL);
7176     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
7177     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
7178     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7179     }
7180   /* Length is non-zero, we can match real repeats. */
7181   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
       /* With a zero minimum the first pass skips the reference match below;
       this jump is resolved at the end of the function. */
7182   jump = JUMP(SLJIT_JUMP);
7183   }
7184 else
7185   {
7186   if (ref)
7187     {
7188     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
7189     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7190     }
7191   else
7192     {
7193     compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
7194     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
7195     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
7196     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7197     }
7198   }
7199
     /* Code from this label on matches one more repetition; it is also the
     target of the min > 1 loop below. */
7200 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
7201 if (max > 0)
7202   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
7203
7204 if (!ref)
7205   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
7206 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
7207 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7208
7209 if (min > 1)
7210   {
7211   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7212   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7213   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7214   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
7215   }
7216 else if (max > 0)
7217   OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
7218
7219 if (jump != NULL)
7220   JUMPHERE(jump);
7221 JUMPHERE(zerolength);
7222
7223 count_match(common);
7224 return cc;
7225 }
7226 
/* Emits the matching path for a recursion item. The operand read with
GET(cc, 1) is the offset of the called group from common->start.

If get_framesize() reports no_stack for the group, its contents are compiled
inline. Otherwise a recurse_entry with that start offset is looked up in
common->entries (and appended if missing) and a fast call to it is emitted.

Arguments:
  common  compiler state
  cc      points to the recursion opcode
  parent  backtrack record that receives the new recurse_backtrack record

Returns cc + 1 + LINK_SIZE, or NULL if a memory allocation failed. */
compile_recurse_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7227 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7228 {
7229 DEFINE_COMPILER;
7230 backtrack_common *backtrack;
7231 recurse_entry *entry = common->entries;
7232 recurse_entry *prev = NULL;
7233 sljit_sw start = GET(cc, 1);
7234 pcre_uchar *start_cc;
7235 BOOL needs_control_head;
7236
7237 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
7238
7239 /* Inlining simple patterns. */
7240 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
7241   {
7242   start_cc = common->start + start;
       /* Only the inside of the group is compiled: from the opcode following
       the opening bracket up to 1 + LINK_SIZE units before bracketend(). */
7243   compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
7244   BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
7245   return cc + 1 + LINK_SIZE;
7246   }
7247
     /* Look for an existing entry for this group; prev ends up as the list
     tail when none is found. */
7248 while (entry != NULL)
7249   {
7250   if (entry->start == start)
7251     break;
7252   prev = entry;
7253   entry = entry->next;
7254   }
7255
7256 if (entry == NULL)
7257   {
7258   entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
7259   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7260     return NULL;
7261   entry->next = NULL;
7262   entry->entry = NULL;
7263   entry->calls = NULL;
7264   entry->start = start;
7265
7266   if (prev != NULL)
7267     prev->next = entry;
7268   else
7269     common->entries = entry;
7270   }
7271
     /* Before the call, push OVECTOR(0) (when has_set_som is set) and/or the
     value stored at mark_ptr (when it is non-zero) onto the stack. */
7272 if (common->has_set_som && common->mark_ptr != 0)
7273   {
7274   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
7275   allocate_stack(common, 2);
7276   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
7277   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7278   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7279   }
7280 else if (common->has_set_som || common->mark_ptr != 0)
7281   {
7282   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
7283   allocate_stack(common, 1);
7284   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7285   }
7286
     /* If the entry has no label yet, the call is queued in entry->calls
     (presumably resolved once the entry's code is generated). */
7287 if (entry->entry == NULL)
7288   add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
7289 else
7290   JUMPTO(SLJIT_FAST_CALL, entry->entry);
7291 /* Leave if the match is failed. */
7292 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
7293 return cc + 1 + LINK_SIZE;
7294 }
7295 
/* Runs the user callout on behalf of the generated code (the call is emitted
by compile_callout_matchingpath()).

Completes the callout block, converts the JIT ovector, which holds pointers,
into offsets relative to the subject start, and calls the callout function.

Arguments:
  arguments      the jit_arguments of the running match
  callout_block  block partly filled in by the generated code
  jit_ovector    the JIT offset vector

Returns 0 if no callout function is set (nothing is modified in that case),
otherwise the value returned by the callout function. */
do_callout(struct jit_arguments * arguments,PUBL (callout_block)* callout_block,pcre_uchar ** jit_ovector)7296 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
7297 {
7298 const pcre_uchar *begin = arguments->begin;
7299 int *offset_vector = arguments->offsets;
7300 int offset_count = arguments->offset_count;
7301 int i;
7302
7303 if (PUBL(callout) == NULL)
7304   return 0;
7305
7306 callout_block->version = 2;
7307 callout_block->callout_data = arguments->callout_data;
7308
7309 /* Offsets in subject. */
     /* On entry the subject and offset_vector fields hold raw pointers put
     there by the generated code (OVECTOR(0) and STR_PTR respectively). They
     must be converted here, before both fields get their real values below. */
7310 callout_block->subject_length = arguments->end - arguments->begin;
7311 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
7312 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
7313 #if defined COMPILE_PCRE8
7314 callout_block->subject = (PCRE_SPTR)begin;
7315 #elif defined COMPILE_PCRE16
7316 callout_block->subject = (PCRE_SPTR16)begin;
7317 #elif defined COMPILE_PCRE32
7318 callout_block->subject = (PCRE_SPTR32)begin;
7319 #endif
7320
7321 /* Convert and copy the JIT offset vector to the offset_vector array. */
7322 callout_block->capture_top = 0;
7323 callout_block->offset_vector = offset_vector;
     /* Pair 0 is skipped here and set to -1 / -1 afterwards. capture_top
     remembers the index of the last pair whose start pointer is not below
     the subject start. */
7324 for (i = 2; i < offset_count; i += 2)
7325   {
7326   offset_vector[i] = jit_ovector[i] - begin;
7327   offset_vector[i + 1] = jit_ovector[i + 1] - begin;
7328   if (jit_ovector[i] >= begin)
7329     callout_block->capture_top = i;
7330   }
7331
     /* Turn the vector index into a pair number and add one. */
7332 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
7333 if (offset_count > 0)
7334   offset_vector[0] = -1;
7335 if (offset_count > 1)
7336   offset_vector[1] = -1;
7337 return (*PUBL(callout))(callout_block);
7338 }
7339 
7340 /* Size of PUBL(callout_block) in bytes, rounded up to a multiple of 8. */
7341 #define CALLOUT_ARG_SIZE \
7342     (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
7343
     /* Byte offset of the field `arg` inside PUBL(callout_block). */
7344 #define CALLOUT_ARG_OFFSET(arg) \
7345     SLJIT_OFFSETOF(PUBL(callout_block), arg)
7346 
/* Emits the matching path for a callout item.

The generated code reserves CALLOUT_ARG_SIZE bytes on the stack for a callout
block, fills in the fields it knows, calls do_callout() and releases the block
again. A return value greater than zero jumps to the backtrack list, any other
non-zero value goes to the forced quit path, and zero continues matching.

Arguments:
  common  compiler state
  cc      points to the callout opcode; cc[1] is the callout number, followed
          by two LINK_SIZE sized values (pattern position, next item length)
  parent  backtrack record that receives the new record

Returns cc + 2 + 2 * LINK_SIZE, or NULL if the backtrack record could not be
allocated. */
compile_callout_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7347 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7348 {
7349 DEFINE_COMPILER;
7350 backtrack_common *backtrack;
7351
7352 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
7353
7354 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
7355
7356 SLJIT_ASSERT(common->capture_last_ptr != 0);
7357 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7358 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7359 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
7360 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
7361
7362 /* These pointer sized fields temporarily store internal variables. */
     /* offset_vector carries STR_PTR and subject carries OVECTOR(0);
     do_callout() converts them to offsets before setting the real values. */
7363 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
7364 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
7365 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
7366
     /* The mark field is copied from jit_arguments when the pattern uses a
     mark pointer; otherwise the immediate 0 is stored. */
7367 if (common->mark_ptr != 0)
7368   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
7369 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
7370 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
7371 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
7372
7373 /* Needed to save important temporary registers. */
7374 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
7375 /* SLJIT_R0 = arguments */
     /* R1 = address of the callout block, R2 = address of the JIT ovector. */
7376 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
7377 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
7378 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
7379 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7380 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
7381
7382 /* Check return value. */
     /* Positive values are handled first, so the "not equal" test that
     follows can only be true for negative values (hence SIG_LESS). */
7383 OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
7384 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32));
7385 if (common->forced_quit_label == NULL)
7386   add_jump(compiler, &common->forced_quit, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */);
7387 else
7388   JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->forced_quit_label);
7389 return cc + 2 + 2 * LINK_SIZE;
7390 }
7391 
/* The callout block helper macros are local to the callout code emitter
defined directly above, so they are removed again here. */
#undef CALLOUT_ARG_SIZE
#undef CALLOUT_ARG_OFFSET
7394 
assert_needs_str_ptr_saving(pcre_uchar * cc)7395 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(pcre_uchar *cc)
7396 {
7397 while (TRUE)
7398   {
7399   switch (*cc)
7400     {
7401     case OP_NOT_WORD_BOUNDARY:
7402     case OP_WORD_BOUNDARY:
7403     case OP_CIRC:
7404     case OP_CIRCM:
7405     case OP_DOLL:
7406     case OP_DOLLM:
7407     case OP_CALLOUT:
7408     case OP_ALT:
7409     cc += PRIV(OP_lengths)[*cc];
7410     break;
7411 
7412     case OP_KET:
7413     return FALSE;
7414 
7415     default:
7416     return TRUE;
7417     }
7418   }
7419 }
7420 
/* Emits the matching path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT), which may be preceded by OP_BRAZERO or
OP_BRAMINZERO when it is not used as a condition.

Every alternative of the assertion is compiled with compile_matchingpath()
followed by compile_backtrackingpath(). While the alternatives are compiled
the accept, quit, positive assert and then-trap related members of "common"
are replaced by assertion local values; the previous values are saved on
entry and restored on every exit path.

Arguments:
  common       the compiler state
  cc           points to the assertion opcode, or to the OP_BRAZERO /
               OP_BRAMINZERO in front of it
  backtrack    the assert backtrack; its framesize and private_data_ptr
               members are set here, and matchingpath is set for OP_BRAZERO
  conditional  when TRUE, failure jumps are collected in backtrack->condfailed
               instead of backtrack->common.topbacktracks

Returns:       cc + 1 + LINK_SIZE where cc points to the closing bracket of
               the assertion, or NULL if the sljit compiler reports an error
*/
static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
pcre_uchar *ccbegin;
pcre_uchar opcode;
pcre_uchar bra = OP_BRA;
jump_list *tmp = NULL;
/* Jump list which receives the "assertion failed" jumps. */
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
/* Jump list which receives the jump emitted after an alternative matched. */
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_exit = common->local_exit;
BOOL save_positive_assert = common->positive_assert;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assert_quit = common->positive_assert_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* A matching alternative of a positive assertion jumps to the local "tmp"
list, which is resolved at the "Assert is successful" label below. For a
negative assertion a matching alternative means failure, so it uses target. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (framesize < 0)
  {
  /* No frame needs to be built. One slot is used for STR_PTR unless
  assert_needs_str_ptr_saving() says it is unnecessary for a plain OP_BRA,
  plus one more slot for the control head when that is required. */
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
    }
  }
else
  {
  /* A frame of framesize slots is built. The extra slots hold STR_PTR, the
  previous value of private_data_ptr and, if needed, the control head. */
  extrasize = needs_control_head ? 3 : 2;
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  /* Negative assert is stronger than positive assert. */
  common->local_exit = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  common->positive_assert = FALSE;
  }
else
  common->positive_assert = TRUE;
common->positive_assert_quit = NULL;

/* One iteration for each alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Later alternatives start from the STR_PTR saved on the stack. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved state before reporting the failure. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  /* Pending accept jumps of this alternative land here. */
  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  /* This alternative matched: leave through the "found" list. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved state before reporting the failure. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  /* Step to the next alternative. */
  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  SLJIT_ASSERT(common->positive_assert_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assert_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assert_quit != NULL)
  {
  /* The code between this jump and JUMPHERE is only reached through the
  positive_assert_quit list; the normal fall through path skips it. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assert_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* With OP_BRAZERO the failure is not reported: the jump is bound to the
  matching path label further below. Otherwise it joins the target list. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize == 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
      if (extrasize == 2)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    set_jumps(backtrack->common.topbacktracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    /* For a repeated negative assertion the failure jumps are resolved
    right here, and the list is emptied afterwards. */
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
    set_jumps(backtrack->common.topbacktracks, LABEL());
    backtrack->common.topbacktracks = NULL;
    }
  }

/* Restore the state which was saved on entry. */
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  common->local_exit = save_local_exit;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->positive_assert = save_positive_assert;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assert_quit = save_positive_assert_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
7842 
match_once_common(compiler_common * common,pcre_uchar ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)7843 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
7844 {
7845 DEFINE_COMPILER;
7846 int stacksize;
7847 
7848 if (framesize < 0)
7849   {
7850   if (framesize == no_frame)
7851     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7852   else
7853     {
7854     stacksize = needs_control_head ? 1 : 0;
7855     if (ket != OP_KET || has_alternatives)
7856       stacksize++;
7857 
7858     if (stacksize > 0)
7859       free_stack(common, stacksize);
7860     }
7861 
7862   if (needs_control_head)
7863     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
7864 
7865   /* TMP2 which is set here used by OP_KETRMAX below. */
7866   if (ket == OP_KETRMAX)
7867     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
7868   else if (ket == OP_KETRMIN)
7869     {
7870     /* Move the STR_PTR to the private_data_ptr. */
7871     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
7872     }
7873   }
7874 else
7875   {
7876   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
7877   OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
7878   if (needs_control_head)
7879     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
7880 
7881   if (ket == OP_KETRMAX)
7882     {
7883     /* TMP2 which is set here used by OP_KETRMAX below. */
7884     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7885     }
7886   }
7887 if (needs_control_head)
7888   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
7889 }
7890 
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)7891 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
7892 {
7893 DEFINE_COMPILER;
7894 
7895 if (common->capture_last_ptr != 0)
7896   {
7897   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7898   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7899   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7900   stacksize++;
7901   }
7902 if (common->optimized_cbracket[offset >> 1] == 0)
7903   {
7904   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7905   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7906   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7907   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7908   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
7909   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7910   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7911   stacksize += 2;
7912   }
7913 return stacksize;
7914 }
7915 
7916 /*
7917   Handling bracketed expressions is probably the most complex part.
7918 
7919   Stack layout naming characters:
7920     S - Push the current STR_PTR
7921     0 - Push a 0 (NULL)
7922     A - Push the current STR_PTR. Needed for restoring the STR_PTR
7923         before the next alternative. Not pushed if there are no alternatives.
7924     M - Any values pushed by the current alternative. Can be empty, or anything.
7925     C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
    L - Push the previous local (pointed to by localptr) to the stack
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times
7929 
7930   The following list shows the regular expression templates, their PCRE byte codes
7931   and stack layout supported by pcre-sljit.
7932 
7933   (?:)                     OP_BRA     | OP_KET                A M
7934   ()                       OP_CBRA    | OP_KET                C M
7935   (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
7936                            OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
7937   (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
7938                            OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
7939   ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
7940                            OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
7941   ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
7942                            OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
7943   (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
7944   (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
7945   ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
7946   ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
7947   (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
7948            OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
7949   (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
7950            OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
7951   ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
7952            OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
7953   ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
7954            OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
7955 
7956 
7957   Stack layout naming characters:
7958     A - Push the alternative index (starting from 0) on the stack.
        Not pushed if there are no alternatives.
7960     M - Any values pushed by the current alternative. Can be empty, or anything.
7961 
7962   The next list shows the possible content of a bracket:
7963   (|)     OP_*BRA    | OP_ALT ...         M A
7964   (?()|)  OP_*COND   | OP_ALT             M A
7965   (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
7966   (?>|)   OP_ONCE_NC | OP_ALT ...         [stack trace] M A
7967                                           Or nothing, if trace is unnecessary
7968 */
7969 
compile_bracket_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7970 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7971 {
7972 DEFINE_COMPILER;
7973 backtrack_common *backtrack;
7974 pcre_uchar opcode;
7975 int private_data_ptr = 0;
7976 int offset = 0;
7977 int i, stacksize;
7978 int repeat_ptr = 0, repeat_length = 0;
7979 int repeat_type = 0, repeat_count = 0;
7980 pcre_uchar *ccbegin;
7981 pcre_uchar *matchingpath;
7982 pcre_uchar *slot;
7983 pcre_uchar bra = OP_BRA;
7984 pcre_uchar ket;
7985 assert_backtrack *assert;
7986 BOOL has_alternatives;
7987 BOOL needs_control_head = FALSE;
7988 struct sljit_jump *jump;
7989 struct sljit_jump *skip;
7990 struct sljit_label *rmax_label = NULL;
7991 struct sljit_jump *braminzero = NULL;
7992 
7993 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
7994 
7995 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
7996   {
7997   bra = *cc;
7998   cc++;
7999   opcode = *cc;
8000   }
8001 
8002 opcode = *cc;
8003 ccbegin = cc;
8004 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
8005 ket = *matchingpath;
8006 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
8007   {
8008   repeat_ptr = PRIVATE_DATA(matchingpath);
8009   repeat_length = PRIVATE_DATA(matchingpath + 1);
8010   repeat_type = PRIVATE_DATA(matchingpath + 2);
8011   repeat_count = PRIVATE_DATA(matchingpath + 3);
8012   SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
8013   if (repeat_type == OP_UPTO)
8014     ket = OP_KETRMAX;
8015   if (repeat_type == OP_MINUPTO)
8016     ket = OP_KETRMIN;
8017   }
8018 
8019 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
8020   {
8021   /* Drop this bracket_backtrack. */
8022   parent->top = backtrack->prev;
8023   return matchingpath + 1 + LINK_SIZE + repeat_length;
8024   }
8025 
8026 matchingpath = ccbegin + 1 + LINK_SIZE;
8027 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
8028 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
8029 cc += GET(cc, 1);
8030 
8031 has_alternatives = *cc == OP_ALT;
8032 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
8033   has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;
8034 
8035 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
8036   opcode = OP_SCOND;
8037 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
8038   opcode = OP_ONCE;
8039 
8040 if (opcode == OP_CBRA || opcode == OP_SCBRA)
8041   {
8042   /* Capturing brackets has a pre-allocated space. */
8043   offset = GET2(ccbegin, 1 + LINK_SIZE);
8044   if (common->optimized_cbracket[offset] == 0)
8045     {
8046     private_data_ptr = OVECTOR_PRIV(offset);
8047     offset <<= 1;
8048     }
8049   else
8050     {
8051     offset <<= 1;
8052     private_data_ptr = OVECTOR(offset);
8053     }
8054   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
8055   matchingpath += IMM2_SIZE;
8056   }
8057 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
8058   {
8059   /* Other brackets simply allocate the next entry. */
8060   private_data_ptr = PRIVATE_DATA(ccbegin);
8061   SLJIT_ASSERT(private_data_ptr != 0);
8062   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
8063   if (opcode == OP_ONCE)
8064     BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
8065   }
8066 
8067 /* Instructions before the first alternative. */
8068 stacksize = 0;
8069 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
8070   stacksize++;
8071 if (bra == OP_BRAZERO)
8072   stacksize++;
8073 
8074 if (stacksize > 0)
8075   allocate_stack(common, stacksize);
8076 
8077 stacksize = 0;
8078 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
8079   {
8080   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8081   stacksize++;
8082   }
8083 
8084 if (bra == OP_BRAZERO)
8085   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8086 
8087 if (bra == OP_BRAMINZERO)
8088   {
8089   /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
8090   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8091   if (ket != OP_KETRMIN)
8092     {
8093     free_stack(common, 1);
8094     braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
8095     }
8096   else
8097     {
8098     if (opcode == OP_ONCE || opcode >= OP_SBRA)
8099       {
8100       jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
8101       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8102       /* Nothing stored during the first run. */
8103       skip = JUMP(SLJIT_JUMP);
8104       JUMPHERE(jump);
8105       /* Checking zero-length iteration. */
8106       if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
8107         {
8108         /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
8109         braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8110         }
8111       else
8112         {
8113         /* Except when the whole stack frame must be saved. */
8114         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8115         braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
8116         }
8117       JUMPHERE(skip);
8118       }
8119     else
8120       {
8121       jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
8122       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8123       JUMPHERE(jump);
8124       }
8125     }
8126   }
8127 
8128 if (repeat_type != 0)
8129   {
8130   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
8131   if (repeat_type == OP_EXACT)
8132     rmax_label = LABEL();
8133   }
8134 
8135 if (ket == OP_KETRMIN)
8136   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
8137 
8138 if (ket == OP_KETRMAX)
8139   {
8140   rmax_label = LABEL();
8141   if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
8142     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
8143   }
8144 
8145 /* Handling capturing brackets and alternatives. */
8146 if (opcode == OP_ONCE)
8147   {
8148   stacksize = 0;
8149   if (needs_control_head)
8150     {
8151     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8152     stacksize++;
8153     }
8154 
8155   if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
8156     {
8157     /* Neither capturing brackets nor recursions are found in the block. */
8158     if (ket == OP_KETRMIN)
8159       {
8160       stacksize += 2;
8161       if (!needs_control_head)
8162         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8163       }
8164     else
8165       {
8166       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
8167         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
8168       if (ket == OP_KETRMAX || has_alternatives)
8169         stacksize++;
8170       }
8171 
8172     if (stacksize > 0)
8173       allocate_stack(common, stacksize);
8174 
8175     stacksize = 0;
8176     if (needs_control_head)
8177       {
8178       stacksize++;
8179       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8180       }
8181 
8182     if (ket == OP_KETRMIN)
8183       {
8184       if (needs_control_head)
8185         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8186       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8187       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
8188         OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
8189       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
8190       }
8191     else if (ket == OP_KETRMAX || has_alternatives)
8192       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8193     }
8194   else
8195     {
8196     if (ket != OP_KET || has_alternatives)
8197       stacksize++;
8198 
8199     stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
8200     allocate_stack(common, stacksize);
8201 
8202     if (needs_control_head)
8203       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8204 
8205     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8206     OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8207 
8208     stacksize = needs_control_head ? 1 : 0;
8209     if (ket != OP_KET || has_alternatives)
8210       {
8211       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8212       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8213       stacksize++;
8214       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8215       }
8216     else
8217       {
8218       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8219       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8220       }
8221     init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
8222     }
8223   }
8224 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
8225   {
8226   /* Saving the previous values. */
8227   if (common->optimized_cbracket[offset >> 1] != 0)
8228     {
8229     SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
8230     allocate_stack(common, 2);
8231     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8232     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
8233     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8234     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
8235     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
8236     }
8237   else
8238     {
8239     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8240     allocate_stack(common, 1);
8241     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8242     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8243     }
8244   }
8245 else if (opcode == OP_SBRA || opcode == OP_SCOND)
8246   {
8247   /* Saving the previous value. */
8248   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8249   allocate_stack(common, 1);
8250   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8251   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8252   }
8253 else if (has_alternatives)
8254   {
8255   /* Pushing the starting string pointer. */
8256   allocate_stack(common, 1);
8257   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8258   }
8259 
8260 /* Generating code for the first alternative. */
8261 if (opcode == OP_COND || opcode == OP_SCOND)
8262   {
8263   if (*matchingpath == OP_CREF)
8264     {
8265     SLJIT_ASSERT(has_alternatives);
8266     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
8267       CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
8268     matchingpath += 1 + IMM2_SIZE;
8269     }
8270   else if (*matchingpath == OP_DNCREF)
8271     {
8272     SLJIT_ASSERT(has_alternatives);
8273 
8274     i = GET2(matchingpath, 1 + IMM2_SIZE);
8275     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
8276     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8277     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
8278     OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
8279     slot += common->name_entry_size;
8280     i--;
8281     while (i-- > 0)
8282       {
8283       OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
8284       OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
8285       slot += common->name_entry_size;
8286       }
8287     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8288     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
8289     matchingpath += 1 + 2 * IMM2_SIZE;
8290     }
8291   else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
8292     {
8293     /* Never has other case. */
8294     BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
8295     SLJIT_ASSERT(!has_alternatives);
8296 
8297     if (*matchingpath == OP_FAIL)
8298       stacksize = 0;
8299     else if (*matchingpath == OP_RREF)
8300       {
8301       stacksize = GET2(matchingpath, 1);
8302       if (common->currententry == NULL)
8303         stacksize = 0;
8304       else if (stacksize == RREF_ANY)
8305         stacksize = 1;
8306       else if (common->currententry->start == 0)
8307         stacksize = stacksize == 0;
8308       else
8309         stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
8310 
8311       if (stacksize != 0)
8312         matchingpath += 1 + IMM2_SIZE;
8313       }
8314     else
8315       {
8316       if (common->currententry == NULL || common->currententry->start == 0)
8317         stacksize = 0;
8318       else
8319         {
8320         stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
8321         slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
8322         i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
8323         while (stacksize > 0)
8324           {
8325           if ((int)GET2(slot, 0) == i)
8326             break;
8327           slot += common->name_entry_size;
8328           stacksize--;
8329           }
8330         }
8331 
8332       if (stacksize != 0)
8333         matchingpath += 1 + 2 * IMM2_SIZE;
8334       }
8335 
8336       /* The stacksize == 0 is a common "else" case. */
8337       if (stacksize == 0)
8338         {
8339         if (*cc == OP_ALT)
8340           {
8341           matchingpath = cc + 1 + LINK_SIZE;
8342           cc += GET(cc, 1);
8343           }
8344         else
8345           matchingpath = cc;
8346         }
8347     }
8348   else
8349     {
8350     SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
8351     /* Similar code as PUSH_BACKTRACK macro. */
8352     assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
8353     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8354       return NULL;
8355     memset(assert, 0, sizeof(assert_backtrack));
8356     assert->common.cc = matchingpath;
8357     BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
8358     matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
8359     }
8360   }
8361 
8362 compile_matchingpath(common, matchingpath, cc, backtrack);
8363 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8364   return NULL;
8365 
8366 if (opcode == OP_ONCE)
8367   match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
8368 
8369 stacksize = 0;
8370 if (repeat_type == OP_MINUPTO)
8371   {
8372   /* We need to preserve the counter. TMP2 will be used below. */
8373   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
8374   stacksize++;
8375   }
8376 if (ket != OP_KET || bra != OP_BRA)
8377   stacksize++;
8378 if (offset != 0)
8379   {
8380   if (common->capture_last_ptr != 0)
8381     stacksize++;
8382   if (common->optimized_cbracket[offset >> 1] == 0)
8383     stacksize += 2;
8384   }
8385 if (has_alternatives && opcode != OP_ONCE)
8386   stacksize++;
8387 
8388 if (stacksize > 0)
8389   allocate_stack(common, stacksize);
8390 
8391 stacksize = 0;
8392 if (repeat_type == OP_MINUPTO)
8393   {
8394   /* TMP2 was set above. */
8395   OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
8396   stacksize++;
8397   }
8398 
8399 if (ket != OP_KET || bra != OP_BRA)
8400   {
8401   if (ket != OP_KET)
8402     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8403   else
8404     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8405   stacksize++;
8406   }
8407 
8408 if (offset != 0)
8409   stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
8410 
8411 if (has_alternatives)
8412   {
8413   if (opcode != OP_ONCE)
8414     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8415   if (ket != OP_KETRMAX)
8416     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8417   }
8418 
8419 /* Must be after the matchingpath label. */
8420 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
8421   {
8422   SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
8423   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8424   }
8425 
8426 if (ket == OP_KETRMAX)
8427   {
8428   if (repeat_type != 0)
8429     {
8430     if (has_alternatives)
8431       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8432     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8433     JUMPTO(SLJIT_NOT_ZERO, rmax_label);
8434     /* Drop STR_PTR for greedy plus quantifier. */
8435     if (opcode != OP_ONCE)
8436       free_stack(common, 1);
8437     }
8438   else if (opcode == OP_ONCE || opcode >= OP_SBRA)
8439     {
8440     if (has_alternatives)
8441       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8442     /* Checking zero-length iteration. */
8443     if (opcode != OP_ONCE)
8444       {
8445       CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
8446       /* Drop STR_PTR for greedy plus quantifier. */
8447       if (bra != OP_BRAZERO)
8448         free_stack(common, 1);
8449       }
8450     else
8451       /* TMP2 must contain the starting STR_PTR. */
8452       CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
8453     }
8454   else
8455     JUMPTO(SLJIT_JUMP, rmax_label);
8456   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
8457   }
8458 
8459 if (repeat_type == OP_EXACT)
8460   {
8461   count_match(common);
8462   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8463   JUMPTO(SLJIT_NOT_ZERO, rmax_label);
8464   }
8465 else if (repeat_type == OP_UPTO)
8466   {
8467   /* We need to preserve the counter. */
8468   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
8469   allocate_stack(common, 1);
8470   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8471   }
8472 
8473 if (bra == OP_BRAZERO)
8474   BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
8475 
8476 if (bra == OP_BRAMINZERO)
8477   {
8478   /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
8479   JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
8480   if (braminzero != NULL)
8481     {
8482     JUMPHERE(braminzero);
8483     /* We need to release the end pointer to perform the
8484     backtrack for the zero-length iteration. When
8485     framesize is < 0, OP_ONCE will do the release itself. */
8486     if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
8487       {
8488       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8489       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8490       }
8491     else if (ket == OP_KETRMIN && opcode != OP_ONCE)
8492       free_stack(common, 1);
8493     }
8494   /* Continue to the normal backtrack. */
8495   }
8496 
8497 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
8498   count_match(common);
8499 
8500 /* Skip the other alternatives. */
8501 while (*cc == OP_ALT)
8502   cc += GET(cc, 1);
8503 cc += 1 + LINK_SIZE;
8504 
8505 if (opcode == OP_ONCE)
8506   {
8507   /* We temporarily encode the needs_control_head in the lowest bit.
8508      Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
8509      the same value for small signed numbers (including negative numbers). */
8510   BACKTRACK_AS(bracket_backtrack)->u.framesize = ((unsigned int)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
8511   }
8512 return cc + repeat_length;
8513 }
8514 
/* Generates the matching path of a repeated bracket of the OP_BRAPOS family
(OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS, OP_SCBRAPOS), optionally preceded by
OP_BRAPOSZERO, and closed by OP_KETRPOS.

Arguments:
  common   the shared compiler state
  cc       points to the bracket opcode (or to the OP_BRAPOSZERO before it)
  parent   the parent backtrack descriptor (not referenced directly in this
           function; presumably consumed by PUSH_BACKTRACK - verify against
           the macro definition)

Every alternative of the bracket is compiled in turn. After an alternative
matches, the emitted code records the new string position and jumps back to
the head of the loop; when an alternative fails, its backtracking path is
emitted and the string position of the last successful iteration is restored.

Returns the address of the byte code after OP_KETRPOS, or NULL when the
sljit compiler reports an error. */
static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
pcre_uchar *ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  /* Zero iterations are acceptable: no "at least one match" flag is needed. */
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on, offset is the index of the first ovector item of the pair. */
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame is created. Stack layout, from STACK(0):
       capturing:      OVECTOR(offset), OVECTOR(offset + 1),
                       [capture_last_ptr value]
       non-capturing:  STR_PTR
     followed by [control head] and, when !zero, the flag which is the
     last slot: STACK(stacksize - 1). */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag is stored after the control head slot; it starts as 1 and is
  cleared by the emitted code after a successful iteration. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    /* Step back so that stack is the index of the control head slot. */
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A frame is created. Stack layout, from STACK(0):
       [flag when !zero], [control head], [STR_PTR when non-capturing],
       previous value of private_data_ptr, then the frame itself. */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
  /* Step back to the control head slot (the value is only used when
  needs_control_head is set). */
  stack -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  /* Advance cc to the end of the current alternative. */
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* Code executed after the alternative matched: record the new position. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      /* TMP1 keeps the start of this iteration for the empty match check. */
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* An iteration which did not move STR_PTR leaves the loop. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      /* TMP1 keeps the start of this iteration for the empty match check. */
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* An iteration which did not move STR_PTR leaves the loop. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* framesize >= 0 on this path, so the flag lives in STACK(0). */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  /* Backtracking path of the alternative: restore the last saved position. */
  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  /* Continue with the alternative which follows this OP_ALT. */
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* The flag is still non-zero when no iteration cleared it: backtrack. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
8797 
get_iterator_parameters(compiler_common * common,pcre_uchar * cc,pcre_uchar * opcode,pcre_uchar * type,sljit_u32 * max,sljit_u32 * exact,pcre_uchar ** end)8798 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, sljit_u32 *max, sljit_u32 *exact, pcre_uchar **end)
8799 {
8800 int class_len;
8801 
8802 *opcode = *cc;
8803 *exact = 0;
8804 
8805 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
8806   {
8807   cc++;
8808   *type = OP_CHAR;
8809   }
8810 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
8811   {
8812   cc++;
8813   *type = OP_CHARI;
8814   *opcode -= OP_STARI - OP_STAR;
8815   }
8816 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
8817   {
8818   cc++;
8819   *type = OP_NOT;
8820   *opcode -= OP_NOTSTAR - OP_STAR;
8821   }
8822 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
8823   {
8824   cc++;
8825   *type = OP_NOTI;
8826   *opcode -= OP_NOTSTARI - OP_STAR;
8827   }
8828 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
8829   {
8830   cc++;
8831   *opcode -= OP_TYPESTAR - OP_STAR;
8832   *type = OP_END;
8833   }
8834 else
8835   {
8836   SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
8837   *type = *opcode;
8838   cc++;
8839   class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
8840   *opcode = cc[class_len - 1];
8841 
8842   if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
8843     {
8844     *opcode -= OP_CRSTAR - OP_STAR;
8845     *end = cc + class_len;
8846 
8847     if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
8848       {
8849       *exact = 1;
8850       *opcode -= OP_PLUS - OP_STAR;
8851       }
8852     }
8853   else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
8854     {
8855     *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
8856     *end = cc + class_len;
8857 
8858     if (*opcode == OP_POSPLUS)
8859       {
8860       *exact = 1;
8861       *opcode = OP_POSSTAR;
8862       }
8863     }
8864   else
8865     {
8866     SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
8867     *max = GET2(cc, (class_len + IMM2_SIZE));
8868     *exact = GET2(cc, class_len);
8869 
8870     if (*max == 0)
8871       {
8872       if (*opcode == OP_CRPOSRANGE)
8873         *opcode = OP_POSSTAR;
8874       else
8875         *opcode -= OP_CRRANGE - OP_STAR;
8876       }
8877     else
8878       {
8879       *max -= *exact;
8880       if (*max == 0)
8881         *opcode = OP_EXACT;
8882       else if (*max == 1)
8883         {
8884         if (*opcode == OP_CRPOSRANGE)
8885           *opcode = OP_POSQUERY;
8886         else
8887           *opcode -= OP_CRRANGE - OP_QUERY;
8888         }
8889       else
8890         {
8891         if (*opcode == OP_CRPOSRANGE)
8892           *opcode = OP_POSUPTO;
8893         else
8894           *opcode -= OP_CRRANGE - OP_UPTO;
8895         }
8896       }
8897     *end = cc + class_len + 2 * IMM2_SIZE;
8898     }
8899   return cc;
8900   }
8901 
8902 switch(*opcode)
8903   {
8904   case OP_EXACT:
8905   *exact = GET2(cc, 0);
8906   cc += IMM2_SIZE;
8907   break;
8908 
8909   case OP_PLUS:
8910   case OP_MINPLUS:
8911   *exact = 1;
8912   *opcode -= OP_PLUS - OP_STAR;
8913   break;
8914 
8915   case OP_POSPLUS:
8916   *exact = 1;
8917   *opcode = OP_POSSTAR;
8918   break;
8919 
8920   case OP_UPTO:
8921   case OP_MINUPTO:
8922   case OP_POSUPTO:
8923   *max = GET2(cc, 0);
8924   cc += IMM2_SIZE;
8925   break;
8926   }
8927 
8928 if (*type == OP_END)
8929   {
8930   *type = *cc;
8931   *end = next_opcode(common, cc);
8932   cc++;
8933   return cc;
8934   }
8935 
8936 *end = cc + 1;
8937 #ifdef SUPPORT_UTF
8938 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
8939 #endif
8940 return cc;
8941 }
8942 
compile_iterator_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)8943 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8944 {
8945 DEFINE_COMPILER;
8946 backtrack_common *backtrack;
8947 pcre_uchar opcode;
8948 pcre_uchar type;
8949 sljit_u32 max = 0, exact;
8950 BOOL fast_fail;
8951 sljit_s32 fast_str_ptr;
8952 BOOL charpos_enabled;
8953 pcre_uchar charpos_char;
8954 unsigned int charpos_othercasebit;
8955 pcre_uchar *end;
8956 jump_list *no_match = NULL;
8957 jump_list *no_char1_match = NULL;
8958 struct sljit_jump *jump = NULL;
8959 struct sljit_label *label;
8960 int private_data_ptr = PRIVATE_DATA(cc);
8961 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
8962 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
8963 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
8964 int tmp_base, tmp_offset;
8965 
8966 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
8967 
8968 fast_str_ptr = PRIVATE_DATA(cc + 1);
8969 fast_fail = TRUE;
8970 
8971 SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);
8972 
8973 if (cc == common->fast_forward_bc_ptr)
8974   fast_fail = FALSE;
8975 else if (common->fast_fail_start_ptr == 0)
8976   fast_str_ptr = 0;
8977 
8978 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
8979   || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));
8980 
8981 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
8982 
8983 if (type != OP_EXTUNI)
8984   {
8985   tmp_base = TMP3;
8986   tmp_offset = 0;
8987   }
8988 else
8989   {
8990   tmp_base = SLJIT_MEM1(SLJIT_SP);
8991   tmp_offset = POSSESSIVE0;
8992   }
8993 
8994 if (fast_fail && fast_str_ptr != 0)
8995   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
8996 
8997 /* Handle fixed part first. */
8998 if (exact > 1)
8999   {
9000   SLJIT_ASSERT(fast_str_ptr == 0);
9001   if (common->mode == JIT_COMPILE
9002 #ifdef SUPPORT_UTF
9003       && !common->utf
9004 #endif
9005       && type != OP_ANYNL && type != OP_EXTUNI)
9006     {
9007     OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
9008     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
9009     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
9010     label = LABEL();
9011     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
9012     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9013     JUMPTO(SLJIT_NOT_ZERO, label);
9014     }
9015   else
9016     {
9017     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
9018     label = LABEL();
9019     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
9020     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9021     JUMPTO(SLJIT_NOT_ZERO, label);
9022     }
9023   }
9024 else if (exact == 1)
9025   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
9026 
9027 switch(opcode)
9028   {
9029   case OP_STAR:
9030   case OP_UPTO:
9031   SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);
9032 
9033   if (type == OP_ANYNL || type == OP_EXTUNI)
9034     {
9035     SLJIT_ASSERT(private_data_ptr == 0);
9036     SLJIT_ASSERT(fast_str_ptr == 0);
9037 
9038     allocate_stack(common, 2);
9039     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9040     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9041 
9042     if (opcode == OP_UPTO)
9043       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
9044 
9045     label = LABEL();
9046     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
9047     if (opcode == OP_UPTO)
9048       {
9049       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9050       OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9051       jump = JUMP(SLJIT_ZERO);
9052       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9053       }
9054 
9055     /* We cannot use TMP3 because of this allocate_stack. */
9056     allocate_stack(common, 1);
9057     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9058     JUMPTO(SLJIT_JUMP, label);
9059     if (jump != NULL)
9060       JUMPHERE(jump);
9061     }
9062   else
9063     {
9064     charpos_enabled = FALSE;
9065     charpos_char = 0;
9066     charpos_othercasebit = 0;
9067 
9068     if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
9069       {
9070       charpos_enabled = TRUE;
9071 #ifdef SUPPORT_UTF
9072       charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
9073 #endif
9074       if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
9075         {
9076         charpos_othercasebit = char_get_othercase_bit(common, end + 1);
9077         if (charpos_othercasebit == 0)
9078           charpos_enabled = FALSE;
9079         }
9080 
9081       if (charpos_enabled)
9082         {
9083         charpos_char = end[1];
        /* Consume the OP_CHAR opcode. */
9085         end += 2;
9086 #if defined COMPILE_PCRE8
9087         SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
9088 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
9089         SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
9090         if ((charpos_othercasebit & 0x100) != 0)
9091           charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
9092 #endif
9093         if (charpos_othercasebit != 0)
9094           charpos_char |= charpos_othercasebit;
9095 
9096         BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
9097         BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
9098         BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
9099         }
9100       }
9101 
9102     if (charpos_enabled)
9103       {
9104       if (opcode == OP_UPTO)
9105         OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
9106 
9107       /* Search the first instance of charpos_char. */
9108       jump = JUMP(SLJIT_JUMP);
9109       label = LABEL();
9110       if (opcode == OP_UPTO)
9111         {
9112         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9113         add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
9114         }
9115       compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
9116       if (fast_str_ptr != 0)
9117         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9118       JUMPHERE(jump);
9119 
9120       detect_partial_match(common, &backtrack->topbacktracks);
9121       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
9122       if (charpos_othercasebit != 0)
9123         OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
9124       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
9125 
9126       if (private_data_ptr == 0)
9127         allocate_stack(common, 2);
9128       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9129       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9130       if (opcode == OP_UPTO)
9131         {
9132         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9133         add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
9134         }
9135 
9136       /* Search the last instance of charpos_char. */
9137       label = LABEL();
9138       compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
9139       if (fast_str_ptr != 0)
9140         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9141       detect_partial_match(common, &no_match);
9142       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
9143       if (charpos_othercasebit != 0)
9144         OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
9145       if (opcode == OP_STAR)
9146         {
9147         CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
9148         OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9149         }
9150       else
9151         {
9152         jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
9153         OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9154         JUMPHERE(jump);
9155         }
9156 
9157       if (opcode == OP_UPTO)
9158         {
9159         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9160         JUMPTO(SLJIT_NOT_ZERO, label);
9161         }
9162       else
9163         JUMPTO(SLJIT_JUMP, label);
9164 
9165       set_jumps(no_match, LABEL());
9166       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9167       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9168       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9169       }
9170 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9171     else if (common->utf)
9172       {
9173       if (private_data_ptr == 0)
9174         allocate_stack(common, 2);
9175 
9176       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9177       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9178 
9179       if (opcode == OP_UPTO)
9180         OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9181 
9182       label = LABEL();
9183       compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9184       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9185 
9186       if (opcode == OP_UPTO)
9187         {
9188         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9189         JUMPTO(SLJIT_NOT_ZERO, label);
9190         }
9191       else
9192         JUMPTO(SLJIT_JUMP, label);
9193 
9194       set_jumps(no_match, LABEL());
9195       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9196       if (fast_str_ptr != 0)
9197         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9198       }
9199 #endif
9200     else
9201       {
9202       if (private_data_ptr == 0)
9203         allocate_stack(common, 2);
9204 
9205       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9206       if (opcode == OP_UPTO)
9207         OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9208 
9209       label = LABEL();
9210       detect_partial_match(common, &no_match);
9211       compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9212       if (opcode == OP_UPTO)
9213         {
9214         OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9215         JUMPTO(SLJIT_NOT_ZERO, label);
9216         OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9217         }
9218       else
9219         JUMPTO(SLJIT_JUMP, label);
9220 
9221       set_jumps(no_char1_match, LABEL());
9222       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9223       set_jumps(no_match, LABEL());
9224       OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9225       if (fast_str_ptr != 0)
9226         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9227       }
9228     }
9229   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9230   break;
9231 
9232   case OP_MINSTAR:
9233   if (private_data_ptr == 0)
9234     allocate_stack(common, 1);
9235   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9236   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9237   if (fast_str_ptr != 0)
9238     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9239   break;
9240 
9241   case OP_MINUPTO:
9242   SLJIT_ASSERT(fast_str_ptr == 0);
9243   if (private_data_ptr == 0)
9244     allocate_stack(common, 2);
9245   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9246   OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
9247   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9248   break;
9249 
9250   case OP_QUERY:
9251   case OP_MINQUERY:
9252   SLJIT_ASSERT(fast_str_ptr == 0);
9253   if (private_data_ptr == 0)
9254     allocate_stack(common, 1);
9255   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9256   if (opcode == OP_QUERY)
9257     compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
9258   BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9259   break;
9260 
9261   case OP_EXACT:
9262   break;
9263 
9264   case OP_POSSTAR:
9265 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9266   if (common->utf)
9267     {
9268     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9269     label = LABEL();
9270     compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9271     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9272     JUMPTO(SLJIT_JUMP, label);
9273     set_jumps(no_match, LABEL());
9274     OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
9275     if (fast_str_ptr != 0)
9276       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9277     break;
9278     }
9279 #endif
9280   label = LABEL();
9281   detect_partial_match(common, &no_match);
9282   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9283   JUMPTO(SLJIT_JUMP, label);
9284   set_jumps(no_char1_match, LABEL());
9285   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9286   set_jumps(no_match, LABEL());
9287   if (fast_str_ptr != 0)
9288     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9289   break;
9290 
9291   case OP_POSUPTO:
9292   SLJIT_ASSERT(fast_str_ptr == 0);
9293 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9294   if (common->utf)
9295     {
9296     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
9297     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9298     label = LABEL();
9299     compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9300     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
9301     OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9302     JUMPTO(SLJIT_NOT_ZERO, label);
9303     set_jumps(no_match, LABEL());
9304     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9305     break;
9306     }
9307 #endif
9308   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9309   label = LABEL();
9310   detect_partial_match(common, &no_match);
9311   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9312   OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9313   JUMPTO(SLJIT_NOT_ZERO, label);
9314   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9315   set_jumps(no_char1_match, LABEL());
9316   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9317   set_jumps(no_match, LABEL());
9318   break;
9319 
9320   case OP_POSQUERY:
9321   SLJIT_ASSERT(fast_str_ptr == 0);
9322   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9323   compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9324   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9325   set_jumps(no_match, LABEL());
9326   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
9327   break;
9328 
9329   default:
9330   SLJIT_UNREACHABLE();
9331   break;
9332   }
9333 
9334 count_match(common);
9335 return end;
9336 }
9337 
compile_fail_accept_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)9338 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
9339 {
9340 DEFINE_COMPILER;
9341 backtrack_common *backtrack;
9342 
9343 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9344 
9345 if (*cc == OP_FAIL)
9346   {
9347   add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9348   return cc + 1;
9349   }
9350 
9351 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
9352   {
9353   /* No need to check notempty conditions. */
9354   if (common->accept_label == NULL)
9355     add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
9356   else
9357     JUMPTO(SLJIT_JUMP, common->accept_label);
9358   return cc + 1;
9359   }
9360 
9361 if (common->accept_label == NULL)
9362   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
9363 else
9364   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
9365 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9366 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
9367 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9368 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
9369 if (common->accept_label == NULL)
9370   add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9371 else
9372   CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
9373 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9374 if (common->accept_label == NULL)
9375   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
9376 else
9377   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
9378 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9379 return cc + 1;
9380 }
9381 
compile_close_matchingpath(compiler_common * common,pcre_uchar * cc)9382 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
9383 {
9384 DEFINE_COMPILER;
9385 int offset = GET2(cc, 1);
9386 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
9387 
9388 /* Data will be discarded anyway... */
9389 if (common->currententry != NULL)
9390   return cc + 1 + IMM2_SIZE;
9391 
9392 if (!optimized_cbracket)
9393   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
9394 offset <<= 1;
9395 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9396 if (!optimized_cbracket)
9397   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9398 return cc + 1 + IMM2_SIZE;
9399 }
9400 
compile_control_verb_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)9401 static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
9402 {
9403 DEFINE_COMPILER;
9404 backtrack_common *backtrack;
9405 pcre_uchar opcode = *cc;
9406 pcre_uchar *ccend = cc + 1;
9407 
9408 if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
9409   ccend += 2 + cc[1];
9410 
9411 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9412 
9413 if (opcode == OP_SKIP)
9414   {
9415   allocate_stack(common, 1);
9416   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9417   return ccend;
9418   }
9419 
9420 if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
9421   {
9422   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9423   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
9424   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
9425   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
9426   }
9427 
9428 return ccend;
9429 }
9430 
/* One-element opcode sequence holding OP_THEN_TRAP. The then-trap backtrack
records point their common.cc at this array instead of at a position in the
compiled pattern. */
static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
9432 
compile_then_trap_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,backtrack_common * parent)9433 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
9434 {
9435 DEFINE_COMPILER;
9436 backtrack_common *backtrack;
9437 BOOL needs_control_head;
9438 int size;
9439 
9440 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
9441 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
9442 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
9443 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
9444 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
9445 
9446 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9447 size = 3 + (size < 0 ? 0 : size);
9448 
9449 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9450 allocate_stack(common, size);
9451 if (size > 3)
9452   OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
9453 else
9454   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
9455 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
9456 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
9457 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
9458 
9459 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9460 if (size >= 0)
9461   init_frame(common, cc, ccend, size - 1, 0, FALSE);
9462 }
9463 
/* Emits the matching path for the opcode sequence [cc, ccend).

Each opcode is dispatched to the helper that compiles it. A helper returns
the address of the next opcode to process; when it returns NULL this function
returns immediately (presumably a compile error - confirm against the
helpers). ccend must point at OP_END or at an opcode in the range
OP_ALT..OP_KETRPOS.

`parent` is the backtrack record under which the records created for this
sequence are pushed. Helpers that take a jump list instead of `parent`
receive &parent->top->nextbacktracks when parent->top is set, and
&parent->topbacktracks otherwise.

When the pattern uses THEN and common->then_offsets has a non-zero entry for
the offset of cc, a then-trap record is emitted before the sequence and a
second one is pushed after it; common->then_trap is restored at the end. */
static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL has_then_trap = FALSE;
then_trap_backtrack *save_then_trap = NULL;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  has_then_trap = TRUE;
  save_then_trap = common->then_trap;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  switch(*cc)
    {
    /* Opcodes compiled by compile_simple_assertion_matchingpath. */
    case OP_SOD:
    case OP_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_REVERSE:
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

    /* Opcodes compiled by compile_char1_matchingpath. */
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_NOT:
    case OP_NOTI:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

    /* The previous OVECTOR(0) is saved in one stack slot, then OVECTOR(0) is
    overwritten with the current STR_PTR. */
    case OP_SET_SOM:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    /* compile_charn_matchingpath is used only in JIT_COMPILE mode; other
    modes go through compile_char1_matchingpath. */
    case OP_CHAR:
    case OP_CHARI:
    if (common->mode == JIT_COMPILE)
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

    /* All repeat opcodes below are compiled by compile_iterator_matchingpath. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    /* The opcode that follows the 32 bytes of class data decides whether the
    class is repeated (OP_CRSTAR..OP_CRPOSRANGE) or matched once. */
    case OP_CLASS:
    case OP_NCLASS:
    if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
    /* Same decision as above; the following opcode is at cc + GET(cc, 1). */
    case OP_XCLASS:
    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;
#endif

    /* A reference followed by a repeat opcode goes to the ref iterator;
    otherwise it is matched once and the opcode is skipped here. */
    case OP_REF:
    case OP_REFI:
    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    break;

    /* As for OP_REF, with two immediates and an extra compile_dnref_search
    step before the reference is matched. */
    case OP_DNREF:
    case OP_DNREFI:
    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    /* Assertions get their own assert_backtrack record. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    /* The bracket body is not compiled here: cc jumps to the end of the
    bracket. One slot (STR_PTR) is pushed unless the bracket ends with
    OP_KETRMIN, in which case two slots (0 and STR_PTR) are pushed. */
    case OP_BRAMINZERO:
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else
      {
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    count_match(common);
    break;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    /* The opcode after OP_BRAZERO selects the handler: opcodes above
    OP_ASSERTBACK_NOT are compiled as brackets, the others as assertions. */
    case OP_BRAZERO:
    if (cc[1] > OP_ASSERTBACK_NOT)
      cc = compile_bracket_matchingpath(common, cc, parent);
    else
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    /* The previous mark is saved on the stack, then cc + 2 is stored in both
    the local mark slot and jit_arguments.mark_ptr. With has_skip_arg, five
    slots are used instead of one, and a type_mark entry (previous control
    head, type, mark address, STR_PTR) is linked in as the new control head. */
    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_COMMIT:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    /* No code is emitted for the bracket that follows; it is skipped. */
    case OP_SKIPZERO:
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_UNREACHABLE();
    return;
    }
  /* A helper returned NULL: stop compiling this sequence. */
  if (cc == NULL)
    return;
  }

if (has_then_trap)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = save_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}
9774 
/* The matching-path helper macros are retired here; the code below uses the
CURRENT_AS accessor instead. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Compiles the backtracking path of `current` and returns from the calling
function (which must return void) as soon as the sljit compiler reports an
error. Expects `common` and `compiler` to be in scope at the point of use. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Views the variable named `current` in the enclosing function as a pointer
to the given backtrack record type. */
#define CURRENT_AS(type) ((type *)current)
9789 
/* Emits the backtracking path of a single-item iterator (the record pushed
by compile_iterator_matchingpath for current->cc).

The iterator state lives either in two stack slots (STACK(0) / STACK(1)) or,
when PRIVATE_DATA(cc) is non-zero, in the private data area at
private_data_ptr and private_data_ptr + sizeof(sljit_sw); `base`, `offset0`
and `offset1` select between the two. Stack slots are released here only in
the stack-based case.

Each case either jumps back to the record's matchingpath label with an
updated STR_PTR, or falls through to the end of the function, where
current->topbacktracks is bound to the next emitted instruction. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
pcre_uchar opcode;
pcre_uchar type;
sljit_u32 max = 0, exact;
struct sljit_label *label = NULL;
struct sljit_jump *jump = NULL;
jump_list *jumplist = NULL;
pcre_uchar *end;
int private_data_ptr = PRIVATE_DATA(cc);
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* Pop one saved STR_PTR per backtrack. A non-zero value resumes the
    matching path; a zero value falls through (the matching path stores a 0
    in the second of the two slots it allocates first). */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
    }
  else
    {
    if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
      {
      /* Walk backwards from the saved position (offset0) down to the limit
      kept in offset1, resuming the matching path at each position whose
      preceding character equals charpos.chr (after OR-ing in othercasebit
      when that is non-zero). */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      OP1(SLJIT_MOV, TMP2, 0, base, offset1);
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
      label = LABEL();
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
      skip_char_back(common);
      CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
      }
    else
      {
      /* Give back one character per backtrack until the saved position
      reaches the limit kept in offset1. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
      skip_char_back(common);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
      }
    /* Nothing left to give back. */
    JUMPHERE(jump);
    if (private_data_ptr == 0)
      free_stack(common, 2);
    }
  break;

  /* Minimizing star: match one more item from the saved position, store the
  new position and resume; if the item does not match, the iterator fails. */
  case OP_MINSTAR:
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  /* As OP_MINSTAR, with a counter in offset1 that is decremented first; the
  iterator fails when the counter reaches zero. */
  case OP_MINUPTO:
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);

  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 2);
  break;

  /* The saved position is consumed by writing 0 over it, so the alternative
  without the item is tried exactly once. u.backtracks (the item failing in
  the matching path) takes the same restore-and-resume route. */
  case OP_QUERY:
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  jump = JUMP(SLJIT_JUMP);
  set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  /* Minimizing query: on the first backtrack try to match the item from the
  saved position; the slot is zeroed so a second backtrack fails. */
  case OP_MINQUERY:
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  /* Exact and possessive repeats have nothing to retry. */
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

/* Failures recorded on the record itself land after the code above. */
set_jumps(current->topbacktracks, LABEL());
}
9917 
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)9918 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9919 {
9920 DEFINE_COMPILER;
9921 pcre_uchar *cc = current->cc;
9922 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9923 pcre_uchar type;
9924 
9925 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
9926 
9927 if ((type & 0x1) == 0)
9928   {
9929   /* Maximize case. */
9930   set_jumps(current->topbacktracks, LABEL());
9931   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9932   free_stack(common, 1);
9933   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9934   return;
9935   }
9936 
9937 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9938 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9939 set_jumps(current->topbacktracks, LABEL());
9940 free_stack(common, ref ? 2 : 3);
9941 }
9942 
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)9943 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9944 {
9945 DEFINE_COMPILER;
9946 
9947 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9948   compile_backtrackingpath(common, current->top);
9949 set_jumps(current->topbacktracks, LABEL());
9950 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9951   return;
9952 
9953 if (common->has_set_som && common->mark_ptr != 0)
9954   {
9955   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9956   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9957   free_stack(common, 2);
9958   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
9959   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
9960   }
9961 else if (common->has_set_som || common->mark_ptr != 0)
9962   {
9963   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9964   free_stack(common, 1);
9965   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
9966   }
9967 }
9968 
/* Generates the backtracking path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
   OP_ASSERTBACK, OP_ASSERTBACK_NOT), optionally prefixed by OP_BRAZERO.

   common   shared compiler state
   current  backtrack record of the assertion (accessed as assert_backtrack)

   An OP_BRAMINZERO prefix must never reach this function. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar *code = current->cc;
BOOL zero_prefixed = FALSE;
BOOL positive;
BOOL negative;
struct sljit_jump *skip_retry = NULL;

SLJIT_ASSERT(*code != OP_BRAMINZERO);
if (*code == OP_BRAZERO)
  {
  zero_prefixed = TRUE;
  code++;
  SLJIT_ASSERT(current->topbacktracks == NULL);
  /* The top stack slot holds the saved STR_PTR of the optional group. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

positive = (*code == OP_ASSERT || *code == OP_ASSERTBACK);
negative = (*code == OP_ASSERT_NOT || *code == OP_ASSERTBACK_NOT);

if (CURRENT_AS(assert_backtrack)->framesize < 0)
  {
  /* No frame to revert. */
  set_jumps(current->topbacktracks, LABEL());
  if (!zero_prefixed)
    return;

  /* Clear the slot, retry the matching path if the saved STR_PTR was
     non-zero, otherwise drop the slot. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
  free_stack(common, 1);
  return;
  }

if (zero_prefixed)
  {
  if (negative)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  skip_retry = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (positive)
  {
  /* Revert the frame of the assertion and restore the previous value of
     its private data slot. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(assert_backtrack)->framesize - 1));
  }
set_jumps(current->topbacktracks, LABEL());

if (zero_prefixed)
  {
  /* The slot released above is taken back directly; the stack is known to
     have room for it. */
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
  JUMPHERE(skip_retry);
  }
}
10035 
/* Generates the backtracking path of a bracketed group (accessed as
   bracket_backtrack). The body distinguishes plain and capturing groups
   (OP_CBRA / OP_SCBRA), conditional groups (OP_COND / OP_SCOND) and atomic
   groups (OP_ONCE / OP_ONCE_NC), each optionally prefixed by OP_BRAZERO or
   OP_BRAMINZERO and closed by OP_KET, OP_KETRMAX or OP_KETRMIN.

   common   shared compiler state
   current  backtrack record of the group

   The function returns early, leaving the code incomplete, when read-only
   data cannot be allocated or when the sljit compiler reports an error.
   The order of the emitted instructions is significant throughout. */
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int opcode, slots, alt_tag, alt_total;
int capture_offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
int counter_ptr = 0, repeat_op = 0, repeat_limit = 0;
pcre_uchar *code = current->cc;
pcre_uchar *group_start;
pcre_uchar *group_end;
pcre_uchar *alt_body;
pcre_uchar cond_op = 0;
pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *cond_assert;
sljit_uw *jump_table = NULL;
BOOL has_alternatives;
BOOL is_cond;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero_jump = NULL;
struct sljit_jump *alt_jump_a = NULL;
struct sljit_jump *alt_jump_b = NULL;
struct sljit_jump *once_jump = NULL;
struct sljit_jump *cond_jump = NULL;
struct sljit_label *rmin_loop = NULL;
struct sljit_label *exact_loop = NULL;

/* ---- Decode the group. ---- */

if (*code == OP_BRAZERO || *code == OP_BRAMINZERO)
  bra = *code++;

opcode = *code;
group_end = bracketend(code) - 1 - LINK_SIZE;
ket = *group_end;
if (ket == OP_KET && PRIVATE_DATA(group_end) != 0)
  {
  /* A counted repeat is attached to the closing ket. */
  counter_ptr = PRIVATE_DATA(group_end);
  repeat_op = PRIVATE_DATA(group_end + 2);
  repeat_limit = PRIVATE_DATA(group_end + 3);
  SLJIT_ASSERT(repeat_op != 0 && repeat_limit != 0);
  if (repeat_op == OP_UPTO)
    ket = OP_KETRMAX;
  else if (repeat_op == OP_MINUPTO)
    ket = OP_KETRMIN;
  }

group_start = code;
code += GET(code, 1);
is_cond = (opcode == OP_COND || opcode == OP_SCOND);
if (is_cond)
  {
  /* cond_op is the opcode right after the group header. */
  cond_op = group_start[1 + LINK_SIZE];
  has_alternatives = (cond_op >= OP_ASSERT && cond_op <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
  }
else
  has_alternatives = (*code == OP_ALT);

if (opcode == OP_CBRA || opcode == OP_SCBRA)
  capture_offset = (GET2(group_start, 1 + LINK_SIZE)) << 1;
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*code == OP_KETRMAX || *code == OP_KETRMIN))
  opcode = OP_SCOND;
else if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  opcode = OP_ONCE;

alt_total = has_alternatives ? no_alternatives(group_start) : 0;

/* For OP_ONCE the lowest bit of framesize carries needs_control_head. */
if (opcode == OP_ONCE)
  {
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  }

/* ---- Entry: restore the repeat counter and handle the zero prefix. ---- */

if (ket != OP_KET && repeat_op != 0)
  {
  /* The value loaded into TMP1 is tested again in the OP_KETRMIN branch. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_op != OP_UPTO)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), counter_ptr, TMP1, 0);
  else
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), counter_ptr, TMP1, 0, SLJIT_IMM, 1);
  }

if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero_jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_op != 0)
      {
      /* TMP1 still holds the counter loaded above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Guard against a zero-length iteration. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
    }
  rmin_loop = LABEL();
  if (repeat_op != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), counter_ptr, SLJIT_MEM1(SLJIT_SP), counter_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero_jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_op == OP_EXACT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), counter_ptr, SLJIT_IMM, 1);
  exact_loop = LABEL();
  }

/* ---- Restore the capture data saved by the matching path. ---- */

if (capture_offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    /* Three slots: capture_last, then the two ovector entries. */
    SLJIT_ASSERT(common->optimized_cbracket[capture_offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(capture_offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(capture_offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[capture_offset >> 1] == 0)
    {
    /* Two slots: the two ovector entries. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(capture_offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(capture_offset + 1), TMP2, 0);
    }
  }

/* ---- Dispatch to the backtracking code of the matched alternative. ---- */

if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    }
  once_jump = JUMP(SLJIT_JUMP);
  }
else if (is_cond)
  {
  if (has_alternatives)
    {
    /* A conditional group always has exactly one extra alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    alt_total = 2;
    alt_jump_a = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }
else if (has_alternatives)
  {
  /* TMP1 receives the tag stored by the alternative that matched. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (alt_total > 4)
    {
    /* More than four alternatives: jump through a table. */
    jump_table = allocate_read_only_data(common, alt_total * sizeof(sljit_uw));
    if (SLJIT_UNLIKELY(jump_table == NULL))
      return;
    sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)jump_table);
    add_label_addr(common, jump_table++);
    }
  else
    {
    /* Up to four alternatives: a short chain of compares. */
    if (alt_total == 4)
      alt_jump_b = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
    alt_jump_a = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }

COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (is_cond)
  {
  /* A conditional group has at most one further alternative. */
  if (cond_op >= OP_ASSERT && cond_op <= OP_ASSERTBACK_NOT)
    {
    SLJIT_ASSERT(has_alternatives);
    cond_assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if (cond_assert->framesize >= 0 && (cond_op == OP_ASSERT || cond_op == OP_ASSERTBACK))
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), cond_assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cond_assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-cond_assert->framesize - 1));
      }
    cond_jump = JUMP(SLJIT_JUMP);
    set_jumps(cond_assert->condfailed, LABEL());
    }
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond_jump = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

/* ---- Compile the remaining alternatives. ---- */

if (has_alternatives)
  {
  alt_tag = sizeof(sljit_uw);
  do
    {
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* A conditional group may get here without a real OP_ALT: its extra
       alternative exists even when it is empty. */
    if (*code == OP_ALT)
      {
      alt_body = code + 1 + LINK_SIZE;
      code += GET(code, 1);
      if (!is_cond)
        {
        /* Reload the subject position saved at the start of the group. */
        if (opcode == OP_ONCE)
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        else if (private_data_ptr != 0)
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        }
      compile_matchingpath(common, alt_body, code, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;
      }

    /* Code executed once this alternative has matched. A similar sequence
       exists in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack slots to reserve. */
    slots = 0;
    if (repeat_op == OP_MINUPTO)
      {
      /* The counter must be preserved; TMP2 is stored in the second pass. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), counter_ptr);
      slots++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      slots++;
    if (capture_offset != 0)
      {
      if (common->capture_last_ptr != 0)
        slots++;
      if (common->optimized_cbracket[capture_offset >> 1] == 0)
        slots += 2;
      }
    if (opcode != OP_ONCE)
      slots++;

    if (slots > 0)
      allocate_stack(common, slots);

    /* Second pass: fill the reserved slots in the same order. */
    slots = 0;
    if (repeat_op == OP_MINUPTO)
      {
      /* TMP2 was loaded in the first pass. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(slots), TMP2, 0, SLJIT_IMM, 1);
      slots++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      if (ket == OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slots), SLJIT_IMM, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slots), STR_PTR, 0);
      slots++;
      }

    if (capture_offset != 0)
      slots = match_capture_common(common, slots, capture_offset, private_data_ptr);

    /* Tag identifying this alternative for the dispatch emitted earlier. */
    if (opcode != OP_ONCE)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slots), SLJIT_IMM, alt_tag);

    if (capture_offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[capture_offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump
         to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(capture_offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(capture_offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);

    /* Bind the dispatch target of this alternative's backtracking code. */
    if (opcode != OP_ONCE)
      {
      if (alt_total > 4)
        add_label_addr(common, jump_table++);
      else if (alt_tag != 2 * sizeof(sljit_uw))
        {
        JUMPHERE(alt_jump_a);
        if (alt_total == 3 && alt_tag == sizeof(sljit_uw))
          alt_jump_b = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
        }
      else
        {
        JUMPHERE(alt_jump_b);
        if (alt_total == 4)
          alt_jump_a = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
        }
      alt_tag += sizeof(sljit_uw);
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*code == OP_ALT);

  if (cond_jump != NULL)
    {
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
    cond_assert = CURRENT_AS(bracket_backtrack)->u.assert;
    /* The opcode test must come first: u.assert is only meaningful when the
       condition is an assertion. */
    if ((cond_op == OP_ASSERT_NOT || cond_op == OP_ASSERTBACK_NOT) && cond_assert->framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), cond_assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cond_assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-cond_assert->framesize - 1));
      }
    JUMPHERE(cond_jump);
    }

  /* Release the saved STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

/* ---- Undo the state set up when the group was entered. ---- */

if (capture_offset != 0)
  {
  /* Two temporaries are used because that schedules better. */
  if (common->optimized_cbracket[capture_offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(capture_offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(capture_offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  code = group_start + GET(group_start, 1);
  slots = needs_control_head ? 1 : 0;

  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    slots += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *code == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*code == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    slots++;
    }

  if (slots > 0)
    free_stack(common, slots);

  JUMPHERE(once_jump);
  /* Restore the previous value of the private data slot. */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* Two slots are released here; see the OP_KETRMIN comment further down. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

/* ---- Repeat handling at the end of the group. ---- */

if (repeat_op == OP_EXACT)
  {
  /* Count the attempt and loop while the counter is within the limit. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), counter_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), counter_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_limit, exact_loop);
  }
else if (ket == OP_KETRMAX)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
    JUMPHERE(brazero_jump);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so the STR_PTR does
     not need an explicit release here. An extra release would interfere
     with the free_stack(2) emitted in the OP_ONCE branch above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_loop);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero_jump);
  }
}
10497 
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)10498 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10499 {
10500 DEFINE_COMPILER;
10501 int offset;
10502 struct sljit_jump *jump;
10503 
10504 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
10505   {
10506   if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
10507     {
10508     offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
10509     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10510     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10511     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10512     if (common->capture_last_ptr != 0)
10513       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
10514     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
10515     if (common->capture_last_ptr != 0)
10516       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
10517     }
10518   set_jumps(current->topbacktracks, LABEL());
10519   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10520   return;
10521   }
10522 
10523 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
10524 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10525 
10526 if (current->topbacktracks)
10527   {
10528   jump = JUMP(SLJIT_JUMP);
10529   set_jumps(current->topbacktracks, LABEL());
10530   /* Drop the stack frame. */
10531   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10532   JUMPHERE(jump);
10533   }
10534 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
10535 }
10536 
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)10537 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10538 {
10539 assert_backtrack backtrack;
10540 
10541 current->top = NULL;
10542 current->topbacktracks = NULL;
10543 current->nextbacktracks = NULL;
10544 if (current->cc[1] > OP_ASSERTBACK_NOT)
10545   {
10546   /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
10547   compile_bracket_matchingpath(common, current->cc, current);
10548   compile_bracket_backtrackingpath(common, current->top);
10549   }
10550 else
10551   {
10552   memset(&backtrack, 0, sizeof(backtrack));
10553   backtrack.common.cc = current->cc;
10554   backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
10555   /* Manual call of compile_assert_matchingpath. */
10556   compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
10557   }
10558 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
10559 }
10560 
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)10561 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10562 {
10563 DEFINE_COMPILER;
10564 pcre_uchar opcode = *current->cc;
10565 struct sljit_label *loop;
10566 struct sljit_jump *jump;
10567 
10568 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
10569   {
10570   if (common->then_trap != NULL)
10571     {
10572     SLJIT_ASSERT(common->control_head_ptr != 0);
10573 
10574     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10575     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
10576     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
10577     jump = JUMP(SLJIT_JUMP);
10578 
10579     loop = LABEL();
10580     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10581     JUMPHERE(jump);
10582     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
10583     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
10584     add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
10585     return;
10586     }
10587   else if (common->positive_assert)
10588     {
10589     add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
10590     return;
10591     }
10592   }
10593 
10594 if (common->local_exit)
10595   {
10596   if (common->quit_label == NULL)
10597     add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10598   else
10599     JUMPTO(SLJIT_JUMP, common->quit_label);
10600   return;
10601   }
10602 
10603 if (opcode == OP_SKIP_ARG)
10604   {
10605   SLJIT_ASSERT(common->control_head_ptr != 0);
10606   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10607   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
10608   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
10609   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
10610   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10611 
10612   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10613   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));
10614   return;
10615   }
10616 
10617 if (opcode == OP_SKIP)
10618   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10619 else
10620   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
10621 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
10622 }
10623 
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)10624 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10625 {
10626 DEFINE_COMPILER;
10627 struct sljit_jump *jump;
10628 int size;
10629 
10630 if (CURRENT_AS(then_trap_backtrack)->then_trap)
10631   {
10632   common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
10633   return;
10634   }
10635 
10636 size = CURRENT_AS(then_trap_backtrack)->framesize;
10637 size = 3 + (size < 0 ? 0 : size);
10638 
10639 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
10640 free_stack(common, size);
10641 jump = JUMP(SLJIT_JUMP);
10642 
10643 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
10644 /* STACK_TOP is set by THEN. */
10645 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
10646   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10647 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10648 free_stack(common, 3);
10649 
10650 JUMPHERE(jump);
10651 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10652 }
10653 
/* Generates the backtracking code for a chain of backtrack records. Starting
at "current" and following the prev links, the pending nextbacktracks jumps of
each record are bound to the current code position and the generator selected
by the record's opcode is invoked. common->then_trap is put back to the value
it had on entry before returning. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *entry_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Load the topmost stack slot, release it, and store the value into OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Every opcode listed below is handled by the iterator backtracking generator. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:

    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:

    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:

    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:

    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:

    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI:
    case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ONCE: case OP_ONCE_NC:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode following BRAZERO selects between the assert and the bracket generator. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS:
    case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five slots are released: slot 4 goes to mark_ptr, slot 0 to control_head_ptr. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* A single slot is released and its value goes to mark_ptr. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    /* The return register is loaded with PCRE_ERROR_NOMATCH unless local_exit
    is set; the jump goes to quit_label when it is already known, otherwise it
    is queued on the quit list. */
    if (!common->local_exit)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    /* Only binds the record's topbacktracks jumps to this position. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = entry_then_trap;
}
10840 
compile_recurse(compiler_common * common)10841 static SLJIT_INLINE void compile_recurse(compiler_common *common)
10842 {
10843 DEFINE_COMPILER;
10844 pcre_uchar *cc = common->start + common->currententry->start;
10845 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
10846 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
10847 BOOL needs_control_head;
10848 int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
10849 int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
10850 int alternativesize;
10851 BOOL needs_frame;
10852 backtrack_common altbacktrack;
10853 struct sljit_jump *jump;
10854 
10855 /* Recurse captures then. */
10856 common->then_trap = NULL;
10857 
10858 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
10859 needs_frame = framesize >= 0;
10860 if (!needs_frame)
10861   framesize = 0;
10862 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
10863 
10864 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
10865 common->currententry->entry = LABEL();
10866 set_jumps(common->currententry->calls, common->currententry->entry);
10867 
10868 sljit_emit_fast_enter(compiler, TMP2, 0);
10869 count_match(common);
10870 allocate_stack(common, private_data_size + framesize + alternativesize);
10871 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
10872 copy_private_data(common, ccbegin, ccend, TRUE, framesize + alternativesize, private_data_size + framesize + alternativesize, needs_control_head);
10873 if (needs_control_head)
10874   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
10875 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
10876 if (needs_frame)
10877   init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
10878 
10879 if (alternativesize > 0)
10880   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10881 
10882 memset(&altbacktrack, 0, sizeof(backtrack_common));
10883 common->quit_label = NULL;
10884 common->accept_label = NULL;
10885 common->quit = NULL;
10886 common->accept = NULL;
10887 altbacktrack.cc = ccbegin;
10888 cc += GET(cc, 1);
10889 while (1)
10890   {
10891   altbacktrack.top = NULL;
10892   altbacktrack.topbacktracks = NULL;
10893 
10894   if (altbacktrack.cc != ccbegin)
10895     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10896 
10897   compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
10898   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10899     return;
10900 
10901   add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
10902 
10903   compile_backtrackingpath(common, altbacktrack.top);
10904   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10905     return;
10906   set_jumps(altbacktrack.topbacktracks, LABEL());
10907 
10908   if (*cc != OP_ALT)
10909     break;
10910 
10911   altbacktrack.cc = cc + 1 + LINK_SIZE;
10912   cc += GET(cc, 1);
10913   }
10914 
10915 /* None of them matched. */
10916 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10917 jump = JUMP(SLJIT_JUMP);
10918 
10919 if (common->quit != NULL)
10920   {
10921   set_jumps(common->quit, LABEL());
10922   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10923   if (needs_frame)
10924     {
10925     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10926     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10927     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10928     }
10929   OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10930   common->quit = NULL;
10931   add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10932   }
10933 
10934 set_jumps(common->accept, LABEL());
10935 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10936 if (needs_frame)
10937   {
10938   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10939   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10940   OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10941   }
10942 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
10943 
10944 JUMPHERE(jump);
10945 if (common->quit != NULL)
10946   set_jumps(common->quit, LABEL());
10947 copy_private_data(common, ccbegin, ccend, FALSE, framesize + alternativesize, private_data_size + framesize + alternativesize, needs_control_head);
10948 free_stack(common, private_data_size + framesize + alternativesize);
10949 if (needs_control_head)
10950   {
10951   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-3));
10952   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10953   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
10954   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10955   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
10956   }
10957 else
10958   {
10959   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10960   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10961   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
10962   }
10963 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), STACK(-1));
10964 }
10965 
10966 #undef COMPILE_BACKTRACKINGPATH
10967 #undef CURRENT_AS
10968 
10969 void
PRIV(jit_compile)10970 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
10971 {
10972 struct sljit_compiler *compiler;
10973 backtrack_common rootbacktrack;
10974 compiler_common common_data;
10975 compiler_common *common = &common_data;
10976 const sljit_u8 *tables = re->tables;
10977 pcre_study_data *study;
10978 int private_data_size;
10979 pcre_uchar *ccend;
10980 executable_functions *functions;
10981 void *executable_func;
10982 sljit_uw executable_size;
10983 sljit_uw total_length;
10984 label_addr_list *label_addr;
10985 struct sljit_label *mainloop_label = NULL;
10986 struct sljit_label *continue_match_label;
10987 struct sljit_label *empty_match_found_label = NULL;
10988 struct sljit_label *empty_match_backtrack_label = NULL;
10989 struct sljit_label *reset_match_label;
10990 struct sljit_label *quit_label;
10991 struct sljit_jump *jump;
10992 struct sljit_jump *minlength_check_failed = NULL;
10993 struct sljit_jump *reqbyte_notfound = NULL;
10994 struct sljit_jump *empty_match = NULL;
10995 
10996 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
10997 study = extra->study_data;
10998 
10999 if (!tables)
11000   tables = PRIV(default_tables);
11001 
11002 memset(&rootbacktrack, 0, sizeof(backtrack_common));
11003 memset(common, 0, sizeof(compiler_common));
11004 rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
11005 
11006 common->start = rootbacktrack.cc;
11007 common->read_only_data_head = NULL;
11008 common->fcc = tables + fcc_offset;
11009 common->lcc = (sljit_sw)(tables + lcc_offset);
11010 common->mode = mode;
11011 common->might_be_empty = study->minlength == 0;
11012 common->nltype = NLTYPE_FIXED;
11013 switch(re->options & PCRE_NEWLINE_BITS)
11014   {
11015   case 0:
11016   /* Compile-time default */
11017   switch(NEWLINE)
11018     {
11019     case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
11020     case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
11021     default: common->newline = NEWLINE; break;
11022     }
11023   break;
11024   case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
11025   case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
11026   case PCRE_NEWLINE_CR+
11027        PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
11028   case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
11029   case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
11030   default: return;
11031   }
11032 common->nlmax = READ_CHAR_MAX;
11033 common->nlmin = 0;
11034 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
11035   common->bsr_nltype = NLTYPE_ANYCRLF;
11036 else if ((re->options & PCRE_BSR_UNICODE) != 0)
11037   common->bsr_nltype = NLTYPE_ANY;
11038 else
11039   {
11040 #ifdef BSR_ANYCRLF
11041   common->bsr_nltype = NLTYPE_ANYCRLF;
11042 #else
11043   common->bsr_nltype = NLTYPE_ANY;
11044 #endif
11045   }
11046 common->bsr_nlmax = READ_CHAR_MAX;
11047 common->bsr_nlmin = 0;
11048 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
11049 common->ctypes = (sljit_sw)(tables + ctypes_offset);
11050 common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
11051 common->name_count = re->name_count;
11052 common->name_entry_size = re->name_entry_size;
11053 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
11054 #ifdef SUPPORT_UTF
11055 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
11056 common->utf = (re->options & PCRE_UTF8) != 0;
11057 #ifdef SUPPORT_UCP
11058 common->use_ucp = (re->options & PCRE_UCP) != 0;
11059 #endif
11060 if (common->utf)
11061   {
11062   if (common->nltype == NLTYPE_ANY)
11063     common->nlmax = 0x2029;
11064   else if (common->nltype == NLTYPE_ANYCRLF)
11065     common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
11066   else
11067     {
11068     /* We only care about the first newline character. */
11069     common->nlmax = common->newline & 0xff;
11070     }
11071 
11072   if (common->nltype == NLTYPE_FIXED)
11073     common->nlmin = common->newline & 0xff;
11074   else
11075     common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
11076 
11077   if (common->bsr_nltype == NLTYPE_ANY)
11078     common->bsr_nlmax = 0x2029;
11079   else
11080     common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
11081   common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
11082   }
11083 #endif /* SUPPORT_UTF */
11084 ccend = bracketend(common->start);
11085 
11086 /* Calculate the local space size on the stack. */
11087 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
11088 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
11089 if (!common->optimized_cbracket)
11090   return;
11091 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
11092 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
11093 #else
11094 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
11095 #endif
11096 
11097 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
11098 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
11099 common->capture_last_ptr = common->ovector_start;
11100 common->ovector_start += sizeof(sljit_sw);
11101 #endif
11102 if (!check_opcode_types(common, common->start, ccend))
11103   {
11104   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11105   return;
11106   }
11107 
11108 /* Checking flags and updating ovector_start. */
11109 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11110   {
11111   common->req_char_ptr = common->ovector_start;
11112   common->ovector_start += sizeof(sljit_sw);
11113   }
11114 if (mode != JIT_COMPILE)
11115   {
11116   common->start_used_ptr = common->ovector_start;
11117   common->ovector_start += sizeof(sljit_sw);
11118   if (mode == JIT_PARTIAL_SOFT_COMPILE)
11119     {
11120     common->hit_start = common->ovector_start;
11121     common->ovector_start += 2 * sizeof(sljit_sw);
11122     }
11123   }
11124 if ((re->options & PCRE_FIRSTLINE) != 0)
11125   {
11126   common->match_end_ptr = common->ovector_start;
11127   common->ovector_start += sizeof(sljit_sw);
11128   }
11129 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
11130 common->control_head_ptr = 1;
11131 #endif
11132 if (common->control_head_ptr != 0)
11133   {
11134   common->control_head_ptr = common->ovector_start;
11135   common->ovector_start += sizeof(sljit_sw);
11136   }
11137 if (common->has_set_som)
11138   {
11139   /* Saving the real start pointer is necessary. */
11140   common->start_ptr = common->ovector_start;
11141   common->ovector_start += sizeof(sljit_sw);
11142   }
11143 
11144 /* Aligning ovector to even number of sljit words. */
11145 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
11146   common->ovector_start += sizeof(sljit_sw);
11147 
11148 if (common->start_ptr == 0)
11149   common->start_ptr = OVECTOR(0);
11150 
11151 /* Capturing brackets cannot be optimized if callouts are allowed. */
11152 if (common->capture_last_ptr != 0)
11153   memset(common->optimized_cbracket, 0, re->top_bracket + 1);
11154 
11155 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
11156 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
11157 
11158 total_length = ccend - common->start;
11159 common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), compiler->allocator_data);
11160 if (!common->private_data_ptrs)
11161   {
11162   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11163   return;
11164   }
11165 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
11166 
11167 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
11168 set_private_data_ptrs(common, &private_data_size, ccend);
11169 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11170   {
11171   if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
11172     detect_fast_fail(common, common->start, &private_data_size, 4);
11173   }
11174 
11175 SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);
11176 
11177 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
11178   {
11179   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11180   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11181   return;
11182   }
11183 
11184 if (common->has_then)
11185   {
11186   common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
11187   memset(common->then_offsets, 0, total_length);
11188   set_then_offsets(common, common->start, NULL);
11189   }
11190 
11191 compiler = sljit_create_compiler(NULL);
11192 if (!compiler)
11193   {
11194   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11195   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11196   return;
11197   }
11198 common->compiler = compiler;
11199 
11200 /* Main pcre_jit_exec entry. */
11201 sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
11202 
11203 /* Register init. */
11204 reset_ovector(common, (re->top_bracket + 1) * 2);
11205 if (common->req_char_ptr != 0)
11206   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
11207 
11208 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
11209 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
11210 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11211 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
11212 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11213 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
11214 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
11215 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
11216 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11217 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
11218 
11219 if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
11220   reset_fast_fail(common);
11221 
11222 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11223   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11224 if (common->mark_ptr != 0)
11225   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
11226 if (common->control_head_ptr != 0)
11227   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
11228 
11229 /* Main part of the matching */
11230 if ((re->options & PCRE_ANCHORED) == 0)
11231   {
11232   mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0);
11233   continue_match_label = LABEL();
11234   /* Forward search if possible. */
11235   if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
11236     {
11237     if (mode == JIT_COMPILE && fast_forward_first_n_chars(common))
11238       ;
11239     else if ((re->flags & PCRE_FIRSTSET) != 0)
11240       fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0);
11241     else if ((re->flags & PCRE_STARTLINE) != 0)
11242       fast_forward_newline(common);
11243     else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
11244       fast_forward_start_bits(common, study->start_bits);
11245     }
11246   }
11247 else
11248   continue_match_label = LABEL();
11249 
11250 if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11251   {
11252   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
11253   OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
11254   minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
11255   }
11256 if (common->req_char_ptr != 0)
11257   reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
11258 
11259 /* Store the current STR_PTR in OVECTOR(0). */
11260 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11261 /* Copy the limit of allowed recursions. */
11262 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
11263 if (common->capture_last_ptr != 0)
11264   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
11265 if (common->fast_forward_bc_ptr != NULL)
11266   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);
11267 
11268 if (common->start_ptr != OVECTOR(0))
11269   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
11270 
11271 /* Copy the beginning of the string. */
11272 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11273   {
11274   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11275   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11276   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
11277   JUMPHERE(jump);
11278   }
11279 else if (mode == JIT_PARTIAL_HARD_COMPILE)
11280   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11281 
11282 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
11283 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11284   {
11285   sljit_free_compiler(compiler);
11286   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11287   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11288   free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11289   return;
11290   }
11291 
11292 if (common->might_be_empty)
11293   {
11294   empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11295   empty_match_found_label = LABEL();
11296   }
11297 
11298 common->accept_label = LABEL();
11299 if (common->accept != NULL)
11300   set_jumps(common->accept, common->accept_label);
11301 
11302 /* This means we have a match. Update the ovector. */
11303 copy_ovector(common, re->top_bracket + 1);
11304 common->quit_label = common->forced_quit_label = LABEL();
11305 if (common->quit != NULL)
11306   set_jumps(common->quit, common->quit_label);
11307 if (common->forced_quit != NULL)
11308   set_jumps(common->forced_quit, common->forced_quit_label);
11309 if (minlength_check_failed != NULL)
11310   SET_LABEL(minlength_check_failed, common->forced_quit_label);
11311 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
11312 
11313 if (mode != JIT_COMPILE)
11314   {
11315   common->partialmatchlabel = LABEL();
11316   set_jumps(common->partialmatch, common->partialmatchlabel);
11317   return_with_partial_match(common, common->quit_label);
11318   }
11319 
11320 if (common->might_be_empty)
11321   empty_match_backtrack_label = LABEL();
11322 compile_backtrackingpath(common, rootbacktrack.top);
11323 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11324   {
11325   sljit_free_compiler(compiler);
11326   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11327   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11328   free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11329   return;
11330   }
11331 
11332 SLJIT_ASSERT(rootbacktrack.prev == NULL);
11333 reset_match_label = LABEL();
11334 
11335 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11336   {
11337   /* Update hit_start only in the first time. */
11338   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
11339   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
11340   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
11341   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
11342   JUMPHERE(jump);
11343   }
11344 
11345 /* Check we have remaining characters. */
11346 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
11347   {
11348   SLJIT_ASSERT(common->match_end_ptr != 0);
11349   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
11350   }
11351 
11352 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
11353     (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
11354 
11355 if ((re->options & PCRE_ANCHORED) == 0)
11356   {
11357   if (common->ff_newline_shortcut != NULL)
11358     {
11359     if ((re->options & PCRE_FIRSTLINE) == 0)
11360       CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
11361     /* There cannot be more newlines here. */
11362     }
11363   else
11364     CMPTO(SLJIT_LESS, STR_PTR, 0, ((re->options & PCRE_FIRSTLINE) == 0) ? STR_END : TMP1, 0, mainloop_label);
11365   }
11366 
11367 /* No more remaining characters. */
11368 if (reqbyte_notfound != NULL)
11369   JUMPHERE(reqbyte_notfound);
11370 
11371 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11372   CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
11373 
11374 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
11375 JUMPTO(SLJIT_JUMP, common->quit_label);
11376 
11377 flush_stubs(common);
11378 
11379 if (common->might_be_empty)
11380   {
11381   JUMPHERE(empty_match);
11382   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11383   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
11384   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
11385   OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
11386   CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
11387   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11388   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
11389   JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
11390   }
11391 
11392 common->fast_forward_bc_ptr = NULL;
11393 common->fast_fail_start_ptr = 0;
11394 common->fast_fail_end_ptr = 0;
11395 common->currententry = common->entries;
11396 common->local_exit = TRUE;
11397 quit_label = common->quit_label;
11398 while (common->currententry != NULL)
11399   {
11400   /* Might add new entries. */
11401   compile_recurse(common);
11402   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11403     {
11404     sljit_free_compiler(compiler);
11405     SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11406     SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11407     free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11408     return;
11409     }
11410   flush_stubs(common);
11411   common->currententry = common->currententry->next;
11412   }
11413 common->local_exit = FALSE;
11414 common->quit_label = quit_label;
11415 
11416 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
11417 /* This is a (really) rare case. */
11418 set_jumps(common->stackalloc, LABEL());
11419 /* RETURN_ADDR is not a saved register. */
11420 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11421 
11422 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1);
11423 
11424 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STACK_TOP, 0);
11425 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
11426 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
11427 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
11428 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
11429 
11430 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
11431 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
11432 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
11433 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
11434 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11435 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
11436 sljit_emit_fast_return(compiler, TMP1, 0);
11437 
11438 /* Allocation failed. */
11439 JUMPHERE(jump);
11440 /* We break the return address cache here, but this is a really rare case. */
11441 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
11442 JUMPTO(SLJIT_JUMP, common->quit_label);
11443 
11444 /* Call limit reached. */
11445 set_jumps(common->calllimit, LABEL());
11446 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
11447 JUMPTO(SLJIT_JUMP, common->quit_label);
11448 
11449 if (common->revertframes != NULL)
11450   {
11451   set_jumps(common->revertframes, LABEL());
11452   do_revertframes(common);
11453   }
11454 if (common->wordboundary != NULL)
11455   {
11456   set_jumps(common->wordboundary, LABEL());
11457   check_wordboundary(common);
11458   }
11459 if (common->anynewline != NULL)
11460   {
11461   set_jumps(common->anynewline, LABEL());
11462   check_anynewline(common);
11463   }
11464 if (common->hspace != NULL)
11465   {
11466   set_jumps(common->hspace, LABEL());
11467   check_hspace(common);
11468   }
11469 if (common->vspace != NULL)
11470   {
11471   set_jumps(common->vspace, LABEL());
11472   check_vspace(common);
11473   }
11474 if (common->casefulcmp != NULL)
11475   {
11476   set_jumps(common->casefulcmp, LABEL());
11477   do_casefulcmp(common);
11478   }
11479 if (common->caselesscmp != NULL)
11480   {
11481   set_jumps(common->caselesscmp, LABEL());
11482   do_caselesscmp(common);
11483   }
11484 if (common->reset_match != NULL)
11485   {
11486   set_jumps(common->reset_match, LABEL());
11487   do_reset_match(common, (re->top_bracket + 1) * 2);
11488   CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
11489   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
11490   JUMPTO(SLJIT_JUMP, reset_match_label);
11491   }
11492 #ifdef SUPPORT_UTF
11493 #ifdef COMPILE_PCRE8
11494 if (common->utfreadchar != NULL)
11495   {
11496   set_jumps(common->utfreadchar, LABEL());
11497   do_utfreadchar(common);
11498   }
11499 if (common->utfreadchar16 != NULL)
11500   {
11501   set_jumps(common->utfreadchar16, LABEL());
11502   do_utfreadchar16(common);
11503   }
11504 if (common->utfreadtype8 != NULL)
11505   {
11506   set_jumps(common->utfreadtype8, LABEL());
11507   do_utfreadtype8(common);
11508   }
11509 #endif /* COMPILE_PCRE8 */
11510 #endif /* SUPPORT_UTF */
11511 #ifdef SUPPORT_UCP
11512 if (common->getucd != NULL)
11513   {
11514   set_jumps(common->getucd, LABEL());
11515   do_getucd(common);
11516   }
11517 #endif
11518 
11519 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11520 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11521 
11522 executable_func = sljit_generate_code(compiler);
11523 executable_size = sljit_get_generated_code_size(compiler);
11524 label_addr = common->label_addrs;
11525 while (label_addr != NULL)
11526   {
11527   *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
11528   label_addr = label_addr->next;
11529   }
11530 sljit_free_compiler(compiler);
11531 if (executable_func == NULL)
11532   {
11533   free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11534   return;
11535   }
11536 
11537 /* Reuse the function descriptor if possible. */
11538 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
11539   functions = (executable_functions *)extra->executable_jit;
11540 else
11541   {
11542   /* Note: If your memory-checker has flagged the allocation below as a
11543    * memory leak, it is probably because you either forgot to call
11544    * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
11545    * pcre16_extra) object, or you called said function after having
11546    * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
11547    * of the object. (The function will only free the JIT data if the
11548    * bit remains set, as the bit indicates that the pointer to the data
11549    * is valid.)
11550    */
11551   functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data);
11552   if (functions == NULL)
11553     {
11554     /* This case is highly unlikely since we just recently
11555     freed a lot of memory. Not impossible though. */
11556     sljit_free_code(executable_func);
11557     free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11558     return;
11559     }
11560   memset(functions, 0, sizeof(executable_functions));
11561   functions->top_bracket = (re->top_bracket + 1) * 2;
11562   functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
11563   extra->executable_jit = functions;
11564   extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
11565   }
11566 
11567 functions->executable_funcs[mode] = executable_func;
11568 functions->read_only_data_heads[mode] = common->read_only_data_head;
11569 functions->executable_sizes[mode] = executable_size;
11570 }
11571 
jit_machine_stack_exec(jit_arguments * arguments,void * executable_func)11572 static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func)
11573 {
11574 union {
11575    void *executable_func;
11576    jit_function call_executable_func;
11577 } convert_executable_func;
11578 sljit_u8 local_space[MACHINE_STACK_SIZE];
11579 struct sljit_stack local_stack;
11580 
11581 local_stack.min_start = local_space;
11582 local_stack.start = local_space;
11583 local_stack.end = local_space + MACHINE_STACK_SIZE;
11584 local_stack.top = local_space + MACHINE_STACK_SIZE;
11585 arguments->stack = &local_stack;
11586 convert_executable_func.executable_func = executable_func;
11587 return convert_executable_func.call_executable_func(arguments);
11588 }
11589 
11590 int
PRIV(jit_exec)11591 PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
11592   int length, int start_offset, int options, int *offsets, int offset_count)
11593 {
11594 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
11595 union {
11596    void *executable_func;
11597    jit_function call_executable_func;
11598 } convert_executable_func;
11599 jit_arguments arguments;
11600 int max_offset_count;
11601 int retval;
11602 int mode = JIT_COMPILE;
11603 
11604 if ((options & PCRE_PARTIAL_HARD) != 0)
11605   mode = JIT_PARTIAL_HARD_COMPILE;
11606 else if ((options & PCRE_PARTIAL_SOFT) != 0)
11607   mode = JIT_PARTIAL_SOFT_COMPILE;
11608 
11609 if (functions->executable_funcs[mode] == NULL)
11610   return PCRE_ERROR_JIT_BADOPTION;
11611 
11612 /* Sanity checks should be handled by pcre_exec. */
11613 arguments.str = subject + start_offset;
11614 arguments.begin = subject;
11615 arguments.end = subject + length;
11616 arguments.mark_ptr = NULL;
11617 /* JIT decreases this value less frequently than the interpreter. */
11618 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
11619 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
11620   arguments.limit_match = functions->limit_match;
11621 arguments.notbol = (options & PCRE_NOTBOL) != 0;
11622 arguments.noteol = (options & PCRE_NOTEOL) != 0;
11623 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
11624 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
11625 arguments.offsets = offsets;
11626 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
11627 arguments.real_offset_count = offset_count;
11628 
11629 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11630 the output vector for storing captured strings, with the remainder used as
11631 workspace. We don't need the workspace here. For compatibility, we limit the
11632 number of captured strings in the same way as pcre_exec(), so that the user
11633 gets the same result with and without JIT. */
11634 
11635 if (offset_count != 2)
11636   offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
11637 max_offset_count = functions->top_bracket;
11638 if (offset_count > max_offset_count)
11639   offset_count = max_offset_count;
11640 arguments.offset_count = offset_count;
11641 
11642 if (functions->callback)
11643   arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
11644 else
11645   arguments.stack = (struct sljit_stack *)functions->userdata;
11646 
11647 if (arguments.stack == NULL)
11648   retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
11649 else
11650   {
11651   convert_executable_func.executable_func = functions->executable_funcs[mode];
11652   retval = convert_executable_func.call_executable_func(&arguments);
11653   }
11654 
11655 if (retval * 2 > offset_count)
11656   retval = 0;
11657 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
11658   *(extra_data->mark) = arguments.mark_ptr;
11659 
11660 return retval;
11661 }
11662 
11663 #if defined COMPILE_PCRE8
11664 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_jit_exec(const pcre * argument_re,const pcre_extra * extra_data,PCRE_SPTR subject,int length,int start_offset,int options,int * offsets,int offset_count,pcre_jit_stack * stack)11665 pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
11666   PCRE_SPTR subject, int length, int start_offset, int options,
11667   int *offsets, int offset_count, pcre_jit_stack *stack)
11668 #elif defined COMPILE_PCRE16
11669 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11670 pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
11671   PCRE_SPTR16 subject, int length, int start_offset, int options,
11672   int *offsets, int offset_count, pcre16_jit_stack *stack)
11673 #elif defined COMPILE_PCRE32
11674 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11675 pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
11676   PCRE_SPTR32 subject, int length, int start_offset, int options,
11677   int *offsets, int offset_count, pcre32_jit_stack *stack)
11678 #endif
11679 {
11680 pcre_uchar *subject_ptr = (pcre_uchar *)subject;
11681 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
11682 union {
11683    void *executable_func;
11684    jit_function call_executable_func;
11685 } convert_executable_func;
11686 jit_arguments arguments;
11687 int max_offset_count;
11688 int retval;
11689 int mode = JIT_COMPILE;
11690 
11691 SLJIT_UNUSED_ARG(argument_re);
11692 
11693 /* Plausibility checks */
11694 if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
11695 
11696 if ((options & PCRE_PARTIAL_HARD) != 0)
11697   mode = JIT_PARTIAL_HARD_COMPILE;
11698 else if ((options & PCRE_PARTIAL_SOFT) != 0)
11699   mode = JIT_PARTIAL_SOFT_COMPILE;
11700 
11701 if (functions == NULL || functions->executable_funcs[mode] == NULL)
11702   return PCRE_ERROR_JIT_BADOPTION;
11703 
11704 /* Sanity checks should be handled by pcre_exec. */
11705 arguments.stack = (struct sljit_stack *)stack;
11706 arguments.str = subject_ptr + start_offset;
11707 arguments.begin = subject_ptr;
11708 arguments.end = subject_ptr + length;
11709 arguments.mark_ptr = NULL;
11710 /* JIT decreases this value less frequently than the interpreter. */
11711 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
11712 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
11713   arguments.limit_match = functions->limit_match;
11714 arguments.notbol = (options & PCRE_NOTBOL) != 0;
11715 arguments.noteol = (options & PCRE_NOTEOL) != 0;
11716 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
11717 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
11718 arguments.offsets = offsets;
11719 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
11720 arguments.real_offset_count = offset_count;
11721 
11722 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11723 the output vector for storing captured strings, with the remainder used as
11724 workspace. We don't need the workspace here. For compatibility, we limit the
11725 number of captured strings in the same way as pcre_exec(), so that the user
11726 gets the same result with and without JIT. */
11727 
11728 if (offset_count != 2)
11729   offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
11730 max_offset_count = functions->top_bracket;
11731 if (offset_count > max_offset_count)
11732   offset_count = max_offset_count;
11733 arguments.offset_count = offset_count;
11734 
11735 convert_executable_func.executable_func = functions->executable_funcs[mode];
11736 retval = convert_executable_func.call_executable_func(&arguments);
11737 
11738 if (retval * 2 > offset_count)
11739   retval = 0;
11740 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
11741   *(extra_data->mark) = arguments.mark_ptr;
11742 
11743 return retval;
11744 }
11745 
11746 void
PRIV(jit_free)11747 PRIV(jit_free)(void *executable_funcs)
11748 {
11749 int i;
11750 executable_functions *functions = (executable_functions *)executable_funcs;
11751 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
11752   {
11753   if (functions->executable_funcs[i] != NULL)
11754     sljit_free_code(functions->executable_funcs[i]);
11755   free_read_only_data(functions->read_only_data_heads[i], NULL);
11756   }
11757 SLJIT_FREE(functions, compiler->allocator_data);
11758 }
11759 
11760 int
PRIV(jit_get_size)11761 PRIV(jit_get_size)(void *executable_funcs)
11762 {
11763 int i;
11764 sljit_uw size = 0;
11765 sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
11766 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
11767   size += executable_sizes[i];
11768 return (int)size;
11769 }
11770 
11771 const char*
PRIV(jit_get_target)11772 PRIV(jit_get_target)(void)
11773 {
11774 return sljit_get_platform_name();
11775 }
11776 
11777 #if defined COMPILE_PCRE8
11778 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)11779 pcre_jit_stack_alloc(int startsize, int maxsize)
11780 #elif defined COMPILE_PCRE16
11781 PCRE_EXP_DECL pcre16_jit_stack *
11782 pcre16_jit_stack_alloc(int startsize, int maxsize)
11783 #elif defined COMPILE_PCRE32
11784 PCRE_EXP_DECL pcre32_jit_stack *
11785 pcre32_jit_stack_alloc(int startsize, int maxsize)
11786 #endif
11787 {
11788 if (startsize < 1 || maxsize < 1)
11789   return NULL;
11790 if (startsize > maxsize)
11791   startsize = maxsize;
11792 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
11793 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
11794 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL);
11795 }
11796 
11797 #if defined COMPILE_PCRE8
11798 PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack * stack)11799 pcre_jit_stack_free(pcre_jit_stack *stack)
11800 #elif defined COMPILE_PCRE16
11801 PCRE_EXP_DECL void
11802 pcre16_jit_stack_free(pcre16_jit_stack *stack)
11803 #elif defined COMPILE_PCRE32
11804 PCRE_EXP_DECL void
11805 pcre32_jit_stack_free(pcre32_jit_stack *stack)
11806 #endif
11807 {
11808 sljit_free_stack((struct sljit_stack *)stack, NULL);
11809 }
11810 
11811 #if defined COMPILE_PCRE8
11812 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)11813 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
11814 #elif defined COMPILE_PCRE16
11815 PCRE_EXP_DECL void
11816 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
11817 #elif defined COMPILE_PCRE32
11818 PCRE_EXP_DECL void
11819 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
11820 #endif
11821 {
11822 executable_functions *functions;
11823 if (extra != NULL &&
11824     (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
11825     extra->executable_jit != NULL)
11826   {
11827   functions = (executable_functions *)extra->executable_jit;
11828   functions->callback = callback;
11829   functions->userdata = userdata;
11830   }
11831 }
11832 
/* Public wrapper that takes no arguments, returns nothing, and simply
forwards to sljit_free_unused_memory_exec(). */

#if defined COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)
#elif defined COMPILE_PCRE16
PCRE_EXP_DECL void
pcre16_jit_free_unused_memory(void)
#elif defined COMPILE_PCRE32
PCRE_EXP_DECL void
pcre32_jit_free_unused_memory(void)
#endif
{
sljit_free_unused_memory_exec();
}
11846 
11847 #else  /* SUPPORT_JIT */
11848 
11849 /* These are dummy functions to avoid linking errors when JIT support is not
11850 being compiled. */
11851 
/* Stub for builds without JIT support: no stack is ever created.

Arguments:
  startsize  ignored
  maxsize    ignored

Returns:     always NULL */

#if defined COMPILE_PCRE8
PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize, int maxsize)
#elif defined COMPILE_PCRE16
PCRE_EXP_DECL pcre16_jit_stack *
pcre16_jit_stack_alloc(int startsize, int maxsize)
#elif defined COMPILE_PCRE32
PCRE_EXP_DECL pcre32_jit_stack *
pcre32_jit_stack_alloc(int startsize, int maxsize)
#endif
{
/* The casts only mark the parameters as deliberately unused. */
(void)startsize;
(void)maxsize;
return NULL;
}
11867 
/* Stub for builds without JIT support: does nothing.

Argument:
  stack  ignored

Returns: nothing */

#if defined COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack *stack)
#elif defined COMPILE_PCRE16
PCRE_EXP_DECL void
pcre16_jit_stack_free(pcre16_jit_stack *stack)
#elif defined COMPILE_PCRE32
PCRE_EXP_DECL void
pcre32_jit_stack_free(pcre32_jit_stack *stack)
#endif
{
/* The cast only marks the parameter as deliberately unused. */
(void)stack;
}
11881 
/* Stub for builds without JIT support: nothing is recorded.

Arguments:
  extra     ignored
  callback  ignored
  userdata  ignored

Returns:    nothing */

#if defined COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
#elif defined COMPILE_PCRE16
PCRE_EXP_DECL void
pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
#elif defined COMPILE_PCRE32
PCRE_EXP_DECL void
pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
#endif
{
/* The casts only mark the parameters as deliberately unused. */
(void)extra;
(void)callback;
(void)userdata;
}
11897 
/* Stub for builds without JIT support: takes no arguments, returns nothing,
and has an empty body. */

#if defined COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)
#elif defined COMPILE_PCRE16
PCRE_EXP_DECL void
pcre16_jit_free_unused_memory(void)
#elif defined COMPILE_PCRE32
PCRE_EXP_DECL void
pcre32_jit_free_unused_memory(void)
#endif
{
}
11910 
11911 #endif
11912 
11913 /* End of pcre_jit_compile.c */
11914