1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
/* Size of the regex stack that is allocated on the real machine stack.
Fast, but the size is limited. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Only when sljit debugging is switched on: enable the check that an
allocation could destroy temporaries. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89
90 /*
Short summary of the backtracking mechanism employed by the jit code generator:
92
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
suppose we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *mark_ptr;
168 void *callout_data;
169 /* Everything else after. */
170 sljit_u32 limit_match;
171 int real_offset_count;
172 int offset_count;
173 sljit_u8 notbol;
174 sljit_u8 noteol;
175 sljit_u8 notempty;
176 sljit_u8 notempty_atstart;
177 } jit_arguments;
178
179 typedef struct executable_functions {
180 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
181 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
182 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
183 PUBL(jit_callback) callback;
184 void *userdata;
185 sljit_u32 top_bracket;
186 sljit_u32 limit_match;
187 } executable_functions;
188
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
193
/* Node of a singly linked list of stubs; each node pairs a jump (start)
with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
199
200 typedef struct label_addr_list {
201 struct sljit_label *label;
202 sljit_uw *update_addr;
203 struct label_addr_list *next;
204 } label_addr_list;
205
/* Special negative frame values.
NOTE(review): presumably stored in the framesize members of the backtrack
structures - confirm against the users. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
210
/* Kinds of control entries: a mark or a then trap. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
215
216 typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
217
218 /* The following structure is the key data type for the recursive
219 code generator. It is allocated by compile_matchingpath, and contains
220 the arguments for compile_backtrackingpath. Must be the first member
221 of its descendants. */
222 typedef struct backtrack_common {
223 /* Concatenation stack. */
224 struct backtrack_common *prev;
225 jump_list *nextbacktracks;
226 /* Internal stack (for component operators). */
227 struct backtrack_common *top;
228 jump_list *topbacktracks;
229 /* Opcode pointer. */
230 pcre_uchar *cc;
231 } backtrack_common;
232
233 typedef struct assert_backtrack {
234 backtrack_common common;
235 jump_list *condfailed;
236 /* Less than 0 if a frame is not needed. */
237 int framesize;
238 /* Points to our private memory word on the stack. */
239 int private_data_ptr;
240 /* For iterators. */
241 struct sljit_label *matchingpath;
242 } assert_backtrack;
243
244 typedef struct bracket_backtrack {
245 backtrack_common common;
246 /* Where to coninue if an alternative is successfully matched. */
247 struct sljit_label *alternative_matchingpath;
248 /* For rmin and rmax iterators. */
249 struct sljit_label *recursive_matchingpath;
250 /* For greedy ? operator. */
251 struct sljit_label *zero_matchingpath;
252 /* Contains the branches of a failed condition. */
253 union {
254 /* Both for OP_COND, OP_SCOND. */
255 jump_list *condfailed;
256 assert_backtrack *assert;
257 /* For OP_ONCE. Less than 0 if not needed. */
258 int framesize;
259 } u;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 } bracket_backtrack;
263
264 typedef struct bracketpos_backtrack {
265 backtrack_common common;
266 /* Points to our private memory word on the stack. */
267 int private_data_ptr;
268 /* Reverting stack is needed. */
269 int framesize;
270 /* Allocated stack size. */
271 int stacksize;
272 } bracketpos_backtrack;
273
274 typedef struct braminzero_backtrack {
275 backtrack_common common;
276 struct sljit_label *matchingpath;
277 } braminzero_backtrack;
278
279 typedef struct char_iterator_backtrack {
280 backtrack_common common;
281 /* Next iteration. */
282 struct sljit_label *matchingpath;
283 union {
284 jump_list *backtracks;
285 struct {
286 unsigned int othercasebit;
287 pcre_uchar chr;
288 BOOL enabled;
289 } charpos;
290 } u;
291 } char_iterator_backtrack;
292
293 typedef struct ref_iterator_backtrack {
294 backtrack_common common;
295 /* Next iteration. */
296 struct sljit_label *matchingpath;
297 } ref_iterator_backtrack;
298
299 typedef struct recurse_entry {
300 struct recurse_entry *next;
301 /* Contains the function entry. */
302 struct sljit_label *entry;
303 /* Collects the calls until the function is not created. */
304 jump_list *calls;
305 /* Points to the starting opcode. */
306 sljit_sw start;
307 } recurse_entry;
308
309 typedef struct recurse_backtrack {
310 backtrack_common common;
311 BOOL inlined_pattern;
312 } recurse_backtrack;
313
314 #define OP_THEN_TRAP OP_TABLE_LENGTH
315
316 typedef struct then_trap_backtrack {
317 backtrack_common common;
318 /* If then_trap is not NULL, this structure contains the real
319 then_trap for the backtracking path. */
320 struct then_trap_backtrack *then_trap;
321 /* Points to the starting opcode. */
322 sljit_sw start;
323 /* Exit point for the then opcodes of this alternative. */
324 jump_list *quit;
325 /* Frame size of the current alternative. */
326 int framesize;
327 } then_trap_backtrack;
328
/* Upper bound on the size of a range. */
#define MAX_RANGE_SIZE 4
330
331 typedef struct compiler_common {
332 /* The sljit ceneric compiler. */
333 struct sljit_compiler *compiler;
334 /* First byte code. */
335 pcre_uchar *start;
336 /* Maps private data offset to each opcode. */
337 sljit_s32 *private_data_ptrs;
338 /* Chain list of read-only data ptrs. */
339 void *read_only_data_head;
340 /* Tells whether the capturing bracket is optimized. */
341 sljit_u8 *optimized_cbracket;
342 /* Tells whether the starting offset is a target of then. */
343 sljit_u8 *then_offsets;
344 /* Current position where a THEN must jump. */
345 then_trap_backtrack *then_trap;
346 /* Starting offset of private data for capturing brackets. */
347 sljit_s32 cbra_ptr;
348 /* Output vector starting point. Must be divisible by 2. */
349 sljit_s32 ovector_start;
350 /* Points to the starting character of the current match. */
351 sljit_s32 start_ptr;
352 /* Last known position of the requested byte. */
353 sljit_s32 req_char_ptr;
354 /* Head of the last recursion. */
355 sljit_s32 recursive_head_ptr;
356 /* First inspected character for partial matching.
357 (Needed for avoiding zero length partial matches.) */
358 sljit_s32 start_used_ptr;
359 /* Starting pointer for partial soft matches. */
360 sljit_s32 hit_start;
361 /* Pointer of the match end position. */
362 sljit_s32 match_end_ptr;
363 /* Points to the marked string. */
364 sljit_s32 mark_ptr;
365 /* Recursive control verb management chain. */
366 sljit_s32 control_head_ptr;
367 /* Points to the last matched capture block index. */
368 sljit_s32 capture_last_ptr;
369 /* Fast forward skipping byte code pointer. */
370 pcre_uchar *fast_forward_bc_ptr;
371 /* Locals used by fast fail optimization. */
372 sljit_s32 fast_fail_start_ptr;
373 sljit_s32 fast_fail_end_ptr;
374
375 /* Flipped and lower case tables. */
376 const sljit_u8 *fcc;
377 sljit_sw lcc;
378 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
379 int mode;
380 /* TRUE, when minlength is greater than 0. */
381 BOOL might_be_empty;
382 /* \K is found in the pattern. */
383 BOOL has_set_som;
384 /* (*SKIP:arg) is found in the pattern. */
385 BOOL has_skip_arg;
386 /* (*THEN) is found in the pattern. */
387 BOOL has_then;
388 /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
389 BOOL has_skip_in_assert_back;
390 /* Currently in recurse or negative assert. */
391 BOOL local_exit;
392 /* Currently in a positive assert. */
393 BOOL positive_assert;
394 /* Newline control. */
395 int nltype;
396 sljit_u32 nlmax;
397 sljit_u32 nlmin;
398 int newline;
399 int bsr_nltype;
400 sljit_u32 bsr_nlmax;
401 sljit_u32 bsr_nlmin;
402 /* Dollar endonly. */
403 int endonly;
404 /* Tables. */
405 sljit_sw ctypes;
406 /* Named capturing brackets. */
407 pcre_uchar *name_table;
408 sljit_sw name_count;
409 sljit_sw name_entry_size;
410
411 /* Labels and jump lists. */
412 struct sljit_label *partialmatchlabel;
413 struct sljit_label *quit_label;
414 struct sljit_label *forced_quit_label;
415 struct sljit_label *accept_label;
416 struct sljit_label *ff_newline_shortcut;
417 stub_list *stubs;
418 label_addr_list *label_addrs;
419 recurse_entry *entries;
420 recurse_entry *currententry;
421 jump_list *partialmatch;
422 jump_list *quit;
423 jump_list *positive_assert_quit;
424 jump_list *forced_quit;
425 jump_list *accept;
426 jump_list *calllimit;
427 jump_list *stackalloc;
428 jump_list *revertframes;
429 jump_list *wordboundary;
430 jump_list *anynewline;
431 jump_list *hspace;
432 jump_list *vspace;
433 jump_list *casefulcmp;
434 jump_list *caselesscmp;
435 jump_list *reset_match;
436 BOOL jscript_compat;
437 #ifdef SUPPORT_UTF
438 BOOL utf;
439 #ifdef SUPPORT_UCP
440 BOOL use_ucp;
441 jump_list *getucd;
442 #endif
443 #ifdef COMPILE_PCRE8
444 jump_list *utfreadchar;
445 jump_list *utfreadchar16;
446 jump_list *utfreadtype8;
447 #endif
448 #endif /* SUPPORT_UTF */
449 } compiler_common;
450
/* Context used by byte_sequence_compare. The members after sourcereg exist
only when SLJIT_UNALIGNED is set; the layout of the asuchars arrays depends
on the character width being compiled. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
484
/* Drop the CMP macro so that this file can define its own CMP shortcut. */
#undef CMP

/* Byte offset of the i-th machine word; used for accessing the elements
of the stack. */
#define STACK(i) ((i) * (int)sizeof(sljit_sw))

/* When sljit names a preferred shift register it has to be SLJIT_R2 or
SLJIT_R3; the SLJIT_R3 case is recorded in SHIFT_REG_IS_R3 and any other
register is rejected at compile time. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif
500
/* Register assignment of the generated code. TMP2 and TMP3 trade places
when SHIFT_REG_IS_R3 is defined. */
#define TMP1 SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2 SLJIT_R3
#define TMP3 SLJIT_R2
#else
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#endif
#define STR_PTR SLJIT_S0
#define STR_END SLJIT_S1
#define STACK_TOP SLJIT_R1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4
516
/* Local space layout. Each of the fixed entries occupies one sljit_sw. */
/* The current opcode may use these two locals. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet
been reached. All of the macros below expect a variable named common
to be in scope. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset stored for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
534
535 #if defined COMPILE_PCRE8
536 #define MOV_UCHAR SLJIT_MOV_U8
537 #elif defined COMPILE_PCRE16
538 #define MOV_UCHAR SLJIT_MOV_U16
539 #elif defined COMPILE_PCRE32
540 #define MOV_UCHAR SLJIT_MOV_U32
541 #else
542 #error Unsupported compiling mode
543 #endif
544
/* Shortcuts for the sljit emitter calls. DEFINE_COMPILER declares the local
variable named compiler (taken from common->compiler) that every other
macro in this group passes as the first argument. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an earlier jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare and binds it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
570
/* Largest value used as a character read limit. */
#define READ_CHAR_MAX 0x7fffffff

/* Value standing in for an invalid UTF character. */
#define INVALID_UTF_CHAR 888
574
/* Returns the address just after the closing ket of the bracket (or
assertion) that starts at cc. The opening opcode and every OP_ALT store
the distance to the next alternative / ket at offset 1. */
static pcre_uchar *bracketend(pcre_uchar *cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Leave the first alternative, then hop over each further one. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);

/* Step over the ket opcode and its link field. */
return cc + 1 + LINK_SIZE;
}
583
/* Returns the number of alternatives of the bracket (or assertion) that
starts at cc. The result is at least 1. */
static int no_alternatives(pcre_uchar *cc)
{
int alternatives = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* The first alternative is already counted; each OP_ALT adds one more. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  {
  alternatives++;
  cc += GET(cc, 1);
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
597
/* Functions which might need modification for every newly supported opcode:
599 next_opcode
600 check_opcode_types
601 set_private_data_ptrs
602 get_framesize
603 init_frame
604 get_private_data_copy_length
605 copy_private_data
606 compile_matchingpath
607 compile_backtrackingpath
608 */
609
/* Returns the address of the item that follows the opcode at cc in the byte
code stream. NULL is returned for OP_ANYBYTE in UTF mode (the caller in
check_opcode_types treats NULL as "not supported"). An opcode that is not
listed here reaches SLJIT_UNREACHABLE(). */
static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
{
SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* --- Length is taken directly from the OP_lengths table. --- */

  /* Positions, boundaries and character types. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  /* Class repeats and classes. */
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  /* References, recursion and callouts. */
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  /* Bracket structure. */
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ONCE: case OP_ONCE_NC:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  /* Condition tests. */
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_DEF:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  /* Control verbs without an argument and the rest. */
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* --- Table length, plus the extra units of a multi-unit character
  in UTF mode. --- */

  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  /* Caseful repeats. */
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS:
  case OP_POSQUERY: case OP_POSUPTO:
  /* Caseless repeats. */
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI:
  case OP_POSQUERYI: case OP_POSUPTOI:
  /* Negated caseful repeats. */
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  /* Negated caseless repeats. */
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UTF
  /* cc[-1] is the last unit covered by the table length. */
  if (common->utf && HAS_EXTRALEN(cc[-1]))
    cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* --- Special cases. --- */

  /* One unit less than the table length.
  NOTE(review): presumably so that the character type that follows the
  repeat is visited as an item of its own - confirm. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  /* Rejected (NULL) in UTF mode, a single unit otherwise. */
  case OP_ANYBYTE:
#ifdef SUPPORT_UTF
  if (common->utf)
    return NULL;
#endif
  return cc + 1;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  /* The total length is stored right after the opcode. */
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Opcode, two more units and the number of units stored in cc[1]. */
  case OP_MARK:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  /* All opcodes are supported now! */
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
804
check_opcode_types(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend)805 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
806 {
807 int count;
808 pcre_uchar *slot;
809 pcre_uchar *assert_back_end = cc - 1;
810
811 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
812 while (cc < ccend)
813 {
814 switch(*cc)
815 {
816 case OP_SET_SOM:
817 common->has_set_som = TRUE;
818 common->might_be_empty = TRUE;
819 cc += 1;
820 break;
821
822 case OP_REF:
823 case OP_REFI:
824 common->optimized_cbracket[GET2(cc, 1)] = 0;
825 cc += 1 + IMM2_SIZE;
826 break;
827
828 case OP_CBRAPOS:
829 case OP_SCBRAPOS:
830 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
831 cc += 1 + LINK_SIZE + IMM2_SIZE;
832 break;
833
834 case OP_COND:
835 case OP_SCOND:
836 /* Only AUTO_CALLOUT can insert this opcode. We do
837 not intend to support this case. */
838 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
839 return FALSE;
840 cc += 1 + LINK_SIZE;
841 break;
842
843 case OP_CREF:
844 common->optimized_cbracket[GET2(cc, 1)] = 0;
845 cc += 1 + IMM2_SIZE;
846 break;
847
848 case OP_DNREF:
849 case OP_DNREFI:
850 case OP_DNCREF:
851 count = GET2(cc, 1 + IMM2_SIZE);
852 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
853 while (count-- > 0)
854 {
855 common->optimized_cbracket[GET2(slot, 0)] = 0;
856 slot += common->name_entry_size;
857 }
858 cc += 1 + 2 * IMM2_SIZE;
859 break;
860
861 case OP_RECURSE:
862 /* Set its value only once. */
863 if (common->recursive_head_ptr == 0)
864 {
865 common->recursive_head_ptr = common->ovector_start;
866 common->ovector_start += sizeof(sljit_sw);
867 }
868 cc += 1 + LINK_SIZE;
869 break;
870
871 case OP_CALLOUT:
872 if (common->capture_last_ptr == 0)
873 {
874 common->capture_last_ptr = common->ovector_start;
875 common->ovector_start += sizeof(sljit_sw);
876 }
877 cc += 2 + 2 * LINK_SIZE;
878 break;
879
880 case OP_ASSERTBACK:
881 slot = bracketend(cc);
882 if (slot > assert_back_end)
883 assert_back_end = slot;
884 cc += 1 + LINK_SIZE;
885 break;
886
887 case OP_THEN_ARG:
888 common->has_then = TRUE;
889 common->control_head_ptr = 1;
890 /* Fall through. */
891
892 case OP_PRUNE_ARG:
893 case OP_MARK:
894 if (common->mark_ptr == 0)
895 {
896 common->mark_ptr = common->ovector_start;
897 common->ovector_start += sizeof(sljit_sw);
898 }
899 cc += 1 + 2 + cc[1];
900 break;
901
902 case OP_THEN:
903 common->has_then = TRUE;
904 common->control_head_ptr = 1;
905 cc += 1;
906 break;
907
908 case OP_SKIP:
909 if (cc < assert_back_end)
910 common->has_skip_in_assert_back = TRUE;
911 cc += 1;
912 break;
913
914 case OP_SKIP_ARG:
915 common->control_head_ptr = 1;
916 common->has_skip_arg = TRUE;
917 if (cc < assert_back_end)
918 common->has_skip_in_assert_back = TRUE;
919 cc += 1 + 2 + cc[1];
920 break;
921
922 default:
923 cc = next_opcode(common, cc);
924 if (cc == NULL)
925 return FALSE;
926 break;
927 }
928 }
929 return TRUE;
930 }
931
is_accelerated_repeat(pcre_uchar * cc)932 static BOOL is_accelerated_repeat(pcre_uchar *cc)
933 {
934 switch(*cc)
935 {
936 case OP_TYPESTAR:
937 case OP_TYPEMINSTAR:
938 case OP_TYPEPLUS:
939 case OP_TYPEMINPLUS:
940 case OP_TYPEPOSSTAR:
941 case OP_TYPEPOSPLUS:
942 return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
943
944 case OP_STAR:
945 case OP_MINSTAR:
946 case OP_PLUS:
947 case OP_MINPLUS:
948 case OP_POSSTAR:
949 case OP_POSPLUS:
950
951 case OP_STARI:
952 case OP_MINSTARI:
953 case OP_PLUSI:
954 case OP_MINPLUSI:
955 case OP_POSSTARI:
956 case OP_POSPLUSI:
957
958 case OP_NOTSTAR:
959 case OP_NOTMINSTAR:
960 case OP_NOTPLUS:
961 case OP_NOTMINPLUS:
962 case OP_NOTPOSSTAR:
963 case OP_NOTPOSPLUS:
964
965 case OP_NOTSTARI:
966 case OP_NOTMINSTARI:
967 case OP_NOTPLUSI:
968 case OP_NOTMINPLUSI:
969 case OP_NOTPOSSTARI:
970 case OP_NOTPOSPLUSI:
971 return TRUE;
972
973 case OP_CLASS:
974 case OP_NCLASS:
975 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
976 case OP_XCLASS:
977 cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
978 #else
979 cc += (1 + (32 / sizeof(pcre_uchar)));
980 #endif
981
982 switch(*cc)
983 {
984 case OP_CRSTAR:
985 case OP_CRMINSTAR:
986 case OP_CRPLUS:
987 case OP_CRMINPLUS:
988 case OP_CRPOSSTAR:
989 case OP_CRPOSPLUS:
990 return TRUE;
991 }
992 break;
993 }
994 return FALSE;
995 }
996
detect_fast_forward_skip(compiler_common * common,int * private_data_start)997 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
998 {
999 pcre_uchar *cc = common->start;
1000 pcre_uchar *end;
1001
1002 /* Skip not repeated brackets. */
1003 while (TRUE)
1004 {
1005 switch(*cc)
1006 {
1007 case OP_SOD:
1008 case OP_SOM:
1009 case OP_SET_SOM:
1010 case OP_NOT_WORD_BOUNDARY:
1011 case OP_WORD_BOUNDARY:
1012 case OP_EODN:
1013 case OP_EOD:
1014 case OP_CIRC:
1015 case OP_CIRCM:
1016 case OP_DOLL:
1017 case OP_DOLLM:
1018 /* Zero width assertions. */
1019 cc++;
1020 continue;
1021 }
1022
1023 if (*cc != OP_BRA && *cc != OP_CBRA)
1024 break;
1025
1026 end = cc + GET(cc, 1);
1027 if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1028 return FALSE;
1029 if (*cc == OP_CBRA)
1030 {
1031 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1032 return FALSE;
1033 cc += IMM2_SIZE;
1034 }
1035 cc += 1 + LINK_SIZE;
1036 }
1037
1038 if (is_accelerated_repeat(cc))
1039 {
1040 common->fast_forward_bc_ptr = cc;
1041 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1042 *private_data_start += sizeof(sljit_sw);
1043 return TRUE;
1044 }
1045 return FALSE;
1046 }
1047
detect_fast_fail(compiler_common * common,pcre_uchar * cc,int * private_data_start,sljit_s32 depth)1048 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
1049 {
1050 pcre_uchar *next_alt;
1051
1052 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1053
1054 if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1055 return;
1056
1057 next_alt = bracketend(cc) - (1 + LINK_SIZE);
1058 if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1059 return;
1060
1061 do
1062 {
1063 next_alt = cc + GET(cc, 1);
1064
1065 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1066
1067 while (TRUE)
1068 {
1069 switch(*cc)
1070 {
1071 case OP_SOD:
1072 case OP_SOM:
1073 case OP_SET_SOM:
1074 case OP_NOT_WORD_BOUNDARY:
1075 case OP_WORD_BOUNDARY:
1076 case OP_EODN:
1077 case OP_EOD:
1078 case OP_CIRC:
1079 case OP_CIRCM:
1080 case OP_DOLL:
1081 case OP_DOLLM:
1082 /* Zero width assertions. */
1083 cc++;
1084 continue;
1085 }
1086 break;
1087 }
1088
1089 if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1090 detect_fast_fail(common, cc, private_data_start, depth - 1);
1091
1092 if (is_accelerated_repeat(cc))
1093 {
1094 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1095
1096 if (common->fast_fail_start_ptr == 0)
1097 common->fast_fail_start_ptr = *private_data_start;
1098
1099 *private_data_start += sizeof(sljit_sw);
1100 common->fast_fail_end_ptr = *private_data_start;
1101
1102 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1103 return;
1104 }
1105
1106 cc = next_alt;
1107 }
1108 while (*cc == OP_ALT);
1109 }
1110
/* Returns how many private machine words (0, 1 or 2) the class repeat
   opcode at cc requires. Callers in this file pass the address of the item
   that follows a character class. Any opcode that is not one of the OP_CR*
   repeats yields 0. */
static int get_class_iterator_size(pcre_uchar *cc)
{
  pcre_uchar op = *cc;
  sljit_u32 lower;
  sljit_u32 span;

  if (op == OP_CRSTAR || op == OP_CRPLUS)
    return 2;

  if (op == OP_CRMINSTAR || op == OP_CRMINPLUS || op == OP_CRQUERY || op == OP_CRMINQUERY)
    return 1;

  if (op != OP_CRRANGE && op != OP_CRMINRANGE)
    return 0;

  lower = GET2(cc, 1);
  span = GET2(cc, 1 + IMM2_SIZE);

  /* A stored maximum of zero is treated like the star forms above
     (presumably "no upper limit" - confirm against the pattern compiler). */
  if (span == 0)
    return (op == OP_CRRANGE) ? 2 : 1;

  /* Number of optional iterations, capped at two words. */
  span -= lower;
  return (span > 2) ? 2 : span;
}
1142
detect_repeat(compiler_common * common,pcre_uchar * begin)1143 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
1144 {
1145 pcre_uchar *end = bracketend(begin);
1146 pcre_uchar *next;
1147 pcre_uchar *next_end;
1148 pcre_uchar *max_end;
1149 pcre_uchar type;
1150 sljit_sw length = end - begin;
1151 int min, max, i;
1152
1153 /* Detect fixed iterations first. */
1154 if (end[-(1 + LINK_SIZE)] != OP_KET)
1155 return FALSE;
1156
1157 /* Already detected repeat. */
1158 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1159 return TRUE;
1160
1161 next = end;
1162 min = 1;
1163 while (1)
1164 {
1165 if (*next != *begin)
1166 break;
1167 next_end = bracketend(next);
1168 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1169 break;
1170 next = next_end;
1171 min++;
1172 }
1173
1174 if (min == 2)
1175 return FALSE;
1176
1177 max = 0;
1178 max_end = next;
1179 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1180 {
1181 type = *next;
1182 while (1)
1183 {
1184 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1185 break;
1186 next_end = bracketend(next + 2 + LINK_SIZE);
1187 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1188 break;
1189 next = next_end;
1190 max++;
1191 }
1192
1193 if (next[0] == type && next[1] == *begin && max >= 1)
1194 {
1195 next_end = bracketend(next + 1);
1196 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1197 {
1198 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1199 if (*next_end != OP_KET)
1200 break;
1201
1202 if (i == max)
1203 {
1204 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1205 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1206 /* +2 the original and the last. */
1207 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1208 if (min == 1)
1209 return TRUE;
1210 min--;
1211 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1212 }
1213 }
1214 }
1215 }
1216
1217 if (min >= 3)
1218 {
1219 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1220 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1221 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1222 return TRUE;
1223 }
1224
1225 return FALSE;
1226 }
1227
/* Case-label groups shared by the opcode walkers that follow. The numeric
   suffix matches the number of private words set_private_data_ptrs()
   requests for the group (1 or 2). The "2B" groups are the UPTO forms, whose
   opcode is followed by an IMM2_SIZE count operand. */

/* Single character iterators, one private word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Single character iterators, two private words, no count operand. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

/* Single character iterators, two private words, with a count operand. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Character type iterators, one private word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Character type iterators, two private words, no count operand. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Character type iterators, two private words, with a count operand. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1279
/* Walks the compiled pattern from common->start up to ccend and assigns
   private data offsets (stored in common->private_data_ptrs, indexed by the
   opcode position) to the items that need them: repeated bracket groups
   found by detect_repeat(), assertion / once / possessive brackets, and
   character iterators that are not inside a repeated bracket.

   common             - compiler state; private_data_ptrs is written.
   private_data_start - in/out: first free private data offset in bytes; on
                        return it holds the offset after the last slot.
   ccend              - end of the region to scan.

   The scan stops early once the running offset exceeds
   SLJIT_MAX_LOCAL_SIZE.

   Per item, three locals steer what happens after the switch:
     space      - number of private words wanted by a character iterator,
     size       - how far to advance cc; a negative value means "advance by
                  -size, then also skip the UTF extra bytes of the character
                  operand",
     bracketlen - length of a bracket header; also triggers the update of
                  the `end` marker described below. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
{
  pcre_uchar *cc = common->start;
  pcre_uchar *alternative;
  /* While cc < end, cc lies inside a bracket that is converted to a repeat
     or is itself repeated, so iterators get no private slot there. */
  pcre_uchar *end = NULL;
  int private_data_ptr = *private_data_start;
  int space, size, bracketlen;
  BOOL repeat_check = TRUE;

  while (cc < ccend)
  {
    space = 0;
    size = 0;
    bracketlen = 0;
    if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
      break;

    if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
      if (detect_repeat(common, cc))
      {
        /* These brackets are converted to repeats, so no global
           based single character repeat is allowed. */
        if (cc >= end)
          end = bracketend(cc);
      }
    }
    /* The check is suppressed only for the bracket that directly follows
       an OP_BRA*ZERO opcode (see below). */
    repeat_check = TRUE;

    switch(*cc)
    {
      case OP_KET:
        /* A non-zero entry after the ket was left by detect_repeat(): the
           ket gets a slot and the unrolled copies are skipped. */
        if (common->private_data_ptrs[cc + 1 - common->start] != 0)
        {
          common->private_data_ptrs[cc - common->start] = private_data_ptr;
          private_data_ptr += sizeof(sljit_sw);
          cc += common->private_data_ptrs[cc + 1 - common->start];
        }
        cc += 1 + LINK_SIZE;
        break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ONCE:
      case OP_ONCE_NC:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
        common->private_data_ptrs[cc - common->start] = private_data_ptr;
        private_data_ptr += sizeof(sljit_sw);
        bracketlen = 1 + LINK_SIZE;
        break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
        common->private_data_ptrs[cc - common->start] = private_data_ptr;
        private_data_ptr += sizeof(sljit_sw);
        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_COND:
        /* Might be a hidden SCOND. */
        alternative = cc + GET(cc, 1);
        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
        {
          common->private_data_ptrs[cc - common->start] = private_data_ptr;
          private_data_ptr += sizeof(sljit_sw);
        }
        bracketlen = 1 + LINK_SIZE;
        break;

      case OP_BRA:
        bracketlen = 1 + LINK_SIZE;
        break;

      case OP_CBRA:
      case OP_SCBRA:
        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_BRAZERO:
      case OP_BRAMINZERO:
      case OP_BRAPOSZERO:
        repeat_check = FALSE;
        size = 1;
        break;

      CASE_ITERATOR_PRIVATE_DATA_1
        space = 1;
        size = -2;
        break;

      CASE_ITERATOR_PRIVATE_DATA_2A
        space = 2;
        size = -2;
        break;

      CASE_ITERATOR_PRIVATE_DATA_2B
        space = 2;
        size = -(2 + IMM2_SIZE);
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
        space = 1;
        size = 1;
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
        if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
          space = 2;
        size = 1;
        break;

      case OP_TYPEUPTO:
        if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
          space = 2;
        size = 1 + IMM2_SIZE;
        break;

      case OP_TYPEMINUPTO:
        space = 2;
        size = 1 + IMM2_SIZE;
        break;

      case OP_CLASS:
      case OP_NCLASS:
        /* The class length must be known first: the repeat opcode that
           get_class_iterator_size() inspects follows the class data.
           Calling it with size still zero would look at the class opcode
           itself and always report 0 words. */
        size = 1 + 32 / sizeof(pcre_uchar);
        space = get_class_iterator_size(cc + size);
        break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
      case OP_XCLASS:
        /* Same ordering requirement as for OP_CLASS above. */
        size = GET(cc, 1);
        space = get_class_iterator_size(cc + size);
        break;
#endif

      default:
        cc = next_opcode(common, cc);
        SLJIT_ASSERT(cc != NULL);
        break;
    }

    /* Character iterators that are not inside a repeated bracket get a
       private slot instead of allocating it on the stack. */
    if (space > 0 && cc >= end)
    {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw) * space;
    }

    if (size != 0)
    {
      if (size < 0)
      {
        cc += -size;
#ifdef SUPPORT_UTF
        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
      else
        cc += size;
    }

    if (bracketlen > 0)
    {
      if (cc >= end)
      {
        /* A bracket closed by a plain OP_KET is not repeated, so its
           contents may still use private slots. */
        end = bracketend(cc);
        if (end[-1 - LINK_SIZE] == OP_KET)
          end = NULL;
      }
      cc += bracketlen;
    }
  }
  *private_data_start = private_data_ptr;
}
1460
1461 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL recursive,BOOL * needs_control_head)1462 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1463 {
1464 int length = 0;
1465 int possessive = 0;
1466 BOOL stack_restore = FALSE;
1467 BOOL setsom_found = recursive;
1468 BOOL setmark_found = recursive;
1469 /* The last capture is a local variable even for recursions. */
1470 BOOL capture_last_found = FALSE;
1471
1472 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1473 SLJIT_ASSERT(common->control_head_ptr != 0);
1474 *needs_control_head = TRUE;
1475 #else
1476 *needs_control_head = FALSE;
1477 #endif
1478
1479 if (ccend == NULL)
1480 {
1481 ccend = bracketend(cc) - (1 + LINK_SIZE);
1482 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1483 {
1484 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1485 /* This is correct regardless of common->capture_last_ptr. */
1486 capture_last_found = TRUE;
1487 }
1488 cc = next_opcode(common, cc);
1489 }
1490
1491 SLJIT_ASSERT(cc != NULL);
1492 while (cc < ccend)
1493 switch(*cc)
1494 {
1495 case OP_SET_SOM:
1496 SLJIT_ASSERT(common->has_set_som);
1497 stack_restore = TRUE;
1498 if (!setsom_found)
1499 {
1500 length += 2;
1501 setsom_found = TRUE;
1502 }
1503 cc += 1;
1504 break;
1505
1506 case OP_MARK:
1507 case OP_PRUNE_ARG:
1508 case OP_THEN_ARG:
1509 SLJIT_ASSERT(common->mark_ptr != 0);
1510 stack_restore = TRUE;
1511 if (!setmark_found)
1512 {
1513 length += 2;
1514 setmark_found = TRUE;
1515 }
1516 if (common->control_head_ptr != 0)
1517 *needs_control_head = TRUE;
1518 cc += 1 + 2 + cc[1];
1519 break;
1520
1521 case OP_RECURSE:
1522 stack_restore = TRUE;
1523 if (common->has_set_som && !setsom_found)
1524 {
1525 length += 2;
1526 setsom_found = TRUE;
1527 }
1528 if (common->mark_ptr != 0 && !setmark_found)
1529 {
1530 length += 2;
1531 setmark_found = TRUE;
1532 }
1533 if (common->capture_last_ptr != 0 && !capture_last_found)
1534 {
1535 length += 2;
1536 capture_last_found = TRUE;
1537 }
1538 cc += 1 + LINK_SIZE;
1539 break;
1540
1541 case OP_CBRA:
1542 case OP_CBRAPOS:
1543 case OP_SCBRA:
1544 case OP_SCBRAPOS:
1545 stack_restore = TRUE;
1546 if (common->capture_last_ptr != 0 && !capture_last_found)
1547 {
1548 length += 2;
1549 capture_last_found = TRUE;
1550 }
1551 length += 3;
1552 cc += 1 + LINK_SIZE + IMM2_SIZE;
1553 break;
1554
1555 case OP_THEN:
1556 stack_restore = TRUE;
1557 if (common->control_head_ptr != 0)
1558 *needs_control_head = TRUE;
1559 cc ++;
1560 break;
1561
1562 default:
1563 stack_restore = TRUE;
1564 /* Fall through. */
1565
1566 case OP_NOT_WORD_BOUNDARY:
1567 case OP_WORD_BOUNDARY:
1568 case OP_NOT_DIGIT:
1569 case OP_DIGIT:
1570 case OP_NOT_WHITESPACE:
1571 case OP_WHITESPACE:
1572 case OP_NOT_WORDCHAR:
1573 case OP_WORDCHAR:
1574 case OP_ANY:
1575 case OP_ALLANY:
1576 case OP_ANYBYTE:
1577 case OP_NOTPROP:
1578 case OP_PROP:
1579 case OP_ANYNL:
1580 case OP_NOT_HSPACE:
1581 case OP_HSPACE:
1582 case OP_NOT_VSPACE:
1583 case OP_VSPACE:
1584 case OP_EXTUNI:
1585 case OP_EODN:
1586 case OP_EOD:
1587 case OP_CIRC:
1588 case OP_CIRCM:
1589 case OP_DOLL:
1590 case OP_DOLLM:
1591 case OP_CHAR:
1592 case OP_CHARI:
1593 case OP_NOT:
1594 case OP_NOTI:
1595
1596 case OP_EXACT:
1597 case OP_POSSTAR:
1598 case OP_POSPLUS:
1599 case OP_POSQUERY:
1600 case OP_POSUPTO:
1601
1602 case OP_EXACTI:
1603 case OP_POSSTARI:
1604 case OP_POSPLUSI:
1605 case OP_POSQUERYI:
1606 case OP_POSUPTOI:
1607
1608 case OP_NOTEXACT:
1609 case OP_NOTPOSSTAR:
1610 case OP_NOTPOSPLUS:
1611 case OP_NOTPOSQUERY:
1612 case OP_NOTPOSUPTO:
1613
1614 case OP_NOTEXACTI:
1615 case OP_NOTPOSSTARI:
1616 case OP_NOTPOSPLUSI:
1617 case OP_NOTPOSQUERYI:
1618 case OP_NOTPOSUPTOI:
1619
1620 case OP_TYPEEXACT:
1621 case OP_TYPEPOSSTAR:
1622 case OP_TYPEPOSPLUS:
1623 case OP_TYPEPOSQUERY:
1624 case OP_TYPEPOSUPTO:
1625
1626 case OP_CLASS:
1627 case OP_NCLASS:
1628 case OP_XCLASS:
1629 case OP_CALLOUT:
1630
1631 cc = next_opcode(common, cc);
1632 SLJIT_ASSERT(cc != NULL);
1633 break;
1634 }
1635
1636 /* Possessive quantifiers can use a special case. */
1637 if (SLJIT_UNLIKELY(possessive == length))
1638 return stack_restore ? no_frame : no_stack;
1639
1640 if (length > 0)
1641 return length + 1;
1642 return stack_restore ? no_frame : no_stack;
1643 }
1644
init_frame(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,int stackpos,int stacktop,BOOL recursive)1645 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1646 {
1647 DEFINE_COMPILER;
1648 BOOL setsom_found = recursive;
1649 BOOL setmark_found = recursive;
1650 /* The last capture is a local variable even for recursions. */
1651 BOOL capture_last_found = FALSE;
1652 int offset;
1653
1654 /* >= 1 + shortest item size (2) */
1655 SLJIT_UNUSED_ARG(stacktop);
1656 SLJIT_ASSERT(stackpos >= stacktop + 2);
1657
1658 stackpos = STACK(stackpos);
1659 if (ccend == NULL)
1660 {
1661 ccend = bracketend(cc) - (1 + LINK_SIZE);
1662 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1663 cc = next_opcode(common, cc);
1664 }
1665
1666 SLJIT_ASSERT(cc != NULL);
1667 while (cc < ccend)
1668 switch(*cc)
1669 {
1670 case OP_SET_SOM:
1671 SLJIT_ASSERT(common->has_set_som);
1672 if (!setsom_found)
1673 {
1674 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1675 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1676 stackpos -= (int)sizeof(sljit_sw);
1677 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1678 stackpos -= (int)sizeof(sljit_sw);
1679 setsom_found = TRUE;
1680 }
1681 cc += 1;
1682 break;
1683
1684 case OP_MARK:
1685 case OP_PRUNE_ARG:
1686 case OP_THEN_ARG:
1687 SLJIT_ASSERT(common->mark_ptr != 0);
1688 if (!setmark_found)
1689 {
1690 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1691 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1692 stackpos -= (int)sizeof(sljit_sw);
1693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1694 stackpos -= (int)sizeof(sljit_sw);
1695 setmark_found = TRUE;
1696 }
1697 cc += 1 + 2 + cc[1];
1698 break;
1699
1700 case OP_RECURSE:
1701 if (common->has_set_som && !setsom_found)
1702 {
1703 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1704 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1705 stackpos -= (int)sizeof(sljit_sw);
1706 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1707 stackpos -= (int)sizeof(sljit_sw);
1708 setsom_found = TRUE;
1709 }
1710 if (common->mark_ptr != 0 && !setmark_found)
1711 {
1712 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1713 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1714 stackpos -= (int)sizeof(sljit_sw);
1715 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1716 stackpos -= (int)sizeof(sljit_sw);
1717 setmark_found = TRUE;
1718 }
1719 if (common->capture_last_ptr != 0 && !capture_last_found)
1720 {
1721 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1722 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1723 stackpos -= (int)sizeof(sljit_sw);
1724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1725 stackpos -= (int)sizeof(sljit_sw);
1726 capture_last_found = TRUE;
1727 }
1728 cc += 1 + LINK_SIZE;
1729 break;
1730
1731 case OP_CBRA:
1732 case OP_CBRAPOS:
1733 case OP_SCBRA:
1734 case OP_SCBRAPOS:
1735 if (common->capture_last_ptr != 0 && !capture_last_found)
1736 {
1737 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1738 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1739 stackpos -= (int)sizeof(sljit_sw);
1740 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1741 stackpos -= (int)sizeof(sljit_sw);
1742 capture_last_found = TRUE;
1743 }
1744 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1745 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1746 stackpos -= (int)sizeof(sljit_sw);
1747 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1748 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1749 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1750 stackpos -= (int)sizeof(sljit_sw);
1751 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1752 stackpos -= (int)sizeof(sljit_sw);
1753
1754 cc += 1 + LINK_SIZE + IMM2_SIZE;
1755 break;
1756
1757 default:
1758 cc = next_opcode(common, cc);
1759 SLJIT_ASSERT(cc != NULL);
1760 break;
1761 }
1762
1763 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1764 SLJIT_ASSERT(stackpos == STACK(stacktop));
1765 }
1766
get_private_data_copy_length(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL needs_control_head)1767 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1768 {
1769 int private_data_length = needs_control_head ? 3 : 2;
1770 int size;
1771 pcre_uchar *alternative;
1772 /* Calculate the sum of the private machine words. */
1773 while (cc < ccend)
1774 {
1775 size = 0;
1776 switch(*cc)
1777 {
1778 case OP_KET:
1779 if (PRIVATE_DATA(cc) != 0)
1780 {
1781 private_data_length++;
1782 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1783 cc += PRIVATE_DATA(cc + 1);
1784 }
1785 cc += 1 + LINK_SIZE;
1786 break;
1787
1788 case OP_ASSERT:
1789 case OP_ASSERT_NOT:
1790 case OP_ASSERTBACK:
1791 case OP_ASSERTBACK_NOT:
1792 case OP_ONCE:
1793 case OP_ONCE_NC:
1794 case OP_BRAPOS:
1795 case OP_SBRA:
1796 case OP_SBRAPOS:
1797 case OP_SCOND:
1798 private_data_length++;
1799 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1800 cc += 1 + LINK_SIZE;
1801 break;
1802
1803 case OP_CBRA:
1804 case OP_SCBRA:
1805 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1806 private_data_length++;
1807 cc += 1 + LINK_SIZE + IMM2_SIZE;
1808 break;
1809
1810 case OP_CBRAPOS:
1811 case OP_SCBRAPOS:
1812 private_data_length += 2;
1813 cc += 1 + LINK_SIZE + IMM2_SIZE;
1814 break;
1815
1816 case OP_COND:
1817 /* Might be a hidden SCOND. */
1818 alternative = cc + GET(cc, 1);
1819 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1820 private_data_length++;
1821 cc += 1 + LINK_SIZE;
1822 break;
1823
1824 CASE_ITERATOR_PRIVATE_DATA_1
1825 if (PRIVATE_DATA(cc))
1826 private_data_length++;
1827 cc += 2;
1828 #ifdef SUPPORT_UTF
1829 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1830 #endif
1831 break;
1832
1833 CASE_ITERATOR_PRIVATE_DATA_2A
1834 if (PRIVATE_DATA(cc))
1835 private_data_length += 2;
1836 cc += 2;
1837 #ifdef SUPPORT_UTF
1838 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1839 #endif
1840 break;
1841
1842 CASE_ITERATOR_PRIVATE_DATA_2B
1843 if (PRIVATE_DATA(cc))
1844 private_data_length += 2;
1845 cc += 2 + IMM2_SIZE;
1846 #ifdef SUPPORT_UTF
1847 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1848 #endif
1849 break;
1850
1851 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1852 if (PRIVATE_DATA(cc))
1853 private_data_length++;
1854 cc += 1;
1855 break;
1856
1857 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1858 if (PRIVATE_DATA(cc))
1859 private_data_length += 2;
1860 cc += 1;
1861 break;
1862
1863 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1864 if (PRIVATE_DATA(cc))
1865 private_data_length += 2;
1866 cc += 1 + IMM2_SIZE;
1867 break;
1868
1869 case OP_CLASS:
1870 case OP_NCLASS:
1871 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1872 case OP_XCLASS:
1873 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1874 #else
1875 size = 1 + 32 / (int)sizeof(pcre_uchar);
1876 #endif
1877 if (PRIVATE_DATA(cc))
1878 private_data_length += get_class_iterator_size(cc + size);
1879 cc += size;
1880 break;
1881
1882 default:
1883 cc = next_opcode(common, cc);
1884 SLJIT_ASSERT(cc != NULL);
1885 break;
1886 }
1887 }
1888 SLJIT_ASSERT(cc == ccend);
1889 return private_data_length;
1890 }
1891
/* Emits code that copies the private data words used by the opcodes in
   [cc, ccend) between the local area (addressed through SLJIT_SP) and the
   stack (addressed through STACK_TOP).

   save == TRUE  : locals are written to the stack; after the last opcode the
                   recursive head (and the control head when
                   needs_control_head is set) is saved as well.
   save == FALSE : the words are read back from the stack into the locals;
                   the head word(s) are not restored here.

   stackptr / stacktop are stack indexes, converted with STACK(); the copy
   must end exactly at the converted stacktop (asserted).

   The transfer is pipelined through TMP1 and TMP2, used alternately, so a
   load for one word is emitted before the store of the previous one. */
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
{
  DEFINE_COMPILER;
  /* Local offsets of the (at most two) words produced by one opcode. */
  int slot[2];
  int pending, size;
  /* Which temporary register takes the next word. */
  BOOL use_tmp1 = TRUE;
  /* Whether a register currently holds a word that is not yet stored. */
  BOOL tmp1_free = TRUE;
  BOOL tmp2_free = TRUE;
  /* Set once the trailing head words were queued (save mode only). */
  BOOL finished = FALSE;
  pcre_uchar *alternative;

  stackptr = STACK(stackptr);
  stacktop = STACK(stacktop - 1);

  if (!save)
  {
    /* The head word(s) are excluded from the restore; preload up to two
       words so the loop below can store and reload in lock step. */
    stacktop -= (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
    if (stackptr < stacktop)
    {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
      stackptr += sizeof(sljit_sw);
      tmp1_free = FALSE;
    }
    if (stackptr < stacktop)
    {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
      stackptr += sizeof(sljit_sw);
      tmp2_free = FALSE;
    }
    /* use_tmp1 must be TRUE in either way. */
  }

  SLJIT_ASSERT(common->recursive_head_ptr != 0);

  do
  {
    pending = 0;
    if (cc >= ccend)
    {
      if (!save)
        break;

      /* Last pass of a save: queue the head word(s). */
      pending = 1;
      slot[0] = common->recursive_head_ptr;
      if (needs_control_head)
      {
        SLJIT_ASSERT(common->control_head_ptr != 0);
        pending = 2;
        slot[0] = common->control_head_ptr;
        slot[1] = common->recursive_head_ptr;
      }
      finished = TRUE;
    }
    else switch(*cc)
    {
      case OP_KET:
        if (PRIVATE_DATA(cc) != 0)
        {
          pending = 1;
          slot[0] = PRIVATE_DATA(cc);
          SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
          cc += PRIVATE_DATA(cc + 1);
        }
        cc += 1 + LINK_SIZE;
        break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ONCE:
      case OP_ONCE_NC:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
        pending = 1;
        slot[0] = PRIVATE_DATA(cc);
        SLJIT_ASSERT(slot[0] != 0);
        cc += 1 + LINK_SIZE;
        break;

      case OP_CBRA:
      case OP_SCBRA:
        if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
        {
          pending = 1;
          slot[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
        }
        cc += 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
        pending = 2;
        slot[0] = PRIVATE_DATA(cc);
        slot[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
        SLJIT_ASSERT(slot[0] != 0 && slot[1] != 0);
        cc += 1 + LINK_SIZE + IMM2_SIZE;
        break;

      case OP_COND:
        /* Might be a hidden SCOND. */
        alternative = cc + GET(cc, 1);
        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
        {
          pending = 1;
          slot[0] = PRIVATE_DATA(cc);
          SLJIT_ASSERT(slot[0] != 0);
        }
        cc += 1 + LINK_SIZE;
        break;

      CASE_ITERATOR_PRIVATE_DATA_1
        if (PRIVATE_DATA(cc))
        {
          pending = 1;
          slot[0] = PRIVATE_DATA(cc);
        }
        cc += 2;
#ifdef SUPPORT_UTF
        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
        break;

      CASE_ITERATOR_PRIVATE_DATA_2A
        if (PRIVATE_DATA(cc))
        {
          pending = 2;
          slot[0] = PRIVATE_DATA(cc);
          slot[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
        cc += 2;
#ifdef SUPPORT_UTF
        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
        break;

      CASE_ITERATOR_PRIVATE_DATA_2B
        if (PRIVATE_DATA(cc))
        {
          pending = 2;
          slot[0] = PRIVATE_DATA(cc);
          slot[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
        cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
        if (PRIVATE_DATA(cc))
        {
          pending = 1;
          slot[0] = PRIVATE_DATA(cc);
        }
        cc += 1;
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
        if (PRIVATE_DATA(cc))
        {
          pending = 2;
          slot[0] = PRIVATE_DATA(cc);
          slot[1] = slot[0] + sizeof(sljit_sw);
        }
        cc += 1;
        break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
        if (PRIVATE_DATA(cc))
        {
          pending = 2;
          slot[0] = PRIVATE_DATA(cc);
          slot[1] = slot[0] + sizeof(sljit_sw);
        }
        cc += 1 + IMM2_SIZE;
        break;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
      case OP_XCLASS:
        size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
        size = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
        if (PRIVATE_DATA(cc))
          switch(get_class_iterator_size(cc + size))
          {
            case 1:
              pending = 1;
              slot[0] = PRIVATE_DATA(cc);
              break;

            case 2:
              pending = 2;
              slot[0] = PRIVATE_DATA(cc);
              slot[1] = slot[0] + sizeof(sljit_sw);
              break;

            default:
              SLJIT_UNREACHABLE();
              break;
          }
        cc += size;
        break;

      default:
        cc = next_opcode(common, cc);
        SLJIT_ASSERT(cc != NULL);
        break;
    }

    /* Transfer the queued words, highest index first. */
    while (pending > 0)
    {
      pending--;
      if (save)
      {
        if (use_tmp1)
        {
          /* Flush the word loaded two steps ago, then load the next. */
          if (!tmp1_free)
          {
            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
            stackptr += sizeof(sljit_sw);
          }
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), slot[pending]);
          tmp1_free = FALSE;
          use_tmp1 = FALSE;
        }
        else
        {
          if (!tmp2_free)
          {
            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
            stackptr += sizeof(sljit_sw);
          }
          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), slot[pending]);
          tmp2_free = FALSE;
          use_tmp1 = TRUE;
        }
      }
      else
      {
        if (use_tmp1)
        {
          /* Write the preloaded word back, then refill the register
             while stack words remain. */
          SLJIT_ASSERT(!tmp1_free);
          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), slot[pending], TMP1, 0);
          tmp1_free = stackptr >= stacktop;
          if (!tmp1_free)
          {
            OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
            stackptr += sizeof(sljit_sw);
          }
          use_tmp1 = FALSE;
        }
        else
        {
          SLJIT_ASSERT(!tmp2_free);
          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), slot[pending], TMP2, 0);
          tmp2_free = stackptr >= stacktop;
          if (!tmp2_free)
          {
            OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
            stackptr += sizeof(sljit_sw);
          }
          use_tmp1 = TRUE;
        }
      }
    }
  }
  while (!finished);

  if (save)
  {
    /* Flush what is still held in the registers, oldest word first. */
    if (use_tmp1)
    {
      if (!tmp1_free)
      {
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
        stackptr += sizeof(sljit_sw);
      }
      if (!tmp2_free)
      {
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
        stackptr += sizeof(sljit_sw);
      }
    }
    else
    {
      if (!tmp2_free)
      {
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
        stackptr += sizeof(sljit_sw);
      }
      if (!tmp1_free)
      {
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
        stackptr += sizeof(sljit_sw);
      }
    }
  }
  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1_free && tmp2_free)));
}
2202
set_then_offsets(compiler_common * common,pcre_uchar * cc,sljit_u8 * current_offset)2203 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset)
2204 {
2205 pcre_uchar *end = bracketend(cc);
2206 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2207
2208 /* Assert captures then. */
2209 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2210 current_offset = NULL;
2211 /* Conditional block does not. */
2212 if (*cc == OP_COND || *cc == OP_SCOND)
2213 has_alternatives = FALSE;
2214
2215 cc = next_opcode(common, cc);
2216 if (has_alternatives)
2217 current_offset = common->then_offsets + (cc - common->start);
2218
2219 while (cc < end)
2220 {
2221 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2222 cc = set_then_offsets(common, cc, current_offset);
2223 else
2224 {
2225 if (*cc == OP_ALT && has_alternatives)
2226 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2227 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2228 *current_offset = 1;
2229 cc = next_opcode(common, cc);
2230 }
2231 }
2232
2233 return end;
2234 }
2235
/* The case-label groups are only needed by the opcode walkers above. */
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2242
is_powerof2(unsigned int value)2243 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2244 {
2245 return (value & (value - 1)) == 0;
2246 }
2247
set_jumps(jump_list * list,struct sljit_label * label)2248 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2249 {
2250 while (list)
2251 {
2252 /* sljit_set_label is clever enough to do nothing
2253 if either the jump or the label is NULL. */
2254 SET_LABEL(list->jump, label);
2255 list = list->next;
2256 }
2257 }
2258
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)2259 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2260 {
2261 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2262 if (list_item)
2263 {
2264 list_item->next = *list;
2265 list_item->jump = jump;
2266 *list = list_item;
2267 }
2268 }
2269
/* Records a stub: `start` is the jump that enters it and a label emitted at
   the current code position is where it resumes. The entry is prepended to
   common->stubs and materialized later by flush_stubs(). Nothing is
   recorded (and no label is emitted) when the allocation fails. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
  DEFINE_COMPILER;
  stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

  if (stub == NULL)
    return;

  stub->start = start;
  stub->quit = LABEL();
  stub->next = common->stubs;
  common->stubs = stub;
}
2283
/* Emits the body of every stub recorded by add_stub(): the entering jump
   lands here, a fast call is queued on common->stackalloc, and control
   then jumps back to the stub's resume label. The stub list is emptied
   afterwards. */
static void flush_stubs(compiler_common *common)
{
  DEFINE_COMPILER;
  stub_list *stub;

  for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
    JUMPHERE(stub->start);
    add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
    JUMPTO(SLJIT_JUMP, stub->quit);
  }
  common->stubs = NULL;
}
2298
/* Emits a label at the current code position and queues it, together with
   update_addr, on common->label_addrs. Does nothing if the list node cannot
   be allocated. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
  DEFINE_COMPILER;
  label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

  if (entry != NULL)
  {
    entry->label = LABEL();
    entry->update_addr = update_addr;
    entry->next = common->label_addrs;
    common->label_addrs = entry;
  }
}
2312
count_match(compiler_common * common)2313 static SLJIT_INLINE void count_match(compiler_common *common)
2314 {
2315 DEFINE_COMPILER;
2316
2317 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2318 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2319 }
2320
allocate_stack(compiler_common * common,int size)2321 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2322 {
2323 /* May destroy all locals and registers except TMP2. */
2324 DEFINE_COMPILER;
2325
2326 SLJIT_ASSERT(size > 0);
2327 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2328 #ifdef DESTROY_REGISTERS
2329 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2330 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2331 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2332 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2333 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2334 #endif
2335 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
2336 }
2337
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
/* Emits code that raises STACK_TOP by size * sizeof(sljit_sw) bytes.
'size' must be positive. */
DEFINE_COMPILER;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
}
2345
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
/* Allocates 'size' bytes preceded by one sljit_uw that links the chunk
into the common->read_only_data_head chain. Returns a pointer just past
the link word, or NULL when the compiler is already in an error state or
the allocation fails (a memory error is set on the compiler then). */
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(chunk == NULL))
  sljit_set_compiler_memory_error(compiler);
else
  {
  /* The first word stores the previous head of the chain. */
  *(void**)chunk = common->read_only_data_head;
  common->read_only_data_head = (void *)chunk;
  chunk++;
  }

return chunk;
}
2365
free_read_only_data(void * current,void * allocator_data)2366 static void free_read_only_data(void *current, void *allocator_data)
2367 {
2368 void *next;
2369
2370 SLJIT_UNUSED_ARG(allocator_data);
2371
2372 while (current != NULL)
2373 {
2374 next = *(void**)current;
2375 SLJIT_FREE(current, allocator_data);
2376 current = next;
2377 }
2378 }
2379
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
{
/* Emits code that stores the value "begin - 1 character" into length - 1
ovector slots (OVECTOR(1) .. OVECTOR(length - 1) in the unrolled path).
'begin' is read from the jit_arguments structure addressed by SLJIT_S0.
SLJIT_R0, and for long vectors SLJIT_R1 and SLJIT_R2, are overwritten. */
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

/* At this point we can freely use all temporary registers. */
SLJIT_ASSERT(length > 1);
/* TMP1 returns with begin - 1. */
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
if (length < 8)
  {
  /* Short vectors: one store per slot, no loop. */
  for (i = 1; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
  }
else
  {
  /* The SLJIT_MEM_SUPP call serves as a probe: the pre-update store form
  is only used when this call reports SLJIT_SUCCESS. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
    {
    /* R1 starts at OVECTOR_START; each store uses the pre-update form
    with a step of sizeof(sljit_sw). R2 counts length - 1 iterations. */
    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  else
    {
    /* Fallback: R1 starts one word after OVECTOR_START and is advanced
    explicitly after every store. R2 counts length - 1 iterations. */
    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
    OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  }
}
2418
reset_fast_fail(compiler_common * common)2419 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2420 {
2421 DEFINE_COMPILER;
2422 sljit_s32 i;
2423
2424 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2425
2426 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2427 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2428 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2429 }
2430
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
/* Emits code that resets the match state: ovector slots from index 2
upwards receive the value held in OVECTOR(1), the mark and control head
slots are zeroed when they are in use, STACK_TOP is reloaded through the
jit_arguments 'stack' field and its 'end' member, and TMP1 is loaded from
the start_ptr slot. For long vectors TMP2 is used as the store pointer and
STACK_TOP as the loop counter. */
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
if (length < 8)
  {
  /* Short vectors: one store per slot, no loop. */
  for (i = 2; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
  }
else
  {
  /* The SLJIT_MEM_SUPP call serves as a probe: the pre-update store form
  is only used when this call reports SLJIT_SUCCESS. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
    {
    /* TMP2 starts one word after OVECTOR_START; each store uses the
    pre-update form. STACK_TOP counts length - 2 iterations. */
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  else
    {
    /* Fallback: TMP2 starts two words after OVECTOR_START and is
    advanced explicitly after every store. */
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  }

/* STACK_TOP is rebuilt in three dependent loads (ARGUMENTS, then its
'stack' field, then the 'end' member); unrelated stores and the TMP1 load
are placed between them. */
OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
}
2478
do_search_mark(sljit_sw * current,const pcre_uchar * skip_arg)2479 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2480 {
2481 while (current != NULL)
2482 {
2483 switch (current[1])
2484 {
2485 case type_then_trap:
2486 break;
2487
2488 case type_mark:
2489 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[2]) == 0)
2490 return current[3];
2491 break;
2492
2493 default:
2494 SLJIT_UNREACHABLE();
2495 break;
2496 }
2497 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
2498 current = (sljit_sw*)current[0];
2499 }
2500 return 0;
2501 }
2502
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
/* Emits the code that copies the internal ovector into the caller's
'offsets' array and computes the return value.

Register use in the generated code:
  SLJIT_S2  previous content of OVECTOR(1); OVECTOR(1) itself is
            overwritten with STR_PTR
  SLJIT_R0  jit_arguments pointer, later the 'begin' pointer
  SLJIT_R1  loop counter, loaded from offset_count
  SLJIT_R2  output pointer; it starts at offsets - sizeof(int) and is
            advanced before every store
  SLJIT_S0  read pointer into the internal ovector
  SLJIT_S1  the value currently being converted

When common->mark_ptr is in use, the mark slot is also copied into the
mark_ptr field of jit_arguments. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *early_quit;
BOOL has_pre;

/* At this point we can freely use all registers. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));

/* Probe for the pre-update load form. When it is used, the read pointer
starts one word earlier because the update happens before the access. */
has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));

/* Unlikely, but possible */
early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
loop = LABEL();

if (has_pre)
  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

/* Turn the loaded value into a distance from 'begin'. */
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(int));
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* In the 16 and 32 bit libraries the distance is shifted right by UCHAR_SHIFT. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
JUMPHERE(early_quit);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* Scan downwards in steps of two words, decrementing R1 (which starts at
  topbracket + 1) for as long as the loaded value equals SLJIT_S2. Again,
  the SLJIT_MEM_SUPP call only probes for the pre-update form. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
    {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    }
  else
    {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    }
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
  }
else
  /* With topbracket <= 1 the return value is the constant 1. */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
2581
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
/* Emits the code that returns PCRE_ERROR_PARTIAL. The return register is
set first; when real_offset_count is below 2 the code jumps straight to
'quit'. Otherwise offsets[0] and offsets[1] are filled, plus offsets[2]
when real_offset_count is at least 3, and then the code jumps to 'quit'.
All stored values are distances from 'begin' (shifted right by UCHAR_SHIFT
in the 16 and 32 bit libraries). SLJIT_S0, SLJIT_S1, SLJIT_R1 and SLJIT_R2
are overwritten. */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));

OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);

/* Store match begin and end. */
OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));

/* offsets[2] is written only when real_offset_count >= 3. Its source is the
start_ptr slot in hard partial mode, otherwise the word after hit_start. */
jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
JUMPHERE(jump);

/* offsets[1] = STR_END - begin. SLJIT_S1 is STR_END (see the compile time
assert above), so STR_END is overwritten by this computation. */
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);

/* offsets[0] = (start_used_ptr slot or hit_start slot) - begin. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);

JUMPTO(SLJIT_JUMP, quit);
}
2623
check_start_used_ptr(compiler_common * common)2624 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2625 {
2626 /* May destroy TMP1. */
2627 DEFINE_COMPILER;
2628 struct sljit_jump *jump;
2629
2630 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2631 {
2632 /* The value of -1 must be kept for start_used_ptr! */
2633 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2634 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2635 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2636 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2637 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2638 JUMPHERE(jump);
2639 }
2640 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2641 {
2642 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2643 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2644 JUMPHERE(jump);
2645 }
2646 }
2647
static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
{
/* Detects if the character has an othercase. The character is read from
'cc'; this runs at pattern compile time and emits no code. Characters up
to 127 (UTF mode) or accepted by MAX_255 (otherwise) are looked up in
common->fcc. Larger characters in UTF mode are checked with UCD_OTHERCASE
when SUPPORT_UCP is defined and have no othercase otherwise. */
unsigned int c;

#ifdef SUPPORT_UTF
if (common->utf)
  {
  GETCHAR(c, cc);
  if (c > 127)
    {
#ifdef SUPPORT_UCP
    return c != UCD_OTHERCASE(c);
#else
    return FALSE;
#endif
    }
#ifndef COMPILE_PCRE8
  /* c <= 127 here, so the table index is in range. In the 8 bit library
  control falls through to the common return statement below instead. */
  return common->fcc[c] != c;
#endif
  }
else
#endif
  c = *cc;
return MAX_255(c) ? common->fcc[c] != c : FALSE;
}
2674
static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
{
/* Returns with the othercase. In UTF mode, characters above 127 are mapped
with UCD_OTHERCASE when SUPPORT_UCP is defined and are returned unchanged
otherwise. Every other character goes through TABLE_GET on common->fcc
with 'c' itself as the fallback argument. */
#ifdef SUPPORT_UTF
if (common->utf && c > 127)
  {
#ifdef SUPPORT_UCP
  return UCD_OTHERCASE(c);
#else
  return c;
#endif
  }
#endif
return TABLE_GET(c, common->fcc, c);
}
2690
static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
{
/* Detects if the character and its othercase has only 1 bit difference.
Returns 0 when they differ in more than one bit. Otherwise the result is
(index << 8) | mask, where 'mask' is the differing bit reduced to at most
8 bits and 'index' is computed below (presumably the position, within the
encoded character, of the unit that holds the bit - confirm against the
callers). The character must have an othercase (asserted). */
unsigned int c, oc, bit;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
int n;
#endif

#ifdef SUPPORT_UTF
if (common->utf)
  {
  GETCHAR(c, cc);
  if (c <= 127)
    oc = common->fcc[c];
  else
    {
#ifdef SUPPORT_UCP
    oc = UCD_OTHERCASE(c);
#else
    oc = c;
#endif
    }
  }
else
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if defined COMPILE_PCRE8

#ifdef SUPPORT_UTF
if (common->utf && c > 127)
  {
  /* n starts at GET_EXTRALEN(*cc). Each time the low six bits of 'bit' are
  all zero, the mask is shifted down by six and n is decremented. */
  n = GET_EXTRALEN(*cc);
  while ((bit & 0x3f) == 0)
    {
    n--;
    bit >>= 6;
    }
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UTF */
return (0 << 8) | bit;

#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32

#ifdef SUPPORT_UTF
if (common->utf && c > 65535)
  {
  /* A difference at bit 10 or above is shifted down by ten and then
  encoded by the common return below (index 0 or 1). A difference in the
  low ten bits is encoded with index 2 or 3. */
  if (bit >= (1 << 10))
    bit >>= 10;
  else
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  }
#endif /* SUPPORT_UTF */
return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));

#endif /* COMPILE_PCRE[8|16|32] */
}
2766
static void check_partial(compiler_common *common, BOOL force)
{
/* Checks whether a partial match has occurred. Does not modify registers.
Nothing is emitted in JIT_COMPILE mode, and 'force' must not be set in
that mode. Without 'force', the partial match action is skipped when the
start_used_ptr slot is >= STR_PTR. With 'force' in soft partial mode it is
skipped only when the slot holds -1; in hard partial mode it is
unconditional. */
DEFINE_COMPILER;
struct sljit_jump *jump = NULL;

SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);

if (common->mode == JIT_COMPILE)
  return;

if (!force)
  jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

/* The action itself: soft mode writes 0 to the hit_start slot, any other
mode jumps to the partial match handler (directly when its label already
exists, otherwise through the common->partialmatch list). */
if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else
  {
  if (common->partialmatchlabel != NULL)
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  else
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

if (jump != NULL)
  JUMPHERE(jump);
}
2796
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits the end of subject check. In JIT_COMPILE mode a single jump is
added to 'end_reached', taken when STR_PTR >= STR_END. In the partial
modes the end handling is skipped while STR_PTR < STR_END; at the end,
'end_reached' is taken when the start_used_ptr slot is >= STR_PTR,
otherwise the partial match is recorded (soft mode) or reported (hard
mode). Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Common to both partial modes. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode != JIT_PARTIAL_SOFT_COMPILE)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }

JUMPHERE(not_at_end);
}
2826
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits the end of subject check used before reading a character. In
JIT_COMPILE mode a single jump is added to 'backtracks', taken when
STR_PTR >= STR_END. In the partial modes the end handling is skipped while
STR_PTR < STR_END; at the end, 'backtracks' is taken when the
start_used_ptr slot is >= STR_PTR, otherwise the partial match is recorded
(soft mode, followed by a backtrack) or reported (hard mode). */
DEFINE_COMPILER;
struct sljit_jump *has_input;

if (common->mode != JIT_COMPILE)
  {
  /* Partial matching mode. */
  has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

  if (common->mode != JIT_PARTIAL_SOFT_COMPILE)
    {
    if (common->partialmatchlabel == NULL)
      add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
    else
      JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
    }
  else
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  JUMPHERE(has_input);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
}
2855
static void peek_char(compiler_common *common, sljit_u32 max)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2 Destroyed.

'max' is the largest character value the caller cares about. In UTF mode,
the multi-unit decoding is omitted when 'max' is below the first value
that needs it (128 for UTF-8, 0xd800 for UTF-16), and TMP1 then holds
just the first code unit. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif

SLJIT_UNUSED_ARG(max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  if (max < 128) return;

  /* Bytes below 0xc0 are not decoded any further. For the others, STR_PTR
  is stepped over the first byte for the utfreadchar helper and restored
  afterwards by subtracting the value left in TMP2. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  /* TMP2 contains the high surrogate. STR_PTR still points at it, because
  this function does not advance STR_PTR, so the low surrogate is the
  next code unit and must be read from offset 1. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  /* Result: 0x10000 + ((high - 0xd800) << 10) + (low & 0x3ff). */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif
}
2898
2899 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2900
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough to determine a
match. That is the case when bytes 16..31 of the 32 byte bitset are all
identical: 0xff for a negated class, 0 otherwise. */
const sljit_u8 expected = nclass ? 0xff : 0;
int index;

for (index = 16; index < 32; index++)
  {
  if (bitset[index] != expected)
    return FALSE;
  }
return TRUE;
}
2917
static void read_char7_type(compiler_common *common, BOOL full_read)
{
/* Loads the first code unit into TMP2, advances STR_PTR by one unit, and
loads the ctypes table entry indexed by that unit into TMP1. Does not
check STR_END. When full_read is set and the unit is at least 0xc0,
STR_PTR is advanced further by the utf8_table4 entry for that unit, and
TMP2 is overwritten by that entry. Only valid in UTF mode. */
DEFINE_COMPILER;
struct sljit_jump *single_unit;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!full_read)
  return;

single_unit = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
JUMPHERE(single_unit);
}
2941
2942 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2943
static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
outside the range. Does not check STR_END.

The first code unit is always loaded and STR_PTR always moves past it. In
UTF mode, 'min' and 'max' select the cheapest decoder that is still exact
inside the range. When update_str_ptr is set, STR_PTR is moved past the
whole character even if its value is not fully decoded. TMP2 is used as
scratch, and the UTF-8 paths also use RETURN_ADDR when update_str_ptr is
set. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_ASSERT(min <= max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* Only single byte values matter and STR_PTR need not be exact. */
  if (max < 128 && !update_str_ptr) return;

  /* First bytes below 0xc0 are not decoded any further. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences can be in range. TMP2 = first byte - 0xf0;
    when that is above 7 the rest of the decoding is skipped. With
    update_str_ptr the number of bytes to skip comes from utf8_table4
    (kept in RETURN_ADDR), otherwise STR_PTR moves by 3 on the decoding
    path only. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences can be in range. TMP2 = first byte - 0xe0;
    when that is above 0xf the rest of the decoding is skipped. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    /* General case: call the shared decoder; the 16 bit variant is chosen
    when max is below 0x10000. */
    add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  else if (max < 128)
    {
    /* Reached only with update_str_ptr set (see the early return above):
    the value is not decoded, STR_PTR is just moved by the utf8_table4
    entry for the first byte. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* max is in 128..0x7ff: only the first two bytes are combined. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(jump);
  }
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max >= 0x10000)
    {
    /* Full surrogate pair decoding. STR_PTR already points at the second
    unit here, so offset 0 is the low surrogate. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump);
    return;
    }

  if (max < 0xd800 && !update_str_ptr) return;

  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* The pair is not decoded; TMP1 gets 0x10000, which is above max on
  this path (max < 0x10000). */
  if (max >= 0xd800)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(jump);
  }
#endif
}
3071
static SLJIT_INLINE void read_char(compiler_common *common)
{
/* Reads a character of the full range 0 .. READ_CHAR_MAX and requests that
STR_PTR is updated (update_str_ptr = TRUE). */
read_char_range(common, 0, READ_CHAR_MAX, TRUE);
}
3076
static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
The type comes from the common->ctypes table. For values above 255, TMP1
is 0 in the 16/32 bit libraries and on the inline UTF-8 path. TMP2 is
used as scratch. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* This can be an extra read in some situations, but hopefully
  it is needed in most cases. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  if (!update_str_ptr)
    {
    /* Inline path: combine the first two bytes into TMP2 and advance
    STR_PTR by one more byte only. The table is consulted when the
    combined value is at most 255; otherwise TMP1 stays 0. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    /* The shared utfreadtype8 helper is called for multi byte characters. */
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if !defined COMPILE_PCRE8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if !defined COMPILE_PCRE8
JUMPHERE(jump);
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf && update_str_ptr)
  {
  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
3141
static void skip_char_back(compiler_common *common)
{
/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin.
Outside UTF mode, and in the 32 bit library, this is a single subtraction
of one code unit that does not touch TMP1. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
struct sljit_label *label;

if (common->utf)
  {
  /* Step back one byte at a time for as long as the byte just stepped
  over has the form 10xxxxxx ((byte & 0xc0) == 0x80). */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* Skip low surrogate if necessary. */
  /* Branch free: the result of the equality test ((unit & 0xfc00) ==
  0xdc00) is materialised in TMP1, shifted left by one, and subtracted
  from STR_PTR. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
3176
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Emits a test of the character held in TMP1 against the newline convention
given by nltype. Every conditional jump produced here is appended to the
backtracks list: the jump is taken when the character is a newline if
jumpifmatch is TRUE, and when it is not a newline otherwise. TMP2 may be
destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The shared anynewline routine is invoked through a fast call and its
  answer is consumed through the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (jumpifmatch)
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  else
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* Single, fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  if (jumpifmatch)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: either CR or NL counts as a newline. */
if (!jumpifmatch)
  {
  /* CR bypasses the NL test; anything that is not NL either is reported. */
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
3209
3210 #ifdef SUPPORT_UTF
3211
3212 #if defined COMPILE_PCRE8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1, length in TMP2.

The emitted routine is entered with sljit_emit_fast_enter and left with
sljit_emit_fast_return, keeping the return address in RETURN_ADDR. The
byte at STR_PTR is consumed as the second byte of the sequence, so
STR_PTR is presumably already past the first byte on entry. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first & 0x3f) << 6) | (second & 0x3f). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* After the shift by 6, 0x800 is bit 0x20 of the first byte; it is clear
only when the first byte announces a two byte sequence. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Append the third byte; the XOR removes the length marker bit (0x800)
which is known to be set on this path. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* After two shifts by 6, 0x10000 is bit 0x10 of the first byte; it is
clear for a three byte sequence. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(jump);
/* Append the fourth byte; the XOR removes the marker bit (0x10000). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3260
static void do_utfreadchar16(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

Unlike do_utfreadchar no length is returned, and only the two and three
byte forms are decoded. NOTE(review): for a first byte with bit 0x10 set
(the four byte form) STR_PTR is still moved past the whole sequence, but
the value left in TMP1 is not the decoded character; it appears to be
always >= 0x10000. Presumably callers only distinguish values below
0x10000 - confirm against the call sites. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first & 0x3f) << 6) | (second & 0x3f). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* 0x800 is bit 0x20 of the first byte: clear means two byte sequence. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* 0x400 is bit 0x10 of the first byte. TMP2 becomes 1 when it is set and 0
otherwise, and is added to STR_PTR without a branch. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
/* This code runs only in 8 bit mode. No need to shift the value. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Append one more byte; the XOR removes the marker bit (0x800). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3296
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

The value returned is the entry of the common->ctypes table for characters
below 256 and 0 for every other character. STR_PTR is advanced over the
remaining bytes of the sequence; TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is clear only for a two byte sequence. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. */
/* A value above 3 would yield a character >= 256 after the shift by 6. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* TMP2 is now the character (< 256); fetch its type byte. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(compare);
/* Two byte character >= 256: type 0. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* We only have types for characters less than 256. */
JUMPHERE(jump);
/* Longer sequence: the table is indexed by (first byte - 0xc0) and its
entry is added to STR_PTR (presumably the count of remaining bytes beyond
the one STR_PTR currently addresses - confirm against utf8_table4). */
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3332
3333 #endif /* COMPILE_PCRE8 */
3334
3335 #endif /* SUPPORT_UTF */
3336
3337 #ifdef SUPPORT_UCP
3338
/* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift used to split a character into a block number
(chr >> UCD_BLOCK_SHIFT) and an offset inside the block
(chr & UCD_BLOCK_MASK). */
#define UCD_BLOCK_MASK 127
#define UCD_BLOCK_SHIFT 7

static void do_getucd(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1 and UCD offset in TMP2.

The emitted routine performs the two stage table lookup
  ucd_records[ucd_stage2[ucd_stage1[chr >> 7] * 128 + (chr & 127)]]
and reads the chartype field of the selected record. It is entered and
left through the fast call mechanism using RETURN_ADDR. */
DEFINE_COMPILER;
#ifdef COMPILE_PCRE32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
/* Debug builds verify that the record selected for INVALID_UTF_CHAR
carries neutral property values. */
const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

/* The shift amounts used below (7 for the block, 3 for the record size)
depend on these values. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#ifdef COMPILE_PCRE32
if (!common->utf)
  {
  /* Without UTF checking any 32 bit value may arrive here; values above
  0x10ffff are replaced by INVALID_UTF_CHAR before indexing the tables. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* TMP2 = ucd_stage1[chr >> 7] (byte sized entries). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = TMP2 * 128 + (chr & 127): index into ucd_stage2. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1]; the index is scaled by 2 (shift 1) because the
entries are read as 16 bit values. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = ucd_records[TMP2].chartype; the index is scaled by 8 (shift 3),
the record size asserted above. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3383 #endif
3384
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
{
/* Emits the entry sequence of the main loop and returns the label which
starts the "advance STR_PTR by one character" code.

Layout of the emitted code:
  1. When common->match_end_ptr is non-zero, the end of the first line is
     searched and stored in that stack slot; STR_PTR is preserved in TMP3.
  2. An unconditional jump (start) skips the advance code, so the first
     pass through does not move STR_PTR.
  3. An optional out-of-line helper (newlinelabel) handling a possible two
     unit newline.
  4. The returned label, followed by code that advances STR_PTR by one
     character (one code unit plus, in UTF-8 / UTF-16 mode, the rest of the
     character). */
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *singlechar;
#endif
jump_list *newline = NULL;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;

/* The two unit newline helper is only generated when neither hascrorlf nor
a match end pointer is in effect and the newline convention may span two
code units. */
if (!(hascrorlf || (common->match_end_ptr != 0)) &&
    (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
  newlinecheck = TRUE;

if (common->match_end_ptr != 0)
  {
  /* Search for the end of the first line. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two unit fixed newline: compare the unit before STR_PTR with the
    high byte and the unit at STR_PTR with the low byte of common->newline.
    On exit the stored value is STR_PTR minus one code unit. */
    mainloop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
    JUMPHERE(end);
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    /* Generic case: remember the position before every character read, so
    that when a newline is recognised the slot holds its start. */
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    mainloop = LABEL();
    /* Continual stores does not cause data dependency. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char_range(common, common->nlmin, common->nlmax, TRUE);
    check_newlinechar(common, common->nltype, &newline, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
    JUMPHERE(end);
    /* No newline found: the current STR_PTR is stored instead. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    /* The newline jumps land after that store and keep the saved value. */
    set_jumps(newline, LABEL());
    }

  /* Restore the position saved at the start of the search. */
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }

/* The first pass jumps over the advance code below. */
start = JUMP(SLJIT_JUMP);

if (newlinecheck)
  {
  /* Reached from the main loop when the current unit equals the high byte
  of common->newline: step over it and, unless the end is reached, step
  over the next unit too when it equals the low byte. */
  newlinelabel = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  /* TMP1 = 1 on equality, 0 otherwise; scaled to a code unit size below. */
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  end2 = JUMP(SLJIT_JUMP);
  }

/* Label returned to the caller. */
mainloop = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
#ifdef SUPPORT_UTF
if (common->utf) readuchar = TRUE;
#endif
if (newlinecheck) readuchar = TRUE;

/* The current unit is only loaded when one of the tests below needs it. */
if (readuchar)
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (newlinecheck)
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
if (common->utf)
  {
  /* First bytes >= 0xc0 start a multi byte character: add the table entry
  indexed by (first byte - 0xc0) to STR_PTR. */
  singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(singlechar);
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  /* Units with (unit & 0xfc00) == 0xd800 are high surrogates: skip one more
  code unit (flag 0/1 shifted left by one) without branching. */
  singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(singlechar);
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Landing point of the initial jump and of the newline helper exits. */
JUMPHERE(start);

if (newlinecheck)
  {
  JUMPHERE(end);
  JUMPHERE(end2);
  }

return mainloop;
}
3502
3503 #define MAX_N_CHARS 16
3504 #define MAX_DIFF_CHARS 6
3505
add_prefix_char(pcre_uchar chr,pcre_uchar * chars)3506 static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
3507 {
3508 pcre_uchar i, len;
3509
3510 len = chars[0];
3511 if (len == 255)
3512 return;
3513
3514 if (len == 0)
3515 {
3516 chars[0] = 1;
3517 chars[1] = chr;
3518 return;
3519 }
3520
3521 for (i = len; i > 0; i--)
3522 if (chars[i] == chr)
3523 return;
3524
3525 if (len >= MAX_DIFF_CHARS - 1)
3526 {
3527 chars[0] = 255;
3528 return;
3529 }
3530
3531 len++;
3532 chars[len] = chr;
3533 chars[0] = len;
3534 }
3535
static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals.

Walks the compiled pattern starting at cc and records, for each successive
prefix position, the characters that may occur there.

Arguments:
  common     compiler state (utf flag, ctypes and fcc tables are read)
  cc         current position in the compiled pattern
  chars      per position character sets, MAX_DIFF_CHARS entries for each
             position, maintained through add_prefix_char(); a first entry
             of 255 marks a position where any character is accepted
  max_chars  number of positions which may still be filled
  rec_count  shared work budget; decremented once per loop iteration here
             and in every recursive invocation

Returns the number of positions processed by this invocation (consumed),
or 0 when the work budget is exhausted. Recursive callers assign the
result to their own max_chars, so a shorter alternative limits how far
the scan of the remaining pattern may go. */
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the other case form of one character, sized per code unit
width. */
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
pcre_uchar othercase[8];
#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
pcre_uchar othercase[2];
#else
pcre_uchar othercase[1];
#endif

repeat = 1;
while (TRUE)
  {
  /* Give up once the shared budget is used up. */
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* Per item state:
     last      stop scanning after this item
     any       the item accepts (nearly) any character
     class     the item is a 32 byte class bitmap
     caseless  the literal is matched caselessly */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through. */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    /* Assertion groups are skipped as a whole. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through. */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* One occurrence is recorded; last stays TRUE, so the scan stops after
    it. */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through. */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through. */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    len = 1;
    cc++;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    /* The character is optional: first merge what follows it (cc + len)
    into the same positions, then process the character itself below. */
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* End of the current alternative: follow the link. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Every alternative after the first is scanned recursively into the
    same positions; the first one is scanned by continuing this loop. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    /* Capturing brackets carry an extra IMM2_SIZE field. */
    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    /* In UTF-8 mode the scan stops here unless is_char7_bitset() accepts
    the bitmap. */
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Extra increment for these two opcodes; the shared increment below
    follows. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UTF
    case OP_NOTPROP:
    case OP_PROP:
#ifndef COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only the count is taken here; the type opcode which follows is
    handled by the next iteration with this repeat value. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    /* Anything else ends the prefix. */
    return consumed;
    }

  if (any)
    {
    /* Mark repeat positions as unrestricted. */
    do
      {
      chars[0] = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* The 32 byte bitmap follows the opcode; cc is moved to the item after
    it, which may be a repeat opcode. */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(pcre_uchar);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may match zero times: merge what follows the repeat
      opcode into the same positions first. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* The first number of the range is used as the repeat count; zero
      ends the prefix. */
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* A class whose last bit (bit 7 of byte 31) is set makes the
      position unrestricted. */
      if (bytes[31] & 0x80)
        chars[0] = 255;
      else if (chars[0] != 255)
        {
        /* Walk the bitmap and add every character whose bit is set, until
        the position overflows to 255 or the bitmap ends. */
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* Round up to the first character of the next bitmap byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars[0] != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Scanning continues only when the two numbers of the range are
      equal, i.e. the repeat count is exact. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* Literal character: len is its length in code units. */
  len = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UTF
    if (common->utf)
      {
      /* The other case form is only usable when it is encoded in the same
      number of code units. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
      othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record the code units of the character, one position per unit, and do
  so repeat times, restarting from the saved pointer each time. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      chr = *cc;
      add_prefix_char(*cc, chars);

      if (caseless)
        add_prefix_char(*oc, chars);

      len--;
      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
3933
3934 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
3935
character_to_int32(pcre_uchar chr)3936 static sljit_s32 character_to_int32(pcre_uchar chr)
3937 {
3938 sljit_s32 value = (sljit_s32)chr;
3939 #if defined COMPILE_PCRE8
3940 #define SSE2_COMPARE_TYPE_INDEX 0
3941 return ((unsigned int)value << 24) | ((unsigned int)value << 16) | ((unsigned int)value << 8) | (unsigned int)value;
3942 #elif defined COMPILE_PCRE16
3943 #define SSE2_COMPARE_TYPE_INDEX 1
3944 return ((unsigned int)value << 16) | value;
3945 #elif defined COMPILE_PCRE32
3946 #define SSE2_COMPARE_TYPE_INDEX 2
3947 return value;
3948 #else
3949 #error "Unsupported unit width"
3950 #endif
3951 }
3952
static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
{
/* Emits an SSE2 based search which advances STR_PTR to the next code unit
equal to char1 or char2 (the two may be the same value).

The x86 instructions are hand encoded into instruction[] and emitted with
sljit_emit_op_custom(). Register usage, as given by the ModRM bytes built
below:
  xmm0  16 bytes of subject data
  xmm1  second copy of the data (only when load_twice)
  xmm2  char1 | bit replicated into every lane
  xmm3  bit, or char2, replicated into every lane (only when the two
        characters differ)
TMP1 and TMP2 are destroyed; RETURN_ADDR is used as scratch storage when
load_twice is set.

The search has two parts: a first, possibly unaligned block (STR_PTR is
rounded down to 16 bytes and the match bits before the real start are
shifted out), followed by a loop over aligned 16 byte blocks. The position
found is not compared against STR_END here; only the block start is. */
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit[3];
struct sljit_jump *nomatch;
sljit_u8 instruction[8];
sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
BOOL load_twice = FALSE;
pcre_uchar bit;

/* When the two characters differ in exactly one bit, setting that bit in
the data (POR) lets a single compare against char1 | bit match both. */
bit = char1 ^ char2;
if (!is_powerof2(bit))
  bit = 0;

/* Otherwise two different characters need two copies of the data and two
compares. */
if ((char1 != char2) && bit == 0)
  load_twice = TRUE;

quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* First part (unaligned start) */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));

/* The encodings below have no room for extended register numbers for
TMP1, and rely on TMP2 having register index 1. */
SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);

/* MOVD xmm, r/m32 */
/* xmm2 = TMP1 */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;
instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (char1 != char2)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));

  /* MOVD xmm, r/m32 */
  /* xmm3 = TMP1 */
  instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PSHUFD xmm1, xmm2/m128, imm8 */
/* Replicate the low dword of xmm2 into all four dwords. */
instruction[2] = 0x70;
instruction[3] = 0xc0 | (2 << 3) | 2;
instruction[4] = 0;
sljit_emit_op_custom(compiler, instruction, 5);

if (char1 != char2)
  {
  /* PSHUFD xmm1, xmm2/m128, imm8 */
  /* Same replication for xmm3. */
  instruction[3] = 0xc0 | (3 << 3) | 3;
  instruction[4] = 0;
  sljit_emit_op_custom(compiler, instruction, 5);
  }

/* TMP2 = misalignment of STR_PTR (0..15); STR_PTR is rounded down to a 16
byte boundary so that the aligned load below is valid. */
OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);

/* MOVDQA xmm1, xmm2/m128 */
/* xmm0 (and xmm1 when load_twice) = 16 bytes at [STR_PTR]. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

if (str_ptr_ind < 8)
  {
  instruction[2] = 0x6f;
  instruction[3] = (0 << 3) | str_ptr_ind;
  sljit_emit_op_custom(compiler, instruction, 4);

  if (load_twice)
    {
    instruction[3] = (1 << 3) | str_ptr_ind;
    sljit_emit_op_custom(compiler, instruction, 4);
    }
  }
else
  {
  /* Register index 8 or above: a 0x41 prefix byte is inserted after 0x66
  and the opcode bytes move up by one. */
  instruction[1] = 0x41;
  instruction[2] = 0x0f;
  instruction[3] = 0x6f;
  instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
  sljit_emit_op_custom(compiler, instruction, 5);

  if (load_twice)
    {
    /* NOTE(review): unlike the line above, str_ptr_ind is not masked with
    0x7 here. For indexes 8..15 bit 3 coincides with the (1 << 3) register
    field, so the resulting byte is the same; masking would be clearer. */
    instruction[4] = (1 << 3) | str_ptr_ind;
    sljit_emit_op_custom(compiler, instruction, 5);
    }
  /* Restore the two byte opcode prefix for the instructions that follow. */
  instruction[1] = 0x0f;
  }

#else

instruction[2] = 0x6f;
instruction[3] = (0 << 3) | str_ptr_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  instruction[3] = (1 << 3) | str_ptr_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

#endif

if (bit != 0)
  {
  /* POR xmm1, xmm2/m128 */
  /* xmm0 |= xmm3: force the differing bit on in every data unit. */
  instruction[2] = 0xeb;
  instruction[3] = 0xc0 | (0 << 3) | 3;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PCMPEQB/W/D xmm1, xmm2/m128 */
/* xmm0 = (xmm0 == xmm2) per unit; the opcode is selected by the unit
width through SSE2_COMPARE_TYPE_INDEX. */
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
instruction[3] = 0xc0 | (0 << 3) | 2;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  /* xmm1 = (xmm1 == xmm3) per unit. */
  instruction[3] = 0xc0 | (1 << 3) | 3;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PMOVMSKB reg, xmm */
/* TMP1 = one bit per byte of xmm0. */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  /* TMP2 holds the misalignment and is needed again below, so it is
  parked in RETURN_ADDR while it receives the mask of xmm1. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
  instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
  sljit_emit_op_custom(compiler, instruction, 4);

  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
  }

/* Drop the mask bits which belong to bytes before the original STR_PTR. */
OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);

/* BSF r32, r/m32 */
/* TMP1 = index of the lowest set bit; the zero flag reports an empty
mask. */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

nomatch = JUMP(SLJIT_ZERO);

/* Found in the first block: aligned base + misalignment + bit index. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
quit[1] = JUMP(SLJIT_JUMP);

JUMPHERE(nomatch);

/* Loop over the following aligned 16 byte blocks. */
start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Second part (aligned) */

/* instruction[0..1] were overwritten by the BSF encoding above. */
instruction[0] = 0x66;
instruction[1] = 0x0f;

/* MOVDQA xmm1, xmm2/m128 */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

if (str_ptr_ind < 8)
  {
  instruction[2] = 0x6f;
  instruction[3] = (0 << 3) | str_ptr_ind;
  sljit_emit_op_custom(compiler, instruction, 4);

  if (load_twice)
    {
    instruction[3] = (1 << 3) | str_ptr_ind;
    sljit_emit_op_custom(compiler, instruction, 4);
    }
  }
else
  {
  instruction[1] = 0x41;
  instruction[2] = 0x0f;
  instruction[3] = 0x6f;
  instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
  sljit_emit_op_custom(compiler, instruction, 5);

  if (load_twice)
    {
    /* NOTE(review): same unmasked str_ptr_ind as in the first part; the
    encoded byte is unaffected for indexes 8..15. */
    instruction[4] = (1 << 3) | str_ptr_ind;
    sljit_emit_op_custom(compiler, instruction, 5);
    }
  instruction[1] = 0x0f;
  }

#else

instruction[2] = 0x6f;
instruction[3] = (0 << 3) | str_ptr_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  instruction[3] = (1 << 3) | str_ptr_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

#endif

if (bit != 0)
  {
  /* POR xmm1, xmm2/m128 */
  instruction[2] = 0xeb;
  instruction[3] = 0xc0 | (0 << 3) | 3;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PCMPEQB/W/D xmm1, xmm2/m128 */
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
instruction[3] = 0xc0 | (0 << 3) | 2;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  instruction[3] = 0xc0 | (1 << 3) | 3;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PMOVMSKB reg, xmm */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  /* TMP2 no longer holds anything needed, so it is not saved here. */
  instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
  sljit_emit_op_custom(compiler, instruction, 4);

  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  }

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

/* Nothing in this block: try the next one. */
JUMPTO(SLJIT_ZERO, start);

/* Found: aligned block start + bit index. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

/* Common exit for all three early outs. */
start = LABEL();
SET_LABEL(quit[0], start);
SET_LABEL(quit[1], start);
SET_LABEL(quit[2], start);
}
4212
4213 #undef SSE2_COMPARE_TYPE_INDEX
4214
4215 #endif
4216
/* Emits code that moves STR_PTR forward to the first start position whose
code unit at distance `offset` is char1 or char2. Pass the same value twice to
search for a single character.

  common  compiler state (DEFINE_COMPILER takes the sljit compiler from it)
  char1   first accepted code unit
  char2   second accepted code unit (may be equal to char1)
  offset  distance, in code units, between the start position and the code
          unit being searched for

STR_PTR is shifted forward by `offset` while searching and shifted back by the
same amount before the emitted code ends. When common->match_end_ptr is
non-zero, STR_END is temporarily lowered (its original value is kept in TMP3)
and restored before the emitted code ends. TMP1, and TMP2 in the two-compare
variant, are used as scratch registers by the code emitted here. */
static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *found;
pcre_uchar mask;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_label *utf_start = NULL;
struct sljit_jump *utf_quit = NULL;
#endif
BOOL has_match_end = (common->match_end_ptr != 0);

/* Search at the position of the wanted code unit rather than at the start
position. The shift is undone at the end of every path below. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (has_match_end)
  {
  /* Keep the real end of the subject in TMP3. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  /* STR_END = min(stored match end + offset + 1, real STR_END). */
  OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
  sljit_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
  }

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
/* Restart point used when a candidate turns out to begin in the middle of a
multi-unit character. */
if (common->utf && offset > 0)
  utf_start = LABEL();
#endif

#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)

/* SSE2 accelerated first character search. */

if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
  {
  /* NOTE(review): the SSE2 helper is defined earlier in the file; the code
  below assumes it leaves STR_PTR either on a matching code unit or at/after
  STR_END - confirm against its definition. */
  fast_forward_first_char2_sse2(common, char1, char2);

  SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
  if (common->mode == JIT_COMPILE)
    {
    /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
    SLJIT_ASSERT(common->forced_quit_label == NULL);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
    add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf && offset > 0)
      {
      SLJIT_ASSERT(common->mode == JIT_COMPILE);

      /* Load the code unit at the candidate start position (STR_PTR is still
      shifted by offset). If it is a UTF-8 continuation byte / UTF-16 low
      surrogate, resume the search one code unit further on. */
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8
      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
#elif defined COMPILE_PCRE16
      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
#else
#error "Unknown code width"
#endif
      /* Valid start position: undo the speculative increment. */
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      }
#endif

    /* Undo the initial shift. */
    if (offset > 0)
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
    }
  else
    {
    /* Other modes (offset is 0 here, see the assertion above): when the
    search ran to or past STR_END, set STR_PTR to the stored match end, or to
    STR_END when no match end is stored. */
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
    if (has_match_end)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
      sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, TMP1, 0);
      }
    else
      sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, STR_END, 0);
    }

  /* Put the real subject end back. */
  if (has_match_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

/* Generic search loop: one code unit per iteration. */
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

start = LABEL();
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (char1 == char2)
  found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
else
  {
  mask = char1 ^ char2;
  if (is_powerof2(mask))
    {
    /* The two characters differ in a single bit: force that bit on and
    compare once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
    found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
    }
  else
    {
    /* Two separate comparisons; TMP2 collects "equals char1 or char2". */
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
    OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
    found = JUMP(SLJIT_NOT_ZERO);
    }
  }

/* No match at this position: step forward and loop while inside the range. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
/* End reached without a match: jump over the UTF start-position check. */
if (common->utf && offset > 0)
  utf_quit = JUMP(SLJIT_JUMP);
#endif

JUMPHERE(found);

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && offset > 0)
  {
  /* Same start-position check as in the SSE2 path: restart from utf_start
  when the candidate start is a UTF-8 continuation byte / UTF-16 low
  surrogate. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
#elif defined COMPILE_PCRE16
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
#else
#error "Unknown code width"
#endif
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(utf_quit);
  }
#endif

JUMPHERE(quit);

if (has_match_end)
  {
  /* If the search ended at or beyond the lowered STR_END, set STR_PTR to the
  stored match end (shifted by offset, because the shift is removed below).
  Then restore the real subject end. */
  quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  if (offset > 0)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
  JUMPHERE(quit);
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  }

/* Undo the initial shift. */
if (offset > 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
}
4374
/* Tries to emit a fast-forward search based on the character sets that
scan_prefix() collects for the first positions of the pattern.

Two search strategies are produced:
  - If a run of at least 4 consecutive positions with restricted character
    sets exists, a 256-entry skip table indexed by the low byte of a code unit
    is built and a table-driven loop is emitted, optionally followed by an
    exact check of one position that has only one or two possible characters.
  - Otherwise, if some position has at most two possible characters, the work
    is delegated to fast_forward_first_char2().

Returns FALSE when nothing usable was found (no code is emitted in that case),
TRUE otherwise. Note that TRUE is also returned when the skip table cannot be
allocated. NOTE(review): presumably allocate_read_only_data() records the
failure elsewhere - confirm. */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *match;
/* One slot of MAX_DIFF_CHARS entries per pattern position. The first entry
of a slot (chars[i * MAX_DIFF_CHARS]) is the number of characters stored in
the following entries; 255 represents any character. */
pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
sljit_s32 offset;
pcre_uchar mask;
pcre_uchar *char_set, *char_set_end;
int i, max, from;
int range_right = -1, range_len;
sljit_u8 *update_table = NULL;
BOOL in_range;
sljit_u32 rec_count;

/* Start with empty character sets. */
for (i = 0; i < MAX_N_CHARS; i++)
  chars[i * MAX_DIFF_CHARS] = 0;

/* rec_count is handed to scan_prefix() by address. NOTE(review): it looks
like a work budget for the scan - confirm against scan_prefix(). */
rec_count = 10000;
max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);

if (max < 1)
  return FALSE;

/* Find the longest run of consecutive positions whose set is restricted
(count < 255). Only runs longer than 3 are accepted; range_right is the last
position of the best run and range_len its length. The loop runs one step past
max so that a run reaching the end is closed as well. */
in_range = FALSE;
/* Prevent compiler "uninitialized" warning */
from = 0;
range_len = 4 /* minimum length */ - 1;
for (i = 0; i <= max; i++)
  {
  if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
    {
    range_len = i - from;
    range_right = i - 1;
    }

  if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
    {
    SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
    if (!in_range)
      {
      in_range = TRUE;
      from = i;
      }
    }
  else
    in_range = FALSE;
  }

if (range_right >= 0)
  {
  /* Build the skip table. Every entry starts at the full run length; an
  entry is lowered to the distance (from the right end of the run) of the
  nearest position whose set contains a character with that low byte. An
  entry of 0 therefore means "this byte may occur at position range_right". */
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
  if (update_table == NULL)
    return TRUE;
  memset(update_table, IN_UCHARS(range_len), 256);

  for (i = 0; i < range_len; i++)
    {
    char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
    SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
    char_set_end = char_set + char_set[0];
    char_set++;
    while (char_set <= char_set_end)
      {
      if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
        update_table[(*char_set) & 0xff] = IN_UCHARS(i);
      char_set++;
      }
    }
  }

/* Choose a position to verify with an exact compare. The first position
with at most two characters is taken; it is replaced by a later one if that
has a single character, or if it has two characters differing in one bit
while the current choice does not. A single-character choice is never
replaced. */
offset = -1;
/* Scan forward. */
for (i = 0; i < max; i++)
  {
  if (offset == -1)
    {
    if (chars[i * MAX_DIFF_CHARS] <= 2)
      offset = i;
    }
  else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
    {
    if (chars[i * MAX_DIFF_CHARS] == 1)
      offset = i;
    else
      {
      mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
      if (!is_powerof2(mask))
        {
        mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
        if (is_powerof2(mask))
          offset = i;
        }
      }
    }
  }

if (range_right < 0)
  {
  /* No usable run: fall back to a one/two character search, if possible. */
  if (offset < 0)
    return FALSE;
  SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
  /* Works regardless the value is 1 or 2. */
  mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
  fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
  return TRUE;
  }

/* The table lookup already tests position range_right, so an exact check of
the same position is dropped. */
if (range_right == offset)
  offset = -1;

SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));

/* Lower STR_END by (max - 1) code units for the duration of the loop.
NOTE(review): presumably this keeps the reads at STR_PTR + range_right and
STR_PTR + offset inside the subject - confirm. */
max -= 1;
SLJIT_ASSERT(max > 0);
if (common->match_end_ptr != 0)
  {
  /* STR_END = min(STR_END - max, stored match end); the real end is kept in
  TMP3. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
  OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
  JUMPHERE(quit);
  }
else
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));

SLJIT_ASSERT(range_right >= 0);

/* Except on 32-bit x86, the table address is kept in RETURN_ADDR; on 32-bit
x86 it is used as an absolute displacement in the load below. */
#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
#endif

start = LABEL();
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Load one byte of the code unit at position range_right. In the non-8-bit
big-endian case the last byte of the code unit is addressed instead of the
first. */
#if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

/* Look up the skip distance and advance; a non-zero skip restarts the loop. */
#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);

if (offset >= 0)
  {
  /* Exact check of the chosen position. STR_PTR is incremented before the
  compare, so a failed check resumes the search at the next code unit; the
  increment is undone after all checks have passed. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[offset * MAX_DIFF_CHARS] == 1)
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
  else
    {
    mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
    if (is_powerof2(mask))
      {
      /* The two characters differ in one bit: set it and compare once. */
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
      }
    else
      {
      match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
      JUMPHERE(match);
      }
    }
  }

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && offset != 0)
  {
  /* Reject candidates that start on a UTF-8 continuation byte / UTF-16 low
  surrogate. With offset < 0 no exact check was emitted, so STR_PTR is
  incremented here and restored below; otherwise STR_PTR has already been
  incremented and the candidate start is at index -1. */
  if (offset < 0)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
#if defined COMPILE_PCRE8
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
#elif defined COMPILE_PCRE16
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
#else
#error "Unknown code width"
#endif
  if (offset < 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
#endif

/* Undo the increment made by the exact check. */
if (offset >= 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

JUMPHERE(quit);

if (common->match_end_ptr != 0)
  {
  /* Restore the real STR_END and clamp STR_PTR to the stored match end.
  range_right is always >= 0 here (see the assertion above). */
  if (range_right >= 0)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  if (range_right >= 0)
    {
    quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
    JUMPHERE(quit);
    }
  }
else
  /* Give back the code units taken from STR_END before the loop. */
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}
4597
4598 #undef MAX_N_CHARS
4599 #undef MAX_DIFF_CHARS
4600
fast_forward_first_char(compiler_common * common,pcre_uchar first_char,BOOL caseless)4601 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4602 {
4603 pcre_uchar oc;
4604
4605 oc = first_char;
4606 if (caseless)
4607 {
4608 oc = TABLE_GET(first_char, common->fcc, first_char);
4609 #if defined SUPPORT_UCP && !defined COMPILE_PCRE8
4610 if (first_char > 127 && common->utf)
4611 oc = UCD_OTHERCASE(first_char);
4612 #endif
4613 }
4614
4615 fast_forward_first_char2(common, first_char, oc, 0);
4616 }
4617
/* Emits code that moves STR_PTR forward to a position that follows a newline
sequence, according to common->nltype / common->newline. STR_PTR is left
unchanged by the emitted code when it is not beyond the `str` field of the
JIT arguments.

When common->match_end_ptr is non-zero, STR_END is replaced by the stored
match end for the duration of the search; the original value is kept in TMP3
and restored at the end. TMP1 and TMP2 are used as scratch registers. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *lastchar;
struct sljit_jump *firstchar;
struct sljit_jump *quit;
struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;

if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

/* Fixed newline made of two code units: the first is bits 8..15 of
common->newline, the second is bits 0..7. */
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
  /* Nothing to do at the end of the range or at/before args->str. */
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

  /* Step back one code unit when at least two code units lie between
  args->begin and STR_PTR: TMP2 = (STR_PTR >= begin + 2) ? 1 : 0, scaled to
  bytes for wide code units, then subtracted from STR_PTR. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

  /* Advance one code unit at a time until the two code units just before
  STR_PTR form the newline, or STR_END is reached. */
  loop = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);

  JUMPHERE(quit);
  JUMPHERE(firstchar);
  JUMPHERE(lastchar);

  if (common->match_end_ptr != 0)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

/* All other newline types. Nothing to do at/before args->str; otherwise step
back one character so that the character before STR_PTR is examined first. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
skip_char_back(common);

/* The loop label is also published in common->ff_newline_shortcut. */
loop = LABEL();
common->ff_newline_shortcut = loop;

read_char_range(common, common->nlmin, common->nlmax, TRUE);
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* CR needs special treatment because it may be followed by NL. */
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected in `newline` are bound to the loop label.
NOTE(review): with the FALSE argument these are presumably taken when the
character is NOT a newline - confirm against check_newlinechar(). */
check_newlinechar(common, common->nltype, &newline, FALSE);
set_jumps(newline, loop);

if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  {
  quit = JUMP(SLJIT_JUMP);
  JUMPHERE(foundcr);
  /* After CR: if the subject has not ended and the next code unit is NL,
  skip it too. TMP1 = (next == NL) ? 1 : 0, scaled to bytes for wide code
  units, then added to STR_PTR. */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(notfoundnl);
  JUMPHERE(quit);
  }
JUMPHERE(lastchar);
JUMPHERE(firstchar);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
4704
/* Forward declaration: check_class_ranges() is defined further down in this
file but is already needed by fast_forward_start_bits(). */
static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4706
/* Emits code that moves STR_PTR forward to the first position whose code
unit has its bit set in the 256-bit bitmap `start_bits`, or to STR_END when
there is none.

  common      compiler state
  start_bits  32-byte bitmap; bit (c & 7) of byte (c >> 3) belongs to code
              unit c. In non-8-bit builds, code units of 255 and above are
              all tested with bit 255.

When common->match_end_ptr is non-zero, STR_END is replaced by the stored
match end for the duration of the search; the original value is kept in
RETURN_ADDR and restored at the end. TMP1 and TMP2 are scratch registers;
TMP3 is used as well in UTF mode. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *found = NULL;
jump_list *matches = NULL;
#ifndef COMPILE_PCRE8
struct sljit_jump *jump;
#endif

if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

start = LABEL();
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#ifdef SUPPORT_UTF
/* The tests below may overwrite TMP1; keep a copy for the UTF skip. */
if (common->utf)
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
#endif

/* First try to express the bitmap as a few range compares. Because `invert`
is TRUE, the jumps collected in `matches` are taken for code units that ARE
in the bitmap. If that is not possible, test the bitmap directly. */
if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
  {
#ifndef COMPILE_PCRE8
  /* Clamp wide code units to 255 so that the bitmap index stays in range. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
  JUMPHERE(jump);
#endif
  /* TMP2 = 1 << (c & 7), TMP1 = start_bits[c >> 3]; found when the AND is
  non-zero. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  found = JUMP(SLJIT_NOT_ZERO);
  }

/* Not a possible start: move to the next character. */
#ifdef SUPPORT_UTF
if (common->utf)
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#ifdef SUPPORT_UTF
#if defined COMPILE_PCRE8
if (common->utf)
  {
  /* Bytes below 0xc0 need no extra skip. For the others the value loaded
  from utf8_table4[c - 0xc0] is added to STR_PTR. NOTE(review): presumably
  the number of additional bytes of the character - confirm. */
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  /* Code units below 0xd800 need no extra skip. If (c & 0xfc00) == 0xd800
  (a high surrogate), skip one more code unit (1 << 1 = 2 bytes). */
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF */
JUMPTO(SLJIT_JUMP, start);
/* All exits of the loop (match found or end reached) land here. */
if (found != NULL)
  JUMPHERE(found);
if (matches != NULL)
  set_jumps(matches, LABEL());
JUMPHERE(quit);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
4782
search_requested_char(compiler_common * common,pcre_uchar req_char,BOOL caseless,BOOL has_firstchar)4783 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4784 {
4785 DEFINE_COMPILER;
4786 struct sljit_label *loop;
4787 struct sljit_jump *toolong;
4788 struct sljit_jump *alreadyfound;
4789 struct sljit_jump *found;
4790 struct sljit_jump *foundoc = NULL;
4791 struct sljit_jump *notfound;
4792 sljit_u32 oc, bit;
4793
4794 SLJIT_ASSERT(common->req_char_ptr != 0);
4795 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4796 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4797 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4798 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4799
4800 if (has_firstchar)
4801 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4802 else
4803 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4804
4805 loop = LABEL();
4806 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4807
4808 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4809 oc = req_char;
4810 if (caseless)
4811 {
4812 oc = TABLE_GET(req_char, common->fcc, req_char);
4813 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4814 if (req_char > 127 && common->utf)
4815 oc = UCD_OTHERCASE(req_char);
4816 #endif
4817 }
4818 if (req_char == oc)
4819 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4820 else
4821 {
4822 bit = req_char ^ oc;
4823 if (is_powerof2(bit))
4824 {
4825 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4826 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4827 }
4828 else
4829 {
4830 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4831 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4832 }
4833 }
4834 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4835 JUMPTO(SLJIT_JUMP, loop);
4836
4837 JUMPHERE(found);
4838 if (foundoc)
4839 JUMPHERE(foundoc);
4840 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4841 JUMPHERE(alreadyfound);
4842 JUMPHERE(toolong);
4843 return notfound;
4844 }
4845
/* Emits a fast-call routine (entered with sljit_emit_fast_enter, return
address in RETURN_ADDR) that walks the records below STACK_TOP and writes the
saved values back into the local area.

The word directly below STACK_TOP selects the record type:
  > 0  offset into the local area; two saved words follow and are written to
       that offset and to the next word (record size: 3 words)
  < 0  negated offset into the local area; one saved word follows (record
       size: 2 words)
  = 0  end marker; STACK_TOP is restored to its entry value and the routine
       returns

TMP1 holds the base obtained from GET_LOCAL_BASE, TMP2 is scratch and TMP3
holds the entry value of STACK_TOP. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* STACK_TOP is used as the walking pointer; remember where it started. */
OP1(SLJIT_MOV, TMP3, 0, STACK_TOP, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive tag: restore two consecutive words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -3 * sizeof(sljit_sw));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* The tag is <= 0 here, so "not zero" means negative. */
jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
OP1(SLJIT_MOV, STACK_TOP, 0, TMP3, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Negative tag: restore a single word at offset -tag. */
JUMPHERE(jump);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);
}
4880
/* Emits a fast-call routine that tests whether STR_PTR is at a word
boundary.

The routine computes two 0/1 values: "the character before STR_PTR is a word
character" (stored in LOCALS1; 0 when STR_PTR is at or before args->begin)
and "the character at STR_PTR is a word character" (in TMP2; 0 when
check_str_end() reports the end). The final XOR of the two sets the zero
flag: it is set when both values are equal (no boundary) and clear when they
differ (boundary).

A word character is an underscore or a character whose Unicode type lies in
ucp_Ll..ucp_Lu or ucp_Nd..ucp_No when common->use_ucp is set; otherwise it is
a character below 256 with the ctype_word bit (0x10) set in common->ctypes.

The return address is kept in LOCALS0. TMP1 and TMP2 are destroyed. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
struct sljit_jump *jump;
#endif

/* The ">> 4, & 1" extraction below relies on this bit position. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
/* No previous character at the beginning: LOCALS1 stays 0. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again, which leaves STR_PTR where it
was. NOTE(review): this assumes read_char() advances STR_PTR past the
character it reads - confirm. */
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

/* Testing char type. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  /* TMP2 = 1 for underscore. Otherwise the getucd routine is called
  (NOTE(review): presumably it leaves the character type in TMP1 - confirm)
  and TMP2 = (type in ucp_Ll..ucp_Lu) | (type in ucp_Nd..ucp_No), each range
  tested with one subtract and an unsigned compare. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  }
else
#endif
  {
  /* Characters above 255 skip the table lookup, so LOCALS1 keeps the 0
  stored above. */
#ifndef COMPILE_PCRE8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  /* Here LOCALS1 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* COMPILE_PCRE8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
JUMPHERE(skipread);

/* Now the character at STR_PTR. TMP2 = 0 is the result when the jumps
collected by check_str_end() are taken. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif
  {
#ifndef COMPILE_PCRE8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
set_jumps(skipread_list, LABEL());

/* Zero flag set: both sides are of the same kind, i.e. no word boundary. */
OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
4989
/* Tries to test the character in TMP1 against a 256-bit class bitmap with a
few compare instructions instead of a bitmap lookup.

  common      compiler state
  bits        32-byte bitmap; bit (c & 7) of byte (c >> 3) belongs to
              character c
  nclass      membership value assumed for characters of 256 and above; when
              it differs from bit 255 of the bitmap, an extra boundary at 256
              is recorded
  invert      when FALSE, the emitted jumps are taken for characters that are
              NOT in the class; when TRUE, for characters that ARE in it
  backtracks  list that receives the emitted jumps

The bitmap is scanned for the positions where the bit value changes. If there
are more than MAX_RANGE_SIZE such positions, or more than 4, FALSE is returned
and no code has been emitted. Otherwise the compare code is emitted and TRUE
is returned. */
static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
int ranges[MAX_RANGE_SIZE];
sljit_u8 bit, cbit, all;
int i, byte, length = 0;

bit = bits[0] & 0x1;
/* All bits will be zero or one (since bit is zero or one). */
all = -bit;

/* Collect the positions where the bit value flips. Whole bytes that equal
the current value (0x00 or 0xff) are skipped eight bits at a time. */
for (i = 0; i < 256; )
  {
  byte = i >> 3;
  if ((i & 0x7) == 0 && bits[byte] == all)
    i += 8;
  else
    {
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
    if (cbit != bit)
      {
      if (length >= MAX_RANGE_SIZE)
        return FALSE;
      ranges[length] = i;
      length++;
      bit = cbit;
      all = -cbit;
      }
    i++;
    }
  }

/* Add a boundary at 256 when the value of the last bit and nclass disagree. */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  {
  if (length >= MAX_RANGE_SIZE)
    return FALSE;
  ranges[length] = 256;
  length++;
  }

/* Only up to four boundaries are handled by the code below. */
if (length < 0 || length > 4)
  return FALSE;

/* From here on `bit` is the (possibly inverted) value of the first segment,
i.e. of the characters below ranges[0]. */
bit = bits[0] & 0x1;
if (invert) bit ^= 0x1;

/* No character is accepted. */
if (length == 0 && bit == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(length)
  {
  case 0:
  /* When bit != 0, all characters are accepted. */
  return TRUE;

  case 1:
  /* One boundary: a single threshold compare. */
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  /* One interval [ranges[0], ranges[1]): subtract and compare unsigned, or
  a plain equality test when the interval holds a single character. */
  if (ranges[0] + 1 != ranges[1])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  else
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 3:
  /* Three boundaries: a threshold test plus one interval test. */
  if (bit != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  return TRUE;

  case 4:
  /* Special case: two intervals of equal length whose start positions
  differ in exactly one bit that is clear throughout the lower interval.
  Setting that bit in TMP1 folds the lower interval onto the upper one, so
  a single interval test is enough. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
      && is_powerof2(ranges[2] - ranges[0]))
    {
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    return TRUE;
    }

  if (bit != 0)
    {
    /* Two interval tests in sequence; `i` remembers how much has already
    been subtracted from TMP1 by the first test. */
    i = 0;
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      i = ranges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));

    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
    return TRUE;
    }

  /* bit == 0: jump for everything outside [ranges[0], ranges[3]) and for
  the gap [ranges[1], ranges[2]) in between. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
5141
/* Emits a fast-call routine (return address in RETURN_ADDR) that tests
whether the character in TMP1 is one of the "any newline" characters:
0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in 16/32-bit builds and in UTF
mode of the 8-bit build.

Result: TMP2 is non-zero and the zero flag is clear when TMP1 was such a
character; TMP2 is zero and the zero flag is set otherwise. TMP1 is modified
as well. */
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range test. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps the rebased 0x2028 onto the rebased 0x2029,
  so one compare covers both. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last compare and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5168
/* Emits a fast-call routine (return address in RETURN_ADDR) that tests
whether the character in TMP1 is a horizontal space: 0x09, 0x20 and 0xa0,
plus 0x1680, 0x180e, 0x2000..0x200A, 0x202f, 0x205f and 0x3000 in 16/32-bit
builds and in UTF mode of the 8-bit build.

Result: TMP2 is non-zero and the zero flag is clear when TMP1 was such a
character; TMP2 is zero and the zero flag is set otherwise. TMP1 is modified
when the wide-character tests are emitted. */
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Each equality test sets the flags; its result is OR-ed into TMP2 by the
OP_FLAGS that follows it. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Rebase to 0x2000: 0x2000..0x200A becomes one unsigned range test and
  the remaining constants are compared relative to 0x2000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last compare and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5207
/* Emits the shared fast-call routine that tests whether the character in TMP1
is a vertical whitespace character: 0x0a..0x0d, 0x85 and, when wide characters
are possible (UTF mode in the 8 bit library, always in the 16/32 bit
libraries), 0x2028 and 0x2029. The emitted sequence is the same as the one
produced by check_anynewline().

Each comparison leaves its outcome in the CPU flags and the following OP_FLAGS
folds that outcome into TMP2, so the statement order below must be kept. The
last OP_FLAGS carries SLJIT_SET_Z, so the routine returns with the zero flag
derived from the accumulated value in TMP2. TMP1 and TMP2 are both modified. */
static void check_vspace(compiler_common *common)
{
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase the character so that 0x0a..0x0d becomes 0..3 and can be tested with
a single LESS_EQUAL comparison. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
/* The result of this comparison is consumed by the next OP_FLAGS, which is
either the one inside the wide character section or the final one. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps both 0x2028 - 0x0a (0x201e) and 0x2029 - 0x0a
  (0x201f) to 0x201f, so one equality test covers both characters. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold the last pending comparison and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5235
/* Emits the shared fast-call routine that compares two character sequences
code unit by code unit, without case folding.

Register use of the emitted code, as established by the statements below:
  TMP1     address of the first sequence; advanced while comparing and
           finally overwritten with the return address
  TMP2     remaining length; it is subtracted from STR_PTR on entry and is
           decreased by IN_UCHARS(1) after every matching unit
  STR_PTR  after the initial subtraction it addresses the second sequence

The loop is left either when TMP2 reaches zero or at the first mismatch. The
mismatch jump skips the decrement, so TMP2 is not reduced for the failing unit.
NOTE(review): callers presumably test TMP2 after the call - confirm at the
call sites.

The return address is kept in the LOCALS0 stack slot because RETURN_ADDR may
be used as a character register here. */
static void do_casefulcmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg;
int char2_reg;

/* A negative register index means TMP3 cannot be used directly as a character
register, so STR_END and STACK_TOP are borrowed instead. Their values are
parked in TMP3 and RETURN_ADDR for the duration of the routine. */
if (sljit_get_register_index(TMP3) < 0)
  {
  char1_reg = STR_END;
  char2_reg = STACK_TOP;
  }
else
  {
  char1_reg = TMP3;
  char2_reg = RETURN_ADDR;
  }

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* The SLJIT_MEM_SUPP calls only query whether the addressing form is
available; the loads themselves are emitted inside the selected branch. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  {
  /* Load with post-update: the pointers are advanced by the loads. */
  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  }
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  {
  /* Load with pre-update: both pointers start one unit early, and STR_PTR is
  moved forward by one unit again after the loop. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
else
  {
  /* Plain loads followed by explicit pointer increments. */
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, label);

  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  }

/* Give the borrowed registers their original values back. */
if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

/* TMP1 holds the return address reloaded from LOCALS0. */
sljit_emit_fast_return(compiler, TMP1, 0);
}
5315
/* Emits the shared fast-call routine that compares two character sequences
code unit by code unit after mapping each unit through the common->lcc table.
In the 16 and 32 bit libraries only values up to 255 are mapped; larger values
are compared unchanged.

Register use of the emitted code, as established by the statements below:
  TMP1     address of the first sequence; advanced while comparing and
           finally overwritten with the return address
  TMP2     remaining length; it is subtracted from STR_PTR on entry and is
           decreased by IN_UCHARS(1) after every matching unit
  STR_PTR  after the initial subtraction it addresses the second sequence

The loop is left either when TMP2 reaches zero or at the first mismatch. The
mismatch jump skips the decrement, so TMP2 is not reduced for the failing unit.
NOTE(review): callers presumably test TMP2 after the call - confirm at the
call sites.

The return address is kept in the LOCALS0 stack slot and the original value of
STR_END (used as the first character register) in the LOCALS1 stack slot. */
static void do_caselesscmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg = STR_END;
int char2_reg;
int lcc_table;
/* 0: plain loads, 1: loads with post-update, 2: loads with pre-update. */
int opt_type = 0;

/* A negative register index means TMP3 cannot hold the table address, so
STACK_TOP and STACK_LIMIT are borrowed instead. Their values are parked in
TMP3 and RETURN_ADDR for the duration of the routine. */
if (sljit_get_register_index(TMP3) < 0)
  {
  char2_reg = STACK_TOP;
  lcc_table = STACK_LIMIT;
  }
else
  {
  char2_reg = RETURN_ADDR;
  lcc_table = TMP3;
  }

/* The SLJIT_MEM_SUPP calls only query whether the addressing form is
available; the loads themselves are emitted further below. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 2;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

if (opt_type == 1)
  {
  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else if (opt_type == 2)
  {
  /* Pre-update loads: both pointers start one unit early, and STR_PTR is
  moved forward by one unit again after the loop. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else
  {
  /* Plain loads; TMP1 is advanced here, STR_PTR after the table lookups. */
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  }

/* Replace each character by its table entry. Outside the 8 bit library the
lookup is skipped for values above 255. */
#ifndef COMPILE_PCRE8
jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
#endif

if (opt_type == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (opt_type == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Give the borrowed registers their original values back. */
if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
/* TMP1 holds the return address reloaded from LOCALS0. */
sljit_emit_fast_return(compiler, TMP1, 0);
}
5413
5414 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5415
do_utf_caselesscmp(pcre_uchar * src1,pcre_uchar * src2,pcre_uchar * end1,pcre_uchar * end2)5416 static const pcre_uchar * SLJIT_FUNC do_utf_caselesscmp(pcre_uchar *src1, pcre_uchar *src2, pcre_uchar *end1, pcre_uchar *end2)
5417 {
5418 /* This function would be ineffective to do in JIT level. */
5419 sljit_u32 c1, c2;
5420 const ucd_record *ur;
5421 const sljit_u32 *pp;
5422
5423 while (src1 < end1)
5424 {
5425 if (src2 >= end2)
5426 return (pcre_uchar*)1;
5427 GETCHARINC(c1, src1);
5428 GETCHARINC(c2, src2);
5429 ur = GET_UCD(c2);
5430 if (c1 != c2 && c1 != c2 + ur->other_case)
5431 {
5432 pp = PRIV(ucd_caseless_sets) + ur->caseset;
5433 for (;;)
5434 {
5435 if (c1 < *pp) return NULL;
5436 if (c1 == *pp++) break;
5437 }
5438 }
5439 }
5440 return src2;
5441 }
5442
5443 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5444
/* Emits the code that compares one pattern character (a single code unit, or
every code unit of a multi-unit UTF character) against the subject. Comparison
failures jump to the backtracks list.

Arguments:
  common      compiler state
  caseless    TRUE when the character has to be matched caselessly
  cc          the pattern character
  context     state shared by consecutive calls:
                length     distance of the next unread subject unit from
                           STR_PTR; used as a negative memory displacement and
                           decreased by IN_UCHARS(1) for every unit
                sourcereg  register that receives the next subject load, or -1
                           before the first call
                ucharptr   number of units collected in c / oc so far
                c, oc      expected value and case-bit mask of the chunk that
                           is being collected (unaligned read path only)
  backtracks  jump list that receives the mismatch jumps

Returns: cc advanced past the units that were processed.

Caseless matching is only done here when the two cases differ in a single bit:
that bit is set both in the loaded subject value and in the expected value
before they are compared. */
static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
pcre_uchar *othercasechar = NULL;
#ifdef SUPPORT_UTF
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. The upper part of the value is the offset
  of the code unit that contains the differing bit, the lowest eight bits are
  the bit mask itself. */
#if defined COMPILE_PCRE8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for COMPILE_PCRE32. */
  othercasechar = cc + (othercasebit >> 9);
  /* Bit 0x100 selects the upper byte of the code unit for the mask. */
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* COMPILE_PCRE[8|16|32] */
  }

/* First call for this context: load the first chunk into TMP1 and direct the
next load to TMP2. The two registers are swapped after every load, so a chunk
is compared after the load of the following chunk has been emitted. */
if (context->sourcereg == -1)
  {
#if defined COMPILE_PCRE8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE[8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UTF
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)

  /* Unaligned read is supported. The units are collected in context->c and
  context->oc, and a whole chunk is compared with one instruction. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* A chunk is complete when it fills four bytes, when nothing is left, or
  (8 bit library only) when two bytes are collected and a single byte remains. */
#if defined COMPILE_PCRE8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Emit the load of the following chunk, if there is one. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if defined COMPILE_PCRE8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE8 */
    /* Switch to the register that holds the chunk collected above. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    switch(context->ucharptr)
      {
      case 4 / sizeof(pcre_uchar):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(pcre_uchar):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#ifdef COMPILE_PCRE8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. Every unit is loaded
  and compared on its own. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  /* Switch to the register that holds the unit loaded one step earlier. */
  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UTF
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
5597
5598 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5599
/* Helpers for the code generator of extended classes. The generated code
keeps the register biased by the current offset, so that a range check needs
only one comparison. When the requested offset differs from the current one,
a single ADD or SUB is emitted to rebias the register, then the new offset is
recorded.

SET_TYPE_OFFSET works on the register named by typereg and on typeoffset,
SET_CHAR_OFFSET works on TMP1 and on charoffset; these names must be in scope
where the macros are used. The argument is evaluated more than once, so it
must not have side effects. The do / while (0) wrapper makes each macro a
single statement, which keeps it safe inside an if / else without braces. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
5619
5620 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5621
compile_xclass_matchingpath(compiler_common * common,pcre_uchar * cc,jump_list ** backtracks)5622 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5623 {
5624 DEFINE_COMPILER;
5625 jump_list *found = NULL;
5626 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
5627 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
5628 struct sljit_jump *jump = NULL;
5629 pcre_uchar *ccbegin;
5630 int compares, invertcmp, numberofcmps;
5631 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5632 BOOL utf = common->utf;
5633 #endif
5634
5635 #ifdef SUPPORT_UCP
5636 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
5637 BOOL charsaved = FALSE;
5638 int typereg = TMP1;
5639 const sljit_u32 *other_cases;
5640 sljit_uw typeoffset;
5641 #endif
5642
5643 /* Scanning the necessary info. */
5644 cc++;
5645 ccbegin = cc;
5646 compares = 0;
5647 if (cc[-1] & XCL_MAP)
5648 {
5649 min = 0;
5650 cc += 32 / sizeof(pcre_uchar);
5651 }
5652
5653 while (*cc != XCL_END)
5654 {
5655 compares++;
5656 if (*cc == XCL_SINGLE)
5657 {
5658 cc ++;
5659 GETCHARINCTEST(c, cc);
5660 if (c > max) max = c;
5661 if (c < min) min = c;
5662 #ifdef SUPPORT_UCP
5663 needschar = TRUE;
5664 #endif
5665 }
5666 else if (*cc == XCL_RANGE)
5667 {
5668 cc ++;
5669 GETCHARINCTEST(c, cc);
5670 if (c < min) min = c;
5671 GETCHARINCTEST(c, cc);
5672 if (c > max) max = c;
5673 #ifdef SUPPORT_UCP
5674 needschar = TRUE;
5675 #endif
5676 }
5677 #ifdef SUPPORT_UCP
5678 else
5679 {
5680 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5681 cc++;
5682 if (*cc == PT_CLIST)
5683 {
5684 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5685 while (*other_cases != NOTACHAR)
5686 {
5687 if (*other_cases > max) max = *other_cases;
5688 if (*other_cases < min) min = *other_cases;
5689 other_cases++;
5690 }
5691 }
5692 else
5693 {
5694 max = READ_CHAR_MAX;
5695 min = 0;
5696 }
5697
5698 switch(*cc)
5699 {
5700 case PT_ANY:
5701 /* Any either accepts everything or ignored. */
5702 if (cc[-1] == XCL_PROP)
5703 {
5704 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
5705 if (list == backtracks)
5706 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5707 return;
5708 }
5709 break;
5710
5711 case PT_LAMP:
5712 case PT_GC:
5713 case PT_PC:
5714 case PT_ALNUM:
5715 needstype = TRUE;
5716 break;
5717
5718 case PT_SC:
5719 needsscript = TRUE;
5720 break;
5721
5722 case PT_SPACE:
5723 case PT_PXSPACE:
5724 case PT_WORD:
5725 case PT_PXGRAPH:
5726 case PT_PXPRINT:
5727 case PT_PXPUNCT:
5728 needstype = TRUE;
5729 needschar = TRUE;
5730 break;
5731
5732 case PT_CLIST:
5733 case PT_UCNC:
5734 needschar = TRUE;
5735 break;
5736
5737 default:
5738 SLJIT_UNREACHABLE();
5739 break;
5740 }
5741 cc += 2;
5742 }
5743 #endif
5744 }
5745 SLJIT_ASSERT(compares > 0);
5746
5747 /* We are not necessary in utf mode even in 8 bit mode. */
5748 cc = ccbegin;
5749 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
5750
5751 if ((cc[-1] & XCL_HASPROP) == 0)
5752 {
5753 if ((cc[-1] & XCL_MAP) != 0)
5754 {
5755 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5756 if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
5757 {
5758 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5759 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5760 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5761 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5762 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5763 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
5764 }
5765
5766 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5767 JUMPHERE(jump);
5768
5769 cc += 32 / sizeof(pcre_uchar);
5770 }
5771 else
5772 {
5773 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
5774 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
5775 }
5776 }
5777 else if ((cc[-1] & XCL_MAP) != 0)
5778 {
5779 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5780 #ifdef SUPPORT_UCP
5781 charsaved = TRUE;
5782 #endif
5783 if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
5784 {
5785 #ifdef COMPILE_PCRE8
5786 jump = NULL;
5787 if (common->utf)
5788 #endif
5789 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5790
5791 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5792 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5793 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5794 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5795 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5796 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
5797
5798 #ifdef COMPILE_PCRE8
5799 if (common->utf)
5800 #endif
5801 JUMPHERE(jump);
5802 }
5803
5804 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5805 cc += 32 / sizeof(pcre_uchar);
5806 }
5807
5808 #ifdef SUPPORT_UCP
5809 if (needstype || needsscript)
5810 {
5811 if (needschar && !charsaved)
5812 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5813
5814 #ifdef COMPILE_PCRE32
5815 if (!common->utf)
5816 {
5817 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
5818 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5819 JUMPHERE(jump);
5820 }
5821 #endif
5822
5823 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5824 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5825 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5826 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5827 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5828 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5829 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5830
5831 /* Before anything else, we deal with scripts. */
5832 if (needsscript)
5833 {
5834 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
5835 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5836
5837 ccbegin = cc;
5838
5839 while (*cc != XCL_END)
5840 {
5841 if (*cc == XCL_SINGLE)
5842 {
5843 cc ++;
5844 GETCHARINCTEST(c, cc);
5845 }
5846 else if (*cc == XCL_RANGE)
5847 {
5848 cc ++;
5849 GETCHARINCTEST(c, cc);
5850 GETCHARINCTEST(c, cc);
5851 }
5852 else
5853 {
5854 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5855 cc++;
5856 if (*cc == PT_SC)
5857 {
5858 compares--;
5859 invertcmp = (compares == 0 && list != backtracks);
5860 if (cc[-1] == XCL_NOTPROP)
5861 invertcmp ^= 0x1;
5862 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5863 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5864 }
5865 cc += 2;
5866 }
5867 }
5868
5869 cc = ccbegin;
5870 }
5871
5872 if (needschar)
5873 {
5874 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5875 }
5876
5877 if (needstype)
5878 {
5879 if (!needschar)
5880 {
5881 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5882 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5883 }
5884 else
5885 {
5886 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5887 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5888 typereg = RETURN_ADDR;
5889 }
5890 }
5891 }
5892 #endif
5893
5894 /* Generating code. */
5895 charoffset = 0;
5896 numberofcmps = 0;
5897 #ifdef SUPPORT_UCP
5898 typeoffset = 0;
5899 #endif
5900
5901 while (*cc != XCL_END)
5902 {
5903 compares--;
5904 invertcmp = (compares == 0 && list != backtracks);
5905 jump = NULL;
5906
5907 if (*cc == XCL_SINGLE)
5908 {
5909 cc ++;
5910 GETCHARINCTEST(c, cc);
5911
5912 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5913 {
5914 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5915 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5916 numberofcmps++;
5917 }
5918 else if (numberofcmps > 0)
5919 {
5920 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5921 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5922 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5923 numberofcmps = 0;
5924 }
5925 else
5926 {
5927 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5928 numberofcmps = 0;
5929 }
5930 }
5931 else if (*cc == XCL_RANGE)
5932 {
5933 cc ++;
5934 GETCHARINCTEST(c, cc);
5935 SET_CHAR_OFFSET(c);
5936 GETCHARINCTEST(c, cc);
5937
5938 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5939 {
5940 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5941 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5942 numberofcmps++;
5943 }
5944 else if (numberofcmps > 0)
5945 {
5946 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5947 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
5948 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5949 numberofcmps = 0;
5950 }
5951 else
5952 {
5953 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5954 numberofcmps = 0;
5955 }
5956 }
5957 #ifdef SUPPORT_UCP
5958 else
5959 {
5960 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5961 if (*cc == XCL_NOTPROP)
5962 invertcmp ^= 0x1;
5963 cc++;
5964 switch(*cc)
5965 {
5966 case PT_ANY:
5967 if (!invertcmp)
5968 jump = JUMP(SLJIT_JUMP);
5969 break;
5970
5971 case PT_LAMP:
5972 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5973 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5974 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5975 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5976 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5977 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5978 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5979 break;
5980
5981 case PT_GC:
5982 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5983 SET_TYPE_OFFSET(c);
5984 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5985 break;
5986
5987 case PT_PC:
5988 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5989 break;
5990
5991 case PT_SC:
5992 compares++;
5993 /* Do nothing. */
5994 break;
5995
5996 case PT_SPACE:
5997 case PT_PXSPACE:
5998 SET_CHAR_OFFSET(9);
5999 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
6000 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6001
6002 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
6003 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6004
6005 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
6006 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6007
6008 SET_TYPE_OFFSET(ucp_Zl);
6009 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
6010 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6011 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6012 break;
6013
6014 case PT_WORD:
6015 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
6016 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6017 /* Fall through. */
6018
6019 case PT_ALNUM:
6020 SET_TYPE_OFFSET(ucp_Ll);
6021 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
6022 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6023 SET_TYPE_OFFSET(ucp_Nd);
6024 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
6025 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6026 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6027 break;
6028
6029 case PT_CLIST:
6030 other_cases = PRIV(ucd_caseless_sets) + cc[1];
6031
6032 /* At least three characters are required.
6033 Otherwise this case would be handled by the normal code path. */
6034 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
6035 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
6036
6037 /* Optimizing character pairs, if their difference is power of 2. */
6038 if (is_powerof2(other_cases[1] ^ other_cases[0]))
6039 {
6040 if (charoffset == 0)
6041 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6042 else
6043 {
6044 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
6045 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6046 }
6047 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
6048 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6049 other_cases += 2;
6050 }
6051 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
6052 {
6053 if (charoffset == 0)
6054 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
6055 else
6056 {
6057 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
6058 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6059 }
6060 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
6061 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6062
6063 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
6064 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
6065
6066 other_cases += 3;
6067 }
6068 else
6069 {
6070 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
6071 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6072 }
6073
6074 while (*other_cases != NOTACHAR)
6075 {
6076 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
6077 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
6078 }
6079 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6080 break;
6081
6082 case PT_UCNC:
6083 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
6084 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6085 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
6086 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6087 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
6088 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6089
6090 SET_CHAR_OFFSET(0xa0);
6091 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
6092 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6093 SET_CHAR_OFFSET(0);
6094 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
6095 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
6096 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6097 break;
6098
6099 case PT_PXGRAPH:
6100 /* C and Z groups are the farthest two groups. */
6101 SET_TYPE_OFFSET(ucp_Ll);
6102 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
6103 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
6104
6105 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
6106
6107 /* In case of ucp_Cf, we overwrite the result. */
6108 SET_CHAR_OFFSET(0x2066);
6109 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
6110 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6111
6112 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
6113 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6114
6115 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
6116 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6117
6118 JUMPHERE(jump);
6119 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
6120 break;
6121
6122 case PT_PXPRINT:
6123 /* C and Z groups are the farthest two groups. */
6124 SET_TYPE_OFFSET(ucp_Ll);
6125 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
6126 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
6127
6128 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
6129 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
6130
6131 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
6132
6133 /* In case of ucp_Cf, we overwrite the result. */
6134 SET_CHAR_OFFSET(0x2066);
6135 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
6136 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6137
6138 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
6139 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6140
6141 JUMPHERE(jump);
6142 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
6143 break;
6144
6145 case PT_PXPUNCT:
6146 SET_TYPE_OFFSET(ucp_Sc);
6147 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
6148 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6149
6150 SET_CHAR_OFFSET(0);
6151 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
6152 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
6153
6154 SET_TYPE_OFFSET(ucp_Pc);
6155 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
6156 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6157 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6158 break;
6159
6160 default:
6161 SLJIT_UNREACHABLE();
6162 break;
6163 }
6164 cc += 2;
6165 }
6166 #endif
6167
6168 if (jump != NULL)
6169 add_jump(compiler, compares > 0 ? list : backtracks, jump);
6170 }
6171
6172 if (found != NULL)
6173 set_jumps(found, LABEL());
6174 }
6175
6176 #undef SET_TYPE_OFFSET
6177 #undef SET_CHAR_OFFSET
6178
6179 #endif
6180
/* Generates the matching path of the "simple" assertions handled by the
switch below: OP_SOD, OP_SOM, OP_NOT_WORD_BOUNDARY, OP_WORD_BOUNDARY, OP_EODN,
OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC, OP_CIRCM and OP_REVERSE.

Arguments:
  common      compiler state
  type        the opcode to be compiled
  cc          byte code pointer; only OP_REVERSE reads data from it (its
              length operand is fetched with GET(cc, 0))
  backtracks  jump list which collects the jumps taken when the assertion
              does not hold

Returns: cc for every opcode except OP_REVERSE, which returns cc + LINK_SIZE.
*/
static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int length;
struct sljit_jump *jump[4];
#ifdef SUPPORT_UTF
struct sljit_label *label;
#endif /* SUPPORT_UTF */

switch(type)
  {
  case OP_SOD:
  /* Fails unless STR_PTR is equal to jit_arguments.begin. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_SOM:
  /* Fails unless STR_PTR is equal to jit_arguments.str. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  /* The shared wordboundary helper is invoked with a fast call, and its
  outcome is tested through the zero flag: OP_WORD_BOUNDARY fails on zero,
  OP_NOT_WORD_BOUNDARY fails on non-zero. */
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_EODN:
  /* Requires rather complex checks. */
  /* jump[0]: STR_PTR is already at (or beyond) STR_END, no newline test is needed. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed newline made of two code units: high byte first, then low byte. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == JIT_COMPILE)
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    else
      {
      /* jump[1]: exactly two code units remain, so do the full comparison below. */
      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      /* TMP2 = (STR_PTR + 2 < STR_END) | (first unit != first newline unit).
      A non-zero result is a plain failure. */
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      /* Only the first unit of the newline is available: emit the partial
      match check, then fail. */
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* Single unit fixed newline: it must be the last code unit. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    /* jump[1]: the current unit is not CR.
       jump[2]: CR is the last unit of the subject (STR_PTR + 2 > STR_END).
       jump[3]: CR is followed by NL and they are the last two units. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
    jump[2] = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
    /* Equal. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    JUMPHERE(jump[1]);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      /* Not CR: the only other accepted newline is NL as the last code unit. */
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* STR_PTR is saved in LOCALS1 because read_char_range() advances it;
      it is restored once the character has been accepted by the anynewline
      helper. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
      read_char_range(common, common->nlmin, common->nlmax, TRUE);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
      }
    JUMPHERE(jump[2]);
    JUMPHERE(jump[3]);
    }
  JUMPHERE(jump[0]);
  check_partial(common, FALSE);
  return cc;

  case OP_EOD:
  /* Fails while input is still available (STR_PTR < STR_END). */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  check_partial(common, FALSE);
  return cc;

  case OP_DOLL:
  /* A non-zero jit_arguments.noteol makes the assertion fail. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  /* Without common->endonly the rest is the same as OP_EODN, otherwise the
  same as OP_EOD. */
  if (!common->endonly)
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  return cc;

  case OP_DOLLM:
  /* jump[1]: input is still available, a newline must follow.
     jump[0]: at the end of the subject (and noteol is zero): success. */
  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  check_partial(common, FALSE);
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == JIT_COMPILE)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
    else
      {
      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    peek_char(common, common->nlmax);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_CIRC:
  /* Fails when STR_PTR is beyond jit_arguments.begin or when
  jit_arguments.notbol is non-zero. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  return cc;

  case OP_CIRCM:
  /* jump[1]: STR_PTR is beyond the subject start, the previous character(s)
              must form a newline.
     jump[0]: at the subject start (and notbol is zero): success. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  /* The assertion fails at (or beyond) STR_END on this path. */
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Both units of the newline must fit between begin (TMP1) and STR_PTR. */
    OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    /* Step back one character, read it again and test it as a newline. */
    skip_char_back(common);
    read_char_range(common, common->nlmin, common->nlmax, TRUE);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_REVERSE:
  /* Moves STR_PTR backwards by the length stored after the opcode. */
  length = GET(cc, 0);
  if (length == 0)
    return cc + LINK_SIZE;
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    /* Step back one character at a time, `length` times; fail as soon as
    STR_PTR is not beyond jit_arguments.begin (TMP3). */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
    label = LABEL();
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
    skip_char_back(common);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  else
#endif
    {
    /* Fixed width code units: a single subtraction and bounds check. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
    }
  check_start_used_ptr(common);
  return cc + LINK_SIZE;
  }
SLJIT_UNREACHABLE();
return cc;
}
6401
/* Generates the matching path of an opcode which consumes one character from
the subject (OP_ANYNL may consume a CR NL pair, and OP_EXTUNI keeps reading
characters while the ucp_gbtable test on consecutive gbprop values succeeds).

Arguments:
  common         compiler state
  type           the opcode to be compiled
  cc             points to the data which follows the opcode
  backtracks     jump list which collects the jumps taken on a mismatch
  check_str_ptr  when TRUE, detect_partial_match() is emitted before the
                 input is read (the OP_CHAR / OP_CHARI fast path emits its
                 own STR_PTR > STR_END check instead)

Returns: cc advanced past the data of the opcode: cc itself for opcodes
         without data, cc + 2 for OP_PROP / OP_NOTPROP, cc + length for the
         single character opcodes, cc + 32 / sizeof(pcre_uchar) for
         OP_CLASS / OP_NCLASS and cc + GET(cc, 0) - 1 for OP_XCLASS.
*/
static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
{
DEFINE_COMPILER;
int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[3];
jump_list *end_list;
#ifdef SUPPORT_UTF
struct sljit_label *label;
#ifdef SUPPORT_UCP
pcre_uchar propdata[5];
#endif
#endif /* SUPPORT_UTF */

switch(type)
  {
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth to optimize them. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, type == OP_NOT_DIGIT);
  else
#endif
    read_char8_type(common, type == OP_NOT_DIGIT);
    /* Flip the starting bit in the negative case. */
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  /* Same scheme as the digit case, testing the ctype_space bit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, type == OP_NOT_WHITESPACE);
  else
#endif
    read_char8_type(common, type == OP_NOT_WHITESPACE);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  /* Same scheme as the digit case, testing the ctype_word bit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, type == OP_NOT_WORDCHAR);
  else
#endif
    read_char8_type(common, type == OP_NOT_WORDCHAR);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_ANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, common->nlmin, common->nlmax, TRUE);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two unit fixed newline. jump[0]: the character just read is not the
    first newline unit, so it is accepted. Otherwise the match fails only when
    the next unit (if there is one) is the second newline unit. */
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
    end_list = NULL;
    if (common->mode != JIT_PARTIAL_HARD_COMPILE)
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    else
      check_str_end(common, &end_list);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
    set_jumps(end_list, LABEL());
    JUMPHERE(jump[0]);
    }
  else
    check_newlinechar(common, common->nltype, backtracks, TRUE);
  return cc;

  case OP_ALLANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    /* The character value is not needed, only its length: read the first
    code unit, then skip the additional units it announces. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
#if defined COMPILE_PCRE8
    /* Lead bytes from 0xc0 are followed by utf8_table4[lead - 0xc0] more bytes. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif defined COMPILE_PCRE16
    /* TMP1 = ((unit & 0xfc00) == 0xd800) << 1, i.e. two more bytes (one more
    16 bit unit) are skipped when the test holds. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif
    JUMPHERE(jump[0]);
#endif /* COMPILE_PCRE[8|16] */
    return cc;
    }
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  case OP_ANYBYTE:
  /* Always advances by exactly one code unit. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP
  case OP_NOTPROP:
  case OP_PROP:
  /* The property test is wrapped into a temporary extended class
  description, which is compiled by compile_xclass_matchingpath(). */
  propdata[0] = XCL_HASPROP;
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  propdata[2] = cc[0];
  propdata[3] = cc[1];
  propdata[4] = XCL_END;
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, propdata, backtracks);
  return cc + 2;
#endif
#endif

  case OP_ANYNL:
  /* jump[0]: the character is not CR, it is tested as a single newline.
     jump[1]: CR is not followed by NL, the CR alone is accepted.
     jump[2]: CR NL has been consumed. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* We don't need to handle soft partial matching case. */
  end_list = NULL;
  if (common->mode != JIT_PARTIAL_HARD_COMPILE)
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  else
    check_str_end(common, &end_list);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump[2] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(end_list, LABEL());
  JUMPHERE(jump[1]);
  JUMPHERE(jump[2]);
  return cc;

  case OP_NOT_HSPACE:
  case OP_HSPACE:
  /* The character is classified by the shared hspace helper; the outcome is
  tested through the zero flag. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_VSPACE:
  case OP_VSPACE:
  /* Same scheme as the horizontal space case, using the vspace helper. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

#ifdef SUPPORT_UCP
  case OP_EXTUNI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  /* Read the first character and fetch its gbprop value into STACK_TOP. */
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  /* Optimize register allocation: use a real register. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  /* Loop: read the next character (its start is remembered in TMP3) and
  continue while bit `next gbprop` is set in ucp_gbtable[previous gbprop]. */
  label = LABEL();
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
  OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  JUMPTO(SLJIT_NOT_ZERO, label);

  /* The last character read does not belong to the sequence: go back to
  its start, then restore STACK_TOP. */
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  JUMPHERE(jump[0]);
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  if (common->mode == JIT_PARTIAL_HARD_COMPILE)
    {
    jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jump[0]);
    }
  return cc;
#endif

  case OP_CHAR:
  case OP_CHARI:
  length = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif
  /* Fast path: advance STR_PTR by the whole length first, check it against
  STR_END once, and let byte_sequence_compare() compare the code units. It is
  used in JIT_COMPILE mode when the character is caseful, has no other case,
  or char_get_othercase_bit() returns non-zero for it. */
  if (common->mode == JIT_COMPILE && check_str_ptr
      && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
    {
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    context.length = IN_UCHARS(length);
    context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
    }

  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
#endif
    c = *cc;

  if (type == OP_CHAR || !char_has_othercase(common, cc))
    {
    read_char_range(common, c, c, FALSE);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    return cc + length;
    }
  oc = char_othercase(common, c);
  read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
  bit = c ^ oc;
  if (is_powerof2(bit))
    {
    /* The two cases differ in a single bit: set it, then compare once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
    return cc + length;
    }
  jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
  JUMPHERE(jump[0]);
  return cc + length;

  case OP_NOT:
  case OP_NOTI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  length = 1;
#ifdef SUPPORT_UTF
  if (common->utf)
    {
#ifdef COMPILE_PCRE8
    c = *cc;
    if (c < 128)
      {
      /* The excluded character is a single byte: only the first byte of the
      subject character has to be compared. */
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jump[0]);
      return cc + 1;
      }
    else
#endif /* COMPILE_PCRE8 */
      {
      GETCHARLEN(c, cc, length);
      }
    }
  else
#endif /* SUPPORT_UTF */
    c = *cc;

  /* Generic path: read the whole character and fail when it is equal to c
  (or to its other case for OP_NOTI). */
  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    read_char_range(common, c, c, TRUE);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    oc = char_othercase(common, c);
    read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
    bit = c ^ oc;
    if (is_powerof2(bit))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      }
    }
  return cc + length;

  case OP_CLASS:
  case OP_NCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  read_char_range(common, 0, bit, type == OP_NCLASS);
#else
  read_char_range(common, 0, 255, type == OP_NCLASS);
#endif

  /* Nothing more to do when check_class_ranges() reports that it has
  handled the class. */
  if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(pcre_uchar);

  /* Characters above the bitmap limit: a failure for OP_CLASS, while for
  OP_NCLASS jump[0] skips the bitmap test (the character is accepted). */
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  jump[0] = NULL;
  if (common->utf)
    {
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jump[0]);
      jump[0] = NULL;
      }
    }
#elif !defined COMPILE_PCRE8
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jump[0]);
    jump[0] = NULL;
    }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

  /* Bitmap test: fail when bit (c & 7) of the byte at cc[c >> 3] is clear. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  if (jump[0] != NULL)
    JUMPHERE(jump[0]);
#endif
  return cc + 32 / sizeof(pcre_uchar);

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  case OP_XCLASS:
  /* The class data starts after the LINK_SIZE sized length field. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif
  }
SLJIT_UNREACHABLE();
return cc;
}
6790
compile_charn_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,jump_list ** backtracks)6791 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
6792 {
6793 /* This function consumes at least one input character. */
6794 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6795 DEFINE_COMPILER;
6796 pcre_uchar *ccbegin = cc;
6797 compare_context context;
6798 int size;
6799
6800 context.length = 0;
6801 do
6802 {
6803 if (cc >= ccend)
6804 break;
6805
6806 if (*cc == OP_CHAR)
6807 {
6808 size = 1;
6809 #ifdef SUPPORT_UTF
6810 if (common->utf && HAS_EXTRALEN(cc[1]))
6811 size += GET_EXTRALEN(cc[1]);
6812 #endif
6813 }
6814 else if (*cc == OP_CHARI)
6815 {
6816 size = 1;
6817 #ifdef SUPPORT_UTF
6818 if (common->utf)
6819 {
6820 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6821 size = 0;
6822 else if (HAS_EXTRALEN(cc[1]))
6823 size += GET_EXTRALEN(cc[1]);
6824 }
6825 else
6826 #endif
6827 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6828 size = 0;
6829 }
6830 else
6831 size = 0;
6832
6833 cc += 1 + size;
6834 context.length += IN_UCHARS(size);
6835 }
6836 while (size > 0 && context.length <= 128);
6837
6838 cc = ccbegin;
6839 if (context.length > 0)
6840 {
6841 /* We have a fixed-length byte sequence. */
6842 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
6843 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6844
6845 context.sourcereg = -1;
6846 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6847 context.ucharptr = 0;
6848 #endif
6849 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6850 return cc;
6851 }
6852
6853 /* A non-fixed length character will be checked if length == 0. */
6854 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6855 }
6856
/* Forward declarations of the matching path and backtracking path compilers;
their definitions are not in this part of the file. */
static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
6860
/* Allocates a backtrack record of `size` bytes with sljit_alloc_memory(),
clears it, stores `ccstart` in its cc field and pushes it onto the list headed
by parent->top. The variables `compiler`, `backtrack` and `parent` must be in
scope where the macro is expanded. If the compiler is in an error state after
the allocation, the enclosing function returns `error`. Note that `size` is
evaluated twice. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
6873
/* Same as PUSH_BACKTRACK, for functions returning void: allocates a cleared
backtrack record of `size` bytes, stores `ccstart` in its cc field and pushes
it onto the list headed by parent->top. The variables `compiler`, `backtrack`
and `parent` must be in scope where the macro is expanded. If the compiler is
in an error state after the allocation, the enclosing function returns
immediately. Note that `size` is evaluated twice. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
6886
/* Casts the `backtrack` variable of the enclosing function to a pointer to
the given backtrack record type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
6888
/* Generates the code which selects the capture group used by a reference to
a duplicated name (OP_DNREF / OP_DNREFI).

The name table entries of the candidate groups are visited in order. The
address of the OVECTOR pair of the first group whose start value differs from
OVECTOR(1) is left in TMP2; if every earlier candidate is equal to OVECTOR(1),
TMP2 holds the address of the last candidate's pair.

Arguments:
  common      compiler state
  cc          points to the OP_DNREF / OP_DNREFI opcode
  backtracks  when not NULL (and not in JavaScript compatibility mode), a jump
              is added to this list which is taken when the last candidate is
              equal to OVECTOR(1) as well
*/
static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
pcre_uchar *entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
jump_list *selected = NULL;
unsigned int pair;
int remaining;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 holds the value every candidate start is compared against. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All candidates except the last one: leave as soon as one differs. */
for (remaining = GET2(cc, 1 + IMM2_SIZE) - 1; remaining > 0; remaining--)
  {
  pair = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(pair));
  add_jump(compiler, &selected, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(pair), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The last candidate is the fallback; it may only be rejected through the
backtracks list. */
pair = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(pair));
if (backtracks != NULL && !common->jscript_compat)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(pair), TMP1, 0));

set_jumps(selected, LABEL());
}
6918
/* Generates the matching path of a back reference: the text captured by the
referenced group is compared with the subject at STR_PTR.

Arguments:
  common      compiler state
  cc          points to the opcode. For OP_REF / OP_REFI the group number
              follows the opcode. For any other opcode (OP_DNREF / OP_DNREFI
              are expected) TMP2 must already contain the address of the
              selected OVECTOR pair.
  backtracks  jump list which collects the jumps taken on a mismatch
  withchecks  when TRUE, the empty capture check is emitted, and for numbered
              references (outside JavaScript compatibility mode) a capture
              start equal to OVECTOR(1) is a failure
  emptyfail   when TRUE an empty capture is a failure, otherwise the
              comparison is skipped for it

Case sensitivity is derived from the opcode for both the numbered and the
duplicate-name forms: OP_REFI and OP_DNREFI are compared caselessly, any other
opcode casefully.
*/
static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
BOOL caseless = (*cc == OP_REFI || *cc == OP_DNREFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;

/* TMP1 = start of the captured text. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->jscript_compat)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UTF && defined SUPPORT_UCP
/* Caseless comparison of UTF text is done by do_utf_caselesscmp(). This must
cover OP_DNREFI as well, otherwise the !ref branch below is never used. */
if (common->utf && caseless)
  {
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1);
  /* SLJIT_R2 = end of the captured text. */
  if (ref)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  /* jump: the capture is empty (start == end). */
  if (withchecks)
    jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_R2, 0);

  /* No free saved registers so save data on stack. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_R3, 0, STR_END, 0);
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

  /* Return values 0 and 1 are not valid subject positions: both are a
  failure in JIT_COMPILE mode; in the partial modes the value 1 additionally
  moves STR_PTR to STR_END and emits the partial match check. */
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
  else
    {
    OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);

    add_jump(compiler, backtracks, JUMP(SLJIT_LESS));

    nopartial = JUMP(SLJIT_NOT_EQUAL);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }
else
#endif /* SUPPORT_UTF && SUPPORT_UCP */
  {
  /* TMP2 = length of the captured text in bytes (end - start). */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  /* jump: the capture is empty. */
  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, partial);

  /* The comparison helper is selected by case sensitivity, so that OP_DNREF
  is compared casefully just like OP_REF. */
  add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != JIT_COMPILE)
    {
    /* The capture is longer than the remaining subject: compare only the
    available part, then emit the partial match check. */
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* An empty capture either fails or simply continues after the comparison. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
7019
/* Compiles a back reference which is followed by a repeat quantifier
(OP_CRSTAR ... OP_CRMINRANGE).

Arguments:
  common   the compiler state
  cc       points to the back reference opcode
  parent   backtrack record which receives the new ref_iterator_backtrack

Returns:   pointer to the first opcode after the quantifier. NULL is the
           error value handed to PUSH_BACKTRACK (presumably returned when
           the backtrack record cannot be allocated).

In the repeat limits max == 0 stands for "no upper limit" (see the
OP_CRSTAR / OP_CRPLUS cases). */
static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
pcre_uchar type;
int offset = 0;
struct sljit_label *label;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
pcre_uchar *ccbegin = cc;
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);

/* The non-numbered form carries one more IMM2 argument, which is skipped
here so the quantifier is found at the same relative position. */
if (ref)
  offset = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];

/* Odd quantifier opcodes are the minimizing (lazy) variants. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;
switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_UNREACHABLE();
  break;
  }

/* Maximizing (greedy) repeat. */
if (!minimize)
  {
  if (min == 0)
    {
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both invalid and empty cases. Since the minimum repeat
    is zero, the invalid case is basically the same as an empty case. */
    if (ref)
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      /* TMP2, as left by compile_dnref_search, is kept in POSSESSIVE1
      because it is reloaded at the start of every iteration below. */
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (ref)
      {
      /* At least one match is required, so a start pointer equal to
      OVECTOR(1) is a failure here rather than an empty match. */
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* POSSESSIVE0 is used as the iteration counter when a limit exists. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  /* Start of one iteration. */
  label = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: iterate again without saving a position. */
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Below the maximum: push the current STR_PTR, then iterate again. */
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing (lazy) repeat. Stack layout used below: STACK(0) holds the
saved STR_PTR (initially 0), STACK(1) the iteration counter, and for the
non-numbered form STACK(2) holds the TMP2 value left by
compile_dnref_search. */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both invalid and empty cases. Since the minimum repeat
  is zero, the invalid case is basically the same as an empty case. */
  if (ref)
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* With a zero minimum the first attempt skips the reference entirely. */
  jump = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* Entry point of one more iteration; also the target of the min loop
below. */
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
7226
compile_recurse_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7227 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7228 {
7229 DEFINE_COMPILER;
7230 backtrack_common *backtrack;
7231 recurse_entry *entry = common->entries;
7232 recurse_entry *prev = NULL;
7233 sljit_sw start = GET(cc, 1);
7234 pcre_uchar *start_cc;
7235 BOOL needs_control_head;
7236
7237 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
7238
7239 /* Inlining simple patterns. */
7240 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
7241 {
7242 start_cc = common->start + start;
7243 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
7244 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
7245 return cc + 1 + LINK_SIZE;
7246 }
7247
7248 while (entry != NULL)
7249 {
7250 if (entry->start == start)
7251 break;
7252 prev = entry;
7253 entry = entry->next;
7254 }
7255
7256 if (entry == NULL)
7257 {
7258 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
7259 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7260 return NULL;
7261 entry->next = NULL;
7262 entry->entry = NULL;
7263 entry->calls = NULL;
7264 entry->start = start;
7265
7266 if (prev != NULL)
7267 prev->next = entry;
7268 else
7269 common->entries = entry;
7270 }
7271
7272 if (common->has_set_som && common->mark_ptr != 0)
7273 {
7274 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
7275 allocate_stack(common, 2);
7276 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
7277 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7278 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7279 }
7280 else if (common->has_set_som || common->mark_ptr != 0)
7281 {
7282 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
7283 allocate_stack(common, 1);
7284 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7285 }
7286
7287 if (entry->entry == NULL)
7288 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
7289 else
7290 JUMPTO(SLJIT_FAST_CALL, entry->entry);
7291 /* Leave if the match is failed. */
7292 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
7293 return cc + 1 + LINK_SIZE;
7294 }
7295
do_callout(struct jit_arguments * arguments,PUBL (callout_block)* callout_block,pcre_uchar ** jit_ovector)7296 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
7297 {
7298 const pcre_uchar *begin = arguments->begin;
7299 int *offset_vector = arguments->offsets;
7300 int offset_count = arguments->offset_count;
7301 int i;
7302
7303 if (PUBL(callout) == NULL)
7304 return 0;
7305
7306 callout_block->version = 2;
7307 callout_block->callout_data = arguments->callout_data;
7308
7309 /* Offsets in subject. */
7310 callout_block->subject_length = arguments->end - arguments->begin;
7311 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
7312 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
7313 #if defined COMPILE_PCRE8
7314 callout_block->subject = (PCRE_SPTR)begin;
7315 #elif defined COMPILE_PCRE16
7316 callout_block->subject = (PCRE_SPTR16)begin;
7317 #elif defined COMPILE_PCRE32
7318 callout_block->subject = (PCRE_SPTR32)begin;
7319 #endif
7320
7321 /* Convert and copy the JIT offset vector to the offset_vector array. */
7322 callout_block->capture_top = 0;
7323 callout_block->offset_vector = offset_vector;
7324 for (i = 2; i < offset_count; i += 2)
7325 {
7326 offset_vector[i] = jit_ovector[i] - begin;
7327 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
7328 if (jit_ovector[i] >= begin)
7329 callout_block->capture_top = i;
7330 }
7331
7332 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
7333 if (offset_count > 0)
7334 offset_vector[0] = -1;
7335 if (offset_count > 1)
7336 offset_vector[1] = -1;
7337 return (*PUBL(callout))(callout_block);
7338 }
7339
/* Size of the callout block, rounded up to a multiple of 8 bytes. */
#define CALLOUT_ARG_SIZE \
    (((int)sizeof(PUBL(callout_block)) + 7) & ~7)

/* Byte offset of a field inside the callout block. */
#define CALLOUT_ARG_OFFSET(arg) \
    SLJIT_OFFSETOF(PUBL(callout_block), arg)

/* Compiles a callout item: builds a callout block on the JIT stack, calls
do_callout and dispatches on its return value.

Arguments:
  common   the compiler state
  cc       points to the callout opcode; cc[1] is the callout number,
           followed by two LINK_SIZE sized values (pattern position and
           next item length)
  parent   backtrack record which receives the new backtrack_common

Returns:   pointer to the opcode after the callout item. NULL is the error
           value handed to PUSH_BACKTRACK.
*/
static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

/* The callout block lives on the JIT stack for the duration of the call. */
allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));

SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);

/* These pointer sized fields temporarily store internal variables
(the current STR_PTR and the value of OVECTOR(0)); do_callout converts
them into offsets and then overwrites the fields. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);

/* The mark field is set to 0 when marks are not used by the pattern. */
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

/* Needed to save important temporary registers. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
/* SLJIT_R0 = arguments */
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));

/* Check return value: a positive value backtracks, a negative value
leaves through the forced quit path and zero continues matching. */
OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32));
/* Positive values were handled above, so "not equal" means negative. */
if (common->forced_quit_label == NULL)
  add_jump(compiler, &common->forced_quit, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */);
else
  JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->forced_quit_label);
return cc + 2 + 2 * LINK_SIZE;
}

#undef CALLOUT_ARG_SIZE
#undef CALLOUT_ARG_OFFSET
7394
assert_needs_str_ptr_saving(pcre_uchar * cc)7395 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(pcre_uchar *cc)
7396 {
7397 while (TRUE)
7398 {
7399 switch (*cc)
7400 {
7401 case OP_NOT_WORD_BOUNDARY:
7402 case OP_WORD_BOUNDARY:
7403 case OP_CIRC:
7404 case OP_CIRCM:
7405 case OP_DOLL:
7406 case OP_DOLLM:
7407 case OP_CALLOUT:
7408 case OP_ALT:
7409 cc += PRIV(OP_lengths)[*cc];
7410 break;
7411
7412 case OP_KET:
7413 return FALSE;
7414
7415 default:
7416 return TRUE;
7417 }
7418 }
7419 }
7420
/* Compiles an assertion bracket (an opcode between OP_ASSERT and
OP_ASSERTBACK_NOT, see the SLJIT_ASSERT below), which may be preceded by
OP_BRAZERO or OP_BRAMINZERO. Every alternative is compiled with
compile_matchingpath and immediately followed by its backtracking path.

Arguments:
  common       the compiler state
  cc           points to the assertion opcode, or to the OP_BRAZERO /
               OP_BRAMINZERO in front of it
  backtrack    assert backtrack record; framesize and private_data_ptr are
               filled in here, and matchingpath is set for OP_BRAZERO
  conditional  TRUE selects backtrack->condfailed as the failure target
               instead of backtrack->common.topbacktracks (a zero repeat
               prefix is not allowed in this case)

Returns:       pointer to the opcode after the closing bracket, or NULL when
               the sljit compiler reports an error

Several fields of common (local_exit, positive_assert, then_trap, quit*,
accept*, positive_assert_quit) are replaced while the assertion is compiled
and restored on every return path. */
static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
pcre_uchar *ccbegin;
pcre_uchar opcode;
pcre_uchar bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_exit = common->local_exit;
BOOL save_positive_assert = common->positive_assert;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assert_quit = common->positive_assert_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* "found" collects the jumps taken when an alternative matches. For a
positive assertion they go to the local list tmp, which is resolved at the
"Assert is successful" label below. For a negative assertion a matching
alternative means failure, so the jumps go directly to target. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (framesize < 0)
  {
  /* No frame is needed: at most STR_PTR and the control head are saved. */
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
    }
  }
else
  {
  /* A frame is needed. The extra slots hold STR_PTR at STACK(0), then the
  control head (if needed), then the previous private_data_ptr value. */
  extrasize = needs_control_head ? 3 : 2;
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  /* Negative assert is stronger than positive assert. */
  common->local_exit = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  common->positive_assert = FALSE;
  }
else
  common->positive_assert = TRUE;
common->positive_assert_quit = NULL;

/* One iteration per alternative. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Every alternative after the first restarts from the saved STR_PTR. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved state before reporting the error. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      /* Push a zero item. */
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  /* This alternative matched. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved state before reporting the error. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  SLJIT_ASSERT(common->positive_assert_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assert_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assert_quit != NULL)
  {
  /* Landing code for the jumps collected in positive_assert_quit: it
  rebuilds STACK_TOP before joining the normal fall-through path. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assert_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* With OP_BRAZERO a failed assertion is not a failure: the jump is
  bound to the matching path label further below. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize == 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
      if (extrasize == 2)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    /* NOTE(review): backtrack->matchingpath is not assigned in this
    function for OP_BRAMINZERO; presumably the caller sets it. */
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    set_jumps(backtrack->common.topbacktracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    /* With a zero repeat prefix a failed assertion still lets the match
    continue, so the collected failure jumps are bound here. */
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
    set_jumps(backtrack->common.topbacktracks, LABEL());
    backtrack->common.topbacktracks = NULL;
    }
  }

/* Restore the state saved at the top of the function. */
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  common->local_exit = save_local_exit;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->positive_assert = save_positive_assert;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assert_quit = save_positive_assert_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
7842
match_once_common(compiler_common * common,pcre_uchar ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)7843 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
7844 {
7845 DEFINE_COMPILER;
7846 int stacksize;
7847
7848 if (framesize < 0)
7849 {
7850 if (framesize == no_frame)
7851 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7852 else
7853 {
7854 stacksize = needs_control_head ? 1 : 0;
7855 if (ket != OP_KET || has_alternatives)
7856 stacksize++;
7857
7858 if (stacksize > 0)
7859 free_stack(common, stacksize);
7860 }
7861
7862 if (needs_control_head)
7863 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
7864
7865 /* TMP2 which is set here used by OP_KETRMAX below. */
7866 if (ket == OP_KETRMAX)
7867 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
7868 else if (ket == OP_KETRMIN)
7869 {
7870 /* Move the STR_PTR to the private_data_ptr. */
7871 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
7872 }
7873 }
7874 else
7875 {
7876 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
7877 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
7878 if (needs_control_head)
7879 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
7880
7881 if (ket == OP_KETRMAX)
7882 {
7883 /* TMP2 which is set here used by OP_KETRMAX below. */
7884 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7885 }
7886 }
7887 if (needs_control_head)
7888 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
7889 }
7890
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)7891 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
7892 {
7893 DEFINE_COMPILER;
7894
7895 if (common->capture_last_ptr != 0)
7896 {
7897 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7898 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7899 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7900 stacksize++;
7901 }
7902 if (common->optimized_cbracket[offset >> 1] == 0)
7903 {
7904 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7905 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7906 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7907 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7908 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
7909 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7910 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7911 stacksize += 2;
7912 }
7913 return stacksize;
7914 }
7915
7916 /*
7917 Handling bracketed expressions is probably the most complex part.
7918
7919 Stack layout naming characters:
7920 S - Push the current STR_PTR
7921 0 - Push a 0 (NULL)
7922 A - Push the current STR_PTR. Needed for restoring the STR_PTR
7923 before the next alternative. Not pushed if there are no alternatives.
7924 M - Any values pushed by the current alternative. Can be empty, or anything.
7925 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
7926 L - Push the previous local (pointed by localptr) to the stack
7927 () - optional values stored on the stack
7928 ()* - optional, can be stored multiple times
7929
7930 The following list shows the regular expression templates, their PCRE byte codes
7931 and stack layout supported by pcre-sljit.
7932
7933 (?:) OP_BRA | OP_KET A M
7934 () OP_CBRA | OP_KET C M
7935 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
7936 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
7937 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
7938 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
7939 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
7940 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
7941 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
7942 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
7943 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
7944 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
7945 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
7946 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
7947 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
7948 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
7949 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
7950 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
7951 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
7952 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
7953 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
7954 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
7955
7956
7957 Stack layout naming characters:
7958 A - Push the alternative index (starting from 0) on the stack.
7959 Not pushed if there are no alternatives.
7960 M - Any values pushed by the current alternative. Can be empty, or anything.
7961
7962 The next list shows the possible content of a bracket:
7963 (|) OP_*BRA | OP_ALT ... M A
7964 (?()|) OP_*COND | OP_ALT M A
7965 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
7966 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
7967 Or nothing, if trace is unnecessary
7968 */
7969
/* Generates the matching path of a bracketed group: OP_BRA, OP_CBRA,
OP_SBRA, OP_SCBRA, OP_ONCE, OP_ONCE_NC, OP_COND or OP_SCOND, optionally
preceded by OP_BRAZERO or OP_BRAMINZERO. Only the first alternative is
compiled here; the remaining alternatives are skipped.

Arguments:
  common   compiler state
  cc       points to the bracket opcode, or to the OP_BRAZERO /
             OP_BRAMINZERO which precedes it
  parent   parent backtrack record; when the group is preceded by
             OP_BRAMINZERO it is accessed as a braminzero_backtrack

Returns a pointer to the byte code which follows the group (including any
repeat data attached to its OP_KET), or NULL if the compiler reported an
error. */

static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
int private_data_ptr = 0;
int offset = 0;
int i, stacksize;
int repeat_ptr = 0, repeat_length = 0;
int repeat_type = 0, repeat_count = 0;
pcre_uchar *ccbegin;
pcre_uchar *matchingpath;
pcre_uchar *slot;
pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *jump;
struct sljit_jump *skip;
struct sljit_label *rmax_label = NULL;
struct sljit_jump *braminzero = NULL;

PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);

/* Remember the zero-repeat prefix and step over it. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
ccbegin = cc;
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
ket = *matchingpath;
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
  {
  /* A plain OP_KET with private data carries a counted repeat. Bounded
  repeats are handled like OP_KETRMAX / OP_KETRMIN below. */
  repeat_ptr = PRIVATE_DATA(matchingpath);
  repeat_length = PRIVATE_DATA(matchingpath + 1);
  repeat_type = PRIVATE_DATA(matchingpath + 2);
  repeat_count = PRIVATE_DATA(matchingpath + 3);
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }

if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
  {
  /* Drop this bracket_backtrack. */
  parent->top = backtrack->prev;
  return matchingpath + 1 + LINK_SIZE + repeat_length;
  }

matchingpath = ccbegin + 1 + LINK_SIZE;
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
cc += GET(cc, 1);

has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
  has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;

/* Normalize the opcode: a repeated OP_COND is handled as OP_SCOND and
OP_ONCE_NC as OP_ONCE. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  opcode = OP_ONCE;

if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Capturing brackets have a pre-allocated space. */
  offset = GET2(ccbegin, 1 + LINK_SIZE);
  if (common->optimized_cbracket[offset] == 0)
    {
    private_data_ptr = OVECTOR_PRIV(offset);
    offset <<= 1;
    }
  else
    {
    offset <<= 1;
    private_data_ptr = OVECTOR(offset);
    }
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  matchingpath += IMM2_SIZE;
  }
else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Other brackets simply allocate the next entry. */
  private_data_ptr = PRIVATE_DATA(ccbegin);
  SLJIT_ASSERT(private_data_ptr != 0);
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  if (opcode == OP_ONCE)
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
  }

/* Instructions before the first alternative. The first pass counts the
stack slots, the second pass fills them. */
stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  stacksize++;
if (bra == OP_BRAZERO)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (bra == OP_BRAZERO)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (ket != OP_KETRMIN)
    {
    free_stack(common, 1);
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    }
  else
    {
    if (opcode == OP_ONCE || opcode >= OP_SBRA)
      {
      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      /* Nothing stored during the first run. */
      skip = JUMP(SLJIT_JUMP);
      JUMPHERE(jump);
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
        {
        /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        }
      else
        {
        /* Except when the whole stack frame must be saved. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
        }
      JUMPHERE(skip);
      }
    else
      {
      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      JUMPHERE(jump);
      }
    }
  }

if (repeat_type != 0)
  {
  /* Initialize the repeat counter. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
  if (repeat_type == OP_EXACT)
    rmax_label = LABEL();
  }

if (ket == OP_KETRMIN)
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();

if (ket == OP_KETRMAX)
  {
  rmax_label = LABEL();
  if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
  }

/* Handling capturing brackets and alternatives. */
if (opcode == OP_ONCE)
  {
  stacksize = 0;
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    stacksize++;
    }

  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
    {
    /* Neither capturing brackets nor recursions are found in the block. */
    if (ket == OP_KETRMIN)
      {
      stacksize += 2;
      if (!needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
      if (ket == OP_KETRMAX || has_alternatives)
        stacksize++;
      }

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    stacksize = 0;
    if (needs_control_head)
      {
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
      }

    if (ket == OP_KETRMIN)
      {
      /* TMP2 is free again once the control head has been stored, so it
      can be reloaded with the private data value here. */
      if (needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
      }
    else if (ket == OP_KETRMAX || has_alternatives)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
    }
  else
    {
    /* A stack frame is saved together with the other values. */
    if (ket != OP_KET || has_alternatives)
      stacksize++;

    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
    allocate_stack(common, stacksize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

    stacksize = needs_control_head ? 1 : 0;
    if (ket != OP_KET || has_alternatives)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
    }
  }
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Saving the previous values. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Saving the previous value. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }
else if (has_alternatives)
  {
  /* Pushing the starting string pointer. */
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  }

/* Generating code for the first alternative. */
if (opcode == OP_COND || opcode == OP_SCOND)
  {
  if (*matchingpath == OP_CREF)
    {
    /* The condition fails when the referenced OVECTOR entry is equal to
    OVECTOR(1). */
    SLJIT_ASSERT(has_alternatives);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    matchingpath += 1 + IMM2_SIZE;
    }
  else if (*matchingpath == OP_DNCREF)
    {
    SLJIT_ASSERT(has_alternatives);

    /* Same test as OP_CREF for every entry of the name table which is
    listed; the differences are OR-ed together in TMP2. STR_PTR is used
    as a scratch register, so it is preserved in TMP3. */
    i = GET2(matchingpath, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
    slot += common->name_entry_size;
    i--;
    while (i-- > 0)
      {
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
      slot += common->name_entry_size;
      }
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
    matchingpath += 1 + 2 * IMM2_SIZE;
    }
  else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
    {
    /* Never has other case. */
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
    SLJIT_ASSERT(!has_alternatives);

    /* These conditions are decided at compile time. From here on stacksize
    is reused as the result of the condition (non-zero means true). */
    if (*matchingpath == OP_FAIL)
      stacksize = 0;
    else if (*matchingpath == OP_RREF)
      {
      stacksize = GET2(matchingpath, 1);
      if (common->currententry == NULL)
        stacksize = 0;
      else if (stacksize == RREF_ANY)
        stacksize = 1;
      else if (common->currententry->start == 0)
        stacksize = stacksize == 0;
      else
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);

      if (stacksize != 0)
        matchingpath += 1 + IMM2_SIZE;
      }
    else
      {
      if (common->currententry == NULL || common->currententry->start == 0)
        stacksize = 0;
      else
        {
        /* Search the listed name table entries for the number of the
        current entry; stacksize stays non-zero if it is found. */
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
        while (stacksize > 0)
          {
          if ((int)GET2(slot, 0) == i)
            break;
          slot += common->name_entry_size;
          stacksize--;
          }
        }

      if (stacksize != 0)
        matchingpath += 1 + 2 * IMM2_SIZE;
      }

    /* The stacksize == 0 is a common "else" case. */
    if (stacksize == 0)
      {
      if (*cc == OP_ALT)
        {
        matchingpath = cc + 1 + LINK_SIZE;
        cc += GET(cc, 1);
        }
      else
        matchingpath = cc;
      }
    }
  else
    {
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
    /* Similar code as PUSH_BACKTRACK macro. */
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
      return NULL;
    memset(assert, 0, sizeof(assert_backtrack));
    assert->common.cc = matchingpath;
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
    /* Stop here on a compiler error, like every other compile step in this
    function, instead of compiling from the result of a failed step. */
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
      return NULL;
    }
  }

compile_matchingpath(common, matchingpath, cc, backtrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

if (opcode == OP_ONCE)
  match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

/* Values stored after the alternative has matched. The first pass counts
the stack slots, the second pass fills them. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* We need to preserve the counter. TMP2 will be used below. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  stacksize++;
  }
if (ket != OP_KET || bra != OP_BRA)
  stacksize++;
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    stacksize++;
  if (common->optimized_cbracket[offset >> 1] == 0)
    stacksize += 2;
  }
if (has_alternatives && opcode != OP_ONCE)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* TMP2 was set above. */
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  stacksize++;
  }

if (ket != OP_KET || bra != OP_BRA)
  {
  if (ket != OP_KET)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (offset != 0)
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

if (has_alternatives)
  {
  /* Alternative index 0: the first alternative has matched. */
  if (opcode != OP_ONCE)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  if (ket != OP_KETRMAX)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  }

/* Must be after the matchingpath label. */
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
  {
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (repeat_type != 0)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    /* Decrease the repeat counter and iterate again until it reaches zero. */
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
    /* Drop STR_PTR for greedy plus quantifier. */
    if (opcode != OP_ONCE)
      free_stack(common, 1);
    }
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE)
      {
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
      /* Drop STR_PTR for greedy plus quantifier. */
      if (bra != OP_BRAZERO)
        free_stack(common, 1);
      }
    else
      /* TMP2 must contain the starting STR_PTR. */
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
    }
  else
    JUMPTO(SLJIT_JUMP, rmax_label);
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  }

if (repeat_type == OP_EXACT)
  {
  count_match(common);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  }
else if (repeat_type == OP_UPTO)
  {
  /* We need to preserve the counter. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }

if (bra == OP_BRAZERO)
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
  if (braminzero != NULL)
    {
    JUMPHERE(braminzero);
    /* We need to release the end pointer to perform the
    backtrack for the zero-length iteration. When
    framesize is < 0, OP_ONCE will do the release itself. */
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
      free_stack(common, 1);
    }
  /* Continue to the normal backtrack. */
  }

if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
  count_match(common);

/* Skip the other alternatives. */
while (*cc == OP_ALT)
  cc += GET(cc, 1);
cc += 1 + LINK_SIZE;

if (opcode == OP_ONCE)
  {
  /* We temporarily encode the needs_control_head in the lowest bit.
     Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
     the same value for small signed numbers (including negative numbers). */
  BACKTRACK_AS(bracket_backtrack)->u.framesize = ((unsigned int)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
  }
return cc + repeat_length;
}
8514
/* Generates the matching path of an OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS or
OP_SCBRAPOS group (optionally preceded by OP_BRAPOSZERO). Every alternative
of the group is compiled together with its backtracking path, and each
successful alternative jumps back to the start of the group; the group ends
at OP_KETRPOS.

Arguments:
  common   compiler state
  cc       points to the bracket opcode, or to the OP_BRAPOSZERO which
             precedes it
  parent   parent backtrack record

Returns a pointer to the byte code which follows the OP_KETRPOS of the
group, or NULL if the compiler reported an error. */

static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
pcre_uchar *ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* Frameless case. Slots: the saved OVECTOR pair (and capture_last if it
  is in use) or the STR_PTR, then the control head if needed, then the
  "not matched yet" flag unless the group may match zero times. */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag is stored in the slot after the control head; stack is left
  pointing to the control head slot. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A stack frame is saved. Slots: the "not matched yet" flag unless the
  group may match zero times, the control head if needed, the STR_PTR of
  non-capturing groups, the previous private data value, then the frame. */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
  /* Step back to the control head slot. */
  stack -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  /* Each alternative gets its own backtrack list, since its backtracking
  path is generated right after it. */
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      /* Record the capture; TMP1 keeps the start of this iteration. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* TMP1 holds the start of this iteration: leave the loop if nothing
    was consumed. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* At least one iteration has matched: clear the flag. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* At least one iteration has matched: clear the flag. framesize is
    never negative on this path, so the flag is the one stored at
    STACK(0) when the frame was set up. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  /* The alternative has failed: restore the STR_PTR before the next
  alternative (or before leaving the group). */
  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* The flag is still set if no iteration has matched: backtrack. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
8797
get_iterator_parameters(compiler_common * common,pcre_uchar * cc,pcre_uchar * opcode,pcre_uchar * type,sljit_u32 * max,sljit_u32 * exact,pcre_uchar ** end)8798 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, sljit_u32 *max, sljit_u32 *exact, pcre_uchar **end)
8799 {
8800 int class_len;
8801
8802 *opcode = *cc;
8803 *exact = 0;
8804
8805 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
8806 {
8807 cc++;
8808 *type = OP_CHAR;
8809 }
8810 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
8811 {
8812 cc++;
8813 *type = OP_CHARI;
8814 *opcode -= OP_STARI - OP_STAR;
8815 }
8816 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
8817 {
8818 cc++;
8819 *type = OP_NOT;
8820 *opcode -= OP_NOTSTAR - OP_STAR;
8821 }
8822 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
8823 {
8824 cc++;
8825 *type = OP_NOTI;
8826 *opcode -= OP_NOTSTARI - OP_STAR;
8827 }
8828 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
8829 {
8830 cc++;
8831 *opcode -= OP_TYPESTAR - OP_STAR;
8832 *type = OP_END;
8833 }
8834 else
8835 {
8836 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
8837 *type = *opcode;
8838 cc++;
8839 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
8840 *opcode = cc[class_len - 1];
8841
8842 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
8843 {
8844 *opcode -= OP_CRSTAR - OP_STAR;
8845 *end = cc + class_len;
8846
8847 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
8848 {
8849 *exact = 1;
8850 *opcode -= OP_PLUS - OP_STAR;
8851 }
8852 }
8853 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
8854 {
8855 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
8856 *end = cc + class_len;
8857
8858 if (*opcode == OP_POSPLUS)
8859 {
8860 *exact = 1;
8861 *opcode = OP_POSSTAR;
8862 }
8863 }
8864 else
8865 {
8866 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
8867 *max = GET2(cc, (class_len + IMM2_SIZE));
8868 *exact = GET2(cc, class_len);
8869
8870 if (*max == 0)
8871 {
8872 if (*opcode == OP_CRPOSRANGE)
8873 *opcode = OP_POSSTAR;
8874 else
8875 *opcode -= OP_CRRANGE - OP_STAR;
8876 }
8877 else
8878 {
8879 *max -= *exact;
8880 if (*max == 0)
8881 *opcode = OP_EXACT;
8882 else if (*max == 1)
8883 {
8884 if (*opcode == OP_CRPOSRANGE)
8885 *opcode = OP_POSQUERY;
8886 else
8887 *opcode -= OP_CRRANGE - OP_QUERY;
8888 }
8889 else
8890 {
8891 if (*opcode == OP_CRPOSRANGE)
8892 *opcode = OP_POSUPTO;
8893 else
8894 *opcode -= OP_CRRANGE - OP_UPTO;
8895 }
8896 }
8897 *end = cc + class_len + 2 * IMM2_SIZE;
8898 }
8899 return cc;
8900 }
8901
8902 switch(*opcode)
8903 {
8904 case OP_EXACT:
8905 *exact = GET2(cc, 0);
8906 cc += IMM2_SIZE;
8907 break;
8908
8909 case OP_PLUS:
8910 case OP_MINPLUS:
8911 *exact = 1;
8912 *opcode -= OP_PLUS - OP_STAR;
8913 break;
8914
8915 case OP_POSPLUS:
8916 *exact = 1;
8917 *opcode = OP_POSSTAR;
8918 break;
8919
8920 case OP_UPTO:
8921 case OP_MINUPTO:
8922 case OP_POSUPTO:
8923 *max = GET2(cc, 0);
8924 cc += IMM2_SIZE;
8925 break;
8926 }
8927
8928 if (*type == OP_END)
8929 {
8930 *type = *cc;
8931 *end = next_opcode(common, cc);
8932 cc++;
8933 return cc;
8934 }
8935
8936 *end = cc + 1;
8937 #ifdef SUPPORT_UTF
8938 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
8939 #endif
8940 return cc;
8941 }
8942
/* Emits the matching path of a repeated single item (character, negated
   character, character type or class), as decoded by
   get_iterator_parameters().

   A char_iterator_backtrack record is pushed on parent first (PUSH_BACKTRACK
   is given NULL as the value to return on failure). The mandatory part of the
   repeat (exact) is emitted first, then the code selected by the normalised
   opcode.

   Returns the `end` pointer produced by get_iterator_parameters(), advanced by
   two code units when the following OP_CHAR/OP_CHARI is consumed by the
   "charpos" search below. */
static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
  DEFINE_COMPILER;
  backtrack_common *backtrack;
  pcre_uchar opcode;
  pcre_uchar type;
  sljit_u32 max = 0, exact;
  BOOL check_fast_fail;
  BOOL bulk_length_check;
  sljit_s32 fast_ptr;
  BOOL use_charpos;
  pcre_uchar stop_char;
  unsigned int case_bit;
  pcre_uchar *end;
  jump_list *stop_list = NULL;
  jump_list *overrun_list = NULL;
  struct sljit_jump *skip = NULL;
  struct sljit_label *loop;
  int private_data_ptr = PRIVATE_DATA(cc);
  /* Where the saved positions live: the backtrack stack when the iterator
     has no private data, otherwise its private data slots. */
  int save_base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
  int save_off0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
  int save_off1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
  int scratch_base, scratch_off;

  PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);

  fast_ptr = PRIVATE_DATA(cc + 1);

  SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_ptr == 0 || cc == common->fast_forward_bc_ptr);

  /* The fast-fail comparison is skipped for the fast-forward iterator; for
     any other iterator the slot is ignored when fast_fail_start_ptr is 0. */
  check_fast_fail = (cc != common->fast_forward_bc_ptr);
  if (check_fast_fail && common->fast_fail_start_ptr == 0)
    fast_ptr = 0;

  SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_ptr == 0
    || (fast_ptr >= common->fast_fail_start_ptr && fast_ptr <= common->fast_fail_end_ptr));

  cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

  /* Scratch location for counters / saved positions: TMP3, except for
     OP_EXTUNI where the POSSESSIVE0 slot is used instead. */
  if (type == OP_EXTUNI)
  {
    scratch_base = SLJIT_MEM1(SLJIT_SP);
    scratch_off = POSSESSIVE0;
  }
  else
  {
    scratch_base = TMP3;
    scratch_off = 0;
  }

  if (check_fast_fail && fast_ptr != 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_ptr));

  /* Mandatory repetitions come first. */
  if (exact > 1)
  {
    SLJIT_ASSERT(fast_ptr == 0);

    /* When set, one up-front length comparison replaces the per-character
       end-of-subject checks inside the loop. */
    bulk_length_check = (common->mode == JIT_COMPILE
#ifdef SUPPORT_UTF
      && !common->utf
#endif
      && type != OP_ANYNL && type != OP_EXTUNI);

    if (bulk_length_check)
    {
      OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
    }

    OP1(SLJIT_MOV, scratch_base, scratch_off, SLJIT_IMM, exact);
    loop = LABEL();
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, !bulk_length_check);
    OP2(SLJIT_SUB | SLJIT_SET_Z, scratch_base, scratch_off, scratch_base, scratch_off, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
  }
  else if (exact == 1)
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);

  switch (opcode)
  {
    case OP_STAR:
    case OP_UPTO:
      SLJIT_ASSERT(fast_ptr == 0 || opcode == OP_STAR);

      if (type == OP_ANYNL || type == OP_EXTUNI)
      {
        /* Every matched position is pushed on the backtrack stack. */
        SLJIT_ASSERT(private_data_ptr == 0);
        SLJIT_ASSERT(fast_ptr == 0);

        allocate_stack(common, 2);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

        if (opcode == OP_UPTO)
          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);

        loop = LABEL();
        compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
        if (opcode == OP_UPTO)
        {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
          OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
          skip = JUMP(SLJIT_ZERO);
          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
        }

        /* TMP3 cannot hold the counter here because of this allocate_stack. */
        allocate_stack(common, 1);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
        JUMPTO(SLJIT_JUMP, loop);
        if (skip != NULL)
          JUMPHERE(skip);
      }
      else
      {
        use_charpos = FALSE;
        stop_char = 0;
        case_bit = 0;

        /* "charpos": the repeat is directly followed by a literal character,
           so only positions where that character occurs are recorded. */
        if (!(type == OP_CHAR || type == OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
        {
          use_charpos = TRUE;
#ifdef SUPPORT_UTF
          use_charpos = !common->utf || !HAS_EXTRALEN(end[1]);
#endif
          if (use_charpos && *end == OP_CHARI && char_has_othercase(common, end + 1))
          {
            case_bit = char_get_othercase_bit(common, end + 1);
            if (case_bit == 0)
              use_charpos = FALSE;
          }

          if (use_charpos)
          {
            stop_char = end[1];
            /* Consume the OP_CHAR opcode. */
            end += 2;
#if defined COMPILE_PCRE8
            SLJIT_ASSERT((case_bit >> 8) == 0);
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
            SLJIT_ASSERT((case_bit >> 9) == 0);
            if ((case_bit & 0x100) != 0)
              case_bit = (case_bit & 0xff) << 8;
#endif
            if (case_bit != 0)
              stop_char |= case_bit;

            BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
            BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = stop_char;
            BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = case_bit;
          }
        }

        if (use_charpos)
        {
          if (opcode == OP_UPTO)
            OP1(SLJIT_MOV, scratch_base, scratch_off, SLJIT_IMM, max + 1);

          /* Search the first instance of stop_char. */
          skip = JUMP(SLJIT_JUMP);
          loop = LABEL();
          if (opcode == OP_UPTO)
          {
            OP2(SLJIT_SUB | SLJIT_SET_Z, scratch_base, scratch_off, scratch_base, scratch_off, SLJIT_IMM, 1);
            add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
          }
          compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
          if (fast_ptr != 0)
            OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_ptr, STR_PTR, 0);
          JUMPHERE(skip);

          detect_partial_match(common, &backtrack->topbacktracks);
          OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
          if (case_bit != 0)
            OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, case_bit);
          CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, stop_char, loop);

          if (private_data_ptr == 0)
            allocate_stack(common, 2);
          OP1(SLJIT_MOV, save_base, save_off0, STR_PTR, 0);
          OP1(SLJIT_MOV, save_base, save_off1, STR_PTR, 0);
          if (opcode == OP_UPTO)
          {
            OP2(SLJIT_SUB | SLJIT_SET_Z, scratch_base, scratch_off, scratch_base, scratch_off, SLJIT_IMM, 1);
            add_jump(compiler, &stop_list, JUMP(SLJIT_ZERO));
          }

          /* Search the last instance of stop_char. */
          loop = LABEL();
          compile_char1_matchingpath(common, type, cc, &stop_list, FALSE);
          if (fast_ptr != 0)
            OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_ptr, STR_PTR, 0);
          detect_partial_match(common, &stop_list);
          OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
          if (case_bit != 0)
            OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, case_bit);
          if (opcode == OP_STAR)
          {
            CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, stop_char, loop);
            OP1(SLJIT_MOV, save_base, save_off0, STR_PTR, 0);
          }
          else
          {
            skip = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, stop_char);
            OP1(SLJIT_MOV, save_base, save_off0, STR_PTR, 0);
            JUMPHERE(skip);
          }

          if (opcode == OP_UPTO)
          {
            OP2(SLJIT_SUB | SLJIT_SET_Z, scratch_base, scratch_off, scratch_base, scratch_off, SLJIT_IMM, 1);
            JUMPTO(SLJIT_NOT_ZERO, loop);
          }
          else
            JUMPTO(SLJIT_JUMP, loop);

          /* Continue one code unit past the last recorded stop_char. */
          set_jumps(stop_list, LABEL());
          OP1(SLJIT_MOV, STR_PTR, 0, save_base, save_off0);
          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
          OP1(SLJIT_MOV, save_base, save_off0, STR_PTR, 0);
        }
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
        else if (common->utf)
        {
          if (private_data_ptr == 0)
            allocate_stack(common, 2);

          OP1(SLJIT_MOV, save_base, save_off0, STR_PTR, 0);
          OP1(SLJIT_MOV, save_base, save_off1, STR_PTR, 0);

          if (opcode == OP_UPTO)
            OP1(SLJIT_MOV, scratch_base, scratch_off, SLJIT_IMM, max);

          /* The position after each successful match is stored in slot 0. */
          loop = LABEL();
          compile_char1_matchingpath(common, type, cc, &stop_list, TRUE);
          OP1(SLJIT_MOV, save_base, save_off0, STR_PTR, 0);

          if (opcode == OP_UPTO)
          {
            OP2(SLJIT_SUB | SLJIT_SET_Z, scratch_base, scratch_off, scratch_base, scratch_off, SLJIT_IMM, 1);
            JUMPTO(SLJIT_NOT_ZERO, loop);
          }
          else
            JUMPTO(SLJIT_JUMP, loop);

          set_jumps(stop_list, LABEL());
          OP1(SLJIT_MOV, STR_PTR, 0, save_base, save_off0);
          if (fast_ptr != 0)
            OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_ptr, STR_PTR, 0);
        }
#endif
        else
        {
          if (private_data_ptr == 0)
            allocate_stack(common, 2);

          OP1(SLJIT_MOV, save_base, save_off1, STR_PTR, 0);
          if (opcode == OP_UPTO)
            OP1(SLJIT_MOV, scratch_base, scratch_off, SLJIT_IMM, max);

          loop = LABEL();
          detect_partial_match(common, &stop_list);
          compile_char1_matchingpath(common, type, cc, &overrun_list, FALSE);
          if (opcode == OP_UPTO)
          {
            OP2(SLJIT_SUB | SLJIT_SET_Z, scratch_base, scratch_off, scratch_base, scratch_off, SLJIT_IMM, 1);
            JUMPTO(SLJIT_NOT_ZERO, loop);
            /* Cancels the subtraction emitted right below. */
            OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
          }
          else
            JUMPTO(SLJIT_JUMP, loop);

          set_jumps(overrun_list, LABEL());
          OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
          set_jumps(stop_list, LABEL());
          OP1(SLJIT_MOV, save_base, save_off0, STR_PTR, 0);
          if (fast_ptr != 0)
            OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_ptr, STR_PTR, 0);
        }
      }
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
      break;

    case OP_MINSTAR:
      if (private_data_ptr == 0)
        allocate_stack(common, 1);
      OP1(SLJIT_MOV, save_base, save_off0, STR_PTR, 0);
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
      if (fast_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_ptr, STR_PTR, 0);
      break;

    case OP_MINUPTO:
      SLJIT_ASSERT(fast_ptr == 0);
      if (private_data_ptr == 0)
        allocate_stack(common, 2);
      OP1(SLJIT_MOV, save_base, save_off0, STR_PTR, 0);
      OP1(SLJIT_MOV, save_base, save_off1, SLJIT_IMM, max + 1);
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
      break;

    case OP_QUERY:
    case OP_MINQUERY:
      SLJIT_ASSERT(fast_ptr == 0);
      if (private_data_ptr == 0)
        allocate_stack(common, 1);
      OP1(SLJIT_MOV, save_base, save_off0, STR_PTR, 0);
      if (opcode == OP_QUERY)
        compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
      break;

    case OP_EXACT:
      /* Fully handled by the mandatory part above. */
      break;

    case OP_POSSTAR:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
      if (common->utf)
      {
        OP1(SLJIT_MOV, scratch_base, scratch_off, STR_PTR, 0);
        loop = LABEL();
        compile_char1_matchingpath(common, type, cc, &stop_list, TRUE);
        OP1(SLJIT_MOV, scratch_base, scratch_off, STR_PTR, 0);
        JUMPTO(SLJIT_JUMP, loop);
        set_jumps(stop_list, LABEL());
        OP1(SLJIT_MOV, STR_PTR, 0, scratch_base, scratch_off);
        if (fast_ptr != 0)
          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_ptr, STR_PTR, 0);
        break;
      }
#endif
      loop = LABEL();
      detect_partial_match(common, &stop_list);
      compile_char1_matchingpath(common, type, cc, &overrun_list, FALSE);
      JUMPTO(SLJIT_JUMP, loop);
      set_jumps(overrun_list, LABEL());
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      set_jumps(stop_list, LABEL());
      if (fast_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_ptr, STR_PTR, 0);
      break;

    case OP_POSUPTO:
      SLJIT_ASSERT(fast_ptr == 0);
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
      if (common->utf)
      {
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
        OP1(SLJIT_MOV, scratch_base, scratch_off, SLJIT_IMM, max);
        loop = LABEL();
        compile_char1_matchingpath(common, type, cc, &stop_list, TRUE);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
        OP2(SLJIT_SUB | SLJIT_SET_Z, scratch_base, scratch_off, scratch_base, scratch_off, SLJIT_IMM, 1);
        JUMPTO(SLJIT_NOT_ZERO, loop);
        set_jumps(stop_list, LABEL());
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
        break;
      }
#endif
      OP1(SLJIT_MOV, scratch_base, scratch_off, SLJIT_IMM, max);
      loop = LABEL();
      detect_partial_match(common, &stop_list);
      compile_char1_matchingpath(common, type, cc, &overrun_list, FALSE);
      OP2(SLJIT_SUB | SLJIT_SET_Z, scratch_base, scratch_off, scratch_base, scratch_off, SLJIT_IMM, 1);
      JUMPTO(SLJIT_NOT_ZERO, loop);
      /* Cancels the subtraction emitted right below. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      set_jumps(overrun_list, LABEL());
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      set_jumps(stop_list, LABEL());
      break;

    case OP_POSQUERY:
      SLJIT_ASSERT(fast_ptr == 0);
      OP1(SLJIT_MOV, scratch_base, scratch_off, STR_PTR, 0);
      compile_char1_matchingpath(common, type, cc, &stop_list, TRUE);
      OP1(SLJIT_MOV, scratch_base, scratch_off, STR_PTR, 0);
      set_jumps(stop_list, LABEL());
      OP1(SLJIT_MOV, STR_PTR, 0, scratch_base, scratch_off);
      break;

    default:
      SLJIT_UNREACHABLE();
      break;
  }

  count_match(common);
  return end;
}
9337
compile_fail_accept_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)9338 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
9339 {
9340 DEFINE_COMPILER;
9341 backtrack_common *backtrack;
9342
9343 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9344
9345 if (*cc == OP_FAIL)
9346 {
9347 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9348 return cc + 1;
9349 }
9350
9351 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
9352 {
9353 /* No need to check notempty conditions. */
9354 if (common->accept_label == NULL)
9355 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
9356 else
9357 JUMPTO(SLJIT_JUMP, common->accept_label);
9358 return cc + 1;
9359 }
9360
9361 if (common->accept_label == NULL)
9362 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
9363 else
9364 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
9365 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9366 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
9367 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9368 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
9369 if (common->accept_label == NULL)
9370 add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9371 else
9372 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
9373 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9374 if (common->accept_label == NULL)
9375 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
9376 else
9377 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
9378 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9379 return cc + 1;
9380 }
9381
compile_close_matchingpath(compiler_common * common,pcre_uchar * cc)9382 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
9383 {
9384 DEFINE_COMPILER;
9385 int offset = GET2(cc, 1);
9386 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
9387
9388 /* Data will be discarded anyway... */
9389 if (common->currententry != NULL)
9390 return cc + 1 + IMM2_SIZE;
9391
9392 if (!optimized_cbracket)
9393 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
9394 offset <<= 1;
9395 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9396 if (!optimized_cbracket)
9397 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9398 return cc + 1 + IMM2_SIZE;
9399 }
9400
compile_control_verb_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)9401 static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
9402 {
9403 DEFINE_COMPILER;
9404 backtrack_common *backtrack;
9405 pcre_uchar opcode = *cc;
9406 pcre_uchar *ccend = cc + 1;
9407
9408 if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
9409 ccend += 2 + cc[1];
9410
9411 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9412
9413 if (opcode == OP_SKIP)
9414 {
9415 allocate_stack(common, 1);
9416 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9417 return ccend;
9418 }
9419
9420 if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
9421 {
9422 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9423 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
9424 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
9425 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
9426 }
9427
9428 return ccend;
9429 }
9430
9431 static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
9432
compile_then_trap_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,backtrack_common * parent)9433 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
9434 {
9435 DEFINE_COMPILER;
9436 backtrack_common *backtrack;
9437 BOOL needs_control_head;
9438 int size;
9439
9440 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
9441 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
9442 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
9443 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
9444 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
9445
9446 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9447 size = 3 + (size < 0 ? 0 : size);
9448
9449 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9450 allocate_stack(common, size);
9451 if (size > 3)
9452 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
9453 else
9454 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
9455 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
9456 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
9457 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
9458
9459 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9460 if (size >= 0)
9461 init_frame(common, cc, ccend, size - 1, 0, FALSE);
9462 }
9463
/* Jump list that receives the failure jumps of items which do not push a
   backtrack record of their own: the nextbacktracks list of the most recent
   record on parent, or parent's own topbacktracks list when there is none.
   It is re-evaluated at every use because parent->top changes as records
   are pushed. */
#define MATCHINGPATH_BACKTRACKS \
  (parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks)

/* Emits the matching path for the opcodes in [cc, ccend), handing each one
   to the compile_* routine for its kind.

   When common->then_offsets marks the starting offset, the sequence is
   wrapped in a then-trap: a tail record is pushed before the first opcode
   and a head record after the last one, and common->then_trap is restored
   afterwards.

   The function returns early if a helper returns NULL or if one of the
   PUSH_BACKTRACK_NOVALUE allocations fails. */
static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
{
  DEFINE_COMPILER;
  backtrack_common *backtrack;
  BOOL has_then_trap = FALSE;
  then_trap_backtrack *save_then_trap = NULL;
  pcre_uchar repeat;

  SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

  if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
    SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
    has_then_trap = TRUE;
    save_then_trap = common->then_trap;
    /* Tail item on backtrack. */
    compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

  while (cc < ccend)
  {
    switch (*cc)
    {
      /* Simple assertions. */
      case OP_SOD:
      case OP_SOM:
      case OP_NOT_WORD_BOUNDARY:
      case OP_WORD_BOUNDARY:
      case OP_EODN:
      case OP_EOD:
      case OP_DOLL:
      case OP_DOLLM:
      case OP_CIRC:
      case OP_CIRCM:
      case OP_REVERSE:
        cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, MATCHINGPATH_BACKTRACKS);
        break;

      /* Unrepeated character types and negated characters. */
      case OP_NOT_DIGIT:
      case OP_DIGIT:
      case OP_NOT_WHITESPACE:
      case OP_WHITESPACE:
      case OP_NOT_WORDCHAR:
      case OP_WORDCHAR:
      case OP_ANY:
      case OP_ALLANY:
      case OP_ANYBYTE:
      case OP_NOTPROP:
      case OP_PROP:
      case OP_ANYNL:
      case OP_NOT_HSPACE:
      case OP_HSPACE:
      case OP_NOT_VSPACE:
      case OP_VSPACE:
      case OP_EXTUNI:
      case OP_NOT:
      case OP_NOTI:
        cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_BACKTRACKS, TRUE);
        break;

      case OP_SET_SOM:
        /* The old OVECTOR(0) value is saved on the backtrack stack before
           the slot is overwritten with STR_PTR. */
        PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
        allocate_stack(common, 1);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
        cc++;
        break;

      case OP_CHAR:
      case OP_CHARI:
        if (common->mode == JIT_COMPILE)
          cc = compile_charn_matchingpath(common, cc, ccend, MATCHINGPATH_BACKTRACKS);
        else
          cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_BACKTRACKS, TRUE);
        break;

      /* Repeated single characters. */
      case OP_STAR:
      case OP_MINSTAR:
      case OP_PLUS:
      case OP_MINPLUS:
      case OP_QUERY:
      case OP_MINQUERY:
      case OP_UPTO:
      case OP_MINUPTO:
      case OP_EXACT:
      case OP_POSSTAR:
      case OP_POSPLUS:
      case OP_POSQUERY:
      case OP_POSUPTO:
      /* ... caseless. */
      case OP_STARI:
      case OP_MINSTARI:
      case OP_PLUSI:
      case OP_MINPLUSI:
      case OP_QUERYI:
      case OP_MINQUERYI:
      case OP_UPTOI:
      case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSSTARI:
      case OP_POSPLUSI:
      case OP_POSQUERYI:
      case OP_POSUPTOI:
      /* ... negated. */
      case OP_NOTSTAR:
      case OP_NOTMINSTAR:
      case OP_NOTPLUS:
      case OP_NOTMINPLUS:
      case OP_NOTQUERY:
      case OP_NOTMINQUERY:
      case OP_NOTUPTO:
      case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSPLUS:
      case OP_NOTPOSQUERY:
      case OP_NOTPOSUPTO:
      /* ... negated, caseless. */
      case OP_NOTSTARI:
      case OP_NOTMINSTARI:
      case OP_NOTPLUSI:
      case OP_NOTMINPLUSI:
      case OP_NOTQUERYI:
      case OP_NOTMINQUERYI:
      case OP_NOTUPTOI:
      case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSSTARI:
      case OP_NOTPOSPLUSI:
      case OP_NOTPOSQUERYI:
      case OP_NOTPOSUPTOI:
      /* Repeated character types. */
      case OP_TYPESTAR:
      case OP_TYPEMINSTAR:
      case OP_TYPEPLUS:
      case OP_TYPEMINPLUS:
      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEEXACT:
      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      case OP_TYPEPOSQUERY:
      case OP_TYPEPOSUPTO:
        cc = compile_iterator_matchingpath(common, cc, parent);
        break;

      case OP_CLASS:
      case OP_NCLASS:
        /* A class is an iterator only when a repeat opcode follows it. */
        repeat = cc[1 + (32 / sizeof(pcre_uchar))];
        if (repeat >= OP_CRSTAR && repeat <= OP_CRPOSRANGE)
          cc = compile_iterator_matchingpath(common, cc, parent);
        else
          cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_BACKTRACKS, TRUE);
        break;

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
      case OP_XCLASS:
        repeat = *(cc + GET(cc, 1));
        if (repeat >= OP_CRSTAR && repeat <= OP_CRPOSRANGE)
          cc = compile_iterator_matchingpath(common, cc, parent);
        else
          cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_BACKTRACKS, TRUE);
        break;
#endif

      case OP_REF:
      case OP_REFI:
        repeat = cc[1 + IMM2_SIZE];
        if (repeat >= OP_CRSTAR && repeat <= OP_CRPOSRANGE)
          cc = compile_ref_iterator_matchingpath(common, cc, parent);
        else
        {
          compile_ref_matchingpath(common, cc, MATCHINGPATH_BACKTRACKS, TRUE, FALSE);
          cc += 1 + IMM2_SIZE;
        }
        break;

      case OP_DNREF:
      case OP_DNREFI:
        repeat = cc[1 + 2 * IMM2_SIZE];
        if (repeat >= OP_CRSTAR && repeat <= OP_CRPOSRANGE)
          cc = compile_ref_iterator_matchingpath(common, cc, parent);
        else
        {
          compile_dnref_search(common, cc, MATCHINGPATH_BACKTRACKS);
          compile_ref_matchingpath(common, cc, MATCHINGPATH_BACKTRACKS, TRUE, FALSE);
          cc += 1 + 2 * IMM2_SIZE;
        }
        break;

      case OP_RECURSE:
        cc = compile_recurse_matchingpath(common, cc, parent);
        break;

      case OP_CALLOUT:
        cc = compile_callout_matchingpath(common, cc, parent);
        break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
        PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
        cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
        break;

      case OP_BRAMINZERO:
        /* The bracket itself is skipped on the matching path; only the
           current position is saved (plus a zero word when the bracket
           ends with OP_KETRMIN). */
        PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
        cc = bracketend(cc + 1);
        if (*(cc - 1 - LINK_SIZE) == OP_KETRMIN)
        {
          allocate_stack(common, 2);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
        }
        else
        {
          allocate_stack(common, 1);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
        }
        BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
        count_match(common);
        break;

      case OP_ONCE:
      case OP_ONCE_NC:
      case OP_BRA:
      case OP_CBRA:
      case OP_COND:
      case OP_SBRA:
      case OP_SCBRA:
      case OP_SCOND:
        cc = compile_bracket_matchingpath(common, cc, parent);
        break;

      case OP_BRAZERO:
        /* OP_BRAZERO precedes either an assertion or an ordinary bracket. */
        if (cc[1] > OP_ASSERTBACK_NOT)
          cc = compile_bracket_matchingpath(common, cc, parent);
        else
        {
          PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
          cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
        }
        break;

      case OP_BRAPOS:
      case OP_CBRAPOS:
      case OP_SBRAPOS:
      case OP_SCBRAPOS:
      case OP_BRAPOSZERO:
        cc = compile_bracketpos_matchingpath(common, cc, parent);
        break;

      case OP_MARK:
        PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
        SLJIT_ASSERT(common->mark_ptr != 0);
        /* Save the previous mark, then publish the new one (cc + 2) both
           locally and in jit_arguments. */
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
        allocate_stack(common, common->has_skip_arg ? 5 : 1);
        OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
        if (common->has_skip_arg)
        {
          /* Link a type_mark entry (name, position) into the control head
             chain. */
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
        }
        cc += 1 + 2 + cc[1];
        break;

      case OP_PRUNE:
      case OP_PRUNE_ARG:
      case OP_SKIP:
      case OP_SKIP_ARG:
      case OP_THEN:
      case OP_THEN_ARG:
      case OP_COMMIT:
        cc = compile_control_verb_matchingpath(common, cc, parent);
        break;

      case OP_FAIL:
      case OP_ACCEPT:
      case OP_ASSERT_ACCEPT:
        cc = compile_fail_accept_matchingpath(common, cc, parent);
        break;

      case OP_CLOSE:
        cc = compile_close_matchingpath(common, cc);
        break;

      case OP_SKIPZERO:
        /* No code is emitted for the bracket that follows. */
        cc = bracketend(cc + 1);
        break;

      default:
        SLJIT_UNREACHABLE();
        return;
    }

    if (cc == NULL)
      return;
  }

  if (has_then_trap)
  {
    /* Head item on backtrack. */
    PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
    BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
    BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
    common->then_trap = save_then_trap;
  }
  SLJIT_ASSERT(cc == ccend);
}

#undef MATCHINGPATH_BACKTRACKS
9774
/* The matching path code above is the last user of these helper macros. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Generates the backtracking path of the given backtrack list and leaves the
calling function immediately when the sljit compiler reports an error. It
expands to a bare "return;", so it is only usable inside void functions that
have both "common" and "compiler" in scope. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Reinterprets the local variable "current" as a pointer to the given
backtrack record type. */
#define CURRENT_AS(type) ((type *)current)
9789
/* Generates the backtracking path of a single character iterator.

The iterator state is kept either on the machine stack (when the opcode has no
private data slot) or in the private data slot itself. The location is chosen
once into base / offset0 / offset1. Slots that live on the stack are released
after the opcode specific code has been generated.

Arguments:
  common      the shared compiler state
  current     the backtrack record of the iterator (a char_iterator_backtrack)
*/
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
pcre_uchar opcode;
pcre_uchar type;
sljit_u32 max = 0, exact;
struct sljit_label *retry = NULL;
struct sljit_jump *exhausted = NULL;
jump_list *failed = NULL;
pcre_uchar *end;
int private_data_ptr = PRIVATE_DATA(cc);
int uses_stack = (private_data_ptr == 0);
int release = 0;
int base, offset0, offset1;

if (uses_stack)
  {
  base = SLJIT_MEM1(STACK_TOP);
  offset0 = STACK(0);
  offset1 = STACK(1);
  }
else
  {
  base = SLJIT_MEM1(SLJIT_SP);
  offset0 = private_data_ptr;
  offset1 = private_data_ptr + (int)sizeof(sljit_sw);
  }

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* Pop one saved STR_PTR; a non-zero value resumes the matching path. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
    break;
    }

  if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
    {
    /* Step backwards until the character before STR_PTR equals the recorded
    one, or until the limit stored in the second slot is reached. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    OP1(SLJIT_MOV, TMP2, 0, base, offset1);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    exhausted = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    retry = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
    skip_char_back(common);
    CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, retry);
    }
  else
    {
    /* Give back one character unless the limit in the second slot is hit. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    exhausted = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
    skip_char_back(common);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
    }
  JUMPHERE(exhausted);
  release = 2;
  break;

  case OP_MINSTAR:
  /* Match one more character, store the new position and continue. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  set_jumps(failed, LABEL());
  release = 1;
  break;

  case OP_MINUPTO:
  /* The second slot is a counter: decrease it and give up when it reaches
  zero, otherwise behave like OP_MINSTAR. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &failed, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);

  set_jumps(failed, LABEL());
  release = 2;
  break;

  case OP_QUERY:
  /* The slot is cleared after it is read, so the saved position is used
  only once. Both entry points below do the same load-and-clear. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  exhausted = JUMP(SLJIT_JUMP);
  set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  JUMPHERE(exhausted);
  release = 1;
  break;

  case OP_MINQUERY:
  /* A zero slot means the optional character has already been tried. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  exhausted = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  set_jumps(failed, LABEL());
  JUMPHERE(exhausted);
  release = 1;
  break;

  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  /* Nothing to generate for these opcodes. */
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

/* Slots kept in private data need no release. */
if (uses_stack && release != 0)
  free_stack(common, release);

set_jumps(current->topbacktracks, LABEL());
}
9917
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)9918 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9919 {
9920 DEFINE_COMPILER;
9921 pcre_uchar *cc = current->cc;
9922 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9923 pcre_uchar type;
9924
9925 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
9926
9927 if ((type & 0x1) == 0)
9928 {
9929 /* Maximize case. */
9930 set_jumps(current->topbacktracks, LABEL());
9931 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9932 free_stack(common, 1);
9933 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9934 return;
9935 }
9936
9937 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9938 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9939 set_jumps(current->topbacktracks, LABEL());
9940 free_stack(common, ref ? 2 : 3);
9941 }
9942
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)9943 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9944 {
9945 DEFINE_COMPILER;
9946
9947 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9948 compile_backtrackingpath(common, current->top);
9949 set_jumps(current->topbacktracks, LABEL());
9950 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9951 return;
9952
9953 if (common->has_set_som && common->mark_ptr != 0)
9954 {
9955 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9956 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9957 free_stack(common, 2);
9958 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
9959 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
9960 }
9961 else if (common->has_set_som || common->mark_ptr != 0)
9962 {
9963 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9964 free_stack(common, 1);
9965 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
9966 }
9967 }
9968
/* Generates the backtracking path of an assertion, optionally prefixed by
OP_BRAZERO (OP_BRAMINZERO is never passed to this function).

With an OP_BRAZERO prefix the top stack slot holds a saved STR_PTR; a non-zero
value means the assertion still has to be tried through the matching path.

Arguments:
  common      the shared compiler state
  current     the backtrack record (an assert_backtrack)
*/
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
struct sljit_jump *skip_retry = NULL;
int has_brazero;
int is_positive;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
has_brazero = (*cc == OP_BRAZERO);
if (has_brazero)
  {
  cc++;
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (CURRENT_AS(assert_backtrack)->framesize < 0)
  {
  /* No frame was saved, so there is nothing to revert. */
  set_jumps(current->topbacktracks, LABEL());
  if (!has_brazero)
    return;

  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
  free_stack(common, 1);
  return;
  }

is_positive = (*cc == OP_ASSERT || *cc == OP_ASSERTBACK);

if (has_brazero)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  skip_retry = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (is_positive)
  {
  /* Revert the saved frame and restore the previous private data value. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(assert_backtrack)->framesize - 1));
  }

set_jumps(current->topbacktracks, LABEL());

if (has_brazero)
  {
  /* We know there is enough place on the stack. */
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
  JUMPHERE(skip_retry);
  }
}
10035
/* Generates the backtracking path of a bracket type opcode (OP_BRA, OP_CBRA,
OP_ONCE, OP_COND and their S variants), optionally prefixed by OP_BRAZERO or
OP_BRAMINZERO.

The generated code is laid out in this order:
  1. restoring the repeat counter and handling the KETRMAX / KETRMIN / BRAZERO
     entry conditions,
  2. restoring the capture state saved by the matching path,
  3. dispatching to the backtracking code of the alternative which matched,
  4. the backtracking code of the first alternative, then for each further
     alternative its matching path followed by its own backtracking code,
  5. releasing the remaining stack items and deciding whether another
     iteration of the group has to be tried.

The order of the emitted instructions and the number of stack slots released
must mirror what the matching path pushed, so statements must not be
reordered.

Arguments:
  common      the shared compiler state
  current     the backtrack record (a bracket_backtrack)
*/
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int opcode, stacksize, alt_count, alt_max;
int offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
pcre_uchar *cc = current->cc;
pcre_uchar *ccbegin;
pcre_uchar *ccprev;
pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *assert;
sljit_uw *next_update_addr = NULL;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *alt1 = NULL;
struct sljit_jump *alt2 = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;

/* Skip the optional zero-repeat prefix, but remember which one it was. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
/* ccbegin temporarily points to the closing ket opcode of the group. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
ket = *ccbegin;
/* A plain OP_KET with non-zero private data describes a counted repeat: the
counter slot, the repeat type and the repeat count are read from the private
data table. */
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  {
  repeat_ptr = PRIVATE_DATA(ccbegin);
  repeat_type = PRIVATE_DATA(ccbegin + 2);
  repeat_count = PRIVATE_DATA(ccbegin + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  /* Counted repeats are handled as the corresponding repeating ket below. */
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }
/* From here on ccbegin is the bracket opcode and cc is the end of the first
alternative. */
ccbegin = cc;
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* offset is twice the capture number read from the opcode; it stays zero for
non-capturing groups. */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  opcode = OP_ONCE;

alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;

/* Decoding the needs_control_head in framesize. */
if (opcode == OP_ONCE)
  {
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  }

/* Restore the repeat counter saved on the stack. */
if (ket != OP_KET && repeat_type != 0)
  {
  /* TMP1 is used in OP_KETRMIN below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    /* A zero value on the stack skips the whole group backtracking code;
    the jump is resolved at the end of this function. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 was set a few lines above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
    }
  /* The OP_KETRMIN code at the end of this function jumps back here. */
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_type == OP_EXACT)
  {
  /* The counter restarts from 1; the OP_EXACT code at the end of this
  function loops back to exact_label until repeat_count is exceeded. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  exact_label = LABEL();
  }

/* Restore the capture related values of a capturing group. */
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  }

/* Dispatch to the backtracking code of the alternative which matched. The
matching path stored its index on the stack as a multiple of
sizeof(sljit_uw). */
if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    }
  /* Resolved near the end of this function, after the alternatives. */
  once = JUMP(SLJIT_JUMP);
  }
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  if (has_alternatives)
    {
    /* Always exactly one alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    alt_max = 2;
    alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }
else if (has_alternatives)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (alt_max > 4)
    {
    /* Table jump if alt_max is greater than 4. */
    next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
    if (SLJIT_UNLIKELY(next_update_addr == NULL))
      return;
    sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
    add_label_addr(common, next_update_addr++);
    }
  else
    {
    /* Up to four alternatives are selected by a short compare chain; alt1
    and alt2 are re-used and re-targeted inside the alternative loop. */
    if (alt_max == 4)
      alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
    alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }

/* Backtracking code of the first alternative. */
COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  /* Conditional block always has at most one alternative. */
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
    {
    SLJIT_ASSERT(has_alternatives);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-assert->framesize - 1));
      }
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
    }
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

if (has_alternatives)
  {
  /* alt_count is the value stored on the stack for the alternative whose
  matching path is generated in the current iteration. */
  alt_count = sizeof(sljit_uw);
  do
    {
    /* The backtrack record is re-used for every alternative. */
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      ccprev = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      if (opcode != OP_COND && opcode != OP_SCOND)
        {
        /* Reload the subject position where the group was entered. */
        if (opcode != OP_ONCE)
          {
          if (private_data_ptr != 0)
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
          else
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
          }
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        }
      compile_matchingpath(common, ccprev, cc, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;
      }

    /* Instructions after the current alternative is successfully matched. */
    /* There is a similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack slots which are needed. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
      stacksize++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: fill the slots in the same order as they were counted. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      if (ket != OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    /* Record which alternative matched for the dispatch code above. */
    if (opcode != OP_ONCE)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);

    /* The backtracking code of this alternative starts here: bind the
    matching dispatch entry (jump table slot or compare chain jump). */
    if (opcode != OP_ONCE)
      {
      if (alt_max > 4)
        add_label_addr(common, next_update_addr++);
      else
        {
        if (alt_count != 2 * sizeof(sljit_uw))
          {
          JUMPHERE(alt1);
          if (alt_max == 3 && alt_count == sizeof(sljit_uw))
            alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
          }
        else
          {
          JUMPHERE(alt2);
          if (alt_max == 4)
            alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
          }
        }
      alt_count += sizeof(sljit_uw);
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);

  if (cond != NULL)
    {
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-assert->framesize - 1));
      }
    JUMPHERE(cond);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

/* All alternatives are exhausted: restore the values saved when the group
was entered. */
if (offset != 0)
  {
  /* Using both tmp register is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  cc = ccbegin + GET(ccbegin, 1);
  stacksize = needs_control_head ? 1 : 0;

  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }

  if (stacksize > 0)
    free_stack(common, stacksize);

  JUMPHERE(once);
  /* Restore previous private_data_ptr */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the comment below. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

/* Decide whether the group has to be tried again. */
if (repeat_type == OP_EXACT)
  {
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
    JUMPHERE(brazero);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. The extra release would
  affect badly the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}
10497
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)10498 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10499 {
10500 DEFINE_COMPILER;
10501 int offset;
10502 struct sljit_jump *jump;
10503
10504 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
10505 {
10506 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
10507 {
10508 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
10509 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10510 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10511 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10512 if (common->capture_last_ptr != 0)
10513 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
10514 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
10515 if (common->capture_last_ptr != 0)
10516 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
10517 }
10518 set_jumps(current->topbacktracks, LABEL());
10519 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10520 return;
10521 }
10522
10523 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
10524 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10525
10526 if (current->topbacktracks)
10527 {
10528 jump = JUMP(SLJIT_JUMP);
10529 set_jumps(current->topbacktracks, LABEL());
10530 /* Drop the stack frame. */
10531 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10532 JUMPHERE(jump);
10533 }
10534 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
10535 }
10536
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)10537 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10538 {
10539 assert_backtrack backtrack;
10540
10541 current->top = NULL;
10542 current->topbacktracks = NULL;
10543 current->nextbacktracks = NULL;
10544 if (current->cc[1] > OP_ASSERTBACK_NOT)
10545 {
10546 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
10547 compile_bracket_matchingpath(common, current->cc, current);
10548 compile_bracket_backtrackingpath(common, current->top);
10549 }
10550 else
10551 {
10552 memset(&backtrack, 0, sizeof(backtrack));
10553 backtrack.common.cc = current->cc;
10554 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
10555 /* Manual call of compile_assert_matchingpath. */
10556 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
10557 }
10558 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
10559 }
10560
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)10561 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10562 {
10563 DEFINE_COMPILER;
10564 pcre_uchar opcode = *current->cc;
10565 struct sljit_label *loop;
10566 struct sljit_jump *jump;
10567
10568 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
10569 {
10570 if (common->then_trap != NULL)
10571 {
10572 SLJIT_ASSERT(common->control_head_ptr != 0);
10573
10574 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10575 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
10576 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
10577 jump = JUMP(SLJIT_JUMP);
10578
10579 loop = LABEL();
10580 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10581 JUMPHERE(jump);
10582 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
10583 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
10584 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
10585 return;
10586 }
10587 else if (common->positive_assert)
10588 {
10589 add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
10590 return;
10591 }
10592 }
10593
10594 if (common->local_exit)
10595 {
10596 if (common->quit_label == NULL)
10597 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10598 else
10599 JUMPTO(SLJIT_JUMP, common->quit_label);
10600 return;
10601 }
10602
10603 if (opcode == OP_SKIP_ARG)
10604 {
10605 SLJIT_ASSERT(common->control_head_ptr != 0);
10606 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10607 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
10608 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
10609 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
10610 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10611
10612 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10613 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));
10614 return;
10615 }
10616
10617 if (opcode == OP_SKIP)
10618 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10619 else
10620 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
10621 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
10622 }
10623
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)10624 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10625 {
10626 DEFINE_COMPILER;
10627 struct sljit_jump *jump;
10628 int size;
10629
10630 if (CURRENT_AS(then_trap_backtrack)->then_trap)
10631 {
10632 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
10633 return;
10634 }
10635
10636 size = CURRENT_AS(then_trap_backtrack)->framesize;
10637 size = 3 + (size < 0 ? 0 : size);
10638
10639 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
10640 free_stack(common, size);
10641 jump = JUMP(SLJIT_JUMP);
10642
10643 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
10644 /* STACK_TOP is set by THEN. */
10645 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
10646 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10647 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10648 free_stack(common, 3);
10649
10650 JUMPHERE(jump);
10651 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10652 }
10653
/* Generates the backtracking paths for a chain of backtrack records.

Starting at 'current', the chain is followed through the 'prev' links. For
each record, pending nextbacktracks jumps are bound to a fresh label and
the opcode at current->cc selects the emitter. common->then_trap may be
changed while the chain is processed; its value on entry is restored
before returning. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *then_trap_on_entry = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch (*current->cc)
    {
    case OP_SET_SOM:
    /* Pop the saved value and store it back into OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character iterators. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    /* Caseless variants. */
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    /* Negated variants. */
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    /* Negated caseless variants. */
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    /* Character type iterators. */
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    /* Character classes. */
    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ONCE: case OP_ONCE_NC:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode following OP_BRAZERO decides between the assertion and
    the bracket emitter. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS:
    case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five slots are released: STACK(4) goes back to mark_ptr and
      STACK(0) goes back to the control head. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* A single slot holds the value restored into mark_ptr. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    /* The return value is set to PCRE_ERROR_NOMATCH only when not in
    local exit mode; the jump to the quit target is emitted either way. */
    if (!common->local_exit)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      {
      JUMPTO(SLJIT_JUMP, common->quit_label);
      }
    else
      {
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
      }
    break;

    case OP_CALLOUT: case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    /* Nothing to undo: only bind the collected backtrack jumps. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = then_trap_on_entry;
}
10840
compile_recurse(compiler_common * common)10841 static SLJIT_INLINE void compile_recurse(compiler_common *common)
10842 {
10843 DEFINE_COMPILER;
10844 pcre_uchar *cc = common->start + common->currententry->start;
10845 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
10846 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
10847 BOOL needs_control_head;
10848 int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
10849 int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
10850 int alternativesize;
10851 BOOL needs_frame;
10852 backtrack_common altbacktrack;
10853 struct sljit_jump *jump;
10854
10855 /* Recurse captures then. */
10856 common->then_trap = NULL;
10857
10858 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
10859 needs_frame = framesize >= 0;
10860 if (!needs_frame)
10861 framesize = 0;
10862 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
10863
10864 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
10865 common->currententry->entry = LABEL();
10866 set_jumps(common->currententry->calls, common->currententry->entry);
10867
10868 sljit_emit_fast_enter(compiler, TMP2, 0);
10869 count_match(common);
10870 allocate_stack(common, private_data_size + framesize + alternativesize);
10871 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
10872 copy_private_data(common, ccbegin, ccend, TRUE, framesize + alternativesize, private_data_size + framesize + alternativesize, needs_control_head);
10873 if (needs_control_head)
10874 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
10875 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
10876 if (needs_frame)
10877 init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
10878
10879 if (alternativesize > 0)
10880 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10881
10882 memset(&altbacktrack, 0, sizeof(backtrack_common));
10883 common->quit_label = NULL;
10884 common->accept_label = NULL;
10885 common->quit = NULL;
10886 common->accept = NULL;
10887 altbacktrack.cc = ccbegin;
10888 cc += GET(cc, 1);
10889 while (1)
10890 {
10891 altbacktrack.top = NULL;
10892 altbacktrack.topbacktracks = NULL;
10893
10894 if (altbacktrack.cc != ccbegin)
10895 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10896
10897 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
10898 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10899 return;
10900
10901 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
10902
10903 compile_backtrackingpath(common, altbacktrack.top);
10904 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10905 return;
10906 set_jumps(altbacktrack.topbacktracks, LABEL());
10907
10908 if (*cc != OP_ALT)
10909 break;
10910
10911 altbacktrack.cc = cc + 1 + LINK_SIZE;
10912 cc += GET(cc, 1);
10913 }
10914
10915 /* None of them matched. */
10916 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10917 jump = JUMP(SLJIT_JUMP);
10918
10919 if (common->quit != NULL)
10920 {
10921 set_jumps(common->quit, LABEL());
10922 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10923 if (needs_frame)
10924 {
10925 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10926 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10927 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10928 }
10929 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10930 common->quit = NULL;
10931 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10932 }
10933
10934 set_jumps(common->accept, LABEL());
10935 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10936 if (needs_frame)
10937 {
10938 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10939 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10940 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10941 }
10942 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
10943
10944 JUMPHERE(jump);
10945 if (common->quit != NULL)
10946 set_jumps(common->quit, LABEL());
10947 copy_private_data(common, ccbegin, ccend, FALSE, framesize + alternativesize, private_data_size + framesize + alternativesize, needs_control_head);
10948 free_stack(common, private_data_size + framesize + alternativesize);
10949 if (needs_control_head)
10950 {
10951 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-3));
10952 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10953 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
10954 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10955 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
10956 }
10957 else
10958 {
10959 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10960 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10961 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
10962 }
10963 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), STACK(-1));
10964 }
10965
10966 #undef COMPILE_BACKTRACKINGPATH
10967 #undef CURRENT_AS
10968
10969 void
PRIV(jit_compile)10970 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
10971 {
10972 struct sljit_compiler *compiler;
10973 backtrack_common rootbacktrack;
10974 compiler_common common_data;
10975 compiler_common *common = &common_data;
10976 const sljit_u8 *tables = re->tables;
10977 pcre_study_data *study;
10978 int private_data_size;
10979 pcre_uchar *ccend;
10980 executable_functions *functions;
10981 void *executable_func;
10982 sljit_uw executable_size;
10983 sljit_uw total_length;
10984 label_addr_list *label_addr;
10985 struct sljit_label *mainloop_label = NULL;
10986 struct sljit_label *continue_match_label;
10987 struct sljit_label *empty_match_found_label = NULL;
10988 struct sljit_label *empty_match_backtrack_label = NULL;
10989 struct sljit_label *reset_match_label;
10990 struct sljit_label *quit_label;
10991 struct sljit_jump *jump;
10992 struct sljit_jump *minlength_check_failed = NULL;
10993 struct sljit_jump *reqbyte_notfound = NULL;
10994 struct sljit_jump *empty_match = NULL;
10995
10996 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
10997 study = extra->study_data;
10998
10999 if (!tables)
11000 tables = PRIV(default_tables);
11001
11002 memset(&rootbacktrack, 0, sizeof(backtrack_common));
11003 memset(common, 0, sizeof(compiler_common));
11004 rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
11005
11006 common->start = rootbacktrack.cc;
11007 common->read_only_data_head = NULL;
11008 common->fcc = tables + fcc_offset;
11009 common->lcc = (sljit_sw)(tables + lcc_offset);
11010 common->mode = mode;
11011 common->might_be_empty = study->minlength == 0;
11012 common->nltype = NLTYPE_FIXED;
11013 switch(re->options & PCRE_NEWLINE_BITS)
11014 {
11015 case 0:
11016 /* Compile-time default */
11017 switch(NEWLINE)
11018 {
11019 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
11020 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
11021 default: common->newline = NEWLINE; break;
11022 }
11023 break;
11024 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
11025 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
11026 case PCRE_NEWLINE_CR+
11027 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
11028 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
11029 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
11030 default: return;
11031 }
11032 common->nlmax = READ_CHAR_MAX;
11033 common->nlmin = 0;
11034 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
11035 common->bsr_nltype = NLTYPE_ANYCRLF;
11036 else if ((re->options & PCRE_BSR_UNICODE) != 0)
11037 common->bsr_nltype = NLTYPE_ANY;
11038 else
11039 {
11040 #ifdef BSR_ANYCRLF
11041 common->bsr_nltype = NLTYPE_ANYCRLF;
11042 #else
11043 common->bsr_nltype = NLTYPE_ANY;
11044 #endif
11045 }
11046 common->bsr_nlmax = READ_CHAR_MAX;
11047 common->bsr_nlmin = 0;
11048 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
11049 common->ctypes = (sljit_sw)(tables + ctypes_offset);
11050 common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
11051 common->name_count = re->name_count;
11052 common->name_entry_size = re->name_entry_size;
11053 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
11054 #ifdef SUPPORT_UTF
11055 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
11056 common->utf = (re->options & PCRE_UTF8) != 0;
11057 #ifdef SUPPORT_UCP
11058 common->use_ucp = (re->options & PCRE_UCP) != 0;
11059 #endif
11060 if (common->utf)
11061 {
11062 if (common->nltype == NLTYPE_ANY)
11063 common->nlmax = 0x2029;
11064 else if (common->nltype == NLTYPE_ANYCRLF)
11065 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
11066 else
11067 {
11068 /* We only care about the first newline character. */
11069 common->nlmax = common->newline & 0xff;
11070 }
11071
11072 if (common->nltype == NLTYPE_FIXED)
11073 common->nlmin = common->newline & 0xff;
11074 else
11075 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
11076
11077 if (common->bsr_nltype == NLTYPE_ANY)
11078 common->bsr_nlmax = 0x2029;
11079 else
11080 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
11081 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
11082 }
11083 #endif /* SUPPORT_UTF */
11084 ccend = bracketend(common->start);
11085
11086 /* Calculate the local space size on the stack. */
11087 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
11088 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
11089 if (!common->optimized_cbracket)
11090 return;
11091 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
11092 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
11093 #else
11094 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
11095 #endif
11096
11097 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
11098 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
11099 common->capture_last_ptr = common->ovector_start;
11100 common->ovector_start += sizeof(sljit_sw);
11101 #endif
11102 if (!check_opcode_types(common, common->start, ccend))
11103 {
11104 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11105 return;
11106 }
11107
11108 /* Checking flags and updating ovector_start. */
11109 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11110 {
11111 common->req_char_ptr = common->ovector_start;
11112 common->ovector_start += sizeof(sljit_sw);
11113 }
11114 if (mode != JIT_COMPILE)
11115 {
11116 common->start_used_ptr = common->ovector_start;
11117 common->ovector_start += sizeof(sljit_sw);
11118 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11119 {
11120 common->hit_start = common->ovector_start;
11121 common->ovector_start += 2 * sizeof(sljit_sw);
11122 }
11123 }
11124 if ((re->options & PCRE_FIRSTLINE) != 0)
11125 {
11126 common->match_end_ptr = common->ovector_start;
11127 common->ovector_start += sizeof(sljit_sw);
11128 }
11129 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
11130 common->control_head_ptr = 1;
11131 #endif
11132 if (common->control_head_ptr != 0)
11133 {
11134 common->control_head_ptr = common->ovector_start;
11135 common->ovector_start += sizeof(sljit_sw);
11136 }
11137 if (common->has_set_som)
11138 {
11139 /* Saving the real start pointer is necessary. */
11140 common->start_ptr = common->ovector_start;
11141 common->ovector_start += sizeof(sljit_sw);
11142 }
11143
11144 /* Aligning ovector to even number of sljit words. */
11145 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
11146 common->ovector_start += sizeof(sljit_sw);
11147
11148 if (common->start_ptr == 0)
11149 common->start_ptr = OVECTOR(0);
11150
11151 /* Capturing brackets cannot be optimized if callouts are allowed. */
11152 if (common->capture_last_ptr != 0)
11153 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
11154
11155 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
11156 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
11157
11158 total_length = ccend - common->start;
11159 common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), compiler->allocator_data);
11160 if (!common->private_data_ptrs)
11161 {
11162 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11163 return;
11164 }
11165 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
11166
11167 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
11168 set_private_data_ptrs(common, &private_data_size, ccend);
11169 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11170 {
11171 if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
11172 detect_fast_fail(common, common->start, &private_data_size, 4);
11173 }
11174
11175 SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);
11176
11177 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
11178 {
11179 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11180 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11181 return;
11182 }
11183
11184 if (common->has_then)
11185 {
11186 common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
11187 memset(common->then_offsets, 0, total_length);
11188 set_then_offsets(common, common->start, NULL);
11189 }
11190
11191 compiler = sljit_create_compiler(NULL);
11192 if (!compiler)
11193 {
11194 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11195 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11196 return;
11197 }
11198 common->compiler = compiler;
11199
11200 /* Main pcre_jit_exec entry. */
11201 sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
11202
11203 /* Register init. */
11204 reset_ovector(common, (re->top_bracket + 1) * 2);
11205 if (common->req_char_ptr != 0)
11206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
11207
11208 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
11209 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
11210 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11211 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
11212 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11213 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
11214 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
11215 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
11216 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11217 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
11218
11219 if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
11220 reset_fast_fail(common);
11221
11222 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11223 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11224 if (common->mark_ptr != 0)
11225 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
11226 if (common->control_head_ptr != 0)
11227 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
11228
11229 /* Main part of the matching */
11230 if ((re->options & PCRE_ANCHORED) == 0)
11231 {
11232 mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0);
11233 continue_match_label = LABEL();
11234 /* Forward search if possible. */
11235 if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
11236 {
11237 if (mode == JIT_COMPILE && fast_forward_first_n_chars(common))
11238 ;
11239 else if ((re->flags & PCRE_FIRSTSET) != 0)
11240 fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0);
11241 else if ((re->flags & PCRE_STARTLINE) != 0)
11242 fast_forward_newline(common);
11243 else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
11244 fast_forward_start_bits(common, study->start_bits);
11245 }
11246 }
11247 else
11248 continue_match_label = LABEL();
11249
11250 if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11251 {
11252 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
11253 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
11254 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
11255 }
11256 if (common->req_char_ptr != 0)
11257 reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
11258
11259 /* Store the current STR_PTR in OVECTOR(0). */
11260 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11261 /* Copy the limit of allowed recursions. */
11262 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
11263 if (common->capture_last_ptr != 0)
11264 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
11265 if (common->fast_forward_bc_ptr != NULL)
11266 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);
11267
11268 if (common->start_ptr != OVECTOR(0))
11269 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
11270
11271 /* Copy the beginning of the string. */
11272 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11273 {
11274 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11275 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11276 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
11277 JUMPHERE(jump);
11278 }
11279 else if (mode == JIT_PARTIAL_HARD_COMPILE)
11280 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11281
11282 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
11283 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11284 {
11285 sljit_free_compiler(compiler);
11286 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11287 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11288 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11289 return;
11290 }
11291
11292 if (common->might_be_empty)
11293 {
11294 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11295 empty_match_found_label = LABEL();
11296 }
11297
11298 common->accept_label = LABEL();
11299 if (common->accept != NULL)
11300 set_jumps(common->accept, common->accept_label);
11301
11302 /* This means we have a match. Update the ovector. */
11303 copy_ovector(common, re->top_bracket + 1);
11304 common->quit_label = common->forced_quit_label = LABEL();
11305 if (common->quit != NULL)
11306 set_jumps(common->quit, common->quit_label);
11307 if (common->forced_quit != NULL)
11308 set_jumps(common->forced_quit, common->forced_quit_label);
11309 if (minlength_check_failed != NULL)
11310 SET_LABEL(minlength_check_failed, common->forced_quit_label);
11311 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
11312
11313 if (mode != JIT_COMPILE)
11314 {
11315 common->partialmatchlabel = LABEL();
11316 set_jumps(common->partialmatch, common->partialmatchlabel);
11317 return_with_partial_match(common, common->quit_label);
11318 }
11319
11320 if (common->might_be_empty)
11321 empty_match_backtrack_label = LABEL();
11322 compile_backtrackingpath(common, rootbacktrack.top);
11323 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11324 {
11325 sljit_free_compiler(compiler);
11326 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11327 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11328 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11329 return;
11330 }
11331
11332 SLJIT_ASSERT(rootbacktrack.prev == NULL);
11333 reset_match_label = LABEL();
11334
11335 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11336 {
11337 /* Update hit_start only in the first time. */
11338 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
11339 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
11340 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
11341 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
11342 JUMPHERE(jump);
11343 }
11344
11345 /* Check we have remaining characters. */
11346 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
11347 {
11348 SLJIT_ASSERT(common->match_end_ptr != 0);
11349 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
11350 }
11351
11352 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
11353 (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
11354
11355 if ((re->options & PCRE_ANCHORED) == 0)
11356 {
11357 if (common->ff_newline_shortcut != NULL)
11358 {
11359 if ((re->options & PCRE_FIRSTLINE) == 0)
11360 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
11361 /* There cannot be more newlines here. */
11362 }
11363 else
11364 CMPTO(SLJIT_LESS, STR_PTR, 0, ((re->options & PCRE_FIRSTLINE) == 0) ? STR_END : TMP1, 0, mainloop_label);
11365 }
11366
11367 /* No more remaining characters. */
11368 if (reqbyte_notfound != NULL)
11369 JUMPHERE(reqbyte_notfound);
11370
11371 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11372 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
11373
11374 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
11375 JUMPTO(SLJIT_JUMP, common->quit_label);
11376
11377 flush_stubs(common);
11378
11379 if (common->might_be_empty)
11380 {
11381 JUMPHERE(empty_match);
11382 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11383 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
11384 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
11385 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
11386 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
11387 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11388 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
11389 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
11390 }
11391
11392 common->fast_forward_bc_ptr = NULL;
11393 common->fast_fail_start_ptr = 0;
11394 common->fast_fail_end_ptr = 0;
11395 common->currententry = common->entries;
11396 common->local_exit = TRUE;
11397 quit_label = common->quit_label;
11398 while (common->currententry != NULL)
11399 {
11400 /* Might add new entries. */
11401 compile_recurse(common);
11402 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11403 {
11404 sljit_free_compiler(compiler);
11405 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11406 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11407 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11408 return;
11409 }
11410 flush_stubs(common);
11411 common->currententry = common->currententry->next;
11412 }
11413 common->local_exit = FALSE;
11414 common->quit_label = quit_label;
11415
11416 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
11417 /* This is a (really) rare case. */
11418 set_jumps(common->stackalloc, LABEL());
11419 /* RETURN_ADDR is not a saved register. */
11420 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11421
11422 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1);
11423
11424 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STACK_TOP, 0);
11425 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
11426 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
11427 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
11428 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
11429
11430 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
11431 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
11432 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
11433 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
11434 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11435 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
11436 sljit_emit_fast_return(compiler, TMP1, 0);
11437
11438 /* Allocation failed. */
11439 JUMPHERE(jump);
11440 /* We break the return address cache here, but this is a really rare case. */
11441 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
11442 JUMPTO(SLJIT_JUMP, common->quit_label);
11443
11444 /* Call limit reached. */
11445 set_jumps(common->calllimit, LABEL());
11446 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
11447 JUMPTO(SLJIT_JUMP, common->quit_label);
11448
11449 if (common->revertframes != NULL)
11450 {
11451 set_jumps(common->revertframes, LABEL());
11452 do_revertframes(common);
11453 }
11454 if (common->wordboundary != NULL)
11455 {
11456 set_jumps(common->wordboundary, LABEL());
11457 check_wordboundary(common);
11458 }
11459 if (common->anynewline != NULL)
11460 {
11461 set_jumps(common->anynewline, LABEL());
11462 check_anynewline(common);
11463 }
11464 if (common->hspace != NULL)
11465 {
11466 set_jumps(common->hspace, LABEL());
11467 check_hspace(common);
11468 }
11469 if (common->vspace != NULL)
11470 {
11471 set_jumps(common->vspace, LABEL());
11472 check_vspace(common);
11473 }
11474 if (common->casefulcmp != NULL)
11475 {
11476 set_jumps(common->casefulcmp, LABEL());
11477 do_casefulcmp(common);
11478 }
11479 if (common->caselesscmp != NULL)
11480 {
11481 set_jumps(common->caselesscmp, LABEL());
11482 do_caselesscmp(common);
11483 }
11484 if (common->reset_match != NULL)
11485 {
11486 set_jumps(common->reset_match, LABEL());
11487 do_reset_match(common, (re->top_bracket + 1) * 2);
11488 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
11489 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
11490 JUMPTO(SLJIT_JUMP, reset_match_label);
11491 }
11492 #ifdef SUPPORT_UTF
11493 #ifdef COMPILE_PCRE8
11494 if (common->utfreadchar != NULL)
11495 {
11496 set_jumps(common->utfreadchar, LABEL());
11497 do_utfreadchar(common);
11498 }
11499 if (common->utfreadchar16 != NULL)
11500 {
11501 set_jumps(common->utfreadchar16, LABEL());
11502 do_utfreadchar16(common);
11503 }
11504 if (common->utfreadtype8 != NULL)
11505 {
11506 set_jumps(common->utfreadtype8, LABEL());
11507 do_utfreadtype8(common);
11508 }
11509 #endif /* COMPILE_PCRE8 */
11510 #endif /* SUPPORT_UTF */
11511 #ifdef SUPPORT_UCP
11512 if (common->getucd != NULL)
11513 {
11514 set_jumps(common->getucd, LABEL());
11515 do_getucd(common);
11516 }
11517 #endif
11518
11519 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11520 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11521
11522 executable_func = sljit_generate_code(compiler);
11523 executable_size = sljit_get_generated_code_size(compiler);
11524 label_addr = common->label_addrs;
11525 while (label_addr != NULL)
11526 {
11527 *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
11528 label_addr = label_addr->next;
11529 }
11530 sljit_free_compiler(compiler);
11531 if (executable_func == NULL)
11532 {
11533 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11534 return;
11535 }
11536
11537 /* Reuse the function descriptor if possible. */
11538 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
11539 functions = (executable_functions *)extra->executable_jit;
11540 else
11541 {
11542 /* Note: If your memory-checker has flagged the allocation below as a
11543 * memory leak, it is probably because you either forgot to call
11544 * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
11545 * pcre16_extra) object, or you called said function after having
11546 * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
11547 * of the object. (The function will only free the JIT data if the
11548 * bit remains set, as the bit indicates that the pointer to the data
11549 * is valid.)
11550 */
11551 functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data);
11552 if (functions == NULL)
11553 {
11554 /* This case is highly unlikely since we just recently
11555 freed a lot of memory. Not impossible though. */
11556 sljit_free_code(executable_func);
11557 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11558 return;
11559 }
11560 memset(functions, 0, sizeof(executable_functions));
11561 functions->top_bracket = (re->top_bracket + 1) * 2;
11562 functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
11563 extra->executable_jit = functions;
11564 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
11565 }
11566
11567 functions->executable_funcs[mode] = executable_func;
11568 functions->read_only_data_heads[mode] = common->read_only_data_head;
11569 functions->executable_sizes[mode] = executable_size;
11570 }
11571
jit_machine_stack_exec(jit_arguments * arguments,void * executable_func)11572 static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func)
11573 {
11574 union {
11575 void *executable_func;
11576 jit_function call_executable_func;
11577 } convert_executable_func;
11578 sljit_u8 local_space[MACHINE_STACK_SIZE];
11579 struct sljit_stack local_stack;
11580
11581 local_stack.min_start = local_space;
11582 local_stack.start = local_space;
11583 local_stack.end = local_space + MACHINE_STACK_SIZE;
11584 local_stack.top = local_space + MACHINE_STACK_SIZE;
11585 arguments->stack = &local_stack;
11586 convert_executable_func.executable_func = executable_func;
11587 return convert_executable_func.call_executable_func(arguments);
11588 }
11589
11590 int
PRIV(jit_exec)11591 PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
11592 int length, int start_offset, int options, int *offsets, int offset_count)
11593 {
11594 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
11595 union {
11596 void *executable_func;
11597 jit_function call_executable_func;
11598 } convert_executable_func;
11599 jit_arguments arguments;
11600 int max_offset_count;
11601 int retval;
11602 int mode = JIT_COMPILE;
11603
11604 if ((options & PCRE_PARTIAL_HARD) != 0)
11605 mode = JIT_PARTIAL_HARD_COMPILE;
11606 else if ((options & PCRE_PARTIAL_SOFT) != 0)
11607 mode = JIT_PARTIAL_SOFT_COMPILE;
11608
11609 if (functions->executable_funcs[mode] == NULL)
11610 return PCRE_ERROR_JIT_BADOPTION;
11611
11612 /* Sanity checks should be handled by pcre_exec. */
11613 arguments.str = subject + start_offset;
11614 arguments.begin = subject;
11615 arguments.end = subject + length;
11616 arguments.mark_ptr = NULL;
11617 /* JIT decreases this value less frequently than the interpreter. */
11618 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
11619 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
11620 arguments.limit_match = functions->limit_match;
11621 arguments.notbol = (options & PCRE_NOTBOL) != 0;
11622 arguments.noteol = (options & PCRE_NOTEOL) != 0;
11623 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
11624 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
11625 arguments.offsets = offsets;
11626 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
11627 arguments.real_offset_count = offset_count;
11628
11629 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11630 the output vector for storing captured strings, with the remainder used as
11631 workspace. We don't need the workspace here. For compatibility, we limit the
11632 number of captured strings in the same way as pcre_exec(), so that the user
11633 gets the same result with and without JIT. */
11634
11635 if (offset_count != 2)
11636 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
11637 max_offset_count = functions->top_bracket;
11638 if (offset_count > max_offset_count)
11639 offset_count = max_offset_count;
11640 arguments.offset_count = offset_count;
11641
11642 if (functions->callback)
11643 arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
11644 else
11645 arguments.stack = (struct sljit_stack *)functions->userdata;
11646
11647 if (arguments.stack == NULL)
11648 retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
11649 else
11650 {
11651 convert_executable_func.executable_func = functions->executable_funcs[mode];
11652 retval = convert_executable_func.call_executable_func(&arguments);
11653 }
11654
11655 if (retval * 2 > offset_count)
11656 retval = 0;
11657 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
11658 *(extra_data->mark) = arguments.mark_ptr;
11659
11660 return retval;
11661 }
11662
11663 #if defined COMPILE_PCRE8
11664 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_jit_exec(const pcre * argument_re,const pcre_extra * extra_data,PCRE_SPTR subject,int length,int start_offset,int options,int * offsets,int offset_count,pcre_jit_stack * stack)11665 pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
11666 PCRE_SPTR subject, int length, int start_offset, int options,
11667 int *offsets, int offset_count, pcre_jit_stack *stack)
11668 #elif defined COMPILE_PCRE16
11669 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11670 pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
11671 PCRE_SPTR16 subject, int length, int start_offset, int options,
11672 int *offsets, int offset_count, pcre16_jit_stack *stack)
11673 #elif defined COMPILE_PCRE32
11674 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11675 pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
11676 PCRE_SPTR32 subject, int length, int start_offset, int options,
11677 int *offsets, int offset_count, pcre32_jit_stack *stack)
11678 #endif
11679 {
11680 pcre_uchar *subject_ptr = (pcre_uchar *)subject;
11681 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
11682 union {
11683 void *executable_func;
11684 jit_function call_executable_func;
11685 } convert_executable_func;
11686 jit_arguments arguments;
11687 int max_offset_count;
11688 int retval;
11689 int mode = JIT_COMPILE;
11690
11691 SLJIT_UNUSED_ARG(argument_re);
11692
11693 /* Plausibility checks */
11694 if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
11695
11696 if ((options & PCRE_PARTIAL_HARD) != 0)
11697 mode = JIT_PARTIAL_HARD_COMPILE;
11698 else if ((options & PCRE_PARTIAL_SOFT) != 0)
11699 mode = JIT_PARTIAL_SOFT_COMPILE;
11700
11701 if (functions == NULL || functions->executable_funcs[mode] == NULL)
11702 return PCRE_ERROR_JIT_BADOPTION;
11703
11704 /* Sanity checks should be handled by pcre_exec. */
11705 arguments.stack = (struct sljit_stack *)stack;
11706 arguments.str = subject_ptr + start_offset;
11707 arguments.begin = subject_ptr;
11708 arguments.end = subject_ptr + length;
11709 arguments.mark_ptr = NULL;
11710 /* JIT decreases this value less frequently than the interpreter. */
11711 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
11712 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
11713 arguments.limit_match = functions->limit_match;
11714 arguments.notbol = (options & PCRE_NOTBOL) != 0;
11715 arguments.noteol = (options & PCRE_NOTEOL) != 0;
11716 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
11717 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
11718 arguments.offsets = offsets;
11719 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
11720 arguments.real_offset_count = offset_count;
11721
11722 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11723 the output vector for storing captured strings, with the remainder used as
11724 workspace. We don't need the workspace here. For compatibility, we limit the
11725 number of captured strings in the same way as pcre_exec(), so that the user
11726 gets the same result with and without JIT. */
11727
11728 if (offset_count != 2)
11729 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
11730 max_offset_count = functions->top_bracket;
11731 if (offset_count > max_offset_count)
11732 offset_count = max_offset_count;
11733 arguments.offset_count = offset_count;
11734
11735 convert_executable_func.executable_func = functions->executable_funcs[mode];
11736 retval = convert_executable_func.call_executable_func(&arguments);
11737
11738 if (retval * 2 > offset_count)
11739 retval = 0;
11740 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
11741 *(extra_data->mark) = arguments.mark_ptr;
11742
11743 return retval;
11744 }
11745
11746 void
PRIV(jit_free)11747 PRIV(jit_free)(void *executable_funcs)
11748 {
11749 int i;
11750 executable_functions *functions = (executable_functions *)executable_funcs;
11751 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
11752 {
11753 if (functions->executable_funcs[i] != NULL)
11754 sljit_free_code(functions->executable_funcs[i]);
11755 free_read_only_data(functions->read_only_data_heads[i], NULL);
11756 }
11757 SLJIT_FREE(functions, compiler->allocator_data);
11758 }
11759
11760 int
PRIV(jit_get_size)11761 PRIV(jit_get_size)(void *executable_funcs)
11762 {
11763 int i;
11764 sljit_uw size = 0;
11765 sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
11766 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
11767 size += executable_sizes[i];
11768 return (int)size;
11769 }
11770
11771 const char*
PRIV(jit_get_target)11772 PRIV(jit_get_target)(void)
11773 {
11774 return sljit_get_platform_name();
11775 }
11776
11777 #if defined COMPILE_PCRE8
11778 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)11779 pcre_jit_stack_alloc(int startsize, int maxsize)
11780 #elif defined COMPILE_PCRE16
11781 PCRE_EXP_DECL pcre16_jit_stack *
11782 pcre16_jit_stack_alloc(int startsize, int maxsize)
11783 #elif defined COMPILE_PCRE32
11784 PCRE_EXP_DECL pcre32_jit_stack *
11785 pcre32_jit_stack_alloc(int startsize, int maxsize)
11786 #endif
11787 {
11788 if (startsize < 1 || maxsize < 1)
11789 return NULL;
11790 if (startsize > maxsize)
11791 startsize = maxsize;
11792 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
11793 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
11794 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL);
11795 }
11796
11797 #if defined COMPILE_PCRE8
11798 PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack * stack)11799 pcre_jit_stack_free(pcre_jit_stack *stack)
11800 #elif defined COMPILE_PCRE16
11801 PCRE_EXP_DECL void
11802 pcre16_jit_stack_free(pcre16_jit_stack *stack)
11803 #elif defined COMPILE_PCRE32
11804 PCRE_EXP_DECL void
11805 pcre32_jit_stack_free(pcre32_jit_stack *stack)
11806 #endif
11807 {
11808 sljit_free_stack((struct sljit_stack *)stack, NULL);
11809 }
11810
11811 #if defined COMPILE_PCRE8
11812 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)11813 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
11814 #elif defined COMPILE_PCRE16
11815 PCRE_EXP_DECL void
11816 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
11817 #elif defined COMPILE_PCRE32
11818 PCRE_EXP_DECL void
11819 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
11820 #endif
11821 {
11822 executable_functions *functions;
11823 if (extra != NULL &&
11824 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
11825 extra->executable_jit != NULL)
11826 {
11827 functions = (executable_functions *)extra->executable_jit;
11828 functions->callback = callback;
11829 functions->userdata = userdata;
11830 }
11831 }
11832
11833 #if defined COMPILE_PCRE8
11834 PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)11835 pcre_jit_free_unused_memory(void)
11836 #elif defined COMPILE_PCRE16
11837 PCRE_EXP_DECL void
11838 pcre16_jit_free_unused_memory(void)
11839 #elif defined COMPILE_PCRE32
11840 PCRE_EXP_DECL void
11841 pcre32_jit_free_unused_memory(void)
11842 #endif
11843 {
11844 sljit_free_unused_memory_exec();
11845 }
11846
11847 #else /* SUPPORT_JIT */
11848
11849 /* These are dummy functions to avoid linking errors when JIT support is not
11850 being compiled. */
11851
11852 #if defined COMPILE_PCRE8
11853 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)11854 pcre_jit_stack_alloc(int startsize, int maxsize)
11855 #elif defined COMPILE_PCRE16
11856 PCRE_EXP_DECL pcre16_jit_stack *
11857 pcre16_jit_stack_alloc(int startsize, int maxsize)
11858 #elif defined COMPILE_PCRE32
11859 PCRE_EXP_DECL pcre32_jit_stack *
11860 pcre32_jit_stack_alloc(int startsize, int maxsize)
11861 #endif
11862 {
11863 (void)startsize;
11864 (void)maxsize;
11865 return NULL;
11866 }
11867
11868 #if defined COMPILE_PCRE8
11869 PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack * stack)11870 pcre_jit_stack_free(pcre_jit_stack *stack)
11871 #elif defined COMPILE_PCRE16
11872 PCRE_EXP_DECL void
11873 pcre16_jit_stack_free(pcre16_jit_stack *stack)
11874 #elif defined COMPILE_PCRE32
11875 PCRE_EXP_DECL void
11876 pcre32_jit_stack_free(pcre32_jit_stack *stack)
11877 #endif
11878 {
11879 (void)stack;
11880 }
11881
11882 #if defined COMPILE_PCRE8
11883 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)11884 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
11885 #elif defined COMPILE_PCRE16
11886 PCRE_EXP_DECL void
11887 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
11888 #elif defined COMPILE_PCRE32
11889 PCRE_EXP_DECL void
11890 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
11891 #endif
11892 {
11893 (void)extra;
11894 (void)callback;
11895 (void)userdata;
11896 }
11897
11898 #if defined COMPILE_PCRE8
11899 PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)11900 pcre_jit_free_unused_memory(void)
11901 #elif defined COMPILE_PCRE16
11902 PCRE_EXP_DECL void
11903 pcre16_jit_free_unused_memory(void)
11904 #elif defined COMPILE_PCRE32
11905 PCRE_EXP_DECL void
11906 pcre32_jit_free_unused_memory(void)
11907 #endif
11908 {
11909 }
11910
11911 #endif
11912
11913 /* End of pcre_jit_compile.c */
11914